Merge pull request #1126 from lammps/patch-18-sep-2018

patch 18Sep18
Merge pull request #1127 from akohlmey/reax-bonds-typo
2018-09-18 21:50:30 -04:00 · 2018-09-18 18:05:57 -04:00 · 2018-09-18 17:41:02 -04:00 · 2018-09-18 17:24:10 -04:00 · 2018-09-18 16:57:10 -04:00 · 2018-09-18 15:50:17 -04:00
209 changed files with 7974 additions and 1216 deletions
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@ -17,6 +17,7 @@ src/GPU/*             @ndtrung81
 src/KOKKOS/*          @stanmoore1
 src/KIM/*             @ellio167
 src/LATTE/*           @cnegre
+src/MESSAGE/*         @sjplimp
 src/SPIN/*            @julient31
 src/USER-CGDNA/*      @ohenrich
 src/USER-CGSDK/*      @akohlmey
@ -32,16 +33,82 @@ src/USER-PHONON/*     @lingtikong
 src/USER-OMP/*        @akohlmey
 src/USER-QMMM/*       @akohlmey
 src/USER-REAXC/*      @hasanmetin
+src/USER-SCAFACOS/*   @rhalver
 src/USER-TALLY/*      @akohlmey
 src/USER-UEF/*        @danicholson
 src/USER-VTK/*        @rbberger

+
 # individual files in packages
 src/GPU/pair_vashishta_gpu.*        @andeplane
 src/KOKKOS/pair_vashishta_kokkos.*  @andeplane
 src/MANYBODY/pair_vashishta_table.* @andeplane
+src/MANYBODY/pair_atm.*             @sergeylishchuk
 src/USER-MISC/fix_bond_react.*      @jrgissing
 src/USER-MISC/*_grem.*              @dstelter92
+src/USER-MISC/compute_stress_mop*.* @RomainVermorel
+
+# core LAMMPS classes
+src/lammps.*              @sjplimp
+src/pointers.h            @sjplimp
+src/atom.*                @sjplimp
+src/atom_vec.*            @sjplimp
+src/angle.*               @sjplimp
+src/bond.*                @sjplimp
+src/comm*.*               @sjplimp
+src/compute.*             @sjplimp
+src/dihedral.*            @sjplimp
+src/domain.*              @sjplimp
+src/dump*.*               @sjplimp
+src/error.*               @sjplimp
+src/finish.*              @sjplimp
+src/fix.*                 @sjplimp
+src/force.*               @sjplimp
+src/group.*               @sjplimp
+src/improper.*            @sjplimp
+src/kspace.*              @sjplimp
+src/lmptype.h             @sjplimp
+src/library.*             @sjplimp
+src/main.cpp              @sjplimp
+src/memory.*              @sjplimp
+src/modify.*              @sjplimp
+src/molecule.*            @sjplimp
+src/my_page.h             @sjplimp
+src/my_pool_chunk.h       @sjplimp
+src/npair*.*              @sjplimp
+src/ntopo*.*              @sjplimp
+src/nstencil*.*           @sjplimp
+src/neighbor.*            @sjplimp
+src/nbin*.*               @sjplimp
+src/neigh_*.*             @sjplimp
+src/output.*              @sjplimp
+src/pair.*                @sjplimp
+src/rcb.*                 @sjplimp
+src/random_*.*            @sjplimp
+src/region*.*             @sjplimp
+src/rcb.*                 @sjplimp
+src/read*.*               @sjplimp
+src/rerun.*               @sjplimp
+src/run.*                 @sjplimp
+src/respa.*               @sjplimp
+src/set.*                 @sjplimp
+src/special.*             @sjplimp
+src/suffix.h              @sjplimp
+src/thermo.*              @sjplimp
+src/universe.*            @sjplimp
+src/update.*              @sjplimp
+src/variable.*            @sjplimp
+src/verlet.*              @sjplimp
+src/velocity.*            @sjplimp
+src/write_data.*          @sjplimp
+src/write_restart.*       @sjplimp
+
+# overrides for specific files
+src/dump_movie.*          @akohlmey
+src/exceptions.h          @rbberger
+src/fix_nh.*              @athomps
+src/info.*                @akohlmey @rbberger
+src/timer.*               @akohlmey

 # tools
 tools/msi2lmp/*       @akohlmey
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@ -13,7 +13,7 @@ get_filename_component(LAMMPS_DOC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../doc ABSOLUT


 # To avoid conflicts with the conventional Makefile build system, we build everything here
-file(GLOB LIB_SOURCES ${LAMMPS_SOURCE_DIR}/*.cpp)
+file(GLOB LIB_SOURCES ${LAMMPS_SOURCE_DIR}/[^.]*.cpp)
 file(GLOB LMP_SOURCES ${LAMMPS_SOURCE_DIR}/main.cpp)
 list(REMOVE_ITEM LIB_SOURCES ${LMP_SOURCES})

@ -348,7 +348,7 @@ if(PKG_MSCG OR PKG_USER-ATC OR PKG_USER-AWPMD OR PKG_USER-QUIP OR PKG_LATTE)
  find_package(BLAS)
  if(NOT LAPACK_FOUND OR NOT BLAS_FOUND)
    enable_language(Fortran)
-    file(GLOB LAPACK_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/linalg/*.[fF])
+    file(GLOB LAPACK_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/linalg/[^.]*.[fF])
    add_library(linalg STATIC ${LAPACK_SOURCES})
    set(LAPACK_LIBRARIES linalg)
  else()
@ -550,8 +550,9 @@ if(PKG_USER-SMD)
    set(EIGEN3_INCLUDE_DIR ${SOURCE_DIR})
    list(APPEND LAMMPS_DEPS Eigen3_build)
  else()
-    find_package(Eigen3)
-    if(NOT Eigen3_FOUND)
+    find_package(Eigen3 NO_MODULE)
+    mark_as_advanced(Eigen3_DIR)
+    if(NOT EIGEN3_FOUND)
      message(FATAL_ERROR "Eigen3 not found, help CMake to find it by setting EIGEN3_INCLUDE_DIR, or set DOWNLOAD_EIGEN3=ON to download it")
    endif()
  endif()
@ -603,8 +604,9 @@ endif()

 if(PKG_MESSAGE)
  option(MESSAGE_ZMQ "Use ZeroMQ in MESSAGE package" OFF)
-  file(GLOB_RECURSE cslib_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/*.F
-      ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/*.c ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/*.cpp)
+  file(GLOB_RECURSE cslib_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/[^.]*.F
+      ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/[^.]*.c
+      ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/[^.]*.cpp)

  if(BUILD_SHARED_LIBS)
      add_library(cslib SHARED ${cslib_SOURCES})
@ -720,8 +722,8 @@ RegisterStyles(${LAMMPS_SOURCE_DIR})
 foreach(PKG ${DEFAULT_PACKAGES})
  set(${PKG}_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/${PKG})

-  file(GLOB ${PKG}_SOURCES ${${PKG}_SOURCES_DIR}/*.cpp)
-  file(GLOB ${PKG}_HEADERS ${${PKG}_SOURCES_DIR}/*.h)
+  file(GLOB ${PKG}_SOURCES ${${PKG}_SOURCES_DIR}/[^.]*.cpp)
+  file(GLOB ${PKG}_HEADERS ${${PKG}_SOURCES_DIR}/[^.]*.h)

  # check for package files in src directory due to old make system
  DetectBuildSystemConflict(${LAMMPS_SOURCE_DIR} ${${PKG}_SOURCES} ${${PKG}_HEADERS})
@ -739,8 +741,8 @@ endforeach()
 foreach(PKG ${ACCEL_PACKAGES})
  set(${PKG}_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/${PKG})

-  file(GLOB ${PKG}_SOURCES ${${PKG}_SOURCES_DIR}/*.cpp)
-  file(GLOB ${PKG}_HEADERS ${${PKG}_SOURCES_DIR}/*.h)
+  file(GLOB ${PKG}_SOURCES ${${PKG}_SOURCES_DIR}/[^.]*.cpp)
+  file(GLOB ${PKG}_HEADERS ${${PKG}_SOURCES_DIR}/[^.]*.h)

  # check for package files in src directory due to old make system
  DetectBuildSystemConflict(${LAMMPS_SOURCE_DIR} ${${PKG}_SOURCES} ${${PKG}_HEADERS})
@ -754,8 +756,10 @@ foreach(SIMPLE_LIB REAX MEAM POEMS USER-ATC USER-AWPMD USER-COLVARS USER-H5MD
  if(PKG_${SIMPLE_LIB})
    string(REGEX REPLACE "^USER-" "" PKG_LIB "${SIMPLE_LIB}")
    string(TOLOWER "${PKG_LIB}" PKG_LIB)
-    file(GLOB_RECURSE ${PKG_LIB}_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/${PKG_LIB}/*.F
-      ${LAMMPS_LIB_SOURCE_DIR}/${PKG_LIB}/*.c ${LAMMPS_LIB_SOURCE_DIR}/${PKG_LIB}/*.cpp)
+    file(GLOB_RECURSE ${PKG_LIB}_SOURCES
+      ${LAMMPS_LIB_SOURCE_DIR}/${PKG_LIB}/[^.]*.F
+      ${LAMMPS_LIB_SOURCE_DIR}/${PKG_LIB}/[^.]*.c
+      ${LAMMPS_LIB_SOURCE_DIR}/${PKG_LIB}/[^.]*.cpp)
    add_library(${PKG_LIB} STATIC ${${PKG_LIB}_SOURCES})
    list(APPEND LAMMPS_LINK_LIBS ${PKG_LIB})
    if(PKG_LIB STREQUAL awpmd)
@ -830,6 +834,7 @@ if(PKG_USER-OMP)
    set(USER-OMP_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/USER-OMP)
    set(USER-OMP_SOURCES ${USER-OMP_SOURCES_DIR}/thr_data.cpp
                         ${USER-OMP_SOURCES_DIR}/thr_omp.cpp
+                         ${USER-OMP_SOURCES_DIR}/fix_omp.cpp
                         ${USER-OMP_SOURCES_DIR}/fix_nh_omp.cpp
                         ${USER-OMP_SOURCES_DIR}/fix_nh_sphere_omp.cpp
                         ${USER-OMP_SOURCES_DIR}/domain_omp.cpp)
@ -838,7 +843,7 @@ if(PKG_USER-OMP)

    # detects styles which have USER-OMP version
    RegisterStylesExt(${USER-OMP_SOURCES_DIR} omp OMP_SOURCES)
-
+    RegisterFixStyle("${USER-OMP_SOURCES_DIR}/fix_omp.h")

    get_property(USER-OMP_SOURCES GLOBAL PROPERTY OMP_SOURCES)

@ -1038,7 +1043,7 @@ if(PKG_GPU)
      set(GPU_PREC_SETTING "SINGLE_SINGLE")
    endif()

-    file(GLOB GPU_LIB_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/gpu/*.cpp)
+    file(GLOB GPU_LIB_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cpp)
    file(MAKE_DIRECTORY ${LAMMPS_LIB_BINARY_DIR}/gpu)

    if(GPU_API STREQUAL "CUDA")
@ -1051,15 +1056,15 @@ if(PKG_GPU)

      set(GPU_ARCH "sm_30" CACHE STRING "LAMMPS GPU CUDA SM architecture (e.g. sm_60)")

-      file(GLOB GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/*.cu ${CMAKE_CURRENT_SOURCE_DIR}/gpu/*.cu)
+      file(GLOB GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cu ${CMAKE_CURRENT_SOURCE_DIR}/gpu/[^.]*.cu)
      list(REMOVE_ITEM GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_pppm.cu)

      cuda_include_directories(${LAMMPS_LIB_SOURCE_DIR}/gpu ${LAMMPS_LIB_BINARY_DIR}/gpu)

      if(CUDPP_OPT)
        cuda_include_directories(${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini)
-        file(GLOB GPU_LIB_CUDPP_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini/*.cpp)
-        file(GLOB GPU_LIB_CUDPP_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini/*.cu)
+        file(GLOB GPU_LIB_CUDPP_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini/[^.]*.cpp)
+        file(GLOB GPU_LIB_CUDPP_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini/[^.]*.cu)
      endif()

      cuda_compile_cubin(GPU_GEN_OBJS ${GPU_LIB_CU} OPTIONS
@ -1108,7 +1113,7 @@ if(PKG_GPU)
      include(OpenCLUtils)
      set(OCL_COMMON_HEADERS ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_preprocessor.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_aux_fun1.h)

-      file(GLOB GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/*.cu)
+      file(GLOB GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cu)
      list(REMOVE_ITEM GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne.cu ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne_lj.cu)

      foreach(GPU_KERNEL ${GPU_LIB_CU})
@ -1235,7 +1240,7 @@ if(BUILD_DOC)

  set(VIRTUALENV ${PYTHON_EXECUTABLE} -m virtualenv)

-  file(GLOB DOC_SOURCES ${LAMMPS_DOC_DIR}/src/*.txt)
+  file(GLOB DOC_SOURCES ${LAMMPS_DOC_DIR}/src/[^.]*.txt)
  file(GLOB PDF_EXTRA_SOURCES ${LAMMPS_DOC_DIR}/src/lammps_commands*.txt ${LAMMPS_DOC_DIR}/src/lammps_support.txt ${LAMMPS_DOC_DIR}/src/lammps_tutorials.txt)
  list(REMOVE_ITEM DOC_SOURCES ${PDF_EXTRA_SOURCES})

--- a/cmake/Modules/StyleHeaderUtils.cmake
+++ b/cmake/Modules/StyleHeaderUtils.cmake
@ -85,19 +85,23 @@ function(RegisterNPairStyle path)
    AddStyleHeader(${path} NPAIR)
 endfunction(RegisterNPairStyle)

+function(RegisterFixStyle path)
+    AddStyleHeader(${path} FIX)
+endfunction(RegisterFixStyle)
+
 function(RegisterStyles search_path)
    FindStyleHeaders(${search_path} ANGLE_CLASS     angle_     ANGLE     ) # angle     ) # force
    FindStyleHeaders(${search_path} ATOM_CLASS      atom_vec_  ATOM_VEC  ) # atom      ) # atom      atom_vec_hybrid
    FindStyleHeaders(${search_path} BODY_CLASS      body_      BODY      ) # body      ) # atom_vec_body
    FindStyleHeaders(${search_path} BOND_CLASS      bond_      BOND      ) # bond      ) # force
-    FindStyleHeaders(${search_path} COMMAND_CLASS   ""         COMMAND   ) # command   ) # input
+    FindStyleHeaders(${search_path} COMMAND_CLASS   "[^.]"     COMMAND   ) # command   ) # input
    FindStyleHeaders(${search_path} COMPUTE_CLASS   compute_   COMPUTE   ) # compute   ) # modify
    FindStyleHeaders(${search_path} DIHEDRAL_CLASS  dihedral_  DIHEDRAL  ) # dihedral  ) # force
    FindStyleHeaders(${search_path} DUMP_CLASS      dump_      DUMP      ) # dump      ) # output    write_dump
    FindStyleHeaders(${search_path} FIX_CLASS       fix_       FIX       ) # fix       ) # modify
    FindStyleHeaders(${search_path} IMPROPER_CLASS  improper_  IMPROPER  ) # improper  ) # force
-    FindStyleHeaders(${search_path} INTEGRATE_CLASS ""         INTEGRATE ) # integrate ) # update
-    FindStyleHeaders(${search_path} KSPACE_CLASS    ""         KSPACE    ) # kspace    ) # force
+    FindStyleHeaders(${search_path} INTEGRATE_CLASS "[^.]"     INTEGRATE ) # integrate ) # update
+    FindStyleHeaders(${search_path} KSPACE_CLASS    "[^.]"     KSPACE    ) # kspace    ) # force
    FindStyleHeaders(${search_path} MINIMIZE_CLASS  min_       MINIMIZE  ) # minimize  ) # update
    FindStyleHeaders(${search_path} NBIN_CLASS      nbin_      NBIN      ) # nbin      ) # neighbor
    FindStyleHeaders(${search_path} NPAIR_CLASS     npair_     NPAIR     ) # npair     ) # neighbor
--- a/doc/src/Build_basics.txt
+++ b/doc/src/Build_basics.txt
@ -292,6 +292,10 @@ This will create a lammps/doc/html dir with the HTML doc pages so that
 you can browse them locally on your system.  Type "make" from the
 lammps/doc dir to see other options.

+NOTE: You can also download a tarball of the documentation for the
+current LAMMPS version (HTML and PDF files), from the website
+"download page"_http://lammps.sandia.gov/download.html.
+
 :line

 Install LAMMPS after a build :h4,link(install)
--- a/doc/src/Build_package.txt
+++ b/doc/src/Build_package.txt
@ -42,7 +42,7 @@ packages:
 "KOKKOS"_Build_extras.html#kokkos,
 "LATTE"_Build_extras.html#latte,
 "MEAM"_Build_extras.html#meam,
-"MESSAGE"_#Build_extras.html#message,
+"MESSAGE"_Build_extras.html#message,
 "MSCG"_Build_extras.html#mscg,
 "OPT"_Build_extras.html#opt,
 "POEMS"_Build_extras.html#poems,
@ -59,7 +59,7 @@ packages:
 "USER-OMP"_Build_extras.html#user-omp,
 "USER-QMMM"_Build_extras.html#user-qmmm,
 "USER-QUIP"_Build_extras.html#user-quip,
-"USER-SCAFACOS"_#Build_extras.html#user-scafacos,
+"USER-SCAFACOS"_Build_extras.html#user-scafacos,
 "USER-SMD"_Build_extras.html#user-smd,
 "USER-VTK"_Build_extras.html#user-vtk :tb(c=6,ea=c,a=l)

--- a/doc/src/Commands_compute.txt
+++ b/doc/src/Commands_compute.txt
@ -35,6 +35,7 @@ KOKKOS, o = USER-OMP, t = OPT.
 "bond/local"_compute_bond_local.html,
 "centro/atom"_compute_centro_atom.html,
 "chunk/atom"_compute_chunk_atom.html,
+"chunk/spread/atom"_compute_chunk_spread_atom.html,
 "cluster/atom"_compute_cluster_atom.html,
 "cna/atom"_compute_cna_atom.html,
 "cnp/atom"_compute_cnp_atom.html,
@ -97,6 +98,7 @@ KOKKOS, o = USER-OMP, t = OPT.
 "property/local"_compute_property_local.html,
 "rdf"_compute_rdf.html,
 "reduce"_compute_reduce.html,
+"reduce/chunk"_compute_reduce_chunk.html,
 "reduce/region"_compute_reduce.html,
 "rigid/local"_compute_rigid_local.html,
 "saed"_compute_saed.html,
--- a/doc/src/Howto_chunk.txt
+++ b/doc/src/Howto_chunk.txt
@ -22,7 +22,7 @@ commands, to calculate various properties of a system:
 "fix ave/chunk"_fix_ave_chunk.html
 any of the "compute */chunk"_compute.html commands :ul

-Here, each of the 3 kinds of chunk-related commands is briefly
+Here, each of the 4 kinds of chunk-related commands is briefly
 overviewed.  Then some examples are given of how to compute different
 properties with chunk commands.

@ -83,8 +83,9 @@ chunk.

 Compute */chunk commands: :h4

-Currently the following computes operate on chunks of atoms to produce
-per-chunk values.
+The following computes operate on chunks of atoms to produce per-chunk
+values.  Any compute whose style name ends in "/chunk" is in this
+category:

 "compute com/chunk"_compute_com_chunk.html
 "compute gyration/chunk"_compute_gyration_chunk.html
@ -111,8 +112,8 @@ of a center of mass, which requires summing mass*position over the
 atoms and then dividing by summed mass.

 All of these computes produce a global vector or global array as
-output, wih one or more values per chunk.  They can be used
-in various ways:
+output, with one or more values per chunk.  The output can be used in
+various ways:

 As input to the "fix ave/time"_fix_ave_time.html command, which can
 write the values to a file and optionally time average them. :ulb,l
@ -122,9 +123,27 @@ histogram values across chunks.  E.g. a histogram of cluster sizes or
 molecule diffusion rates. :l

 As input to special functions of "equal-style
-variables"_variable.html, like sum() and max().  E.g. to find the
-largest cluster or fastest diffusing molecule. :l
-:ule
+variables"_variable.html, like sum() and max() and ave().  E.g. to
+find the largest cluster or fastest diffusing molecule or average
+radius-of-gyration of a set of molecules (chunks). :l,ule
+
+Other chunk commands: :h4
+
+"compute chunk/spread/atom"_compute_chunk_spread_atom.html
+"compute reduce/chunk"_compute_reduce_chunk.html :ul
+
+The "compute chunk/spread/atom"_compute_chunk_spread_atom.html command
+spreads per-chunk values to each atom in the chunk, producing per-atom
+values as its output.  This can be useful for outputting per-chunk
+values to a per-atom "dump file"_dump.html.  Or for using an atom's
+associated chunk value in an "atom-style variable"_variable.html.
+
+The "compute reduce/chunk"_compute_reduce_chunk.html command reduces a
+peratom value across the atoms in each chunk to produce a value per
+chunk.  When used with the "compute
+chunk/spread/atom"_compute_chunk_spread_atom.html command it can
+create peratom values that induce a new set of chunks with a second
+"compute chunk/atom"_compute_chunk_atom.html command.

 Example calculations with chunks :h4

@ -164,3 +183,13 @@ compute cluster all cluster/atom 1.0
 compute cc1 all chunk/atom c_cluster compress yes
 compute size all property/chunk cc1 count
 fix 1 all ave/histo 100 1 100 0 20 20 c_size mode vector ave running beyond ignore file tmp.histo :pre
+
+(6) An example of using a per-chunk value to apply per-atom forces to
+compress individual polymer chains (molecules) in a mixture, is
+explained on the "compute
+chunk/spread/atom"_compute_chunk_spread_atom.html command doc page.
+
+(7) An example of using one set of per-chunk values for molecule
+chunks, to create a 2nd set of micelle-scale chunks (clustered
+molecules, due to hydrophobicity), is explained on the "compute
+reduce/chunk"_compute_reduce_chunk.html command doc page.
--- a/doc/src/Howto_client_server.txt
+++ b/doc/src/Howto_client_server.txt
@ -7,7 +7,7 @@ Documentation"_ld - "LAMMPS Commands"_lc :c

 :line

-Using LAMMPS in client/server mode
+Using LAMMPS in client/server mode :h3

 Client/server coupling of two codes is where one code is the "client"
 and sends request messages to a "server" code.  The server responds to
@ -61,7 +61,7 @@ client or server.
 "message"_message.html
 "fix client md"_fix_client_md.html = LAMMPS is a client for running MD
 "server md"_server_md.html = LAMMPS is a server for computing MD forces
-"server mc"_server_mc.html = LAMMPS is a server for computing a Monte Carlo energy
+"server mc"_server_mc.html = LAMMPS is a server for computing a Monte Carlo energy :ul

 The server doc files give details of the message protocols
 for data that is exchanged bewteen the client and server.
@ -119,7 +119,7 @@ For message exchange in {mpi/one} mode:

 Launch both codes in a single mpirun command:

-mpirun -np 2 lmp_mpi -mpicolor 0 -in in.message.client -log log.client : -np 4 lmp_mpi -mpicolor 1 -in in.message.server -log log.server
+mpirun -np 2 lmp_mpi -mpicolor 0 -in in.message.client -log log.client : -np 4 lmp_mpi -mpicolor 1 -in in.message.server -log log.server :pre

 The two -np values determine how many procs the client and the server
 run on.
--- a/doc/src/Howto_nemd.txt
+++ b/doc/src/Howto_nemd.txt
@ -24,6 +24,11 @@ by subtracting out the streaming velocity of the shearing atoms.  The
 velocity profile or other properties of the fluid can be monitored via
 the "fix ave/chunk"_fix_ave_chunk.html command.

+NOTE: A recent (2017) book by "(Daivis and Todd)"_#Daivis-nemd
+discusses use of the SLLOD method and non-equilibrium MD (NEMD)
+thermostatting generally, for both simple and complex fluids,
+e.g. molecular systems.  The latter can be tricky to do correctly.
+
 As discussed in the previous section on non-orthogonal simulation
 boxes, the amount of tilt or skew that can be applied is limited by
 LAMMPS for computational efficiency to be 1/2 of the parallel box
@ -46,3 +51,9 @@ An alternative method for calculating viscosities is provided via the
 NEMD simulations can also be used to measure transport properties of a fluid
 through a pore or channel. Simulations of steady-state flow can be performed
 using the "fix flow/gauss"_fix_flow_gauss.html command.
+
+:line
+
+:link(Daivis-nemd)
+[(Daivis and Todd)] Daivis and Todd, Nonequilibrium Molecular Dynamics (book),
+Cambridge University Press, https://doi.org/10.1017/9781139017848, (2017).
--- a/doc/src/Howto_thermostat.txt
+++ b/doc/src/Howto_thermostat.txt
@ -43,6 +43,11 @@ nvt/asphere"_fix_nvt_asphere.html thermostat not only translation
 velocities but also rotational velocities for spherical and aspherical
 particles.

+NOTE: A recent (2017) book by "(Daivis and Todd)"_#Daivis-thermostat
+discusses use of the SLLOD method and non-equilibrium MD (NEMD)
+thermostatting generally, for both simple and complex fluids,
+e.g. molecular systems.  The latter can be tricky to do correctly.
+
 DPD thermostatting alters pairwise interactions in a manner analogous
 to the per-particle thermostatting of "fix
 langevin"_fix_langevin.html.
@ -87,3 +92,9 @@ specify them explicitly via the "thermo_style
 custom"_thermo_style.html command.  Or you can use the
 "thermo_modify"_thermo_modify.html command to re-define what
 temperature compute is used for default thermodynamic output.
+
+:line
+
+:link(Daivis-thermostat)
+[(Daivis and Todd)] Daivis and Todd, Nonequilibrium Molecular Dynamics (book),
+Cambridge University Press, https://doi.org/10.1017/9781139017848, (2017).
--- a/doc/src/Howto_viscosity.txt
+++ b/doc/src/Howto_viscosity.txt
@ -37,6 +37,11 @@ used to shear the fluid in between them, again with some kind of
 thermostat that modifies only the thermal (non-shearing) components of
 velocity to prevent the fluid from heating up.

+NOTE: A recent (2017) book by "(Daivis and Todd)"_#Daivis-viscosity
+discusses use of the SLLOD method and non-equilibrium MD (NEMD)
+thermostatting generally, for both simple and complex fluids,
+e.g. molecular systems.  The latter can be tricky to do correctly.
+
 In both cases, the velocity profile setup in the fluid by this
 procedure can be monitored by the "fix ave/chunk"_fix_ave_chunk.html
 command, which determines grad(Vstream) in the equation above.
@ -131,3 +136,9 @@ mean-square-displacement formulation for self-diffusivity. The
 time-integrated momentum fluxes play the role of Cartesian
 coordinates, whose mean-square displacement increases linearly
 with time at sufficiently long times.
+
+:line
+
+:link(Daivis-viscosity)
+[(Daivis and Todd)] Daivis and Todd, Nonequilibrium Molecular Dynamics (book),
+Cambridge University Press, https://doi.org/10.1017/9781139017848, (2017).
--- a/doc/src/Install_tarball.txt
+++ b/doc/src/Install_tarball.txt
@ -7,7 +7,7 @@ Documentation"_ld - "LAMMPS Commands"_lc :c

 :line

-Download source as a tarball :h3
+Download source and documentation as a tarball :h3

 You can download a current LAMMPS tarball from the "download page"_download
 of the "LAMMPS website"_lws.
@ -22,6 +22,10 @@ few times per year, and undergo more testing before release.  Patch
 releases occur a couple times per month.  The new contents in all
 releases are listed on the "bug and feature page"_bug of the website.

+Both tarballs include LAMMPS documentation (HTML and PDF files)
+corresponding to that version.  The download page also has an option
+to download the current-version LAMMPS documentation by itself.
+
 Older versions of LAMMPS can also be downloaded from "this
 page"_older.

--- a/doc/src/Manual.txt
+++ b/doc/src/Manual.txt
@ -1,7 +1,7 @@
 <!-- HTML_ONLY -->
 <HEAD>
 <TITLE>LAMMPS Users Manual</TITLE>
-<META NAME="docnumber" CONTENT="5 Sep 2018 version">
+<META NAME="docnumber" CONTENT="18 Sep 2018 version">
 <META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
 <META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation. This software and manual is distributed under the GNU General Public License.">
 </HEAD>
@ -21,7 +21,7 @@
 :line

 LAMMPS Documentation :c,h1
-5 Sep 2018 version :c,h2
+18 Sep 2018 version :c,h2

 "What is a LAMMPS version?"_Manual_version.html

--- a/doc/src/Packages_details.txt
+++ b/doc/src/Packages_details.txt
@ -46,6 +46,7 @@ as contained in the file name.
 "MANYBODY"_#PKG-MANYBODY,
 "MC"_#PKG-MC,
 "MEAM"_#PKG-MEAM,
+"MESSAGE"_#PKG-MESSAGE,
 "MISC"_#PKG-MISC,
 "MOLECULE"_#PKG-MOLECULE,
 "MPIIO"_#PKG-MPIIO,
@ -92,7 +93,7 @@ as contained in the file name.
 "USER-QTB"_#PKG-USER-QTB,
 "USER-QUIP"_#PKG-USER-QUIP,
 "USER-REAXC"_#PKG-USER-REAXC,
-"USER-SCAFACOS"_#USER-SCAFACOS,
+"USER-SCAFACOS"_#PKG-USER-SCAFACOS,
 "USER-SMD"_#PKG-USER-SMD,
 "USER-SMTBQ"_#PKG-USER-SMTBQ,
 "USER-SPH"_#PKG-USER-SPH,
@ -1860,7 +1861,7 @@ examples/reax :ul

 :line

-USER-SCAFACOS package :link(USER-SCAFACOS),h4
+USER-SCAFACOS package :link(PKG-USER-SCAFACOS),h4

 [Contents:]

--- a/doc/src/Run_options.txt
+++ b/doc/src/Run_options.txt
@ -176,7 +176,7 @@ Option -plog will override the name of the partition log files file.N.

 :line

-[-mpicolor] color :link(mpi)
+[-mpicolor] color :link(mpicolor)

 If used, this must be the first command-line argument after the LAMMPS
 executable name.  It is only used when LAMMPS is launched by an mpirun
--- a/doc/src/Speed_kokkos.txt
+++ b/doc/src/Speed_kokkos.txt
@ -106,6 +106,11 @@ modification to the input script is needed. Alternatively, one can run
 with the KOKKOS package by editing the input script as described
 below.

+NOTE: When using a single OpenMP thread, the Kokkos Serial backend (i.e. 
+Makefile.kokkos_mpi_only) will give better performance than the OpenMP 
+backend (i.e. Makefile.kokkos_omp) because some of the overhead to make 
+the code thread-safe is removed. 
+
 NOTE: The default for the "package kokkos"_package.html command is to
 use "full" neighbor lists and set the Newton flag to "off" for both
 pairwise and bonded interactions. However, when running on CPUs, it
@ -122,6 +127,22 @@ mpirun -np 16 lmp_kokkos_mpi_only -k on -sf kk -pk kokkos newton on neigh half c
 If the "newton"_newton.html command is used in the input
 script, it can also override the Newton flag defaults.

+For half neighbor lists and OpenMP, the KOKKOS package uses data 
+duplication (i.e. thread-private arrays) by default to avoid 
+thread-level write conflicts in the force arrays (and other data 
+structures as necessary). Data duplication is typically fastest for 
+small numbers of threads (i.e. 8 or less) but does increase memory 
+footprint and is not scalable to large numbers of threads. An 
+alternative to data duplication is to use thread-level atomics, which 
+don't require duplication. The use of atomics can be forced by compiling 
+with the "-DLMP_KOKKOS_USE_ATOMICS" compile switch. Most but not all 
+Kokkos-enabled pair_styles support data duplication. Alternatively, full 
+neighbor lists avoid the need for duplication or atomics but require 
+more compute operations per atom. When using the Kokkos Serial backend 
+or the OpenMP backend with a single thread, no duplication or atomics are 
+used. For CUDA and half neighbor lists, the KOKKOS package always uses 
+atomics.
+
 [Core and Thread Affinity:]

 When using multi-threading, it is important for performance to bind
--- a/doc/src/commands_list.txt
+++ b/doc/src/commands_list.txt
@ -89,6 +89,8 @@ Commands :h1
   run
   run_style
   server
+   server_mc
+   server_md
   set
   shell
   special_bonds
--- a/doc/src/compute.txt
+++ b/doc/src/compute.txt
@ -183,6 +183,7 @@ compute"_Commands_compute.html doc page are followed by one or more of
 "bond/local"_compute_bond_local.html - distance and energy of each bond
 "centro/atom"_compute_centro_atom.html - centro-symmetry parameter for each atom
 "chunk/atom"_compute_chunk_atom.html - assign chunk IDs to each atom
+"chunk/spread/atom"_compute_chunk_spread_atom.html - spreads chunk values to each atom in chunk
 "cluster/atom"_compute_cluster_atom.html - cluster ID for each atom
 "cna/atom"_compute_cna_atom.html - common neighbor analysis (CNA) for each atom
 "com"_compute_com.html - center-of-mass of group of atoms
@ -225,6 +226,7 @@ compute"_Commands_compute.html doc page are followed by one or more of
 "property/chunk"_compute_property_chunk.html - extract various per-chunk attributes
 "rdf"_compute_rdf.html - radial distribution function g(r) histogram of group of atoms
 "reduce"_compute_reduce.html - combine per-atom quantities into a single global value
+"reduce/chunk"_compute_reduce_chunk.html - reduce per-atom quantities within each chunk
 "reduce/region"_compute_reduce.html - same as compute reduce, within a region
 "rigid/local"_compute_rigid_local.html - extract rigid body attributes
 "slice"_compute_slice.html - extract values from global vector or array
--- a/doc/src/compute_angle_local.txt
+++ b/doc/src/compute_angle_local.txt
@ -10,20 +10,27 @@ compute angle/local command :h3

 [Syntax:]

-compute ID group-ID angle/local value1 value2 ... :pre
+compute ID group-ID angle/local value1 value2 ... keyword args ... :pre

 ID, group-ID are documented in "compute"_compute.html command :ulb,l
 angle/local = style name of this compute command :l
 one or more values may be appended :l
-value = {theta} or {eng} :l
+value = {theta} or {eng} or {v_name} :l
  {theta} = tabulate angles
-  {eng} = tabulate angle energies :pre
+  {eng} = tabulate angle energies
+  {v_name} = equal-style variable with name (see below) :pre
+zero or more keyword/args pairs may be appended :l
+keyword = {set} :l
+  {set} args = theta name
+    theta = only currently allowed arg
+    name = name of variable to set with theta :pre
 :ule

 [Examples:]

 compute 1 all angle/local theta
-compute 1 all angle/local eng theta :pre
+compute 1 all angle/local eng theta 
+compute 1 all angle/local theta v_cos set theta t :pre

 [Description:]

@ -36,6 +43,47 @@ The value {theta} is the angle for the 3 atoms in the interaction.

 The value {eng} is the interaction energy for the angle.

+The value {v_name} can be used together with the {set} keyword to
+compute a user-specified function of the angle theta.  The {name}
+specified for the {v_name} value is the name of an "equal-style
+variable"_variable.html which should evaluate a formula based on a
+variable which will store the angle theta.  This other variable must
+be an "internal-style variable"_variable.html defined in the input
+script; its initial numeric value can be anything.  It must be an
+internal-style variable, because this command resets its value
+directly.  The {set} keyword is used to identify the name of this
+other variable associated with theta.
+
+Note that the value of theta for each angle which is stored in the
+internal variable is in radians, not degrees.
+
+As an example, these commands can be added to the bench/in.rhodo
+script to compute the cosine and cosine^2 of every angle in the system
+and output the statistics in various ways:
+
+variable t internal 0.0
+variable cos equal cos(v_t)
+variable cossq equal cos(v_t)*cos(v_t) :pre
+
+compute 1 all property/local aatom1 aatom2 aatom3 atype
+compute 2 all angle/local eng theta v_cos v_cossq set theta t
+dump 1 all local 100 tmp.dump c_1[*] c_2[*] :pre
+
+compute 3 all reduce ave c_2[*]
+thermo_style custom step temp press c_3[*] :pre
+
+fix 10 all ave/histo 10 10 100 -1 1 20 c_2[3] mode vector file tmp.histo :pre
+
+The "dump local"_dump.html command will output the energy, angle,
+cosine(angle), cosine^2(angle) for every angle in the system.  The
+"thermo_style"_thermo_style.html command will print the average of
+those quantities via the "compute reduce"_compute_reduce.html command
+with thermo output.  And the "fix ave/histo"_fix_ave_histo.html
+command will histogram the cosine(angle) values and write them to a
+file.
+
+:line
+
 The local data stored by this command is generated by looping over all
 the atoms owned on a processor and their angles.  An angle will only
 be included if all 3 atoms in the angle are in the specified compute
@ -65,12 +113,12 @@ dump 1 all local 1000 tmp.dump index c_1\[1\] c_1\[2\] c_1\[3\] c_1\[4\] c_2\[1\
 [Output info:]

 This compute calculates a local vector or local array depending on the
-number of keywords.  The length of the vector or number of rows in the
-array is the number of angles.  If a single keyword is specified, a
-local vector is produced.  If two or more keywords are specified, a
+number of values.  The length of the vector or number of rows in the
+array is the number of angles.  If a single value is specified, a
+local vector is produced.  If two or more values are specified, a
 local array is produced where the number of columns = the number of
-keywords.  The vector or array can be accessed by any command that
-uses local values from a compute as input.  See the "Howto
+values.  The vector or array can be accessed by any command that uses
+local values from a compute as input.  See the "Howto
 output"_Howto_output.html doc page for an overview of LAMMPS output
 options.

--- a/doc/src/compute_bond_local.txt
+++ b/doc/src/compute_bond_local.txt
@ -10,12 +10,12 @@ compute bond/local command :h3

 [Syntax:]

-compute ID group-ID bond/local value1 value2 ... :pre
+compute ID group-ID bond/local value1 value2 ... keyword args ... :pre

 ID, group-ID are documented in "compute"_compute.html command :ulb,l
 bond/local = style name of this compute command :l
 one or more values may be appended :l
-value = {dist} or {engpot} or {force} or {engvib} or {engrot} or {engtrans} or {omega} or {velvib} :l
+value = {dist} or {engpot} or {force} or {engvib} or {engrot} or {engtrans} or {omega} or {velvib} or {v_name} :l
  {dist} = bond distance
  {engpot} = bond potential energy
  {force} = bond force :pre
@ -23,13 +23,22 @@ value = {dist} or {engpot} or {force} or {engvib} or {engrot} or {engtrans} or {
  {engrot} = bond kinetic energy of rotation
  {engtrans} = bond kinetic energy of translation
  {omega} = magnitude of bond angular velocity
-  {velvib} = vibrational velocity along the bond length :pre
+  {velvib} = vibrational velocity along the bond length
+  {v_name} = equal-style variable with name (see below) :pre
+zero or more keyword/args pairs may be appended :l
+keyword = {set} :l
+  {set} args = dist name
+    dist = only currently allowed arg
+    name = name of variable to set with distance (dist) :pre
+:ule
+
 :ule

 [Examples:]

 compute 1 all bond/local engpot
 compute 1 all bond/local dist engpot force :pre
+compute 1 all bond/local dist v_distsq set dist d :pre

 [Description:]

@ -38,6 +47,10 @@ interactions.  The number of datums generated, aggregated across all
 processors, equals the number of bonds in the system, modified by the
 group parameter as explained below.

+All these properties are computed for the pair of atoms in a bond,
+whether the 2 atoms represent a simple diatomic molecule, or are part
+of some larger molecule.
+
 The value {dist} is the current length of the bond.

 The value {engpot} is the potential energy for the bond,
@ -79,9 +92,41 @@ two atoms in the bond towards each other.  A negative value means the
 2 atoms are moving toward each other; a positive value means they are
 moving apart.

-Note that all these properties are computed for the pair of atoms in a
-bond, whether the 2 atoms represent a simple diatomic molecule, or are
-part of some larger molecule.
+The value {v_name} can be used together with the {set} keyword to
+compute a user-specified function of the bond distance.  The {name}
+specified for the {v_name} value is the name of an "equal-style
+variable"_variable.html which should evaluate a formula based on a
+variable which will store the bond distance.  This other variable must
+be an "internal-style variable"_variable.html defined in the input
+script; its initial numeric value can be anything.  It must be an
+internal-style variable, because this command resets its value
+directly.  The {set} keyword is used to identify the name of this
+other variable associated with the bond distance.
+
+As an example, these commands can be added to the bench/in.rhodo
+script to compute the distance^2 of every bond in the system and
+output the statistics in various ways:
+
+variable d internal 0.0
+variable dsq equal v_d*v_d :pre
+
+compute 1 all property/local batom1 batom2 btype
+compute 2 all bond/local engpot dist v_dsq set dist d
+dump 1 all local 100 tmp.dump c_1[*] c_2[*] :pre
+
+compute 3 all reduce ave c_2[*]
+thermo_style custom step temp press c_3[*] :pre
+
+fix 10 all ave/histo 10 10 100 0 6 20 c_2[3] mode vector file tmp.histo :pre
+
+The "dump local"_dump.html command will output the energy, distance,
+distance^2 for every bond in the system.  The
+"thermo_style"_thermo_style.html command will print the average of
+those quantities via the "compute reduce"_compute_reduce.html command
+with thermo output.  And the "fix ave/histo"_fix_ave_histo.html
+command will histogram the distance^2 values and write them to a file.
+
+:line

 The local data stored by this command is generated by looping over all
 the atoms owned on a processor and their bonds.  A bond will only be
@ -111,12 +156,12 @@ dump 1 all local 1000 tmp.dump index c_1\[*\] c_2\[*\] :pre
 [Output info:]

 This compute calculates a local vector or local array depending on the
-number of keywords.  The length of the vector or number of rows in the
-array is the number of bonds.  If a single keyword is specified, a
-local vector is produced.  If two or more keywords are specified, a
-local array is produced where the number of columns = the number of
-keywords.  The vector or array can be accessed by any command that
-uses local values from a compute as input.  See the "Howto
+number of values.  The length of the vector or number of rows in the
+array is the number of bonds.  If a single value is specified, a local
+vector is produced.  If two or more values are specified, a local
+array is produced where the number of columns = the number of values.
+The vector or array can be accessed by any command that uses local
+values from a compute as input.  See the "Howto
 output"_Howto_output.html doc page for an overview of LAMMPS output
 options.

--- a/doc/src/compute_chunk_atom.txt
+++ b/doc/src/compute_chunk_atom.txt
@ -14,7 +14,7 @@ compute ID group-ID chunk/atom style args keyword values ... :pre

 ID, group-ID are documented in "compute"_compute.html command :ulb,l
 chunk/atom = style name of this compute command :l
-style = {bin/1d} or {bin/2d} or {bin/3d} or {bin/sphere} or {type} or {molecule} or {compute/fix/variable}
+style = {bin/1d} or {bin/2d} or {bin/3d} or {bin/sphere} or {type} or {molecule} or c_ID, c_ID\[I\], f_ID, f_ID\[I\], v_name
  {bin/1d} args = dim origin delta
    dim = {x} or {y} or {z}
    origin = {lower} or {center} or {upper} or coordinate value (distance units)
@ -40,7 +40,7 @@ style = {bin/1d} or {bin/2d} or {bin/3d} or {bin/sphere} or {type} or {molecule}
    ncbin = # of concentric circle bins between rmin and rmax
  {type} args = none
  {molecule} args = none
-  {compute/fix/variable} = c_ID, c_ID\[I\], f_ID, f_ID\[I\], v_name with no args
+  c_ID, c_ID\[I\], f_ID, f_ID\[I\], v_name args = none
    c_ID = per-atom vector calculated by a compute with ID
    c_ID\[I\] = Ith column of per-atom array calculated by a compute with ID
    f_ID = per-atom vector calculated by a fix with ID
@ -85,7 +85,8 @@ compute 1 all chunk/atom bin/1d z lower 0.02 units reduced
 compute 1 all chunk/atom bin/2d z lower 1.0 y 0.0 2.5
 compute 1 all chunk/atom molecule region sphere nchunk once ids once compress yes
 compute 1 all chunk/atom bin/sphere 5 5 5 2.0 5.0 5 discard yes
-compute 1 all chunk/atom bin/cylinder z lower 2 10 10 2.0 5.0 3 discard yes :pre
+compute 1 all chunk/atom bin/cylinder z lower 2 10 10 2.0 5.0 3 discard yes
+compute 1 all chunk/atom c_cluster :pre

 [Description:]

@ -386,8 +387,8 @@ described below, which resets {Nchunk}.  The {limit} keyword is then
 applied to the new {Nchunk} value, exactly as described in the
 preceding paragraph.  Note that in this case, all atoms will end up
 with chunk IDs <= {Nc}, but their original values (e.g. molecule ID or
-compute/fix/variable value) may have been > {Nc}, because of the
-compression operation.
+compute/fix/variable) may have been > {Nc}, because of the compression
+operation.

 If {compress yes} is set, and the {compress} keyword comes after the
 {limit} keyword, then the {limit} value of {Nc} is applied first to
--- a/doc/src/compute_chunk_spread_atom.txt
+++ b/doc/src/compute_chunk_spread_atom.txt
@ -0,0 +1,174 @@
+"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
+
+:link(lws,http://lammps.sandia.gov)
+:link(ld,Manual.html)
+:link(lc,Commands_all.html)
+
+:line
+
+compute chunk/spread/atom command :h3
+
+[Syntax:]
+
+compute ID group-ID chunk/spread/atom chunkID input1 input2 ... :pre
+
+ID, group-ID are documented in "compute"_compute.html command :ulb,l
+chunk/spread/atom = style name of this compute command :l
+chunkID = ID of "compute chunk/atom"_compute_chunk_atom.html command :l
+one or more inputs can be listed :l
+input = c_ID, c_ID\[I\], f_ID, f_ID\[I\] :l
+  c_ID = global vector calculated by a compute with ID
+  c_ID\[I\] = Ith column of global array calculated by a compute with ID, I can include wildcard (see below)
+  f_ID = global vector calculated by a fix with ID
+  f_ID\[I\] = Ith column of global array calculated by a fix with ID, I can include wildcard (see below) :pre
+:ule
+
+[Examples:]
+
+compute 1 all chunk/spread/atom mychunk c_com[*] c_gyration :pre
+
+[Description:]
+
+Define a calculation that "spreads" one or more per-chunk values to
+each atom in the chunk.  This can be useful for creating a "dump
+file"_dump.html where each atom lists info about the chunk it is in,
+e.g. for post-processing purposes.  It can also be used in "atom-style
+variables"_variable.html that need info about the chunk each atom is
+in.  Examples are given below.
+
+In LAMMPS, chunks are collections of atoms defined by a "compute
+chunk/atom"_compute_chunk_atom.html command, which assigns each atom
+to a single chunk (or no chunk).  The ID for this command is specified
+as chunkID.  For example, a single chunk could be the atoms in a
+molecule or atoms in a spatial bin.  See the "compute
+chunk/atom"_compute_chunk_atom.html and "Howto chunk"_Howto_chunk.html
+doc pages for details of how chunks can be defined and examples of how
+they can be used to measure properties of a system.
+
+For inputs that are computes, they must be a compute that calculates
+per-chunk values.  These are computes whose style names end in
+"/chunk".
+
+For inputs that are fixes, they should be a fix that calculates
+per-chunk values.  For example, "fix ave/chunk"_fix_ave_chunk.html or
+"fix ave/time"_fix_ave_time.html (assuming it is time-averaging
+per-chunk data).
+
+For each atom, this compute accesses its chunk ID from the specified
+{chunkID} compute, then accesses the per-chunk value in each input.
+Those values are copied to this compute to become the output for that
+atom.
+
+The values generated by this compute will be 0.0 for atoms not in the
+specified compute group {group-ID}.  They will also be 0.0 if the atom
+is not in a chunk, as assigned by the {chunkID} compute.  They will
+also be 0.0 if the current chunk ID for the atom is out-of-bounds with
+respect to the number of chunks stored by a particular input compute
+or fix.
+
+NOTE: LAMMPS does not check that a compute or fix which calculates
+per-chunk values uses the same definition of chunks as this compute.
+It's up to you to be consistent.  Likewise, for a fix input, LAMMPS
+does not check that it is per-chunk data.  It only checks that the fix
+produces a global vector or array.
+
+:line
+
+Each listed input is operated on independently.  
+
+If a bracketed index I is used, it can be specified using a wildcard
+asterisk with the index to effectively specify multiple values.  This
+takes the form "*" or "*n" or "n*" or "m*n".  If N = the number of
+columns in the array, then an asterisk with no numeric values means
+all indices from 1 to N.  A leading asterisk means all indices from 1
+to n (inclusive).  A trailing asterisk means all indices from n to N
+(inclusive).  A middle asterisk means all indices from m to n
+(inclusive).
+
+Using a wildcard is the same as if the individual columns of the array
+had been listed one by one.  E.g. these 2 compute chunk/spread/atom
+commands are equivalent, since the "compute
+com/chunk"_compute_com_chunk.html command creates a global array
+with 3 columns:
+
+compute com all com/chunk mychunk
+compute 10 all chunk/spread/atom mychunk c_com\[*\]
+compute 10 all chunk/spread/atom mychunk c_com\[1\] c_com\[2\] c_com\[3\] :pre
+
+:line
+
+Here is an example of writing a dump file with the center-of-mass
+(COM) for the chunk each atom is in.  The commands below can be added
+to the bench/in.chain script.
+
+compute         cmol all chunk/atom molecule
+compute         com all com/chunk cmol
+compute         comchunk all chunk/spread/atom cmol c_com[*]
+dump            1 all custom 50 tmp.dump id mol type x y z c_comchunk[*]
+dump_modify     1 sort id :pre
+
+The same per-chunk data for each atom could be used to define per-atom
+forces for the "fix addforce"_fix_addforce.html command.  In this
+example the forces act to pull atoms of an extended polymer chain
+towards its COM in an attractive manner.
+
+compute         prop all property/atom xu yu zu
+variable        k equal 0.1
+variable        fx atom v_k*(c_comchunk\[1\]-c_prop\[1\])
+variable        fy atom v_k*(c_comchunk\[2\]-c_prop\[2\])
+variable        fz atom v_k*(c_comchunk\[3\]-c_prop\[3\])
+fix             3 all addforce v_fx v_fy v_fz :pre
+
+Note that "compute property/atom"_compute_property_atom.html is used
+to generate unwrapped coordinates for use in the per-atom force
+calculation, so that the effect of periodic boundaries is accounted
+for properly.
+
+Over time this applied force could shrink each polymer chain's radius
+of gyration in a polymer mixture simulation.  Here is output from the
+bench/in.chain script.  Thermo output is shown for 1000 steps, where
+the last column is the average radius of gyration over all 320 chains
+in the 32000 atom system:
+
+compute         gyr all gyration/chunk cmol
+variable        ave equal ave(c_gyr)
+thermo_style    custom step etotal press v_ave :pre
+
+       0    22.394765    4.6721833     5.128278 
+     100    22.445002    4.8166709    5.0348372 
+     200    22.500128    4.8790392    4.9364875 
+     300    22.534686    4.9183766    4.8590693 
+     400    22.557196    4.9492211    4.7937849 
+     500    22.571017    4.9161853    4.7412008 
+     600    22.573944    5.0229708    4.6931243 
+     700    22.581804    5.0541301    4.6440647 
+     800    22.584683    4.9691734    4.6000016 
+     900     22.59128    5.0247538    4.5611513 
+    1000    22.586832      4.94697    4.5238362 :pre
+
+:line
+
+[Output info:]
+
+This compute calculates a per-atom vector or array, which can be
+accessed by any command that uses per-atom values from a compute as
+input.  See the "Howto output"_Howto_output.html doc page for an
+overview of LAMMPS output options.
+
+The output is a per-atom vector if a single input value is specified,
+otherwise a per-atom array is output.  The number of columns in the
+array is the number of inputs provided.  The per-atom values for the
+vector or each column of the array will be in whatever
+"units"_units.html the corresponding input value is in.
+
+The vector or array values are "intensive".
+
+[Restrictions:] none
+
+[Related commands:]
+
+"compute chunk/atom"_compute_chunk_atom.html, "fix
+ave/chunk"_fix_ave_chunk.html, "compute
+reduce/chunk"_compute_reduce_chunk.html
+
+[Default:] none
--- a/doc/src/compute_dihedral_local.txt
+++ b/doc/src/compute_dihedral_local.txt
@ -10,18 +10,25 @@ compute dihedral/local command :h3

 [Syntax:]

-compute ID group-ID dihedral/local value1 value2 ... :pre
+compute ID group-ID dihedral/local value1 value2 ... keyword args ... :pre

 ID, group-ID are documented in "compute"_compute.html command :ulb,l
 dihedral/local = style name of this compute command :l
 one or more values may be appended :l
-value = {phi} :l
-  {phi} = tabulate dihedral angles :pre
+value = {phi} or {v_name} :l
+  {phi} = tabulate dihedral angles
+  {v_name} = equal-style variable with name (see below) :pre
+zero or more keyword/args pairs may be appended :l
+keyword = {set} :l
+  {set} args = phi name
+    phi = only currently allowed arg
+    name = name of variable to set with phi :pre
 :ule

 [Examples:]

 compute 1 all dihedral/local phi :pre
+compute 1 all dihedral/local phi v_cos set phi p :pre

 [Description:]

@ -33,6 +40,47 @@ by the group parameter as explained below.
 The value {phi} is the dihedral angle, as defined in the diagram on
 the "dihedral_style"_dihedral_style.html doc page.

+The value {v_name} can be used together with the {set} keyword to
+compute a user-specified function of the dihedral angle phi.  The
+{name} specified for the {v_name} value is the name of an "equal-style
+variable"_variable.html which should evaluate a formula based on a
+variable which will store the angle phi.  This other variable must
+be an "internal-style variable"_variable.html defined in the input
+script; its initial numeric value can be anything.  It must be an
+internal-style variable, because this command resets its value
+directly.  The {set} keyword is used to identify the name of this
+other variable associated with phi.
+
+Note that the value of phi for each dihedral which is stored in the
+internal variable is in radians, not degrees.
+
+As an example, these commands can be added to the bench/in.rhodo
+script to compute the cosine and cosine^2 of every dihedral angle in
+the system and output the statistics in various ways:
+
+variable p internal 0.0
+variable cos equal cos(v_p)
+variable cossq equal cos(v_p)*cos(v_p) :pre
+
+compute 1 all property/local datom1 datom2 datom3 datom4 dtype
+compute 2 all dihedral/local phi v_cos v_cossq set phi p
+dump 1 all local 100 tmp.dump c_1[*] c_2[*] :pre
+
+compute 3 all reduce ave c_2[*]
+thermo_style custom step temp press c_3[*] :pre
+
+fix 10 all ave/histo 10 10 100 -1 1 20 c_2[2] mode vector file tmp.histo :pre
+
+The "dump local"_dump.html command will output the angle,
+cosine(angle), cosine^2(angle) for every dihedral in the system.  The
+"thermo_style"_thermo_style.html command will print the average of
+those quantities via the "compute reduce"_compute_reduce.html command
+with thermo output.  And the "fix ave/histo"_fix_ave_histo.html
+command will histogram the cosine(angle) values and write them to a
+file.
+
+:line
+
 The local data stored by this command is generated by looping over all
 the atoms owned on a processor and their dihedrals.  A dihedral will
 only be included if all 4 atoms in the dihedral are in the specified
@ -57,12 +105,12 @@ dump 1 all local 1000 tmp.dump index c_1\[1\] c_1\[2\] c_1\[3\] c_1\[4\] c_1\[5\
 [Output info:]

 This compute calculates a local vector or local array depending on the
-number of keywords.  The length of the vector or number of rows in the
-array is the number of dihedrals.  If a single keyword is specified, a
-local vector is produced.  If two or more keywords are specified, a
+number of values.  The length of the vector or number of rows in the
+array is the number of dihedrals.  If a single value is specified, a
+local vector is produced.  If two or more values are specified, a
 local array is produced where the number of columns = the number of
-keywords.  The vector or array can be accessed by any command that
-uses local values from a compute as input.  See the "Howto
+values.  The vector or array can be accessed by any command that uses
+local values from a compute as input.  See the "Howto
 output"_Howto_output.html doc page for an overview of LAMMPS output
 options.

--- a/doc/src/compute_reduce.txt
+++ b/doc/src/compute_reduce.txt
@ -97,9 +97,9 @@ equivalent, since the "compute stress/atom"_compute_stress_atom.html
 command creates a per-atom array with 6 columns:

 compute myPress all stress/atom NULL
-compute 2 all reduce min myPress\[*\]
-compute 2 all reduce min myPress\[1\] myPress\[2\] myPress\[3\] &
-                         myPress\[4\] myPress\[5\] myPress\[6\] :pre
+compute 2 all reduce min c_myPress\[*\]
+compute 2 all reduce min c_myPress\[1\] c_myPress\[2\] c_myPress\[3\] &
+                         c_myPress\[4\] c_myPress\[5\] c_myPress\[6\] :pre

 :line

--- a/doc/src/compute_reduce_chunk.txt
+++ b/doc/src/compute_reduce_chunk.txt
@ -0,0 +1,177 @@
+"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
+
+:link(lws,http://lammps.sandia.gov)
+:link(ld,Manual.html)
+:link(lc,Commands_all.html)
+
+:line
+
+compute reduce/chunk command :h3
+
+[Syntax:]
+
+compute ID group-ID reduce/chunk chunkID mode input1 input2 ... :pre
+
+ID, group-ID are documented in "compute"_compute.html command :ulb,l
+reduce/chunk = style name of this compute command :l
+chunkID = ID of "compute chunk/atom"_compute_chunk_atom.html command :l
+mode = {sum} or {min} or {max} :l
+one or more inputs can be listed :l
+input = c_ID, c_ID\[I\], f_ID, f_ID\[I\], v_name :l
+  c_ID = per-atom vector calculated by a compute with ID
+  c_ID\[I\] = Ith column of per-atom array calculated by a compute with ID, I can include wildcard (see below)
+  f_ID = per-atom vector calculated by a fix with ID
+  f_ID\[I\] = Ith column of per-atom array calculated by a fix with ID, I can include wildcard (see below)
+  v_name = per-atom vector calculated by an atom-style variable with name :pre
+:ule
+
+[Examples:]
+
+compute 1 all reduce/chunk mychunk min c_cluster :pre
+
+[Description:]
+
+Define a calculation that reduces one or more per-atom vectors into
+per-chunk values.  This can be useful for diagnostic output.  Or when
+used in conjunction with the "compute
+chunk/spread/atom"_compute_chunk_spread_atom.html command it can be
+used to create per-atom values that induce a new set of chunks with a
+second "compute chunk/atom"_compute_chunk_atom.html command.  An
+example is given below.
+
+In LAMMPS, chunks are collections of atoms defined by a "compute
+chunk/atom"_compute_chunk_atom.html command, which assigns each atom
+to a single chunk (or no chunk).  The ID for this command is specified
+as chunkID.  For example, a single chunk could be the atoms in a
+molecule or atoms in a spatial bin.  See the "compute
+chunk/atom"_compute_chunk_atom.html and "Howto chunk"_Howto_chunk.html
+doc pages for details of how chunks can be defined and examples of how
+they can be used to measure properties of a system.
+
+For each atom, this compute accesses its chunk ID from the specified
+{chunkID} compute.  The per-atom value from an input contributes
+to a per-chunk value corresponding to the chunk ID.
+
+The reduction operation is specified by the {mode} setting and is
+performed over all the per-atom values from the atoms in each chunk.
+The {sum} option adds the per-atom values to a per-chunk total.  The
+{min} or {max} options find the minimum or maximum value of the
+per-atom values for each chunk.
+
+Note that only atoms in the specified group contribute to the
+reduction operation.  If the {chunkID} compute returns a 0 for the
+chunk ID of an atom (i.e. the atom is not in a chunk defined by the
+"compute chunk/atom"_compute_chunk_atom.html command), that atom will
+also not contribute to the reduction operation.  An input that is a
+compute or fix may define its own group which affects the quantities
+it returns.  For example, a compute will return a zero value for atoms
+that are not in the group specified for that compute.
+
+Each listed input is operated on independently.  Each input can be the
+result of a "compute"_compute.html or "fix"_fix.html or the evaluation
+of an atom-style "variable"_variable.html.
+
+Note that for values from a compute or fix, the bracketed index I can
+be specified using a wildcard asterisk with the index to effectively
+specify multiple values.  This takes the form "*" or "*n" or "n*" or
+"m*n".  If N = the number of columns in the per-atom array calculated
+by the referenced compute or fix, then an asterisk
+with no numeric values means all indices from 1 to N.  A leading
+asterisk means all indices from 1 to n (inclusive).  A trailing
+asterisk means all indices from n to N (inclusive).  A middle asterisk
+means all indices from m to n (inclusive).
+
+Using a wildcard is the same as if the individual columns of the array
+had been listed one by one.  E.g. these 2 compute reduce/chunk
+commands are equivalent, since the "compute
+property/atom"_compute_property_atom.html command creates a per-atom
+array with 3 columns:
+
+compute prop all property/atom vx vy vz
+compute 10 all reduce/chunk mychunk max c_prop\[*\]
+compute 10 all reduce/chunk mychunk max c_prop\[1\] c_prop\[2\] c_prop\[3\] :pre
+
+:line
+
+Here is an example of using this compute, in conjunction with the
+compute chunk/spread/atom command to identify self-assembled micelles.
+The commands below can be added to the examples/in.micelle script.
+
+Imagine a collection of polymer chains or small molecules with
+hydrophobic end groups.  All the hydrophobic (HP) atoms are assigned
+to a group called "phobic".
+
+These commands will assign a unique cluster ID to all HP atoms within
+a specified distance of each other.  A cluster will contain all HP
+atoms in a single molecule, but also the HP atoms in nearby molecules,
+e.g. molecules that have clumped to form a micelle due to the
+attraction induced by the hydrophobicity.  The output of the
+reduce/chunk command will be a cluster ID per chunk (molecule).
+Molecules with the same cluster ID are in the same micelle.
+
+group phobic type 4     # specific to in.micelle model
+compute cluster phobic cluster/atom 2.0
+compute cmol all chunk/atom molecule
+compute reduce phobic reduce/chunk cmol min c_cluster :pre
+
+This per-chunk info could be output in at least two ways:
+
+fix 10 all ave/time 1000 1 1000 c_reduce file tmp.phobic mode vector :pre
+
+compute spread all chunk/spread/atom cmol c_reduce
+dump 1 all custom 1000 tmp.dump id type mol x y z c_cluster c_spread
+dump_modify 1 sort id :pre
+
+In the first case, each snapshot in the tmp.phobic file will contain
+one line per molecule.  Molecules with the same value are in the same
+micelle.  In the second case each dump snapshot contains all atoms,
+each with a final field with the cluster ID of the micelle that the HP
+atoms of that atom's molecule belong to.
+
+The result from compute chunk/spread/atom can be used to define a new
+set of chunks, where all the atoms in all the molecules in the same
+micelle are assigned to the same chunk, i.e. one chunk per micelle.
+
+compute micelle all chunk/atom c_spread compress yes :pre
+
+Further analysis on a per-micelle basis can now be performed using any
+of the per-chunk computes listed on the "Howto chunk"_Howto_chunk.html
+doc page.  E.g. count the number of atoms in each micelle, calculate
+its center of mass, shape (moments of inertia), radius of gyration,
+etc.
+
+compute prop all property/chunk micelle count
+fix 20 all ave/time 1000 1 1000 c_prop file tmp.micelle mode vector :pre
+
+Each snapshot in the tmp.micelle file will have one line per micelle
+with its count of atoms, plus a first line for a chunk with all the
+solvent atoms.  By the time 50000 steps have elapsed there are a
+handful of large micelles.
+
+:line
+
+[Output info:]
+
+This compute calculates a global vector if a single input value is
+specified, otherwise a global array is output.  The number of columns
+in the array is the number of inputs provided.  The length of the
+vector or the number of array rows = the number of
+chunks {Nchunk} as calculated by the specified "compute
+chunk/atom"_compute_chunk_atom.html command.  The vector or array can
+be accessed by any command that uses global values from a compute as
+input.  See the "Howto output"_Howto_output.html doc page for an
+overview of LAMMPS output options.
+
+The per-chunk values for the vector or each column of the array will be
+in whatever "units"_units.html the corresponding input value is in.
+The vector or array values are "intensive".
+
+[Restrictions:] none
+
+[Related commands:]
+
+"compute chunk/atom"_compute_chunk_atom.html, "compute
+reduce"_compute_reduce.html, "compute
+chunk/spread/atom"_compute_chunk_spread_atom.html
+
+[Default:] none
--- a/doc/src/compute_stress_mop.txt
+++ b/doc/src/compute_stress_mop.txt
@ -0,0 +1,111 @@
+"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
+
+:link(lws,http://lammps.sandia.gov)
+:link(ld,Manual.html)
+:link(lc,Commands_all.html)
+
+:line
+
+compute stress/mop command :h3
+compute stress/mop/profile command :h3
+
+
+[Syntax:]
+
+compute ID group-ID style dir args keywords ... :pre
+
+ID, group-ID are documented in "compute"_compute.html command
+style = {stress/mop} or {stress/mop/profile}
+dir = {x} or {y} or {z} is the direction normal to the plane
+args = argument specific to the compute style
+keywords = {kin} or {conf} or {total} (one or more can be specified) :ul
+  {stress/mop} args = pos
+    pos = {lower} or {center} or {upper} or coordinate value (distance units) is the position of the plane
+  {stress/mop/profile} args = origin delta
+    origin = {lower} or {center} or {upper} or coordinate value (distance units) is the position of the first plane
+    delta = value (distance units) is the distance between planes :pre
+
+compute 1 all stress/mop x lower total
+compute 1 liquid stress/mop z 0.0 kin conf
+fix 1 all ave/time 10 1000 10000 c_1\[*\] file mop.time
+fix 1 all ave/time 10 1000 10000 c_1\[2\] file mop.time :pre
+
+compute 1 all stress/mop/profile x lower 0.1 total
+compute 1 liquid stress/mop/profile z 0.0 0.25 kin conf
+fix 1 all ave/time 500 20 10000 c_1\[*\] ave running overwrite file mopp.time mode vector :pre
+
+
+[Description:]
+
+Compute {stress/mop} and compute {stress/mop/profile} define computations that
+calculate components of the local stress tensor using the method of
+planes "(Todd)"_#mop-todd.  Specifically, compute {stress/mop} calculates 3
+components, in directions {dir},{x}; {dir},{y}; and
+{dir},{z}; where {dir} is the direction normal to the plane, while
+in compute {stress/mop/profile} the profile of the stress is computed.
+
+Contrary to methods based on histograms of atomic stress (i.e. using
+"compute stress/atom"_compute_stress_atom.html), the method of planes is
+compatible with mechanical balance in heterogeneous systems and at
+interfaces "(Todd)"_#mop-todd.
+
+The stress tensor is the sum of a kinetic term and a configurational
+term, which are given respectively by Eq. (21) and Eq. (16) in
+"(Todd)"_#mop-todd. For the kinetic part, the algorithm considers that
+atoms have crossed the plane if their positions at times t-dt and t are
+one on either side of the plane, and uses the velocity at time t-dt/2
+given by the velocity-Verlet algorithm.
+
+Between one and three keywords can be used to indicate which
+contributions to the stress must be computed: kinetic stress (kin),
+configurational stress (conf), and/or total stress (total).
+
+NOTE 1: The configurational stress is computed considering all pairs of atoms where at least one atom belongs to group group-ID. 
+
+NOTE 2: The local stress does not include any Lennard-Jones tail
+corrections to the pressure added by the "pair_modify tail
+yes"_pair_modify.html command, since those are contributions to the global system pressure.
+
+[Output info:]
+
+Compute {stress/mop} calculates a global vector (indices starting at 1), with 3
+values for each declared keyword (in the order the keywords have been
+declared). For each keyword, the stress tensor components are ordered as
+follows: stress_dir,x, stress_dir,y, and stress_dir,z.
+
+Compute {stress/mop/profile} instead calculates a global array, with 1 column
+giving the position of the planes where the stress tensor was computed,
+and with 3 columns of values for each declared keyword (in the order the
+keywords have been declared). For each keyword, the profiles of stress
+tensor components are ordered as follows: stress_dir,x; stress_dir,y;
+and stress_dir,z.
+
+The values are in pressure "units"_units.html. 
+
+The values produced by this compute can be accessed by various "output commands"_Howto_output.html. For instance, the results can be written to a file using the "fix ave/time"_fix_ave_time.html command. Please see the example in the examples/USER/mop folder.
+
+[Restrictions:] 
+
+These styles are part of the USER-MISC package. They are only enabled if
+LAMMPS is built with that package. See the "Build package"_Build_package.html
+doc page for more info.
+
+The method is only implemented for 3d orthogonal simulation boxes whose
+size does not change in time, and axis-aligned planes.
+
+The method only works with two-body pair interactions, because it
+requires the class method pair->single() to be implemented. In
+particular, it does not work with more than two-body pair interactions,
+intra-molecular interactions, and long range (kspace) interactions.
+
+[Related commands:]
+
+"compute stress/atom"_compute_stress_atom.html
+
+[Default:] none
+
+:line
+
+:link(mop-todd)
+[(Todd)] B. D. Todd, Denis J. Evans, and Peter J. Daivis: "Pressure tensor for inhomogeneous fluids", 
+Phys. Rev. E 52, 1627 (1995).
--- a/doc/src/computes.txt
+++ b/doc/src/computes.txt
@ -15,6 +15,7 @@ Computes :h1
   compute_bond_local
   compute_centro_atom
   compute_chunk_atom
+   compute_chunk_spread_atom
   compute_cluster_atom
   compute_cna_atom
   compute_cnp_atom
@ -72,6 +73,7 @@ Computes :h1
   compute_property_local
   compute_rdf
   compute_reduce
+   compute_reduce_chunk
   compute_rigid_local
   compute_saed
   compute_slice
@ -98,6 +100,7 @@ Computes :h1
   compute_sna_atom
   compute_spin
   compute_stress_atom
+   compute_stress_mop
   compute_tally
   compute_tdpd_cc_atom
   compute_temp
--- a/doc/src/fix_box_relax.txt
+++ b/doc/src/fix_box_relax.txt
@ -221,8 +221,8 @@ This equation only applies when the box dimensions are equal to those
 of the reference dimensions. If this is not the case, then the
 converged stress tensor will not equal that specified by the user.  We
 can resolve this problem by periodically resetting the reference
-dimensions. The keyword {nreset_ref} controls how often this is done.
-If this keyword is not used, or is given a value of zero, then the
+dimensions. The keyword {nreset} controls how often this is done.  If
+this keyword is not used, or is given a value of zero, then the
 reference dimensions are set to those of the initial simulation domain
 and are never changed. A value of {nstep} means that every {nstep}
 minimization steps, the reference dimensions are set to those of the
--- a/doc/src/fix_client_md.txt
+++ b/doc/src/fix_client_md.txt
@ -50,7 +50,7 @@ md"_server_md.html doc page.

 Note that when using LAMMPS as an MD client, your LAMMPS input script
 should not normally contain force field commands, like a
-"pair_style"_doc/pair_style.html, "bond_style"_doc/bond_style.html, or
+"pair_style"_pair_style.html, "bond_style"_bond_style.html, or
 "kspace_style"_kspace_style.html command.  However it is possible for
 a server code to only compute a portion of the full force-field, while
 LAMMPS computes the remaining part.  Your LAMMPS script can also
--- a/doc/src/fix_nvt_sllod.txt
+++ b/doc/src/fix_nvt_sllod.txt
@ -63,6 +63,11 @@ implemented in LAMMPS, they are coupled to a Nose/Hoover chain
 thermostat in a velocity Verlet formulation, closely following the
 implementation used for the "fix nvt"_fix_nh.html command.

+NOTE: A recent (2017) book by "(Daivis and Todd)"_#Daivis-sllod
+discusses use of the SLLOD method and non-equilibrium MD (NEMD)
+thermostatting generally, for both simple and complex fluids,
+e.g. molecular systems.  The latter can be tricky to do correctly.
+
 Additional parameters affecting the thermostat are specified by
 keywords and values documented with the "fix nvt"_fix_nh.html
 command.  See, for example, discussion of the {temp} and {drag}
@ -177,3 +182,7 @@ Same as "fix nvt"_fix_nh.html, except tchain = 1.

 :link(Daivis)
 [(Daivis and Todd)] Daivis and Todd, J Chem Phys, 124, 194103 (2006).
+
+:link(Daivis-sllod)
+[(Daivis and Todd)] Daivis and Todd, Nonequilibrium Molecular Dynamics (book),
+Cambridge University Press, https://doi.org/10.1017/9781139017848, (2017).
--- a/doc/src/fix_shake.txt
+++ b/doc/src/fix_shake.txt
@ -214,8 +214,10 @@ which can lead to poor energy conservation.  You can test for this in
 your system by running a constant NVE simulation with a particular set
 of SHAKE parameters and monitoring the energy versus time.

-SHAKE or RATTLE should not be used to constrain an angle at 180 degrees
-(e.g. linear CO2 molecule).  This causes numeric difficulties.
+SHAKE or RATTLE should not be used to constrain an angle at 180
+degrees (e.g. linear CO2 molecule).  This causes numeric difficulties.
+You can use "fix rigid or fix rigid/small"_fix_rigid.html instead to
+make a linear molecule rigid.

 [Related commands:] none

--- a/doc/src/fixes.txt
+++ b/doc/src/fixes.txt
@ -26,6 +26,7 @@ Fixes :h1
   fix_bond_swap
   fix_bond_react
   fix_box_relax
+   fix_client_md
   fix_cmap
   fix_colvars
   fix_controller
--- a/doc/src/kspace_modify.txt
+++ b/doc/src/kspace_modify.txt
@ -133,7 +133,7 @@ the code will stop with an error message. When this option is set to
 For a typical application, using the automatic parameter generation
 will provide simulations that are either inaccurate or slow. Using this
 option is thus not recommended. For guidelines on how to obtain good
-parameters, see the "How-To"_Section_howto.html#howto_24 discussion.
+parameters, see the "How-To"_Howto_dispersion.html discussion.

 :line

--- a/doc/src/kspace_style.txt
+++ b/doc/src/kspace_style.txt
@ -383,8 +383,8 @@ dimensions.  The only exception is if the slab option is set with
 must be periodic and the z dimension must be non-periodic.

 The scafacos KSpace style will only be enabled if LAMMPS is built with
-the USER-SCAFACOS package.  See the "Making
-LAMMPS"_Section_start.html#start_3 section for more info.
+the USER-SCAFACOS package.  See the "Build package"_Build_package.html
+doc page for more info.

 The use of ScaFaCos in LAMMPS does not yet support molecular charged
 systems where the short-range Coulombic interactions between atoms in
--- a/doc/src/lammps.book
+++ b/doc/src/lammps.book
@ -67,6 +67,7 @@ Howto_multiple.html
 Howto_replica.html
 Howto_library.html
 Howto_couple.html
+Howto_client_server.html
 Howto_output.html
 Howto_chunk.html
 Howto_2d.html
@ -411,6 +412,7 @@ compute_bond.html
 compute_bond_local.html
 compute_centro_atom.html
 compute_chunk_atom.html
+compute_chunk_spread_atom.html
 compute_cluster_atom.html
 compute_cna_atom.html
 compute_cnp_atom.html
@ -468,6 +470,7 @@ compute_property_chunk.html
 compute_property_local.html
 compute_rdf.html
 compute_reduce.html
+compute_reduce_chunk.html
 compute_rigid_local.html
 compute_saed.html
 compute_slice.html
@ -494,6 +497,7 @@ compute_smd_vol.html
 compute_sna_atom.html
 compute_spin.html
 compute_stress_atom.html
+compute_stress_mop.html
 compute_tally.html
 compute_tdpd_cc_atom.html
 compute_temp.html
--- a/doc/src/minimize.txt
+++ b/doc/src/minimize.txt
@ -216,10 +216,10 @@ The "fix box/relax"_fix_box_relax.html command can be used to apply an
 external pressure to the simulation box and allow it to shrink/expand
 during the minimization.

-Only a few other fixes (typically those that apply force constraints)
-are invoked during minimization.  See the doc pages for individual
-"fix"_fix.html commands to see which ones are relevant.  Current
-examples of fixes that can be used include:
+Only a few other fixes (typically those that add forces) are invoked
+during minimization.  See the doc pages for individual "fix"_fix.html
+commands to see which ones are relevant.  Current examples of fixes
+that can be used include:

 "fix addforce"_fix_addforce.html
 "fix addtorque"_fix_addtorque.html
@ -242,6 +242,11 @@ you MUST enable the "fix_modify"_fix_modify.html {energy} option for
 that fix.  The doc pages for individual "fix"_fix.html commands
 specify if this should be done.

+NOTE: The minimizers in LAMMPS do not allow for bonds (or angles, etc)
+to be held fixed while atom coordinates are being relaxed, e.g. via
+"fix shake"_fix_shake.html or "fix rigid"_fix_rigid.html.  See more
+info in the Restrictions section below.
+
 :line

 [Restrictions:]
--- a/doc/src/server_mc.txt
+++ b/doc/src/server_mc.txt
@ -63,7 +63,7 @@ See the src/MESSAGE/server_mc.cpp file for details on how LAMMPS uses
 these messages.  See the examples/COUPLE/lammps_mc/mc.cpp file for an
 example of how an MC driver code can use these messages.

-Let NATOMS=1, EINIT=2, DISPLACE=3, ACCEPT=4, RUN=5.
+Define NATOMS=1, EINIT=2, DISPLACE=3, ACCEPT=4, RUN=5.

 [Client sends one of these kinds of message]:

@ -93,9 +93,9 @@ cs->pack(2,3*natoms,x)    # 2nd field = 3N coords of Natoms :pre
 cs->send(DISPLACE,1)      # msgID = 3 with 1 field
 cs->pack_double(1,poteng) # 1st field = new potential energy of system :pre

-cs->send(ACCEPT,0)      # msgID = 4 with no fields
+cs->send(ACCEPT,0)      # msgID = 4 with no fields :pre

-cs->send(RUN,0)         # msgID = 5 with no fields
+cs->send(RUN,0)         # msgID = 5 with no fields :pre

 :line

--- a/doc/src/server_md.txt
+++ b/doc/src/server_md.txt
@ -51,9 +51,9 @@ can complete the timestep.  This command could also be used with a
 client code that performs energy minimization, using the server to
 compute forces and energy each iteration of its minimizer.

-When using the "fix client/md" command, LAMMPS (as the client code)
-does the timestepping and receives needed energy, forces, and pressure
-values from the server code.
+When using the "fix client/md"_fix_client_md.html command, LAMMPS (as
+the client code) does the timestepping and receives needed energy,
+forces, and pressure values from the server code.

 The format and content of the exchanged messages are explained here in
 a conceptual sense.  Python-style pseudo code for the library calls to
@ -69,9 +69,11 @@ a quantum code (VASP) can use use these messages.

 The following pseudo-code uses these values, defined as enums.

-enum{SETUP=1,STEP};
-enum{DIM=1,PERIODICITY,ORIGIN,BOX,NATOMS,NTYPES,TYPES,COORDS,UNITS,CHARGE};
-enum{FORCES=1,ENERGY,PRESSURE,ERROR}; :pre
+Define:
+
+SETUP=1, STEP=2
+DIM=1, PERIODICITY=2, ORIGIN=3, BOX=4, NATOMS=5, NTYPES=6, TYPES=7, COORDS=8, UNITS=9, CHARGE=10
+FORCES=1, ENERGY=2, PRESSURE=3, ERROR=4 :pre

 [Client sends 2 kinds of messages]:

@ -98,7 +100,7 @@ cs->send(STEP,nfields)         # msgID with nfields :pre

 cs->pack(COORDS,3*natoms,x)    # vector of 3N atom coords
 cs->pack(ORIGIN,3,origin)      # lower-left corner of simulation box
-cs->pack(BOX,9,box)            # 3 edge vectors of simulation box
+cs->pack(BOX,9,box)            # 3 edge vectors of simulation box :pre

 [Server replies to either kind of message]:

--- a/examples/USER/misc/mop/data.mop
+++ b/examples/USER/misc/mop/data.mop
--- a/examples/USER/misc/mop/in.compute_stress_mop
+++ b/examples/USER/misc/mop/in.compute_stress_mop
@ -0,0 +1,40 @@
+variable T equal 0.8
+variable p_solid equal 0.05
+
+read_data data.mop
+
+pair_style lj/cut 2.5
+pair_coeff * * 1.0 1.0
+pair_coeff 1 2 0.5 1.0 
+pair_coeff 2 2 0.0 0.0
+neigh_modify delay 0
+
+group liquid type 1
+group solid type 2
+region bottom block INF INF INF INF INF 7.0
+group bottom region bottom
+group solid_bottom intersect solid bottom
+group solid_up subtract solid solid_bottom
+
+variable faSolid equal ${p_solid}*lx*ly/count(solid_up)
+fix piston_up solid_up aveforce NULL NULL -${faSolid} 
+fix freeze_up solid_up setforce 0.0 0.0 NULL 
+fix freeze_bottom solid_bottom setforce 0.0 0.0 0.0
+fix nvesol solid nve
+compute Tliq liquid temp
+fix nvtliq liquid nvt temp $T $T 0.5
+fix_modify nvtliq temp Tliq
+
+thermo 1000
+thermo_modify flush yes temp Tliq
+
+fix fxbal all balance 1000 1.05 shift z 10 1.05
+
+compute mopz0 all stress/mop z center kin conf
+fix mopz0t all ave/time 1 1 1 c_mopz0[*] file mopz0.time
+
+compute moppz liquid stress/mop/profile z 0.0 0.1 kin conf
+fix moppzt all ave/time 1 1 1 c_moppz[*] ave running overwrite file moppz.time mode vector
+
+run 0
+
--- a/examples/USER/misc/mop/log.5Sep18.compute_stress_mop.g++.1
+++ b/examples/USER/misc/mop/log.5Sep18.compute_stress_mop.g++.1
@ -0,0 +1,111 @@
+LAMMPS (5 Sep 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+variable T equal 0.8
+variable p_solid equal 0.05
+
+read_data data.mop
+  orthogonal box = (0 0 -2) to (9.52441 9.52441 16)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  1224 atoms
+  reading velocities ...
+  1224 velocities
+
+pair_style lj/cut 2.5
+pair_coeff * * 1.0 1.0
+pair_coeff 1 2 0.5 1.0
+pair_coeff 2 2 0.0 0.0
+neigh_modify delay 0
+
+group liquid type 1
+792 atoms in group liquid
+group solid type 2
+432 atoms in group solid
+region bottom block INF INF INF INF INF 7.0
+group bottom region bottom
+630 atoms in group bottom
+group solid_bottom intersect solid bottom
+216 atoms in group solid_bottom
+group solid_up subtract solid solid_bottom
+216 atoms in group solid_up
+
+variable faSolid equal ${p_solid}*lx*ly/count(solid_up)
+variable faSolid equal 0.05*lx*ly/count(solid_up)
+fix piston_up solid_up aveforce NULL NULL -${faSolid}
+fix piston_up solid_up aveforce NULL NULL -0.0209986841649146
+fix freeze_up solid_up setforce 0.0 0.0 NULL
+fix freeze_bottom solid_bottom setforce 0.0 0.0 0.0
+fix nvesol solid nve
+compute Tliq liquid temp
+fix nvtliq liquid nvt temp $T $T 0.5
+fix nvtliq liquid nvt temp 0.8 $T 0.5
+fix nvtliq liquid nvt temp 0.8 0.8 0.5
+fix_modify nvtliq temp Tliq
+WARNING: Temperature for fix modify is not for group all (src/fix_nh.cpp:1404)
+
+thermo 1000
+thermo_modify flush yes temp Tliq
+WARNING: Temperature for thermo pressure is not for group all (src/thermo.cpp:488)
+
+fix fxbal all balance 1000 1.05 shift z 10 1.05
+
+compute mopz0 all stress/mop z center kin conf
+fix mopz0t all ave/time 1 1 1 c_mopz0[*] file mopz0.time
+
+compute moppz liquid stress/mop/profile z 0.0 0.1 kin conf
+fix moppzt all ave/time 1 1 1 c_moppz[*] ave running overwrite file moppz.time mode vector
+
+run 0
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2.8
+  ghost atom cutoff = 2.8
+  binsize = 1.4, bins = 7 7 13
+  3 neighbor lists, perpetual/occasional/extra = 1 2 0
+  (1) pair lj/cut, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+  (2) compute stress/mop, occasional, copy from (1)
+      attributes: half, newton on
+      pair build: copy
+      stencil: none
+      bin: none
+  (3) compute stress/mop/profile, occasional, copy from (1)
+      attributes: half, newton on
+      pair build: copy
+      stencil: none
+      bin: none
+Per MPI rank memory allocation (min/avg/max) = 3.596 | 3.596 | 3.596 Mbytes
+Step Temp E_pair E_mol TotEng Press Volume 
+       0   0.82011245   -3.0642111            0   -2.2692246   0.16906107    1632.8577 
+Loop time of 1.19209e-06 on 1 procs for 0 steps with 1224 atoms
+
+167.8% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0          | 0          | 0          |   0.0 |  0.00
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0          | 0          | 0          |   0.0 |  0.00
+Output  | 0          | 0          | 0          |   0.0 |  0.00
+Modify  | 0          | 0          | 0          |   0.0 |  0.00
+Other   |            | 1.192e-06  |            |       |100.00
+
+Nlocal:    1224 ave 1224 max 1224 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    2975 ave 2975 max 2975 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    40241 ave 40241 max 40241 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 40241
+Ave neighs/atom = 32.8766
+Neighbor list builds = 0
+Dangerous builds = 0
+
+Total wall time: 0:00:00
--- a/examples/USER/misc/mop/log.5Sep18.compute_stress_mop.g++.4
+++ b/examples/USER/misc/mop/log.5Sep18.compute_stress_mop.g++.4
@ -0,0 +1,111 @@
+LAMMPS (5 Sep 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+variable T equal 0.8
+variable p_solid equal 0.05
+
+read_data data.mop
+  orthogonal box = (0 0 -2) to (9.52441 9.52441 16)
+  1 by 2 by 2 MPI processor grid
+  reading atoms ...
+  1224 atoms
+  reading velocities ...
+  1224 velocities
+
+pair_style lj/cut 2.5
+pair_coeff * * 1.0 1.0
+pair_coeff 1 2 0.5 1.0
+pair_coeff 2 2 0.0 0.0
+neigh_modify delay 0
+
+group liquid type 1
+792 atoms in group liquid
+group solid type 2
+432 atoms in group solid
+region bottom block INF INF INF INF INF 7.0
+group bottom region bottom
+630 atoms in group bottom
+group solid_bottom intersect solid bottom
+216 atoms in group solid_bottom
+group solid_up subtract solid solid_bottom
+216 atoms in group solid_up
+
+variable faSolid equal ${p_solid}*lx*ly/count(solid_up)
+variable faSolid equal 0.05*lx*ly/count(solid_up)
+fix piston_up solid_up aveforce NULL NULL -${faSolid}
+fix piston_up solid_up aveforce NULL NULL -0.0209986841649146
+fix freeze_up solid_up setforce 0.0 0.0 NULL
+fix freeze_bottom solid_bottom setforce 0.0 0.0 0.0
+fix nvesol solid nve
+compute Tliq liquid temp
+fix nvtliq liquid nvt temp $T $T 0.5
+fix nvtliq liquid nvt temp 0.8 $T 0.5
+fix nvtliq liquid nvt temp 0.8 0.8 0.5
+fix_modify nvtliq temp Tliq
+WARNING: Temperature for fix modify is not for group all (src/fix_nh.cpp:1404)
+
+thermo 1000
+thermo_modify flush yes temp Tliq
+WARNING: Temperature for thermo pressure is not for group all (src/thermo.cpp:488)
+
+fix fxbal all balance 1000 1.05 shift z 10 1.05
+
+compute mopz0 all stress/mop z center kin conf
+fix mopz0t all ave/time 1 1 1 c_mopz0[*] file mopz0.time
+
+compute moppz liquid stress/mop/profile z 0.0 0.1 kin conf
+fix moppzt all ave/time 1 1 1 c_moppz[*] ave running overwrite file moppz.time mode vector
+
+run 0
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2.8
+  ghost atom cutoff = 2.8
+  binsize = 1.4, bins = 7 7 13
+  3 neighbor lists, perpetual/occasional/extra = 1 2 0
+  (1) pair lj/cut, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+  (2) compute stress/mop, occasional, copy from (1)
+      attributes: half, newton on
+      pair build: copy
+      stencil: none
+      bin: none
+  (3) compute stress/mop/profile, occasional, copy from (1)
+      attributes: half, newton on
+      pair build: copy
+      stencil: none
+      bin: none
+Per MPI rank memory allocation (min/avg/max) = 3.509 | 3.51 | 3.511 Mbytes
+Step Temp E_pair E_mol TotEng Press Volume 
+       0   0.82011245   -3.0642111            0   -2.2692246   0.16906107    1632.8577 
+Loop time of 4.06504e-05 on 4 procs for 0 steps with 1224 atoms
+
+65.2% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0          | 0          | 0          |   0.0 |  0.00
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0          | 0          | 0          |   0.0 |  0.00
+Output  | 0          | 0          | 0          |   0.0 |  0.00
+Modify  | 0          | 0          | 0          |   0.0 |  0.00
+Other   |            | 4.065e-05  |            |       |100.00
+
+Nlocal:    306 ave 320 max 295 min
+Histogram: 1 1 0 0 0 0 1 0 0 1
+Nghost:    1450.25 ave 1485 max 1422 min
+Histogram: 2 0 0 0 0 0 0 1 0 1
+Neighs:    10060.2 ave 10866 max 9507 min
+Histogram: 2 0 0 0 0 1 0 0 0 1
+
+Total # of neighbors = 40241
+Ave neighs/atom = 32.8766
+Neighbor list builds = 0
+Dangerous builds = 0
+
+Total wall time: 0:00:00
--- a/examples/USER/misc/mop/moppz.time.reference
+++ b/examples/USER/misc/mop/moppz.time.reference
@ -0,0 +1,185 @@
+# Time-averaged data for fix moppzt
+# TimeStep Number-of-rows
+# Row c_moppz[1] c_moppz[2] c_moppz[3] c_moppz[4] c_moppz[5] c_moppz[6] c_moppz[7]
+0 181
+1 -2 0 0 0 0 0 0
+2 -1.9 0 0 0 0 0 0
+3 -1.8 0 0 0 0 0 0
+4 -1.7 0 0 0 0 0 0
+5 -1.6 0 0 0 0 0 0
+6 -1.5 0 0 0 -9.81273e-05 0.000228605 -0.00421138
+7 -1.4 0 0 0 -9.81273e-05 0.000228605 -0.00421138
+8 -1.3 0 0 0 -9.81273e-05 0.000228605 -0.00421138
+9 -1.2 0 0 0 -9.81273e-05 0.000228605 -0.00421138
+10 -1.1 0 0 0 -9.81273e-05 0.000228605 -0.00421138
+11 -1 0 0 0 -9.81273e-05 0.000228605 -0.00421138
+12 -0.9 0 0 0 -9.81273e-05 0.000228605 -0.00421138
+13 -0.8 0 0 0 -9.81273e-05 0.000228605 -0.00421138
+14 -0.7 0 0 0 -0.000370675 -0.00240125 -0.26848
+15 -0.6 0 0 0 -0.000370675 -0.00240125 -0.26848
+16 -0.5 0 0 0 -0.000370675 -0.00240125 -0.26848
+17 -0.4 0 0 0 -0.000370675 -0.00240125 -0.26848
+18 -0.3 0 0 0 -0.000370675 -0.00240125 -0.26848
+19 -0.2 0 0 0 -0.000370675 -0.00240125 -0.26848
+20 -0.1 0 0 0 -0.000370675 -0.00240125 -0.26848
+21 0 0 0 0 -0.000370675 -0.00240125 -0.26848
+22 0.1 0 0 0 0.190761 -0.491728 0.287704
+23 0.2 0 0 0 0.190761 -0.491728 0.287704
+24 0.3 0 0 0 0.190761 -0.491728 0.287704
+25 0.4 0 0 0 0.190761 -0.491728 0.287704
+26 0.5 0 0 0 0.190761 -0.491728 0.287704
+27 0.6 0 0 0 0.190761 -0.491728 0.287704
+28 0.7 0 0 0 0.190761 -0.491728 0.287704
+29 0.8 0 0 0 -0.181602 -0.198457 -0.0964774
+30 0.9 0 0 0 -0.15138 0.183353 0.206848
+31 1 0 0 0 0.174362 1.27701 0.600545
+32 1.1 0 0 0 0.160987 0.563442 0.494994
+33 1.2 0 0 0 0.218876 0.59796 0.398527
+34 1.3 0 0 0 0.187614 0.558909 0.372353
+35 1.4 0 0 0 0.118586 0.410013 0.331945
+36 1.5 0 0 0 -0.0514208 0.40381 0.128097
+37 1.6 3.08628 0.241189 5.90817 -0.198262 0.324128 -0.0449302
+38 1.7 0 0 0 -0.104542 0.256677 -0.332854
+39 1.8 0.222123 2.43524 1.10089 -0.324638 -0.168682 -1.06238
+40 1.9 0 0 0 -0.175732 -0.186846 -0.163062
+41 2 0 0 0 -0.137995 0.0920401 -0.260106
+42 2.1 -0.179621 -2.59775 1.80077 -0.480624 -0.0439511 -0.0824913
+43 2.2 0 0 0 -0.499868 -0.0106185 -0.108924
+44 2.3 0 0 0 -0.703301 0.124555 -0.0880158
+45 2.4 0 0 0 -0.581211 -0.244281 -0.250071
+46 2.5 1.05274 -2.86043 3.36339 -0.575104 -0.148715 -0.249092
+47 2.6 0 0 0 0.66061 -0.157649 -0.357141
+48 2.7 0 0 0 0.299971 -0.302298 -0.572714
+49 2.8 0 0 0 0.33107 -0.201699 -0.470466
+50 2.9 0 0 0 0.822686 1.08427 -0.390511
+51 3 0 0 0 0.716428 0.750998 -0.698174
+52 3.1 0.805189 0.571878 4.31938 0.121891 0.922727 -0.932582
+53 3.2 0 0 0 0.0442642 1.02537 -1.03066
+54 3.3 2.54289 -1.93701 4.88355 0.0731321 1.09091 -0.83075
+55 3.4 0 0 0 0.426589 0.821174 -0.765855
+56 3.5 0 0 0 0.445135 0.299996 -1.48972
+57 3.6 0 0 0 0.362916 -1.28673 -0.853897
+58 3.7 0.952867 -1.07044 1.04141 0.12517 -1.00353 -0.785272
+59 3.8 0.617661 0.991499 1.80973 -0.182369 -1.04057 -1.00435
+60 3.9 0.60295 -2.41888 3.98011 0.0347345 -1.01302 -0.88314
+61 4 -2.97421 -2.01531 2.98586 0.43463 -0.465643 -0.801128
+62 4.1 -3.23318 -3.31281 0.956525 0.732752 0.140718 -1.10583
+63 4.2 0 0 0 0.969872 0.298566 -0.823464
+64 4.3 0 0 0 0.7707 0.557002 -0.836549
+65 4.4 0 0 0 0.395828 0.66755 -1.53454
+66 4.5 0 0 0 0.104451 0.46777 -1.32358
+67 4.6 0 0 0 0.402084 0.464983 -1.22051
+68 4.7 0 0 0 0.352808 0.0794986 -1.31292
+69 4.8 0 0 0 0.0215512 0.284343 -0.975326
+70 4.9 0 0 0 -0.133637 0.250925 -1.33918
+71 5 0 0 0 -0.066208 0.104514 -1.27412
+72 5.1 0 0 0 -0.184391 0.479805 -1.15139
+73 5.2 0 0 0 -0.200251 0.527142 -1.34307
+74 5.3 0 0 0 0.043532 -0.0788824 -0.998406
+75 5.4 0 0 0 -0.531846 0.126289 -1.05818
+76 5.5 0 0 0 -0.259593 0.0818463 -1.58939
+77 5.6 0 0 0 -0.373828 -0.343977 -1.50908
+78 5.7 -0.294161 -1.07567 3.46536 -0.0644873 -0.424333 -1.28548
+79 5.8 0 0 0 -0.293233 -0.201133 -1.19085
+80 5.9 0.961568 -1.44949 2.42101 -0.632816 -0.0669315 -0.85119
+81 6 0 0 0 -0.0559892 -0.0194478 -1.04541
+82 6.1 0 0 0 -0.339753 0.286693 -1.24366
+83 6.2 0 0 0 -0.376208 0.444053 -1.7662
+84 6.3 0 0 0 -0.718923 0.555398 -1.93862
+85 6.4 0 0 0 -1.10631 0.263525 -1.79723
+86 6.5 0 0 0 -0.217948 -0.0489491 -2.07833
+87 6.6 0 0 0 -0.376248 -0.0588682 -2.45322
+88 6.7 -2.12742 4.22609 2.36568 -0.236703 -0.279582 -1.56434
+89 6.8 0.869072 -0.141389 3.92123 0.0540986 -0.00271606 -0.930143
+90 6.9 0 0 0 1.08829 -1.11737 -0.808187
+91 7 1.62633 1.08234 0.844097 1.18575 -0.408792 -0.752394
+92 7.1 0 0 0 1.03324 -0.470631 -0.486767
+93 7.2 0 0 0 0.950164 -0.112451 -0.479409
+94 7.3 -2.66121 -0.326607 7.83093 0.359 -0.482493 0.154384
+95 7.4 0 0 0 0.359089 -1.12337 0.409711
+96 7.5 -1.88971 1.34806 3.56893 0.394677 -1.0109 0.548348
+97 7.6 -1.34494 -0.896214 2.06959 0.231398 -0.728529 0.313513
+98 7.7 0 0 0 0.415681 -0.45268 0.507181
+99 7.8 0 0 0 0.259423 -0.11638 0.464208
+100 7.9 -1.97572 -1.20836 3.95731 0.252257 -0.0845701 -0.249345
+101 8 0 0 0 0.0688154 0.290386 -0.462467
+102 8.1 0.25925 -0.458269 3.33086 0.360399 -0.0409494 -0.656911
+103 8.2 0 0 0 -0.0587033 0.347698 -0.340604
+104 8.3 0 0 0 -0.377192 0.153096 -0.914654
+105 8.4 0 0 0 -0.431553 0.274996 -0.946252
+106 8.5 0 0 0 -0.898366 0.146653 -1.36383
+107 8.6 0 0 0 -0.889593 0.385951 0.125116
+108 8.7 0 0 0 -0.0139171 -0.162302 -0.0287854
+109 8.8 0 0 0 -0.266284 -0.148945 0.393533
+110 8.9 0 0 0 -0.00920376 -0.0770818 0.334642
+111 9 0 0 0 -0.0949156 0.0113352 -0.0761263
+112 9.1 0 0 0 0.0688045 0.104558 -0.101891
+113 9.2 3.79773 0.0255401 3.75032 0.419832 0.295402 0.652533
+114 9.3 0 0 0 0.594267 0.70396 0.836434
+115 9.4 0 0 0 0.174722 1.00483 1.42787
+116 9.5 0 0 0 0.0626835 0.518952 0.269158
+117 9.6 0 0 0 -0.302859 -0.265212 -0.0145578
+118 9.7 0 0 0 -0.114026 -0.201336 -0.539522
+119 9.8 0 0 0 0.104008 -0.30236 -0.0789062
+120 9.9 0 0 0 -0.0482778 -0.553118 0.45214
+121 10 0 0 0 -0.0554938 -0.402692 0.141112
+122 10.1 0 0 0 0.174338 0.556958 -0.0922154
+123 10.2 0 0 0 -1.06045 0.541565 -0.0409312
+124 10.3 0 0 0 -1.20782 0.464574 -0.413871
+125 10.4 0 0 0 -0.891701 0.327653 -0.286438
+126 10.5 0 0 0 0.231227 -0.064277 -0.89684
+127 10.6 -1.27989 -4.87365 9.40433 0.211278 0.230826 -1.23536
+128 10.7 -2.1001 -0.417817 1.17745 0.425856 0.078728 -1.44229
+129 10.8 0 0 0 0.30965 0.450884 -1.74985
+130 10.9 0 0 0 0.36735 0.990032 -1.19971
+131 11 0.253834 -1.84303 3.91828 1.01826 0.0660896 -0.481086
+132 11.1 0 0 0 0.744006 0.0906555 -0.897417
+133 11.2 0 0 0 0.339073 0.361038 -0.545084
+134 11.3 -1.9974 -0.431998 3.46296 0.611295 0.17282 0.0341483
+135 11.4 0 0 0 -0.491432 -0.958871 1.28001
+136 11.5 0 0 0 0.0431048 -1.50924 1.24037
+137 11.6 0 0 0 -0.684419 -0.0163951 1.06179
+138 11.7 0 0 0 -0.425278 -0.127741 0.757298
+139 11.8 -2.09164 0.00894897 2.22812 -0.0955178 -0.310572 0.661289
+140 11.9 0 0 0 0.156959 -0.233409 0.802568
+141 12 0 0 0 -0.05541 -0.346448 0.541571
+142 12.1 0 0 0 0.706767 0.182767 0.25767
+143 12.2 0 0 0 0.4791 0.464612 -0.212887
+144 12.3 0 0 0 0.81454 0.440323 -0.461359
+145 12.4 0 0 0 -0.110025 0.200698 -0.996706
+146 12.5 0 0 0 -0.149791 0.165599 -1.02233
+147 12.6 0 0 0 -0.170933 0.0644682 -0.866174
+148 12.7 0 0 0 -0.122869 -0.0196287 -0.801348
+149 12.8 0 0 0 -0.0693832 -0.0673091 -0.382802
+150 12.9 0 0 0 -0.0693832 -0.0673091 -0.382802
+151 13 0 0 0 -0.0693832 -0.0673091 -0.382802
+152 13.1 0 0 0 -0.0693832 -0.0673091 -0.382802
+153 13.2 0 0 0 -0.0693832 -0.0673091 -0.382802
+154 13.3 0 0 0 -0.0693832 -0.0673091 -0.382802
+155 13.4 0 0 0 -0.0693832 -0.0673091 -0.382802
+156 13.5 0 0 0 -0.000502433 0.000137492 -0.227425
+157 13.6 0 0 0 -0.000502433 0.000137492 -0.227425
+158 13.7 0 0 0 -0.000502433 0.000137492 -0.227425
+159 13.8 0 0 0 -0.000502433 0.000137492 -0.227425
+160 13.9 0 0 0 -0.000502433 0.000137492 -0.227425
+161 14 0 0 0 -0.000502433 0.000137492 -0.227425
+162 14.1 0 0 0 -0.000502433 0.000137492 -0.227425
+163 14.2 0 0 0 -0.000502433 0.000137492 -0.227425
+164 14.3 0 0 0 5.79042e-05 4.68687e-05 -0.00286094
+165 14.4 0 0 0 5.79042e-05 4.68687e-05 -0.00286094
+166 14.5 0 0 0 5.79042e-05 4.68687e-05 -0.00286094
+167 14.6 0 0 0 5.79042e-05 4.68687e-05 -0.00286094
+168 14.7 0 0 0 5.79042e-05 4.68687e-05 -0.00286094
+169 14.8 0 0 0 5.79042e-05 4.68687e-05 -0.00286094
+170 14.9 0 0 0 5.79042e-05 4.68687e-05 -0.00286094
+171 15 0 0 0 5.79042e-05 4.68687e-05 -0.00286094
+172 15.1 0 0 0 0 0 0
+173 15.2 0 0 0 0 0 0
+174 15.3 0 0 0 0 0 0
+175 15.4 0 0 0 0 0 0
+176 15.5 0 0 0 0 0 0
+177 15.6 0 0 0 0 0 0
+178 15.7 0 0 0 0 0 0
+179 15.8 0 0 0 0 0 0
+180 15.9 0 0 0 0 0 0
+181 16 0 0 0 0 0 0
--- a/examples/USER/misc/mop/mopz0.time.reference
+++ b/examples/USER/misc/mop/mopz0.time.reference
@ -0,0 +1,3 @@
+# Time-averaged data for fix mopz0t
+# TimeStep c_mopz0[1] c_mopz0[2] c_mopz0[3] c_mopz0[4] c_mopz0[5] c_mopz0[6]
+0 1.62633 1.08234 0.844097 1.18575 -0.408792 -0.752394
--- a/src/.gitignore
+++ b/src/.gitignore
@ -302,6 +302,10 @@
 /compute_rigid_local.h
 /compute_spec_atom.cpp
 /compute_spec_atom.h
+/compute_stress_mop.cpp
+/compute_stress_mop.h
+/compute_stress_mop_profile.cpp
+/compute_stress_mop_profile.h
 /compute_stress_tally.cpp
 /compute_stress_tally.h
 /compute_temp_asphere.cpp
--- a/src/GPU/pair_eam_alloy_gpu.cpp
+++ b/src/GPU/pair_eam_alloy_gpu.cpp
@ -364,7 +364,7 @@ void PairEAMAlloyGPU::read_file(char *filename)
    fptr = fopen(filename,"r");
    if (fptr == NULL) {
      char str[128];
-      sprintf(str,"Cannot open EAM potential file %s",filename);
+      snprintf(str,128,"Cannot open EAM potential file %s",filename);
      error->one(FLERR,str);
    }
  }
--- a/src/GPU/pair_eam_fs_gpu.cpp
+++ b/src/GPU/pair_eam_fs_gpu.cpp
@ -364,7 +364,7 @@ void PairEAMFSGPU::read_file(char *filename)
    fptr = force->open_potential(filename);
    if (fptr == NULL) {
      char str[128];
-      sprintf(str,"Cannot open EAM potential file %s",filename);
+      snprintf(str,128,"Cannot open EAM potential file %s",filename);
      error->one(FLERR,str);
    }
  }
--- a/src/GRANULAR/fix_wall_gran_region.cpp
+++ b/src/GRANULAR/fix_wall_gran_region.cpp
@ -113,16 +113,16 @@ void FixWallGranRegion::init()
      strcmp(region_style,region->style) != 0 ||
      nregion != region->nregion) {
    char str[256];
-    sprintf(str,"Region properties for region %s changed between runs, "
-            "resetting its motion",idregion);
+    snprintf(str,256,"Region properties for region %s changed between runs, "
+             "resetting its motion",idregion);
    error->warning(FLERR,str);
    region->reset_vel();
  }

  if (motion_resetflag){
    char str[256];
-    sprintf(str,"Region properties for region %s are inconsistent "
-            "with restart file, resetting its motion",idregion);
+    snprintf(str,256,"Region properties for region %s are inconsistent "
+             "with restart file, resetting its motion",idregion);
    error->warning(FLERR,str);
    region->reset_vel();
  }
--- a/src/KOKKOS/fix_qeq_reax_kokkos.cpp
+++ b/src/KOKKOS/fix_qeq_reax_kokkos.cpp
@ -247,6 +247,13 @@ void FixQEqReaxKokkos<DeviceType>::pre_force(int vflag)
  k_t.template modify<LMPHostType>();
  k_t.template sync<DeviceType>();

+  need_dup = lmp->kokkos->need_dup<DeviceType>();
+
+  if (need_dup)
+    dup_o = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated> (d_o); // allocate duplicated memory
+  else
+    ndup_o = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated> (d_o);
+
  // 1st cg solve over b_s, s
  cg_solve1();

@ -262,6 +269,10 @@ void FixQEqReaxKokkos<DeviceType>::pre_force(int vflag)

  if (!allocated_flag)
    allocated_flag = 1;
+
+  // free duplicated memory
+  if (need_dup)
+    dup_o = decltype(dup_o)();
 }

 /* ---------------------------------------------------------------------- */
@ -480,10 +491,12 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
    if (neighflag == HALF) {
      FixQEqReaxKokkosSparse13Functor<DeviceType,HALF> sparse13_functor(this);
      Kokkos::parallel_for(inum,sparse13_functor);
-    } else {
+    } else if (neighflag == HALFTHREAD) {
      FixQEqReaxKokkosSparse13Functor<DeviceType,HALFTHREAD> sparse13_functor(this);
      Kokkos::parallel_for(inum,sparse13_functor);
    }
+    if (need_dup)
+      Kokkos::Experimental::contribute(d_o, dup_o);
  } else {
    Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagSparseMatvec1> (inum, teamsize), *this);
  }
@ -531,18 +544,21 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
    Kokkos::parallel_for(inum,sparse22_functor);
    if (neighflag != FULL) {
      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagZeroQGhosts>(nlocal,nlocal+atom->nghost),*this);
+      if (need_dup)
+        dup_o.reset_except(d_o);
      if (neighflag == HALF) {
        FixQEqReaxKokkosSparse23Functor<DeviceType,HALF> sparse23_functor(this);
        Kokkos::parallel_for(inum,sparse23_functor);
-      } else {
+      } else if (neighflag == HALFTHREAD) {
        FixQEqReaxKokkosSparse23Functor<DeviceType,HALFTHREAD> sparse23_functor(this);
        Kokkos::parallel_for(inum,sparse23_functor);
      }
+      if (need_dup)
+        Kokkos::Experimental::contribute(d_o, dup_o);
    } else {
      Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagSparseMatvec2> (inum, teamsize), *this);
    }

-
    if (neighflag != FULL) {
      k_o.template modify<DeviceType>();
      k_o.template sync<LMPHostType>();
@ -607,13 +623,17 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
  Kokkos::parallel_for(inum,sparse32_functor);
  if (neighflag != FULL) {
    Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagZeroQGhosts>(nlocal,nlocal+atom->nghost),*this);
+    if (need_dup)
+      dup_o.reset_except(d_o);
    if (neighflag == HALF) {
      FixQEqReaxKokkosSparse33Functor<DeviceType,HALF> sparse33_functor(this);
      Kokkos::parallel_for(inum,sparse33_functor);
-    } else {
+    } else if (neighflag == HALFTHREAD) {
      FixQEqReaxKokkosSparse33Functor<DeviceType,HALFTHREAD> sparse33_functor(this);
      Kokkos::parallel_for(inum,sparse33_functor);
    }
+    if (need_dup)
+      Kokkos::Experimental::contribute(d_o, dup_o);
  } else {
    Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagSparseMatvec3> (inum, teamsize), *this);
  }
@ -661,13 +681,17 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
    Kokkos::parallel_for(inum,sparse22_functor);
    if (neighflag != FULL) {
      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagZeroQGhosts>(nlocal,nlocal+atom->nghost),*this);
+      if (need_dup)
+        dup_o.reset_except(d_o);
      if (neighflag == HALF) {
        FixQEqReaxKokkosSparse23Functor<DeviceType,HALF> sparse23_functor(this);
        Kokkos::parallel_for(inum,sparse23_functor);
-      } else {
+      } else if (neighflag == HALFTHREAD) {
        FixQEqReaxKokkosSparse23Functor<DeviceType,HALFTHREAD> sparse23_functor(this);
        Kokkos::parallel_for(inum,sparse23_functor);
      }
+      if (need_dup)
+        Kokkos::Experimental::contribute(d_o, dup_o);
    } else {
      Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagSparseMatvec2> (inum, teamsize), *this);
    }
@ -779,8 +803,9 @@ template<int NEIGHFLAG>
 KOKKOS_INLINE_FUNCTION
 void FixQEqReaxKokkos<DeviceType>::sparse13_item(int ii) const
 {
-  // The q array is atomic for Half/Thread neighbor style
-  Kokkos::View<F_FLOAT*, typename DAT::t_float_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_o = d_o;
+  // The q array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
+  auto v_o = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_o),decltype(ndup_o)>::get(dup_o,ndup_o);
+  auto a_o = v_o.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  const int i = d_ilist[ii];
  if (mask[i] & groupbit) {
@ -831,8 +856,9 @@ template<int NEIGHFLAG>
 KOKKOS_INLINE_FUNCTION
 void FixQEqReaxKokkos<DeviceType>::sparse23_item(int ii) const
 {
-  // The q array is atomic for Half/Thread neighbor style
-  Kokkos::View<F_FLOAT*, typename DAT::t_float_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_o = d_o;
+  // The q array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
+  auto v_o = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_o),decltype(ndup_o)>::get(dup_o,ndup_o);
+  auto a_o = v_o.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  const int i = d_ilist[ii];
  if (mask[i] & groupbit) {
@ -890,8 +916,9 @@ template<int NEIGHFLAG>
 KOKKOS_INLINE_FUNCTION
 void FixQEqReaxKokkos<DeviceType>::sparse33_item(int ii) const
 {
-  // The q array is atomic for Half/Thread neighbor style
-  Kokkos::View<F_FLOAT*, typename DAT::t_float_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_o = d_o;
+  // The q array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
+  auto v_o = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_o),decltype(ndup_o)>::get(dup_o,ndup_o);
+  auto a_o = v_o.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  const int i = d_ilist[ii];
  if (mask[i] & groupbit) {
--- a/src/KOKKOS/fix_qeq_reax_kokkos.h
+++ b/src/KOKKOS/fix_qeq_reax_kokkos.h
@ -148,6 +148,7 @@ class FixQEqReaxKokkos : public FixQEqReax {
 private:
  int inum;
  int allocated_flag;
+  int need_dup;

  typedef Kokkos::DualView<int***,DeviceType> tdual_int_1d;
  Kokkos::DualView<params_qeq*,Kokkos::LayoutRight,DeviceType> k_params;
@ -192,6 +193,9 @@ class FixQEqReaxKokkos : public FixQEqReax {
  HAT::t_ffloat_2d h_s_hist, h_t_hist;
  typename AT::t_ffloat_2d_randomread r_s_hist, r_t_hist;

+  Kokkos::Experimental::ScatterView<F_FLOAT*, typename AT::t_ffloat_1d::array_layout, DeviceType, Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated> dup_o;
+  Kokkos::Experimental::ScatterView<F_FLOAT*, typename AT::t_ffloat_1d::array_layout, DeviceType, Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated> ndup_o;
+
  void init_shielding_k();
  void init_hist();
  void allocate_matrix();
--- a/src/KOKKOS/kokkos.cpp
+++ b/src/KOKKOS/kokkos.cpp
@ -166,6 +166,13 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
  }
 #endif

+#ifndef KOKKOS_HAVE_SERIAL
+  if (num_threads == 1)
+    error->warning(FLERR,"When using a single thread, the Kokkos Serial backend "
+                         "(i.e. Makefile.kokkos_mpi_only) gives better performance "
+                         "than the OpenMP backend");
+#endif
+
  Kokkos::InitArguments args;
  args.num_threads = num_threads;
  args.num_numa = numa;
--- a/src/KOKKOS/kokkos.h
+++ b/src/KOKKOS/kokkos.h
@ -16,6 +16,7 @@

 #include "pointers.h"
 #include "kokkos_type.h"
+#include "pair_kokkos.h"

 namespace LAMMPS_NS {

@ -40,6 +41,18 @@ class KokkosLMP : protected Pointers {
  ~KokkosLMP();
  void accelerator(int, char **);
  int neigh_count(int);
+
+  // return the NeedDup trait value for DeviceType when the neighbor
+  // list style is HALFTHREAD, otherwise 0
+  template<class DeviceType>
+  int need_dup()
+  {
+    if (neighflag != HALFTHREAD)
+      return 0;
+
+    return NeedDup<HALFTHREAD,DeviceType>::value;
+  }
+
 private:
  static void my_signal_handler(int);
 };
--- a/src/KOKKOS/kokkos_type.h
+++ b/src/KOKKOS/kokkos_type.h
@ -20,6 +20,9 @@
 #include <Kokkos_DualView.hpp>
 #include <impl/Kokkos_Timer.hpp>
 #include <Kokkos_Vectorization.hpp>
+#include <Kokkos_ScatterView.hpp>
+
+enum{FULL=1u,HALFTHREAD=2u,HALF=4u,N2=8u};  // neighbor list style flags

 #if defined(KOKKOS_HAVE_CXX11)
 #undef ISFINITE
@ -205,6 +208,100 @@ struct ExecutionSpaceFromDevice<Kokkos::Cuda> {
 };
 #endif

+
+// Determine memory traits for force array:
+// unmanaged by default, atomic + unmanaged for the HALFTHREAD neighbor list style
+template<int NEIGHFLAG>
+struct AtomicF {
+  enum {value = Kokkos::Unmanaged};  // generic case: no atomic trait
+};
+
+template<>
+struct AtomicF<HALFTHREAD> {
+  enum {value = Kokkos::Atomic|Kokkos::Unmanaged};  // HALFTHREAD: atomic trait added
+};
+
+
+// Determine atomicity of scatter view access (used as the access<> argument):
+// atomic for HALFTHREAD with CUDA, or with OpenMP/Threads if LMP_KOKKOS_USE_ATOMICS
+template<int NEIGHFLAG, class DeviceType>
+struct AtomicDup {
+  enum {value = Kokkos::Experimental::ScatterNonAtomic};  // default: non-atomic
+};
+
+#ifdef KOKKOS_ENABLE_CUDA
+template<>
+struct AtomicDup<HALFTHREAD,Kokkos::Cuda> {
+  enum {value = Kokkos::Experimental::ScatterAtomic};
+};
+#endif // KOKKOS_ENABLE_CUDA
+
+#ifdef LMP_KOKKOS_USE_ATOMICS
+
+#ifdef KOKKOS_ENABLE_OPENMP
+template<>
+struct AtomicDup<HALFTHREAD,Kokkos::OpenMP> {
+  enum {value = Kokkos::Experimental::ScatterAtomic};
+};
+#endif // KOKKOS_ENABLE_OPENMP
+
+#ifdef KOKKOS_ENABLE_THREADS
+template<>
+struct AtomicDup<HALFTHREAD,Kokkos::Threads> {
+  enum {value = Kokkos::Experimental::ScatterAtomic};
+};
+#endif // KOKKOS_ENABLE_THREADS
+
+#endif // LMP_KOKKOS_USE_ATOMICS
+
+
+// Determine duplication traits for scatter views: duplicated for HALFTHREAD
+// with OpenMP or Threads unless LMP_KOKKOS_USE_ATOMICS is defined, else not duplicated
+template<int NEIGHFLAG, class DeviceType>
+struct NeedDup {
+  enum {value = Kokkos::Experimental::ScatterNonDuplicated};  // default: no duplication
+};
+
+#ifndef LMP_KOKKOS_USE_ATOMICS
+
+#ifdef KOKKOS_ENABLE_OPENMP
+template<>
+struct NeedDup<HALFTHREAD,Kokkos::OpenMP> {
+  enum {value = Kokkos::Experimental::ScatterDuplicated};
+};
+#endif // KOKKOS_ENABLE_OPENMP
+
+#ifdef KOKKOS_ENABLE_THREADS
+template<>
+struct NeedDup<HALFTHREAD,Kokkos::Threads> {
+  enum {value = Kokkos::Experimental::ScatterDuplicated};
+};
+#endif // KOKKOS_ENABLE_THREADS
+
+#endif // !LMP_KOKKOS_USE_ATOMICS
+// Compile-time selector: get() returns either the duplicated or the non-duplicated argument
+template<int value, typename T1, typename T2>
+class ScatterViewHelper {};  // primary template is empty; only the specializations provide get()
+
+template<typename T1, typename T2>
+class ScatterViewHelper<Kokkos::Experimental::ScatterDuplicated,T1,T2> {
+public:
+  KOKKOS_INLINE_FUNCTION
+  static T1 get(const T1 &dup, const T2 &nondup) {
+    return dup;  // duplicated case: nondup is ignored
+  }
+};
+
+template<typename T1, typename T2>
+class ScatterViewHelper<Kokkos::Experimental::ScatterNonDuplicated,T1,T2> {
+public:
+  KOKKOS_INLINE_FUNCTION
+  static T2 get(const T1 &dup, const T2 &nondup) {
+    return nondup;  // non-duplicated case: dup is ignored
+  }
+};
+
+
 // define precision
 // handle global precision, force, energy, positions, kspace separately

--- a/src/KOKKOS/neigh_list_kokkos.h
+++ b/src/KOKKOS/neigh_list_kokkos.h
@ -20,8 +20,6 @@

 namespace LAMMPS_NS {

-enum{FULL=1u,HALFTHREAD=2u,HALF=4u,N2=8u};
-
 class AtomNeighbors
 {
 public:
--- a/src/KOKKOS/pair_eam_alloy_kokkos.cpp
+++ b/src/KOKKOS/pair_eam_alloy_kokkos.cpp
@ -109,7 +109,6 @@ void PairEAMAlloyKokkos<DeviceType>::compute(int eflag_in, int vflag_in)

  x = atomKK->k_x.view<DeviceType>();
  f = atomKK->k_f.view<DeviceType>();
-  v_rho = k_rho.view<DeviceType>();
  type = atomKK->k_type.view<DeviceType>();
  tag = atomKK->k_tag.view<DeviceType>();
  nlocal = atom->nlocal;
@ -122,6 +121,19 @@ void PairEAMAlloyKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  d_ilist = k_list->d_ilist;
  int inum = list->inum;

+  need_dup = lmp->kokkos->need_dup<DeviceType>();
+  if (need_dup) {
+    dup_rho   = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_rho);
+    dup_f     = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(f);
+    dup_eatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_eatom);
+    dup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_vatom);
+  } else {
+    ndup_rho   = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_rho);
+    ndup_f     = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(f);
+    ndup_eatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_eatom);
+    ndup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_vatom);
+  }
+
  copymode = 1;

  // zero out density
@ -233,6 +245,9 @@ void PairEAMAlloyKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
    }
  }

+  if (need_dup)
+    Kokkos::Experimental::contribute(f, dup_f);
+
  if (eflag_global) eng_vdwl += ev.evdwl;
  if (vflag_global) {
    virial[0] += ev.v[0];
@ -244,11 +259,15 @@ void PairEAMAlloyKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  }

  if (eflag_atom) {
+    if (need_dup)
+      Kokkos::Experimental::contribute(d_eatom, dup_eatom);
    k_eatom.template modify<DeviceType>();
    k_eatom.template sync<LMPHostType>();
  }

  if (vflag_atom) {
+    if (need_dup)
+      Kokkos::Experimental::contribute(d_vatom, dup_vatom);
    k_vatom.template modify<DeviceType>();
    k_vatom.template sync<LMPHostType>();
  }
@ -256,6 +275,14 @@ void PairEAMAlloyKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  if (vflag_fdotr) pair_virial_fdotr_compute(this);

  copymode = 0;
+
+  // free duplicated memory
+  if (need_dup) {
+    dup_rho   = decltype(dup_rho)();
+    dup_f     = decltype(dup_f)();
+    dup_eatom = decltype(dup_eatom)();
+    dup_vatom = decltype(dup_vatom)();
+  }
 }

 /* ----------------------------------------------------------------------
@ -503,8 +530,10 @@ void PairEAMAlloyKokkos<DeviceType>::operator()(TagPairEAMAlloyKernelA<NEIGHFLAG
  // rho = density at each atom
  // loop over neighbors of my atoms

-  // The rho array is atomic for Half/Thread neighbor style
-  Kokkos::View<F_FLOAT*, typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > rho = v_rho;
+  // The rho array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
+
+  auto v_rho = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_rho),decltype(ndup_rho)>::get(dup_rho,ndup_rho);
+  auto a_rho = v_rho.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  const int i = d_ilist[ii];
  const X_FLOAT xtmp = x(i,0);
@ -672,8 +701,10 @@ template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
 KOKKOS_INLINE_FUNCTION
 void PairEAMAlloyKokkos<DeviceType>::operator()(TagPairEAMAlloyKernelC<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int &ii, EV_FLOAT& ev) const {

-  // The f array is atomic for Half/Thread neighbor style
-  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
+  // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
+
+  auto v_f = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
+  auto a_f = v_f.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  const int i = d_ilist[ii];
  const X_FLOAT xtmp = x(i,0);
@ -780,18 +811,22 @@ void PairEAMAlloyKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const
  const int EFLAG = eflag;
  const int VFLAG = vflag_either;

-  // The eatom and vatom arrays are atomic for Half/Thread neighbor style
-  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_eatom = k_eatom.view<DeviceType>();
-  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_vatom = k_vatom.view<DeviceType>();
+  // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial
+
+  auto v_eatom = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom);
+  auto a_eatom = v_eatom.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();
+
+  auto v_vatom = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom);
+  auto a_vatom = v_vatom.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  if (EFLAG) {
    if (eflag_atom) {
      const E_FLOAT epairhalf = 0.5 * epair;
      if (NEIGHFLAG!=FULL) {
-        if (NEWTON_PAIR || i < nlocal) v_eatom[i] += epairhalf;
-        if (NEWTON_PAIR || j < nlocal) v_eatom[j] += epairhalf;
+        if (NEWTON_PAIR || i < nlocal) a_eatom[i] += epairhalf;
+        if (NEWTON_PAIR || j < nlocal) a_eatom[j] += epairhalf;
      } else {
-        v_eatom[i] += epairhalf;
+        a_eatom[i] += epairhalf;
      }
    }
  }
@ -835,28 +870,28 @@ void PairEAMAlloyKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const
    if (vflag_atom) {
      if (NEIGHFLAG!=FULL) {
        if (NEWTON_PAIR || i < nlocal) {
-          v_vatom(i,0) += 0.5*v0;
-          v_vatom(i,1) += 0.5*v1;
-          v_vatom(i,2) += 0.5*v2;
-          v_vatom(i,3) += 0.5*v3;
-          v_vatom(i,4) += 0.5*v4;
-          v_vatom(i,5) += 0.5*v5;
+          a_vatom(i,0) += 0.5*v0;
+          a_vatom(i,1) += 0.5*v1;
+          a_vatom(i,2) += 0.5*v2;
+          a_vatom(i,3) += 0.5*v3;
+          a_vatom(i,4) += 0.5*v4;
+          a_vatom(i,5) += 0.5*v5;
        }
        if (NEWTON_PAIR || j < nlocal) {
-        v_vatom(j,0) += 0.5*v0;
-        v_vatom(j,1) += 0.5*v1;
-        v_vatom(j,2) += 0.5*v2;
-        v_vatom(j,3) += 0.5*v3;
-        v_vatom(j,4) += 0.5*v4;
-        v_vatom(j,5) += 0.5*v5;
+        a_vatom(j,0) += 0.5*v0;
+        a_vatom(j,1) += 0.5*v1;
+        a_vatom(j,2) += 0.5*v2;
+        a_vatom(j,3) += 0.5*v3;
+        a_vatom(j,4) += 0.5*v4;
+        a_vatom(j,5) += 0.5*v5;
        }
      } else {
-        v_vatom(i,0) += 0.5*v0;
-        v_vatom(i,1) += 0.5*v1;
-        v_vatom(i,2) += 0.5*v2;
-        v_vatom(i,3) += 0.5*v3;
-        v_vatom(i,4) += 0.5*v4;
-        v_vatom(i,5) += 0.5*v5;
+        a_vatom(i,0) += 0.5*v0;
+        a_vatom(i,1) += 0.5*v1;
+        a_vatom(i,2) += 0.5*v2;
+        a_vatom(i,3) += 0.5*v3;
+        a_vatom(i,4) += 0.5*v4;
+        a_vatom(i,5) += 0.5*v5;
      }
    }
  }
@ -957,7 +992,7 @@ void PairEAMAlloyKokkos<DeviceType>::read_file(char *filename)
    fptr = force->open_potential(filename);
    if (fptr == NULL) {
      char str[128];
-      sprintf(str,"Cannot open EAM potential file %s",filename);
+      snprintf(str,128,"Cannot open EAM potential file %s",filename);
      error->one(FLERR,str);
    }
  }
@ -1165,4 +1200,3 @@ template class PairEAMAlloyKokkos<LMPDeviceType>;
 template class PairEAMAlloyKokkos<LMPHostType>;
 #endif
 }
-
--- a/src/KOKKOS/pair_eam_alloy_kokkos.h
+++ b/src/KOKKOS/pair_eam_alloy_kokkos.h
@ -129,10 +129,19 @@ class PairEAMAlloyKokkos : public PairEAM, public KokkosBase {
  typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
  typename ArrayTypes<DeviceType>::t_virial_array d_vatom;

+  int need_dup;
+  Kokkos::Experimental::ScatterView<F_FLOAT*, typename DAT::t_ffloat_1d::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_rho;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_f;
+  Kokkos::Experimental::ScatterView<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_eatom;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_vatom;
+  Kokkos::Experimental::ScatterView<F_FLOAT*, typename DAT::t_ffloat_1d::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_rho;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_f;
+  Kokkos::Experimental::ScatterView<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_eatom;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_vatom;
+
  DAT::tdual_ffloat_1d k_rho;
  DAT::tdual_ffloat_1d k_fp;
  typename AT::t_ffloat_1d d_rho;
-  typename AT::t_ffloat_1d v_rho;
  typename AT::t_ffloat_1d d_fp;
  HAT::t_ffloat_1d h_rho;
  HAT::t_ffloat_1d h_fp;
--- a/src/KOKKOS/pair_eam_fs_kokkos.cpp
+++ b/src/KOKKOS/pair_eam_fs_kokkos.cpp
@ -109,7 +109,6 @@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in)

  x = atomKK->k_x.view<DeviceType>();
  f = atomKK->k_f.view<DeviceType>();
-  v_rho = k_rho.view<DeviceType>();
  type = atomKK->k_type.view<DeviceType>();
  tag = atomKK->k_tag.view<DeviceType>();
  nlocal = atom->nlocal;
@ -122,6 +121,19 @@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  d_ilist = k_list->d_ilist;
  int inum = list->inum;

+  need_dup = lmp->kokkos->need_dup<DeviceType>();
+  if (need_dup) {
+    dup_rho   = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_rho);
+    dup_f     = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(f);
+    dup_eatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_eatom);
+    dup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_vatom);
+  } else {
+    ndup_rho   = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_rho);
+    ndup_f     = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(f);
+    ndup_eatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_eatom);
+    ndup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_vatom);
+  }
+
  copymode = 1;

  // zero out density
@ -233,6 +245,9 @@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
    }
  }

+  if (need_dup)
+    Kokkos::Experimental::contribute(f, dup_f);
+
  if (eflag_global) eng_vdwl += ev.evdwl;
  if (vflag_global) {
    virial[0] += ev.v[0];
@ -246,16 +261,28 @@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  if (vflag_fdotr) pair_virial_fdotr_compute(this);

  if (eflag_atom) {
+    if (need_dup)
+      Kokkos::Experimental::contribute(d_eatom, dup_eatom);
    k_eatom.template modify<DeviceType>();
    k_eatom.template sync<LMPHostType>();
  }

  if (vflag_atom) {
+    if (need_dup)
+      Kokkos::Experimental::contribute(d_vatom, dup_vatom);
    k_vatom.template modify<DeviceType>();
    k_vatom.template sync<LMPHostType>();
  }

  copymode = 0;
+
+  // free duplicated memory
+  if (need_dup) {
+    dup_rho   = decltype(dup_rho)();
+    dup_f     = decltype(dup_f)();
+    dup_eatom = decltype(dup_eatom)();
+    dup_vatom = decltype(dup_vatom)();
+  }
 }

 /* ----------------------------------------------------------------------
@ -503,8 +530,10 @@ void PairEAMFSKokkos<DeviceType>::operator()(TagPairEAMFSKernelA<NEIGHFLAG,NEWTO
  // rho = density at each atom
  // loop over neighbors of my atoms

-  // The rho array is atomic for Half/Thread neighbor style
-  Kokkos::View<F_FLOAT*, typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > rho = v_rho;
+  // The rho array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
+
+  auto v_rho = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_rho),decltype(ndup_rho)>::get(dup_rho,ndup_rho);
+  auto a_rho = v_rho.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  const int i = d_ilist[ii];
  const X_FLOAT xtmp = x(i,0);
@ -672,8 +701,10 @@ template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
 KOKKOS_INLINE_FUNCTION
 void PairEAMFSKokkos<DeviceType>::operator()(TagPairEAMFSKernelC<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int &ii, EV_FLOAT& ev) const {

-  // The f array is atomic for Half/Thread neighbor style
-  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
+  // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
+
+  auto v_f = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
+  auto a_f = v_f.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  const int i = d_ilist[ii];
  const X_FLOAT xtmp = x(i,0);
@ -780,18 +811,22 @@ void PairEAMFSKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int
  const int EFLAG = eflag;
  const int VFLAG = vflag_either;

-  // The eatom and vatom arrays are atomic for Half/Thread neighbor style
-  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_eatom = k_eatom.view<DeviceType>();
-  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_vatom = k_vatom.view<DeviceType>();
+  // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial
+
+  auto v_eatom = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom);
+  auto a_eatom = v_eatom.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();
+
+  auto v_vatom = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom);
+  auto a_vatom = v_vatom.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  if (EFLAG) {
    if (eflag_atom) {
      const E_FLOAT epairhalf = 0.5 * epair;
      if (NEIGHFLAG!=FULL) {
-        if (NEWTON_PAIR || i < nlocal) v_eatom[i] += epairhalf;
-        if (NEWTON_PAIR || j < nlocal) v_eatom[j] += epairhalf;
+        if (NEWTON_PAIR || i < nlocal) a_eatom[i] += epairhalf;
+        if (NEWTON_PAIR || j < nlocal) a_eatom[j] += epairhalf;
      } else {
-        v_eatom[i] += epairhalf;
+        a_eatom[i] += epairhalf;
      }
    }
  }
@ -835,28 +870,28 @@ void PairEAMFSKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int
    if (vflag_atom) {
      if (NEIGHFLAG!=FULL) {
        if (NEWTON_PAIR || i < nlocal) {
-          v_vatom(i,0) += 0.5*v0;
-          v_vatom(i,1) += 0.5*v1;
-          v_vatom(i,2) += 0.5*v2;
-          v_vatom(i,3) += 0.5*v3;
-          v_vatom(i,4) += 0.5*v4;
-          v_vatom(i,5) += 0.5*v5;
+          a_vatom(i,0) += 0.5*v0;
+          a_vatom(i,1) += 0.5*v1;
+          a_vatom(i,2) += 0.5*v2;
+          a_vatom(i,3) += 0.5*v3;
+          a_vatom(i,4) += 0.5*v4;
+          a_vatom(i,5) += 0.5*v5;
        }
        if (NEWTON_PAIR || j < nlocal) {
-        v_vatom(j,0) += 0.5*v0;
-        v_vatom(j,1) += 0.5*v1;
-        v_vatom(j,2) += 0.5*v2;
-        v_vatom(j,3) += 0.5*v3;
-        v_vatom(j,4) += 0.5*v4;
-        v_vatom(j,5) += 0.5*v5;
+        a_vatom(j,0) += 0.5*v0;
+        a_vatom(j,1) += 0.5*v1;
+        a_vatom(j,2) += 0.5*v2;
+        a_vatom(j,3) += 0.5*v3;
+        a_vatom(j,4) += 0.5*v4;
+        a_vatom(j,5) += 0.5*v5;
        }
      } else {
-        v_vatom(i,0) += 0.5*v0;
-        v_vatom(i,1) += 0.5*v1;
-        v_vatom(i,2) += 0.5*v2;
-        v_vatom(i,3) += 0.5*v3;
-        v_vatom(i,4) += 0.5*v4;
-        v_vatom(i,5) += 0.5*v5;
+        a_vatom(i,0) += 0.5*v0;
+        a_vatom(i,1) += 0.5*v1;
+        a_vatom(i,2) += 0.5*v2;
+        a_vatom(i,3) += 0.5*v3;
+        a_vatom(i,4) += 0.5*v4;
+        a_vatom(i,5) += 0.5*v5;
      }
    }
  }
@ -957,7 +992,7 @@ void PairEAMFSKokkos<DeviceType>::read_file(char *filename)
    fptr = force->open_potential(filename);
    if (fptr == NULL) {
      char str[128];
-      sprintf(str,"Cannot open EAM potential file %s",filename);
+      snprintf(str,128,"Cannot open EAM potential file %s",filename);
      error->one(FLERR,str);
    }
  }
@ -1174,4 +1209,3 @@ template class PairEAMFSKokkos<LMPDeviceType>;
 template class PairEAMFSKokkos<LMPHostType>;
 #endif
 }
-
--- a/src/KOKKOS/pair_eam_fs_kokkos.h
+++ b/src/KOKKOS/pair_eam_fs_kokkos.h
@ -129,10 +129,19 @@ class PairEAMFSKokkos : public PairEAM, public KokkosBase {
  typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
  typename ArrayTypes<DeviceType>::t_virial_array d_vatom;

+  int need_dup;
+  Kokkos::Experimental::ScatterView<F_FLOAT*, typename DAT::t_ffloat_1d::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_rho;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_f;
+  Kokkos::Experimental::ScatterView<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_eatom;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_vatom;
+  Kokkos::Experimental::ScatterView<F_FLOAT*, typename DAT::t_ffloat_1d::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_rho;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_f;
+  Kokkos::Experimental::ScatterView<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_eatom;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_vatom;
+
  DAT::tdual_ffloat_1d k_rho;
  DAT::tdual_ffloat_1d k_fp;
  typename AT::t_ffloat_1d d_rho;
-  typename AT::t_ffloat_1d v_rho;
  typename AT::t_ffloat_1d d_fp;
  HAT::t_ffloat_1d h_rho;
  HAT::t_ffloat_1d h_fp;
--- a/src/KOKKOS/pair_eam_kokkos.cpp
+++ b/src/KOKKOS/pair_eam_kokkos.cpp
@ -34,7 +34,6 @@

 using namespace LAMMPS_NS;

-
 /* ---------------------------------------------------------------------- */

 template<class DeviceType>
@ -104,7 +103,6 @@ void PairEAMKokkos<DeviceType>::compute(int eflag_in, int vflag_in)

  x = atomKK->k_x.view<DeviceType>();
  f = atomKK->k_f.view<DeviceType>();
-  v_rho = k_rho.view<DeviceType>();
  type = atomKK->k_type.view<DeviceType>();
  tag = atomKK->k_tag.view<DeviceType>();
  nlocal = atom->nlocal;
@ -117,6 +115,19 @@ void PairEAMKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  d_ilist = k_list->d_ilist;
  int inum = list->inum;

+  need_dup = lmp->kokkos->need_dup<DeviceType>();
+  if (need_dup) {
+    dup_rho   = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_rho);
+    dup_f     = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(f);
+    dup_eatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_eatom);
+    dup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_vatom);
+  } else {
+    ndup_rho   = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_rho);
+    ndup_f     = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(f);
+    ndup_eatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_eatom);
+    ndup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_vatom);
+  }
+
  copymode = 1;

  // zero out density
@ -228,6 +239,9 @@ void PairEAMKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
    }
  }

+  if (need_dup)
+    Kokkos::Experimental::contribute(f, dup_f);
+
  if (eflag_global) eng_vdwl += ev.evdwl;
  if (vflag_global) {
    virial[0] += ev.v[0];
@ -241,16 +255,28 @@ void PairEAMKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  if (vflag_fdotr) pair_virial_fdotr_compute(this);

  if (eflag_atom) {
+    if (need_dup)
+      Kokkos::Experimental::contribute(d_eatom, dup_eatom);
    k_eatom.template modify<DeviceType>();
    k_eatom.template sync<LMPHostType>();
  }

  if (vflag_atom) {
+    if (need_dup)
+      Kokkos::Experimental::contribute(d_vatom, dup_vatom);
    k_vatom.template modify<DeviceType>();
    k_vatom.template sync<LMPHostType>();
  }

  copymode = 0;
+
+  // free duplicated memory
+  if (need_dup) {
+    dup_rho   = decltype(dup_rho)();
+    dup_f     = decltype(dup_f)();
+    dup_eatom = decltype(dup_eatom)();
+    dup_vatom = decltype(dup_vatom)();
+  }
 }

 /* ----------------------------------------------------------------------
@ -503,8 +529,10 @@ void PairEAMKokkos<DeviceType>::operator()(TagPairEAMKernelA<NEIGHFLAG,NEWTON_PA
  // rho = density at each atom
  // loop over neighbors of my atoms

-  // The rho array is atomic for Half/Thread neighbor style
-  Kokkos::View<F_FLOAT*, typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > rho = v_rho;
+  // The rho array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
+
+  auto v_rho = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_rho),decltype(ndup_rho)>::get(dup_rho,ndup_rho);
+  auto a_rho = v_rho.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  const int i = d_ilist[ii];
  const X_FLOAT xtmp = x(i,0);
@ -538,13 +566,13 @@ void PairEAMKokkos<DeviceType>::operator()(TagPairEAMKernelA<NEIGHFLAG,NEWTON_PA
                  d_rhor_spline(d_type2rhor_ji,m,5))*p + d_rhor_spline(d_type2rhor_ji,m,6);
      if (NEWTON_PAIR || j < nlocal) {
        const int d_type2rhor_ij = d_type2rhor(itype,jtype);
-        rho[j] += ((d_rhor_spline(d_type2rhor_ij,m,3)*p + d_rhor_spline(d_type2rhor_ij,m,4))*p +
-                    d_rhor_spline(d_type2rhor_ij,m,5))*p + d_rhor_spline(d_type2rhor_ij,m,6);
+        a_rho[j] += ((d_rhor_spline(d_type2rhor_ij,m,3)*p + d_rhor_spline(d_type2rhor_ij,m,4))*p +
+                      d_rhor_spline(d_type2rhor_ij,m,5))*p + d_rhor_spline(d_type2rhor_ij,m,6);
      }
    }

  }
-  rho[i] += rhotmp;
+  a_rho[i] += rhotmp;
 }

 /* ---------------------------------------------------------------------- */
@ -670,8 +698,10 @@ template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
 KOKKOS_INLINE_FUNCTION
 void PairEAMKokkos<DeviceType>::operator()(TagPairEAMKernelC<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int &ii, EV_FLOAT& ev) const {

-  // The f array is atomic for Half/Thread neighbor style
-  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
+  // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
+
+  auto v_f = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
+  auto a_f = v_f.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  const int i = d_ilist[ii];
  const X_FLOAT xtmp = x(i,0);
@ -778,18 +808,22 @@ void PairEAMKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &
  const int EFLAG = eflag;
  const int VFLAG = vflag_either;

-  // The eatom and vatom arrays are atomic for Half/Thread neighbor style
-  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_eatom = k_eatom.view<DeviceType>();
-  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_vatom = k_vatom.view<DeviceType>();
+  // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial
+
+  auto v_eatom = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom);
+  auto a_eatom = v_eatom.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();
+
+  auto v_vatom = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom);
+  auto a_vatom = v_vatom.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  if (EFLAG) {
    if (eflag_atom) {
      const E_FLOAT epairhalf = 0.5 * epair;
      if (NEIGHFLAG!=FULL) {
-        if (NEWTON_PAIR || i < nlocal) v_eatom[i] += epairhalf;
-        if (NEWTON_PAIR || j < nlocal) v_eatom[j] += epairhalf;
+        if (NEWTON_PAIR || i < nlocal) a_eatom[i] += epairhalf;
+        if (NEWTON_PAIR || j < nlocal) a_eatom[j] += epairhalf;
      } else {
-        v_eatom[i] += epairhalf;
+        a_eatom[i] += epairhalf;
      }
    }
  }
@ -833,28 +867,28 @@ void PairEAMKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &
    if (vflag_atom) {
      if (NEIGHFLAG!=FULL) {
        if (NEWTON_PAIR || i < nlocal) {
-          v_vatom(i,0) += 0.5*v0;
-          v_vatom(i,1) += 0.5*v1;
-          v_vatom(i,2) += 0.5*v2;
-          v_vatom(i,3) += 0.5*v3;
-          v_vatom(i,4) += 0.5*v4;
-          v_vatom(i,5) += 0.5*v5;
+          a_vatom(i,0) += 0.5*v0;
+          a_vatom(i,1) += 0.5*v1;
+          a_vatom(i,2) += 0.5*v2;
+          a_vatom(i,3) += 0.5*v3;
+          a_vatom(i,4) += 0.5*v4;
+          a_vatom(i,5) += 0.5*v5;
        }
        if (NEWTON_PAIR || j < nlocal) {
-        v_vatom(j,0) += 0.5*v0;
-        v_vatom(j,1) += 0.5*v1;
-        v_vatom(j,2) += 0.5*v2;
-        v_vatom(j,3) += 0.5*v3;
-        v_vatom(j,4) += 0.5*v4;
-        v_vatom(j,5) += 0.5*v5;
+        a_vatom(j,0) += 0.5*v0;
+        a_vatom(j,1) += 0.5*v1;
+        a_vatom(j,2) += 0.5*v2;
+        a_vatom(j,3) += 0.5*v3;
+        a_vatom(j,4) += 0.5*v4;
+        a_vatom(j,5) += 0.5*v5;
        }
      } else {
-        v_vatom(i,0) += 0.5*v0;
-        v_vatom(i,1) += 0.5*v1;
-        v_vatom(i,2) += 0.5*v2;
-        v_vatom(i,3) += 0.5*v3;
-        v_vatom(i,4) += 0.5*v4;
-        v_vatom(i,5) += 0.5*v5;
+        a_vatom(i,0) += 0.5*v0;
+        a_vatom(i,1) += 0.5*v1;
+        a_vatom(i,2) += 0.5*v2;
+        a_vatom(i,3) += 0.5*v3;
+        a_vatom(i,4) += 0.5*v4;
+        a_vatom(i,5) += 0.5*v5;
      }
    }
  }
--- a/src/KOKKOS/pair_eam_kokkos.h
+++ b/src/KOKKOS/pair_eam_kokkos.h
@ -126,10 +126,19 @@ class PairEAMKokkos : public PairEAM, public KokkosBase {
  typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
  typename ArrayTypes<DeviceType>::t_virial_array d_vatom;

+  int need_dup;
+  Kokkos::Experimental::ScatterView<F_FLOAT*, typename DAT::t_ffloat_1d::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_rho;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_f;
+  Kokkos::Experimental::ScatterView<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_eatom;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_vatom;
+  Kokkos::Experimental::ScatterView<F_FLOAT*, typename DAT::t_ffloat_1d::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_rho;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_f;
+  Kokkos::Experimental::ScatterView<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_eatom;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_vatom;
+
  DAT::tdual_ffloat_1d k_rho;
  DAT::tdual_ffloat_1d k_fp;
  typename AT::t_ffloat_1d d_rho;
-  typename AT::t_ffloat_1d v_rho;
  typename AT::t_ffloat_1d d_fp;
  HAT::t_ffloat_1d h_rho;
  HAT::t_ffloat_1d h_fp;
--- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp
+++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp
@ -1725,7 +1725,7 @@ void PairExp6rxKokkos<DeviceType>::read_file(char *file)
    fp = force->open_potential(file);
    if (fp == NULL) {
      char str[128];
-      sprintf(str,"Cannot open exp6/rx potential file %s",file);
+      snprintf(str,128,"Cannot open exp6/rx potential file %s",file);
      error->one(FLERR,str);
    }
  }
--- a/src/KOKKOS/pair_kokkos.h
+++ b/src/KOKKOS/pair_kokkos.h
@ -23,6 +23,7 @@
 #include "neighbor_kokkos.h"
 #include "neigh_list_kokkos.h"
 #include "Kokkos_Vectorization.hpp"
+#include "Kokkos_ScatterView.hpp"

 namespace LAMMPS_NS {

@ -47,45 +48,48 @@ struct DoCoul<1> {
  typedef CoulTag type;
 };

-// Determine memory traits for force array
-// Do atomic trait when running HALFTHREAD neighbor list style
-template<int NEIGHFLAG>
-struct AtomicF {
-  enum {value = Kokkos::Unmanaged};
-};
-
-template<>
-struct AtomicF<HALFTHREAD> {
-  enum {value = Kokkos::Atomic|Kokkos::Unmanaged};
-};

 //Specialisation for Neighborlist types Half, HalfThread, Full
 template <class PairStyle, int NEIGHFLAG, bool STACKPARAMS, class Specialisation = void>
 struct PairComputeFunctor  {
  typedef typename PairStyle::device_type device_type ;
+  typedef ArrayTypes<device_type> AT;

  // Reduction type, contains evdwl, ecoul and virial[6]
  typedef EV_FLOAT value_type;

  // The copy of the pair style
  PairStyle c;
+  typename AT::t_f_array f;
+  typename AT::t_efloat_1d d_eatom;
+  typename AT::t_virial_array d_vatom;

  // The force array is atomic for Half/Thread neighbor style
-  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,
-               device_type,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > f;
+  //Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,
+  //             device_type,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > f;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[3], typename DAT::t_f_array::array_layout,device_type,Kokkos::Experimental::ScatterSum,NeedDup<NEIGHFLAG,device_type>::value > dup_f;

  // The eatom and vatom arrays are atomic for Half/Thread neighbor style
-  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,
-               device_type,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > eatom;
-  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,
-               device_type,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > vatom;
+  //Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,
+  //             device_type,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > eatom;
+  Kokkos::Experimental::ScatterView<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,device_type,Kokkos::Experimental::ScatterSum,NeedDup<NEIGHFLAG,device_type>::value > dup_eatom;
+
+  //Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,
+  //             device_type,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > vatom;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,device_type,Kokkos::Experimental::ScatterSum,NeedDup<NEIGHFLAG,device_type>::value > dup_vatom;
+
+

  NeighListKokkos<device_type> list;

  PairComputeFunctor(PairStyle* c_ptr,
                          NeighListKokkos<device_type>* list_ptr):
-  c(*c_ptr),f(c.f),eatom(c.d_eatom),
-  vatom(c.d_vatom),list(*list_ptr) {};
+  c(*c_ptr),list(*list_ptr) {
+    // allocate duplicated memory
+    dup_f     = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, NeedDup<NEIGHFLAG,device_type>::value >(c.f);
+    dup_eatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, NeedDup<NEIGHFLAG,device_type>::value >(c.d_eatom);
+    dup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, NeedDup<NEIGHFLAG,device_type>::value >(c.d_vatom);
+  };

  // Call cleanup_copy which sets allocations NULL which are destructed by the PairStyle
  ~PairComputeFunctor() {c.cleanup_copy();list.copymode = 1;};
@ -94,12 +98,25 @@ struct PairComputeFunctor  {
    return j >> SBBITS & 3;
  }

+  void contribute() {
+    Kokkos::Experimental::contribute(c.f, dup_f);
+
+    if (c.eflag_atom)
+      Kokkos::Experimental::contribute(c.d_eatom, dup_eatom);
+
+    if (c.vflag_atom)
+      Kokkos::Experimental::contribute(c.d_vatom, dup_vatom);
+  }
+
  // Loop over neighbors of one atom without coulomb interaction
  // This function is called in parallel
  template<int EVFLAG, int NEWTON_PAIR>
  KOKKOS_FUNCTION
  EV_FLOAT compute_item(const int& ii,
                        const NeighListKokkos<device_type> &list, const NoCoulTag&) const {
+
+    auto a_f = dup_f.template access<AtomicDup<NEIGHFLAG,device_type>::value>();
+
    EV_FLOAT ev;
    const int i = list.d_ilist[ii];
    const X_FLOAT xtmp = c.x(i,0);
@ -133,9 +150,9 @@ struct PairComputeFunctor  {
        fztmp += delz*fpair;

        if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < c.nlocal)) {
-          f(j,0) -= delx*fpair;
-          f(j,1) -= dely*fpair;
-          f(j,2) -= delz*fpair;
+          a_f(j,0) -= delx*fpair;
+          a_f(j,1) -= dely*fpair;
+          a_f(j,2) -= delz*fpair;
        }

        if (EVFLAG) {
@ -151,9 +168,9 @@ struct PairComputeFunctor  {

    }

-    f(i,0) += fxtmp;
-    f(i,1) += fytmp;
-    f(i,2) += fztmp;
+    a_f(i,0) += fxtmp;
+    a_f(i,1) += fytmp;
+    a_f(i,2) += fztmp;

    return ev;
  }
@ -164,6 +181,9 @@ struct PairComputeFunctor  {
  KOKKOS_FUNCTION
  EV_FLOAT compute_item(const int& ii,
                        const NeighListKokkos<device_type> &list, const CoulTag& ) const {
+
+    auto a_f = dup_f.template access<AtomicDup<NEIGHFLAG,device_type>::value>();
+
    EV_FLOAT ev;
    const int i = list.d_ilist[ii];
    const X_FLOAT xtmp = c.x(i,0);
@ -204,9 +224,9 @@ struct PairComputeFunctor  {
        fztmp += delz*fpair;

        if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < c.nlocal)) {
-          f(j,0) -= delx*fpair;
-          f(j,1) -= dely*fpair;
-          f(j,2) -= delz*fpair;
+          a_f(j,0) -= delx*fpair;
+          a_f(j,1) -= dely*fpair;
+          a_f(j,2) -= delz*fpair;
        }

        if (EVFLAG) {
@ -228,9 +248,9 @@ struct PairComputeFunctor  {
      }
    }

-    f(i,0) += fxtmp;
-    f(i,1) += fytmp;
-    f(i,2) += fztmp;
+    a_f(i,0) += fxtmp;
+    a_f(i,1) += fytmp;
+    a_f(i,2) += fztmp;

    return ev;
  }
@ -240,6 +260,9 @@ struct PairComputeFunctor  {
      const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
                  const F_FLOAT &dely, const F_FLOAT &delz) const
  {
+    auto a_eatom = dup_eatom.template access<AtomicDup<NEIGHFLAG,device_type>::value>();
+    auto a_vatom = dup_vatom.template access<AtomicDup<NEIGHFLAG,device_type>::value>();
+
    const int EFLAG = c.eflag;
    const int NEWTON_PAIR = c.newton_pair;
    const int VFLAG = c.vflag_either;
@ -247,8 +270,8 @@ struct PairComputeFunctor  {
    if (EFLAG) {
      if (c.eflag_atom) {
        const E_FLOAT epairhalf = 0.5 * epair;
-        if (NEWTON_PAIR || i < c.nlocal) eatom[i] += epairhalf;
-        if ((NEWTON_PAIR || j < c.nlocal) && NEIGHFLAG != FULL) eatom[j] += epairhalf;
+        if (NEWTON_PAIR || i < c.nlocal) a_eatom[i] += epairhalf;
+        if ((NEWTON_PAIR || j < c.nlocal) && NEIGHFLAG != FULL) a_eatom[j] += epairhalf;
      }
    }

@ -299,20 +322,20 @@ struct PairComputeFunctor  {

      if (c.vflag_atom) {
        if (NEWTON_PAIR || i < c.nlocal) {
-          vatom(i,0) += 0.5*v0;
-          vatom(i,1) += 0.5*v1;
-          vatom(i,2) += 0.5*v2;
-          vatom(i,3) += 0.5*v3;
-          vatom(i,4) += 0.5*v4;
-          vatom(i,5) += 0.5*v5;
+          a_vatom(i,0) += 0.5*v0;
+          a_vatom(i,1) += 0.5*v1;
+          a_vatom(i,2) += 0.5*v2;
+          a_vatom(i,3) += 0.5*v3;
+          a_vatom(i,4) += 0.5*v4;
+          a_vatom(i,5) += 0.5*v5;
        }
        if ((NEWTON_PAIR || j < c.nlocal) && NEIGHFLAG != FULL) {
-          vatom(j,0) += 0.5*v0;
-          vatom(j,1) += 0.5*v1;
-          vatom(j,2) += 0.5*v2;
-          vatom(j,3) += 0.5*v3;
-          vatom(j,4) += 0.5*v4;
-          vatom(j,5) += 0.5*v5;
+          a_vatom(j,0) += 0.5*v0;
+          a_vatom(j,1) += 0.5*v1;
+          a_vatom(j,2) += 0.5*v2;
+          a_vatom(j,3) += 0.5*v3;
+          a_vatom(j,4) += 0.5*v4;
+          a_vatom(j,5) += 0.5*v5;
        }
      }
    }
@ -351,6 +374,9 @@ struct PairComputeFunctor<PairStyle,N2,STACKPARAMS,Specialisation>  {
    return j >> SBBITS & 3;
  }

+
+  void contribute() {}
+
  template<int EVFLAG, int NEWTON_PAIR>
  KOKKOS_FUNCTION
  EV_FLOAT compute_item(const int& ii,
@ -489,10 +515,12 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable
    PairComputeFunctor<PairStyle,NEIGHFLAG,false,Specialisation > ff(fpair,list);
    if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
    else                              Kokkos::parallel_for(list->inum,ff);
+    ff.contribute();
  } else {
    PairComputeFunctor<PairStyle,NEIGHFLAG,true,Specialisation > ff(fpair,list);
    if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
    else                              Kokkos::parallel_for(list->inum,ff);
+    ff.contribute();
  }
  return ev;
 }
--- a/src/KOKKOS/pair_reaxc_kokkos.cpp
+++ b/src/KOKKOS/pair_reaxc_kokkos.cpp
@ -708,6 +708,19 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  d_neighbors = k_list->d_neighbors;
  d_ilist = k_list->d_ilist;

+  need_dup = lmp->kokkos->need_dup<DeviceType>();
+
+  // allocate duplicated memory
+  if (need_dup) {
+    dup_f            = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(f);
+    dup_eatom        = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_eatom);
+    dup_vatom        = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_vatom);
+  } else {
+    ndup_f            = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(f);
+    ndup_eatom        = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_eatom);
+    ndup_vatom        = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_vatom);
+  }
+
  if (eflag_global) {
    for (int i = 0; i < 14; i++)
      pvector[i] = 0.0;
@ -777,6 +790,15 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
    allocate_array();
  }

+  // allocate duplicated memory
+  if (need_dup) {
+    dup_dDeltap_self = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_dDeltap_self);
+    dup_total_bo     = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_total_bo);
+  } else {
+    ndup_dDeltap_self = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_dDeltap_self);
+    ndup_total_bo     = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_total_bo);
+  }
+
  // Neighbor lists for bond and hbond

  // try, resize if necessary
@ -799,7 +821,7 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
    if (neighflag == HALF)
      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBuildListsHalf<HALF> >(0,ignum),*this);
    else if (neighflag == HALFTHREAD)
-      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBuildListsHalf_LessAtomics<HALFTHREAD> >(0,ignum),*this);
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBuildListsHalf<HALFTHREAD> >(0,ignum),*this);
    else //(neighflag == FULL)
      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBuildListsFull>(0,ignum),*this);

@ -814,14 +836,40 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
    if (resize_hb) maxhb++;

    resize = resize_bo || resize_hb;
-    if (resize) allocate_array();
+    if (resize) {
+      allocate_array();
+      if (need_dup) {
+        dup_dDeltap_self = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_dDeltap_self);
+        dup_total_bo     = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_total_bo);
+      } else {
+        ndup_dDeltap_self = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_dDeltap_self);
+        ndup_total_bo     = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_total_bo);
+      }
+    }
  }

+  // allocate duplicated memory
+  if (need_dup) {
+    dup_CdDelta = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_CdDelta);
+    //dup_Cdbo    = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_Cdbo);
+    //dup_Cdbopi  = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_Cdbopi);
+    //dup_Cdbopi2 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_Cdbopi2);
+  } else { 
+    ndup_CdDelta = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_CdDelta);
+    //ndup_Cdbo    = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_Cdbo);
+    //ndup_Cdbopi  = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_Cdbopi);
+    //ndup_Cdbopi2 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_Cdbopi2);
+  }
+
+  // reduction over duplicated memory
+  if (need_dup)
+    Kokkos::Experimental::contribute(d_total_bo, dup_total_bo); // needed in BondOrder1
+
  // Bond order
  if (neighflag == HALF) {
    Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBondOrder1>(0,ignum),*this);
  } else if (neighflag == HALFTHREAD) {
-    Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBondOrder1_LessAtomics>(0,ignum),*this);
+    Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBondOrder1>(0,ignum),*this);
  }
  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBondOrder2>(0,ignum),*this);
  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBondOrder3>(0,ignum),*this);
@ -920,9 +968,30 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  pvector[7] = ev.ereax[8];
  ev_all.evdwl += ev.ereax[8];

+  // reduction over duplicated memory
+  if (need_dup) {
+    Kokkos::Experimental::contribute(d_dDeltap_self, dup_dDeltap_self); // needed in ComputeBond2
+    Kokkos::Experimental::contribute(d_CdDelta, dup_CdDelta); // needed in ComputeBond2
+
+    //Kokkos::Experimental::contribute(d_Cdbo, dup_Cdbo); // needed in UpdateBond, but also used in UpdateBond
+    //Kokkos::Experimental::contribute(d_Cdbopi, dup_Cdbopi); // needed in UpdateBond, but also used in UpdateBond
+    //Kokkos::Experimental::contribute(d_Cdbopi2, dup_Cdbopi2); // needed in UpdateBond, but also used in UpdateBond
+    //dup_Cdbo.reset_except(d_Cdbo);
+    //dup_Cdbopi.reset_except(d_Cdbopi);
+    //dup_Cdbopi2.reset_except(d_Cdbopi2);
+  }
+
  // Bond force
  if (neighflag == HALF) {
    Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxUpdateBond<HALF> >(0,ignum),*this);
+
+    // reduction over duplicated memory
+    //if (need_dup) {
+    //  Kokkos::Experimental::contribute(d_Cdbo, dup_Cdbo); // needed in ComputeBond2
+    //  Kokkos::Experimental::contribute(d_Cdbopi, dup_Cdbopi); // needed in ComputeBond2
+    //  Kokkos::Experimental::contribute(d_Cdbopi2, dup_Cdbopi2); // needed in ComputeBond2
+    //}
+
    if (evflag)
      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond2<HALF,1> >(0,ignum),*this,ev);
    else
@ -931,6 +1000,14 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
    pvector[0] += ev.evdwl;
  } else { //if (neighflag == HALFTHREAD) {
    Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxUpdateBond<HALFTHREAD> >(0,ignum),*this);
+
+    // reduction over duplicated memory
+    //if (need_dup) {
+    //  Kokkos::Experimental::contribute(d_Cdbo, dup_Cdbo); // needed in ComputeBond2
+    //  Kokkos::Experimental::contribute(d_Cdbopi, dup_Cdbopi); // needed in ComputeBond2
+    //  Kokkos::Experimental::contribute(d_Cdbopi2, dup_Cdbopi2); // needed in ComputeBond2
+    //}
+
    if (evflag)
      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond2<HALFTHREAD,1> >(0,ignum),*this,ev);
    else
@ -939,6 +1016,10 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
    pvector[0] += ev.evdwl;
  }

+  // reduction over duplicated memory
+  if (need_dup)
+    Kokkos::Experimental::contribute(f, dup_f);
+
  if (eflag_global) {
    eng_vdwl += ev_all.evdwl;
    eng_coul += ev_all.ecoul;
@ -955,11 +1036,15 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  if (vflag_fdotr) pair_virial_fdotr_compute(this);

  if (eflag_atom) {
+    if (need_dup)
+      Kokkos::Experimental::contribute(d_eatom, dup_eatom);
    k_eatom.template modify<DeviceType>();
    k_eatom.template sync<LMPHostType>();
  }

  if (vflag_atom) {
+    if (need_dup)
+      Kokkos::Experimental::contribute(d_vatom, dup_vatom);
    k_vatom.template modify<DeviceType>();
    k_vatom.template sync<LMPHostType>();
  }
@ -968,6 +1053,19 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
    FindBondSpecies();

  copymode = 0;
+
+  // free duplicated memory
+  if (need_dup) {
+    dup_f            = decltype(dup_f)();
+    dup_dDeltap_self = decltype(dup_dDeltap_self)();
+    dup_total_bo     = decltype(dup_total_bo)();
+    dup_CdDelta      = decltype(dup_CdDelta)();
+    //dup_Cdbo         = decltype(dup_Cdbo)();
+    //dup_Cdbopi       = decltype(dup_Cdbopi)();
+    //dup_Cdbopi2      = decltype(dup_Cdbopi2)();
+    dup_eatom        = decltype(dup_eatom)();
+    dup_vatom        = decltype(dup_vatom)();
+  }
 }

 /* ---------------------------------------------------------------------- */
@ -1006,8 +1104,10 @@ template<int NEIGHFLAG, int EVFLAG>
 KOKKOS_INLINE_FUNCTION
 void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeLJCoulomb<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT_REAX& ev) const {

-  // The f array is atomic for Half/Thread neighbor style
-  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
+  // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
+
+  auto v_f = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
+  auto a_f = v_f.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  F_FLOAT powr_vdw, powgi_vdw, fn13, dfn13, exp1, exp2, etmp;
  F_FLOAT evdwl, fvdwl;
@ -1165,8 +1265,10 @@ template<int NEIGHFLAG, int EVFLAG>
 KOKKOS_INLINE_FUNCTION
 void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeTabulatedLJCoulomb<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT_REAX& ev) const {

-  // The f array is atomic for Half/Thread neighbor style
-  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
+  // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
+
+  auto v_f = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
+  auto a_f = v_f.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  const int i = d_ilist[ii];
  const X_FLOAT xtmp = x(i,0);
@ -1366,18 +1468,18 @@ void PairReaxCKokkos<DeviceType>::operator()(PairReaxZero, const int &n) const {
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void PairReaxCKokkos<DeviceType>::operator()(PairReaxZeroEAtom, const int &i) const {
-  v_eatom(i) = 0.0;
+  d_eatom(i) = 0.0;
 }

 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void PairReaxCKokkos<DeviceType>::operator()(PairReaxZeroVAtom, const int &i) const {
-  v_vatom(i,0) = 0.0;
-  v_vatom(i,1) = 0.0;
-  v_vatom(i,2) = 0.0;
-  v_vatom(i,3) = 0.0;
-  v_vatom(i,4) = 0.0;
-  v_vatom(i,5) = 0.0;
+  d_vatom(i,0) = 0.0;
+  d_vatom(i,1) = 0.0;
+  d_vatom(i,2) = 0.0;
+  d_vatom(i,3) = 0.0;
+  d_vatom(i,4) = 0.0;
+  d_vatom(i,5) = 0.0;
 }

 /* ---------------------------------------------------------------------- */
@ -1547,8 +1649,11 @@ void PairReaxCKokkos<DeviceType>::operator()(PairReaxBuildListsHalf<NEIGHFLAG>,
  if (d_resize_bo() || d_resize_hb())
    return;

-  Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_dDeltap_self = d_dDeltap_self;
-  Kokkos::View<F_FLOAT*, typename DAT::t_float_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_total_bo = d_total_bo;
+  auto v_dDeltap_self = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_dDeltap_self),decltype(ndup_dDeltap_self)>::get(dup_dDeltap_self,ndup_dDeltap_self);
+  auto a_dDeltap_self = v_dDeltap_self.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();
+
+  auto v_total_bo = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_total_bo),decltype(ndup_total_bo)>::get(dup_total_bo,ndup_total_bo);
+  auto a_total_bo = v_total_bo.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  const int i = d_ilist[ii];
  const X_FLOAT xtmp = x(i,0);
@ -2239,10 +2344,8 @@ template<int NEIGHFLAG, int EVFLAG>
 KOKKOS_INLINE_FUNCTION
 void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeMulti2<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT_REAX& ev) const {

-  Kokkos::View<F_FLOAT*, typename DAT::t_float_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_CdDelta = d_CdDelta;
-  Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_Cdbo = d_Cdbo;
-  Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_Cdbopi = d_Cdbopi;
-  Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_Cdbopi2 = d_Cdbopi2;
+  auto v_CdDelta = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_CdDelta),decltype(ndup_CdDelta)>::get(dup_CdDelta,ndup_CdDelta);
+  auto a_CdDelta = v_CdDelta.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  const int i = d_ilist[ii];
  const int itype = type(i);
@ -2393,9 +2496,12 @@ template<int NEIGHFLAG, int EVFLAG>
 KOKKOS_INLINE_FUNCTION
 void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeAngular<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT_REAX& ev) const {

-  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
+  auto v_f = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
+  auto a_f = v_f.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();
  Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_Cdbo = d_Cdbo;
-  Kokkos::View<F_FLOAT*, typename DAT::t_float_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_CdDelta = d_CdDelta;
+
+  auto v_CdDelta = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_CdDelta),decltype(ndup_CdDelta)>::get(dup_CdDelta,ndup_CdDelta);
+  auto a_CdDelta = v_CdDelta.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  const int i = d_ilist[ii];
  const int itype = type(i);
@ -2702,9 +2808,13 @@ template<int NEIGHFLAG, int EVFLAG>
 KOKKOS_INLINE_FUNCTION
 void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeTorsion<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT_REAX& ev) const {

-  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
-  Kokkos::View<F_FLOAT*, typename DAT::t_float_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_CdDelta = d_CdDelta;
+  auto v_f = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
+  auto a_f = v_f.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();
+
+  auto v_CdDelta = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_CdDelta),decltype(ndup_CdDelta)>::get(dup_CdDelta,ndup_CdDelta);
+  auto a_CdDelta = v_CdDelta.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();
  Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_Cdbo = d_Cdbo;
+  //auto a_Cdbo = dup_Cdbo.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  // in reaxc_torsion_angles: j = i, k = j, i = k;

@ -3074,7 +3184,8 @@ template<int NEIGHFLAG, int EVFLAG>
 KOKKOS_INLINE_FUNCTION
 void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeHydrogen<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT_REAX& ev) const {

-  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
+  auto v_f = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
+  auto a_f = v_f.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  int hblist[MAX_BONDS];
  F_FLOAT theta, cos_theta, sin_xhz4, cos_xhz1, sin_theta2;
@ -3224,6 +3335,9 @@ void PairReaxCKokkos<DeviceType>::operator()(PairReaxUpdateBond<NEIGHFLAG>, cons
  Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_Cdbo = d_Cdbo;
  Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_Cdbopi = d_Cdbopi;
  Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_Cdbopi2 = d_Cdbopi2;
+  //auto a_Cdbo = dup_Cdbo.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();
+  //auto a_Cdbopi = dup_Cdbopi.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();
+  //auto a_Cdbopi2 = dup_Cdbopi2.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  const int i = d_ilist[ii];
  const tagint itag = tag(i);
@ -3270,8 +3384,11 @@ template<int NEIGHFLAG, int EVFLAG>
 KOKKOS_INLINE_FUNCTION
 void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeBond1<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT_REAX& ev) const {

-  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
-  Kokkos::View<F_FLOAT*, typename DAT::t_ffloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_CdDelta = d_CdDelta;
+  auto v_f = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
+  auto a_f = v_f.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();
+
+  auto v_CdDelta = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_CdDelta),decltype(ndup_CdDelta)>::get(dup_CdDelta,ndup_CdDelta);
+  auto a_CdDelta = v_CdDelta.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  F_FLOAT delij[3];
  F_FLOAT p_be1, p_be2, De_s, De_p, De_pp, pow_BOs_be2, exp_be12, CEbo, ebond;
@ -3408,7 +3525,8 @@ template<int NEIGHFLAG, int EVFLAG>
 KOKKOS_INLINE_FUNCTION
 void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeBond2<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT_REAX& ev) const {

-  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
+  auto v_f = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
+  auto a_f = v_f.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  F_FLOAT delij[3], delik[3], deljk[3], tmpvec[3];
  F_FLOAT dBOp_i[3], dBOp_k[3], dln_BOp_pi[3], dln_BOp_pi2[3];
@ -3620,9 +3738,13 @@ void PairReaxCKokkos<DeviceType>::ev_tally(EV_FLOAT_REAX &ev, const int &i, cons
 {
  const int VFLAG = vflag_either;

-  // The eatom and vatom arrays are atomic for Half/Thread neighbor style
-  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_eatom = v_eatom;
-  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_vatom = v_vatom;
+  // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial
+
+  auto v_eatom = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom);
+  auto a_eatom = v_eatom.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();
+
+  auto v_vatom = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom);
+  auto a_vatom = v_vatom.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  if (eflag_atom) {
    const E_FLOAT epairhalf = 0.5 * epair;
@ -3685,10 +3807,13 @@ void PairReaxCKokkos<DeviceType>::e_tally(EV_FLOAT_REAX &ev, const int &i, const
      const F_FLOAT &epair) const
 {

-  // The eatom array is atomic for Half/Thread neighbor style
+  // The eatom array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
+

  if (eflag_atom) {
-    Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_eatom = v_eatom;
+    auto v_eatom = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom);
+    auto a_eatom = v_eatom.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();
+
    const E_FLOAT epairhalf = 0.5 * epair;
    a_eatom[i] += epairhalf;
    a_eatom[j] += epairhalf;
@ -3703,8 +3828,9 @@ KOKKOS_INLINE_FUNCTION
 void PairReaxCKokkos<DeviceType>::e_tally_single(EV_FLOAT_REAX &ev, const int &i,
      const F_FLOAT &epair) const
 {
-  // The eatom array is atomic for Half/Thread neighbor style
-  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_eatom = v_eatom;
+  // Single-atom energy tally: adds the full epair to eatom[i] only; ev is not modified in this body. The eatom array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
+  auto v_eatom = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom);  // compile-time choice keyed on NeedDup; presumably yields dup_eatom when true, else ndup_eatom (helper defined elsewhere - confirm)
+  auto a_eatom = v_eatom.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();  // accessor through which the += below is applied

   a_eatom[i] += epair;  // unlike e_tally, no 0.5 split and no second atom index
 }
@ -3737,7 +3863,9 @@ void PairReaxCKokkos<DeviceType>::v_tally(EV_FLOAT_REAX &ev, const int &i,
  }

  if (vflag_atom) {
-    Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_vatom = v_vatom;
+    auto v_vatom = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom);
+    auto a_vatom = v_vatom.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();
+
    a_vatom(i,0) += v[0]; a_vatom(i,1) += v[1]; a_vatom(i,2) += v[2];
    a_vatom(i,3) += v[3]; a_vatom(i,4) += v[4]; a_vatom(i,5) += v[5];
  }
@ -3752,8 +3880,9 @@ void PairReaxCKokkos<DeviceType>::v_tally3(EV_FLOAT_REAX &ev, const int &i, cons
  F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const
 {

-  // The eatom and vatom arrays are atomic for Half/Thread neighbor style
-  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_vatom = v_vatom;
+  // The vatom array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
+  auto v_vatom = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom);
+  auto a_vatom = v_vatom.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  F_FLOAT v[6];

@ -3793,7 +3922,8 @@ void PairReaxCKokkos<DeviceType>::v_tally4(EV_FLOAT_REAX &ev, const int &i, cons
  const int &l, F_FLOAT *fi, F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *dril, F_FLOAT *drjl, F_FLOAT *drkl) const
 {

-  // The vatom array is atomic for Half/Thread neighbor style
+  // The vatom array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
+
  F_FLOAT v[6];

  v[0] = dril[0]*fi[0] + drjl[0]*fj[0] + drkl[0]*fk[0];
@ -3813,7 +3943,9 @@ void PairReaxCKokkos<DeviceType>::v_tally4(EV_FLOAT_REAX &ev, const int &i, cons
  }

  if (vflag_atom) {
-    Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_vatom = v_vatom;
+    auto v_vatom = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom);
+    auto a_vatom = v_vatom.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();
+
    a_vatom(i,0) += 0.25 * v[0]; a_vatom(i,1) += 0.25 * v[1]; a_vatom(i,2) += 0.25 * v[2];
    a_vatom(i,3) += 0.25 * v[3]; a_vatom(i,4) += 0.25 * v[4]; a_vatom(i,5) += 0.25 * v[5];
    a_vatom(j,0) += 0.25 * v[0]; a_vatom(j,1) += 0.25 * v[1]; a_vatom(j,2) += 0.25 * v[2];
@ -3910,13 +4042,13 @@ void PairReaxCKokkos<DeviceType>::ev_setup(int eflag, int vflag, int)
    maxeatom = atom->nmax;
    memoryKK->destroy_kokkos(k_eatom,eatom);
    memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
-    v_eatom = k_eatom.view<DeviceType>();
+    d_eatom = k_eatom.view<DeviceType>();
  }
  if (vflag_atom && atom->nmax > maxvatom) {
    maxvatom = atom->nmax;
    memoryKK->destroy_kokkos(k_vatom,vatom);
    memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom");
-    v_vatom = k_vatom.view<DeviceType>();
+    d_vatom = k_vatom.view<DeviceType>();
  }

  // zero accumulators
--- a/src/KOKKOS/pair_reaxc_kokkos.h
+++ b/src/KOKKOS/pair_reaxc_kokkos.h
@ -380,11 +380,10 @@ class PairReaxCKokkos : public PairReaxC {
  typename AT::t_tagint_1d_randomread molecule;

  DAT::tdual_efloat_1d k_eatom;
-  typename AT::t_efloat_1d v_eatom;
+  typename AT::t_efloat_1d d_eatom;

  DAT::tdual_virial_array k_vatom;
-  typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
-  typename AT::t_virial_array v_vatom;
+  typename AT::t_virial_array d_vatom;
  HAT::t_virial_array h_vatom;

  DAT::tdual_float_1d k_tap;
@ -401,6 +400,28 @@ class PairReaxCKokkos : public PairReaxC {
  typename AT::t_ffloat_2d_dl d_C1dbopi2, d_C2dbopi2, d_C3dbopi2, d_C4dbopi2;
  typename AT::t_ffloat_2d_dl d_Cdbo, d_Cdbopi, d_Cdbopi2, d_dDeltap_self;

+  Kokkos::Experimental::ScatterView<F_FLOAT*, typename DAT::t_float_1d::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_total_bo;
+  Kokkos::Experimental::ScatterView<F_FLOAT*, typename DAT::t_float_1d::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_CdDelta;
+  Kokkos::Experimental::ScatterView<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_eatom;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_f;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_vatom;
+  Kokkos::Experimental::ScatterView<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_dDeltap_self;
+  Kokkos::Experimental::ScatterView<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_Cdbo;
+  Kokkos::Experimental::ScatterView<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_Cdbopi;
+  Kokkos::Experimental::ScatterView<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_Cdbopi2;
+  // dup_* above are ScatterSum/ScatterDuplicated views; ndup_* below repeat the same element types and layouts with ScatterNonDuplicated
+  Kokkos::Experimental::ScatterView<F_FLOAT*, typename DAT::t_float_1d::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_total_bo;
+  Kokkos::Experimental::ScatterView<F_FLOAT*, typename DAT::t_float_1d::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_CdDelta;
+  Kokkos::Experimental::ScatterView<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_eatom;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_f;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_vatom;
+  Kokkos::Experimental::ScatterView<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_dDeltap_self;
+  Kokkos::Experimental::ScatterView<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_Cdbo;
+  Kokkos::Experimental::ScatterView<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_Cdbopi;
+  Kokkos::Experimental::ScatterView<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_Cdbopi2;
+
+  int need_dup;  // NOTE(review): presumably set from lmp->kokkos->need_dup<DeviceType>() as the SNAP and SW pair styles in this change do; the ReaxC assignment is not visible here - confirm
+
  typedef Kokkos::DualView<F_FLOAT**[7],typename DeviceType::array_layout,DeviceType> tdual_ffloat_2d_n7;
  typedef typename tdual_ffloat_2d_n7::t_dev_const_randomread t_ffloat_2d_n7_randomread;
  typedef typename tdual_ffloat_2d_n7::t_host t_host_ffloat_2d_n7;
--- a/src/KOKKOS/pair_snap_kokkos.h
+++ b/src/KOKKOS/pair_snap_kokkos.h
@ -129,6 +129,12 @@ inline double dist2(double* x,double* y);
  typename AT::t_f_array f;
  typename AT::t_int_1d_randomread type;

+  int need_dup;  // set in compute() from lmp->kokkos->need_dup<DeviceType>(); nonzero => dup_f/dup_vatom are created and contributed back, otherwise ndup_f/ndup_vatom are created
+  Kokkos::Experimental::ScatterView<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_f;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_vatom;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_f;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_vatom;
+
  friend void pair_virial_fdotr_compute<PairSNAPKokkos>(PairSNAPKokkos*);

 };
--- a/src/KOKKOS/pair_snap_kokkos_impl.h
+++ b/src/KOKKOS/pair_snap_kokkos_impl.h
@ -170,6 +170,15 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  d_ilist = k_list->d_ilist;
  int inum = list->inum;

+  need_dup = lmp->kokkos->need_dup<DeviceType>();
+  if (need_dup) {
+    dup_f     = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(f);
+    dup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_vatom);
+  } else {
+    ndup_f     = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(f);
+    ndup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_vatom);
+  }
+
  /*
  for (int i = 0; i < nlocal; i++) {
    typename t_neigh_list::t_neighs neighs_i = neigh_list.get_neighs(i);
@ -232,6 +241,9 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
 //if (step%10==0)
 //        printf(" %e %e %e %e %e (%e %e): %e\n",t1,t2,t3,t4,t5,t6,t7,t1+t2+t3+t4+t5);

+  if (need_dup)
+    Kokkos::Experimental::contribute(f, dup_f);
+
  if (eflag_global) eng_vdwl += ev.evdwl;
  if (vflag_global) {
    virial[0] += ev.v[0];
@ -244,18 +256,28 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)

  if (vflag_fdotr) pair_virial_fdotr_compute(this);

+
  if (eflag_atom) {
    k_eatom.template modify<DeviceType>();
    k_eatom.template sync<LMPHostType>();
  }

  if (vflag_atom) {
+    if (need_dup)
+      Kokkos::Experimental::contribute(d_vatom, dup_vatom);
    k_vatom.template modify<DeviceType>();
    k_vatom.template sync<LMPHostType>();
  }

  atomKK->modified(execution_space,F_MASK);
+
  copymode = 0;
+
+  // free duplicated memory
+  if (need_dup) {
+    dup_f            = decltype(dup_f)();
+    dup_vatom        = decltype(dup_vatom)();
+  }
 }

 /* ----------------------------------------------------------------------
@ -349,8 +371,11 @@ template<class DeviceType>
 template<int NEIGHFLAG, int EVFLAG>
 KOKKOS_INLINE_FUNCTION
 void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAP<NEIGHFLAG,EVFLAG> >::member_type& team, EV_FLOAT& ev) const {
-  // The f array is atomic for Half/Thread neighbor style
-  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
+
+  // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
+
+  auto v_f = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
+  auto a_f = v_f.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  const int ii = team.league_rank();
  const int i = d_ilist[ii];
@ -591,8 +616,10 @@ void PairSNAPKokkos<DeviceType>::v_tally_xyz(EV_FLOAT &ev, const int &i, const i
      const F_FLOAT &fx, const F_FLOAT &fy, const F_FLOAT &fz,
      const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const
 {
-  // The vatom array is atomic for Half/Thread neighbor style
-  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_vatom = k_vatom.view<DeviceType>();
+  // The vatom array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
+
+  auto v_vatom = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom);
+  auto a_vatom = v_vatom.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  const E_FLOAT v0 = delx*fx;
  const E_FLOAT v1 = dely*fy;
@ -611,18 +638,18 @@ void PairSNAPKokkos<DeviceType>::v_tally_xyz(EV_FLOAT &ev, const int &i, const i
  }

  if (vflag_atom) {
-    v_vatom(i,0) += 0.5*v0;
-    v_vatom(i,1) += 0.5*v1;
-    v_vatom(i,2) += 0.5*v2;
-    v_vatom(i,3) += 0.5*v3;
-    v_vatom(i,4) += 0.5*v4;
-    v_vatom(i,5) += 0.5*v5;
-    v_vatom(j,0) += 0.5*v0;
-    v_vatom(j,1) += 0.5*v1;
-    v_vatom(j,2) += 0.5*v2;
-    v_vatom(j,3) += 0.5*v3;
-    v_vatom(j,4) += 0.5*v4;
-    v_vatom(j,5) += 0.5*v5;
+    a_vatom(i,0) += 0.5*v0;
+    a_vatom(i,1) += 0.5*v1;
+    a_vatom(i,2) += 0.5*v2;
+    a_vatom(i,3) += 0.5*v3;
+    a_vatom(i,4) += 0.5*v4;
+    a_vatom(i,5) += 0.5*v5;
+    a_vatom(j,0) += 0.5*v0;
+    a_vatom(j,1) += 0.5*v1;
+    a_vatom(j,2) += 0.5*v2;
+    a_vatom(j,3) += 0.5*v3;
+    a_vatom(j,4) += 0.5*v4;
+    a_vatom(j,5) += 0.5*v5;
  }
 }

--- a/src/KOKKOS/pair_sw_kokkos.cpp
+++ b/src/KOKKOS/pair_sw_kokkos.cpp
@ -115,6 +115,17 @@ void PairSWKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  d_numneigh = k_list->d_numneigh;
  d_neighbors = k_list->d_neighbors;

+  need_dup = lmp->kokkos->need_dup<DeviceType>();
+  if (need_dup) {
+    dup_f     = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(f);
+    dup_eatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_eatom);
+    dup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_vatom);
+  } else {
+    ndup_f     = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(f);
+    ndup_eatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_eatom);
+    ndup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_vatom);
+  }
+
  copymode = 1;

  EV_FLOAT ev;
@ -160,6 +171,9 @@ void PairSWKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
    ev_all += ev;
  }

+  if (need_dup)
+    Kokkos::Experimental::contribute(f, dup_f);
+
  if (eflag_global) eng_vdwl += ev_all.evdwl;
  if (vflag_global) {
    virial[0] += ev_all.v[0];
@ -171,11 +185,15 @@ void PairSWKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  }

  if (eflag_atom) {
+    if (need_dup)
+      Kokkos::Experimental::contribute(d_eatom, dup_eatom);
    k_eatom.template modify<DeviceType>();
    k_eatom.template sync<LMPHostType>();
  }

  if (vflag_atom) {
+    if (need_dup)
+      Kokkos::Experimental::contribute(d_vatom, dup_vatom);
    k_vatom.template modify<DeviceType>();
    k_vatom.template sync<LMPHostType>();
  }
@ -183,6 +201,13 @@ void PairSWKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  if (vflag_fdotr) pair_virial_fdotr_compute(this);

  copymode = 0;
+
+  // free duplicated memory
+  if (need_dup) {
+    dup_f            = decltype(dup_f)();
+    dup_eatom        = decltype(dup_eatom)();
+    dup_vatom        = decltype(dup_vatom)();
+  }
 }


@ -222,9 +247,10 @@ template<int NEIGHFLAG, int EVFLAG>
 KOKKOS_INLINE_FUNCTION
 void PairSWKokkos<DeviceType>::operator()(TagPairSWComputeHalf<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {

-  // The f array is atomic
+  // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial

-  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
+  auto v_f = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
+  auto a_f = v_f.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  F_FLOAT delr1[3],delr2[3],fj[3],fk[3];
  F_FLOAT evdwl = 0.0;
@ -777,17 +803,19 @@ void PairSWKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &j
 {
  const int VFLAG = vflag_either;

-  // The eatom and vatom arrays are atomic for half/thread neighbor list
+  // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial

-  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_eatom = k_eatom.view<DeviceType>();
-  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_vatom = k_vatom.view<DeviceType>();
+  auto v_eatom = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom);
+  auto a_eatom = v_eatom.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

+  auto v_vatom = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom);
+  auto a_vatom = v_vatom.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  if (eflag_atom) {
    const E_FLOAT epairhalf = 0.5 * epair;
-    v_eatom[i] += epairhalf;
+    a_eatom[i] += epairhalf;
    if (NEIGHFLAG != FULL)
-      v_eatom[j] += epairhalf;
+      a_eatom[j] += epairhalf;
  }

  if (VFLAG) {
@ -817,20 +845,20 @@ void PairSWKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &j
    }

    if (vflag_atom) {
-      v_vatom(i,0) += 0.5*v0;
-      v_vatom(i,1) += 0.5*v1;
-      v_vatom(i,2) += 0.5*v2;
-      v_vatom(i,3) += 0.5*v3;
-      v_vatom(i,4) += 0.5*v4;
-      v_vatom(i,5) += 0.5*v5;
+      a_vatom(i,0) += 0.5*v0;
+      a_vatom(i,1) += 0.5*v1;
+      a_vatom(i,2) += 0.5*v2;
+      a_vatom(i,3) += 0.5*v3;
+      a_vatom(i,4) += 0.5*v4;
+      a_vatom(i,5) += 0.5*v5;

      if (NEIGHFLAG != FULL) {
-        v_vatom(j,0) += 0.5*v0;
-        v_vatom(j,1) += 0.5*v1;
-        v_vatom(j,2) += 0.5*v2;
-        v_vatom(j,3) += 0.5*v3;
-        v_vatom(j,4) += 0.5*v4;
-        v_vatom(j,5) += 0.5*v5;
+        a_vatom(j,0) += 0.5*v0;
+        a_vatom(j,1) += 0.5*v1;
+        a_vatom(j,2) += 0.5*v2;
+        a_vatom(j,3) += 0.5*v3;
+        a_vatom(j,4) += 0.5*v4;
+        a_vatom(j,5) += 0.5*v5;
      }
    }
  }
@ -853,17 +881,20 @@ void PairSWKokkos<DeviceType>::ev_tally3(EV_FLOAT &ev, const int &i, const int &

  const int VFLAG = vflag_either;

-// The eatom and vatom arrays are atomic for half/thread neighbor list
+  // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial

-  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_eatom = k_eatom.view<DeviceType>();
-  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_vatom = k_vatom.view<DeviceType>();
+  auto v_eatom = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom);
+  auto a_eatom = v_eatom.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();
+
+  auto v_vatom = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom);
+  auto a_vatom = v_vatom.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  if (eflag_atom) {
    epairthird = THIRD * (evdwl + ecoul);
-    v_eatom[i] += epairthird;
+    a_eatom[i] += epairthird;
    if (NEIGHFLAG != FULL) {
-      v_eatom[j] += epairthird;
-      v_eatom[k] += epairthird;
+      a_eatom[j] += epairthird;
+      a_eatom[k] += epairthird;
    }
  }

@ -885,18 +916,18 @@ void PairSWKokkos<DeviceType>::ev_tally3(EV_FLOAT &ev, const int &i, const int &
    }

    if (vflag_atom) {
-      v_vatom(i,0) += THIRD*v[0]; v_vatom(i,1) += THIRD*v[1];
-      v_vatom(i,2) += THIRD*v[2]; v_vatom(i,3) += THIRD*v[3];
-      v_vatom(i,4) += THIRD*v[4]; v_vatom(i,5) += THIRD*v[5];
+      a_vatom(i,0) += THIRD*v[0]; a_vatom(i,1) += THIRD*v[1];
+      a_vatom(i,2) += THIRD*v[2]; a_vatom(i,3) += THIRD*v[3];
+      a_vatom(i,4) += THIRD*v[4]; a_vatom(i,5) += THIRD*v[5];

      if (NEIGHFLAG != FULL) {
-        v_vatom(j,0) += THIRD*v[0]; v_vatom(j,1) += THIRD*v[1];
-        v_vatom(j,2) += THIRD*v[2]; v_vatom(j,3) += THIRD*v[3];
-        v_vatom(j,4) += THIRD*v[4]; v_vatom(j,5) += THIRD*v[5];
+        a_vatom(j,0) += THIRD*v[0]; a_vatom(j,1) += THIRD*v[1];
+        a_vatom(j,2) += THIRD*v[2]; a_vatom(j,3) += THIRD*v[3];
+        a_vatom(j,4) += THIRD*v[4]; a_vatom(j,5) += THIRD*v[5];

-        v_vatom(k,0) += THIRD*v[0]; v_vatom(k,1) += THIRD*v[1];
-        v_vatom(k,2) += THIRD*v[2]; v_vatom(k,3) += THIRD*v[3];
-        v_vatom(k,4) += THIRD*v[4]; v_vatom(k,5) += THIRD*v[5];
+        a_vatom(k,0) += THIRD*v[0]; a_vatom(k,1) += THIRD*v[1];
+        a_vatom(k,2) += THIRD*v[2]; a_vatom(k,3) += THIRD*v[3];
+        a_vatom(k,4) += THIRD*v[4]; a_vatom(k,5) += THIRD*v[5];
      }
    }
  }
--- a/src/KOKKOS/pair_sw_kokkos.h
+++ b/src/KOKKOS/pair_sw_kokkos.h
@ -134,6 +134,14 @@ class PairSWKokkos : public PairSW {
  typename AT::t_efloat_1d d_eatom;
  typename AT::t_virial_array d_vatom;

+  int need_dup;  // set in compute() from lmp->kokkos->need_dup<DeviceType>(); nonzero => dup_f/dup_eatom/dup_vatom are created and contributed back, otherwise the ndup_* views are created
+  Kokkos::Experimental::ScatterView<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_f;
+  Kokkos::Experimental::ScatterView<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_eatom;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_vatom;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_f;
+  Kokkos::Experimental::ScatterView<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_eatom;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_vatom;
+
  typename AT::t_int_1d_randomread d_type2frho;
  typename AT::t_int_2d_randomread d_type2rhor;
  typename AT::t_int_2d_randomread d_type2z2r;
--- a/src/KOKKOS/pair_table_kokkos.cpp
+++ b/src/KOKKOS/pair_table_kokkos.cpp
@ -128,21 +128,25 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
        ff(this,(NeighListKokkos<DeviceType>*) list);
      if (eflag || vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
      else Kokkos::parallel_for(list->inum,ff);
+      ff.contribute();
    } else if (neighflag == HALFTHREAD) {
      PairComputeFunctor<PairTableKokkos<DeviceType>,HALFTHREAD,false,S_TableCompute<DeviceType,TABSTYLE> >
        ff(this,(NeighListKokkos<DeviceType>*) list);
      if (eflag || vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
      else Kokkos::parallel_for(list->inum,ff);
+      ff.contribute();
    } else if (neighflag == HALF) {
      PairComputeFunctor<PairTableKokkos<DeviceType>,HALF,false,S_TableCompute<DeviceType,TABSTYLE> >
        f(this,(NeighListKokkos<DeviceType>*) list);
      if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev);
      else Kokkos::parallel_for(list->inum,f);
+      f.contribute();
    } else if (neighflag == N2) {
      PairComputeFunctor<PairTableKokkos<DeviceType>,N2,false,S_TableCompute<DeviceType,TABSTYLE> >
        f(this,(NeighListKokkos<DeviceType>*) list);
      if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev);
      else Kokkos::parallel_for(list->inum,f);
+      f.contribute();
    }
  } else {
    if (neighflag == FULL) {
@ -150,21 +154,25 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
        f(this,(NeighListKokkos<DeviceType>*) list);
      if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev);
      else Kokkos::parallel_for(list->inum,f);
+      f.contribute();
    } else if (neighflag == HALFTHREAD) {
      PairComputeFunctor<PairTableKokkos<DeviceType>,HALFTHREAD,true,S_TableCompute<DeviceType,TABSTYLE> >
        f(this,(NeighListKokkos<DeviceType>*) list);
      if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev);
      else Kokkos::parallel_for(list->inum,f);
+      f.contribute();
    } else if (neighflag == HALF) {
      PairComputeFunctor<PairTableKokkos<DeviceType>,HALF,true,S_TableCompute<DeviceType,TABSTYLE> >
        f(this,(NeighListKokkos<DeviceType>*) list);
      if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev);
      else Kokkos::parallel_for(list->inum,f);
+      f.contribute();
    } else if (neighflag == N2) {
      PairComputeFunctor<PairTableKokkos<DeviceType>,N2,true,S_TableCompute<DeviceType,TABSTYLE> >
        f(this,(NeighListKokkos<DeviceType>*) list);
      if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev);
      else Kokkos::parallel_for(list->inum,f);
+      f.contribute();
    }
  }

--- a/src/KOKKOS/pair_tersoff_kokkos.cpp
+++ b/src/KOKKOS/pair_tersoff_kokkos.cpp
@ -200,6 +200,17 @@ void PairTersoffKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  d_neighbors = k_list->d_neighbors;
  d_ilist = k_list->d_ilist;

+  need_dup = lmp->kokkos->need_dup<DeviceType>();
+  if (need_dup) {
+    dup_f     = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(f);
+    dup_eatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_eatom);
+    dup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_vatom);
+  } else {
+    ndup_f     = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(f);
+    ndup_eatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_eatom);
+    ndup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_vatom);
+  }
+
  copymode = 1;

  EV_FLOAT ev;
@ -243,6 +254,9 @@ void PairTersoffKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
    ev_all += ev;
  }

+  if (need_dup)
+    Kokkos::Experimental::contribute(f, dup_f);
+
  if (eflag_global) eng_vdwl += ev_all.evdwl;
  if (vflag_global) {
    virial[0] += ev_all.v[0];
@ -254,11 +268,15 @@ void PairTersoffKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  }

  if (eflag_atom) {
+    if (need_dup)
+      Kokkos::Experimental::contribute(d_eatom, dup_eatom);
    k_eatom.template modify<DeviceType>();
    k_eatom.template sync<LMPHostType>();
  }

  if (vflag_atom) {
+    if (need_dup)
+      Kokkos::Experimental::contribute(d_vatom, dup_vatom);
    k_vatom.template modify<DeviceType>();
    k_vatom.template sync<LMPHostType>();
  }
@ -266,6 +284,13 @@ void PairTersoffKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  if (vflag_fdotr) pair_virial_fdotr_compute(this);

  copymode = 0;
+
+  // free duplicated memory
+  if (need_dup) {
+    dup_f     = decltype(dup_f)();
+    dup_eatom = decltype(dup_eatom)();
+    dup_vatom = decltype(dup_vatom)();
+  }
 }

 /* ---------------------------------------------------------------------- */
@ -304,8 +329,10 @@ template<int NEIGHFLAG, int EVFLAG>
 KOKKOS_INLINE_FUNCTION
 void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeHalf<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {

-  // The f array is atomic for Half/Thread neighbor style
-  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
+  // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
+
+  auto v_f = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
+  auto a_f = v_f.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  const int i = d_ilist[ii];
  if (i >= nlocal) return;
@ -1117,14 +1144,18 @@ void PairTersoffKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const i
 {
  const int VFLAG = vflag_either;

-  // The eatom and vatom arrays are atomic for Half/Thread neighbor style
-  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_eatom = k_eatom.view<DeviceType>();
-  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_vatom = k_vatom.view<DeviceType>();
+  // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial
+
+  auto v_eatom = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom);
+  auto a_eatom = v_eatom.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();
+
+  auto v_vatom = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom);
+  auto a_vatom = v_vatom.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  if (eflag_atom) {
    const E_FLOAT epairhalf = 0.5 * epair;
-    v_eatom[i] += epairhalf;
-    if (NEIGHFLAG != FULL) v_eatom[j] += epairhalf;
+    a_eatom[i] += epairhalf;
+    if (NEIGHFLAG != FULL) a_eatom[j] += epairhalf;
  }

  if (VFLAG) {
@ -1154,20 +1185,20 @@ void PairTersoffKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const i
    }

    if (vflag_atom) {
-      v_vatom(i,0) += 0.5*v0;
-      v_vatom(i,1) += 0.5*v1;
-      v_vatom(i,2) += 0.5*v2;
-      v_vatom(i,3) += 0.5*v3;
-      v_vatom(i,4) += 0.5*v4;
-      v_vatom(i,5) += 0.5*v5;
+      a_vatom(i,0) += 0.5*v0;
+      a_vatom(i,1) += 0.5*v1;
+      a_vatom(i,2) += 0.5*v2;
+      a_vatom(i,3) += 0.5*v3;
+      a_vatom(i,4) += 0.5*v4;
+      a_vatom(i,5) += 0.5*v5;

      if (NEIGHFLAG != FULL) {
-        v_vatom(j,0) += 0.5*v0;
-        v_vatom(j,1) += 0.5*v1;
-        v_vatom(j,2) += 0.5*v2;
-        v_vatom(j,3) += 0.5*v3;
-        v_vatom(j,4) += 0.5*v4;
-        v_vatom(j,5) += 0.5*v5;
+        a_vatom(j,0) += 0.5*v0;
+        a_vatom(j,1) += 0.5*v1;
+        a_vatom(j,2) += 0.5*v2;
+        a_vatom(j,3) += 0.5*v3;
+        a_vatom(j,4) += 0.5*v4;
+        a_vatom(j,5) += 0.5*v5;
      }
    }
  }
@ -1181,9 +1212,10 @@ KOKKOS_INLINE_FUNCTION
 void PairTersoffKokkos<DeviceType>::v_tally3(EV_FLOAT &ev, const int &i, const int &j, const int &k,
        F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const
 {
+  // The vatom array is duplicated for OpenMP, atomic for CUDA, and neither for Serial

-  // The eatom and vatom arrays are atomic for Half/Thread neighbor style
-  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_vatom = k_vatom.view<DeviceType>();
+  auto v_vatom = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom);
+  auto a_vatom = v_vatom.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  F_FLOAT v[6];

@ -1204,13 +1236,13 @@ void PairTersoffKokkos<DeviceType>::v_tally3(EV_FLOAT &ev, const int &i, const i
  }

  if (vflag_atom) {
-    v_vatom(i,0) += v[0]; v_vatom(i,1) += v[1]; v_vatom(i,2) += v[2];
-    v_vatom(i,3) += v[3]; v_vatom(i,4) += v[4]; v_vatom(i,5) += v[5];
+    a_vatom(i,0) += v[0]; a_vatom(i,1) += v[1]; a_vatom(i,2) += v[2];
+    a_vatom(i,3) += v[3]; a_vatom(i,4) += v[4]; a_vatom(i,5) += v[5];
    if (NEIGHFLAG != FULL) {
-      v_vatom(j,0) += v[0]; v_vatom(j,1) += v[1]; v_vatom(j,2) += v[2];
-      v_vatom(j,3) += v[3]; v_vatom(j,4) += v[4]; v_vatom(j,5) += v[5];
-      v_vatom(k,0) += v[0]; v_vatom(k,1) += v[1]; v_vatom(k,2) += v[2];
-      v_vatom(k,3) += v[3]; v_vatom(k,4) += v[4]; v_vatom(k,5) += v[5];
+      a_vatom(j,0) += v[0]; a_vatom(j,1) += v[1]; a_vatom(j,2) += v[2];
+      a_vatom(j,3) += v[3]; a_vatom(j,4) += v[4]; a_vatom(j,5) += v[5];
+      a_vatom(k,0) += v[0]; a_vatom(k,1) += v[1]; a_vatom(k,2) += v[2];
+      a_vatom(k,3) += v[3]; a_vatom(k,4) += v[4]; a_vatom(k,5) += v[5];
    }
  }

--- a/src/KOKKOS/pair_tersoff_kokkos.h
+++ b/src/KOKKOS/pair_tersoff_kokkos.h
@ -202,6 +202,14 @@ class PairTersoffKokkos : public PairTersoff {
  typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
  typename ArrayTypes<DeviceType>::t_virial_array d_vatom;

+  int need_dup;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_f;
+  Kokkos::Experimental::ScatterView<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_eatom;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_vatom;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_f;
+  Kokkos::Experimental::ScatterView<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_eatom;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_vatom;
+
  typedef Kokkos::DualView<F_FLOAT**[7],Kokkos::LayoutRight,DeviceType> tdual_ffloat_2d_n7;
  typedef typename tdual_ffloat_2d_n7::t_dev_const_randomread t_ffloat_2d_n7_randomread;
  typedef typename tdual_ffloat_2d_n7::t_host t_host_ffloat_2d_n7;
--- a/src/KOKKOS/pair_tersoff_mod_kokkos.cpp
+++ b/src/KOKKOS/pair_tersoff_mod_kokkos.cpp
@ -200,6 +200,17 @@ void PairTersoffMODKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  d_neighbors = k_list->d_neighbors;
  d_ilist = k_list->d_ilist;

+  need_dup = lmp->kokkos->need_dup<DeviceType>();
+  if (need_dup) {
+    dup_f     = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(f);
+    dup_eatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_eatom);
+    dup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_vatom);
+  } else {
+    ndup_f     = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(f);
+    ndup_eatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_eatom);
+    ndup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_vatom);
+  }
+
  copymode = 1;

  EV_FLOAT ev;
@ -243,6 +254,9 @@ void PairTersoffMODKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
    ev_all += ev;
  }

+  if (need_dup)
+    Kokkos::Experimental::contribute(f, dup_f);
+
  if (eflag_global) eng_vdwl += ev_all.evdwl;
  if (vflag_global) {
    virial[0] += ev_all.v[0];
@ -254,11 +268,15 @@ void PairTersoffMODKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  }

  if (eflag_atom) {
+    if (need_dup)
+      Kokkos::Experimental::contribute(d_eatom, dup_eatom);
    k_eatom.template modify<DeviceType>();
    k_eatom.template sync<LMPHostType>();
  }

  if (vflag_atom) {
+    if (need_dup)
+      Kokkos::Experimental::contribute(d_vatom, dup_vatom);
    k_vatom.template modify<DeviceType>();
    k_vatom.template sync<LMPHostType>();
  }
@ -266,6 +284,13 @@ void PairTersoffMODKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  if (vflag_fdotr) pair_virial_fdotr_compute(this);

  copymode = 0;
+
+  // free duplicated memory
+  if (need_dup) {
+    dup_f     = decltype(dup_f)();
+    dup_eatom = decltype(dup_eatom)();
+    dup_vatom = decltype(dup_vatom)();
+  }
 }

 /* ---------------------------------------------------------------------- */
@ -304,8 +329,10 @@ template<int NEIGHFLAG, int EVFLAG>
 KOKKOS_INLINE_FUNCTION
 void PairTersoffMODKokkos<DeviceType>::operator()(TagPairTersoffMODComputeHalf<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {

-  // The f array is atomic for Half/Thread neighbor style
-  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
+  // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
+
+  auto v_f = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
+  auto a_f = v_f.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  const int i = d_ilist[ii];
  if (i >= nlocal) return;
@ -1120,14 +1147,18 @@ void PairTersoffMODKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, cons
 {
  const int VFLAG = vflag_either;

-  // The eatom and vatom arrays are atomic for Half/Thread neighbor style
-  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_eatom = k_eatom.view<DeviceType>();
-  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_vatom = k_vatom.view<DeviceType>();
+  // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial
+
+  auto v_eatom = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom);
+  auto a_eatom = v_eatom.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();
+
+  auto v_vatom = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom);
+  auto a_vatom = v_vatom.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  if (eflag_atom) {
    const E_FLOAT epairhalf = 0.5 * epair;
-    v_eatom[i] += epairhalf;
-    if (NEIGHFLAG != FULL) v_eatom[j] += epairhalf;
+    a_eatom[i] += epairhalf;
+    if (NEIGHFLAG != FULL) a_eatom[j] += epairhalf;
  }

  if (VFLAG) {
@ -1157,20 +1188,20 @@ void PairTersoffMODKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, cons
    }

    if (vflag_atom) {
-      v_vatom(i,0) += 0.5*v0;
-      v_vatom(i,1) += 0.5*v1;
-      v_vatom(i,2) += 0.5*v2;
-      v_vatom(i,3) += 0.5*v3;
-      v_vatom(i,4) += 0.5*v4;
-      v_vatom(i,5) += 0.5*v5;
+      a_vatom(i,0) += 0.5*v0;
+      a_vatom(i,1) += 0.5*v1;
+      a_vatom(i,2) += 0.5*v2;
+      a_vatom(i,3) += 0.5*v3;
+      a_vatom(i,4) += 0.5*v4;
+      a_vatom(i,5) += 0.5*v5;

      if (NEIGHFLAG != FULL) {
-        v_vatom(j,0) += 0.5*v0;
-        v_vatom(j,1) += 0.5*v1;
-        v_vatom(j,2) += 0.5*v2;
-        v_vatom(j,3) += 0.5*v3;
-        v_vatom(j,4) += 0.5*v4;
-        v_vatom(j,5) += 0.5*v5;
+        a_vatom(j,0) += 0.5*v0;
+        a_vatom(j,1) += 0.5*v1;
+        a_vatom(j,2) += 0.5*v2;
+        a_vatom(j,3) += 0.5*v3;
+        a_vatom(j,4) += 0.5*v4;
+        a_vatom(j,5) += 0.5*v5;
      }
    }
  }
@ -1184,9 +1215,10 @@ KOKKOS_INLINE_FUNCTION
 void PairTersoffMODKokkos<DeviceType>::v_tally3(EV_FLOAT &ev, const int &i, const int &j, const int &k,
        F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const
 {
+  // The vatom array is duplicated for OpenMP, atomic for CUDA, and neither for Serial

-  // The eatom and vatom arrays are atomic for Half/Thread neighbor style
-  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_vatom = k_vatom.view<DeviceType>();
+  auto v_vatom = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom);
+  auto a_vatom = v_vatom.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  F_FLOAT v[6];

@ -1207,13 +1239,13 @@ void PairTersoffMODKokkos<DeviceType>::v_tally3(EV_FLOAT &ev, const int &i, cons
  }

  if (vflag_atom) {
-    v_vatom(i,0) += v[0]; v_vatom(i,1) += v[1]; v_vatom(i,2) += v[2];
-    v_vatom(i,3) += v[3]; v_vatom(i,4) += v[4]; v_vatom(i,5) += v[5];
+    a_vatom(i,0) += v[0]; a_vatom(i,1) += v[1]; a_vatom(i,2) += v[2];
+    a_vatom(i,3) += v[3]; a_vatom(i,4) += v[4]; a_vatom(i,5) += v[5];
    if (NEIGHFLAG != FULL) {
-      v_vatom(j,0) += v[0]; v_vatom(j,1) += v[1]; v_vatom(j,2) += v[2];
-      v_vatom(j,3) += v[3]; v_vatom(j,4) += v[4]; v_vatom(j,5) += v[5];
-      v_vatom(k,0) += v[0]; v_vatom(k,1) += v[1]; v_vatom(k,2) += v[2];
-      v_vatom(k,3) += v[3]; v_vatom(k,4) += v[4]; v_vatom(k,5) += v[5];
+      a_vatom(j,0) += v[0]; a_vatom(j,1) += v[1]; a_vatom(j,2) += v[2];
+      a_vatom(j,3) += v[3]; a_vatom(j,4) += v[4]; a_vatom(j,5) += v[5];
+      a_vatom(k,0) += v[0]; a_vatom(k,1) += v[1]; a_vatom(k,2) += v[2];
+      a_vatom(k,3) += v[3]; a_vatom(k,4) += v[4]; a_vatom(k,5) += v[5];
    }
  }

--- a/src/KOKKOS/pair_tersoff_mod_kokkos.h
+++ b/src/KOKKOS/pair_tersoff_mod_kokkos.h
@ -202,6 +202,14 @@ class PairTersoffMODKokkos : public PairTersoffMOD {
  typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
  typename ArrayTypes<DeviceType>::t_virial_array d_vatom;

+  int need_dup;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_f;
+  Kokkos::Experimental::ScatterView<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_eatom;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_vatom;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_f;
+  Kokkos::Experimental::ScatterView<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_eatom;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_vatom;
+
  typedef Kokkos::DualView<F_FLOAT**[7],Kokkos::LayoutRight,DeviceType> tdual_ffloat_2d_n7;
  typedef typename tdual_ffloat_2d_n7::t_dev_const_randomread t_ffloat_2d_n7_randomread;
  typedef typename tdual_ffloat_2d_n7::t_host t_host_ffloat_2d_n7;
--- a/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp
+++ b/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp
@ -214,6 +214,17 @@ void PairTersoffZBLKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  d_neighbors = k_list->d_neighbors;
  d_ilist = k_list->d_ilist;

+  need_dup = lmp->kokkos->need_dup<DeviceType>();
+  if (need_dup) {
+    dup_f     = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(f);
+    dup_eatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_eatom);
+    dup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_vatom);
+  } else {
+    ndup_f     = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(f);
+    ndup_eatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_eatom);
+    ndup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_vatom);
+  }
+
  copymode = 1;

  EV_FLOAT ev;
@ -257,6 +268,9 @@ void PairTersoffZBLKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
    ev_all += ev;
  }

+  if (need_dup)
+    Kokkos::Experimental::contribute(f, dup_f);
+
  if (eflag_global) eng_vdwl += ev_all.evdwl;
  if (vflag_global) {
    virial[0] += ev_all.v[0];
@ -268,11 +282,15 @@ void PairTersoffZBLKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  }

  if (eflag_atom) {
+    if (need_dup)
+      Kokkos::Experimental::contribute(d_eatom, dup_eatom);
    k_eatom.template modify<DeviceType>();
    k_eatom.template sync<LMPHostType>();
  }

  if (vflag_atom) {
+    if (need_dup)
+      Kokkos::Experimental::contribute(d_vatom, dup_vatom);
    k_vatom.template modify<DeviceType>();
    k_vatom.template sync<LMPHostType>();
  }
@ -280,6 +298,13 @@ void PairTersoffZBLKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  if (vflag_fdotr) pair_virial_fdotr_compute(this);

  copymode = 0;
+
+  // free duplicated memory
+  if (need_dup) {
+    dup_f     = decltype(dup_f)();
+    dup_eatom = decltype(dup_eatom)();
+    dup_vatom = decltype(dup_vatom)();
+  }
 }

 /* ---------------------------------------------------------------------- */
@ -318,8 +343,10 @@ template<int NEIGHFLAG, int EVFLAG>
 KOKKOS_INLINE_FUNCTION
 void PairTersoffZBLKokkos<DeviceType>::operator()(TagPairTersoffZBLComputeHalf<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {

-  // The f array is atomic for Half/Thread neighbor style
-  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
+  // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
+
+  auto v_f = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
+  auto a_f = v_f.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  const int i = d_ilist[ii];
  if (i >= nlocal) return;
@ -1214,14 +1241,18 @@ void PairTersoffZBLKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, cons
 {
  const int VFLAG = vflag_either;

-  // The eatom and vatom arrays are atomic for Half/Thread neighbor style
-  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_eatom = k_eatom.view<DeviceType>();
-  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_vatom = k_vatom.view<DeviceType>();
+  // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial
+
+  auto v_eatom = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom);
+  auto a_eatom = v_eatom.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();
+
+  auto v_vatom = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom);
+  auto a_vatom = v_vatom.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  if (eflag_atom) {
    const E_FLOAT epairhalf = 0.5 * epair;
-    v_eatom[i] += epairhalf;
-    if (NEIGHFLAG != FULL) v_eatom[j] += epairhalf;
+    a_eatom[i] += epairhalf;
+    if (NEIGHFLAG != FULL) a_eatom[j] += epairhalf;
  }

  if (VFLAG) {
@ -1251,20 +1282,20 @@ void PairTersoffZBLKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, cons
    }

    if (vflag_atom) {
-      v_vatom(i,0) += 0.5*v0;
-      v_vatom(i,1) += 0.5*v1;
-      v_vatom(i,2) += 0.5*v2;
-      v_vatom(i,3) += 0.5*v3;
-      v_vatom(i,4) += 0.5*v4;
-      v_vatom(i,5) += 0.5*v5;
+      a_vatom(i,0) += 0.5*v0;
+      a_vatom(i,1) += 0.5*v1;
+      a_vatom(i,2) += 0.5*v2;
+      a_vatom(i,3) += 0.5*v3;
+      a_vatom(i,4) += 0.5*v4;
+      a_vatom(i,5) += 0.5*v5;

      if (NEIGHFLAG != FULL) {
-        v_vatom(j,0) += 0.5*v0;
-        v_vatom(j,1) += 0.5*v1;
-        v_vatom(j,2) += 0.5*v2;
-        v_vatom(j,3) += 0.5*v3;
-        v_vatom(j,4) += 0.5*v4;
-        v_vatom(j,5) += 0.5*v5;
+        a_vatom(j,0) += 0.5*v0;
+        a_vatom(j,1) += 0.5*v1;
+        a_vatom(j,2) += 0.5*v2;
+        a_vatom(j,3) += 0.5*v3;
+        a_vatom(j,4) += 0.5*v4;
+        a_vatom(j,5) += 0.5*v5;
      }
    }
  }
@ -1278,9 +1309,10 @@ KOKKOS_INLINE_FUNCTION
 void PairTersoffZBLKokkos<DeviceType>::v_tally3(EV_FLOAT &ev, const int &i, const int &j, const int &k,
        F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const
 {
+  // The vatom array is duplicated for OpenMP, atomic for CUDA, and neither for Serial

-  // The eatom and vatom arrays are atomic for Half/Thread neighbor style
-  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_vatom = k_vatom.view<DeviceType>();
+  auto v_vatom = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom);
+  auto a_vatom = v_vatom.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();

  F_FLOAT v[6];

@ -1301,13 +1333,13 @@ void PairTersoffZBLKokkos<DeviceType>::v_tally3(EV_FLOAT &ev, const int &i, cons
  }

  if (vflag_atom) {
-    v_vatom(i,0) += v[0]; v_vatom(i,1) += v[1]; v_vatom(i,2) += v[2];
-    v_vatom(i,3) += v[3]; v_vatom(i,4) += v[4]; v_vatom(i,5) += v[5];
+    a_vatom(i,0) += v[0]; a_vatom(i,1) += v[1]; a_vatom(i,2) += v[2];
+    a_vatom(i,3) += v[3]; a_vatom(i,4) += v[4]; a_vatom(i,5) += v[5];
    if (NEIGHFLAG != FULL) {
-      v_vatom(j,0) += v[0]; v_vatom(j,1) += v[1]; v_vatom(j,2) += v[2];
-      v_vatom(j,3) += v[3]; v_vatom(j,4) += v[4]; v_vatom(j,5) += v[5];
-      v_vatom(k,0) += v[0]; v_vatom(k,1) += v[1]; v_vatom(k,2) += v[2];
-      v_vatom(k,3) += v[3]; v_vatom(k,4) += v[4]; v_vatom(k,5) += v[5];
+      a_vatom(j,0) += v[0]; a_vatom(j,1) += v[1]; a_vatom(j,2) += v[2];
+      a_vatom(j,3) += v[3]; a_vatom(j,4) += v[4]; a_vatom(j,5) += v[5];
+      a_vatom(k,0) += v[0]; a_vatom(k,1) += v[1]; a_vatom(k,2) += v[2];
+      a_vatom(k,3) += v[3]; a_vatom(k,4) += v[4]; a_vatom(k,5) += v[5];
    }
  }

--- a/src/KOKKOS/pair_tersoff_zbl_kokkos.h
+++ b/src/KOKKOS/pair_tersoff_zbl_kokkos.h
@ -207,6 +207,14 @@ class PairTersoffZBLKokkos : public PairTersoffZBL {
  typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
  typename ArrayTypes<DeviceType>::t_virial_array d_vatom;

+  int need_dup;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_f;
+  Kokkos::Experimental::ScatterView<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_eatom;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_vatom;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_f;
+  Kokkos::Experimental::ScatterView<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_eatom;
+  Kokkos::Experimental::ScatterView<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_vatom;
+
  typedef Kokkos::DualView<F_FLOAT**[7],Kokkos::LayoutRight,DeviceType> tdual_ffloat_2d_n7;
  typedef typename tdual_ffloat_2d_n7::t_dev_const_randomread t_ffloat_2d_n7_randomread;
  typedef typename tdual_ffloat_2d_n7::t_host t_host_ffloat_2d_n7;
--- a/src/MANYBODY/fix_qeq_comb.cpp
+++ b/src/MANYBODY/fix_qeq_comb.cpp
@ -67,7 +67,7 @@ FixQEQComb::FixQEQComb(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg),
        fp = fopen(arg[iarg+1],"w");
        if (fp == NULL) {
          char str[128];
-          sprintf(str,"Cannot open fix qeq/comb file %s",arg[iarg+1]);
+          snprintf(str,128,"Cannot open fix qeq/comb file %s",arg[iarg+1]);
          error->one(FLERR,str);
        }
      }
--- a/src/MANYBODY/pair_adp.cpp
+++ b/src/MANYBODY/pair_adp.cpp
@ -551,7 +551,7 @@ void PairADP::read_file(char *filename)
    fp = force->open_potential(filename);
    if (fp == NULL) {
      char str[128];
-      sprintf(str,"Cannot open ADP potential file %s",filename);
+      snprintf(str,128,"Cannot open ADP potential file %s",filename);
      error->one(FLERR,str);
    }
  }
--- a/src/MANYBODY/pair_airebo.cpp
+++ b/src/MANYBODY/pair_airebo.cpp
@ -3373,9 +3373,9 @@ void PairAIREBO::read_file(char *filename)
    if (fp == NULL) {
      char str[128];
      if (morseflag)
-        sprintf(str,"Cannot open AIREBO-M potential file %s",filename);
+        snprintf(str,128,"Cannot open AIREBO-M potential file %s",filename);
      else
-        sprintf(str,"Cannot open AIREBO potential file %s",filename);
+        snprintf(str,128,"Cannot open AIREBO potential file %s",filename);
      error->one(FLERR,str);
    }

--- a/src/MANYBODY/pair_bop.cpp
+++ b/src/MANYBODY/pair_bop.cpp
@ -4976,7 +4976,7 @@ void PairBOP::read_table(char *filename)
    FILE *fp = force->open_potential(filename);
    if (fp == NULL) {
      char str[128];
-      sprintf(str,"Cannot open BOP potential file %s",filename);
+      snprintf(str,128,"Cannot open BOP potential file %s",filename);
      error->one(FLERR,str);
    }
    fgets(s,MAXLINE,fp);  // skip first comment line
@ -5079,7 +5079,7 @@ void PairBOP::read_table(char *filename)
    FILE *fp = force->open_potential(filename);
    if (fp == NULL) {
      char str[128];
-      sprintf(str,"Cannot open BOP potential file %s",filename);
+      snprintf(str,128,"Cannot open BOP potential file %s",filename);
      error->one(FLERR,str);
    }
    fgets(s,MAXLINE,fp);  // skip first comment line
--- a/src/MANYBODY/pair_comb.cpp
+++ b/src/MANYBODY/pair_comb.cpp
@ -597,7 +597,7 @@ void PairComb::read_file(char *file)
    fp = force->open_potential(file);
    if (fp == NULL) {
      char str[128];
-      sprintf(str,"Cannot open COMB potential file %s",file);
+      snprintf(str,128,"Cannot open COMB potential file %s",file);
      error->one(FLERR,str);
    }
  }
--- a/src/MANYBODY/pair_comb3.cpp
+++ b/src/MANYBODY/pair_comb3.cpp
@ -320,11 +320,7 @@ void PairComb3::read_lib()

  if (comm->me == 0) {
    FILE *fp = force->open_potential("lib.comb3");
-    if (fp == NULL) {
-      char str[128];
-      sprintf(str,"Cannot open COMB3 lib.comb3 file");
-      error->one(FLERR,str);
-    }
+    if (fp == NULL) error->one(FLERR,"Cannot open COMB3 lib.comb3 file");

    // read and store at the same time
    fgets(s,MAXLIB,fp);
@ -607,7 +603,7 @@ void PairComb3::read_file(char *file)
    fp = force->open_potential(file);
    if (fp == NULL) {
      char str[128];
-      sprintf(str,"Cannot open COMB3 potential file %s",file);
+      snprintf(str,128,"Cannot open COMB3 potential file %s",file);
      error->one(FLERR,str);
    }
  }
--- a/src/MANYBODY/pair_eam.cpp
+++ b/src/MANYBODY/pair_eam.cpp
@ -460,7 +460,7 @@ void PairEAM::read_file(char *filename)
    fptr = force->open_potential(filename);
    if (fptr == NULL) {
      char str[128];
-      sprintf(str,"Cannot open EAM potential file %s",filename);
+      snprintf(str,128,"Cannot open EAM potential file %s",filename);
      error->one(FLERR,str);
    }
  }
--- a/src/MANYBODY/pair_eam_alloy.cpp
+++ b/src/MANYBODY/pair_eam_alloy.cpp
@ -127,7 +127,7 @@ void PairEAMAlloy::read_file(char *filename)
    fptr = force->open_potential(filename);
    if (fptr == NULL) {
      char str[128];
-      sprintf(str,"Cannot open EAM potential file %s",filename);
+      snprintf(str,128,"Cannot open EAM potential file %s",filename);
      error->one(FLERR,str);
    }
  }
--- a/src/MANYBODY/pair_eam_cd.cpp
+++ b/src/MANYBODY/pair_eam_cd.cpp
@ -504,7 +504,7 @@ void PairEAMCD::read_h_coeff(char *filename)
    fptr = force->open_potential(filename);
    if (fptr == NULL) {
      char str[128];
-      sprintf(str,"Cannot open EAM potential file %s", filename);
+      snprintf(str,128,"Cannot open EAM potential file %s", filename);
      error->one(FLERR,str);
    }

--- a/src/MANYBODY/pair_eam_fs.cpp
+++ b/src/MANYBODY/pair_eam_fs.cpp
@ -127,7 +127,7 @@ void PairEAMFS::read_file(char *filename)
    fptr = force->open_potential(filename);
    if (fptr == NULL) {
      char str[128];
-      sprintf(str,"Cannot open EAM potential file %s",filename);
+      snprintf(str,128,"Cannot open EAM potential file %s",filename);
      error->one(FLERR,str);
    }
  }
--- a/src/MANYBODY/pair_eim.cpp
+++ b/src/MANYBODY/pair_eim.cpp
@ -461,7 +461,7 @@ void PairEIM::read_file(char *filename)
    fptr = force->open_potential(filename);
    if (fptr == NULL) {
      char str[128];
-      sprintf(str,"Cannot open EIM potential file %s",filename);
+      snprintf(str,128,"Cannot open EIM potential file %s",filename);
      error->one(FLERR,str);
    }
  }
--- a/src/MANYBODY/pair_gw.cpp
+++ b/src/MANYBODY/pair_gw.cpp
@ -381,7 +381,7 @@ void PairGW::read_file(char *file)
    fp = force->open_potential(file);
    if (fp == NULL) {
      char str[128];
-      sprintf(str,"Cannot open GW potential file %s",file);
+      snprintf(str,128,"Cannot open GW potential file %s",file);
      error->one(FLERR,str);
    }
  }
--- a/src/MANYBODY/pair_gw_zbl.cpp
+++ b/src/MANYBODY/pair_gw_zbl.cpp
@ -77,7 +77,7 @@ void PairGWZBL::read_file(char *file)
    fp = force->open_potential(file);
    if (fp == NULL) {
      char str[128];
-      sprintf(str,"Cannot open GW potential file %s",file);
+      snprintf(str,128,"Cannot open GW potential file %s",file);
      error->one(FLERR,str);
    }
  }
--- a/src/MANYBODY/pair_lcbop.cpp
+++ b/src/MANYBODY/pair_lcbop.cpp
@ -978,7 +978,7 @@ void PairLCBOP::read_file(char *filename)
    FILE *fp = force->open_potential(filename);
    if (fp == NULL) {
      char str[128];
-      sprintf(str,"Cannot open LCBOP potential file %s",filename);
+      snprintf(str,128,"Cannot open LCBOP potential file %s",filename);
      error->one(FLERR,str);
    }

--- a/src/MANYBODY/pair_nb3b_harmonic.cpp
+++ b/src/MANYBODY/pair_nb3b_harmonic.cpp
@ -299,7 +299,7 @@ void PairNb3bHarmonic::read_file(char *file)
    fp = force->open_potential(file);
    if (fp == NULL) {
      char str[128];
-      sprintf(str,"Cannot open nb3b/harmonic potential file %s",file);
+      snprintf(str,128,"Cannot open nb3b/harmonic potential file %s",file);
      error->one(FLERR,str);
    }
  }
--- a/src/MANYBODY/pair_polymorphic.cpp
+++ b/src/MANYBODY/pair_polymorphic.cpp
@ -573,7 +573,7 @@ void PairPolymorphic::read_file(char *file)
    fp = force->open_potential(file);
    if (fp == NULL) {
      char str[128];
-      sprintf(str,"Cannot open polymorphic potential file %s",file);
+      snprintf(str,128,"Cannot open polymorphic potential file %s",file);
      error->one(FLERR,str);
    }
    // move past comments to first data line
--- a/src/MANYBODY/pair_sw.cpp
+++ b/src/MANYBODY/pair_sw.cpp
@ -363,7 +363,7 @@ void PairSW::read_file(char *file)
    fp = force->open_potential(file);
    if (fp == NULL) {
      char str[128];
-      sprintf(str,"Cannot open Stillinger-Weber potential file %s",file);
+      snprintf(str,128,"Cannot open Stillinger-Weber potential file %s",file);
      error->one(FLERR,str);
    }
  }
--- a/src/MANYBODY/pair_tersoff.cpp
+++ b/src/MANYBODY/pair_tersoff.cpp
@ -404,7 +404,7 @@ void PairTersoff::read_file(char *file)
    fp = force->open_potential(file);
    if (fp == NULL) {
      char str[128];
-      sprintf(str,"Cannot open Tersoff potential file %s",file);
+      snprintf(str,128,"Cannot open Tersoff potential file %s",file);
      error->one(FLERR,str);
    }
  }
--- a/src/MANYBODY/pair_tersoff_mod.cpp
+++ b/src/MANYBODY/pair_tersoff_mod.cpp
@ -60,7 +60,7 @@ void PairTersoffMOD::read_file(char *file)
    fp = force->open_potential(file);
    if (fp == NULL) {
      char str[128];
-      sprintf(str,"Cannot open Tersoff potential file %s",file);
+      snprintf(str,128,"Cannot open Tersoff potential file %s",file);
      error->one(FLERR,str);
    }
  }
--- a/src/MANYBODY/pair_tersoff_mod_c.cpp
+++ b/src/MANYBODY/pair_tersoff_mod_c.cpp
@ -55,7 +55,7 @@ void PairTersoffMODC::read_file(char *file)
    fp = force->open_potential(file);
    if (fp == NULL) {
      char str[128];
-      sprintf(str,"Cannot open Tersoff potential file %s",file);
+      snprintf(str,128,"Cannot open Tersoff potential file %s",file);
      error->one(FLERR,str);
    }
  }
--- a/src/MANYBODY/pair_tersoff_zbl.cpp
+++ b/src/MANYBODY/pair_tersoff_zbl.cpp
@ -77,7 +77,7 @@ void PairTersoffZBL::read_file(char *file)
    fp = force->open_potential(file);
    if (fp == NULL) {
      char str[128];
-      sprintf(str,"Cannot open Tersoff potential file %s",file);
+      snprintf(str,128,"Cannot open Tersoff potential file %s",file);
      error->one(FLERR,str);
    }
  }
--- a/src/MANYBODY/pair_vashishta.cpp
+++ b/src/MANYBODY/pair_vashishta.cpp
@ -369,7 +369,7 @@ void PairVashishta::read_file(char *file)
    fp = force->open_potential(file);
    if (fp == NULL) {
      char str[128];
-      sprintf(str,"Cannot open Vashishta potential file %s",file);
+      snprintf(str,128,"Cannot open Vashishta potential file %s",file);
      error->one(FLERR,str);
    }
  }
--- a/src/MEAM/pair_meam.cpp
+++ b/src/MEAM/pair_meam.cpp
@ -464,7 +464,7 @@ void PairMEAM::read_files(char *globalfile, char *userfile)
    fp = force->open_potential(globalfile);
    if (fp == NULL) {
      char str[128];
-      sprintf(str,"Cannot open MEAM potential file %s",globalfile);
+      snprintf(str,128,"Cannot open MEAM potential file %s",globalfile);
      error->one(FLERR,str);
    }
  }
@ -645,7 +645,7 @@ void PairMEAM::read_files(char *globalfile, char *userfile)
    fp = force->open_potential(userfile);
    if (fp == NULL) {
      char str[128];
-      sprintf(str,"Cannot open MEAM potential file %s",userfile);
+      snprintf(str,128,"Cannot open MEAM potential file %s",userfile);
      error->one(FLERR,str);
    }
  }
@ -694,8 +694,8 @@ void PairMEAM::read_files(char *globalfile, char *userfile)
      if (strcmp(params[0],keywords[which]) == 0) break;
    if (which == nkeywords) {
      char str[128];
-      sprintf(str,"Keyword %s in MEAM parameter file not recognized",
-              params[0]);
+      snprintf(str,128,"Keyword %s in MEAM parameter file not recognized",
+               params[0]);
      error->all(FLERR,str);
    }
    nindex = nparams - 2;
--- a/src/MISC/fix_ttm.cpp
+++ b/src/MISC/fix_ttm.cpp
@ -71,7 +71,7 @@ FixTTM::FixTTM(LAMMPS *lmp, int narg, char **arg) :
  fpr = fopen(arg[13],"r");
  if (fpr == NULL) {
    char str[128];
-    sprintf(str,"Cannot open file %s",arg[13]);
+    snprintf(str,128,"Cannot open file %s",arg[13]);
    error->one(FLERR,str);
  }

@ -84,7 +84,7 @@ FixTTM::FixTTM(LAMMPS *lmp, int narg, char **arg) :
      fp = fopen(arg[15],"w");
      if (fp == NULL) {
        char str[128];
-        sprintf(str,"Cannot open fix ttm file %s",arg[15]);
+        snprintf(str,128,"Cannot open fix ttm file %s",arg[15]);
        error->one(FLERR,str);
      }
    }
--- a/src/MOLECULE/angle_table.cpp
+++ b/src/MOLECULE/angle_table.cpp
@ -369,7 +369,7 @@ void AngleTable::read_table(Table *tb, char *file, char *keyword)
  FILE *fp = force->open_potential(file);
  if (fp == NULL) {
    char str[128];
-    sprintf(str,"Cannot open file %s",file);
+    snprintf(str,128,"Cannot open file %s",file);
    error->one(FLERR,str);
  }

--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Axel Kohlmeyer	c2758a0b55	Merge pull request #1126 from lammps/patch-18-sep-2018 patch 18Sep18	2018-09-18 21:50:30 -04:00
Axel Kohlmeyer	9c58834af2	Merge pull request #1127 from akohlmey/reax-bonds-typo fix typo in gzip support for fix reax/bonds	2018-09-18 18:05:57 -04:00
Axel Kohlmeyer	4bfac61b1a	fix typo in gzip support for fix reax/bonds	2018-09-18 17:41:02 -04:00
Axel Kohlmeyer	8dff5fd5d7	patch 18Sep18	2018-09-18 17:24:10 -04:00
Axel Kohlmeyer	e9ed95c2db	Merge pull request #1108 from akohlmey/fix-sprintf-buffer-overflows Avoid more buffer overflows due to using snprintf() on fixed size buffers	2018-09-18 16:57:10 -04:00
Axel Kohlmeyer	33e33048bf	Merge pull request #1125 from athomps/sna-atom-leakfix The sna*/atom computes were not destroying SNA objects. Fixed.	2018-09-18 15:50:17 -04:00
Aidan Thompson	d753c51c45	The sna*/atom computes were not destroying SNA objects. Fixed.	2018-09-17 19:44:32 -06:00
Axel Kohlmeyer	e2293cc7df	Merge pull request #1119 from lammps/doc-clarify doc page clarifications	2018-09-17 06:10:25 -04:00
Axel Kohlmeyer	0c287a55cd	Merge pull request #1092 from RomainVermorel/master New stress/mop and stress/mop/profile computes for USER-MISC	2018-09-17 05:16:42 -04:00
Axel Kohlmeyer	5f6b5c5400	fix typo	2018-09-17 05:11:59 -04:00
Axel Kohlmeyer	494b149272	fix typo	2018-09-17 05:10:56 -04:00
Axel Kohlmeyer	39ee7876c4	fix typo	2018-09-17 05:09:23 -04:00
Axel Kohlmeyer	8fa80081df	fix typo	2018-09-17 05:08:22 -04:00
Steve Plimpton	e14db00d52	doc link adjustments	2018-09-14 17:31:33 -06:00
Richard Berger	6ae4bdead5	Merge pull request #1089 from lammps/spread 2 new computes: chunk/spread/atom and reduce/chunk	2018-09-11 21:21:02 -04:00
Richard Berger	1f5885fc45	Merge pull request #1099 from jrgissing/bond/react-efficient-competing_reactions bond/react: efficient competing reactions	2018-09-11 21:18:41 -04:00
Richard Berger	92b508f14c	Merge pull request #1097 from lammps/localvars add variable option to compute bond/angle/dihedral local	2018-09-11 21:18:15 -04:00
Steve Plimpton	c3ece2f086	typo in fix relax/box doc page	2018-09-11 12:44:35 -06:00
Steve Plimpton	7f328d3f79	cite a good book on SLLOD thermostatting for molecular systems	2018-09-11 11:49:46 -06:00
Steve Plimpton	983e8bb110	doc page clarifications	2018-09-11 11:36:37 -06:00
Axel Kohlmeyer	0305cca1df	make certain that *_freq flags are initialized. use -1 instead of 0 to avoid division by zero	2018-09-10 22:47:45 -04:00
Axel Kohlmeyer	3d2c731709	avoid class initialization order warnings	2018-09-10 22:32:56 -04:00
Axel Kohlmeyer	02b653c0ce	correct end of itemized list to be compatible with txt2rst conversion	2018-09-10 22:22:42 -04:00
Axel Kohlmeyer	a33f45f176	Merge pull request #1112 from akohlmey/rename-cdeam-omp Rename files for pair style eam/cd/omp in USER-OMP	2018-09-10 15:16:10 -04:00
Richard Berger	a903e64947	Merge pull request #1114 from junghans/cmake_eigen3 cmake: fix Eigen3 detection	2018-09-10 10:45:20 -04:00
Richard Berger	46b87518b8	cmake: use NO_MODULE for Eigen3	2018-09-10 00:35:55 -04:00
Richard Berger	45682f8695	Merge pull request #1115 from junghans/cmake_globfix cmake: fix globbing of emacs files	2018-09-09 22:15:44 -04:00
Axel Kohlmeyer	2faa34b2be	Merge pull request #1105 from rbberger/truncate-file-path Avoid buffer overflow during errors with long filenames	2018-09-09 01:30:14 -04:00
Axel Kohlmeyer	075d366051	Merge pull request #1116 from akohlmey/correct-doc-build Correct multiple issues in manual build	2018-09-09 01:28:29 -04:00
Axel Kohlmeyer	3b073de357	make it less likely to have people get confused by Eigen3_DIR	2018-09-09 01:21:37 -04:00
Richard Berger	6f379f54d6	Merge pull request #1117 from lammps/doc-tweak small doc page changes, format and content on doc page tarballs	2018-09-07 23:48:46 -04:00
Richard Berger	e325c78628	Merge pull request #1101 from Pakketeretet2/bugfix-pair-morse-smooth-linear Bugfix pair morse smooth linear	2018-09-07 23:38:16 -04:00
Steve Plimpton	b488f1072e	recent package links	2018-09-07 14:22:38 -06:00
Steve Plimpton	0384ef8967	more tweaks to client/server doc pages	2018-09-07 14:00:39 -06:00
Steve Plimpton	25907c856e	small doc page changes, format and content on doc page tarballs	2018-09-07 09:56:03 -06:00
Christoph Junghans	861a7acdf0	cmake: fix globbing in some more places	2018-09-07 06:40:06 -06:00
Richard Berger	aea148a86a	Merge pull request #1100 from rbberger/info-command-refactoring Avoid unnecessary sorting in print_columns	2018-09-06 22:55:56 -04:00
Axel Kohlmeyer	dd64c063cf	various fixes for the manual to address breakage from merging MESSAGE and USER-SCAFACOS packages	2018-09-06 22:28:20 -04:00
Axel Kohlmeyer	44fcdc4024	update reference examples with versions that do not vary by MPI rank count	2018-09-06 21:20:47 -04:00
Axel Kohlmeyer	947f574503	Merge branch 'master' into compute_mop # Conflicts: # cmake/CMakeLists.txt	2018-09-06 21:13:42 -04:00
Axel Kohlmeyer	5c4434b283	Merge pull request #1113 from akohlmey/more-codeowners Expand the list of code owners for more automated review requests	2018-09-06 20:59:07 -04:00
Christoph Junghans	8f8aee65d2	cmake: fix globbing of emacs files Done by: sed -i '/GLOB/s@/\\.@/[^.].@g' CMakeLists.txt	2018-09-06 14:52:49 -06:00
Christoph Junghans	d7b00f86f8	cmake: fix Eigen3 detection For some reason FindEigen3.cmake defines all cap. variables, i.e. Eigen3_FOUND -> EIGEN3_FOUND	2018-09-06 14:18:31 -06:00
Axel Kohlmeyer	efd582fb21	update list of code owners for automatic review requests. started assigning core files	2018-09-06 13:13:20 -04:00
Axel Kohlmeyer	b915716b60	fix inconsistency resulting from error in auto-replace macro	2018-09-06 12:27:10 -04:00
Axel Kohlmeyer	b3079f3aec	rename files and classes for pair style eam/cd/omp to follow the rename of the non-threaded classes	2018-09-06 12:07:25 -04:00
Axel Kohlmeyer	84657f1531	final pass at sprintf() buffer overflow prevention. also fix typo in previous commit	2018-09-06 12:00:04 -04:00
Axel Kohlmeyer	db510af582	more fixes for potential buffer overflows by using snprintf() instead of sprintf()	2018-09-06 11:32:44 -04:00
Axel Kohlmeyer	9c27548a5c	more replacing of sprintf() with snprintf()	2018-09-06 10:45:48 -04:00
Axel Kohlmeyer	4d52cb9245	more buffer overflow avoiding through using snprintf()	2018-09-06 09:57:43 -04:00
Axel Kohlmeyer	fbc1c1cfdd	replace fixed buffer sprintf() with straight fprintf()	2018-09-06 09:57:10 -04:00
Axel Kohlmeyer	ca04e8f31c	use snprintf() in a bunch of cases to avoid overflowing fixed size buffers with unchecked strings	2018-09-06 02:57:53 -04:00
Richard Berger	ba1c5d3191	Avoid buffer overflow during errors with long filenames	2018-09-05 21:39:09 -04:00
Richard Berger	0b951840f2	Merge pull request #1103 from rbberger/cmake-fix-user-omp cmake: add missing fix_omp.h and fix_omp.cpp to compilation	2018-09-05 21:27:24 -04:00
Stan Moore	95c3d2fc8e	Merge pull request #1051 from stanmoore1/data_dup Add data duplication option to the KOKKOS package	2018-09-05 17:09:09 -06:00
Richard Berger	ad498811b1	cmake: add missing fix_omp.h and fix_omp.cpp to compilation	2018-09-05 16:04:35 -04:00
Stefan Paquay	5003354fba	Replaced a tab with whitespace.	2018-09-05 11:42:48 -04:00
Stefan Paquay	9b38a5b359	Replaced gpu Makefile with the one from upstream so as not to delete it.	2018-09-05 11:39:41 -04:00
Stefan Paquay	1c8feed69f	Removed gpu makefile from wrong branch.	2018-09-05 11:38:04 -04:00
Stefan Paquay	84de0d38ea	Replaced files with those from upstream.	2018-09-05 11:34:07 -04:00
Stefan Paquay	c192236a7e	Commit before switching to old master.	2018-09-05 11:28:06 -04:00
Stefan Paquay	779f1bd0b1	Fixes a bug in pair_morse_smooth_linear where the cutoff was not properly initialized.	2018-09-05 11:27:10 -04:00
Axel Kohlmeyer	0c92c22755	moved contents of USER-MOP package to USER-MISC as suggested by @sjplimp	2018-09-05 10:56:12 -04:00
Richard Berger	4a5e28af81	Avoid unnecessary sorting in print_columns std::map is a sorted associative container. We don't need to first copy it into a vector and sort that one. print_columns has been refactored as a template function and makes use of this property.	2018-09-05 10:35:47 -04:00
jrgissing	ae7b18fb77	only one call to extract needed	2018-09-05 00:11:50 -06:00
Jacob Gissinger	efd81a2854	Merge branch 'master' into bond/react-efficient-competing_reactions	2018-09-04 22:34:04 -06:00
jrgissing	a5f7b418de	bond/react: efficient competing reactions	2018-09-04 22:06:49 -06:00
Steve Plimpton	bcecc0389e	add variable option to compute bond/angle/dihedral local	2018-09-04 14:48:44 -06:00
Axel Kohlmeyer	cb4ffaf95c	update docs and references for name changes in USER-MOP package, remove obsoleted files	2018-09-04 08:42:32 -04:00
RomainVermorel	a797a0d193	changed computes names to stress/mop and stress/mop/profile	2018-09-04 14:02:19 +02:00
Axel Kohlmeyer	f6f4b58167	add parallel reference output	2018-09-03 23:42:52 -04:00
Axel Kohlmeyer	7b423c6d4b	integrate USER-MOP package into build and documentat system. apply latest LAMMPS programming style conventions	2018-09-03 23:37:25 -04:00
RomainVermorel	ba4ff7744b	new USER-MOP package submitted	2018-09-03 16:26:36 +02:00
Steve Plimpton	a989d04d09	think this flag should not be set by this fix	2018-08-31 14:17:36 -06:00
Steve Plimpton	e3ce702eec	doc adjust	2018-08-31 13:53:02 -06:00
Steven J. Plimpton	c4c5f9a32e	2 new computes: chunk/spread/atom and reduce/chunk	2018-08-31 13:44:49 -06:00
Stefan Paquay	3f07adb765	Merge branch 'master' of https://www.github.com/lammps/lammps into port-enforce2d-kokkos	2018-08-30 11:53:05 -04:00
Stefan Paquay	f8e6e4275a	Merge branch 'master' of https://www.github.com/lammps/lammps into port-enforce2d-kokkos	2018-08-20 15:12:11 -04:00
Stefan Paquay	e0fc050bf4	Changes to linux makefile.	2018-08-20 15:12:05 -04:00
Stan Moore	165fa01a97	Cleanup	2018-08-10 14:23:51 -06:00
Stan Moore	8f665a5a0f	Update Kokkos docs for data duplication	2018-08-10 13:46:03 -06:00
Stan Moore	6f1986a8f1	Small tweaks to Kokkos EAM	2018-08-10 13:10:02 -06:00
Stan Moore	eb4d586493	Remove duplicate if test in fix_qeq_reax_kokkos	2018-08-10 12:53:27 -06:00
Stan Moore	9f058f19bc	Deallocate duplicated memory	2018-08-10 12:49:02 -06:00
Stan Moore	44d7c79fdc	Merge branch 'master' of https://github.com/lammps/lammps into reax_dup	2018-08-10 12:33:28 -06:00
Stan Moore	12ecc45b6a	Add data duplication to pair_snap_kokkos	2018-08-10 12:30:58 -06:00
Stan Moore	ebc0abbb8d	Add data duplication to pair_eam_kokkos variants	2018-08-10 12:30:37 -06:00
Stan Moore	faa21a0591	Add data duplication to pair_sw_kokkos	2018-08-10 12:30:17 -06:00
Stan Moore	d9fb37e25e	Add data duplication to pair_tersoff_kokkos and variants	2018-08-10 12:30:03 -06:00
Stan Moore	120fdbb9fc	Add data duplication to pair_eam_kokkos	2018-08-10 10:53:22 -06:00
Stan Moore	b0183de7ca	Merge branch 'master' into reax_dup	2018-08-09 17:11:26 -06:00
Stefan Paquay	64cd37b6ed	Merge branch 'master' of https://www.github.com/lammps/lammps into port-enforce2d-kokkos	2018-08-08 11:56:28 -04:00
Stan Moore	bf2a942f36	Merge branch 'master' into reax_dup	2018-08-07 13:18:45 -06:00
Stan Moore	ba693a74be	Add contribute method to pair_table_kokkos	2018-08-07 12:27:32 -06:00
Stefan Paquay	c1dffe40dc	Merge branch 'master' of https://www.github.com/lammps/lammps into port-enforce2d-kokkos	2018-08-06 10:25:22 -04:00
Stefan Paquay	446a8da8e7	Commit before merge.	2018-08-06 10:25:12 -04:00
Stan Moore	cdd85b0749	Merge branch 'reax_dup' of ssh://github.com/stanmoore1/lammps into reax_dup	2018-08-03 07:37:49 -06:00
Stan Moore	3e962c9729	Update from master	2018-08-03 07:37:18 -06:00
Stan Moore	30f8bb059f	Merge branch 'reax_dup' of github.com:stanmoore1/lammps into reax_dup	2018-04-02 12:11:22 -06:00
Stan Moore	52254fe155	Fix issue in fix_qeq_reax_kokkos	2018-04-02 12:10:30 -06:00
Stan Moore	d8e0f48864	Merge branch 'master' into reax_dup	2018-04-02 10:55:09 -06:00
Stan Moore	385e1e5adf	Fix compile error with CUDA	2018-03-22 12:28:21 -06:00
Stan Moore	28b894a1d7	Remove unnecessary semicolon	2018-03-22 12:09:04 -06:00
Stan Moore	f72d38e0c3	Merge branch 'master' into reax_dup	2018-03-22 10:03:06 -06:00
Stan Moore	2dcee75ae4	Add data duplication to pair_kokkos.h	2018-03-22 10:01:50 -06:00
Stan Moore	968587ac1e	Merge from master	2018-03-21 17:24:35 -06:00
Stan Moore	6dd8efd0b4	Add if statements	2018-03-21 17:20:45 -06:00
Stan Moore	ed494b295f	WIP	2018-03-01 11:52:39 -07:00
Stan Moore	dbc308f352	Add warning	2018-03-01 10:06:43 -07:00
Stan Moore	4ec99edcc6	Merge branch 'master' of github.com:stanmoore1/lammps into reax_dup	2018-02-27 13:10:17 -07:00
Stan Moore	c2477ce522	Merge branch 'reax_dup' of ssh://github.com/stanmoore1/lammps into reax_dup	2018-02-12 09:00:21 -07:00
Stan Moore	f10c988903	Merge branch 'master' into reax_dup	2018-02-12 08:59:57 -07:00
Stan Moore	81331e2a34	Better load balance fix_qeq_reax_kokkos for half neigh list	2017-12-21 11:07:06 -07:00
Stan Moore	dbbfacc598	Fix atomic issues	2017-12-20 16:13:28 -07:00
Stan Moore	2fc8da08f4	Merge branch 'develop' into reax_dup	2017-12-20 14:32:05 -07:00
Stan Moore	5886cadeef	Fix compiler warnings in atom_vec_hybrid_kokkos	2017-12-18 14:12:58 -07:00
Stan Moore	2b99a26b47	Fix issue in fix_qeq_reax_kokkos, can't call child function from base constructor	2017-12-18 13:57:56 -07:00
Stan Moore	7156d49b8d	Merge branch 'pr-750' into develop	2017-12-18 11:13:17 -07:00
Stan Moore	dce6c9edce	Merge branch 'pr-747' into develop	2017-12-18 11:13:15 -07:00
Stan Moore	b0f9ae049d	Merge branch 'pr-725' into develop	2017-12-18 11:13:11 -07:00
Stan Moore	a5790ef68f	Rename to ScatterView	2017-12-11 08:38:54 -07:00
Stan Moore	8e68015a6f	Merge branch 'reax_dup' of github.com:stanmoore1/lammps into reax_dup	2017-12-11 08:32:20 -07:00
Stan Moore	95aec46b99	Merge remote-tracking branch 'origin/master' into reax_dup	2017-12-11 08:30:27 -07:00
Stan Moore	8a9a7f4e50	Fix issues	2017-12-06 14:06:35 -07:00
Stan Moore	d2da1f5797	Template out atomics for full neighbor list	2017-12-06 09:41:29 -07:00
Stan Moore	9f08cec07a	Merge branch 'improve_reax' of ssh://github.com/stanmoore1/lammps into reax_dup	2017-11-30 09:48:15 -07:00
Stan Moore	ee9ba99cde	Fix some bugs in pair_reaxc_kokkos	2017-11-29 11:11:20 -07:00
Stan Moore	41202c3627	Turn atomics back on for some views in pair_reaxc_kokkos	2017-11-28 16:28:17 -07:00
Stan Moore	54f2493018	Added ReductionView to Kokkos ReaxFF	2017-11-28 14:53:44 -07:00