diff --git a/doc/Makefile b/doc/Makefile index fd087f0344..a1f76d7041 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -158,7 +158,7 @@ $(VENV): @( \ virtualenv -p $(PYTHON) $(VENV); \ . $(VENV)/bin/activate; \ - pip install Sphinx; \ + pip install Sphinx==1.5.6; \ pip install sphinxcontrib-images; \ deactivate;\ ) diff --git a/doc/src/Eqs/pair_meam_spline.jpg b/doc/src/Eqs/pair_meam_spline.jpg index 29f1c72543..fd396d75bc 100644 Binary files a/doc/src/Eqs/pair_meam_spline.jpg and b/doc/src/Eqs/pair_meam_spline.jpg differ diff --git a/doc/src/Eqs/pair_meam_spline.tex b/doc/src/Eqs/pair_meam_spline.tex index 55d42f801c..b4f58381a4 100644 --- a/doc/src/Eqs/pair_meam_spline.tex +++ b/doc/src/Eqs/pair_meam_spline.tex @@ -1,13 +1,14 @@ \documentclass[12pt]{article} +\usepackage{amsmath} \begin{document} $$ - E=\sum_{ij}\phi(r_{ij})+\sum_{i}U(\rho_{i}), + E=\sum_{i LAMMPS Users Manual - + @@ -21,7 +21,7 @@

LAMMPS Documentation :c,h3 -31 Mar 2017 version :c,h4 +19 May 2017 version :c,h4 Version info: :h4 @@ -158,12 +158,11 @@ END_RST --> 2.1 "What's in the LAMMPS distribution"_start_1 :ulb,b 2.2 "Making LAMMPS"_start_2 :b 2.3 "Making LAMMPS with optional packages"_start_3 :b - 2.4 "Building LAMMPS via the Make.py script"_start_4 :b - 2.5 "Building LAMMPS as a library"_start_5 :b - 2.6 "Running LAMMPS"_start_6 :b - 2.7 "Command-line options"_start_7 :b - 2.8 "Screen output"_start_8 :b - 2.9 "Tips for users of previous versions"_start_9 :ule,b + 2.4 "Building LAMMPS as a library"_start_4 :b + 2.5 "Running LAMMPS"_start_5 :b + 2.6 "Command-line options"_start_6 :b + 2.7 "Screen output"_start_7 :b + 2.8 "Tips for users of previous versions"_start_8 :ule,b "Commands"_Section_commands.html :l 3.1 "LAMMPS input script"_cmd_1 :ulb,b 3.2 "Parsing rules"_cmd_2 :b diff --git a/doc/src/Section_commands.txt b/doc/src/Section_commands.txt index e80b0303eb..dc7ddebe58 100644 --- a/doc/src/Section_commands.txt +++ b/doc/src/Section_commands.txt @@ -527,9 +527,9 @@ These are additional commands in USER packages, which can be used if "LAMMPS is built with the appropriate package"_Section_start.html#start_3. -"dump custom/vtk"_dump_custom_vtk.html, -"dump nc"_dump_nc.html, -"dump nc/mpiio"_dump_nc.html, +"dump netcdf"_dump_netcdf.html, +"dump netcdf/mpiio"_dump_netcdf.html, +"dump vtk"_dump_vtk.html, "group2ndx"_group2ndx.html, "ndx2group"_group2ndx.html, "temper/grem"_temper_grem.html :tb(c=3,ea=c) @@ -618,6 +618,7 @@ USER-INTEL, k = KOKKOS, o = USER-OMP, t = OPT. "press/berendsen"_fix_press_berendsen.html, "print"_fix_print.html, "property/atom"_fix_property_atom.html, +"python"_fix_python.html, "qeq/comb (o)"_fix_qeq_comb.html, "qeq/dynamic"_fix_qeq.html, "qeq/fire"_fix_qeq.html, @@ -931,6 +932,8 @@ KOKKOS, o = USER-OMP, t = OPT. 
"gran/hertz/history (o)"_pair_gran.html, "gran/hooke (o)"_pair_gran.html, "gran/hooke/history (o)"_pair_gran.html, +"gw"_pair_gw.html, +"gw/zbl"_pair_gw.html, "hbond/dreiding/lj (o)"_pair_hbond_dreiding.html, "hbond/dreiding/morse (o)"_pair_hbond_dreiding.html, "kim"_pair_kim.html, @@ -982,6 +985,7 @@ KOKKOS, o = USER-OMP, t = OPT. "peri/pmb (o)"_pair_peri.html, "peri/ves"_pair_peri.html, "polymorphic"_pair_polymorphic.html, +"python"_pair_python.html, "reax"_pair_reax.html, "rebo (o)"_pair_airebo.html, "resquared (go)"_pair_resquared.html, @@ -1016,6 +1020,7 @@ package"_Section_start.html#start_3. "dpd/fdt/energy"_pair_dpd_fdt.html, "eam/cd (o)"_pair_eam.html, "edip (o)"_pair_edip.html, +"edip/multi"_pair_edip.html, "eff/cut"_pair_eff.html, "exp6/rx"_pair_exp6_rx.html, "gauss/cut"_pair_gauss.html, @@ -1052,7 +1057,7 @@ package"_Section_start.html#start_3. "oxdna2/excv"_pair_oxdna2.html, "oxdna2/stk"_pair_oxdna2.html, "quip"_pair_quip.html, -"reax/c (k)"_pair_reax_c.html, +"reax/c (k)"_pair_reaxc.html, "smd/hertz"_pair_smd_hertz.html, "smd/tlsph"_pair_smd_tlsph.html, "smd/triangulated/surface"_pair_smd_triangulated_surface.html, @@ -1155,7 +1160,7 @@ USER-OMP, t = OPT. "zero"_dihedral_zero.html, "hybrid"_dihedral_hybrid.html, "charmm (ko)"_dihedral_charmm.html, -"charmmfsh"_dihedral_charmm.html, +"charmmfsw"_dihedral_charmm.html, "class2 (ko)"_dihedral_class2.html, "harmonic (io)"_dihedral_harmonic.html, "helix (o)"_dihedral_helix.html, diff --git a/doc/src/Section_errors.txt b/doc/src/Section_errors.txt index 832c5718ab..5e0574b390 100644 --- a/doc/src/Section_errors.txt +++ b/doc/src/Section_errors.txt @@ -11171,6 +11171,12 @@ Self-explanatory. :dd If the fix changes the timestep, the dump dcd file will not reflect the change. :dd +{Energy due to X extra global DOFs will be included in minimizer energies} :dt + +When using fixes like box/relax, the potential energy used by the minimizer +is augmented by an additional energy provided by the fix. 
Thus the printed +converged energy may be different from the total potential energy. :dd + {Energy tally does not account for 'zero yes'} :dt The energy removed by using the 'zero yes' flag is not accounted diff --git a/doc/src/Section_howto.txt b/doc/src/Section_howto.txt index 6f59f8b55e..579cb68474 100644 --- a/doc/src/Section_howto.txt +++ b/doc/src/Section_howto.txt @@ -215,7 +215,7 @@ documentation for the formula it computes. "special_bonds"_special_bonds.html charmm "special_bonds"_special_bonds.html amber :ul -NOTE: For CHARMM, the newer {charmmfsw} or {charmmfsh} styles were +NOTE: For CHARMM, newer {charmmfsw} or {charmmfsh} styles were released in March 2017. We recommend they be used instead of the older {charmm} styles. See discussion of the differences on the "pair charmm"_pair_charmm.html and "dihedral charmm"_dihedral_charmm.html diff --git a/doc/src/Section_intro.txt b/doc/src/Section_intro.txt index 33c3cf395f..bfb6ef3901 100644 --- a/doc/src/Section_intro.txt +++ b/doc/src/Section_intro.txt @@ -249,8 +249,12 @@ Pizza.py WWW site"_pizza. :l Specialized features :h5 -These are LAMMPS capabilities which you may not think of as typical -molecular dynamics options: +LAMMPS can be built with optional packages which implement a variety +of additional capabilities. An overview of all the packages is "given +here"_Section_packages.html. + +These are some LAMMPS capabilities which you may not think of as +typical classical molecular dynamics options: "static"_balance.html and "dynamic load-balancing"_fix_balance.html "generalized aspherical particles"_body.html @@ -515,7 +519,7 @@ the packages they have written are somewhat unique to LAMMPS and the code would not be as general-purpose as it is without their expertise and efforts. 
-Axel Kohlmeyer (Temple U), akohlmey at gmail.com, SVN and Git repositories, indefatigable mail list responder, USER-CG-CMM and USER-OMP packages +Axel Kohlmeyer (Temple U), akohlmey at gmail.com, SVN and Git repositories, indefatigable mail list responder, USER-CGSDK and USER-OMP packages Roy Pollock (LLNL), Ewald and PPPM solvers Mike Brown (ORNL), brownw at ornl.gov, GPU package Greg Wagner (Sandia), gjwagne at sandia.gov, MEAM package for MEAM potential diff --git a/doc/src/Section_packages.txt b/doc/src/Section_packages.txt index 5c6463a71a..cc44c05906 100644 --- a/doc/src/Section_packages.txt +++ b/doc/src/Section_packages.txt @@ -10,1895 +10,2593 @@ Section"_Section_accelerate.html :c 4. Packages :h3 -This section gives an overview of the add-on optional packages that -extend LAMMPS functionality. Packages are groups of files that enable -a specific set of features. For example, force fields for molecular -systems or granular systems are in packages. You can see the list of -all packages by typing "make package" from within the src directory of -the LAMMPS distribution. +This section gives an overview of the optional packages that extend +LAMMPS functionality with instructions on how to build LAMMPS with +each of them. Packages are groups of files that enable a specific set +of features. For example, force fields for molecular systems or +granular systems are in packages. You can see the list of all +packages and "make" commands to manage them by typing "make package" +from within the src directory of the LAMMPS distribution. "Section +2.3"_Section_start.html#start_3 gives general info on how to install +and un-install packages as part of the LAMMPS build process. -Here are links for two tables below, which list standard and user -packages. 
+There are two kinds of packages in LAMMPS, standard and user packages: -4.1 "Standard packages"_#pkg_1 -4.2 "User packages"_#pkg_2 :all(b) +"Table of standard packages"_#table_standard +"Table of user packages"_#table_user :ul -"Section 2.3"_Section_start.html#start_3 of the manual describes -the difference between standard packages and user packages. It also -has general details on how to include/exclude specific packages as -part of the LAMMPS build process, and on how to build auxiliary -libraries or modify a machine Makefile if a package requires it. +Standard packages are supported by the LAMMPS developers and are +written in a syntax and style consistent with the rest of LAMMPS. +This means the developers will answer questions about them, debug and +fix them if necessary, and keep them compatible with future changes to +LAMMPS. -Following the two tables below, is a sub-section for each package. It -has a summary of what the package contains. It has specific -instructions on how to install it, build or obtain any auxiliary -library it requires, and any Makefile.machine changes it requires. It -also lists pointers to examples of its use or documentation provided -in the LAMMPS distribution. If you want to know the complete list of -commands that a package adds to LAMMPS, simply list the files in its -directory, e.g. "ls src/GRANULAR". Source files with names that start -with compute, fix, pair, bond, etc correspond to command styles with -the same names. +User packages have been contributed by users, and begin with the +"user" prefix. If they are a single command (single file), they are +typically in the user-misc package. User packages don't necessarily +meet the requirements of the standard packages. If you have problems +using a feature provided in a user package, you may need to contact +the contributor directly to get help. 
Information on how to submit +additions you make to LAMMPS as single files or as a standard or user +package is given in "this section"_Section_modify.html#mod_15 of the +manual. -NOTE: The USER package sub-sections below are still being filled in, -as of March 2016. +Following the next two tables is a sub-section for each package. It +lists authors (if applicable) and summarizes the package contents. It +has specific instructions on how to install the package, including (if +necessary) downloading or building any extra library it requires. It +also gives links to documentation, example scripts, and +pictures/movies (if available) that illustrate use of the package. -Unless otherwise noted below, every package is independent of all the -others. I.e. any package can be included or excluded in a LAMMPS -build, independent of all other packages. However, note that some -packages include commands derived from commands in other packages. If -the other package is not installed, the derived command from the new -package will also not be installed when you include the new one. -E.g. the pair lj/cut/coul/long/omp command from the USER-OMP package -will not be installed as part of the USER-OMP package if the KSPACE -package is not also installed, since it contains the pair -lj/cut/coul/long command. If you later install the KSPACE package and -the USER-OMP package is already installed, both the pair -lj/cut/coul/long and lj/cut/coul/long/omp commands will be installed. +NOTE: To see the complete list of commands a package adds to LAMMPS, +just look at the files in its src directory, e.g. "ls src/GRANULAR". +Files with names that start with fix, compute, atom, pair, bond, +angle, etc correspond to commands with the same style names. + +In these two tables, the "Example" column is a sub-directory in the +examples directory of the distribution which has an input script that +uses the package. E.g.
"peptide" refers to the examples/peptide +directory; USER/atc refers to the examples/USER/atc directory. The +"Library" column indicates whether an extra library is needed to build +and use the package: + +dash = no library +sys = system library: you likely have it on your machine +int = internal library: provided with LAMMPS, but you may need to build it +ext = external library: you will need to download and install it on your machine :ul :line - -4.1 Standard packages :h4,link(pkg_1) - -The current list of standard packages is as follows. Each package -name links to a sub-section below with more details. - -Package, Description, Author(s), Doc page, Example, Library -"ASPHERE"_#ASPHERE, aspherical particles, -, "Section 6.6.14"_Section_howto.html#howto_14, ellipse, - -"BODY"_#BODY, body-style particles, -, "body"_body.html, body, - -"CLASS2"_#CLASS2, class 2 force fields, -, "pair_style lj/class2"_pair_class2.html, -, - -"COLLOID"_#COLLOID, colloidal particles, Kumar (1), "atom_style colloid"_atom_style.html, colloid, - -"COMPRESS"_#COMPRESS, I/O compression, Axel Kohlmeyer (Temple U), "dump */gz"_dump.html, -, - -"CORESHELL"_#CORESHELL, adiabatic core/shell model, Hendrik Heenen (Technical U of Munich), "Section 6.6.25"_Section_howto.html#howto_25, coreshell, - -"DIPOLE"_#DIPOLE, point dipole particles, -, "pair_style dipole/cut"_pair_dipole.html, dipole, - -"GPU"_#GPU, GPU-enabled styles, Mike Brown (ORNL), "Section 5.3.1"_accelerate_gpu.html, gpu, lib/gpu -"GRANULAR"_#GRANULAR, granular systems, -, "Section 6.6.6"_Section_howto.html#howto_6, pour, - -"KIM"_#KIM, openKIM potentials, Smirichinski & Elliot & Tadmor (3), "pair_style kim"_pair_kim.html, kim, KIM -"KOKKOS"_#KOKKOS, Kokkos-enabled styles, Trott & Moore (4), "Section 5.3.3"_accelerate_kokkos.html, kokkos, lib/kokkos -"KSPACE"_#KSPACE, long-range Coulombic solvers, -, "kspace_style"_kspace_style.html, peptide, - -"MANYBODY"_#MANYBODY, many-body potentials, -, "pair_style tersoff"_pair_tersoff.html, 
shear, - -"MEAM"_#MEAM, modified EAM potential, Greg Wagner (Sandia), "pair_style meam"_pair_meam.html, meam, lib/meam -"MC"_#MC, Monte Carlo options, -, "fix gcmc"_fix_gcmc.html, -, - -"MOLECULE"_#MOLECULE, molecular system force fields, -, "Section 6.6.3"_Section_howto.html#howto_3, peptide, - -"OPT"_#OPT, optimized pair styles, Fischer & Richie & Natoli (2), "Section 5.3.5"_accelerate_opt.html, -, - -"PERI"_#PERI, Peridynamics models, Mike Parks (Sandia), "pair_style peri"_pair_peri.html, peri, - -"POEMS"_#POEMS, coupled rigid body motion, Rudra Mukherjee (JPL), "fix poems"_fix_poems.html, rigid, lib/poems -"PYTHON"_#PYTHON, embed Python code in an input script, -, "python"_python.html, python, lib/python -"REAX"_#REAX, ReaxFF potential, Aidan Thompson (Sandia), "pair_style reax"_pair_reax.html, reax, lib/reax -"REPLICA"_#REPLICA, multi-replica methods, -, "Section 6.6.5"_Section_howto.html#howto_5, tad, - -"RIGID"_#RIGID, rigid bodies, -, "fix rigid"_fix_rigid.html, rigid, - -"SHOCK"_#SHOCK, shock loading methods, -, "fix msst"_fix_msst.html, -, - -"SNAP"_#SNAP, quantum-fit potential, Aidan Thompson (Sandia), "pair snap"_pair_snap.html, snap, - -"SRD"_#SRD, stochastic rotation dynamics, -, "fix srd"_fix_srd.html, srd, - -"VORONOI"_#VORONOI, Voronoi tesselations, Daniel Schwen (LANL), "compute voronoi/atom"_compute_voronoi_atom.html, -, Voro++ -:tb(ea=c) - -The "Authors" column lists a name(s) if a specific person is -responsible for creating and maintaining the package. - -(1) The COLLOID package includes Fast Lubrication Dynamics pair styles -which were created by Amit Kumar and Michael Bybee from Jonathan -Higdon's group at UIUC. - -(2) The OPT package was created by James Fischer (High Performance -Technologies), David Richie, and Vincent Natoli (Stone Ridge -Technolgy). - -(3) The KIM package was created by Valeriu Smirichinski, Ryan Elliott, -and Ellad Tadmor (U Minn). 
- -(4) The KOKKOS package was created primarily by Christian Trott and -Stan Moore (Sandia). It uses the Kokkos library which was developed -by Carter Edwards, Christian Trott, and others at Sandia. - -The "Doc page" column links to either a sub-section of the -"Section 6"_Section_howto.html of the manual, or an input script -command implemented as part of the package, or to additional -documentation provided within the package. - -The "Example" column is a sub-directory in the examples directory of -the distribution which has an input script that uses the package. -E.g. "peptide" refers to the examples/peptide directory. - -The "Library" column lists an external library which must be built -first and which LAMMPS links to when it is built. If it is listed as -lib/package, then the code for the library is under the lib directory -of the LAMMPS distribution. See the lib/package/README file for info -on how to build the library. If it is not listed as lib/package, then -it is a third-party library not included in the LAMMPS distribution. -See details on all of this below for individual packages. - :line -ASPHERE package :link(ASPHERE),h5 +[Standard packages] :link(table_standard),p -Contents: Several computes, time-integration fixes, and pair styles -for aspherical particle models: ellipsoids, 2d lines, 3d triangles. 
+Package, Description, Doc page, Example, Library +"ASPHERE"_#ASPHERE, aspherical particle models, "Section 6.6.14"_Section_howto.html#howto_14, ellipse, - +"BODY"_#BODY, body-style particles, "body"_body.html, body, - +"CLASS2"_#CLASS2, class 2 force fields, "pair_style lj/class2"_pair_class2.html, -, - +"COLLOID"_#COLLOID, colloidal particles, "atom_style colloid"_atom_style.html, colloid, - +"COMPRESS"_#COMPRESS, I/O compression, "dump */gz"_dump.html, -, sys +"CORESHELL"_#CORESHELL, adiabatic core/shell model, "Section 6.6.25"_Section_howto.html#howto_25, coreshell, - +"DIPOLE"_#DIPOLE, point dipole particles, "pair_style dipole/cut"_pair_dipole.html, dipole, - +"GPU"_#GPU, GPU-enabled styles, "Section 5.3.1"_accelerate_gpu.html, WWW bench, int +"GRANULAR"_#GRANULAR, granular systems, "Section 6.6.6"_Section_howto.html#howto_6, pour, - +"KIM"_#KIM, openKIM wrapper, "pair_style kim"_pair_kim.html, kim, ext +"KOKKOS"_#KOKKOS, Kokkos-enabled styles, "Section 5.3.3"_accelerate_kokkos.html, WWW bench, - +"KSPACE"_#KSPACE, long-range Coulombic solvers, "kspace_style"_kspace_style.html, peptide, - +"MANYBODY"_#MANYBODY, many-body potentials, "pair_style tersoff"_pair_tersoff.html, shear, - +"MC"_#MC, Monte Carlo options, "fix gcmc"_fix_gcmc.html, -, - +"MEAM"_#MEAM, modified EAM potential, "pair_style meam"_pair_meam.html, meam, int +"MISC"_#MISC, miscellaneous single-file commands, -, -, - +"MOLECULE"_#MOLECULE, molecular system force fields, "Section 6.6.3"_Section_howto.html#howto_3, peptide, - +"MPIIO"_#MPIIO, MPI parallel I/O dump and restart, "dump"_dump.html, -, - +"MSCG"_#MSCG, multi-scale coarse-graining wrapper, "fix mscg"_fix_mscg.html, mscg, ext +"OPT"_#OPT, optimized pair styles, "Section 5.3.5"_accelerate_opt.html, WWW bench, - +"PERI"_#PERI, Peridynamics models, "pair_style peri"_pair_peri.html, peri, - +"POEMS"_#POEMS, coupled rigid body motion, "fix poems"_fix_poems.html, rigid, int +"PYTHON"_#PYTHON, embed Python code in an input script,
"python"_python.html, python, sys +"QEQ"_#QEQ, QEq charge equilibration, "fix qeq"_fix_qeq.html, qeq, - +"REAX"_#REAX, ReaxFF potential (Fortran), "pair_style reax"_pair_reax.html, reax, int +"REPLICA"_#REPLICA, multi-replica methods, "Section 6.6.5"_Section_howto.html#howto_5, tad, - +"RIGID"_#RIGID, rigid bodies and constraints, "fix rigid"_fix_rigid.html, rigid, - +"SHOCK"_#SHOCK, shock loading methods, "fix msst"_fix_msst.html, -, - +"SNAP"_#SNAP, quantum-fitted potential, "pair snap"_pair_snap.html, snap, - +"SRD"_#SRD, stochastic rotation dynamics, "fix srd"_fix_srd.html, srd, - +"VORONOI"_#VORONOI, Voronoi tesselation, "compute voronoi/atom"_compute_voronoi_atom.html, -, ext +:tb(ea=c,ca1=l) -To install via make or Make.py: +[USER packages] :link(table_user),p + +Package, Description, Doc page, Example, Library +"USER-ATC"_#USER-ATC, atom-to-continuum coupling, "fix atc"_fix_atc.html, USER/atc, int +"USER-AWPMD"_#USER-AWPMD, wave-packet MD, "pair_style awpmd/cut"_pair_awpmd.html, USER/awpmd, int +"USER-CGDNA"_#USER-CGDNA, coarse-grained DNA force fields, src/USER-CGDNA/README, USER/cgdna, - +"USER-CGSDK"_#USER-CGSDK, SDK coarse-graining model, "pair_style lj/sdk"_pair_sdk.html, USER/cgsdk, - +"USER-COLVARS"_#USER-COLVARS, collective variables library, "fix colvars"_fix_colvars.html, USER/colvars, int +"USER-DIFFRACTION"_#USER-DIFFRACTION, virtual x-ray and electron diffraction,"compute xrd"_compute_xrd.html, USER/diffraction, - +"USER-DPD"_#USER-DPD, reactive dissipative particle dynamics, src/USER-DPD/README, USER/dpd, - +"USER-DRUDE"_#USER-DRUDE, Drude oscillators, "tutorial"_tutorial_drude.html, USER/drude, - +"USER-EFF"_#USER-EFF, electron force field,"pair_style eff/cut"_pair_eff.html, USER/eff, - +"USER-FEP"_#USER-FEP, free energy perturbation,"compute fep"_compute_fep.html, USER/fep, - +"USER-H5MD"_#USER-H5MD, dump output via HDF5,"dump h5md"_dump_h5md.html, -, ext +"USER-INTEL"_#USER-INTEL, optimized Intel CPU and KNL styles,"Section 
5.3.2"_accelerate_intel.html, WWW bench, - +"USER-LB"_#USER-LB, Lattice Boltzmann fluid,"fix lb/fluid"_fix_lb_fluid.html, USER/lb, - +"USER-MANIFOLD"_#USER-MANIFOLD, motion on 2d surfaces,"fix manifoldforce"_fix_manifoldforce.html, USER/manifold, - +"USER-MGPT"_#USER-MGPT, fast MGPT multi-ion potentials, "pair_style mgpt"_pair_mgpt.html, USER/mgpt, - +"USER-MISC"_#USER-MISC, single-file contributions, USER-MISC/README, USER/misc, - +"USER-MOLFILE"_#USER-MOLFILE, "VMD"_vmd_home molfile plug-ins,"dump molfile"_dump_molfile.html, -, ext +"USER-NETCDF"_#USER-NETCDF, dump output via NetCDF,"dump netcdf"_dump_netcdf.html, -, ext +"USER-OMP"_#USER-OMP, OpenMP-enabled styles,"Section 5.3.4"_accelerate_omp.html, WWW bench, - +"USER-PHONON"_#USER-PHONON, phonon dynamical matrix,"fix phonon"_fix_phonon.html, USER/phonon, - +"USER-QMMM"_#USER-QMMM, QM/MM coupling,"fix qmmm"_fix_qmmm.html, USER/qmmm, ext +"USER-QTB"_#USER-QTB, quantum nuclear effects,"fix qtb"_fix_qtb.html "fix qbmsst"_fix_qbmsst.html, qtb, - +"USER-QUIP"_#USER-QUIP, QUIP/libatoms interface,"pair_style quip"_pair_quip.html, USER/quip, ext +"USER-REAXC"_#USER-REAXC, ReaxFF potential (C/C++) ,"pair_style reaxc"_pair_reaxc.html, reax, - +"USER-SMD"_#USER-SMD, smoothed Mach dynamics,"SMD User Guide"_PDF/SMD_LAMMPS_userguide.pdf, USER/smd, ext +"USER-SMTBQ"_#USER-SMTBQ, second moment tight binding QEq potential,"pair_style smtbq"_pair_smtbq.html, USER/smtbq, - +"USER-SPH"_#USER-SPH, smoothed particle hydrodynamics,"SPH User Guide"_PDF/SPH_LAMMPS_userguide.pdf, USER/sph, - +"USER-TALLY"_#USER-TALLY, pairwise tally computes,"compute XXX/tally"_compute_tally.html, USER/tally, - +"USER-VTK"_#USER-VTK, dump output via VTK, "dump vtk"_dump_vtk.html, -, ext +:tb(ea=c,ca1=l) + +:line +:line + +ASPHERE package :link(ASPHERE),h4 + +[Contents:] + +Computes, time-integration fixes, and pair styles for aspherical +particle models including ellipsoids, 2d lines, and 3d triangles.
+ +[Install or un-install:] make yes-asphere make machine :pre -Make.py -p asphere -a machine :pre - -To un-install via make or Make.py: - make no-asphere make machine :pre -Make.py -p ^asphere -a machine :pre +[Supporting info:] -Supporting info: "Section 6.14"_Section_howto.html#howto_14, -"pair_style gayberne"_pair_gayberne.html, "pair_style -resquared"_pair_resquared.html, -"doc/PDF/pair_gayberne_extra.pdf"_PDF/pair_gayberne_extra.pdf, -"doc/PDF/pair_resquared_extra.pdf"_PDF/pair_resquared_extra.pdf, -examples/ASPHERE, examples/ellipse +src/ASPHERE: filenames -> commands +"Section 6.14"_Section_howto.html#howto_14 +"pair_style gayberne"_pair_gayberne.html +"pair_style resquared"_pair_resquared.html +"doc/PDF/pair_gayberne_extra.pdf"_PDF/pair_gayberne_extra.pdf +"doc/PDF/pair_resquared_extra.pdf"_PDF/pair_resquared_extra.pdf +examples/ASPHERE +examples/ellipse +http://lammps.sandia.gov/movies.html#line +http://lammps.sandia.gov/movies.html#tri :ul :line -BODY package :link(BODY),h5 +BODY package :link(BODY),h4 -Contents: Support for body-style particles. Computes, +[Contents:] + +Body-style particles with internal structure. Computes, time-integration fixes, pair styles, as well as the body styles themselves. See the "body"_body.html doc page for an overview. 
-To install via make or Make.py: +[Install or un-install:] make yes-body make machine :pre -Make.py -p body -a machine :pre - -To un-install via make or Make.py: - make no-body make machine :pre -Make.py -p ^body -a machine :pre +[Supporting info:] -Supporting info: "atom_style body"_atom_style.html, "body"_body.html, -"pair_style body"_pair_body.html, examples/body +src/BODY: filenames -> commands +"body"_body.html +"atom_style body"_atom_style.html +"fix nve/body"_fix_nve_body.html +"pair_style body"_pair_body.html +examples/body :ul :line -CLASS2 package :link(CLASS2),h5 +CLASS2 package :link(CLASS2),h4 -Contents: Bond, angle, dihedral, improper, and pair styles for the -COMPASS CLASS2 molecular force field. +[Contents:] -To install via make or Make.py: +Bond, angle, dihedral, improper, and pair styles for the COMPASS +CLASS2 molecular force field. + +[Install or un-install:] make yes-class2 make machine :pre -Make.py -p class2 -a machine :pre - -To un-install via make or Make.py: - make no-class2 make machine :pre -Make.py -p ^class2 -a machine :pre +[Supporting info:] -Supporting info: "bond_style class2"_bond_class2.html, "angle_style -class2"_angle_class2.html, "dihedral_style -class2"_dihedral_class2.html, "improper_style -class2"_improper_class2.html, "pair_style lj/class2"_pair_class2.html +src/CLASS2: filenames -> commands +"bond_style class2"_bond_class2.html +"angle_style class2"_angle_class2.html +"dihedral_style class2"_dihedral_class2.html +"improper_style class2"_improper_class2.html +"pair_style lj/class2"_pair_class2.html :ul :line -COLLOID package :link(COLLOID),h5 +COLLOID package :link(COLLOID),h4 -Contents: Support for coarse-grained colloidal particles. Wall fix -and pair styles that implement colloidal interaction models for -finite-size particles. This includes the Fast Lubrication Dynamics -method for hydrodynamic interactions, which is a simplified -approximation to Stokesian dynamics.
+[Contents:] -To install via make or Make.py: +Coarse-grained finite-size colloidal particles. Pair styles and fix +wall styles for colloidal interactions. Includes the Fast Lubrication +Dynamics (FLD) method for hydrodynamic interactions, which is a +simplified approximation to Stokesian dynamics. + +[Authors:] This package includes Fast Lubrication Dynamics pair styles +which were created by Amit Kumar and Michael Bybee from Jonathan +Higdon's group at UIUC. + +[Install or un-install:] make yes-colloid make machine :pre -Make.py -p colloid -a machine :pre - -To un-install via make or Make.py: - make no-colloid make machine :pre -Make.py -p ^colloid -a machine :pre +[Supporting info:] -Supporting info: "fix wall/colloid"_fix_wall.html, "pair_style -colloid"_pair_colloid.html, "pair_style -yukawa/colloid"_pair_yukawa_colloid.html, "pair_style -brownian"_pair_brownian.html, "pair_style -lubricate"_pair_lubricate.html, "pair_style -lubricateU"_pair_lubricateU.html, examples/colloid, examples/srd +src/COLLOID: filenames -> commands +"fix wall/colloid"_fix_wall.html +"pair_style colloid"_pair_colloid.html +"pair_style yukawa/colloid"_pair_yukawa_colloid.html +"pair_style brownian"_pair_brownian.html +"pair_style lubricate"_pair_lubricate.html +"pair_style lubricateU"_pair_lubricateU.html +examples/colloid +examples/srd :ul :line -COMPRESS package :link(COMPRESS),h5 +COMPRESS package :link(COMPRESS),h4 -Contents: Support for compressed output of dump files via the zlib -compression library, using dump styles with a "gz" in their style -name. +[Contents:] -Building with the COMPRESS package assumes you have the zlib -compression library available on your system. The build uses the -lib/compress/Makefile.lammps file in the compile/link process. You -should only need to edit this file if the LAMMPS build cannot find the -zlib info it specifies. +Compressed output of dump files via the zlib compression library, +using dump styles with a "gz" in their style name.
-To install via make or Make.py: +To use this package you must have the zlib compression library +available on your system. + +[Author:] Axel Kohlmeyer (Temple U). + +[Install or un-install:] + +Note that building with this package assumes you have the zlib +compression library available on your system. The LAMMPS build uses +the settings in the lib/compress/Makefile.lammps file in the +compile/link process. You should only need to edit this file if the +LAMMPS build fails on your system. make yes-compress make machine :pre -Make.py -p compress -a machine :pre - -To un-install via make or Make.py: - make no-compress make machine :pre -Make.py -p ^compress -a machine :pre +[Supporting info:] -Supporting info: src/COMPRESS/README, lib/compress/README, "dump -atom/gz"_dump.html, "dump cfg/gz"_dump.html, "dump -custom/gz"_dump.html, "dump xyz/gz"_dump.html +src/COMPRESS: filenames -> commands +src/COMPRESS/README +lib/compress/README +"dump atom/gz"_dump.html +"dump cfg/gz"_dump.html +"dump custom/gz"_dump.html +"dump xyz/gz"_dump.html :ul :line -CORESHELL package :link(CORESHELL),h5 +CORESHELL package :link(CORESHELL),h4 -Contents: Compute and pair styles that implement the adiabatic -core/shell model for polarizability. The compute temp/cs command -measures the temperature of a system with core/shell particles. The -pair styles augment Born, Buckingham, and Lennard-Jones styles with -core/shell capabilities. See "Section 6.26"_Section_howto.html#howto_26 -for an overview of how to use the package. +[Contents:] -To install via make or Make.py: +Compute and pair styles that implement the adiabatic core/shell model +for polarizability. The pair styles augment Born, Buckingham, and +Lennard-Jones styles with core/shell capabilities. The "compute +temp/cs"_compute_temp_cs.html command calculates the temperature of a +system with core/shell particles. See "Section +6.26"_Section_howto.html#howto_26 for an overview of how to use this +package. 
+ +[Author:] Hendrik Heenen (Technical U of Munich). + +[Install or un-install:] make yes-coreshell make machine :pre -Make.py -p coreshell -a machine :pre - -To un-install via make or Make.py: - make no-coreshell make machine :pre -Make.py -p ^coreshell -a machine :pre +[Supporting info:] -Supporting info: "Section 6.26"_Section_howto.html#howto_26, -"compute temp/cs"_compute_temp_cs.html, -"pair_style born/coul/long/cs"_pair_cs.html, "pair_style -buck/coul/long/cs"_pair_cs.html, pair_style -lj/cut/coul/long/cs"_pair_lj.html, examples/coreshell +src/CORESHELL: filenames -> commands +"Section 6.26"_Section_howto.html#howto_26 +"Section 6.25"_Section_howto.html#howto_25 +"compute temp/cs"_compute_temp_cs.html +"pair_style born/coul/long/cs"_pair_cs.html +"pair_style buck/coul/long/cs"_pair_cs.html +"pair_style lj/cut/coul/long/cs"_pair_lj.html +examples/coreshell :ul :line -DIPOLE package :link(DIPOLE),h5 +DIPOLE package :link(DIPOLE),h4 -Contents: An atom style and several pair styles to support point -dipole models with short-range or long-range interactions. +[Contents:] -To install via make or Make.py: +An atom style and several pair styles for point dipole models with +short-range or long-range interactions. 
+ +[Install or un-install:] make yes-dipole make machine :pre -Make.py -p dipole -a machine :pre - -To un-install via make or Make.py: - make no-dipole make machine :pre -Make.py -p ^dipole -a machine :pre +[Supporting info:] -Supporting info: "atom_style dipole"_atom_style.html, "pair_style -lj/cut/dipole/cut"_pair_dipole.html, "pair_style -lj/cut/dipole/long"_pair_dipole.html, "pair_style -lj/long/dipole/long"_pair_dipole.html, examples/dipole +src/DIPOLE: filenames -> commands +"atom_style dipole"_atom_style.html +"pair_style lj/cut/dipole/cut"_pair_dipole.html +"pair_style lj/cut/dipole/long"_pair_dipole.html +"pair_style lj/long/dipole/long"_pair_dipole.html +examples/dipole :ul :line -GPU package :link(GPU),h5 +GPU package :link(GPU),h4 -Contents: Dozens of pair styles and a version of the PPPM long-range -Coulombic solver for NVIDIA GPUs. All of them have a "gpu" in their -style name. "Section 5.3.1"_accelerate_gpu.html gives +[Contents:] + +Dozens of pair styles and a version of the PPPM long-range Coulombic +solver optimized for NVIDIA GPUs. All such styles have a "gpu" as a +suffix in their style name. "Section 5.3.1"_accelerate_gpu.html gives details of what hardware and Cuda software is required on your system, -and how to build and use this package. See the KOKKOS package, which -also has GPU-enabled styles. +and details on how to build and use this package. Its styles can be +invoked at run time via the "-sf gpu" or "-suffix gpu" "command-line +switches"_Section_start.html#start_7. See also the "KOKKOS"_#KOKKOS +package, which has GPU-enabled styles. -Building LAMMPS with the GPU package requires first building the GPU -library itself, which is a set of C and Cuda files in lib/gpu. -Details of how to do this are in lib/gpu/README. As illustrated -below, perform a "make" using one of the Makefile.machine files in -lib/gpu which should create a lib/reax/libgpu.a file. -Makefile.linux.* and Makefile.xk7 are examples for different -platforms. 
There are 3 important settings in the Makefile.machine you -use: +[Authors:] Mike Brown (Intel) while at Sandia and ORNL and Trung Nguyen +(Northwestern U) while at ORNL. + +[Install or un-install:] + +Before building LAMMPS with this package, you must first build the GPU +library in lib/gpu from a set of provided C and Cuda files. You can +do this manually if you prefer; follow the instructions in +lib/gpu/README. You can also do it in one step from the lammps/src +dir, using a command like these, which simply invoke the +lib/gpu/Install.py script with the specified args: + +make lib-gpu # print help message +make lib-gpu args="-m" # build GPU library with default Makefile.linux +make lib-gpu args="-i xk7 -p single -o xk7.single" # create new Makefile.xk7.single, altered for single-precision +make lib-gpu args="-i xk7 -p single -o xk7.single -m" # ditto, also build GPU library + +Note that this procedure starts with one of the existing +Makefile.machine files in lib/gpu. It allows you to alter 4 important +settings in that Makefile, via the -h, -a, -p, -e switches, +and save the new Makefile, if desired: CUDA_HOME = where NVIDIA Cuda software is installed on your system -CUDA_ARCH = appropriate to your GPU hardware -CUDA_PREC = precision (double, mixed, single) you desire :ul +CUDA_ARCH = what GPU hardware you have (see help message for details) +CUDA_PRECISION = precision (double, mixed, single) +EXTRAMAKE = which Makefile.lammps.* file to copy to Makefile.lammps :ul -See example Makefile.machine files in lib/gpu for the syntax of these -settings. See lib/gpu/Makefile.linux.double for ARCH settings for -various NVIDIA GPUs. The "make" also creates a -lib/gpu/Makefile.lammps file. This file has settings that enable -LAMMPS to link with Cuda libraries. If the settings in -Makefile.lammps for your machine are not correct, the LAMMPS link will -fail. 
Note that the Make.py script has a "-gpu" option to allow the -GPU library (with several of its options) and LAMMPS to be built in -one step, with Type "python src/Make.py -h -gpu" to see the details. +If the library build is successful, 2 files should be created: +lib/gpu/libgpu.a and lib/gpu/Makefile.lammps. The latter has settings +that enable LAMMPS to link with Cuda libraries. If the settings in +Makefile.lammps for your machine are not correct, the LAMMPS build +will fail. -To install via make or Make.py: +You can then install/un-install the package and build LAMMPS in the +usual manner: -cd ~/lammps/lib/gpu -make -f Makefile.linux.mixed # for example -cd ~/lammps/src make yes-gpu make machine :pre -Make.py -p gpu -gpu mode=mixed arch=35 -a machine :pre - -To un-install via make or Make.py: - make no-gpu make machine :pre -Make.py -p ^gpu -a machine :pre +NOTE: If you re-build the GPU library in lib/gpu, you should always +un-install the GPU package, then re-install it and re-build LAMMPS. +This is because the compilation of files in the GPU package uses the +library settings from the lib/gpu/Makefile.machine used to build the +GPU library. 
-Supporting info: src/GPU/README, lib/gpu/README, -"Section 5.3"_Section_accelerate.html#acc_3, -"Section 5.3.1"_accelerate_gpu.html, -Pair Styles section of "Section 3.5"_Section_commands.html#cmd_5 -for any pair style listed with a (g), -"kspace_style"_kspace_style.html, "package gpu"_package.html, -examples/accelerate, bench/FERMI, bench/KEPLER +[Supporting info:] + +src/GPU: filenames -> commands +src/GPU/README +lib/gpu/README +"Section 5.3"_Section_accelerate.html#acc_3 +"Section 5.3.1"_accelerate_gpu.html +"Section 2.7 -sf gpu"_Section_start.html#start_7 +"Section 2.7 -pk gpu"_Section_start.html#start_7 +"package gpu"_package.html +Pair Styles section of "Section 3.5"_Section_commands.html#cmd_5 for pair styles followed by (g) +"Benchmarks page"_http://lammps.sandia.gov/bench.html of web site :ul :line -GRANULAR package :link(GRANULAR),h5 +GRANULAR package :link(GRANULAR),h4 -Contents: Fixes and pair styles that support models of finite-size -granular particles, which interact with each other and boundaries via -frictional and dissipative potentials. +[Contents:] -To install via make or Make.py: +Pair styles and fixes for finite-size granular particles, which +interact with each other and boundaries via frictional and dissipative +potentials. 
+ +[Install or un-install:] make yes-granular make machine :pre -Make.py -p granular -a machine :pre - -To un-install via make or Make.py: - make no-granular make machine :pre -Make.py -p ^granular -a machine :pre +[Supporting info:] -Supporting info: "Section 6.6"_Section_howto.html#howto_6, "fix -pour"_fix_pour.html, "fix wall/gran"_fix_wall_gran.html, "pair_style -gran/hooke"_pair_gran.html, "pair_style -gran/hertz/history"_pair_gran.html, examples/pour, bench/in.chute +src/GRANULAR: filenames -> commands +"Section 6.6"_Section_howto.html#howto_6 +"fix pour"_fix_pour.html +"fix wall/gran"_fix_wall_gran.html +"pair_style gran/hooke"_pair_gran.html +"pair_style gran/hertz/history"_pair_gran.html +examples/granregion +examples/pour +bench/in.chute +http://lammps.sandia.gov/pictures.html#jamming +http://lammps.sandia.gov/movies.html#hopper +http://lammps.sandia.gov/movies.html#dem +http://lammps.sandia.gov/movies.html#brazil +http://lammps.sandia.gov/movies.html#granregion :ul :line -KIM package :link(KIM),h5 +KIM package :link(KIM),h4 -Contents: A pair style that interfaces to the Knowledge Base for -Interatomic Models (KIM) repository of interatomic potentials, so that -KIM potentials can be used in a LAMMPS simulation. +[Contents:] -To build LAMMPS with the KIM package you must have previously -installed the KIM API (library) on your system. The lib/kim/README -file explains how to download and install KIM. Building with the KIM -package also uses the lib/kim/Makefile.lammps file in the compile/link -process. You should not need to edit this file. +A "pair_style kim"_pair_kim.html command which is a wrapper on the +Knowledge Base for Interatomic Models (KIM) repository of interatomic +potentials, enabling any of them to be used in LAMMPS simulations. -To install via make or Make.py: +To use this package you must have the KIM library available on your +system. + +Information about the KIM project can be found at its website: +https://openkim.org. 
The KIM project is led by Ellad Tadmor and Ryan +Elliott (U Minnesota) and James Sethna (Cornell U). + +[Authors:] Ryan Elliott (U Minnesota) is the main developer for the KIM +API which the "pair_style kim"_pair_kim.html command uses. He +developed the pair style in collaboration with Valeriu Smirichinski (U +Minnesota). + +[Install or un-install:] + +Using this package requires the KIM library and its models +(interatomic potentials) to be downloaded and installed on your +system. The library can be downloaded and built in lib/kim or +elsewhere on your system. Details of the download, build, and install +process for KIM are given in the lib/kim/README file. + +Once that process is complete, you can then install/un-install the +package and build LAMMPS in the usual manner: make yes-kim make machine :pre -Make.py -p kim -a machine :pre - -To un-install via make or Make.py: - make no-kim make machine :pre -Make.py -p ^kim -a machine :pre +[Supporting info:] -Supporting info: src/KIM/README, lib/kim/README, "pair_style -kim"_pair_kim.html, examples/kim +src/KIM: filenames -> commands +src/KIM/README +lib/kim/README +"pair_style kim"_pair_kim.html +examples/kim :ul :line -KOKKOS package :link(KOKKOS),h5 +KOKKOS package :link(KOKKOS),h4 -Contents: Dozens of atom, pair, bond, angle, dihedral, improper styles -which run with the Kokkos library to provide optimization for -multicore CPUs (via OpenMP), NVIDIA GPUs, or the Intel Xeon Phi (in -native mode). All of them have a "kk" in their style name. "Section -5.3.3"_accelerate_kokkos.html gives details of what -hardware and software is required on your system, and how to build and -use this package. See the GPU, OPT, USER-INTEL, USER-OMP packages, -which also provide optimizations for the same range of hardware. +[Contents:] -Building with the KOKKOS package requires choosing which of 3 hardware -options you are optimizing for: CPU acceleration via OpenMP, GPU -acceleration, or Intel Xeon Phi. 
(You can build multiple times to -create LAMMPS executables for different hardware.) It also requires a -C++11 compatible compiler. For GPUs, the NVIDIA "nvcc" compiler is -used, and an appropriate KOKKOS_ARCH setting should be made in your -Makefile.machine for your GPU hardware and NVIDIA software. +Dozens of atom, pair, bond, angle, dihedral, improper, fix, compute +styles adapted to compile using the Kokkos library which can convert +them to OpenMP or Cuda code so that they run efficiently on multicore +CPUs, KNLs, or GPUs. All the styles have a "kk" as a suffix in their +style name. "Section 5.3.3"_accelerate_kokkos.html gives details of +what hardware and software is required on your system, and how to +build and use this package. Its styles can be invoked at run time via +the "-sf kk" or "-suffix kk" "command-line +switches"_Section_start.html#start_7. Also see the "GPU"_#GPU, +"OPT"_#OPT, "USER-INTEL"_#USER-INTEL, and "USER-OMP"_#USER-OMP +packages, which have styles optimized for CPUs, KNLs, and GPUs. -The simplest way to do this is to use Makefile.kokkos_cuda or -Makefile.kokkos_omp or Makefile.kokkos_phi in src/MAKE/OPTIONS, via -"make kokkos_cuda" or "make kokkos_omp" or "make kokkos_phi". (Check -the KOKKOS_ARCH setting in Makefile.kokkos_cuda), Or, as illustrated -below, you can use the Make.py script with its "-kokkos" option to -choose which hardware to build for. Type "python src/Make.py -h --kokkos" to see the details. If these methods do not work on your -system, you will need to read the "Section 5.3.3"_accelerate_kokkos.html -doc page for details of what Makefile.machine settings are needed. +You must have a C++11 compatible compiler to use this package. -To install via make or Make.py for each of 3 hardware options: +[Authors:] The KOKKOS package was created primarily by Christian Trott +and Stan Moore (Sandia), with contributions from other folks as well. 
+It uses the open-source "Kokkos library"_https://github.com/kokkos +which was developed by Carter Edwards, Christian Trott, and others at +Sandia, and which is included in the LAMMPS distribution in +lib/kokkos. + +[Install or un-install:] + +For the KOKKOS package, you have 3 choices when building. You can +build with either CPU or KNL or GPU support. Each choice requires +additional settings in your Makefile.machine for the KOKKOS_DEVICES +and KOKKOS_ARCH settings. See the src/MAKE/OPTIONS/Makefile.kokkos* +files for examples. + +For multicore CPUs using OpenMP: + +KOKKOS_DEVICES = OpenMP +KOKKOS_ARCH = HSW # HSW = Haswell, SNB = SandyBridge, BDW = Broadwell, etc + +For Intel KNLs using OpenMP: + +KOKKOS_DEVICES = OpenMP +KOKKOS_ARCH = KNL + +For NVIDIA GPUs using Cuda: + +KOKKOS_DEVICES = Cuda +KOKKOS_ARCH = Pascal60,Power8 # P100 hosted by an IBM Power8, etc +KOKKOS_ARCH = Kepler37,Power8 # K80 hosted by an IBM Power8, etc + +For GPUs, you also need these 2 lines in your Makefile.machine before +the CC line is defined, in this case for use with OpenMPI mpicxx. The +2 lines define a nvcc wrapper compiler, which will use nvcc for +compiling Cuda files or use a C++ compiler for non-Kokkos, non-Cuda +files. + +KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd) +export OMPI_CXX = $(KOKKOS_ABSOLUTE_PATH)/config/nvcc_wrapper +CC = mpicxx + +Once you have an appropriate Makefile.machine, you can +install/un-install the package and build LAMMPS in the usual manner. +Note that you cannot build one executable to run on multiple hardware +targets (CPU or KNL or GPU). You need to build LAMMPS once for each +hardware target, to produce a separate executable. Also note that we +do not recommend building with other acceleration packages installed +(GPU, OPT, USER-INTEL, USER-OMP) when also building with KOKKOS. 
make yes-kokkos -make kokkos_omp # for CPUs with OpenMP -make kokkos_cuda # for GPUs, check the KOKKOS_ARCH setting in Makefile.kokkos_cuda -make kokkos_phi # for Xeon Phis :pre - -Make.py -p kokkos -kokkos omp -a machine # for CPUs with OpenMP -Make.py -p kokkos -kokkos cuda arch=35 -a machine # for GPUs of style arch -Make.py -p kokkos -kokkos phi -a machine # for Xeon Phis - -To un-install via make or Make.py: - +make machine :pre + make no-kokkos make machine :pre -Make.py -p ^kokkos -a machine :pre +[Supporting info:] -Supporting info: src/KOKKOS/README, lib/kokkos/README, -"Section 5.3"_Section_accelerate.html#acc_3, -"Section 5.3.3"_accelerate_kokkos.html, -Pair Styles section of "Section 3.5"_Section_commands.html#cmd_5 -for any pair style listed with a (k), "package kokkos"_package.html, -examples/accelerate, bench/FERMI, bench/KEPLER +src/KOKKOS: filenames -> commands +src/KOKKOS/README +lib/kokkos/README +"Section 5.3"_Section_accelerate.html#acc_3 +"Section 5.3.3"_accelerate_kokkos.html +"Section 2.7 -k on ..."_Section_start.html#start_7 +"Section 2.7 -sf kk"_Section_start.html#start_7 +"Section 2.7 -pk kokkos"_Section_start.html#start_7 +"package kokkos"_package.html +Styles sections of "Section 3.5"_Section_commands.html#cmd_5 for styles followed by (k) +"Benchmarks page"_http://lammps.sandia.gov/bench.html of web site :ul :line -KSPACE package :link(KSPACE),h5 +KSPACE package :link(KSPACE),h4 -Contents: A variety of long-range Coulombic solvers, and pair styles -which compute the corresponding short-range portion of the pairwise -Coulombic interactions. These include Ewald, particle-particle -particle-mesh (PPPM), and multilevel summation method (MSM) solvers. +[Contents:] -Building with the KSPACE package requires a 1d FFT library be present -on your system for use by the PPPM solvers. 
This can be the KISS FFT -library provided with LAMMPS, or 3rd party libraries like FFTW or a +A variety of long-range Coulombic solvers, as well as pair styles +which compute the corresponding short-range pairwise Coulombic +interactions. These include Ewald, particle-particle particle-mesh +(PPPM), and multilevel summation method (MSM) solvers. + +[Install or un-install:] + +Building with this package requires a 1d FFT library be present on +your system for use by the PPPM solvers. This can be the KISS FFT +library provided with LAMMPS, 3rd party libraries like FFTW, or a vendor-supplied FFT library. See step 6 of "Section -2.2.2"_Section_start.html#start_2_2 of the manual for details of how -to select different FFT options in your machine Makefile. The Make.py -tool has an "-fft" option which can insert these settings into your -machine Makefile automatically. Type "python src/Make.py -h -fft" to -see the details. - -To install via make or Make.py: +2.2.2"_Section_start.html#start_2_2 of the manual for details on how +to select different FFT options in your machine Makefile. 
make yes-kspace make machine :pre -Make.py -p kspace -a machine :pre - -To un-install via make or Make.py: - make no-kspace make machine :pre -Make.py -p ^kspace -a machine :pre +[Supporting info:] -Supporting info: "kspace_style"_kspace_style.html, -"doc/PDF/kspace.pdf"_PDF/kspace.pdf, -"Section 6.7"_Section_howto.html#howto_7, -"Section 6.8"_Section_howto.html#howto_8, -"Section 6.9"_Section_howto.html#howto_9, -"pair_style coul"_pair_coul.html, other pair style command doc pages -which have "long" or "msm" in their style name, -examples/peptide, bench/in.rhodo +src/KSPACE: filenames -> commands +"kspace_style"_kspace_style.html +"doc/PDF/kspace.pdf"_PDF/kspace.pdf +"Section 6.7"_Section_howto.html#howto_7 +"Section 6.8"_Section_howto.html#howto_8 +"Section 6.9"_Section_howto.html#howto_9 +"pair_style coul"_pair_coul.html +Pair Styles section of "Section 3.5"_Section_commands.html#cmd_5 with "long" or "msm" in pair style name +examples/peptide +bench/in.rhodo :ul :line -MANYBODY package :link(MANYBODY),h5 +MANYBODY package :link(MANYBODY),h4 -Contents: A variety of many-body and bond-order potentials. These -include (AI)REBO, EAM, EIM, BOP, Stillinger-Weber, and Tersoff -potentials. Do a directory listing, "ls src/MANYBODY", to see -the full list. +[Contents:] -To install via make or Make.py: +A variety of manybody and bond-order potentials. These include +(AI)REBO, BOP, EAM, EIM, Stillinger-Weber, and Tersoff potentials. 
+ +[Install or un-install:] make yes-manybody make machine :pre -Make.py -p manybody -a machine :pre - -To un-install via make or Make.py: - make no-manybody make machine :pre -Make.py -p ^manybody -a machine :pre +[Supporting info:] -Supporting info: - -Examples: Pair Styles section of "Section -3.5"_Section_commands.html#cmd_5, examples/comb, examples/eim, -examples/nb3d, examples/vashishta +src/MANYBODY: filenames -> commands +Pair Styles section of "Section 3.5"_Section_commands.html#cmd_5 +examples/comb +examples/eim +examples/nb3d +examples/shear +examples/streitz +examples/vashishta +bench/in.eam :ul :line -MC package :link(MC),h5 +MC package :link(MC),h4 -Contents: Several fixes and a pair style that have Monte Carlo (MC) or -MC-like attributes. These include fixes for creating, breaking, and -swapping bonds, and for performing atomic swaps and grand-canonical MC -in conjuction with dynamics. +[Contents:] -To install via make or Make.py: +Several fixes and a pair style that have Monte Carlo (MC) or MC-like +attributes. These include fixes for creating, breaking, and swapping +bonds, for performing atomic swaps, and performing grand-canonical MC +(GCMC) in conjunction with dynamics. 
+ +[Install or un-install:] make yes-mc make machine :pre -Make.py -p mc -a machine :pre - -To un-install via make or Make.py: - make no-mc make machine :pre -Make.py -p ^mc -a machine :pre +[Supporting info:] -Supporting info: "fix atom/swap"_fix_atom_swap.html, "fix -bond/break"_fix_bond_break.html, "fix -bond/create"_fix_bond_create.html, "fix bond/swap"_fix_bond_swap.html, -"fix gcmc"_fix_gcmc.html, "pair_style dsmc"_pair_dsmc.html +src/MC: filenames -> commands +"fix atom/swap"_fix_atom_swap.html +"fix bond/break"_fix_bond_break.html +"fix bond/create"_fix_bond_create.html +"fix bond/swap"_fix_bond_swap.html +"fix gcmc"_fix_gcmc.html +"pair_style dsmc"_pair_dsmc.html +http://lammps.sandia.gov/movies.html#gcmc :ul :line -MEAM package :link(MEAM),h5 +MEAM package :link(MEAM),h4 -Contents: A pair style for the modified embedded atom (MEAM) -potential. +[Contents:] -Building LAMMPS with the MEAM package requires first building the MEAM -library itself, which is a set of Fortran 95 files in lib/meam. -Details of how to do this are in lib/meam/README. As illustrated -below, perform a "make" using one of the Makefile.machine files in -lib/meam which should create a lib/meam/libmeam.a file. -Makefile.gfortran and Makefile.ifort are examples for the GNU Fortran -and Intel Fortran compilers. The "make" also copies a -lib/meam/Makefile.lammps.machine file to lib/meam/Makefile.lammps. -This file has settings that enable the C++ compiler used to build -LAMMPS to link with a Fortran library (typically the 2 compilers to be -consistent e.g. both Intel compilers, or both GNU compilers). If the -settings in Makefile.lammps for your compilers and machine are not -correct, the LAMMPS link will fail. Note that the Make.py script has -a "-meam" option to allow the MEAM library and LAMMPS to be built in -one step. Type "python src/Make.py -h -meam" to see the details. +A pair style for the modified embedded atom (MEAM) potential. 
-NOTE: The MEAM potential can run dramatically faster if built with the -Intel Fortran compiler, rather than the GNU Fortran compiler. +[Author:] Greg Wagner (Northwestern U) while at Sandia. -To install via make or Make.py: +[Install or un-install:] + +Before building LAMMPS with this package, you must first build the +MEAM library in lib/meam. You can do this manually if you prefer; +follow the instructions in lib/meam/README. You can also do it in one +step from the lammps/src dir, using a command like these, which simply +invoke the lib/meam/Install.py script with the specified args: + +make lib-meam # print help message +make lib-meam args="-m gfortran" # build with GNU Fortran compiler +make lib-meam args="-m ifort" # build with Intel ifort compiler :pre + +The build should produce two files: lib/meam/libmeam.a and +lib/meam/Makefile.lammps. The latter is copied from an existing +Makefile.lammps.* and has settings needed to link C++ (LAMMPS) with +Fortran (MEAM library). Typically the two compilers used for LAMMPS +and the MEAM library need to be consistent (e.g. both Intel or both +GNU compilers). If necessary, you can edit/create a new +lib/meam/Makefile.machine file for your system, which should define an +EXTRAMAKE variable to specify a corresponding Makefile.lammps.machine +file. + +You can then install/un-install the package and build LAMMPS in the +usual manner: -cd ~/lammps/lib/meam -make -f Makefile.gfortran # for example -cd ~/lammps/src make yes-meam make machine :pre -Make.py -p meam -meam make=gfortran -a machine :pre - -To un-install via make or Make.py: - make no-meam make machine :pre -Make.py -p ^meam -a machine :pre +NOTE: You should test building the MEAM library with both the Intel +and GNU compilers to see if a simulation runs faster with one versus +the other on your system. 
-Supporting info: lib/meam/README, "pair_style meam"_pair_meam.html, -examples/meam +[Supporting info:] + +src/MEAM: filenames -> commands +src/meam/README +lib/meam/README +"pair_style meam"_pair_meam.html +examples/meam :ul :line -MISC package :link(MISC),h5 +MISC package :link(MISC),h4 -Contents: A variety of computes, fixes, and pair styles that are not -commonly used, but don't align with other packages. Do a directory +[Contents:] + +A variety of compute, fix, pair, dump styles with specialized +capabilities that don't align with other packages. Do a directory listing, "ls src/MISC", to see the list of commands. -To install via make or Make.py: +[Install or un-install:] make yes-misc make machine :pre -Make.py -p misc -a machine :pre - -To un-install via make or Make.py: - make no-misc make machine :pre -Make.py -p ^misc -a machine :pre +[Supporting info:] -Supporting info: "compute ti"_compute_ti.html, "fix -evaporate"_fix_evaporate.html, "fix tmm"_fix_ttm.html, "fix -viscosity"_fix_viscosity.html, examples/misc +src/MISC: filenames -> commands +"compute ti"_compute_ti.html +"fix evaporate"_fix_evaporate.html +"fix orient/fcc"_fix_orient.html +"fix ttm"_fix_ttm.html +"fix thermal/conductivity"_fix_thermal_conductivity.html +"fix viscosity"_fix_viscosity.html +examples/KAPPA +examples/VISCOSITY +http://lammps.sandia.gov/pictures.html#ttm +http://lammps.sandia.gov/movies.html#evaporation :ul :line -MOLECULE package :link(MOLECULE),h5 +MOLECULE package :link(MOLECULE),h4 -Contents: A large number of atom, pair, bond, angle, dihedral, -improper styles that are used to model molecular systems with fixed -covalent bonds. The pair styles include terms for the Dreiding -(hydrogen-bonding) and CHARMM force fields, and TIP4P water model. +[Contents:] -To install via make or Make.py: +A large number of atom, pair, bond, angle, dihedral, improper styles +that are used to model molecular systems with fixed covalent bonds. 
+The pair styles include the Dreiding (hydrogen-bonding) and CHARMM +force fields, and a TIP4P water model. + +[Install or un-install:] make yes-molecule make machine :pre -Make.py -p molecule -a machine :pre - -To un-install via make or Make.py: - make no-molecule make machine :pre -Make.py -p ^molecule -a machine :pre +[Supporting info:] -Supporting info:"atom_style"_atom_style.html, -"bond_style"_bond_style.html, "angle_style"_angle_style.html, -"dihedral_style"_dihedral_style.html, -"improper_style"_improper_style.html, "pair_style -hbond/dreiding/lj"_pair_hbond_dreiding.html, "pair_style -lj/charmm/coul/charmm"_pair_charmm.html, -"Section 6.3"_Section_howto.html#howto_3, -examples/micelle, examples/peptide, bench/in.chain, bench/in.rhodo +src/MOLECULE: filenames -> commands +"atom_style"_atom_style.html +"bond_style"_bond_style.html +"angle_style"_angle_style.html +"dihedral_style"_dihedral_style.html +"improper_style"_improper_style.html +"pair_style hbond/dreiding/lj"_pair_hbond_dreiding.html +"pair_style lj/charmm/coul/charmm"_pair_charmm.html +"Section 6.3"_Section_howto.html#howto_3 +examples/cmap +examples/dreiding +examples/micelle, +examples/peptide +bench/in.chain +bench/in.rhodo :ul :line -MPIIO package :link(MPIIO),h5 +MPIIO package :link(MPIIO),h4 -Contents: Support for parallel output/input of dump and restart files -via the MPIIO library, which is part of the standard message-passing -interface (MPI) library. It adds "dump styles"_dump.html with a -"mpiio" in their style name. Restart files with an ".mpiio" suffix -are also written and read in parallel. +[Contents:] -To install via make or Make.py: +Support for parallel output/input of dump and restart files via the +MPIIO library. It adds "dump styles"_dump.html with a "mpiio" in +their style name. Restart files with an ".mpiio" suffix are also +written and read in parallel. 
+[Install or un-install:] + +Note that MPIIO is part of the standard message-passing interface +(MPI) library, so you should not need any additional compiler or link +settings, beyond what LAMMPS normally uses for MPI on your system. + make yes-mpiio make machine :pre - -Make.py -p mpiio -a machine :pre - -To un-install via make or Make.py: - + make no-mpiio make machine :pre + +[Supporting info:] -Make.py -p ^mpiio -a machine :pre - -Supporting info: "dump"_dump.html, "restart"_restart.html, -"write_restart"_write_restart.html, "read_restart"_read_restart.html +src/MPIIO: filenames -> commands +"dump"_dump.html +"restart"_restart.html +"write_restart"_write_restart.html +"read_restart"_read_restart.html :ul :line + +MSCG package :link(mscg),h4 -OPT package :link(OPT),h5 +[Contents:] -Contents: A handful of pair styles with an "opt" in their style name -which are optimized for improved CPU performance on single or multiple -cores. These include EAM, LJ, CHARMM, and Morse potentials. "Section -5.3.5"_accelerate_opt.html gives details of how to build and -use this package. See the KOKKOS, USER-INTEL, and USER-OMP packages, -which also have styles optimized for CPU performance. +A "fix mscg"_fix_mscg.html command which can parameterize a +Multi-Scale Coarse-Graining (MSCG) model using the open-source "MS-CG +library"_mscg_home. -Some C++ compilers, like the Intel compiler, require the compile flag -"-restrict" to build LAMMPS with the OPT package. It should be added -to the CCFLAGS line of your Makefile.machine. Or use Makefile.opt in -src/MAKE/OPTIONS, via "make opt". For compilers that use the flag, -the Make.py command adds it automatically to the Makefile.auto file it -creates and uses. +:link(mscg_home,https://github.com/uchicago-voth/MSCG-release) -To install via make or Make.py: +To use this package you must have the MS-CG library available on your +system. + +[Authors:] The fix was written by Lauren Abbott (Sandia). 
The MS-CG +library was developed by Jacob Wagner in Greg Voth's group at the +University of Chicago. + +[Install or un-install:] + +Before building LAMMPS with this package, you must first download and +build the MS-CG library. Building the MS-CG library and using it from +LAMMPS requires a C++11 compatible compiler, and that LAPACK and GSL +(GNU Scientific Library) libraries be installed on your machine. See +the lib/mscg/README and MSCG/Install files for more details. + +Assuming these libraries are in place, you can do the download and +build of MS-CG manually if you prefer; follow the instructions in +lib/mscg/README. You can also do it in one step from the lammps/src +dir, using a command like these, which simply invoke the +lib/mscg/Install.py script with the specified args: + +make lib-mscg # print help message +make lib-mscg args="-g -b -l" # download and build in default lib/mscg/MSCG-release-master +make lib-mscg args="-h . MSCG -g -b -l" # download and build in lib/mscg/MSCG +make lib-mscg args="-h ~ MSCG -g -b -l" # download and build in ~/mscg :pre + +Note that the final -l switch is to create 2 symbolic (soft) links, +"includelink" and "liblink", in lib/mscg to point to the MS-CG src +dir. When LAMMPS builds it will use these links. You should not need +to edit the lib/mscg/Makefile.lammps file. + +You can then install/un-install the package and build LAMMPS in the +usual manner: + +make yes-mscg +make machine :pre + +make no-mscg +make machine :pre + +[Supporting info:] + +src/MSCG: filenames -> commands +src/MSCG/README +lib/mscg/README +examples/mscg :ul + +:line + +OPT package :link(OPT),h4 + +[Contents:] + +A handful of pair styles which are optimized for improved CPU +performance on single or multiple cores. These include EAM, LJ, +CHARMM, and Morse potentials. The styles have an "opt" suffix in +their style name. "Section 5.3.5"_accelerate_opt.html gives details +of how to build and use this package. 
Its styles can be invoked at +run time via the "-sf opt" or "-suffix opt" "command-line +switches"_Section_start.html#start_7. See also the "KOKKOS"_#KOKKOS, +"USER-INTEL"_#USER-INTEL, and "USER-OMP"_#USER-OMP packages, which +have styles optimized for CPU performance. + +[Authors:] James Fischer (High Performance Technologies), David Richie, +and Vincent Natoli (Stone Ridge Technology). + +[Install or un-install:] make yes-opt make machine :pre -Make.py -p opt -a machine :pre - -To un-install via make or Make.py: - make no-opt make machine :pre -Make.py -p ^opt -a machine :pre +NOTE: The compile flag "-restrict" must be used to build LAMMPS with +the OPT package. It should be added to the CCFLAGS line of your +Makefile.machine. See Makefile.opt in src/MAKE/OPTIONS for an +example. -Supporting info: "Section 5.3"_Section_accelerate.html#acc_3, -"Section 5.3.5"_accelerate_opt.html, Pair Styles section of -"Section 3.5"_Section_commands.html#cmd_5 for any pair style -listed with an (t), examples/accelerate, bench/KEPLER +CCFLAGS: add -restrict :ul + +[Supporting info:] + +src/OPT: filenames -> commands +"Section 5.3"_Section_accelerate.html#acc_3 +"Section 5.3.5"_accelerate_opt.html +"Section 2.7 -sf opt"_Section_start.html#start_7 +Pair Styles section of "Section 3.5"_Section_commands.html#cmd_5 for pair styles followed by (t) +"Benchmarks page"_http://lammps.sandia.gov/bench.html of web site :ul :line -PERI package :link(PERI),h5 +PERI package :link(PERI),h4 -Contents: Support for the Peridynamics method, a particle-based -meshless continuum model. The package includes an atom style, several -computes which calculate diagnostics, and several Peridynamic pair -styles which implement different materials models. +[Contents:] -To install via make or Make.py: +An atom style, several pair styles which implement different +Peridynamics materials models, and several computes which calculate +diagnostics. Peridynamics is a particle-based meshless continuum +model. 
+ +[Authors:] The original package was created by Mike Parks (Sandia). +Additional Peridynamics models were added by Rezwanur Rahman and John +Foster (UTSA). + +[Install or un-install:] make yes-peri make machine :pre -Make.py -p peri -a machine :pre - -To un-install via make or Make.py: - make no-peri make machine :pre -Make.py -p ^peri -a machine :pre +[Supporting info:] -Supporting info: -"doc/PDF/PDLammps_overview.pdf"_PDF/PDLammps_overview.pdf, -"doc/PDF/PDLammps_EPS.pdf"_PDF/PDLammps_EPS.pdf, -"doc/PDF/PDLammps_VES.pdf"_PDF/PDLammps_VES.pdf, "atom_style -peri"_atom_style.html, "compute damage/atom"_compute_damage_atom.html, -"pair_style peri/pmb"_pair_peri.html, examples/peri +src/PERI: filenames -> commands +"doc/PDF/PDLammps_overview.pdf"_PDF/PDLammps_overview.pdf +"doc/PDF/PDLammps_EPS.pdf"_PDF/PDLammps_EPS.pdf +"doc/PDF/PDLammps_VES.pdf"_PDF/PDLammps_VES.pdf +"atom_style peri"_atom_style.html +"pair_style peri/*"_pair_peri.html +"compute damage/atom"_compute_damage_atom.html +"compute plasticity/atom"_compute_plasticity_atom.html +examples/peri +http://lammps.sandia.gov/movies.html#peri :ul :line -POEMS package :link(POEMS),h5 +POEMS package :link(POEMS),h4 -Contents: A fix that wraps the Parallelizable Open source Efficient -Multibody Software (POEMS) librar, which is able to simulate the -dynamics of articulated body systems. These are systems with multiple -rigid bodies (collections of atoms or particles) whose motion is -coupled by connections at hinge points. +[Contents:] -Building LAMMPS with the POEMS package requires first building the -POEMS library itself, which is a set of C++ files in lib/poems. -Details of how to do this are in lib/poems/README. As illustrated -below, perform a "make" using one of the Makefile.machine files in -lib/poems which should create a lib/meam/libpoems.a file. -Makefile.g++ and Makefile.icc are examples for the GNU and Intel C++ -compilers. 
The "make" also creates a lib/poems/Makefile.lammps file -which you should not need to change. Note the Make.py script has a -"-poems" option to allow the POEMS library and LAMMPS to be built in -one step. Type "python src/Make.py -h -poems" to see the details. +A fix that wraps the Parallelizable Open source Efficient Multibody +Software (POEMS) library, which is able to simulate the dynamics of +articulated body systems. These are systems with multiple rigid +bodies (collections of particles) whose motion is coupled by +connections at hinge points. -To install via make or Make.py: +[Author:] Rudra Mukherjee (JPL) while at RPI. + +[Install or un-install:] + +Before building LAMMPS with this package, you must first build the +POEMS library in lib/poems. You can do this manually if you prefer; +follow the instructions in lib/poems/README. You can also do it in +one step from the lammps/src dir, using a command like these, which +simply invoke the lib/poems/Install.py script with the specified args: + +make lib-poems # print help message +make lib-poems args="-m g++" # build with GNU g++ compiler +make lib-poems args="-m icc" # build with Intel icc compiler :pre + +The build should produce two files: lib/poems/libpoems.a and +lib/poems/Makefile.lammps. The latter is copied from an existing +Makefile.lammps.* and has settings needed to build LAMMPS with the +POEMS library (though typically the settings are just blank). If +necessary, you can edit/create a new lib/poems/Makefile.machine file +for your system, which should define an EXTRAMAKE variable to specify +a corresponding Makefile.lammps.machine file. 
+ +You can then install/un-install the package and build LAMMPS in the +usual manner: -cd ~/lammps/lib/poems -make -f Makefile.g++ # for example -cd ~/lammps/src make yes-poems make machine :pre -Make.py -p poems -poems make=g++ -a machine :pre - -To un-install via make or Make.py: - make no-meam make machine :pre -Make.py -p ^meam -a machine :pre +[Supporting info:] -Supporting info: src/POEMS/README, lib/poems/README, -"fix poems"_fix_poems.html, examples/rigid +src/POEMS: filenames -> commands +src/POEMS/README +lib/poems/README +"fix poems"_fix_poems.html +examples/rigid :ul :line -PYTHON package :link(PYTHON),h5 +PYTHON package :link(PYTHON),h4 -Contents: A "python"_python.html command which allow you to execute -Python code from a LAMMPS input script. The code can be in a separate -file or embedded in the input script itself. See "Section +[Contents:] + +A "python"_python.html command which allow you to execute Python code +from a LAMMPS input script. The code can be in a separate file or +embedded in the input script itself. See "Section 11.2"_Section_python.html#py_2 for an overview of using Python from -LAMMPS and for other ways to use LAMMPS and Python together. +LAMMPS in this manner and the entire section for other ways to use +LAMMPS and Python together. -Building with the PYTHON package assumes you have a Python shared -library available on your system, which needs to be a Python 2 -version, 2.6 or later. Python 3 is not yet supported. The build uses -the contents of the lib/python/Makefile.lammps file to find all the Python -files required in the build/link process. See the lib/python/README -file if the settings in that file do not work on your system. Note -that the Make.py script has a "-python" option to allow an alternate -lib/python/Makefile.lammps file to be specified and LAMMPS to be built -in one step. Type "python src/Make.py -h -python" to see the details. 
- -To install via make or Make.py: +[Install or un-install:] make yes-python make machine :pre -Make.py -p python -a machine :pre - -To un-install via make or Make.py: - make no-python make machine :pre -Make.py -p ^python -a machine :pre +NOTE: Building with the PYTHON package assumes you have a Python +shared library available on your system, which needs to be a Python 2 +version, 2.6 or later. Python 3 is not yet supported. See the +lib/python/README for more details. Note that the build uses the +lib/python/Makefile.lammps file in the compile/link process. You +should only need to create a new Makefile.lammps.* file (and copy it +to Makefile.lammps) if the LAMMPS build fails. -Supporting info: examples/python +[Supporting info:] + +src/PYTHON: filenames -> commands +"Section 11"_Section_python.html +lib/python/README +examples/python :ul :line -QEQ package :link(QEQ),h5 +QEQ package :link(QEQ),h4 -Contents: Several fixes for performing charge equilibration (QEq) via -severeal different algorithms. These can be used with pair styles -that use QEq as part of their formulation. +[Contents:] -To install via make or Make.py: +Several fixes for performing charge equilibration (QEq) via different +algorithms. These can be used with pair styles that perform QEq as +part of their formulation. + +[Install or un-install:] make yes-qeq make machine :pre -Make.py -p qeq -a machine :pre - -To un-install via make or Make.py: - make no-qeq make machine :pre -Make.py -p ^qeq -a machine :pre +[Supporting info:] -Supporting info: "fix qeq/*"_fix_qeq.html, examples/qeq +src/QEQ: filenames -> commands +"fix qeq/*"_fix_qeq.html +examples/qeq +examples/streitz :ul :line -REAX package :link(REAX),h5 +REAX package :link(REAX),h4 -Contents: A pair style for the ReaxFF potential, a universal reactive -force field, as well as a "fix reax/bonds"_fix_reax_bonds.html command -for monitoring molecules as bonds are created and destroyed. 
+[Contents:] -Building LAMMPS with the REAX package requires first building the REAX -library itself, which is a set of Fortran 95 files in lib/reax. -Details of how to do this are in lib/reax/README. As illustrated -below, perform a "make" using one of the Makefile.machine files in -lib/reax which should create a lib/reax/libreax.a file. -Makefile.gfortran and Makefile.ifort are examples for the GNU Fortran -and Intel Fortran compilers. The "make" also copies a -lib/reax/Makefile.lammps.machine file to lib/reax/Makefile.lammps. -This file has settings that enable the C++ compiler used to build -LAMMPS to link with a Fortran library (typically the 2 compilers to be -consistent e.g. both Intel compilers, or both GNU compilers). If the -settings in Makefile.lammps for your compilers and machine are not -correct, the LAMMPS link will fail. Note that the Make.py script has -a "-reax" option to allow the REAX library and LAMMPS to be built in -one step. Type "python src/Make.py -h -reax" to see the details. +A pair style which wraps a Fortran library which implements the ReaxFF +potential, which is a universal reactive force field. See the +"USER-REAXC package"_#USER-REAXC for an alternate implementation in +C/C++. Also a "fix reax/bonds"_fix_reax_bonds.html command for +monitoring molecules as bonds are created and destroyed. -To install via make or Make.py: +[Author:] Aidan Thompson (Sandia). + +[Install or un-install:] + +Before building LAMMPS with this package, you must first build the +REAX library in lib/reax. You can do this manually if you prefer; +follow the instructions in lib/reax/README. 
You can also do it in one +step from the lammps/src dir, using a command like these, which simply +invoke the lib/reax/Install.py script with the specified args: + +make lib-reax # print help message +make lib-reax args="-m gfortran" # build with GNU Fortran compiler +make lib-reax args="-m ifort" # build with Intel ifort compiler :pre + +The build should produce two files: lib/reax/libreax.a and +lib/reax/Makefile.lammps. The latter is copied from an existing +Makefile.lammps.* and has settings needed to link C++ (LAMMPS) with +Fortran (REAX library). Typically the two compilers used for LAMMPS +and the REAX library need to be consistent (e.g. both Intel or both +GNU compilers). If necessary, you can edit/create a new +lib/reax/Makefile.machine file for your system, which should define an +EXTRAMAKE variable to specify a corresponding Makefile.lammps.machine +file. + +You can then install/un-install the package and build LAMMPS in the +usual manner: -cd ~/lammps/lib/reax -make -f Makefile.gfortran # for example -cd ~/lammps/src make yes-reax make machine :pre -Make.py -p reax -reax make=gfortran -a machine :pre - -To un-install via make or Make.py: - make no-reax make machine :pre -Make.py -p ^reax -a machine :pre +[Supporting info:] -Supporting info: lib/reax/README, "pair_style reax"_pair_reax.html, -"fix reax/bonds"_fix_reax_bonds.html, examples/reax +src/REAX: filenames -> commands +lib/reax/README +"pair_style reax"_pair_reax.html +"fix reax/bonds"_fix_reax_bonds.html +examples/reax :ul :line -REPLICA package :link(REPLICA),h5 +REPLICA package :link(REPLICA),h4 -Contents: A collection of multi-replica methods that are used by -invoking multiple instances (replicas) of LAMMPS -simulations. Communication between individual replicas is performed in -different ways by the different methods. See "Section +[Contents:] + +A collection of multi-replica methods which can be used when running +multiple LAMMPS simulations (replicas). 
See "Section 6.5"_Section_howto.html#howto_5 for an overview of how to run -multi-replica simulations in LAMMPS. Multi-replica methods included -in the package are nudged elastic band (NEB), parallel replica -dynamics (PRD), temperature accelerated dynamics (TAD), parallel -tempering, and a verlet/split algorithm for performing long-range -Coulombics on one set of processors, and the remainder of the force -field calculation on another set. +multi-replica simulations in LAMMPS. Methods in the package include +nudged elastic band (NEB), parallel replica dynamics (PRD), +temperature accelerated dynamics (TAD), parallel tempering, and a +verlet/split algorithm for performing long-range Coulombics on one set +of processors, and the remainder of the force field calculation on +another set. -To install via make or Make.py: +[Install or un-install:] make yes-replica make machine :pre -Make.py -p replica -a machine :pre - -To un-install via make or Make.py: - make no-replica make machine :pre -Make.py -p ^replica -a machine :pre +[Supporting info:] -Supporting info: "Section 6.5"_Section_howto.html#howto_5, -"neb"_neb.html, "prd"_prd.html, "tad"_tad.html, "temper"_temper.html, -"run_style verlet/split"_run_style.html, examples/neb, examples/prd, -examples/tad +src/REPLICA: filenames -> commands +"Section 6.5"_Section_howto.html#howto_5 +"neb"_neb.html +"prd"_prd.html +"tad"_tad.html +"temper"_temper.html +"run_style verlet/split"_run_style.html +examples/neb +examples/prd +examples/tad :ul :line -RIGID package :link(RIGID),h5 +RIGID package :link(RIGID),h4 -Contents: A collection of computes and fixes which enforce rigid -constraints on collections of atoms or particles. This includes SHAKE -and RATTLE, as well as variants of rigid-body time integrators for a -few large bodies or many small bodies. +[Contents:] -To install via make or Make.py: +Fixes which enforce rigid constraints on collections of atoms or +particles. 
This includes SHAKE and RATTLE, as well as various +rigid-body integrators for a few large bodies or many small bodies. +Also several computes which calculate properties of rigid bodies. + +To install/build: make yes-rigid make machine :pre -Make.py -p rigid -a machine :pre - -To un-install via make or Make.py: +To un-install/re-build: make no-rigid make machine :pre -Make.py -p ^rigid -a machine :pre +[Supporting info:] -Supporting info: "compute erotate/rigid"_compute_erotate_rigid.html, -"fix shake"_fix_shake.html, "fix rattle"_fix_shake.html, "fix -rigid/*"_fix_rigid.html, examples/ASPHERE, examples/rigid +src/RIGID: filenames -> commands +"compute erotate/rigid"_compute_erotate_rigid.html +"fix shake"_fix_shake.html +"fix rattle"_fix_shake.html +"fix rigid/*"_fix_rigid.html +examples/ASPHERE +examples/rigid +bench/in.rhodo +http://lammps.sandia.gov/movies.html#box +http://lammps.sandia.gov/movies.html#star :ul :line -SHOCK package :link(SHOCK),h5 +SHOCK package :link(SHOCK),h4 -Contents: A small number of fixes useful for running impact -simulations where a shock-wave passes through a material. +[Contents:] -To install via make or Make.py: +Fixes for running impact simulations where a shock-wave passes through +a material. 
+ +[Install or un-install:] make yes-shock make machine :pre -Make.py -p shock -a machine :pre - -To un-install via make or Make.py: - make no-shock make machine :pre -Make.py -p ^shock -a machine :pre +[Supporting info:] -Supporting info: "fix append/atoms"_fix_append_atoms.html, "fix -msst"_fix_msst.html, "fix nphug"_fix_nphug.html, "fix -wall/piston"_fix_wall_piston.html, examples/hugoniostat, examples/msst +src/SHOCK: filenames -> commands +"fix append/atoms"_fix_append_atoms.html +"fix msst"_fix_msst.html +"fix nphug"_fix_nphug.html +"fix wall/piston"_fix_wall_piston.html +examples/hugoniostat +examples/msst :ul :line -SNAP package :link(SNAP),h5 +SNAP package :link(SNAP),h4 -Contents: A pair style for the spectral neighbor analysis potential -(SNAP), which is an empirical potential which can be quantum accurate -when fit to an archive of DFT data. Computes useful for analyzing -properties of the potential are also included. +[Contents:] -To install via make or Make.py: +A pair style for the spectral neighbor analysis potential (SNAP). +SNAP is a methodology for deriving a highly accurate classical potential +fit to a large archive of quantum mechanical (DFT) data. Also several +computes which analyze attributes of the potential. + +[Author:] Aidan Thompson (Sandia). 
+ +[Install or un-install:] make yes-snap make machine :pre -Make.py -p snap -a machine :pre - -To un-install via make or Make.py: - make no-snap make machine :pre -Make.py -p ^snap -a machine :pre +[Supporting info:] -Supporting info: "pair snap"_pair_snap.html, "compute -sna/atom"_compute_sna_atom.html, "compute snad/atom"_compute_sna_atom.html, -"compute snav/atom"_compute_sna_atom.html, examples/snap +src/SNAP: filenames -> commands +"pair snap"_pair_snap.html +"compute sna/atom"_compute_sna_atom.html +"compute snad/atom"_compute_sna_atom.html +"compute snav/atom"_compute_sna_atom.html +examples/snap :ul :line -SRD package :link(SRD),h5 +SRD package :link(SRD),h4 -Contents: Two fixes which implement the Stochastic Rotation Dynamics -(SRD) method for coarse-graining of a solvent, typically around large -colloidal-scale particles. +[Contents:] -To install via make or Make.py: +A pair of fixes which implement the Stochastic Rotation Dynamics (SRD) +method for coarse-graining of a solvent, typically around large +colloidal particles. 
+ +To install/build: make yes-srd make machine :pre -Make.py -p srd -a machine :pre - -To un-install via make or Make.py: +To un-install/re-build: make no-srd make machine :pre -Make.py -p ^srd -a machine :pre +[Supporting info:] -Supporting info: "fix srd"_fix_srd.html, "fix -wall/srd"_fix_wall_srd.html, examples/srd, examples/ASPHERE +src/SRD: filenames -> commands +"fix srd"_fix_srd.html +"fix wall/srd"_fix_wall_srd.html +examples/srd +examples/ASPHERE +http://lammps.sandia.gov/movies.html#tri +http://lammps.sandia.gov/movies.html#line +http://lammps.sandia.gov/movies.html#poly :ul :line -VORONOI package :link(VORONOI),h5 +VORONOI package :link(VORONOI),h4 -Contents: A "compute voronoi/atom"_compute_voronoi_atom.html command -which computes the Voronoi tesselation of a collection of atoms or -particles by wrapping the Voro++ lib +[Contents:] -To build LAMMPS with the KIM package you must have previously -installed the KIM API (library) on your system. The lib/kim/README -file explains how to download and install KIM. Building with the KIM -package also uses the lib/kim/Makefile.lammps file in the compile/link -process. You should not need to edit this file. +A compute command which calculates the Voronoi tessellation of a +collection of atoms by wrapping the "Voro++ library"_voro_home. This +can be used to calculate the local volume of each atom or its near +neighbors. +:link(voro_home,http://math.lbl.gov/voro++) -To build LAMMPS with the VORONOI package you must have previously -installed the Voro++ library on your system. The lib/voronoi/README -file explains how to download and install Voro++. There is a -lib/voronoi/install.py script which automates the process. Type -"python install.py" to see instructions. The final step is to create -soft links in the lib/voronoi directory for "includelink" and -"liblink" which point to installed Voro++ directories. 
Building with -the VORONOI package uses the contents of the -lib/voronoi/Makefile.lammps file in the compile/link process. You -should not need to edit this file. Note that the Make.py script has a -"-voronoi" option to allow the Voro++ library to be downloaded and/or -installed and LAMMPS to be built in one step. Type "python -src/Make.py -h -voronoi" to see the details. +To use this package you must have the Voro++ library available on your +system. -To install via make or Make.py: +[Author:] Daniel Schwen (INL) while at LANL. The open-source Voro++ +library was written by Chris Rycroft (Harvard U) while at UC Berkeley +and LBNL. + +[Install or un-install:] + +Before building LAMMPS with this package, you must first download and +build the Voro++ library. You can do this manually if you prefer; +follow the instructions in lib/voronoi/README. You can also do it in +one step from the lammps/src dir, using a command like these, which +simply invoke the lib/voronoi/Install.py script with the specified +args: + +make lib-voronoi # print help message +make lib-voronoi args="-g -b -l" # download and build in default lib/voronoi/voro++-0.4.6 +make lib-voronoi args="-h . voro++ -g -b -l" # download and build in lib/voronoi/voro++ +make lib-voronoi args="-h ~ voro++ -g -b -l" # download and build in ~/voro++ :pre + +Note that the final -l switch is to create 2 symbolic (soft) links, +"includelink" and "liblink", in lib/voronoi to point to the Voro++ src +dir. When LAMMPS builds it will use these links. You should not need +to edit the lib/voronoi/Makefile.lammps file. 
+ +You can then install/un-install the package and build LAMMPS in the +usual manner: -cd ~/lammps/lib/voronoi -python install.py -g -b -l # download Voro++, build in lib/voronoi, create links -cd ~/lammps/src make yes-voronoi make machine :pre -Make.py -p voronoi -voronoi install="-g -b -l" -a machine :pre - -To un-install via make or Make.py: - make no-voronoi make machine :pre -Make.py -p ^voronoi -a machine :pre +[Supporting info:] -Supporting info: src/VORONOI/README, lib/voronoi/README, "compute -voronoi/atom"_compute_voronoi_atom.html, examples/voronoi +src/VORONOI: filenames -> commands +src/VORONOI/README +lib/voronoi/README +"compute voronoi/atom"_compute_voronoi_atom.html +examples/voronoi :ul :line - -4.2 User packages :h4,link(pkg_2) - -The current list of user-contributed packages is as follows: - -Package, Description, Author(s), Doc page, Example, Pic/movie, Library -"USER-ATC"_#USER-ATC, atom-to-continuum coupling, Jones & Templeton & Zimmerman (1), "fix atc"_fix_atc.html, USER/atc, "atc"_atc, lib/atc -"USER-AWPMD"_#USER-AWPMD, wave-packet MD, Ilya Valuev (JIHT), "pair_style awpmd/cut"_pair_awpmd.html, USER/awpmd, -, lib/awpmd -"USER-CG-CMM"_#USER-CG-CMM, coarse-graining model, Axel Kohlmeyer (Temple U), "pair_style lj/sdk"_pair_sdk.html, USER/cg-cmm, "cg"_cg, - -"USER-CGDNA"_#USER-CGDNA, coarse-grained DNA force fields, Oliver Henrich (U Strathclyde Glasgow), src/USER-CGDNA/README, USER/cgdna, -, - -"USER-COLVARS"_#USER-COLVARS, collective variables, Fiorin & Henin & Kohlmeyer (2), "fix colvars"_fix_colvars.html, USER/colvars, "colvars"_colvars, lib/colvars -"USER-DIFFRACTION"_#USER-DIFFRACTION, virutal x-ray and electron diffraction, Shawn Coleman (ARL),"compute xrd"_compute_xrd.html, USER/diffraction, -, - -"USER-DPD"_#USER-DPD, reactive dissipative particle dynamics (DPD), Larentzos & Mattox & Brennan (5), src/USER-DPD/README, USER/dpd, -, - -"USER-DRUDE"_#USER-DRUDE, Drude oscillators, Dequidt & Devemy & Padua (3), 
"tutorial"_tutorial_drude.html, USER/drude, -, - -"USER-EFF"_#USER-EFF, electron force field, Andres Jaramillo-Botero (Caltech), "pair_style eff/cut"_pair_eff.html, USER/eff, "eff"_eff, - -"USER-FEP"_#USER-FEP, free energy perturbation, Agilio Padua (U Blaise Pascal Clermont-Ferrand), "compute fep"_compute_fep.html, USER/fep, -, - -"USER-H5MD"_#USER-H5MD, dump output via HDF5, Pierre de Buyl (KU Leuven), "dump h5md"_dump_h5md.html, -, -, lib/h5md -"USER-INTEL"_#USER-INTEL, Vectorized CPU and Intel(R) coprocessor styles, W. Michael Brown (Intel), "Section 5.3.2"_accelerate_intel.html, examples/intel, -, - -"USER-LB"_#USER-LB, Lattice Boltzmann fluid, Colin Denniston (U Western Ontario), "fix lb/fluid"_fix_lb_fluid.html, USER/lb, -, - -"USER-MGPT"_#USER-MGPT, fast MGPT multi-ion potentials, Tomas Oppelstrup & John Moriarty (LLNL), "pair_style mgpt"_pair_mgpt.html, USER/mgpt, -, - -"USER-MISC"_#USER-MISC, single-file contributions, USER-MISC/README, USER-MISC/README, -, -, - -"USER-MANIFOLD"_#USER-MANIFOLD, motion on 2d surface, Stefan Paquay (Eindhoven U of Technology), "fix manifoldforce"_fix_manifoldforce.html, USER/manifold, "manifold"_manifold, - -"USER-MOLFILE"_#USER-MOLFILE, "VMD"_VMD molfile plug-ins, Axel Kohlmeyer (Temple U), "dump molfile"_dump_molfile.html, -, -, VMD-MOLFILE -"USER-NC-DUMP"_#USER-NC-DUMP, dump output via NetCDF, Lars Pastewka (Karlsruhe Institute of Technology, KIT), "dump nc / dump nc/mpiio"_dump_nc.html, -, -, lib/netcdf -"USER-OMP"_#USER-OMP, OpenMP threaded styles, Axel Kohlmeyer (Temple U), "Section 5.3.4"_accelerate_omp.html, -, -, - -"USER-PHONON"_#USER-PHONON, phonon dynamical matrix, Ling-Ti Kong (Shanghai Jiao Tong U), "fix phonon"_fix_phonon.html, USER/phonon, -, - -"USER-QMMM"_#USER-QMMM, QM/MM coupling, Axel Kohlmeyer (Temple U), "fix qmmm"_fix_qmmm.html, USER/qmmm, -, lib/qmmm -"USER-QTB"_#USER-QTB, quantum nuclear effects, Yuan Shen (Stanford), "fix qtb"_fix_qtb.html "fix qbmsst"_fix_qbmsst.html, qtb, -, - 
-"USER-QUIP"_#USER-QUIP, QUIP/libatoms interface, Albert Bartok-Partay (U Cambridge), "pair_style quip"_pair_quip.html, USER/quip, -, lib/quip -"USER-REAXC"_#USER-REAXC, C version of ReaxFF, Metin Aktulga (LBNL), "pair_style reaxc"_pair_reax_c.html, reax, -, - -"USER-SMD"_#USER-SMD, smoothed Mach dynamics, Georg Ganzenmuller (EMI), "SMD User Guide"_PDF/SMD_LAMMPS_userguide.pdf, USER/smd, -, - -"USER-SMTBQ"_#USER-SMTBQ, Second Moment Tight Binding - QEq potential, Salles & Maras & Politano & Tetot (4), "pair_style smtbq"_pair_smtbq.html, USER/smtbq, -, - -"USER-SPH"_#USER-SPH, smoothed particle hydrodynamics, Georg Ganzenmuller (EMI), "SPH User Guide"_PDF/SPH_LAMMPS_userguide.pdf, USER/sph, "sph"_sph, - -"USER-TALLY"_#USER-TALLY, Pairwise tallied computes, Axel Kohlmeyer (Temple U), "compute XXX/tally"_compute_tally.html, USER/tally, -, - -"USER-VTK"_#USER-VTK, VTK-style dumps, Berger and Queteschiner (6), "compute custom/vtk"_dump_custom_vtk.html, -, -, lib/vtk -:tb(ea=c) - -:link(atc,http://lammps.sandia.gov/pictures.html#atc) -:link(cg,http://lammps.sandia.gov/pictures.html#cg) -:link(eff,http://lammps.sandia.gov/movies.html#eff) -:link(manifold,http://lammps.sandia.gov/movies.html#manifold) -:link(sph,http://lammps.sandia.gov/movies.html#sph) -:link(VMD,http://www.ks.uiuc.edu/Research/vmd) - -The "Authors" column lists a name(s) if a specific person is -responsible for creating and maintaining the package. - -(1) The ATC package was created by Reese Jones, Jeremy Templeton, and -Jon Zimmerman (Sandia). - -(2) The COLVARS package was created by Axel Kohlmeyer (Temple U) using -the colvars module library written by Giacomo Fiorin (Temple U) and -Jerome Henin (LISM, Marseille, France). - -(3) The DRUDE package was created by Alain Dequidt (U Blaise Pascal -Clermont-Ferrand) and co-authors Julien Devemy (CNRS) and Agilio Padua -(U Blaise Pascal). 
- -(4) The SMTBQ package was created by Nicolas Salles, Emile Maras, -Olivier Politano, and Robert Tetot (LAAS-CNRS, France). - -(5) The USER-DPD package was created by James Larentzos (ARL), Timothy -Mattox (Engility), and John Brennan (ARL). - -(6) The USER-VTK package was created by Richard Berger (JKU) and -Daniel Queteschiner (DCS Computing). - -The "Doc page" column links to either a sub-section of the -"Section 6"_Section_howto.html of the manual, or an input script -command implemented as part of the package, or to additional -documentation provided within the package. - -The "Example" column is a sub-directory in the examples directory of -the distribution which has an input script that uses the package. -E.g. "peptide" refers to the examples/peptide directory. - -The "Library" column lists an external library which must be built -first and which LAMMPS links to when it is built. If it is listed as -lib/package, then the code for the library is under the lib directory -of the LAMMPS distribution. See the lib/package/README file for info -on how to build the library. If it is not listed as lib/package, then -it is a third-party library not included in the LAMMPS distribution. -See details on all of this below for individual packages. - :line -USER-ATC package :link(USER-ATC),h5 +USER-ATC package :link(USER-ATC),h4 -Contents: ATC stands for atoms-to-continuum. This package implements -a "fix atc"_fix_atc.html command to either couple MD with continuum -finite element equations or perform on-the-fly post-processing of -atomic information to continuum fields. See src/USER-ATC/README for -more details. +[Contents:] -To build LAMMPS with this package ... +ATC stands for atoms-to-continuum. This package implements a "fix +atc"_fix_atc.html command to either couple molecular dynamics with +continuum finite element equations or perform on-the-fly conversion of +atomic information to continuum fields. 
-To install via make or Make.py: +[Authors:] Reese Jones, Jeremy Templeton, Jon Zimmerman (Sandia). + +[Install or un-install:] + +Before building LAMMPS with this package, you must first build the ATC +library in lib/atc. You can do this manually if you prefer; follow +the instructions in lib/atc/README. You can also do it in one step +from the lammps/src dir, using a command like these, which simply +invoke the lib/atc/Install.py script with the specified args: + +make lib-atc # print help message +make lib-atc args="-m g++" # build with GNU g++ compiler +make lib-atc args="-m icc" # build with Intel icc compiler :pre + +The build should produce two files: lib/atc/libatc.a and +lib/atc/Makefile.lammps. The latter is copied from an existing +Makefile.lammps.* and has settings needed to build LAMMPS with the ATC +library. If necessary, you can edit/create a new +lib/atc/Makefile.machine file for your system, which should define an +EXTRAMAKE variable to specify a corresponding Makefile.lammps.machine +file. + +Note that the Makefile.lammps file has settings for the BLAS and +LAPACK linear algebra libraries. As explained in lib/atc/README these +can either exist on your system, or you can use the files provided in +lib/linalg. 
In the latter case you also need to build the library +in lib/linalg with a command like these: + +make lib-linalg # print help message +make lib-linalg args="-m gfortran" # build with GNU Fortran compiler :pre + +You can then install/un-install the package and build LAMMPS in the +usual manner: make yes-user-atc make machine :pre - -Make.py -p atc -a machine :pre - -To un-install via make or Make.py: - + make no-user-atc make machine :pre + +[Supporting info:] -Make.py -p ^atc -a machine :pre - -Supporting info:src/USER-ATC/README, "fix atc"_fix_atc.html, +src/USER-ATC: filenames -> commands +src/USER-ATC/README +"fix atc"_fix_atc.html examples/USER/atc - -Authors: Reese Jones (rjones at sandia.gov), Jeremy Templeton (jatempl -at sandia.gov) and Jon Zimmerman (jzimmer at sandia.gov) at Sandia. -Contact them directly if you have questions. +http://lammps.sandia.gov/pictures.html#atc :ul :line -USER-AWPMD package :link(USER-AWPMD),h5 +USER-AWPMD package :link(USER-AWPMD),h4 -Contents: AWPMD stands for Antisymmetrized Wave Packet Molecular -Dynamics. This package implements an atom, pair, and fix style which -allows electrons to be treated as explicit particles in an MD -calculation. See src/USER-AWPMD/README for more details. +[Contents:] -To build LAMMPS with this package ... +AWPMD stands for Antisymmetrized Wave Packet Molecular Dynamics. This +package implements an atom, pair, and fix style which allows electrons +to be treated as explicit particles in a classical molecular dynamics +model. -Supporting info: src/USER-AWPMD/README, "fix -awpmd/cut"_pair_awpmd.html, examples/USER/awpmd +[Author:] Ilya Valuev (JIHT, Russia). -Author: Ilya Valuev at the JIHT in Russia (valuev at -physik.hu-berlin.de). Contact him directly if you have questions. +[Install or un-install:] + +Before building LAMMPS with this package, you must first build the +AWPMD library in lib/awpmd. You can do this manually if you prefer; +follow the instructions in lib/awpmd/README. 
You can also do it in +one step from the lammps/src dir, using a command like these, which +simply invoke the lib/awpmd/Install.py script with the specified args: + +make lib-awpmd # print help message +make lib-awpmd args="-m g++" # build with GNU g++ compiler +make lib-awpmd args="-m icc" # build with Intel icc compiler :pre + +The build should produce two files: lib/awpmd/libawpmd.a and +lib/awpmd/Makefile.lammps. The latter is copied from an existing +Makefile.lammps.* and has settings needed to build LAMMPS with the +AWPMD library. If necessary, you can edit/create a new +lib/awpmd/Makefile.machine file for your system, which should define +an EXTRAMAKE variable to specify a corresponding +Makefile.lammps.machine file. + +Note that the Makefile.lammps file has settings for the BLAS and +LAPACK linear algebra libraries. As explained in lib/awpmd/README +these can either exist on your system, or you can use the files +provided in lib/linalg. In the latter case you also need to build the +library in lib/linalg with a command like these: + +make lib-linalg # print help message +make lib-linalg args="-m gfortran" # build with GNU Fortran compiler :pre + +You can then install/un-install the package and build LAMMPS in the +usual manner: + +make yes-user-awpmd +make machine :pre + +make no-user-awpmd +make machine :pre + +[Supporting info:] + +src/USER-AWPMD: filenames -> commands +src/USER-AWPMD/README +"pair awpmd/cut"_pair_awpmd.html +examples/USER/awpmd :ul :line -USER-CG-CMM package :link(USER-CG-CMM),h5 +USER-CGDNA package :link(USER-CGDNA),h4 -Contents: CG-CMM stands for coarse-grained ??. This package -implements several pair styles and an angle style using the coarse -grained parametrization of Shinoda, DeVane, Klein, Mol Sim, 33, 27 -(2007) (SDK), with extensions to simulate ionic liquids, electrolytes, -lipids and charged amino acids. See src/USER-CG-CMM/README for more -details. 
+[Contents:] -Supporting info: src/USER-CG-CMM/README, "pair lj/sdk"_pair_sdk.html, -"pair lj/sdk/coul/long"_pair_sdk.html, "angle sdk"_angle_sdk.html, -examples/USER/cg-cmm +Several pair styles, a bond style, and integration fixes for +coarse-grained models of single- and double-stranded DNA based on the +oxDNA model of Doye, Louis and Ouldridge at the University of Oxford. +This includes Langevin-type rigid-body integrators with improved +stability. -Author: Axel Kohlmeyer at Temple U (akohlmey at gmail.com). Contact -him directly if you have questions. +[Author:] Oliver Henrich (University of Edinburgh). -:line +[Install or un-install:] + +make yes-user-cgdna +make machine :pre + +make no-user-cgdna +make machine :pre + +[Supporting info:] -USER-CGDNA package :link(USER-CGDNA),h5 - -Contents: The CGDNA package implements coarse-grained force fields for -single- and double-stranded DNA. These are at the moment mainly the -oxDNA and oxDNA2 models, developed by Doye, Louis and Ouldridge at the University -of Oxford. The package also contains Langevin-type rigid-body -integrators with improved stability. - -See these doc pages to get started: - -"bond_style oxdna/fene"_bond_oxdna.html -"bond_style oxdna2/fene"_bond_oxdna.html -"pair_style oxdna/..."_pair_oxdna.html -"pair_style oxdna2/..."_pair_oxdna2.html +src/USER-CGDNA: filenames -> commands +/src/USER-CGDNA/README +"pair_style oxdna/*"_pair_oxdna.html +"pair_style oxdna2/*"_pair_oxdna2.html +"bond_style oxdna/*"_bond_oxdna.html +"bond_style oxdna2/*"_bond_oxdna.html "fix nve/dotc/langevin"_fix_nve_dotc_langevin.html :ul -Supporting info: /src/USER-CGDNA/README, "bond_style -oxdna/fene"_bond_oxdna.html, "bond_style -oxdna2/fene"_bond_oxdna.html, "pair_style -oxdna/..."_pair_oxdna.html, "pair_style -oxdna2/..."_pair_oxdna2.html, "fix -nve/dotc/langevin"_fix_nve_dotc_langevin.html +:line -Author: Oliver Henrich at the University of Strathclyde, Glasgow, UK and -University of Edinburgh (ohenrich@ph.ed.ac.uk). 
-Contact him directly if you have any questions. +USER-CGSDK package :link(USER-CGSDK),h4 + +[Contents:] + +Several pair styles and an angle style which implement the +coarse-grained SDK model of Shinoda, DeVane, and Klein which enables +simulation of ionic liquids, electrolytes, lipids and charged amino +acids. + +[Author:] Axel Kohlmeyer (Temple U). + +[Install or un-install:] + +make yes-user-cgsdk +make machine :pre + +make no-user-cgsdk +make machine :pre + +[Supporting info:] + +src/USER-CGSDK: filenames -> commands +src/USER-CGSDK/README +"pair_style lj/sdk/*"_pair_sdk.html +"angle_style sdk"_angle_sdk.html +examples/USER/cgsdk +http://lammps.sandia.gov/pictures.html#cg :ul :line -USER-COLVARS package :link(USER-COLVARS),h5 +USER-COLVARS package :link(USER-COLVARS),h4 -Contents: COLVARS stands for collective variables which can be used to -implement Adaptive Biasing Force, Metadynamics, Steered MD, Umbrella -Sampling and Restraints. This package implements a "fix -colvars"_fix_colvars.html command which wraps a COLVARS library which -can perform those kinds of simulations. See src/USER-COLVARS/README -for more details. +[Contents:] -Supporting info: -"doc/PDF/colvars-refman-lammps.pdf"_PDF/colvars-refman-lammps.pdf, -src/USER-COLVARS/README, lib/colvars/README, "fix -colvars"_fix_colvars.html, examples/USER/colvars +COLVARS stands for collective variables, which can be used to +implement various enhanced sampling methods, including Adaptive +Biasing Force, Metadynamics, Steered MD, Umbrella Sampling and +Restraints. A "fix colvars"_fix_colvars.html command is implemented +which wraps a COLVARS library, which implements these methods. -Authors: Axel Kohlmeyer at Temple U (akohlmey at gmail.com) wrote the -fix. The COLVARS library itself is written and maintained by Giacomo -Fiorin (ICMS, Temple University, Philadelphia, PA, USA) and Jerome -Henin (LISM, CNRS, Marseille, France). Contact them directly if you -have questions.
+[Authors:] Axel Kohlmeyer (Temple U). The COLVARS library was written +by Giacomo Fiorin (ICMS, Temple University, Philadelphia, PA, USA) and +Jerome Henin (LISM, CNRS, Marseille, France). + +[Install or un-install:] + +Before building LAMMPS with this package, you must first build the +COLVARS library in lib/colvars. You can do this manually if you +prefer; follow the instructions in lib/colvars/README. You can also +do it in one step from the lammps/src dir, using a command like these, +which simply invoke the lib/colvars/Install.py script with the +specified args: + +make lib-colvars # print help message +make lib-colvars args="-m g++" # build with GNU g++ compiler :pre + +The build should produce two files: lib/colvars/libcolvars.a and +lib/colvars/Makefile.lammps. The latter is copied from an existing +Makefile.lammps.* and has settings needed to build LAMMPS with the +COLVARS library (though typically the settings are just blank). If +necessary, you can edit/create a new lib/colvars/Makefile.machine file +for your system, which should define an EXTRAMAKE variable to specify +a corresponding Makefile.lammps.machine file. + +You can then install/un-install the package and build LAMMPS in the +usual manner: + +make yes-user-colvars +make machine :pre + +make no-user-colvars +make machine :pre + +[Supporting info:] + +src/USER-COLVARS: filenames -> commands +"doc/PDF/colvars-refman-lammps.pdf"_PDF/colvars-refman-lammps.pdf +src/USER-COLVARS/README +lib/colvars/README +"fix colvars"_fix_colvars.html +examples/USER/colvars :ul :line -USER-DIFFRACTION package :link(USER-DIFFRACTION),h5 +USER-DIFFRACTION package :link(USER-DIFFRACTION),h4 -Contents: This packages implements two computes and a fix for -calculating x-ray and electron diffraction intensities based on -kinematic diffraction theory. See src/USER-DIFFRACTION/README for -more details. 
+[Contents:] -Supporting info: "compute saed"_compute_saed.html, "compute -xrd"_compute_xrd.html, "fix saed/vtk"_fix_saed_vtk.html, -examples/USER/diffraction +Two computes and a fix for calculating x-ray and electron diffraction +intensities based on kinematic diffraction theory. -Author: Shawn P. Coleman (shawn.p.coleman8.ctr at mail.mil) while at -the University of Arkansas. Contact him directly if you have -questions. +[Author:] Shawn Coleman while at the U Arkansas. + +[Install or un-install:] + +make yes-user-diffraction +make machine :pre + +make no-user-diffraction +make machine :pre + +[Supporting info:] + +src/USER-DIFFRACTION: filenames -> commands +"compute saed"_compute_saed.html +"compute xrd"_compute_xrd.html +"fix saed/vtk"_fix_saed_vtk.html +examples/USER/diffraction :ul :line -USER-DPD package :link(USER-DPD),h5 +USER-DPD package :link(USER-DPD),h4 -Contents: DPD stands for dissipative particle dynamics, This package -implements DPD for isothermal, isoenergetic, isobaric and isenthalpic -conditions. It also has extensions for performing reactive DPD, where -each particle has internal state for multiple species and a coupled -set of chemical reaction ODEs are integrated each timestep. The DPD -equations of motion are integrated efficiently through the Shardlow -splitting algorithm. See src/USER-DPD/README for more details. +[Contents:] -Supporting info: /src/USER-DPD/README, "compute dpd"_compute_dpd.html +DPD stands for dissipative particle dynamics. This package implements +coarse-grained DPD-based models for energetic, reactive molecular +crystalline materials. It includes many pair styles specific to these +systems, including for reactive DPD, where each particle has internal +state for multiple species and a coupled set of chemical reaction ODEs +are integrated each timestep. Highly accurate time integrators for +isothermal, isoenergetic, isobaric and isenthalpic conditions are +included.
These enable long timesteps via the Shardlow splitting +algorithm. + +[Authors:] Jim Larentzos (ARL), Tim Mattox (Engility Corp), and John +Brennan (ARL). + +[Install or un-install:] + +make yes-user-dpd +make machine :pre + +make no-user-dpd +make machine :pre + +[Supporting info:] + +src/USER-DPD: filenames -> commands +/src/USER-DPD/README +"compute dpd"_compute_dpd.html "compute dpd/atom"_compute_dpd_atom.html -"fix eos/cv"_fix_eos_table.html "fix eos/table"_fix_eos_table.html -"fix eos/table/rx"_fix_eos_table_rx.html "fix shardlow"_fix_shardlow.html -"fix rx"_fix_rx.html "pair table/rx"_pair_table_rx.html -"pair dpd/fdt"_pair_dpd_fdt.html "pair dpd/fdt/energy"_pair_dpd_fdt.html -"pair exp6/rx"_pair_exp6_rx.html "pair multi/lucy"_pair_multi_lucy.html -"pair multi/lucy/rx"_pair_multi_lucy_rx.html, examples/USER/dpd - -Authors: James Larentzos (ARL) (james.p.larentzos.civ at mail.mil), -Timothy Mattox (Engility Corp) (Timothy.Mattox at engilitycorp.com) -and John Brennan (ARL) (john.k.brennan.civ at mail.mil). Contact them -directly if you have questions. +"fix eos/cv"_fix_eos_table.html +"fix eos/table"_fix_eos_table.html +"fix eos/table/rx"_fix_eos_table_rx.html +"fix shardlow"_fix_shardlow.html +"fix rx"_fix_rx.html +"pair table/rx"_pair_table_rx.html +"pair dpd/fdt"_pair_dpd_fdt.html +"pair dpd/fdt/energy"_pair_dpd_fdt.html +"pair exp6/rx"_pair_exp6_rx.html +"pair multi/lucy"_pair_multi_lucy.html +"pair multi/lucy/rx"_pair_multi_lucy_rx.html +examples/USER/dpd :ul :line -USER-DRUDE package :link(USER-DRUDE),h5 +USER-DRUDE package :link(USER-DRUDE),h4 -Contents: This package contains methods for simulating polarizable -systems using thermalized Drude oscillators. It has computes, fixes, -and pair styles for this purpose. See "Section +[Contents:] + +Fixes, pair styles, and a compute to simulate thermalized Drude +oscillators as a model of polarization. See "Section 6.27"_Section_howto.html#howto_27 for an overview of how to use the -package.
See src/USER-DRUDE/README for additional details. There are -auxiliary tools for using this package in tools/drude. +package. There are auxiliary tools for using this package in +tools/drude. -Supporting info: "Section 6.27"_Section_howto.html#howto_27, -src/USER-DRUDE/README, "fix drude"_fix_drude.html, "fix -drude/transform/*"_fix_drude_transform.html, "compute -temp/drude"_compute_temp_drude.html, "pair thole"_pair_thole.html, -"pair lj/cut/thole/long"_pair_thole.html, examples/USER/drude, -tools/drude +[Authors:] Alain Dequidt (U Blaise Pascal Clermont-Ferrand), Julien +Devemy (CNRS), and Agilio Padua (U Blaise Pascal). -Authors: Alain Dequidt at Universite Blaise Pascal Clermont-Ferrand -(alain.dequidt at univ-bpclermont.fr); co-authors: Julien Devemy, -Agilio Padua. Contact them directly if you have questions. +[Install or un-install:] + +make yes-user-drude +make machine :pre + +make no-user-drude +make machine :pre + +[Supporting info:] + +src/USER-DRUDE: filenames -> commands +"Section 6.27"_Section_howto.html#howto_27 +"Section 6.25"_Section_howto.html#howto_25 +src/USER-DRUDE/README +"fix drude"_fix_drude.html +"fix drude/transform/*"_fix_drude_transform.html +"compute temp/drude"_compute_temp_drude.html +"pair thole"_pair_thole.html +"pair lj/cut/thole/long"_pair_thole.html +examples/USER/drude +tools/drude :ul :line -USER-EFF package :link(USER-EFF),h5 +USER-EFF package :link(USER-EFF),h4 -Contents: EFF stands for electron force field. This package contains -atom, pair, fix and compute styles which implement the eFF as +[Contents:] + +EFF stands for electron force field which allows a classical MD code +to model electrons as particles of variable radius. This package +contains atom, pair, fix and compute styles which implement the eFF as described in A. Jaramillo-Botero, J. Su, Q. An, and W.A. Goddard III, -JCC, 2010. The eFF potential was first introduced by Su and Goddard, -in 2007. See src/USER-EFF/README for more details. 
There are -auxiliary tools for using this package in tools/eff; see its README +JCC, 2010. The eFF potential was first introduced by Su and Goddard, +in 2007. There are auxiliary tools for using this package in +tools/eff; see its README file. + +[Author:] Andres Jaramillo-Botero (CalTech). + +[Install or un-install:] + +make yes-user-eff +make machine :pre + +make no-user-eff +make machine :pre + +[Supporting info:] + +src/USER-EFF: filenames -> commands +src/USER-EFF/README +"atom_style electron"_atom_style.html +"fix nve/eff"_fix_nve_eff.html +"fix nvt/eff"_fix_nh_eff.html +"fix npt/eff"_fix_nh_eff.html +"fix langevin/eff"_fix_langevin_eff.html +"compute temp/eff"_compute_temp_eff.html +"pair eff/cut"_pair_eff.html +"pair eff/inline"_pair_eff.html +examples/USER/eff +tools/eff/README +tools/eff +http://lammps.sandia.gov/movies.html#eff :ul + +:line + +USER-FEP package :link(USER-FEP),h4 + +[Contents:] + +FEP stands for free energy perturbation. This package provides +methods for performing FEP simulations by using a "fix +adapt/fep"_fix_adapt_fep.html command with soft-core pair potentials, +which have a "soft" in their style name. There are auxiliary tools +for using this package in tools/fep; see its README file. + +[Author:] Agilio Padua (Universite Blaise Pascal Clermont-Ferrand) + +[Install or un-install:] + +make yes-user-fep +make machine :pre + +make no-user-fep +make machine :pre + +[Supporting info:] + +src/USER-FEP: filenames -> commands +src/USER-FEP/README +"fix adapt/fep"_fix_adapt_fep.html +"compute fep"_compute_fep.html +"pair_style */soft"_pair_lj_soft.html +examples/USER/fep +tools/fep/README +tools/fep :ul + +:line + +USER-H5MD package :link(USER-H5MD),h4 + +[Contents:] + +H5MD stands for HDF5 for MD. "HDF5"_HDF5 is a portable, binary, +self-describing file format, used by many scientific simulations. +H5MD is a format for molecular simulations, built on top of HDF5. 
+This package implements a "dump h5md"_dump_h5md.html command to output +LAMMPS snapshots in this format. + +:link(HDF5,http://www.hdfgroup.org/HDF5) + +To use this package you must have the HDF5 library available on your +system. + +[Author:] Pierre de Buyl (KU Leuven) created both the package and the +H5MD format. + +[Install or un-install:] + +Note that to follow these steps to compile and link to the CH5MD +library, you need the standard HDF5 software package installed on your +system, which should include the h5cc compiler and the HDF5 library. + +Before building LAMMPS with this package, you must first build the +CH5MD library in lib/h5md. You can do this manually if you prefer; +follow the instructions in lib/h5md/README. You can also do it in one +step from the lammps/src dir, using a command like these, which simply +invoke the lib/h5md/Install.py script with the specified args: + +make lib-h5md # print help message +make lib-h5md args="-m h5cc" # build with h5cc compiler :pre + +The build should produce two files: lib/h5md/libch5md.a and +lib/h5md/Makefile.lammps. The latter is copied from an existing +Makefile.lammps.* and has settings needed to build LAMMPS with the +system HDF5 library. If necessary, you can edit/create a new +lib/h5md/Makefile.machine file for your system, which should define an +EXTRAMAKE variable to specify a corresponding Makefile.lammps.machine file. -Supporting info: +You can then install/un-install the package and build LAMMPS in the +usual manner: + +make yes-user-h5md +make machine :pre + +make no-user-h5md +make machine :pre + +[Supporting info:] -Author: Andres Jaramillo-Botero at CalTech (ajaramil at -wag.caltech.edu). Contact him directly if you have questions. +src/USER-H5MD: filenames -> commands +src/USER-H5MD/README +lib/h5md/README +"dump h5md"_dump_h5md.html :ul :line -USER-FEP package :link(USER-FEP),h5 +USER-INTEL package :link(USER-INTEL),h4 -Contents: FEP stands for free energy perturbation.
This package -provides methods for performing FEP simulations by using a "fix -adapt/fep"_fix_adapt_fep.html command with soft-core pair potentials, -which have a "soft" in their style name. See src/USER-FEP/README for -more details. There are auxiliary tools for using this package in -tools/fep; see its README file. +[Contents:] -Supporting info: src/USER-FEP/README, "fix -adapt/fep"_fix_adapt_fep.html, "compute fep"_compute_fep.html, -"pair_style */soft"_pair_lj_soft.html, examples/USER/fep +Dozens of pair, fix, bond, angle, dihedral, improper, and kspace +styles which are optimized for Intel CPUs and KNLs (Knights Landing). +All of them have an "intel" in their style name. "Section +5.3.2"_accelerate_intel.html gives details of what hardware and +compilers are required on your system, and how to build and use this +package. Its styles can be invoked at run time via the "-sf intel" or +"-suffix intel" "command-line switches"_Section_start.html#start_7. +Also see the "KOKKOS"_#KOKKOS, "OPT"_#OPT, and "USER-OMP"_#USER-OMP +packages, which have styles optimized for CPUs and KNLs. -Author: Agilio Padua at Universite Blaise Pascal Clermont-Ferrand -(agilio.padua at univ-bpclermont.fr). Contact him directly if you have -questions. +You need to have an Intel compiler, version 14 or higher to take full +advantage of this package. -:line +[Author:] Mike Brown (Intel). -USER-H5MD package :link(USER-H5MD),h5 - -Contents: H5MD stands for HDF5 for MD. "HDF5"_HDF5 is a binary, -portable, self-describing file format, used by many scientific -simulations. H5MD is a format for molecular simulations, built on top -of HDF5. This package implements a "dump h5md"_dump_h5md.html command -to output LAMMPS snapshots in this format. See src/USER-H5MD/README -for more details. 
- -:link(HDF5,http://www.hdfgroup.org/HDF5/) - -Supporting info: src/USER-H5MD/README, lib/h5md/README, "dump -h5md"_dump_h5md.html - -Author: Pierre de Buyl at KU Leuven (see http://pdebuyl.be) created -this package as well as the H5MD format and library. Contact him -directly if you have questions. - -:line - -USER-INTEL package :link(USER-INTEL),h5 - -Contents: Dozens of pair, bond, angle, dihedral, and improper styles -that are optimized for Intel CPUs and the Intel Xeon Phi (in offload -mode). All of them have an "intel" in their style name. "Section -5.3.2"_accelerate_intel.html gives details of what hardware -and compilers are required on your system, and how to build and use -this package. Also see src/USER-INTEL/README for more details. See -the KOKKOS, OPT, and USER-OMP packages, which also have CPU and -Phi-enabled styles. - -Supporting info: examples/accelerate, src/USER-INTEL/TEST - -"Section 5.3"_Section_accelerate.html#acc_3 - -Author: Mike Brown at Intel (michael.w.brown at intel.com). Contact -him directly if you have questions. +[Install or un-install:] For the USER-INTEL package, you have 2 choices when building. You can -build with CPU or Phi support. The latter uses Xeon Phi chips in -"offload" mode. Each of these modes requires additional settings in -your Makefile.machine for CCFLAGS and LINKFLAGS. +build with either CPU or KNL support. Each choice requires additional +settings in your Makefile.machine for CCFLAGS and LINKFLAGS and +optimized malloc libraries. See the +src/MAKE/OPTIONS/Makefile.intel_cpu and src/MAKE/OPTIONS/Makefile.knl +files for examples. 
-For CPU mode (if using an Intel compiler): +For CPUs: -CCFLAGS: add -fopenmp, -DLAMMPS_MEMALIGN=64, -restrict, -xHost, -fno-alias, -ansi-alias, -override-limits -LINKFLAGS: add -fopenmp :ul +OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits +CCFLAGS = -g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \ + -fno-alias -ansi-alias -restrict $(OPTFLAGS) +LINKFLAGS = -g -qopenmp $(OPTFLAGS) +LIB = -ltbbmalloc -ltbbmalloc_proxy -For Phi mode add the following in addition to the CPU mode flags: +For KNLs: -CCFLAGS: add -DLMP_INTEL_OFFLOAD and -LINKFLAGS: add -offload :ul +OPTFLAGS = -xMIC-AVX512 -O2 -fp-model fast=2 -no-prec-div -qoverride-limits +CCFLAGS = -g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \ + -fno-alias -ansi-alias -restrict $(OPTFLAGS) +LINKFLAGS = -g -qopenmp $(OPTFLAGS) +LIB = -ltbbmalloc -And also add this to CCFLAGS: +Once you have an appropriate Makefile.machine, you can +install/un-install the package and build LAMMPS in the usual manner. +Note that you cannot build one executable to run on multiple hardware +targets (Intel CPUs or KNL). You need to build LAMMPS once for each +hardware target, to produce a separate executable. --offload-option,mic,compiler,"-fp-model fast=2 -mGLOB_default_function_attrs=\"gather_scatter_loop_unroll=4\"" :pre +You should also typically install the USER-OMP package, as it can be +used in tandem with the USER-INTEL package to good effect, as +explained in "Section 5.3.2"_accelerate_intel.html. 
-Examples: +make yes-user-intel yes-user-omp +make machine :pre + +make no-user-intel no-user-omp +make machine :pre + +[Supporting info:] + +src/USER-INTEL: filenames -> commands +src/USER-INTEL/README +"Section 5.3"_Section_accelerate.html#acc_3 +"Section 5.3.2"_accelerate_intel.html +"Section 2.7 -sf intel"_Section_start.html#start_7 +"Section 2.7 -pk intel"_Section_start.html#start_7 +"package intel"_package.html +Styles sections of "Section 3.5"_Section_commands.html#cmd_5 for styles followed by (i) +src/USER-INTEL/TEST +"Benchmarks page"_http://lammps.sandia.gov/bench.html of web site :ul :line -USER-LB package :link(USER-LB),h5 +USER-LB package :link(USER-LB),h4 -Supporting info: +[Contents:] -This package contains a LAMMPS implementation of a background -Lattice-Boltzmann fluid, which can be used to model MD particles -influenced by hydrodynamic forces. +Fixes which implement a background Lattice-Boltzmann (LB) fluid, which +can be used to model MD particles influenced by hydrodynamic forces. -See this doc page and its related commands to get started: +[Authors:] Frances Mackay and Colin Denniston (University of Western +Ontario). +[Install or un-install:] + +make yes-user-lb +make machine :pre + +make no-user-lb +make machine :pre + +[Supporting info:] + +src/USER-LB: filenames -> commands +src/USER-LB/README "fix lb/fluid"_fix_lb_fluid.html - -The people who created this package are Frances Mackay (fmackay at -uwo.ca) and Colin (cdennist at uwo.ca) Denniston, University of -Western Ontario. Contact them directly if you have questions. - -Examples: examples/USER/lb +"fix lb/momentum"_fix_lb_momentum.html +"fix lb/viscous"_fix_lb_viscous.html +examples/USER/lb :ul :line -USER-MGPT package :link(USER-MGPT),h5 +USER-MGPT package :link(USER-MGPT),h4 -Supporting info: +[Contents:] -This package contains a fast implementation for LAMMPS of -quantum-based MGPT multi-ion potentials.
The MGPT or model GPT method -derives from first-principles DFT-based generalized pseudopotential -theory (GPT) through a series of systematic approximations valid for -mid-period transition metals with nearly half-filled d bands. The -MGPT method was originally developed by John Moriarty at Lawrence -Livermore National Lab (LLNL). +A pair style which provides a fast implementation of the quantum-based +MGPT multi-ion potentials. The MGPT or model GPT method derives from +first-principles DFT-based generalized pseudopotential theory (GPT) +through a series of systematic approximations valid for mid-period +transition metals with nearly half-filled d bands. The MGPT method +was originally developed by John Moriarty at LLNL. The pair style in +this package calculates forces and energies using an optimized +matrix-MGPT algorithm due to Tomas Oppelstrup at LLNL. -In the general matrix representation of MGPT, which can also be -applied to f-band actinide metals, the multi-ion potentials are -evaluated on the fly during a simulation through d- or f-state matrix -multiplication, and the forces that move the ions are determined -analytically. The {mgpt} pair style in this package calculates forces -and energies using an optimized matrix-MGPT algorithm due to Tomas -Oppelstrup at LLNL. +[Authors:] Tomas Oppelstrup and John Moriarty (LLNL). -See this doc page to get started: +[Install or un-install:] + +make yes-user-mgpt +make machine :pre + +make no-user-mgpt +make machine :pre + +[Supporting info:] +src/USER-MGPT: filenames -> commands +src/USER-MGPT/README "pair_style mgpt"_pair_mgpt.html - -The persons who created the USER-MGPT package are Tomas Oppelstrup -(oppelstrup2@llnl.gov) and John Moriarty (moriarty2@llnl.gov) -Contact them directly if you have any questions. 
- -Examples: examples/USER/mgpt +examples/USER/mgpt :ul :line -USER-MISC package :link(USER-MISC),h5 +USER-MISC package :link(USER-MISC),h4 -Supporting info: +[Contents:] -The files in this package are a potpourri of (mostly) unrelated -features contributed to LAMMPS by users. Each feature is a single -pair of files (*.cpp and *.h). +A potpourri of (mostly) unrelated features contributed to LAMMPS by +users. Each feature is a single fix, compute, pair, bond, angle, +dihedral, improper, or command style. -More information about each feature can be found by reading its doc -page in the LAMMPS doc directory. The doc page which lists all LAMMPS -input script commands is as follows: - -"Section 3.5"_Section_commands.html#cmd_5 - -User-contributed features are listed at the bottom of the fix, -compute, pair, etc sections. - -The list of features and author of each is given in the +[Authors:] The author for each style in the package is listed in the src/USER-MISC/README file. -You should contact the author directly if you have specific questions -about the feature or its coding. +[Install or un-install:] + +make yes-user-misc +make machine :pre + +make no-user-misc +make machine :pre + +[Supporting info:] -Examples: examples/USER/misc +src/USER-MISC: filenames -> commands +src/USER-MISC/README +one doc page per individual command listed in src/USER-MISC/README +examples/USER/misc :ul :line -USER-MANIFOLD package :link(USER-MANIFOLD),h5 +USER-MANIFOLD package :link(USER-MANIFOLD),h4 -Supporting info: +[Contents:] -This package contains a dump molfile command which uses molfile -plugins that are bundled with the -"VMD"_http://www.ks.uiuc.edu/Research/vmd molecular visualization and -analysis program, to enable LAMMPS to dump its information in formats -compatible with various molecular simulation tools. +Several fixes and a "manifold" class which enable simulations of +particles constrained to a manifold (a 2D surface within the 3D +simulation box). 
This is done by applying the RATTLE constraint +algorithm to formulate single-particle constraint functions +g(xi,yi,zi) = 0 and their derivative (i.e. the normal of the manifold) +n = grad(g). -This package allows LAMMPS to perform MD simulations of particles -constrained on a manifold (i.e., a 2D subspace of the 3D simulation -box). It achieves this using the RATTLE constraint algorithm applied -to single-particle constraint functions g(xi,yi,zi) = 0 and their -derivative (i.e. the normal of the manifold) n = grad(g). +[Author:] Stefan Paquay (Eindhoven University of Technology (TU/e), The +Netherlands) -See this doc page to get started: +[Install or un-install:] + +make yes-user-manifold +make machine :pre + +make no-user-manifold +make machine :pre + +[Supporting info:] +src/USER-MANIFOLD: filenames -> commands +src/USER-MANIFOLD/README +"doc/manifolds"_manifolds.html "fix manifoldforce"_fix_manifoldforce.html - -The person who created this package is Stefan Paquay, at the Eindhoven -University of Technology (TU/e), The Netherlands (s.paquay at tue.nl). -Contact him directly if you have questions. +"fix nve/manifold/rattle"_fix_nve_manifold_rattle.html +"fix nvt/manifold/rattle"_fix_nvt_manifold_rattle.html +examples/USER/manifold +http://lammps.sandia.gov/movies.html#manifold :ul :line -USER-MOLFILE package :link(USER-MOLFILE),h5 +USER-MOLFILE package :link(USER-MOLFILE),h4 -Supporting info: +[Contents:] -This package contains a dump molfile command which uses molfile -plugins that are bundled with the -"VMD"_http://www.ks.uiuc.edu/Research/vmd molecular visualization and -analysis program, to enable LAMMPS to dump its information in formats -compatible with various molecular simulation tools. +A "dump molfile"_dump_molfile.html command which uses molfile plugins +that are bundled with the "VMD"_vmd_home +molecular visualization and analysis program, to enable LAMMPS to dump +snapshots in formats compatible with various molecular simulation +tools. 
-The package only provides the interface code, not the plugins. These -can be obtained from a VMD installation which has to match the -platform that you are using to compile LAMMPS for. By adding plugins -to VMD, support for new file formats can be added to LAMMPS (or VMD or -other programs that use them) without having to recompile the -application itself. +:link(vmd_home,http://www.ks.uiuc.edu/Research/vmd) -See this doc page to get started: +To use this package you must have the desired VMD plugins available on +your system. -"dump molfile"_dump_molfile.html +Note that this package only provides the interface code, not the +plugins themselves, which will be accessed when requesting a specific +plugin via the "dump molfile"_dump_molfile.html command. Plugins can +be obtained from a VMD installation which has to match the platform +that you are using to compile LAMMPS for. By adding plugins to VMD, +support for new file formats can be added to LAMMPS (or VMD or other +programs that use them) without having to recompile the application +itself. More information about the VMD molfile plugins can be found +at +"http://www.ks.uiuc.edu/Research/vmd/plugins/molfile"_http://www.ks.uiuc.edu/Research/vmd/plugins/molfile. -The person who created this package is Axel Kohlmeyer at Temple U -(akohlmey at gmail.com). Contact him directly if you have questions. +[Author:] Axel Kohlmeyer (Temple U). + +[Install or un-install:] + +Note that the lib/molfile/Makefile.lammps file has a setting for a +dynamic loading library libdl.a that is typically present on +all systems, which is required for LAMMPS to link with this package. +If the setting is not valid for your system, you will need to edit the +Makefile.lammps file. See lib/molfile/README and +lib/molfile/Makefile.lammps for details.
+ +make yes-user-molfile +make machine :pre + +make no-user-molfile +make machine :pre + +[Supporting info:] + +src/USER-MOLFILE: filenames -> commands +src/USER-MOLFILE/README +lib/molfile/README +"dump molfile"_dump_molfile.html :ul :line -USER-NC-DUMP package :link(USER-NC-DUMP),h5 +USER-NETCDF package :link(USER-NETCDF),h4 -Contents: Dump styles for writing NetCDF format files. NetCDF is a binary, -portable, self-describing file format on top of HDF5. The file format -contents follow the AMBER NetCDF trajectory conventions -(http://ambermd.org/netcdf/nctraj.xhtml), but include extensions to this -convention. This package implements a "dump nc"_dump_nc.html command -and a "dump nc/mpiio"_dump_nc.html command to output LAMMPS snapshots -in this format. See src/USER-NC-DUMP/README for more details. +[Contents:] -NetCDF files can be directly visualized with the following tools: +Dump styles for writing NetCDF formatted dump files. NetCDF is a +portable, binary, self-describing file format developed on top of +HDF5. The file contents follow the AMBER NetCDF trajectory conventions +(http://ambermd.org/netcdf/nctraj.xhtml), but include extensions. -Ovito (http://www.ovito.org/). Ovito supports the AMBER convention -and all of the above extensions. :ulb,l -VMD (http://www.ks.uiuc.edu/Research/vmd/) :l -AtomEye (http://www.libatoms.org/). The libAtoms version of AtomEye contains -a NetCDF reader that is not present in the standard distribution of AtomEye :l,ule +To use this package you must have the NetCDF library available on your +system. -The person who created these files is Lars Pastewka at -Karlsruhe Institute of Technology (lars.pastewka at kit.edu). -Contact him directly if you have questions. 
+Note that NetCDF files can be directly visualized with the following +tools: + +"Ovito"_ovito (Ovito supports the AMBER convention and the extensions mentioned above) +"VMD"_vmd_home +"AtomEye"_atomeye (the libAtoms version of AtomEye contains a NetCDF reader not present in the standard distribution) :ul + +:link(ovito,http://www.ovito.org) +:link(atomeye,http://www.libatoms.org) + +[Author:] Lars Pastewka (Karlsruhe Institute of Technology). + +[Install or un-install:] + +Note that to follow these steps, you need the standard NetCDF software +package installed on your system. The lib/netcdf/Makefile.lammps file +has settings for NetCDF include and library files that LAMMPS needs to +compile and link with this package. If the settings are not valid +for your system, you will need to edit the Makefile.lammps file. See +lib/netcdf/README for details. + +make yes-user-netcdf +make machine :pre + +make no-user-netcdf +make machine :pre + +[Supporting info:] + +src/USER-NETCDF: filenames -> commands +src/USER-NETCDF/README +lib/netcdf/README +"dump netcdf"_dump_netcdf.html :ul :line -USER-OMP package :link(USER-OMP),h5 +USER-OMP package :link(USER-OMP),h4 -Supporting info: +[Contents:] -This package provides OpenMP multi-threading support and -other optimizations of various LAMMPS pair styles, dihedral -styles, and fix styles. +Hundreds of pair, fix, compute, bond, angle, dihedral, improper, and +kspace styles which are altered to enable threading on many-core CPUs +via OpenMP directives. All of them have an "omp" in their style name. +"Section 5.3.4"_accelerate_omp.html gives details of what hardware and +compilers are required on your system, and how to build and use this +package. Its styles can be invoked at run time via the "-sf omp" or +"-suffix omp" "command-line switches"_Section_start.html#start_7. +Also see the "KOKKOS"_#KOKKOS, "OPT"_#OPT, and +"USER-INTEL"_#USER-INTEL packages, which have styles optimized for +CPUs.
-See this section of the manual to get started: +[Author:] Axel Kohlmeyer (Temple U). -"Section 5.3"_Section_accelerate.html#acc_3 +NOTE: The compile flags "-restrict" and "-fopenmp" must be used to +build LAMMPS with the USER-OMP package, as well as the link flag +"-fopenmp". They should be added to the CCFLAGS and LINKFLAGS lines +of your Makefile.machine. See src/MAKE/OPTIONS/Makefile.omp for an +example. -The person who created this package is Axel Kohlmeyer at Temple U -(akohlmey at gmail.com). Contact him directly if you have questions. +Once you have an appropriate Makefile.machine, you can +install/un-install the package and build LAMMPS in the usual manner: -For the USER-OMP package, your Makefile.machine needs additional -settings for CCFLAGS and LINKFLAGS. +[Install or un-install:] + +make yes-user-omp +make machine :pre + +make no-user-omp +make machine :pre CCFLAGS: add -fopenmp and -restrict LINKFLAGS: add -fopenmp :ul -Examples: examples/accelerate, bench/KEPLER +[Supporting info:] + +src/USER-OMP: filenames -> commands +src/USER-OMP/README +"Section 5.3"_Section_accelerate.html#acc_3 +"Section 5.3.4"_accelerate_omp.html +"Section 2.7 -sf omp"_Section_start.html#start_7 +"Section 2.7 -pk omp"_Section_start.html#start_7 +"package omp"_package.html +Styles sections of "Section 3.5"_Section_commands.html#cmd_5 for styles followed by (o) +"Benchmarks page"_http://lammps.sandia.gov/bench.html of web site :ul :line -USER-PHONON package :link(USER-PHONON),h5 +USER-PHONON package :link(USER-PHONON),h4 -This package contains a fix phonon command that calculates dynamical +[Contents:] + +A "fix phonon"_fix_phonon.html command that calculates dynamical matrices, which can then be used to compute phonon dispersion relations, directly from molecular dynamics simulations. -See this doc page to get started: +[Author:] Ling-Ti Kong (Shanghai Jiao Tong University). 
+[Install or un-install:] + +make yes-user-phonon +make machine :pre + +make no-user-phonon +make machine :pre + +[Supporting info:] + +src/USER-PHONON: filenames -> commands +src/USER-PHONON/README "fix phonon"_fix_phonon.html - -The person who created this package is Ling-Ti Kong (konglt at -sjtu.edu.cn) at Shanghai Jiao Tong University. Contact him directly -if you have questions. - -Examples: examples/USER/phonon +examples/USER/phonon :ul :line -USER-QMMM package :link(USER-QMMM),h5 +USER-QMMM package :link(USER-QMMM),h4 -Supporting info: +[Contents:] -This package provides a fix qmmm command which allows LAMMPS to be -used in a QM/MM simulation, currently only in combination with pw.x -code from the "Quantum ESPRESSO"_espresso package. +A "fix qmmm"_fix_qmmm.html command which allows LAMMPS to be used in a +QM/MM simulation, currently only in combination with the "Quantum +ESPRESSO"_espresso package. :link(espresso,http://www.quantum-espresso.org) +To use this package you must have Quantum ESPRESSO available on your +system. + The current implementation only supports an ONIOM style mechanical coupling to the Quantum ESPRESSO plane wave DFT package. Electrostatic coupling is in preparation and the interface has been written in a manner that coupling to other QM codes should be possible without changes to LAMMPS itself. -See this doc page to get started: +[Author:] Axel Kohlmeyer (Temple U). -"fix qmmm"_fix_qmmm.html +[Install or un-install:] -as well as the lib/qmmm/README file. +Before building LAMMPS with this package, you must first build the +QMMM library in lib/qmmm. You can do this manually if you prefer; +follow the first two steps explained in lib/qmmm/README. You can +also do it in one step from the lammps/src dir, using a command like +these, which simply invoke the lib/qmmm/Install.py script with the +specified args: -The person who created this package is Axel Kohlmeyer at Temple U -(akohlmey at gmail.com). 
Contact him directly if you have questions. +make lib-qmmm # print help message +make lib-qmmm args="-m gfortran" # build with GNU Fortran compiler :pre + +The build should produce two files: lib/qmmm/libqmmm.a and +lib/qmmm/Makefile.lammps. The latter is copied from an existing +Makefile.lammps.* and has settings needed to build LAMMPS with the +QMMM library (though typically the settings are just blank). If +necessary, you can edit/create a new lib/qmmm/Makefile.machine file +for your system, which should define an EXTRAMAKE variable to specify +a corresponding Makefile.lammps.machine file. + +You can then install/un-install the package and build LAMMPS in the +usual manner: + +make yes-user-qmmm +make machine :pre + +make no-user-qmmm +make machine :pre + +NOTE: The LAMMPS executable these steps produce is not yet functional +for a QM/MM simulation. You must also build Quantum ESPRESSO and +create a new executable which links LAMMPS and Quantum ESPRESSO +together. These are steps 3 and 4 described in the lib/qmmm/README +file. + +[Supporting info:] + +src/USER-QMMM: filenames -> commands +src/USER-QMMM/README +lib/qmmm/README +"fix qmmm"_fix_qmmm.html +lib/qmmm/example-ec/README +lib/qmmm/example-mc/README :ul :line -USER-QTB package :link(USER-QTB),h5 +USER-QTB package :link(USER-QTB),h4 -Supporting info: +[Contents:] -This package provides a self-consistent quantum treatment of the +Two fixes which provide a self-consistent quantum treatment of vibrational modes in a classical molecular dynamics simulation. By coupling the MD simulation to a colored thermostat, it introduces zero -point energy into the system, alter the energy power spectrum and the -heat capacity towards their quantum nature. This package could be of -interest if one wants to model systems at temperatures lower than -their classical limits or when temperatures ramp up across the -classical limits in the simulation. 
+point energy into the system, altering the energy power spectrum and +the heat capacity to account for their quantum nature. This is useful +when modeling systems at temperatures lower than their classical +limits or when temperatures ramp across the classical limits in a +simulation. -See these two doc pages to get started: +[Author:] Yuan Shen (Stanford U). -"fix qtb"_fix_qtb.html provides quantum nulcear correction through a -colored thermostat and can be used with other time integration schemes -like "fix nve"_fix_nve.html or "fix nph"_fix_nh.html. +[Install or un-install:] + +make yes-user-qtb +make machine :pre + +make no-user-qtb +make machine :pre + +[Supporting info:] -"fix qbmsst"_fix_qbmsst.html enables quantum nuclear correction of a -multi-scale shock technique simulation by coupling the quantum thermal -bath with the shocked system. - -The person who created this package is Yuan Shen (sy0302 at -stanford.edu) at Stanford University. Contact him directly if you -have questions. - -Examples: examples/USER/qtb +src/USER-QTB: filenames -> commands +src/USER-QTB/README +"fix qtb"_fix_qtb.html +"fix qbmsst"_fix_qbmsst.html +examples/USER/qtb :ul :line -USER-QUIP package :link(USER-QUIP),h5 +USER-QUIP package :link(USER-QUIP),h4 -Supporting info: +[Contents:] -Examples: examples/USER/quip +A "pair_style quip"_pair_quip.html command which wraps the "QUIP +libAtoms library"_quip, which includes a variety of interatomic +potentials, including Gaussian Approximation Potential (GAP) models +developed by the Cambridge University group. + +:link(quip,https://github.com/libAtoms/QUIP) + +To use this package you must have the QUIP libAtoms library available +on your system. + +[Author:] Albert Bartok (Cambridge University) + +[Install or un-install:] + +Note that to follow these steps to compile and link to the QUIP +library, you must first download and build QUIP on your systems. It +can be obtained from GitHub. 
See step 1 and step 1.1 in the +lib/quip/README file for details on how to do this. Note that it +requires setting two environment variables, QUIP_ROOT and QUIP_ARCH, +which will be accessed by the lib/quip/Makefile.lammps file which is +used when you compile and link LAMMPS with this package. You should +only need to edit this file if the LAMMPS build can not use its +settings to successfully build on your system. + +You can then install/un-install the package and build LAMMPS in the +usual manner: + +make yes-user-quip +make machine :pre + +make no-user-quip +make machine :pre + +[Supporting info:] + +src/USER-QUIP: filenames -> commands +src/USER-QUIP/README +"pair_style quip"_pair_quip.html +examples/USER/quip :ul :line -USER-REAXC package :link(USER-REAXC),h5 +USER-REAXC package :link(USER-REAXC),h4 -Supporting info: +[Contents:] -This package contains a implementation for LAMMPS of the ReaxFF force -field. ReaxFF uses distance-dependent bond-order functions to -represent the contributions of chemical bonding to the potential -energy. It was originally developed by Adri van Duin and the Goddard -group at CalTech. +A pair style which implements the ReaxFF potential in C/C++ (in +contrast to the "REAX package"_#REAX and its Fortran library). ReaxFF +is a universal reactive force field. See the src/USER-REAXC/README file +for more info on differences between the two packages. Also two fixes +for monitoring molecules as bonds are created and destroyed. -The USER-REAXC version of ReaxFF (pair_style reax/c), implemented in -C, should give identical or very similar results to pair_style reax, -which is a ReaxFF implementation on top of a Fortran library, a -version of which library was originally authored by Adri van Duin. +[Author:] Hasan Metin Aktulga (MSU) while at Purdue University. -The reax/c version should be somewhat faster and more scalable, -particularly with respect to the charge equilibration calculation. 
It -should also be easier to build and use since there are no complicating -issues with Fortran memory allocation or linking to a Fortran library. +[Install or un-install:] + +make yes-user-reaxc +make machine :pre + +make no-user-reaxc +make machine :pre + +[Supporting info:] -For technical details about this implementation of ReaxFF, see -this paper: - -Parallel and Scalable Reactive Molecular Dynamics: Numerical Methods -and Algorithmic Techniques, H. M. Aktulga, J. C. Fogarty, -S. A. Pandit, A. Y. Grama, Parallel Computing, in press (2011). - -See the doc page for the pair_style reax/c command for details -of how to use it in LAMMPS. - -The person who created this package is Hasan Metin Aktulga (hmaktulga -at lbl.gov), while at Purdue University. Contact him directly, or -Aidan Thompson at Sandia (athomps at sandia.gov), if you have -questions. - -Examples: examples/reax +src/USER-REAXC: filenames -> commands +src/USER-REAXC/README +"pair_style reax/c"_pair_reaxc.html +"fix reax/c/bonds"_fix_reax_bonds.html +"fix reax/c/species"_fix_reaxc_species.html +examples/reax :ul :line -USER-SMD package :link(USER-SMD),h5 +USER-SMD package :link(USER-SMD),h4 -Supporting info: +[Contents:] -This package implements smoothed Mach dynamics (SMD) in -LAMMPS. Currently, the package has the following features: +An atom style, fixes, computes, and several pair styles which +implements smoothed Mach dynamics (SMD) for solids, which is a model +related to smoothed particle hydrodynamics (SPH) for liquids (see the +"USER-SPH package"_#USER-SPH). -* Does liquids via traditional Smooth Particle Hydrodynamics (SPH) +This package solves solids mechanics problems via a state of the art +stabilized meshless method with hourglass control. It can specify +hydrostatic interactions independently from material strength models, +i.e. pressure and deviatoric stresses are separated. 
It provides many +material models (Johnson-Cook, plasticity with hardening, +Mie-Grueneisen, Polynomial EOS) and allows new material models to be +added. It implements rigid boundary conditions (walls) which can be +specified as surface geometries from *.STL files. -* Also solves solids mechanics problems via a state of the art - stabilized meshless method with hourglass control. +[Author:] Georg Ganzenmuller (Fraunhofer-Institute for High-Speed +Dynamics, Ernst Mach Institute, Germany). -* Can specify hydrostatic interactions independently from material - strength models, i.e. pressure and deviatoric stresses are separated. +[Install or un-install:] -* Many material models available (Johnson-Cook, plasticity with - hardening, Mie-Grueneisen, Polynomial EOS). Easy to add new - material models. +Before building LAMMPS with this package, you must first download the +Eigen library. Eigen is a template library, so you do not need to +build it, just download it. You can do this manually if you prefer; +follow the instructions in lib/smd/README. You can also do it in one +step from the lammps/src dir, using a command like these, which simply +invoke the lib/smd/Install.py script with the specified args: -* Rigid boundary conditions (walls) can be loaded as surface geometries - from *.STL files. +make lib-smd # print help message +make lib-smd args="-g -l" # download in default lib/smd/eigen-eigen-* +make lib-smd args="-h . eigen -g -l" # download in lib/smd/eigen +make lib-smd args="-h ~ eigen -g -l" # download and build in ~/eigen :pre -See the file doc/PDF/SMD_LAMMPS_userguide.pdf to get started. +Note that the final -l switch is to create a symbolic (soft) link +named "includelink" in lib/smd to point to the Eigen dir. When LAMMPS +builds it will use this link. You should not need to edit the +lib/smd/Makefile.lammps file. -There are example scripts for using this package in examples/USER/smd. 
+You can then install/un-install the package and build LAMMPS in the +usual manner: -The person who created this package is Georg Ganzenmuller at the -Fraunhofer-Institute for High-Speed Dynamics, Ernst Mach Institute in -Germany (georg.ganzenmueller at emi.fhg.de). Contact him directly if -you have questions. +make yes-user-smd +make machine :pre + +make no-user-smd +make machine :pre + +[Supporting info:] -Examples: examples/USER/smd +src/USER-SMD: filenames -> commands +src/USER-SMD/README +doc/PDF/SMD_LAMMPS_userguide.pdf +examples/USER/smd +http://lammps.sandia.gov/movies.html#smd :ul :line -USER-SMTBQ package :link(USER-SMTBQ),h5 +USER-SMTBQ package :link(USER-SMTBQ),h4 -Supporting info: +[Contents:] -This package implements the Second Moment Tight Binding - QEq (SMTB-Q) -potential for the description of ionocovalent bonds in oxides. +A pair style which implements a Second Moment Tight Binding model with +QEq charge equilibration (SMTBQ) potential for the description of +ionocovalent bonds in oxides. -There are example scripts for using this package in -examples/USER/smtbq. +[Authors:] Nicolas Salles, Emile Maras, Olivier Politano, and Robert +Tetot (LAAS-CNRS, France). -See this doc page to get started: +[Install or un-install:] + +make yes-user-smtbq +make machine :pre + +make no-user-smtbq +make machine :pre + +[Supporting info:] +src/USER-SMTBQ: filenames -> commands +src/USER-SMTBQ/README "pair_style smtbq"_pair_smtbq.html - -The persons who created the USER-SMTBQ package are Nicolas Salles, -Emile Maras, Olivier Politano, Robert Tetot, who can be contacted at -these email addresses: lammps@u-bourgogne.fr, nsalles@laas.fr. Contact -them directly if you have any questions. - -Examples: examples/USER/smtbq +examples/USER/smtbq :ul :line -USER-SPH package :link(USER-SPH),h5 +USER-SPH package :link(USER-SPH),h4 -Supporting info: +[Contents:] -This package implements smoothed particle hydrodynamics (SPH) in -LAMMPS. 
Currently, the package has the following features: +An atom style, fixes, computes, and several pair styles which +implements smoothed particle hydrodynamics (SPH) for liquids. See the +related "USER-SMD package"_#USER-SMD package for smooth Mach dynamics +(SMD) for solids. -* Tait, ideal gas, Lennard-Jones equation of states, full support for - complete (i.e. internal-energy dependent) equations of state +This package contains ideal gas, Lennard-Jones equation of states, +Tait, and full support for complete (i.e. internal-energy dependent) +equations of state. It allows for plain or Monaghans XSPH integration +of the equations of motion. It has options for density continuity or +density summation to propagate the density field. It has +"set"_set.html command options to set the internal energy and density +of particles from the input script and allows the same quantities to +be output with thermodynamic output or to dump files via the "compute +property/atom"_compute_property_atom.html command. -* Plain or Monaghans XSPH integration of the equations of motion +[Author:] Georg Ganzenmuller (Fraunhofer-Institute for High-Speed +Dynamics, Ernst Mach Institute, Germany). -* Density continuity or density summation to propagate the density field +[Install or un-install:] + +make yes-user-sph +make machine :pre + +make no-user-sph +make machine :pre + +[Supporting info:] -* Commands to set internal energy and density of particles from the - input script - -* Output commands to access internal energy and density for dumping and - thermo output - -See the file doc/PDF/SPH_LAMMPS_userguide.pdf to get started. - -There are example scripts for using this package in examples/USER/sph. - -The person who created this package is Georg Ganzenmuller at the -Fraunhofer-Institute for High-Speed Dynamics, Ernst Mach Institute in -Germany (georg.ganzenmueller at emi.fhg.de). Contact him directly if -you have questions. 
- -Examples: examples/USER/sph +src/USER-SPH: filenames -> commands +src/USER-SPH/README +doc/PDF/SPH_LAMMPS_userguide.pdf +examples/USER/sph +http://lammps.sandia.gov/movies.html#sph :ul :line -USER-TALLY package :link(USER-TALLY),h5 +USER-TALLY package :link(USER-TALLY),h4 -Supporting info: +[Contents:] -Examples: examples/USER/tally +Several compute styles that can be called when pairwise interactions +are calculated to tally information (forces, heat flux, energy, +stress, etc) about individual interactions. + +[Author:] Axel Kohlmeyer (Temple U). + +[Install or un-install:] + +make yes-user-tally +make machine :pre + +make no-user-tally +make machine :pre + +[Supporting info:] + +src/USER-TALLY: filenames -> commands +src/USER-TALLY/README +"compute */tally"_compute_tally.html +examples/USER/tally :ul :line -USER-VTK package :link(USER-VTK),h5 +USER-VTK package :link(USER-VTK),h4 +[Contents:] + +A "dump vtk"_dump_vtk.html command which outputs +snapshot info in the "VTK format"_vtk, enabling visualization by +"Paraview"_paraview or other visualization packages. + +:link(vtk,http://www.vtk.org) +:link(paraview,http://www.paraview.org) + +To use this package you must have the VTK library available on your +system. + +[Authors:] Richard Berger (JKU) and Daniel Queteschiner (DCS Computing). + +[Install or un-install:] + +The lib/vtk/Makefile.lammps file has settings for accessing VTK files +and its library, which are required for LAMMPS to build and link with +this package. If the settings are not valid for your system, check if +one of the other lib/vtk/Makefile.lammps.* files is compatible and +copy it to Makefile.lammps. If none of the provided files work, you +will need to edit the Makefile.lammps file. 
+ +You can then install/un-install the package and build LAMMPS in the +usual manner: + +make yes-user-vtk +make machine :pre + +make no-user-vtk +make machine :pre + +[Supporting info:] + +src/USER-VTK: filenames -> commands +src/USER-VTK/README +lib/vtk/README +"dump vtk"_dump_vtk.html :ul diff --git a/doc/src/Section_python.txt b/doc/src/Section_python.txt index ff26d18e06..718e9e229c 100644 --- a/doc/src/Section_python.txt +++ b/doc/src/Section_python.txt @@ -118,18 +118,21 @@ check which version of Python you have installed, by simply typing 11.2 Overview of using Python from a LAMMPS script :link(py_2),h4 -NOTE: It is not currently possible to use the "python"_python.html -command described in this section with Python 3, only with Python 2. -The C API changed from Python 2 to 3 and the LAMMPS code is not -compatible with both. +LAMMPS has several commands which can be used to invoke Python +code directly from an input script: -LAMMPS has a "python"_python.html command which can be used in an -input script to define and execute a Python function that you write -the code for. The Python function can also be assigned to a LAMMPS -python-style variable via the "variable"_variable.html command. Each -time the variable is evaluated, either in the LAMMPS input script -itself, or by another LAMMPS command that uses the variable, this will -trigger the Python function to be invoked. +"python"_python.html +"variable python"_variable.html +"fix python"_fix_python.html +"pair_style python"_pair_python.html :ul + +The "python"_python.html command can be used to define and +execute a Python function that you write the code for. The Python +function can also be assigned to a LAMMPS python-style variable via +the "variable"_variable.html command. Each time the variable is +evaluated, either in the LAMMPS input script itself, or by another +LAMMPS command that uses the variable, this will trigger the Python +function to be invoked. 
The Python code for the function can be included directly in the input script or in an auxiliary file. The function can have arguments which @@ -162,8 +165,16 @@ doc page for its python-style variables for more info, including examples of Python code you can write for both pure Python operations and callbacks to LAMMPS. -To run pure Python code from LAMMPS, you only need to build LAMMPS -with the PYTHON package installed: +The "fix python"_fix_python.html command can execute +Python code at selected timesteps during a simulation run. + +The "pair_style python"_pair_python.html command allows you to define +pairwise potentials as python code which encodes a single pairwise +interaction. This is useful for rapid development and debugging of a +new potential. + +To use any of these commands, you only need to build LAMMPS with the +PYTHON package installed: make yes-python make machine :pre diff --git a/doc/src/Section_start.txt b/doc/src/Section_start.txt index 47643569e6..dcd320655f 100644 --- a/doc/src/Section_start.txt +++ b/doc/src/Section_start.txt @@ -14,12 +14,11 @@ experienced users. 
2.1 "What's in the LAMMPS distribution"_#start_1 2.2 "Making LAMMPS"_#start_2 2.3 "Making LAMMPS with optional packages"_#start_3 -2.4 "Building LAMMPS via the Make.py script"_#start_4 -2.5 "Building LAMMPS as a library"_#start_5 -2.6 "Running LAMMPS"_#start_6 -2.7 "Command-line options"_#start_7 -2.8 "Screen output"_#start_8 -2.9 "Tips for users of previous versions"_#start_9 :all(b) +2.4 "Building LAMMPS as a library"_#start_4 +2.5 "Running LAMMPS"_#start_5 +2.6 "Command-line options"_#start_6 +2.7 "Screen output"_#start_7 +2.8 "Tips for users of previous versions"_#start_8 :all(b) :line @@ -80,7 +79,7 @@ This section has the following sub-sections: Read this first :h5,link(start_2_1) -If you want to avoid building LAMMPS yourself, read the preceding +If you want to avoid building LAMMPS yourself, read the preceding section about options available for downloading and installing executables. Details are discussed on the "download"_download page. @@ -96,7 +95,7 @@ make serial :pre Note that on a facility supercomputer, there are often "modules" loaded in your environment that provide the compilers and MPI you should use. In this case, the "mpicxx" compile/link command in -Makefile.mpi should just work by accessing those modules. +Makefile.mpi should simply work by accessing those modules. It may be the case that one of the other Makefile.machine files in the src/MAKE sub-directories is a better match to your system (type "make" @@ -107,33 +106,35 @@ make stampede :pre If any of these builds (with an existing Makefile.machine) works on your system, then you're done! +If you need to install an optional package with a LAMMPS command you +want to use, and the package does not depend on an extra library, you +can simply type + +make name :pre + +before invoking (or re-invoking) the above steps. "Name" is the +lower-case name of the package, e.g. replica or user-misc. 
+ If you want to do one of the following: -use optional LAMMPS features that require additional libraries -use optional packages that require additional libraries -use optional accelerator packages that require special compiler/linker settings -run on a specialized platform that has its own compilers, settings, or other libs to use :ul +use a LAMMPS command that requires an extra library (e.g. "dump image"_dump_image.html) +build with a package that requires an extra library +build with an accelerator package that requires special compiler/linker settings +run on a machine that has its own compilers, settings, or libraries :ul then building LAMMPS is more complicated. You may need to find where -auxiliary libraries exist on your machine or install them if they -don't. You may need to build additional libraries that are part of -the LAMMPS package, before building LAMMPS. You may need to edit a +extra libraries exist on your machine or install them if they don't. +You may need to build extra libraries that are included in the LAMMPS +distribution, before building LAMMPS itself. You may need to edit a Makefile.machine file to make it compatible with your system. -Note that there is a Make.py tool in the src directory that automates -several of these steps, but you still have to know what you are doing. -"Section 2.4"_#start_4 below describes the tool. It is a convenient -way to work with installing/un-installing various packages, the -Makefile.machine changes required by some packages, and the auxiliary -libraries some of them use. - Please read the following sections carefully. If you are not comfortable with makefiles, or building codes on a Unix platform, or running an MPI job on your machine, please find a local expert to help -you. Many compilation, linking, and run problems that users have are -often not really LAMMPS issues - they are peculiar to the user's -system, compilers, libraries, etc. Such questions are better answered -by a local expert. +you. 
Many compilation, linking, and run problems users experience are +often not LAMMPS issues - they are peculiar to the user's system, +compilers, libraries, etc. Such questions are better answered by a +local expert. If you have a build problem that you are convinced is a LAMMPS issue (e.g. the compiler complains about a line of LAMMPS source code), then @@ -251,7 +252,7 @@ re-compile, after typing "make clean" (which will describe different clean options). The LMP_INC variable is used to include options that turn on ifdefs -within the LAMMPS code. The options that are currently recognized are: +within the LAMMPS code. The options that are currently recognized are: -DLAMMPS_GZIP -DLAMMPS_JPEG @@ -362,7 +363,7 @@ installed on your platform. If MPI is installed on your system in the usual place (under /usr/local), you also may not need to specify these 3 variables, assuming /usr/local is in your path. On some large parallel machines which use "modules" for their compile/link -environments, you may simply need to include the correct module in +environments, you may simply need to include the correct module in your build environment, before building LAMMPS. Or the parallel machine may have a vendor-provided MPI which the compiler has no trouble finding. @@ -430,7 +431,7 @@ use the KISS library described above. You may also need to set the FFT_INC, FFT_PATH, and FFT_LIB variables, so the compiler and linker can find the needed FFT header and library files. Note that on some large parallel machines which use "modules" -for their compile/link environments, you may simply need to include +for their compile/link environments, you may simply need to include the correct module in your build environment. Or the parallel machine may have a vendor-provided FFT library which the compiler has no trouble finding. 
@@ -450,7 +451,7 @@ you must also manually specify the correct library, namely -lsfftw or The FFT_INC variable also allows for a -DFFT_SINGLE setting that will use single-precision FFTs with PPPM, which can speed-up long-range -calculations, particularly in parallel or on GPUs. Fourier transform +calculations, particularly in parallel or on GPUs. Fourier transform and related PPPM operations are somewhat insensitive to floating point truncation errors and thus do not always need to be performed in double precision. Using the -DFFT_SINGLE setting trades off a little @@ -508,13 +509,13 @@ You should get the executable lmp_foo when the build is complete. Errors that can occur when making LAMMPS: h5 :link(start_2_3) -NOTE: If an error occurs when building LAMMPS, the compiler or linker -will state very explicitly what the problem is. The error message -should give you a hint as to which of the steps above has failed, and -what you need to do in order to fix it. Building a code with a -Makefile is a very logical process. The compiler and linker need to -find the appropriate files and those files need to be compatible with -LAMMPS source files. When a make fails, there is usually a very +If an error occurs when building LAMMPS, the compiler or linker will +state very explicitly what the problem is. The error message should +give you a hint as to which of the steps above has failed, and what +you need to do in order to fix it. Building a code with a Makefile is +a very logical process. The compiler and linker need to find the +appropriate files and those files need to be compatible with LAMMPS +settings and source files. When a make fails, there is usually a very simple reason, which you or a local expert will need to fix. Here are two non-obvious errors that can occur: @@ -557,7 +558,8 @@ Typing "make clean-all" or "make clean-machine" will delete *.o object files created when LAMMPS is built, for either all builds or for a particular machine. 
-Changing the LAMMPS size limits via -DLAMMPS_SMALLBIG or -DLAMMPS_BIGBIG or -DLAMMPS_SMALLSMALL :h6 +Changing the LAMMPS size limits via -DLAMMPS_SMALLBIG or +-DLAMMPS_BIGBIG or -DLAMMPS_SMALLSMALL :h6 As explained above, any of these 3 settings can be specified on the LMP_INC line in your low-level src/MAKE/Makefile.foo. @@ -653,13 +655,7 @@ This section has the following sub-sections: 2.3.1 "Package basics"_#start_3_1 2.3.2 "Including/excluding packages"_#start_3_2 -2.3.3 "Packages that require extra libraries"_#start_3_3 -2.3.4 "Packages that require Makefile.machine settings"_#start_3_4 :all(b) - -Note that the following "Section 2.4"_#start_4 describes the Make.py -tool which can be used to install/un-install packages and build the -auxiliary libraries which some of them use. It can also auto-edit a -Makefile.machine to add settings needed by some packages. +2.3.3 "Packages that require extra libraries"_#start_3_3 :all(b) :line @@ -670,235 +666,221 @@ are always included, plus optional packages. Packages are groups of files that enable a specific set of features. For example, force fields for molecular systems or granular systems are in packages. -"Section 4"_Section_packages.html in the manual has details -about all the packages, including specific instructions for building -LAMMPS with each package, which are covered in a more general manner +"Section 4"_Section_packages.html in the manual has details about all +the packages, which come in two flavors: [standard] and [user] +packages. It also has specific instructions for building LAMMPS with +any package which requires an extra library. General instructions are below. You can see the list of all packages by typing "make package" from -within the src directory of the LAMMPS distribution. This also lists -various make commands that can be used to manipulate packages. +within the src directory of the LAMMPS distribution. It will also +list various make commands that can be used to manage packages. 
If you use a command in a LAMMPS input script that is part of a package, you must have built LAMMPS with that package, else you will get an error that the style is invalid or the command is unknown. -Every command's doc page specifies if it is part of a package. You can -also type +Every command's doc page specifies if it is part of a package. You can +type lmp_machine -h :pre to run your executable with the optional "-h command-line -switch"_#start_7 for "help", which will simply list the styles and -commands known to your executable, and immediately exit. - -There are two kinds of packages in LAMMPS, standard and user packages. -More information about the contents of standard and user packages is -given in "Section 4"_Section_packages.html of the manual. The -difference between standard and user packages is as follows: - -Standard packages, such as molecule or kspace, are supported by the -LAMMPS developers and are written in a syntax and style consistent -with the rest of LAMMPS. This means we will answer questions about -them, debug and fix them if necessary, and keep them compatible with -future changes to LAMMPS. - -User packages, such as user-atc or user-omp, have been contributed by -users, and always begin with the user prefix. If they are a single -command (single file), they are typically in the user-misc package. -Otherwise, they are a set of files grouped together which add a -specific functionality to the code. - -User packages don't necessarily meet the requirements of the standard -packages. If you have problems using a feature provided in a user -package, you may need to contact the contributor directly to get help. -Information on how to submit additions you make to LAMMPS as single -files or either a standard or user-contributed package are given in -"this section"_Section_modify.html#mod_15 of the documentation. +switch"_#start_7 for "help", which will list the styles and commands +known to your executable, and immediately exit. 
:line Including/excluding packages :h5,link(start_3_2) -To use (or not use) a package you must include it (or exclude it) -before building LAMMPS. From the src directory, this is typically as -simple as: +To use (or not use) a package you must install it (or un-install it) +before building LAMMPS. From the src directory, this is as simple as: make yes-colloid make mpi :pre or -make no-manybody +make no-user-omp make mpi :pre -NOTE: You should NOT include/exclude packages and build LAMMPS in a +NOTE: You should NOT install/un-install packages and build LAMMPS in a single make command using multiple targets, e.g. make yes-colloid mpi. This is because the make procedure creates a list of source files that will be out-of-date for the build if the package configuration changes within the same command. -Some packages have individual files that depend on other packages -being included. LAMMPS checks for this and does the right thing. -I.e. individual files are only included if their dependencies are -already included. Likewise, if a package is excluded, other files +Any package can be installed or not in a LAMMPS build, independent of +all other packages. However, some packages include files derived from +files in other packages. LAMMPS checks for this and does the right +thing. I.e. individual files are only included if their dependencies +are already included. Likewise, if a package is excluded, other files dependent on that package are also excluded. +NOTE: The one exception is that we do not recommend building with both +the KOKKOS package installed and any of the other acceleration +packages (GPU, OPT, USER-INTEL, USER-OMP) also installed. This is +because of how Kokkos sometimes builds using a wrapper compiler which +can make it difficult to invoke all the compile/link flags correctly +for both Kokkos and non-Kokkos files. + If you will never run simulations that use the features in a particular packages, there is no reason to include it in your build. 
-For some packages, this will keep you from having to build auxiliary -libraries (see below), and will also produce a smaller executable -which may run a bit faster. +For some packages, this will keep you from having to build extra +libraries, and will also produce a smaller executable which may run a +bit faster. -When you download a LAMMPS tarball, these packages are pre-installed -in the src directory: KSPACE, MANYBODY,MOLECULE, because they are so -commonly used. When you download LAMMPS source files from the SVN or -Git repositories, no packages are pre-installed. +When you download a LAMMPS tarball, three packages are pre-installed +in the src directory -- KSPACE, MANYBODY, MOLECULE -- because they are +so commonly used. When you download LAMMPS source files from the SVN +or Git repositories, no packages are pre-installed. -Packages are included or excluded by typing "make yes-name" or "make -no-name", where "name" is the name of the package in lower-case, e.g. -name = kspace for the KSPACE package or name = user-atc for the -USER-ATC package. You can also type "make yes-standard", "make -no-standard", "make yes-std", "make no-std", "make yes-user", "make -no-user", "make yes-lib", "make no-lib", "make yes-all", or "make -no-all" to include/exclude various sets of packages. Type "make -package" to see all of the package-related make options. +Packages are installed or un-installed by typing -NOTE: Inclusion/exclusion of a package works by simply moving files -back and forth between the main src directory and sub-directories with -the package name (e.g. src/KSPACE, src/USER-ATC), so that the files -are seen or not seen when LAMMPS is built. After you have included or -excluded a package, you must re-build LAMMPS. +make yes-name +make no-name :pre -Additional package-related make options exist to help manage LAMMPS -files that exist in both the src directory and in package -sub-directories. 
You do not normally need to use these commands -unless you are editing LAMMPS files or have downloaded a patch from -the LAMMPS WWW site. +where "name" is the name of the package in lower-case, e.g. name = +kspace for the KSPACE package or name = user-atc for the USER-ATC +package. You can also type any of these commands: -Typing "make package-update" or "make pu" will overwrite src files -with files from the package sub-directories if the package has been -included. It should be used after a patch is installed, since patches -only update the files in the package sub-directory, but not the src -files. Typing "make package-overwrite" will overwrite files in the -package sub-directories with src files. +make yes-all | install all packages +make no-all | un-install all packages +make yes-standard or make yes-std | install standard packages +make no-standard or make no-std| un-install standard packages +make yes-user | install user packages +make no-user | un-install user packages +make yes-lib | install packages that require extra libraries +make no-lib | un-install packages that require extra libraries +make yes-ext | install packages that require external libraries +make no-ext | un-install packages that require external libraries :tb(s=|) + +which install/un-install various sets of packages. Typing "make +package" will list all of these commands. + +NOTE: Installing or un-installing a package works by simply moving +files back and forth between the main src directory and +sub-directories with the package name (e.g. src/KSPACE, src/USER-ATC), +so that the files are included or excluded when LAMMPS is built. +After you have installed or un-installed a package, you must re-build +LAMMPS for the action to take effect. + +The following make commands help manage files that exist in both the +src directory and in package sub-directories. 
You do not normally +need to use these commands unless you are editing LAMMPS files or have +downloaded a patch from the LAMMPS web site. Typing "make package-status" or "make ps" will show which packages are -currently included. For those that are included, it will list any +currently installed. For those that are installed, it will list any files that are different in the src directory and package -sub-directory. Typing "make package-diff" lists all differences -between these files. Again, type "make package" to see all of the -package-related make options. +sub-directory. + +Typing "make package-update" or "make pu" will overwrite src files +with files from the package sub-directories if the package is +installed. It should be used after a patch has been applied, since +patches only update the files in the package sub-directory, but not +the src files. + +Typing "make package-overwrite" will overwrite files in the package +sub-directories with src files. + +Typing "make package-diff" lists all differences between these files. + +Again, just type "make package" to see all of the package-related make +options. :line Packages that require extra libraries :h5,link(start_3_3) -A few of the standard and user packages require additional auxiliary -libraries. Many of them are provided with LAMMPS, in which case they -must be compiled first, before LAMMPS is built, if you wish to include -that package. If you get a LAMMPS build error about a missing -library, this is likely the reason. See the -"Section 4"_Section_packages.html doc page for a list of -packages that have these kinds of auxiliary libraries. +A few of the standard and user packages require extra libraries. See +"Section 4"_Section_packages.html for two tables of packages which +indicate which ones require libraries. For each such package, the +Section 4 doc page gives details on how to build the extra library, +including how to download it if necessary. The basic ideas are +summarized here. 
-The lib directory in the distribution has sub-directories with package -names that correspond to the needed auxiliary libs, e.g. lib/gpu. -Each sub-directory has a README file that gives more details. Code -for most of the auxiliary libraries is included in that directory. -Examples are the USER-ATC and MEAM packages. +[System libraries:] -A few of the lib sub-directories do not include code, but do include -instructions (and sometimes scripts) that automate the process of -downloading the auxiliary library and installing it so LAMMPS can link -to it. Examples are the KIM, VORONOI, USER-MOLFILE, and USER-SMD -packages. +Packages in the tables "Section 4"_Section_packages.html with a "sys" +in the last column link to system libraries that typically already +exist on your machine. E.g. the python package links to a system +Python library. If your machine does not have the required library, +you will have to download and install it on your machine, in either +the system or user space. -The lib/python directory (for the PYTHON package) contains only a -choice of Makefile.lammps.* files. This is because no auxiliary code -or libraries are needed, only the Python library and other system libs -that should already available on your system. However, the -Makefile.lammps file is needed to tell LAMMPS which libs to use and -where to find them. +[Internal libraries:] -For libraries with provided code, the sub-directory README file -(e.g. lib/atc/README) has instructions on how to build that library. -This information is also summarized in "Section -4"_Section_packages.html. Typically this is done by typing -something like: +Packages in the tables "Section 4"_Section_packages.html with an "int" +in the last column link to internal libraries whose source code is +included with LAMMPS, in the lib/name directory where name is the +package name. You must first build the library in that directory +before building LAMMPS with that package installed. E.g. 
the gpu +package links to a library you build in the lib/gpu dir. You can +often do the build in one step by typing "make lib-name args=..." +from the src dir, with appropriate arguments. You can leave off the +args to see a help message. See "Section 4"_Section_packages.html for +details for each package. -make -f Makefile.g++ :pre +[External libraries:] -If one of the provided Makefiles is not appropriate for your system -you will need to edit or add one. Note that all the Makefiles have a -setting for EXTRAMAKE at the top that specifies a Makefile.lammps.* -file. +Packages in the tables "Section 4"_Section_packages.html with an "ext" +in the last column link to external libraries whose source code is not +included with LAMMPS. You must first download and install the library +before building LAMMPS with that package installed. E.g. the voronoi +package links to the freely available "Voro++ library"_voro_home2. You +can often do the download/build in one step by typing "make lib-name +args=..." from the src dir, with appropriate arguments. You can leave +off the args to see a help message. See "Section +4"_Section_packages.html for details for each package. -If the library build is successful, it will produce 2 files in the lib -directory: +:link(voro_home2,http://math.lbl.gov/voro++) -libpackage.a -Makefile.lammps :pre +[Possible errors:] -The Makefile.lammps file will typically be a copy of one of the -Makefile.lammps.* files in the library directory. +There are various common errors which can occur when building extra +libraries or when building LAMMPS with packages that require the extra +libraries. -Note that you must insure that the settings in Makefile.lammps are -appropriate for your system. If they are not, the LAMMPS build may -fail. To fix this, you can edit or create a new Makefile.lammps.* -file for your system, and copy it to Makefile.lammps. 
+If you cannot build the extra library itself successfully, you may +need to edit or create an appropriate Makefile for your machine, e.g. +with appropriate compiler or system settings. Provided makefiles are +typically in the lib/name directory. E.g. see the Makefile.* files in +lib/gpu. -As explained in the lib/package/README files, the settings in -Makefile.lammps are used to specify additional system libraries and -their locations so that LAMMPS can build with the auxiliary library. -For example, if the MEAM package is used, the auxiliary library -consists of F90 code, built with a Fortran complier. To link that -library with LAMMPS (a C++ code) via whatever C++ compiler LAMMPS is -built with, typically requires additional Fortran-to-C libraries be -included in the link. Another example are the BLAS and LAPACK -libraries needed to use the USER-ATC or USER-AWPMD packages. +The LAMMPS build often uses settings in a lib/name/Makefile.lammps +file which either exists in the LAMMPS distribution or is created or +copied from a lib/name/Makefile.lammps.* file when the library is +built. If those settings are not correct for your machine you will +need to edit or create an appropriate Makefile.lammps file. -For libraries without provided code, the sub-directory README file has -information on where to download the library and how to build it, -e.g. lib/voronoi/README and lib/smd/README. The README files also -describe how you must either (a) create soft links, via the "ln" -command, in those directories to point to where you built or installed -the packages, or (b) check or edit the Makefile.lammps file in the -same directory to provide that information. +Package-specific details for these steps are given in "Section +4"_Section_packages.html and in README files in the lib/name +directories. -Some of the sub-directories, e.g. 
lib/voronoi, also have an install.py -script which can be used to automate the process of -downloading/building/installing the auxiliary library, and setting the -needed soft links. Type "python install.py" for further instructions. +[Compiler options needed for accelerator packages:] -As with the sub-directories containing library code, if the soft links -or settings in the lib/package/Makefile.lammps files are not correct, -the LAMMPS build will typically fail. +Several packages contain code that is optimized for specific hardware, +e.g. CPU, KNL, or GPU. These are the OPT, GPU, KOKKOS, USER-INTEL, +and USER-OMP packages. Compiling and linking the source files in +these accelerator packages for optimal performance requires specific +settings in the Makefile.machine file you use. -:line - -Packages that require Makefile.machine settings :h5,link(start_3_4) - -A few packages require specific settings in Makefile.machine, to -either build or use the package effectively. These are the -USER-INTEL, KOKKOS, USER-OMP, and OPT packages, used for accelerating -code performance on CPUs or other hardware, as discussed in "Section -5.3"_Section_accelerate.html#acc_3. - -A summary of what Makefile.machine changes are needed for each of -these packages is given in "Section 4"_Section_packages.html. -The details are given on the doc pages that describe each of these -accelerator packages in detail: +A summary of the Makefile.machine settings needed for each of these +packages is given in "Section 4"_Section_packages.html. More info is +given on the doc pages that describe each package in detail: 5.3.1 "USER-INTEL package"_accelerate_intel.html +5.3.2 "GPU package"_accelerate_gpu.html 5.3.3 "KOKKOS package"_accelerate_kokkos.html 5.3.4 "USER-OMP package"_accelerate_omp.html 5.3.5 "OPT package"_accelerate_opt.html :all(b) -You can also look at the following machine Makefiles in -src/MAKE/OPTIONS, which include the changes. 
Note that the USER-INTEL -and KOKKOS packages allow for settings that build LAMMPS for different -hardware. The USER-INTEL package builds for CPU and the Xeon Phi, the -KOKKOS package builds for OpenMP, GPUs (Cuda), and the Xeon Phi. +You can also use or examine the following machine Makefiles in +src/MAKE/OPTIONS, which include the settings. Note that the +USER-INTEL and KOKKOS packages can use settings that build LAMMPS for +different hardware. The USER-INTEL package can be compiled for Intel +CPUs and KNLs; the KOKKOS package builds for CPUs (OpenMP), GPUs +(Cuda), and Intel KNLs. Makefile.intel_cpu Makefile.intel_phi @@ -908,127 +890,9 @@ Makefile.kokkos_phi Makefile.omp Makefile.opt :ul -Also note that the Make.py tool, described in the next "Section -2.4"_#start_4 can automatically add the needed info to an existing -machine Makefile, using simple command-line arguments. - :line -2.4 Building LAMMPS via the Make.py tool :h4,link(start_4) - -The src directory includes a Make.py script, written in Python, which -can be used to automate various steps of the build process. It is -particularly useful for working with the accelerator packages, as well -as other packages which require auxiliary libraries to be built. - -The goal of the Make.py tool is to allow any complex multi-step LAMMPS -build to be performed as a single Make.py command. And you can -archive the commands, so they can be re-invoked later via the -r -(redo) switch. If you find some LAMMPS build procedure that can't be -done in a single Make.py command, let the developers know, and we'll -see if we can augment the tool. - -You can run Make.py from the src directory by typing either: - -Make.py -h -python Make.py -h :pre - -which will give you help info about the tool. For the former to work, -you may need to edit the first line of Make.py to point to your local -Python. 
And you may need to insure the script is executable: - -chmod +x Make.py :pre - -Here are examples of build tasks you can perform with Make.py: - -Install/uninstall packages: Make.py -p no-lib kokkos omp intel -Build specific auxiliary libs: Make.py -a lib-atc lib-meam -Build libs for all installed packages: Make.py -p cuda gpu -gpu mode=double arch=31 -a lib-all -Create a Makefile from scratch with compiler and MPI settings: Make.py -m none -cc g++ -mpi mpich -a file -Augment Makefile.serial with settings for installed packages: Make.py -p intel -intel cpu -m serial -a file -Add JPG and FFTW support to Makefile.mpi: Make.py -m mpi -jpg -fft fftw -a file -Build LAMMPS with a parallel make using Makefile.mpi: Make.py -j 16 -m mpi -a exe -Build LAMMPS and libs it needs using Makefile.serial with accelerator settings: Make.py -p gpu intel -intel cpu -a lib-all file serial :tb(s=:) - -The bench and examples directories give Make.py commands that can be -used to build LAMMPS with the various packages and options needed to -run all the benchmark and example input scripts. See these files for -more details: - -bench/README -bench/FERMI/README -bench/KEPLER/README -bench/PHI/README -examples/README -examples/accelerate/README -examples/accelerate/make.list :ul - -All of the Make.py options and syntax help can be accessed by using -the "-h" switch. - -E.g. typing "Make.py -h" gives - -Syntax: Make.py switch args ... 
- switches can be listed in any order - help switch: - -h prints help and syntax for all other specified switches - switch for actions: - -a lib-all, lib-dir, clean, file, exe or machine - list one or more actions, in any order - machine is a Makefile.machine suffix, must be last if used - one-letter switches: - -d (dir), -j (jmake), -m (makefile), -o (output), - -p (packages), -r (redo), -s (settings), -v (verbose) - switches for libs: - -atc, -awpmd, -colvars, -cuda - -gpu, -meam, -poems, -qmmm, -reax - switches for build and makefile options: - -intel, -kokkos, -cc, -mpi, -fft, -jpg, -png :pre - -Using the "-h" switch with other switches and actions gives additional -info on all the other specified switches or actions. The "-h" can be -anywhere in the command-line and the other switches do not need their -arguments. E.g. type "Make.py -h -d -atc -intel" will print: - --d dir - dir = LAMMPS home dir - if -d not specified, working dir must be lammps/src :pre - --atc make=suffix lammps=suffix2 - all args are optional and can be in any order - make = use Makefile.suffix (def = g++) - lammps = use Makefile.lammps.suffix2 (def = EXTRAMAKE in makefile) :pre - --intel mode - mode = cpu or phi (def = cpu) - build Intel package for CPU or Xeon Phi :pre - -Note that Make.py never overwrites an existing Makefile.machine. -Instead, it creates src/MAKE/MINE/Makefile.auto, which you can save or -rename if desired. Likewise it creates an executable named -src/lmp_auto, which you can rename using the -o switch if desired. - -The most recently executed Make.py command is saved in -src/Make.py.last. You can use the "-r" switch (for redo) to re-invoke -the last command, or you can save a sequence of one or more Make.py -commands to a file and invoke the file of commands using "-r". You -can also label the commands in the file and invoke one or more of them -by name. 
- -A typical use of Make.py is to start with a valid Makefile.machine for -your system, that works for a vanilla LAMMPS build, i.e. when optional -packages are not installed. You can then use Make.py to add various -settings (FFT, JPG, PNG) to the Makefile.machine as well as change its -compiler and MPI options. You can also add additional packages to the -build, as well as build the needed supporting libraries. - -You can also use Make.py to create a new Makefile.machine from -scratch, using the "-m none" switch, if you also specify what compiler -and MPI options to use, via the "-cc" and "-mpi" switches. - -:line - -2.5 Building LAMMPS as a library :h4,link(start_5) +2.4 Building LAMMPS as a library :h4,link(start_4) LAMMPS can be built as either a static or shared library, which can then be called from another application or a scripting language. See @@ -1064,7 +928,7 @@ src/MAKE/Makefile.foo and perform the build in the directory Obj_shared_foo. This is so that each file can be compiled with the -fPIC flag which is required for inclusion in a shared library. The build will create the file liblammps_foo.so which another application -can link to dynamically. It will also create a soft link liblammps.so, +can link to dynamically. It will also create a soft link liblammps.so, which will point to the most recently built shared library. This is the file the Python wrapper loads by default. @@ -1150,7 +1014,7 @@ interface and how to extend it for your needs. :line -2.6 Running LAMMPS :h4,link(start_6) +2.5 Running LAMMPS :h4,link(start_5) By default, LAMMPS runs by reading commands from standard input. Thus if you run the LAMMPS executable by itself, e.g. @@ -1282,7 +1146,7 @@ more processors or setup a smaller problem. :line -2.7 Command-line options :h4,link(start_7) +2.6 Command-line options :h4,link(start_6) At run time, LAMMPS recognizes several optional command-line switches which may be used in any order. 
Either the full word or a one-or-two @@ -1416,8 +1280,8 @@ LAMMPS is compiled with CUDA=yes. numa Nm :pre This option is only relevant when using pthreads with hwloc support. -In this case Nm defines the number of NUMA regions (typically sockets) -on a node which will be utilized by a single MPI rank. By default Nm +In this case Nm defines the number of NUMA regions (typically sockets) +on a node which will be utilized by a single MPI rank. By default Nm = 1. If this option is used the total number of worker-threads per MPI rank is threads*numa. Currently it is always almost better to assign at least one MPI rank per NUMA region, and leave numa set to @@ -1481,7 +1345,7 @@ replica runs on on one or a few processors. Note that with MPI installed on a machine (e.g. your desktop), you can run on more (virtual) processors than you have physical processors. -To run multiple independent simulations from one input script, using +To run multiple independent simulations from one input script, using multiple partitions, see "Section 6.4"_Section_howto.html#howto_4 of the manual. World- and universe-style "variables"_variable.html are useful in this context. @@ -1712,7 +1576,7 @@ negative numeric value. It is OK if the first value1 starts with a :line -2.8 LAMMPS screen output :h4,link(start_8) +2.7 LAMMPS screen output :h4,link(start_7) As LAMMPS reads an input script, it prints information to both the screen and a log file about significant actions it takes to setup a @@ -1760,7 +1624,7 @@ The first section provides a global loop timing summary. The {loop time} is the total wall time for the section. The {Performance} line is provided for convenience to help predicting the number of loop continuations required and for comparing performance with other, -similar MD codes. The {CPU use} line provides the CPU utilization per +similar MD codes. 
The {CPU use} line provides the CPU utilization per MPI task; it should be close to 100% times the number of OpenMP threads (or 1 of no OpenMP). Lower numbers correspond to delays due to file I/O or insufficient thread utilization. @@ -1868,7 +1732,7 @@ communication, roughly 75% in the example above. :line -2.9 Tips for users of previous LAMMPS versions :h4,link(start_9) +2.8 Tips for users of previous LAMMPS versions :h4,link(start_8) The current C++ began with a complete rewrite of LAMMPS 2001, which was written in F90. Features of earlier versions of LAMMPS are listed diff --git a/doc/src/Section_tools.txt b/doc/src/Section_tools.txt index 03611c7cdb..d95c4f0cd4 100644 --- a/doc/src/Section_tools.txt +++ b/doc/src/Section_tools.txt @@ -369,15 +369,18 @@ supports it. It has its own WWW page at msi2lmp tool :h4,link(msi) -The msi2lmp sub-directory contains a tool for creating LAMMPS input -data files from BIOVIA's Materias Studio files (formerly Accelrys' +The msi2lmp sub-directory contains a tool for creating LAMMPS template +input and data files from BIOVIA's Materials Studio files (formerly Accelrys' Insight MD code, formerly MSI/Biosym and its Discover MD code). This tool was written by John Carpenter (Cray), Michael Peachey (Cray), and Steve Lustig (Dupont). Several people contributed changes to remove bugs and adapt its output to changes in LAMMPS. -See the README file for more information. +This tool has several known limitations and is no longer under active +development, so there are no changes except for the occasional bugfix. + +See the README file in the tools/msi2lmp folder for more information. :line diff --git a/doc/src/accelerate_intel.txt b/doc/src/accelerate_intel.txt index 581c440bc3..d629828f12 100644 --- a/doc/src/accelerate_intel.txt +++ b/doc/src/accelerate_intel.txt @@ -69,8 +69,9 @@ not {hardware thread}. For Intel Xeon CPUs: Edit src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi as necessary. 
:ulb,l -If using {kspace_style pppm} in the input script, add "neigh_modify binsize 3" and "kspace_modify diff ad" to the input script for better -performance. :l +If using {kspace_style pppm} in the input script, add "neigh_modify binsize cutoff" and "kspace_modify diff ad" to the input script for better +performance. Cutoff should be roughly the neighbor list cutoff. By +default the binsize is half the neighbor list cutoff. :l "-pk intel 0 omp 2 -sf intel" added to LAMMPS command-line :l :ule diff --git a/doc/src/accelerate_kokkos.txt b/doc/src/accelerate_kokkos.txt index 3bfd4bf379..602c3191f6 100644 --- a/doc/src/accelerate_kokkos.txt +++ b/doc/src/accelerate_kokkos.txt @@ -415,15 +415,15 @@ For binding threads with the KOKKOS OMP option, use thread affinity environment variables to force binding. With OpenMP 3.1 (gcc 4.7 or later, intel 12 or later) setting the environment variable OMP_PROC_BIND=true should be sufficient. For binding threads with the -KOKKOS pthreads option, compile LAMMPS the KOKKOS HWLOC=yes option, as -discussed in "Section 2.3.4"_Sections_start.html#start_3_4 of the -manual. +KOKKOS pthreads option, compile LAMMPS with the KOKKOS HWLOC=yes option +(see "this section"_Section_packages.html#KOKKOS of the manual for +details). [Running on GPUs:] Insure the -arch setting in the machine makefile you are using, -e.g. src/MAKE/Makefile.cuda, is correct for your GPU hardware/software -(see "this section"_Section_start.html#start_3_4 of the manual for +e.g. src/MAKE/Makefile.cuda, is correct for your GPU hardware/software +(see "this section"_Section_packages.html#KOKKOS of the manual for details). The -np setting of the mpirun command should set the number of MPI diff --git a/doc/src/angle_sdk.txt b/doc/src/angle_sdk.txt index 785585f840..0cc535e543 100644 --- a/doc/src/angle_sdk.txt +++ b/doc/src/angle_sdk.txt @@ -46,7 +46,7 @@ from the pair_style. [Restrictions:] This angle style can only be used if LAMMPS was built with the -USER-CG-CMM package. 
See the "Making +USER-CGSDK package. See the "Making LAMMPS"_Section_start.html#start_3 section for more info on packages. [Related commands:] diff --git a/doc/src/bond_oxdna.txt b/doc/src/bond_oxdna.txt index 6cdbbd3546..f9b35a167c 100644 --- a/doc/src/bond_oxdna.txt +++ b/doc/src/bond_oxdna.txt @@ -46,9 +46,7 @@ for excluded volume interaction {oxdna/excv}, stacking {oxdna/stk}, cross-stacki and coaxial stacking interaction {oxdna/coaxstk} as well as hydrogen-bonding interaction {oxdna/hbond} (see also documentation of "pair_style oxdna/excv"_pair_oxdna.html). For the oxDNA2 "(Snodin)"_#oxdna2 bond style the analogous pair styles and an additional Debye-Hueckel pair style {oxdna2/dh} have to be defined. - -The coefficients -in the above example have to be kept fixed and cannot be changed without reparametrizing the entire model. +The coefficients in the above example have to be kept fixed and cannot be changed without reparametrizing the entire model. Example input and data files for DNA duplexes can be found in examples/USER/cgdna/examples/oxDNA/ and /oxDNA2/. 
A simple python setup tool which creates single straight or helical DNA strands, diff --git a/doc/src/bonds.txt b/doc/src/bonds.txt index 3b50f6482f..169d56ecbe 100644 --- a/doc/src/bonds.txt +++ b/doc/src/bonds.txt @@ -16,7 +16,6 @@ Bond Styles :h1 bond_none bond_nonlinear bond_oxdna - bond_oxdna2 bond_quartic bond_table bond_zero diff --git a/doc/src/commands.txt b/doc/src/commands.txt index 2fdb69ea4d..7889ea5e7a 100644 --- a/doc/src/commands.txt +++ b/doc/src/commands.txt @@ -32,12 +32,12 @@ Commands :h1 dimension displace_atoms dump - dump_custom_vtk dump_h5md dump_image dump_modify dump_molfile - dump_nc + dump_netcdf + dump_vtk echo fix fix_modify diff --git a/doc/src/compute_sna_atom.txt b/doc/src/compute_sna_atom.txt index e2df706473..f82df0d816 100644 --- a/doc/src/compute_sna_atom.txt +++ b/doc/src/compute_sna_atom.txt @@ -24,7 +24,7 @@ twojmax = band limit for bispectrum components (non-negative integer) :l R_1, R_2,... = list of cutoff radii, one for each type (distance units) :l w_1, w_2,... = list of neighbor weights, one for each type :l zero or more keyword/value pairs may be appended :l -keyword = {diagonal} or {rmin0} or {switchflag} or {bzeroflag} :l +keyword = {diagonal} or {rmin0} or {switchflag} or {bzeroflag} or {quadraticflag}:l {diagonal} value = {0} or {1} or {2} or {3} {0} = all j1, j2, j <= twojmax, j2 <= j1 {1} = subset satisfying j1 == j2 @@ -36,7 +36,10 @@ keyword = {diagonal} or {rmin0} or {switchflag} or {bzeroflag} :l {1} = use switching function {bzeroflag} value = {0} or {1} {0} = do not subtract B0 - {1} = subtract B0 :pre + {1} = subtract B0 + {quadraticflag} value = {0} or {1} + {0} = do not generate quadratic terms + {1} = generate quadratic terms :pre :ule [Examples:] @@ -151,7 +154,7 @@ linear mapping from radial distance to polar angle {theta0} on the The argument {twojmax} and the keyword {diagonal} define which bispectrum components are generated. 
See section below on output for a detailed explanation of the number of bispectrum components and the -ordered in which they are listed +order in which they are listed. The keyword {switchflag} can be used to turn off the switching function. @@ -162,6 +165,14 @@ the calculated bispectrum components. This optional keyword is only available for compute {sna/atom}, as {snad/atom} and {snav/atom} are unaffected by the removal of constant terms. +The keyword {quadraticflag} determines whether or not the +quadratic analogs to the bispectrum quantities are generated. +These are formed by taking the outer product of the vector +of bispectrum components with itself. +See section below on output for a +detailed explanation of the number of quadratic terms and the +order in which they are listed. + NOTE: If you have a bonded system, then the settings of "special_bonds"_special_bonds.html command can remove pairwise interactions between atoms in the same bond, angle, or dihedral. This @@ -180,7 +191,7 @@ command that includes all pairs in the neighbor list. Compute {sna/atom} calculates a per-atom array, each column corresponding to a particular bispectrum component. The total number -of columns and the identities of the bispectrum component contained in +of columns and the identity of the bispectrum component contained in each column depend on the values of {twojmax} and {diagonal}, as described by the following piece of python code: @@ -213,6 +224,19 @@ block contains six sub-blocks corresponding to the {xx}, {yy}, {zz}, notation. Each of these sub-blocks contains one column for each bispectrum component, the same as for compute {sna/atom} +For example, if {K}=30 and ntypes=1, the number of columns in the per-atom +arrays generated by {sna/atom}, {snad/atom}, and {snav/atom} +are 30, 90, and 180, respectively. With {quadraticflag} value=1, +the numbers of columns are 930, 2790, and 5580, respectively. 
+ +If the {quadraticflag} keyword value is set to 1, then additional +columns are appended to each per-atom array, corresponding to +a matrix of quantities that are products of two bispectrum components. If the +number of bispectrum components is {K}, then the number of matrix elements +is {K}^2. These are output in subblocks of {K}^2 columns, using the same +ordering of columns and sub-blocks as was used for the bispectrum +components. + These values can be accessed by any command that uses per-atom values from a compute as input. See "Section 6.15"_Section_howto.html#howto_15 for an overview of LAMMPS output @@ -231,7 +255,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. [Default:] The optional keyword defaults are {diagonal} = 0, {rmin0} = 0, -{switchflag} = 1, {bzeroflag} = 0. +{switchflag} = 1, {bzeroflag} = 1, {quadraticflag} = 0. :line diff --git a/doc/src/dihedral_charmm.txt b/doc/src/dihedral_charmm.txt index 9fe05a1148..918755ec38 100644 --- a/doc/src/dihedral_charmm.txt +++ b/doc/src/dihedral_charmm.txt @@ -10,25 +10,25 @@ dihedral_style charmm command :h3 dihedral_style charmm/intel command :h3 dihedral_style charmm/kk command :h3 dihedral_style charmm/omp command :h3 -dihedral_style charmmfsh command :h3 +dihedral_style charmmfsw command :h3 [Syntax:] dihedral_style style :pre -style = {charmm} or {charmmfsh} :ul +style = {charmm} or {charmmfsw} :ul [Examples:] dihedral_style charmm -dihedral_style charmmfsh +dihedral_style charmmfsw dihedral_coeff 1 0.2 1 180 1.0 dihedral_coeff 2 1.8 1 0 1.0 dihedral_coeff 1 3.1 2 180 0.5 :pre [Description:] -The {charmm} and {charmmfsh} dihedral styles use the potential +The {charmm} and {charmmfsw} dihedral styles use the potential :c,image(Eqs/dihedral_charmm.jpg) @@ -38,10 +38,12 @@ field (see comment on weighting factors below). See "(Cornell)"_#dihedral-Cornell for a description of the AMBER force field. -NOTE: The newer {charmmfsh} style was released in March 2017. 
We +NOTE: The newer {charmmfsw} style was released in March 2017. We recommend it be used instead of the older {charmm} style when running -a simulation with the CHARMM force field and Coulomb cutoffs, via the -"pair_style lj/charmmfsw/coul/charmmfsh"_pair_charmm.html command. +a simulation with the CHARMM force field, either with long-range +Coulombics or a Coulomb cutoff, via the "pair_style +lj/charmmfsw/coul/long"_pair_charmm.html and "pair_style +lj/charmmfsw/coul/charmmfsh"_pair_charmm.html commands, respectively. Otherwise the older {charmm} style is fine to use. See the discussion below and more details on the "pair_style charmm"_pair_charmm.html doc page. @@ -86,17 +88,18 @@ default). Otherwise 1-4 non-bonded interactions in dihedrals will be computed twice. For simulations using the CHARMM force field with a Coulomb cutoff, -the difference between the {charmm} and {charmmfsh} styles is in the +the difference between the {charmm} and {charmmfsw} styles is in the computation of the 1-4 non-bond interactions, though only if the distance between the two atoms is within the switching region of the pairwise potential defined by the corresponding CHARMM pair style, i.e. within the outer cutoff specified for the pair style. The -{charmmfsh} style should only be used when using the "pair_style -lj/charmmfsw/coul/charmmfsh"_pair_charmm.html to make the Coulombic -pairwise calculations consistent. Use the {charmm} style with -long-range Coulombics or the older "pair_style -lj/charmm/coul/charmm"_pair_charmm.html command. See the discussion -on the "CHARMM pair_style"_pair_charmm.html doc page for details. +{charmmfsw} style should only be used when using the corresponding +"pair_style lj/charmmfsw/coul/charmmfsh"_pair_charmm.html or +"pair_style lj/charmmfsw/coul/long"_pair_charmm.html commands. Use +the {charmm} style with the older "pair_style"_pair_charmm.html +commands that have just "charmm" in their style name.
See the +discussion on the "CHARMM pair_style"_pair_charmm.html doc page for +details. Note that for AMBER force fields, which use pair styles with "lj/cut", the special_bonds 1-4 scaling factor should be set to the AMBER @@ -104,7 +107,7 @@ defaults (1/2 and 5/6) and all the dihedral weighting factors (4th coeff above) must be set to 0.0. In this case, you can use any pair style you wish, since the dihedral does not need any Lennard-Jones parameter information and will not compute any 1-4 non-bonded -interactions. Likewise the {charmm} or {charmmfsh} styles are +interactions. Likewise the {charmm} or {charmmfsw} styles are identical in this case since no 1-4 non-bonded interactions are computed. diff --git a/doc/src/dihedral_spherical.txt b/doc/src/dihedral_spherical.txt index 3f888db01b..c71a319912 100644 --- a/doc/src/dihedral_spherical.txt +++ b/doc/src/dihedral_spherical.txt @@ -14,10 +14,10 @@ dihedral_style spherical :pre [Examples:] -dihedral_coeff 1 1 286.1 1 124 1 1 90.0 0 1 90.0 0 -dihedral_coeff 1 3 286.1 1 114 1 1 90 0 1 90.0 0 & - 17.3 0 0.0 0 1 158 1 0 0.0 0 & - 15.1 0 0.0 0 0 0.0 0 1 167.3 1 :pre +dihedral_coeff 1 1 286.1 1 124 1 1 90.0 0 1 90.0 0 +dihedral_coeff 1 3 69.3 1 93.9 1 1 90 0 1 90 0 & + 49.1 0 0.00 0 1 74.4 1 0 0.00 0 & + 25.2 0 0.00 0 0 0.00 0 1 48.1 1 :pre [Description:] @@ -35,13 +35,14 @@ the dihedral interaction even if it requires adding additional terms to the expansion (as was done in the second example). A careful choice of parameters can prevent singularities that occur with traditional force-fields whenever theta1 or theta2 approach 0 or 180 degrees. + The last example above corresponds to an interaction with a single energy -minima located at phi=114, theta1=158, theta2=167.3 degrees, and it remains +minimum located near phi=93.9, theta1=74.4, theta2=48.1 degrees, and it remains numerically stable at all angles (phi, theta1, theta2).
In this example, -the coefficients 17.3, and 15.1 can be physically interpreted as the +the coefficients 49.1 and 25.2 can be physically interpreted as the harmonic spring constants for theta1 and theta2 around their minima. -The coefficient 286.1 is the harmonic spring constant for phi after -division by sin(158)*sin(167.3) (the minima positions for theta1 and theta2). +The coefficient 69.3 is the harmonic spring constant for phi after +division by sin(74.4)*sin(48.1) (the minima positions for theta1 and theta2). The following coefficients must be defined for each dihedral type via the "dihedral_coeff"_dihedral_coeff.html command as in the example above, or in diff --git a/doc/src/dump.txt b/doc/src/dump.txt index cb9a5ba741..69a00eb473 100644 --- a/doc/src/dump.txt +++ b/doc/src/dump.txt @@ -7,12 +7,12 @@ :line dump command :h3 -"dump custom/vtk"_dump_custom_vtk.html command :h3 +"dump vtk"_dump_vtk.html command :h3 "dump h5md"_dump_h5md.html command :h3 +"dump molfile"_dump_molfile.html command :h3 +"dump netcdf"_dump_netcdf.html command :h3 "dump image"_dump_image.html command :h3 "dump movie"_dump_image.html command :h3 -"dump molfile"_dump_molfile.html command :h3 -"dump nc"_dump_nc.html command :h3 [Syntax:] @@ -20,7 +20,7 @@ dump ID group-ID style N file args :pre ID = user-assigned name for the dump :ulb,l group-ID = ID of the group of atoms to be dumped :l -style = {atom} or {atom/gz} or {atom/mpiio} or {cfg} or {cfg/gz} or {cfg/mpiio} or {dcd} or {xtc} or {xyz} or {xyz/gz} or {xyz/mpiio} or {h5md} or {image} or {movie} or {molfile} or {local} or {custom} or {custom/gz} or {custom/mpiio} :l +style = {atom} or {atom/gz} or {atom/mpiio} or {cfg} or {cfg/gz} or {cfg/mpiio} or {custom} or {custom/gz} or {custom/mpiio} or {dcd} or {h5md} or {image} or {local} or {molfile} or {movie} or {netcdf} or {netcdf/mpiio} or {vtk} or {xtc} or {xyz} or {xyz/gz} or {xyz/mpiio} :l N = dump every this many timesteps :l file = name of file to write dump info to :l args =
list of arguments for a particular style :l @@ -30,33 +30,22 @@ args = list of arguments for a particular style :l {cfg} args = same as {custom} args, see below {cfg/gz} args = same as {custom} args, see below {cfg/mpiio} args = same as {custom} args, see below + {custom}, {custom/gz}, {custom/mpiio} args = see below {dcd} args = none + {h5md} args = discussed on "dump h5md"_dump_h5md.html doc page + {image} args = discussed on "dump image"_dump_image.html doc page + {local} args = see below + {molfile} args = discussed on "dump molfile"_dump_molfile.html doc page + {movie} args = discussed on "dump image"_dump_image.html doc page + {netcdf} args = discussed on "dump netcdf"_dump_netcdf.html doc page + {netcdf/mpiio} args = discussed on "dump netcdf"_dump_netcdf.html doc page + {vtk} args = same as {custom} args, see below, also "dump vtk"_dump_vtk.html doc page {xtc} args = none - {xyz} args = none :pre - {xyz/gz} args = none :pre + {xyz} args = none + {xyz/gz} args = none {xyz/mpiio} args = none :pre - {custom/vtk} args = similar to custom args below, discussed on "dump custom/vtk"_dump_custom_vtk.html doc page :pre - - {h5md} args = discussed on "dump h5md"_dump_h5md.html doc page :pre - - {image} args = discussed on "dump image"_dump_image.html doc page :pre - - {movie} args = discussed on "dump image"_dump_image.html doc page :pre - - {molfile} args = discussed on "dump molfile"_dump_molfile.html doc page - - {nc} args = discussed on "dump nc"_dump_nc.html doc page :pre - - {local} args = list of local attributes - possible attributes = index, c_ID, c_ID\[I\], f_ID, f_ID\[I\] - index = enumeration of local values - c_ID = local vector calculated by a compute with ID - c_ID\[I\] = Ith column of local array calculated by a compute with ID, I can include wildcard (see below) - f_ID = local vector calculated by a fix with ID - f_ID\[I\] = Ith column of local array calculated by a fix with ID, I can include wildcard (see below) :pre - - {custom} or {custom/gz} or 
{custom/mpiio} args = list of atom attributes +{custom} or {custom/gz} or {custom/mpiio} args = list of atom attributes :l possible attributes = id, mol, proc, procp1, type, element, mass, x, y, z, xs, ys, zs, xu, yu, zu, xsu, ysu, zsu, ix, iy, iz, @@ -94,6 +83,15 @@ args = list of arguments for a particular style :l v_name = per-atom vector calculated by an atom-style variable with name d_name = per-atom floating point vector with name, managed by fix property/atom i_name = per-atom integer vector with name, managed by fix property/atom :pre + +{local} args = list of local attributes :l + possible attributes = index, c_ID, c_ID\[I\], f_ID, f_ID\[I\] + index = enumeration of local values + c_ID = local vector calculated by a compute with ID + c_ID\[I\] = Ith column of local array calculated by a compute with ID, I can include wildcard (see below) + f_ID = local vector calculated by a fix with ID + f_ID\[I\] = Ith column of local array calculated by a fix with ID, I can include wildcard (see below) :pre + :ule [Examples:] diff --git a/doc/src/dump_custom_vtk.txt b/doc/src/dump_custom_vtk.txt deleted file mode 100644 index d4c16193d8..0000000000 --- a/doc/src/dump_custom_vtk.txt +++ /dev/null @@ -1,347 +0,0 @@ - "LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c - -:link(lws,http://lammps.sandia.gov) -:link(ld,Manual.html) -:link(lc,Section_commands.html#comm) - -:line - -dump custom/vtk command :h3 - -[Syntax:] - -dump ID group-ID style N file args :pre - -ID = user-assigned name for the dump :ulb,l -group-ID = ID of the group of atoms to be dumped :l -style = {custom/vtk} :l -N = dump every this many timesteps :l -file = name of file to write dump info to :l -args = list of arguments for a particular style :l - {custom/vtk} args = list of atom attributes - possible attributes = id, mol, proc, procp1, type, element, mass, - x, y, z, xs, ys, zs, xu, yu, zu, - xsu, ysu, zsu, ix, iy, iz, - vx, vy, vz, fx, fy, fz, - q, mux, muy, muz, mu, - 
radius, diameter, omegax, omegay, omegaz, - angmomx, angmomy, angmomz, tqx, tqy, tqz, - c_ID, c_ID\[N\], f_ID, f_ID\[N\], v_name :pre - - id = atom ID - mol = molecule ID - proc = ID of processor that owns atom - procp1 = ID+1 of processor that owns atom - type = atom type - element = name of atom element, as defined by "dump_modify"_dump_modify.html command - mass = atom mass - x,y,z = unscaled atom coordinates - xs,ys,zs = scaled atom coordinates - xu,yu,zu = unwrapped atom coordinates - xsu,ysu,zsu = scaled unwrapped atom coordinates - ix,iy,iz = box image that the atom is in - vx,vy,vz = atom velocities - fx,fy,fz = forces on atoms - q = atom charge - mux,muy,muz = orientation of dipole moment of atom - mu = magnitude of dipole moment of atom - radius,diameter = radius,diameter of spherical particle - omegax,omegay,omegaz = angular velocity of spherical particle - angmomx,angmomy,angmomz = angular momentum of aspherical particle - tqx,tqy,tqz = torque on finite-size particles - c_ID = per-atom vector calculated by a compute with ID - c_ID\[I\] = Ith column of per-atom array calculated by a compute with ID, I can include wildcard (see below) - f_ID = per-atom vector calculated by a fix with ID - f_ID\[I\] = Ith column of per-atom array calculated by a fix with ID, I can include wildcard (see below) - v_name = per-atom vector calculated by an atom-style variable with name - d_name = per-atom floating point vector with name, managed by fix property/atom - i_name = per-atom integer vector with name, managed by fix property/atom :pre -:ule - -[Examples:] - -dump dmpvtk all custom/vtk 100 dump*.myforce.vtk id type vx fx -dump dmpvtp flow custom/vtk 100 dump*.%.displace.vtp id type c_myD\[1\] c_myD\[2\] c_myD\[3\] v_ke :pre - -The style {custom/vtk} is similar to the "custom"_dump.html style but -uses the VTK library to write data to VTK simple legacy or XML format -depending on the filename extension specified. 
This can be either -{*.vtk} for the legacy format or {*.vtp} and {*.vtu}, respectively, -for the XML format; see the "VTK -homepage"_http://www.vtk.org/VTK/img/file-formats.pdf for a detailed -description of these formats. Since this naming convention conflicts -with the way binary output is usually specified (see below), -"dump_modify binary"_dump_modify.html allows to set the binary -flag for this dump style explicitly. - -[Description:] - -Dump a snapshot of atom quantities to one or more files every N -timesteps in a format readable by the "VTK visualization -toolkit"_http://www.vtk.org or other visualization tools that use it, -e.g. "ParaView"_http://www.paraview.org. The timesteps on which dump -output is written can also be controlled by a variable; see the -"dump_modify every"_dump_modify.html command for details. - -Only information for atoms in the specified group is dumped. The -"dump_modify thresh and region"_dump_modify.html commands can also -alter what atoms are included; see details below. - -As described below, special characters ("*", "%") in the filename -determine the kind of output. - -IMPORTANT NOTE: Because periodic boundary conditions are enforced only -on timesteps when neighbor lists are rebuilt, the coordinates of an -atom written to a dump file may be slightly outside the simulation -box. - -IMPORTANT NOTE: Unless the "dump_modify sort"_dump_modify.html -option is invoked, the lines of atom information written to dump files -will be in an indeterminate order for each snapshot. This is even -true when running on a single processor, if the "atom_modify -sort"_atom_modify.html option is on, which it is by default. In this -case atoms are re-ordered periodically during a simulation, due to -spatial sorting. It is also true when running in parallel, because -data for a single snapshot is collected from multiple processors, each -of which owns a subset of the atoms. - -For the {custom/vtk} style, sorting is off by default. 
See the -"dump_modify"_dump_modify.html doc page for details. - -:line - -The dimensions of the simulation box are written to a separate file -for each snapshot (either in legacy VTK or XML format depending on -the format of the main dump file) with the suffix {_boundingBox} -appended to the given dump filename. - -For an orthogonal simulation box this information is saved as a -rectilinear grid (legacy .vtk or .vtr XML format). - -Triclinic simulation boxes (non-orthogonal) are saved as -hexahedrons in either legacy .vtk or .vtu XML format. - -Style {custom/vtk} allows you to specify a list of atom attributes -to be written to the dump file for each atom. Possible attributes -are listed above. In contrast to the {custom} style, the attributes -are rearranged to ensure correct ordering of vector components -(except for computes and fixes - these have to be given in the right -order) and duplicate entries are removed. - -You cannot specify a quantity that is not defined for a particular -simulation - such as {q} for atom style {bond}, since that atom style -doesn't assign charges. Dumps occur at the very end of a timestep, -so atom attributes will include effects due to fixes that are applied -during the timestep. An explanation of the possible dump custom/vtk attributes -is given below. Since position data is required to write VTK files "x y z" -do not have to be specified explicitly. - -The VTK format uses a single snapshot of the system per file, thus -a wildcard "*" must be included in the filename, as discussed below. -Otherwise the dump files will get overwritten with the new snapshot -each time. - -:line - -Dumps are performed on timesteps that are a multiple of N (including -timestep 0) and on the last timestep of a minimization if the -minimization converges. Note that this means a dump will not be -performed on the initial timestep after the dump command is invoked, -if the current timestep is not a multiple of N. 
This behavior can be -changed via the "dump_modify first"_dump_modify.html command, which -can also be useful if the dump command is invoked after a minimization -ended on an arbitrary timestep. N can be changed between runs by -using the "dump_modify every"_dump_modify.html command. -The "dump_modify every"_dump_modify.html command -also allows a variable to be used to determine the sequence of -timesteps on which dump files are written. In this mode a dump on the -first timestep of a run will also not be written unless the -"dump_modify first"_dump_modify.html command is used. - -Dump filenames can contain two wildcard characters. If a "*" -character appears in the filename, then one file per snapshot is -written and the "*" character is replaced with the timestep value. -For example, tmp.dump*.vtk becomes tmp.dump0.vtk, tmp.dump10000.vtk, -tmp.dump20000.vtk, etc. Note that the "dump_modify pad"_dump_modify.html -command can be used to insure all timestep numbers are the same length -(e.g. 00010), which can make it easier to read a series of dump files -in order with some post-processing tools. - -If a "%" character appears in the filename, then each of P processors -writes a portion of the dump file, and the "%" character is replaced -with the processor ID from 0 to P-1 preceded by an underscore character. -For example, tmp.dump%.vtp becomes tmp.dump_0.vtp, tmp.dump_1.vtp, ... -tmp.dump_P-1.vtp, etc. This creates smaller files and can be a fast -mode of output on parallel machines that support parallel I/O for output. - -By default, P = the number of processors meaning one file per -processor, but P can be set to a smaller value via the {nfile} or -{fileper} keywords of the "dump_modify"_dump_modify.html command. -These options can be the most efficient way of writing out dump files -when running on large numbers of processors. - -For the legacy VTK format "%" is ignored and P = 1, i.e., only -processor 0 does write files. 
- -Note that using the "*" and "%" characters together can produce a -large number of small dump files! - -If {dump_modify binary} is used, the dump file (or files, if "*" or -"%" is also used) is written in binary format. A binary dump file -will be about the same size as a text version, but will typically -write out much faster. - -:line - -This section explains the atom attributes that can be specified as -part of the {custom/vtk} style. - -The {id}, {mol}, {proc}, {procp1}, {type}, {element}, {mass}, {vx}, -{vy}, {vz}, {fx}, {fy}, {fz}, {q} attributes are self-explanatory. - -{Id} is the atom ID. {Mol} is the molecule ID, included in the data -file for molecular systems. {Proc} is the ID of the processor (0 to -Nprocs-1) that currently owns the atom. {Procp1} is the proc ID+1, -which can be convenient in place of a {type} attribute (1 to Ntypes) -for coloring atoms in a visualization program. {Type} is the atom -type (1 to Ntypes). {Element} is typically the chemical name of an -element, which you must assign to each type via the "dump_modify -element"_dump_modify.html command. More generally, it can be any -string you wish to associated with an atom type. {Mass} is the atom -mass. {Vx}, {vy}, {vz}, {fx}, {fy}, {fz}, and {q} are components of -atom velocity and force and atomic charge. - -There are several options for outputting atom coordinates. The {x}, -{y}, {z} attributes write atom coordinates "unscaled", in the -appropriate distance "units"_units.html (Angstroms, sigma, etc). Use -{xs}, {ys}, {zs} if you want the coordinates "scaled" to the box size, -so that each value is 0.0 to 1.0. If the simulation box is triclinic -(tilted), then all atom coords will still be between 0.0 and 1.0. -I.e. actual unscaled (x,y,z) = xs*A + ys*B + zs*C, where (A,B,C) are -the non-orthogonal vectors of the simulation box edges, as discussed -in "Section 6.12"_Section_howto.html#howto_12. 
- -Use {xu}, {yu}, {zu} if you want the coordinates "unwrapped" by the -image flags for each atom. Unwrapped means that if the atom has -passed thru a periodic boundary one or more times, the value is -printed for what the coordinate would be if it had not been wrapped -back into the periodic box. Note that using {xu}, {yu}, {zu} means -that the coordinate values may be far outside the box bounds printed -with the snapshot. Using {xsu}, {ysu}, {zsu} is similar to using -{xu}, {yu}, {zu}, except that the unwrapped coordinates are scaled by -the box size. Atoms that have passed through a periodic boundary will -have the corresponding coordinate increased or decreased by 1.0. - -The image flags can be printed directly using the {ix}, {iy}, {iz} -attributes. For periodic dimensions, they specify which image of the -simulation box the atom is considered to be in. An image of 0 means -it is inside the box as defined. A value of 2 means add 2 box lengths -to get the true value. A value of -1 means subtract 1 box length to -get the true value. LAMMPS updates these flags as atoms cross -periodic boundaries during the simulation. - -The {mux}, {muy}, {muz} attributes are specific to dipolar systems -defined with an atom style of {dipole}. They give the orientation of -the atom's point dipole moment. The {mu} attribute gives the -magnitude of the atom's dipole moment. - -The {radius} and {diameter} attributes are specific to spherical -particles that have a finite size, such as those defined with an atom -style of {sphere}. - -The {omegax}, {omegay}, and {omegaz} attributes are specific to -finite-size spherical particles that have an angular velocity. Only -certain atom styles, such as {sphere} define this quantity. - -The {angmomx}, {angmomy}, and {angmomz} attributes are specific to -finite-size aspherical particles that have an angular momentum. Only -the {ellipsoid} atom style defines this quantity. 
- -The {tqx}, {tqy}, {tqz} attributes are for finite-size particles that -can sustain a rotational torque due to interactions with other -particles. - -The {c_ID} and {c_ID\[I\]} attributes allow per-atom vectors or arrays -calculated by a "compute"_compute.html to be output. The ID in the -attribute should be replaced by the actual ID of the compute that has -been defined previously in the input script. See the -"compute"_compute.html command for details. There are computes for -calculating the per-atom energy, stress, centro-symmetry parameter, -and coordination number of individual atoms. - -Note that computes which calculate global or local quantities, as -opposed to per-atom quantities, cannot be output in a dump custom/vtk -command. Instead, global quantities can be output by the -"thermo_style custom"_thermo_style.html command, and local quantities -can be output by the dump local command. - -If {c_ID} is used as a attribute, then the per-atom vector calculated -by the compute is printed. If {c_ID\[I\]} is used, then I must be in -the range from 1-M, which will print the Ith column of the per-atom -array with M columns calculated by the compute. See the discussion -above for how I can be specified with a wildcard asterisk to -effectively specify multiple values. - -The {f_ID} and {f_ID\[I\]} attributes allow vector or array per-atom -quantities calculated by a "fix"_fix.html to be output. The ID in the -attribute should be replaced by the actual ID of the fix that has been -defined previously in the input script. The "fix -ave/atom"_fix_ave_atom.html command is one that calculates per-atom -quantities. Since it can time-average per-atom quantities produced by -any "compute"_compute.html, "fix"_fix.html, or atom-style -"variable"_variable.html, this allows those time-averaged results to -be written to a dump file. - -If {f_ID} is used as a attribute, then the per-atom vector calculated -by the fix is printed. 
If {f_ID\[I\]} is used, then I must be in the -range from 1-M, which will print the Ith column of the per-atom array -with M columns calculated by the fix. See the discussion above for -how I can be specified with a wildcard asterisk to effectively specify -multiple values. - -The {v_name} attribute allows per-atom vectors calculated by a -"variable"_variable.html to be output. The name in the attribute -should be replaced by the actual name of the variable that has been -defined previously in the input script. Only an atom-style variable -can be referenced, since it is the only style that generates per-atom -values. Variables of style {atom} can reference individual atom -attributes, per-atom atom attributes, thermodynamic keywords, or -invoke other computes, fixes, or variables when they are evaluated, so -this is a very general means of creating quantities to output to a -dump file. - -The {d_name} and {i_name} attributes allow to output custom per atom -floating point or integer properties that are managed by -"fix property/atom"_fix_property_atom.html. - -See "Section 10"_Section_modify.html of the manual for information -on how to add new compute and fix styles to LAMMPS to calculate -per-atom quantities which could then be output into dump files. - -:line - -[Restrictions:] - -The {custom/vtk} style does not support writing of gzipped dump files. - -The {custom/vtk} dump style is part of the USER-VTK package. It is -only enabled if LAMMPS was built with that package. See the "Making -LAMMPS"_Section_start.html#start_3 section for more info. - -To use this dump style, you also must link to the VTK library. See -the info in lib/vtk/README and insure the Makefile.lammps file in that -directory is appropriate for your machine. - -The {custom/vtk} dump style neither supports buffering nor custom -format strings. 
- -[Related commands:] - -"dump"_dump.html, "dump image"_dump_image.html, -"dump_modify"_dump_modify.html, "undump"_undump.html - -[Default:] - -By default, files are written in ASCII format. If the file extension -is not one of .vtk, .vtp or .vtu, the legacy VTK file format is used. - diff --git a/doc/src/dump_h5md.txt b/doc/src/dump_h5md.txt index d797e633e6..93c87d85b7 100644 --- a/doc/src/dump_h5md.txt +++ b/doc/src/dump_h5md.txt @@ -17,9 +17,7 @@ group-ID = ID of the group of atoms to be imaged :l h5md = style of dump command (other styles {atom} or {cfg} or {dcd} or {xtc} or {xyz} or {local} or {custom} are discussed on the "dump"_dump.html doc page) :l N = dump every this many timesteps :l file.h5 = name of file to write to :l -args = list of data elements to dump, with their dump "subintervals". -At least one element must be given and image may only be present if -position is specified first. :l +args = list of data elements to dump, with their dump "subintervals" position options image velocity options @@ -29,15 +27,17 @@ position is specified first. :l box value = {yes} or {no} create_group value = {yes} or {no} author value = quoted string :pre +:ule -For the elements {position}, {velocity}, {force} and {species}, one -may specify a sub-interval to write the data only every N_element +Note that at least one element must be specified and image may only be +present if position is specified first. + +For the elements {position}, {velocity}, {force} and {species}, a +sub-interval may be specified to write the data only every N_element iterations of the dump (i.e. every N*N_element time steps). This is -specified by the option +specified by this option directly following the element declaration: - every N_element :pre - -that follows directly the element declaration. 
+every N_element :pre :ule diff --git a/doc/src/dump_nc.txt b/doc/src/dump_nc.txt deleted file mode 100644 index 0b81ee6a32..0000000000 --- a/doc/src/dump_nc.txt +++ /dev/null @@ -1,66 +0,0 @@ -"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c - -:link(lws,http://lammps.sandia.gov) -:link(ld,Manual.html) -:link(lc,Section_commands.html#comm) - -:line - -dump nc command :h3 -dump nc/mpiio command :h3 - -[Syntax:] - -dump ID group-ID nc N file.nc args -dump ID group-ID nc/mpiio N file.nc args :pre - -ID = user-assigned name for the dump :ulb,l -group-ID = ID of the group of atoms to be imaged :l -{nc} or {nc/mpiio} = style of dump command (other styles {atom} or {cfg} or {dcd} or {xtc} or {xyz} or {local} or {custom} are discussed on the "dump"_dump.html doc page) :l -N = dump every this many timesteps :l -file.nc = name of file to write to :l -args = list of per atom data elements to dump, same as for the 'custom' dump style. :l,ule - -[Examples:] - -dump 1 all nc 100 traj.nc type x y z vx vy vz -dump_modify 1 append yes at -1 global c_thermo_pe c_thermo_temp c_thermo_press :pre - -dump 1 all nc/mpiio 1000 traj.nc id type x y z :pre - -[Description:] - -Dump a snapshot of atom coordinates every N timesteps in Amber-style -NetCDF file format. NetCDF files are binary, portable and -self-describing. This dump style will write only one file on the root -node. The dump style {nc} uses the "standard NetCDF -library"_netcdf-home all data is collected on one processor and then -written to the dump file. Dump style {nc/mpiio} used the "parallel -NetCDF library"_pnetcdf-home and MPI-IO; it has better performance on -a larger number of processors. Note that 'nc' outputs all atoms sorted -by atom tag while 'nc/mpiio' outputs in order of the MPI rank. - -In addition to per-atom data, also global (i.e. not per atom, but per -frame) quantities can be included in the dump file. 
This can be -variables, output from computes or fixes data prefixed with v_, c_ and -f_, respectively. These properties are included via -"dump_modify"_dump_modify.html {global}. - -:link(netcdf-home,http://www.unidata.ucar.edu/software/netcdf/) -:link(pnetcdf-home,http://trac.mcs.anl.gov/projects/parallel-netcdf/) - -:line - -[Restrictions:] - -The {nc} and {nc/mpiio} dump styles are part of the USER-NC-DUMP -package. It is only enabled if LAMMPS was built with that -package. See the "Making LAMMPS"_Section_start.html#start_3 section -for more info. - -:line - -[Related commands:] - -"dump"_dump.html, "dump_modify"_dump_modify.html, "undump"_undump.html - diff --git a/doc/src/dump_netcdf.txt b/doc/src/dump_netcdf.txt new file mode 100644 index 0000000000..4e82656698 --- /dev/null +++ b/doc/src/dump_netcdf.txt @@ -0,0 +1,82 @@ +"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c + +:link(lws,http://lammps.sandia.gov) +:link(ld,Manual.html) +:link(lc,Section_commands.html#comm) + +:line + +dump netcdf command :h3 +dump netcdf/mpiio command :h3 + +[Syntax:] + +dump ID group-ID netcdf N file args +dump ID group-ID netcdf/mpiio N file args :pre + +ID = user-assigned name for the dump :ulb,l +group-ID = ID of the group of atoms to be imaged :l +{netcdf} or {netcdf/mpiio} = style of dump command (other styles {atom} or {cfg} or {dcd} or {xtc} or {xyz} or {local} or {custom} are discussed on the "dump"_dump.html doc page) :l +N = dump every this many timesteps :l +file = name of file to write dump info to :l +args = list of atom attributes, same as for "dump_style custom"_dump.html :l,ule + +[Examples:] + +dump 1 all netcdf 100 traj.nc type x y z vx vy vz +dump_modify 1 append yes at -1 global c_thermo_pe c_thermo_temp c_thermo_press +dump 1 all netcdf/mpiio 1000 traj.nc id type x y z :pre + +[Description:] + +Dump a snapshot of atom coordinates every N timesteps in Amber-style +NetCDF file format. 
NetCDF files are binary, portable and +self-describing. This dump style will write only one file on the root +node. The dump style {netcdf} uses the "standard NetCDF +library"_netcdf-home. All data is collected on one processor and then +written to the dump file. Dump style {netcdf/mpiio} uses the +"parallel NetCDF library"_pnetcdf-home and MPI-IO to write to the dump +file in parallel; it has better performance on a larger number of +processors. Note that style {netcdf} outputs all atoms sorted by atom +tag while style {netcdf/mpiio} outputs atoms in order of their MPI +rank. + +NetCDF files can be directly visualized via the following tools: + +Ovito (http://www.ovito.org/). Ovito supports the AMBER convention and +all of the above extensions. :ulb,l + +VMD (http://www.ks.uiuc.edu/Research/vmd/). :l + +AtomEye (http://www.libatoms.org/). The libAtoms version of AtomEye +contains a NetCDF reader that is not present in the standard +distribution of AtomEye. :l,ule + +In addition to per-atom data, global data can be included in the dump +file, which are the kinds of values output by the +"thermo_style"_thermo_style.html command. See "Section howto +6.15"_Section_howto.html#howto_15 for an explanation of per-atom +versus global data. The global output written into the dump file can +be from computes, fixes, or variables, by prefixing the compute/fix ID +or variable name with "c_" or "f_" or "v_" respectively, as in the +example above. These global values are specified via the "dump_modify +global"_dump_modify.html command. + +:link(netcdf-home,http://www.unidata.ucar.edu/software/netcdf/) +:link(pnetcdf-home,http://trac.mcs.anl.gov/projects/parallel-netcdf/) + +:line + +[Restrictions:] + +The {netcdf} and {netcdf/mpiio} dump styles are part of the +USER-NETCDF package. They are only enabled if LAMMPS was built with +that package. See the "Making LAMMPS"_Section_start.html#start_3 +section for more info.
+ +:line + +[Related commands:] + +"dump"_dump.html, "dump_modify"_dump_modify.html, "undump"_undump.html + diff --git a/doc/src/dump_vtk.txt b/doc/src/dump_vtk.txt new file mode 100644 index 0000000000..21502e7f49 --- /dev/null +++ b/doc/src/dump_vtk.txt @@ -0,0 +1,179 @@ + "LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c + +:link(lws,http://lammps.sandia.gov) +:link(ld,Manual.html) +:link(lc,Section_commands.html#comm) + +:line + +dump vtk command :h3 + +[Syntax:] + +dump ID group-ID vtk N file args :pre + +ID = user-assigned name for the dump +group-ID = ID of the group of atoms to be dumped +vtk = style of dump command (other styles {atom} or {cfg} or {dcd} or {xtc} or {xyz} or {local} or {custom} are discussed on the "dump"_dump.html doc page) +N = dump every this many timesteps +file = name of file to write dump info to +args = same as arguments for "dump_style custom"_dump.html :ul + +[Examples:] + +dump dmpvtk all vtk 100 dump*.myforce.vtk id type vx fx +dump dmpvtp flow vtk 100 dump*.%.displace.vtp id type c_myD\[1\] c_myD\[2\] c_myD\[3\] v_ke :pre + +[Description:] + +Dump a snapshot of atom quantities to one or more files every N +timesteps in a format readable by the "VTK visualization +toolkit"_http://www.vtk.org or other visualization tools that use it, +e.g. "ParaView"_http://www.paraview.org. The timesteps on which dump +output is written can also be controlled by a variable; see the +"dump_modify every"_dump_modify.html command for details. + +This dump style is similar to "dump_style custom"_dump.html but uses +the VTK library to write data to VTK simple legacy or XML format +depending on the filename extension specified for the dump file. This +can be either {*.vtk} for the legacy format or {*.vtp} and {*.vtu}, +respectively, for XML format; see the "VTK +homepage"_http://www.vtk.org/VTK/img/file-formats.pdf for a detailed +description of these formats. 
Since this naming convention conflicts +with the way binary output is usually specified (see below), the +"dump_modify binary"_dump_modify.html command allows setting of a +binary option for this dump style explicitly. + +Only information for atoms in the specified group is dumped. The +"dump_modify thresh and region"_dump_modify.html commands can also +alter what atoms are included; see details below. + +As described below, special characters ("*", "%") in the filename +determine the kind of output. + +IMPORTANT NOTE: Because periodic boundary conditions are enforced only +on timesteps when neighbor lists are rebuilt, the coordinates of an +atom written to a dump file may be slightly outside the simulation +box. + +IMPORTANT NOTE: Unless the "dump_modify sort"_dump_modify.html option +is invoked, the lines of atom information written to dump files will +be in an indeterminate order for each snapshot. This is even true +when running on a single processor, if the "atom_modify +sort"_atom_modify.html option is on, which it is by default. In this +case atoms are re-ordered periodically during a simulation, due to +spatial sorting. It is also true when running in parallel, because +data for a single snapshot is collected from multiple processors, each +of which owns a subset of the atoms. + +For the {vtk} style, sorting is off by default. See the +"dump_modify"_dump_modify.html doc page for details. + +:line + +The dimensions of the simulation box are written to a separate file +for each snapshot (either in legacy VTK or XML format depending on the +format of the main dump file) with the suffix {_boundingBox} appended +to the given dump filename. + +For an orthogonal simulation box this information is saved as a +rectilinear grid (legacy .vtk or .vtr XML format). + +Triclinic simulation boxes (non-orthogonal) are saved as +hexahedrons in either legacy .vtk or .vtu XML format. 
+ +Style {vtk} allows you to specify a list of atom attributes to be +written to the dump file for each atom. The list of possible attributes +is the same as for the "dump_style custom"_dump.html command; see +its doc page for a listing and an explanation of each attribute. + +NOTE: Since position data is required to write VTK files the atom +attributes "x y z" do not have to be specified explicitly; they will +be included in the dump file regardless. Also, in contrast to the +{custom} style, the specified {vtk} attributes are rearranged to +ensure correct ordering of vector components (except for computes and +fixes - these have to be given in the right order) and duplicate +entries are removed. + +The VTK format uses a single snapshot of the system per file, thus +a wildcard "*" must be included in the filename, as discussed below. +Otherwise the dump files will get overwritten with the new snapshot +each time. + +:line + +Dumps are performed on timesteps that are a multiple of N (including +timestep 0) and on the last timestep of a minimization if the +minimization converges. Note that this means a dump will not be +performed on the initial timestep after the dump command is invoked, +if the current timestep is not a multiple of N. This behavior can be +changed via the "dump_modify first"_dump_modify.html command, which +can also be useful if the dump command is invoked after a minimization +ended on an arbitrary timestep. N can be changed between runs by +using the "dump_modify every"_dump_modify.html command. +The "dump_modify every"_dump_modify.html command +also allows a variable to be used to determine the sequence of +timesteps on which dump files are written. In this mode a dump on the +first timestep of a run will also not be written unless the +"dump_modify first"_dump_modify.html command is used. + +Dump filenames can contain two wildcard characters. 
If a "*" +character appears in the filename, then one file per snapshot is +written and the "*" character is replaced with the timestep value. +For example, tmp.dump*.vtk becomes tmp.dump0.vtk, tmp.dump10000.vtk, +tmp.dump20000.vtk, etc. Note that the "dump_modify pad"_dump_modify.html +command can be used to ensure all timestep numbers are the same length +(e.g. 00010), which can make it easier to read a series of dump files +in order with some post-processing tools. + +If a "%" character appears in the filename, then each of P processors +writes a portion of the dump file, and the "%" character is replaced +with the processor ID from 0 to P-1 preceded by an underscore character. +For example, tmp.dump%.vtp becomes tmp.dump_0.vtp, tmp.dump_1.vtp, ... +tmp.dump_P-1.vtp, etc. This creates smaller files and can be a fast +mode of output on parallel machines that support parallel I/O for output. + +By default, P = the number of processors meaning one file per +processor, but P can be set to a smaller value via the {nfile} or +{fileper} keywords of the "dump_modify"_dump_modify.html command. +These options can be the most efficient way of writing out dump files +when running on large numbers of processors. + +For the legacy VTK format "%" is ignored and P = 1, i.e., only +processor 0 writes files. + +Note that using the "*" and "%" characters together can produce a +large number of small dump files! + +If {dump_modify binary} is used, the dump file (or files, if "*" or +"%" is also used) is written in binary format. A binary dump file +will be about the same size as a text version, but will typically +write out much faster. + +:line + +[Restrictions:] + +The {vtk} style does not support writing of gzipped dump files. + +The {vtk} dump style is part of the USER-VTK package. It is +only enabled if LAMMPS was built with that package. See the "Making +LAMMPS"_Section_start.html#start_3 section for more info. 
+ +To use this dump style, you also must link to the VTK library. See +the info in lib/vtk/README and ensure the Makefile.lammps file in that +directory is appropriate for your machine. + +The {vtk} dump style supports neither buffering nor custom format +strings. + +[Related commands:] + +"dump"_dump.html, "dump image"_dump_image.html, +"dump_modify"_dump_modify.html, "undump"_undump.html + +[Default:] + +By default, files are written in ASCII format. If the file extension +is not one of .vtk, .vtp or .vtu, the legacy VTK file format is used. + diff --git a/doc/src/fix_adapt.txt b/doc/src/fix_adapt.txt index a35357a7ec..d7c32bef3d 100644 --- a/doc/src/fix_adapt.txt +++ b/doc/src/fix_adapt.txt @@ -22,6 +22,11 @@ attribute = {pair} or {kspace} or {atom} :l pparam = parameter to adapt over time I,J = type pair(s) to set parameter for v_name = variable with name that calculates value of pparam + {bond} args = bstyle bparam I v_name + bstyle = bond style name, e.g. harmonic + bparam = parameter to adapt over time + I = type bond to set parameter for + v_name = variable with name that calculates value of bparam {kspace} arg = v_name v_name = variable with name that calculates scale factor on K-space terms {atom} args = aparam v_name @@ -42,7 +47,10 @@ keyword = {scale} or {reset} :l fix 1 all adapt 1 pair soft a 1 1 v_prefactor fix 1 all adapt 1 pair soft a 2* 3 v_prefactor fix 1 all adapt 1 pair lj/cut epsilon * * v_scale1 coul/cut scale 3 3 v_scale2 scale yes reset yes -fix 1 all adapt 10 atom diameter v_size :pre +fix 1 all adapt 10 atom diameter v_size + +variable ramp_up equal "ramp(0.01,0.5)" +fix stretch all adapt 1 bond harmonic r0 1 v_ramp_up :pre [Description:] @@ -192,6 +200,19 @@ fix 1 all adapt 1 pair soft a * * v_prefactor :pre :line +The {bond} keyword uses the specified variable to change the value of +a bond coefficient over time, very similar to how the {pair} keyword +operates. 
The only difference is that now a bond coefficient for a +given bond type is adapted. + +Currently {bond} does not support bond_style hybrid nor bond_style +hybrid/overlay as bond styles. The only bonds that currently are +working with fix_adapt are + +"harmonic"_bond_harmonic.html: k,r0: type bonds :tb(c=3,s=:) + +:line + The {kspace} keyword used the specified variable as a scale factor on the energy, forces, virial calculated by whatever K-Space solver is defined by the "kspace_style"_kspace_style.html command. If the diff --git a/doc/src/fix_box_relax.txt b/doc/src/fix_box_relax.txt index 83e5a82652..54decd6282 100644 --- a/doc/src/fix_box_relax.txt +++ b/doc/src/fix_box_relax.txt @@ -245,8 +245,8 @@ appear the system is converging to your specified pressure. The solution for this is to either (a) zero the velocities of all atoms before performing the minimization, or (b) make sure you are monitoring the pressure without its kinetic component. The latter can -be done by outputting the pressure from the fix this command creates -(see below) or a pressure fix you define yourself. +be done by outputting the pressure from the pressure compute this +command creates (see below) or a pressure compute you define yourself. NOTE: Because pressure is often a very sensitive function of volume, it can be difficult for the minimizer to equilibrate the system the @@ -308,7 +308,7 @@ thermo_modify command (or in two separate commands), then the order in which the keywords are specified is important. Note that a "pressure compute"_compute_pressure.html defines its own temperature compute as an argument when it is specified. The {temp} keyword will override -this (for the pressure compute being used by fix npt), but only if the +this (for the pressure compute being used by fix box/relax), but only if the {temp} keyword comes after the {press} keyword. 
If the {temp} keyword comes before the {press} keyword, then the new pressure compute specified by the {press} keyword will be unaffected by the {temp} @@ -316,18 +316,16 @@ setting. This fix computes a global scalar which can be accessed by various "output commands"_Section_howto.html#howto_15. The scalar is the -pressure-volume energy, plus the strain energy, if it exists. - -This fix computes a global scalar which can be accessed by various -"output commands"_Section_howto.html#howto_15. The scalar is given -by the energy expression shown above. The energy values reported -at the end of a minimization run under "Minimization stats" include -this energy, and so differ from what LAMMPS normally reports as -potential energy. This fix does not support the -"fix_modify"_fix_modify.html {energy} option, -because that would result in double-counting of the fix energy in the -minimization energy. Instead, the fix energy can be explicitly -added to the potential energy using one of these two variants: +pressure-volume energy, plus the strain energy, if it exists, +as described above. +The energy values reported at the +end of a minimization run under "Minimization stats" include this +energy, and so differ from what LAMMPS normally reports as potential +energy. This fix does not support the "fix_modify"_fix_modify.html +{energy} option, because that would result in double-counting of the +fix energy in the minimization energy. Instead, the fix energy can be +explicitly added to the potential energy using one of these two +variants: variable emin equal pe+f_1 :pre diff --git a/doc/src/fix_cmap.txt b/doc/src/fix_cmap.txt index 5fcac589be..2b14a20c1d 100644 --- a/doc/src/fix_cmap.txt +++ b/doc/src/fix_cmap.txt @@ -87,8 +87,11 @@ the note below about how to include the CMAP energy when performing an [Restart, fix_modify, output, run start/stop, minimize info:] -No information about this fix is written to "binary restart -files"_restart.html. 
+This fix writes the list of CMAP crossterms to "binary restart +files"_restart.html. See the "read_restart"_read_restart.html command +for info on how to re-specify a fix in an input script that reads a +restart file, so that the operation of the fix continues in an +uninterrupted fashion. The "fix_modify"_fix_modify.html {energy} option is supported by this fix to add the potential "energy" of the CMAP interactions system's diff --git a/doc/src/fix_gcmc.txt b/doc/src/fix_gcmc.txt index 53973cdfb8..7ac607a2f1 100644 --- a/doc/src/fix_gcmc.txt +++ b/doc/src/fix_gcmc.txt @@ -317,7 +317,7 @@ solution is to start a new simulation after the equilibrium density has been reached. With some pair_styles, such as "Buckingham"_pair_buck.html, -"Born-Mayer-Huggins"_pair_born.html and "ReaxFF"_pair_reax_c.html, two +"Born-Mayer-Huggins"_pair_born.html and "ReaxFF"_pair_reaxc.html, two atoms placed close to each other may have an arbitrary large, negative potential energy due to the functional form of the potential. While these unphysical configurations are inaccessible to typical dynamical diff --git a/doc/src/fix_gle.txt b/doc/src/fix_gle.txt index ca7625e2d0..b8d3cc9b34 100644 --- a/doc/src/fix_gle.txt +++ b/doc/src/fix_gle.txt @@ -67,9 +67,10 @@ target value as the {Tstart} and {Tstop} arguments, so that the diffusion matrix that gives canonical sampling for a given A is computed automatically. However, the GLE framework also allow for non-equilibrium sampling, that can be used for instance to model inexpensively zero-point energy -effects "(Ceriotti2)"_#Ceriotti2. This is achieved specifying the -{noneq} keyword followed by the name of the file that contains the -static covariance matrix for the non-equilibrium dynamics. +effects "(Ceriotti2)"_#Ceriotti2. This is achieved specifying the {noneq} + keyword followed by the name of the file that contains the static covariance +matrix for the non-equilibrium dynamics. 
Please note, that the covariance +matrix is expected to be given in [temperature units]. Since integrating GLE dynamics can be costly when used together with simple potentials, one can use the {every} optional keyword to @@ -148,7 +149,7 @@ dpd/tstat"_pair_dpd.html, "fix gld"_fix_gld.html 1170-80 (2010) :link(GLE4MD) -[(GLE4MD)] "http://epfl-cosmo.github.io/gle4md/"_http://epfl-cosmo.github.io/gle4md/ +[(GLE4MD)] "http://gle4md.org/"_http://gle4md.org/ :link(Ceriotti2) [(Ceriotti2)] Ceriotti, Bussi and Parrinello, Phys Rev Lett 103, diff --git a/doc/src/fix_python.txt b/doc/src/fix_python.txt new file mode 100644 index 0000000000..c6e1ad9dac --- /dev/null +++ b/doc/src/fix_python.txt @@ -0,0 +1,76 @@ +"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c + +:link(lws,http://lammps.sandia.gov) +:link(ld,Manual.html) +:link(lc,Section_commands.html#comm) + +:line + +fix python command :h3 + +[Syntax:] + +fix ID group-ID python N callback function_name :pre + +ID, group-ID are ignored by this fix :ulb,l +python = style name of this fix command :l +N = execute every N steps :l +callback = {post_force} or {end_of_step} :l + {post_force} = callback after force computations on atoms every N time steps + {end_of_step} = callback after every N time steps :pre +:ule + +[Examples:] + +python post_force_callback here """ +from lammps import lammps :pre + +def post_force_callback(lammps_ptr, vflag): + lmp = lammps(ptr=lammps_ptr) + # access LAMMPS state using Python interface +""" :pre + +python end_of_step_callback here """ +def end_of_step_callback(lammps_ptr): + lmp = lammps(ptr=lammps_ptr) + # access LAMMPS state using Python interface +""" :pre + +fix pf all python 50 post_force post_force_callback +fix eos all python 50 end_of_step end_of_step_callback :pre + +[Description:] + +This fix allows you to call a Python function during a simulation run. 
+The callback is either executed after forces have been applied to atoms +or at the end of every N time steps. + +Callback functions must be declared in the global scope of the +active Python interpreter. This can either be done by defining it +inline using the python command or by importing functions from other +Python modules. If LAMMPS is driven using the library interface from +Python, functions defined in the driving Python interpreter can also +be executed. + +Each callback is given a pointer object as first argument. This can be +used to initialize an instance of the lammps Python interface, which +gives access to the LAMMPS state from Python. + +IMPORTANT NOTE: While you can access the state of LAMMPS via library functions +from these callbacks, trying to execute input script commands will in the best +case not work or in the worst case result in undefined behavior. + +[Restrictions:] + +This fix is part of the PYTHON package. It is only enabled if +LAMMPS was built with that package. See the "Making +LAMMPS"_Section_start.html#start_3 section for more info. + +Building LAMMPS with the PYTHON package will link LAMMPS with the +Python library on your system. Settings to enable this are in the +lib/python/Makefile.lammps file. See the lib/python/README file for +information on those settings. + +[Related commands:] + +"python command"_python.html diff --git a/doc/src/fix_qeq.txt b/doc/src/fix_qeq.txt index f9c8ecde63..22f4766896 100644 --- a/doc/src/fix_qeq.txt +++ b/doc/src/fix_qeq.txt @@ -74,7 +74,7 @@ NOTE: The "fix qeq/comb"_fix_qeq_comb.html command must still be used to perform charge equilibration with the "COMB potential"_pair_comb.html. 
The "fix qeq/reax"_fix_qeq_reax.html command can be used to perform charge equilibration with the "ReaxFF -force field"_pair_reax_c.html, although fix qeq/shielded yields the +force field"_pair_reaxc.html, although fix qeq/shielded yields the same results as fix qeq/reax if {Nevery}, {cutoff}, and {tolerance} are the same. Eventually the fix qeq/reax command will be deprecated. @@ -116,7 +116,7 @@ the shielded Coulomb is given by equation (13) of the "ReaxFF force field"_#vanDuin paper. The shielding accounts for charge overlap between charged particles at small separation. This style is the same as "fix qeq/reax"_fix_qeq_reax.html, and can be used with "pair_style -reax/c"_pair_reax_c.html. Only the {chi}, {eta}, and {gamma} +reax/c"_pair_reaxc.html. Only the {chi}, {eta}, and {gamma} parameters from the {qfile} file are used. This style solves partial charges on atoms via the matrix inversion method. A tolerance of 1.0e-6 is usually a good number. diff --git a/doc/src/fix_qeq_reax.txt b/doc/src/fix_qeq_reax.txt index 76c95e1117..aed043f6c0 100644 --- a/doc/src/fix_qeq_reax.txt +++ b/doc/src/fix_qeq_reax.txt @@ -30,7 +30,7 @@ fix 1 all qeq/reax 1 0.0 10.0 1.0e-6 param.qeq :pre Perform the charge equilibration (QEq) method as described in "(Rappe and Goddard)"_#Rappe2 and formulated in "(Nakano)"_#Nakano2. It is typically used in conjunction with the ReaxFF force field model as -implemented in the "pair_style reax/c"_pair_reax_c.html command, but +implemented in the "pair_style reax/c"_pair_reaxc.html command, but it can be used with any potential in LAMMPS, so long as it defines and uses charges on each atom. The "fix qeq/comb"_fix_qeq_comb.html command should be used to perform charge equilibration with the "COMB @@ -42,7 +42,7 @@ The QEq method minimizes the electrostatic energy of the system by adjusting the partial charge on individual atoms based on interactions with their neighbors. It requires some parameters for each atom type. 
If the {params} setting above is the word "reax/c", then these are -extracted from the "pair_style reax/c"_pair_reax_c.html command and +extracted from the "pair_style reax/c"_pair_reaxc.html command and the ReaxFF force field file it reads in. If a file name is specified for {params}, then the parameters are taken from the specified file and the file must contain one line for each atom type. The latter @@ -106,7 +106,7 @@ be used for periodic cell dimensions less than 10 angstroms. [Related commands:] -"pair_style reax/c"_pair_reax_c.html +"pair_style reax/c"_pair_reaxc.html [Default:] none diff --git a/doc/src/fix_reax_bonds.txt b/doc/src/fix_reax_bonds.txt index 1fd1b3ca5a..aadb0a9cbc 100644 --- a/doc/src/fix_reax_bonds.txt +++ b/doc/src/fix_reax_bonds.txt @@ -28,13 +28,30 @@ fix 1 all reax/c/bonds 100 bonds.reaxc :pre Write out the bond information computed by the ReaxFF potential specified by "pair_style reax"_pair_reax.html or "pair_style -reax/c"_pair_reax_c.html in the exact same format as the original +reax/c"_pair_reaxc.html in the exact same format as the original stand-alone ReaxFF code of Adri van Duin. The bond information is written to {filename} on timesteps that are multiples of {Nevery}, including timestep 0. For time-averaged chemical species analysis, please see the "fix reaxc/c/species"_fix_reaxc_species.html command. -The format of the output file should be self-explanatory. +The format of the output file should be reasonably self-explanatory. +The meaning of the column header abbreviations is as follows: + +id = atom id +type = atom type +nb = number of bonds +id_1 = atom id of first bond +id_nb = atom id of Nth bond +mol = molecule id +bo_1 = bond order of first bond +bo_nb = bond order of Nth bond +abo = atom bond order (sum of all bonds) +nlp = number of lone pairs +q = atomic charge :ul + +If the filename ends with ".gz", the output file is written in gzipped +format. 
A gzipped dump file will be about 3x smaller than the text +version, but will also take longer to write. :line @@ -80,14 +97,17 @@ reax"_pair_reax.html be invoked. This fix is part of the REAX package. It is only enabled if LAMMPS was built with that package, which also requires the REAX library be built and linked with LAMMPS. The fix reax/c/bonds command requires that the "pair_style -reax/c"_pair_reax_c.html be invoked. This fix is part of the +reax/c"_pair_reaxc.html be invoked. This fix is part of the USER-REAXC package. It is only enabled if LAMMPS was built with that package. See the "Making LAMMPS"_Section_start.html#start_3 section for more info. +To write gzipped bond files, you must compile LAMMPS with the +-DLAMMPS_GZIP option. + [Related commands:] "pair_style reax"_pair_reax.html, "pair_style -reax/c"_pair_reax_c.html, "fix reax/c/species"_fix_reaxc_species.html +reax/c"_pair_reaxc.html, "fix reax/c/species"_fix_reaxc_species.html [Default:] none diff --git a/doc/src/fix_reaxc_species.txt b/doc/src/fix_reaxc_species.txt index 00db91900e..9a588356e0 100644 --- a/doc/src/fix_reaxc_species.txt +++ b/doc/src/fix_reaxc_species.txt @@ -41,7 +41,7 @@ fix 1 all reax/c/species 1 100 100 species.out element Au O H position 1000 AuOH [Description:] Write out the chemical species information computed by the ReaxFF -potential specified by "pair_style reax/c"_pair_reax_c.html. +potential specified by "pair_style reax/c"_pair_reaxc.html. Bond-order values (either averaged or instantaneous, depending on value of {Nrepeat}) are used to determine chemical bonds. Every {Nfreq} timesteps, chemical species information is written to @@ -52,6 +52,10 @@ number of molecules of each species. In this context, "species" means a unique molecule. The chemical formula of each species is given in the first line. +If the filename ends with ".gz", the output file is written in gzipped +format. 
A gzipped dump file will be about 3x smaller than the text version, +but will also take longer to write. + Optional keyword {cutoff} can be assigned to change the minimum bond-order values used in identifying chemical bonds between pairs of atoms. Bond-order cutoffs should be carefully chosen, as bond-order @@ -65,7 +69,7 @@ symbol printed for each LAMMPS atom type. The number of symbols must match the number of LAMMPS atom types and each symbol must consist of 1 or 2 alphanumeric characters. Normally, these symbols should be chosen to match the chemical identity of each LAMMPS atom type, as -specified using the "reax/c pair_coeff"_pair_reax_c.html command and +specified using the "reax/c pair_coeff"_pair_reaxc.html command and the ReaxFF force field file. The optional keyword {position} writes center-of-mass positions of @@ -158,19 +162,22 @@ more instructions on how to use the accelerated styles effectively. [Restrictions:] The fix species currently only works with -"pair_style reax/c"_pair_reax_c.html and it requires that the "pair_style -reax/c"_pair_reax_c.html be invoked. This fix is part of the +"pair_style reax/c"_pair_reaxc.html and it requires that the "pair_style +reax/c"_pair_reaxc.html be invoked. This fix is part of the USER-REAXC package. It is only enabled if LAMMPS was built with that package. See the "Making LAMMPS"_Section_start.html#start_3 section for more info. +To write gzipped species files, you must compile LAMMPS with the +-DLAMMPS_GZIP option. + It should be possible to extend it to other reactive pair_styles (such as "rebo"_pair_airebo.html, "airebo"_pair_airebo.html, "comb"_pair_comb.html, and "bop"_pair_bop.html), but this has not yet been done. 
[Related commands:] -"pair_style reax/c"_pair_reax_c.html, "fix +"pair_style reax/c"_pair_reaxc.html, "fix reax/bonds"_fix_reax_bonds.html [Default:] diff --git a/doc/src/fixes.txt b/doc/src/fixes.txt index e8777d4113..ce73ed99e3 100644 --- a/doc/src/fixes.txt +++ b/doc/src/fixes.txt @@ -111,6 +111,7 @@ Fixes :h1 fix_press_berendsen fix_print fix_property_atom + fix_python fix_qbmsst fix_qeq fix_qeq_comb diff --git a/doc/src/improper_cossq.txt b/doc/src/improper_cossq.txt index 513f0b3151..e238063a8f 100644 --- a/doc/src/improper_cossq.txt +++ b/doc/src/improper_cossq.txt @@ -45,12 +45,9 @@ above, or in the data file or restart files read by the "read_data"_read_data.html or "read_restart"_read_restart.html commands: -K (energy/radian^2) +K (energy) X0 (degrees) :ul -X0 is specified in degrees, but LAMMPS converts it to radians -internally; hence the units of K are in energy/radian^2. - :line Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are diff --git a/doc/src/improper_ring.txt b/doc/src/improper_ring.txt index 705b1cf742..cba59399e7 100644 --- a/doc/src/improper_ring.txt +++ b/doc/src/improper_ring.txt @@ -49,12 +49,9 @@ above, or in the data file or restart files read by the "read_data"_read_data.html or "read_restart"_read_restart.html commands: -K (energy/radian^2) +K (energy) theta0 (degrees) :ul -theta0 is specified in degrees, but LAMMPS converts it to radians -internally; hence the units of K are in energy/radian^2. - :line Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are diff --git a/doc/src/kspace_modify.txt b/doc/src/kspace_modify.txt index 7a6c7191f0..b488df9627 100644 --- a/doc/src/kspace_modify.txt +++ b/doc/src/kspace_modify.txt @@ -290,9 +290,10 @@ to be specified using the {gewald/disp}, {mesh/disp}, {force/disp/real} or {force/disp/kspace} keywords, or the code will stop with an error message. When this option is set to {yes}, the error message will not appear and the simulation will start. 
-For a typical application, using the automatic parameter generation will provide -simulations that are either inaccurate or slow. Using this option is thus not -recommended. For guidelines on how to obtain good parameters, see the "How-To"_Section_howto.html#howto_23 discussion. +For a typical application, using the automatic parameter generation +will provide simulations that are either inaccurate or slow. Using this +option is thus not recommended. For guidelines on how to obtain good +parameters, see the "How-To"_Section_howto.html#howto_24 discussion. [Restrictions:] none diff --git a/doc/src/lammps.book b/doc/src/lammps.book index 6c68955bc9..1769f29825 100644 --- a/doc/src/lammps.book +++ b/doc/src/lammps.book @@ -55,12 +55,12 @@ dihedral_style.html dimension.html displace_atoms.html dump.html -dump_custom_vtk.html dump_h5md.html dump_image.html dump_modify.html dump_molfile.html -dump_nc.html +dump_netcdf.html +dump_vtk.html echo.html fix.html fix_modify.html @@ -237,6 +237,7 @@ fix_pour.html fix_press_berendsen.html fix_print.html fix_property_atom.html +fix_python.html fix_qbmsst.html fix_qeq.html fix_qeq_comb.html @@ -432,6 +433,7 @@ pair_gauss.html pair_gayberne.html pair_gran.html pair_gromacs.html +pair_gw.html pair_hbond_dreiding.html pair_hybrid.html pair_kim.html @@ -467,9 +469,10 @@ pair_oxdna.html pair_oxdna2.html pair_peri.html pair_polymorphic.html +pair_python.html pair_quip.html pair_reax.html -pair_reax_c.html +pair_reaxc.html pair_resquared.html pair_sdk.html pair_smd_hertz.html diff --git a/doc/src/pair_buck.txt b/doc/src/pair_buck.txt index 1b9f333376..49161404c3 100644 --- a/doc/src/pair_buck.txt +++ b/doc/src/pair_buck.txt @@ -75,7 +75,7 @@ Lennard-Jones 12/6) given by :c,image(Eqs/pair_buck.jpg) where rho is an ionic-pair dependent length parameter, and Rc is the -cutoff on both terms. +cutoff on both terms. 
The styles with {coul/cut} or {coul/long} or {coul/msm} add a Coulombic term as described for the "lj/cut"_pair_lj.html pair styles. @@ -120,6 +120,9 @@ cutoff (distance units) cutoff2 (distance units) :ul The second coefficient, rho, must be greater than zero. +The coefficients A, rho, and C can be written as analytical expressions +of epsilon and sigma, in analogy to the Lennard-Jones potential +"(Khrapak)"_#Khrapak. The latter 2 coefficients are optional. If not specified, the global A,C and Coulombic cutoffs are used. If only one cutoff is specified, @@ -127,7 +130,6 @@ it is used as the cutoff for both A,C and Coulombic interactions for this type pair. If both coefficients are specified, they are used as the A,C and Coulombic cutoffs for this type pair. You cannot specify 2 cutoffs for style {buck}, since it has no Coulombic terms. - For {buck/coul/long} only the LJ cutoff can be specified since a Coulombic cutoff cannot be specified for an individual I,J type pair. All type pairs use the same global Coulombic cutoff specified in the @@ -194,3 +196,6 @@ only enabled if LAMMPS was built with that package. See the "pair_coeff"_pair_coeff.html, "pair_style born"_pair_born.html [Default:] none + +:link(Khrapak) +[(Khrapak)] Khrapak, Chaudhuri, and Morfill, J Chem Phys, 134, 054120 (2011). diff --git a/doc/src/pair_charmm.txt b/doc/src/pair_charmm.txt index ba6e60e121..9c5973c725 100644 --- a/doc/src/pair_charmm.txt +++ b/doc/src/pair_charmm.txt @@ -99,9 +99,10 @@ artifacts. NOTE: The newer {charmmfsw} or {charmmfsh} styles were released in March 2017. We recommend they be used instead of the older {charmm} -styles. Eventually code from the new styles will propagate into the -related pair styles (e.g. implicit, accelerator, free energy -variants). +styles. This includes the newer "dihedral_style +charmmfsw"_dihedral_charmm.html command. Eventually code from the new +styles will propagate into the related pair styles (e.g. 
implicit, +accelerator, free energy variants). The general CHARMM formulas are as follows diff --git a/doc/src/pair_edip.txt b/doc/src/pair_edip.txt index cdfc265752..86453859d3 100644 --- a/doc/src/pair_edip.txt +++ b/doc/src/pair_edip.txt @@ -7,11 +7,13 @@ :line pair_style edip command :h3 +pair_style edip/multi command :h3 [Syntax:] -pair_style edip :pre -pair_style edip/omp :pre +pair_style style :pre + +style = {edip} or {edip/multi} :ul [Examples:] @@ -20,11 +22,14 @@ pair_coeff * * Si.edip Si [Description:] -The {edip} style computes a 3-body "EDIP"_#EDIP potential which is -popular for modeling silicon materials where it can have advantages -over other models such as the "Stillinger-Weber"_pair_sw.html or -"Tersoff"_pair_tersoff.html potentials. In EDIP, the energy E of a -system of atoms is +The {edip} and {edip/multi} styles compute a 3-body "EDIP"_#EDIP +potential which is popular for modeling silicon materials where +it can have advantages over other models such as the +"Stillinger-Weber"_pair_sw.html or "Tersoff"_pair_tersoff.html +potentials. The {edip} style has been programmed for single element +potentials, while {edip/multi} supports multi-element EDIP runs. + +In EDIP, the energy E of a system of atoms is :c,image(Eqs/pair_edip.jpg) @@ -142,7 +147,7 @@ This pair style can only be used via the {pair} keyword of the [Restrictions:] -This angle style can only be used if LAMMPS was built with the +This pair style can only be used if LAMMPS was built with the USER-MISC package. See the "Making LAMMPS"_Section_start.html#start_3 section for more info on packages. @@ -151,7 +156,7 @@ for pair interactions. The EDIP potential files provided with LAMMPS (see the potentials directory) are parameterized for metal "units"_units.html. 
-You can use the SW potential with any LAMMPS units, but you would need +You can use the EDIP potential with any LAMMPS units, but you would need to create your own EDIP potential file with coefficients listed in the appropriate units if your simulation doesn't use "metal" units. @@ -164,4 +169,4 @@ appropriate units if your simulation doesn't use "metal" units. :line :link(EDIP) -[(EDIP)] J. F. Justo et al., Phys. Rev. B 58, 2539 (1998). +[(EDIP)] J F Justo et al, Phys Rev B 58, 2539 (1998). diff --git a/doc/src/pair_gauss.txt b/doc/src/pair_gauss.txt index 92d8b51d8b..f6f46a2de8 100644 --- a/doc/src/pair_gauss.txt +++ b/doc/src/pair_gauss.txt @@ -128,7 +128,7 @@ The B parameter is converted to a distance (sigma), before mixing afterwards (using B=sigma^2). Negative A values are converted to positive A values (using abs(A)) before mixing, and converted back after mixing -(by multiplying by sign(Ai)*sign(Aj)). +(by multiplying by min(sign(Ai),sign(Aj))). This way, if either particle is repulsive (if Ai<0 or Aj<0), then the default interaction between both particles will be repulsive. diff --git a/doc/src/pair_gw.txt b/doc/src/pair_gw.txt new file mode 100644 index 0000000000..fcf63b1bc4 --- /dev/null +++ b/doc/src/pair_gw.txt @@ -0,0 +1,120 @@ +"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c + +:link(lws,http://lammps.sandia.gov) +:link(ld,Manual.html) +:link(lc,Section_commands.html#comm) + +:line + +pair_style gw command :h3 +pair_style gw/zbl command :h3 + +[Syntax:] + +pair_style style :pre + +style = {gw} or {gw/zbl} :ul + +[Examples:] + +pair_style gw +pair_coeff * * SiC.gw Si C C + +pair_style gw/zbl +pair_coeff * * SiC.gw.zbl C Si :pre + +[Description:] + +The {gw} style computes a 3-body "Gao-Weber"_#Gao potential; +similarly {gw/zbl} combines this potential with a modified +repulsive ZBL core function in a similar fashion as implemented +in the "tersoff/zbl"_pair_tersoff_zbl.html pair style. 
+ +Unfortunately the author of this contributed code has not been +able to submit suitable documentation explaining the details +of the potentials. The LAMMPS developers thus have finally decided +to release the code anyway with only the technical explanations. +For details of the model and the parameters, please refer to the +linked publication. + +Only a single pair_coeff command is used with the {gw} and {gw/zbl} +styles which specifies a Gao-Weber potential file with parameters +for all needed elements. These are mapped to LAMMPS atom types by +specifying N additional arguments after the filename in the pair_coeff +command, where N is the number of LAMMPS atom types: + +filename +N element names = mapping of GW elements to atom types :ul + +See the "pair_coeff"_pair_coeff.html doc page for alternate ways +to specify the path for the potential file. + +As an example, imagine a file SiC.gw has Gao-Weber values for Si and C. +If your LAMMPS simulation has 4 atom types and you want the first 3 to +be Si, and the 4th to be C, you would use the following pair_coeff command: + +pair_coeff * * SiC.gw Si Si Si C :pre + +The first 2 arguments must be * * so as to span all LAMMPS atom types. +The first three Si arguments map LAMMPS atom types 1,2,3 to the Si +element in the GW file. The final C argument maps LAMMPS atom type 4 +to the C element in the GW file. If a mapping value is specified as +NULL, the mapping is not performed. This can be used when a {gw} +potential is used as part of the {hybrid} pair style. The NULL values +are placeholders for atom types that will be used with other +potentials. + +Gao-Weber files in the {potentials} directory of the LAMMPS +distribution have a ".gw" suffix. Gao-Weber with ZBL files +have a ".gw.zbl" suffix. The structure of the potential files +is similar to other many-body potentials supported by LAMMPS. +You have to refer to the comments in the files and the literature +to learn more details. 
+ +:line + +[Mixing, shift, table, tail correction, restart, rRESPA info]: + +For atom type pairs I,J and I != J, where types I and J correspond to +two different element types, mixing is performed by LAMMPS as +described above from values in the potential file. + +This pair style does not support the "pair_modify"_pair_modify.html +shift, table, and tail options. + +This pair style does not write its information to "binary restart +files"_restart.html, since it is stored in potential files. Thus, you +need to re-specify the pair_style and pair_coeff commands in an input +script that reads a restart file. + +This pair style can only be used via the {pair} keyword of the +"run_style respa"_run_style.html command. It does not support the +{inner}, {middle}, {outer} keywords. + +:line + +[Restrictions:] + +This pair style is part of the USER-MISC package. It is only enabled +if LAMMPS was built with that package. See +the "Making LAMMPS"_Section_start.html#start_3 section for more info. + +This pair style requires the "newton"_newton.html setting to be "on" +for pair interactions. + +The Gao-Weber potential files provided with LAMMPS (see the +potentials directory) are parameterized for metal "units"_units.html. +You can use the GW potential with any LAMMPS units, but you would need +to create your own GW potential file with coefficients listed in the +appropriate units if your simulation doesn't use "metal" units. + +[Related commands:] + +"pair_coeff"_pair_coeff.html + +[Default:] none + +:line + +:link(Gao) +[(Gao)] Gao and Weber, Nuclear Instruments and Methods in Physics Research B 191 (2012) 504. diff --git a/doc/src/pair_hybrid.txt b/doc/src/pair_hybrid.txt index 7ef54e7f07..5166fe1f84 100644 --- a/doc/src/pair_hybrid.txt +++ b/doc/src/pair_hybrid.txt @@ -73,7 +73,7 @@ pair_coeff command to assign parameters for the different type pairs. NOTE: There are two exceptions to this option to list an individual pair style multiple times. 
The first is for pair styles implemented as Fortran libraries: "pair_style meam"_pair_meam.html and "pair_style -reax"_pair_reax.html ("pair_style reax/c"_pair_reax_c.html is OK). +reax"_pair_reax.html ("pair_style reax/c"_pair_reaxc.html is OK). This is because unlike a C++ class, they can not be instantiated multiple times, due to the manner in which they were coded in Fortran. The second is for GPU-enabled pair styles in the GPU package. This is @@ -225,6 +225,12 @@ special_bonds lj/coul 1e-20 1e-20 0.5 pair_hybrid tersoff lj/cut/coul/long 12.0 pair_modify pair tersoff special lj/coul 1.0 1.0 1.0 :pre +For use with the various "compute */tally"_compute_tally.html +computes, the "pair_modify compute/tally"_pair_modify.html +command can be used to selectively turn off processing of +the compute tally styles, for example, if those pair styles +(e.g. manybody styles) do not support this feature. + See the "pair_modify"_pair_modify.html doc page for details on the specific syntax, requirements and restrictions. diff --git a/doc/src/pair_meam_spline.txt b/doc/src/pair_meam_spline.txt index a02125a6d9..2295a6640b 100644 --- a/doc/src/pair_meam_spline.txt +++ b/doc/src/pair_meam_spline.txt @@ -23,7 +23,8 @@ pair_coeff * * Ti.meam.spline Ti Ti Ti :pre The {meam/spline} style computes pairwise interactions for metals using a variant of modified embedded-atom method (MEAM) potentials -"(Lenosky)"_#Lenosky1. The total energy E is given by +"(Lenosky)"_#Lenosky1. For a single species ("old-style") MEAM, +the total energy E is given by :c,image(Eqs/pair_meam_spline.jpg) @@ -31,6 +32,20 @@ where rho_i is the density at atom I, theta_jik is the angle between atoms J, I, and K centered on atom I. The five functions Phi, U, rho, f, and g are represented by cubic splines. 
+The {meam/spline} style also supports a new style multicomponent +modified embedded-atom method (MEAM) potential "(Zhang)"_#Zhang4, where +the total energy E is given by + +:c,image(Eqs/pair_meam_spline_multicomponent.jpg) + +where the five functions Phi, U, rho, f, and g depend on the chemistry +of the atoms in the interaction. In particular, if there are N different +chemistries, there are N different U, rho, and f functions, while there +are N(N+1)/2 different Phi and g functions. The new style multicomponent +MEAM potential files are identified by the second line in the file, which starts +with "meam/spline" followed by the number of elements and the name of each +element. + The cutoffs and the coefficients for these spline functions are listed in a parameter file which is specified by the "pair_coeff"_pair_coeff.html command. Parameter files for different @@ -59,7 +74,7 @@ N element names = mapping of spline-based MEAM elements to atom types :ul See the "pair_coeff"_pair_coeff.html doc page for alternate ways to specify the path for the potential file. -As an example, imagine the Ti.meam.spline file has values for Ti. If +As an example, imagine the Ti.meam.spline file has values for Ti (old style). If your LAMMPS simulation has 3 atoms types and they are all to be treated with this potentials, you would use the following pair_coeff command: @@ -72,10 +87,19 @@ in the potential file. If a mapping value is specified as NULL, the mapping is not performed. This can be used when a {meam/spline} potential is used as part of the {hybrid} pair style. The NULL values are placeholders for atom types that will be used with other -potentials. +potentials. The old-style potential maps any non-NULL species named +on the command line to that single type. -NOTE: The {meam/spline} style currently supports only single-element -MEAM potentials. It may be extended for alloy systems in the future. 
+An example with a two component spline (new style) is TiO.meam.spline, where +the command + +pair_coeff * * TiO.meam.spline Ti O :pre + +will map the 1st atom type to Ti and the second atom type to O. Note +in this case that the species names need to match exactly with the +names of the elements in the TiO.meam.spline file; otherwise an +error will be raised. This behavior is different than the old style +MEAM files. :line @@ -104,9 +128,6 @@ more instructions on how to use the accelerated styles effectively. [Mixing, shift, table, tail correction, restart, rRESPA info]: -The current version of this pair style does not support multiple -element types or mixing. It has been designed for pure elements only. - This pair style does not support the "pair_modify"_pair_modify.html shift, table, and tail options. @@ -142,3 +163,6 @@ for more info. [(Lenosky)] Lenosky, Sadigh, Alonso, Bulatov, de la Rubia, Kim, Voter, Kress, Modelling Simulation Materials Science Engineering, 8, 825 (2000). + +:link(Zhang4) +[(Zhang)] Zhang and Trinkle, Computational Materials Science, 124, 204-210 (2016). diff --git a/doc/src/pair_modify.txt b/doc/src/pair_modify.txt index 03fb80ae5e..34dbb5bc3d 100644 --- a/doc/src/pair_modify.txt +++ b/doc/src/pair_modify.txt @@ -15,11 +15,13 @@ pair_modify keyword values ... 
:pre one or more keyword/value pairs may be listed :ulb,l keyword = {pair} or {shift} or {mix} or {table} or {table/disp} or {tabinner} or {tabinner/disp} or {tail} or {compute} :l {pair} values = sub-style N {special} which wt1 wt2 wt3 + or sub-style N {compute/tally} flag sub-style = sub-style of "pair hybrid"_pair_hybrid.html N = which instance of sub-style (only if sub-style is used multiple times) - {special} which wt1 wt2 wt3 = override {special_bonds} settings (optional) - which = {lj/coul} or {lj} or {coul} - w1,w2,w3 = 1-2, 1-3, and 1-4 weights from 0.0 to 1.0 inclusive + {special} which wt1 wt2 wt3 = override {special_bonds} settings (optional) + which = {lj/coul} or {lj} or {coul} + w1,w2,w3 = 1-2, 1-3, and 1-4 weights from 0.0 to 1.0 inclusive + {compute/tally} flag = {yes} or {no} {mix} value = {geometric} or {arithmetic} or {sixthpower} {shift} value = {yes} or {no} {table} value = N @@ -40,6 +42,7 @@ pair_modify shift yes mix geometric pair_modify tail yes pair_modify table 12 pair_modify pair lj/cut compute no +pair_modify pair tersoff compute/tally no pair_modify pair lj/cut/coul/long 1 special lj/coul 0.0 0.0 0.0 :pre [Description:] @@ -60,9 +63,12 @@ keywords will be applied to. Note that if the {pair} keyword is not used, and the pair style is {hybrid} or {hybrid/overlay}, then all the specified keywords will be applied to all sub-styles. -The {special} keyword can only be used in conjunction with the {pair} -keyword and must directly follow it. It allows to override the +The {special} and {compute/tally} keywords can [only] be used in +conjunction with the {pair} keyword and must directly follow it. +{special} allows to override the "special_bonds"_special_bonds.html settings for the specified sub-style. +{compute/tally} allows to disable or enable registering +"compute */tally"_compute_tally.html computes for a given sub-style. More details are given below. 
The {mix} keyword affects pair coefficients for interactions between @@ -231,6 +237,14 @@ setting. Substituting 1.0e-10 for 0.0 and 0.9999999999 for 1.0 is usually a sufficient workaround in this case without causing a significant error. +The {compute/tally} keyword takes exactly 1 argument ({no} or {yes}), +and allows to selectively disable or enable processing of the various +"compute */tally"_compute_tally.html styles for a given +"pair hybrid or hybrid/overlay"_pair_hybrid.html sub-style. + +NOTE: Any "pair_modify pair compute/tally" command must be issued +[before] the corresponding compute style is defined. + :line [Restrictions:] none @@ -240,8 +254,9 @@ conflicting options. You cannot use {tail} yes with 2d simulations. [Related commands:] -"pair_style"_pair_style.html, "pair_coeff"_pair_coeff.html, -"thermo_style"_thermo_style.html +"pair_style"_pair_style.html, "pair_style hybrid"_pair_hybrid.html, +"pair_coeff"_pair_coeff.html, "thermo_style"_thermo_style.html, +"compute */tally"_compute_tally.html [Default:] diff --git a/doc/src/pair_python.txt b/doc/src/pair_python.txt new file mode 100644 index 0000000000..557db37bbb --- /dev/null +++ b/doc/src/pair_python.txt @@ -0,0 +1,216 @@ +"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c + +:link(lws,http://lammps.sandia.gov) +:link(ld,Manual.html) +:link(lc,Section_commands.html#comm) + +:line + +pair_style python command :h3 + +[Syntax:] + +pair_style python cutoff :pre + +cutoff = global cutoff for interactions in python potential classes + +[Examples:] + +pair_style python 2.5 +pair_coeff * * py_pot.LJCutMelt lj :pre + +pair_style hybrid/overlay coul/long 12.0 python 12.0 +pair_coeff * * coul/long +pair_coeff * * python py_pot.LJCutSPCE OW NULL :pre + +[Description:] + +The {python} pair style provides a way to define pairwise additive +potential functions as python script code that is loaded into LAMMPS +from a python file which must contain specific python class definitions. 
+This allows rapid evaluation of different potential functions without +having to modify and recompile LAMMPS. Due to python being an +interpreted language, however, the performance of this pair style is +going to be significantly slower (often between 20x and 100x) than +corresponding compiled code. This penalty can be significantly reduced +through generating tabulations from the python code through the +"pair_write"_pair_write.html command, which is supported by this style. + +Only a single pair_coeff command is used with the {python} pair style +which specifies a python class inside a python module or file that +LAMMPS will look up in the current directory, the folder pointed to by +the LAMMPS_POTENTIALS environment variable or somewhere in your python +path. A single python module can hold multiple python pair class +definitions. The class definitions themselves have to follow specific +rules that are explained below. + +Atom types in the python class are specified through symbolic +constants, typically strings. These are mapped to LAMMPS atom types by +specifying N additional arguments after the class name in the +pair_coeff command, where N must be the number of currently defined +atom types: + +As an example, imagine a file {py_pot.py} has a python potential class +named {LJCutMelt} with parameters and potential functions for two +Lennard-Jones atom types labeled as 'LJ1' and 'LJ2'. If your LAMMPS +input defines 3 atom types, out of which the first +two are supposed to be using the 'LJ1' parameters and the third the +'LJ2' parameters, then you would use the following pair_coeff command: + +pair_coeff * * py_pot.LJCutMelt LJ1 LJ1 LJ2 :pre + +The first two arguments [must] be * * so as to span all LAMMPS atom +types. The first two LJ1 arguments map LAMMPS atom types 1 and 2 to +the LJ1 atom type in the LJCutMelt class of the py_pot.py file. The +final LJ2 argument maps LAMMPS atom type 3 to the LJ2 atom type in the +python file. 
If a mapping value is specified as NULL, the mapping is +not performed and any pair interaction with this atom type will be +skipped. This can be used when a {python} potential is used as part of +the {hybrid} or {hybrid/overlay} pair style. The NULL values are then +placeholders for atom types that will be used with other potentials. + +:line + +The python potential file has to start with the following code: + +from __future__ import print_function + +class LAMMPSPairPotential(object): + def __init__(self): + self.pmap=dict() + self.units='lj' + def map_coeff(self,name,ltype): + self.pmap\[ltype\]=name + def check_units(self,units): + if (units != self.units): + raise Exception("Conflicting units: %s vs. %s" % (self.units,units)) +:pre + +Any classes with definitions of specific potentials have to be derived +from this class and should be initialized in a similar fashion to the +example given below. + +NOTE: The class constructor has to set up a data structure containing +the potential parameters supported by this class. It should also +define a variable {self.units} containing a string matching one of the +options of LAMMPS' "units"_units.html command, which is used to +verify that the potential definition in the python class and in the +LAMMPS input match. 
+ +Here is an example for a single type Lennard-Jones potential class +{LJCutMelt} in reduced units, which defines an atom type {lj} for +which the parameters epsilon and sigma are both 1.0: + +class LJCutMelt(LAMMPSPairPotential): + def __init__(self): + super(LJCutMelt,self).__init__() + # set coeffs: 48*eps*sig**12, 24*eps*sig**6, + # 4*eps*sig**12, 4*eps*sig**6 + self.units = 'lj' + self.coeff = \{'lj' : \{'lj' : (48.0,24.0,4.0,4.0)\}\} +:pre + +The class also has to provide two methods for the computation of the +potential energy and forces, which have to be named {compute_force} +and {compute_energy}, and which both take 3 numerical arguments: + + rsq = the square of the distance between a pair of atoms (float) :l + itype = the (numerical) type of the first atom :l + jtype = the (numerical) type of the second atom :ul + +These functions need to compute the force and the energy, respectively, +and use the result as the return value. The functions need to use the +{pmap} dictionary to convert the LAMMPS atom type number to the symbolic +value of the internal potential parameter data structure. 
Following +the {LJCutMelt} example, here are the two functions: + + def compute_force(self,rsq,itype,jtype): + coeff = self.coeff\[self.pmap\[itype\]\]\[self.pmap\[jtype\]\] + r2inv = 1.0/rsq + r6inv = r2inv*r2inv*r2inv + lj1 = coeff\[0\] + lj2 = coeff\[1\] + return (r6inv * (lj1*r6inv - lj2))*r2inv :pre + + def compute_energy(self,rsq,itype,jtype): + coeff = self.coeff\[self.pmap\[itype\]\]\[self.pmap\[jtype\]\] + r2inv = 1.0/rsq + r6inv = r2inv*r2inv*r2inv + lj3 = coeff\[2\] + lj4 = coeff\[3\] + return (r6inv * (lj3*r6inv - lj4)) :pre + +NOTE: for consistency with the C++ pair styles in LAMMPS, the +{compute_force} function follows the conventions of the Pair::single() +methods and does not return the full force, but the force scaled by +the distance between the two atoms, so this value only needs to be +multiplied by delta x, delta y, and delta z to conveniently obtain the +three components of the force vector between these two atoms. + +:line + +NOTE: The evaluation of scripted python code will slow down the +computation of pair-wise interactions quite significantly. However, this +can be largely worked around through using the python pair style not +for the actual simulation, but to generate tabulated potentials on the +fly using the "pair_write"_pair_write.html command. Please see below +for an example LAMMPS input of how to build a table file: + +pair_style python 2.5 +pair_coeff * * py_pot.LJCutMelt lj +shell rm -f melt.table +pair_write 1 1 2000 rsq 0.01 2.5 melt.table lj :pre + +Note that it is strongly recommended to try to [delete] the potential +table file before generating it. The {pair_write} command will +always append to a table file, while pair style table will use the +first match. Thus when changing the potential function in the python +class, the table pair style will otherwise still read the old variant. 
+ +After switching the pair style to {table}, the potential tables need +to be assigned to the LAMMPS atom types like this: + +pair_style table linear 2000 +pair_coeff 1 1 melt.table lj :pre + +This can also be done for more complex systems. Please see the +{examples/python} folders for a few more examples. + +:line + +[Mixing, shift, table, tail correction, restart, rRESPA info]: + +Mixing of potential parameters has to be handled inside the provided +python module. The python pair style simply assumes that force and +energy computation can be correctly performed for all pairs of atom +types as they are mapped to the atom type labels inside the python +potential class. + +This pair style does not support the "pair_modify"_pair_modify.html +shift, table, and tail options. + +This pair style does not write its information to "binary restart +files"_restart.html, since it is stored in potential files. Thus, you +need to re-specify the pair_style and pair_coeff commands in an input +script that reads a restart file. + +This pair style can only be used via the {pair} keyword of the +"run_style respa"_run_style.html command. It does not support the +{inner}, {middle}, {outer} keywords. + +:line + +[Restrictions:] + +This pair style is part of the PYTHON package. It is only enabled if +LAMMPS was built with that package. See the "Making +LAMMPS"_Section_start.html#start_3 section for more info. + +[Related commands:] + +"pair_coeff"_pair_coeff.html, "pair_write"_pair_write.html, +"pair style table"_pair_table.html + +[Default:] none + + diff --git a/doc/src/pair_reax.txt b/doc/src/pair_reax.txt index 7215c12cee..1d13f93706 100644 --- a/doc/src/pair_reax.txt +++ b/doc/src/pair_reax.txt @@ -36,7 +36,7 @@ supplemental information of the following paper: the most up-to-date version of ReaxFF as of summer 2010. WARNING: pair style reax is now deprecated and will soon be retired. Users -should switch to "pair_style reax/c"_pair_reax_c.html. 
The {reax} style +should switch to "pair_style reax/c"_pair_reaxc.html. The {reax} style differs from the {reax/c} style in the lo-level implementation details. The {reax} style is a Fortran library, linked to LAMMPS. The {reax/c} style was initially @@ -82,7 +82,7 @@ be specified. Two examples using {pair_style reax} are provided in the examples/reax sub-directory, along with corresponding examples for -"pair_style reax/c"_pair_reax_c.html. Note that while the energy and force +"pair_style reax/c"_pair_reaxc.html. Note that while the energy and force calculated by both of these pair styles match very closely, the contributions due to the valence angles differ slightly due to the fact that with {pair_style reax/c} the default value of {thb_cutoff_sq} @@ -201,7 +201,7 @@ appropriate units if your simulation doesn't use "real" units. [Related commands:] -"pair_coeff"_pair_coeff.html, "pair_style reax/c"_pair_reax_c.html, +"pair_coeff"_pair_coeff.html, "pair_style reax/c"_pair_reaxc.html, "fix_reax_bonds"_fix_reax_bonds.html [Default:] diff --git a/doc/src/pair_reax_c.txt b/doc/src/pair_reaxc.txt similarity index 96% rename from doc/src/pair_reax_c.txt rename to doc/src/pair_reaxc.txt index c1d719d22e..76a8e6fd5c 100644 --- a/doc/src/pair_reax_c.txt +++ b/doc/src/pair_reaxc.txt @@ -17,6 +17,7 @@ cfile = NULL or name of a control file :ulb,l zero or more keyword/value pairs may be appended :l keyword = {checkqeq} or {lgvdw} or {safezone} or {mincap} {checkqeq} value = {yes} or {no} = whether or not to require qeq/reax fix + {enobonds} value = {yes} or {no} = whether or not to tally energy of atoms with no bonds {lgvdw} value = {yes} or {no} = whether or not to use a low gradient vdW correction {safezone} = factor used for array allocation {mincap} = minimum size for array allocation :pre @@ -127,6 +128,13 @@ recommended value for parameter {thb} is 0.01, which can be set in the control file. 
Note: Force field files are different for the original or lg corrected pair styles, using wrong ffield file generates an error message. +Using the optional keyword {enobonds} with the value {yes}, the energy +of atoms with no bonds (i.e. isolated atoms) is included in the total +potential energy and the per-atom energy of that atom. If the value +{no} is specified then the energy of atoms with no bonds is set to zero. +The latter behavior is usually not desired, as it causes discontinuities +in the potential energy when the bonding of an atom drops to zero. + Optional keywords {safezone} and {mincap} are used for allocating reax/c arrays. Increasing these values can avoid memory problems, such as segmentation faults and bondchk failed errors, that could occur under @@ -331,7 +339,7 @@ reax"_pair_reax.html [Default:] -The keyword defaults are checkqeq = yes, lgvdw = no, safezone = 1.2, +The keyword defaults are checkqeq = yes, enobonds = yes, lgvdw = no, safezone = 1.2, mincap = 50. :line diff --git a/doc/src/pair_sdk.txt b/doc/src/pair_sdk.txt index 212760e03d..1c348eaaf7 100644 --- a/doc/src/pair_sdk.txt +++ b/doc/src/pair_sdk.txt @@ -134,7 +134,7 @@ respa"_run_style.html command. [Restrictions:] -All of the lj/sdk pair styles are part of the USER-CG-CMM package. +All of the lj/sdk pair styles are part of the USER-CGSDK package. The {lj/sdk/coul/long} style also requires the KSPACE package to be built (which is enabled by default). They are only enabled if LAMMPS was built with that package. See the "Making diff --git a/doc/src/pair_srp.txt b/doc/src/pair_srp.txt index 3f54445ba8..e7f1e00d10 100644 --- a/doc/src/pair_srp.txt +++ b/doc/src/pair_srp.txt @@ -150,6 +150,8 @@ hybrid"_pair_hybrid.html. This pair style requires the "newton"_newton.html command to be {on} for non-bonded interactions. 
+This pair style is not compatible with "rigid body integrators"_fix_rigid.html + [Related commands:] "pair_style hybrid"_pair_hybrid.html, "pair_coeff"_pair_coeff.html, diff --git a/doc/src/pair_tersoff.txt b/doc/src/pair_tersoff.txt index eb4a8993cf..23a20ad0fd 100644 --- a/doc/src/pair_tersoff.txt +++ b/doc/src/pair_tersoff.txt @@ -18,7 +18,7 @@ pair_style tersoff/table/omp command :h3 pair_style style :pre -style = {tersoff} or {tersoff/table} or {tersoff/gpu} or {tersoff/omp} or {tersoff/table/omp} +style = {tersoff} or {tersoff/table} or {tersoff/gpu} or {tersoff/omp} or {tersoff/table/omp} :ul [Examples:] diff --git a/doc/src/pair_yukawa_colloid.txt b/doc/src/pair_yukawa_colloid.txt index 3d5294bbdb..ecdc1496ab 100644 --- a/doc/src/pair_yukawa_colloid.txt +++ b/doc/src/pair_yukawa_colloid.txt @@ -35,7 +35,7 @@ cutoff. In contrast to "pair_style yukawa"_pair_yukawa.html, this functional form arises from the Coulombic interaction between two colloid particles, screened due to the presence of an electrolyte, see the -book by "Safran"_#Safran for a derivation in the context of DVLO +book by "Safran"_#Safran for a derivation in the context of DLVO theory. "Pair_style yukawa"_pair_yukawa.html is a screened Coulombic potential between two point-charges and uses no such approximation. diff --git a/doc/src/pairs.txt b/doc/src/pairs.txt index 8694747dad..538e2a7268 100644 --- a/doc/src/pairs.txt +++ b/doc/src/pairs.txt @@ -36,6 +36,7 @@ Pair Styles :h1 pair_gayberne pair_gran pair_gromacs + pair_gw pair_hbond_dreiding pair_hybrid pair_kim @@ -71,9 +72,10 @@ Pair Styles :h1 pair_oxdna2 pair_peri pair_polymorphic + pair_python pair_quip pair_reax - pair_reax_c + pair_reaxc pair_resquared pair_sdk pair_smd_hertz diff --git a/doc/src/python.txt b/doc/src/python.txt index be6d1b215f..e00b90234c 100644 --- a/doc/src/python.txt +++ b/doc/src/python.txt @@ -14,7 +14,7 @@ python func keyword args ... 
:pre func = name of Python function :ulb,l one or more keyword/args pairs must be appended :l -keyword = {invoke} or {input} or {return} or {format} or {length} or {file} or {here} or {exists} +keyword = {invoke} or {input} or {return} or {format} or {length} or {file} or {here} or {exists} or {source} {invoke} arg = none = invoke the previously defined Python function {input} args = N i1 i2 ... iN N = # of inputs to function @@ -36,7 +36,12 @@ keyword = {invoke} or {input} or {return} or {format} or {length} or {file} or { {here} arg = inline inline = one or more lines of Python code which defines func must be a single argument, typically enclosed between triple quotes - {exists} arg = none = Python code has been loaded by previous python command :pre + {exists} arg = none = Python code has been loaded by previous python command + {source} arg = {filename} or {inline} + filename = file of Python code which will be executed immediately + inline = one or more lines of Python code which will be executed immediately + must be a single argument, typically enclosed between triple quotes +:pre :ule [Examples:] @@ -50,7 +55,7 @@ def factorial(n): return n * factorial(n-1) """ :pre -python loop input 1 SELF return v_value format -f here """ +python loop input 1 SELF return v_value format pf here """ def loop(lmpptr,N,cut0): from lammps import lammps lmp = lammps(ptr=lmpptr) :pre @@ -59,7 +64,7 @@ def loop(lmpptr,N,cut0): for i in range(N): cut = cut0 + i*0.1 - lmp.set_variable("cut",cut) # set a variable in LAMMPS + lmp.set_variable("cut",cut) # set a variable in LAMMPS lmp.command("pair_style lj/cut $\{cut\}") # LAMMPS commands lmp.command("pair_coeff * * 1.0 1.0") lmp.command("run 100") @@ -67,12 +72,8 @@ def loop(lmpptr,N,cut0): [Description:] -NOTE: It is not currently possible to use the "python"_python.html -command described in this section with Python 3, only with Python 2. -The C API changed from Python 2 to 3 and the LAMMPS code is not -compatible with both. 
- +Define a Python function or execute a previously defined function or +execute some arbitrary Python code. Arguments, including LAMMPS variables, can be passed to the function from the LAMMPS input script and a value returned by the Python function to a LAMMPS variable. The Python code for the function can @@ -107,7 +108,8 @@ command. The {func} setting specifies the name of the Python function. The code for the function is defined using the {file} or {here} keywords -as explained below. +as explained below. In case of the {source} keyword, the name of +the function is ignored. If the {invoke} keyword is used, no other keywords can be used, and a previous python command must have defined the Python function @@ -116,6 +118,13 @@ previously defined arguments and return value processed as explained below. You can invoke the function as many times as you wish in your input script. +If the {source} keyword is used, no other keywords can be used. +The argument can be a filename or a string with Python commands, +either on a single line enclosed in quotes, or as multiple lines +enclosed in triple quotes. These Python commands will be passed +to the Python interpreter and executed immediately without registering +a Python function for future execution. + The {input} keyword defines how many arguments {N} the Python function expects. If it takes no arguments, then the {input} keyword should not be used. Each argument can be specified directly as a value, @@ -310,7 +319,7 @@ which corresponds to SELF in the python command. The first line of the function imports the Python module lammps.py in the python dir of the distribution. The second line creates a Python object "lmp" which wraps the instance of LAMMPS that called the function. The -"ptr=lmpptr" argument is what makes that happen. The thrid line +"ptr=lmpptr" argument is what makes that happen. 
The third line invokes the command() function in the LAMMPS library interface. It takes a single string argument which is a LAMMPS input script command for LAMMPS to execute, the same as if it appeared in your input @@ -396,6 +405,9 @@ or other variables may have hidden side effects as well. In these cases, LAMMPS has no simple way to check that something illogical is being attempted. +The same applies to Python functions called during a simulation run at +each time step using "fix python"_fix_python.html. + :line If you run Python code directly on your workstation, either @@ -477,19 +489,10 @@ python"_Section_python.html. Note that it is important that the stand-alone LAMMPS executable and the LAMMPS shared library be consistent (built from the same source code files) in order for this to work. If the two have been built at different times using -different source files, problems may occur. - -As described above, you can use the python command to invoke a Python -function which calls back to LAMMPS through its Python-wrapped library -interface. However you cannot do the opposite. I.e. you cannot call -LAMMPS from Python and invoke the python command to "callback" to -Python and execute a Python function. LAMMPS will generate an error -if you try to do that. Note that we think there actually should be a -way to do that, but haven't yet been able to figure out how to do it -successfully. +different source files, problems may occur. [Related commands:] -"shell"_shell.html, "variable"_variable.html +"shell"_shell.html, "variable"_variable.html, "fix python"_fix_python.html [Default:] none diff --git a/doc/src/rerun.txt b/doc/src/rerun.txt index 860ee68033..edf94cc711 100644 --- a/doc/src/rerun.txt +++ b/doc/src/rerun.txt @@ -15,7 +15,7 @@ rerun file1 file2 ... keyword args ... :pre file1,file2,... 
= dump file(s) to read :ulb,l one or more keywords may be appended, keyword {dump} must appear and be last :l keyword = {first} or {last} or {every} or {skip} or {start} or {stop} or {dump} - {first} args = Nfirts + {first} args = Nfirst Nfirst = dump timestep to start on {last} args = Nlast Nlast = dumptimestep to stop on diff --git a/doc/src/tutorial_pylammps.txt b/doc/src/tutorial_pylammps.txt index 5d3491782e..a4a7a4041e 100644 --- a/doc/src/tutorial_pylammps.txt +++ b/doc/src/tutorial_pylammps.txt @@ -55,7 +55,7 @@ using the generated {auto} Makefile. cd $LAMMPS_DIR/src :pre # generate custom Makefile -python2 Make.py -jpg -png -s ffmpeg exceptions -m mpi -a file :pre +python Make.py -jpg -png -s ffmpeg exceptions -m mpi -a file :pre # add packages if necessary make yes-MOLECULE :pre diff --git a/doc/src/velocity.txt b/doc/src/velocity.txt index 70ddb559fa..b8299a5acf 100644 --- a/doc/src/velocity.txt +++ b/doc/src/velocity.txt @@ -61,7 +61,7 @@ keyword/value parameters. Not all options are used by each style. Each option has a default as listed below. The {create} style generates an ensemble of velocities using a random -number generator with the specified seed as the specified temperature. +number generator with the specified seed at the specified temperature. The {set} style sets the velocities of all atoms in the group to the specified values. 
If any component is specified as NULL, then it is diff --git a/examples/ASPHERE/poly/in.poly b/examples/ASPHERE/poly/in.poly index 77df095e15..3496a774bb 100644 --- a/examples/ASPHERE/poly/in.poly +++ b/examples/ASPHERE/poly/in.poly @@ -62,6 +62,7 @@ pair_coeff 3 3 1.0 1.5 pair_coeff 1 4 0.0 1.0 0.5 pair_coeff 2 4 0.0 1.0 1.0 pair_coeff 3 4 0.0 1.0 0.75 +pair_coeff 4 4 0.0 1.0 0.0 delete_atoms overlap 1.0 small big diff --git a/examples/ASPHERE/poly/in.poly.mp b/examples/ASPHERE/poly/in.poly.mp index 5ced616e7c..1c6a1faee3 100644 --- a/examples/ASPHERE/poly/in.poly.mp +++ b/examples/ASPHERE/poly/in.poly.mp @@ -62,6 +62,7 @@ pair_coeff 3 3 1.0 1.5 pair_coeff 1 4 0.0 1.0 0.5 pair_coeff 2 4 0.0 1.0 1.0 pair_coeff 3 4 0.0 1.0 0.75 +pair_coeff 4 4 0.0 1.0 0.0 delete_atoms overlap 1.0 small big diff --git a/examples/USER/cg-cmm/README b/examples/USER/cgsdk/README similarity index 95% rename from examples/USER/cg-cmm/README rename to examples/USER/cgsdk/README index 6a283114ba..5d3a493779 100644 --- a/examples/USER/cg-cmm/README +++ b/examples/USER/cgsdk/README @@ -1,4 +1,4 @@ -LAMMPS USER-CMM-CG example problems +LAMMPS USER-CGSDK example problems Each of these sub-directories contains a sample problem for the SDK coarse grained MD potentials that you can run with LAMMPS. 
diff --git a/examples/USER/cg-cmm/peg-verlet/data.pegc12e8.gz b/examples/USER/cgsdk/peg-verlet/data.pegc12e8.gz similarity index 100% rename from examples/USER/cg-cmm/peg-verlet/data.pegc12e8.gz rename to examples/USER/cgsdk/peg-verlet/data.pegc12e8.gz diff --git a/examples/USER/cg-cmm/peg-verlet/in.pegc12e8 b/examples/USER/cgsdk/peg-verlet/in.pegc12e8 similarity index 100% rename from examples/USER/cg-cmm/peg-verlet/in.pegc12e8 rename to examples/USER/cgsdk/peg-verlet/in.pegc12e8 diff --git a/examples/USER/cg-cmm/peg-verlet/in.pegc12e8-angle b/examples/USER/cgsdk/peg-verlet/in.pegc12e8-angle similarity index 100% rename from examples/USER/cg-cmm/peg-verlet/in.pegc12e8-angle rename to examples/USER/cgsdk/peg-verlet/in.pegc12e8-angle diff --git a/examples/USER/cg-cmm/peg-verlet/log.pegc12e8 b/examples/USER/cgsdk/peg-verlet/log.pegc12e8 similarity index 100% rename from examples/USER/cg-cmm/peg-verlet/log.pegc12e8 rename to examples/USER/cgsdk/peg-verlet/log.pegc12e8 diff --git a/examples/USER/cg-cmm/peg-verlet/log.pegc12e8-angle b/examples/USER/cgsdk/peg-verlet/log.pegc12e8-angle similarity index 100% rename from examples/USER/cg-cmm/peg-verlet/log.pegc12e8-angle rename to examples/USER/cgsdk/peg-verlet/log.pegc12e8-angle diff --git a/examples/USER/cg-cmm/sds-monolayer/data.sds.gz b/examples/USER/cgsdk/sds-monolayer/data.sds.gz similarity index 100% rename from examples/USER/cg-cmm/sds-monolayer/data.sds.gz rename to examples/USER/cgsdk/sds-monolayer/data.sds.gz diff --git a/examples/USER/cg-cmm/sds-monolayer/in.sds-hybrid b/examples/USER/cgsdk/sds-monolayer/in.sds-hybrid similarity index 100% rename from examples/USER/cg-cmm/sds-monolayer/in.sds-hybrid rename to examples/USER/cgsdk/sds-monolayer/in.sds-hybrid diff --git a/examples/USER/cg-cmm/sds-monolayer/in.sds-regular b/examples/USER/cgsdk/sds-monolayer/in.sds-regular similarity index 100% rename from examples/USER/cg-cmm/sds-monolayer/in.sds-regular rename to examples/USER/cgsdk/sds-monolayer/in.sds-regular 
diff --git a/examples/USER/cg-cmm/sds-monolayer/log.sds-hybrid b/examples/USER/cgsdk/sds-monolayer/log.sds-hybrid similarity index 100% rename from examples/USER/cg-cmm/sds-monolayer/log.sds-hybrid rename to examples/USER/cgsdk/sds-monolayer/log.sds-hybrid diff --git a/examples/USER/cg-cmm/sds-monolayer/log.sds-regular b/examples/USER/cgsdk/sds-monolayer/log.sds-regular similarity index 100% rename from examples/USER/cg-cmm/sds-monolayer/log.sds-regular rename to examples/USER/cgsdk/sds-monolayer/log.sds-regular diff --git a/examples/USER/misc/edip/Si.edip b/examples/USER/misc/edip/Si.edip new file mode 100644 index 0000000000..b3b960e738 --- /dev/null +++ b/examples/USER/misc/edip/Si.edip @@ -0,0 +1,26 @@ +# DATE: 2011-09-15 CONTRIBUTOR: Unknown CITATION: Justo, Bazant, Kaxiras, Bulatov and Yip, Phys Rev B, 58, 2539 (1998) + +# EDIP parameters for various elements and mixtures +# multiple entries can be added to this file, LAMMPS reads the ones it needs +# these entries are in LAMMPS "metal" units + +# format of a single entry (one or more lines) +# +# element 1, element 2, element 3, +# A B cutoffA cutoffC alpha beta eta +# gamma lambda mu rho sigma Q0 +# u1 u2 u3 u4 +# +# units for each parameters: +# A , lambda are in eV +# B, cutoffA, cutoffC, gamma, sigma are in Angstrom +# alpha, beta, eta, mu, rho, Q0, u1-u4 are pure numbers + +# Here are the original parameters in metal units, for Silicon from: +# J. F. Justo, M. Z. Bazant, E. Kaxiras, V. V. Bulatov, S. Yip +# Phys. Rev. B 58, 2539 (1998) +# + +Si Si Si 7.9821730 1.5075463 3.1213820 2.5609104 3.1083847 0.0070975 0.2523244 + 1.1247945 1.4533108 0.6966326 1.2085196 0.5774108 312.1341346 + -0.165799 32.557 0.286198 0.66 diff --git a/examples/USER/misc/edip/SiC.edip b/examples/USER/misc/edip/SiC.edip new file mode 100644 index 0000000000..0485d345bb --- /dev/null +++ b/examples/USER/misc/edip/SiC.edip @@ -0,0 +1,38 @@ +# DATE: 2017-05-16 CONTRIBUTOR: Laurent Pizzagalli CITATION: G. Lucas, M. Bertolus, and L. 
Pizzagalli, J. Phys. : Condens. Matter 22, 035802 (2010) +# element 1, element 2, element 3, +# A B cutoffA cutoffC alpha beta eta +# gamma lambda mu rho sigma Q0 +# u1 u2 u3 u4 +# +Si Si Si 5.488043 1.446435 2.941586 2.540193 3.066580 0.008593 0.589390 + 1.135256 2.417497 0.629131 1.343679 0.298443 208.924548 + -0.165799 32.557 0.286198 0.66 + +C C C 10.222599 0.959814 2.212263 1.741598 1.962090 0.025661 0.275605 + 1.084183 3.633621 0.594236 2.827634 0.536561 289.305617 + -0.165799 32.557 0.286198 0.66 + +C Si Si 7.535967 1.177019 2.534972 1.973974 2.507738 0.015347 0.432497 + 1.191567 3.025559 0.611684 2.061835 0.423863 249.115082 + -0.165799 32.557000 0.286198 0.660000 + +Si C C 7.535967 1.177019 2.534972 1.973974 2.507738 0.015347 0.432497 + 1.191567 3.025559 0.611684 2.061835 0.423863 249.115082 + -0.165799 32.557000 0.286198 0.660000 + +Si Si C 5.488043 1.446435 2.941586 2.540193 3.066580 0.008593 0.510944 + 1.135256 2.721528 0.620407 1.343679 0.298443 229.019815 + -0.165799 32.557000 0.286198 0.660000 + +Si C Si 7.535967 1.177019 2.534972 1.973974 2.507738 0.015347 0.510944 + 1.191567 2.721528 0.620407 2.061835 0.423863 229.019815 + -0.165799 32.557000 0.286198 0.660000 + +C C Si 10.222599 0.959814 2.212263 1.741598 1.962090 0.025661 0.354051 + 1.084183 3.329590 0.602960 2.827634 0.536561 269.210350 + -0.165799 32.557000 0.286198 0.660000 + +C Si C 7.535967 1.177019 2.534972 1.973974 2.507738 0.015347 0.354051 + 1.191567 3.329590 0.602960 2.061835 0.423863 269.210350 + -0.165799 32.557000 0.286198 0.660000 + diff --git a/examples/USER/misc/edip/data.SiC b/examples/USER/misc/edip/data.SiC new file mode 100644 index 0000000000..fa50c14803 --- /dev/null +++ b/examples/USER/misc/edip/data.SiC @@ -0,0 +1,138 @@ +Position data for Silicon-Carbon system + + 128 atoms + 2 atom types + -6.00 5.97232152 xlo xhi + -6.00 5.97232152 ylo yhi + -6.00 5.97232152 zlo zhi + + Atoms + +1 2 -2.9378454 -4.4592615 -4.8109196 +2 2 5.6222143 -2.7335026 -1.7157569 +3 2 -2.6614623 
-5.5431059 1.6353686 +4 2 -5.4326838 -4.6174577 5.9452279 +5 2 5.8679239 -0.1120535 -3.5839373 +6 2 -3.7174621 -0.6623311 -0.3714789 +7 2 -5.0724728 -2.5671623 4.4103461 +8 2 -3.3951436 0.9341126 4.9310702 +9 2 -5.4347593 1.9523767 -5.6180938 +10 2 -4.5884719 2.2904528 -1.0597739 +11 2 -5.9058662 0.6212406 2.0127574 +12 2 -4.7680660 0.1965740 4.3267764 +13 2 -5.4228882 5.2569673 -4.5162920 +14 2 -5.2683965 -5.9193658 -2.8648668 +15 2 -2.8610884 1.0484664 2.0299077 +16 2 -4.0711084 5.3133026 3.8009514 +17 2 -0.1947147 -4.1677696 -5.6950931 +18 2 -2.9892710 -3.1647368 -1.6173910 +19 2 -0.9129311 -4.3819066 -0.1601859 +20 2 -2.4513693 -5.2466501 4.8882912 +21 2 -2.8879952 -0.1633446 -3.3401150 +22 1 -4.6738762 -1.3807254 -2.2946777 +23 2 -0.6973948 -1.4885343 0.6005156 +24 1 -2.7392164 -2.4774843 0.2387186 +25 2 -2.6551254 -2.7229952 2.6350264 +26 1 -3.4644263 -4.6028144 3.3817786 +27 2 0.7227614 -2.0709446 2.9214737 +28 1 -2.1000577 -3.2131296 5.7273437 +29 2 -3.1057649 2.3204819 -2.2725622 +30 1 -2.2298751 0.7168389 -1.3107201 +31 2 -1.8698261 1.4006751 0.7265108 +32 1 -4.1103409 -0.7093340 1.9341753 +33 2 -0.3505581 3.2707182 -0.2880656 +34 1 -3.4045407 -1.4383961 4.3903527 +35 2 -3.0940529 1.4132478 -5.3635505 +36 1 -4.4560663 1.2072875 -3.7310176 +37 2 -2.6061002 4.6373499 -4.6903941 +38 1 -3.3477444 4.6768137 -2.6284678 +39 2 0.8121697 4.8602418 -4.6710946 +40 1 -2.5756922 3.3740738 -0.2136350 +41 2 -0.3867976 5.8745611 -2.1119905 +42 1 -1.6766249 1.3374292 3.8741477 +43 2 -0.8770613 3.3735941 4.3846975 +44 1 -1.8609254 3.3158245 -5.9786556 +45 1 -5.2732321 -4.6073253 -0.9581754 +46 1 -2.7888697 -5.6910152 -0.7922023 +47 1 -2.4717165 4.5801880 2.5083210 +48 1 -3.8819950 5.8456589 -5.7563384 +49 2 2.2314782 -2.7729214 -5.2356862 +50 2 0.2981976 -3.1385279 -3.1608167 +51 2 2.8810785 -3.4658695 -0.5823196 +52 2 0.2509625 -5.7595229 2.7389761 +53 2 -0.2934120 -0.8029431 -3.3698507 +54 1 -1.0075690 -2.0481922 -1.9419298 +55 2 2.0729069 1.4922441 -2.3898096 +56 1 
1.1110944 -3.2004208 0.9491078 +57 2 1.6774298 -0.7901860 2.5158773 +58 1 -0.8342297 -4.3342518 2.0971458 +59 2 3.2747406 -1.3107897 4.7884706 +60 1 1.7126246 -3.3691471 4.5581012 +61 2 0.4770605 1.7769008 -5.3339915 +62 1 0.2944391 0.5892781 -2.2030106 +63 2 2.2039275 3.1557557 -2.0276796 +64 1 -0.0404494 0.4767818 1.0396418 +65 2 1.1395867 2.3763443 2.3481007 +66 1 -0.9738374 -1.6325161 3.7538567 +67 2 -0.3291998 0.2996990 5.2770809 +68 1 -1.6185604 -0.3964274 -5.1771220 +69 2 2.5999949 -5.1977715 5.8230717 +70 1 -1.6270675 2.3210900 -3.6299941 +71 2 3.6532700 4.9282597 -5.4319276 +72 1 0.0788934 4.0241037 -2.5011530 +73 2 2.8556507 2.6168653 2.1125546 +74 1 0.9738989 2.6255364 4.3412121 +75 2 3.7452938 3.4521356 4.5946426 +76 1 2.0805182 4.7039015 5.3280260 +77 1 -1.0324174 -5.8155041 -4.3265820 +78 1 0.7622442 -4.3631629 -1.3156572 +79 1 0.3263684 3.9937357 1.6172321 +80 1 -0.4350105 -5.7997058 4.5959134 +81 2 3.9161132 -4.6052788 -3.3191717 +82 2 1.9240657 5.7345079 -1.9754251 +83 2 -5.9794488 -4.2369359 1.8646522 +84 2 4.3339975 -4.4845227 5.3737440 +85 2 2.2755456 -0.6327737 -5.7931837 +86 1 1.8728190 -1.5504906 -3.4560010 +87 2 3.4558100 -1.1054068 -1.8333071 +88 1 4.3788172 -1.9466494 -0.3284637 +89 2 2.5999235 -3.7548996 2.5740569 +90 1 3.9983910 -4.4856603 1.1968663 +91 2 -5.7295580 -2.1475672 -5.9963645 +92 1 4.2664051 -2.6988975 -5.8005478 +93 2 4.5254685 2.2906832 -3.4765798 +94 1 2.3603088 1.3416442 -4.4173836 +95 2 4.7767057 1.4061217 -0.7524620 +96 1 1.8072666 -0.7835973 -0.4581995 +97 2 4.4745018 0.3736224 2.1068274 +98 1 3.6081170 -1.7315713 2.4019053 +99 2 4.6281423 -0.2865409 4.4756524 +100 1 1.7975239 0.2893530 4.2330830 +101 2 5.8341452 4.4986472 -5.9664541 +102 1 3.2401308 4.1655227 -3.5070029 +103 2 4.8720339 4.8709982 -2.3364366 +104 1 3.5526476 1.2262752 0.6926826 +105 2 -5.8173342 4.5420479 1.5578881 +106 1 3.9683224 1.5441137 3.8284375 +107 2 -5.5349308 1.9067049 3.7504113 +108 1 4.4728615 2.6415574 -5.5952809 +109 1 1.7000950 
-4.8115440 -4.1953920 +110 1 1.7221527 4.1878404 -0.3712681 +111 1 3.9218156 4.5935583 1.3263407 +112 1 3.1310195 -5.8922481 3.6001155 +113 1 4.7558719 -2.2877771 -3.4742052 +114 1 -5.5050300 -2.7027381 0.8748867 +115 1 5.8418594 -4.6064370 3.8714113 +116 1 -4.7516868 -3.1691984 -4.4099768 +117 1 3.9404971 0.7188702 -2.2898786 +118 1 -5.6869740 0.2042380 -0.1916738 +119 1 5.8949589 -1.2422560 3.1201292 +120 1 5.9675804 -0.0712572 5.8964022 +121 1 -5.6208517 3.3600036 -2.9493510 +122 1 5.2065263 3.4517912 -0.3800894 +123 1 -4.6994522 2.5489583 1.8297431 +124 1 -4.0758407 3.0726196 5.0647973 +125 1 4.1587591 -5.0896820 -1.1443498 +126 1 -4.6963753 -5.7429833 1.1357818 +127 1 5.5994192 4.6887008 3.5948264 +128 1 5.0988369 -5.3774409 -4.9051267 diff --git a/examples/USER/misc/edip/in.edip-Si b/examples/USER/misc/edip/in.edip-Si new file mode 100644 index 0000000000..b4c6669621 --- /dev/null +++ b/examples/USER/misc/edip/in.edip-Si @@ -0,0 +1,72 @@ + +units metal + +atom_style atomic +atom_modify map array +boundary p p p +atom_modify sort 0 0.0 + +# temperature + +variable t equal 1800.0 + +# coordination number cutoff + +variable r equal 2.835 + +# minimization parameters + +variable etol equal 1.0e-5 +variable ftol equal 1.0e-5 +variable maxiter equal 100 +variable maxeval equal 100 +variable dmax equal 1.0e-1 + +# diamond unit cell + +variable a equal 5.431 +lattice custom $a & + a1 1.0 0.0 0.0 & + a2 0.0 1.0 0.0 & + a3 0.0 0.0 1.0 & + basis 0.0 0.0 0.0 & + basis 0.0 0.5 0.5 & + basis 0.5 0.0 0.5 & + basis 0.5 0.5 0.0 & + basis 0.25 0.25 0.25 & + basis 0.25 0.75 0.75 & + basis 0.75 0.25 0.75 & + basis 0.75 0.75 0.25 + +region myreg block 0 4 & + 0 4 & + 0 4 +create_box 1 myreg +create_atoms 1 region myreg + +mass 1 28.06 + +group Si type 1 + +velocity all create $t 5287287 mom yes rot yes dist gaussian + +# make a vacancy + +group del id 300 +delete_atoms group del + +pair_style edip +pair_coeff * * Si.edip Si + +thermo 10 + +fix 1 all nvt temp $t $t 0.1 + +timestep 
1.0e-3 +neighbor 1.0 bin +neigh_modify every 1 delay 10 check yes + +# equilibrate + +run 500 + diff --git a/examples/USER/misc/edip/in.edip-Si-multi b/examples/USER/misc/edip/in.edip-Si-multi new file mode 100644 index 0000000000..73a2e09143 --- /dev/null +++ b/examples/USER/misc/edip/in.edip-Si-multi @@ -0,0 +1,72 @@ + +units metal + +atom_style atomic +atom_modify map array +boundary p p p +atom_modify sort 0 0.0 + +# temperature + +variable t equal 1800.0 + +# coordination number cutoff + +variable r equal 2.835 + +# minimization parameters + +variable etol equal 1.0e-5 +variable ftol equal 1.0e-5 +variable maxiter equal 100 +variable maxeval equal 100 +variable dmax equal 1.0e-1 + +# diamond unit cell + +variable a equal 5.431 +lattice custom $a & + a1 1.0 0.0 0.0 & + a2 0.0 1.0 0.0 & + a3 0.0 0.0 1.0 & + basis 0.0 0.0 0.0 & + basis 0.0 0.5 0.5 & + basis 0.5 0.0 0.5 & + basis 0.5 0.5 0.0 & + basis 0.25 0.25 0.25 & + basis 0.25 0.75 0.75 & + basis 0.75 0.25 0.75 & + basis 0.75 0.75 0.25 + +region myreg block 0 4 & + 0 4 & + 0 4 +create_box 1 myreg +create_atoms 1 region myreg + +mass 1 28.06 + +group Si type 1 + +velocity all create $t 5287287 mom yes rot yes dist gaussian + +# make a vacancy + +group del id 300 +delete_atoms group del + +pair_style edip/multi +pair_coeff * * Si.edip Si + +thermo 10 + +fix 1 all nvt temp $t $t 0.1 + +timestep 1.0e-3 +neighbor 1.0 bin +neigh_modify every 1 delay 10 check yes + +# equilibrate + +run 500 + diff --git a/examples/USER/misc/edip/in.edip-SiC b/examples/USER/misc/edip/in.edip-SiC new file mode 100644 index 0000000000..ac95f6c4d1 --- /dev/null +++ b/examples/USER/misc/edip/in.edip-SiC @@ -0,0 +1,33 @@ +# Test of EDIP potential for SiC system + +units metal +boundary p p p + +atom_style atomic + +read_data data.SiC + +pair_style edip/multi +pair_coeff * * SiC.edip Si C + +mass 1 28.085 +mass 2 12.001 + +neighbor 1.0 bin +neigh_modify delay 1 + +fix 1 all nve +thermo 10 +timestep 0.001 + +#dump 1 all atom 50 dump.meam + 
+#dump 2 all image 10 image.*.jpg element element & +# axes yes 0.8 0.02 view 60 -30 +#dump_modify 2 pad 3 element Si C + +#dump 3 all movie 10 movie.mpg element element & +# axes yes 0.8 0.02 view 60 -30 +#dump_modify 3 pad 3 element Si C + +run 100 diff --git a/examples/USER/misc/edip/log.4May2017.g++.edip-Si-multi.1 b/examples/USER/misc/edip/log.4May2017.g++.edip-Si-multi.1 new file mode 100644 index 0000000000..ab7d339023 --- /dev/null +++ b/examples/USER/misc/edip/log.4May2017.g++.edip-Si-multi.1 @@ -0,0 +1,167 @@ +LAMMPS (4 May 2017) + using 1 OpenMP thread(s) per MPI task + +units metal + +atom_style atomic +atom_modify map array +boundary p p p +atom_modify sort 0 0.0 + +# temperature + +variable t equal 1800.0 + +# coordination number cutoff + +variable r equal 2.835 + +# minimization parameters + +variable etol equal 1.0e-5 +variable ftol equal 1.0e-5 +variable maxiter equal 100 +variable maxeval equal 100 +variable dmax equal 1.0e-1 + +# diamond unit cell + +variable a equal 5.431 +lattice custom $a a1 1.0 0.0 0.0 a2 0.0 1.0 0.0 a3 0.0 0.0 1.0 basis 0.0 0.0 0.0 basis 0.0 0.5 0.5 basis 0.5 0.0 0.5 basis 0.5 0.5 0.0 basis 0.25 0.25 0.25 basis 0.25 0.75 0.75 basis 0.75 0.25 0.75 basis 0.75 0.75 0.25 +lattice custom 5.431 a1 1.0 0.0 0.0 a2 0.0 1.0 0.0 a3 0.0 0.0 1.0 basis 0.0 0.0 0.0 basis 0.0 0.5 0.5 basis 0.5 0.0 0.5 basis 0.5 0.5 0.0 basis 0.25 0.25 0.25 basis 0.25 0.75 0.75 basis 0.75 0.25 0.75 basis 0.75 0.75 0.25 +Lattice spacing in x,y,z = 5.431 5.431 5.431 + +region myreg block 0 4 0 4 0 4 +create_box 1 myreg +Created orthogonal box = (0 0 0) to (21.724 21.724 21.724) + 1 by 1 by 1 MPI processor grid +create_atoms 1 region myreg +Created 512 atoms + +mass 1 28.06 + +group Si type 1 +512 atoms in group Si + +velocity all create $t 5287287 mom yes rot yes dist gaussian +velocity all create 1800 5287287 mom yes rot yes dist gaussian + +# make a vacancy + +group del id 300 +1 atoms in group del +delete_atoms group del +Deleted 1 atoms, new total = 511 + 
+pair_style edip/multi +pair_coeff * * Si.edip Si +Reading potential file Si.edip with DATE: 2011-09-15 + +thermo 10 + +fix 1 all nvt temp $t $t 0.1 +fix 1 all nvt temp 1800 $t 0.1 +fix 1 all nvt temp 1800 1800 0.1 + +timestep 1.0e-3 +neighbor 1.0 bin +neigh_modify every 1 delay 10 check yes + +# equilibrate + +run 500 +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 4.12138 + ghost atom cutoff = 4.12138 + binsize = 2.06069, bins = 11 11 11 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair edip/multi, perpetual + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 2.979 | 2.979 | 2.979 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 1802.5039 -2372.6618 0 -2253.8359 12261.807 + 10 952.62744 -2316.428 0 -2253.6283 723.08194 + 20 549.13801 -2289.442 0 -2253.2413 -2444.5204 + 30 1047.0106 -2321.1523 0 -2252.1305 9013.201 + 40 663.46141 -2294.2083 0 -2250.4711 2942.5348 + 50 504.74535 -2282.849 0 -2249.5748 -461.44909 + 60 1019.2173 -2315.5639 0 -2248.3744 7706.4286 + 70 844.51195 -2302.5251 0 -2246.8526 3116.8302 + 80 814.90407 -2299.3372 0 -2245.6166 794.77455 + 90 1269.5636 -2327.4775 0 -2243.7845 7729.3968 + 100 977.61563 -2306.1118 0 -2241.6647 2969.9939 + 110 843.08539 -2295.6547 0 -2240.0763 1393.4039 + 120 1161.6968 -2314.6587 0 -2238.0766 7398.3492 + 130 918.19451 -2296.4321 0 -2235.9022 2537.3997 + 140 881.42548 -2292.2768 0 -2234.1709 1550.3339 + 150 1231.1005 -2313.1054 0 -2231.9479 8112.7566 + 160 967.01862 -2293.332 0 -2229.5836 3422.9627 + 170 833.51248 -2282.7489 0 -2227.8015 43.991459 + 180 1240.8488 -2307.3633 0 -2225.5632 6557.8651 + 190 1126.4621 -2297.1922 0 -2222.9328 4289.0067 + 200 947.59571 -2283.29 0 -2220.822 586.2811 + 210 1228.153 -2299.4702 0 -2218.5071 5315.0425 + 220 1215.4104 -2295.9408 0 -2215.8176 4870.3417 + 230 1112.436 
-2286.7552 0 -2213.4204 2527.1879 + 240 1300.081 -2296.6013 0 -2210.8965 5738.3708 + 250 1192.5738 -2286.8463 0 -2208.2286 4076.49 + 260 1004.7055 -2272.1753 0 -2205.9424 359.37589 + 270 1241.2018 -2285.3632 0 -2203.5399 4160.5763 + 280 1360.1974 -2290.325 0 -2200.6572 5802.3902 + 290 1151.9365 -2273.9467 0 -2198.008 1418.8887 + 300 1174.3518 -2273.0089 0 -2195.5925 1998.229 + 310 1329.2727 -2280.5049 0 -2192.8757 4721.7297 + 320 1284.4414 -2274.7519 0 -2190.0781 2985.4674 + 330 1328.3761 -2274.9545 0 -2187.3844 4543.2109 + 340 1446.3847 -2279.8693 0 -2184.5198 6254.4059 + 350 1366.2165 -2271.7475 0 -2181.6828 3637.8335 + 360 1358.9609 -2268.5982 0 -2179.0118 3049.5798 + 370 1552.208 -2278.4802 0 -2176.1545 6334.0058 + 380 1562.5295 -2276.1793 0 -2173.1732 5787.5547 + 390 1415.5498 -2263.7824 0 -2170.4655 3438.5766 + 400 1323.1568 -2255.1641 0 -2167.938 2427.2294 + 410 1260.7186 -2248.5373 0 -2165.4273 1208.6299 + 420 1282.1118 -2247.3718 0 -2162.8516 462.65374 + 430 1451.944 -2255.7551 0 -2160.0391 2037.8025 + 440 1568.9415 -2260.417 0 -2156.9882 3531.1602 + 450 1565.8262 -2257.2396 0 -2154.0162 2586.7886 + 460 1677.7143 -2261.7214 0 -2151.122 4112.9756 + 470 1762.9071 -2264.4244 0 -2148.2089 5053.2139 + 480 1704.5898 -2257.8678 0 -2145.4967 4077.4626 + 490 1731.2619 -2257.1048 0 -2142.9753 4710.5263 + 500 1723.9777 -2254.161 0 -2140.5118 4760.7295 +Loop time of 0.679564 on 1 procs for 500 steps with 511 atoms + +Performance: 63.570 ns/day, 0.378 hours/ns, 735.765 timesteps/s +99.7% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.65181 | 0.65181 | 0.65181 | 0.0 | 95.92 +Neigh | 0.013857 | 0.013857 | 0.013857 | 0.0 | 2.04 +Comm | 0.0033884 | 0.0033884 | 0.0033884 | 0.0 | 0.50 +Output | 0.00070739 | 0.00070739 | 0.00070739 | 0.0 | 0.10 +Modify | 0.0083694 | 0.0083694 | 0.0083694 | 0.0 | 1.23 +Other | | 
0.001432 | | | 0.21 + +Nlocal: 511 ave 511 max 511 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 845 ave 845 max 845 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 0 ave 0 max 0 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +FullNghs: 7902 ave 7902 max 7902 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 7902 +Ave neighs/atom = 15.4638 +Neighbor list builds = 19 +Dangerous builds = 0 + +Total wall time: 0:00:00 diff --git a/examples/USER/misc/edip/log.4May2017.g++.edip-Si-multi.4 b/examples/USER/misc/edip/log.4May2017.g++.edip-Si-multi.4 new file mode 100644 index 0000000000..91be601fa8 --- /dev/null +++ b/examples/USER/misc/edip/log.4May2017.g++.edip-Si-multi.4 @@ -0,0 +1,167 @@ +LAMMPS (4 May 2017) + using 1 OpenMP thread(s) per MPI task + +units metal + +atom_style atomic +atom_modify map array +boundary p p p +atom_modify sort 0 0.0 + +# temperature + +variable t equal 1800.0 + +# coordination number cutoff + +variable r equal 2.835 + +# minimization parameters + +variable etol equal 1.0e-5 +variable ftol equal 1.0e-5 +variable maxiter equal 100 +variable maxeval equal 100 +variable dmax equal 1.0e-1 + +# diamond unit cell + +variable a equal 5.431 +lattice custom $a a1 1.0 0.0 0.0 a2 0.0 1.0 0.0 a3 0.0 0.0 1.0 basis 0.0 0.0 0.0 basis 0.0 0.5 0.5 basis 0.5 0.0 0.5 basis 0.5 0.5 0.0 basis 0.25 0.25 0.25 basis 0.25 0.75 0.75 basis 0.75 0.25 0.75 basis 0.75 0.75 0.25 +lattice custom 5.431 a1 1.0 0.0 0.0 a2 0.0 1.0 0.0 a3 0.0 0.0 1.0 basis 0.0 0.0 0.0 basis 0.0 0.5 0.5 basis 0.5 0.0 0.5 basis 0.5 0.5 0.0 basis 0.25 0.25 0.25 basis 0.25 0.75 0.75 basis 0.75 0.25 0.75 basis 0.75 0.75 0.25 +Lattice spacing in x,y,z = 5.431 5.431 5.431 + +region myreg block 0 4 0 4 0 4 +create_box 1 myreg +Created orthogonal box = (0 0 0) to (21.724 21.724 21.724) + 1 by 2 by 2 MPI processor grid +create_atoms 1 region myreg +Created 512 atoms + +mass 1 28.06 + +group Si type 1 +512 atoms in group Si + +velocity all create $t 5287287 mom yes rot yes dist gaussian +velocity all create 
1800 5287287 mom yes rot yes dist gaussian + +# make a vacancy + +group del id 300 +1 atoms in group del +delete_atoms group del +Deleted 1 atoms, new total = 511 + +pair_style edip/multi +pair_coeff * * Si.edip Si +Reading potential file Si.edip with DATE: 2011-09-15 + +thermo 10 + +fix 1 all nvt temp $t $t 0.1 +fix 1 all nvt temp 1800 $t 0.1 +fix 1 all nvt temp 1800 1800 0.1 + +timestep 1.0e-3 +neighbor 1.0 bin +neigh_modify every 1 delay 10 check yes + +# equilibrate + +run 500 +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 4.12138 + ghost atom cutoff = 4.12138 + binsize = 2.06069, bins = 11 11 11 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair edip/multi, perpetual + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 2.955 | 2.955 | 2.955 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 1802.3816 -2372.6618 0 -2253.844 12260.967 + 10 938.75954 -2315.5185 0 -2253.6329 558.21646 + 20 534.27233 -2288.4721 0 -2253.2514 -2710.768 + 30 1043.7796 -2320.9485 0 -2252.1398 8679.4381 + 40 658.0916 -2293.8597 0 -2250.4765 2165.3742 + 50 517.93009 -2283.7238 0 -2249.5805 -1124.9373 + 60 1063.3594 -2318.4409 0 -2248.3414 7277.8526 + 70 868.14006 -2304.0134 0 -2246.7832 2050.2848 + 80 826.37805 -2300.0187 0 -2245.5416 91.099408 + 90 1289.6772 -2328.7151 0 -2243.6961 8180.7423 + 100 976.36208 -2305.9371 0 -2241.5727 3614.0499 + 110 810.81713 -2293.4705 0 -2240.0193 1359.368 + 120 1165.707 -2314.9026 0 -2238.056 7336.45 + 130 929.81245 -2297.139 0 -2235.8432 2793.8451 + 140 804.47874 -2287.2074 0 -2234.174 704.92455 + 150 1182.4141 -2310.0266 0 -2232.0787 7822.2337 + 160 979.92391 -2294.2969 0 -2229.6977 3206.7458 + 170 830.14748 -2282.6079 0 -2227.8824 -296.87377 + 180 1271.1133 -2309.4274 0 -2225.6322 7199.614 + 190 1209.6006 -2302.6407 0 -2222.9006 5528.3784 + 
200 954.67693 -2283.6621 0 -2220.7273 47.02795 + 210 1260.814 -2301.5582 0 -2218.442 4829.788 + 220 1274.9954 -2299.7285 0 -2215.6774 5518.0597 + 230 1048.0074 -2282.398 0 -2213.3106 1754.4144 + 240 1261.7072 -2294.1108 0 -2210.9356 5233.2712 + 250 1272.6178 -2292.0793 0 -2208.1849 4795.9325 + 260 989.14205 -2271.0278 0 -2205.8209 -820.1828 + 270 1212.0445 -2283.4212 0 -2203.52 3395.8634 + 280 1391.9572 -2292.3809 0 -2200.6194 6666.2451 + 290 1093.1204 -2270.0421 0 -2197.9807 206.94523 + 300 1159.4831 -2272.102 0 -2195.6657 778.53806 + 310 1407.3528 -2285.6228 0 -2192.8463 5223.048 + 320 1236.7163 -2271.5389 0 -2190.0113 1865.3943 + 330 1258.8275 -2270.4611 0 -2187.4758 2333.3209 + 340 1507.9519 -2283.9906 0 -2184.5824 6775.5456 + 350 1366.5116 -2271.7287 0 -2181.6446 3432.115 + 360 1305.2829 -2265.1092 0 -2179.0614 1498.4073 + 370 1581.4335 -2280.4645 0 -2176.2122 6518.5597 + 380 1589.5319 -2277.9428 0 -2173.1567 6334.6506 + 390 1402.6781 -2262.9323 0 -2170.464 3278.3038 + 400 1374.9587 -2258.5717 0 -2167.9307 3608.7284 + 410 1295.7416 -2250.7752 0 -2165.3565 1877.5222 + 420 1278.6727 -2247.1099 0 -2162.8164 1599.4181 + 430 1508.1328 -2259.4245 0 -2160.0044 4300.2224 + 440 1624.2957 -2263.9806 0 -2156.9026 4432.625 + 450 1597.3356 -2259.263 0 -2153.9624 3370.3816 + 460 1772.0922 -2267.9106 0 -2151.0895 5788.3214 + 470 1806.4047 -2267.304 0 -2148.221 5950.1166 + 480 1593.0406 -2250.7469 0 -2145.7294 2518.0576 + 490 1660.9767 -2252.894 0 -2143.398 4282.1643 + 500 1714.283 -2253.9295 0 -2140.9194 5740.0247 +Loop time of 0.205398 on 4 procs for 500 steps with 511 atoms + +Performance: 210.324 ns/day, 0.114 hours/ns, 2434.304 timesteps/s +99.0% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.16285 | 0.1688 | 0.17446 | 1.1 | 82.18 +Neigh | 0.0035172 | 0.0036234 | 0.0038214 | 0.2 | 1.76 +Comm | 0.018727 | 0.024851 
| 0.030996 | 2.9 | 12.10 +Output | 0.0013061 | 0.0014012 | 0.0015635 | 0.3 | 0.68 +Modify | 0.0046582 | 0.0048603 | 0.0050988 | 0.2 | 2.37 +Other | | 0.001861 | | | 0.91 + +Nlocal: 127.75 ave 131 max 124 min +Histogram: 1 0 1 0 0 0 0 0 1 1 +Nghost: 433.75 ave 441 max 426 min +Histogram: 1 0 1 0 0 0 0 0 1 1 +Neighs: 0 ave 0 max 0 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +FullNghs: 1979.5 ave 2040 max 1895 min +Histogram: 1 0 0 0 1 0 0 0 0 2 + +Total # of neighbors = 7918 +Ave neighs/atom = 15.4951 +Neighbor list builds = 19 +Dangerous builds = 0 + +Total wall time: 0:00:00 diff --git a/examples/USER/misc/edip/log.4May2017.g++.edip-Si.1 b/examples/USER/misc/edip/log.4May2017.g++.edip-Si.1 new file mode 100644 index 0000000000..f7ce00371f --- /dev/null +++ b/examples/USER/misc/edip/log.4May2017.g++.edip-Si.1 @@ -0,0 +1,167 @@ +LAMMPS (4 May 2017) + using 1 OpenMP thread(s) per MPI task + +units metal + +atom_style atomic +atom_modify map array +boundary p p p +atom_modify sort 0 0.0 + +# temperature + +variable t equal 1800.0 + +# coordination number cutoff + +variable r equal 2.835 + +# minimization parameters + +variable etol equal 1.0e-5 +variable ftol equal 1.0e-5 +variable maxiter equal 100 +variable maxeval equal 100 +variable dmax equal 1.0e-1 + +# diamond unit cell + +variable a equal 5.431 +lattice custom $a a1 1.0 0.0 0.0 a2 0.0 1.0 0.0 a3 0.0 0.0 1.0 basis 0.0 0.0 0.0 basis 0.0 0.5 0.5 basis 0.5 0.0 0.5 basis 0.5 0.5 0.0 basis 0.25 0.25 0.25 basis 0.25 0.75 0.75 basis 0.75 0.25 0.75 basis 0.75 0.75 0.25 +lattice custom 5.431 a1 1.0 0.0 0.0 a2 0.0 1.0 0.0 a3 0.0 0.0 1.0 basis 0.0 0.0 0.0 basis 0.0 0.5 0.5 basis 0.5 0.0 0.5 basis 0.5 0.5 0.0 basis 0.25 0.25 0.25 basis 0.25 0.75 0.75 basis 0.75 0.25 0.75 basis 0.75 0.75 0.25 +Lattice spacing in x,y,z = 5.431 5.431 5.431 + +region myreg block 0 4 0 4 0 4 +create_box 1 myreg +Created orthogonal box = (0 0 0) to (21.724 21.724 21.724) + 1 by 1 by 1 MPI processor grid +create_atoms 1 region myreg +Created 512 atoms + 
+mass 1 28.06 + +group Si type 1 +512 atoms in group Si + +velocity all create $t 5287287 mom yes rot yes dist gaussian +velocity all create 1800 5287287 mom yes rot yes dist gaussian + +# make a vacancy + +group del id 300 +1 atoms in group del +delete_atoms group del +Deleted 1 atoms, new total = 511 + +pair_style edip +pair_coeff * * Si.edip Si +Reading potential file Si.edip with DATE: 2011-09-15 + +thermo 10 + +fix 1 all nvt temp $t $t 0.1 +fix 1 all nvt temp 1800 $t 0.1 +fix 1 all nvt temp 1800 1800 0.1 + +timestep 1.0e-3 +neighbor 1.0 bin +neigh_modify every 1 delay 10 check yes + +# equilibrate + +run 500 +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 4.12138 + ghost atom cutoff = 4.12138 + binsize = 2.06069, bins = 11 11 11 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair edip, perpetual + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 2.979 | 2.979 | 2.979 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 1802.5039 -2372.6618 0 -2253.8359 12261.807 + 10 952.62744 -2316.428 0 -2253.6283 723.08283 + 20 549.138 -2289.442 0 -2253.2413 -2444.5194 + 30 1047.0106 -2321.1522 0 -2252.1305 9013.2015 + 40 663.46143 -2294.2083 0 -2250.4711 2942.5358 + 50 504.74533 -2282.849 0 -2249.5748 -461.44817 + 60 1019.2173 -2315.5639 0 -2248.3744 7706.429 + 70 844.51197 -2302.5251 0 -2246.8526 3116.8313 + 80 814.90406 -2299.3372 0 -2245.6165 794.77536 + 90 1269.5635 -2327.4775 0 -2243.7845 7729.3971 + 100 977.61566 -2306.1118 0 -2241.6647 2969.9952 + 110 843.08538 -2295.6547 0 -2240.0763 1393.4046 + 120 1161.6968 -2314.6587 0 -2238.0766 7398.3495 + 130 918.19453 -2296.4321 0 -2235.9022 2537.4011 + 140 881.42546 -2292.2768 0 -2234.1709 1550.3345 + 150 1231.1005 -2313.1054 0 -2231.9479 8112.7568 + 160 967.01865 -2293.332 0 -2229.5836 3422.964 + 170 833.51246 
-2282.7489 0 -2227.8015 43.99251 + 180 1240.8487 -2307.3633 0 -2225.5632 6557.8652 + 190 1126.4621 -2297.1922 0 -2222.9328 4289.0083 + 200 947.5957 -2283.29 0 -2220.8219 586.28203 + 210 1228.153 -2299.4702 0 -2218.5071 5315.0427 + 220 1215.4104 -2295.9407 0 -2215.8176 4870.343 + 230 1112.436 -2286.7552 0 -2213.4204 2527.1887 + 240 1300.081 -2296.6013 0 -2210.8965 5738.3711 + 250 1192.5739 -2286.8463 0 -2208.2286 4076.4913 + 260 1004.7055 -2272.1753 0 -2205.9424 359.3769 + 270 1241.2018 -2285.3632 0 -2203.5399 4160.5764 + 280 1360.1974 -2290.325 0 -2200.6572 5802.3912 + 290 1151.9366 -2273.9467 0 -2198.008 1418.8905 + 300 1174.3518 -2273.0089 0 -2195.5925 1998.2297 + 310 1329.2726 -2280.5049 0 -2192.8757 4721.7304 + 320 1284.4414 -2274.7519 0 -2190.0781 2985.4687 + 330 1328.3761 -2274.9545 0 -2187.3844 4543.2115 + 340 1446.3847 -2279.8693 0 -2184.5198 6254.4071 + 350 1366.2165 -2271.7475 0 -2181.6828 3637.8351 + 360 1358.9609 -2268.5982 0 -2179.0118 3049.5811 + 370 1552.2079 -2278.4802 0 -2176.1545 6334.0061 + 380 1562.5295 -2276.1793 0 -2173.1731 5787.5565 + 390 1415.5498 -2263.7823 0 -2170.4655 3438.5782 + 400 1323.1568 -2255.1641 0 -2167.938 2427.2311 + 410 1260.7186 -2248.5373 0 -2165.4273 1208.6316 + 420 1282.1118 -2247.3718 0 -2162.8516 462.65508 + 430 1451.9439 -2255.7551 0 -2160.0391 2037.8027 + 440 1568.9415 -2260.417 0 -2156.9882 3531.1613 + 450 1565.8261 -2257.2396 0 -2154.0161 2586.7896 + 460 1677.7143 -2261.7214 0 -2151.122 4112.976 + 470 1762.9071 -2264.4244 0 -2148.2089 5053.2148 + 480 1704.5898 -2257.8678 0 -2145.4966 4077.4649 + 490 1731.2619 -2257.1048 0 -2142.9753 4710.5276 + 500 1723.9777 -2254.161 0 -2140.5118 4760.7316 +Loop time of 0.312472 on 1 procs for 500 steps with 511 atoms + +Performance: 138.252 ns/day, 0.174 hours/ns, 1600.143 timesteps/s +99.6% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total 
+--------------------------------------------------------------- +Pair | 0.28525 | 0.28525 | 0.28525 | 0.0 | 91.29 +Neigh | 0.013753 | 0.013753 | 0.013753 | 0.0 | 4.40 +Comm | 0.0033333 | 0.0033333 | 0.0033333 | 0.0 | 1.07 +Output | 0.00071096 | 0.00071096 | 0.00071096 | 0.0 | 0.23 +Modify | 0.008044 | 0.008044 | 0.008044 | 0.0 | 2.57 +Other | | 0.001385 | | | 0.44 + +Nlocal: 511 ave 511 max 511 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 845 ave 845 max 845 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 0 ave 0 max 0 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +FullNghs: 7902 ave 7902 max 7902 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 7902 +Ave neighs/atom = 15.4638 +Neighbor list builds = 19 +Dangerous builds = 0 + +Total wall time: 0:00:00 diff --git a/examples/USER/misc/edip/log.4May2017.g++.edip-Si.4 b/examples/USER/misc/edip/log.4May2017.g++.edip-Si.4 new file mode 100644 index 0000000000..e33f0116f4 --- /dev/null +++ b/examples/USER/misc/edip/log.4May2017.g++.edip-Si.4 @@ -0,0 +1,167 @@ +LAMMPS (4 May 2017) + using 1 OpenMP thread(s) per MPI task + +units metal + +atom_style atomic +atom_modify map array +boundary p p p +atom_modify sort 0 0.0 + +# temperature + +variable t equal 1800.0 + +# coordination number cutoff + +variable r equal 2.835 + +# minimization parameters + +variable etol equal 1.0e-5 +variable ftol equal 1.0e-5 +variable maxiter equal 100 +variable maxeval equal 100 +variable dmax equal 1.0e-1 + +# diamond unit cell + +variable a equal 5.431 +lattice custom $a a1 1.0 0.0 0.0 a2 0.0 1.0 0.0 a3 0.0 0.0 1.0 basis 0.0 0.0 0.0 basis 0.0 0.5 0.5 basis 0.5 0.0 0.5 basis 0.5 0.5 0.0 basis 0.25 0.25 0.25 basis 0.25 0.75 0.75 basis 0.75 0.25 0.75 basis 0.75 0.75 0.25 +lattice custom 5.431 a1 1.0 0.0 0.0 a2 0.0 1.0 0.0 a3 0.0 0.0 1.0 basis 0.0 0.0 0.0 basis 0.0 0.5 0.5 basis 0.5 0.0 0.5 basis 0.5 0.5 0.0 basis 0.25 0.25 0.25 basis 0.25 0.75 0.75 basis 0.75 0.25 0.75 basis 0.75 0.75 0.25 +Lattice spacing in x,y,z = 5.431 5.431 5.431 + 
+region myreg block 0 4 0 4 0 4 +create_box 1 myreg +Created orthogonal box = (0 0 0) to (21.724 21.724 21.724) + 1 by 2 by 2 MPI processor grid +create_atoms 1 region myreg +Created 512 atoms + +mass 1 28.06 + +group Si type 1 +512 atoms in group Si + +velocity all create $t 5287287 mom yes rot yes dist gaussian +velocity all create 1800 5287287 mom yes rot yes dist gaussian + +# make a vacancy + +group del id 300 +1 atoms in group del +delete_atoms group del +Deleted 1 atoms, new total = 511 + +pair_style edip +pair_coeff * * Si.edip Si +Reading potential file Si.edip with DATE: 2011-09-15 + +thermo 10 + +fix 1 all nvt temp $t $t 0.1 +fix 1 all nvt temp 1800 $t 0.1 +fix 1 all nvt temp 1800 1800 0.1 + +timestep 1.0e-3 +neighbor 1.0 bin +neigh_modify every 1 delay 10 check yes + +# equilibrate + +run 500 +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 4.12138 + ghost atom cutoff = 4.12138 + binsize = 2.06069, bins = 11 11 11 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair edip, perpetual + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 2.955 | 2.955 | 2.955 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 1802.3816 -2372.6618 0 -2253.8439 12260.967 + 10 938.75954 -2315.5185 0 -2253.6329 558.21736 + 20 534.27232 -2288.4721 0 -2253.2514 -2710.767 + 30 1043.7796 -2320.9485 0 -2252.1398 8679.4385 + 40 658.09162 -2293.8597 0 -2250.4765 2165.3752 + 50 517.93008 -2283.7238 0 -2249.5805 -1124.9362 + 60 1063.3594 -2318.4409 0 -2248.3414 7277.853 + 70 868.14007 -2304.0133 0 -2246.7832 2050.2859 + 80 826.37803 -2300.0187 0 -2245.5416 91.100098 + 90 1289.6772 -2328.7151 0 -2243.6961 8180.7427 + 100 976.36211 -2305.9371 0 -2241.5727 3614.0511 + 110 810.81711 -2293.4705 0 -2240.0193 1359.3687 + 120 1165.707 -2314.9026 0 -2238.056 7336.4505 + 130 929.81248 
-2297.139 0 -2235.8432 2793.8463 + 140 804.47872 -2287.2074 0 -2234.174 704.92524 + 150 1182.414 -2310.0266 0 -2232.0787 7822.2339 + 160 979.92395 -2294.2969 0 -2229.6977 3206.7474 + 170 830.14746 -2282.6079 0 -2227.8824 -296.87288 + 180 1271.1133 -2309.4274 0 -2225.6322 7199.614 + 190 1209.6006 -2302.6407 0 -2222.9006 5528.3799 + 200 954.67692 -2283.6621 0 -2220.7272 47.02925 + 210 1260.814 -2301.5582 0 -2218.442 4829.7879 + 220 1274.9954 -2299.7285 0 -2215.6774 5518.0611 + 230 1048.0074 -2282.398 0 -2213.3106 1754.4157 + 240 1261.7071 -2294.1107 0 -2210.9356 5233.2714 + 250 1272.6179 -2292.0793 0 -2208.1849 4795.934 + 260 989.14207 -2271.0278 0 -2205.8209 -820.18098 + 270 1212.0444 -2283.4212 0 -2203.52 3395.8631 + 280 1391.9572 -2292.3809 0 -2200.6194 6666.2464 + 290 1093.1205 -2270.0421 0 -2197.9807 206.94752 + 300 1159.483 -2272.102 0 -2195.6657 778.53823 + 310 1407.3528 -2285.6227 0 -2192.8463 5223.0487 + 320 1236.7164 -2271.5389 0 -2190.0112 1865.3963 + 330 1258.8275 -2270.4611 0 -2187.4758 2333.321 + 340 1507.9519 -2283.9906 0 -2184.5824 6775.546 + 350 1366.5116 -2271.7287 0 -2181.6446 3432.1175 + 360 1305.2828 -2265.1091 0 -2179.0614 1498.4079 + 370 1581.4334 -2280.4645 0 -2176.2122 6518.5598 + 380 1589.5319 -2277.9428 0 -2173.1566 6334.6527 + 390 1402.6782 -2262.9323 0 -2170.464 3278.3048 + 400 1374.9587 -2258.5717 0 -2167.9307 3608.7293 + 410 1295.7416 -2250.7752 0 -2165.3565 1877.5245 + 420 1278.6727 -2247.1099 0 -2162.8164 1599.4189 + 430 1508.1328 -2259.4245 0 -2160.0044 4300.2235 + 440 1624.2957 -2263.9806 0 -2156.9026 4432.6267 + 450 1597.3356 -2259.263 0 -2153.9623 3370.3829 + 460 1772.0921 -2267.9105 0 -2151.0895 5788.3219 + 470 1806.4047 -2267.304 0 -2148.221 5950.1188 + 480 1593.0406 -2250.7469 0 -2145.7294 2518.0601 + 490 1660.9766 -2252.894 0 -2143.398 4282.1654 + 500 1714.2831 -2253.9295 0 -2140.9194 5740.0268 +Loop time of 0.109584 on 4 procs for 500 steps with 511 atoms + +Performance: 394.220 ns/day, 0.061 hours/ns, 4562.726 timesteps/s 
+99.0% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.074678 | 0.077817 | 0.084705 | 1.4 | 71.01 +Neigh | 0.0036662 | 0.0037943 | 0.0039661 | 0.2 | 3.46 +Comm | 0.013665 | 0.020312 | 0.023178 | 2.7 | 18.54 +Output | 0.0010247 | 0.0010931 | 0.0012922 | 0.3 | 1.00 +Modify | 0.0043213 | 0.0047521 | 0.0051889 | 0.6 | 4.34 +Other | | 0.001814 | | | 1.66 + +Nlocal: 127.75 ave 131 max 124 min +Histogram: 1 0 1 0 0 0 0 0 1 1 +Nghost: 433.75 ave 441 max 426 min +Histogram: 1 0 1 0 0 0 0 0 1 1 +Neighs: 0 ave 0 max 0 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +FullNghs: 1979.5 ave 2040 max 1895 min +Histogram: 1 0 0 0 1 0 0 0 0 2 + +Total # of neighbors = 7918 +Ave neighs/atom = 15.4951 +Neighbor list builds = 19 +Dangerous builds = 0 + +Total wall time: 0:00:00 diff --git a/examples/USER/misc/edip/log.4May2017.g++.edip-SiC.1 b/examples/USER/misc/edip/log.4May2017.g++.edip-SiC.1 new file mode 100644 index 0000000000..125106c504 --- /dev/null +++ b/examples/USER/misc/edip/log.4May2017.g++.edip-SiC.1 @@ -0,0 +1,92 @@ +LAMMPS (4 May 2017) + using 1 OpenMP thread(s) per MPI task +# Test of MEAM potential for SiC system + +units metal +boundary p p p + +atom_style atomic + +read_data data.SiC + orthogonal box = (-6 -6 -6) to (5.97232 5.97232 5.97232) + 1 by 1 by 1 MPI processor grid + reading atoms ... 
+ 128 atoms + +pair_style edip/multi +pair_coeff * * SiC.edip Si C +Reading potential file SiC.edip with DATE: 2017-05-16 + +mass 1 28.085 +mass 2 12.001 + +neighbor 1.0 bin +neigh_modify delay 1 + +fix 1 all nve +thermo 10 +timestep 0.001 + +#dump 1 all atom 50 dump.meam + +#dump 2 all image 10 image.*.jpg element element # axes yes 0.8 0.02 view 60 -30 +#dump_modify 2 pad 3 element Si C + +#dump 3 all movie 10 movie.mpg element element # axes yes 0.8 0.02 view 60 -30 +#dump_modify 3 pad 3 element Si C + +run 100 +Neighbor list info ... + update every 1 steps, delay 1 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 3.94159 + ghost atom cutoff = 3.94159 + binsize = 1.97079, bins = 7 7 7 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair edip/multi, perpetual + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 2.692 | 2.692 | 2.692 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 0 -563.61621 0 -563.61621 -726147.34 + 10 4224.3601 -633.24829 0 -563.90103 -312355.55 + 20 4528.5661 -638.15183 0 -563.81071 -20091.291 + 30 4817.3654 -642.92111 0 -563.83905 106625.5 + 40 4619.4324 -639.6884 0 -563.85562 107180.42 + 50 4783.0025 -642.26961 0 -563.75166 75134.335 + 60 4525.145 -638.06177 0 -563.77681 71591.713 + 70 4685.2578 -640.72377 0 -563.8104 63956.042 + 80 4621.8393 -639.75912 0 -563.88682 18177.383 + 90 4834.7702 -643.34582 0 -563.97805 15282.823 + 100 4424.0589 -636.60208 0 -563.97656 47963.501 +Loop time of 0.0552888 on 1 procs for 100 steps with 128 atoms + +Performance: 156.270 ns/day, 0.154 hours/ns, 1808.685 timesteps/s +99.5% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.051872 | 0.051872 | 0.051872 | 0.0 | 93.82 +Neigh | 0.0023525 | 0.0023525 | 
0.0023525 | 0.0 | 4.25 +Comm | 0.0004518 | 0.0004518 | 0.0004518 | 0.0 | 0.82 +Output | 0.00014806 | 0.00014806 | 0.00014806 | 0.0 | 0.27 +Modify | 0.00024796 | 0.00024796 | 0.00024796 | 0.0 | 0.45 +Other | | 0.0002165 | | | 0.39 + +Nlocal: 128 ave 128 max 128 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 473 ave 473 max 473 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 0 ave 0 max 0 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +FullNghs: 2376 ave 2376 max 2376 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 2376 +Ave neighs/atom = 18.5625 +Neighbor list builds = 11 +Dangerous builds = 0 +Total wall time: 0:00:00 diff --git a/examples/USER/misc/edip/log.4May2017.g++.edip-SiC.4 b/examples/USER/misc/edip/log.4May2017.g++.edip-SiC.4 new file mode 100644 index 0000000000..eb6955703e --- /dev/null +++ b/examples/USER/misc/edip/log.4May2017.g++.edip-SiC.4 @@ -0,0 +1,92 @@ +LAMMPS (4 May 2017) + using 1 OpenMP thread(s) per MPI task +# Test of MEAM potential for SiC system + +units metal +boundary p p p + +atom_style atomic + +read_data data.SiC + orthogonal box = (-6 -6 -6) to (5.97232 5.97232 5.97232) + 1 by 2 by 2 MPI processor grid + reading atoms ... + 128 atoms + +pair_style edip/multi +pair_coeff * * SiC.edip Si C +Reading potential file SiC.edip with DATE: 2017-05-16 + +mass 1 28.085 +mass 2 12.001 + +neighbor 1.0 bin +neigh_modify delay 1 + +fix 1 all nve +thermo 10 +timestep 0.001 + +#dump 1 all atom 50 dump.meam + +#dump 2 all image 10 image.*.jpg element element # axes yes 0.8 0.02 view 60 -30 +#dump_modify 2 pad 3 element Si C + +#dump 3 all movie 10 movie.mpg element element # axes yes 0.8 0.02 view 60 -30 +#dump_modify 3 pad 3 element Si C + +run 100 +Neighbor list info ... 
+ update every 1 steps, delay 1 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 3.94159 + ghost atom cutoff = 3.94159 + binsize = 1.97079, bins = 7 7 7 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair edip/multi, perpetual + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 2.686 | 2.686 | 2.686 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 0 -563.61621 0 -563.61621 -726147.34 + 10 4224.3601 -633.24829 0 -563.90103 -312355.55 + 20 4528.5661 -638.15183 0 -563.81071 -20091.291 + 30 4817.3654 -642.92111 0 -563.83905 106625.5 + 40 4619.4324 -639.6884 0 -563.85562 107180.42 + 50 4783.0025 -642.26961 0 -563.75166 75134.335 + 60 4525.145 -638.06177 0 -563.77681 71591.713 + 70 4685.2578 -640.72377 0 -563.8104 63956.042 + 80 4621.8393 -639.75912 0 -563.88682 18177.383 + 90 4834.7702 -643.34582 0 -563.97805 15282.823 + 100 4424.0589 -636.60208 0 -563.97656 47963.501 +Loop time of 0.020755 on 4 procs for 100 steps with 128 atoms + +Performance: 416.285 ns/day, 0.058 hours/ns, 4818.118 timesteps/s +99.2% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.011816 | 0.013825 | 0.016871 | 1.6 | 66.61 +Neigh | 0.00061321 | 0.00066817 | 0.00074816 | 0.0 | 3.22 +Comm | 0.0023363 | 0.0054012 | 0.0075014 | 2.7 | 26.02 +Output | 0.00020909 | 0.00022268 | 0.00025558 | 0.0 | 1.07 +Modify | 8.3208e-05 | 9.346e-05 | 0.00010395 | 0.0 | 0.45 +Other | | 0.0005446 | | | 2.62 + +Nlocal: 32 ave 36 max 25 min +Histogram: 1 0 0 0 0 0 0 1 1 1 +Nghost: 262.75 ave 273 max 255 min +Histogram: 2 0 0 0 0 0 0 1 0 1 +Neighs: 0 ave 0 max 0 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +FullNghs: 594 ave 687 max 453 min +Histogram: 1 0 0 0 0 0 1 1 0 1 + +Total # of neighbors = 2376 +Ave neighs/atom = 
18.5625 +Neighbor list builds = 11 +Dangerous builds = 0 +Total wall time: 0:00:00 diff --git a/examples/USER/misc/filter-corotate/data.bpti b/examples/USER/misc/filter_corotate/data.bpti similarity index 100% rename from examples/USER/misc/filter-corotate/data.bpti rename to examples/USER/misc/filter_corotate/data.bpti diff --git a/examples/USER/misc/filter-corotate/data.peptide b/examples/USER/misc/filter_corotate/data.peptide similarity index 100% rename from examples/USER/misc/filter-corotate/data.peptide rename to examples/USER/misc/filter_corotate/data.peptide diff --git a/examples/USER/misc/filter-corotate/in.bpti b/examples/USER/misc/filter_corotate/in.bpti similarity index 100% rename from examples/USER/misc/filter-corotate/in.bpti rename to examples/USER/misc/filter_corotate/in.bpti diff --git a/examples/USER/misc/filter-corotate/in.peptide b/examples/USER/misc/filter_corotate/in.peptide similarity index 100% rename from examples/USER/misc/filter-corotate/in.peptide rename to examples/USER/misc/filter_corotate/in.peptide diff --git a/examples/USER/misc/filter-corotate/log.10Mar2017.bpti.g++.1 b/examples/USER/misc/filter_corotate/log.10Mar2017.bpti.g++.1 similarity index 100% rename from examples/USER/misc/filter-corotate/log.10Mar2017.bpti.g++.1 rename to examples/USER/misc/filter_corotate/log.10Mar2017.bpti.g++.1 diff --git a/examples/USER/misc/filter-corotate/log.10Mar2017.bpti.g++.4 b/examples/USER/misc/filter_corotate/log.10Mar2017.bpti.g++.4 similarity index 100% rename from examples/USER/misc/filter-corotate/log.10Mar2017.bpti.g++.4 rename to examples/USER/misc/filter_corotate/log.10Mar2017.bpti.g++.4 diff --git a/examples/USER/misc/filter-corotate/log.10Mar2017.peptide.g++.1 b/examples/USER/misc/filter_corotate/log.10Mar2017.peptide.g++.1 similarity index 100% rename from examples/USER/misc/filter-corotate/log.10Mar2017.peptide.g++.1 rename to examples/USER/misc/filter_corotate/log.10Mar2017.peptide.g++.1 diff --git 
a/examples/USER/misc/filter-corotate/log.10Mar2017.peptide.g++.4 b/examples/USER/misc/filter_corotate/log.10Mar2017.peptide.g++.4 similarity index 100% rename from examples/USER/misc/filter-corotate/log.10Mar2017.peptide.g++.4 rename to examples/USER/misc/filter_corotate/log.10Mar2017.peptide.g++.4 diff --git a/examples/USER/flow_gauss/README b/examples/USER/misc/flow_gauss/README similarity index 100% rename from examples/USER/flow_gauss/README rename to examples/USER/misc/flow_gauss/README diff --git a/examples/USER/flow_gauss/in.GD b/examples/USER/misc/flow_gauss/in.GD similarity index 100% rename from examples/USER/flow_gauss/in.GD rename to examples/USER/misc/flow_gauss/in.GD diff --git a/examples/USER/misc/gauss-diel/data.gauss-diel b/examples/USER/misc/gauss_diel/data.gauss-diel similarity index 100% rename from examples/USER/misc/gauss-diel/data.gauss-diel rename to examples/USER/misc/gauss_diel/data.gauss-diel diff --git a/examples/USER/misc/gauss-diel/in.gauss-diel b/examples/USER/misc/gauss_diel/in.gauss-diel similarity index 100% rename from examples/USER/misc/gauss-diel/in.gauss-diel rename to examples/USER/misc/gauss_diel/in.gauss-diel diff --git a/examples/USER/misc/gauss-diel/in.gauss-diel-cg b/examples/USER/misc/gauss_diel/in.gauss-diel-cg similarity index 100% rename from examples/USER/misc/gauss-diel/in.gauss-diel-cg rename to examples/USER/misc/gauss_diel/in.gauss-diel-cg diff --git a/examples/USER/misc/gauss-diel/in.gauss-diel-split b/examples/USER/misc/gauss_diel/in.gauss-diel-split similarity index 100% rename from examples/USER/misc/gauss-diel/in.gauss-diel-split rename to examples/USER/misc/gauss_diel/in.gauss-diel-split diff --git a/examples/USER/misc/gauss-diel/log.gauss-diel b/examples/USER/misc/gauss_diel/log.gauss-diel similarity index 100% rename from examples/USER/misc/gauss-diel/log.gauss-diel rename to examples/USER/misc/gauss_diel/log.gauss-diel diff --git a/examples/USER/misc/gauss-diel/log.gauss-diel-cg 
b/examples/USER/misc/gauss_diel/log.gauss-diel-cg similarity index 100% rename from examples/USER/misc/gauss-diel/log.gauss-diel-cg rename to examples/USER/misc/gauss_diel/log.gauss-diel-cg diff --git a/examples/USER/misc/gauss-diel/log.gauss-diel-split b/examples/USER/misc/gauss_diel/log.gauss-diel-split similarity index 100% rename from examples/USER/misc/gauss-diel/log.gauss-diel-split rename to examples/USER/misc/gauss_diel/log.gauss-diel-split diff --git a/examples/USER/misc/i-pi/C.opt.tersoff b/examples/USER/misc/ipi/C.opt.tersoff similarity index 100% rename from examples/USER/misc/i-pi/C.opt.tersoff rename to examples/USER/misc/ipi/C.opt.tersoff diff --git a/examples/USER/misc/i-pi/README b/examples/USER/misc/ipi/README similarity index 100% rename from examples/USER/misc/i-pi/README rename to examples/USER/misc/ipi/README diff --git a/examples/USER/misc/i-pi/data.graphene b/examples/USER/misc/ipi/data.graphene similarity index 100% rename from examples/USER/misc/i-pi/data.graphene rename to examples/USER/misc/ipi/data.graphene diff --git a/examples/USER/misc/i-pi/i-pi_input.xml b/examples/USER/misc/ipi/i-pi_input.xml similarity index 100% rename from examples/USER/misc/i-pi/i-pi_input.xml rename to examples/USER/misc/ipi/i-pi_input.xml diff --git a/examples/USER/misc/i-pi/i-pi_positions.xyz b/examples/USER/misc/ipi/i-pi_positions.xyz similarity index 100% rename from examples/USER/misc/i-pi/i-pi_positions.xyz rename to examples/USER/misc/ipi/i-pi_positions.xyz diff --git a/examples/USER/misc/i-pi/in.graphene b/examples/USER/misc/ipi/in.graphene similarity index 100% rename from examples/USER/misc/i-pi/in.graphene rename to examples/USER/misc/ipi/in.graphene diff --git a/examples/USER/misc/kolmogorov-crespi/data.bilayer-graphene b/examples/USER/misc/kolmogorov_crespi/data.bilayer-graphene similarity index 100% rename from examples/USER/misc/kolmogorov-crespi/data.bilayer-graphene rename to examples/USER/misc/kolmogorov_crespi/data.bilayer-graphene diff 
--git a/examples/USER/misc/kolmogorov-crespi/data.graphene-adsorbant b/examples/USER/misc/kolmogorov_crespi/data.graphene-adsorbant similarity index 100% rename from examples/USER/misc/kolmogorov-crespi/data.graphene-adsorbant rename to examples/USER/misc/kolmogorov_crespi/data.graphene-adsorbant diff --git a/examples/USER/misc/kolmogorov-crespi/in.atom-diffusion b/examples/USER/misc/kolmogorov_crespi/in.atom-diffusion similarity index 100% rename from examples/USER/misc/kolmogorov-crespi/in.atom-diffusion rename to examples/USER/misc/kolmogorov_crespi/in.atom-diffusion diff --git a/examples/USER/misc/kolmogorov-crespi/in.bilayer-graphene b/examples/USER/misc/kolmogorov_crespi/in.bilayer-graphene similarity index 100% rename from examples/USER/misc/kolmogorov-crespi/in.bilayer-graphene rename to examples/USER/misc/kolmogorov_crespi/in.bilayer-graphene diff --git a/examples/USER/misc/kolmogorov-crespi/log.atom-diffusion b/examples/USER/misc/kolmogorov_crespi/log.atom-diffusion similarity index 100% rename from examples/USER/misc/kolmogorov-crespi/log.atom-diffusion rename to examples/USER/misc/kolmogorov_crespi/log.atom-diffusion diff --git a/examples/USER/misc/kolmogorov-crespi/log.bilayer-graphene b/examples/USER/misc/kolmogorov_crespi/log.bilayer-graphene similarity index 100% rename from examples/USER/misc/kolmogorov-crespi/log.bilayer-graphene rename to examples/USER/misc/kolmogorov_crespi/log.bilayer-graphene diff --git a/examples/USER/misc/meam_spline/Si_1.meam.spline b/examples/USER/misc/meam_spline/Si_1.meam.spline new file mode 100644 index 0000000000..1ebd09cf84 --- /dev/null +++ b/examples/USER/misc/meam_spline/Si_1.meam.spline @@ -0,0 +1,63 @@ +DATE: 2012-02-01 CONTRIBUTOR: Alexander Stukowski, stukowski@mm.tu-darmstadt.de CITATION: Lenosky, Sadigh, Alonso, Bulatov, de la Rubia, Kim, Voter and Kress, Modell Simul Mater Sci Eng, 8, 825 (2000) COMMENT: Spline-based MEAM potential for Si. Reference: T. J. Lenosky, B. Sadigh, E. Alonso, V. V. Bulatov, T. D. 
de la Rubia, J. Kim, A. F. Voter, and J. D. Kress, Modell. Simul. Mater. Sci. Eng. 8, 825 (2000) +10 +-4.266966781858503300e+01 0.000000000000000000e+00 +1 0 1 0 +1.500000000000000000e+00 6.929943430771341000e+00 1.653321602557917600e+02 +1.833333333333333300e+00 -4.399503747408950400e-01 3.941543472528634600e+01 +2.166666666666666500e+00 -1.701233725061446700e+00 6.871065423413908100e+00 +2.500000000000000000e+00 -1.624732919215791800e+00 5.340648014033163800e+00 +2.833333333333333000e+00 -9.969641728342462100e-01 1.534811309391571000e+00 +3.166666666666667000e+00 -2.739141845072665100e-01 -6.334706186546093900e+00 +3.500000000000000000e+00 -2.499156963774082700e-02 -1.798864729909626500e+00 +3.833333333333333500e+00 -1.784331481529976400e-02 4.743496636420091500e-01 +4.166666666666666100e+00 -9.612303290166881000e-03 -4.006506271304824400e-02 +4.500000000000000000e+00 0.000000000000000000e+00 -2.394996574779807200e-01 +11 +-1.000000000000000000e+00 0.000000000000000000e+00 +1 0 0 0 +1.500000000000000000e+00 1.374674212682983900e-01 -3.227795813279568500e+00 +1.700000000000000000e+00 -1.483141815327918000e-01 -6.411648793604404900e+00 +1.899999999999999900e+00 -5.597204896096039700e-01 1.003068519633888300e+01 +2.100000000000000100e+00 -7.310964379372824100e-01 2.293461970618954700e+00 +2.299999999999999800e+00 -7.628287071954063000e-01 1.742018781618444500e+00 +2.500000000000000000e+00 -7.291769685066557000e-01 5.460640949384478700e-01 +2.700000000000000200e+00 -6.662022220044453400e-01 4.721760106467195500e-01 +2.899999999999999900e+00 -5.732830582550895200e-01 2.056894449546524200e+00 +3.100000000000000100e+00 -4.069014309729406300e-01 2.319615721086100800e+00 +3.299999999999999800e+00 -1.666155295956388300e-01 -2.497162196179187900e-01 +3.500000000000000000e+00 0.000000000000000000e+00 -1.237130660986393100e+01 +8 +7.351364478015182100e-01 6.165217237728655200e-01 +1 1 1 1 +-1.770934559908718700e+00 -1.074925682941420000e+00 -1.482768170233858500e-01 
+-3.881557649503457600e-01 -2.004503493658201000e-01 -1.492100354067345500e-01 +9.946230300080272100e-01 4.142241371345077300e-01 -7.012475119623896900e-02 +2.377401824966400000e+00 8.793892953828742500e-01 -3.944355024164965900e-02 +3.760180619924772900e+00 1.266888024536562100e+00 -1.581431192239436000e-02 +5.142959414883146800e+00 1.629979548834614900e+00 2.611224310900800400e-02 +6.525738209841518900e+00 1.977379549636293600e+00 -1.378738550324104500e-01 +7.908517004799891800e+00 2.396177220616657200e+00 7.494253977092666400e-01 +10 +-3.618936018538757300e+00 0.000000000000000000e+00 +1 0 1 0 +1.500000000000000000e+00 1.250311510312851300e+00 2.790400588857243500e+01 +1.722222222222222300e+00 8.682060369372680600e-01 -4.522554291731776900e+00 +1.944444444444444400e+00 6.084604017544847900e-01 5.052931618779816800e+00 +2.166666666666666500e+00 4.875624808097850400e-01 1.180825096539679600e+00 +2.388888888888888800e+00 4.416345603457190700e-01 -6.673769465415171400e-01 +2.611111111111111200e+00 3.760976313325982700e-01 -8.938118490837722000e-01 +2.833333333333333000e+00 2.714524157414608400e-01 -5.090324763524399800e-01 +3.055555555555555400e+00 1.481440300150710900e-01 6.623665830603995300e-01 +3.277777777777777700e+00 4.854596610856590900e-02 7.403702452268122700e-01 +3.500000000000000000e+00 0.000000000000000000e+00 2.578982318481970500e+00 +8 +-1.395041572145673000e+01 1.134616739799360700e+00 +1 1 1 1 +-1.000000000000000900e+00 5.254163992149617700e+00 1.582685381253900500e+01 +-7.428367052748285900e-01 2.359149452448745100e+00 3.117611233789983400e+01 +-4.856734105496561800e-01 1.195946960915646100e+00 1.658962813584905800e+01 +-2.285101158244838800e-01 1.229952028074150000e+00 1.108360928564026400e+01 +2.865317890068852500e-02 2.035650777568434500e+00 9.088861456447702400e+00 +2.858164736258610400e-01 3.424741418405580000e+00 5.489943377538379500e+00 +5.429797683510331200e-01 4.948585892304984100e+00 -1.882291580187675700e+01 +8.001430630762056400e-01 
5.617988713941801200e+00 -7.718625571850646200e+00 diff --git a/examples/USER/misc/meam_spline/TiO.meam.spline b/examples/USER/misc/meam_spline/TiO.meam.spline new file mode 100644 index 0000000000..ed2a67a962 --- /dev/null +++ b/examples/USER/misc/meam_spline/TiO.meam.spline @@ -0,0 +1,130 @@ +# Ti-O cubic spline potential where O is in the dilute limit. DATE: 2016-06-05 CONTRIBUTOR: Pinchao Zhang, Dallas R. Trinkle +meam/spline 2 Ti O +spline3eq +13 +-20 0 +1.742692837 3.744277175966 99.4865081627958 +2.05580176725 0.910839730906 10.8702523265355 +2.3689106975 0.388045896634 -1.55322418749562 +2.68201962775 -0.018840906533 2.43630041329215 +2.995128558 -0.248098929639 2.67912713976835 +3.30823748825 -0.264489550297 -0.125056384603077 +3.6213464185 -0.227196189283 1.10662555360438 +3.93445534875 -0.129293090176 -0.592053676745914 +4.247564279 -0.059685366933 -0.470123414607672 +4.56067320925 -0.031100025561 -0.0380739973059663 +4.8737821395 -0.013847363202 -0.0711547960695406 +5.18689106975 -0.003203412728 -0.081768292420175 +5.5 0 -0.0571422964883619 +spline3eq +5 +0.155001355787331 0 +1.9 0.533321679606674 0 +2.8 0.456402081843862 -1.60311717015859 +3.7 -0.324281383502201 1.19940299483249 +4.6 -0.474029826906675 1.47909794595154 +5.5 0 -2.49521499855605 +spline3eq +13 +0 0 +1.742692837 0 0 +2.05580176725 0 0 +2.3689106975 0 0 +2.68201962775 0 0 +2.995128558 0 0 +3.30823748825 0 0 +3.6213464185 0 0 +3.93445534875 0 0 +4.247564279 0 0 +4.56067320925 0 0 +4.8737821395 0 0 +5.18689106975 0 0 +5.5 0 0 +spline3eq +11 +-1 0 +2.055801767 1.7475279661 -525.869786904802 +2.2912215903 -5.8677963945 252.796316927755 +2.5266414136 -8.3376288737 71.7318388721015 +2.7620612369 -5.8398712842 -1.93587742753693 +2.9974810602 -3.1140648231 -39.2999192667503 +3.2329008835 -1.7257245065 14.3424136002004 +3.4683207068 -0.4428977017 -29.4925534559498 +3.7037405301 -0.1466643003 -3.18010534572236 +3.9391603534 -0.2095507945 3.33490838803603 +4.1745801767 -0.1442384563 3.71918691359508 
+4.41 0 -9.66717019857564 +spline3eq +5 +-61.9827585211652 0 +1.9 11.2293641315584 0 +2.8 -27.9976343076148 122.648031332411 +3.7 -8.32979773113248 -54.3340881766381 +4.6 -1.00863195297399 3.23150064581724 +5.5 0 -5.3514242228123 +spline3eq +4 +0.00776934946045395 0.105197706160344 +-55.14233165 -0.29745568008 0.00152870603877451 +-44.7409899033333 -0.15449458722 0.00038933722543571 +-34.3396481566667 0.05098657168 0.00038124926922248 +-23.93830641 0.57342694704 0.0156639264890892 +spline3eq +5 +-0.00676745157022662 -0.0159520381982146 +-23.9928 0.297607384684645 0 +-15.9241175 0.216691597077105 -0.0024248755353942 +-7.855435 0.0637598673719069 0.00306245895013358 +0.213247499999998 -0.00183450621970427 -0.00177588407633909 +8.28193 -0.111277018874367 0 +spline3eq +10 +2.77327511656661 0 +2.055801767 -0.1485215264 72.2010867146919 +2.31737934844444 1.6845304918 -47.2744689053404 +2.57895692988889 2.0113365977 -15.1859578405326 +2.84053451133333 1.1444092747 3.33978204841873 +3.10211209277778 0.2861606803 2.587867603808 +3.36368967422222 -0.3459281126 6.14070694084556 +3.62526725566667 -0.6257480601 3.7397696717154 +3.88684483711111 -0.6119510826 4.64749084871402 +4.14842241855556 -0.3112059651 2.83275746415936 +4.41 0 -15.0612086827734 +spline3eq +5 +12.3315547862781 0 +1.9 2.62105440156724 0 +2.8 10.2850803058354 -25.439802988016 +3.7 3.23933763743897 -7.20203673434025 +4.6 -5.79049355858613 39.5509978688682 +5.5 0 -41.221771373642 +spline3eq +8 +8.33642274810572 -60.4024574736564 +-1 0.07651409193 -110.652321293778 +-0.724509054371429 0.14155824541 44.8853405500508 +-0.449018108742857 0.75788697341 -25.3065115342002 +-0.173527163114286 0.63011570378 -2.48510144915082 +0.101963782514286 0.09049597305 2.68769386908235 +0.377454728142857 -0.35741586657 -1.01558570129633 +0.652945673771428 -0.65293217647 13.4224786001212 +0.9284366194 -6.00912190653 -452.752542694929 +spline3eq +5 +0.137191606537625 -1.55094230968985 +-1 0.0513843442016519 0 +-0.5 0.0179024412245673 
-2.44986494990154 +0 -0.260650876879273 3.91774583656401 +0.5 -0.190163791764901 -4.84414871911743 +1 -0.763795416646599 0 +spline3eq +8 +0 0 +-1 0 0 +-0.724509054371429 0 0 +-0.449018108742857 0 0 +-0.173527163114286 0 0 +0.101963782514286 0 0 +0.377454728142857 0 0 +0.652945673771428 0 0 +0.9284366194 0 0 diff --git a/examples/USER/misc/meam_spline/in.meam-spline.Si b/examples/USER/misc/meam_spline/in.meam-spline.Si new file mode 100644 index 0000000000..7f270ccecd --- /dev/null +++ b/examples/USER/misc/meam_spline/in.meam-spline.Si @@ -0,0 +1,22 @@ +# Si fcc phase + +units metal +boundary p p p + +atom_style atomic +lattice fcc 3.98 +region box block 0 5 0 5 0 5 +create_box 1 box +create_atoms 1 box + +pair_style meam/spline +pair_coeff * * Si_1.meam.spline Si +mass * 28.085 + +velocity all create 500.0 44226611 + +fix 1 all nvt temp 500.0 500.0 1.0 + +thermo 50 +run 500 + diff --git a/examples/USER/misc/meam_spline/in.meam-spline.TiO2 b/examples/USER/misc/meam_spline/in.meam-spline.TiO2 new file mode 100644 index 0000000000..06c8b7f8cd --- /dev/null +++ b/examples/USER/misc/meam_spline/in.meam-spline.TiO2 @@ -0,0 +1,92 @@ +# + +variable T_depart equal 300 + +variable dt equal 0.0002 + +variable a equal 4.5937 +variable c equal 2.9587 +variable ca equal ${c}/${a} + +variable nx equal 6 +variable ny equal 6 +variable nz equal 11 + +variable bx equal ${a}*${nx} +variable by equal ${a}*${ny} +variable bz equal ${c}*${nz} +# ======================================================================= + +units metal +atom_style atomic +dimension 3 +boundary p p p + + +lattice sc 1.0 +region box_vide prism 0 ${bx} 0 ${by} 0 ${bz} 0.0 0.0 0.0 +create_box 2 box_vide + +#lattice sc 1.0 +#region box_TiO2 block 0 ${bx} 0 ${by} 0 ${bz} + +# titanium atoms +lattice custom ${a} origin 0.0 0.0 0.0 & + orient x 1 0 0 orient y 0 1 0 orient z 0 0 1 & + a1 1.0 0.0 0.0 & + a2 0.0 1.0 0.0 & + a3 0.0 0.0 ${ca} & + basis 0.0 0.0 0.0 & + basis 0.5 0.5 0.5 + +create_atoms 2 region box_vide + 
+# Oxygen atoms +lattice custom ${a} origin 0.0 0.0 0.0 & + orient x 1 0 0 orient y 0 1 0 orient z 0 0 1 & + a1 1.0 0.0 0.0 & + a2 0.0 1.0 0.0 & + a3 0.0 0.0 ${ca} & + basis 0.30478 0.30478 0.0 & + basis 0.69522 0.69522 0.0 & + basis 0.19522 0.80478 0.5 & + basis 0.80478 0.19522 0.5 + +create_atoms 1 region box_vide + + +mass 1 16.00 +group Oxy type 1 + +mass 2 47.867 +group Ti type 2 + +velocity all create ${T_depart} 277387 + +pair_style meam/spline +pair_coeff * * TiO.meam.spline O Ti + +neighbor 0.5 bin +neigh_modify every 2 delay 0 check yes + +timestep ${dt} + +thermo_style custom step temp press pe ke etotal lx ly lz vol +thermo 10 + + +#dump 5 all custom 500 boxAlpha_alumina.lammpstrj id type q x y z + +fix 3 all nve +run 100 + +unfix 3 +fix 1 all box/relax tri 0.0 vmax 0.001 +minimize 1.0e-3 1.0e-5 1000 10000 + +unfix 1 +reset_timestep 0 +thermo 50 +fix 3 all npt temp 300 300 0.1 aniso 1.0 1.0 1.0 +run 500 + diff --git a/examples/USER/misc/meam_spline/log.4May2017.meam-spline.Si.g++.1 b/examples/USER/misc/meam_spline/log.4May2017.meam-spline.Si.g++.1 new file mode 100644 index 0000000000..ebf0855029 --- /dev/null +++ b/examples/USER/misc/meam_spline/log.4May2017.meam-spline.Si.g++.1 @@ -0,0 +1,88 @@ +LAMMPS (13 Apr 2017) + using 1 OpenMP thread(s) per MPI task +# Si fcc phase + +units metal +boundary p p p + +atom_style atomic +lattice fcc 3.98 +Lattice spacing in x,y,z = 3.98 3.98 3.98 +region box block 0 5 0 5 0 5 +create_box 1 box +Created orthogonal box = (0 0 0) to (19.9 19.9 19.9) + 1 by 1 by 1 MPI processor grid +create_atoms 1 box +Created 500 atoms + +pair_style meam/spline +pair_coeff * * Si_1.meam.spline Si +Reading potential file Si_1.meam.spline with DATE: 2012-02-01 +mass * 28.085 + +velocity all create 500.0 44226611 + +fix 1 all nvt temp 500.0 500.0 1.0 + +thermo 50 +run 500 +Neighbor list info ... 
+ update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 6.5 + ghost atom cutoff = 6.5 + binsize = 3.25, bins = 7 7 7 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair meam/spline, perpetual + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/3d + bin: standard + (2) pair meam/spline, perpetual, half/full from (1) + attributes: half, newton on + pair build: halffull/newton + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 3.892 | 3.892 | 3.892 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 500 -1847.729 0 -1815.4786 1813162.7 + 50 1934.0932 -1940.8016 0 -1816.051 -48657.676 + 100 2570.1286 -1984.8725 0 -1819.0971 8002.4248 + 150 2566.7917 -1990.2724 0 -1824.7123 16819.447 + 200 2555.1319 -1995.2233 0 -1830.4152 5891.5313 + 250 2487.2881 -1995.8302 0 -1835.3981 -4339.7172 + 300 2381.4836 -1994.2492 0 -1840.6415 16508.04 + 350 2330.8663 -1996.6588 0 -1846.3161 24194.447 + 400 2212.6035 -1994.9278 0 -1852.2131 -9856.3709 + 450 2257.7531 -2003.8187 0 -1858.1918 -8029.6019 + 500 2211.4385 -2006.9846 0 -1864.345 4152.4867 +Loop time of 5.13837 on 1 procs for 500 steps with 500 atoms + +Performance: 8.407 ns/day, 2.855 hours/ns, 97.307 timesteps/s +99.8% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 5.0952 | 5.0952 | 5.0952 | 0.0 | 99.16 +Neigh | 0.026447 | 0.026447 | 0.026447 | 0.0 | 0.51 +Comm | 0.0063307 | 0.0063307 | 0.0063307 | 0.0 | 0.12 +Output | 0.0001905 | 0.0001905 | 0.0001905 | 0.0 | 0.00 +Modify | 0.0082877 | 0.0082877 | 0.0082877 | 0.0 | 0.16 +Other | | 0.00187 | | | 0.04 + +Nlocal: 500 ave 500 max 500 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 1767 ave 1767 max 1767 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 18059 ave 18059 max 18059 min +Histogram: 1 0 0 
0 0 0 0 0 0 0 +FullNghs: 36118 ave 36118 max 36118 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 36118 +Ave neighs/atom = 72.236 +Neighbor list builds = 14 +Dangerous builds = 0 + +Total wall time: 0:00:05 diff --git a/examples/USER/misc/meam_spline/log.4May2017.meam-spline.Si.g++.4 b/examples/USER/misc/meam_spline/log.4May2017.meam-spline.Si.g++.4 new file mode 100644 index 0000000000..3f059d7cee --- /dev/null +++ b/examples/USER/misc/meam_spline/log.4May2017.meam-spline.Si.g++.4 @@ -0,0 +1,88 @@ +LAMMPS (13 Apr 2017) + using 1 OpenMP thread(s) per MPI task +# Si fcc phase + +units metal +boundary p p p + +atom_style atomic +lattice fcc 3.98 +Lattice spacing in x,y,z = 3.98 3.98 3.98 +region box block 0 5 0 5 0 5 +create_box 1 box +Created orthogonal box = (0 0 0) to (19.9 19.9 19.9) + 1 by 2 by 2 MPI processor grid +create_atoms 1 box +Created 500 atoms + +pair_style meam/spline +pair_coeff * * Si_1.meam.spline Si +Reading potential file Si_1.meam.spline with DATE: 2012-02-01 +mass * 28.085 + +velocity all create 500.0 44226611 + +fix 1 all nvt temp 500.0 500.0 1.0 + +thermo 50 +run 500 +Neighbor list info ... 
+ update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 6.5 + ghost atom cutoff = 6.5 + binsize = 3.25, bins = 7 7 7 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair meam/spline, perpetual + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/3d + bin: standard + (2) pair meam/spline, perpetual, half/full from (1) + attributes: half, newton on + pair build: halffull/newton + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 3.861 | 3.861 | 3.861 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 500 -1847.729 0 -1815.4786 1813162.7 + 50 1923.4262 -1940.0936 0 -1816.0311 -38700.835 + 100 2535.2542 -1982.6249 0 -1819.0989 10216.821 + 150 2592.8247 -1992.1569 0 -1824.9176 4839.3385 + 200 2484.7391 -1990.8452 0 -1830.5775 14040.141 + 250 2597.4401 -2003.1619 0 -1835.625 1261.5199 + 300 2513.0793 -2002.942 0 -1840.8463 6690.9815 + 350 2390.933 -2001.0761 0 -1846.859 -4880.1146 + 400 2269.0782 -1999.3441 0 -1852.9867 -4921.4391 + 450 2287.5096 -2006.8236 0 -1859.2774 -7313.6151 + 500 2303.0918 -2014.0693 0 -1865.518 -9995.1789 +Loop time of 1.46588 on 4 procs for 500 steps with 500 atoms + +Performance: 29.470 ns/day, 0.814 hours/ns, 341.093 timesteps/s +99.4% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 1.4273 | 1.4292 | 1.432 | 0.1 | 97.50 +Neigh | 0.0068567 | 0.0070301 | 0.0073655 | 0.2 | 0.48 +Comm | 0.019111 | 0.022127 | 0.024148 | 1.2 | 1.51 +Output | 0.00023174 | 0.00024784 | 0.00029206 | 0.0 | 0.02 +Modify | 0.005043 | 0.0052016 | 0.0054417 | 0.2 | 0.35 +Other | | 0.002066 | | | 0.14 + +Nlocal: 125 ave 131 max 118 min +Histogram: 1 0 0 1 0 0 0 0 1 1 +Nghost: 979.25 ave 986 max 975 min +Histogram: 1 1 0 1 0 0 0 0 0 1 +Neighs: 4541.75 ave 4712 max 4362 min +Histogram: 1 1 
0 0 0 0 0 0 0 2 +FullNghs: 9083.5 ave 9485 max 8601 min +Histogram: 1 0 0 1 0 0 0 0 1 1 + +Total # of neighbors = 36334 +Ave neighs/atom = 72.668 +Neighbor list builds = 14 +Dangerous builds = 0 + +Total wall time: 0:00:01 diff --git a/examples/USER/misc/meam_spline/log.4May2017.meam-spline.TiO2.g++.1 b/examples/USER/misc/meam_spline/log.4May2017.meam-spline.TiO2.g++.1 new file mode 100644 index 0000000000..aaeadec668 --- /dev/null +++ b/examples/USER/misc/meam_spline/log.4May2017.meam-spline.TiO2.g++.1 @@ -0,0 +1,248 @@ +LAMMPS (13 Apr 2017) + using 1 OpenMP thread(s) per MPI task +# + +variable T_depart equal 300 + +variable dt equal 0.0002 + +variable a equal 4.5937 +variable c equal 2.9587 +variable ca equal ${c}/${a} +variable ca equal 2.9587/${a} +variable ca equal 2.9587/4.5937 + +variable nx equal 6 +variable ny equal 6 +variable nz equal 11 + +variable bx equal ${a}*${nx} +variable bx equal 4.5937*${nx} +variable bx equal 4.5937*6 +variable by equal ${a}*${ny} +variable by equal 4.5937*${ny} +variable by equal 4.5937*6 +variable bz equal ${c}*${nz} +variable bz equal 2.9587*${nz} +variable bz equal 2.9587*11 +# ======================================================================= + +units metal +atom_style atomic +dimension 3 +boundary p p p + + +lattice sc 1.0 +Lattice spacing in x,y,z = 1 1 1 +region box_vide prism 0 ${bx} 0 ${by} 0 ${bz} 0.0 0.0 0.0 +region box_vide prism 0 27.5622 0 ${by} 0 ${bz} 0.0 0.0 0.0 +region box_vide prism 0 27.5622 0 27.5622 0 ${bz} 0.0 0.0 0.0 +region box_vide prism 0 27.5622 0 27.5622 0 32.5457 0.0 0.0 0.0 +create_box 2 box_vide +Created triclinic box = (0 0 0) to (27.5622 27.5622 32.5457) with tilt (0 0 0) + 1 by 1 by 1 MPI processor grid + +#lattice sc 1.0 +#region box_TiO2 block 0 ${bx} 0 ${by} 0 ${bz} + +# titanium atoms +lattice custom ${a} origin 0.0 0.0 0.0 orient x 1 0 0 orient y 0 1 0 orient z 0 0 1 a1 1.0 0.0 0.0 a2 0.0 1.0 0.0 a3 0.0 0.0 ${ca} basis 0.0 0.0 0.0 basis 0.5 0.5 0.5 +lattice custom 4.5937 origin 0.0 
0.0 0.0 orient x 1 0 0 orient y 0 1 0 orient z 0 0 1 a1 1.0 0.0 0.0 a2 0.0 1.0 0.0 a3 0.0 0.0 ${ca} basis 0.0 0.0 0.0 basis 0.5 0.5 0.5 +lattice custom 4.5937 origin 0.0 0.0 0.0 orient x 1 0 0 orient y 0 1 0 orient z 0 0 1 a1 1.0 0.0 0.0 a2 0.0 1.0 0.0 a3 0.0 0.0 0.644077758669482 basis 0.0 0.0 0.0 basis 0.5 0.5 0.5 +Lattice spacing in x,y,z = 4.5937 4.5937 2.9587 + +create_atoms 2 region box_vide +Created 792 atoms + +# Oxygen atoms +lattice custom ${a} origin 0.0 0.0 0.0 orient x 1 0 0 orient y 0 1 0 orient z 0 0 1 a1 1.0 0.0 0.0 a2 0.0 1.0 0.0 a3 0.0 0.0 ${ca} basis 0.30478 0.30478 0.0 basis 0.69522 0.69522 0.0 basis 0.19522 0.80478 0.5 basis 0.80478 0.19522 0.5 +lattice custom 4.5937 origin 0.0 0.0 0.0 orient x 1 0 0 orient y 0 1 0 orient z 0 0 1 a1 1.0 0.0 0.0 a2 0.0 1.0 0.0 a3 0.0 0.0 ${ca} basis 0.30478 0.30478 0.0 basis 0.69522 0.69522 0.0 basis 0.19522 0.80478 0.5 basis 0.80478 0.19522 0.5 +lattice custom 4.5937 origin 0.0 0.0 0.0 orient x 1 0 0 orient y 0 1 0 orient z 0 0 1 a1 1.0 0.0 0.0 a2 0.0 1.0 0.0 a3 0.0 0.0 0.644077758669482 basis 0.30478 0.30478 0.0 basis 0.69522 0.69522 0.0 basis 0.19522 0.80478 0.5 basis 0.80478 0.19522 0.5 +Lattice spacing in x,y,z = 4.5937 4.5937 2.9587 + +create_atoms 1 region box_vide +Created 1584 atoms + + +mass 1 16.00 +group Oxy type 1 +1584 atoms in group Oxy + +mass 2 47.867 +group Ti type 2 +792 atoms in group Ti + +velocity all create ${T_depart} 277387 +velocity all create 300 277387 + +pair_style meam/spline +pair_coeff * * TiO.meam.spline O Ti +Reading potential file TiO.meam.spline with DATE: 2016-06-05 + +neighbor 0.5 bin +neigh_modify every 2 delay 0 check yes + +timestep ${dt} +timestep 0.0002 + +thermo_style custom step temp press pe ke etotal lx ly lz vol +thermo 10 + + +#dump 5 all custom 500 boxAlpha_alumina.lammpstrj id type q x y z + +fix 3 all nve +run 100 +Neighbor list info ... 
+ update every 2 steps, delay 0 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 6 + ghost atom cutoff = 6 + binsize = 3, bins = 10 10 11 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair meam/spline, perpetual + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/3d + bin: standard + (2) pair meam/spline, perpetual, half/full from (1) + attributes: half, newton on + pair build: halffull/newton + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 5.146 | 5.146 | 5.146 Mbytes +Step Temp Press PotEng KinEng TotEng Lx Ly Lz Volume + 0 300 22403.656 -14374.073 92.097853 -14281.975 27.5622 27.5622 32.5457 24724.15 + 10 301.41345 23612.297 -14374.507 92.531772 -14281.975 27.5622 27.5622 32.5457 24724.15 + 20 305.11674 25127.832 -14375.643 93.668657 -14281.974 27.5622 27.5622 32.5457 24724.15 + 30 313.28903 26655.89 -14378.151 96.17749 -14281.974 27.5622 27.5622 32.5457 24724.15 + 40 328.94567 26999.049 -14382.957 100.98397 -14281.974 27.5622 27.5622 32.5457 24724.15 + 50 354.05827 23023.294 -14390.667 108.69336 -14281.974 27.5622 27.5622 32.5457 24724.15 + 60 390.48404 13594.655 -14401.849 119.87581 -14281.973 27.5622 27.5622 32.5457 24724.15 + 70 442.69928 151.15709 -14417.877 135.90551 -14281.972 27.5622 27.5622 32.5457 24724.15 + 80 516.89551 -14984.124 -14440.654 158.68322 -14281.971 27.5622 27.5622 32.5457 24724.15 + 90 618.22135 -29948.066 -14471.76 189.78953 -14281.971 27.5622 27.5622 32.5457 24724.15 + 100 747.6193 -41964.291 -14511.487 229.51378 -14281.973 27.5622 27.5622 32.5457 24724.15 +Loop time of 38.7948 on 1 procs for 100 steps with 2376 atoms + +Performance: 0.045 ns/day, 538.817 hours/ns, 2.578 timesteps/s +99.7% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 38.774 | 38.774 | 38.774 | 
0.0 | 99.95 +Neigh | 0.010751 | 0.010751 | 0.010751 | 0.0 | 0.03 +Comm | 0.0039313 | 0.0039313 | 0.0039313 | 0.0 | 0.01 +Output | 0.00048804 | 0.00048804 | 0.00048804 | 0.0 | 0.00 +Modify | 0.0039241 | 0.0039241 | 0.0039241 | 0.0 | 0.01 +Other | | 0.001809 | | | 0.00 + +Nlocal: 2376 ave 2376 max 2376 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 4479 ave 4479 max 4479 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 106396 ave 106396 max 106396 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +FullNghs: 212792 ave 212792 max 212792 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 212792 +Ave neighs/atom = 89.5589 +Neighbor list builds = 1 +Dangerous builds = 0 + +unfix 3 +fix 1 all box/relax tri 0.0 vmax 0.001 +minimize 1.0e-3 1.0e-5 1000 10000 +WARNING: Resetting reneighboring criteria during minimization (../min.cpp:168) +Per MPI rank memory allocation (min/avg/max) = 6.271 | 6.271 | 6.271 Mbytes +Step Temp Press PotEng KinEng TotEng Lx Ly Lz Volume + 100 747.6193 -41964.291 -14511.487 229.51378 -14281.973 27.5622 27.5622 32.5457 24724.15 + 101 747.6193 -39284.65 -14517.424 229.51378 -14287.91 27.569615 27.569695 32.513154 24712.789 +Loop time of 0.814693 on 1 procs for 1 steps with 2376 atoms + +99.8% CPU use with 1 MPI tasks x 1 OpenMP threads + +Minimization stats: + Stopping criterion = energy tolerance + Energy initial, next-to-last, final = + -14511.4866189 -14511.4866189 -14517.4235162 + Force two-norm initial, final = 5602.25 5486.97 + Force max component initial, final = 5232.05 5109.43 + Final line search alpha, max atom move = 1.9113e-07 0.000976563 + Iterations, force evaluations = 1 1 + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.81429 | 0.81429 | 0.81429 | 0.0 | 99.95 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 6.485e-05 | 6.485e-05 | 6.485e-05 | 0.0 | 0.01 +Output | 0 | 0 | 0 | 0.0 | 0.00 +Modify | 0 | 0 | 0 | 0.0 | 0.00 +Other | | 0.0003347 
| | | 0.04 + +Nlocal: 2376 ave 2376 max 2376 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 4449 ave 4449 max 4449 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 105639 ave 105639 max 105639 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +FullNghs: 211278 ave 211278 max 211278 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 211278 +Ave neighs/atom = 88.9217 +Neighbor list builds = 0 +Dangerous builds = 0 + +unfix 1 +reset_timestep 0 +thermo 50 +fix 3 all npt temp 300 300 0.1 aniso 1.0 1.0 1.0 +run 500 +Per MPI rank memory allocation (min/avg/max) = 5.162 | 5.162 | 5.162 Mbytes +Step Temp Press PotEng KinEng TotEng Lx Ly Lz Volume + 0 747.6193 -39284.65 -14517.424 229.51378 -14287.91 27.569615 27.569695 32.513154 24712.789 + 50 1155.2849 30650.319 -14678.807 354.6642 -14324.143 27.608688 27.60914 32.375311 24678.15 + 100 790.03926 99869.991 -14678.858 242.5364 -14436.322 27.777994 27.77799 32.017001 24704.857 + 150 938.86463 -21488.442 -14803.782 288.22472 -14515.557 27.996584 27.995139 31.67008 24822.003 + 200 420.11331 -790.80799 -14671.687 128.97178 -14542.715 28.126911 28.125909 31.431033 24864.93 + 250 352.18149 -3244.2491 -14665.007 108.1172 -14556.889 28.222686 28.223673 31.238649 24883.078 + 300 622.91245 3657.7097 -14758.201 191.22967 -14566.972 28.301771 28.30503 31.07216 24891.363 + 350 888.25374 26274.358 -14852.568 272.68754 -14579.881 28.370312 28.375107 30.937051 24904.656 + 400 735.44163 63109.066 -14823.872 225.77532 -14598.097 28.446905 28.45227 30.838015 24959.642 + 450 804.81905 6221.0364 -14861.113 247.07369 -14614.039 28.543942 28.548719 30.775793 25078.977 + 500 628.19106 -33912.026 -14814.726 192.85016 -14621.876 28.611997 28.615169 30.74081 25168.642 +Loop time of 176.167 on 1 procs for 500 steps with 2376 atoms + +Performance: 0.049 ns/day, 489.353 hours/ns, 2.838 timesteps/s +99.8% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total 
+--------------------------------------------------------------- +Pair | 175.9 | 175.9 | 175.9 | 0.0 | 99.85 +Neigh | 0.17043 | 0.17043 | 0.17043 | 0.0 | 0.10 +Comm | 0.018243 | 0.018243 | 0.018243 | 0.0 | 0.01 +Output | 0.00040984 | 0.00040984 | 0.00040984 | 0.0 | 0.00 +Modify | 0.067142 | 0.067142 | 0.067142 | 0.0 | 0.04 +Other | | 0.00828 | | | 0.00 + +Nlocal: 2376 ave 2376 max 2376 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 4358 ave 4358 max 4358 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 102634 ave 102634 max 102634 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +FullNghs: 205268 ave 205268 max 205268 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 205268 +Ave neighs/atom = 86.3923 +Neighbor list builds = 16 +Dangerous builds = 0 + +Total wall time: 0:03:37 diff --git a/examples/USER/misc/meam_spline/log.4May2017.meam-spline.TiO2.g++.4 b/examples/USER/misc/meam_spline/log.4May2017.meam-spline.TiO2.g++.4 new file mode 100644 index 0000000000..6c2c949acb --- /dev/null +++ b/examples/USER/misc/meam_spline/log.4May2017.meam-spline.TiO2.g++.4 @@ -0,0 +1,248 @@ +LAMMPS (13 Apr 2017) + using 1 OpenMP thread(s) per MPI task +# + +variable T_depart equal 300 + +variable dt equal 0.0002 + +variable a equal 4.5937 +variable c equal 2.9587 +variable ca equal ${c}/${a} +variable ca equal 2.9587/${a} +variable ca equal 2.9587/4.5937 + +variable nx equal 6 +variable ny equal 6 +variable nz equal 11 + +variable bx equal ${a}*${nx} +variable bx equal 4.5937*${nx} +variable bx equal 4.5937*6 +variable by equal ${a}*${ny} +variable by equal 4.5937*${ny} +variable by equal 4.5937*6 +variable bz equal ${c}*${nz} +variable bz equal 2.9587*${nz} +variable bz equal 2.9587*11 +# ======================================================================= + +units metal +atom_style atomic +dimension 3 +boundary p p p + + +lattice sc 1.0 +Lattice spacing in x,y,z = 1 1 1 +region box_vide prism 0 ${bx} 0 ${by} 0 ${bz} 0.0 0.0 0.0 +region box_vide prism 0 27.5622 0 ${by} 0 ${bz} 0.0 0.0 
0.0 +region box_vide prism 0 27.5622 0 27.5622 0 ${bz} 0.0 0.0 0.0 +region box_vide prism 0 27.5622 0 27.5622 0 32.5457 0.0 0.0 0.0 +create_box 2 box_vide +Created triclinic box = (0 0 0) to (27.5622 27.5622 32.5457) with tilt (0 0 0) + 1 by 2 by 2 MPI processor grid + +#lattice sc 1.0 +#region box_TiO2 block 0 ${bx} 0 ${by} 0 ${bz} + +# titanium atoms +lattice custom ${a} origin 0.0 0.0 0.0 orient x 1 0 0 orient y 0 1 0 orient z 0 0 1 a1 1.0 0.0 0.0 a2 0.0 1.0 0.0 a3 0.0 0.0 ${ca} basis 0.0 0.0 0.0 basis 0.5 0.5 0.5 +lattice custom 4.5937 origin 0.0 0.0 0.0 orient x 1 0 0 orient y 0 1 0 orient z 0 0 1 a1 1.0 0.0 0.0 a2 0.0 1.0 0.0 a3 0.0 0.0 ${ca} basis 0.0 0.0 0.0 basis 0.5 0.5 0.5 +lattice custom 4.5937 origin 0.0 0.0 0.0 orient x 1 0 0 orient y 0 1 0 orient z 0 0 1 a1 1.0 0.0 0.0 a2 0.0 1.0 0.0 a3 0.0 0.0 0.644077758669482 basis 0.0 0.0 0.0 basis 0.5 0.5 0.5 +Lattice spacing in x,y,z = 4.5937 4.5937 2.9587 + +create_atoms 2 region box_vide +Created 792 atoms + +# Oxygen atoms +lattice custom ${a} origin 0.0 0.0 0.0 orient x 1 0 0 orient y 0 1 0 orient z 0 0 1 a1 1.0 0.0 0.0 a2 0.0 1.0 0.0 a3 0.0 0.0 ${ca} basis 0.30478 0.30478 0.0 basis 0.69522 0.69522 0.0 basis 0.19522 0.80478 0.5 basis 0.80478 0.19522 0.5 +lattice custom 4.5937 origin 0.0 0.0 0.0 orient x 1 0 0 orient y 0 1 0 orient z 0 0 1 a1 1.0 0.0 0.0 a2 0.0 1.0 0.0 a3 0.0 0.0 ${ca} basis 0.30478 0.30478 0.0 basis 0.69522 0.69522 0.0 basis 0.19522 0.80478 0.5 basis 0.80478 0.19522 0.5 +lattice custom 4.5937 origin 0.0 0.0 0.0 orient x 1 0 0 orient y 0 1 0 orient z 0 0 1 a1 1.0 0.0 0.0 a2 0.0 1.0 0.0 a3 0.0 0.0 0.644077758669482 basis 0.30478 0.30478 0.0 basis 0.69522 0.69522 0.0 basis 0.19522 0.80478 0.5 basis 0.80478 0.19522 0.5 +Lattice spacing in x,y,z = 4.5937 4.5937 2.9587 + +create_atoms 1 region box_vide +Created 1584 atoms + + +mass 1 16.00 +group Oxy type 1 +1584 atoms in group Oxy + +mass 2 47.867 +group Ti type 2 +792 atoms in group Ti + +velocity all create ${T_depart} 277387 +velocity all 
create 300 277387 + +pair_style meam/spline +pair_coeff * * TiO.meam.spline O Ti +Reading potential file TiO.meam.spline with DATE: 2016-06-05 + +neighbor 0.5 bin +neigh_modify every 2 delay 0 check yes + +timestep ${dt} +timestep 0.0002 + +thermo_style custom step temp press pe ke etotal lx ly lz vol +thermo 10 + + +#dump 5 all custom 500 boxAlpha_alumina.lammpstrj id type q x y z + +fix 3 all nve +run 100 +Neighbor list info ... + update every 2 steps, delay 0 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 6 + ghost atom cutoff = 6 + binsize = 3, bins = 10 10 11 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair meam/spline, perpetual + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/3d + bin: standard + (2) pair meam/spline, perpetual, half/full from (1) + attributes: half, newton on + pair build: halffull/newton + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 3.922 | 3.922 | 3.922 Mbytes +Step Temp Press PotEng KinEng TotEng Lx Ly Lz Volume + 0 300 22403.656 -14374.073 92.097853 -14281.975 27.5622 27.5622 32.5457 24724.15 + 10 301.16725 23582.084 -14374.431 92.456192 -14281.975 27.5622 27.5622 32.5457 24724.15 + 20 304.58237 25059.749 -14375.479 93.504609 -14281.974 27.5622 27.5622 32.5457 24724.15 + 30 312.41477 26504.358 -14377.883 95.9091 -14281.974 27.5622 27.5622 32.5457 24724.15 + 40 327.67099 26687.057 -14382.566 100.59265 -14281.974 27.5622 27.5622 32.5457 24724.15 + 50 352.32125 22677.292 -14390.134 108.1601 -14281.974 27.5622 27.5622 32.5457 24724.15 + 60 388.40592 12472.705 -14401.211 119.23784 -14281.973 27.5622 27.5622 32.5457 24724.15 + 70 439.97199 -1520.4694 -14417.04 135.06825 -14281.972 27.5622 27.5622 32.5457 24724.15 + 80 513.34361 -16733.316 -14439.564 157.59282 -14281.971 27.5622 27.5622 32.5457 24724.15 + 90 613.3542 -31099.591 -14470.267 188.29535 -14281.971 27.5622 27.5622 32.5457 24724.15 + 100 741.02836 -42358.226 
-14509.464 227.4904 -14281.973 27.5622 27.5622 32.5457 24724.15 +Loop time of 8.92317 on 4 procs for 100 steps with 2376 atoms + +Performance: 0.194 ns/day, 123.933 hours/ns, 11.207 timesteps/s +99.5% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 8.8912 | 8.9 | 8.9064 | 0.2 | 99.74 +Neigh | 0.0027034 | 0.0028808 | 0.0032032 | 0.4 | 0.03 +Comm | 0.010964 | 0.017648 | 0.026568 | 5.0 | 0.20 +Output | 0.00037575 | 0.00047809 | 0.00053835 | 0.0 | 0.01 +Modify | 0.00099134 | 0.001001 | 0.0010085 | 0.0 | 0.01 +Other | | 0.001162 | | | 0.01 + +Nlocal: 594 ave 599 max 589 min +Histogram: 1 0 0 0 0 2 0 0 0 1 +Nghost: 2290.25 ave 2296 max 2282 min +Histogram: 1 0 0 0 1 0 0 0 1 1 +Neighs: 26671.5 ave 26934 max 26495 min +Histogram: 1 0 0 2 0 0 0 0 0 1 +FullNghs: 53343 ave 53828 max 52922 min +Histogram: 1 0 0 0 2 0 0 0 0 1 + +Total # of neighbors = 213372 +Ave neighs/atom = 89.803 +Neighbor list builds = 1 +Dangerous builds = 0 + +unfix 3 +fix 1 all box/relax tri 0.0 vmax 0.001 +minimize 1.0e-3 1.0e-5 1000 10000 +WARNING: Resetting reneighboring criteria during minimization (../min.cpp:168) +Per MPI rank memory allocation (min/avg/max) = 5.047 | 5.047 | 5.047 Mbytes +Step Temp Press PotEng KinEng TotEng Lx Ly Lz Volume + 100 741.02836 -42358.226 -14509.464 227.4904 -14281.973 27.5622 27.5622 32.5457 24724.15 + 101 741.02836 -39686.588 -14515.398 227.4904 -14287.907 27.569587 27.569656 32.513154 24712.729 +Loop time of 0.193516 on 4 procs for 1 steps with 2376 atoms + +99.5% CPU use with 4 MPI tasks x 1 OpenMP threads + +Minimization stats: + Stopping criterion = energy tolerance + Energy initial, next-to-last, final = + -14509.46351 -14509.46351 -14515.3978891 + Force two-norm initial, final = 5602.69 5487.77 + Force max component initial, final = 5235.27 5113.06 + Final line search alpha, max atom move = 1.91012e-07 
0.000976657 + Iterations, force evaluations = 1 1 + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.19287 | 0.19299 | 0.19318 | 0.0 | 99.73 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 0.00014043 | 0.00033247 | 0.00045896 | 0.0 | 0.17 +Output | 0 | 0 | 0 | 0.0 | 0.00 +Modify | 0 | 0 | 0 | 0.0 | 0.00 +Other | | 0.0001886 | | | 0.10 + +Nlocal: 594 ave 601 max 586 min +Histogram: 1 0 0 0 0 1 1 0 0 1 +Nghost: 2263.25 ave 2271 max 2251 min +Histogram: 1 0 0 0 0 0 1 0 1 1 +Neighs: 26425.8 ave 26807 max 26121 min +Histogram: 1 0 0 1 1 0 0 0 0 1 +FullNghs: 52851.5 ave 53580 max 52175 min +Histogram: 1 0 0 0 2 0 0 0 0 1 + +Total # of neighbors = 211406 +Ave neighs/atom = 88.9756 +Neighbor list builds = 0 +Dangerous builds = 0 + +unfix 1 +reset_timestep 0 +thermo 50 +fix 3 all npt temp 300 300 0.1 aniso 1.0 1.0 1.0 +run 500 +Per MPI rank memory allocation (min/avg/max) = 3.937 | 3.937 | 3.937 Mbytes +Step Temp Press PotEng KinEng TotEng Lx Ly Lz Volume + 0 741.02836 -39686.588 -14515.398 227.4904 -14287.907 27.569587 27.569656 32.513154 24712.729 + 50 1157.347 29332.549 -14679.321 355.29725 -14324.024 27.60903 27.609325 32.375509 24678.772 + 100 777.55858 101883.12 -14674.854 238.70492 -14436.149 27.778518 27.777373 32.017262 24704.976 + 150 945.49014 -18305.383 -14806.687 290.25871 -14516.428 27.998313 27.99535 31.670225 24823.838 + 200 427.46608 -4045.0095 -14674.887 131.22903 -14543.658 28.130283 28.127147 31.431578 24869.438 + 250 362.82166 -7283.1332 -14669.07 111.38365 -14557.687 28.225232 28.222707 31.238451 24884.314 + 300 626.2858 7228.0309 -14760.128 192.26526 -14567.862 28.302384 28.299949 31.070038 24885.734 + 350 859.84293 30084.735 -14845.064 263.96563 -14581.099 28.372349 28.369334 30.934424 24899.261 + 400 755.26136 54745.408 -14830.701 231.85983 -14598.842 28.450301 28.448361 30.836159 24957.691 + 450 802.52344 5690.2863 -14860.193 246.36895 
-14613.824 28.542311 28.541672 30.773339 25069.354 + 500 631.84734 -31473.795 -14816.101 193.97261 -14622.128 28.605857 28.605891 30.737955 25152.746 +Loop time of 39.7881 on 4 procs for 500 steps with 2376 atoms + +Performance: 0.217 ns/day, 110.522 hours/ns, 12.567 timesteps/s +99.4% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 39.617 | 39.633 | 39.653 | 0.2 | 99.61 +Neigh | 0.043624 | 0.046792 | 0.051708 | 1.4 | 0.12 +Comm | 0.05215 | 0.072616 | 0.092142 | 5.6 | 0.18 +Output | 0.00042915 | 0.00045079 | 0.00051546 | 0.0 | 0.00 +Modify | 0.029836 | 0.030341 | 0.03094 | 0.2 | 0.08 +Other | | 0.004489 | | | 0.01 + +Nlocal: 594 ave 606 max 582 min +Histogram: 1 0 0 0 1 1 0 0 0 1 +Nghost: 2226 ave 2238 max 2214 min +Histogram: 1 0 0 0 1 1 0 0 0 1 +Neighs: 25652.8 ave 26129 max 25153 min +Histogram: 1 0 0 0 1 1 0 0 0 1 +FullNghs: 51305.5 ave 52398 max 50251 min +Histogram: 1 0 0 0 1 1 0 0 0 1 + +Total # of neighbors = 205222 +Ave neighs/atom = 86.3729 +Neighbor list builds = 16 +Dangerous builds = 0 + +Total wall time: 0:00:49 diff --git a/examples/USER/vtk/.gitignore b/examples/USER/vtk/.gitignore new file mode 100644 index 0000000000..995bba6cb2 --- /dev/null +++ b/examples/USER/vtk/.gitignore @@ -0,0 +1 @@ +dump diff --git a/examples/USER/vtk/in.vtk b/examples/USER/vtk/in.vtk new file mode 100644 index 0000000000..6c294d2332 --- /dev/null +++ b/examples/USER/vtk/in.vtk @@ -0,0 +1,35 @@ +# 3d Lennard-Jones melt + +units lj +atom_style atomic + +lattice fcc 0.8442 +region box block 0 10 0 10 0 10 +create_box 1 box +create_atoms 1 box +mass 1 1.0 + +velocity all create 3.0 87287 + +pair_style lj/cut 2.5 +pair_coeff 1 1 1.0 1.0 2.5 + +neighbor 0.3 bin +neigh_modify every 20 delay 0 check no + +fix 1 all nve + +shell mkdir dump + +dump dmpvtk all vtk 50 dump/dump*.vtk id type vx vy vz fx fy fz + +#dump 2 all 
image 25 image.*.jpg type type & +# axes yes 0.8 0.02 view 60 -30 +#dump_modify 2 pad 3 + +#dump 3 all movie 25 movie.mpg type type & +# axes yes 0.8 0.02 view 60 -30 +#dump_modify 3 pad 3 + +thermo 50 +run 250 diff --git a/examples/USER/vtk/in.vtp b/examples/USER/vtk/in.vtp new file mode 100644 index 0000000000..ff5ab8d9ab --- /dev/null +++ b/examples/USER/vtk/in.vtp @@ -0,0 +1,35 @@ +# 3d Lennard-Jones melt + +units lj +atom_style atomic + +lattice fcc 0.8442 +region box block 0 10 0 10 0 10 +create_box 1 box +create_atoms 1 box +mass 1 1.0 + +velocity all create 3.0 87287 + +pair_style lj/cut 2.5 +pair_coeff 1 1 1.0 1.0 2.5 + +neighbor 0.3 bin +neigh_modify every 20 delay 0 check no + +fix 1 all nve + +shell mkdir dump + +dump dmpvtk all vtk 50 dump/dump*.vtp id type vx vy vz fx fy fz + +#dump 2 all image 25 image.*.jpg type type & +# axes yes 0.8 0.02 view 60 -30 +#dump_modify 2 pad 3 + +#dump 3 all movie 25 movie.mpg type type & +# axes yes 0.8 0.02 view 60 -30 +#dump_modify 3 pad 3 + +thermo 50 +run 250 diff --git a/examples/cmap/in.cmap b/examples/cmap/in.cmap index d2b2714b82..3b6f2767ed 100644 --- a/examples/cmap/in.cmap +++ b/examples/cmap/in.cmap @@ -9,11 +9,10 @@ boundary p p p atom_style full bond_style harmonic angle_style charmm -dihedral_style charmm +dihedral_style charmmfsw improper_style harmonic -pair_style lj/charmm/coul/charmm 8 12 -#pair_style lj/charmmfsw/coul/charmmfsh 8 12 +pair_style lj/charmmfsw/coul/charmmfsh 8 12 pair_modify mix arithmetic fix cmap all cmap charmm22.cmap diff --git a/examples/cmap/log.11Apr17.cmap.g++.1 b/examples/cmap/log.11Apr17.cmap.g++.1 new file mode 100644 index 0000000000..9b4fc29991 --- /dev/null +++ b/examples/cmap/log.11Apr17.cmap.g++.1 @@ -0,0 +1,205 @@ +LAMMPS (31 Mar 2017) +# Created by charmm2lammps v1.8.2.6 beta on Thu Mar 3 20:56:57 EST 2016 + +units real +neigh_modify delay 2 every 1 +#newton off + +boundary p p p + +atom_style full +bond_style harmonic +angle_style charmm +dihedral_style charmmfsw 
+improper_style harmonic + +pair_style lj/charmmfsw/coul/charmmfsh 8 12 +pair_modify mix arithmetic + +fix cmap all cmap charmm22.cmap +Reading potential file charmm22.cmap with DATE: 2016-09-26 +fix_modify cmap energy yes + +read_data gagg.data fix cmap crossterm CMAP + orthogonal box = (-34.4147 -36.1348 -39.3491) to (45.5853 43.8652 40.6509) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 34 atoms + scanning bonds ... + 4 = max bonds/atom + scanning angles ... + 6 = max angles/atom + scanning dihedrals ... + 12 = max dihedrals/atom + scanning impropers ... + 1 = max impropers/atom + reading bonds ... + 33 bonds + reading angles ... + 57 angles + reading dihedrals ... + 75 dihedrals + reading impropers ... + 7 impropers + 4 = max # of 1-2 neighbors + 7 = max # of 1-3 neighbors + 13 = max # of 1-4 neighbors + 16 = max # of special neighbors + +special_bonds charmm +fix 1 all nve + +#fix 1 all nvt temp 300 300 100.0 +#fix 2 all shake 1e-9 500 0 m 1.0 + +velocity all create 0.0 12345678 dist uniform + +thermo 1000 +thermo_style custom step ecoul evdwl ebond eangle edihed f_cmap eimp +timestep 2.0 + +run 100000 +Neighbor list info ... 
+ update every 1 steps, delay 2 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 14 + ghost atom cutoff = 14 + binsize = 7, bins = 12 12 12 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/charmmfsw/coul/charmmfsh, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 14.96 | 14.96 | 14.96 Mbytes +Step E_coul E_vdwl E_bond E_angle E_dihed f_cmap E_impro + 0 16.287573 -0.85933785 1.2470497 4.8441789 4.5432816 -1.473352 0.10453023 + 1000 18.816462 -0.84379243 0.78931817 2.7554247 4.4371421 -2.7762038 0.12697656 + 2000 18.091571 -1.045888 0.72306589 3.0951524 4.6725102 -2.3580092 0.22712496 + 3000 17.835596 -1.2171641 0.72666403 2.6696491 5.4373798 -2.0737041 0.075101693 + 4000 16.211232 -0.42713611 0.99472642 3.8961462 5.2009895 -2.5626866 0.17356243 + 5000 17.72183 -0.57081189 0.90733068 3.4376382 4.5457582 -2.3727543 0.12354518 + 6000 18.753977 -1.5772499 0.81468321 2.9236782 4.6033216 -2.3380859 0.12835782 + 7000 18.186024 -0.84205608 0.58996182 3.0329585 4.7221473 -2.5733243 0.10047631 + 8000 18.214306 -1.1360938 0.72597611 3.7493028 4.7319958 -2.8957969 0.2006046 + 9000 17.248408 -0.48641993 0.90266229 2.9721743 4.7651056 -2.1473354 0.1302043 + 10000 17.760655 -1.2968444 0.92384663 3.7007455 4.7378947 -2.2147779 0.06940579 + 11000 17.633929 -0.57368413 0.84872849 3.4277114 4.285393 -2.236944 0.17204973 + 12000 18.305835 -1.0675148 0.75879532 2.8853173 4.685027 -2.409087 0.087538866 + 13000 17.391558 -0.9975291 0.66671947 3.8065638 5.2285578 -2.4198822 0.06253594 + 14000 17.483387 -0.67727643 0.91966477 3.7317031 4.7770445 -2.6080027 0.11487095 + 15000 18.131749 -1.1918751 1.0025684 3.1238131 4.789742 -2.2546745 0.13782813 + 16000 16.972343 -0.43926531 0.60644597 3.7551592 4.8658618 -2.2627659 0.12353145 + 17000 18.080785 -1.2073565 0.7867072 3.5671106 4.43754 -2.5092904 0.17429146 + 18000 
17.474576 -0.97836065 0.8678524 3.7961537 4.3409032 -1.8922572 0.134048 + 19000 17.000911 -1.2286864 0.83615834 3.9322908 4.9319492 -2.3281576 0.056689619 + 20000 17.043286 -0.8506561 0.80966589 3.5087339 4.8603878 -2.3365263 0.096794824 + 21000 17.314495 -1.1430889 0.95363892 4.2446032 4.2756745 -2.1829483 0.17119518 + 22000 18.954881 -0.998673 0.58688334 2.71536 4.6634319 -2.6862804 0.20328442 + 23000 17.160427 -0.97803282 0.86894041 4.0897736 4.3146238 -2.1962289 0.075339092 + 24000 17.602026 -1.0833323 0.94888776 3.7341878 4.3084335 -2.1640414 0.081493681 + 25000 17.845584 -1.3432612 0.93497086 3.8911043 4.468032 -2.3475883 0.093204333 + 26000 17.833261 -1.1020534 0.77931087 3.7628141 4.512381 -2.3134761 0.15568465 + 27000 17.68607 -1.3222026 1.1985872 3.5817624 4.6360755 -2.3492774 0.08427906 + 28000 18.326649 -1.2669291 0.74809075 3.2624429 4.4698564 -2.3679076 0.14677293 + 29000 17.720933 -1.0773886 0.83099482 3.7652834 4.6584594 -2.8255303 0.23092596 + 30000 18.201999 -1.0168706 1.0637455 3.453095 4.3738593 -2.8063214 0.18658217 + 31000 17.823502 -1.2685768 0.84805585 3.8600661 4.2195821 -2.1169716 0.12517101 + 32000 16.883133 -0.62062648 0.84434922 3.5042683 5.1264906 -2.2674699 0.030138165 + 33000 17.805715 -1.679553 1.2430372 4.314677 4.2523894 -2.3008321 0.18591872 + 34000 16.723767 -0.54189072 1.1282827 3.8542159 4.3026559 -2.2186336 0.05392425 + 35000 17.976909 -0.72092075 0.5876319 2.9726396 5.0881439 -2.491692 0.17356291 + 36000 18.782492 -1.514246 0.63237955 3.2777164 4.6077164 -2.502574 0.082537318 + 37000 17.247716 -0.6344626 0.79885976 3.452491 4.7618281 -2.3902444 0.11450271 + 38000 17.996494 -1.6712877 1.0111769 4.1689136 4.46963 -2.4076725 0.11875756 + 39000 17.586857 -0.74508086 0.95970486 3.7395038 4.6011357 -2.9854953 0.30143284 + 40000 17.494879 -0.30772446 0.72047991 3.2604877 4.7283734 -2.3812495 0.16399034 + 41000 15.855772 -0.49642605 0.82496448 4.5139653 4.76884 -2.214141 0.10899661 + 42000 17.898568 -1.3078863 1.1505144 4.0429873 
4.3889581 -2.8696559 0.23336417 + 43000 19.014372 -1.6325979 1.1553166 3.5660772 4.4047997 -2.9302044 0.13672127 + 44000 18.250782 -0.97211613 0.72714301 3.2258362 4.7257298 -2.5533613 0.11968073 + 45000 17.335174 0.24746331 1.0415866 3.3220992 4.5251095 -3.0415216 0.24453084 + 46000 17.72846 -0.9541418 0.88153841 3.7893452 4.5251883 -2.4003613 0.051809816 + 47000 18.226762 -0.67057787 0.84352989 3.0609522 4.5449078 -2.4694254 0.073703949 + 48000 17.838074 -0.88768441 1.3812262 3.5890492 4.5827868 -3.0137515 0.21417113 + 49000 17.973733 -0.75118705 0.69667886 3.3989025 4.7058886 -2.8243945 0.26665792 + 50000 17.461583 -0.65040016 0.68943524 2.9374743 5.6971777 -2.4438011 0.1697603 + 51000 16.79766 -0.010684434 0.89795555 3.959039 4.56763 -2.5101098 0.15048853 + 52000 17.566543 -0.7262764 0.74354418 3.3423185 4.8426523 -2.4187649 0.16908776 + 53000 17.964274 -0.9270914 1.065952 3.0397181 4.4682262 -2.2179503 0.07873406 + 54000 17.941256 -0.5807578 0.76516121 3.7262371 4.6975126 -3.179899 0.24433708 + 55000 17.079478 -0.48559832 0.95364453 3.0414645 5.2811414 -2.7064882 0.30102814 + 56000 17.632179 -0.75403299 0.97577942 3.3672363 4.4851336 -2.3683659 0.051117638 + 57000 16.17128 -0.44699325 0.76341543 4.267716 5.0881056 -2.4122329 0.16671692 + 58000 16.899276 -0.76481024 1.0400825 3.973493 4.8823309 -2.4270284 0.048716383 + 59000 18.145412 -0.84968335 0.71698306 3.2024358 4.6115739 -2.2520353 0.19466966 + 60000 17.578258 -1.0067331 0.72822527 3.5375208 4.9110255 -2.2319607 0.11922362 + 61000 17.434762 -1.0244393 0.90593099 3.8446915 4.8571191 -2.6228357 0.23259208 + 62000 17.580489 -1.1135917 0.79577432 3.7043524 4.6058114 -2.351492 0.042904152 + 63000 18.207335 -1.1512268 0.82684507 3.4114738 4.351069 -2.1878441 0.082922105 + 64000 18.333083 -1.1182287 0.74058959 3.6905164 4.3226172 -2.7110393 0.14721704 + 65000 16.271579 -0.7122151 1.0200168 4.6983643 4.3681131 -2.194921 0.12831024 + 66000 17.316444 -0.5729385 0.85254108 3.5769963 4.5526705 -2.3321328 0.040452643 
+ 67000 17.19011 -0.8814312 1.1381258 3.8605789 4.4183813 -2.299607 0.091527355 + 68000 18.223367 -1.362189 0.74472056 3.259165 4.486512 -2.2181134 0.048952796 + 69000 17.646348 -0.91647162 0.73990335 3.9313692 5.2663097 -3.3816778 0.27769877 + 70000 18.173493 -1.3107718 0.96484426 3.219728 4.5045124 -2.3349534 0.082327407 + 71000 17.0627 -0.58509083 0.85964129 3.8490884 4.437895 -2.1673348 0.24151404 + 72000 17.809764 -0.35128902 0.65479258 3.3945008 4.6160508 -2.5486166 0.10829531 + 73000 18.27769 -1.0739758 0.80890957 3.6070901 4.6256762 -2.4576547 0.080025736 + 74000 18.109437 -1.0691837 0.66679323 3.5923203 4.4825716 -2.5048169 0.21372319 + 75000 17.914569 -1.3500765 1.2993494 3.362421 4.4160377 -2.1278163 0.19397641 + 76000 16.563928 -0.16539261 1.0067302 3.5742755 4.8581915 -2.1362429 0.059822408 + 77000 18.130477 -0.38361279 0.43406954 3.4725995 4.7005855 -2.8836242 0.11958174 + 78000 16.746204 -1.1732959 0.7455507 3.6296638 5.6344113 -2.459208 0.16099803 + 79000 18.243999 -1.5850155 1.0108545 3.4727867 4.3367411 -2.316686 0.070480814 + 80000 16.960715 -0.84100929 0.91604996 3.862215 4.780949 -2.3711596 0.073916605 + 81000 17.697722 -1.1126605 0.952804 3.7114455 4.4216316 -2.2770085 0.091372066 + 82000 17.835901 -1.3091474 0.71867629 3.8168122 5.0150205 -2.4730634 0.062592852 + 83000 19.168418 -1.476938 0.75592316 3.2304519 4.3946471 -2.2991395 0.13083324 + 84000 17.945778 -1.5223622 1.0859941 3.4334011 5.0286682 -2.7550892 0.2476269 + 85000 17.950251 -0.85843846 0.86888218 3.3101287 4.5511879 -2.3640013 0.12080834 + 86000 17.480699 -0.97493649 0.85049761 3.4973085 4.6344922 -2.343121 0.2009677 + 87000 17.980244 -1.114983 0.88796989 3.4113329 4.3535853 -2.2535412 0.14494917 + 88000 18.023866 -1.226683 0.62339706 3.7649269 4.5923973 -2.3923523 0.10464375 + 89000 16.362829 -0.311462 1.0265375 4.0101723 4.4184777 -2.0314129 0.056570704 + 90000 17.533149 -0.41526788 1.0362029 3.4247412 4.2734431 -2.4776658 0.16960663 + 91000 17.719099 -1.1956801 1.0069945 
3.2380672 4.8982805 -2.2154906 0.12950936 + 92000 17.762654 -1.170027 0.95814525 3.5217717 4.5405343 -2.5983677 0.15037754 + 93000 17.393958 -0.45641026 0.6579069 3.6002204 4.5942053 -2.5559641 0.12026544 + 94000 16.8182 -0.92962066 0.86801362 4.2914398 4.659848 -2.5251987 0.18000415 + 95000 17.642086 -0.7994896 0.7003756 3.8036697 4.5252487 -2.4166307 0.15686517 + 96000 18.114292 -1.5102104 1.2635908 3.2764427 5.0659496 -2.2777806 0.054309645 + 97000 18.575765 -1.6015311 0.69500699 3.1649317 4.9945742 -2.4012125 0.067373724 + 98000 16.578893 -0.78030229 0.91524222 4.4429655 4.4622392 -2.4052655 0.15355705 + 99000 17.26063 -0.57832833 0.7098846 3.9000046 4.5576484 -2.5333026 0.25517222 + 100000 18.377235 -0.89109577 0.68988617 2.8751751 4.4115591 -2.3560731 0.12185212 +Loop time of 2.96043 on 1 procs for 100000 steps with 34 atoms + +Performance: 5836.990 ns/day, 0.004 hours/ns, 33778.875 timesteps/s +99.9% CPU use with 1 MPI tasks x no OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 1.074 | 1.074 | 1.074 | 0.0 | 36.28 +Bond | 1.6497 | 1.6497 | 1.6497 | 0.0 | 55.72 +Neigh | 0.007576 | 0.007576 | 0.007576 | 0.0 | 0.26 +Comm | 0.012847 | 0.012847 | 0.012847 | 0.0 | 0.43 +Output | 0.0010746 | 0.0010746 | 0.0010746 | 0.0 | 0.04 +Modify | 0.16485 | 0.16485 | 0.16485 | 0.0 | 5.57 +Other | | 0.05037 | | | 1.70 + +Nlocal: 34 ave 34 max 34 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 0 ave 0 max 0 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 395 ave 395 max 395 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 395 +Ave neighs/atom = 11.6176 +Ave special neighs/atom = 9.52941 +Neighbor list builds = 253 +Dangerous builds = 0 +Total wall time: 0:00:02 diff --git a/examples/cmap/log.11Apr17.cmap.g++.4 b/examples/cmap/log.11Apr17.cmap.g++.4 new file mode 100644 index 0000000000..ec471d5a7e --- /dev/null +++ 
b/examples/cmap/log.11Apr17.cmap.g++.4 @@ -0,0 +1,205 @@ +LAMMPS (31 Mar 2017) +# Created by charmm2lammps v1.8.2.6 beta on Thu Mar 3 20:56:57 EST 2016 + +units real +neigh_modify delay 2 every 1 +#newton off + +boundary p p p + +atom_style full +bond_style harmonic +angle_style charmm +dihedral_style charmmfsw +improper_style harmonic + +pair_style lj/charmmfsw/coul/charmmfsh 8 12 +pair_modify mix arithmetic + +fix cmap all cmap charmm22.cmap +Reading potential file charmm22.cmap with DATE: 2016-09-26 +fix_modify cmap energy yes + +read_data gagg.data fix cmap crossterm CMAP + orthogonal box = (-34.4147 -36.1348 -39.3491) to (45.5853 43.8652 40.6509) + 1 by 2 by 2 MPI processor grid + reading atoms ... + 34 atoms + scanning bonds ... + 4 = max bonds/atom + scanning angles ... + 6 = max angles/atom + scanning dihedrals ... + 12 = max dihedrals/atom + scanning impropers ... + 1 = max impropers/atom + reading bonds ... + 33 bonds + reading angles ... + 57 angles + reading dihedrals ... + 75 dihedrals + reading impropers ... + 7 impropers + 4 = max # of 1-2 neighbors + 7 = max # of 1-3 neighbors + 13 = max # of 1-4 neighbors + 16 = max # of special neighbors + +special_bonds charmm +fix 1 all nve + +#fix 1 all nvt temp 300 300 100.0 +#fix 2 all shake 1e-9 500 0 m 1.0 + +velocity all create 0.0 12345678 dist uniform + +thermo 1000 +thermo_style custom step ecoul evdwl ebond eangle edihed f_cmap eimp +timestep 2.0 + +run 100000 +Neighbor list info ... 
+ update every 1 steps, delay 2 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 14 + ghost atom cutoff = 14 + binsize = 7, bins = 12 12 12 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/charmmfsw/coul/charmmfsh, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 14.94 | 15.57 | 16.2 Mbytes +Step E_coul E_vdwl E_bond E_angle E_dihed f_cmap E_impro + 0 16.287573 -0.85933785 1.2470497 4.8441789 4.5432816 -1.473352 0.10453023 + 1000 18.816462 -0.84379243 0.78931817 2.7554247 4.4371421 -2.7762038 0.12697656 + 2000 18.091571 -1.045888 0.72306589 3.0951524 4.6725102 -2.3580092 0.22712496 + 3000 17.835596 -1.2171641 0.72666403 2.6696491 5.4373798 -2.0737041 0.075101693 + 4000 16.211232 -0.42713611 0.99472642 3.8961462 5.2009895 -2.5626866 0.17356243 + 5000 17.72183 -0.57081189 0.90733068 3.4376382 4.5457582 -2.3727543 0.12354518 + 6000 18.753977 -1.5772499 0.81468321 2.9236782 4.6033216 -2.3380859 0.12835782 + 7000 18.186024 -0.84205609 0.58996181 3.0329584 4.7221473 -2.5733244 0.10047631 + 8000 18.214306 -1.1360934 0.72597583 3.7493032 4.7319959 -2.8957975 0.20060467 + 9000 17.248415 -0.48642024 0.90266262 2.9721744 4.7651003 -2.1473349 0.13020438 + 10000 17.760663 -1.2968458 0.92384687 3.7007432 4.7378917 -2.2147799 0.06940514 + 11000 17.63395 -0.57366075 0.84871737 3.4276851 4.2853865 -2.2369491 0.17205075 + 12000 18.305713 -1.0672299 0.75876262 2.8852171 4.6850229 -2.4090072 0.087568888 + 13000 17.383367 -0.99678627 0.66712651 3.8060954 5.233865 -2.4180629 0.062014239 + 14000 17.510901 -0.68723297 0.92448551 3.7550867 4.7321218 -2.6059088 0.11504409 + 15000 18.080165 -1.13316 0.99982253 3.09947 4.8171402 -2.2713372 0.14580371 + 16000 17.383245 -0.4535296 0.57826268 3.6453593 4.6541138 -2.2434512 0.13285609 + 17000 17.111153 -0.3414839 0.73667584 3.7485311 4.6262965 -2.6166049 0.12635815 + 
18000 16.862046 -1.3592061 1.2371142 4.4878937 4.2937117 -2.2112584 0.066145125 + 19000 18.313891 -1.654238 0.90644101 3.3934089 4.550735 -2.1862171 0.081267736 + 20000 19.083561 -1.3081747 0.56257812 2.7633848 4.6211438 -2.5196707 0.13763071 + 21000 18.23741 -1.051353 0.64408722 3.1735565 4.6912533 -2.2491947 0.099394904 + 22000 17.914515 -0.89769621 0.61793801 3.1224992 4.8683543 -2.282475 0.14524537 + 23000 16.756122 -0.98277883 1.2554905 3.7916115 4.7301443 -2.3094994 0.10226772 + 24000 16.109857 -0.54593177 0.86934462 4.4293574 4.926985 -2.2652264 0.11414331 + 25000 18.590559 -1.497327 1.1898361 2.9134403 4.7854107 -2.4437918 0.067416154 + 26000 18.493391 -1.0533797 0.4889578 3.6563013 4.6171721 -2.3240835 0.11607829 + 27000 18.646522 -1.1229601 0.67956815 2.7937638 4.8991207 -2.4068997 0.10109147 + 28000 18.545103 -1.7237438 0.72488022 3.8041665 4.6459974 -2.4339333 0.21943258 + 29000 17.840505 -1.0909667 0.88133248 3.3698456 5.0311644 -2.5116617 0.08102693 + 30000 17.649527 -0.65409177 0.86781692 3.24112 4.9903073 -2.6234925 0.14799777 + 31000 18.156812 -0.77476556 0.83192789 2.9620784 4.9160635 -2.8571635 0.22283201 + 32000 18.251583 -1.3384075 0.8059007 3.2588176 4.4365328 -2.1875071 0.087883637 + 33000 17.702785 -0.88311587 0.98573641 3.4645713 4.2650091 -2.0909158 0.14233004 + 34000 17.123413 -1.4873429 1.0419563 4.2628178 4.6318762 -2.2292095 0.105354 + 35000 18.162061 -1.0136007 0.82436129 3.6365024 4.5801677 -2.6856989 0.28648222 + 36000 17.65618 -1.094718 0.8872444 3.5075241 4.6382423 -2.3895134 0.18116961 + 37000 17.336475 -1.0657995 0.98869254 3.9252927 4.4383632 -2.2048244 0.22285949 + 38000 17.369467 -0.97623132 0.6712095 4.1349304 4.597754 -2.4088341 0.14608514 + 39000 18.170206 -1.2344285 0.77546195 3.6451049 4.7482287 -2.9895286 0.25768859 + 40000 16.210866 -0.81407781 0.99246271 4.2676233 5.0253763 -2.2929865 0.13348624 + 41000 17.641798 -1.0868157 0.80119513 3.4302526 5.280872 -2.4025406 0.22747391 + 42000 18.349848 -1.613759 1.1497004 
3.7800682 4.3237683 -2.8676401 0.2120425 + 43000 19.130245 -1.196778 0.71845659 2.9325758 4.3684415 -2.433424 0.12240982 + 44000 18.061321 -1.2410101 1.0329373 3.0751569 4.7138313 -2.2880904 0.075814461 + 45000 18.162713 -1.4414622 1.009159 4.2298758 4.589593 -2.8502298 0.21606844 + 46000 18.591574 -0.99730412 1.0955215 3.3965004 4.359466 -3.1049731 0.17322629 + 47000 18.380259 -1.2717381 0.72291269 3.3958016 4.6099628 -2.4605065 0.19825185 + 48000 18.130478 -1.5051279 1.2087492 3.2488529 4.6690881 -2.2518174 0.05633061 + 49000 16.419912 -0.89320635 0.98926144 4.0388252 4.9919488 -2.1699511 0.15646479 + 50000 16.453196 -1.0433497 0.778346 4.6078069 4.7320614 -2.3760788 0.17161976 + 51000 18.245221 -0.89550444 0.9310446 3.0758194 4.3944595 -2.3082379 0.19983428 + 52000 17.839632 -1.0221781 0.76425017 3.3331547 4.5368437 -2.0988773 0.21098435 + 53000 18.693035 -1.4231915 0.76333082 3.1612761 4.583242 -2.4485762 0.089191206 + 54000 16.334672 -0.36309884 1.0200365 4.6700448 4.1628702 -2.1713841 0.11431995 + 55000 17.33842 -0.61522682 0.89847366 3.4970659 4.673495 -2.4743036 0.068004878 + 56000 17.790294 -1.0150845 0.73697112 3.6000297 4.5988343 -2.4822509 0.11434632 + 57000 18.913486 -1.0985507 1.0231848 2.7483267 4.4421755 -2.574424 0.1763388 + 58000 17.586896 -0.98284126 0.96965633 3.3330357 4.5325543 -2.1936869 0.083230915 + 59000 17.77788 -1.1649953 0.83092298 3.8004148 4.3940176 -2.3136642 0.017207608 + 60000 17.013042 -0.21728023 1.1688832 3.5374476 4.5462244 -2.4425301 0.15028297 + 61000 17.236242 -1.1342147 1.0301086 3.685948 4.6842331 -2.328108 0.070210812 + 62000 17.529852 -1.2961547 1.0323133 3.4474598 5.1435839 -2.4553423 0.060842687 + 63000 18.754704 -1.1816999 0.51806039 3.140172 4.5832701 -2.2713213 0.06327871 + 64000 17.54594 -1.3592836 0.9694558 4.1363258 4.3547729 -2.3818433 0.12634448 + 65000 16.962312 -0.54192775 0.90321315 4.0788618 4.2008255 -2.1376711 0.039504515 + 66000 18.078619 -1.3552947 1.0716861 3.3285374 4.7229362 -2.3331115 0.21978698 + 
67000 17.132732 -1.4376876 0.91486534 4.4461852 4.6894176 -2.3655045 0.068150385 + 68000 18.69286 -1.2856207 0.3895394 3.0620063 4.9922992 -2.3459189 0.079879643 + 69000 18.329552 -1.1545957 0.88632275 3.1741058 4.4562418 -2.7094867 0.25329613 + 70000 16.681168 -0.94434373 1.2450393 4.5737944 4.4902996 -2.4581775 0.15313095 + 71000 17.375032 -1.0514442 1.0741595 3.4896146 4.8407713 -2.5302576 0.13640847 + 72000 17.833013 -0.9047134 0.87067876 3.1658924 4.8825932 -2.4398117 0.2343991 + 73000 17.421411 -1.2190741 0.73706811 4.2895 4.6464636 -2.3872727 0.19696525 + 74000 17.383158 -0.34208984 0.71333984 3.2718891 4.2718495 -2.2484281 0.10827022 + 75000 17.20885 -1.2710479 1.125102 3.8414467 5.3222741 -2.375505 0.12910797 + 76000 16.811578 -0.545162 0.59076961 3.9118604 4.8031296 -2.2777895 0.063015508 + 77000 16.679231 -0.080955983 0.7253398 3.4203454 5.0987608 -2.379614 0.12961874 + 78000 18.164524 -1.3115525 0.92526408 3.5764487 4.3814882 -2.3712488 0.073436724 + 79000 17.738686 -1.0697859 1.2186866 3.0593848 4.6551053 -2.2505871 0.075340661 + 80000 16.767483 -0.84777477 1.03128 4.1982958 4.6992227 -2.4146425 0.079774219 + 81000 16.257265 0.62803774 0.84032194 3.3873471 5.0961071 -2.7219776 0.20467848 + 82000 18.232082 -1.2129302 0.50746051 3.9207128 4.5073437 -2.599371 0.094522372 + 83000 16.618985 -0.60917055 0.8825847 3.805497 4.9560959 -2.2194726 0.14852687 + 84000 17.90762 -0.82336075 0.90504161 3.0324198 4.7444271 -2.5036073 0.15860682 + 85000 16.699883 -0.50297228 0.83405307 3.8598996 4.7971968 -2.2427788 0.10338668 + 86000 16.353038 -0.0096880616 0.80705167 4.0865115 4.5364338 -2.4548873 0.098456203 + 87000 17.887331 -0.75281219 1.0030148 4.0117123 4.3443074 -2.9774392 0.16190152 + 88000 18.583708 -1.4867053 0.86324814 3.3971237 4.3526221 -2.221239 0.14459352 + 89000 17.684828 -1.283764 1.0021118 3.5426808 4.9057005 -2.3921967 0.05844702 + 90000 17.2597 -0.84306489 0.99797936 3.8896866 4.4315457 -2.5662899 0.18270206 + 91000 16.705581 -0.44704047 0.75239556 
3.470805 4.976868 -2.1894571 0.12312848 + 92000 17.548071 -1.2222664 0.92898812 4.0813773 4.3432647 -2.1631158 0.14071343 + 93000 17.163675 -0.94994776 0.96876981 3.9137692 4.4388666 -2.1260232 0.13187968 + 94000 18.842071 -1.2822113 0.58767049 3.1393475 4.5820965 -2.7264682 0.10406266 + 95000 18.112287 -1.1011381 0.63546648 3.4672667 4.486275 -2.2991936 0.041589685 + 96000 17.102713 -0.6877313 0.8389032 3.6892719 4.5676004 -2.1905327 0.13507011 + 97000 16.778253 -1.2902153 1.1588744 4.2820083 4.9537657 -2.4798159 0.35696636 + 98000 18.34638 -1.2908146 1.185356 3.0739807 4.4575453 -2.3959144 0.22407922 + 99000 17.995148 -1.3939639 0.7727299 3.8774144 4.4345458 -2.1142776 0.13550099 + 100000 18.444746 -1.2456693 0.86061526 3.468696 4.5264336 -2.4239851 0.074369539 +Loop time of 2.52011 on 4 procs for 100000 steps with 34 atoms + +Performance: 6856.851 ns/day, 0.004 hours/ns, 39680.850 timesteps/s +98.8% CPU use with 4 MPI tasks x no OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.072506 | 0.28131 | 0.69088 | 46.2 | 11.16 +Bond | 0.050544 | 0.45307 | 0.9416 | 57.6 | 17.98 +Neigh | 0.0060885 | 0.0061619 | 0.0062056 | 0.1 | 0.24 +Comm | 0.44686 | 1.3679 | 2.0111 | 53.5 | 54.28 +Output | 0.0028057 | 0.0029956 | 0.003264 | 0.3 | 0.12 +Modify | 0.028202 | 0.095174 | 0.15782 | 19.8 | 3.78 +Other | | 0.3135 | | | 12.44 + +Nlocal: 8.5 ave 14 max 2 min +Histogram: 1 0 1 0 0 0 0 0 0 2 +Nghost: 25.5 ave 32 max 20 min +Histogram: 2 0 0 0 0 0 0 1 0 1 +Neighs: 98.75 ave 242 max 31 min +Histogram: 2 0 1 0 0 0 0 0 0 1 + +Total # of neighbors = 395 +Ave neighs/atom = 11.6176 +Ave special neighs/atom = 9.52941 +Neighbor list builds = 246 +Dangerous builds = 0 +Total wall time: 0:00:02 diff --git a/examples/cmap/log.5Oct16.cmap.g++.1 b/examples/cmap/log.5Oct16.cmap.g++.1 deleted file mode 100644 index fbfc2b8baf..0000000000 --- 
a/examples/cmap/log.5Oct16.cmap.g++.1 +++ /dev/null @@ -1,201 +0,0 @@ -LAMMPS (5 Oct 2016) -# Created by charmm2lammps v1.8.2.6 beta on Thu Mar 3 20:56:57 EST 2016 - -units real -neigh_modify delay 2 every 1 -#newton off - -boundary p p p - -atom_style full -bond_style harmonic -angle_style charmm -dihedral_style charmm -improper_style harmonic - -pair_style lj/charmm/coul/charmm 8 12 -#pair_style lj/charmmfsw/coul/charmmfsh 8 12 -pair_modify mix arithmetic - -fix cmap all cmap charmm22.cmap -Reading potential file charmm22.cmap with DATE: 2016-09-26 -fix_modify cmap energy yes - -read_data gagg.data fix cmap crossterm CMAP - orthogonal box = (-34.4147 -36.1348 -39.3491) to (45.5853 43.8652 40.6509) - 1 by 1 by 1 MPI processor grid - reading atoms ... - 34 atoms - scanning bonds ... - 4 = max bonds/atom - scanning angles ... - 6 = max angles/atom - scanning dihedrals ... - 12 = max dihedrals/atom - scanning impropers ... - 1 = max impropers/atom - reading bonds ... - 33 bonds - reading angles ... - 57 angles - reading dihedrals ... - 75 dihedrals - reading impropers ... - 7 impropers - 4 = max # of 1-2 neighbors - 7 = max # of 1-3 neighbors - 13 = max # of 1-4 neighbors - 16 = max # of special neighbors - -special_bonds charmm -fix 1 all nve - -#fix 1 all nvt temp 300 300 100.0 -#fix 2 all shake 1e-9 500 0 m 1.0 - -velocity all create 0.0 12345678 dist uniform - -thermo 1000 -thermo_style custom step ecoul evdwl ebond eangle edihed f_cmap eimp -timestep 2.0 - -run 100000 -Neighbor list info ... 
- 1 neighbor list requests - update every 1 steps, delay 2 steps, check yes - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 14 - ghost atom cutoff = 14 - binsize = 7 -> bins = 12 12 12 -Memory usage per processor = 14.6355 Mbytes -Step E_coul E_vdwl E_bond E_angle E_dihed f_cmap E_impro - 0 26.542777 -0.93822087 1.2470497 4.8441789 4.5432816 -1.473352 0.10453023 - 1000 28.673005 -0.47724367 0.80029132 3.151679 4.4684446 -2.3928648 0.18604953 - 2000 27.67955 -1.170342 0.72018905 4.0400131 4.4713764 -2.5490207 0.21834436 - 3000 29.256656 -0.35856055 0.73303546 3.7411606 4.4710568 -2.8939692 0.37728884 - 4000 30.097549 -1.1353905 0.79007053 3.0688444 4.4091469 -2.3383587 0.20743631 - 5000 28.357525 -1.0723742 0.9180297 3.6579424 4.8792664 -2.3185572 0.088366962 - 6000 29.214175 -0.95299225 0.81926009 3.6805429 4.6742897 -2.9343577 0.26697813 - 7000 27.018614 -0.52423475 0.72502764 3.8840137 4.7780956 -2.3916009 0.24952584 - 8000 29.682167 -1.0939711 0.76111486 3.1090116 4.9359719 -2.5662984 0.1411154 - 9000 27.909695 -0.80905986 0.78952533 4.203187 4.1301204 -2.000402 0.088859259 - 10000 27.480298 -0.86273377 1.1293962 4.3857421 4.899282 -3.3895621 0.12126215 - 11000 28.303203 -1.0221152 0.62762348 4.055414 4.5863024 -2.5842816 0.17996907 - 12000 28.311127 -0.94227367 0.91859012 3.6673926 4.7018632 -3.902715 0.30065704 - 13000 30.818607 -1.5220116 0.95710386 3.3364371 4.543427 -3.0423067 0.16712905 - 14000 27.643736 -1.0144117 0.95806952 4.1046912 4.800236 -4.0534389 0.29293405 - 15000 27.660491 -1.0390086 0.78061056 4.1139174 4.7197202 -2.3670379 0.22126985 - 16000 27.845157 -0.63654502 0.78007478 3.9365994 4.949418 -3.1470214 0.22335355 - 17000 28.44772 -1.0255112 0.70402007 4.0573343 4.2887527 -2.2099596 0.048050962 - 18000 27.128323 -0.96218536 1.1327159 4.3222585 4.326607 -2.2881766 0.13491257 - 19000 27.337633 -0.78999574 0.80152298 4.2239689 4.7073478 -2.2924164 0.12710292 - 20000 27.780537 -0.46458072 0.79707671 3.7232618 4.943417 
-2.5290628 0.26191223 - 21000 26.435484 -0.7803224 1.0753217 4.4196051 5.9945933 -2.3340925 0.16448475 - 22000 28.619429 -1.1623149 0.9401731 3.8508844 5.1636737 -2.5551846 0.25318434 - 23000 28.399338 -0.79700962 0.85575503 4.488526 4.5975422 -2.5663549 0.13601693 - 24000 29.645532 -1.158744 0.83180313 3.8193399 4.60319 -2.6847864 0.24260466 - 25000 28.695339 -1.4802204 0.76583757 3.6786272 4.8959496 -2.3627896 0.080867326 - 26000 28.149711 -1.029689 0.79383806 3.7885067 4.3345813 -2.1041553 0.14598209 - 27000 29.580373 -1.0525813 1.0262723 3.7767318 4.6119758 -2.2802386 0.088556038 - 28000 28.44308 -0.93411225 0.8794395 3.948079 4.780246 -2.1814583 0.14340149 - 29000 29.335621 -1.6087988 0.71803091 3.7819186 4.6688385 -2.4282242 0.16061111 - 30000 28.706138 -1.3938241 0.67713818 4.031275 4.4756505 -2.1807056 0.11461356 - 31000 27.451944 0.010297225 0.65064883 3.6402029 4.3607811 -2.5511516 0.12637237 - 32000 27.070878 -1.103158 1.1932199 5.1329709 4.5201653 -2.2224479 0.11215427 - 33000 29.889976 -1.6228316 0.69407996 3.5361991 4.3502767 -1.9847454 0.09089949 - 34000 28.223151 -0.927208 1.043253 3.4650939 5.1028142 -2.8127219 0.10648823 - 35000 27.985986 -0.48153861 0.63878449 3.3724641 4.9551679 -2.6565919 0.12123115 - 36000 28.580688 -1.4500694 1.055762 4.0490064 4.423782 -2.3103578 0.072747638 - 37000 29.192947 -0.49678176 1.1146731 2.9233947 4.5738603 -2.4376144 0.22874047 - 38000 26.954594 -0.53812359 0.79230685 4.3356989 5.0284656 -2.3791255 0.0486081 - 39000 27.567555 -0.57870028 0.73614374 4.191991 4.9209556 -2.6122044 0.08635571 - 40000 28.494172 -0.79057135 0.79072816 4.1893209 4.4826919 -2.4179635 0.14612898 - 41000 28.44904 -1.1002948 0.93405654 4.3586358 4.4338415 -2.2950944 0.15705834 - 42000 28.95725 -1.0297067 1.1632348 4.274711 4.9979487 -2.7611464 0.15944725 - 43000 28.640394 -0.70938882 0.68100893 3.1844315 5.1817454 -2.2837487 0.14189233 - 44000 27.997558 -1.0115099 0.59125208 4.0883422 4.6033907 -2.2775964 0.094273258 - 45000 27.67163 
-0.67992747 1.1225552 3.9020703 4.8171056 -2.1952679 0.041418433 - 46000 28.822607 -0.6687953 0.74160059 3.3193715 4.5546965 -2.3024572 0.047569065 - 47000 29.20147 -1.4456785 0.79223353 3.8288813 4.5811826 -2.5154936 0.061230141 - 48000 27.843026 -1.0222301 0.87322137 4.3432743 4.4266307 -2.1414153 0.06802794 - 49000 28.199573 -1.1887794 1.2781088 4.0779644 4.5881353 -2.319775 0.094803547 - 50000 28.759212 -1.354416 0.68534569 3.8394841 4.2308134 -2.1281844 0.1395951 - 51000 27.876455 -1.5705462 0.76557156 4.5335223 4.523708 -2.203702 0.14679803 - 52000 27.930587 -1.2277489 0.96071516 3.960953 5.1152188 -2.4101451 0.060949521 - 53000 27.031236 -1.4746477 1.2341141 5.0540975 4.3656865 -2.1288513 0.092725656 - 54000 28.809394 -1.1162427 0.94350207 3.4013958 4.4755547 -2.3342811 0.18855912 - 55000 28.948415 -1.1560418 0.6260139 3.5386373 4.5244978 -2.340212 0.17474657 - 56000 28.048368 -0.95784532 0.76432571 4.1404665 4.4570033 -2.0899628 0.045693628 - 57000 28.707642 -1.366574 0.9907873 3.729903 4.3131997 -2.2777698 0.065420213 - 58000 26.361663 -1.0424403 1.0452563 5.0977108 4.7035231 -2.3101244 0.13671642 - 59000 29.218218 -1.2210564 0.62435875 3.4236327 4.5481681 -2.1575943 0.037984042 - 60000 27.655546 -1.1053224 0.86323501 3.7641375 4.8946898 -2.2422249 0.077725979 - 61000 27.252108 -1.3744824 1.1150806 5.0444848 4.4878135 -2.2743829 0.058331257 - 62000 27.163469 -1.1715781 0.72099321 4.5295501 4.9509918 -2.2993961 0.050401105 - 63000 29.581575 -1.2238537 0.86303245 3.1194038 5.2218965 -2.5002427 0.055032632 - 64000 27.897822 -1.1011516 0.74540883 4.2869228 4.3394269 -2.2552393 0.1403321 - 65000 27.083245 -1.0633392 0.92771724 5.0805224 4.2747962 -2.2388039 0.064196692 - 66000 29.072723 -1.5514209 0.89798805 4.2600224 4.4261812 -2.3524752 0.15067414 - 67000 27.308181 -0.72224802 0.97109517 4.5074578 4.4559352 -2.1381121 0.089297603 - 68000 27.505686 -0.43855431 0.80785812 4.1917251 5.0157721 -2.3382145 0.11105164 - 69000 29.041681 -0.64735378 0.89874684 
3.3891579 4.3753361 -2.2320941 0.14716747 - 70000 29.735756 -1.7061457 0.9206878 3.5767878 4.3851664 -2.2516304 0.097196062 - 71000 28.224352 -0.92217702 0.86093586 3.9507157 4.5596589 -2.2173397 0.089116669 - 72000 29.282336 -1.056142 0.65185725 3.8735742 4.4839333 -2.4314756 0.071909704 - 73000 26.257283 -0.64273826 0.98300685 5.063943 5.045958 -2.5544375 0.2180275 - 74000 28.825119 -0.97736616 0.87201848 3.55875 4.3653309 -2.2303567 0.098963875 - 75000 29.239507 -0.96508809 0.74517323 3.4306236 4.7651921 -2.6077732 0.17883654 - 76000 27.349841 -0.50990238 1.1183613 4.4252451 4.4097775 -2.4125794 0.18483606 - 77000 28.130197 -1.4081219 0.94921357 4.2572132 4.5162849 -2.4013797 0.073744606 - 78000 28.235774 -0.9214321 0.6324981 3.8697686 4.8092154 -2.2272847 0.092108346 - 79000 26.732846 -0.55949486 1.0989617 5.0088609 4.4930687 -2.277945 0.03855146 - 80000 28.529208 -0.94244671 0.79407482 3.961106 4.3930011 -2.3127726 0.091124948 - 81000 29.603852 -1.6116062 1.060847 3.7824932 4.151001 -1.9139868 0.19875986 - 82000 28.232876 -1.1833011 1.0182713 3.4195758 5.1394333 -2.4632697 0.28501012 - 83000 29.565482 -1.3479552 0.99056973 3.7851802 4.4781011 -2.7872481 0.2031991 - 84000 28.780274 -1.3073882 1.0512637 4.004638 4.502282 -2.3789146 0.015656202 - 85000 27.262312 -1.1305346 1.203524 4.7938623 4.1747105 -2.0952844 0.054240361 - 86000 28.157348 -1.0662817 0.81163796 3.9912709 4.8320213 -2.255237 0.14698333 - 87000 28.445543 -1.3365026 0.78156195 4.4767689 4.4457575 -2.5008786 0.13879386 - 88000 27.656717 -1.1490599 0.87974869 4.4629952 4.7023033 -2.3258145 0.081904139 - 89000 28.838821 -1.020709 0.85587929 3.7110705 4.4938307 -2.4914483 0.11447952 - 90000 27.356497 -0.59107077 0.81879666 4.5209332 4.4703836 -2.3806717 0.071307775 - 91000 27.780445 -0.80564513 0.94752313 3.8468943 4.2924253 -2.1011134 0.1118672 - 92000 28.555276 -1.3514732 0.80826674 3.9590742 4.5775954 -2.4891232 0.054254978 - 93000 28.747267 -1.2133243 0.75507246 4.1319789 4.9048611 -2.4913887 
0.13045693 - 94000 27.479343 -0.69973695 0.99696121 3.5966229 4.549025 -2.4155312 0.41745762 - 95000 27.726945 -1.1905026 1.1120842 4.7433275 4.5386861 -2.7947142 0.33671682 - 96000 28.021114 -1.0341645 0.6663033 4.2397505 4.6203984 -1.9904034 0.10972565 - 97000 28.382022 -1.3916008 1.180588 4.0729621 4.6741792 -2.554927 0.13462346 - 98000 27.895969 -0.7496449 1.3072185 4.2611888 4.3726077 -2.1320701 0.15376665 - 99000 28.517889 -1.2183957 1.279778 3.957647 4.2638434 -2.2888407 0.042705003 - 100000 28.109211 -1.2538948 0.83671785 4.3734766 4.544545 -2.3076497 0.042189096 -Loop time of 2.84552 on 1 procs for 100000 steps with 34 atoms - -Performance: 6072.706 ns/day, 0.004 hours/ns, 35142.973 timesteps/s -99.9% CPU use with 1 MPI tasks x no OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 0.94207 | 0.94207 | 0.94207 | 0.0 | 33.11 -Bond | 1.6125 | 1.6125 | 1.6125 | 0.0 | 56.67 -Neigh | 0.0073986 | 0.0073986 | 0.0073986 | 0.0 | 0.26 -Comm | 0.012739 | 0.012739 | 0.012739 | 0.0 | 0.45 -Output | 0.00075531 | 0.00075531 | 0.00075531 | 0.0 | 0.03 -Modify | 0.21483 | 0.21483 | 0.21483 | 0.0 | 7.55 -Other | | 0.05524 | | | 1.94 - -Nlocal: 34 ave 34 max 34 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 0 ave 0 max 0 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 395 ave 395 max 395 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 395 -Ave neighs/atom = 11.6176 -Ave special neighs/atom = 9.52941 -Neighbor list builds = 237 -Dangerous builds = 0 -Total wall time: 0:00:02 diff --git a/examples/cmap/log.5Oct16.cmap.g++.4 b/examples/cmap/log.5Oct16.cmap.g++.4 deleted file mode 100644 index de5d670073..0000000000 --- a/examples/cmap/log.5Oct16.cmap.g++.4 +++ /dev/null @@ -1,201 +0,0 @@ -LAMMPS (5 Oct 2016) -# Created by charmm2lammps v1.8.2.6 beta on Thu Mar 3 20:56:57 EST 2016 - -units real -neigh_modify delay 2 every 1 -#newton off - -boundary 
p p p - -atom_style full -bond_style harmonic -angle_style charmm -dihedral_style charmm -improper_style harmonic - -pair_style lj/charmm/coul/charmm 8 12 -#pair_style lj/charmmfsw/coul/charmmfsh 8 12 -pair_modify mix arithmetic - -fix cmap all cmap charmm22.cmap -Reading potential file charmm22.cmap with DATE: 2016-09-26 -fix_modify cmap energy yes - -read_data gagg.data fix cmap crossterm CMAP - orthogonal box = (-34.4147 -36.1348 -39.3491) to (45.5853 43.8652 40.6509) - 1 by 2 by 2 MPI processor grid - reading atoms ... - 34 atoms - scanning bonds ... - 4 = max bonds/atom - scanning angles ... - 6 = max angles/atom - scanning dihedrals ... - 12 = max dihedrals/atom - scanning impropers ... - 1 = max impropers/atom - reading bonds ... - 33 bonds - reading angles ... - 57 angles - reading dihedrals ... - 75 dihedrals - reading impropers ... - 7 impropers - 4 = max # of 1-2 neighbors - 7 = max # of 1-3 neighbors - 13 = max # of 1-4 neighbors - 16 = max # of special neighbors - -special_bonds charmm -fix 1 all nve - -#fix 1 all nvt temp 300 300 100.0 -#fix 2 all shake 1e-9 500 0 m 1.0 - -velocity all create 0.0 12345678 dist uniform - -thermo 1000 -thermo_style custom step ecoul evdwl ebond eangle edihed f_cmap eimp -timestep 2.0 - -run 100000 -Neighbor list info ... 
- 1 neighbor list requests - update every 1 steps, delay 2 steps, check yes - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 14 - ghost atom cutoff = 14 - binsize = 7 -> bins = 12 12 12 -Memory usage per processor = 15.9307 Mbytes -Step E_coul E_vdwl E_bond E_angle E_dihed f_cmap E_impro - 0 26.542777 -0.93822087 1.2470497 4.8441789 4.5432816 -1.473352 0.10453023 - 1000 28.673005 -0.47724367 0.80029132 3.151679 4.4684446 -2.3928648 0.18604953 - 2000 27.67955 -1.170342 0.72018905 4.0400131 4.4713764 -2.5490207 0.21834436 - 3000 29.256656 -0.35856055 0.73303546 3.7411606 4.4710568 -2.8939692 0.37728884 - 4000 30.097549 -1.1353905 0.79007053 3.0688444 4.4091469 -2.3383587 0.20743631 - 5000 28.357525 -1.0723742 0.9180297 3.6579424 4.8792663 -2.3185572 0.088366962 - 6000 29.214175 -0.95299239 0.81926011 3.6805428 4.6742897 -2.9343578 0.26697816 - 7000 27.018614 -0.52423469 0.72502751 3.8840141 4.7780958 -2.3916014 0.24952572 - 8000 29.682494 -1.0940368 0.76113051 3.1089345 4.9357863 -2.5662256 0.14112613 - 9000 27.853918 -0.7913741 0.79503268 4.2177256 4.146792 -2.00475 0.090585666 - 10000 27.13754 -0.80551128 1.1325023 4.4718283 5.2460631 -3.4947725 0.11893125 - 11000 28.277434 -1.4897448 0.90075953 4.1895717 4.3594269 -1.9553119 0.090222212 - 12000 28.630973 -1.222206 0.67796385 3.3905661 4.9691334 -2.9052721 0.13897658 - 13000 28.593007 -0.95684026 0.75585196 3.7242568 4.7417932 -2.3893117 0.2074121 - 14000 26.147115 -0.6026921 0.93591488 5.1292829 4.9821952 -2.2571835 0.11872421 - 15000 26.29432 -0.82424162 1.048979 4.5569495 5.1189308 -2.9750422 0.16195676 - 16000 29.189992 -0.80998247 0.74093508 3.8299275 4.4536688 -2.5497538 0.19155639 - 17000 25.878012 -0.3519646 1.0988924 4.7359591 5.3923098 -2.7211029 0.13405223 - 18000 27.726135 -0.28229987 0.63072344 4.1777888 4.7237271 -2.2177157 0.15939372 - 19000 27.153504 -0.66477422 0.77910129 4.2036117 5.113851 -2.3494315 0.094793307 - 20000 28.044833 -1.2835827 0.88745367 3.9955526 
4.5077788 -3.0116467 0.17197859 - 21000 27.205696 -0.74090037 1.0023251 4.3421733 4.912671 -2.3473271 0.26089356 - 22000 27.385785 -0.93740972 0.84554838 4.562743 4.883866 -2.2110955 0.11573301 - 23000 27.05534 -0.95605442 0.96719024 3.9277618 5.0359014 -2.6135949 0.21368061 - 24000 28.273378 -0.97543103 0.8983443 4.2067985 4.4782971 -2.4230505 0.30311692 - 25000 27.477789 -0.20383849 0.8380706 3.8037992 4.8312504 -2.5831791 0.093843746 - 26000 30.344199 -1.9773473 0.92882437 3.7821405 4.5176677 -2.3020968 0.2194307 - 27000 27.32767 -0.9803839 0.92988865 3.7611603 5.0328211 -2.4647656 0.18213622 - 28000 27.34208 -1.037938 0.74488346 4.1727342 4.7056812 -2.2718346 0.17741362 - 29000 27.682777 -0.51006495 0.57074224 4.7332237 4.7080462 -2.0491512 0.2130517 - 30000 24.925731 0.13670248 0.84976065 4.4143762 6.0677158 -3.5479173 0.28059419 - 31000 28.623419 -0.90725708 1.0710501 3.6930688 4.6639301 -2.2225373 0.20988139 - 32000 27.732286 -1.1948367 0.89230134 4.4398373 4.8923907 -3.5849327 0.49167488 - 33000 28.800772 -1.5319589 0.93455495 4.1634728 4.6107706 -2.3503486 0.22636535 - 34000 27.374398 -1.0957453 0.89450276 3.9829508 4.991786 -2.3548834 0.15869465 - 35000 28.38753 -0.89261166 0.90000776 3.536864 4.4293294 -2.4218118 0.10640557 - 36000 27.713974 0.088038031 0.85190574 3.8969601 4.6256355 -2.7935475 0.34671662 - 37000 29.13007 -1.378597 0.74412556 3.131538 4.6458653 -2.9373734 0.38035616 - 38000 28.556573 -1.4055344 1.139984 4.0035753 4.2938358 -2.489329 0.25338326 - 39000 26.447036 -1.1829705 0.87032438 5.0804461 4.5772023 -2.7346466 0.32165802 - 40000 27.991454 -0.64295679 0.61020872 4.165871 4.4623087 -2.2244194 0.13826991 - 41000 29.483296 -1.2400745 0.66926627 3.3473666 4.5766617 -2.3051145 0.12171554 - 42000 26.948627 -1.2162288 1.1440628 4.3993073 5.1176533 -2.4734485 0.15497709 - 43000 28.04459 -0.26543193 0.83647367 3.5160747 4.6964397 -2.2805068 0.12618821 - 44000 28.213608 -1.216128 0.9132792 4.0206483 4.9483599 -2.3387049 0.10132022 - 45000 
28.283506 -1.0390766 0.86113772 4.504509 4.7209088 -2.3043085 0.14588362 - 46000 27.433853 -0.57912107 0.78448334 4.5998579 5.1181394 -2.6165094 0.18722528 - 47000 27.552939 -1.1128925 0.80087638 4.3448001 4.8062869 -2.4296883 0.2702479 - 48000 28.874034 -1.3242519 0.71770727 3.5648565 4.4671824 -2.2608958 0.16115978 - 49000 29.216186 -1.2210307 0.76937497 3.9260628 4.7550577 -2.7316081 0.085505664 - 50000 28.065856 -1.1545547 0.86953819 4.4137666 4.732157 -2.4450867 0.23320539 - 51000 26.308975 -0.99728352 0.90408444 4.2400186 5.6340425 -2.2090554 0.079882158 - 52000 28.517571 -1.5027398 0.83520278 3.8176552 4.3001251 -2.0731682 0.1665375 - 53000 28.77579 -1.3564268 0.97253881 3.6866407 4.8532347 -2.5330776 0.17668411 - 54000 29.135315 -1.0994106 0.67605671 3.6819254 4.3134408 -1.9796929 0.076951331 - 55000 26.168938 -0.76247492 0.88784685 4.6533473 6.0484793 -2.1334561 0.036876985 - 56000 27.471775 -0.68648837 1.0576168 4.0354311 4.4767052 -2.2368959 0.24950568 - 57000 29.787083 -1.4914384 1.0702944 3.5388133 4.5173097 -2.6694464 0.27937092 - 58000 28.705448 -1.3016617 0.63337853 3.9552713 4.4119825 -1.8774657 0.17540021 - 59000 29.130155 -0.91647363 0.84384883 3.1076903 4.5346348 -2.3457338 0.16674486 - 60000 26.874199 -0.81598034 1.3432151 5.1322624 4.9545484 -2.9566615 0.25950486 - 61000 27.401306 -0.82895856 1.1636949 4.020154 4.5745928 -2.601466 0.18061051 - 62000 28.930313 -1.5231967 0.85173243 4.3517328 4.4878662 -2.5859205 0.1755493 - 63000 26.56874 0.026147233 0.60836216 4.4231618 4.4390677 -2.1721849 0.08594237 - 64000 26.729023 -0.76953985 0.76734633 4.5104288 5.0886456 -2.2118551 0.11339216 - 65000 28.900471 -1.3901477 0.86194657 4.2774976 4.498325 -2.3672362 0.20668335 - 66000 26.884253 -0.21198879 0.98509625 4.0843117 4.4344172 -2.3289416 0.23631017 - 67000 27.210888 -0.84075559 1.0396559 4.7253607 4.4314589 -2.2985702 0.19326507 - 68000 28.042102 -1.1898715 1.053534 3.8748712 4.4358449 -2.3998723 0.2431659 - 69000 28.939141 -1.6968936 0.98155912 
4.0460838 5.0075204 -2.5547087 0.28645131 - 70000 27.15577 -0.85202797 1.1469079 4.7645212 4.6133209 -2.3410451 0.086576572 - 71000 25.507417 -0.27780727 0.95157881 4.8759406 4.853401 -2.9598705 0.41011008 - 72000 29.804703 -1.4847015 0.96345767 3.6797304 4.3678377 -2.4594626 0.14480206 - 73000 28.602798 -1.4906143 0.72497266 4.2442974 4.5360598 -2.3621638 0.14385651 - 74000 28.4928 -0.91319873 1.0377472 3.8033127 4.3991601 -2.4051911 0.095567428 - 75000 26.38168 -0.70733237 1.1557817 5.697939 4.5935618 -2.4285007 0.058980519 - 76000 27.16626 -0.83631031 0.84844246 4.7460887 4.5801472 -2.1260014 0.12845946 - 77000 29.040661 -1.3089499 0.80285084 4.664804 4.5215895 -2.6861939 0.13215598 - 78000 27.477871 -1.0600977 0.88595045 4.6264017 5.4095605 -2.474411 0.10987174 - 79000 26.151797 -0.55779685 0.91382436 4.99964 4.9184022 -2.2547241 0.22854038 - 80000 28.14523 -0.54460026 0.8982411 3.5374555 4.3785673 -2.3196807 0.088567964 - 81000 29.029941 -1.6467789 0.79042284 3.7269899 4.7407998 -2.3795824 0.1408727 - 82000 27.920287 -0.72798032 1.0076975 3.4449461 4.5621371 -2.8239074 0.25103454 - 83000 29.131054 -1.114367 0.76887285 3.459639 4.5163922 -2.607825 0.19991648 - 84000 28.249768 -0.69944068 1.0510846 4.0436296 4.6430538 -2.4213355 0.077299966 - 85000 28.06888 -0.62132922 0.91829312 4.1294147 4.3099557 -2.354063 0.15866186 - 86000 28.664264 -1.1022906 0.87831695 4.5773522 4.6045802 -2.9206875 0.33950063 - 87000 27.960967 -1.2852756 0.77694253 3.9011301 4.9114139 -3.2374868 0.3068138 - 88000 27.190678 -1.2803268 1.1545301 4.5769709 5.2404761 -2.3825838 0.10356039 - 89000 26.792931 -0.44516641 1.0236244 4.2007253 4.7098685 -2.3608551 0.034447062 - 90000 27.173991 -0.87185611 1.065719 4.1953618 4.6856408 -2.6539232 0.16957757 - 91000 28.626528 -1.239257 0.89524651 4.7048012 4.6344201 -2.7367901 0.43534143 - 92000 27.661812 -1.109044 0.92817391 5.0294489 4.3890711 -2.4108669 0.12570139 - 93000 28.156793 -1.0820907 0.92812693 4.938385 4.4901426 -2.4023366 0.30135781 - 
94000 28.842149 -1.3524969 1.1451109 4.3125908 4.6959035 -2.6747199 0.2254607 - 95000 27.862247 -1.2119045 1.0218976 4.2614082 4.4931316 -2.6902934 0.16345201 - 96000 27.084973 -0.93738328 1.3984324 4.5647189 4.4232205 -2.2834097 0.11217888 - 97000 27.587078 -0.89397255 0.78218462 3.8944421 4.3981479 -2.4205318 0.16570942 - 98000 27.981746 -1.2380545 0.84847869 4.311441 4.7340377 -2.4270441 0.023565612 - 99000 27.476625 -0.8569146 0.82550381 4.1656963 4.4064921 -2.4169708 0.160814 - 100000 26.121325 -0.63610855 1.0803389 4.9257118 4.7073263 -2.4010334 0.066303044 -Loop time of 2.693 on 4 procs for 100000 steps with 34 atoms - -Performance: 6416.646 ns/day, 0.004 hours/ns, 37133.367 timesteps/s -98.4% CPU use with 4 MPI tasks x no OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 0.065478 | 0.2501 | 0.63682 | 45.6 | 9.29 -Bond | 0.066944 | 0.44772 | 0.88814 | 53.7 | 16.63 -Neigh | 0.0076509 | 0.0077319 | 0.0078275 | 0.1 | 0.29 -Comm | 0.57917 | 1.4166 | 1.9823 | 46.9 | 52.60 -Output | 0.0033755 | 0.0035856 | 0.0037644 | 0.2 | 0.13 -Modify | 0.03866 | 0.1366 | 0.23978 | 24.6 | 5.07 -Other | | 0.4306 | | | 15.99 - -Nlocal: 8.5 ave 15 max 2 min -Histogram: 1 1 0 0 0 0 0 0 1 1 -Nghost: 25.5 ave 32 max 19 min -Histogram: 1 1 0 0 0 0 0 0 1 1 -Neighs: 98.75 ave 257 max 18 min -Histogram: 1 1 1 0 0 0 0 0 0 1 - -Total # of neighbors = 395 -Ave neighs/atom = 11.6176 -Ave special neighs/atom = 9.52941 -Neighbor list builds = 294 -Dangerous builds = 0 -Total wall time: 0:00:02 diff --git a/examples/mscg/log.31Mar17.g++.1 b/examples/mscg/log.31Mar17.g++.1 new file mode 100644 index 0000000000..c67bc483db --- /dev/null +++ b/examples/mscg/log.31Mar17.g++.1 @@ -0,0 +1,145 @@ +LAMMPS (13 Apr 2017) +units real +atom_style full +pair_style zero 10.0 + +read_data data.meoh + orthogonal box = (-20.6917 -20.6917 -20.6917) to (20.6917 20.6917 20.6917) + 1 by 1 by 
1 MPI processor grid + reading atoms ... + 1000 atoms + 0 = max # of 1-2 neighbors + 0 = max # of 1-3 neighbors + 0 = max # of 1-4 neighbors + 1 = max # of special neighbors +pair_coeff * * + +thermo 1 +thermo_style custom step + +# Test 1a: range finder functionality +fix 1 all mscg 1 range on +rerun dump.meoh first 0 last 4500 every 250 dump x y z fx fy fz +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 7 7 7 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair zero, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 5.794 | 5.794 | 5.794 Mbytes +Step + 0 + 250 + 500 + 750 + 1000 + 1250 + 1500 + 1750 + 2000 + 2250 + 2500 + 2750 + 3000 + 3250 + 3500 + 3750 + 4000 + 4250 + 4500 +Loop time of 0.581537 on 1 procs for 19 steps with 1000 atoms + +Performance: 2.823 ns/day, 8.502 hours/ns, 32.672 timesteps/s +99.2% CPU use with 1 MPI tasks x no OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0 | 0 | 0 | 0.0 | 0.00 +Bond | 0 | 0 | 0 | 0.0 | 0.00 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 0 | 0 | 0 | 0.0 | 0.00 +Output | 0 | 0 | 0 | 0.0 | 0.00 +Modify | 0 | 0 | 0 | 0.0 | 0.00 +Other | | 0.5815 | | |100.00 + +Nlocal: 1000 ave 1000 max 1000 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 2934 ave 2934 max 2934 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 50654 ave 50654 max 50654 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 50654 +Ave neighs/atom = 50.654 +Ave special neighs/atom = 0 +Neighbor list builds = 0 +Dangerous builds = 0 +print "TEST_1a mscg range finder" +TEST_1a mscg range finder +unfix 1 + +# Test 1b: force matching functionality +fix 1 all mscg 1 
+rerun dump.meoh first 0 last 4500 every 250 dump x y z fx fy fz +Per MPI rank memory allocation (min/avg/max) = 5.794 | 5.794 | 5.794 Mbytes +Step + 0 + 250 + 500 + 750 + 1000 + 1250 + 1500 + 1750 + 2000 + 2250 + 2500 + 2750 + 3000 + 3250 + 3500 + 3750 + 4000 + 4250 + 4500 +Loop time of 0.841917 on 1 procs for 19 steps with 1000 atoms + +Performance: 1.950 ns/day, 12.309 hours/ns, 22.568 timesteps/s +99.8% CPU use with 1 MPI tasks x no OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0 | 0 | 0 | 0.0 | 0.00 +Bond | 0 | 0 | 0 | 0.0 | 0.00 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 0 | 0 | 0 | 0.0 | 0.00 +Output | 0 | 0 | 0 | 0.0 | 0.00 +Modify | 0 | 0 | 0 | 0.0 | 0.00 +Other | | 0.8419 | | |100.00 + +Nlocal: 1000 ave 1000 max 1000 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 2934 ave 2934 max 2934 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 50654 ave 50654 max 50654 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 50654 +Ave neighs/atom = 50.654 +Ave special neighs/atom = 0 +Neighbor list builds = 0 +Dangerous builds = 0 +print "TEST_1b mscg force matching" +TEST_1b mscg force matching + +print TEST_DONE +TEST_DONE +Total wall time: 0:00:01 diff --git a/examples/python/README.pair_python b/examples/python/README.pair_python new file mode 100644 index 0000000000..3844a2676e --- /dev/null +++ b/examples/python/README.pair_python @@ -0,0 +1,41 @@ +This folder contains several LAMMPS input scripts and a python module +file py_pot.py to demonstrate the use of the pair style python. + +in.pair_python_melt: +This is a version of the melt example using the python pair style. The first +part of the output should have identical energies, temperature and pressure +than the melt example. The following two sections then demonstrate how to +restart with pair style python from a restart file and a data file. 
+ +in.pair_python_hybrid: +This version shows how to mix regular pair styles with a python pair style. +However, in this case both potentials are the same, so the energies and +pressure in the output should be identical to those of the previous example. + +in.pair_python_spce: +This input shows a simulation of a small bulk water system with the SPC/E +water potential. Since the python pair style does not support computing +coulomb contributions, pair style hybrid/overlay is used to combine +the python style containing the Lennard-Jones part with the long-range coulomb. +Same as for the previous example, it also showcases restarting. + +in.pair_python_table: +This input demonstrates the use of the python pair style to build +a table file for use with pair style table. This will run much faster +than the python pair style. This example tabulates the melt example from +above. Note that tabulation is approximate, so the output will only +agree with the melt result to some degree. + +in.pair_python_coulomb: +This is another tabulation example, this time for the SPC/E water example +with cutoff coulomb interactions. +Please note that tabulating long-range coulomb has a systematic error in +forces and energies for all systems with bonds, angles and dihedrals. +In this case, this will only affect the energies, since the water molecules +are held rigid with fix shake. To enable long-range coulomb the coul/cut +style needs to be replaced with coul/long, a suitable kspace style added +and the pppm keyword added to the table pair style definition. + +in.pair_python_long: +The final example shows how to combine long-range coulomb with tabulation +for only the short-range interactions via pair style hybrid/overlay. 
diff --git a/examples/python/data.spce b/examples/python/data.spce new file mode 100644 index 0000000000..1e8a4a0913 --- /dev/null +++ b/examples/python/data.spce @@ -0,0 +1,9029 @@ +LAMMPS Atom File + + 4500 atoms + 3000 bonds + 1500 angles + 0 dihedrals + 0 impropers + + 2 atom types + 1 bond types + 1 angle types + + 0.02645 35.53280 xlo xhi + 0.02645 35.53280 ylo yhi + 0.02641 35.47360 zlo zhi + +Masses + + 1 15.9994 + 2 1.00794 + +Atoms + + 1 1 1 -0.8472 12.12456 28.09298 22.27452 0 1 0 + 2 1 2 0.4236 12.53683 28.75606 22.89928 0 1 0 + 3 1 2 0.4236 11.49482 28.56390 21.65678 0 1 0 + 4 2 1 -0.8472 1.17079 29.37777 23.72984 1 -1 0 + 5 2 2 0.4236 1.91804 29.48483 23.07399 1 -1 0 + 6 2 2 0.4236 0.40074 28.91964 23.28586 1 -1 0 + 7 3 1 -0.8472 29.68313 14.73733 21.62793 -1 0 0 + 8 3 2 0.4236 30.54284 14.93741 21.15800 -1 0 0 + 9 3 2 0.4236 29.73135 15.07344 22.56848 -1 0 0 + 10 4 1 -0.8472 10.87272 7.00153 35.10920 0 1 0 + 11 4 2 0.4236 11.11057 6.21663 34.53712 0 1 0 + 12 4 2 0.4236 9.95658 7.32301 34.86983 0 1 0 + 13 5 1 -0.8472 9.46588 6.43648 19.79899 0 1 0 + 14 5 2 0.4236 9.04840 6.32936 18.89668 0 1 0 + 15 5 2 0.4236 10.31722 5.91326 19.83657 0 1 0 + 16 6 1 -0.8472 3.17905 29.69801 22.11922 0 0 0 + 17 6 2 0.4236 3.19240 30.63289 21.76465 0 0 0 + 18 6 2 0.4236 3.38651 29.05797 21.37944 0 0 0 + 19 7 1 -0.8472 23.38618 11.29979 30.78238 0 0 0 + 20 7 2 0.4236 23.69882 10.46688 31.23897 0 0 0 + 21 7 2 0.4236 24.17354 11.79208 30.41132 0 0 0 + 22 8 1 -0.8472 11.03761 10.46106 30.14741 0 1 0 + 23 8 2 0.4236 10.94682 11.45112 30.25464 0 1 0 + 24 8 2 0.4236 11.60678 10.09680 30.88450 0 1 0 + 25 9 1 -0.8472 26.24001 25.40937 21.06754 0 0 0 + 26 9 2 0.4236 25.67045 26.09258 21.52442 0 0 0 + 27 9 2 0.4236 26.22311 25.56759 20.08030 0 0 0 + 28 10 1 -0.8472 10.84087 35.33915 19.78347 0 -1 0 + 29 10 2 0.4236 10.20697 0.54421 19.48018 0 0 0 + 30 10 2 0.4236 11.06253 34.74012 19.01405 0 -1 0 + 31 11 1 -0.8472 20.07383 4.95885 33.62365 0 1 0 + 32 11 2 0.4236 19.77359 5.87080 
33.90322 0 1 0 + 33 11 2 0.4236 20.68149 4.57954 34.32139 0 1 0 + 34 12 1 -0.8472 12.43897 28.56656 17.39837 0 0 0 + 35 12 2 0.4236 12.80348 27.99505 18.13354 0 0 0 + 36 12 2 0.4236 11.63887 28.12177 16.99597 0 0 0 + 37 13 1 -0.8472 14.80338 7.14199 1.42116 0 0 0 + 38 13 2 0.4236 14.86001 6.68442 0.53382 0 0 0 + 39 13 2 0.4236 14.13589 6.67036 1.99737 0 0 0 + 40 14 1 -0.8472 15.87968 22.18330 24.13468 1 -1 0 + 41 14 2 0.4236 15.97100 22.71526 23.29285 1 -1 0 + 42 14 2 0.4236 16.69618 22.30846 24.69826 1 -1 0 + 43 15 1 -0.8472 13.29194 18.30473 12.37157 1 0 0 + 44 15 2 0.4236 12.55838 18.63231 12.96701 1 0 0 + 45 15 2 0.4236 13.24823 18.78335 11.49467 1 0 0 + 46 16 1 -0.8472 20.27409 23.94157 15.50212 0 0 0 + 47 16 2 0.4236 20.17851 24.67734 14.83167 0 0 0 + 48 16 2 0.4236 20.62006 23.12024 15.04857 0 0 0 + 49 17 1 -0.8472 30.10203 10.78182 14.24321 1 0 0 + 50 17 2 0.4236 29.40171 11.00523 13.56532 1 0 0 + 51 17 2 0.4236 29.70120 10.21329 14.96159 1 0 0 + 52 18 1 -0.8472 19.71525 12.98975 25.40578 0 0 0 + 53 18 2 0.4236 20.21522 13.35852 26.18938 0 0 0 + 54 18 2 0.4236 18.75253 12.87297 25.64962 0 0 0 + 55 19 1 -0.8472 4.22362 18.99305 32.62946 1 0 0 + 56 19 2 0.4236 4.05067 18.17865 32.07556 1 0 0 + 57 19 2 0.4236 3.38513 19.53353 32.69833 1 0 0 + 58 20 1 -0.8472 17.67279 30.86798 34.86933 1 -1 0 + 59 20 2 0.4236 17.18866 31.74218 34.90528 1 -1 0 + 60 20 2 0.4236 18.18607 30.80612 34.01339 1 -1 0 + 61 21 1 -0.8472 7.49194 27.84024 34.65598 0 0 0 + 62 21 2 0.4236 7.36412 27.37987 33.77752 0 0 0 + 63 21 2 0.4236 7.83650 27.18529 35.32848 0 0 0 + 64 22 1 -0.8472 9.58199 8.75878 28.38767 0 0 0 + 65 22 2 0.4236 8.89931 8.96106 27.68557 0 0 0 + 66 22 2 0.4236 9.61451 9.50981 29.04713 0 0 0 + 67 23 1 -0.8472 18.15447 7.97877 4.02967 1 0 0 + 68 23 2 0.4236 17.65379 8.02465 3.16529 1 0 0 + 69 23 2 0.4236 17.56073 7.59903 4.73904 1 0 0 + 70 24 1 -0.8472 13.45467 10.30195 21.94603 0 0 0 + 71 24 2 0.4236 14.12655 11.01716 21.75366 0 0 0 + 72 24 2 0.4236 13.15542 10.37304 
22.89754 0 0 0 + 73 25 1 -0.8472 28.77370 1.83495 6.23711 0 0 0 + 74 25 2 0.4236 29.55410 1.23316 6.06742 0 0 0 + 75 25 2 0.4236 28.43863 1.69170 7.16831 0 0 0 + 76 26 1 -0.8472 21.17410 3.00906 4.56251 0 1 0 + 77 26 2 0.4236 21.00772 2.96011 5.54734 0 1 0 + 78 26 2 0.4236 21.17488 3.96581 4.27178 0 1 0 + 79 27 1 -0.8472 15.86257 20.77629 10.34675 1 -1 0 + 80 27 2 0.4236 15.84751 19.99231 10.96732 1 -1 0 + 81 27 2 0.4236 15.76759 20.45695 9.40393 1 -1 0 + 82 28 1 -0.8472 19.37283 6.41248 28.33230 0 0 0 + 83 28 2 0.4236 19.86925 6.14505 27.50645 0 0 0 + 84 28 2 0.4236 19.23659 7.40311 28.33322 0 0 0 + 85 29 1 -0.8472 19.69874 26.80111 22.56675 -1 -1 0 + 86 29 2 0.4236 20.51873 26.54536 22.05471 -1 -1 0 + 87 29 2 0.4236 19.52519 26.12131 23.27927 -1 -1 0 + 88 30 1 -0.8472 10.44934 1.95847 4.23874 1 1 0 + 89 30 2 0.4236 10.99436 1.49085 4.93460 1 1 0 + 90 30 2 0.4236 10.84502 2.85816 4.05448 1 1 0 + 91 31 1 -0.8472 6.35411 29.19722 23.17920 0 0 0 + 92 31 2 0.4236 5.50252 29.65394 23.43641 0 0 0 + 93 31 2 0.4236 7.01350 29.87731 22.85887 0 0 0 + 94 32 1 -0.8472 27.70305 33.63868 1.45545 0 0 0 + 95 32 2 0.4236 27.45511 34.56848 1.72731 0 0 0 + 96 32 2 0.4236 28.23820 33.21008 2.18338 0 0 0 + 97 33 1 -0.8472 34.54150 25.90721 10.97268 0 0 0 + 98 33 2 0.4236 34.26945 25.07242 10.49403 0 0 0 + 99 33 2 0.4236 34.74630 26.62266 10.30478 0 0 0 + 100 34 1 -0.8472 35.13701 10.35159 32.75388 -1 0 0 + 101 34 2 0.4236 35.31674 9.89658 31.88174 -1 0 0 + 102 34 2 0.4236 35.40693 9.74994 33.50557 -1 0 0 + 103 35 1 -0.8472 19.45549 25.22953 13.06888 1 0 0 + 104 35 2 0.4236 18.63554 25.79082 12.95680 1 0 0 + 105 35 2 0.4236 19.57263 24.64638 12.26505 1 0 0 + 106 36 1 -0.8472 8.76637 34.60601 24.20146 0 0 0 + 107 36 2 0.4236 9.50956 34.70873 24.86259 0 0 0 + 108 36 2 0.4236 9.11640 34.17220 23.37125 0 0 0 + 109 37 1 -0.8472 18.52185 34.00287 24.76220 0 0 0 + 110 37 2 0.4236 19.50193 33.87409 24.61123 0 0 0 + 111 37 2 0.4236 18.00811 33.44318 24.11196 0 0 0 + 112 38 1 -0.8472 5.46879 
16.75651 12.09359 0 -1 0 + 113 38 2 0.4236 5.19937 17.67659 11.80934 0 -1 0 + 114 38 2 0.4236 5.65240 16.75172 13.07654 0 -1 0 +115 39 1 -0.8472 26.78187 26.54273 35.17124 1 0 0 +116 39 2 0.4236 27.38399 26.93341 0.42029 1 0 1 +117 39 2 0.4236 27.18089 25.69431 34.82353 1 0 0 + 118 40 1 -0.8472 14.04248 15.27305 12.41517 0 0 0 + 119 40 2 0.4236 14.36157 15.76500 13.22517 0 0 0 + 120 40 2 0.4236 14.30963 15.77579 11.59307 0 0 0 + 121 41 1 -0.8472 6.75576 6.34672 6.04938 1 1 0 + 122 41 2 0.4236 7.58170 5.78427 6.08735 1 1 0 + 123 41 2 0.4236 5.98388 5.83092 6.42094 1 1 0 + 124 42 1 -0.8472 10.86576 34.93065 25.98166 0 0 0 + 125 42 2 0.4236 10.69405 35.32452 26.88460 0 0 0 + 126 42 2 0.4236 11.59169 35.44574 25.52591 0 0 0 + 127 43 1 -0.8472 18.34531 30.51202 26.27445 0 0 0 + 128 43 2 0.4236 18.04166 30.29551 25.34662 0 0 0 + 129 43 2 0.4236 19.17552 31.06784 26.23227 0 0 0 + 130 44 1 -0.8472 13.18486 0.77946 20.62491 -1 1 0 + 131 44 2 0.4236 13.44906 0.56426 21.56501 -1 1 0 + 132 44 2 0.4236 12.29152 0.37510 20.42885 -1 1 0 + 133 45 1 -0.8472 24.47790 24.05885 28.20937 -1 0 0 + 134 45 2 0.4236 24.15706 23.14172 27.97301 -1 0 0 + 135 45 2 0.4236 23.74391 24.71952 28.05213 -1 0 0 +136 46 1 -0.8472 30.79103 15.33785 34.86738 0 0 0 +137 46 2 0.4236 31.17614 15.32312 0.34290 0 0 1 +138 46 2 0.4236 29.97833 14.75605 34.83605 0 0 0 + 139 47 1 -0.8472 18.30892 19.76908 34.23734 0 0 0 + 140 47 2 0.4236 17.75981 20.19846 33.52035 0 0 0 + 141 47 2 0.4236 19.04827 19.24096 33.81970 0 0 0 + 142 48 1 -0.8472 24.18923 16.21113 25.53917 0 0 0 + 143 48 2 0.4236 24.35093 17.17602 25.33219 0 0 0 + 144 48 2 0.4236 24.25573 16.06630 26.52637 0 0 0 + 145 49 1 -0.8472 9.29176 8.02479 32.23837 -1 -1 0 + 146 49 2 0.4236 10.25650 7.88688 32.46243 -1 -1 0 + 147 49 2 0.4236 8.95956 7.24816 31.70319 -1 -1 0 + 148 50 1 -0.8472 5.32982 1.15354 27.64551 0 0 0 + 149 50 2 0.4236 4.51900 0.59091 27.48437 0 0 0 + 150 50 2 0.4236 5.05010 2.09452 27.83594 0 0 0 + 151 51 1 -0.8472 22.89850 21.33836 
11.63894 1 0 0 + 152 51 2 0.4236 23.24391 20.51986 12.09802 1 0 0 + 153 51 2 0.4236 22.43572 21.07543 10.79239 1 0 0 + 154 52 1 -0.8472 16.88462 32.60779 23.16332 -1 0 0 + 155 52 2 0.4236 15.90624 32.80151 23.09101 -1 0 0 + 156 52 2 0.4236 17.01667 31.65114 23.42276 -1 0 0 + 157 53 1 -0.8472 29.24409 7.09722 23.70701 -1 0 0 + 158 53 2 0.4236 28.47748 7.53121 24.18023 -1 0 0 + 159 53 2 0.4236 29.54220 7.68009 22.95113 -1 0 0 + 160 54 1 -0.8472 34.29952 6.85677 2.08156 -1 0 0 + 161 54 2 0.4236 34.97347 6.89704 1.34390 -1 0 0 + 162 54 2 0.4236 33.70962 7.66305 2.03849 -1 0 0 + 163 55 1 -0.8472 32.95408 29.26891 19.25970 -1 -1 0 + 164 55 2 0.4236 32.57416 28.37248 19.48784 -1 -1 0 + 165 55 2 0.4236 33.66659 29.16225 18.56621 -1 -1 0 + 166 56 1 -0.8472 9.78186 33.73160 21.96701 0 -1 0 + 167 56 2 0.4236 9.24351 33.11436 21.39330 0 -1 0 + 168 56 2 0.4236 10.15276 34.47066 21.40475 0 -1 0 + 169 57 1 -0.8472 7.86139 6.97451 8.72713 0 0 0 + 170 57 2 0.4236 8.16632 6.08021 9.05452 0 0 0 + 171 57 2 0.4236 7.15204 6.85123 8.03315 0 0 0 + 172 58 1 -0.8472 34.25223 27.81706 31.79312 -1 -1 0 + 173 58 2 0.4236 34.70046 28.60821 31.37704 -1 -1 0 + 174 58 2 0.4236 33.75216 28.10807 32.60872 -1 -1 0 + 175 59 1 -0.8472 34.94048 26.84844 19.52041 -1 -1 0 + 176 59 2 0.4236 35.40459 26.00577 19.79339 -1 -1 0 + 177 59 2 0.4236 34.15050 26.62404 18.94986 -1 -1 0 + 178 60 1 -0.8472 21.76411 32.20568 20.89158 0 0 0 + 179 60 2 0.4236 22.66239 31.82480 21.11072 0 0 0 + 180 60 2 0.4236 21.76667 33.18956 21.07031 0 0 0 + 181 61 1 -0.8472 34.30787 3.43694 14.24158 -1 0 0 + 182 61 2 0.4236 34.88829 2.82261 13.70711 -1 0 0 + 183 61 2 0.4236 34.87977 4.10183 14.72200 -1 0 0 + 184 62 1 -0.8472 17.88110 18.91029 14.42187 0 1 0 + 185 62 2 0.4236 17.92538 18.97676 15.41864 0 1 0 + 186 62 2 0.4236 18.59574 19.48109 14.01766 0 1 0 + 187 63 1 -0.8472 19.08402 14.22906 20.78379 0 0 0 + 188 63 2 0.4236 19.62169 13.82628 21.52446 0 0 0 + 189 63 2 0.4236 18.62450 13.50292 20.27246 0 0 0 + 190 64 1 -0.8472 
7.85997 17.97828 9.48138 1 0 0 + 191 64 2 0.4236 7.51843 18.85870 9.15243 1 0 0 + 192 64 2 0.4236 8.12328 17.41050 8.70147 1 0 0 + 193 65 1 -0.8472 0.30367 23.18327 0.38100 1 -1 0 + 194 65 2 0.4236 35.34479 24.06840 0.38922 0 -1 0 + 195 65 2 0.4236 35.13158 22.44974 0.42108 0 -1 0 + 196 66 1 -0.8472 4.53675 21.21621 29.86203 1 0 0 + 197 66 2 0.4236 5.08827 22.03780 29.71779 1 0 0 + 198 66 2 0.4236 5.02017 20.59411 30.47781 1 0 0 + 199 67 1 -0.8472 3.76321 1.66558 34.11622 0 0 0 + 200 67 2 0.4236 4.72980 1.83277 33.92193 0 0 0 + 201 67 2 0.4236 3.56027 1.93821 35.05667 0 0 0 + 202 68 1 -0.8472 35.17951 34.70220 7.53093 0 0 0 + 203 68 2 0.4236 34.22070 34.47397 7.36203 0 0 0 + 204 68 2 0.4236 0.09341 33.98533 8.08720 1 0 0 + 205 69 1 -0.8472 24.82000 8.21485 16.09338 0 0 0 + 206 69 2 0.4236 25.55821 8.87272 15.94450 0 0 0 + 207 69 2 0.4236 25.00950 7.68219 16.91816 0 0 0 + 208 70 1 -0.8472 4.18282 28.52828 29.70840 1 0 0 + 209 70 2 0.4236 5.11675 28.38566 30.03614 1 0 0 + 210 70 2 0.4236 4.20438 29.05790 28.86047 1 0 0 + 211 71 1 -0.8472 26.43717 31.27303 3.91741 -1 0 0 + 212 71 2 0.4236 26.51838 30.56852 3.21243 -1 0 0 + 213 71 2 0.4236 27.32007 31.72670 4.03853 -1 0 0 + 214 72 1 -0.8472 35.11244 7.70363 27.08017 -1 0 0 + 215 72 2 0.4236 35.17419 7.38841 28.02717 -1 0 0 + 216 72 2 0.4236 34.54388 7.07077 26.55463 -1 0 0 + 217 73 1 -0.8472 21.45404 34.05655 2.90167 0 0 0 + 218 73 2 0.4236 21.02487 33.76274 3.75574 0 0 0 + 219 73 2 0.4236 22.28101 34.57992 3.10701 0 0 0 + 220 74 1 -0.8472 31.21242 3.31299 25.80396 -1 1 0 + 221 74 2 0.4236 30.72513 4.00625 25.27307 -1 1 0 + 222 74 2 0.4236 31.51511 3.70889 26.67093 -1 1 0 + 223 75 1 -0.8472 7.36998 1.04818 25.83115 0 0 0 + 224 75 2 0.4236 7.49443 0.23812 25.25822 0 0 0 + 225 75 2 0.4236 6.65222 0.87278 26.50493 0 0 0 + 226 76 1 -0.8472 30.34479 6.09271 15.73480 -1 1 0 + 227 76 2 0.4236 30.87025 5.55980 15.07161 -1 1 0 + 228 76 2 0.4236 30.41783 5.66752 16.63693 -1 1 0 + 229 77 1 -0.8472 34.94002 25.15376 32.03668 -1 0 
0 + 230 77 2 0.4236 34.65462 24.57426 32.80000 -1 0 0 + 231 77 2 0.4236 34.57715 26.07671 32.16507 -1 0 0 + 232 78 1 -0.8472 0.48609 26.41083 7.95690 1 0 0 + 233 78 2 0.4236 35.24442 26.06137 7.39276 0 0 0 + 234 78 2 0.4236 0.34398 27.38491 8.13268 1 0 0 + 235 79 1 -0.8472 28.55527 15.83548 29.53063 0 0 0 + 236 79 2 0.4236 28.35434 16.81126 29.61648 0 0 0 + 237 79 2 0.4236 29.51472 15.71387 29.27644 0 0 0 + 238 80 1 -0.8472 18.14783 14.69040 4.78991 0 0 0 + 239 80 2 0.4236 18.53030 15.52266 5.19110 0 0 0 + 240 80 2 0.4236 17.28957 14.90833 4.32534 0 0 0 + 241 81 1 -0.8472 23.98866 17.79905 4.00089 0 0 0 + 242 81 2 0.4236 24.91076 17.80239 3.61409 0 0 0 + 243 81 2 0.4236 23.56034 16.91103 3.83379 0 0 0 + 244 82 1 -0.8472 27.78613 18.39989 29.82640 0 0 0 + 245 82 2 0.4236 27.08378 18.93614 30.29448 0 0 0 + 246 82 2 0.4236 28.35313 19.00694 29.26967 0 0 0 + 247 83 1 -0.8472 32.55865 20.99313 23.15964 -1 0 0 + 248 83 2 0.4236 33.14482 20.54010 22.48798 -1 0 0 + 249 83 2 0.4236 32.46068 21.95876 22.91902 -1 0 0 + 250 84 1 -0.8472 0.52838 10.68840 20.51354 1 0 0 + 251 84 2 0.4236 35.18409 10.69540 21.03922 0 0 0 + 252 84 2 0.4236 1.10279 11.45563 20.79882 1 0 0 + 253 85 1 -0.8472 24.98681 7.82783 20.94061 0 0 0 + 254 85 2 0.4236 25.90983 7.94912 20.57557 0 0 0 + 255 85 2 0.4236 24.31734 8.12211 20.25857 0 0 0 + 256 86 1 -0.8472 22.84393 20.12521 4.77949 -1 0 0 + 257 86 2 0.4236 23.21948 19.20137 4.85318 -1 0 0 + 258 86 2 0.4236 23.56755 20.75818 4.50442 -1 0 0 + 259 87 1 -0.8472 33.53546 10.01481 10.45635 0 1 0 + 260 87 2 0.4236 32.83119 9.35450 10.71703 0 1 0 + 261 87 2 0.4236 34.10172 10.22977 11.25204 0 1 0 + 262 88 1 -0.8472 16.00583 10.01890 6.93528 0 0 0 + 263 88 2 0.4236 16.41480 10.74688 6.38507 0 0 0 + 264 88 2 0.4236 15.01766 10.16046 6.99384 0 0 0 + 265 89 1 -0.8472 29.81145 30.32235 24.40624 0 0 0 + 266 89 2 0.4236 29.91989 30.02878 25.35598 0 0 0 + 267 89 2 0.4236 29.93572 29.53794 23.79868 0 0 0 + 268 90 1 -0.8472 4.63663 9.89409 32.09045 0 0 0 + 269 90 2 
0.4236 4.00394 9.69969 31.34088 0 0 0 + 270 90 2 0.4236 5.49457 9.40350 31.93817 0 0 0 + 271 91 1 -0.8472 32.85747 18.79237 15.26420 -1 0 0 + 272 91 2 0.4236 33.07562 17.88380 15.62037 -1 0 0 + 273 91 2 0.4236 33.39393 19.47988 15.75358 -1 0 0 + 274 92 1 -0.8472 0.61908 7.95150 15.25321 1 1 0 + 275 92 2 0.4236 0.05994 7.85361 16.07647 1 1 0 + 276 92 2 0.4236 35.53017 8.04513 14.45519 0 1 0 + 277 93 1 -0.8472 2.41100 1.86682 23.94422 0 0 0 + 278 93 2 0.4236 2.46310 1.85101 22.94575 0 0 0 + 279 93 2 0.4236 2.78298 1.01413 24.31096 0 0 0 + 280 94 1 -0.8472 35.13183 4.79424 16.91501 0 1 0 + 281 94 2 0.4236 34.79462 5.73186 16.83068 0 1 0 + 282 94 2 0.4236 34.48810 4.25622 17.45916 0 1 0 + 283 95 1 -0.8472 0.97118 31.04426 15.17959 1 0 0 + 284 95 2 0.4236 35.51597 31.29963 15.28033 0 0 0 + 285 95 2 0.4236 1.24465 30.46576 15.94801 1 0 0 + 286 96 1 -0.8472 2.19969 4.72227 17.02777 0 0 0 + 287 96 2 0.4236 2.38681 5.64899 17.35356 0 0 0 + 288 96 2 0.4236 1.21985 4.62094 16.85574 0 0 0 + 289 97 1 -0.8472 11.32464 9.43507 18.23393 0 0 0 + 290 97 2 0.4236 11.35628 9.66113 17.26037 0 0 0 + 291 97 2 0.4236 10.41863 9.65219 18.59726 0 0 0 + 292 98 1 -0.8472 1.17633 29.87451 2.30651 1 -1 0 + 293 98 2 0.4236 0.77781 30.54458 1.68034 1 -1 0 + 294 98 2 0.4236 2.08692 30.17937 2.58550 1 -1 0 + 295 99 1 -0.8472 30.89603 1.46693 1.97982 -1 0 0 + 296 99 2 0.4236 31.66382 1.01488 2.43382 -1 0 0 + 297 99 2 0.4236 30.67056 0.97609 1.13828 -1 0 0 + 298 100 1 -0.8472 5.03295 1.93998 10.31545 1 0 0 + 299 100 2 0.4236 4.31256 1.98624 11.00745 1 0 0 + 300 100 2 0.4236 4.62804 2.03388 9.40598 1 0 0 + 301 101 1 -0.8472 12.08877 2.72082 8.77105 0 0 0 + 302 101 2 0.4236 11.73932 2.99595 9.66666 0 0 0 + 303 101 2 0.4236 12.21940 3.53177 8.20074 0 0 0 + 304 102 1 -0.8472 33.01497 6.97472 32.69727 0 1 0 + 305 102 2 0.4236 32.33382 6.98790 31.96528 0 1 0 + 306 102 2 0.4236 33.92894 7.07439 32.30391 0 1 0 + 307 103 1 -0.8472 24.19758 6.71414 6.62083 0 0 0 + 308 103 2 0.4236 24.48496 5.94255 6.05339 0 0 
0 + 309 103 2 0.4236 23.34354 7.09140 6.26267 0 0 0 + 310 104 1 -0.8472 11.20786 33.66002 13.73986 0 0 0 + 311 104 2 0.4236 11.08611 33.96434 12.79516 0 0 0 + 312 104 2 0.4236 12.16057 33.79345 14.01284 0 0 0 + 313 105 1 -0.8472 9.04560 20.24739 13.13311 0 0 0 + 314 105 2 0.4236 8.82766 19.47789 13.73338 0 0 0 + 315 105 2 0.4236 9.27855 19.90464 12.22304 0 0 0 + 316 106 1 -0.8472 8.42921 16.29486 7.43324 1 0 0 + 317 106 2 0.4236 9.19124 15.72058 7.13424 1 0 0 + 318 106 2 0.4236 7.64285 15.71749 7.65276 1 0 0 + 319 107 1 -0.8472 8.18016 30.95703 14.07257 0 -1 0 + 320 107 2 0.4236 7.61764 30.47123 14.74149 0 -1 0 + 321 107 2 0.4236 7.67505 31.74583 13.72246 0 -1 0 + 322 108 1 -0.8472 17.76414 27.72204 30.17860 -1 0 0 + 323 108 2 0.4236 17.65280 28.63562 29.78755 -1 0 0 + 324 108 2 0.4236 17.44856 27.72492 31.12746 -1 0 0 + 325 109 1 -0.8472 17.45769 25.85512 16.78307 0 1 0 + 326 109 2 0.4236 18.01645 26.62724 17.08576 0 1 0 + 327 109 2 0.4236 17.96426 25.00368 16.91877 0 1 0 + 328 110 1 -0.8472 28.59997 12.45467 18.30443 0 0 0 + 329 110 2 0.4236 28.86886 12.50061 17.34240 0 0 0 + 330 110 2 0.4236 28.32838 13.36441 18.61842 0 0 0 + 331 111 1 -0.8472 20.03964 19.60707 21.61488 0 -1 0 + 332 111 2 0.4236 19.21429 19.35873 22.12192 0 -1 0 + 333 111 2 0.4236 20.56705 20.27317 22.14227 0 -1 0 + 334 112 1 -0.8472 24.43021 31.08112 15.27434 0 0 0 + 335 112 2 0.4236 24.67230 31.05676 14.30439 0 0 0 + 336 112 2 0.4236 23.62159 31.65540 15.40173 0 0 0 + 337 113 1 -0.8472 14.03481 4.28377 28.23021 0 1 0 + 338 113 2 0.4236 13.71830 5.06061 27.68592 0 1 0 + 339 113 2 0.4236 14.81307 3.85222 27.77408 0 1 0 + 340 114 1 -0.8472 3.18866 1.92022 1.25228 0 1 0 + 341 114 2 0.4236 3.95739 2.04883 1.87873 0 1 0 + 342 114 2 0.4236 2.56934 1.22782 1.62238 0 1 0 + 343 115 1 -0.8472 22.54465 23.60224 9.46826 0 0 0 + 344 115 2 0.4236 22.81528 22.78339 8.96211 0 0 0 + 345 115 2 0.4236 23.34546 23.99391 9.92131 0 0 0 + 346 116 1 -0.8472 6.44525 3.02083 18.87605 1 0 0 + 347 116 2 0.4236 5.96940 
2.76314 19.71693 1 0 0 + 348 116 2 0.4236 5.93293 2.68338 18.08636 1 0 0 + 349 117 1 -0.8472 12.31665 10.94306 26.18378 0 0 0 + 350 117 2 0.4236 11.78278 11.60820 26.70581 0 0 0 + 351 117 2 0.4236 12.52820 10.15648 26.76387 0 0 0 + 352 118 1 -0.8472 8.93649 1.70958 18.81431 -1 0 0 + 353 118 2 0.4236 9.43794 2.41836 19.31046 -1 0 0 + 354 118 2 0.4236 8.03395 2.05639 18.55926 -1 0 0 + 355 119 1 -0.8472 2.23387 20.21809 0.67571 0 0 0 + 356 119 2 0.4236 1.96454 19.32973 0.30400 0 0 0 + 357 119 2 0.4236 1.48917 20.87294 0.54693 0 0 0 + 358 120 1 -0.8472 32.34699 18.13646 22.38224 0 0 0 + 359 120 2 0.4236 31.75911 18.90777 22.62608 0 0 0 + 360 120 2 0.4236 32.85409 17.83551 23.18984 0 0 0 + 361 121 1 -0.8472 20.24600 32.09543 18.49475 -1 0 0 + 362 121 2 0.4236 19.37450 32.56379 18.64008 -1 0 0 + 363 121 2 0.4236 20.82295 32.20632 19.30394 -1 0 0 + 364 122 1 -0.8472 32.44396 13.48495 19.54721 0 0 0 + 365 122 2 0.4236 32.49225 14.27437 20.15914 0 0 0 + 366 122 2 0.4236 31.99587 13.75157 18.69393 0 0 0 + 367 123 1 -0.8472 35.26586 18.21614 1.39574 -1 0 0 + 368 123 2 0.4236 0.12239 18.40827 2.30752 0 0 0 + 369 123 2 0.4236 34.49009 17.58963 1.47095 -1 0 0 + 370 124 1 -0.8472 3.39252 26.43728 7.35930 0 1 0 + 371 124 2 0.4236 2.42858 26.42070 7.62480 0 1 0 + 372 124 2 0.4236 3.52347 25.87316 6.54412 0 1 0 + 373 125 1 -0.8472 15.94721 21.75698 15.77077 0 0 0 + 374 125 2 0.4236 16.58200 22.15497 15.10848 0 0 0 + 375 125 2 0.4236 15.22931 21.25758 15.28582 0 0 0 + 376 126 1 -0.8472 20.49377 23.57178 7.41254 0 0 0 + 377 126 2 0.4236 19.70411 23.45223 8.01425 0 0 0 + 378 126 2 0.4236 21.29088 23.83328 7.95669 0 0 0 + 379 127 1 -0.8472 6.64565 4.33685 1.91046 0 1 0 + 380 127 2 0.4236 6.29368 4.91344 1.17318 0 1 0 + 381 127 2 0.4236 7.51811 4.70392 2.23304 0 1 0 + 382 128 1 -0.8472 26.70656 32.89276 9.89051 1 0 0 + 383 128 2 0.4236 26.59749 31.89883 9.88051 1 0 0 + 384 128 2 0.4236 27.36144 33.14912 10.60140 1 0 0 + 385 129 1 -0.8472 5.48704 32.63030 12.93174 0 -1 0 + 386 129 2 
0.4236 5.73825 33.49250 13.37160 0 -1 0 + 387 129 2 0.4236 4.53739 32.40792 13.15229 0 -1 0 + 388 130 1 -0.8472 3.37091 5.95470 9.99334 1 1 0 + 389 130 2 0.4236 3.99627 6.57052 10.47252 1 1 0 + 390 130 2 0.4236 3.39679 5.05241 10.42360 1 1 0 + 391 131 1 -0.8472 2.63445 9.37840 30.00842 1 0 0 + 392 131 2 0.4236 2.81625 9.87705 29.16095 1 0 0 + 393 131 2 0.4236 1.69314 9.04098 30.00123 1 0 0 + 394 132 1 -0.8472 0.97785 8.01694 8.99862 1 1 0 + 395 132 2 0.4236 1.85940 8.12605 8.53940 1 1 0 + 396 132 2 0.4236 0.38758 8.79268 8.77566 1 1 0 + 397 133 1 -0.8472 2.73822 11.52983 15.38585 1 1 0 + 398 133 2 0.4236 3.54689 11.84375 15.88332 1 1 0 + 399 133 2 0.4236 2.92713 10.64019 14.97013 1 1 0 + 400 134 1 -0.8472 18.35568 23.16903 17.37047 0 0 0 + 401 134 2 0.4236 17.78374 22.43717 17.00005 0 0 0 + 402 134 2 0.4236 19.06840 23.40230 16.70899 0 0 0 + 403 135 1 -0.8472 9.13053 4.89654 5.72398 1 1 0 + 404 135 2 0.4236 9.75352 5.64004 5.96693 1 1 0 + 405 135 2 0.4236 9.45310 4.04375 6.13467 1 1 0 + 406 136 1 -0.8472 7.31448 35.35133 12.86442 0 0 0 + 407 136 2 0.4236 7.78778 35.16424 13.72519 0 0 0 + 408 136 2 0.4236 6.36291 0.08760 13.05321 0 1 0 + 409 137 1 -0.8472 0.52118 24.39975 19.66486 0 -1 0 + 410 137 2 0.4236 0.83476 23.57085 20.12801 0 -1 0 + 411 137 2 0.4236 0.24832 24.17585 18.72923 0 -1 0 + 412 138 1 -0.8472 11.64306 23.42080 11.52906 0 0 0 + 413 138 2 0.4236 12.61679 23.64677 11.50116 0 0 0 + 414 138 2 0.4236 11.14825 24.13853 12.01894 0 0 0 + 415 139 1 -0.8472 3.45756 3.18529 19.12957 0 0 0 + 416 139 2 0.4236 3.12124 3.74194 18.36997 0 0 0 + 417 139 2 0.4236 3.81274 3.78249 19.84869 0 0 0 + 418 140 1 -0.8472 20.63306 31.64067 26.89364 0 0 0 + 419 140 2 0.4236 21.22839 31.75801 26.09881 0 0 0 + 420 140 2 0.4236 20.82429 32.35914 27.56232 0 0 0 + 421 141 1 -0.8472 27.87238 20.32866 9.33395 0 0 0 + 422 141 2 0.4236 27.02686 20.77238 9.03698 0 0 0 + 423 141 2 0.4236 27.66058 19.63412 10.02149 0 0 0 + 424 142 1 -0.8472 31.29036 11.93464 0.40169 -1 0 0 + 425 142 2 
0.4236 31.37881 12.03516 1.39266 -1 0 0 + 426 142 2 0.4236 30.32288 11.95452 0.14970 -1 0 0 + 427 143 1 -0.8472 21.39492 7.42434 2.71837 0 0 0 + 428 143 2 0.4236 22.26919 6.94124 2.67113 0 0 0 + 429 143 2 0.4236 21.01752 7.34108 3.64064 0 0 0 + 430 144 1 -0.8472 20.01912 19.53687 17.16647 0 0 0 + 431 144 2 0.4236 19.03559 19.42765 17.02256 0 0 0 + 432 144 2 0.4236 20.17677 20.10021 17.97747 0 0 0 + 433 145 1 -0.8472 3.61472 34.67433 24.72232 0 -1 0 + 434 145 2 0.4236 3.58465 34.54040 25.71283 0 -1 0 + 435 145 2 0.4236 4.55220 34.54959 24.39742 0 -1 0 + 436 146 1 -0.8472 23.41665 19.55686 18.89810 -1 -1 0 + 437 146 2 0.4236 24.10125 19.00406 18.42300 -1 -1 0 + 438 146 2 0.4236 22.99870 20.19654 18.25307 -1 -1 0 + 439 147 1 -0.8472 13.01993 3.81803 4.38911 1 0 0 + 440 147 2 0.4236 13.38071 3.06814 3.83464 1 0 0 + 441 147 2 0.4236 12.92360 4.63574 3.82163 1 0 0 + 442 148 1 -0.8472 15.28417 28.36986 13.07087 0 -1 0 + 443 148 2 0.4236 15.75033 28.86687 12.33902 0 -1 0 + 444 148 2 0.4236 14.37943 28.08353 12.75556 0 -1 0 + 445 149 1 -0.8472 31.07288 4.64018 29.64410 -1 1 0 + 446 149 2 0.4236 30.89042 5.35240 30.32192 -1 1 0 + 447 149 2 0.4236 31.28496 5.06438 28.76373 -1 1 0 + 448 150 1 -0.8472 18.66998 22.71969 9.28241 0 0 0 + 449 150 2 0.4236 19.43319 22.68014 9.92730 0 0 0 + 450 150 2 0.4236 18.18674 21.84425 9.28383 0 0 0 + 451 151 1 -0.8472 20.67684 4.79499 26.44460 0 0 0 + 452 151 2 0.4236 21.36307 4.90680 25.72591 0 0 0 + 453 151 2 0.4236 19.86562 4.35059 26.06464 0 0 0 + 454 152 1 -0.8472 23.32494 28.59379 34.87592 1 0 0 + 455 152 2 0.4236 22.34422 28.77672 34.80776 1 0 0 + 456 152 2 0.4236 23.47151 27.61801 35.03820 1 0 0 + 457 153 1 -0.8472 19.30434 32.15966 13.01468 0 -1 0 + 458 153 2 0.4236 18.79685 32.17514 13.87616 0 -1 0 + 459 153 2 0.4236 19.74774 33.04469 12.87307 0 -1 0 +460 154 1 -0.8472 34.76355 20.49891 35.24394 -1 0 0 +461 154 2 0.4236 34.80087 20.22981 34.28158 -1 0 0 +462 154 2 0.4236 34.89670 19.69303 0.37358 -1 0 1 + 463 155 1 -0.8472 13.98652 
1.37155 3.23039 0 0 0 + 464 155 2 0.4236 13.54990 0.47234 3.25696 0 0 0 + 465 155 2 0.4236 14.88021 1.32142 3.67626 0 0 0 + 466 156 1 -0.8472 24.96125 8.59168 8.51399 0 1 0 + 467 156 2 0.4236 24.90010 7.88536 7.80880 0 1 0 + 468 156 2 0.4236 25.92109 8.74798 8.74688 0 1 0 + 469 157 1 -0.8472 14.17804 14.86661 29.62017 0 1 0 + 470 157 2 0.4236 14.50622 13.96710 29.90836 0 1 0 + 471 157 2 0.4236 13.20875 14.80710 29.38165 0 1 0 + 472 158 1 -0.8472 32.49214 9.01293 2.57301 0 0 0 + 473 158 2 0.4236 31.88516 8.85624 3.35211 0 0 0 + 474 158 2 0.4236 32.99481 9.86668 2.70850 0 0 0 + 475 159 1 -0.8472 33.47883 32.17624 27.46842 0 -1 0 + 476 159 2 0.4236 33.32942 32.21278 28.45647 0 -1 0 + 477 159 2 0.4236 34.06965 31.39983 27.24918 0 -1 0 +478 160 1 -0.8472 31.09621 6.22320 0.75259 0 0 0 +479 160 2 0.4236 31.59348 5.78768 35.44946 0 0 -1 +480 160 2 0.4236 31.36134 7.18545 0.81397 0 0 0 + 481 161 1 -0.8472 14.57216 18.78960 19.08835 1 1 0 + 482 161 2 0.4236 15.51077 18.79497 19.43321 1 1 0 + 483 161 2 0.4236 14.22722 17.85110 19.07534 1 1 0 +484 162 1 -0.8472 34.12274 15.77227 34.76104 0 0 0 +485 162 2 0.4236 34.17490 15.19579 0.12925 0 0 1 +486 162 2 0.4236 33.34160 16.39158 34.84009 0 0 0 + 487 163 1 -0.8472 27.18476 6.31416 11.14438 0 0 0 + 488 163 2 0.4236 27.19357 6.88440 10.32300 0 0 0 + 489 163 2 0.4236 26.82710 6.84346 11.91376 0 0 0 +490 164 1 -0.8472 11.84314 26.77963 35.28640 1 -1 0 +491 164 2 0.4236 10.96567 26.43433 0.17205 1 -1 1 +492 164 2 0.4236 12.49337 26.82363 0.59765 1 -1 1 + 493 165 1 -0.8472 7.69412 20.72079 27.49780 0 0 0 + 494 165 2 0.4236 7.69014 21.70350 27.31291 0 0 0 + 495 165 2 0.4236 7.81598 20.22083 26.64044 0 0 0 + 496 166 1 -0.8472 23.50826 8.78434 32.03012 0 1 0 + 497 166 2 0.4236 23.26862 8.22923 31.23365 0 1 0 + 498 166 2 0.4236 23.39645 8.24013 32.86157 0 1 0 + 499 167 1 -0.8472 15.57156 13.00008 12.60169 -1 1 0 + 500 167 2 0.4236 14.79081 13.54649 12.90466 -1 1 0 + 501 167 2 0.4236 15.45716 12.76130 11.63738 -1 1 0 + 502 168 1 -0.8472 
11.29876 27.92926 8.68449 0 0 0 + 503 168 2 0.4236 11.64644 27.07029 8.30868 0 0 0 + 504 168 2 0.4236 11.27909 28.62394 7.96544 0 0 0 + 505 169 1 -0.8472 15.75868 14.55512 3.49819 0 0 0 + 506 169 2 0.4236 16.03520 15.14903 2.74271 0 0 0 + 507 169 2 0.4236 15.61473 13.62638 3.15656 0 0 0 + 508 170 1 -0.8472 29.04153 26.88438 19.85085 0 -1 0 + 509 170 2 0.4236 28.56574 26.75357 18.98108 0 -1 0 + 510 170 2 0.4236 28.97499 27.84337 20.12624 0 -1 0 + 511 171 1 -0.8472 28.02811 8.84491 0.36833 0 1 0 + 512 171 2 0.4236 29.02395 8.89761 0.29466 0 1 0 + 513 171 2 0.4236 27.77104 8.71695 1.32619 0 1 0 + 514 172 1 -0.8472 32.75013 2.39378 21.01859 -1 0 0 + 515 172 2 0.4236 32.14493 1.59871 20.97960 -1 0 0 + 516 172 2 0.4236 33.70131 2.09215 20.95322 -1 0 0 + 517 173 1 -0.8472 16.25250 5.40744 30.72899 -1 0 0 + 518 173 2 0.4236 16.73137 6.28231 30.80113 -1 0 0 + 519 173 2 0.4236 16.60479 4.89842 29.94366 -1 0 0 + 520 174 1 -0.8472 28.57796 22.31517 24.22429 -1 0 0 + 521 174 2 0.4236 28.58470 23.09656 24.84827 -1 0 0 + 522 174 2 0.4236 29.27495 21.65646 24.50755 -1 0 0 + 523 175 1 -0.8472 31.71292 17.98411 9.79888 0 0 0 + 524 175 2 0.4236 32.59827 17.60937 10.07395 0 0 0 + 525 175 2 0.4236 31.02797 17.76528 10.49379 0 0 0 + 526 176 1 -0.8472 28.82118 33.24498 34.59564 0 -1 0 + 527 176 2 0.4236 28.54721 33.97957 33.97493 0 -1 0 + 528 176 2 0.4236 28.33279 33.34188 35.46282 0 -1 0 + 529 177 1 -0.8472 34.60878 19.71536 32.71287 -1 0 0 + 530 177 2 0.4236 34.64592 19.24398 31.83176 -1 0 0 + 531 177 2 0.4236 33.91982 20.43912 32.67487 -1 0 0 + 532 178 1 -0.8472 17.69608 27.65071 3.21742 1 0 0 + 533 178 2 0.4236 18.46713 27.41434 3.80862 1 0 0 + 534 178 2 0.4236 17.78836 28.59848 2.91219 1 0 0 + 535 179 1 -0.8472 26.68813 34.62171 27.49337 -1 0 0 + 536 179 2 0.4236 26.45510 35.44751 28.00690 -1 0 0 + 537 179 2 0.4236 26.59489 34.80084 26.51400 -1 0 0 + 538 180 1 -0.8472 21.33619 26.62475 14.50460 0 0 0 + 539 180 2 0.4236 20.69573 26.16271 13.89115 0 0 0 + 540 180 2 0.4236 22.25918 
26.57281 14.12340 0 0 0 + 541 181 1 -0.8472 0.36454 14.16600 2.11556 1 1 0 + 542 181 2 0.4236 0.97624 14.65982 1.49757 1 1 0 + 543 181 2 0.4236 35.11443 13.77344 1.59245 0 1 0 + 544 182 1 -0.8472 23.79319 1.57159 23.81505 0 1 0 + 545 182 2 0.4236 23.00502 1.11432 24.22691 0 1 0 + 546 182 2 0.4236 23.48929 2.12744 23.04135 0 1 0 + 547 183 1 -0.8472 8.54975 33.46239 4.50736 0 0 0 + 548 183 2 0.4236 9.38674 33.19180 4.03176 0 0 0 + 549 183 2 0.4236 7.78193 33.44464 3.86700 0 0 0 + 550 184 1 -0.8472 33.97496 22.17620 28.93863 0 0 0 + 551 184 2 0.4236 33.73550 23.05012 29.36155 0 0 0 + 552 184 2 0.4236 34.53692 22.34106 28.12813 0 0 0 + 553 185 1 -0.8472 14.49504 4.44877 8.81426 0 0 0 + 554 185 2 0.4236 14.11718 4.62743 7.90582 0 0 0 + 555 185 2 0.4236 15.42318 4.81711 8.86750 0 0 0 + 556 186 1 -0.8472 33.79611 24.01791 9.18291 -1 0 0 + 557 186 2 0.4236 33.97006 23.08648 8.86336 -1 0 0 + 558 186 2 0.4236 33.55971 24.59880 8.40407 -1 0 0 + 559 187 1 -0.8472 16.22846 19.00342 12.26604 0 0 0 + 560 187 2 0.4236 16.94871 18.98450 12.95946 0 0 0 + 561 187 2 0.4236 15.35887 18.73631 12.68134 0 0 0 + 562 188 1 -0.8472 31.00840 33.57317 32.25266 -1 -1 0 + 563 188 2 0.4236 30.94716 33.24371 33.19481 -1 -1 0 + 564 188 2 0.4236 30.29274 33.14344 31.70212 -1 -1 0 + 565 189 1 -0.8472 2.10006 7.03828 25.49380 0 -1 0 + 566 189 2 0.4236 1.34846 7.26527 26.11306 0 -1 0 + 567 189 2 0.4236 2.26908 6.05309 25.52187 0 -1 0 +568 190 1 -0.8472 20.39048 7.66788 35.45350 1 1 0 +569 190 2 0.4236 20.32745 7.14917 0.85891 1 1 1 +570 190 2 0.4236 21.31606 7.58937 35.08326 1 1 0 + 571 191 1 -0.8472 29.27392 19.03193 27.68532 -1 0 0 + 572 191 2 0.4236 28.40799 18.61701 27.40610 -1 0 0 + 573 191 2 0.4236 30.00918 18.69729 27.09597 -1 0 0 + 574 192 1 -0.8472 13.93940 22.66146 17.38525 -1 0 0 + 575 192 2 0.4236 14.79020 22.51805 16.87977 -1 0 0 + 576 192 2 0.4236 14.02994 23.46909 17.96790 -1 0 0 + 577 193 1 -0.8472 25.12845 22.92439 17.53845 0 -1 0 + 578 193 2 0.4236 24.27881 22.39854 17.57699 0 -1 0 + 
579 193 2 0.4236 25.76341 22.58310 18.23148 0 -1 0 + 580 194 1 -0.8472 14.81424 28.36357 3.79200 0 -1 0 + 581 194 2 0.4236 15.79031 28.28929 3.58772 0 -1 0 + 582 194 2 0.4236 14.69331 28.63885 4.74570 0 -1 0 + 583 195 1 -0.8472 9.41760 1.55572 11.44122 0 0 0 + 584 195 2 0.4236 9.09058 1.02170 12.22084 0 0 0 + 585 195 2 0.4236 9.03168 1.18373 10.59704 0 0 0 + 586 196 1 -0.8472 0.58572 35.29558 12.11489 0 -2 0 + 587 196 2 0.4236 1.04027 34.42603 11.92181 0 -2 0 + 588 196 2 0.4236 35.34646 35.43381 11.46305 -1 -2 0 + 589 197 1 -0.8472 24.99565 27.11368 22.85908 -1 0 0 + 590 197 2 0.4236 24.94466 26.62404 23.72948 -1 0 0 + 591 197 2 0.4236 24.97275 28.09884 23.02901 -1 0 0 + 592 198 1 -0.8472 28.09827 27.08137 14.36745 -1 -1 0 + 593 198 2 0.4236 28.14567 26.09053 14.49365 -1 -1 0 + 594 198 2 0.4236 28.79796 27.36787 13.71303 -1 -1 0 + 595 199 1 -0.8472 29.44158 8.93119 21.38827 0 1 0 + 596 199 2 0.4236 29.92798 9.47643 20.70555 0 1 0 + 597 199 2 0.4236 28.77321 8.34420 20.93150 0 1 0 + 598 200 1 -0.8472 5.82233 14.89623 2.82244 0 0 0 + 599 200 2 0.4236 5.70821 13.97491 3.19408 0 0 0 + 600 200 2 0.4236 5.10567 15.07479 2.14829 0 0 0 + 601 201 1 -0.8472 31.11315 8.01659 27.88429 0 1 0 + 602 201 2 0.4236 31.51235 8.74883 27.33255 0 1 0 + 603 201 2 0.4236 30.79916 7.28065 27.28448 0 1 0 + 604 202 1 -0.8472 24.92095 28.59181 25.96921 0 0 0 + 605 202 2 0.4236 25.90902 28.48362 25.85968 0 0 0 + 606 202 2 0.4236 24.73202 29.09078 26.81495 0 0 0 + 607 203 1 -0.8472 34.86059 10.30036 12.87353 0 0 0 + 608 203 2 0.4236 34.64887 9.32336 12.89739 0 0 0 + 609 203 2 0.4236 34.24783 10.78945 13.49425 0 0 0 + 610 204 1 -0.8472 10.31573 24.57994 6.57007 0 0 0 + 611 204 2 0.4236 9.42999 24.66399 7.02652 0 0 0 + 612 204 2 0.4236 10.17346 24.38675 5.59931 0 0 0 + 613 205 1 -0.8472 4.30063 0.28530 18.84890 0 1 0 + 614 205 2 0.4236 3.76992 0.99348 19.31450 0 1 0 + 615 205 2 0.4236 4.35052 0.49419 17.87226 0 1 0 + 616 206 1 -0.8472 8.41827 15.48929 34.91520 1 1 0 + 617 206 2 0.4236 8.57237 
16.15336 34.18361 1 1 0 + 618 206 2 0.4236 8.04251 14.64754 34.52759 1 1 0 + 619 207 1 -0.8472 6.19035 7.54030 24.21894 0 0 0 + 620 207 2 0.4236 5.98782 8.51233 24.33762 0 0 0 + 621 207 2 0.4236 6.98218 7.43584 23.61726 0 0 0 + 622 208 1 -0.8472 1.08041 14.41014 23.75419 -1 0 0 + 623 208 2 0.4236 0.61056 15.08927 23.19030 -1 0 0 + 624 208 2 0.4236 0.47286 13.63227 23.91466 -1 0 0 + 625 209 1 -0.8472 31.67035 8.22586 14.08427 -1 0 0 + 626 209 2 0.4236 31.10235 7.76414 14.76556 -1 0 0 + 627 209 2 0.4236 31.37149 9.17541 13.98941 -1 0 0 + 628 210 1 -0.8472 4.32304 13.16675 34.02448 1 0 0 + 629 210 2 0.4236 4.93961 13.82550 33.59337 1 0 0 + 630 210 2 0.4236 3.51145 13.04635 33.45283 1 0 0 + 631 211 1 -0.8472 22.65976 11.99198 8.47744 0 1 0 + 632 211 2 0.4236 22.58083 11.20406 9.08809 0 1 0 + 633 211 2 0.4236 22.40561 11.72100 7.54908 0 1 0 + 634 212 1 -0.8472 12.56499 8.92537 31.88900 -1 0 0 + 635 212 2 0.4236 12.96543 8.59761 32.74470 -1 0 0 + 636 212 2 0.4236 12.39885 8.14974 31.28016 -1 0 0 + 637 213 1 -0.8472 7.37183 21.64222 6.22392 1 0 0 + 638 213 2 0.4236 7.16919 22.55931 5.88065 1 0 0 + 639 213 2 0.4236 7.30951 20.98418 5.47358 1 0 0 + 640 214 1 -0.8472 27.00762 16.63398 8.08993 -1 0 0 + 641 214 2 0.4236 26.39609 16.89271 8.83762 -1 0 0 + 642 214 2 0.4236 27.41015 15.73918 8.28309 -1 0 0 + 643 215 1 -0.8472 7.19611 3.79417 31.59334 -1 1 0 + 644 215 2 0.4236 7.16103 3.74933 30.59497 -1 1 0 + 645 215 2 0.4236 8.13759 3.95988 31.88666 -1 1 0 + 646 216 1 -0.8472 30.15909 16.21000 7.44433 -1 1 0 + 647 216 2 0.4236 30.11627 17.16629 7.73351 -1 1 0 + 648 216 2 0.4236 29.26380 15.92488 7.10209 -1 1 0 + 649 217 1 -0.8472 10.21667 4.39970 31.89677 1 0 0 + 650 217 2 0.4236 10.61047 4.91898 31.13834 1 0 0 + 651 217 2 0.4236 10.85283 4.40222 32.66827 1 0 0 + 652 218 1 -0.8472 27.50584 28.38786 25.68326 -1 -1 0 + 653 218 2 0.4236 28.38796 27.92670 25.77862 -1 -1 0 + 654 218 2 0.4236 27.53933 29.27126 26.15063 -1 -1 0 + 655 219 1 -0.8472 19.05430 11.89931 31.26655 0 0 0 + 656 
219 2 0.4236 18.26304 12.04698 30.67327 0 0 0 + 657 219 2 0.4236 18.74274 11.61302 32.17258 0 0 0 + 658 220 1 -0.8472 3.86618 28.09390 13.01872 0 0 0 + 659 220 2 0.4236 4.51190 27.43345 12.63558 0 0 0 + 660 220 2 0.4236 3.79034 28.88012 12.40552 0 0 0 + 661 221 1 -0.8472 26.59923 23.34113 29.55810 -1 0 0 + 662 221 2 0.4236 25.78847 23.65064 29.06127 -1 0 0 + 663 221 2 0.4236 26.80200 22.39435 29.30823 -1 0 0 + 664 222 1 -0.8472 27.68945 7.40971 19.86110 0 1 0 + 665 222 2 0.4236 27.80506 6.60329 20.44098 0 1 0 + 666 222 2 0.4236 27.26625 7.14029 18.99604 0 1 0 + 667 223 1 -0.8472 20.05157 4.56036 1.84594 1 1 0 + 668 223 2 0.4236 19.09425 4.38304 2.07401 1 1 0 + 669 223 2 0.4236 20.43859 5.20207 2.50804 1 1 0 + 670 224 1 -0.8472 32.60186 15.47413 21.29596 0 0 0 + 671 224 2 0.4236 32.41360 16.44824 21.42099 0 0 0 + 672 224 2 0.4236 32.59763 15.01702 22.18537 0 0 0 + 673 225 1 -0.8472 29.05431 5.06576 32.53287 -1 0 0 + 674 225 2 0.4236 28.15604 4.90289 32.12476 -1 0 0 + 675 225 2 0.4236 28.98490 5.79843 33.20987 -1 0 0 + 676 226 1 -0.8472 18.30583 3.04311 8.64163 0 1 0 + 677 226 2 0.4236 18.05345 4.01062 8.65401 0 1 0 + 678 226 2 0.4236 17.52270 2.49818 8.34211 0 1 0 + 679 227 1 -0.8472 34.73675 1.91251 8.11982 0 0 0 + 680 227 2 0.4236 35.36485 2.28659 7.43753 0 0 0 + 681 227 2 0.4236 34.59568 0.93821 7.94428 0 0 0 + 682 228 1 -0.8472 20.95258 29.30857 14.69084 -1 0 0 + 683 228 2 0.4236 21.18298 28.34078 14.58939 -1 0 0 + 684 228 2 0.4236 19.96366 29.40579 14.80271 -1 0 0 + 685 229 1 -0.8472 31.90522 26.19768 33.54499 -1 -1 0 + 686 229 2 0.4236 32.28670 27.11432 33.42592 -1 -1 0 + 687 229 2 0.4236 31.85789 25.98042 34.51993 -1 -1 0 + 688 230 1 -0.8472 2.97458 9.05586 14.30805 0 1 0 + 689 230 2 0.4236 2.89178 8.93222 13.31921 0 1 0 + 690 230 2 0.4236 2.13650 8.74237 14.75447 0 1 0 + 691 231 1 -0.8472 34.24545 3.69756 9.97076 0 0 0 + 692 231 2 0.4236 34.61539 3.02013 9.33498 0 0 0 + 693 231 2 0.4236 33.24679 3.64650 9.96832 0 0 0 + 694 232 1 -0.8472 19.84819 0.14555 
18.90147 0 0 0 + 695 232 2 0.4236 20.70006 0.66545 18.83855 0 0 0 + 696 232 2 0.4236 19.07092 0.77369 18.86563 0 0 0 + 697 233 1 -0.8472 32.25840 30.83726 10.92443 0 -1 0 + 698 233 2 0.4236 32.58236 31.71455 10.57039 0 -1 0 + 699 233 2 0.4236 32.93405 30.46448 11.56043 0 -1 0 + 700 234 1 -0.8472 27.01784 17.94590 11.23453 0 0 0 + 701 234 2 0.4236 26.81191 18.37645 12.11326 0 0 0 + 702 234 2 0.4236 28.00904 17.89236 11.11376 0 0 0 + 703 235 1 -0.8472 20.22992 35.23458 14.58694 0 -1 0 + 704 235 2 0.4236 20.57845 34.95937 13.69095 0 -1 0 + 705 235 2 0.4236 20.52427 34.57303 15.27660 0 -1 0 + 706 236 1 -0.8472 28.13686 28.87373 1.33657 -1 -1 0 + 707 236 2 0.4236 27.46398 29.26717 0.71018 -1 -1 0 + 708 236 2 0.4236 28.92013 28.53560 0.81488 -1 -1 0 + 709 237 1 -0.8472 19.05398 9.09800 19.78995 0 0 0 + 710 237 2 0.4236 18.05441 9.10305 19.81771 0 0 0 + 711 237 2 0.4236 19.38916 10.03413 19.68354 0 0 0 + 712 238 1 -0.8472 28.90831 11.67910 7.13534 -1 0 0 + 713 238 2 0.4236 29.80783 11.81981 6.72185 -1 0 0 + 714 238 2 0.4236 28.34177 11.12904 6.52182 -1 0 0 + 715 239 1 -0.8472 15.67854 27.80087 20.40915 1 -1 0 + 716 239 2 0.4236 15.96525 28.55811 19.82235 1 -1 0 + 717 239 2 0.4236 16.44143 27.16584 20.53048 1 -1 0 + 718 240 1 -0.8472 11.47125 8.69000 1.78828 0 0 0 + 719 240 2 0.4236 10.66305 9.18052 2.11417 0 0 0 + 720 240 2 0.4236 11.21383 8.08927 1.03142 0 0 0 + 721 241 1 -0.8472 35.28908 10.28448 26.21504 -1 0 0 + 722 241 2 0.4236 35.21125 9.44014 26.74512 -1 0 0 + 723 241 2 0.4236 0.39578 10.91856 26.68628 0 0 0 + 724 242 1 -0.8472 15.12386 29.33502 22.69833 1 1 0 + 725 242 2 0.4236 14.19377 29.70191 22.71562 1 1 0 + 726 242 2 0.4236 15.21039 28.68142 21.94649 1 1 0 + 727 243 1 -0.8472 26.35331 6.84545 25.17133 0 0 0 + 728 243 2 0.4236 25.98539 7.64249 24.69244 0 0 0 + 729 243 2 0.4236 26.23230 6.03072 24.60432 0 0 0 + 730 244 1 -0.8472 17.87784 33.07860 31.33496 1 -1 0 + 731 244 2 0.4236 18.25637 33.78489 30.73679 1 -1 0 + 732 244 2 0.4236 17.58626 33.49307 32.19700 1 
-1 0 + 733 245 1 -0.8472 3.45206 32.22567 21.06903 0 -1 0 + 734 245 2 0.4236 2.96341 32.50549 20.24269 0 -1 0 + 735 245 2 0.4236 3.44745 32.97659 21.72938 0 -1 0 + 736 246 1 -0.8472 34.68849 28.24157 9.56684 0 0 0 + 737 246 2 0.4236 33.83286 28.24004 9.04931 0 0 0 + 738 246 2 0.4236 35.06650 29.16719 9.58322 0 0 0 + 739 247 1 -0.8472 11.83632 33.77069 34.97656 0 0 0 + 740 247 2 0.4236 12.77309 33.76721 34.62670 0 0 0 + 741 247 2 0.4236 11.57656 32.84252 35.24295 0 0 0 + 742 248 1 -0.8472 27.98976 23.64368 17.28664 -1 0 0 + 743 248 2 0.4236 27.31344 24.27555 17.66514 -1 0 0 + 744 248 2 0.4236 27.85693 22.73581 17.68425 -1 0 0 +745 249 1 -0.8472 30.21555 28.59820 35.32079 -1 0 0 +746 249 2 0.4236 31.13658 28.35743 0.17971 -1 0 1 +747 249 2 0.4236 30.14340 28.45205 34.33419 -1 0 0 + 748 250 1 -0.8472 21.89279 27.37113 7.14605 0 0 0 + 749 250 2 0.4236 22.43085 28.04298 7.65503 0 0 0 + 750 250 2 0.4236 22.50662 26.70858 6.71689 0 0 0 + 751 251 1 -0.8472 1.75662 32.96857 9.67410 0 -1 0 + 752 251 2 0.4236 2.22776 33.08964 8.80040 0 -1 0 + 753 251 2 0.4236 2.24821 33.46509 10.38946 0 -1 0 + 754 252 1 -0.8472 0.21094 25.70035 3.19371 1 -1 0 + 755 252 2 0.4236 0.74244 26.43217 3.62019 1 -1 0 + 756 252 2 0.4236 0.80652 24.91764 3.01303 1 -1 0 + 757 253 1 -0.8472 13.89378 31.51483 7.21453 0 0 0 + 758 253 2 0.4236 14.54187 30.75755 7.13407 0 0 0 + 759 253 2 0.4236 13.61470 31.81312 6.30180 0 0 0 + 760 254 1 -0.8472 1.19622 17.23496 10.17207 1 0 0 + 761 254 2 0.4236 1.84313 16.48876 10.32903 1 0 0 + 762 254 2 0.4236 1.44494 17.72104 9.33431 1 0 0 + 763 255 1 -0.8472 26.39631 29.97190 16.79257 0 0 0 + 764 255 2 0.4236 25.56003 30.21629 16.30184 0 0 0 + 765 255 2 0.4236 27.09337 30.67060 16.63179 0 0 0 + 766 256 1 -0.8472 6.05191 32.39660 30.18509 0 0 0 + 767 256 2 0.4236 6.81186 32.75954 30.72424 0 0 0 + 768 256 2 0.4236 6.40726 31.96924 29.35378 0 0 0 + 769 257 1 -0.8472 26.10814 21.66842 1.29484 -1 0 0 + 770 257 2 0.4236 25.67123 21.86200 2.17324 -1 0 0 + 771 257 2 0.4236 
25.60231 22.12525 0.56313 -1 0 0 + 772 258 1 -0.8472 17.44295 21.30487 6.11779 0 0 0 + 773 258 2 0.4236 17.34023 22.27327 6.34498 0 0 0 + 774 258 2 0.4236 17.42293 21.19161 5.12446 0 0 0 + 775 259 1 -0.8472 28.41502 27.09376 17.12429 0 0 0 + 776 259 2 0.4236 29.35136 27.32828 16.86315 0 0 0 + 777 259 2 0.4236 27.82586 27.12578 16.31691 0 0 0 + 778 260 1 -0.8472 31.30417 34.71417 13.44331 0 -1 0 + 779 260 2 0.4236 30.92816 34.87480 14.35586 0 -1 0 + 780 260 2 0.4236 31.33151 33.73110 13.26235 0 -1 0 + 781 261 1 -0.8472 17.83189 30.37504 2.80681 1 0 0 + 782 261 2 0.4236 18.77927 30.61759 2.59797 1 0 0 + 783 261 2 0.4236 17.21440 30.96079 2.28189 1 0 0 + 784 262 1 -0.8472 21.21362 4.88439 14.56061 0 0 0 + 785 262 2 0.4236 21.28865 5.64522 15.20511 0 0 0 + 786 262 2 0.4236 20.95119 5.23403 13.66124 0 0 0 + 787 263 1 -0.8472 2.58160 4.47678 29.57948 0 0 0 + 788 263 2 0.4236 1.78431 3.98285 29.23263 0 0 0 + 789 263 2 0.4236 3.10038 3.88258 30.19406 0 0 0 + 790 264 1 -0.8472 13.99926 19.82145 25.19090 0 0 0 + 791 264 2 0.4236 13.52302 20.12052 26.01778 0 0 0 + 792 264 2 0.4236 14.97235 20.03772 25.27009 0 0 0 + 793 265 1 -0.8472 31.23575 21.27430 20.21443 0 0 0 + 794 265 2 0.4236 30.98824 22.03691 19.61687 0 0 0 + 795 265 2 0.4236 31.77445 21.61562 20.98467 0 0 0 + 796 266 1 -0.8472 5.98967 23.61218 29.50507 0 0 0 + 797 266 2 0.4236 6.97700 23.45436 29.48877 0 0 0 + 798 266 2 0.4236 5.74003 24.23844 28.76653 0 0 0 + 799 267 1 -0.8472 34.97950 3.70445 28.21894 -1 0 0 + 800 267 2 0.4236 34.34909 2.94118 28.07776 -1 0 0 + 801 267 2 0.4236 0.32312 3.53113 27.72144 0 0 0 + 802 268 1 -0.8472 23.90763 26.40710 10.43250 0 0 0 + 803 268 2 0.4236 23.14485 27.05044 10.49733 0 0 0 + 804 268 2 0.4236 24.63427 26.80687 9.87378 0 0 0 + 805 269 1 -0.8472 24.66360 14.21401 8.75088 -1 1 0 + 806 269 2 0.4236 25.55807 13.80139 8.57868 -1 1 0 + 807 269 2 0.4236 23.94424 13.56083 8.51463 -1 1 0 + 808 270 1 -0.8472 26.76202 32.32885 18.59478 -1 -1 0 + 809 270 2 0.4236 27.04977 33.25283 18.34289 
-1 -1 0 + 810 270 2 0.4236 25.76632 32.26114 18.53211 -1 -1 0 + 811 271 1 -0.8472 15.81595 4.45782 25.29065 0 0 0 + 812 271 2 0.4236 15.00012 4.36334 24.72016 0 0 0 + 813 271 2 0.4236 16.07252 5.42260 25.34818 0 0 0 + 814 272 1 -0.8472 19.18223 10.06356 23.18200 0 0 0 + 815 272 2 0.4236 18.80611 10.71294 23.84292 0 0 0 + 816 272 2 0.4236 20.15560 10.24987 23.04858 0 0 0 + 817 273 1 -0.8472 16.92076 8.80930 1.39624 0 0 0 + 818 273 2 0.4236 17.48546 8.47710 0.64076 0 0 0 + 819 273 2 0.4236 16.01226 8.39523 1.34044 0 0 0 + 820 274 1 -0.8472 22.40156 16.29663 33.08332 0 -1 0 + 821 274 2 0.4236 21.63917 15.71905 32.79159 0 -1 0 + 822 274 2 0.4236 22.94970 15.80750 33.76171 0 -1 0 + 823 275 1 -0.8472 8.68261 34.48827 6.98529 1 0 0 + 824 275 2 0.4236 9.60677 34.43356 7.36327 1 0 0 + 825 275 2 0.4236 8.71088 34.28969 6.00564 1 0 0 + 826 276 1 -0.8472 23.95137 6.80440 13.70629 0 -1 0 + 827 276 2 0.4236 24.01394 5.80664 13.68403 0 -1 0 + 828 276 2 0.4236 24.04121 7.12303 14.64986 0 -1 0 + 829 277 1 -0.8472 9.01439 30.12792 34.41532 0 0 0 + 830 277 2 0.4236 8.30096 29.42784 34.38629 0 0 0 + 831 277 2 0.4236 9.71944 29.91353 33.73938 0 0 0 + 832 278 1 -0.8472 26.45031 33.55297 7.08366 -1 -1 0 + 833 278 2 0.4236 25.78208 32.93952 6.66283 -1 -1 0 + 834 278 2 0.4236 26.50126 33.36762 8.06498 -1 -1 0 + 835 279 1 -0.8472 30.88800 23.18806 18.39688 0 0 0 + 836 279 2 0.4236 29.93334 23.24008 18.10384 0 0 0 + 837 279 2 0.4236 31.48270 23.16839 17.59322 0 0 0 +838 280 1 -0.8472 10.52809 12.54265 34.90389 0 0 0 +839 280 2 0.4236 11.44533 12.14846 34.96061 0 0 0 +840 280 2 0.4236 10.46670 13.33299 0.06628 0 0 1 + 841 281 1 -0.8472 3.63801 19.43628 9.70987 1 0 0 + 842 281 2 0.4236 2.97661 19.22892 8.98912 1 0 0 + 843 281 2 0.4236 4.55252 19.50946 9.31205 1 0 0 + 844 282 1 -0.8472 24.55168 32.70298 5.53394 1 0 0 + 845 282 2 0.4236 25.10629 32.33979 4.78533 1 0 0 + 846 282 2 0.4236 23.65660 32.25716 5.53235 1 0 0 + 847 283 1 -0.8472 11.81055 0.45139 5.96055 0 1 0 + 848 283 2 0.4236 11.39065 
35.39717 6.67428 0 0 0 + 849 283 2 0.4236 12.78463 0.56502 6.15590 0 1 0 +850 284 1 -0.8472 7.05422 31.92915 0.30939 0 -1 0 +851 284 2 0.4236 7.50991 32.81916 0.32211 0 -1 0 +852 284 2 0.4236 7.60255 31.28148 35.22760 0 -1 -1 + 853 285 1 -0.8472 4.88099 2.44705 3.21183 1 1 0 + 854 285 2 0.4236 5.10191 2.23231 4.16317 1 1 0 + 855 285 2 0.4236 5.19600 3.37148 2.99700 1 1 0 + 856 286 1 -0.8472 7.47945 15.15365 10.29372 1 0 0 + 857 286 2 0.4236 7.03313 15.93251 10.73429 1 0 0 + 858 286 2 0.4236 7.09594 15.02313 9.37947 1 0 0 + 859 287 1 -0.8472 6.26659 26.90479 24.54665 0 0 0 + 860 287 2 0.4236 5.54453 27.04075 25.22493 0 0 0 + 861 287 2 0.4236 6.35642 27.72783 23.98591 0 0 0 + 862 288 1 -0.8472 7.15187 18.20123 25.54502 0 0 0 + 863 288 2 0.4236 6.48538 17.85437 26.20487 0 0 0 + 864 288 2 0.4236 7.52394 17.43919 25.01512 0 0 0 + 865 289 1 -0.8472 15.23808 32.91772 4.26238 0 0 0 + 866 289 2 0.4236 14.28204 32.86755 4.55117 0 0 0 + 867 289 2 0.4236 15.32351 32.56635 3.33007 0 0 0 + 868 290 1 -0.8472 13.90041 29.69758 15.32527 -1 0 0 + 869 290 2 0.4236 14.48236 29.05538 14.82643 -1 0 0 + 870 290 2 0.4236 13.21933 29.18973 15.85269 -1 0 0 + 871 291 1 -0.8472 0.23593 14.77991 15.99345 1 0 0 + 872 291 2 0.4236 35.16151 14.00125 16.23084 0 0 0 + 873 291 2 0.4236 35.18896 15.61239 15.96630 0 0 0 + 874 292 1 -0.8472 9.98786 25.23095 12.26937 1 0 0 + 875 292 2 0.4236 10.02909 26.10849 11.79168 1 0 0 + 876 292 2 0.4236 10.18929 25.36950 13.23899 1 0 0 + 877 293 1 -0.8472 21.45982 13.20463 15.01415 0 0 0 + 878 293 2 0.4236 21.82643 13.13504 14.08639 0 0 0 + 879 293 2 0.4236 20.48493 12.98240 15.00483 0 0 0 + 880 294 1 -0.8472 31.10796 22.99984 9.65674 0 0 0 + 881 294 2 0.4236 30.36879 23.52310 10.08072 0 0 0 + 882 294 2 0.4236 31.98714 23.41399 9.89228 0 0 0 + 883 295 1 -0.8472 13.25615 9.90592 3.33739 0 0 0 + 884 295 2 0.4236 13.70857 10.67615 2.88796 0 0 0 + 885 295 2 0.4236 12.62006 9.47384 2.69813 0 0 0 + 886 296 1 -0.8472 10.30305 4.65218 17.64093 1 0 0 + 887 296 2 0.4236 
10.46972 4.28256 18.55501 1 0 0 + 888 296 2 0.4236 11.16248 4.67871 17.13035 1 0 0 + 889 297 1 -0.8472 28.87774 21.47687 33.75214 -1 0 0 + 890 297 2 0.4236 28.59667 20.77739 34.40919 -1 0 0 + 891 297 2 0.4236 28.64741 21.17648 32.82658 -1 0 0 + 892 298 1 -0.8472 17.68653 22.52768 26.05691 0 0 0 + 893 298 2 0.4236 18.40542 23.22136 26.01338 0 0 0 + 894 298 2 0.4236 17.66622 22.12564 26.97230 0 0 0 + 895 299 1 -0.8472 35.32860 15.35870 21.60921 0 0 0 + 896 299 2 0.4236 34.37125 15.41320 21.32556 0 0 0 + 897 299 2 0.4236 0.36572 14.95613 20.87262 1 0 0 + 898 300 1 -0.8472 34.81358 9.91603 7.88012 -1 1 0 + 899 300 2 0.4236 34.34643 10.38273 7.12917 -1 1 0 + 900 300 2 0.4236 34.39397 10.17956 8.74872 -1 1 0 + 901 301 1 -0.8472 25.98823 17.72835 34.76824 0 1 0 + 902 301 2 0.4236 25.46157 18.42882 34.28669 0 1 0 + 903 301 2 0.4236 25.38292 16.97587 35.02775 0 1 0 + 904 302 1 -0.8472 1.10644 14.44721 19.28866 0 0 0 + 905 302 2 0.4236 1.56807 13.69242 19.75465 0 0 0 + 906 302 2 0.4236 1.73043 14.86132 18.62601 0 0 0 + 907 303 1 -0.8472 23.07025 7.39359 29.72495 -1 0 0 + 908 303 2 0.4236 22.62092 7.30025 28.83650 -1 0 0 + 909 303 2 0.4236 23.24054 6.48626 30.10931 -1 0 0 + 910 304 1 -0.8472 10.52323 33.05449 2.56148 -1 0 0 + 911 304 2 0.4236 10.82351 34.00113 2.44456 -1 0 0 + 912 304 2 0.4236 10.86668 32.49988 1.80358 -1 0 0 + 913 305 1 -0.8472 32.88033 27.86819 13.04648 -1 0 0 + 914 305 2 0.4236 33.46970 28.54401 12.60392 -1 0 0 + 915 305 2 0.4236 33.12675 26.95244 12.72930 -1 0 0 + 916 306 1 -0.8472 28.86382 12.21670 3.75354 0 0 0 + 917 306 2 0.4236 28.56770 11.49284 4.37666 0 0 0 + 918 306 2 0.4236 28.23838 12.26251 2.97467 0 0 0 + 919 307 1 -0.8472 23.12120 7.48978 34.39303 0 0 0 + 920 307 2 0.4236 23.05104 7.80962 35.33787 0 0 0 + 921 307 2 0.4236 23.94307 6.92935 34.29101 0 0 0 + 922 308 1 -0.8472 23.04919 24.12205 3.43733 0 -1 0 + 923 308 2 0.4236 23.21646 24.95901 3.95832 0 -1 0 + 924 308 2 0.4236 22.43224 23.52672 3.95201 0 -1 0 + 925 309 1 -0.8472 32.92674 24.00762 
30.56994 -1 0 0 + 926 309 2 0.4236 33.71882 24.55051 30.84902 -1 0 0 + 927 309 2 0.4236 32.11989 24.59624 30.52056 -1 0 0 + 928 310 1 -0.8472 3.45535 11.74575 10.93216 0 0 0 + 929 310 2 0.4236 2.61810 11.84889 11.46912 0 0 0 + 930 310 2 0.4236 4.12115 12.43525 11.21716 0 0 0 + 931 311 1 -0.8472 18.03058 4.40786 5.24725 0 0 0 + 932 311 2 0.4236 18.51841 3.54031 5.15058 0 0 0 + 933 311 2 0.4236 17.13139 4.24134 5.65181 0 0 0 + 934 312 1 -0.8472 21.91775 10.67527 23.00739 0 0 0 + 935 312 2 0.4236 21.50019 11.47111 23.44584 0 0 0 + 936 312 2 0.4236 22.91139 10.78644 22.99172 0 0 0 + 937 313 1 -0.8472 27.43171 31.48689 23.76096 0 0 0 + 938 313 2 0.4236 27.78240 32.24630 23.21302 0 0 0 + 939 313 2 0.4236 28.19676 30.99143 24.17225 0 0 0 + 940 314 1 -0.8472 31.13317 15.04734 2.15137 -1 0 0 + 941 314 2 0.4236 31.39347 14.25637 2.70508 -1 0 0 + 942 314 2 0.4236 30.55850 15.65965 2.69426 -1 0 0 + 943 315 1 -0.8472 26.48284 26.46164 29.23316 0 -1 0 + 944 315 2 0.4236 26.29994 27.37877 29.58724 0 -1 0 + 945 315 2 0.4236 25.62067 25.96515 29.13256 0 -1 0 + 946 316 1 -0.8472 31.22165 24.17798 24.58738 0 1 0 + 947 316 2 0.4236 31.23699 25.14094 24.31833 0 1 0 + 948 316 2 0.4236 31.44662 23.60757 23.79747 0 1 0 + 949 317 1 -0.8472 2.23033 8.39736 11.70154 1 0 0 + 950 317 2 0.4236 2.21591 7.43318 11.96640 1 0 0 + 951 317 2 0.4236 1.68011 8.52458 10.87630 1 0 0 + 952 318 1 -0.8472 10.30355 13.69355 7.45075 0 0 0 + 953 318 2 0.4236 9.95467 13.20339 6.65202 0 0 0 + 954 318 2 0.4236 11.01804 14.33182 7.16427 0 0 0 + 955 319 1 -0.8472 19.09624 27.27097 33.96412 1 -1 0 + 956 319 2 0.4236 18.28761 27.58882 33.46913 1 -1 0 + 957 319 2 0.4236 19.40262 27.98394 34.59479 1 -1 0 + 958 320 1 -0.8472 21.98400 20.91583 33.60592 -1 0 0 + 959 320 2 0.4236 22.11925 21.66026 32.95210 -1 0 0 + 960 320 2 0.4236 21.87187 21.29511 34.52436 -1 0 0 + 961 321 1 -0.8472 8.05909 10.61317 31.65640 1 0 0 + 962 321 2 0.4236 8.44206 9.70136 31.80450 1 0 0 + 963 321 2 0.4236 7.46372 10.59818 30.85313 1 0 0 + 964 
322 1 -0.8472 9.09413 18.32337 0.84811 0 0 0 + 965 322 2 0.4236 9.41231 18.00449 1.74088 0 0 0 + 966 322 2 0.4236 8.81473 17.53907 0.29424 0 0 0 + 967 323 1 -0.8472 13.25569 25.97059 14.21521 0 0 0 + 968 323 2 0.4236 13.55824 26.21604 15.13616 0 0 0 + 969 323 2 0.4236 12.33025 25.59418 14.25764 0 0 0 + 970 324 1 -0.8472 33.13584 9.19316 24.05826 -1 0 0 + 971 324 2 0.4236 32.63026 9.79659 23.44165 -1 0 0 + 972 324 2 0.4236 34.11588 9.27845 23.87875 -1 0 0 + 973 325 1 -0.8472 0.58676 30.79636 7.10521 0 0 0 + 974 325 2 0.4236 0.12738 30.93416 7.98267 0 0 0 + 975 325 2 0.4236 1.25039 30.05311 7.18950 0 0 0 + 976 326 1 -0.8472 16.60852 24.00687 6.45844 0 -1 0 + 977 326 2 0.4236 16.74085 24.60096 5.66503 0 -1 0 + 978 326 2 0.4236 16.73950 24.53616 7.29670 0 -1 0 + 979 327 1 -0.8472 21.78272 18.23485 15.29826 -1 0 0 + 980 327 2 0.4236 21.29209 17.36498 15.24772 -1 0 0 + 981 327 2 0.4236 21.47580 18.74440 16.10210 -1 0 0 + 982 328 1 -0.8472 1.07556 17.90780 22.50149 0 0 0 + 983 328 2 0.4236 0.66446 17.09563 22.08753 0 0 0 + 984 328 2 0.4236 2.00753 18.02178 22.15740 0 0 0 + 985 329 1 -0.8472 8.51481 8.69543 13.44579 1 1 0 + 986 329 2 0.4236 8.03320 7.99947 12.91320 1 1 0 + 987 329 2 0.4236 8.13923 9.59736 13.23261 1 1 0 + 988 330 1 -0.8472 28.35224 22.67556 2.57766 0 0 0 + 989 330 2 0.4236 27.72872 22.28765 1.89891 0 0 0 + 990 330 2 0.4236 28.76046 21.93781 3.11531 0 0 0 + 991 331 1 -0.8472 17.15184 8.01186 31.33613 0 0 0 + 992 331 2 0.4236 16.77891 8.44369 32.15737 0 0 0 + 993 331 2 0.4236 16.96330 8.58870 30.54137 0 0 0 + 994 332 1 -0.8472 29.37145 32.28297 30.40766 0 0 0 + 995 332 2 0.4236 28.59990 31.77818 30.79475 0 0 0 + 996 332 2 0.4236 29.34145 32.22393 29.40986 0 0 0 + 997 333 1 -0.8472 35.30101 34.49612 31.31065 -1 0 0 + 998 333 2 0.4236 0.55375 34.83822 31.86447 0 0 0 + 999 333 2 0.4236 34.45007 34.92923 31.60762 -1 0 0 + 1000 334 1 -0.8472 14.09936 2.13015 18.09005 0 0 0 + 1001 334 2 0.4236 13.86459 1.50779 17.34342 0 0 0 + 1002 334 2 0.4236 14.07709 1.63332 
18.95758 0 0 0 + 1003 335 1 -0.8472 23.74810 3.68755 32.95671 0 1 0 + 1004 335 2 0.4236 23.65696 3.88556 31.98078 0 1 0 + 1005 335 2 0.4236 22.86999 3.83831 33.41082 0 1 0 + 1006 336 1 -0.8472 31.09997 21.56673 28.07556 -1 0 0 + 1007 336 2 0.4236 31.85938 21.15085 28.57586 -1 0 0 + 1008 336 2 0.4236 31.35399 22.49164 27.79269 -1 0 0 + 1009 337 1 -0.8472 20.64083 15.69267 14.82107 0 0 0 + 1010 337 2 0.4236 20.01411 15.85195 15.58383 0 0 0 + 1011 337 2 0.4236 21.11190 14.82056 14.95329 0 0 0 + 1012 338 1 -0.8472 20.28701 15.13724 31.94210 0 0 0 + 1013 338 2 0.4236 20.65117 14.38373 31.39480 0 0 0 + 1014 338 2 0.4236 19.30068 15.01933 32.05716 0 0 0 + 1015 339 1 -0.8472 32.47304 33.05421 7.42147 0 -1 0 + 1016 339 2 0.4236 31.48483 33.14393 7.54532 0 -1 0 + 1017 339 2 0.4236 32.72329 32.08609 7.42835 0 -1 0 + 1018 340 1 -0.8472 25.63615 27.76429 13.62009 0 -1 0 + 1019 340 2 0.4236 26.57313 27.51515 13.86499 0 -1 0 + 1020 340 2 0.4236 25.13047 26.94246 13.35778 0 -1 0 + 1021 341 1 -0.8472 0.75304 4.33827 20.97066 1 0 0 + 1022 341 2 0.4236 0.72114 4.46730 19.97956 1 0 0 + 1023 341 2 0.4236 1.06157 5.18343 21.40709 1 0 0 + 1024 342 1 -0.8472 1.15960 21.09656 13.17958 1 1 0 + 1025 342 2 0.4236 1.13952 21.53055 12.27891 1 1 0 + 1026 342 2 0.4236 1.23756 20.10565 13.07037 1 1 0 + 1027 343 1 -0.8472 8.63426 29.66811 25.42818 0 0 0 + 1028 343 2 0.4236 8.28239 30.40249 24.84784 0 0 0 + 1029 343 2 0.4236 8.72735 28.83186 24.88783 0 0 0 + 1030 344 1 -0.8472 30.76174 20.63256 25.14989 0 0 0 + 1031 344 2 0.4236 31.51870 20.90756 24.55714 0 0 0 + 1032 344 2 0.4236 30.99303 20.83502 26.10143 0 0 0 + 1033 345 1 -0.8472 4.24571 30.79640 2.94362 0 0 0 + 1034 345 2 0.4236 4.17270 31.39155 2.14334 0 0 0 + 1035 345 2 0.4236 4.36412 29.84944 2.64506 0 0 0 + 1036 346 1 -0.8472 15.42989 29.35349 26.25283 -1 0 0 + 1037 346 2 0.4236 16.28644 29.25844 25.74568 -1 0 0 + 1038 346 2 0.4236 14.71000 28.83510 25.79131 -1 0 0 + 1039 347 1 -0.8472 15.48947 26.89872 10.36047 0 0 0 + 1040 347 2 0.4236 
15.06016 26.08318 10.74851 0 0 0 + 1041 347 2 0.4236 16.47729 26.75709 10.29670 0 0 0 + 1042 348 1 -0.8472 3.69092 24.79802 1.79703 1 -1 0 + 1043 348 2 0.4236 2.92286 24.81460 1.15692 1 -1 0 + 1044 348 2 0.4236 4.44248 24.26962 1.40222 1 -1 0 + 1045 349 1 -0.8472 30.63584 20.42155 0.68360 -1 0 0 + 1046 349 2 0.4236 29.66630 20.49948 0.45154 -1 0 0 + 1047 349 2 0.4236 31.17191 21.00886 0.07726 -1 0 0 + 1048 350 1 -0.8472 23.07817 3.25695 10.19922 0 1 0 + 1049 350 2 0.4236 23.13072 3.67899 9.29418 0 1 0 + 1050 350 2 0.4236 22.12223 3.06798 10.42381 0 1 0 + 1051 351 1 -0.8472 35.17355 13.92467 30.41812 -1 0 0 + 1052 351 2 0.4236 34.42649 13.31598 30.68525 -1 0 0 + 1053 351 2 0.4236 35.48891 13.68248 29.50057 -1 0 0 + 1054 352 1 -0.8472 28.88786 35.07154 21.77560 0 -1 0 + 1055 352 2 0.4236 29.82352 35.08183 22.12833 0 -1 0 + 1056 352 2 0.4236 28.66001 34.14976 21.46196 0 -1 0 + 1057 353 1 -0.8472 11.05103 25.60242 26.28306 0 0 0 + 1058 353 2 0.4236 11.23918 24.62323 26.20763 0 0 0 + 1059 353 2 0.4236 10.10113 25.78009 26.02597 0 0 0 + 1060 354 1 -0.8472 31.63339 18.14090 19.44147 0 0 0 + 1061 354 2 0.4236 31.85189 18.85252 20.10916 0 0 0 + 1062 354 2 0.4236 32.47695 17.81105 19.01777 0 0 0 + 1063 355 1 -0.8472 3.31136 2.63236 12.33445 0 0 0 + 1064 355 2 0.4236 2.41026 2.87346 11.97410 0 0 0 + 1065 355 2 0.4236 3.44117 3.06706 13.22559 0 0 0 + 1066 356 1 -0.8472 12.24600 6.49581 14.08200 1 -1 0 + 1067 356 2 0.4236 13.15730 6.37133 13.68960 1 -1 0 + 1068 356 2 0.4236 12.00228 7.46532 14.05786 1 -1 0 + 1069 357 1 -0.8472 11.50548 16.90283 14.04964 0 0 0 + 1070 357 2 0.4236 12.29833 16.85131 13.44246 0 0 0 + 1071 357 2 0.4236 11.71965 16.45258 14.91643 0 0 0 + 1072 358 1 -0.8472 35.43225 34.57807 0.88602 -1 -1 0 + 1073 358 2 0.4236 34.78145 34.97858 0.24105 -1 -1 0 + 1074 358 2 0.4236 0.05551 33.61013 0.67092 0 -1 0 + 1075 359 1 -0.8472 32.90157 21.64637 32.04479 0 0 0 + 1076 359 2 0.4236 32.92099 22.39953 31.38732 0 0 0 + 1077 359 2 0.4236 32.11375 21.05995 31.85667 0 0 0 
+ 1078 360 1 -0.8472 28.66651 4.66095 28.42273 -1 0 0 + 1079 360 2 0.4236 29.64436 4.61462 28.62669 -1 0 0 + 1080 360 2 0.4236 28.36393 3.78767 28.04089 -1 0 0 + 1081 361 1 -0.8472 11.79766 16.60969 9.43182 0 0 0 + 1082 361 2 0.4236 12.66888 16.45176 9.89661 0 0 0 + 1083 361 2 0.4236 11.22235 15.79621 9.51672 0 0 0 + 1084 362 1 -0.8472 9.55596 34.02911 10.63320 0 0 0 + 1085 362 2 0.4236 8.61455 34.26813 10.87101 0 0 0 + 1086 362 2 0.4236 9.60738 33.05467 10.41463 0 0 0 + 1087 363 1 -0.8472 22.07096 19.00704 23.45916 0 -1 0 + 1088 363 2 0.4236 22.91480 19.42776 23.12621 0 -1 0 + 1089 363 2 0.4236 21.60867 19.63643 24.08378 0 -1 0 + 1090 364 1 -0.8472 35.05069 19.90937 25.80470 -2 -1 0 + 1091 364 2 0.4236 35.34344 20.47030 25.03040 -2 -1 0 + 1092 364 2 0.4236 34.77122 20.50286 26.55945 -2 -1 0 + 1093 365 1 -0.8472 1.29483 5.44650 7.26933 1 1 0 + 1094 365 2 0.4236 0.45590 5.92938 7.52030 1 1 0 + 1095 365 2 0.4236 1.81961 5.23896 8.09486 1 1 0 + 1096 366 1 -0.8472 33.07892 1.60280 25.15613 0 0 0 + 1097 366 2 0.4236 32.33755 2.22668 25.40327 0 0 0 + 1098 366 2 0.4236 33.70561 2.06250 24.52690 0 0 0 + 1099 367 1 -0.8472 14.22157 33.43622 22.79701 0 0 0 + 1100 367 2 0.4236 14.35642 34.29835 23.28540 0 0 0 + 1101 367 2 0.4236 13.26247 33.16147 22.86432 0 0 0 + 1102 368 1 -0.8472 27.07629 24.91963 8.20535 0 0 0 + 1103 368 2 0.4236 27.19239 24.91058 7.21216 0 0 0 + 1104 368 2 0.4236 26.78492 25.83019 8.49857 0 0 0 + 1105 369 1 -0.8472 7.24937 20.22566 4.06221 0 0 0 + 1106 369 2 0.4236 6.58398 19.52352 4.31566 0 0 0 + 1107 369 2 0.4236 7.35965 20.23706 3.06839 0 0 0 + 1108 370 1 -0.8472 13.41227 1.40249 12.73458 0 0 0 + 1109 370 2 0.4236 13.93745 1.43607 11.88427 0 0 0 + 1110 370 2 0.4236 12.54553 1.88546 12.61030 0 0 0 + 1111 371 1 -0.8472 0.85266 16.38948 27.38175 0 -1 0 + 1112 371 2 0.4236 1.08079 16.82099 28.25450 0 -1 0 + 1113 371 2 0.4236 1.16521 16.97097 26.63066 0 -1 0 + 1114 372 1 -0.8472 4.02418 25.29508 4.78828 1 1 0 + 1115 372 2 0.4236 3.89068 25.61801 3.85137 1 1 
0 + 1116 372 2 0.4236 4.33792 24.34571 4.77208 1 1 0 + 1117 373 1 -0.8472 16.59052 15.77188 19.36735 0 0 0 + 1118 373 2 0.4236 16.75530 16.39211 20.13429 0 0 0 + 1119 373 2 0.4236 17.42140 15.24909 19.17700 0 0 0 + 1120 374 1 -0.8472 33.13843 25.82196 3.97841 0 0 0 + 1121 374 2 0.4236 32.77967 24.90515 4.15352 0 0 0 + 1122 374 2 0.4236 34.05286 25.75158 3.57992 0 0 0 + 1123 375 1 -0.8472 31.57661 1.02930 18.56306 -1 1 0 + 1124 375 2 0.4236 31.87941 0.41338 19.29029 -1 1 0 + 1125 375 2 0.4236 31.65157 0.56519 17.68046 -1 1 0 + 1126 376 1 -0.8472 2.96164 16.29354 6.71299 0 0 0 + 1127 376 2 0.4236 3.05881 15.49032 7.30060 0 0 0 + 1128 376 2 0.4236 3.23467 16.05902 5.78005 0 0 0 + 1129 377 1 -0.8472 20.34379 33.69865 7.89455 0 -1 0 + 1130 377 2 0.4236 20.39438 32.70170 7.83518 0 -1 0 + 1131 377 2 0.4236 19.51518 33.96147 8.38879 0 -1 0 + 1132 378 1 -0.8472 19.37439 16.80508 19.73572 0 1 0 + 1133 378 2 0.4236 20.27469 17.21358 19.88587 0 1 0 + 1134 378 2 0.4236 19.36459 15.87542 20.10391 0 1 0 + 1135 379 1 -0.8472 19.23315 9.13979 28.88985 -1 0 0 + 1136 379 2 0.4236 20.13295 9.57333 28.93806 -1 0 0 + 1137 379 2 0.4236 18.76709 9.24056 29.76884 -1 0 0 + 1138 380 1 -0.8472 27.72787 22.91683 10.45990 -1 -1 0 + 1139 380 2 0.4236 27.54902 23.44015 9.62678 -1 -1 0 + 1140 380 2 0.4236 28.28042 22.11378 10.23690 -1 -1 0 + 1141 381 1 -0.8472 6.32840 1.89288 33.44010 0 0 0 + 1142 381 2 0.4236 6.55965 2.59979 32.77171 0 0 0 + 1143 381 2 0.4236 6.96546 1.94041 34.20941 0 0 0 + 1144 382 1 -0.8472 5.63638 5.55085 15.38096 0 1 0 + 1145 382 2 0.4236 6.60617 5.51829 15.62260 0 1 0 + 1146 382 2 0.4236 5.27855 6.46702 15.56128 0 1 0 + 1147 383 1 -0.8472 3.24229 34.95287 10.93468 1 -1 0 + 1148 383 2 0.4236 2.73508 0.26278 11.21110 1 0 0 + 1149 383 2 0.4236 4.13670 34.94890 11.38181 1 -1 0 + 1150 384 1 -0.8472 10.54404 28.17006 5.04204 0 -1 0 + 1151 384 2 0.4236 10.79734 28.87476 5.70473 0 -1 0 + 1152 384 2 0.4236 9.58213 27.92638 5.16590 0 -1 0 + 1153 385 1 -0.8472 17.00917 14.06687 
8.09635 1 1 0 + 1154 385 2 0.4236 17.37361 14.76031 8.71785 1 1 0 + 1155 385 2 0.4236 16.55338 14.51499 7.32736 1 1 0 + 1156 386 1 -0.8472 6.62644 33.85271 2.78808 0 -1 0 + 1157 386 2 0.4236 5.73917 34.02559 3.21562 0 -1 0 + 1158 386 2 0.4236 6.61962 34.20458 1.85208 0 -1 0 + 1159 387 1 -0.8472 29.76540 24.76348 2.86127 0 -1 0 + 1160 387 2 0.4236 29.15412 23.97946 2.75344 0 -1 0 + 1161 387 2 0.4236 30.61943 24.46419 3.28677 0 -1 0 + 1162 388 1 -0.8472 13.59066 30.37117 2.31161 0 0 0 + 1163 388 2 0.4236 12.65166 30.51884 2.62207 0 0 0 + 1164 388 2 0.4236 14.05913 29.75730 2.94695 0 0 0 + 1165 389 1 -0.8472 12.39821 27.47130 30.65704 0 -1 0 + 1166 389 2 0.4236 12.99120 27.19012 29.90254 0 -1 0 + 1167 389 2 0.4236 12.45715 26.79959 31.39547 0 -1 0 + 1168 390 1 -0.8472 6.06136 34.05062 23.58964 1 0 0 + 1169 390 2 0.4236 5.93233 34.43746 22.67660 1 0 0 + 1170 390 2 0.4236 7.01612 34.15998 23.86610 1 0 0 + 1171 391 1 -0.8472 14.90784 2.14573 10.34168 0 0 0 + 1172 391 2 0.4236 15.85071 2.06423 10.01876 0 0 0 + 1173 391 2 0.4236 14.45030 2.88711 9.85081 0 0 0 + 1174 392 1 -0.8472 5.95470 23.52370 19.25445 1 0 0 + 1175 392 2 0.4236 6.57166 22.96298 19.80661 1 0 0 + 1176 392 2 0.4236 5.74564 23.04724 18.40053 1 0 0 + 1177 393 1 -0.8472 16.53893 22.97687 33.90825 -1 0 0 + 1178 393 2 0.4236 16.96060 22.22410 33.40285 -1 0 0 + 1179 393 2 0.4236 16.93781 23.02835 34.82378 -1 0 0 + 1180 394 1 -0.8472 19.35884 19.30139 25.91860 1 0 0 + 1181 394 2 0.4236 18.36758 19.43312 25.92172 1 0 0 + 1182 394 2 0.4236 19.60298 18.62496 25.22376 1 0 0 + 1183 395 1 -0.8472 17.84105 26.28435 20.60609 0 0 0 + 1184 395 2 0.4236 18.33356 25.48138 20.27048 0 0 0 + 1185 395 2 0.4236 18.48109 26.89968 21.06619 0 0 0 + 1186 396 1 -0.8472 25.39077 19.85526 30.73839 -1 0 0 + 1187 396 2 0.4236 24.77676 19.15798 31.10824 -1 0 0 + 1188 396 2 0.4236 25.40880 20.64357 31.35332 -1 0 0 + 1189 397 1 -0.8472 32.17283 22.92893 4.21570 0 0 0 + 1190 397 2 0.4236 32.63445 22.40316 3.50127 0 0 0 + 1191 397 2 0.4236 
32.09738 22.37237 5.04307 0 0 0 + 1192 398 1 -0.8472 17.29312 0.37281 33.34237 1 0 0 + 1193 398 2 0.4236 16.62031 1.11144 33.38353 1 0 0 + 1194 398 2 0.4236 18.16235 0.73226 33.00297 1 0 0 + 1195 399 1 -0.8472 12.89246 11.41146 34.96369 0 1 0 + 1196 399 2 0.4236 13.81811 11.65819 35.25053 0 1 0 + 1197 399 2 0.4236 12.78729 10.41756 34.99613 0 1 0 + 1198 400 1 -0.8472 6.36277 31.86052 10.27376 2 -1 0 + 1199 400 2 0.4236 6.33011 32.23746 11.19940 2 -1 0 + 1200 400 2 0.4236 7.24273 31.40813 10.12900 2 -1 0 + 1201 401 1 -0.8472 28.38360 3.88865 10.78470 0 1 0 + 1202 401 2 0.4236 28.42858 3.61210 11.74461 0 1 0 + 1203 401 2 0.4236 28.18657 4.86742 10.72869 0 1 0 + 1204 402 1 -0.8472 18.39410 3.79825 25.35360 0 0 0 + 1205 402 2 0.4236 18.47271 2.80161 25.37448 0 0 0 + 1206 402 2 0.4236 17.43266 4.05869 25.44176 0 0 0 + 1207 403 1 -0.8472 30.80900 7.81409 7.07058 0 1 0 + 1208 403 2 0.4236 30.55382 7.48751 7.98062 0 1 0 + 1209 403 2 0.4236 31.80460 7.79968 6.97835 0 1 0 + 1210 404 1 -0.8472 23.51102 5.83799 2.88617 0 0 0 + 1211 404 2 0.4236 23.86573 5.70253 3.81125 0 0 0 + 1212 404 2 0.4236 23.71338 5.03178 2.33025 0 0 0 + 1213 405 1 -0.8472 20.52153 3.94426 31.16343 -1 0 0 + 1214 405 2 0.4236 20.17492 4.52582 31.89939 -1 0 0 + 1215 405 2 0.4236 21.49852 4.11501 31.03575 -1 0 0 +1216 406 1 -0.8472 35.47429 28.06507 0.54549 -1 0 0 +1217 406 2 0.4236 0.72718 27.77804 35.40863 0 0 -1 +1218 406 2 0.4236 0.32616 28.54845 1.34422 0 0 0 + 1219 407 1 -0.8472 25.34340 8.52458 3.30496 0 0 0 + 1220 407 2 0.4236 24.76479 8.25867 4.07597 0 0 0 + 1221 407 2 0.4236 25.95386 7.76826 3.06992 0 0 0 + 1222 408 1 -0.8472 14.03033 20.30541 1.00428 0 0 0 + 1223 408 2 0.4236 14.58182 21.08353 1.30490 0 0 0 + 1224 408 2 0.4236 13.32894 20.10945 1.68953 0 0 0 + 1225 409 1 -0.8472 4.87289 5.75995 12.70282 1 0 0 + 1226 409 2 0.4236 5.73516 6.07464 12.30602 1 0 0 + 1227 409 2 0.4236 5.01108 5.53970 13.66840 1 0 0 + 1228 410 1 -0.8472 13.53566 35.12394 16.50467 1 -1 0 + 1229 410 2 0.4236 13.08941 
0.40195 16.07381 1 0 0 + 1230 410 2 0.4236 12.84428 34.53745 16.92657 1 -1 0 + 1231 411 1 -0.8472 31.28347 13.97579 12.16494 0 0 0 + 1232 411 2 0.4236 30.31706 14.04855 12.41123 0 0 0 + 1233 411 2 0.4236 31.39500 14.17310 11.19100 0 0 0 + 1234 412 1 -0.8472 23.72875 25.77285 13.04432 -1 0 0 + 1235 412 2 0.4236 23.54106 26.23908 12.17983 -1 0 0 + 1236 412 2 0.4236 23.97314 24.82029 12.86304 -1 0 0 + 1237 413 1 -0.8472 18.42520 13.29901 12.67620 0 0 0 + 1238 413 2 0.4236 18.76063 12.84318 13.50059 0 0 0 + 1239 413 2 0.4236 17.47850 13.02572 12.50598 0 0 0 + 1240 414 1 -0.8472 24.38139 21.23148 28.52212 -1 0 0 + 1241 414 2 0.4236 24.87887 20.69331 29.20242 -1 0 0 + 1242 414 2 0.4236 24.93945 21.32433 27.69755 -1 0 0 + 1243 415 1 -0.8472 4.76545 3.74972 28.15333 2 0 0 + 1244 415 2 0.4236 3.95303 4.05898 28.64753 2 0 0 + 1245 415 2 0.4236 4.90570 4.32652 27.34861 2 0 0 + 1246 416 1 -0.8472 31.10729 24.42201 27.24570 -1 0 0 + 1247 416 2 0.4236 30.99608 24.27377 26.26307 -1 0 0 + 1248 416 2 0.4236 30.32437 24.93611 27.59596 -1 0 0 + 1249 417 1 -0.8472 17.27764 19.50978 16.96588 0 1 0 + 1250 417 2 0.4236 16.89080 19.37919 17.87871 0 1 0 + 1251 417 2 0.4236 16.81794 20.27508 16.51545 0 1 0 + 1252 418 1 -0.8472 3.72639 18.97644 12.27267 1 0 0 + 1253 418 2 0.4236 3.70512 19.37343 11.35512 1 0 0 + 1254 418 2 0.4236 2.86172 18.50772 12.45316 1 0 0 + 1255 419 1 -0.8472 20.96515 21.23762 26.79063 0 0 0 + 1256 419 2 0.4236 20.35291 20.50701 26.48848 0 0 0 + 1257 419 2 0.4236 20.95957 21.28545 27.78946 0 0 0 + 1258 420 1 -0.8472 32.52779 5.30124 27.39384 0 1 0 + 1259 420 2 0.4236 32.53926 5.99020 26.66919 0 1 0 + 1260 420 2 0.4236 33.40185 5.31374 27.87950 0 1 0 + 1261 421 1 -0.8472 4.50515 19.02032 2.73901 0 0 0 + 1262 421 2 0.4236 4.98200 18.24270 3.14870 0 0 0 + 1263 421 2 0.4236 3.52800 18.95648 2.94164 0 0 0 +1264 422 1 -0.8472 5.79914 17.68163 34.54127 0 0 0 +1265 422 2 0.4236 5.22820 18.24930 33.94824 0 0 0 +1266 422 2 0.4236 5.67665 17.96493 0.04523 0 0 1 + 1267 423 1 
-0.8472 2.03112 11.27795 4.67878 0 1 0 + 1268 423 2 0.4236 2.55869 11.86871 4.06838 0 1 0 + 1269 423 2 0.4236 1.30126 10.83107 4.16153 0 1 0 + 1270 424 1 -0.8472 21.63764 11.67562 6.00451 0 1 0 + 1271 424 2 0.4236 22.26518 11.45584 5.25763 0 1 0 + 1272 424 2 0.4236 20.83218 11.08572 5.94754 0 1 0 + 1273 425 1 -0.8472 12.73506 6.22561 3.30400 0 -1 0 + 1274 425 2 0.4236 12.99724 6.93528 3.95793 0 -1 0 + 1275 425 2 0.4236 12.40741 6.65328 2.46157 0 -1 0 + 1276 426 1 -0.8472 8.02014 27.69985 5.68973 1 0 0 + 1277 426 2 0.4236 7.57052 27.13867 6.38461 1 0 0 + 1278 426 2 0.4236 7.73250 28.65199 5.79296 1 0 0 + 1279 427 1 -0.8472 25.07348 29.28113 32.08839 -1 0 0 + 1280 427 2 0.4236 25.90142 29.03034 32.58997 -1 0 0 + 1281 427 2 0.4236 24.38203 29.61190 32.73063 -1 0 0 + 1282 428 1 -0.8472 1.22361 34.16286 23.41071 1 -1 0 + 1283 428 2 0.4236 2.07526 34.49875 23.81300 1 -1 0 + 1284 428 2 0.4236 0.45459 34.41914 23.99626 1 -1 0 + 1285 429 1 -0.8472 32.55982 17.30341 1.33505 -1 0 0 + 1286 429 2 0.4236 32.05627 16.45897 1.51747 -1 0 0 + 1287 429 2 0.4236 32.12518 17.79418 0.57993 -1 0 0 + 1288 430 1 -0.8472 23.56130 32.19165 29.59188 0 0 0 + 1289 430 2 0.4236 23.54642 31.84067 30.52811 0 0 0 + 1290 430 2 0.4236 24.39254 32.72925 29.45063 0 0 0 + 1291 431 1 -0.8472 19.51922 6.96283 31.09761 0 0 0 + 1292 431 2 0.4236 19.36239 6.54737 30.20165 0 0 0 + 1293 431 2 0.4236 18.64261 7.22274 31.50252 0 0 0 + 1294 432 1 -0.8472 4.38539 5.20065 20.84759 0 0 0 + 1295 432 2 0.4236 4.95246 5.86004 20.35406 0 0 0 + 1296 432 2 0.4236 4.14576 5.57251 21.74441 0 0 0 + 1297 433 1 -0.8472 31.40867 11.19859 22.79106 0 0 0 + 1298 433 2 0.4236 30.95869 11.02820 21.91445 0 0 0 + 1299 433 2 0.4236 30.74030 11.11633 23.53034 0 0 0 + 1300 434 1 -0.8472 22.15699 13.64693 17.96846 0 0 0 + 1301 434 2 0.4236 21.97115 13.28502 17.05495 0 0 0 + 1302 434 2 0.4236 22.20251 14.64505 17.92862 0 0 0 + 1303 435 1 -0.8472 27.17581 16.07117 32.82215 -1 0 0 + 1304 435 2 0.4236 26.91651 16.04642 31.85667 -1 0 0 + 1305 
435 2 0.4236 26.58410 16.71305 33.30983 -1 0 0 + 1306 436 1 -0.8472 15.66526 24.93973 32.39405 0 -1 0 + 1307 436 2 0.4236 15.85550 24.23681 33.07939 0 -1 0 + 1308 436 2 0.4236 15.76485 24.54877 31.47909 0 -1 0 + 1309 437 1 -0.8472 15.40880 32.12991 30.70836 1 0 0 + 1310 437 2 0.4236 16.32127 32.42798 30.98861 1 0 0 + 1311 437 2 0.4236 15.44004 31.80470 29.76327 1 0 0 + 1312 438 1 -0.8472 16.02646 35.51995 25.85561 1 -1 0 + 1313 438 2 0.4236 16.36022 34.65253 26.22458 1 -1 0 + 1314 438 2 0.4236 15.10674 35.39241 25.48440 1 -1 0 + 1315 439 1 -0.8472 22.60142 17.62162 10.57677 -1 1 0 + 1316 439 2 0.4236 22.16810 17.04837 9.88140 -1 1 0 + 1317 439 2 0.4236 21.98106 17.72970 11.35356 -1 1 0 + 1318 440 1 -0.8472 19.65290 1.47489 32.37803 0 0 0 + 1319 440 2 0.4236 19.94551 2.38837 32.09541 0 0 0 + 1320 440 2 0.4236 19.57422 0.88630 31.57349 0 0 0 + 1321 441 1 -0.8472 29.94701 28.28311 22.62910 -1 0 0 + 1322 441 2 0.4236 29.23937 27.58854 22.75866 -1 0 0 + 1323 441 2 0.4236 30.80353 27.84074 22.36342 -1 0 0 + 1324 442 1 -0.8472 1.27173 25.47080 15.54363 0 -1 0 + 1325 442 2 0.4236 1.03307 25.88871 14.66705 0 -1 0 + 1326 442 2 0.4236 0.46325 25.03048 15.93404 0 -1 0 + 1327 443 1 -0.8472 15.31637 29.31819 6.73777 0 0 0 + 1328 443 2 0.4236 14.94391 28.42485 6.98909 0 0 0 + 1329 443 2 0.4236 16.28023 29.36325 7.00015 0 0 0 + 1330 444 1 -0.8472 32.35725 35.03802 20.71803 0 -1 0 + 1331 444 2 0.4236 33.33712 35.09018 20.91062 0 -1 0 + 1332 444 2 0.4236 31.85345 34.94201 21.57646 0 -1 0 + 1333 445 1 -0.8472 16.00211 9.25317 19.85500 0 0 0 + 1334 445 2 0.4236 16.24217 9.66138 20.73572 0 0 0 + 1335 445 2 0.4236 15.43408 8.44423 20.00632 0 0 0 + 1336 446 1 -0.8472 0.16188 12.47022 34.77387 1 1 0 + 1337 446 2 0.4236 0.18865 12.21972 33.80616 1 1 0 + 1338 446 2 0.4236 1.08690 12.68233 35.08896 1 1 0 + 1339 447 1 -0.8472 10.87982 14.01780 18.67798 0 0 0 + 1340 447 2 0.4236 9.93453 14.26617 18.46646 0 0 0 + 1341 447 2 0.4236 11.22640 14.61264 19.40323 0 0 0 + 1342 448 1 -0.8472 10.15286 
18.86839 10.84315 1 1 0 + 1343 448 2 0.4236 11.01993 18.43152 10.60388 1 1 0 + 1344 448 2 0.4236 9.39604 18.33981 10.45869 1 1 0 + 1345 449 1 -0.8472 27.86066 32.50244 21.02022 -1 0 0 + 1346 449 2 0.4236 27.45447 32.35015 20.11922 -1 0 0 + 1347 449 2 0.4236 28.30747 31.66339 21.33052 -1 0 0 + 1348 450 1 -0.8472 22.34922 4.76719 7.82422 0 1 0 + 1349 450 2 0.4236 23.25130 5.11188 7.56461 0 1 0 + 1350 450 2 0.4236 21.98578 5.31750 8.57588 0 1 0 + 1351 451 1 -0.8472 22.17357 31.29434 5.91224 0 -1 0 + 1352 451 2 0.4236 22.16509 30.44936 5.37759 0 -1 0 + 1353 451 2 0.4236 21.33321 31.35654 6.45064 0 -1 0 + 1354 452 1 -0.8472 30.13033 27.20667 12.21095 0 -1 0 + 1355 452 2 0.4236 30.05303 26.22797 12.02082 0 -1 0 + 1356 452 2 0.4236 31.05488 27.41186 12.53189 0 -1 0 + 1357 453 1 -0.8472 5.00071 18.67552 17.14077 0 1 0 + 1358 453 2 0.4236 4.29325 18.76947 17.84121 0 1 0 + 1359 453 2 0.4236 5.79076 19.23659 17.38766 0 1 0 + 1360 454 1 -0.8472 0.50513 15.22995 32.85253 0 -1 0 + 1361 454 2 0.4236 0.06619 14.95304 31.99779 0 -1 0 + 1362 454 2 0.4236 35.32108 15.30810 33.57172 -1 -1 0 + 1363 455 1 -0.8472 0.36336 21.48947 23.88549 1 -1 0 + 1364 455 2 0.4236 35.37583 21.67758 23.03656 0 -1 0 + 1365 455 2 0.4236 1.28632 21.17034 23.67043 1 -1 0 + 1366 456 1 -0.8472 8.03722 15.22335 26.84029 0 0 0 + 1367 456 2 0.4236 8.80880 15.78502 27.13887 0 0 0 + 1368 456 2 0.4236 7.19774 15.55391 27.27151 0 0 0 + 1369 457 1 -0.8472 34.68466 18.85053 10.89189 0 -1 0 + 1370 457 2 0.4236 0.02666 18.33065 10.79225 1 -1 0 + 1371 457 2 0.4236 34.83965 19.79809 10.61246 0 -1 0 + 1372 458 1 -0.8472 31.33961 22.90028 34.29431 0 0 0 + 1373 458 2 0.4236 32.01330 22.72538 33.57632 0 0 0 + 1374 458 2 0.4236 30.42440 22.68898 33.95121 0 0 0 + 1375 459 1 -0.8472 35.12721 20.42790 21.39380 -1 -1 0 + 1376 459 2 0.4236 35.36489 19.72054 22.05946 -1 -1 0 + 1377 459 2 0.4236 35.03195 20.01479 20.48816 -1 -1 0 + 1378 460 1 -0.8472 17.36832 12.96180 29.52925 0 0 0 + 1379 460 2 0.4236 17.38806 13.28729 28.58391 0 0 
0 + 1380 460 2 0.4236 16.43280 12.70808 29.77490 0 0 0 + 1381 461 1 -0.8472 16.09606 4.35109 20.46593 0 1 0 + 1382 461 2 0.4236 16.85305 4.93396 20.17069 0 1 0 + 1383 461 2 0.4236 16.41625 3.72234 21.17452 0 1 0 + 1384 462 1 -0.8472 13.66607 33.47244 9.21024 0 -1 0 + 1385 462 2 0.4236 14.22920 33.27612 10.01291 0 -1 0 + 1386 462 2 0.4236 13.83657 32.78258 8.50669 0 -1 0 + 1387 463 1 -0.8472 25.16946 0.64360 29.23443 -1 1 0 + 1388 463 2 0.4236 24.70521 0.11676 29.94639 -1 1 0 + 1389 463 2 0.4236 25.62255 1.43400 29.64665 -1 1 0 +1390 464 1 -0.8472 24.81460 0.43732 0.69087 0 1 0 +1391 464 2 0.4236 25.28418 0.50225 1.57135 0 1 0 +1392 464 2 0.4236 25.37131 0.87260 35.43053 0 1 -1 + 1393 465 1 -0.8472 13.79635 23.63164 5.71664 1 0 0 + 1394 465 2 0.4236 13.80281 24.21398 4.90376 1 0 0 + 1395 465 2 0.4236 14.71198 23.60934 6.11797 1 0 0 + 1396 466 1 -0.8472 7.02681 26.64979 3.02480 0 0 0 + 1397 466 2 0.4236 7.59047 26.86453 3.82234 0 0 0 + 1398 466 2 0.4236 6.50426 25.81556 3.20083 0 0 0 + 1399 467 1 -0.8472 18.31755 10.37524 9.82001 0 -1 0 + 1400 467 2 0.4236 18.76024 10.64686 8.96548 0 -1 0 + 1401 467 2 0.4236 17.38714 10.74099 9.84429 0 -1 0 + 1402 468 1 -0.8472 22.32508 9.87101 1.19557 -2 1 0 + 1403 468 2 0.4236 21.56595 10.42438 0.85287 -2 1 0 + 1404 468 2 0.4236 21.96657 9.12144 1.75194 -2 1 0 + 1405 469 1 -0.8472 10.90773 22.43777 26.25786 0 -1 0 + 1406 469 2 0.4236 10.29364 22.43731 27.04706 0 -1 0 + 1407 469 2 0.4236 11.59943 21.72491 26.37335 0 -1 0 + 1408 470 1 -0.8472 1.31955 18.38600 3.47899 1 0 0 + 1409 470 2 0.4236 1.00358 17.95410 4.32374 1 0 0 + 1410 470 2 0.4236 1.03894 19.34578 3.47019 1 0 0 + 1411 471 1 -0.8472 6.61696 10.28455 21.35803 0 0 0 + 1412 471 2 0.4236 6.47810 10.37783 22.34389 0 0 0 + 1413 471 2 0.4236 6.98271 9.37573 21.15747 0 0 0 +1414 472 1 -0.8472 19.80189 1.74573 35.19601 0 0 0 +1415 472 2 0.4236 19.85469 1.68501 34.19927 0 0 0 +1416 472 2 0.4236 20.62333 2.19562 0.09930 0 0 1 + 1417 473 1 -0.8472 21.99977 23.12994 31.80630 0 0 0 + 
1418 473 2 0.4236 21.08623 23.53105 31.73959 0 0 0 + 1419 473 2 0.4236 22.56169 23.68131 32.42287 0 0 0 + 1420 474 1 -0.8472 21.02839 21.40514 14.85156 0 0 0 + 1421 474 2 0.4236 20.68163 20.80512 15.57245 0 0 0 + 1422 474 2 0.4236 20.71991 21.06957 13.96151 0 0 0 + 1423 475 1 -0.8472 29.50475 13.09304 27.02263 -1 0 0 + 1424 475 2 0.4236 29.90288 12.69977 27.85135 -1 0 0 + 1425 475 2 0.4236 28.79000 13.74578 27.27360 -1 0 0 + 1426 476 1 -0.8472 30.76891 14.86132 17.31904 -1 0 0 + 1427 476 2 0.4236 30.43544 15.73971 17.66131 -1 0 0 + 1428 476 2 0.4236 29.99732 14.23769 17.19387 -1 0 0 + 1429 477 1 -0.8472 12.57397 32.15444 27.84788 0 0 0 + 1430 477 2 0.4236 12.58061 33.06419 27.43279 0 0 0 + 1431 477 2 0.4236 13.51414 31.83439 27.96447 0 0 0 + 1432 478 1 -0.8472 15.45030 20.13796 7.83369 0 0 0 + 1433 478 2 0.4236 16.08221 20.61942 7.22641 0 0 0 + 1434 478 2 0.4236 15.42577 19.16878 7.58860 0 0 0 + 1435 479 1 -0.8472 27.25616 6.25533 2.82631 0 1 0 + 1436 479 2 0.4236 27.49530 5.28459 2.80580 0 1 0 + 1437 479 2 0.4236 27.60427 6.66664 3.66868 0 1 0 + 1438 480 1 -0.8472 32.30705 10.32216 17.65366 0 0 0 + 1439 480 2 0.4236 32.98270 10.38259 16.91891 0 0 0 + 1440 480 2 0.4236 32.39596 9.44025 18.11653 0 0 0 + 1441 481 1 -0.8472 17.34936 14.89275 22.47125 0 0 0 + 1442 481 2 0.4236 18.03506 14.70169 21.76894 0 0 0 + 1443 481 2 0.4236 16.97988 15.81190 22.33499 0 0 0 + 1444 482 1 -0.8472 24.79053 15.57468 30.86227 0 1 0 + 1445 482 2 0.4236 23.87781 15.71532 31.24585 0 1 0 + 1446 482 2 0.4236 25.10405 14.65002 31.07836 0 1 0 + 1447 483 1 -0.8472 18.67769 24.83694 24.40387 1 -1 0 + 1448 483 2 0.4236 17.82202 25.24420 24.08463 1 -1 0 + 1449 483 2 0.4236 18.94221 24.09319 23.79003 1 -1 0 + 1450 484 1 -0.8472 4.57794 35.31046 30.69351 1 -1 0 + 1451 484 2 0.4236 4.13322 34.49598 30.32096 1 -1 0 + 1452 484 2 0.4236 5.54325 35.11397 30.86539 1 -1 0 + 1453 485 1 -0.8472 22.13661 7.03626 27.30735 0 0 0 + 1454 485 2 0.4236 21.73532 6.12073 27.28041 0 0 0 + 1455 485 2 0.4236 22.26947 
7.36952 26.37395 0 0 0 + 1456 486 1 -0.8472 20.99558 25.62127 2.95624 0 0 0 + 1457 486 2 0.4236 21.65774 24.88498 2.81706 0 0 0 + 1458 486 2 0.4236 20.30686 25.59152 2.23185 0 0 0 + 1459 487 1 -0.8472 19.89257 8.75533 11.55086 -1 0 0 + 1460 487 2 0.4236 19.31087 9.22927 10.88984 -1 0 0 + 1461 487 2 0.4236 20.72914 9.28320 11.69740 -1 0 0 + 1462 488 1 -0.8472 35.31710 7.31729 4.49850 -1 1 0 + 1463 488 2 0.4236 34.87607 7.03125 3.64783 -1 1 0 + 1464 488 2 0.4236 0.48658 6.63223 4.77027 0 1 0 + 1465 489 1 -0.8472 1.93468 20.42936 32.73686 1 -1 0 + 1466 489 2 0.4236 1.67361 20.93781 31.91633 1 -1 0 + 1467 489 2 0.4236 1.21014 19.78158 32.97220 1 -1 0 + 1468 490 1 -0.8472 14.69430 26.31691 16.51917 0 0 0 + 1469 490 2 0.4236 14.31886 26.44843 17.43661 0 0 0 + 1470 490 2 0.4236 15.68539 26.19779 16.57854 0 0 0 + 1471 491 1 -0.8472 34.54384 29.00446 17.20468 -1 -1 0 + 1472 491 2 0.4236 0.02730 29.12415 17.12773 0 -1 0 + 1473 491 2 0.4236 34.29128 28.09553 16.87304 -1 -1 0 + 1474 492 1 -0.8472 19.16846 26.02402 7.57209 0 -1 0 + 1475 492 2 0.4236 19.48670 25.39677 6.86130 0 -1 0 + 1476 492 2 0.4236 19.88068 26.69918 7.76403 0 -1 0 +1477 493 1 -0.8472 5.88702 11.59786 0.07425 0 1 0 +1478 493 2 0.4236 6.66923 11.61288 34.89865 0 1 -1 +1479 493 2 0.4236 5.13610 12.12691 35.12622 0 1 -1 + 1480 494 1 -0.8472 33.54430 10.18553 28.65200 -1 1 0 + 1481 494 2 0.4236 33.04807 10.13149 27.78556 -1 1 0 + 1482 494 2 0.4236 32.90409 10.06601 29.41082 -1 1 0 + 1483 495 1 -0.8472 12.83483 20.75939 27.34453 0 -1 0 + 1484 495 2 0.4236 12.07365 20.74291 27.99282 0 -1 0 + 1485 495 2 0.4236 13.69810 20.81986 27.84554 0 -1 0 + 1486 496 1 -0.8472 30.94510 23.53453 7.01751 -1 0 0 + 1487 496 2 0.4236 30.81962 23.35913 7.99394 -1 0 0 + 1488 496 2 0.4236 30.83904 22.68007 6.50902 -1 0 0 + 1489 497 1 -0.8472 22.37756 28.64677 11.27235 0 0 0 + 1490 497 2 0.4236 23.14439 29.15394 11.66563 0 0 0 + 1491 497 2 0.4236 22.12411 29.05339 10.39464 0 0 0 + 1492 498 1 -0.8472 23.97854 11.36306 13.98019 0 0 0 + 
1493 498 2 0.4236 24.09429 11.95722 14.77616 0 0 0 + 1494 498 2 0.4236 23.63647 11.90091 13.20971 0 0 0 + 1495 499 1 -0.8472 19.09521 19.82511 30.39699 1 -1 0 + 1496 499 2 0.4236 19.60036 19.35866 29.67089 1 -1 0 + 1497 499 2 0.4236 19.30682 19.39935 31.27672 1 -1 0 + 1498 500 1 -0.8472 35.19922 30.25080 30.67423 -1 0 0 + 1499 500 2 0.4236 35.11968 30.98319 31.35042 -1 0 0 + 1500 500 2 0.4236 0.64477 29.94936 30.61978 0 0 0 + 1501 501 1 -0.8472 3.50017 19.44054 27.89680 1 -1 0 + 1502 501 2 0.4236 2.76383 19.17197 28.51776 1 -1 0 + 1503 501 2 0.4236 4.10894 20.08431 28.36038 1 -1 0 + 1504 502 1 -0.8472 8.60624 18.32181 15.04127 1 0 0 + 1505 502 2 0.4236 9.52518 17.98407 14.83766 1 0 0 + 1506 502 2 0.4236 8.43680 18.25484 16.02450 1 0 0 + 1507 503 1 -0.8472 15.08821 12.02710 30.35400 0 0 0 + 1508 503 2 0.4236 14.28360 12.09424 30.94394 0 0 0 + 1509 503 2 0.4236 15.19533 11.08291 30.04259 0 0 0 + 1510 504 1 -0.8472 34.24491 30.19634 12.70342 -1 0 0 + 1511 504 2 0.4236 34.09586 30.85828 13.43799 -1 0 0 + 1512 504 2 0.4236 35.13854 29.76330 12.82128 -1 0 0 + 1513 505 1 -0.8472 16.03697 11.37886 34.59742 -1 0 0 + 1514 505 2 0.4236 15.95641 12.21333 34.05231 -1 0 0 + 1515 505 2 0.4236 16.98162 11.05206 34.56895 -1 0 0 + 1516 506 1 -0.8472 12.41504 19.77380 32.35963 0 -1 0 + 1517 506 2 0.4236 11.71571 19.77015 33.07439 0 -1 0 + 1518 506 2 0.4236 12.91731 20.63796 32.38916 0 -1 0 + 1519 507 1 -0.8472 16.36097 1.43366 13.60126 0 0 0 + 1520 507 2 0.4236 16.72807 2.34873 13.76804 0 0 0 + 1521 507 2 0.4236 15.44831 1.50881 13.19951 0 0 0 + 1522 508 1 -0.8472 3.93183 21.98716 35.05791 0 0 0 + 1523 508 2 0.4236 3.31515 22.73755 34.82010 0 0 0 + 1524 508 2 0.4236 3.41155 21.13561 35.12164 0 0 0 + 1525 509 1 -0.8472 3.71439 25.49750 16.61339 1 -1 0 + 1526 509 2 0.4236 3.62537 25.26586 17.58209 1 -1 0 + 1527 509 2 0.4236 2.80596 25.55275 16.19905 1 -1 0 + 1528 510 1 -0.8472 12.49813 4.72433 6.90178 1 1 0 + 1529 510 2 0.4236 12.68152 4.46705 5.95300 1 1 0 + 1530 510 2 0.4236 11.92587 
5.54417 6.92011 1 1 0 + 1531 511 1 -0.8472 0.21426 15.49408 12.45830 0 1 0 + 1532 511 2 0.4236 0.76877 15.57901 11.63051 0 1 0 + 1533 511 2 0.4236 0.62502 14.81481 13.06644 0 1 0 + 1534 512 1 -0.8472 18.25090 16.03637 12.30365 0 -1 0 + 1535 512 2 0.4236 18.61740 15.18248 12.67308 0 -1 0 + 1536 512 2 0.4236 17.51187 16.36413 12.89214 0 -1 0 + 1537 513 1 -0.8472 26.90199 18.56808 6.14062 -1 0 0 + 1538 513 2 0.4236 27.18156 17.80093 6.71792 -1 0 0 + 1539 513 2 0.4236 26.92006 18.28708 5.18110 -1 0 0 + 1540 514 1 -0.8472 33.44158 4.98761 23.03192 0 0 0 + 1541 514 2 0.4236 32.49417 4.84516 23.31844 0 0 0 + 1542 514 2 0.4236 33.92571 4.11266 23.03447 0 0 0 + 1543 515 1 -0.8472 26.44129 7.53555 13.32226 1 0 0 + 1544 515 2 0.4236 26.54376 8.50657 13.53799 1 0 0 + 1545 515 2 0.4236 25.48592 7.34289 13.09834 1 0 0 + 1546 516 1 -0.8472 3.43874 25.26348 19.52892 1 0 0 + 1547 516 2 0.4236 4.41198 25.04174 19.46898 1 0 0 + 1548 516 2 0.4236 3.04847 24.83353 20.34304 1 0 0 + 1549 517 1 -0.8472 31.86517 26.77264 18.86319 0 0 0 + 1550 517 2 0.4236 31.00166 26.65635 19.35388 0 0 0 + 1551 517 2 0.4236 31.70046 26.70873 17.87892 0 0 0 + 1552 518 1 -0.8472 27.98216 0.07525 33.15709 -1 1 0 + 1553 518 2 0.4236 27.13107 0.52974 33.41972 -1 1 0 + 1554 518 2 0.4236 27.92184 35.28628 32.20363 -1 0 0 + 1555 519 1 -0.8472 12.50129 14.38728 2.64535 0 0 0 + 1556 519 2 0.4236 13.42438 14.05242 2.45638 0 0 0 + 1557 519 2 0.4236 12.09868 14.74756 1.80389 0 0 0 + 1558 520 1 -0.8472 35.34916 8.99351 30.23560 -1 0 0 + 1559 520 2 0.4236 34.73199 9.60837 29.74473 -1 0 0 + 1560 520 2 0.4236 35.06089 8.04691 30.09137 -1 0 0 + 1561 521 1 -0.8472 7.94220 25.31780 7.96523 0 0 0 + 1562 521 2 0.4236 8.06154 26.26412 8.26558 0 0 0 + 1563 521 2 0.4236 8.26247 24.69786 8.68151 0 0 0 + 1564 522 1 -0.8472 32.03791 28.47932 8.56656 -1 -1 0 + 1565 522 2 0.4236 31.56358 27.84276 7.95846 -1 -1 0 + 1566 522 2 0.4236 31.37036 29.07938 9.00727 -1 -1 0 + 1567 523 1 -0.8472 7.65336 14.67403 31.36041 0 0 0 + 1568 523 2 
0.4236 8.40726 14.08956 31.06046 0 0 0 + 1569 523 2 0.4236 7.62694 15.49955 30.79666 0 0 0 + 1570 524 1 -0.8472 30.65441 5.02976 23.14808 0 0 0 + 1571 524 2 0.4236 30.30747 5.93609 23.38933 0 0 0 + 1572 524 2 0.4236 30.83112 4.99145 22.16460 0 0 0 + 1573 525 1 -0.8472 16.76986 18.16132 2.61590 1 0 0 + 1574 525 2 0.4236 17.48560 17.79983 2.01843 1 0 0 + 1575 525 2 0.4236 15.87769 17.83789 2.30066 1 0 0 + 1576 526 1 -0.8472 24.48759 17.42694 17.19380 0 0 0 + 1577 526 2 0.4236 23.53748 17.31084 17.48316 0 0 0 + 1578 526 2 0.4236 24.53815 17.39594 16.19557 0 0 0 + 1579 527 1 -0.8472 26.31972 10.56189 19.05712 0 0 0 + 1580 527 2 0.4236 25.49704 11.05068 19.34736 0 0 0 + 1581 527 2 0.4236 27.11059 11.16987 19.12656 0 0 0 + 1582 528 1 -0.8472 17.38870 17.66427 5.36716 0 0 0 + 1583 528 2 0.4236 17.21689 17.87159 4.40414 0 0 0 + 1584 528 2 0.4236 16.58704 17.91430 5.91011 0 0 0 + 1585 529 1 -0.8472 17.48244 20.84375 3.33225 0 1 0 + 1586 529 2 0.4236 17.37372 19.87819 3.09591 0 1 0 + 1587 529 2 0.4236 16.89165 21.39879 2.74671 0 1 0 +1588 530 1 -0.8472 30.70458 8.90567 0.58378 -1 0 0 +1589 530 2 0.4236 31.24449 8.89895 1.42547 -1 0 0 +1590 530 2 0.4236 31.26259 9.25839 35.27988 -1 0 -1 + 1591 531 1 -0.8472 17.24589 9.51645 26.97577 0 0 0 + 1592 531 2 0.4236 18.13025 9.65560 27.42124 0 0 0 + 1593 531 2 0.4236 17.05643 10.28431 26.36385 0 0 0 + 1594 532 1 -0.8472 26.71341 8.00338 33.00835 0 0 0 + 1595 532 2 0.4236 26.42681 7.26893 33.62347 0 0 0 + 1596 532 2 0.4236 26.31855 7.85233 32.10215 0 0 0 + 1597 533 1 -0.8472 2.78325 32.00510 13.26572 0 0 0 + 1598 533 2 0.4236 2.91507 31.36776 12.50655 0 0 0 + 1599 533 2 0.4236 1.96186 31.75034 13.77598 0 0 0 + 1600 534 1 -0.8472 7.54073 16.08292 21.84320 0 0 0 + 1601 534 2 0.4236 8.23534 16.76123 22.08271 0 0 0 + 1602 534 2 0.4236 6.93634 15.93418 22.62580 0 0 0 + 1603 535 1 -0.8472 3.58628 2.68780 31.31068 0 0 0 + 1604 535 2 0.4236 3.76967 1.76383 30.97517 0 0 0 + 1605 535 2 0.4236 3.86213 2.75509 32.26949 0 0 0 + 1606 536 1 -0.8472 
6.00583 6.06839 35.24653 0 0 0 + 1607 536 2 0.4236 6.91369 6.48023 35.16836 0 0 0 + 1608 536 2 0.4236 5.68158 5.80167 34.33894 0 0 0 + 1609 537 1 -0.8472 10.80036 12.61686 27.53722 0 0 0 + 1610 537 2 0.4236 9.97750 12.15361 27.86624 0 0 0 + 1611 537 2 0.4236 10.98382 13.41653 28.10891 0 0 0 + 1612 538 1 -0.8472 5.91944 34.41204 7.66308 0 0 0 + 1613 538 2 0.4236 6.14214 34.84256 8.53770 0 0 0 + 1614 538 2 0.4236 6.75284 34.04363 7.25115 0 0 0 + 1615 539 1 -0.8472 33.12021 4.34118 33.43145 -1 1 0 + 1616 539 2 0.4236 32.94883 5.26438 33.08754 -1 1 0 + 1617 539 2 0.4236 33.99253 4.00863 33.07312 -1 1 0 + 1618 540 1 -0.8472 4.74684 32.25578 32.81988 0 0 0 + 1619 540 2 0.4236 5.05074 32.39468 31.87738 0 0 0 + 1620 540 2 0.4236 4.52930 33.13978 33.23362 0 0 0 + 1621 541 1 -0.8472 16.34368 0.38986 4.83953 0 0 0 + 1622 541 2 0.4236 16.19150 34.99317 4.43792 0 -1 0 + 1623 541 2 0.4236 17.20531 0.76550 4.49828 0 0 0 + 1624 542 1 -0.8472 29.76131 18.49380 11.69187 0 -1 0 + 1625 542 2 0.4236 29.61478 19.43603 11.99302 0 -1 0 + 1626 542 2 0.4236 29.83147 17.89584 12.49028 0 -1 0 + 1627 543 1 -0.8472 6.35805 20.23986 32.10952 1 0 0 + 1628 543 2 0.4236 5.57858 19.70794 32.44031 1 0 0 + 1629 543 2 0.4236 6.41383 21.09997 32.61649 1 0 0 + 1630 544 1 -0.8472 32.38857 33.76647 35.35730 0 -2 0 + 1631 544 2 0.4236 33.17635 34.34873 35.15652 0 -2 0 + 1632 544 2 0.4236 31.56884 34.33269 35.44322 0 -2 0 + 1633 545 1 -0.8472 33.53486 26.82508 15.67907 0 0 0 + 1634 545 2 0.4236 33.37107 27.59386 15.06094 0 0 0 + 1635 545 2 0.4236 33.49807 25.96909 15.16346 0 0 0 + 1636 546 1 -0.8472 10.26907 4.84569 26.02852 1 -1 0 + 1637 546 2 0.4236 9.72405 4.17803 26.53559 1 -1 0 + 1638 546 2 0.4236 9.95023 4.88603 25.08162 1 -1 0 + 1639 547 1 -0.8472 21.28403 12.99635 30.41007 0 0 0 + 1640 547 2 0.4236 20.58214 12.32606 30.65094 0 0 0 + 1641 547 2 0.4236 22.18941 12.60184 30.56679 0 0 0 + 1642 548 1 -0.8472 15.45481 8.34949 23.37232 0 1 0 + 1643 548 2 0.4236 15.34211 7.85035 22.51318 0 1 0 + 1644 548 2 
0.4236 15.95691 9.19731 23.20178 0 1 0 + 1645 549 1 -0.8472 6.96958 34.41552 28.25021 0 0 0 + 1646 549 2 0.4236 7.79222 34.25461 28.79553 0 0 0 + 1647 549 2 0.4236 6.60968 35.32665 28.45087 0 0 0 + 1648 550 1 -0.8472 1.81120 22.89329 2.63042 1 0 0 + 1649 550 2 0.4236 1.51110 22.86467 1.67697 1 0 0 + 1650 550 2 0.4236 2.73105 23.28247 2.67879 1 0 0 + 1651 551 1 -0.8472 33.27708 8.93368 34.77738 -1 1 0 + 1652 551 2 0.4236 33.57818 8.11473 34.28892 -1 1 0 + 1653 551 2 0.4236 33.98756 9.21787 35.42114 -1 1 0 + 1654 552 1 -0.8472 11.27199 30.33037 3.53330 0 0 0 + 1655 552 2 0.4236 10.35333 30.48973 3.17189 0 0 0 + 1656 552 2 0.4236 11.25036 29.56038 4.17096 0 0 0 + 1657 553 1 -0.8472 24.96636 2.12275 18.90257 0 0 0 + 1658 553 2 0.4236 25.44410 2.99883 18.83795 0 0 0 + 1659 553 2 0.4236 25.45123 1.52498 19.54094 0 0 0 + 1660 554 1 -0.8472 27.58019 12.91742 22.80616 0 0 0 + 1661 554 2 0.4236 28.29124 13.45090 22.34814 0 0 0 + 1662 554 2 0.4236 26.90451 13.53665 23.20614 0 0 0 + 1663 555 1 -0.8472 34.68260 21.43958 8.60959 -1 0 0 + 1664 555 2 0.4236 35.08744 21.14186 7.74503 -1 0 0 + 1665 555 2 0.4236 35.34813 21.32323 9.34679 -1 0 0 + 1666 556 1 -0.8472 2.65962 24.63473 34.38402 0 0 0 + 1667 556 2 0.4236 2.57897 25.62078 34.23869 0 0 0 + 1668 556 2 0.4236 1.76410 24.25847 34.62159 0 0 0 + 1669 557 1 -0.8472 21.59833 20.27093 9.43207 0 0 0 + 1670 557 2 0.4236 21.32327 19.78101 10.25927 0 0 0 + 1671 557 2 0.4236 21.09950 19.90496 8.64649 0 0 0 + 1672 558 1 -0.8472 17.70144 4.33007 28.81474 0 0 0 + 1673 558 2 0.4236 18.33363 5.07425 28.59897 0 0 0 + 1674 558 2 0.4236 17.03534 4.23363 28.07517 0 0 0 +1675 559 1 -0.8472 33.59561 13.39591 0.69504 0 0 0 +1676 559 2 0.4236 32.66438 13.04141 0.61064 0 0 0 +1677 559 2 0.4236 34.13957 13.09357 35.35950 0 0 -1 + 1678 560 1 -0.8472 14.71291 15.98052 26.96475 1 0 0 + 1679 560 2 0.4236 14.68940 15.38014 27.76408 1 0 0 + 1680 560 2 0.4236 15.50516 16.58746 27.02696 1 0 0 + 1681 561 1 -0.8472 25.20926 21.95638 32.28218 0 0 0 + 1682 561 2 
0.4236 25.33609 22.94246 32.17492 0 0 0 + 1683 561 2 0.4236 25.09606 21.73837 33.25152 0 0 0 + 1684 562 1 -0.8472 6.81526 3.83312 8.73036 0 1 0 + 1685 562 2 0.4236 7.62105 3.29226 8.48921 0 1 0 + 1686 562 2 0.4236 6.08089 3.22287 9.02737 0 1 0 + 1687 563 1 -0.8472 9.21702 7.78445 24.95624 0 0 0 + 1688 563 2 0.4236 9.47416 7.18840 25.71687 0 0 0 + 1689 563 2 0.4236 10.02681 8.26676 24.62226 0 0 0 + 1690 564 1 -0.8472 25.76586 24.56681 31.82123 0 0 0 + 1691 564 2 0.4236 25.20099 25.37507 31.65516 0 0 0 + 1692 564 2 0.4236 26.09305 24.20382 30.94877 0 0 0 + 1693 565 1 -0.8472 15.97743 35.01757 18.88740 0 0 0 + 1694 565 2 0.4236 15.03871 35.06053 19.22932 0 0 0 + 1695 565 2 0.4236 16.00030 35.32615 17.93653 0 0 0 + 1696 566 1 -0.8472 13.18216 8.82755 28.56636 0 1 0 + 1697 566 2 0.4236 12.35671 9.24546 28.94568 0 1 0 + 1698 566 2 0.4236 12.95644 7.93180 28.18339 0 1 0 + 1699 567 1 -0.8472 1.80194 1.01226 14.21230 0 1 0 + 1700 567 2 0.4236 2.18788 0.27894 14.77198 0 1 0 + 1701 567 2 0.4236 1.33221 0.61709 13.42293 0 1 0 + 1702 568 1 -0.8472 27.20550 9.56978 16.51573 -1 1 0 + 1703 568 2 0.4236 28.12206 9.17580 16.44728 -1 1 0 + 1704 568 2 0.4236 27.04703 9.88443 17.45157 -1 1 0 + 1705 569 1 -0.8472 22.64424 27.88338 17.70052 0 -1 0 + 1706 569 2 0.4236 23.43078 27.61638 17.14368 0 -1 0 + 1707 569 2 0.4236 22.38629 28.82476 17.48330 0 -1 0 + 1708 570 1 -0.8472 35.02158 17.75754 30.83771 0 0 0 + 1709 570 2 0.4236 35.30041 16.79972 30.90651 0 0 0 + 1710 570 2 0.4236 34.14333 17.81577 30.36307 0 0 0 + 1711 571 1 -0.8472 32.15586 2.34760 15.77797 0 1 0 + 1712 571 2 0.4236 31.26558 2.78947 15.88800 0 1 0 + 1713 571 2 0.4236 32.68395 2.83108 15.07987 0 1 0 +1714 572 1 -0.8472 17.34961 33.61287 35.05546 0 -1 0 +1715 572 2 0.4236 17.10728 34.46054 34.58359 0 -1 0 +1716 572 2 0.4236 18.19587 33.74839 0.12349 0 -1 1 + 1717 573 1 -0.8472 1.64640 29.18838 17.08087 0 0 0 + 1718 573 2 0.4236 2.48391 28.77481 16.72381 0 0 0 + 1719 573 2 0.4236 1.56160 28.98120 18.05545 0 0 0 + 1720 574 1 
-0.8472 19.94612 29.25446 0.27014 0 0 0 + 1721 574 2 0.4236 19.17911 29.86414 0.07048 0 0 0 + 1722 574 2 0.4236 20.43600 29.58787 1.07563 0 0 0 + 1723 575 1 -0.8472 27.87603 8.50157 9.36777 -1 1 0 + 1724 575 2 0.4236 28.70979 7.95495 9.29035 -1 1 0 + 1725 575 2 0.4236 28.11733 9.45097 9.56858 -1 1 0 + 1726 576 1 -0.8472 9.83071 27.74860 10.85315 0 -1 0 + 1727 576 2 0.4236 9.99678 28.63555 11.28401 0 -1 0 + 1728 576 2 0.4236 10.41174 27.65689 10.04446 0 -1 0 + 1729 577 1 -0.8472 26.30076 18.46106 21.29522 0 0 0 + 1730 577 2 0.4236 27.23387 18.82014 21.31372 0 0 0 + 1731 577 2 0.4236 25.65252 19.21035 21.43056 0 0 0 + 1732 578 1 -0.8472 14.19228 25.99075 24.74448 0 0 0 + 1733 578 2 0.4236 13.37034 26.52353 24.94578 0 0 0 + 1734 578 2 0.4236 13.97910 25.01532 24.79992 0 0 0 + 1735 579 1 -0.8472 0.68976 8.97160 18.20142 0 0 0 + 1736 579 2 0.4236 1.50615 8.39601 18.15506 0 0 0 + 1737 579 2 0.4236 0.62225 9.38166 19.11096 0 0 0 + 1738 580 1 -0.8472 32.76423 11.76137 33.24131 0 0 0 + 1739 580 2 0.4236 33.62352 11.28392 33.05802 0 0 0 + 1740 580 2 0.4236 32.41726 11.49035 34.13912 0 0 0 + 1741 581 1 -0.8472 11.58373 29.93909 6.86949 0 0 0 + 1742 581 2 0.4236 10.77287 30.52328 6.83510 0 0 0 + 1743 581 2 0.4236 12.37620 30.48458 7.14222 0 0 0 + 1744 582 1 -0.8472 33.74122 13.45829 4.64075 -1 0 0 + 1745 582 2 0.4236 33.22265 14.06928 5.23885 -1 0 0 + 1746 582 2 0.4236 34.30148 14.00090 4.01492 -1 0 0 + 1747 583 1 -0.8472 20.83466 4.12619 19.87995 0 0 0 + 1748 583 2 0.4236 20.90699 4.49091 20.80821 0 0 0 + 1749 583 2 0.4236 21.52331 4.55806 19.29756 0 0 0 + 1750 584 1 -0.8472 25.28194 8.95363 23.55618 -1 0 0 + 1751 584 2 0.4236 25.07171 8.56157 22.66061 -1 0 0 + 1752 584 2 0.4236 26.10167 9.52213 23.48703 -1 0 0 + 1753 585 1 -0.8472 16.33729 1.45574 7.44614 0 0 0 + 1754 585 2 0.4236 15.53044 1.08269 7.90423 0 0 0 + 1755 585 2 0.4236 16.36871 1.12364 6.50346 0 0 0 + 1756 586 1 -0.8472 8.46319 10.81445 34.53439 0 0 0 + 1757 586 2 0.4236 8.54521 10.89899 33.54137 0 0 0 + 1758 586 
2 0.4236 9.27543 11.20225 34.97011 0 0 0 + 1759 587 1 -0.8472 5.31409 12.69501 25.27016 0 1 0 + 1760 587 2 0.4236 5.23857 12.42388 26.22972 0 1 0 + 1761 587 2 0.4236 6.23790 13.03342 25.09115 0 1 0 +1762 588 1 -0.8472 28.51799 6.14980 35.24103 0 0 0 +1763 588 2 0.4236 28.05275 6.99734 0.04908 0 0 1 +1764 588 2 0.4236 29.45447 6.16511 0.14421 0 0 1 + 1765 589 1 -0.8472 7.79048 0.69800 16.22464 0 0 0 + 1766 589 2 0.4236 8.07389 0.82781 17.17477 0 0 0 + 1767 589 2 0.4236 8.56353 0.88323 15.61796 0 0 0 + 1768 590 1 -0.8472 21.68103 21.80441 29.46721 0 0 0 + 1769 590 2 0.4236 21.56755 21.92180 30.45375 0 0 0 + 1770 590 2 0.4236 22.63704 21.59233 29.26456 0 0 0 + 1771 591 1 -0.8472 20.54184 13.51645 22.88712 -1 0 0 + 1772 591 2 0.4236 20.07263 13.38529 23.76036 -1 0 0 + 1773 591 2 0.4236 21.43912 13.92704 23.04922 -1 0 0 + 1774 592 1 -0.8472 13.50917 24.41399 28.85132 0 -1 0 + 1775 592 2 0.4236 13.78505 25.37319 28.78968 0 -1 0 + 1776 592 2 0.4236 12.70570 24.26063 28.27612 0 -1 0 + 1777 593 1 -0.8472 10.69717 24.26781 31.92530 0 0 0 + 1778 593 2 0.4236 10.60386 23.69449 32.73927 0 0 0 + 1779 593 2 0.4236 11.64253 24.58417 31.84703 0 0 0 + 1780 594 1 -0.8472 15.95641 28.97606 35.05433 0 0 0 + 1781 594 2 0.4236 15.96032 28.35359 34.27173 0 0 0 + 1782 594 2 0.4236 16.63849 29.69350 34.91293 0 0 0 + 1783 595 1 -0.8472 1.56519 0.69624 33.04969 1 0 0 + 1784 595 2 0.4236 2.41404 1.17829 33.26655 1 0 0 + 1785 595 2 0.4236 1.02145 0.58791 33.88191 1 0 0 + 1786 596 1 -0.8472 26.14808 0.15636 10.03297 -1 0 0 + 1787 596 2 0.4236 25.35902 0.37673 9.45958 -1 0 0 + 1788 596 2 0.4236 26.26507 34.67035 10.07186 -1 -1 0 + 1789 597 1 -0.8472 9.77710 17.72509 3.30862 0 0 0 + 1790 597 2 0.4236 9.87566 17.93734 4.28081 0 0 0 + 1791 597 2 0.4236 9.23151 16.89378 3.20261 0 0 0 + 1792 598 1 -0.8472 8.75437 5.30451 3.09848 0 0 0 + 1793 598 2 0.4236 8.58717 5.03388 4.04651 0 0 0 + 1794 598 2 0.4236 9.12882 6.23147 3.07757 0 0 0 + 1795 599 1 -0.8472 20.48155 15.91696 9.33594 0 1 0 + 1796 599 2 
0.4236 20.56631 15.50253 8.42984 0 1 0 + 1797 599 2 0.4236 20.25513 15.21003 10.00593 0 1 0 + 1798 600 1 -0.8472 16.82592 18.58235 26.17211 0 0 0 + 1799 600 2 0.4236 16.58729 18.45481 27.13479 0 0 0 + 1800 600 2 0.4236 17.21880 17.73656 25.81123 0 0 0 + 1801 601 1 -0.8472 13.75988 16.98609 22.32067 0 0 0 + 1802 601 2 0.4236 13.58551 17.90045 21.95528 0 0 0 + 1803 601 2 0.4236 13.59282 16.98375 23.30660 0 0 0 + 1804 602 1 -0.8472 32.62092 30.40547 6.88962 -1 -1 0 + 1805 602 2 0.4236 33.54931 30.43647 6.51938 -1 -1 0 + 1806 602 2 0.4236 32.51760 29.59252 7.46270 -1 -1 0 + 1807 603 1 -0.8472 17.63259 14.60870 27.18186 0 0 0 + 1808 603 2 0.4236 17.90163 15.20858 27.93536 0 0 0 + 1809 603 2 0.4236 17.69519 15.10944 26.31855 0 0 0 + 1810 604 1 -0.8472 11.62304 21.25939 16.47149 0 -1 0 + 1811 604 2 0.4236 11.45555 21.34865 15.48971 0 -1 0 + 1812 604 2 0.4236 12.53275 21.61367 16.68797 0 -1 0 + 1813 605 1 -0.8472 1.37600 22.27800 20.75203 1 -1 0 + 1814 605 2 0.4236 2.16943 21.85760 20.31195 1 -1 0 + 1815 605 2 0.4236 0.71462 21.56840 20.99487 1 -1 0 + 1816 606 1 -0.8472 7.18336 3.26655 24.36441 0 0 0 + 1817 606 2 0.4236 7.73090 3.12923 23.53899 0 0 0 + 1818 606 2 0.4236 6.93062 2.37846 24.74831 0 0 0 + 1819 607 1 -0.8472 17.64928 11.95310 5.42292 0 1 0 + 1820 607 2 0.4236 17.65692 12.93297 5.62238 0 1 0 + 1821 607 2 0.4236 17.41402 11.81051 4.46152 0 1 0 + 1822 608 1 -0.8472 12.84893 11.66550 32.14876 0 0 0 + 1823 608 2 0.4236 12.95939 10.67321 32.09304 0 0 0 + 1824 608 2 0.4236 12.82748 11.94512 33.10860 0 0 0 + 1825 609 1 -0.8472 28.12575 28.57384 6.21194 0 0 0 + 1826 609 2 0.4236 28.72740 27.81667 6.46617 0 0 0 + 1827 609 2 0.4236 28.67723 29.34738 5.89972 0 0 0 + 1828 610 1 -0.8472 9.60709 10.13056 3.09602 1 -1 0 + 1829 610 2 0.4236 9.17047 10.54496 3.89451 1 -1 0 + 1830 610 2 0.4236 8.91309 9.92399 2.40636 1 -1 0 + 1831 611 1 -0.8472 14.42399 9.53487 14.30475 0 1 0 + 1832 611 2 0.4236 13.60791 9.60745 14.87804 0 1 0 + 1833 611 2 0.4236 14.17935 9.71599 13.35221 0 1 0 
+ 1834 612 1 -0.8472 6.04272 31.14841 20.46104 0 0 0 + 1835 612 2 0.4236 6.17572 30.16410 20.34531 0 0 0 + 1836 612 2 0.4236 5.07705 31.33535 20.64129 0 0 0 + 1837 613 1 -0.8472 8.78576 13.58167 21.88541 0 1 0 + 1838 613 2 0.4236 8.51872 14.54379 21.93947 0 1 0 + 1839 613 2 0.4236 8.23602 13.12187 21.18806 0 1 0 + 1840 614 1 -0.8472 23.47264 35.27453 4.51530 0 -1 0 + 1841 614 2 0.4236 22.72332 35.48142 5.14431 0 -1 0 + 1842 614 2 0.4236 24.07799 34.59518 4.92999 0 -1 0 + 1843 615 1 -0.8472 22.09421 26.54415 21.45579 0 -1 0 + 1844 615 2 0.4236 22.63260 26.42361 22.28983 0 -1 0 + 1845 615 2 0.4236 22.48660 27.28439 20.90987 0 -1 0 + 1846 616 1 -0.8472 1.89433 19.06162 16.66270 0 0 0 + 1847 616 2 0.4236 1.86443 20.00623 16.33594 0 0 0 + 1848 616 2 0.4236 2.02229 19.05590 17.65444 0 0 0 + 1849 617 1 -0.8472 23.96341 29.62000 2.08747 0 -1 0 + 1850 617 2 0.4236 23.99764 29.18700 1.18675 0 -1 0 + 1851 617 2 0.4236 23.53567 30.52040 2.00840 0 -1 0 + 1852 618 1 -0.8472 1.04699 21.68909 30.59706 1 0 0 + 1853 618 2 0.4236 0.06322 21.75332 30.42968 1 0 0 + 1854 618 2 0.4236 1.52221 21.52537 29.73261 1 0 0 + 1855 619 1 -0.8472 13.73343 18.77132 16.05091 0 0 0 + 1856 619 2 0.4236 14.06321 19.15745 16.91235 0 0 0 + 1857 619 2 0.4236 13.14349 17.98673 16.24155 0 0 0 + 1858 620 1 -0.8472 15.02561 6.07506 33.93037 -1 0 0 + 1859 620 2 0.4236 14.69203 5.58795 33.12331 -1 0 0 + 1860 620 2 0.4236 16.00179 5.89373 34.04930 -1 0 0 + 1861 621 1 -0.8472 31.82004 4.72525 13.93145 -1 0 0 + 1862 621 2 0.4236 32.80133 4.70708 14.12290 -1 0 0 + 1863 621 2 0.4236 31.64404 4.28540 13.05084 -1 0 0 + 1864 622 1 -0.8472 24.24718 18.83076 7.32403 0 -1 0 + 1865 622 2 0.4236 24.38484 18.48542 8.25232 0 -1 0 + 1866 622 2 0.4236 25.05566 18.63064 6.77066 0 -1 0 + 1867 623 1 -0.8472 30.09284 6.70704 9.34689 0 1 0 + 1868 623 2 0.4236 30.69726 6.91436 10.11606 0 1 0 + 1869 623 2 0.4236 30.28333 5.78402 9.01266 0 1 0 + 1870 624 1 -0.8472 6.44543 22.20950 16.52672 1 0 0 + 1871 624 2 0.4236 6.63187 21.34098 
16.98587 1 0 0 + 1872 624 2 0.4236 6.77340 22.16484 15.58312 1 0 0 + 1873 625 1 -0.8472 26.53478 4.89718 31.49008 0 1 0 + 1874 625 2 0.4236 26.41051 4.03196 31.00435 0 1 0 + 1875 625 2 0.4236 26.22715 5.65186 30.91066 0 1 0 + 1876 626 1 -0.8472 16.62247 30.95933 12.57652 1 0 0 + 1877 626 2 0.4236 17.56488 31.25972 12.42949 1 0 0 + 1878 626 2 0.4236 16.48769 30.74633 13.54423 1 0 0 + 1879 627 1 -0.8472 4.87356 34.61588 34.44684 1 0 0 + 1880 627 2 0.4236 5.82893 34.71196 34.72609 1 0 0 + 1881 627 2 0.4236 4.56619 35.46296 34.01332 1 0 0 + 1882 628 1 -0.8472 2.50288 12.28157 26.08722 0 0 0 + 1883 628 2 0.4236 3.13155 12.43414 25.32471 0 0 0 + 1884 628 2 0.4236 2.99882 11.86718 26.85029 0 0 0 + 1885 629 1 -0.8472 11.00864 34.29008 8.17472 1 0 0 + 1886 629 2 0.4236 10.75288 34.35239 9.13942 1 0 0 + 1887 629 2 0.4236 11.92072 33.88754 8.09702 1 0 0 + 1888 630 1 -0.8472 10.46049 16.76421 26.41355 1 0 0 + 1889 630 2 0.4236 10.39178 17.07209 27.36247 1 0 0 + 1890 630 2 0.4236 11.33817 17.05388 26.03185 1 0 0 + 1891 631 1 -0.8472 17.21021 4.69788 34.67997 0 0 0 + 1892 631 2 0.4236 18.05686 4.48907 34.19055 0 0 0 + 1893 631 2 0.4236 16.82710 3.85507 35.05788 0 0 0 + 1894 632 1 -0.8472 6.97884 35.13516 32.26371 1 0 0 + 1895 632 2 0.4236 7.59758 34.77477 32.96174 1 0 0 + 1896 632 2 0.4236 6.61568 0.51094 32.56360 1 1 0 + 1897 633 1 -0.8472 30.26859 4.69039 18.04698 -1 1 0 + 1898 633 2 0.4236 29.55779 4.00235 17.90097 -1 1 0 + 1899 633 2 0.4236 30.81486 4.44085 18.84653 -1 1 0 + 1900 634 1 -0.8472 14.15940 29.63583 31.68196 0 0 0 + 1901 634 2 0.4236 14.48471 30.44261 31.18878 0 0 0 + 1902 634 2 0.4236 13.52149 29.12674 31.10417 0 0 0 + 1903 635 1 -0.8472 22.40695 9.51538 9.73709 0 -1 0 + 1904 635 2 0.4236 22.71650 9.39313 10.68006 0 -1 0 + 1905 635 2 0.4236 23.02803 9.03544 9.11755 0 -1 0 +1906 636 1 -0.8472 22.24110 0.13405 35.41380 0 1 0 +1907 636 2 0.4236 23.18710 0.36198 0.19705 0 1 1 +1908 636 2 0.4236 21.82447 35.13999 0.72551 0 0 1 + 1909 637 1 -0.8472 30.80385 21.12738 
11.62863 -1 0 0 + 1910 637 2 0.4236 31.14074 21.74078 10.91430 -1 0 0 + 1911 637 2 0.4236 31.43501 20.35877 11.73284 -1 0 0 + 1912 638 1 -0.8472 27.44982 5.23264 21.39756 -1 1 0 + 1913 638 2 0.4236 27.87749 4.69834 22.12667 -1 1 0 + 1914 638 2 0.4236 26.53230 5.51030 21.68213 -1 1 0 + 1915 639 1 -0.8472 34.24917 16.52998 18.53573 0 0 0 + 1916 639 2 0.4236 34.91854 15.83455 18.79704 0 0 0 + 1917 639 2 0.4236 33.88140 16.31527 17.63097 0 0 0 + 1918 640 1 -0.8472 32.11496 35.09444 16.48223 -1 -1 0 + 1919 640 2 0.4236 32.98099 34.62132 16.64370 -1 -1 0 + 1920 640 2 0.4236 32.28074 0.41008 15.93745 -1 0 0 + 1921 641 1 -0.8472 5.27734 31.52491 24.59110 0 0 0 + 1922 641 2 0.4236 5.60492 31.56543 25.53502 0 0 0 + 1923 641 2 0.4236 5.51573 32.37348 24.11884 0 0 0 + 1924 642 1 -0.8472 25.79032 25.41548 18.29376 -1 -1 0 + 1925 642 2 0.4236 25.40295 24.65387 17.77428 -1 -1 0 + 1926 642 2 0.4236 25.77416 26.24352 17.73341 -1 -1 0 + 1927 643 1 -0.8472 4.93641 22.51784 12.73054 0 0 0 + 1928 643 2 0.4236 4.69948 21.69044 13.23963 0 0 0 + 1929 643 2 0.4236 4.10361 23.03009 12.52080 0 0 0 + 1930 644 1 -0.8472 12.37112 3.40763 24.86826 0 1 0 + 1931 644 2 0.4236 11.49379 3.77028 25.18239 0 1 0 + 1932 644 2 0.4236 12.78779 4.05198 24.22706 0 1 0 + 1933 645 1 -0.8472 17.22029 21.06776 32.22767 0 -1 0 + 1934 645 2 0.4236 16.31836 21.34141 31.89361 0 -1 0 + 1935 645 2 0.4236 17.79546 20.80136 31.45424 0 -1 0 + 1936 646 1 -0.8472 25.41708 13.65890 27.43081 -1 1 0 + 1937 646 2 0.4236 25.98599 14.41622 27.11030 -1 1 0 + 1938 646 2 0.4236 25.01532 13.18819 26.64533 -1 1 0 + 1939 647 1 -0.8472 31.34632 0.92208 6.09316 0 0 0 + 1940 647 2 0.4236 31.49282 0.15259 5.47156 0 0 0 + 1941 647 2 0.4236 31.97499 1.66376 5.85935 0 0 0 + 1942 648 1 -0.8472 14.52004 27.14727 0.85218 -1 0 0 + 1943 648 2 0.4236 15.06648 27.88580 0.45734 -1 0 0 + 1944 648 2 0.4236 14.58562 27.18061 1.84945 -1 0 0 + 1945 649 1 -0.8472 28.87721 13.80291 31.40189 -1 0 0 + 1946 649 2 0.4236 29.22211 14.24724 32.22866 -1 0 0 + 
1947 649 2 0.4236 28.79132 14.47981 30.67082 -1 0 0 + 1948 650 1 -0.8472 32.29796 1.69820 30.01991 0 -1 0 + 1949 650 2 0.4236 33.00137 1.81610 29.31905 0 -1 0 + 1950 650 2 0.4236 31.71924 2.51309 30.05174 0 -1 0 + 1951 651 1 -0.8472 12.62275 8.26297 20.33641 0 0 0 + 1952 651 2 0.4236 12.83214 8.94927 21.03291 0 0 0 + 1953 651 2 0.4236 12.11033 8.68982 19.59131 0 0 0 + 1954 652 1 -0.8472 12.52870 25.44736 7.87215 0 -1 0 + 1955 652 2 0.4236 13.00949 24.63086 8.19160 0 -1 0 + 1956 652 2 0.4236 11.64782 25.18323 7.47946 0 -1 0 + 1957 653 1 -0.8472 1.10135 20.97932 6.76187 1 1 0 + 1958 653 2 0.4236 0.77452 21.00470 5.81713 1 1 0 + 1959 653 2 0.4236 1.95251 21.49916 6.83418 1 1 0 + 1960 654 1 -0.8472 4.82318 0.41033 13.47479 0 0 0 + 1961 654 2 0.4236 4.34431 1.17994 13.05247 0 0 0 + 1962 654 2 0.4236 4.19429 35.42103 14.07377 0 -1 0 + 1963 655 1 -0.8472 7.97884 14.32280 18.39989 0 0 0 + 1964 655 2 0.4236 7.48239 15.14935 18.66497 0 0 0 + 1965 655 2 0.4236 7.76346 13.58505 19.03968 0 0 0 + 1966 656 1 -0.8472 6.57446 10.83838 7.69395 0 0 0 + 1967 656 2 0.4236 6.93009 11.43436 6.97402 0 0 0 + 1968 656 2 0.4236 5.66493 11.15166 7.96697 0 0 0 + 1969 657 1 -0.8472 10.13095 13.45872 3.45462 0 1 0 + 1970 657 2 0.4236 9.41951 14.02714 3.04153 0 1 0 + 1971 657 2 0.4236 11.02593 13.74220 3.11019 0 1 0 + 1972 658 1 -0.8472 2.78532 23.98202 22.68429 0 0 0 + 1973 658 2 0.4236 2.24782 23.30083 22.18728 0 0 0 + 1974 658 2 0.4236 2.17788 24.69474 23.03500 0 0 0 + 1975 659 1 -0.8472 29.67656 34.06660 9.42289 -1 0 0 + 1976 659 2 0.4236 29.37866 33.82615 10.34668 -1 0 0 + 1977 659 2 0.4236 30.23579 34.89486 9.45798 -1 0 0 + 1978 660 1 -0.8472 17.39594 25.01387 9.24906 0 0 0 + 1979 660 2 0.4236 18.05931 25.56908 8.74748 0 0 0 + 1980 660 2 0.4236 17.77142 24.09901 9.39751 0 0 0 + 1981 661 1 -0.8472 11.13962 14.07290 32.95291 1 1 0 + 1982 661 2 0.4236 12.10521 14.31041 33.05855 1 1 0 + 1983 661 2 0.4236 10.89910 13.35632 33.60762 1 1 0 + 1984 662 1 -0.8472 1.12560 32.01749 27.53520 1 -1 0 + 
1985 662 2 0.4236 0.50802 31.29622 27.84873 1 -1 0 + 1986 662 2 0.4236 1.59583 31.71636 26.70563 1 -1 0 + 1987 663 1 -0.8472 16.74625 10.36654 22.14588 0 0 0 + 1988 663 2 0.4236 17.64843 10.13447 22.50939 0 0 0 + 1989 663 2 0.4236 16.51070 11.30163 22.41056 0 0 0 + 1990 664 1 -0.8472 3.76112 22.81755 16.72253 1 0 0 + 1991 664 2 0.4236 3.76488 23.70592 16.26345 1 0 0 + 1992 664 2 0.4236 4.62662 22.34826 16.54749 1 0 0 + 1993 665 1 -0.8472 1.85948 2.74210 27.37838 0 0 0 + 1994 665 2 0.4236 2.54531 2.13125 27.77390 0 0 0 + 1995 665 2 0.4236 2.26198 3.24393 26.61279 0 0 0 + 1996 666 1 -0.8472 20.97424 33.57107 23.88091 0 -1 0 + 1997 666 2 0.4236 21.63334 32.88392 24.18640 0 -1 0 + 1998 666 2 0.4236 21.20496 33.86510 22.95340 0 -1 0 + 1999 667 1 -0.8472 14.93408 6.66163 21.35215 0 0 0 + 2000 667 2 0.4236 14.05135 6.86086 20.92664 0 0 0 + 2001 667 2 0.4236 15.24543 5.75568 21.06534 0 0 0 + 2002 668 1 -0.8472 22.24661 5.18993 17.87120 -1 0 0 + 2003 668 2 0.4236 21.88334 5.84047 17.20429 -1 0 0 + 2004 668 2 0.4236 23.18057 5.45019 18.11607 -1 0 0 + 2005 669 1 -0.8472 31.73682 29.92390 26.37363 0 0 0 + 2006 669 2 0.4236 32.14422 30.71878 26.82324 0 0 0 + 2007 669 2 0.4236 32.44996 29.40671 25.90048 0 0 0 + 2008 670 1 -0.8472 24.55434 23.28950 6.20609 0 0 0 + 2009 670 2 0.4236 25.36818 22.74753 5.99656 0 0 0 + 2010 670 2 0.4236 24.07519 22.88402 6.98448 0 0 0 + 2011 671 1 -0.8472 25.88342 13.36903 31.69745 -1 0 0 + 2012 671 2 0.4236 26.03190 12.91533 30.81878 -1 0 0 + 2013 671 2 0.4236 26.76589 13.60848 32.10225 -1 0 0 + 2014 672 1 -0.8472 31.90813 10.22292 30.85168 0 0 0 + 2015 672 2 0.4236 32.27321 10.76208 31.61060 0 0 0 + 2016 672 2 0.4236 31.57679 9.34449 31.19597 0 0 0 + 2017 673 1 -0.8472 0.84917 3.66386 11.68793 1 0 0 + 2018 673 2 0.4236 0.87147 4.59942 12.04035 1 0 0 + 2019 673 2 0.4236 0.11734 3.57978 11.01167 1 0 0 + 2020 674 1 -0.8472 12.64903 6.40652 27.25223 0 0 0 + 2021 674 2 0.4236 11.96507 5.83731 26.79599 0 0 0 + 2022 674 2 0.4236 12.95893 7.12172 26.62584 
0 0 0 + 2023 675 1 -0.8472 15.96014 7.99372 15.88506 0 0 0 + 2024 675 2 0.4236 16.82916 7.82162 15.42119 0 0 0 + 2025 675 2 0.4236 15.41302 8.63081 15.34222 0 0 0 + 2026 676 1 -0.8472 20.03414 20.13508 12.66468 0 0 0 + 2027 676 2 0.4236 19.40056 20.36704 11.92663 0 0 0 + 2028 676 2 0.4236 20.38174 19.20773 12.52643 0 0 0 + 2029 677 1 -0.8472 21.35607 6.30596 10.13191 -1 -1 0 + 2030 677 2 0.4236 20.87120 5.49454 10.45802 -1 -1 0 + 2031 677 2 0.4236 21.92900 6.66958 10.86641 -1 -1 0 + 2032 678 1 -0.8472 35.15594 30.93388 9.83376 0 -1 0 + 2033 678 2 0.4236 0.35036 31.64684 9.85606 1 -1 0 + 2034 678 2 0.4236 34.54498 31.04238 10.61796 0 -1 0 + 2035 679 1 -0.8472 4.27599 27.25591 26.29171 1 0 0 + 2036 679 2 0.4236 3.35993 27.34560 25.90091 1 0 0 + 2037 679 2 0.4236 4.46769 28.04433 26.87613 1 0 0 + 2038 680 1 -0.8472 26.85910 19.33388 13.68598 0 0 0 + 2039 680 2 0.4236 25.94374 19.21433 14.07041 0 0 0 + 2040 680 2 0.4236 27.54078 19.00690 14.34044 0 0 0 + 2041 681 1 -0.8472 7.83799 28.18316 15.83373 0 -1 0 + 2042 681 2 0.4236 7.63358 27.28088 16.21326 0 -1 0 + 2043 681 2 0.4236 7.00487 28.73603 15.82129 0 -1 0 + 2044 682 1 -0.8472 32.63008 18.22477 29.78603 0 1 0 + 2045 682 2 0.4236 32.15689 17.45297 29.36130 0 1 0 + 2046 682 2 0.4236 31.95728 18.87141 30.14536 0 1 0 + 2047 683 1 -0.8472 3.25697 33.47986 7.39333 -1 -1 0 + 2048 683 2 0.4236 4.14306 33.83996 7.68509 -1 -1 0 + 2049 683 2 0.4236 3.20470 33.49200 6.39478 -1 -1 0 + 2050 684 1 -0.8472 0.84315 3.50002 3.05779 0 0 0 + 2051 684 2 0.4236 1.36600 4.15296 3.60572 0 0 0 + 2052 684 2 0.4236 1.42306 2.71869 2.82722 0 0 0 + 2053 685 1 -0.8472 25.12323 15.97604 21.47090 0 0 0 + 2054 685 2 0.4236 25.42059 15.67903 22.37827 0 0 0 + 2055 685 2 0.4236 25.51738 16.87233 21.26778 0 0 0 + 2056 686 1 -0.8472 7.27468 22.21053 14.03439 0 0 0 + 2057 686 2 0.4236 7.89057 21.43472 13.89746 0 0 0 + 2058 686 2 0.4236 6.46872 22.10302 13.45235 0 0 0 + 2059 687 1 -0.8472 14.24074 9.72792 11.56220 0 1 0 + 2060 687 2 0.4236 13.63511 
9.58057 10.78023 0 1 0 + 2061 687 2 0.4236 15.02007 10.28708 11.27947 0 1 0 + 2062 688 1 -0.8472 24.56624 34.10605 22.50422 0 -1 0 + 2063 688 2 0.4236 24.42631 33.16115 22.20827 0 -1 0 + 2064 688 2 0.4236 24.89936 34.11414 23.44704 0 -1 0 + 2065 689 1 -0.8472 28.18228 24.21568 14.50155 0 0 0 + 2066 689 2 0.4236 28.15529 24.11183 15.49574 0 0 0 + 2067 689 2 0.4236 27.53670 23.57749 14.08211 0 0 0 + 2068 690 1 -0.8472 35.20714 9.78963 0.99118 0 1 0 + 2069 690 2 0.4236 0.60853 10.16753 0.80907 1 1 0 + 2070 690 2 0.4236 34.53749 10.53157 1.02295 0 1 0 + 2071 691 1 -0.8472 0.69131 26.23859 12.99395 0 0 0 + 2072 691 2 0.4236 0.05017 25.81851 12.35175 0 0 0 + 2073 691 2 0.4236 0.86619 27.18423 12.71973 0 0 0 + 2074 692 1 -0.8472 22.19338 29.11773 4.33016 0 -1 0 + 2075 692 2 0.4236 21.46426 28.43597 4.38932 0 -1 0 + 2076 692 2 0.4236 22.95489 28.74612 3.79919 0 -1 0 + 2077 693 1 -0.8472 17.91530 28.67983 14.99455 0 0 0 + 2078 693 2 0.4236 18.48897 28.40781 15.76712 0 0 0 + 2079 693 2 0.4236 17.45208 27.87490 14.62374 0 0 0 + 2080 694 1 -0.8472 12.79492 25.77029 32.65179 0 -1 0 + 2081 694 2 0.4236 13.77749 25.61527 32.75437 0 -1 0 + 2082 694 2 0.4236 12.43237 26.17705 33.49026 0 -1 0 + 2083 695 1 -0.8472 30.87195 23.17070 13.46252 0 0 0 + 2084 695 2 0.4236 30.40061 23.03858 14.33449 0 0 0 + 2085 695 2 0.4236 30.94240 22.29458 12.98565 0 0 0 + 2086 696 1 -0.8472 9.82436 23.58229 1.74889 0 0 0 + 2087 696 2 0.4236 9.70115 23.45578 0.76463 0 0 0 + 2088 696 2 0.4236 10.38990 22.84180 2.11190 0 0 0 + 2089 697 1 -0.8472 6.89586 12.57425 5.59544 1 0 0 + 2090 697 2 0.4236 7.86359 12.41511 5.40009 1 0 0 + 2091 697 2 0.4236 6.36472 12.47100 4.75450 1 0 0 + 2092 698 1 -0.8472 11.02525 29.55403 32.81425 0 -1 0 + 2093 698 2 0.4236 11.59857 29.39549 33.61808 0 -1 0 + 2094 698 2 0.4236 11.36970 29.01280 32.04720 0 -1 0 + 2095 699 1 -0.8472 5.68577 15.11644 32.97163 1 0 0 + 2096 699 2 0.4236 6.45292 15.18049 32.33340 1 0 0 + 2097 699 2 0.4236 5.80866 15.78136 33.70833 1 0 0 + 2098 700 1 
-0.8472 21.25974 27.21306 29.97592 0 0 0 + 2099 700 2 0.4236 21.13419 27.98997 30.59288 0 0 0 + 2100 700 2 0.4236 20.57820 27.25410 29.24532 0 0 0 + 2101 701 1 -0.8472 6.19813 8.13823 30.34241 0 0 0 + 2102 701 2 0.4236 7.03828 7.67736 30.05652 0 0 0 + 2103 701 2 0.4236 5.46127 7.46723 30.42450 0 0 0 + 2104 702 1 -0.8472 32.41076 28.40941 3.86814 0 0 0 + 2105 702 2 0.4236 32.55062 27.42269 3.95038 0 0 0 + 2106 702 2 0.4236 31.47031 28.63505 4.12222 0 0 0 + 2107 703 1 -0.8472 23.98855 26.62099 31.21987 0 0 0 + 2108 703 2 0.4236 23.15933 26.86421 30.71666 0 0 0 + 2109 703 2 0.4236 24.52935 27.44488 31.38923 0 0 0 + 2110 704 1 -0.8472 28.50283 29.57437 20.87556 -1 -1 0 + 2111 704 2 0.4236 29.24214 30.01036 20.36250 -1 -1 0 + 2112 704 2 0.4236 28.88225 29.10445 21.67252 -1 -1 0 + 2113 705 1 -0.8472 27.75031 21.27721 18.51655 0 0 0 + 2114 705 2 0.4236 27.38498 20.41242 18.17211 0 0 0 + 2115 705 2 0.4236 28.28475 21.10710 19.34442 0 0 0 + 2116 706 1 -0.8472 25.25588 27.43813 16.54983 0 0 0 + 2117 706 2 0.4236 25.68771 28.33889 16.59591 0 0 0 + 2118 706 2 0.4236 24.90231 27.28485 15.62711 0 0 0 + 2119 707 1 -0.8472 5.61274 2.12353 21.50202 0 1 0 + 2120 707 2 0.4236 4.74517 2.16269 21.99775 0 1 0 + 2121 707 2 0.4236 5.95246 1.18305 21.49858 0 1 0 + 2122 708 1 -0.8472 14.20520 17.29851 2.13547 -1 0 0 + 2123 708 2 0.4236 13.44849 17.62485 1.56903 -1 0 0 + 2124 708 2 0.4236 13.92488 17.29329 3.09534 -1 0 0 +2125 709 1 -0.8472 33.77325 2.92399 0.15598 -1 1 0 +2126 709 2 0.4236 33.43998 3.50499 34.86065 -1 1 -1 +2127 709 2 0.4236 33.65455 3.39756 1.02868 -1 1 0 + 2128 710 1 -0.8472 0.22885 13.68528 9.35288 0 0 0 + 2129 710 2 0.4236 0.61334 13.37045 10.22063 0 0 0 + 2130 710 2 0.4236 0.84262 13.43233 8.60505 0 0 0 + 2131 711 1 -0.8472 27.57011 24.36623 33.91899 0 0 0 + 2132 711 2 0.4236 26.97918 24.28932 33.11594 0 0 0 + 2133 711 2 0.4236 28.12870 23.54099 34.00201 0 0 0 + 2134 712 1 -0.8472 18.75243 23.43472 22.11490 0 1 0 + 2135 712 2 0.4236 17.78133 23.32273 21.90431 0 1 0 + 
2136 712 2 0.4236 19.23205 23.77381 21.30561 0 1 0 + 2137 713 1 -0.8472 2.29530 10.87158 1.51153 0 1 0 + 2138 713 2 0.4236 2.47430 11.79162 1.86004 0 1 0 + 2139 713 2 0.4236 2.87595 10.21436 1.99197 0 1 0 + 2140 714 1 -0.8472 25.61886 25.89666 25.24432 0 -1 0 + 2141 714 2 0.4236 25.42787 26.64268 25.88223 0 -1 0 + 2142 714 2 0.4236 26.42453 25.39162 25.55370 0 -1 0 + 2143 715 1 -0.8472 16.92797 16.32560 9.80037 0 0 0 + 2144 715 2 0.4236 17.19604 17.12020 9.25562 0 0 0 + 2145 715 2 0.4236 17.34592 16.38653 10.70675 0 0 0 + 2146 716 1 -0.8472 10.47600 17.21124 29.11058 1 0 0 + 2147 716 2 0.4236 10.94213 18.08313 29.26056 1 0 0 + 2148 716 2 0.4236 10.16834 16.84254 29.98769 1 0 0 + 2149 717 1 -0.8472 16.06644 22.36974 1.69499 0 0 0 + 2150 717 2 0.4236 15.33260 23.01429 1.48056 0 0 0 + 2151 717 2 0.4236 16.89555 22.63246 1.20148 0 0 0 + 2152 718 1 -0.8472 16.33203 27.58130 32.80553 0 0 0 + 2153 718 2 0.4236 15.97264 26.66864 32.61096 0 0 0 + 2154 718 2 0.4236 15.82791 28.25908 32.27034 0 0 0 + 2155 719 1 -0.8472 20.49220 11.62449 35.26514 1 0 0 + 2156 719 2 0.4236 21.01717 12.11061 34.56651 1 0 0 + 2157 719 2 0.4236 19.68795 11.20396 34.84526 1 0 0 + 2158 720 1 -0.8472 30.40952 26.57771 6.51282 0 0 0 + 2159 720 2 0.4236 30.16665 26.78894 5.56606 0 0 0 + 2160 720 2 0.4236 30.45919 25.58598 6.63086 0 0 0 + 2161 721 1 -0.8472 12.37322 16.37428 4.55968 0 0 0 + 2162 721 2 0.4236 12.38784 15.52288 4.03541 0 0 0 + 2163 721 2 0.4236 11.65794 16.97416 4.20127 0 0 0 + 2164 722 1 -0.8472 33.56731 11.90620 21.46299 0 0 0 + 2165 722 2 0.4236 33.23117 12.42030 20.67390 0 0 0 + 2166 722 2 0.4236 32.79888 11.66522 22.05577 0 0 0 + 2167 723 1 -0.8472 31.32107 31.23181 1.41386 -1 0 0 + 2168 723 2 0.4236 31.49725 32.04700 0.86213 -1 0 0 + 2169 723 2 0.4236 31.18817 30.44435 0.81204 -1 0 0 + 2170 724 1 -0.8472 28.87096 19.63852 21.22815 -1 -1 0 + 2171 724 2 0.4236 29.73458 19.39950 20.78432 -1 -1 0 + 2172 724 2 0.4236 28.83879 20.62564 21.38486 -1 -1 0 + 2173 725 1 -0.8472 4.42927 
23.17098 25.42492 -1 -1 0 + 2174 725 2 0.4236 3.94617 23.57806 24.64980 -1 -1 0 + 2175 725 2 0.4236 4.55071 22.19140 25.26477 -1 -1 0 + 2176 726 1 -0.8472 24.00712 31.99530 18.18540 -1 0 0 + 2177 726 2 0.4236 23.23226 32.61152 18.04443 -1 0 0 + 2178 726 2 0.4236 23.69523 31.04742 18.12064 -1 0 0 + 2179 727 1 -0.8472 12.59691 11.80803 19.01561 -1 0 0 + 2180 727 2 0.4236 12.20861 11.09527 18.43151 -1 0 0 + 2181 727 2 0.4236 12.30089 12.70527 18.68815 -1 0 0 + 2182 728 1 -0.8472 22.60920 27.90494 24.82016 0 0 0 + 2183 728 2 0.4236 23.54351 28.24544 24.92530 0 0 0 + 2184 728 2 0.4236 22.02693 28.30545 25.52765 0 0 0 + 2185 729 1 -0.8472 10.89154 10.44597 15.79910 0 1 0 + 2186 729 2 0.4236 10.71507 9.97810 14.93312 0 1 0 + 2187 729 2 0.4236 10.96922 11.42960 15.63671 0 1 0 + 2188 730 1 -0.8472 0.08050 9.79588 3.64393 1 0 0 + 2189 730 2 0.4236 35.44276 8.87250 3.99968 0 0 0 + 2190 730 2 0.4236 0.10043 9.76868 2.64453 1 0 0 + 2191 731 1 -0.8472 30.40682 21.08786 5.84641 -1 -1 0 + 2192 731 2 0.4236 30.08055 20.97761 4.90763 -1 -1 0 + 2193 731 2 0.4236 30.13488 20.29333 6.38928 -1 -1 0 + 2194 732 1 -0.8472 24.67265 21.92020 3.54047 0 0 0 + 2195 732 2 0.4236 24.31684 22.84343 3.39577 0 0 0 + 2196 732 2 0.4236 25.25080 21.90987 4.35632 0 0 0 + 2197 733 1 -0.8472 28.95227 32.91147 27.71368 -1 0 0 + 2198 733 2 0.4236 28.22236 33.56969 27.89800 -1 0 0 + 2199 733 2 0.4236 29.69126 33.36429 27.21490 -1 0 0 + 2200 734 1 -0.8472 30.03521 19.21092 7.99937 0 0 0 + 2201 734 2 0.4236 30.75755 18.74029 8.50601 0 0 0 + 2202 734 2 0.4236 29.43004 19.68149 8.64149 0 0 0 + 2203 735 1 -0.8472 24.49927 23.34755 12.15367 0 -1 0 + 2204 735 2 0.4236 25.45013 23.07497 12.30028 0 -1 0 + 2205 735 2 0.4236 23.95535 22.54362 11.91337 0 -1 0 + 2206 736 1 -0.8472 31.77314 12.63461 3.01134 -1 0 0 + 2207 736 2 0.4236 30.89137 12.32301 3.36544 -1 0 0 + 2208 736 2 0.4236 32.34380 12.95066 3.76924 -1 0 0 + 2209 737 1 -0.8472 14.30093 32.51323 11.88551 1 -1 0 + 2210 737 2 0.4236 15.16348 32.00737 11.89441 1 
-1 0 + 2211 737 2 0.4236 14.04074 32.74491 12.82281 1 -1 0 + 2212 738 1 -0.8472 29.43476 16.82816 3.26399 1 0 0 + 2213 738 2 0.4236 29.24168 16.37876 4.13619 1 0 0 + 2214 738 2 0.4236 30.06337 17.59119 3.41419 1 0 0 + 2215 739 1 -0.8472 16.93347 0.29074 22.92551 0 1 0 + 2216 739 2 0.4236 17.69583 35.41574 23.44835 0 0 0 + 2217 739 2 0.4236 16.96046 35.44329 21.99062 0 0 0 + 2218 740 1 -0.8472 6.39175 32.07466 27.11458 1 -1 0 + 2219 740 2 0.4236 7.22451 31.66481 27.48671 1 -1 0 + 2220 740 2 0.4236 6.37538 33.05084 27.33071 1 -1 0 + 2221 741 1 -0.8472 11.38564 0.73620 34.44418 0 0 0 + 2222 741 2 0.4236 11.86164 0.78090 33.56590 0 0 0 + 2223 741 2 0.4236 11.33377 35.29097 34.74711 0 -1 0 + 2224 742 1 -0.8472 5.62992 0.95326 5.96640 0 1 0 + 2225 742 2 0.4236 5.57858 0.12421 6.52317 0 1 0 + 2226 742 2 0.4236 6.52933 1.00840 5.53281 0 1 0 + 2227 743 1 -0.8472 31.44385 14.78115 9.52565 0 1 0 + 2228 743 2 0.4236 30.75006 15.29894 9.02517 0 1 0 + 2229 743 2 0.4236 32.31791 15.26482 9.48035 0 1 0 + 2230 744 1 -0.8472 4.14274 30.46651 11.25686 0 0 0 + 2231 744 2 0.4236 4.24418 30.27143 10.28139 0 0 0 + 2232 744 2 0.4236 4.98630 30.87589 11.60442 0 0 0 + 2233 745 1 -0.8472 2.68589 19.08193 19.35984 1 0 0 + 2234 745 2 0.4236 2.25025 18.54564 20.08271 1 0 0 + 2235 745 2 0.4236 2.89396 19.99767 19.70350 1 0 0 + 2236 746 1 -0.8472 22.93007 35.32345 10.64943 0 0 0 + 2237 746 2 0.4236 23.02317 34.50265 11.21294 0 0 0 + 2238 746 2 0.4236 23.03648 35.07981 9.68545 0 0 0 + 2239 747 1 -0.8472 21.42354 28.83286 27.26180 0 0 0 + 2240 747 2 0.4236 20.97399 28.00329 27.59298 0 0 0 + 2241 747 2 0.4236 20.73823 29.54462 27.10803 0 0 0 + 2242 748 1 -0.8472 30.85260 9.03928 4.70154 0 0 0 + 2243 748 2 0.4236 30.83123 8.53171 5.56283 0 0 0 + 2244 748 2 0.4236 30.48394 9.95743 4.84645 0 0 0 + 2245 749 1 -0.8472 4.86366 22.69509 4.35096 0 -1 0 + 2246 749 2 0.4236 5.74042 23.17460 4.38730 0 -1 0 + 2247 749 2 0.4236 4.79229 22.19598 3.48739 0 -1 0 + 2248 750 1 -0.8472 13.44540 3.46032 20.90541 0 0 0 
+ 2249 750 2 0.4236 14.28871 3.71208 20.43070 0 0 0 + 2250 750 2 0.4236 13.34566 2.46533 20.90261 0 0 0 + 2251 751 1 -0.8472 22.15447 21.84585 0.90028 -1 0 0 + 2252 751 2 0.4236 22.49980 20.96721 1.22992 -1 0 0 + 2253 751 2 0.4236 21.26628 22.03343 1.31967 -1 0 0 + 2254 752 1 -0.8472 9.81374 3.47732 1.49633 0 1 0 + 2255 752 2 0.4236 9.63291 4.24716 2.10834 0 1 0 + 2256 752 2 0.4236 9.29957 2.67749 1.80593 0 1 0 + 2257 753 1 -0.8472 25.04380 17.85028 9.69757 -1 0 0 + 2258 753 2 0.4236 24.14790 17.88394 10.14048 -1 0 0 + 2259 753 2 0.4236 25.75957 17.81957 10.39524 -1 0 0 + 2260 754 1 -0.8472 15.18930 12.41305 1.71036 0 1 0 + 2261 754 2 0.4236 15.50981 12.43400 0.76337 0 1 0 + 2262 754 2 0.4236 15.83054 11.88561 2.26766 0 1 0 + 2263 755 1 -0.8472 10.82347 25.41843 22.69425 0 0 0 + 2264 755 2 0.4236 11.56321 24.78166 22.91154 0 0 0 + 2265 755 2 0.4236 11.20822 26.31734 22.48472 0 0 0 + 2266 756 1 -0.8472 18.25594 35.02978 9.19947 1 -1 0 + 2267 756 2 0.4236 17.84180 0.07856 8.47819 1 0 0 + 2268 756 2 0.4236 17.96845 35.37547 10.09267 1 -1 0 + 2269 757 1 -0.8472 20.67847 9.17313 32.72868 -2 0 0 + 2270 757 2 0.4236 21.62862 9.12414 33.03650 -2 0 0 + 2271 757 2 0.4236 20.51184 8.46081 32.04696 -2 0 0 + 2272 758 1 -0.8472 26.32004 10.13277 13.93904 0 0 0 + 2273 758 2 0.4236 26.65302 10.03054 14.87641 0 0 0 + 2274 758 2 0.4236 25.48681 10.68560 13.93975 0 0 0 + 2275 759 1 -0.8472 0.82292 13.72334 27.81676 0 0 0 + 2276 759 2 0.4236 0.81930 14.69363 27.57501 0 0 0 + 2277 759 2 0.4236 1.36210 13.21465 27.14560 0 0 0 +2278 760 1 -0.8472 21.84848 3.72532 35.36222 1 0 0 +2279 760 2 0.4236 22.80203 3.91962 0.14510 1 0 1 +2280 760 2 0.4236 21.26702 3.93165 0.70199 1 0 1 + 2281 761 1 -0.8472 19.27100 12.35007 7.94418 0 -1 0 + 2282 761 2 0.4236 18.53371 12.99546 7.74468 0 -1 0 + 2283 761 2 0.4236 20.09563 12.62243 7.44845 0 -1 0 + 2284 762 1 -0.8472 29.80637 8.79620 16.49510 0 1 0 + 2285 762 2 0.4236 30.54579 9.39281 16.80697 0 1 0 + 2286 762 2 0.4236 30.06674 7.84108 16.63611 0 1 0 
+ 2287 763 1 -0.8472 27.86105 15.07042 19.60552 0 0 0 + 2288 763 2 0.4236 28.38626 14.86999 20.43254 0 0 0 + 2289 763 2 0.4236 28.25993 15.86093 19.14081 0 0 0 + 2290 764 1 -0.8472 23.97935 35.02669 31.20469 0 1 0 + 2291 764 2 0.4236 23.16342 35.21431 31.75150 0 1 0 + 2292 764 2 0.4236 24.57607 34.39674 31.70170 0 1 0 + 2293 765 1 -0.8472 28.68895 19.26482 23.89389 0 -1 0 + 2294 765 2 0.4236 29.47375 19.59056 24.42113 0 -1 0 + 2295 765 2 0.4236 28.75893 19.59595 22.95294 0 -1 0 + 2296 766 1 -0.8472 20.73446 23.11914 11.38748 0 -1 0 + 2297 766 2 0.4236 21.15404 22.50279 12.05385 0 -1 0 + 2298 766 2 0.4236 21.43213 23.44527 10.74964 0 -1 0 + 2299 767 1 -0.8472 5.76836 34.80673 20.93589 0 0 0 + 2300 767 2 0.4236 5.91884 33.81987 20.87765 0 0 0 + 2301 767 2 0.4236 5.27138 35.11652 20.12532 0 0 0 + 2302 768 1 -0.8472 26.37188 21.61587 25.72215 -1 -1 0 + 2303 768 2 0.4236 25.53695 21.94736 25.28292 -1 -1 0 + 2304 768 2 0.4236 27.13913 21.70066 25.08644 -1 -1 0 + 2305 769 1 -0.8472 31.19538 33.50312 26.15716 0 0 0 + 2306 769 2 0.4236 31.15565 33.39063 25.16432 0 0 0 + 2307 769 2 0.4236 31.86528 32.86595 26.53821 0 0 0 + 2308 770 1 -0.8472 24.54540 11.80739 22.67823 1 1 0 + 2309 770 2 0.4236 24.18408 12.50615 22.06088 1 1 0 + 2310 770 2 0.4236 25.52601 11.69543 22.51747 1 1 0 + 2311 771 1 -0.8472 5.96311 23.16757 33.38140 1 0 0 + 2312 771 2 0.4236 5.33976 22.81627 34.07999 1 0 0 + 2313 771 2 0.4236 5.56640 23.98337 32.96057 1 0 0 + 2314 772 1 -0.8472 5.02457 11.13969 19.20624 1 0 0 + 2315 772 2 0.4236 5.60681 10.82397 19.95542 1 0 0 + 2316 772 2 0.4236 5.03100 12.13923 19.17757 1 0 0 + 2317 773 1 -0.8472 22.69289 11.29961 3.68803 0 0 0 + 2318 773 2 0.4236 23.22768 12.13699 3.57506 0 0 0 + 2319 773 2 0.4236 22.64169 10.81637 2.81407 0 0 0 + 2320 774 1 -0.8472 13.27284 8.09133 34.27364 0 0 0 + 2321 774 2 0.4236 12.32436 7.84246 34.46966 0 0 0 + 2322 774 2 0.4236 13.86320 7.29815 34.42305 0 0 0 + 2323 775 1 -0.8472 34.14695 7.38518 17.29660 -1 1 0 + 2324 775 2 0.4236 34.85981 
8.00973 17.61548 -1 1 0 + 2325 775 2 0.4236 33.33691 7.48069 17.87510 -1 1 0 + 2326 776 1 -0.8472 20.66778 14.07322 27.68068 0 1 0 + 2327 776 2 0.4236 20.85554 15.05125 27.77103 0 1 0 + 2328 776 2 0.4236 20.64343 13.65329 28.58788 0 1 0 + 2329 777 1 -0.8472 6.84647 29.77811 7.44210 0 0 0 + 2330 777 2 0.4236 7.16340 29.12266 8.12754 0 0 0 + 2331 777 2 0.4236 5.94607 30.12533 7.70416 0 0 0 + 2332 778 1 -0.8472 8.84506 31.91105 32.16028 -1 -1 0 + 2333 778 2 0.4236 8.21208 31.23536 32.53808 -1 -1 0 + 2334 778 2 0.4236 9.75068 31.49931 32.05904 -1 -1 0 + 2335 779 1 -0.8472 12.70318 30.32629 23.63215 0 0 0 + 2336 779 2 0.4236 12.38699 31.24576 23.39851 0 0 0 + 2337 779 2 0.4236 12.43482 30.11006 24.57086 0 0 0 + 2338 780 1 -0.8472 21.76695 30.45784 17.16849 0 -1 0 + 2339 780 2 0.4236 21.33757 30.08595 16.34555 0 -1 0 + 2340 780 2 0.4236 21.12138 31.05996 17.63824 0 -1 0 + 2341 781 1 -0.8472 6.27827 16.18969 19.53970 0 0 0 + 2342 781 2 0.4236 6.77621 16.02181 20.39046 0 0 0 + 2343 781 2 0.4236 6.29187 17.16799 19.33300 0 0 0 + 2344 782 1 -0.8472 6.99905 25.03102 14.18710 0 0 0 + 2345 782 2 0.4236 6.99187 24.03133 14.21039 0 0 0 + 2346 782 2 0.4236 7.78825 25.34645 13.66025 0 0 0 +2347 783 1 -0.8472 7.49439 35.07402 0.57318 0 0 0 +2348 783 2 0.4236 7.72721 0.45223 0.97731 0 1 0 +2349 783 2 0.4236 8.10031 34.89152 35.24610 0 0 -1 + 2350 784 1 -0.8472 6.48253 7.54091 20.64983 0 1 0 + 2351 784 2 0.4236 7.23016 7.35251 21.28664 0 1 0 + 2352 784 2 0.4236 6.65080 7.06313 19.78761 0 1 0 + 2353 785 1 -0.8472 5.23939 9.16145 11.08029 1 1 0 + 2354 785 2 0.4236 5.14998 8.69820 10.19862 1 1 0 + 2355 785 2 0.4236 4.33355 9.42580 11.41127 1 1 0 + 2356 786 1 -0.8472 27.63885 15.53268 26.97939 0 0 0 + 2357 786 2 0.4236 27.56862 16.48567 26.68472 0 0 0 + 2358 786 2 0.4236 28.27637 15.46727 27.74700 0 0 0 + 2359 787 1 -0.8472 22.84734 5.25153 24.86060 0 0 0 + 2360 787 2 0.4236 23.62550 4.64803 25.03440 0 0 0 + 2361 787 2 0.4236 23.14815 6.20420 24.90392 0 0 0 + 2362 788 1 -0.8472 29.52317 
32.08275 3.42793 0 -1 0 + 2363 788 2 0.4236 29.87256 31.71462 4.28953 0 -1 0 + 2364 788 2 0.4236 30.13165 31.81621 2.68048 0 -1 0 + 2365 789 1 -0.8472 13.98726 20.89644 14.31914 0 1 0 + 2366 789 2 0.4236 13.71170 20.12407 14.89136 0 1 0 + 2367 789 2 0.4236 13.20414 21.20918 13.78169 0 1 0 + 2368 790 1 -0.8472 8.36348 15.60252 2.20981 0 1 0 + 2369 790 2 0.4236 7.40456 15.45392 2.45137 0 1 0 + 2370 790 2 0.4236 8.45076 15.66632 1.21569 0 1 0 + 2371 791 1 -0.8472 25.80754 29.12614 29.49717 0 0 0 + 2372 791 2 0.4236 25.48585 29.29941 30.42801 0 0 0 + 2373 791 2 0.4236 25.18000 29.54909 28.84349 0 0 0 + 2374 792 1 -0.8472 21.33150 23.75797 26.23790 0 0 0 + 2375 792 2 0.4236 21.19338 22.77082 26.31770 0 0 0 + 2376 792 2 0.4236 21.28495 24.17549 27.14535 0 0 0 + 2377 793 1 -0.8472 32.60946 28.83470 33.79971 0 0 0 + 2378 793 2 0.4236 32.86613 29.79320 33.67597 0 0 0 + 2379 793 2 0.4236 32.61258 28.61201 34.77458 0 0 0 + 2380 794 1 -0.8472 22.06496 16.58778 18.19685 0 0 0 + 2381 794 2 0.4236 22.26078 17.03822 19.06789 0 0 0 + 2382 794 2 0.4236 21.18163 16.90294 17.84982 0 0 0 + 2383 795 1 -0.8472 10.53697 29.73866 12.93262 1 1 0 + 2384 795 2 0.4236 9.76680 30.37241 13.00426 1 1 0 + 2385 795 2 0.4236 11.31257 30.20358 12.50573 1 1 0 + 2386 796 1 -0.8472 17.15354 2.96270 22.81665 0 0 0 + 2387 796 2 0.4236 16.88895 2.00964 22.96368 0 0 0 + 2388 796 2 0.4236 17.93997 3.18308 23.39362 0 0 0 + 2389 797 1 -0.8472 2.84564 16.62453 31.83402 1 0 0 + 2390 797 2 0.4236 3.10323 15.76698 31.38884 1 0 0 + 2391 797 2 0.4236 2.03370 16.47622 32.39859 1 0 0 + 2392 798 1 -0.8472 17.81943 7.51506 34.65704 0 0 0 + 2393 798 2 0.4236 18.80665 7.65481 34.73307 0 0 0 + 2394 798 2 0.4236 17.61228 6.54140 34.75225 0 0 0 + 2395 799 1 -0.8472 26.88072 17.82532 18.49156 0 -1 0 + 2396 799 2 0.4236 26.11776 17.51858 17.92256 0 -1 0 + 2397 799 2 0.4236 26.52995 18.16675 19.36352 0 -1 0 + 2398 800 1 -0.8472 19.05473 1.05564 26.01912 0 1 0 + 2399 800 2 0.4236 18.53612 0.26365 25.69698 0 1 0 + 2400 800 2 
0.4236 20.00776 0.97065 25.72849 0 1 0 + 2401 801 1 -0.8472 1.10090 34.65231 5.12389 1 -2 0 + 2402 801 2 0.4236 0.88676 33.69453 4.93230 1 -2 0 + 2403 801 2 0.4236 0.81296 34.87856 6.05441 1 -2 0 + 2404 802 1 -0.8472 3.04415 35.09483 27.41560 -1 -1 0 + 2405 802 2 0.4236 2.04659 35.14369 27.46491 -1 -1 0 + 2406 802 2 0.4236 3.36467 34.29913 27.92952 -1 -1 0 + 2407 803 1 -0.8472 14.12553 17.71309 32.84625 1 0 0 + 2408 803 2 0.4236 14.80874 18.03250 33.50288 1 0 0 + 2409 803 2 0.4236 13.54887 18.48031 32.56548 1 0 0 + 2410 804 1 -0.8472 20.95283 30.69605 2.27698 1 -1 0 + 2411 804 2 0.4236 21.43674 31.56358 2.16209 1 -1 0 + 2412 804 2 0.4236 21.10945 30.34685 3.20082 1 -1 0 + 2413 805 1 -0.8472 16.96120 25.71167 27.42127 0 -1 0 + 2414 805 2 0.4236 17.00892 25.25432 26.53332 0 -1 0 + 2415 805 2 0.4236 16.03527 26.05992 27.56749 0 -1 0 + 2416 806 1 -0.8472 28.75134 33.83258 11.82146 0 -1 0 + 2417 806 2 0.4236 29.41268 33.26260 12.30897 0 -1 0 + 2418 806 2 0.4236 28.26501 34.41222 12.47525 0 -1 0 + 2419 807 1 -0.8472 13.89605 4.37296 31.95128 0 0 0 + 2420 807 2 0.4236 13.85063 3.37752 32.03487 0 0 0 + 2421 807 2 0.4236 14.53001 4.61728 31.21756 0 0 0 + 2422 808 1 -0.8472 34.19027 7.66458 13.39454 -1 0 0 + 2423 808 2 0.4236 34.28081 6.84995 12.82167 -1 0 0 + 2424 808 2 0.4236 33.22886 7.79758 13.63529 -1 0 0 + 2425 809 1 -0.8472 2.17980 12.87336 21.40333 0 0 0 + 2426 809 2 0.4236 3.07992 12.88277 20.96786 0 0 0 + 2427 809 2 0.4236 2.18575 13.48520 22.19423 0 0 0 + 2428 810 1 -0.8472 20.02462 29.93870 11.78367 0 -1 0 + 2429 810 2 0.4236 20.80388 29.40100 12.10550 0 -1 0 + 2430 810 2 0.4236 19.99075 30.80637 12.27962 0 -1 0 + 2431 811 1 -0.8472 12.68365 8.32013 9.63150 0 0 0 + 2432 811 2 0.4236 12.61121 7.49198 10.18724 0 0 0 + 2433 811 2 0.4236 11.84641 8.43744 9.09745 0 0 0 + 2434 812 1 -0.8472 27.17613 35.07242 17.94308 0 0 0 + 2435 812 2 0.4236 26.68391 0.14013 17.28876 0 1 0 + 2436 812 2 0.4236 26.95688 35.36482 18.87389 0 0 0 + 2437 813 1 -0.8472 23.49934 14.01290 
4.26713 0 0 0 + 2438 813 2 0.4236 22.61339 14.33182 3.93045 0 0 0 + 2439 813 2 0.4236 23.75755 14.53985 5.07682 0 0 0 + 2440 814 1 -0.8472 13.20350 1.23982 26.22755 0 0 0 + 2441 814 2 0.4236 12.88408 2.07198 25.77429 0 0 0 + 2442 814 2 0.4236 13.26656 1.40003 27.21260 0 0 0 + 2443 815 1 -0.8472 32.61237 13.56172 31.22696 0 0 0 + 2444 815 2 0.4236 32.55463 13.01499 32.06223 0 0 0 + 2445 815 2 0.4236 32.54636 14.53225 31.45860 0 0 0 + 2446 816 1 -0.8472 11.36654 10.00604 5.49041 1 0 0 + 2447 816 2 0.4236 11.88120 10.10621 6.34193 1 0 0 + 2448 816 2 0.4236 12.00022 9.99830 4.71689 1 0 0 + 2449 817 1 -0.8472 9.17409 26.41828 20.72491 0 0 0 + 2450 817 2 0.4236 8.20218 26.24356 20.56724 0 0 0 + 2451 817 2 0.4236 9.53423 25.74235 21.36785 0 0 0 + 2452 818 1 -0.8472 9.87087 7.59502 4.03917 0 0 0 + 2453 818 2 0.4236 10.28509 7.32965 4.90975 0 0 0 + 2454 818 2 0.4236 10.18784 8.50846 3.78395 0 0 0 + 2455 819 1 -0.8472 20.55690 13.77494 1.37155 0 0 0 + 2456 819 2 0.4236 20.36111 13.42403 2.28724 0 0 0 + 2457 819 2 0.4236 20.65014 13.00995 0.73433 0 0 0 + 2458 820 1 -0.8472 29.93334 5.93329 4.91025 -1 1 0 + 2459 820 2 0.4236 30.44733 6.10116 5.75141 -1 1 0 + 2460 820 2 0.4236 30.43331 5.28306 4.33824 -1 1 0 + 2461 821 1 -0.8472 12.51631 1.74380 15.29578 -1 0 0 + 2462 821 2 0.4236 12.90148 1.92344 14.39060 -1 0 0 + 2463 821 2 0.4236 12.50505 2.59074 15.82728 -1 0 0 + 2464 822 1 -0.8472 11.62432 32.78866 23.82742 0 0 0 + 2465 822 2 0.4236 11.47814 32.97102 24.79971 0 0 0 + 2466 822 2 0.4236 10.99539 33.34611 23.28551 0 0 0 + 2467 823 1 -0.8472 13.06353 18.99199 9.84223 -1 0 0 + 2468 823 2 0.4236 12.64747 18.31758 9.23233 -1 0 0 + 2469 823 2 0.4236 13.60323 19.63963 9.30439 -1 0 0 + 2470 824 1 -0.8472 26.02381 14.57180 17.78265 0 0 0 + 2471 824 2 0.4236 25.40295 15.34481 17.91249 0 0 0 + 2472 824 2 0.4236 26.77232 14.63089 18.44310 0 0 0 + 2473 825 1 -0.8472 27.04004 16.24973 1.75746 -1 0 0 + 2474 825 2 0.4236 28.02140 16.29883 1.94311 -1 0 0 + 2475 825 2 0.4236 26.63441 15.51478 
2.30084 -1 0 0 + 2476 826 1 -0.8472 15.35923 20.72413 28.72655 -1 0 0 + 2477 826 2 0.4236 15.98094 21.50730 28.71864 -1 0 0 + 2478 826 2 0.4236 15.88483 19.88260 28.85111 -1 0 0 + 2479 827 1 -0.8472 27.10562 30.50244 27.42418 0 0 0 + 2480 827 2 0.4236 26.85157 29.94872 28.21717 0 0 0 + 2481 827 2 0.4236 27.49967 31.36762 27.73431 0 0 0 + 2482 828 1 -0.8472 24.97410 18.81542 24.69035 0 -1 0 + 2483 828 2 0.4236 25.47556 19.25392 23.94454 0 -1 0 + 2484 828 2 0.4236 24.12908 19.32039 24.86599 0 -1 0 + 2485 829 1 -0.8472 29.23419 2.55747 15.47922 0 0 0 + 2486 829 2 0.4236 29.50393 2.04589 14.66347 0 0 0 + 2487 829 2 0.4236 28.38704 3.05587 15.29511 0 0 0 + 2488 830 1 -0.8472 23.98692 0.63026 13.05063 0 0 0 + 2489 830 2 0.4236 23.91026 1.62337 13.13896 0 0 0 + 2490 830 2 0.4236 23.75542 0.35991 12.11613 0 0 0 + 2491 831 1 -0.8472 30.84135 19.97655 31.52762 0 0 0 + 2492 831 2 0.4236 30.18071 19.23471 31.64257 0 0 0 + 2493 831 2 0.4236 30.39532 20.75442 31.08499 0 0 0 + 2494 832 1 -0.8472 30.16396 12.12918 29.67611 -1 0 0 + 2495 832 2 0.4236 29.80722 12.67140 30.43684 -1 0 0 + 2496 832 2 0.4236 30.99193 11.65024 29.96762 -1 0 0 + 2497 833 1 -0.8472 8.49954 25.11534 30.47891 0 0 0 + 2498 833 2 0.4236 9.25949 24.81659 31.05610 0 0 0 + 2499 833 2 0.4236 7.88880 25.70294 31.00970 0 0 0 + 2500 834 1 -0.8472 5.73690 9.02997 5.59303 0 0 0 + 2501 834 2 0.4236 6.08273 9.73744 6.20935 0 0 0 + 2502 834 2 0.4236 6.03640 8.13191 5.91504 0 0 0 + 2503 835 1 -0.8472 11.25292 34.76600 31.52106 1 0 0 + 2504 835 2 0.4236 12.23126 34.77144 31.31419 1 0 0 + 2505 835 2 0.4236 10.93027 0.19972 31.63102 1 1 0 + 2506 836 1 -0.8472 5.35443 22.01777 22.23507 1 -1 0 + 2507 836 2 0.4236 5.97242 21.37209 22.68358 1 -1 0 + 2508 836 2 0.4236 5.66259 22.95233 22.41273 1 -1 0 + 2509 837 1 -0.8472 33.55893 31.09610 3.22281 -1 0 0 + 2510 837 2 0.4236 32.83407 31.61340 2.76793 -1 0 0 + 2511 837 2 0.4236 33.28962 30.13559 3.29237 -1 0 0 + 2512 838 1 -0.8472 25.96434 29.58289 7.26377 0 -1 0 + 2513 838 2 0.4236 
26.80640 29.16573 6.92195 0 -1 0 + 2514 838 2 0.4236 25.58609 30.19055 6.56546 0 -1 0 + 2515 839 1 -0.8472 33.87842 4.18772 2.43606 0 1 0 + 2516 839 2 0.4236 34.84944 3.98367 2.55999 0 1 0 + 2517 839 2 0.4236 33.75503 5.17533 2.33928 0 1 0 + 2518 840 1 -0.8472 13.81168 25.37248 3.67048 1 0 0 + 2519 840 2 0.4236 13.19977 26.10015 3.36064 1 0 0 + 2520 840 2 0.4236 14.72533 25.52182 3.29245 1 0 0 + 2521 841 1 -0.8472 30.88527 4.38191 2.92050 0 0 0 + 2522 841 2 0.4236 31.38047 3.51892 2.82042 0 0 0 + 2523 841 2 0.4236 30.98738 4.92349 2.08611 0 0 0 + 2524 842 1 -0.8472 12.07003 4.99198 33.85700 1 0 0 + 2525 842 2 0.4236 12.07748 4.21318 34.48420 1 0 0 + 2526 842 2 0.4236 12.88025 4.95661 33.27201 1 0 0 + 2527 843 1 -0.8472 7.37641 22.99672 25.99460 1 -1 0 + 2528 843 2 0.4236 7.72980 23.93199 26.01296 1 -1 0 + 2529 843 2 0.4236 6.39689 23.01440 25.79418 1 -1 0 + 2530 844 1 -0.8472 6.26662 14.45666 7.69016 0 1 0 + 2531 844 2 0.4236 5.26733 14.49397 7.68828 0 1 0 + 2532 844 2 0.4236 6.57485 13.74763 7.05594 0 1 0 + 2533 845 1 -0.8472 3.42084 16.32571 24.55314 0 0 0 + 2534 845 2 0.4236 2.77021 15.56634 24.54874 0 0 0 + 2535 845 2 0.4236 2.94499 17.17090 24.79634 0 0 0 + 2536 846 1 -0.8472 11.89267 13.73016 11.88059 0 0 0 + 2537 846 2 0.4236 11.21898 14.10411 11.24321 0 0 0 + 2538 846 2 0.4236 12.59005 14.42090 12.07165 0 0 0 + 2539 847 1 -0.8472 27.08577 4.13603 19.06782 0 1 0 + 2540 847 2 0.4236 27.68203 3.34019 18.96269 0 1 0 + 2541 847 2 0.4236 27.28308 4.58777 19.93784 0 1 0 + 2542 848 1 -0.8472 27.82810 10.58117 29.11657 0 0 0 + 2543 848 2 0.4236 28.01188 9.59839 29.09796 0 0 0 + 2544 848 2 0.4236 28.69236 11.08086 29.17439 0 0 0 + 2545 849 1 -0.8472 8.69913 11.82528 17.33215 0 0 0 + 2546 849 2 0.4236 8.89196 12.79954 17.21553 0 0 0 + 2547 849 2 0.4236 9.43475 11.28818 16.91944 0 0 0 + 2548 850 1 -0.8472 13.42854 16.97612 24.95943 0 1 0 + 2549 850 2 0.4236 13.93336 16.42101 25.62045 0 1 0 + 2550 850 2 0.4236 13.65180 17.94136 25.09526 0 1 0 + 2551 851 1 -0.8472 
25.29362 29.77548 23.45629 0 -1 0 + 2552 851 2 0.4236 24.94292 29.55669 24.36686 0 -1 0 + 2553 851 2 0.4236 26.16750 30.25393 23.54179 0 -1 0 + 2554 852 1 -0.8472 1.80838 18.66945 8.01684 1 0 0 + 2555 852 2 0.4236 2.29523 17.98783 7.47064 1 0 0 + 2556 852 2 0.4236 1.49659 19.41139 7.42331 1 0 0 + 2557 853 1 -0.8472 30.12316 27.61353 26.54594 -1 -1 0 + 2558 853 2 0.4236 30.51614 26.94751 25.91193 -1 -1 0 + 2559 853 2 0.4236 30.70632 28.42514 26.58050 -1 -1 0 + 2560 854 1 -0.8472 33.18420 17.88586 6.35292 0 0 0 + 2561 854 2 0.4236 32.56568 18.63391 6.59328 0 0 0 + 2562 854 2 0.4236 33.30133 17.28346 7.14243 0 0 0 + 2563 855 1 -0.8472 33.27243 21.18032 2.29436 0 0 0 + 2564 855 2 0.4236 32.31035 20.90983 2.25953 0 0 0 + 2565 855 2 0.4236 33.73202 20.88576 1.45652 0 0 0 + 2566 856 1 -0.8472 28.34333 11.53743 12.37493 0 1 0 + 2567 856 2 0.4236 28.30293 11.14523 11.45596 0 1 0 + 2568 856 2 0.4236 27.66669 11.08888 12.95882 0 1 0 + 2569 857 1 -0.8472 28.30112 2.64491 2.85806 -1 1 0 + 2570 857 2 0.4236 28.30594 2.79162 3.84719 -1 1 0 + 2571 857 2 0.4236 29.22236 2.40458 2.55225 -1 1 0 + 2572 858 1 -0.8472 31.44250 35.43431 23.40153 -1 -1 0 + 2573 858 2 0.4236 32.25162 0.43229 23.70308 -1 0 0 + 2574 858 2 0.4236 31.62859 34.45252 23.43896 -1 -1 0 + 2575 859 1 -0.8472 3.71364 28.20102 15.87903 0 0 0 + 2576 859 2 0.4236 3.95001 28.35402 14.91951 0 0 0 + 2577 859 2 0.4236 3.89064 27.24622 16.11777 0 0 0 + 2578 860 1 -0.8472 10.94561 21.02636 2.86138 0 0 0 + 2579 860 2 0.4236 11.66778 20.96916 3.55069 0 0 0 + 2580 860 2 0.4236 10.18759 20.42872 3.12244 0 0 0 + 2581 861 1 -0.8472 15.57876 3.78370 6.21942 0 1 0 + 2582 861 2 0.4236 15.75140 2.89661 6.64741 0 1 0 + 2583 861 2 0.4236 14.68720 3.76754 5.76683 0 1 0 + 2584 862 1 -0.8472 19.64502 9.99145 5.15182 0 0 0 + 2585 862 2 0.4236 18.87851 10.56189 5.44685 0 0 0 + 2586 862 2 0.4236 19.29720 9.19863 4.65138 0 0 0 + 2587 863 1 -0.8472 3.53630 21.94683 7.08827 0 0 0 + 2588 863 2 0.4236 4.17231 22.62170 7.46241 0 0 0 + 2589 863 2 
0.4236 3.79062 21.73660 6.14431 0 0 0 + 2590 864 1 -0.8472 12.52461 18.64953 6.43774 0 0 0 + 2591 864 2 0.4236 11.73637 18.38334 5.88299 0 0 0 + 2592 864 2 0.4236 12.89090 19.51724 6.10174 0 0 0 + 2593 865 1 -0.8472 23.94939 18.89381 33.06954 0 0 0 + 2594 865 2 0.4236 23.54241 18.01770 32.81123 0 0 0 + 2595 865 2 0.4236 23.22403 19.55824 33.24918 0 0 0 + 2596 866 1 -0.8472 19.85103 7.44596 22.03046 0 -1 0 + 2597 866 2 0.4236 19.50211 7.83913 22.88113 0 -1 0 + 2598 866 2 0.4236 19.83736 8.14114 21.31177 0 -1 0 + 2599 867 1 -0.8472 8.00029 26.63881 28.43389 1 -1 0 + 2600 867 2 0.4236 8.46716 27.49058 28.67157 1 -1 0 + 2601 867 2 0.4236 8.28274 25.91654 29.06517 1 -1 0 + 2602 868 1 -0.8472 23.83651 15.01649 11.21893 0 0 0 + 2603 868 2 0.4236 24.18952 14.99547 10.28358 0 0 0 + 2604 868 2 0.4236 23.14282 15.73247 11.29737 0 0 0 + 2605 869 1 -0.8472 17.62223 5.88933 12.54249 1 1 0 + 2606 869 2 0.4236 16.72054 6.23868 12.28780 1 1 0 + 2607 869 2 0.4236 17.53364 5.28246 13.33229 1 1 0 + 2608 870 1 -0.8472 33.24750 25.79249 27.99548 0 0 0 + 2609 870 2 0.4236 33.02399 26.03127 28.94043 0 0 0 + 2610 870 2 0.4236 32.68988 25.01397 27.70758 0 0 0 + 2611 871 1 -0.8472 8.52461 32.10974 20.14850 0 0 0 + 2612 871 2 0.4236 8.91213 31.73007 19.30847 0 0 0 + 2613 871 2 0.4236 7.58494 31.78581 20.25814 0 0 0 + 2614 872 1 -0.8472 27.82661 4.07116 5.11535 0 -1 0 + 2615 872 2 0.4236 28.49917 4.80088 4.99256 0 -1 0 + 2616 872 2 0.4236 28.22481 3.34080 5.67031 0 -1 0 + 2617 873 1 -0.8472 26.08680 14.33409 3.52290 -1 0 0 + 2618 873 2 0.4236 26.16612 13.67463 2.77538 -1 0 0 + 2619 873 2 0.4236 25.17681 14.26645 3.93187 -1 0 0 + 2620 874 1 -0.8472 30.17457 24.55253 31.22908 -1 0 0 + 2621 874 2 0.4236 29.87880 25.19861 30.52549 -1 0 0 + 2622 874 2 0.4236 30.45908 25.05559 32.04511 -1 0 0 + 2623 875 1 -0.8472 4.56924 31.17933 8.22970 0 -1 0 + 2624 875 2 0.4236 4.00014 31.89659 7.82770 0 -1 0 + 2625 875 2 0.4236 5.22266 31.58961 8.86584 0 -1 0 + 2626 876 1 -0.8472 6.38127 30.89315 16.11454 0 -1 0 
+ 2627 876 2 0.4236 5.40346 30.80048 16.30241 0 -1 0 + 2628 876 2 0.4236 6.63635 31.85960 16.14485 0 -1 0 + 2629 877 1 -0.8472 22.93838 32.25635 1.41573 0 -1 0 + 2630 877 2 0.4236 22.70262 32.88541 2.15644 0 -1 0 + 2631 877 2 0.4236 23.77665 32.56766 0.96817 0 -1 0 + 2632 878 1 -0.8472 26.74043 10.53690 34.14043 -1 0 0 + 2633 878 2 0.4236 26.64411 9.85226 33.41798 -1 0 0 + 2634 878 2 0.4236 27.20748 10.12975 34.92534 -1 0 0 + 2635 879 1 -0.8472 18.12890 0.52898 11.63908 0 1 0 + 2636 879 2 0.4236 17.58828 0.63169 12.47404 0 1 0 + 2637 879 2 0.4236 19.06300 0.84452 11.80590 0 1 0 + 2638 880 1 -0.8472 5.04130 17.24838 27.16687 0 0 0 + 2639 880 2 0.4236 4.48860 16.56822 26.68532 0 0 0 + 2640 880 2 0.4236 4.46897 18.03161 27.40961 0 0 0 + 2641 881 1 -0.8472 0.20092 22.73060 26.74870 0 0 0 + 2642 881 2 0.4236 0.46042 22.50399 25.80995 0 0 0 + 2643 881 2 0.4236 0.98848 22.60455 27.35187 0 0 0 + 2644 882 1 -0.8472 6.67370 10.39196 29.12618 0 1 0 + 2645 882 2 0.4236 7.10798 10.06630 28.28636 0 1 0 + 2646 882 2 0.4236 6.37648 9.60922 29.67288 0 1 0 + 2647 883 1 -0.8472 0.36169 3.59757 32.96571 1 1 0 + 2648 883 2 0.4236 0.87742 2.77151 32.73853 1 1 0 + 2649 883 2 0.4236 0.63239 3.92065 33.87249 1 1 0 + 2650 884 1 -0.8472 1.12537 7.04733 21.97028 -1 0 0 + 2651 884 2 0.4236 2.12283 7.02656 21.90261 -1 0 0 + 2652 884 2 0.4236 0.84706 7.80508 22.56044 -1 0 0 + 2653 885 1 -0.8472 20.46699 7.96130 8.20960 0 -1 0 + 2654 885 2 0.4236 20.71352 7.27486 8.89370 0 -1 0 + 2655 885 2 0.4236 20.65472 8.87456 8.57113 0 -1 0 + 2656 886 1 -0.8472 29.22087 8.66749 31.72871 0 1 0 + 2657 886 2 0.4236 28.46753 8.10986 32.07723 0 1 0 + 2658 886 2 0.4236 29.02683 9.63362 31.89868 0 1 0 + 2659 887 1 -0.8472 13.34328 26.47105 19.29320 0 0 0 + 2660 887 2 0.4236 12.39942 26.30189 19.00962 0 0 0 + 2661 887 2 0.4236 13.35088 26.84291 20.22145 0 0 0 + 2662 888 1 -0.8472 19.94676 12.70619 3.72610 0 0 0 + 2663 888 2 0.4236 20.76802 12.44352 4.23254 0 0 0 + 2664 888 2 0.4236 19.38323 13.30678 4.29329 0 0 0 + 
2665 889 1 -0.8472 35.23224 31.63158 19.72906 -1 -1 0 + 2666 889 2 0.4236 0.38022 30.95809 19.38525 0 -1 0 + 2667 889 2 0.4236 35.31912 31.70365 20.72264 -1 -1 0 + 2668 890 1 -0.8472 24.90334 4.71897 4.92216 0 -1 0 + 2669 890 2 0.4236 24.54870 3.87726 4.51512 0 -1 0 + 2670 890 2 0.4236 25.89173 4.63677 5.04984 0 -1 0 + 2671 891 1 -0.8472 24.35419 20.38980 21.39334 0 -1 0 + 2672 891 2 0.4236 24.59812 21.35941 21.41149 0 -1 0 + 2673 891 2 0.4236 23.71902 20.21994 20.63991 0 -1 0 + 2674 892 1 -0.8472 27.50765 1.34560 12.13095 0 0 0 + 2675 892 2 0.4236 26.89496 0.89751 11.48000 0 0 0 + 2676 892 2 0.4236 26.98383 1.98011 12.69924 0 0 0 + 2677 893 1 -0.8472 13.13728 1.89420 29.11232 0 0 0 + 2678 893 2 0.4236 13.11182 2.85364 28.83168 0 0 0 + 2679 893 2 0.4236 13.88348 1.75716 29.76377 0 0 0 + 2680 894 1 -0.8472 28.14563 3.96173 23.80406 0 1 0 + 2681 894 2 0.4236 28.04813 2.97062 23.89421 0 1 0 + 2682 894 2 0.4236 29.11641 4.20075 23.78439 0 1 0 + 2683 895 1 -0.8472 18.25055 23.22783 0.51719 -1 0 0 + 2684 895 2 0.4236 18.48570 24.19917 0.55041 -1 0 0 + 2685 895 2 0.4236 18.98979 22.69179 0.92475 -1 0 0 + 2686 896 1 -0.8472 2.51465 29.09014 7.57570 2 -1 0 + 2687 896 2 0.4236 3.10673 29.87007 7.37298 2 -1 0 + 2688 896 2 0.4236 3.07310 28.31692 7.87608 2 -1 0 + 2689 897 1 -0.8472 25.00737 10.68975 1.68683 0 0 0 + 2690 897 2 0.4236 25.28105 9.84935 2.15462 0 0 0 + 2691 897 2 0.4236 24.06560 10.59939 1.36302 0 0 0 + 2692 898 1 -0.8472 0.50800 31.98245 4.59059 0 0 0 + 2693 898 2 0.4236 0.60245 31.58627 5.50388 0 0 0 + 2694 898 2 0.4236 35.23000 31.56962 4.12765 -1 0 0 + 2695 899 1 -0.8472 8.51918 24.05662 22.25963 0 -1 0 + 2696 899 2 0.4236 7.69557 24.41189 22.70173 0 -1 0 + 2697 899 2 0.4236 9.32524 24.50158 22.64980 0 -1 0 + 2698 900 1 -0.8472 3.74077 33.15075 4.30432 1 -1 0 + 2699 900 2 0.4236 2.74523 33.05868 4.28503 1 -1 0 + 2700 900 2 0.4236 4.15286 32.43271 3.74351 1 -1 0 + 2701 901 1 -0.8472 23.70766 3.65644 13.10291 0 1 0 + 2702 901 2 0.4236 22.83186 3.36378 13.48663 0 
1 0 + 2703 901 2 0.4236 23.72321 3.46266 12.12202 0 1 0 + 2704 902 1 -0.8472 24.60561 3.76942 1.43568 0 -1 0 + 2705 902 2 0.4236 25.48227 3.58820 0.99007 0 -1 0 + 2706 902 2 0.4236 24.42968 3.06545 2.12375 0 -1 0 + 2707 903 1 -0.8472 19.48773 29.49157 22.01061 0 1 0 + 2708 903 2 0.4236 19.53211 28.50847 22.18799 0 1 0 + 2709 903 2 0.4236 20.40113 29.88825 22.10143 0 1 0 + 2710 904 1 -0.8472 16.23080 13.90116 33.79773 0 0 0 + 2711 904 2 0.4236 16.83015 14.21028 33.05940 0 0 0 + 2712 904 2 0.4236 16.41710 14.43372 34.62333 0 0 0 + 2713 905 1 -0.8472 31.40149 4.69855 20.61956 0 0 0 + 2714 905 2 0.4236 31.88814 3.87342 20.90643 0 0 0 + 2715 905 2 0.4236 31.98877 5.49631 20.75600 0 0 0 + 2716 906 1 -0.8472 11.91330 27.21469 2.83126 0 -1 0 + 2717 906 2 0.4236 11.77039 27.62948 3.72986 0 -1 0 + 2718 906 2 0.4236 11.05540 27.22822 2.31766 0 -1 0 + 2719 907 1 -0.8472 22.29397 12.32624 33.31082 -1 -1 0 + 2720 907 2 0.4236 23.15046 12.43347 33.81570 -1 -1 0 + 2721 907 2 0.4236 22.45521 11.77078 32.49511 -1 -1 0 + 2722 908 1 -0.8472 34.76448 23.65326 16.89257 -1 0 0 + 2723 908 2 0.4236 35.51917 23.16707 16.45207 -1 0 0 + 2724 908 2 0.4236 33.92404 23.11964 16.79832 -1 0 0 + 2725 909 1 -0.8472 1.74181 27.58620 25.60166 0 0 0 + 2726 909 2 0.4236 1.15598 27.69342 26.40493 0 0 0 + 2727 909 2 0.4236 1.52271 28.29593 24.93217 0 0 0 + 2728 910 1 -0.8472 12.89470 5.60990 10.67435 0 0 0 + 2729 910 2 0.4236 12.12215 5.02493 10.92114 0 0 0 + 2730 910 2 0.4236 13.50768 5.11149 10.06133 0 0 0 + 2731 911 1 -0.8472 26.31194 30.23025 9.95928 -1 -1 0 + 2732 911 2 0.4236 26.26983 29.99008 8.98948 -1 -1 0 + 2733 911 2 0.4236 26.89847 29.57778 10.43909 -1 -1 0 + 2734 912 1 -0.8472 2.44644 10.30330 33.95784 0 1 0 + 2735 912 2 0.4236 3.27225 10.40567 33.40331 0 1 0 + 2736 912 2 0.4236 2.65024 10.54393 34.90680 0 1 0 + 2737 913 1 -0.8472 23.59507 21.37553 7.99419 0 0 0 + 2738 913 2 0.4236 22.79284 21.02924 8.48049 0 0 0 + 2739 913 2 0.4236 24.00179 20.63828 7.45479 0 0 0 + 2740 914 1 -0.8472 
35.31436 2.94640 23.58373 -1 0 0 + 2741 914 2 0.4236 0.21370 3.41681 22.80009 0 0 0 + 2742 914 2 0.4236 0.53235 2.53576 24.13752 0 0 0 + 2743 915 1 -0.8472 13.99749 14.34449 15.50116 0 0 0 + 2744 915 2 0.4236 14.78839 13.91007 15.93209 0 0 0 + 2745 915 2 0.4236 14.12556 15.33622 15.49251 0 0 0 + 2746 916 1 -0.8472 6.46769 25.98596 20.36657 0 1 0 + 2747 916 2 0.4236 6.61622 25.29763 19.65657 0 1 0 + 2748 916 2 0.4236 6.11029 25.54426 21.18944 0 1 0 + 2749 917 1 -0.8472 13.24177 10.22217 7.85839 1 0 0 + 2750 917 2 0.4236 12.93251 10.97416 8.44047 1 0 0 + 2751 917 2 0.4236 13.34595 9.39473 8.41013 1 0 0 + 2752 918 1 -0.8472 19.88118 15.75995 34.78713 -1 0 0 + 2753 918 2 0.4236 20.21312 15.08235 35.44333 -1 0 0 + 2754 918 2 0.4236 20.26709 15.56722 33.88500 -1 0 0 +2755 919 1 -0.8472 28.06120 14.47494 34.86905 -1 0 0 +2756 919 2 0.4236 27.85061 15.02284 0.23143 -1 0 1 +2757 919 2 0.4236 27.89570 15.01872 34.04632 -1 0 0 + 2758 920 1 -0.8472 19.62699 30.99995 7.02680 0 -1 0 + 2759 920 2 0.4236 18.89452 31.41499 6.48719 0 -1 0 + 2760 920 2 0.4236 19.30945 30.13154 7.40754 0 -1 0 + 2761 921 1 -0.8472 23.51923 19.57951 13.65426 0 0 0 + 2762 921 2 0.4236 23.70354 20.49167 14.02025 0 0 0 + 2763 921 2 0.4236 22.92226 19.08218 14.28373 0 0 0 + 2764 922 1 -0.8472 17.23063 33.39138 27.42826 0 0 0 + 2765 922 2 0.4236 17.91483 33.70380 28.08718 0 0 0 + 2766 922 2 0.4236 17.60174 32.62856 26.89878 0 0 0 +2767 923 1 -0.8472 14.52742 35.08290 34.70376 1 0 0 +2768 923 2 0.4236 14.91728 34.45866 34.02682 1 0 0 +2769 923 2 0.4236 14.84723 34.82725 0.16889 1 0 1 + 2770 924 1 -0.8472 23.44598 32.97673 12.21251 0 0 0 + 2771 924 2 0.4236 24.25669 32.40696 12.34679 0 0 0 + 2772 924 2 0.4236 23.30310 33.54871 13.02021 0 0 0 + 2773 925 1 -0.8472 27.83250 1.81797 26.90445 1 0 0 + 2774 925 2 0.4236 28.40408 1.28779 27.53066 1 0 0 + 2775 925 2 0.4236 27.92130 1.44938 25.97910 1 0 0 + 2776 926 1 -0.8472 33.48976 28.09017 24.91668 -1 0 0 + 2777 926 2 0.4236 33.93640 28.56163 24.15630 -1 0 0 + 2778 
926 2 0.4236 33.81589 27.14592 24.96117 -1 0 0 + 2779 927 1 -0.8472 4.51573 24.79905 27.51794 1 -1 0 + 2780 927 2 0.4236 4.61827 24.08055 26.83001 1 -1 0 + 2781 927 2 0.4236 4.53544 25.69346 27.07120 1 -1 0 +2782 928 1 -0.8472 21.98652 18.50956 0.09403 0 0 0 +2783 928 2 0.4236 22.42105 17.88902 34.88847 0 0 -1 +2784 928 2 0.4236 22.64332 18.75754 0.80610 0 0 0 +2785 929 1 -0.8472 1.51311 16.03062 0.14690 0 0 0 +2786 929 2 0.4236 1.10681 16.69203 0.77731 0 0 0 +2787 929 2 0.4236 1.19358 16.21607 34.66487 0 0 -1 + 2788 930 1 -0.8472 15.60223 24.97747 19.15233 0 -1 0 + 2789 930 2 0.4236 16.21656 25.42691 18.50382 0 -1 0 + 2790 930 2 0.4236 14.89182 25.62028 19.43878 0 -1 0 + 2791 931 1 -0.8472 14.89459 26.77828 7.57077 0 0 0 + 2792 931 2 0.4236 15.10000 26.99785 8.52448 0 0 0 + 2793 931 2 0.4236 14.05018 26.24452 7.52604 0 0 0 + 2794 932 1 -0.8472 20.12223 16.99391 24.38823 0 -1 0 + 2795 932 2 0.4236 20.03531 16.23201 23.74646 0 -1 0 + 2796 932 2 0.4236 20.97165 17.48876 24.20508 0 -1 0 + 2797 933 1 -0.8472 31.97095 9.82411 26.24234 0 0 0 + 2798 933 2 0.4236 31.18540 10.35809 25.92973 0 0 0 + 2799 933 2 0.4236 32.47982 9.48527 25.45101 0 0 0 + 2800 934 1 -0.8472 0.38419 34.93068 18.53576 0 -1 0 + 2801 934 2 0.4236 0.38508 0.23464 17.94978 0 0 0 + 2802 934 2 0.4236 35.16957 34.30492 18.23811 -1 -1 0 + 2803 935 1 -0.8472 13.41437 12.33309 24.23677 1 0 0 + 2804 935 2 0.4236 13.12758 11.74937 24.99633 1 0 0 + 2805 935 2 0.4236 12.60873 12.74142 23.80761 1 0 0 + 2806 936 1 -0.8472 5.18744 5.18751 32.87277 1 1 0 + 2807 936 2 0.4236 4.57151 5.77599 32.34907 1 1 0 + 2808 936 2 0.4236 5.82066 4.72905 32.24922 1 1 0 + 2809 937 1 -0.8472 32.42642 26.66747 22.09377 -2 0 0 + 2810 937 2 0.4236 33.12682 27.03535 22.70538 -2 0 0 + 2811 937 2 0.4236 32.68856 25.74768 21.80190 -2 0 0 + 2812 938 1 -0.8472 26.55012 2.70441 29.91123 0 1 0 + 2813 938 2 0.4236 26.41349 3.09573 29.00119 0 1 0 + 2814 938 2 0.4236 27.49214 2.37992 29.99651 0 1 0 + 2815 939 1 -0.8472 31.36066 7.26066 30.58650 
-1 0 0 + 2816 939 2 0.4236 31.50950 7.56090 29.64434 -1 0 0 + 2817 939 2 0.4236 30.45500 7.55543 30.89113 -1 0 0 + 2818 940 1 -0.8472 10.17399 14.38777 10.06711 0 -1 0 + 2819 940 2 0.4236 9.26801 14.52344 10.46802 0 -1 0 + 2820 940 2 0.4236 10.08025 13.95584 9.17012 0 -1 0 + 2821 941 1 -0.8472 28.15600 13.94540 15.95737 0 0 0 + 2822 941 2 0.4236 27.27058 14.23684 16.31935 0 0 0 + 2823 941 2 0.4236 28.13899 13.99060 14.95857 0 0 0 + 2824 942 1 -0.8472 8.84083 24.57103 17.24477 -1 0 0 + 2825 942 2 0.4236 7.85233 24.69033 17.33754 -1 0 0 + 2826 942 2 0.4236 9.13305 23.78027 17.78261 -1 0 0 + 2827 943 1 -0.8472 6.95722 20.29000 23.84983 0 1 0 + 2828 943 2 0.4236 7.04815 19.41917 24.33287 0 1 0 + 2829 943 2 0.4236 7.86565 20.64730 23.63296 0 1 0 + 2830 944 1 -0.8472 5.75302 13.11004 29.05447 0 0 0 + 2831 944 2 0.4236 5.97810 12.96788 30.01835 0 0 0 + 2832 944 2 0.4236 4.76843 13.25708 28.96028 0 0 0 + 2833 945 1 -0.8472 30.02928 16.68514 32.84891 0 0 0 + 2834 945 2 0.4236 29.04487 16.85998 32.83002 0 0 0 + 2835 945 2 0.4236 30.27126 16.23411 33.70794 0 0 0 + 2836 946 1 -0.8472 19.41079 24.31013 19.45445 0 -1 0 + 2837 946 2 0.4236 18.91910 23.72864 18.80633 0 -1 0 + 2838 946 2 0.4236 20.21515 24.70141 19.00742 0 -1 0 + 2839 947 1 -0.8472 21.08786 32.52428 34.05522 -1 0 0 + 2840 947 2 0.4236 21.43422 33.32512 33.56665 -1 0 0 + 2841 947 2 0.4236 20.99938 32.73966 35.02771 -1 0 0 + 2842 948 1 -0.8472 27.57938 10.46951 5.09160 0 0 0 + 2843 948 2 0.4236 26.59109 10.61508 5.13655 0 0 0 + 2844 948 2 0.4236 27.76614 9.50420 4.90926 0 0 0 + 2845 949 1 -0.8472 28.73415 28.94644 29.88776 -1 0 0 + 2846 949 2 0.4236 27.74647 28.86939 30.02388 -1 0 0 + 2847 949 2 0.4236 29.09021 28.08186 29.53322 -1 0 0 + 2848 950 1 -0.8472 23.12812 9.19046 12.33250 1 0 0 + 2849 950 2 0.4236 23.47605 8.37982 12.80345 1 0 0 + 2850 950 2 0.4236 23.49814 10.01219 12.76592 1 0 0 +2851 951 1 -0.8472 7.73215 20.48784 35.38931 0 0 0 +2852 951 2 0.4236 7.10638 20.79855 0.65752 0 0 1 +2853 951 2 0.4236 8.34921 
19.79575 0.31656 0 0 1 + 2854 952 1 -0.8472 15.75196 1.18543 29.47016 1 0 0 + 2855 952 2 0.4236 16.64637 1.19402 29.91732 1 0 0 + 2856 952 2 0.4236 15.77930 1.76944 28.65891 1 0 0 + 2857 953 1 -0.8472 21.01330 1.65554 12.01507 0 1 0 + 2858 953 2 0.4236 21.26148 1.90066 12.95223 0 1 0 + 2859 953 2 0.4236 21.76578 1.15061 11.59222 0 1 0 + 2860 954 1 -0.8472 17.77139 21.95343 29.40937 -1 0 0 + 2861 954 2 0.4236 18.03328 22.88377 29.66597 -1 0 0 + 2862 954 2 0.4236 18.43500 21.30533 29.78294 -1 0 0 + 2863 955 1 -0.8472 22.46831 13.05746 12.45944 1 0 0 + 2864 955 2 0.4236 23.14396 13.67278 12.05350 1 0 0 + 2865 955 2 0.4236 21.99568 12.55653 11.73444 1 0 0 + 2866 956 1 -0.8472 11.21873 21.74164 13.52771 0 0 0 + 2867 956 2 0.4236 11.52085 22.26841 12.73327 0 0 0 + 2868 956 2 0.4236 10.30802 21.36754 13.35285 0 0 0 + 2869 957 1 -0.8472 21.72722 10.88987 26.22695 0 0 0 + 2870 957 2 0.4236 20.94466 11.47054 26.00254 0 0 0 + 2871 957 2 0.4236 21.66050 10.59424 27.17988 0 0 0 + 2872 958 1 -0.8472 18.46838 8.09516 14.49744 0 0 0 + 2873 958 2 0.4236 19.27863 8.50562 14.07916 0 0 0 + 2874 958 2 0.4236 17.95254 7.59374 13.80289 0 0 0 + 2875 959 1 -0.8472 2.92072 4.40179 25.40986 1 0 0 + 2876 959 2 0.4236 2.92375 3.55851 24.87244 1 0 0 + 2877 959 2 0.4236 3.86068 4.71670 25.54126 1 0 0 + 2878 960 1 -0.8472 5.27077 8.45307 8.47067 0 1 0 + 2879 960 2 0.4236 6.13663 8.79876 8.10908 0 1 0 + 2880 960 2 0.4236 4.51250 8.83717 7.94393 0 1 0 + 2881 961 1 -0.8472 1.76958 24.14716 31.58841 1 1 0 + 2882 961 2 0.4236 1.60567 23.16857 31.46406 1 1 0 + 2883 961 2 0.4236 0.90327 24.60856 31.77958 1 1 0 + 2884 962 1 -0.8472 24.44793 22.66778 34.86292 -1 -1 0 + 2885 962 2 0.4236 24.15877 23.46022 34.32593 -1 -1 0 + 2886 962 2 0.4236 23.64563 22.22751 35.26595 -1 -1 0 + 2887 963 1 -0.8472 4.18215 10.93176 27.72718 0 -1 0 + 2888 963 2 0.4236 4.41887 10.10976 27.20934 0 -1 0 + 2889 963 2 0.4236 4.91894 11.14537 28.36860 0 -1 0 + 2890 964 1 -0.8472 17.69104 23.19157 14.16626 0 -1 0 + 2891 964 2 0.4236 
17.12986 23.22868 13.33945 0 -1 0 + 2892 964 2 0.4236 18.57188 23.63143 13.99125 0 -1 0 + 2893 965 1 -0.8472 7.47643 4.12395 28.94377 1 1 0 + 2894 965 2 0.4236 6.53643 4.10965 28.60301 1 1 0 + 2895 965 2 0.4236 8.06807 3.62466 28.31082 1 1 0 + 2896 966 1 -0.8472 24.00375 12.48900 25.21944 0 1 0 + 2897 966 2 0.4236 24.26664 12.12701 24.32514 0 1 0 + 2898 966 2 0.4236 23.24029 11.95566 25.58362 0 1 0 + 2899 967 1 -0.8472 29.91768 1.50145 13.03496 -1 1 0 + 2900 967 2 0.4236 29.05335 1.37915 12.54717 -1 1 0 + 2901 967 2 0.4236 30.45234 0.65805 12.98236 -1 1 0 + 2902 968 1 -0.8472 10.68432 11.21692 11.70509 1 0 0 + 2903 968 2 0.4236 10.03839 11.20065 10.94187 1 0 0 + 2904 968 2 0.4236 10.94732 12.16199 11.89905 1 0 0 + 2905 969 1 -0.8472 26.83864 6.78430 27.89995 0 1 0 + 2906 969 2 0.4236 27.65156 6.22192 28.05128 0 1 0 + 2907 969 2 0.4236 26.70471 6.91923 26.91824 0 1 0 + 2908 970 1 -0.8472 19.45471 27.26305 28.04780 0 -1 0 + 2909 970 2 0.4236 19.04134 26.44985 27.63821 0 -1 0 + 2910 970 2 0.4236 18.84070 27.63413 28.74441 0 -1 0 + 2911 971 1 -0.8472 9.18329 17.54386 23.21323 0 0 0 + 2912 971 2 0.4236 9.75033 18.33292 22.97698 0 0 0 + 2913 971 2 0.4236 9.51723 17.13593 24.06294 0 0 0 + 2914 972 1 -0.8472 18.24866 34.63289 20.65628 1 -1 0 + 2915 972 2 0.4236 17.34524 34.34596 20.33775 1 -1 0 + 2916 972 2 0.4236 18.85770 34.74797 19.87159 1 -1 0 + 2917 973 1 -0.8472 32.52843 32.10108 29.97826 1 -1 0 + 2918 973 2 0.4236 33.25404 32.71935 30.28020 1 -1 0 + 2919 973 2 0.4236 31.64947 32.57665 30.01282 1 -1 0 + 2920 974 1 -0.8472 3.80241 11.49035 8.14956 0 0 0 + 2921 974 2 0.4236 2.91277 11.28906 7.73975 0 0 0 + 2922 974 2 0.4236 3.77571 11.27632 9.12598 0 0 0 + 2923 975 1 -0.8472 19.18653 31.11922 32.50674 0 -1 0 + 2924 975 2 0.4236 19.84180 31.57338 33.11034 0 -1 0 + 2925 975 2 0.4236 18.78222 31.79206 31.88727 0 -1 0 + 2926 976 1 -0.8472 13.34133 28.41424 10.64040 0 -1 0 + 2927 976 2 0.4236 14.24813 28.04636 10.43466 0 -1 0 + 2928 976 2 0.4236 12.65347 27.92404 10.10514 0 
-1 0 + 2929 977 1 -0.8472 9.56133 12.05955 5.53720 0 1 0 + 2930 977 2 0.4236 10.08838 11.22096 5.67478 0 1 0 + 2931 977 2 0.4236 9.83717 12.48936 4.67747 0 1 0 + 2932 978 1 -0.8472 7.60177 1.79201 2.09133 0 1 0 + 2933 978 2 0.4236 7.01076 2.58930 1.96898 0 1 0 + 2934 978 2 0.4236 7.51467 1.45316 3.02810 0 1 0 + 2935 979 1 -0.8472 28.78709 11.03509 9.63724 -1 0 0 + 2936 979 2 0.4236 28.90284 11.24252 8.66588 -1 0 0 + 2937 979 2 0.4236 29.64333 11.21599 10.12102 -1 0 0 + 2938 980 1 -0.8472 4.02056 14.84396 0.81996 0 0 0 + 2939 980 2 0.4236 4.39483 14.28239 0.08204 0 0 0 + 2940 980 2 0.4236 3.13862 15.22111 0.53729 0 0 0 + 2941 981 1 -0.8472 28.30108 1.63976 9.02932 -1 0 0 + 2942 981 2 0.4236 28.30413 2.50150 9.53661 -1 0 0 + 2943 981 2 0.4236 27.77598 0.95633 9.53643 -1 0 0 + 2944 982 1 -0.8472 8.86171 20.46625 30.05231 1 0 0 + 2945 982 2 0.4236 8.40488 20.55409 29.16712 1 0 0 + 2946 982 2 0.4236 8.28711 19.93206 30.67235 1 0 0 + 2947 983 1 -0.8472 18.15383 12.23297 19.15095 0 0 0 + 2948 983 2 0.4236 18.65379 11.76439 18.42265 0 0 0 + 2949 983 2 0.4236 17.17857 12.02685 19.07172 0 0 0 + 2950 984 1 -0.8472 30.27019 30.12955 18.83444 1 -1 0 + 2951 984 2 0.4236 31.18072 29.72570 18.74618 1 -1 0 + 2952 984 2 0.4236 29.82132 30.13800 17.94092 1 -1 0 + 2953 985 1 -0.8472 28.72211 14.94210 4.97487 0 1 0 + 2954 985 2 0.4236 29.12355 14.04801 5.17337 0 1 0 + 2955 985 2 0.4236 27.75045 14.83217 4.76577 0 1 0 + 2956 986 1 -0.8472 30.79139 17.34339 25.87787 -1 1 0 + 2957 986 2 0.4236 31.64276 17.65688 25.45739 -1 1 0 + 2958 986 2 0.4236 30.40203 16.60291 25.33010 -1 1 0 + 2959 987 1 -0.8472 20.85050 21.78155 22.79995 0 0 0 + 2960 987 2 0.4236 19.95588 22.19630 22.63385 0 0 0 + 2961 987 2 0.4236 21.56993 22.42549 22.53966 0 0 0 + 2962 988 1 -0.8472 3.21863 15.29262 10.93758 0 0 0 + 2963 988 2 0.4236 3.54765 14.72274 11.69052 0 0 0 + 2964 988 2 0.4236 3.77983 16.11786 10.87424 0 0 0 + 2965 989 1 -0.8472 33.92695 7.67534 6.62621 0 0 0 + 2966 989 2 0.4236 34.05946 8.56683 7.05938 0 
0 0 + 2967 989 2 0.4236 34.43072 7.64604 5.76290 0 0 0 + 2968 990 1 -0.8472 33.09880 6.74056 25.13263 0 0 0 + 2969 990 2 0.4236 33.49921 6.29904 24.32971 0 0 0 + 2970 990 2 0.4236 32.69648 7.61622 24.86557 0 0 0 + 2971 991 1 -0.8472 29.48955 24.63672 11.59942 0 0 0 + 2972 991 2 0.4236 29.87987 24.05306 12.31141 0 0 0 + 2973 991 2 0.4236 28.56720 24.32043 11.37763 0 0 0 + 2974 992 1 -0.8472 2.34540 1.81481 21.23390 1 0 0 + 2975 992 2 0.4236 1.37305 1.62113 21.10363 1 0 0 + 2976 992 2 0.4236 2.62003 2.56305 20.63002 1 0 0 +2977 993 1 -0.8472 11.06680 31.42120 0.34581 0 1 0 +2978 993 2 0.4236 10.24429 30.97031 35.44638 0 1 -1 +2979 993 2 0.4236 11.86807 30.86173 0.13383 0 1 0 + 2980 994 1 -0.8472 34.93665 6.07698 11.10142 0 0 0 + 2981 994 2 0.4236 0.18436 6.56576 10.66276 1 0 0 + 2982 994 2 0.4236 34.63985 5.32489 10.51300 0 0 0 + 2983 995 1 -0.8472 20.00992 11.13905 17.51811 0 0 0 + 2984 995 2 0.4236 20.59134 10.54105 16.96644 0 0 0 + 2985 995 2 0.4236 20.46099 11.33089 18.38972 0 0 0 + 2986 996 1 -0.8472 26.63427 15.23996 11.58010 0 0 0 + 2987 996 2 0.4236 25.74658 14.89786 11.27210 0 0 0 + 2988 996 2 0.4236 26.79731 16.14495 11.18720 0 0 0 + 2989 997 1 -0.8472 7.32425 7.27500 11.52314 1 -1 0 + 2990 997 2 0.4236 7.92814 7.17928 10.73188 1 -1 0 + 2991 997 2 0.4236 6.60180 7.93375 11.31325 1 -1 0 + 2992 998 1 -0.8472 8.60862 9.07382 19.34881 1 0 0 + 2993 998 2 0.4236 8.98896 8.19288 19.63037 1 0 0 + 2994 998 2 0.4236 7.76257 8.92345 18.83745 1 0 0 + 2995 999 1 -0.8472 11.21280 13.03889 22.91023 0 0 0 + 2996 999 2 0.4236 10.95126 13.31509 23.83501 0 0 0 + 2997 999 2 0.4236 10.41518 13.08938 22.30919 0 0 0 + 2998 1000 1 -0.8472 15.70634 18.82610 34.73704 0 0 0 + 2999 1000 2 0.4236 15.21674 19.57997 35.17510 0 0 0 + 3000 1000 2 0.4236 16.64346 19.11026 34.53453 0 0 0 + 3001 1001 1 -0.8472 23.12248 30.95440 32.80659 0 -1 0 + 3002 1001 2 0.4236 22.36914 31.30904 33.36034 0 -1 0 + 3003 1001 2 0.4236 23.81237 31.66815 32.68603 0 -1 0 + 3004 1002 1 -0.8472 2.07358 30.68505 
32.50823 1 0 0 + 3005 1002 2 0.4236 2.42659 29.78201 32.26368 1 0 0 + 3006 1002 2 0.4236 2.83305 31.27939 32.77274 1 0 0 + 3007 1003 1 -0.8472 12.19941 21.23716 5.42888 1 0 0 + 3008 1003 2 0.4236 11.39729 21.44906 5.98714 1 0 0 + 3009 1003 2 0.4236 12.89772 21.93912 5.56872 1 0 0 + 3010 1004 1 -0.8472 16.49685 26.02267 23.28030 0 -1 0 + 3011 1004 2 0.4236 15.74529 26.37284 23.83930 0 -1 0 + 3012 1004 2 0.4236 16.83643 26.75059 22.68468 0 -1 0 + 3013 1005 1 -0.8472 3.71190 6.69664 23.09349 1 1 0 + 3014 1005 2 0.4236 4.51275 7.22874 23.36821 1 1 0 + 3015 1005 2 0.4236 3.05011 6.67935 23.84295 1 1 0 + 3016 1006 1 -0.8472 23.69417 19.64211 1.85845 0 0 0 + 3017 1006 2 0.4236 23.57866 19.06634 2.66783 0 0 0 + 3018 1006 2 0.4236 24.60178 20.06158 1.87460 0 0 0 + 3019 1007 1 -0.8472 12.49699 4.32659 15.95705 0 0 0 + 3020 1007 2 0.4236 12.08185 4.98442 15.32868 0 0 0 + 3021 1007 2 0.4236 13.47285 4.52425 16.04971 0 0 0 + 3022 1008 1 -0.8472 2.88817 24.31400 12.33073 0 0 0 + 3023 1008 2 0.4236 2.75143 24.30782 11.34016 0 0 0 + 3024 1008 2 0.4236 2.08913 24.71913 12.77495 0 0 0 + 3025 1009 1 -0.8472 4.47007 13.67005 19.84575 0 1 0 + 3026 1009 2 0.4236 4.22969 13.43095 18.90498 0 1 0 + 3027 1009 2 0.4236 4.94923 14.54770 19.85454 0 1 0 + 3028 1010 1 -0.8472 7.11277 19.85870 18.43619 0 0 0 + 3029 1010 2 0.4236 7.43137 20.07081 19.36001 0 0 0 + 3030 1010 2 0.4236 7.65350 19.10536 18.06190 0 0 0 + 3031 1011 1 -0.8472 23.31116 24.86119 33.37442 0 0 0 + 3032 1011 2 0.4236 23.60537 25.39212 32.57973 0 0 0 + 3033 1011 2 0.4236 23.46057 25.39567 34.20626 0 0 0 + 3034 1012 1 -0.8472 6.59172 35.26853 10.29528 0 0 0 + 3035 1012 2 0.4236 6.03345 0.59177 10.30124 0 1 0 + 3036 1012 2 0.4236 6.84495 35.03042 11.23290 0 0 0 + 3037 1013 1 -0.8472 21.58857 3.03691 28.35042 0 -1 0 + 3038 1013 2 0.4236 21.37983 3.73264 27.66317 0 -1 0 + 3039 1013 2 0.4236 20.77001 2.84809 28.89286 0 -1 0 + 3040 1014 1 -0.8472 24.81563 5.45260 22.57323 -1 0 0 + 3041 1014 2 0.4236 24.32945 4.64426 22.24134 -1 0 0 + 
3042 1014 2 0.4236 24.64602 6.22053 21.95564 -1 0 0 + 3043 1015 1 -0.8472 34.44382 0.14050 3.28625 -1 1 0 + 3044 1015 2 0.4236 34.61155 35.38786 2.33507 -1 0 0 + 3045 1015 2 0.4236 35.31692 0.29608 3.74826 -1 1 0 + 3046 1016 1 -0.8472 4.27791 2.41161 7.98065 1 1 0 + 3047 1016 2 0.4236 4.66546 1.93228 7.19326 1 1 0 + 3048 1016 2 0.4236 3.77390 3.21600 7.66616 1 1 0 + 3049 1017 1 -0.8472 24.52356 31.65334 21.53988 0 -1 0 + 3050 1017 2 0.4236 24.66693 30.96367 22.24963 0 -1 0 + 3051 1017 2 0.4236 25.23898 31.57129 20.84603 0 -1 0 + 3052 1018 1 -0.8472 34.42078 34.68317 25.27342 0 -1 0 + 3053 1018 2 0.4236 33.95998 0.05744 25.16350 0 0 0 + 3054 1018 2 0.4236 33.89777 34.10967 25.90389 0 -1 0 + 3055 1019 1 -0.8472 11.26425 8.85731 13.25615 0 0 0 + 3056 1019 2 0.4236 11.21883 9.47948 12.47465 0 0 0 + 3057 1019 2 0.4236 10.33767 8.63461 13.55915 0 0 0 +3058 1020 1 -0.8472 17.04930 2.12995 0.32083 0 0 0 +3059 1020 2 0.4236 17.15528 2.34363 1.29194 0 0 0 +3060 1020 2 0.4236 17.87979 1.68577 35.43191 0 0 -1 + 3061 1021 1 -0.8472 24.82320 22.07636 14.94227 0 0 0 + 3062 1021 2 0.4236 25.10356 22.41409 15.84075 0 0 0 + 3063 1021 2 0.4236 24.74082 22.84510 14.30805 0 0 0 + 3064 1022 1 -0.8472 17.77824 29.21423 7.78133 0 0 0 + 3065 1022 2 0.4236 17.50573 29.18071 8.74291 0 0 0 + 3066 1022 2 0.4236 17.68695 28.30413 7.37716 0 0 0 + 3067 1023 1 -0.8472 14.04599 24.76525 11.84064 0 -1 0 + 3068 1023 2 0.4236 14.75054 24.05867 11.90618 0 -1 0 + 3069 1023 2 0.4236 13.88820 25.16342 12.74426 0 -1 0 + 3070 1024 1 -0.8472 24.42854 23.40458 24.83813 -1 0 0 + 3071 1024 2 0.4236 24.80885 24.10174 25.44580 -1 0 0 + 3072 1024 2 0.4236 23.67780 23.79831 24.30777 -1 0 0 +3073 1025 1 -0.8472 11.88653 15.08214 0.29503 1 0 0 +3074 1025 2 0.4236 12.54716 15.05647 34.99198 1 0 -1 +3075 1025 2 0.4236 11.40432 15.95808 0.28467 1 0 0 + 3076 1026 1 -0.8472 11.00548 18.76354 17.70020 1 0 0 + 3077 1026 2 0.4236 11.20683 19.08015 18.62714 1 0 0 + 3078 1026 2 0.4236 11.36832 19.41906 17.03794 1 0 0 + 3079 
1027 1 -0.8472 0.21383 34.43335 28.44343 0 -1 0 + 3080 1027 2 0.4236 35.41456 34.46051 29.39515 -1 -1 0 + 3081 1027 2 0.4236 0.52632 33.51015 28.21990 0 -1 0 + 3082 1028 1 -0.8472 27.69776 7.75551 5.28429 0 0 0 + 3083 1028 2 0.4236 28.61915 7.37076 5.22974 0 0 0 + 3084 1028 2 0.4236 27.22506 7.37690 6.08001 0 0 0 + 3085 1029 1 -0.8472 18.57223 5.67473 20.22081 0 0 0 + 3086 1029 2 0.4236 19.11772 6.30191 20.77677 0 0 0 + 3087 1029 2 0.4236 19.17712 5.02379 19.76220 0 0 0 + 3088 1030 1 -0.8472 2.33426 13.17303 32.36357 1 1 0 + 3089 1030 2 0.4236 1.55811 13.79024 32.49242 1 1 0 + 3090 1030 2 0.4236 2.00010 12.26705 32.10388 1 1 0 + 3091 1031 1 -0.8472 35.01515 31.73394 0.97577 0 0 0 + 3092 1031 2 0.4236 34.61233 31.43018 1.83914 0 0 0 + 3093 1031 2 0.4236 34.51750 31.32192 0.21256 0 0 0 + 3094 1032 1 -0.8472 17.16853 3.81647 14.52459 0 1 0 + 3095 1032 2 0.4236 16.50516 3.96386 15.25821 0 1 0 + 3096 1032 2 0.4236 17.89736 3.21428 14.85032 0 1 0 + 3097 1033 1 -0.8472 23.73791 2.45782 3.91262 -1 1 0 + 3098 1033 2 0.4236 23.84521 1.51408 4.22530 -1 1 0 + 3099 1033 2 0.4236 22.76894 2.70462 3.92407 -1 1 0 + 3100 1034 1 -0.8472 22.46593 29.50499 22.79524 -1 1 0 + 3101 1034 2 0.4236 22.76962 29.36450 21.85291 -1 1 0 + 3102 1034 2 0.4236 22.69303 28.69928 23.34222 -1 1 0 + 3103 1035 1 -0.8472 8.33742 6.62626 29.93646 0 0 0 + 3104 1035 2 0.4236 8.89033 7.18506 29.31844 0 0 0 + 3105 1035 2 0.4236 8.14846 5.74280 29.50784 0 0 0 + 3106 1036 1 -0.8472 35.17706 29.80069 28.03160 0 0 0 + 3107 1036 2 0.4236 35.01927 29.87252 29.01643 0 0 0 + 3108 1036 2 0.4236 35.31696 28.84163 27.78553 0 0 0 + 3109 1037 1 -0.8472 30.28414 0.50891 35.01782 0 2 0 + 3110 1037 2 0.4236 29.35072 0.49034 34.65963 0 2 0 + 3111 1037 2 0.4236 30.92912 0.62031 34.26180 0 2 0 + 3112 1038 1 -0.8472 9.04361 33.89809 34.15394 0 -1 0 + 3113 1038 2 0.4236 10.01843 33.94915 34.37094 0 -1 0 + 3114 1038 2 0.4236 8.90680 33.28308 33.37739 0 -1 0 + 3115 1039 1 -0.8472 8.19458 7.90346 34.62935 0 1 0 + 3116 1039 2 0.4236 
8.09541 8.76794 35.12211 0 1 0 + 3117 1039 2 0.4236 8.56665 8.08000 33.71811 0 1 0 + 3118 1040 1 -0.8472 16.36566 30.72861 16.14680 0 1 0 + 3119 1040 2 0.4236 15.39278 30.64034 16.36047 0 1 0 + 3120 1040 2 0.4236 16.69757 29.87490 15.74557 0 1 0 + 3121 1041 1 -0.8472 1.38336 21.51866 15.82979 1 -1 0 + 3122 1041 2 0.4236 2.18483 22.02775 16.14354 1 -1 0 + 3123 1041 2 0.4236 1.45028 21.36630 14.84376 1 -1 0 + 3124 1042 1 -0.8472 24.49576 16.91647 14.41347 -1 0 0 + 3125 1042 2 0.4236 24.46970 15.94853 14.16378 -1 0 0 + 3126 1042 2 0.4236 23.81844 17.41789 13.87513 -1 0 0 + 3127 1043 1 -0.8472 6.59665 30.08094 32.96472 1 -1 0 + 3128 1043 2 0.4236 6.38187 29.47645 33.73180 1 -1 0 + 3129 1043 2 0.4236 5.91582 30.81166 32.91541 1 -1 0 + 3130 1044 1 -0.8472 13.78850 1.75258 32.98996 0 1 0 + 3131 1044 2 0.4236 13.86420 0.79825 32.70103 0 1 0 + 3132 1044 2 0.4236 13.84481 1.80443 33.98698 0 1 0 + 3133 1045 1 -0.8472 4.51044 20.46650 25.38327 0 -1 0 + 3134 1045 2 0.4236 4.07148 20.23567 26.25162 0 -1 0 + 3135 1045 2 0.4236 5.12949 19.72804 25.11607 0 -1 0 + 3136 1046 1 -0.8472 8.55000 4.97817 10.53916 -1 0 0 + 3137 1046 2 0.4236 9.43556 4.58181 10.29702 -1 0 0 + 3138 1046 2 0.4236 7.81839 4.44049 10.12007 -1 0 0 + 3139 1047 1 -0.8472 6.56871 13.42917 12.46635 1 0 0 + 3140 1047 2 0.4236 6.84090 14.14672 11.82525 1 0 0 + 3141 1047 2 0.4236 6.79037 13.71379 13.39897 1 0 0 + 3142 1048 1 -0.8472 17.30718 29.56617 18.82292 0 0 0 + 3143 1048 2 0.4236 16.96795 30.09198 18.04294 0 0 0 + 3144 1048 2 0.4236 17.82287 30.16861 19.43211 0 0 0 + 3145 1049 1 -0.8472 7.07716 33.67983 15.88066 1 0 0 + 3146 1049 2 0.4236 8.00388 33.31507 15.97055 1 0 0 + 3147 1049 2 0.4236 7.04393 34.60331 16.26278 1 0 0 + 3148 1050 1 -0.8472 4.32190 32.08805 0.42408 0 0 0 + 3149 1050 2 0.4236 5.29471 32.01881 0.20306 0 0 0 + 3150 1050 2 0.4236 3.99734 33.01036 0.21455 0 0 0 + 3151 1051 1 -0.8472 12.50008 15.43329 7.00231 0 1 0 + 3152 1051 2 0.4236 12.41639 16.01705 7.80983 0 1 0 + 3153 1051 2 0.4236 12.42069 
15.99245 6.17710 0 1 0 + 3154 1052 1 -0.8472 20.22282 27.44026 4.69087 -1 0 0 + 3155 1052 2 0.4236 20.73730 27.36336 5.54490 -1 0 0 + 3156 1052 2 0.4236 20.53211 26.73685 4.05097 -1 0 0 + 3157 1053 1 -0.8472 30.00375 27.99420 32.63038 1 1 0 + 3158 1053 2 0.4236 30.47584 28.62820 32.01789 1 1 0 + 3159 1053 2 0.4236 30.56692 27.17887 32.76465 1 1 0 + 3160 1054 1 -0.8472 14.63493 3.86305 1.20786 0 0 0 + 3161 1054 2 0.4236 15.22310 3.05436 1.20241 0 0 0 + 3162 1054 2 0.4236 13.73513 3.62321 0.84348 0 0 0 + 3163 1055 1 -0.8472 4.28675 7.12818 1.70239 1 1 0 + 3164 1055 2 0.4236 4.90666 6.75032 1.01472 1 1 0 + 3165 1055 2 0.4236 4.14189 6.45424 2.42680 1 1 0 + 3166 1056 1 -0.8472 29.16030 26.19530 28.88081 0 -1 0 + 3167 1056 2 0.4236 29.28848 26.91125 28.19459 0 -1 0 + 3168 1056 2 0.4236 28.24711 26.27605 29.28020 0 -1 0 + 3169 1057 1 -0.8472 3.63095 32.78344 28.91318 0 0 0 + 3170 1057 2 0.4236 3.02331 32.00049 28.78007 0 0 0 + 3171 1057 2 0.4236 4.53711 32.45965 29.18516 0 0 0 + 3172 1058 1 -0.8472 23.76056 25.84720 5.67648 0 0 0 + 3173 1058 2 0.4236 23.89776 24.87390 5.86034 0 0 0 + 3174 1058 2 0.4236 24.62472 26.25609 5.38322 0 0 0 + 3175 1059 1 -0.8472 25.38019 33.27871 0.14931 -1 0 0 + 3176 1059 2 0.4236 26.31880 33.35051 0.48664 -1 0 0 + 3177 1059 2 0.4236 24.90181 34.14226 0.30861 -1 0 0 + 3178 1060 1 -0.8472 0.68491 1.52207 17.07190 0 1 0 + 3179 1060 2 0.4236 0.51175 2.44926 17.40397 0 1 0 + 3180 1060 2 0.4236 1.30523 1.55905 16.28844 0 1 0 + 3181 1061 1 -0.8472 23.65181 8.67747 18.77421 0 0 0 + 3182 1061 2 0.4236 22.68856 8.74187 18.51354 0 0 0 + 3183 1061 2 0.4236 24.14900 9.46503 18.41024 0 0 0 + 3184 1062 1 -0.8472 33.05211 15.62986 13.34261 0 0 0 + 3185 1062 2 0.4236 34.00020 15.64431 13.02507 0 0 0 + 3186 1062 2 0.4236 32.54828 14.91657 12.85545 0 0 0 + 3187 1063 1 -0.8472 18.32997 24.65156 30.13550 0 0 0 + 3188 1063 2 0.4236 18.99945 24.85640 30.84948 0 0 0 + 3189 1063 2 0.4236 18.12862 25.48369 29.61879 0 0 0 + 3190 1064 1 -0.8472 5.36408 26.76539 9.70657 
1 0 0 + 3191 1064 2 0.4236 4.61487 26.83602 9.04804 1 0 0 + 3192 1064 2 0.4236 5.01332 26.41228 10.57390 1 0 0 + 3193 1065 1 -0.8472 10.10567 10.08061 21.54331 0 0 0 + 3194 1065 2 0.4236 10.54602 10.94036 21.28469 0 0 0 + 3195 1065 2 0.4236 9.48169 9.79389 20.81643 0 0 0 + 3196 1066 1 -0.8472 19.00683 35.07186 29.84480 0 0 0 + 3197 1066 2 0.4236 19.04738 0.45529 29.39029 0 1 0 + 3198 1066 2 0.4236 19.86037 34.57629 29.68408 0 0 0 + 3199 1067 1 -0.8472 29.99711 14.98272 24.72885 -1 0 0 + 3200 1067 2 0.4236 29.69719 14.56470 25.58631 -1 0 0 + 3201 1067 2 0.4236 30.81369 14.51127 24.39589 -1 0 0 + 3202 1068 1 -0.8472 6.63614 29.00766 30.49536 1 0 0 + 3203 1068 2 0.4236 7.11384 29.32519 29.67625 1 0 0 + 3204 1068 2 0.4236 6.90641 29.56908 31.27747 1 0 0 + 3205 1069 1 -0.8472 10.11551 30.29423 18.30231 0 0 0 + 3206 1069 2 0.4236 10.98545 30.70152 18.58028 0 0 0 + 3207 1069 2 0.4236 10.19849 29.93235 17.37380 0 0 0 + 3208 1070 1 -0.8472 32.38903 34.15199 4.78994 0 0 0 + 3209 1070 2 0.4236 33.33485 34.20962 4.47042 0 0 0 + 3210 1070 2 0.4236 32.32473 33.47169 5.51998 0 0 0 + 3211 1071 1 -0.8472 8.26907 25.90440 25.87939 0 0 0 + 3212 1071 2 0.4236 7.44369 26.19768 25.39703 0 0 0 + 3213 1071 2 0.4236 8.21908 26.19751 26.83413 0 0 0 + 3214 1072 1 -0.8472 34.39606 28.76401 22.33914 -1 0 0 + 3215 1072 2 0.4236 34.78287 28.19023 21.61726 -1 0 0 + 3216 1072 2 0.4236 33.95646 29.56347 21.92983 -1 0 0 + 3217 1073 1 -0.8472 26.17808 0.33318 20.75823 0 0 0 + 3218 1073 2 0.4236 27.03602 0.13193 21.23088 0 0 0 + 3219 1073 2 0.4236 25.41800 35.41922 21.25378 0 -1 0 + 3220 1074 1 -0.8472 10.61100 26.37578 18.41595 0 -1 0 + 3221 1074 2 0.4236 10.08330 25.72318 17.87230 0 -1 0 + 3222 1074 2 0.4236 10.12037 26.57693 19.26374 0 -1 0 + 3223 1075 1 -0.8472 8.64359 7.64068 22.40826 1 1 0 + 3224 1075 2 0.4236 9.38042 7.39398 21.77883 1 1 0 + 3225 1075 2 0.4236 9.00778 7.70978 23.33701 1 1 0 + 3226 1076 1 -0.8472 34.94158 6.28899 29.47998 -1 1 0 + 3227 1076 2 0.4236 34.74992 5.34456 29.21292 -1 
1 0 + 3228 1076 2 0.4236 35.35594 6.30049 30.38998 -1 1 0 + 3229 1077 1 -0.8472 22.96785 8.26787 24.97489 0 0 0 + 3230 1077 2 0.4236 23.63128 8.57475 24.29249 0 0 0 + 3231 1077 2 0.4236 22.64048 9.05422 25.49872 0 0 0 + 3232 1078 1 -0.8472 11.45410 14.58764 29.16903 -1 -1 0 + 3233 1078 2 0.4236 11.28686 15.56996 29.25265 -1 -1 0 + 3234 1078 2 0.4236 10.98431 14.10543 29.90843 -1 -1 0 + 3235 1079 1 -0.8472 30.98568 28.19268 16.35175 -1 -1 0 + 3236 1079 2 0.4236 30.86407 29.08556 15.91827 -1 -1 0 + 3237 1079 2 0.4236 31.87810 27.81855 16.09965 -1 -1 0 + 3238 1080 1 -0.8472 19.60511 34.63779 0.73034 0 -1 0 + 3239 1080 2 0.4236 20.07181 34.47947 1.60044 0 -1 0 + 3240 1080 2 0.4236 19.65749 0.10212 0.49583 0 0 0 + 3241 1081 1 -0.8472 6.52788 20.20134 8.34657 -1 0 0 + 3242 1081 2 0.4236 6.44859 20.82437 9.12471 -1 0 0 + 3243 1081 2 0.4236 6.75885 20.72214 7.52476 -1 0 0 + 3244 1082 1 -0.8472 2.22359 5.95019 13.03372 1 0 0 + 3245 1082 2 0.4236 3.20048 5.73680 13.04474 1 0 0 + 3246 1082 2 0.4236 1.93257 6.22714 13.94943 1 0 0 + 3247 1083 1 -0.8472 33.54633 3.02522 18.32549 -1 0 0 + 3248 1083 2 0.4236 33.26547 2.84323 19.26782 -1 0 0 + 3249 1083 2 0.4236 32.87661 2.63119 17.69609 -1 0 0 + 3250 1084 1 -0.8472 27.71341 5.53405 14.97739 0 0 0 + 3251 1084 2 0.4236 28.61577 5.86831 15.24942 0 0 0 + 3252 1084 2 0.4236 27.30630 6.16972 14.32155 0 0 0 + 3253 1085 1 -0.8472 15.94416 23.73383 21.56160 0 1 0 + 3254 1085 2 0.4236 15.98176 24.62802 22.00764 0 1 0 + 3255 1085 2 0.4236 15.63717 23.84542 20.61651 0 1 0 + 3256 1086 1 -0.8472 6.69227 14.39868 15.10160 0 0 0 + 3257 1086 2 0.4236 7.68503 14.51748 15.11791 0 0 0 + 3258 1086 2 0.4236 6.25284 15.28527 14.95723 0 0 0 + 3259 1087 1 -0.8472 5.95374 12.14523 3.05261 0 0 0 + 3260 1087 2 0.4236 6.30415 12.22359 2.11933 0 0 0 + 3261 1087 2 0.4236 5.21340 11.47341 3.07535 0 0 0 + 3262 1088 1 -0.8472 12.10252 30.80971 11.22350 1 1 0 + 3263 1088 2 0.4236 12.52277 29.92727 11.01224 1 1 0 + 3264 1088 2 0.4236 12.81790 31.48717 11.39446 1 1 0 
+ 3265 1089 1 -0.8472 18.93365 2.00832 15.99019 0 -1 0 + 3266 1089 2 0.4236 19.47296 1.23800 15.65004 0 -1 0 + 3267 1089 2 0.4236 18.54763 1.77650 16.88304 0 -1 0 + 3268 1090 1 -0.8472 23.96448 13.72856 20.70151 -1 1 0 + 3269 1090 2 0.4236 24.48869 14.52327 21.00735 -1 1 0 + 3270 1090 2 0.4236 23.44154 13.96884 19.88371 -1 1 0 + 3271 1091 1 -0.8472 8.25157 5.78700 16.02100 0 1 0 + 3272 1091 2 0.4236 8.74962 5.11181 16.56511 0 1 0 + 3273 1091 2 0.4236 8.66944 5.85908 15.11539 0 1 0 + 3274 1092 1 -0.8472 28.35054 22.25648 21.49869 -1 -1 0 + 3275 1092 2 0.4236 29.09362 22.80409 21.11415 -1 -1 0 + 3276 1092 2 0.4236 28.28496 22.42712 22.48181 -1 -1 0 + 3277 1093 1 -0.8472 1.81162 0.06670 2.46594 0 1 0 + 3278 1093 2 0.4236 1.21799 34.98582 1.91575 0 0 0 + 3279 1093 2 0.4236 1.94043 35.16403 3.36929 0 0 0 + 3280 1094 1 -0.8472 35.53166 20.98042 3.82252 -1 0 0 + 3281 1094 2 0.4236 34.63470 21.34595 3.57389 -1 0 0 + 3282 1094 2 0.4236 0.74022 21.58995 3.47998 0 0 0 + 3283 1095 1 -0.8472 11.17694 3.63400 11.43282 0 0 0 + 3284 1095 2 0.4236 10.46131 2.93763 11.37926 0 0 0 + 3285 1095 2 0.4236 11.26616 3.94827 12.37791 0 0 0 + 3286 1096 1 -0.8472 9.92654 28.89578 20.68262 0 0 0 + 3287 1096 2 0.4236 9.98108 29.33694 19.78687 0 0 0 + 3288 1096 2 0.4236 9.37314 28.06585 20.61226 0 0 0 + 3289 1097 1 -0.8472 8.08827 0.98702 4.46142 1 1 0 + 3290 1097 2 0.4236 8.92399 1.53197 4.39386 1 1 0 + 3291 1097 2 0.4236 8.30273 35.53173 4.29053 1 0 0 + 3292 1098 1 -0.8472 2.10866 25.58374 29.35790 0 -1 0 + 3293 1098 2 0.4236 2.19232 25.06311 30.20753 0 -1 0 + 3294 1098 2 0.4236 3.01122 25.90930 29.07623 0 -1 0 + 3295 1099 1 -0.8472 0.21720 17.12702 5.62458 1 0 0 + 3296 1099 2 0.4236 0.72164 16.95368 6.47042 1 0 0 + 3297 1099 2 0.4236 34.80790 17.46284 5.84542 0 0 0 + 3298 1100 1 -0.8472 29.35960 32.88200 7.15034 0 0 0 + 3299 1100 2 0.4236 28.43114 33.02456 6.80738 0 0 0 + 3300 1100 2 0.4236 29.47698 33.37806 8.01064 0 0 0 + 3301 1101 1 -0.8472 33.46143 17.44437 24.98963 1 0 0 + 3302 1101 2 
0.4236 33.72538 16.51329 25.24138 1 0 0 + 3303 1101 2 0.4236 33.83439 18.08860 25.65735 1 0 0 + 3304 1102 1 -0.8472 14.63472 31.97268 20.11819 1 -1 0 + 3305 1102 2 0.4236 14.36253 32.40462 20.97800 1 -1 0 + 3306 1102 2 0.4236 15.60752 31.74378 20.15354 1 -1 0 + 3307 1103 1 -0.8472 17.90095 30.19602 29.07393 0 0 0 + 3308 1103 2 0.4236 17.96731 30.36446 28.09048 0 0 0 + 3309 1103 2 0.4236 18.56336 30.76959 29.55576 0 0 0 + 3310 1104 1 -0.8472 29.81731 30.58982 9.54416 -1 -1 0 + 3311 1104 2 0.4236 29.63821 31.39695 8.98161 -1 -1 0 + 3312 1104 2 0.4236 30.53858 30.79746 10.20493 -1 -1 0 + 3313 1105 1 -0.8472 21.82976 0.18004 6.59623 0 1 0 + 3314 1105 2 0.4236 21.32664 34.98930 7.10702 0 0 0 + 3315 1105 2 0.4236 21.47662 1.08550 6.83159 0 1 0 + 3316 1106 1 -0.8472 29.69215 11.04418 25.02770 0 0 0 + 3317 1106 2 0.4236 28.84149 10.53054 24.91587 0 0 0 + 3318 1106 2 0.4236 29.59049 11.69820 25.77727 0 0 0 + 3319 1107 1 -0.8472 7.91759 12.41881 27.49079 1 0 0 + 3320 1107 2 0.4236 7.27216 12.31040 28.24684 1 0 0 + 3321 1107 2 0.4236 8.07535 13.39211 27.32415 1 0 0 + 3322 1108 1 -0.8472 1.58250 18.15258 25.23805 0 0 0 + 3323 1108 2 0.4236 0.92416 18.82419 25.57788 0 0 0 + 3324 1108 2 0.4236 1.37849 17.93983 24.28250 0 0 0 + 3325 1109 1 -0.8472 6.93347 23.53201 2.35517 1 -1 0 + 3326 1109 2 0.4236 6.65652 22.58772 2.17757 1 -1 0 + 3327 1109 2 0.4236 7.76609 23.73748 1.84092 1 -1 0 + 3328 1110 1 -0.8472 4.74464 5.25742 7.60193 0 1 0 + 3329 1110 2 0.4236 5.42640 4.59168 7.90511 0 1 0 + 3330 1110 2 0.4236 4.27163 5.63599 8.39747 0 1 0 + 3331 1111 1 -0.8472 8.12552 1.12029 8.50091 1 0 0 + 3332 1111 2 0.4236 7.73332 0.58543 9.24927 1 0 0 + 3333 1111 2 0.4236 8.35748 0.51169 7.74213 1 0 0 + 3334 1112 1 -0.8472 19.74359 22.07163 2.67622 0 0 0 + 3335 1112 2 0.4236 20.34968 22.23695 3.45421 0 0 0 + 3336 1112 2 0.4236 19.04678 21.40195 2.93298 0 0 0 +3337 1113 1 -0.8472 26.99679 3.40644 0.23973 0 1 0 +3338 1113 2 0.4236 27.37894 4.26925 35.35606 0 1 -1 +3339 1113 2 0.4236 27.54590 
3.07037 1.00492 0 1 0 + 3340 1114 1 -0.8472 2.76236 30.72744 25.44148 1 0 0 + 3341 1114 2 0.4236 2.26842 30.38711 24.64140 1 0 0 + 3342 1114 2 0.4236 3.56792 31.23927 25.14305 1 0 0 + 3343 1115 1 -0.8472 32.12430 13.85696 23.83760 -1 0 0 + 3344 1115 2 0.4236 32.83244 14.29542 24.39093 -1 0 0 + 3345 1115 2 0.4236 32.25120 12.86530 23.85869 -1 0 0 + 3346 1116 1 -0.8472 10.21628 20.26624 34.40125 1 0 0 + 3347 1116 2 0.4236 10.71138 21.11413 34.59057 1 0 0 + 3348 1116 2 0.4236 9.23602 20.45695 34.35014 1 0 0 + 3349 1117 1 -0.8472 23.09081 20.87638 25.30398 -1 -1 0 + 3350 1117 2 0.4236 22.37116 20.90702 25.99764 -1 -1 0 + 3351 1117 2 0.4236 23.32969 21.80796 25.03015 -1 -1 0 + 3352 1118 1 -0.8472 29.45887 17.10774 18.22032 0 0 0 + 3353 1118 2 0.4236 30.22190 17.47370 18.75302 0 0 0 + 3354 1118 2 0.4236 28.68228 17.73528 18.27593 0 0 0 + 3355 1119 1 -0.8472 20.05651 13.40407 10.45749 1 1 0 + 3356 1119 2 0.4236 19.47165 13.23293 11.25034 1 1 0 + 3357 1119 2 0.4236 19.78197 12.80508 9.70530 1 1 0 + 3358 1120 1 -0.8472 20.70762 15.06236 6.66545 -1 0 0 + 3359 1120 2 0.4236 21.25889 14.32457 6.27596 -1 0 0 + 3360 1120 2 0.4236 21.16718 15.93745 6.51395 -1 0 0 + 3361 1121 1 -0.8472 18.89325 16.33477 29.01551 0 0 0 + 3362 1121 2 0.4236 19.58381 17.05767 29.03667 0 0 0 + 3363 1121 2 0.4236 18.82390 15.91345 29.91973 0 0 0 + 3364 1122 1 -0.8472 32.53070 22.11236 16.33258 0 0 0 + 3365 1122 2 0.4236 31.61485 21.71487 16.27572 0 0 0 + 3366 1122 2 0.4236 32.95674 22.09858 15.42800 0 0 0 + 3367 1123 1 -0.8472 25.66860 12.05813 29.52616 0 0 0 + 3368 1123 2 0.4236 25.53751 12.57429 28.67979 0 0 0 + 3369 1123 2 0.4236 26.45841 11.45268 29.42833 0 0 0 + 3370 1124 1 -0.8472 17.50953 29.89865 23.74962 0 0 0 + 3371 1124 2 0.4236 18.29724 29.81518 23.13925 0 0 0 + 3372 1124 2 0.4236 16.70119 29.51259 23.30525 0 0 0 + 3373 1125 1 -0.8472 20.42758 5.60773 4.90458 0 0 0 + 3374 1125 2 0.4236 20.79830 5.93925 5.77208 0 0 0 + 3375 1125 2 0.4236 19.45567 5.40009 5.01507 0 0 0 + 3376 1126 1 -0.8472 
21.22300 7.21858 16.33609 0 0 0 + 3377 1126 2 0.4236 21.71859 8.08305 16.25232 0 0 0 + 3378 1126 2 0.4236 20.23901 7.39452 16.30900 0 0 0 + 3379 1127 1 -0.8472 8.72497 17.28708 17.55696 -1 -1 0 + 3380 1127 2 0.4236 9.58423 17.79092 17.64519 -1 -1 0 + 3381 1127 2 0.4236 8.91490 16.35639 17.24449 -1 -1 0 + 3382 1128 1 -0.8472 8.76414 18.06755 33.54747 0 1 0 + 3383 1128 2 0.4236 9.30454 18.84130 33.87802 0 1 0 + 3384 1128 2 0.4236 7.81310 18.35231 33.42741 0 1 0 + 3385 1129 1 -0.8472 32.82700 35.52108 32.09864 0 0 0 + 3386 1129 2 0.4236 32.18718 34.75823 32.19162 0 0 0 + 3387 1129 2 0.4236 32.43608 0.70537 31.49022 0 1 0 + 3388 1130 1 -0.8472 9.68869 32.41541 15.80987 0 -2 0 + 3389 1130 2 0.4236 10.32198 32.88836 15.19734 0 -2 0 + 3390 1130 2 0.4236 9.49060 31.50514 15.44640 0 -2 0 + 3391 1131 1 -0.8472 3.13703 9.32361 6.33009 0 0 0 + 3392 1131 2 0.4236 2.73001 10.14381 5.92815 0 0 0 + 3393 1131 2 0.4236 3.81597 8.94572 5.70069 0 0 0 + 3394 1132 1 -0.8472 8.17658 30.34937 28.56505 0 0 0 + 3395 1132 2 0.4236 8.28700 29.89563 27.68082 0 0 0 + 3396 1132 2 0.4236 8.88873 31.04267 28.67515 0 0 0 + 3397 1133 1 -0.8472 5.53061 5.11969 25.83483 1 1 0 + 3398 1133 2 0.4236 6.24983 4.47405 25.57827 1 1 0 + 3399 1133 2 0.4236 5.62889 5.95828 25.29905 1 1 0 + 3400 1134 1 -0.8472 27.96519 26.59699 23.46335 0 0 0 + 3401 1134 2 0.4236 27.46175 26.09209 22.76224 0 0 0 + 3402 1134 2 0.4236 27.32121 27.10484 24.03546 0 0 0 + 3403 1135 1 -0.8472 8.76854 34.46179 30.13004 0 0 0 + 3404 1135 2 0.4236 7.98772 34.69897 30.70797 0 0 0 + 3405 1135 2 0.4236 9.57720 34.32484 30.70212 0 0 0 + 3406 1136 1 -0.8472 6.61675 26.68586 32.41993 0 1 0 + 3407 1136 2 0.4236 6.44110 27.45464 31.80503 0 1 0 + 3408 1136 2 0.4236 5.81960 26.08236 32.43514 0 1 0 + 3409 1137 1 -0.8472 20.61037 1.66831 21.21259 0 1 0 + 3410 1137 2 0.4236 20.46142 2.52558 20.71980 0 1 0 + 3411 1137 2 0.4236 19.80342 1.08529 21.11862 0 1 0 + 3412 1138 1 -0.8472 17.10976 11.94341 25.70251 0 1 0 + 3413 1138 2 0.4236 16.57575 12.22093 
24.90392 0 1 0 + 3414 1138 2 0.4236 17.05537 12.65457 26.40341 0 1 0 + 3415 1139 1 -0.8472 35.29388 32.21867 32.51401 -1 -1 0 + 3416 1139 2 0.4236 0.72519 31.87316 32.55062 0 -1 0 + 3417 1139 2 0.4236 35.30112 33.17035 32.20707 -1 -1 0 + 3418 1140 1 -0.8472 18.63749 20.06698 10.24470 -1 0 0 + 3419 1140 2 0.4236 18.71657 19.22662 9.70852 -1 0 0 + 3420 1140 2 0.4236 17.74969 20.08665 10.70452 -1 0 0 + 3421 1141 1 -0.8472 15.67168 16.70914 30.84639 -1 -1 0 + 3422 1141 2 0.4236 15.18511 17.13458 31.60940 -1 -1 0 + 3423 1141 2 0.4236 15.07898 16.03321 30.40844 -1 -1 0 + 3424 1142 1 -0.8472 25.32085 32.99754 32.93324 0 0 0 + 3425 1142 2 0.4236 25.32696 32.97137 33.93285 0 0 0 + 3426 1142 2 0.4236 26.15749 32.57590 32.58366 0 0 0 + 3427 1143 1 -0.8472 21.95911 34.91495 21.68096 0 -1 0 + 3428 1143 2 0.4236 22.86918 35.09934 22.05209 0 -1 0 + 3429 1143 2 0.4236 21.54269 0.26581 21.37813 0 0 0 + 3430 1144 1 -0.8472 9.90943 22.86932 28.54732 0 -1 0 + 3431 1144 2 0.4236 9.65403 22.01433 28.99867 0 -1 0 + 3432 1144 2 0.4236 9.79947 23.62979 29.18729 0 -1 0 + 3433 1145 1 -0.8472 18.92073 1.83062 28.54452 0 0 0 + 3434 1145 2 0.4236 18.77899 1.62596 27.57604 0 0 0 + 3435 1145 2 0.4236 18.45840 2.68673 28.77539 0 0 0 + 3436 1146 1 -0.8472 16.83842 26.30374 13.63728 1 0 0 + 3437 1146 2 0.4236 16.31083 25.83360 14.34480 1 0 0 + 3438 1146 2 0.4236 16.31854 27.08861 13.30007 1 0 0 + 3439 1147 1 -0.8472 23.03513 33.97776 14.66518 -1 0 0 + 3440 1147 2 0.4236 22.86690 33.83634 15.64068 -1 0 0 + 3441 1147 2 0.4236 23.46646 34.86929 14.52711 -1 0 0 +3442 1148 1 -0.8472 26.37468 30.43214 35.25468 0 0 0 +3443 1148 2 0.4236 25.71419 30.35818 34.50752 0 0 0 +3444 1148 2 0.4236 26.42258 31.38172 0.11725 0 0 1 + 3445 1149 1 -0.8472 32.63917 13.78229 28.38877 -1 0 0 + 3446 1149 2 0.4236 32.58013 13.67594 29.38133 -1 0 0 + 3447 1149 2 0.4236 33.03290 12.95516 27.98776 -1 0 0 + 3448 1150 1 -0.8472 1.19545 11.88827 7.42402 0 0 0 + 3449 1150 2 0.4236 1.37418 11.89679 6.44019 0 0 0 + 3450 1150 2 0.4236 
0.43544 11.26847 7.61937 0 0 0 + 3451 1151 1 -0.8472 35.00393 34.58219 14.67872 -1 0 0 + 3452 1151 2 0.4236 35.36912 34.75521 13.76404 -1 0 0 + 3453 1151 2 0.4236 35.20028 35.36471 15.26952 -1 0 0 + 3454 1152 1 -0.8472 9.91103 32.33524 28.62464 1 0 0 + 3455 1152 2 0.4236 10.87005 32.22620 28.36321 1 0 0 + 3456 1152 2 0.4236 9.82922 33.07729 29.28991 1 0 0 + 3457 1153 1 -0.8472 6.36330 25.82391 16.66394 1 -1 0 + 3458 1153 2 0.4236 6.42232 25.33261 15.79498 1 -1 0 + 3459 1153 2 0.4236 5.42022 25.80214 16.99576 1 -1 0 + 3460 1154 1 -0.8472 32.81749 28.00308 1.05981 0 0 0 + 3461 1154 2 0.4236 32.64819 27.98472 2.04517 0 0 0 + 3462 1154 2 0.4236 33.80293 27.98990 0.89062 0 0 0 + 3463 1155 1 -0.8472 2.09892 25.00453 9.72837 1 -1 0 + 3464 1155 2 0.4236 1.31233 25.44939 9.30021 1 -1 0 + 3465 1155 2 0.4236 2.79673 24.82249 9.03567 1 -1 0 + 3466 1156 1 -0.8472 3.70573 6.67896 30.88514 1 0 0 + 3467 1156 2 0.4236 3.24241 7.52493 30.62130 1 0 0 + 3468 1156 2 0.4236 3.21166 5.89633 30.50649 1 0 0 + 3469 1157 1 -0.8472 22.91846 23.35707 22.00831 -1 -1 0 + 3470 1157 2 0.4236 23.89673 23.33055 21.80272 -1 -1 0 + 3471 1157 2 0.4236 22.69047 24.23401 22.43137 -1 -1 0 + 3472 1158 1 -0.8472 4.20157 8.41273 26.79776 0 1 0 + 3473 1158 2 0.4236 4.90641 7.70392 26.82417 0 1 0 + 3474 1158 2 0.4236 3.43075 8.09587 26.24514 0 1 0 + 3475 1159 1 -0.8472 27.83257 24.37855 5.10280 0 -1 0 + 3476 1159 2 0.4236 28.60423 24.78261 4.61164 0 -1 0 + 3477 1159 2 0.4236 27.02864 24.96430 5.00022 0 -1 0 + 3478 1160 1 -0.8472 28.25521 34.70625 30.38048 0 0 0 + 3479 1160 2 0.4236 28.81969 33.89251 30.51886 0 0 0 + 3480 1160 2 0.4236 28.61204 35.23210 29.60840 0 0 0 + 3481 1161 1 -0.8472 30.16072 30.72861 15.64472 0 -1 0 + 3482 1161 2 0.4236 29.37415 31.30921 15.85493 0 -1 0 + 3483 1161 2 0.4236 30.99754 31.17198 15.96580 0 -1 0 + 3484 1162 1 -0.8472 14.30036 15.88614 9.70147 1 -1 0 + 3485 1162 2 0.4236 14.12425 15.15890 9.03808 1 -1 0 + 3486 1162 2 0.4236 15.27146 16.12414 9.68516 1 -1 0 + 3487 1163 1 
-0.8472 32.30744 26.75439 30.27938 -2 -1 0 + 3488 1163 2 0.4236 31.79334 27.51898 29.89077 -2 -1 0 + 3489 1163 2 0.4236 33.17053 27.09038 30.65636 -2 -1 0 + 3490 1164 1 -0.8472 26.38370 26.69069 5.02854 0 0 0 + 3491 1164 2 0.4236 26.06841 26.98085 4.12499 0 0 0 + 3492 1164 2 0.4236 26.97407 27.39737 5.41835 0 0 0 + 3493 1165 1 -0.8472 20.86392 29.03407 31.88890 0 0 0 + 3494 1165 2 0.4236 21.65113 29.58265 32.17038 0 0 0 + 3495 1165 2 0.4236 20.02838 29.43015 32.26960 0 0 0 + 3496 1166 1 -0.8472 25.81223 29.44957 19.78123 -1 -1 0 + 3497 1166 2 0.4236 26.68614 29.32640 20.25133 -1 -1 0 + 3498 1166 2 0.4236 25.97911 29.70752 18.82962 -1 -1 0 + 3499 1167 1 -0.8472 9.81268 4.84917 23.38536 1 0 0 + 3500 1167 2 0.4236 10.63486 5.32563 23.07407 1 0 0 + 3501 1167 2 0.4236 9.19011 4.71979 22.61361 1 0 0 + 3502 1168 1 -0.8472 27.09504 22.27136 13.03967 1 -1 0 + 3503 1168 2 0.4236 27.07533 21.30935 13.31187 1 -1 0 + 3504 1168 2 0.4236 27.49938 22.35174 12.12861 1 -1 0 + 3505 1169 1 -0.8472 29.74324 0.48259 28.65165 0 1 0 + 3506 1169 2 0.4236 30.70461 0.21026 28.61248 0 1 0 + 3507 1169 2 0.4236 29.63711 1.23896 29.29707 0 1 0 + 3508 1170 1 -0.8472 23.29461 4.02585 30.19760 0 1 0 + 3509 1170 2 0.4236 23.97815 4.36071 29.54906 0 1 0 + 3510 1170 2 0.4236 22.50367 3.67469 29.69656 0 1 0 + 3511 1171 1 -0.8472 10.11973 1.62285 31.79003 0 0 0 + 3512 1171 2 0.4236 10.13259 2.60573 31.97369 0 0 0 + 3513 1171 2 0.4236 9.27443 1.38412 31.31210 0 0 0 + 3514 1172 1 -0.8472 23.98447 35.12348 7.95924 -1 -1 0 + 3515 1172 2 0.4236 23.32600 35.43441 7.27390 -1 -1 0 + 3516 1172 2 0.4236 24.62710 34.48366 7.53784 -1 -1 0 + 3517 1173 1 -0.8472 18.95755 12.21646 15.09912 0 1 0 + 3518 1173 2 0.4236 19.29259 11.86381 15.97282 0 1 0 + 3519 1173 2 0.4236 18.12141 11.73165 14.84259 0 1 0 + 3520 1174 1 -0.8472 20.64435 34.05811 11.96399 0 -1 0 + 3521 1174 2 0.4236 20.04418 34.42348 11.25246 0 -1 0 + 3522 1174 2 0.4236 21.38622 33.53873 11.53994 0 -1 0 + 3523 1175 1 -0.8472 1.02047 25.62099 23.79503 1 0 0 
+ 3524 1175 2 0.4236 1.42266 26.30733 24.40092 1 0 0 + 3525 1175 2 0.4236 0.53030 24.94115 24.34042 1 0 0 + 3526 1176 1 -0.8472 10.99120 19.54610 23.24843 1 0 0 + 3527 1176 2 0.4236 10.59016 20.42272 23.51428 1 0 0 + 3528 1176 2 0.4236 11.87233 19.70414 22.80279 1 0 0 + 3529 1177 1 -0.8472 27.88427 14.09439 8.71746 0 0 0 + 3530 1177 2 0.4236 28.03205 13.69075 9.62033 0 0 0 + 3531 1177 2 0.4236 28.05850 13.40695 8.01245 0 0 0 + 3532 1178 1 -0.8472 24.85221 12.37059 16.45030 0 0 0 + 3533 1178 2 0.4236 24.81993 11.76201 17.24311 0 0 0 + 3534 1178 2 0.4236 25.21529 13.26013 16.72749 0 0 0 +3535 1179 1 -0.8472 3.37410 29.51426 35.24851 1 0 0 +3536 1179 2 0.4236 3.67043 30.43590 0.05182 1 0 1 +3537 1179 2 0.4236 3.66223 28.86794 0.50788 1 0 1 + 3538 1180 1 -0.8472 0.35240 31.88385 22.28260 0 0 0 + 3539 1180 2 0.4236 0.75035 32.67685 22.74384 0 0 0 + 3540 1180 2 0.4236 0.12386 31.18441 22.95971 0 0 0 + 3541 1181 1 -0.8472 22.09365 22.96551 19.30426 -1 1 0 + 3542 1181 2 0.4236 22.15372 23.84283 18.82824 -1 1 0 + 3543 1181 2 0.4236 22.25424 23.10473 20.28136 -1 1 0 + 3544 1182 1 -0.8472 21.04348 34.07945 16.88194 0 -1 0 + 3545 1182 2 0.4236 20.64076 33.23881 17.24399 0 -1 0 + 3546 1182 2 0.4236 20.84507 34.83819 17.50233 0 -1 0 + 3547 1183 1 -0.8472 33.55073 31.23980 34.07326 -1 -1 0 + 3548 1183 2 0.4236 33.07430 31.97063 34.56197 -1 -1 0 + 3549 1183 2 0.4236 34.22507 31.63882 33.45194 -1 -1 0 + 3550 1184 1 -0.8472 18.03786 31.41193 20.73579 -1 0 0 + 3551 1184 2 0.4236 18.60060 30.70706 21.16754 -1 0 0 + 3552 1184 2 0.4236 17.84513 32.13633 21.39763 -1 0 0 + 3553 1185 1 -0.8472 21.11935 11.51623 20.22975 1 0 0 + 3554 1185 2 0.4236 21.20219 11.22096 21.18154 1 0 0 + 3555 1185 2 0.4236 21.20649 12.51119 20.18069 1 0 0 + 3556 1186 1 -0.8472 2.50377 22.45553 28.26102 0 0 0 + 3557 1186 2 0.4236 2.69363 23.41750 28.06489 0 0 0 + 3558 1186 2 0.4236 3.26130 22.06638 28.78510 0 0 0 + 3559 1187 1 -0.8472 1.89482 15.34996 4.09798 1 0 0 + 3560 1187 2 0.4236 1.58103 16.17698 4.56436 1 0 
0 + 3561 1187 2 0.4236 1.10474 14.83228 3.76974 1 0 0 + 3562 1188 1 -0.8472 11.31662 29.94794 26.07063 1 0 0 + 3563 1188 2 0.4236 10.36281 30.18370 25.88446 1 0 0 + 3564 1188 2 0.4236 11.60685 30.38189 26.92349 1 0 0 + 3565 1189 1 -0.8472 10.46187 0.48151 28.47016 0 0 0 + 3566 1189 2 0.4236 11.30710 0.79408 28.90357 0 0 0 + 3567 1189 2 0.4236 9.81615 0.18539 29.17392 0 0 0 + 3568 1190 1 -0.8472 20.01390 21.11502 19.40053 0 0 0 + 3569 1190 2 0.4236 19.92265 20.43003 20.12333 0 0 0 + 3570 1190 2 0.4236 20.72410 21.77249 19.65214 0 0 0 + 3571 1191 1 -0.8472 17.49785 14.90201 31.45641 0 1 0 + 3572 1191 2 0.4236 17.50843 14.17303 30.77203 0 1 0 + 3573 1191 2 0.4236 16.85603 15.61498 31.17410 0 1 0 + 3574 1192 1 -0.8472 23.93241 26.17915 0.41284 -1 -1 0 + 3575 1192 2 0.4236 23.77665 25.45312 1.08259 -1 -1 0 + 3576 1192 2 0.4236 24.91551 26.30253 0.27758 -1 -1 0 + 3577 1193 1 -0.8472 15.48155 12.36384 9.66953 0 0 0 + 3578 1193 2 0.4236 14.53261 12.38191 9.35473 0 0 0 + 3579 1193 2 0.4236 16.05799 12.85827 9.01897 0 0 0 + 3580 1194 1 -0.8472 24.94921 4.70274 28.23684 -1 0 0 + 3581 1194 2 0.4236 25.43668 5.57148 28.32404 -1 0 0 + 3582 1194 2 0.4236 25.25172 4.23253 27.40777 -1 0 0 + 3583 1195 1 -0.8472 7.23520 24.31872 5.42817 0 0 0 + 3584 1195 2 0.4236 7.41578 24.72360 6.32449 0 0 0 + 3585 1195 2 0.4236 8.06583 24.35494 4.87261 0 0 0 + 3586 1196 1 -0.8472 28.58261 14.10891 13.19958 0 1 0 + 3587 1196 2 0.4236 27.78762 14.51354 12.74766 0 1 0 + 3588 1196 2 0.4236 28.68835 13.16021 12.90164 0 1 0 + 3589 1197 1 -0.8472 34.03337 15.87375 9.37663 -1 0 0 + 3590 1197 2 0.4236 34.58744 16.59549 9.79136 -1 0 0 + 3591 1197 2 0.4236 34.53727 15.01038 9.40222 -1 0 0 + 3592 1198 1 -0.8472 31.82828 18.36860 3.71224 -1 -1 0 + 3593 1198 2 0.4236 32.31781 17.82539 4.39432 -1 -1 0 + 3594 1198 2 0.4236 32.15945 18.13334 2.79850 -1 -1 0 + 3595 1199 1 -0.8472 23.59606 11.47870 18.85195 -1 0 0 + 3596 1199 2 0.4236 23.08158 11.20644 19.66507 -1 0 0 + 3597 1199 2 0.4236 23.26663 12.36661 18.53091 
-1 0 0 + 3598 1200 1 -0.8472 15.34275 31.40288 27.99920 0 0 0 + 3599 1200 2 0.4236 16.12521 31.94953 27.70102 0 0 0 + 3600 1200 2 0.4236 15.34691 30.52552 27.51943 0 0 0 + 3601 1201 1 -0.8472 13.77256 0.54624 8.10259 0 1 0 + 3602 1201 2 0.4236 13.86377 35.20962 8.63274 0 0 0 + 3603 1201 2 0.4236 13.42179 1.27416 8.69169 0 1 0 + 3604 1202 1 -0.8472 29.74122 21.73816 15.80987 0 0 0 + 3605 1202 2 0.4236 28.91356 22.21483 16.10610 0 0 0 + 3606 1202 2 0.4236 29.58403 20.75090 15.83419 0 0 0 + 3607 1203 1 -0.8472 9.34349 5.53388 13.38007 1 0 0 + 3608 1203 2 0.4236 8.89988 5.78430 12.51959 1 0 0 + 3609 1203 2 0.4236 10.20861 6.02809 13.46529 1 0 0 + 3610 1204 1 -0.8472 33.68754 33.26661 18.34318 0 -1 0 + 3611 1204 2 0.4236 34.19102 32.52776 18.79102 0 -1 0 + 3612 1204 2 0.4236 33.14660 33.76135 19.02330 0 -1 0 + 3613 1205 1 -0.8472 8.49063 30.24622 2.44138 1 1 0 + 3614 1205 2 0.4236 8.21723 31.11119 2.02057 1 1 0 + 3615 1205 2 0.4236 7.88983 30.04774 3.21571 1 1 0 + 3616 1206 1 -0.8472 25.31936 5.99106 34.51443 1 0 0 + 3617 1206 2 0.4236 25.28141 5.19128 33.91538 1 0 0 + 3618 1206 2 0.4236 25.62081 5.71091 35.42578 1 0 0 + 3619 1207 1 -0.8472 26.73560 34.72965 14.29355 0 -1 0 + 3620 1207 2 0.4236 26.80598 35.39294 15.03854 0 -1 0 + 3621 1207 2 0.4236 25.87589 34.86993 13.80250 0 -1 0 + 3622 1208 1 -0.8472 21.83502 2.05714 14.53069 0 0 0 + 3623 1208 2 0.4236 21.33580 1.21622 14.73940 0 0 0 + 3624 1208 2 0.4236 21.32241 2.84341 14.87559 0 0 0 + 3625 1209 1 -0.8472 35.01867 0.23622 21.00335 0 0 0 + 3626 1209 2 0.4236 35.14727 0.04188 20.03089 0 0 0 + 3627 1209 2 0.4236 0.26234 35.33762 21.52623 1 -1 0 + 3628 1210 1 -0.8472 22.35916 9.63134 16.26225 0 0 0 + 3629 1210 2 0.4236 23.23198 9.14331 16.26402 0 0 0 + 3630 1210 2 0.4236 22.39939 10.38390 15.60499 0 0 0 + 3631 1211 1 -0.8472 11.20172 33.59575 17.81686 1 0 0 + 3632 1211 2 0.4236 10.69181 33.24651 17.03074 1 0 0 + 3633 1211 2 0.4236 11.65801 32.83712 18.28189 1 0 0 + 3634 1212 1 -0.8472 31.82608 18.51865 34.56225 0 -1 0 
+ 3635 1212 2 0.4236 31.28986 19.16910 35.10013 0 -1 0 + 3636 1212 2 0.4236 31.53461 18.55633 33.60645 0 -1 0 + 3637 1213 1 -0.8472 30.88253 31.96225 12.90618 0 -1 0 + 3638 1213 2 0.4236 31.38406 31.32256 12.32378 0 -1 0 + 3639 1213 2 0.4236 30.61549 31.50276 13.75323 0 -1 0 + 3640 1214 1 -0.8472 13.98169 21.39098 19.92118 0 0 0 + 3641 1214 2 0.4236 13.95328 20.40141 19.78017 0 0 0 + 3642 1214 2 0.4236 14.32905 21.83552 19.09554 0 0 0 + 3643 1215 1 -0.8472 4.16081 28.17702 1.92690 1 0 0 + 3644 1215 2 0.4236 5.01126 27.72808 2.20102 1 0 0 + 3645 1215 2 0.4236 3.39356 27.76728 2.42026 1 0 0 + 3646 1216 1 -0.8472 2.61206 16.00836 16.04875 1 0 0 + 3647 1216 2 0.4236 1.74345 15.51304 16.05970 1 0 0 + 3648 1216 2 0.4236 2.43904 16.98712 16.15867 1 0 0 + 3649 1217 1 -0.8472 13.59737 19.95485 22.53633 1 0 0 + 3650 1217 2 0.4236 14.21305 20.60053 22.08473 1 0 0 + 3651 1217 2 0.4236 13.73769 19.99671 23.52552 1 0 0 + 3652 1218 1 -0.8472 7.18982 10.71677 12.05084 1 0 0 + 3653 1218 2 0.4236 6.43794 10.12559 11.75911 1 0 0 + 3654 1218 2 0.4236 6.83195 11.62130 12.28263 1 0 0 + 3655 1219 1 -0.8472 15.07429 14.72118 6.25090 1 0 0 + 3656 1219 2 0.4236 14.14374 14.54507 6.57184 1 0 0 + 3657 1219 2 0.4236 15.07706 14.77984 5.25263 1 0 0 + 3658 1220 1 -0.8472 17.76755 16.08175 24.87563 0 0 0 + 3659 1220 2 0.4236 17.58977 15.62862 24.00214 0 0 0 + 3660 1220 2 0.4236 18.65997 16.53165 24.84217 0 0 0 + 3661 1221 1 -0.8472 17.65901 7.92406 7.74964 1 0 0 + 3662 1221 2 0.4236 18.59311 8.23144 7.93120 1 0 0 + 3663 1221 2 0.4236 17.05661 8.71812 7.66886 1 0 0 + 3664 1222 1 -0.8472 3.05199 13.20694 2.89879 0 0 0 + 3665 1222 2 0.4236 3.57780 13.70311 2.20789 0 0 0 + 3666 1222 2 0.4236 2.63537 13.85891 3.53230 0 0 0 + 3667 1223 1 -0.8472 5.26002 2.13592 16.40436 0 -1 0 + 3668 1223 2 0.4236 6.09694 1.78456 15.98477 0 -1 0 + 3669 1223 2 0.4236 4.80845 2.76476 15.77141 0 -1 0 + 3670 1224 1 -0.8472 12.61256 32.52531 4.71798 0 0 0 + 3671 1224 2 0.4236 11.90890 33.23199 4.79132 0 0 0 + 3672 1224 2 
0.4236 12.25715 31.75468 4.18908 0 0 0 + 3673 1225 1 -0.8472 33.05943 19.81815 12.71554 0 1 0 + 3674 1225 2 0.4236 32.86723 19.31471 13.55788 0 1 0 + 3675 1225 2 0.4236 33.70078 19.29557 12.15385 0 1 0 + 3676 1226 1 -0.8472 8.67356 22.94470 34.61606 0 0 0 + 3677 1226 2 0.4236 8.08575 22.17542 34.86632 0 0 0 + 3678 1226 2 0.4236 8.33529 23.36200 33.77263 0 0 0 + 3679 1227 1 -0.8472 0.86965 12.09925 11.68222 1 0 0 + 3680 1227 2 0.4236 0.73608 12.60411 12.53501 1 0 0 + 3681 1227 2 0.4236 0.57426 11.15194 11.80597 1 0 0 + 3682 1228 1 -0.8472 27.29430 18.02558 25.87602 0 0 0 + 3683 1228 2 0.4236 26.33030 18.09609 25.61981 0 0 0 + 3684 1228 2 0.4236 27.85129 18.53772 25.22227 0 0 0 + 3685 1229 1 -0.8472 20.96856 4.94099 22.32248 0 0 0 + 3686 1229 2 0.4236 21.44516 4.83664 23.19537 0 0 0 + 3687 1229 2 0.4236 20.65070 5.88429 22.22695 0 0 0 + 3688 1230 1 -0.8472 22.27256 31.32924 24.77039 0 0 0 + 3689 1230 2 0.4236 22.20439 30.66428 24.02667 0 0 0 + 3690 1230 2 0.4236 23.23365 31.45980 25.01366 0 0 0 + 3691 1231 1 -0.8472 22.66253 15.15411 23.54686 0 0 0 + 3692 1231 2 0.4236 23.07572 15.58551 22.74490 0 0 0 + 3693 1231 2 0.4236 23.21000 15.36328 24.35708 0 0 0 + 3694 1232 1 -0.8472 11.19746 9.41692 24.15577 0 1 0 + 3695 1232 2 0.4236 10.84190 9.88305 23.34570 0 1 0 + 3696 1232 2 0.4236 11.55668 10.09332 24.79875 0 1 0 + 3697 1233 1 -0.8472 11.99905 23.28826 34.33738 0 0 0 + 3698 1233 2 0.4236 12.52234 23.62944 35.11821 0 0 0 + 3699 1233 2 0.4236 12.62467 22.89914 33.66126 0 0 0 + 3700 1234 1 -0.8472 23.22442 18.59940 28.07421 0 0 0 + 3701 1234 2 0.4236 23.49579 19.56080 28.11912 0 0 0 + 3702 1234 2 0.4236 24.00062 18.02181 28.32688 0 0 0 + 3703 1235 1 -0.8472 30.92539 3.09568 32.51957 0 1 0 + 3704 1235 2 0.4236 31.84312 3.44347 32.71134 0 1 0 + 3705 1235 2 0.4236 30.25006 3.80152 32.73328 0 1 0 + 3706 1236 1 -0.8472 1.33811 18.35817 13.38681 1 -1 0 + 3707 1236 2 0.4236 1.22509 18.30768 14.37908 1 -1 0 + 3708 1236 2 0.4236 0.99542 17.51674 12.96910 1 -1 0 + 3709 1237 1 
-0.8472 9.40393 27.68480 1.86054 0 0 0 + 3710 1237 2 0.4236 9.19178 28.63569 2.08582 0 0 0 + 3711 1237 2 0.4236 8.65357 27.09582 2.16051 0 0 0 + 3712 1238 1 -0.8472 22.59500 25.15458 24.16226 0 0 0 + 3713 1238 2 0.4236 22.11992 24.61236 24.85525 0 0 0 + 3714 1238 2 0.4236 22.72289 26.08747 24.49890 0 0 0 + 3715 1239 1 -0.8472 34.94268 12.24206 24.23039 0 1 0 + 3716 1239 2 0.4236 34.99080 11.48130 24.87762 0 1 0 + 3717 1239 2 0.4236 34.50585 11.93365 23.38540 0 1 0 + 3718 1240 1 -0.8472 13.66060 32.55204 14.55827 0 0 0 + 3719 1240 2 0.4236 13.52586 31.65916 14.98789 0 0 0 + 3720 1240 2 0.4236 14.30011 33.09209 15.10536 0 0 0 + 3721 1241 1 -0.8472 25.84720 7.64647 30.19310 0 0 0 + 3722 1241 2 0.4236 26.49207 7.44799 29.45506 0 0 0 + 3723 1241 2 0.4236 24.99842 8.00675 29.80616 0 0 0 + 3724 1242 1 -0.8472 31.34937 3.95974 8.27135 -1 0 0 + 3725 1242 2 0.4236 31.73419 4.48651 7.51346 -1 0 0 + 3726 1242 2 0.4236 30.58520 3.40699 7.93900 -1 0 0 + 3727 1243 1 -0.8472 32.08346 7.74273 18.98396 0 1 0 + 3728 1243 2 0.4236 31.48557 6.94437 18.91249 0 1 0 + 3729 1243 2 0.4236 32.68945 7.63585 19.77216 0 1 0 + 3730 1244 1 -0.8472 33.86230 32.04057 14.82923 -1 -1 0 + 3731 1244 2 0.4236 34.19829 32.96445 14.64614 -1 -1 0 + 3732 1244 2 0.4236 33.23522 32.06216 15.60786 -1 -1 0 + 3733 1245 1 -0.8472 15.80498 9.53892 29.22621 0 0 0 + 3734 1245 2 0.4236 16.28878 9.52316 28.35120 0 0 0 + 3735 1245 2 0.4236 14.88959 9.15417 29.10789 0 0 0 + 3736 1246 1 -0.8472 14.75644 31.78471 33.95051 1 -1 0 + 3737 1246 2 0.4236 14.87879 31.63463 32.96947 1 -1 0 + 3738 1246 2 0.4236 14.49184 30.92567 34.38867 1 -1 0 + 3739 1247 1 -0.8472 19.54738 27.88289 17.04095 0 -1 0 + 3740 1247 2 0.4236 19.89389 28.26231 17.89881 0 -1 0 + 3741 1247 2 0.4236 20.31297 27.55072 16.49007 0 -1 0 + 3742 1248 1 -0.8472 29.41360 2.42179 30.27314 0 0 0 + 3743 1248 2 0.4236 29.56706 3.38868 30.06936 0 0 0 + 3744 1248 2 0.4236 29.72219 2.22420 31.20356 0 0 0 + 3745 1249 1 -0.8472 25.21448 16.45875 28.33471 0 1 0 + 3746 1249 
2 0.4236 26.21071 16.47683 28.25035 0 1 0 + 3747 1249 2 0.4236 24.95734 15.91164 29.13128 0 1 0 + 3748 1250 1 -0.8472 4.35720 14.20875 17.03220 1 0 0 + 3749 1250 2 0.4236 5.27798 14.24944 16.64433 1 0 0 + 3750 1250 2 0.4236 3.76871 14.86764 16.56366 1 0 0 + 3751 1251 1 -0.8472 29.13693 0.84916 24.77184 -1 1 0 + 3752 1251 2 0.4236 29.74416 1.35202 25.38696 -1 1 0 + 3753 1251 2 0.4236 29.68242 0.27811 24.15840 -1 1 0 + 3754 1252 1 -0.8472 27.62042 10.22750 23.30181 -1 0 0 + 3755 1252 2 0.4236 28.40039 9.89629 22.77089 -1 0 0 + 3756 1252 2 0.4236 27.57125 11.22405 23.23563 -1 0 0 + 3757 1253 1 -0.8472 13.10422 23.78581 22.52010 0 0 0 + 3758 1253 2 0.4236 13.33878 23.51699 23.45427 0 0 0 + 3759 1253 2 0.4236 13.86548 23.56773 21.90941 0 0 0 + 3760 1254 1 -0.8472 33.19233 2.34733 4.32601 0 1 0 + 3761 1254 2 0.4236 33.68352 3.06983 3.83950 0 1 0 + 3762 1254 2 0.4236 33.54114 1.45534 4.03853 0 1 0 + 3763 1255 1 -0.8472 25.94719 28.29984 2.89677 0 1 0 + 3764 1255 2 0.4236 25.07124 28.70411 2.63365 0 1 0 + 3765 1255 2 0.4236 26.66612 28.63786 2.28947 0 1 0 + 3766 1256 1 -0.8472 9.90709 2.77986 7.29641 -1 0 0 + 3767 1256 2 0.4236 9.28065 2.13448 7.73344 -1 0 0 + 3768 1256 2 0.4236 10.84882 2.54552 7.53760 -1 0 0 + 3769 1257 1 -0.8472 16.30796 34.86244 16.12167 1 0 0 + 3770 1257 2 0.4236 16.62176 35.28557 15.27171 1 0 0 + 3771 1257 2 0.4236 15.30895 34.81894 16.12330 1 0 0 + 3772 1258 1 -0.8472 7.61771 9.73779 26.78220 1 1 0 + 3773 1258 2 0.4236 7.47927 10.72185 26.89368 1 1 0 + 3774 1258 2 0.4236 8.00824 9.55632 25.87971 1 1 0 + 3775 1259 1 -0.8472 3.53455 14.19611 8.20227 0 1 0 + 3776 1259 2 0.4236 3.27523 14.50182 9.11836 0 1 0 + 3777 1259 2 0.4236 3.41713 13.20527 8.13587 0 1 0 + 3778 1260 1 -0.8472 17.36729 21.51876 20.34297 0 0 0 + 3779 1260 2 0.4236 18.19977 21.66761 19.80934 0 0 0 + 3780 1260 2 0.4236 16.76176 22.30825 20.24311 0 0 0 + 3781 1261 1 -0.8472 24.85956 10.53353 5.64142 -1 1 0 + 3782 1261 2 0.4236 24.62252 9.97391 6.43551 -1 1 0 + 3783 1261 2 0.4236 
24.04025 10.69600 5.09160 -1 1 0 + 3784 1262 1 -0.8472 17.54124 19.88458 23.16534 0 -1 0 + 3785 1262 2 0.4236 18.30863 20.31961 23.63629 0 -1 0 + 3786 1262 2 0.4236 16.71124 20.42236 23.31326 0 -1 0 + 3787 1263 1 -0.8472 9.13809 2.62440 27.30937 0 1 0 + 3788 1263 2 0.4236 9.78967 1.98344 27.71510 0 1 0 + 3789 1263 2 0.4236 8.56054 2.14095 26.65161 0 1 0 + 3790 1264 1 -0.8472 14.26627 24.14780 0.96270 0 0 0 + 3791 1264 2 0.4236 13.73311 24.38448 1.77489 0 0 0 + 3792 1264 2 0.4236 14.80771 24.93859 0.67731 0 0 0 + 3793 1265 1 -0.8472 34.12753 0.60519 34.36017 0 0 0 + 3794 1265 2 0.4236 33.98288 1.43738 34.89542 0 0 0 + 3795 1265 2 0.4236 33.66886 0.69362 33.47601 0 0 0 + 3796 1266 1 -0.8472 12.28580 18.19129 0.67816 0 0 0 + 3797 1266 2 0.4236 12.91440 18.64946 0.04978 0 0 0 + 3798 1266 2 0.4236 11.43961 18.71959 0.74726 0 0 0 + 3799 1267 1 -0.8472 12.16281 6.42562 30.34156 0 0 0 + 3800 1267 2 0.4236 12.59339 5.68077 30.85118 0 0 0 + 3801 1267 2 0.4236 12.60106 6.51449 29.44712 0 0 0 + 3802 1268 1 -0.8472 22.54245 21.32955 17.15786 0 0 0 + 3803 1268 2 0.4236 22.26706 21.46728 16.20646 0 0 0 + 3804 1268 2 0.4236 22.14968 22.05228 17.72650 0 0 0 + 3805 1269 1 -0.8472 16.46941 17.24841 22.22819 0 -1 0 + 3806 1269 2 0.4236 16.69927 18.02693 22.81218 0 -1 0 + 3807 1269 2 0.4236 15.47597 17.13973 22.19338 0 -1 0 + 3808 1270 1 -0.8472 15.89729 2.63284 27.11334 0 1 0 + 3809 1270 2 0.4236 15.95407 1.69174 26.78010 0 1 0 + 3810 1270 2 0.4236 16.01219 3.26519 26.34726 0 1 0 + 3811 1271 1 -0.8472 17.56428 29.20553 10.53540 0 -1 0 + 3812 1271 2 0.4236 16.89516 29.75865 11.03170 0 -1 0 + 3813 1271 2 0.4236 18.48421 29.42333 10.86137 0 -1 0 + 3814 1272 1 -0.8472 14.13472 0.57957 23.40535 0 0 0 + 3815 1272 2 0.4236 15.12428 0.71936 23.37111 0 0 0 + 3816 1272 2 0.4236 13.74827 1.11485 24.15637 0 0 0 + 3817 1273 1 -0.8472 14.94299 16.74046 14.49758 0 0 0 + 3818 1273 2 0.4236 14.51357 17.57578 14.84078 0 0 0 + 3819 1273 2 0.4236 15.77043 16.54983 15.02574 0 0 0 + 3820 1274 1 -0.8472 
18.60554 10.10070 33.85186 0 0 0 + 3821 1274 2 0.4236 18.03410 9.28115 33.89305 0 0 0 + 3822 1274 2 0.4236 19.47154 9.88099 33.40267 0 0 0 + 3823 1275 1 -0.8472 21.91949 17.78066 6.96782 -1 0 0 + 3824 1275 2 0.4236 22.76969 18.25566 7.19482 -1 0 0 + 3825 1275 2 0.4236 21.34876 18.37255 6.39871 -1 0 0 + 3826 1276 1 -0.8472 9.21876 31.25393 9.94935 0 0 0 + 3827 1276 2 0.4236 9.97547 30.68146 10.26497 0 0 0 + 3828 1276 2 0.4236 9.23222 31.30335 8.95070 0 0 0 + 3829 1277 1 -0.8472 13.41060 7.95548 25.27966 1 1 0 + 3830 1277 2 0.4236 12.62868 8.49127 24.96117 1 1 0 + 3831 1277 2 0.4236 14.22917 8.22717 24.77362 1 1 0 + 3832 1278 1 -0.8472 12.07574 15.03988 21.04418 0 0 0 + 3833 1278 2 0.4236 11.95903 14.17033 21.52403 0 0 0 + 3834 1278 2 0.4236 12.31435 15.75417 21.70205 0 0 0 + 3835 1279 1 -0.8472 9.88529 18.41150 5.93407 0 0 0 + 3836 1279 2 0.4236 9.71670 19.36165 6.19620 0 0 0 + 3837 1279 2 0.4236 9.38429 17.80224 6.54865 0 0 0 + 3838 1280 1 -0.8472 17.69317 1.78597 18.64760 0 0 0 + 3839 1280 2 0.4236 17.13270 0.95939 18.69861 0 0 0 + 3840 1280 2 0.4236 17.23908 2.52755 19.14138 0 0 0 + 3841 1281 1 -0.8472 31.78581 31.55779 20.70371 0 -1 0 + 3842 1281 2 0.4236 30.92713 31.48884 20.19593 0 -1 0 + 3843 1281 2 0.4236 32.50326 31.06227 20.21415 0 -1 0 + 3844 1282 1 -0.8472 3.88805 14.07173 13.16821 0 0 0 + 3845 1282 2 0.4236 4.86565 13.88586 13.06952 0 0 0 + 3846 1282 2 0.4236 3.59566 13.83494 14.09469 0 0 0 + 3847 1283 1 -0.8472 17.40159 4.33028 2.50015 -1 0 0 + 3848 1283 2 0.4236 16.66174 4.99386 2.38964 -1 0 0 + 3849 1283 2 0.4236 17.65489 4.27123 3.46570 -1 0 0 + 3850 1284 1 -0.8472 32.95401 16.04344 16.03578 0 0 0 + 3851 1284 2 0.4236 32.18345 15.56378 16.45540 0 0 0 + 3852 1284 2 0.4236 33.06983 15.72825 15.09387 0 0 0 + 3853 1285 1 -0.8472 26.81393 18.75179 3.07330 -1 -1 0 + 3854 1285 2 0.4236 26.78553 17.98088 2.43704 -1 -1 0 + 3855 1285 2 0.4236 27.76469 19.01741 3.23286 -1 -1 0 + 3856 1286 1 -0.8472 31.29948 29.38498 30.48980 -1 -1 0 + 3857 1286 2 0.4236 
30.36542 29.67830 30.28626 -1 -1 0 + 3858 1286 2 0.4236 31.94439 30.07640 30.16425 -1 -1 0 + 3859 1287 1 -0.8472 7.70452 13.58317 24.43091 0 1 0 + 3860 1287 2 0.4236 8.11380 13.58285 23.51854 0 1 0 + 3861 1287 2 0.4236 8.38876 13.86587 25.10306 0 1 0 + 3862 1288 1 -0.8472 6.40123 28.60910 19.73785 0 0 0 + 3863 1288 2 0.4236 6.93446 28.83300 18.92206 0 0 0 + 3864 1288 2 0.4236 6.51282 27.63896 19.95312 0 0 0 + 3865 1289 1 -0.8472 3.04379 10.50807 22.19377 0 1 0 + 3866 1289 2 0.4236 2.93294 9.89004 21.41549 0 1 0 + 3867 1289 2 0.4236 2.80507 11.43883 21.91689 0 1 0 + 3868 1290 1 -0.8472 14.37599 26.93440 28.59915 0 0 0 + 3869 1290 2 0.4236 13.74025 27.42354 28.00208 0 0 0 + 3870 1290 2 0.4236 15.27401 27.37252 28.56009 0 0 0 + 3871 1291 1 -0.8472 34.12274 21.85242 14.14839 0 -1 0 + 3872 1291 2 0.4236 33.61787 21.07635 13.77045 0 -1 0 + 3873 1291 2 0.4236 35.09050 21.76777 13.91132 0 -1 0 + 3874 1292 1 -0.8472 32.64567 5.25739 5.99940 -1 1 0 + 3875 1292 2 0.4236 33.11844 4.88493 5.20085 -1 1 0 + 3876 1292 2 0.4236 33.08134 6.11426 6.27500 -1 1 0 + 3877 1293 1 -0.8472 23.64858 3.21683 21.36377 0 0 0 + 3878 1293 2 0.4236 22.71597 3.53283 21.18962 0 0 0 + 3879 1293 2 0.4236 23.97889 2.70125 20.57320 0 0 0 + 3880 1294 1 -0.8472 32.64212 16.11772 31.97213 -1 0 0 + 3881 1294 2 0.4236 31.81837 16.41178 32.45680 -1 0 0 + 3882 1294 2 0.4236 33.26061 16.89594 31.86352 -1 0 0 + 3883 1295 1 -0.8472 6.29229 17.36253 3.95587 0 0 0 + 3884 1295 2 0.4236 6.77184 17.44356 4.82957 0 0 0 + 3885 1295 2 0.4236 6.12602 16.39715 3.75517 0 0 0 + 3886 1296 1 -0.8472 31.92486 23.30850 22.00260 0 0 0 + 3887 1296 2 0.4236 31.25077 23.84248 21.49234 0 0 0 + 3888 1296 2 0.4236 32.82945 23.72002 21.89141 0 0 0 + 3889 1297 1 -0.8472 27.33250 28.07000 32.92133 0 0 0 + 3890 1297 2 0.4236 27.07320 27.40863 33.62506 0 0 0 + 3891 1297 2 0.4236 28.29881 27.95187 32.69273 0 0 0 + 3892 1298 1 -0.8472 2.64561 33.53042 18.92001 1 0 0 + 3893 1298 2 0.4236 1.69345 33.79700 18.77078 1 0 0 + 3894 1298 2 0.4236 
3.23109 34.33901 18.86227 1 0 0 + 3895 1299 1 -0.8472 20.25726 25.40408 31.93927 -1 0 0 + 3896 1299 2 0.4236 19.93756 25.82497 32.78816 -1 0 0 + 3897 1299 2 0.4236 20.87305 26.03411 31.46615 -1 0 0 + 3898 1300 1 -0.8472 3.04174 13.82425 29.63913 1 0 0 + 3899 1300 2 0.4236 2.19951 13.71557 29.11115 1 0 0 + 3900 1300 2 0.4236 2.90738 13.45836 30.56002 1 0 0 + 3901 1301 1 -0.8472 27.74001 30.99889 32.58650 0 0 0 + 3902 1301 2 0.4236 28.31372 31.33194 33.33475 0 0 0 + 3903 1301 2 0.4236 27.55154 30.02577 32.71868 0 0 0 + 3904 1302 1 -0.8472 31.54537 3.17343 11.48007 -1 0 0 + 3905 1302 2 0.4236 31.00105 2.58309 12.07601 -1 0 0 + 3906 1302 2 0.4236 31.24008 3.06453 10.53409 -1 0 0 +3907 1303 1 -0.8472 12.78374 29.36396 35.25028 0 -1 0 +3908 1303 2 0.4236 12.48790 28.41019 35.19821 0 -1 0 +3909 1303 2 0.4236 13.10919 29.56198 0.72764 0 -1 1 +3910 1304 1 -0.8472 23.77746 14.97626 35.08085 0 0 0 +3911 1304 2 0.4236 23.65699 15.06343 0.62252 0 0 1 +3912 1304 2 0.4236 23.98557 14.02508 34.85299 0 0 0 + 3913 1305 1 -0.8472 9.02720 31.44392 7.07983 1 0 0 + 3914 1305 2 0.4236 8.96368 32.37089 6.71012 1 0 0 + 3915 1305 2 0.4236 8.13255 31.00077 7.02393 1 0 0 + 3916 1306 1 -0.8472 31.36549 11.59481 10.42094 0 0 0 + 3917 1306 2 0.4236 31.65377 12.22675 11.14031 0 0 0 + 3918 1306 2 0.4236 32.09618 10.93542 10.24406 0 0 0 + 3919 1307 1 -0.8472 29.50183 27.32803 3.75836 0 -1 0 + 3920 1307 2 0.4236 29.53439 26.49065 3.21279 0 -1 0 + 3921 1307 2 0.4236 29.08094 28.05932 3.22170 0 -1 0 + 3922 1308 1 -0.8472 33.81990 14.88458 25.76164 0 0 0 + 3923 1308 2 0.4236 34.72524 14.95250 25.34251 0 0 0 + 3924 1308 2 0.4236 33.89155 14.39907 26.63289 0 0 0 + 3925 1309 1 -0.8472 7.98850 21.50658 20.41280 0 0 0 + 3926 1309 2 0.4236 8.56246 21.56801 19.59624 0 0 0 + 3927 1309 2 0.4236 8.56179 21.31560 21.20954 0 0 0 + 3928 1310 1 -0.8472 32.46665 14.83711 6.59392 0 0 0 + 3929 1310 2 0.4236 32.73572 14.17385 7.29220 0 0 0 + 3930 1310 2 0.4236 31.57544 15.22519 6.82865 0 0 0 + 3931 1311 1 -0.8472 
19.27221 16.62720 16.91022 0 -1 0 + 3932 1311 2 0.4236 18.39566 16.51808 16.44147 0 -1 0 + 3933 1311 2 0.4236 19.11058 16.84716 17.87223 0 -1 0 + 3934 1312 1 -0.8472 10.75317 25.30896 14.90281 1 -1 0 + 3935 1312 2 0.4236 10.02110 24.75819 15.30365 1 -1 0 + 3936 1312 2 0.4236 10.68350 26.24920 15.23605 1 -1 0 + 3937 1313 1 -0.8472 10.59250 3.58120 20.02809 0 0 0 + 3938 1313 2 0.4236 10.10365 3.59853 20.90027 0 0 0 + 3939 1313 2 0.4236 11.56509 3.41878 20.19430 0 0 0 + 3940 1314 1 -0.8472 23.15667 28.53308 20.17853 0 -1 0 + 3941 1314 2 0.4236 23.99072 29.08474 20.18023 0 -1 0 + 3942 1314 2 0.4236 23.01997 28.13839 19.26998 0 -1 0 + 3943 1315 1 -0.8472 6.61160 15.88142 29.17336 1 0 0 + 3944 1315 2 0.4236 5.96957 16.51911 28.74778 1 0 0 + 3945 1315 2 0.4236 6.23172 14.95687 29.14422 1 0 0 + 3946 1316 1 -0.8472 33.84117 10.87684 5.32296 -1 0 0 + 3947 1316 2 0.4236 34.05499 11.83082 5.11283 -1 0 0 + 3948 1316 2 0.4236 34.38758 10.27642 4.73908 -1 0 0 + 3949 1317 1 -0.8472 5.10908 7.98804 15.75723 0 1 0 + 3950 1317 2 0.4236 5.83685 8.64150 15.96527 0 1 0 + 3951 1317 2 0.4236 4.52699 8.35699 15.03262 0 1 0 + 3952 1318 1 -0.8472 21.73216 9.98499 28.65661 0 0 0 + 3953 1318 2 0.4236 22.11985 9.08867 28.44162 0 0 0 + 3954 1318 2 0.4236 22.22861 10.38703 29.42592 0 0 0 + 3955 1319 1 -0.8472 15.11924 17.54695 6.96555 1 0 0 + 3956 1319 2 0.4236 15.24394 16.65784 6.52526 1 0 0 + 3957 1319 2 0.4236 14.14970 17.79152 6.95414 1 0 0 + 3958 1320 1 -0.8472 9.36217 21.72925 23.47370 0 0 0 + 3959 1320 2 0.4236 9.44785 22.47012 22.80757 0 0 0 + 3960 1320 2 0.4236 9.25579 22.11545 24.38994 0 0 0 + 3961 1321 1 -0.8472 9.43645 15.00189 14.94078 1 0 0 + 3962 1321 2 0.4236 10.25430 14.52785 15.26696 1 0 0 + 3963 1321 2 0.4236 9.65662 15.51329 14.11018 1 0 0 + 3964 1322 1 -0.8472 8.59033 10.76868 9.86814 0 0 0 + 3965 1322 2 0.4236 7.82215 10.93745 9.25058 0 0 0 + 3966 1322 2 0.4236 8.25906 10.73652 10.81111 0 0 0 + 3967 1323 1 -0.8472 11.61768 13.08370 14.89370 0 0 0 + 3968 1323 2 0.4236 
11.67363 13.04553 13.89601 0 0 0 + 3969 1323 2 0.4236 12.47416 13.44533 15.26196 0 0 0 + 3970 1324 1 -0.8472 16.11690 13.08391 16.65249 0 0 0 + 3971 1324 2 0.4236 16.28495 12.57915 15.80576 0 0 0 + 3972 1324 2 0.4236 15.93060 12.43894 17.39362 0 0 0 + 3973 1325 1 -0.8472 15.86534 31.96218 1.66332 0 0 0 + 3974 1325 2 0.4236 15.01496 31.44332 1.57625 0 0 0 + 3975 1325 2 0.4236 15.96614 32.57132 0.87672 0 0 0 + 3976 1326 1 -0.8472 24.92119 12.54887 34.11767 0 0 0 + 3977 1326 2 0.4236 25.52292 11.78246 34.34234 0 0 0 + 3978 1326 2 0.4236 25.18156 12.92786 33.22969 0 0 0 + 3979 1327 1 -0.8472 11.53054 35.48053 2.50601 0 -1 0 + 3980 1327 2 0.4236 11.26027 0.86465 2.87208 0 0 0 + 3981 1327 2 0.4236 11.70360 0.05592 1.52453 0 0 0 + 3982 1328 1 -0.8472 30.37646 11.06481 19.98364 -1 1 0 + 3983 1328 2 0.4236 31.07807 10.97906 19.27629 -1 1 0 + 3984 1328 2 0.4236 29.60175 11.58441 19.62339 -1 1 0 + 3985 1329 1 -0.8472 5.33838 10.35688 23.82633 0 1 0 + 3986 1329 2 0.4236 4.49407 10.43102 23.29565 0 1 0 + 3987 1329 2 0.4236 5.42533 11.15151 24.42705 0 1 0 + 3988 1330 1 -0.8472 17.21081 5.64167 9.21177 -1 1 0 + 3989 1330 2 0.4236 17.44537 5.70658 10.18167 -1 1 0 + 3990 1330 2 0.4236 17.43319 6.50384 8.75666 -1 1 0 + 3991 1331 1 -0.8472 8.48413 30.64049 22.56444 0 -1 0 + 3992 1331 2 0.4236 9.06406 29.97286 22.09767 0 -1 0 + 3993 1331 2 0.4236 8.37563 31.44943 21.98669 0 -1 0 + 3994 1332 1 -0.8472 1.13258 13.38927 14.12372 1 1 0 + 3995 1332 2 0.4236 1.00638 14.05274 14.86113 1 1 0 + 3996 1332 2 0.4236 1.57468 12.56864 14.48574 1 1 0 + 3997 1333 1 -0.8472 3.61348 27.78485 20.21784 1 -1 0 + 3998 1333 2 0.4236 3.54423 26.84230 19.89108 1 -1 0 + 3999 1333 2 0.4236 4.49674 28.16750 19.94691 1 -1 0 + 4000 1334 1 -0.8472 34.87963 19.16306 19.16038 -1 1 0 + 4001 1334 2 0.4236 34.72652 18.19533 18.96042 -1 1 0 + 4002 1334 2 0.4236 34.73462 19.70187 18.33056 -1 1 0 + 4003 1335 1 -0.8472 33.17028 25.43707 7.09681 -1 0 0 + 4004 1335 2 0.4236 32.75169 26.26784 6.73000 -1 0 0 + 4005 1335 2 
0.4236 32.53585 24.67095 6.99433 -1 0 0 + 4006 1336 1 -0.8472 10.03853 8.39253 8.37546 0 1 0 + 4007 1336 2 0.4236 9.83643 9.29993 8.74386 0 1 0 + 4008 1336 2 0.4236 9.26076 7.78562 8.53887 0 1 0 + 4009 1337 1 -0.8472 8.03214 12.07162 19.93674 1 0 0 + 4010 1337 2 0.4236 7.62829 11.35734 20.50829 1 0 0 + 4011 1337 2 0.4236 8.51882 11.65059 19.17133 1 0 0 + 4012 1338 1 -0.8472 1.41857 21.54262 10.36366 0 0 0 + 4013 1338 2 0.4236 2.20565 20.94132 10.22637 0 0 0 + 4014 1338 2 0.4236 1.66063 22.47502 10.09525 0 0 0 + 4015 1339 1 -0.8472 34.50038 20.54422 16.81189 -1 0 0 + 4016 1339 2 0.4236 35.23362 20.99512 16.30298 -1 0 0 + 4017 1339 2 0.4236 33.70511 21.14879 16.85659 -1 0 0 + 4018 1340 1 -0.8472 9.44547 24.80491 4.00309 1 0 0 + 4019 1340 2 0.4236 10.06637 25.56510 3.81178 1 0 0 + 4020 1340 2 0.4236 9.34559 24.24167 3.18290 1 0 0 + 4021 1341 1 -0.8472 4.73779 28.13530 4.76587 1 0 0 + 4022 1341 2 0.4236 4.64675 27.24203 5.20595 1 0 0 + 4023 1341 2 0.4236 5.38013 28.06692 4.00255 1 0 0 + 4024 1342 1 -0.8472 28.79309 11.26361 32.56261 -1 0 0 + 4025 1342 2 0.4236 28.66814 12.07106 31.98609 -1 0 0 + 4026 1342 2 0.4236 28.00130 11.15780 33.16414 -1 0 0 + 4027 1343 1 -0.8472 29.00688 2.02184 18.32003 0 1 0 + 4028 1343 2 0.4236 29.92131 1.72090 18.59060 0 1 0 + 4029 1343 2 0.4236 28.91878 1.96743 17.32545 0 1 0 + 4030 1344 1 -0.8472 31.38630 11.70534 6.36117 0 1 0 + 4031 1344 2 0.4236 31.72496 12.31115 7.08107 0 1 0 + 4032 1344 2 0.4236 32.14258 11.15776 6.00323 0 1 0 + 4033 1345 1 -0.8472 14.04432 34.74239 31.23302 -1 -1 0 + 4034 1345 2 0.4236 14.42009 35.30378 30.49572 -1 -1 0 + 4035 1345 2 0.4236 14.59804 33.91641 31.33837 -1 -1 0 + 4036 1346 1 -0.8472 11.74166 19.24522 20.21415 0 0 0 + 4037 1346 2 0.4236 12.20073 18.37581 20.39670 0 0 0 + 4038 1346 2 0.4236 11.64250 19.75559 21.06832 0 0 0 + 4039 1347 1 -0.8472 35.52179 6.24067 32.03171 0 0 0 + 4040 1347 2 0.4236 0.50438 6.83174 32.67320 1 0 0 + 4041 1347 2 0.4236 0.29816 5.29165 32.17109 1 0 0 + 4042 1348 1 -0.8472 
15.57283 9.26811 33.08255 0 0 0 + 4043 1348 2 0.4236 15.53601 10.10503 33.62861 0 0 0 + 4044 1348 2 0.4236 15.19689 8.50778 33.61220 0 0 0 + 4045 1349 1 -0.8472 17.80359 31.80829 5.14239 1 -1 0 + 4046 1349 2 0.4236 16.85934 32.13019 5.07387 1 -1 0 + 4047 1349 2 0.4236 17.97257 31.11858 4.43838 1 -1 0 + 4048 1350 1 -0.8472 30.38406 19.12162 16.07527 0 0 0 + 4049 1350 2 0.4236 31.29341 19.04947 15.66557 0 0 0 + 4050 1350 2 0.4236 30.08197 18.21753 16.37749 0 0 0 + 4051 1351 1 -0.8472 11.10369 6.99773 6.45008 0 1 0 + 4052 1351 2 0.4236 12.01908 7.37712 6.31577 0 1 0 + 4053 1351 2 0.4236 10.59371 7.57531 7.08745 0 1 0 + 4054 1352 1 -0.8472 12.27206 16.59023 16.74848 1 -1 0 + 4055 1352 2 0.4236 11.81988 17.30736 17.27877 1 -1 0 + 4056 1352 2 0.4236 12.89672 16.08136 17.34073 1 -1 0 + 4057 1353 1 -0.8472 20.33587 18.83981 32.69894 0 1 0 + 4058 1353 2 0.4236 20.70478 19.62858 33.19059 0 1 0 + 4059 1353 2 0.4236 20.73393 18.00044 33.06900 0 1 0 + 4060 1354 1 -0.8472 2.03095 27.60334 3.55915 1 0 0 + 4061 1354 2 0.4236 1.49012 28.34142 3.15579 1 0 0 + 4062 1354 2 0.4236 2.33241 27.87255 4.47382 1 0 0 + 4063 1355 1 -0.8472 18.88135 18.11410 0.87201 0 1 0 + 4064 1355 2 0.4236 19.39275 17.48421 0.28749 0 1 0 + 4065 1355 2 0.4236 18.56634 18.89200 0.32833 0 1 0 + 4066 1356 1 -0.8472 16.25484 6.69873 5.58205 -1 1 0 + 4067 1356 2 0.4236 16.87855 7.00828 6.29978 -1 1 0 + 4068 1356 2 0.4236 16.02398 5.73754 5.73284 -1 1 0 + 4069 1357 1 -0.8472 4.91653 23.82036 8.55004 0 0 0 + 4070 1357 2 0.4236 5.34761 23.21217 9.21648 0 0 0 + 4071 1357 2 0.4236 5.41269 24.68806 8.52083 0 0 0 + 4072 1358 1 -0.8472 33.30943 33.20561 10.09238 -1 0 0 + 4073 1358 2 0.4236 33.26590 34.05918 10.61147 -1 0 0 + 4074 1358 2 0.4236 32.96598 33.36084 9.16618 -1 0 0 + 4075 1359 1 -0.8472 16.61015 7.10659 25.54548 0 0 0 + 4076 1359 2 0.4236 17.06577 7.76016 26.14985 0 0 0 + 4077 1359 2 0.4236 16.33132 7.56921 24.70396 0 0 0 + 4078 1360 1 -0.8472 21.90262 1.55066 18.37026 0 1 0 + 4079 1360 2 0.4236 22.89510 
1.63523 18.45827 0 1 0 + 4080 1360 2 0.4236 21.50605 2.45082 18.19019 0 1 0 + 4081 1361 1 -0.8472 3.86501 3.59828 14.83734 0 0 0 + 4082 1361 2 0.4236 4.42352 4.42579 14.89434 0 0 0 + 4083 1361 2 0.4236 3.15744 3.62445 15.54349 0 0 0 + 4084 1362 1 -0.8472 21.92304 8.04648 5.73415 0 0 0 + 4085 1362 2 0.4236 21.15202 8.10038 6.36865 0 0 0 + 4086 1362 2 0.4236 21.92013 8.84868 5.13715 0 0 0 + 4087 1363 1 -0.8472 3.88461 21.70830 19.87333 0 0 0 + 4088 1363 2 0.4236 3.77326 22.48610 19.25477 0 0 0 + 4089 1363 2 0.4236 4.65296 21.88004 20.48982 0 0 0 + 4090 1364 1 -0.8472 12.50665 27.83119 26.83785 0 0 0 + 4091 1364 2 0.4236 11.81736 27.12241 26.68791 0 0 0 + 4092 1364 2 0.4236 12.13096 28.72130 26.57993 0 0 0 + 4093 1365 1 -0.8472 32.46832 31.16200 16.94950 0 0 0 + 4094 1365 2 0.4236 33.05634 30.35349 16.92696 0 0 0 + 4095 1365 2 0.4236 32.80140 31.79707 17.64643 0 0 0 + 4096 1366 1 -0.8472 4.04609 20.23787 14.72111 0 1 0 + 4097 1366 2 0.4236 4.07606 19.71469 15.57277 0 1 0 + 4098 1366 2 0.4236 3.88322 19.61722 13.95421 0 1 0 + 4099 1367 1 -0.8472 13.76311 15.06580 33.41259 0 0 0 + 4100 1367 2 0.4236 14.68056 14.84069 33.74052 0 0 0 + 4101 1367 2 0.4236 13.74788 16.01155 33.08807 0 0 0 + 4102 1368 1 -0.8472 7.97774 4.02759 21.41028 1 1 0 + 4103 1368 2 0.4236 7.63454 3.91294 20.47809 1 1 0 + 4104 1368 2 0.4236 7.20683 4.12630 22.03950 1 1 0 + 4105 1369 1 -0.8472 20.29710 6.13425 12.62094 1 1 0 + 4106 1369 2 0.4236 19.31701 6.01240 12.46433 1 1 0 + 4107 1369 2 0.4236 20.56868 7.05305 12.33463 1 1 0 + 4108 1370 1 -0.8472 5.16656 5.52045 3.92872 1 1 0 + 4109 1370 2 0.4236 5.60269 5.98846 4.69728 1 1 0 + 4110 1370 2 0.4236 5.86909 5.12236 3.33884 1 1 0 + 4111 1371 1 -0.8472 17.27686 18.91878 19.63934 0 0 0 + 4112 1371 2 0.4236 18.05111 18.28992 19.71027 0 0 0 + 4113 1371 2 0.4236 17.49359 19.76961 20.11795 0 0 0 + 4114 1372 1 -0.8472 14.92005 7.12502 11.97112 0 0 0 + 4115 1372 2 0.4236 14.48733 6.58039 11.25271 0 0 0 + 4116 1372 2 0.4236 14.49309 8.02852 12.00724 0 0 0 + 4117 
1373 1 -0.8472 1.10019 6.63116 0.49417 0 0 0 + 4118 1373 2 0.4236 1.99111 6.89491 0.86381 0 0 0 + 4119 1373 2 0.4236 1.03948 5.63397 0.45037 0 0 0 + 4120 1374 1 -0.8472 22.59485 0.72080 27.60450 1 1 0 + 4121 1374 2 0.4236 22.17357 1.60200 27.81888 1 1 0 + 4122 1374 2 0.4236 23.51241 0.68494 28.00041 1 1 0 + 4123 1375 1 -0.8472 15.78119 12.79592 23.35569 0 0 0 + 4124 1375 2 0.4236 16.02480 13.73623 23.11820 0 0 0 + 4125 1375 2 0.4236 14.85064 12.77490 23.72119 0 0 0 + 4126 1376 1 -0.8472 33.58141 6.89870 20.97751 -1 1 0 + 4127 1376 2 0.4236 34.40863 7.44824 21.09409 -1 1 0 + 4128 1376 2 0.4236 33.57629 6.15751 21.64870 -1 1 0 + 4129 1377 1 -0.8472 1.92074 17.89289 29.55807 1 0 0 + 4130 1377 2 0.4236 1.00224 17.93553 29.95110 1 0 0 + 4131 1377 2 0.4236 2.57668 17.64257 30.27013 1 0 0 + 4132 1378 1 -0.8472 11.63025 22.33197 20.78411 0 0 0 + 4133 1378 2 0.4236 11.83395 22.71874 21.68347 0 0 0 + 4134 1378 2 0.4236 12.46780 21.96046 20.38359 0 0 0 + 4135 1379 1 -0.8472 18.99923 25.73489 1.07719 0 0 0 + 4136 1379 2 0.4236 19.23105 26.05481 0.15857 0 0 0 + 4137 1379 2 0.4236 18.34556 26.36595 1.49482 0 0 0 + 4138 1380 1 -0.8472 26.73163 0.58642 2.49028 -1 1 0 + 4139 1380 2 0.4236 27.25801 1.43411 2.55567 -1 1 0 + 4140 1380 2 0.4236 26.60863 0.19913 3.40397 -1 1 0 + 4141 1381 1 -0.8472 10.39938 13.91835 25.30518 0 1 0 + 4142 1381 2 0.4236 10.62229 13.45115 26.16077 0 1 0 + 4143 1381 2 0.4236 10.41238 14.90734 25.45229 0 1 0 + 4144 1382 1 -0.8472 21.36317 34.04409 28.83108 0 -1 0 + 4145 1382 2 0.4236 21.78783 34.69929 28.20632 0 -1 0 + 4146 1382 2 0.4236 22.07522 33.53628 29.31593 0 -1 0 + 4147 1383 1 -0.8472 6.60638 30.22876 4.76665 0 -1 0 + 4148 1383 2 0.4236 6.87058 30.19293 5.73043 0 -1 0 + 4149 1383 2 0.4236 5.62090 30.38335 4.69672 0 -1 0 + 4150 1384 1 -0.8472 6.99525 5.86455 18.49351 1 0 0 + 4151 1384 2 0.4236 6.79939 4.89785 18.65820 1 0 0 + 4152 1384 2 0.4236 7.16656 6.00860 17.51892 1 0 0 + 4153 1385 1 -0.8472 27.85100 24.40461 26.76259 -1 0 0 + 4154 1385 2 0.4236 
28.16466 23.49121 27.02200 -1 0 0 + 4155 1385 2 0.4236 27.50868 24.88178 27.57192 -1 0 0 + 4156 1386 1 -0.8472 26.18497 0.14061 24.90006 0 0 0 + 4157 1386 2 0.4236 27.13608 0.23417 24.60577 0 0 0 + 4158 1386 2 0.4236 25.60583 0.74498 24.35296 0 0 0 + 4159 1387 1 -0.8472 28.23866 32.55211 16.47504 0 -1 0 + 4160 1387 2 0.4236 27.78773 32.40987 17.35615 0 -1 0 + 4161 1387 2 0.4236 27.95660 33.43281 16.09458 0 -1 0 + 4162 1388 1 -0.8472 3.71190 17.69512 22.13922 0 0 0 + 4163 1388 2 0.4236 4.11270 18.60508 22.24549 0 0 0 + 4164 1388 2 0.4236 3.85950 17.16622 22.97496 0 0 0 + 4165 1389 1 -0.8472 21.99849 17.97835 20.53867 0 -1 0 + 4166 1389 2 0.4236 22.74324 18.54152 20.18072 0 -1 0 + 4167 1389 2 0.4236 21.44970 18.51802 21.17704 0 -1 0 + 4168 1390 1 -0.8472 10.41557 21.36907 9.81845 0 -1 0 + 4169 1390 2 0.4236 10.99340 21.92631 10.41470 0 -1 0 + 4170 1390 2 0.4236 10.29063 20.46447 10.22595 0 -1 0 + 4171 1391 1 -0.8472 26.10008 27.35743 8.93223 -2 0 0 + 4172 1391 2 0.4236 26.71331 27.76259 9.61030 -2 0 0 + 4173 1391 2 0.4236 25.82174 28.05715 8.27430 -2 0 0 + 4174 1392 1 -0.8472 17.35902 11.08757 2.80124 0 1 0 + 4175 1392 2 0.4236 17.01865 10.28022 2.31926 0 1 0 + 4176 1392 2 0.4236 18.34155 11.18187 2.64093 0 1 0 +4177 1393 1 -0.8472 16.76823 15.96965 35.38491 0 0 0 +4178 1393 2 0.4236 16.40681 16.87372 0.16584 0 0 1 +4179 1393 2 0.4236 17.75158 16.03673 35.21615 0 0 0 + 4180 1394 1 -0.8472 5.38571 21.32895 1.85800 0 0 0 + 4181 1394 2 0.4236 4.97998 21.46462 0.95415 0 0 0 + 4182 1394 2 0.4236 5.12424 20.42993 2.20919 0 0 0 + 4183 1395 1 -0.8472 13.98879 4.76207 23.30507 1 0 0 + 4184 1395 2 0.4236 14.66465 5.49461 23.22411 1 0 0 + 4185 1395 2 0.4236 14.11509 4.10815 22.55919 1 0 0 + 4186 1396 1 -0.8472 25.20454 3.81448 25.19998 0 1 0 + 4187 1396 2 0.4236 25.43529 4.37239 24.40284 0 1 0 + 4188 1396 2 0.4236 24.75751 2.97209 24.89910 0 1 0 + 4189 1397 1 -0.8472 34.55435 24.91541 25.79393 0 0 0 + 4190 1397 2 0.4236 35.08581 24.14893 26.15450 0 0 0 + 4191 1397 2 0.4236 
33.85857 25.18160 26.46101 0 0 0 + 4192 1398 1 -0.8472 26.21632 33.11524 29.54084 0 0 0 + 4193 1398 2 0.4236 26.88558 33.61308 30.09236 0 0 0 + 4194 1398 2 0.4236 26.05296 33.61059 28.68766 0 0 0 + 4195 1399 1 -0.8472 12.75839 12.33150 9.28135 0 0 0 + 4196 1399 2 0.4236 12.49269 12.46848 10.23559 0 0 0 + 4197 1399 2 0.4236 12.18297 12.89711 8.69066 0 0 0 + 4198 1400 1 -0.8472 30.89915 15.99270 28.39554 -1 0 0 + 4199 1400 2 0.4236 31.44059 15.15457 28.46154 -1 0 0 + 4200 1400 2 0.4236 30.93487 16.33963 27.45835 -1 0 0 + 4201 1401 1 -0.8472 0.81922 11.50629 17.18912 0 0 0 + 4202 1401 2 0.4236 0.61583 10.52916 17.25048 0 0 0 + 4203 1401 2 0.4236 1.64440 11.64122 16.64065 0 0 0 + 4204 1402 1 -0.8472 6.31281 10.44238 16.76762 0 0 0 + 4205 1402 2 0.4236 7.01406 11.14835 16.66862 0 0 0 + 4206 1402 2 0.4236 5.76953 10.62481 17.58709 0 0 0 + 4207 1403 1 -0.8472 0.54456 9.54460 23.55923 0 0 0 + 4208 1403 2 0.4236 0.32188 9.63088 24.53027 0 0 0 + 4209 1403 2 0.4236 1.35608 10.09147 23.35353 0 0 0 + 4210 1404 1 -0.8472 1.24532 29.21899 12.99820 0 -1 0 + 4211 1404 2 0.4236 2.10877 28.72424 12.90008 0 -1 0 + 4212 1404 2 0.4236 1.18386 29.60264 13.91958 0 -1 0 + 4213 1405 1 -0.8472 9.87755 0.56243 14.38546 1 1 0 + 4214 1405 2 0.4236 10.32219 35.29136 13.94064 1 0 0 + 4215 1405 2 0.4236 10.56154 1.10058 14.87789 1 1 0 + 4216 1406 1 -0.8472 20.62159 2.62651 7.23282 1 1 0 + 4217 1406 2 0.4236 21.26312 3.32400 7.55202 1 1 0 + 4218 1406 2 0.4236 19.72502 2.78378 7.64684 1 1 0 + 4219 1407 1 -0.8472 26.24895 1.51497 16.19898 0 0 0 + 4220 1407 2 0.4236 25.40298 1.65532 16.71335 0 0 0 + 4221 1407 2 0.4236 26.29696 2.17550 15.44973 0 0 0 + 4222 1408 1 -0.8472 21.52089 15.53065 3.18350 0 -1 0 + 4223 1408 2 0.4236 21.15454 15.06314 2.37904 0 -1 0 + 4224 1408 2 0.4236 20.93898 16.31204 3.40870 0 -1 0 + 4225 1409 1 -0.8472 9.60116 12.81307 30.75366 0 0 0 + 4226 1409 2 0.4236 10.22636 13.08164 31.48643 0 0 0 + 4227 1409 2 0.4236 9.05604 12.02934 31.05121 0 0 0 + 4228 1410 1 -0.8472 5.23729 
9.36348 2.76133 0 1 0 + 4229 1410 2 0.4236 5.49223 9.14952 3.70430 0 1 0 + 4230 1410 2 0.4236 4.82716 8.55685 2.33574 0 1 0 + 4231 1411 1 -0.8472 30.15622 30.86325 5.78803 -1 0 0 + 4232 1411 2 0.4236 31.03997 30.61950 6.18741 -1 0 0 + 4233 1411 2 0.4236 29.77296 31.64556 6.27901 -1 0 0 + 4234 1412 1 -0.8472 3.00118 7.40350 17.66433 1 1 0 + 4235 1412 2 0.4236 3.31933 7.97153 18.42332 1 1 0 + 4236 1412 2 0.4236 3.68758 7.40392 16.93713 1 1 0 + 4237 1413 1 -0.8472 1.27143 7.78395 33.63201 1 0 0 + 4238 1413 2 0.4236 1.04920 7.43066 34.54070 1 0 0 + 4239 1413 2 0.4236 1.85411 8.59165 33.72173 1 0 0 + 4240 1414 1 -0.8472 10.36647 16.58874 31.95040 1 0 0 + 4241 1414 2 0.4236 9.90077 17.17268 32.61532 1 0 0 + 4242 1414 2 0.4236 10.58267 15.70882 32.37335 1 0 0 + 4243 1415 1 -0.8472 2.92649 20.78950 22.99831 0 0 0 + 4244 1415 2 0.4236 3.38630 20.43813 23.81381 0 0 0 + 4245 1415 2 0.4236 3.60510 21.17464 22.37292 0 0 0 + 4246 1416 1 -0.8472 31.24310 32.56486 23.40585 -1 0 0 + 4247 1416 2 0.4236 30.76987 31.83084 23.89290 -1 0 0 + 4248 1416 2 0.4236 31.66708 32.19655 22.57848 -1 0 0 + 4249 1417 1 -0.8472 20.50168 18.67424 3.60469 0 1 0 + 4250 1417 2 0.4236 20.50225 18.74384 2.60714 0 1 0 + 4251 1417 2 0.4236 21.11534 19.36488 3.98731 0 1 0 + 4252 1418 1 -0.8472 29.26181 22.02665 30.81917 0 0 0 + 4253 1418 2 0.4236 29.77498 22.84230 31.08623 0 0 0 + 4254 1418 2 0.4236 28.58254 22.27185 30.12749 0 0 0 + 4255 1419 1 -0.8472 1.85161 27.62341 34.11640 -1 0 0 + 4256 1419 2 0.4236 2.16253 28.55236 34.31717 -1 0 0 + 4257 1419 2 0.4236 1.65257 27.54234 33.13979 -1 0 0 + 4258 1420 1 -0.8472 7.53004 9.50217 1.47002 0 1 0 + 4259 1420 2 0.4236 6.61551 9.21649 1.75623 0 1 0 + 4260 1420 2 0.4236 7.45608 10.27834 0.84390 0 1 0 + 4261 1421 1 -0.8472 16.58285 11.39480 14.39794 0 0 0 + 4262 1421 2 0.4236 16.18969 11.93603 13.65465 0 0 0 + 4263 1421 2 0.4236 16.00811 10.59421 14.56741 0 0 0 + 4264 1422 1 -0.8472 2.10592 5.43627 4.72316 0 1 0 + 4265 1422 2 0.4236 3.08841 5.34757 4.55950 0 1 0 + 
4266 1422 2 0.4236 1.92931 5.42178 5.70732 0 1 0 + 4267 1423 1 -0.8472 3.71506 8.98264 20.01685 -1 0 0 + 4268 1423 2 0.4236 3.94387 9.91834 19.74841 -1 0 0 + 4269 1423 2 0.4236 4.50529 8.56072 20.46122 -1 0 0 + 4270 1424 1 -0.8472 1.86906 28.23419 30.94543 0 0 0 + 4271 1424 2 0.4236 2.78640 28.42230 30.59465 0 0 0 + 4272 1424 2 0.4236 1.58999 27.31365 30.67214 0 0 0 + 4273 1425 1 -0.8472 25.91892 1.73795 33.90733 1 1 0 + 4274 1425 2 0.4236 26.38470 2.45005 34.43262 1 1 0 + 4275 1425 2 0.4236 25.22904 2.15669 33.31689 1 1 0 + 4276 1426 1 -0.8472 4.30326 25.01135 31.69404 1 0 0 + 4277 1426 2 0.4236 4.77681 24.60093 30.91477 1 0 0 + 4278 1426 2 0.4236 3.42338 24.55523 31.82725 1 0 0 + 4279 1427 1 -0.8472 19.66466 33.96005 4.92748 0 -1 0 + 4280 1427 2 0.4236 19.99973 33.92475 5.86895 0 -1 0 + 4281 1427 2 0.4236 19.05768 33.18317 4.76034 0 -1 0 + 4282 1428 1 -0.8472 21.52853 24.53570 28.70701 0 -1 0 + 4283 1428 2 0.4236 21.55814 23.79625 29.37955 0 -1 0 + 4284 1428 2 0.4236 21.25506 25.38540 29.15776 0 -1 0 + 4285 1429 1 -0.8472 26.10586 14.96142 24.05624 0 0 0 + 4286 1429 2 0.4236 26.93664 15.23325 24.54190 0 0 0 + 4287 1429 2 0.4236 25.31446 15.40773 24.47395 0 0 0 + 4288 1430 1 -0.8472 8.93989 1.91730 34.66523 0 1 0 + 4289 1430 2 0.4236 9.71720 1.70768 34.07209 0 1 0 + 4290 1430 2 0.4236 9.19437 2.64332 35.30406 0 1 0 + 4291 1431 1 -0.8472 5.49155 24.85029 22.62250 1 0 0 + 4292 1431 2 0.4236 4.52301 24.64929 22.47596 1 0 0 + 4293 1431 2 0.4236 5.58611 25.50776 23.37001 1 0 0 + 4294 1432 1 -0.8472 30.35913 16.66043 13.69265 0 0 0 + 4295 1432 2 0.4236 31.35846 16.64719 13.65890 0 0 0 + 4296 1432 2 0.4236 30.01994 15.73968 13.88523 0 0 0 + 4297 1433 1 -0.8472 23.73905 30.09756 28.00530 0 -1 0 + 4298 1433 2 0.4236 22.88636 29.76565 27.60195 0 -1 0 + 4299 1433 2 0.4236 23.54863 30.89468 28.57831 0 -1 0 + 4300 1434 1 -0.8472 6.27457 22.28510 10.31293 0 0 0 + 4301 1434 2 0.4236 7.20338 22.65451 10.28568 0 0 0 + 4302 1434 2 0.4236 5.87861 22.44520 11.21712 0 0 0 + 4303 1435 
1 -0.8472 34.26671 24.67070 21.96499 -1 0 0 + 4304 1435 2 0.4236 34.87274 24.93575 21.21504 -1 0 0 + 4305 1435 2 0.4236 34.76260 24.73063 22.83129 -1 0 0 + 4306 1436 1 -0.8472 18.92162 1.08281 4.46925 -1 -1 0 + 4307 1436 2 0.4236 19.78754 1.54407 4.27592 -1 -1 0 + 4308 1436 2 0.4236 19.09471 0.11779 4.66605 -1 -1 0 + 4309 1437 1 -0.8472 12.57287 31.40153 18.46855 1 0 0 + 4310 1437 2 0.4236 12.50938 30.52513 17.99122 1 0 0 + 4311 1437 2 0.4236 13.40979 31.42337 19.01540 1 0 0 + 4312 1438 1 -0.8472 18.33881 32.50240 15.43232 0 0 0 + 4313 1438 2 0.4236 17.94420 33.42056 15.39759 0 0 0 + 4314 1438 2 0.4236 17.69459 31.87927 15.87577 0 0 0 + 4315 1439 1 -0.8472 7.67654 27.84873 9.04314 0 0 0 + 4316 1439 2 0.4236 8.43198 27.95713 9.68931 0 0 0 + 4317 1439 2 0.4236 6.89338 27.43980 9.51151 0 0 0 + 4318 1440 1 -0.8472 21.65035 35.23355 32.90414 -1 0 0 + 4319 1440 2 0.4236 21.89201 35.27549 33.87358 -1 0 0 + 4320 1440 2 0.4236 21.11552 0.53569 32.65870 -1 1 0 + 4321 1441 1 -0.8472 26.98866 12.66178 1.39463 0 0 0 + 4322 1441 2 0.4236 27.22858 13.05128 0.50543 0 0 0 + 4323 1441 2 0.4236 26.30814 11.93983 1.26961 0 0 0 + 4324 1442 1 -0.8472 33.90498 1.20205 27.79719 -1 0 0 + 4325 1442 2 0.4236 33.57512 1.17136 26.85369 -1 0 0 + 4326 1442 2 0.4236 34.38097 0.34922 28.01175 -1 0 0 + 4327 1443 1 -0.8472 16.09059 23.00449 11.90689 0 -1 0 + 4328 1443 2 0.4236 15.59851 22.16519 11.67591 0 -1 0 + 4329 1443 2 0.4236 16.68248 23.26450 11.14399 0 -1 0 + 4330 1444 1 -0.8472 21.66203 22.49671 5.11467 0 -1 0 + 4331 1444 2 0.4236 21.48400 22.99703 5.96197 0 -1 0 + 4332 1444 2 0.4236 22.09350 21.62052 5.32931 0 -1 0 + 4333 1445 1 -0.8472 28.27062 28.47059 10.54210 0 -1 0 + 4334 1445 2 0.4236 28.73699 29.21139 10.05878 0 -1 0 + 4335 1445 2 0.4236 28.92304 27.99473 11.13191 0 -1 0 + 4336 1446 1 -0.8472 11.81133 6.08344 22.22918 2 0 0 + 4337 1446 2 0.4236 11.98293 6.96727 21.79400 2 0 0 + 4338 1446 2 0.4236 12.67690 5.59722 22.34885 2 0 0 + 4339 1447 1 -0.8472 11.54720 20.00403 29.64998 0 0 0 + 
4340 1447 2 0.4236 12.20847 19.94668 30.39788 0 0 0 + 4341 1447 2 0.4236 10.65105 20.25502 30.01590 0 0 0 + 4342 1448 1 -0.8472 23.63295 15.75324 6.26015 -1 0 0 + 4343 1448 2 0.4236 24.01845 15.46731 7.13740 -1 0 0 + 4344 1448 2 0.4236 23.17822 16.63724 6.36819 -1 0 0 + 4345 1449 1 -0.8472 18.59627 17.84478 8.51395 0 0 0 + 4346 1449 2 0.4236 19.30661 17.22786 8.85265 0 0 0 + 4347 1449 2 0.4236 18.88270 18.23432 7.63866 0 0 0 + 4348 1450 1 -0.8472 24.95837 31.64730 25.42719 0 0 0 + 4349 1450 2 0.4236 25.82533 31.64851 24.92884 0 0 0 + 4350 1450 2 0.4236 25.13906 31.67401 26.41036 0 0 0 + 4351 1451 1 -0.8472 16.66874 15.84535 16.59843 0 0 0 + 4352 1451 2 0.4236 16.30160 16.17484 17.46823 0 0 0 + 4353 1451 2 0.4236 16.72463 14.84712 16.61697 0 0 0 + 4354 1452 1 -0.8472 33.01487 0.03301 11.50236 0 1 0 + 4355 1452 2 0.4236 32.49598 0.68423 10.94864 0 1 0 + 4356 1452 2 0.4236 32.63147 35.50688 12.42537 0 0 0 +4357 1453 1 -0.8472 28.05328 19.82767 0.19572 0 0 0 +4358 1453 2 0.4236 27.36733 20.21266 0.81312 0 0 0 +4359 1453 2 0.4236 27.69662 18.99018 35.22901 0 0 -1 + 4360 1454 1 -0.8472 13.82727 16.06989 19.24354 0 0 0 + 4361 1454 2 0.4236 14.79294 15.86683 19.40546 0 0 0 + 4362 1454 2 0.4236 13.26084 15.39722 19.71956 0 0 0 + 4363 1455 1 -0.8472 14.08001 21.97765 33.17988 1 0 0 + 4364 1455 2 0.4236 14.86867 22.09727 33.78291 1 0 0 + 4365 1455 2 0.4236 14.36835 22.08619 32.22855 1 0 0 + 4366 1456 1 -0.8472 16.33725 25.39311 3.01993 0 -1 0 + 4367 1456 2 0.4236 16.78886 26.28499 3.04273 0 -1 0 + 4368 1456 2 0.4236 16.96355 24.71199 2.64072 0 -1 0 + 4369 1457 1 -0.8472 34.41055 25.51007 0.15059 0 0 0 + 4370 1457 2 0.4236 34.86965 26.36478 0.39277 0 0 0 + 4371 1457 2 0.4236 33.46228 25.53993 0.46653 0 0 0 + 4372 1458 1 -0.8472 27.86190 21.35174 28.12153 -1 0 0 + 4373 1458 2 0.4236 28.79121 21.05583 27.90073 -1 0 0 + 4374 1458 2 0.4236 27.23273 21.03009 27.41397 -1 0 0 + 4375 1459 1 -0.8472 0.23515 27.24082 28.00084 1 -1 0 + 4376 1459 2 0.4236 0.82580 26.63370 28.53233 1 -1 0 
+ 4377 1459 2 0.4236 34.89553 26.76177 27.76667 0 -1 0 + 4378 1460 1 -0.8472 21.68728 25.32330 17.79708 0 0 0 + 4379 1460 2 0.4236 22.01241 26.26845 17.76638 0 0 0 + 4380 1460 2 0.4236 21.31354 25.07022 16.90477 0 0 0 + 4381 1461 1 -0.8472 30.97176 0.89911 8.90394 -1 2 0 + 4382 1461 2 0.4236 31.32718 0.98094 7.97285 -1 2 0 + 4383 1461 2 0.4236 29.99239 1.10098 8.90582 -1 2 0 + 4384 1462 1 -0.8472 25.45716 22.87578 20.99721 0 0 0 + 4385 1462 2 0.4236 26.30239 22.39353 21.22734 0 0 0 + 4386 1462 2 0.4236 25.66033 23.83885 20.82076 0 0 0 + 4387 1463 1 -0.8472 24.87472 30.47978 12.60569 0 0 0 + 4388 1463 2 0.4236 25.65927 30.69811 12.02542 0 0 0 + 4389 1463 2 0.4236 24.98656 29.56166 12.98590 0 0 0 + 4390 1464 1 -0.8472 13.80973 7.99429 5.29049 0 1 0 + 4391 1464 2 0.4236 13.73023 8.76929 4.66357 0 1 0 + 4392 1464 2 0.4236 14.76858 7.86828 5.54475 0 1 0 + 4393 1465 1 -0.8472 33.97510 12.84300 16.82412 -1 0 0 + 4394 1465 2 0.4236 34.81504 12.34044 17.02876 -1 0 0 + 4395 1465 2 0.4236 33.55414 13.15013 17.67758 -1 0 0 + 4396 1466 1 -0.8472 26.59067 21.83864 5.49548 0 0 0 + 4397 1466 2 0.4236 27.17656 22.56826 5.14296 0 0 0 + 4398 1466 2 0.4236 27.14602 21.03034 5.69097 0 0 0 + 4399 1467 1 -0.8472 5.16628 25.92485 12.35774 0 0 0 + 4400 1467 2 0.4236 4.34860 25.35313 12.42442 0 0 0 + 4401 1467 2 0.4236 5.79329 25.69800 13.10298 0 0 0 + 4402 1468 1 -0.8472 26.79149 35.42791 5.15590 -1 0 0 + 4403 1468 2 0.4236 26.57100 34.75869 5.86548 -1 0 0 + 4404 1468 2 0.4236 27.24633 0.70960 5.57074 -1 1 0 + 4405 1469 1 -0.8472 1.20630 3.82773 0.24514 0 0 0 + 4406 1469 2 0.4236 2.07387 3.42063 0.53070 0 0 0 + 4407 1469 2 0.4236 0.45018 3.34497 0.68691 0 0 0 + 4408 1470 1 -0.8472 26.43671 3.26559 13.86247 -1 0 0 + 4409 1470 2 0.4236 26.83179 4.06661 14.31219 -1 0 0 + 4410 1470 2 0.4236 25.70088 3.55590 13.25073 -1 0 0 + 4411 1471 1 -0.8472 33.20618 12.63476 8.22853 0 0 0 + 4412 1471 2 0.4236 32.62856 12.82716 9.02181 0 0 0 + 4413 1471 2 0.4236 34.16325 12.80270 8.46454 0 0 0 + 4414 1472 1 
-0.8472 29.90540 24.56808 20.70513 -1 0 0 + 4415 1472 2 0.4236 29.45905 25.46288 20.69386 -1 0 0 + 4416 1472 2 0.4236 30.31745 24.38988 19.81161 -1 0 0 + 4417 1473 1 -0.8472 15.29819 4.02922 16.44477 0 1 0 + 4418 1473 2 0.4236 15.74731 4.67008 17.06729 0 1 0 + 4419 1473 2 0.4236 14.96500 3.24064 16.96159 0 1 0 +4420 1474 1 -0.8472 31.27200 25.18131 0.42504 0 0 0 +4421 1474 2 0.4236 31.06216 24.44154 35.23295 0 0 -1 +4422 1474 2 0.4236 30.72503 25.06997 1.25474 0 0 0 +4423 1475 1 -0.8472 12.12236 3.02649 0.17854 1 1 0 +4424 1475 2 0.4236 11.88057 2.13716 35.23770 1 1 -1 +4425 1475 2 0.4236 11.35234 3.37843 0.71066 1 1 0 + 4426 1476 1 -0.8472 32.84202 11.55156 14.38224 -1 1 0 + 4427 1476 2 0.4236 31.87238 11.54439 14.13797 -1 1 0 + 4428 1476 2 0.4236 33.01945 12.30064 15.02046 -1 1 0 + 4429 1477 1 -0.8472 20.56141 17.44100 12.45462 0 0 0 + 4430 1477 2 0.4236 19.82341 16.87961 12.08022 0 0 0 + 4431 1477 2 0.4236 20.67251 17.23961 13.42779 0 0 0 + 4432 1478 1 -0.8472 15.28058 11.62137 18.81314 0 0 0 + 4433 1478 2 0.4236 14.35106 11.87755 19.07832 0 0 0 + 4434 1478 2 0.4236 15.44910 10.66895 19.06701 0 0 0 + 4435 1479 1 -0.8472 29.74122 20.36633 3.36258 1 0 0 + 4436 1479 2 0.4236 30.50489 19.77988 3.63245 1 0 0 + 4437 1479 2 0.4236 29.83808 20.61761 2.39955 1 0 0 + 4438 1480 1 -0.8472 10.45942 28.09788 15.37735 0 0 0 + 4439 1480 2 0.4236 9.49177 28.21579 15.60027 0 0 0 + 4440 1480 2 0.4236 10.68404 28.64230 14.56918 0 0 0 + 4441 1481 1 -0.8472 16.77284 6.13546 17.93915 -1 1 0 + 4442 1481 2 0.4236 17.27231 6.27930 18.79343 -1 1 0 + 4443 1481 2 0.4236 16.65241 7.01137 17.47196 -1 1 0 + 4444 1482 1 -0.8472 21.51809 0.32169 25.14138 1 0 0 + 4445 1482 2 0.4236 22.25467 0.17201 25.80091 1 0 0 + 4446 1482 2 0.4236 21.38164 34.99836 24.60010 1 -1 0 + 4447 1483 1 -0.8472 8.70687 23.66278 10.13832 1 0 0 + 4448 1483 2 0.4236 9.01094 24.13022 10.96842 1 0 0 + 4449 1483 2 0.4236 9.40929 23.01447 9.84461 1 0 0 + 4450 1484 1 -0.8472 20.24838 3.67061 10.36639 0 1 0 + 4451 1484 2 0.4236 
20.42013 3.09249 11.16402 0 1 0 + 4452 1484 2 0.4236 19.45158 3.32583 9.87020 0 1 0 + 4453 1485 1 -0.8472 5.99031 17.08661 14.86595 0 -1 0 + 4454 1485 2 0.4236 6.68059 17.76993 14.62817 0 -1 0 + 4455 1485 2 0.4236 5.51211 17.37098 15.69683 0 -1 0 + 4456 1486 1 -0.8472 9.84126 21.02704 7.17564 1 0 0 + 4457 1486 2 0.4236 9.10851 21.60977 6.82433 1 0 0 + 4458 1486 2 0.4236 9.88717 21.11282 8.17086 1 0 0 + 4459 1487 1 -0.8472 14.27700 22.34872 30.47271 0 -1 0 + 4460 1487 2 0.4236 13.79127 23.17247 30.18038 0 -1 0 + 4461 1487 2 0.4236 14.51549 21.80029 29.67128 0 -1 0 + 4462 1488 1 -0.8472 9.99770 22.36314 18.62254 0 0 0 + 4463 1488 2 0.4236 10.44813 21.95194 17.83008 0 0 0 + 4464 1488 2 0.4236 10.66650 22.49693 19.35385 0 0 0 + 4465 1489 1 -0.8472 13.54588 23.31244 25.33301 0 0 0 + 4466 1489 2 0.4236 13.21038 22.99391 26.21954 0 0 0 + 4467 1489 2 0.4236 14.46674 22.95517 25.17718 0 0 0 + 4468 1490 1 -0.8472 20.37993 18.69129 28.39579 0 0 0 + 4469 1490 2 0.4236 19.95915 18.99234 27.54006 0 0 0 + 4470 1490 2 0.4236 21.36712 18.59759 28.26690 0 0 0 + 4471 1491 1 -0.8472 31.97357 7.21056 11.20401 0 1 0 + 4472 1491 2 0.4236 31.56230 7.14412 12.11308 0 1 0 + 4473 1491 2 0.4236 32.88388 6.79691 11.21900 0 1 0 + 4474 1492 1 -0.8472 3.59839 34.17205 15.10830 1 -1 0 + 4475 1492 2 0.4236 3.17559 33.45078 14.55972 1 -1 0 + 4476 1492 2 0.4236 3.57179 33.91506 16.07431 1 -1 0 + 4477 1493 1 -0.8472 16.75495 18.40965 28.99548 0 0 0 + 4478 1493 2 0.4236 17.67786 18.68610 29.26339 0 0 0 + 4479 1493 2 0.4236 16.49472 17.58541 29.49830 0 0 0 + 4480 1494 1 -0.8472 4.30401 30.00446 27.41270 2 -1 0 + 4481 1494 2 0.4236 5.20242 30.40877 27.24145 2 -1 0 + 4482 1494 2 0.4236 3.66298 30.30545 26.70673 2 -1 0 + 4483 1495 1 -0.8472 6.15115 15.71784 24.07151 1 1 0 + 4484 1495 2 0.4236 5.27191 15.89172 24.51496 1 1 0 + 4485 1495 2 0.4236 6.55767 14.88710 24.45172 1 1 0 + 4486 1496 1 -0.8472 1.16767 28.77477 19.83689 0 0 0 + 4487 1496 2 0.4236 0.50183 28.05644 19.63537 0 0 0 + 4488 1496 2 0.4236 
2.07490 28.36794 19.94340 0 0 0 + 4489 1497 1 -0.8472 23.23606 29.74360 8.55284 0 0 0 + 4490 1497 2 0.4236 24.21877 29.83041 8.71618 0 0 0 + 4491 1497 2 0.4236 22.95269 30.40987 7.86311 0 0 0 + 4492 1498 1 -0.8472 19.48056 19.15564 6.41598 0 0 0 + 4493 1498 2 0.4236 18.99884 18.47150 5.86839 0 0 0 + 4494 1498 2 0.4236 18.88774 19.94885 6.55504 0 0 0 + 4495 1499 1 -0.8472 33.04434 24.82100 13.64486 0 0 0 + 4496 1499 2 0.4236 33.83932 24.36499 13.24495 0 0 0 + 4497 1499 2 0.4236 32.21569 24.31265 13.41066 0 0 0 + 4498 1500 1 -0.8472 26.04955 6.03860 17.25587 0 0 0 + 4499 1500 2 0.4236 26.40284 5.43861 17.97360 0 0 0 + 4500 1500 2 0.4236 26.43423 5.76580 16.37409 0 0 0 + +Bonds + + 1 1 1 2 + 2 1 1 3 + 3 1 4 5 + 4 1 4 6 + 5 1 7 8 + 6 1 7 9 + 7 1 10 11 + 8 1 10 12 + 9 1 13 14 + 10 1 13 15 + 11 1 16 17 + 12 1 16 18 + 13 1 19 20 + 14 1 19 21 + 15 1 22 23 + 16 1 22 24 + 17 1 25 26 + 18 1 25 27 + 19 1 28 29 + 20 1 28 30 + 21 1 31 32 + 22 1 31 33 + 23 1 34 35 + 24 1 34 36 + 25 1 37 38 + 26 1 37 39 + 27 1 40 41 + 28 1 40 42 + 29 1 43 44 + 30 1 43 45 + 31 1 46 47 + 32 1 46 48 + 33 1 49 50 + 34 1 49 51 + 35 1 52 53 + 36 1 52 54 + 37 1 55 56 + 38 1 55 57 + 39 1 58 59 + 40 1 58 60 + 41 1 61 62 + 42 1 61 63 + 43 1 64 65 + 44 1 64 66 + 45 1 67 68 + 46 1 67 69 + 47 1 70 71 + 48 1 70 72 + 49 1 73 74 + 50 1 73 75 + 51 1 76 77 + 52 1 76 78 + 53 1 79 80 + 54 1 79 81 + 55 1 82 83 + 56 1 82 84 + 57 1 85 86 + 58 1 85 87 + 59 1 88 89 + 60 1 88 90 + 61 1 91 92 + 62 1 91 93 + 63 1 94 95 + 64 1 94 96 + 65 1 97 98 + 66 1 97 99 + 67 1 100 101 + 68 1 100 102 + 69 1 103 104 + 70 1 103 105 + 71 1 106 107 + 72 1 106 108 + 73 1 109 110 + 74 1 109 111 + 75 1 112 113 + 76 1 112 114 + 77 1 115 116 + 78 1 115 117 + 79 1 118 119 + 80 1 118 120 + 81 1 121 122 + 82 1 121 123 + 83 1 124 125 + 84 1 124 126 + 85 1 127 128 + 86 1 127 129 + 87 1 130 131 + 88 1 130 132 + 89 1 133 134 + 90 1 133 135 + 91 1 136 137 + 92 1 136 138 + 93 1 139 140 + 94 1 139 141 + 95 1 142 143 + 96 1 142 144 + 97 1 145 146 + 98 1 145 
147 + 99 1 148 149 + 100 1 148 150 + 101 1 151 152 + 102 1 151 153 + 103 1 154 155 + 104 1 154 156 + 105 1 157 158 + 106 1 157 159 + 107 1 160 161 + 108 1 160 162 + 109 1 163 164 + 110 1 163 165 + 111 1 166 167 + 112 1 166 168 + 113 1 169 170 + 114 1 169 171 + 115 1 172 173 + 116 1 172 174 + 117 1 175 176 + 118 1 175 177 + 119 1 178 179 + 120 1 178 180 + 121 1 181 182 + 122 1 181 183 + 123 1 184 185 + 124 1 184 186 + 125 1 187 188 + 126 1 187 189 + 127 1 190 191 + 128 1 190 192 + 129 1 193 194 + 130 1 193 195 + 131 1 196 197 + 132 1 196 198 + 133 1 199 200 + 134 1 199 201 + 135 1 202 203 + 136 1 202 204 + 137 1 205 206 + 138 1 205 207 + 139 1 208 209 + 140 1 208 210 + 141 1 211 212 + 142 1 211 213 + 143 1 214 215 + 144 1 214 216 + 145 1 217 218 + 146 1 217 219 + 147 1 220 221 + 148 1 220 222 + 149 1 223 224 + 150 1 223 225 + 151 1 226 227 + 152 1 226 228 + 153 1 229 230 + 154 1 229 231 + 155 1 232 233 + 156 1 232 234 + 157 1 235 236 + 158 1 235 237 + 159 1 238 239 + 160 1 238 240 + 161 1 241 242 + 162 1 241 243 + 163 1 244 245 + 164 1 244 246 + 165 1 247 248 + 166 1 247 249 + 167 1 250 251 + 168 1 250 252 + 169 1 253 254 + 170 1 253 255 + 171 1 256 257 + 172 1 256 258 + 173 1 259 260 + 174 1 259 261 + 175 1 262 263 + 176 1 262 264 + 177 1 265 266 + 178 1 265 267 + 179 1 268 269 + 180 1 268 270 + 181 1 271 272 + 182 1 271 273 + 183 1 274 275 + 184 1 274 276 + 185 1 277 278 + 186 1 277 279 + 187 1 280 281 + 188 1 280 282 + 189 1 283 284 + 190 1 283 285 + 191 1 286 287 + 192 1 286 288 + 193 1 289 290 + 194 1 289 291 + 195 1 292 293 + 196 1 292 294 + 197 1 295 296 + 198 1 295 297 + 199 1 298 299 + 200 1 298 300 + 201 1 301 302 + 202 1 301 303 + 203 1 304 305 + 204 1 304 306 + 205 1 307 308 + 206 1 307 309 + 207 1 310 311 + 208 1 310 312 + 209 1 313 314 + 210 1 313 315 + 211 1 316 317 + 212 1 316 318 + 213 1 319 320 + 214 1 319 321 + 215 1 322 323 + 216 1 322 324 + 217 1 325 326 + 218 1 325 327 + 219 1 328 329 + 220 1 328 330 + 221 1 331 332 + 222 1 331 333 + 223 1 334 
335 + 224 1 334 336 + 225 1 337 338 + 226 1 337 339 + 227 1 340 341 + 228 1 340 342 + 229 1 343 344 + 230 1 343 345 + 231 1 346 347 + 232 1 346 348 + 233 1 349 350 + 234 1 349 351 + 235 1 352 353 + 236 1 352 354 + 237 1 355 356 + 238 1 355 357 + 239 1 358 359 + 240 1 358 360 + 241 1 361 362 + 242 1 361 363 + 243 1 364 365 + 244 1 364 366 + 245 1 367 368 + 246 1 367 369 + 247 1 370 371 + 248 1 370 372 + 249 1 373 374 + 250 1 373 375 + 251 1 376 377 + 252 1 376 378 + 253 1 379 380 + 254 1 379 381 + 255 1 382 383 + 256 1 382 384 + 257 1 385 386 + 258 1 385 387 + 259 1 388 389 + 260 1 388 390 + 261 1 391 392 + 262 1 391 393 + 263 1 394 395 + 264 1 394 396 + 265 1 397 398 + 266 1 397 399 + 267 1 400 401 + 268 1 400 402 + 269 1 403 404 + 270 1 403 405 + 271 1 406 407 + 272 1 406 408 + 273 1 409 410 + 274 1 409 411 + 275 1 412 413 + 276 1 412 414 + 277 1 415 416 + 278 1 415 417 + 279 1 418 419 + 280 1 418 420 + 281 1 421 422 + 282 1 421 423 + 283 1 424 425 + 284 1 424 426 + 285 1 427 428 + 286 1 427 429 + 287 1 430 431 + 288 1 430 432 + 289 1 433 434 + 290 1 433 435 + 291 1 436 437 + 292 1 436 438 + 293 1 439 440 + 294 1 439 441 + 295 1 442 443 + 296 1 442 444 + 297 1 445 446 + 298 1 445 447 + 299 1 448 449 + 300 1 448 450 + 301 1 451 452 + 302 1 451 453 + 303 1 454 455 + 304 1 454 456 + 305 1 457 458 + 306 1 457 459 + 307 1 460 461 + 308 1 460 462 + 309 1 463 464 + 310 1 463 465 + 311 1 466 467 + 312 1 466 468 + 313 1 469 470 + 314 1 469 471 + 315 1 472 473 + 316 1 472 474 + 317 1 475 476 + 318 1 475 477 + 319 1 478 479 + 320 1 478 480 + 321 1 481 482 + 322 1 481 483 + 323 1 484 485 + 324 1 484 486 + 325 1 487 488 + 326 1 487 489 + 327 1 490 491 + 328 1 490 492 + 329 1 493 494 + 330 1 493 495 + 331 1 496 497 + 332 1 496 498 + 333 1 499 500 + 334 1 499 501 + 335 1 502 503 + 336 1 502 504 + 337 1 505 506 + 338 1 505 507 + 339 1 508 509 + 340 1 508 510 + 341 1 511 512 + 342 1 511 513 + 343 1 514 515 + 344 1 514 516 + 345 1 517 518 + 346 1 517 519 + 347 1 520 521 + 348 1 520 
522 + 349 1 523 524 + 350 1 523 525 + 351 1 526 527 + 352 1 526 528 + 353 1 529 530 + 354 1 529 531 + 355 1 532 533 + 356 1 532 534 + 357 1 535 536 + 358 1 535 537 + 359 1 538 539 + 360 1 538 540 + 361 1 541 542 + 362 1 541 543 + 363 1 544 545 + 364 1 544 546 + 365 1 547 548 + 366 1 547 549 + 367 1 550 551 + 368 1 550 552 + 369 1 553 554 + 370 1 553 555 + 371 1 556 557 + 372 1 556 558 + 373 1 559 560 + 374 1 559 561 + 375 1 562 563 + 376 1 562 564 + 377 1 565 566 + 378 1 565 567 + 379 1 568 569 + 380 1 568 570 + 381 1 571 572 + 382 1 571 573 + 383 1 574 575 + 384 1 574 576 + 385 1 577 578 + 386 1 577 579 + 387 1 580 581 + 388 1 580 582 + 389 1 583 584 + 390 1 583 585 + 391 1 586 587 + 392 1 586 588 + 393 1 589 590 + 394 1 589 591 + 395 1 592 593 + 396 1 592 594 + 397 1 595 596 + 398 1 595 597 + 399 1 598 599 + 400 1 598 600 + 401 1 601 602 + 402 1 601 603 + 403 1 604 605 + 404 1 604 606 + 405 1 607 608 + 406 1 607 609 + 407 1 610 611 + 408 1 610 612 + 409 1 613 614 + 410 1 613 615 + 411 1 616 617 + 412 1 616 618 + 413 1 619 620 + 414 1 619 621 + 415 1 622 623 + 416 1 622 624 + 417 1 625 626 + 418 1 625 627 + 419 1 628 629 + 420 1 628 630 + 421 1 631 632 + 422 1 631 633 + 423 1 634 635 + 424 1 634 636 + 425 1 637 638 + 426 1 637 639 + 427 1 640 641 + 428 1 640 642 + 429 1 643 644 + 430 1 643 645 + 431 1 646 647 + 432 1 646 648 + 433 1 649 650 + 434 1 649 651 + 435 1 652 653 + 436 1 652 654 + 437 1 655 656 + 438 1 655 657 + 439 1 658 659 + 440 1 658 660 + 441 1 661 662 + 442 1 661 663 + 443 1 664 665 + 444 1 664 666 + 445 1 667 668 + 446 1 667 669 + 447 1 670 671 + 448 1 670 672 + 449 1 673 674 + 450 1 673 675 + 451 1 676 677 + 452 1 676 678 + 453 1 679 680 + 454 1 679 681 + 455 1 682 683 + 456 1 682 684 + 457 1 685 686 + 458 1 685 687 + 459 1 688 689 + 460 1 688 690 + 461 1 691 692 + 462 1 691 693 + 463 1 694 695 + 464 1 694 696 + 465 1 697 698 + 466 1 697 699 + 467 1 700 701 + 468 1 700 702 + 469 1 703 704 + 470 1 703 705 + 471 1 706 707 + 472 1 706 708 + 473 1 709 
710 + 474 1 709 711 + 475 1 712 713 + 476 1 712 714 + 477 1 715 716 + 478 1 715 717 + 479 1 718 719 + 480 1 718 720 + 481 1 721 722 + 482 1 721 723 + 483 1 724 725 + 484 1 724 726 + 485 1 727 728 + 486 1 727 729 + 487 1 730 731 + 488 1 730 732 + 489 1 733 734 + 490 1 733 735 + 491 1 736 737 + 492 1 736 738 + 493 1 739 740 + 494 1 739 741 + 495 1 742 743 + 496 1 742 744 + 497 1 745 746 + 498 1 745 747 + 499 1 748 749 + 500 1 748 750 + 501 1 751 752 + 502 1 751 753 + 503 1 754 755 + 504 1 754 756 + 505 1 757 758 + 506 1 757 759 + 507 1 760 761 + 508 1 760 762 + 509 1 763 764 + 510 1 763 765 + 511 1 766 767 + 512 1 766 768 + 513 1 769 770 + 514 1 769 771 + 515 1 772 773 + 516 1 772 774 + 517 1 775 776 + 518 1 775 777 + 519 1 778 779 + 520 1 778 780 + 521 1 781 782 + 522 1 781 783 + 523 1 784 785 + 524 1 784 786 + 525 1 787 788 + 526 1 787 789 + 527 1 790 791 + 528 1 790 792 + 529 1 793 794 + 530 1 793 795 + 531 1 796 797 + 532 1 796 798 + 533 1 799 800 + 534 1 799 801 + 535 1 802 803 + 536 1 802 804 + 537 1 805 806 + 538 1 805 807 + 539 1 808 809 + 540 1 808 810 + 541 1 811 812 + 542 1 811 813 + 543 1 814 815 + 544 1 814 816 + 545 1 817 818 + 546 1 817 819 + 547 1 820 821 + 548 1 820 822 + 549 1 823 824 + 550 1 823 825 + 551 1 826 827 + 552 1 826 828 + 553 1 829 830 + 554 1 829 831 + 555 1 832 833 + 556 1 832 834 + 557 1 835 836 + 558 1 835 837 + 559 1 838 839 + 560 1 838 840 + 561 1 841 842 + 562 1 841 843 + 563 1 844 845 + 564 1 844 846 + 565 1 847 848 + 566 1 847 849 + 567 1 850 851 + 568 1 850 852 + 569 1 853 854 + 570 1 853 855 + 571 1 856 857 + 572 1 856 858 + 573 1 859 860 + 574 1 859 861 + 575 1 862 863 + 576 1 862 864 + 577 1 865 866 + 578 1 865 867 + 579 1 868 869 + 580 1 868 870 + 581 1 871 872 + 582 1 871 873 + 583 1 874 875 + 584 1 874 876 + 585 1 877 878 + 586 1 877 879 + 587 1 880 881 + 588 1 880 882 + 589 1 883 884 + 590 1 883 885 + 591 1 886 887 + 592 1 886 888 + 593 1 889 890 + 594 1 889 891 + 595 1 892 893 + 596 1 892 894 + 597 1 895 896 + 598 1 895 
897 + 599 1 898 899 + 600 1 898 900 + 601 1 901 902 + 602 1 901 903 + 603 1 904 905 + 604 1 904 906 + 605 1 907 908 + 606 1 907 909 + 607 1 910 911 + 608 1 910 912 + 609 1 913 914 + 610 1 913 915 + 611 1 916 917 + 612 1 916 918 + 613 1 919 920 + 614 1 919 921 + 615 1 922 923 + 616 1 922 924 + 617 1 925 926 + 618 1 925 927 + 619 1 928 929 + 620 1 928 930 + 621 1 931 932 + 622 1 931 933 + 623 1 934 935 + 624 1 934 936 + 625 1 937 938 + 626 1 937 939 + 627 1 940 941 + 628 1 940 942 + 629 1 943 944 + 630 1 943 945 + 631 1 946 947 + 632 1 946 948 + 633 1 949 950 + 634 1 949 951 + 635 1 952 953 + 636 1 952 954 + 637 1 955 956 + 638 1 955 957 + 639 1 958 959 + 640 1 958 960 + 641 1 961 962 + 642 1 961 963 + 643 1 964 965 + 644 1 964 966 + 645 1 967 968 + 646 1 967 969 + 647 1 970 971 + 648 1 970 972 + 649 1 973 974 + 650 1 973 975 + 651 1 976 977 + 652 1 976 978 + 653 1 979 980 + 654 1 979 981 + 655 1 982 983 + 656 1 982 984 + 657 1 985 986 + 658 1 985 987 + 659 1 988 989 + 660 1 988 990 + 661 1 991 992 + 662 1 991 993 + 663 1 994 995 + 664 1 994 996 + 665 1 997 998 + 666 1 997 999 + 667 1 1000 1001 + 668 1 1000 1002 + 669 1 1003 1004 + 670 1 1003 1005 + 671 1 1006 1007 + 672 1 1006 1008 + 673 1 1009 1010 + 674 1 1009 1011 + 675 1 1012 1013 + 676 1 1012 1014 + 677 1 1015 1016 + 678 1 1015 1017 + 679 1 1018 1019 + 680 1 1018 1020 + 681 1 1021 1022 + 682 1 1021 1023 + 683 1 1024 1025 + 684 1 1024 1026 + 685 1 1027 1028 + 686 1 1027 1029 + 687 1 1030 1031 + 688 1 1030 1032 + 689 1 1033 1034 + 690 1 1033 1035 + 691 1 1036 1037 + 692 1 1036 1038 + 693 1 1039 1040 + 694 1 1039 1041 + 695 1 1042 1043 + 696 1 1042 1044 + 697 1 1045 1046 + 698 1 1045 1047 + 699 1 1048 1049 + 700 1 1048 1050 + 701 1 1051 1052 + 702 1 1051 1053 + 703 1 1054 1055 + 704 1 1054 1056 + 705 1 1057 1058 + 706 1 1057 1059 + 707 1 1060 1061 + 708 1 1060 1062 + 709 1 1063 1064 + 710 1 1063 1065 + 711 1 1066 1067 + 712 1 1066 1068 + 713 1 1069 1070 + 714 1 1069 1071 + 715 1 1072 1073 + 716 1 1072 1074 + 717 1 
1075 1076 + 718 1 1075 1077 + 719 1 1078 1079 + 720 1 1078 1080 + 721 1 1081 1082 + 722 1 1081 1083 + 723 1 1084 1085 + 724 1 1084 1086 + 725 1 1087 1088 + 726 1 1087 1089 + 727 1 1090 1091 + 728 1 1090 1092 + 729 1 1093 1094 + 730 1 1093 1095 + 731 1 1096 1097 + 732 1 1096 1098 + 733 1 1099 1100 + 734 1 1099 1101 + 735 1 1102 1103 + 736 1 1102 1104 + 737 1 1105 1106 + 738 1 1105 1107 + 739 1 1108 1109 + 740 1 1108 1110 + 741 1 1111 1112 + 742 1 1111 1113 + 743 1 1114 1115 + 744 1 1114 1116 + 745 1 1117 1118 + 746 1 1117 1119 + 747 1 1120 1121 + 748 1 1120 1122 + 749 1 1123 1124 + 750 1 1123 1125 + 751 1 1126 1127 + 752 1 1126 1128 + 753 1 1129 1130 + 754 1 1129 1131 + 755 1 1132 1133 + 756 1 1132 1134 + 757 1 1135 1136 + 758 1 1135 1137 + 759 1 1138 1139 + 760 1 1138 1140 + 761 1 1141 1142 + 762 1 1141 1143 + 763 1 1144 1145 + 764 1 1144 1146 + 765 1 1147 1148 + 766 1 1147 1149 + 767 1 1150 1151 + 768 1 1150 1152 + 769 1 1153 1154 + 770 1 1153 1155 + 771 1 1156 1157 + 772 1 1156 1158 + 773 1 1159 1160 + 774 1 1159 1161 + 775 1 1162 1163 + 776 1 1162 1164 + 777 1 1165 1166 + 778 1 1165 1167 + 779 1 1168 1169 + 780 1 1168 1170 + 781 1 1171 1172 + 782 1 1171 1173 + 783 1 1174 1175 + 784 1 1174 1176 + 785 1 1177 1178 + 786 1 1177 1179 + 787 1 1180 1181 + 788 1 1180 1182 + 789 1 1183 1184 + 790 1 1183 1185 + 791 1 1186 1187 + 792 1 1186 1188 + 793 1 1189 1190 + 794 1 1189 1191 + 795 1 1192 1193 + 796 1 1192 1194 + 797 1 1195 1196 + 798 1 1195 1197 + 799 1 1198 1199 + 800 1 1198 1200 + 801 1 1201 1202 + 802 1 1201 1203 + 803 1 1204 1205 + 804 1 1204 1206 + 805 1 1207 1208 + 806 1 1207 1209 + 807 1 1210 1211 + 808 1 1210 1212 + 809 1 1213 1214 + 810 1 1213 1215 + 811 1 1216 1217 + 812 1 1216 1218 + 813 1 1219 1220 + 814 1 1219 1221 + 815 1 1222 1223 + 816 1 1222 1224 + 817 1 1225 1226 + 818 1 1225 1227 + 819 1 1228 1229 + 820 1 1228 1230 + 821 1 1231 1232 + 822 1 1231 1233 + 823 1 1234 1235 + 824 1 1234 1236 + 825 1 1237 1238 + 826 1 1237 1239 + 827 1 1240 1241 + 828 1 
1240 1242 + 829 1 1243 1244 + 830 1 1243 1245 + 831 1 1246 1247 + 832 1 1246 1248 + 833 1 1249 1250 + 834 1 1249 1251 + 835 1 1252 1253 + 836 1 1252 1254 + 837 1 1255 1256 + 838 1 1255 1257 + 839 1 1258 1259 + 840 1 1258 1260 + 841 1 1261 1262 + 842 1 1261 1263 + 843 1 1264 1265 + 844 1 1264 1266 + 845 1 1267 1268 + 846 1 1267 1269 + 847 1 1270 1271 + 848 1 1270 1272 + 849 1 1273 1274 + 850 1 1273 1275 + 851 1 1276 1277 + 852 1 1276 1278 + 853 1 1279 1280 + 854 1 1279 1281 + 855 1 1282 1283 + 856 1 1282 1284 + 857 1 1285 1286 + 858 1 1285 1287 + 859 1 1288 1289 + 860 1 1288 1290 + 861 1 1291 1292 + 862 1 1291 1293 + 863 1 1294 1295 + 864 1 1294 1296 + 865 1 1297 1298 + 866 1 1297 1299 + 867 1 1300 1301 + 868 1 1300 1302 + 869 1 1303 1304 + 870 1 1303 1305 + 871 1 1306 1307 + 872 1 1306 1308 + 873 1 1309 1310 + 874 1 1309 1311 + 875 1 1312 1313 + 876 1 1312 1314 + 877 1 1315 1316 + 878 1 1315 1317 + 879 1 1318 1319 + 880 1 1318 1320 + 881 1 1321 1322 + 882 1 1321 1323 + 883 1 1324 1325 + 884 1 1324 1326 + 885 1 1327 1328 + 886 1 1327 1329 + 887 1 1330 1331 + 888 1 1330 1332 + 889 1 1333 1334 + 890 1 1333 1335 + 891 1 1336 1337 + 892 1 1336 1338 + 893 1 1339 1340 + 894 1 1339 1341 + 895 1 1342 1343 + 896 1 1342 1344 + 897 1 1345 1346 + 898 1 1345 1347 + 899 1 1348 1349 + 900 1 1348 1350 + 901 1 1351 1352 + 902 1 1351 1353 + 903 1 1354 1355 + 904 1 1354 1356 + 905 1 1357 1358 + 906 1 1357 1359 + 907 1 1360 1361 + 908 1 1360 1362 + 909 1 1363 1364 + 910 1 1363 1365 + 911 1 1366 1367 + 912 1 1366 1368 + 913 1 1369 1370 + 914 1 1369 1371 + 915 1 1372 1373 + 916 1 1372 1374 + 917 1 1375 1376 + 918 1 1375 1377 + 919 1 1378 1379 + 920 1 1378 1380 + 921 1 1381 1382 + 922 1 1381 1383 + 923 1 1384 1385 + 924 1 1384 1386 + 925 1 1387 1388 + 926 1 1387 1389 + 927 1 1390 1391 + 928 1 1390 1392 + 929 1 1393 1394 + 930 1 1393 1395 + 931 1 1396 1397 + 932 1 1396 1398 + 933 1 1399 1400 + 934 1 1399 1401 + 935 1 1402 1403 + 936 1 1402 1404 + 937 1 1405 1406 + 938 1 1405 1407 + 939 1 
1408 1409 + 940 1 1408 1410 + 941 1 1411 1412 + 942 1 1411 1413 + 943 1 1414 1415 + 944 1 1414 1416 + 945 1 1417 1418 + 946 1 1417 1419 + 947 1 1420 1421 + 948 1 1420 1422 + 949 1 1423 1424 + 950 1 1423 1425 + 951 1 1426 1427 + 952 1 1426 1428 + 953 1 1429 1430 + 954 1 1429 1431 + 955 1 1432 1433 + 956 1 1432 1434 + 957 1 1435 1436 + 958 1 1435 1437 + 959 1 1438 1439 + 960 1 1438 1440 + 961 1 1441 1442 + 962 1 1441 1443 + 963 1 1444 1445 + 964 1 1444 1446 + 965 1 1447 1448 + 966 1 1447 1449 + 967 1 1450 1451 + 968 1 1450 1452 + 969 1 1453 1454 + 970 1 1453 1455 + 971 1 1456 1457 + 972 1 1456 1458 + 973 1 1459 1460 + 974 1 1459 1461 + 975 1 1462 1463 + 976 1 1462 1464 + 977 1 1465 1466 + 978 1 1465 1467 + 979 1 1468 1469 + 980 1 1468 1470 + 981 1 1471 1472 + 982 1 1471 1473 + 983 1 1474 1475 + 984 1 1474 1476 + 985 1 1477 1478 + 986 1 1477 1479 + 987 1 1480 1481 + 988 1 1480 1482 + 989 1 1483 1484 + 990 1 1483 1485 + 991 1 1486 1487 + 992 1 1486 1488 + 993 1 1489 1490 + 994 1 1489 1491 + 995 1 1492 1493 + 996 1 1492 1494 + 997 1 1495 1496 + 998 1 1495 1497 + 999 1 1498 1499 + 1000 1 1498 1500 + 1001 1 1501 1502 + 1002 1 1501 1503 + 1003 1 1504 1505 + 1004 1 1504 1506 + 1005 1 1507 1508 + 1006 1 1507 1509 + 1007 1 1510 1511 + 1008 1 1510 1512 + 1009 1 1513 1514 + 1010 1 1513 1515 + 1011 1 1516 1517 + 1012 1 1516 1518 + 1013 1 1519 1520 + 1014 1 1519 1521 + 1015 1 1522 1523 + 1016 1 1522 1524 + 1017 1 1525 1526 + 1018 1 1525 1527 + 1019 1 1528 1529 + 1020 1 1528 1530 + 1021 1 1531 1532 + 1022 1 1531 1533 + 1023 1 1534 1535 + 1024 1 1534 1536 + 1025 1 1537 1538 + 1026 1 1537 1539 + 1027 1 1540 1541 + 1028 1 1540 1542 + 1029 1 1543 1544 + 1030 1 1543 1545 + 1031 1 1546 1547 + 1032 1 1546 1548 + 1033 1 1549 1550 + 1034 1 1549 1551 + 1035 1 1552 1553 + 1036 1 1552 1554 + 1037 1 1555 1556 + 1038 1 1555 1557 + 1039 1 1558 1559 + 1040 1 1558 1560 + 1041 1 1561 1562 + 1042 1 1561 1563 + 1043 1 1564 1565 + 1044 1 1564 1566 + 1045 1 1567 1568 + 1046 1 1567 1569 + 1047 1 1570 
1571 + 1048 1 1570 1572 + 1049 1 1573 1574 + 1050 1 1573 1575 + 1051 1 1576 1577 + 1052 1 1576 1578 + 1053 1 1579 1580 + 1054 1 1579 1581 + 1055 1 1582 1583 + 1056 1 1582 1584 + 1057 1 1585 1586 + 1058 1 1585 1587 + 1059 1 1588 1589 + 1060 1 1588 1590 + 1061 1 1591 1592 + 1062 1 1591 1593 + 1063 1 1594 1595 + 1064 1 1594 1596 + 1065 1 1597 1598 + 1066 1 1597 1599 + 1067 1 1600 1601 + 1068 1 1600 1602 + 1069 1 1603 1604 + 1070 1 1603 1605 + 1071 1 1606 1607 + 1072 1 1606 1608 + 1073 1 1609 1610 + 1074 1 1609 1611 + 1075 1 1612 1613 + 1076 1 1612 1614 + 1077 1 1615 1616 + 1078 1 1615 1617 + 1079 1 1618 1619 + 1080 1 1618 1620 + 1081 1 1621 1622 + 1082 1 1621 1623 + 1083 1 1624 1625 + 1084 1 1624 1626 + 1085 1 1627 1628 + 1086 1 1627 1629 + 1087 1 1630 1631 + 1088 1 1630 1632 + 1089 1 1633 1634 + 1090 1 1633 1635 + 1091 1 1636 1637 + 1092 1 1636 1638 + 1093 1 1639 1640 + 1094 1 1639 1641 + 1095 1 1642 1643 + 1096 1 1642 1644 + 1097 1 1645 1646 + 1098 1 1645 1647 + 1099 1 1648 1649 + 1100 1 1648 1650 + 1101 1 1651 1652 + 1102 1 1651 1653 + 1103 1 1654 1655 + 1104 1 1654 1656 + 1105 1 1657 1658 + 1106 1 1657 1659 + 1107 1 1660 1661 + 1108 1 1660 1662 + 1109 1 1663 1664 + 1110 1 1663 1665 + 1111 1 1666 1667 + 1112 1 1666 1668 + 1113 1 1669 1670 + 1114 1 1669 1671 + 1115 1 1672 1673 + 1116 1 1672 1674 + 1117 1 1675 1676 + 1118 1 1675 1677 + 1119 1 1678 1679 + 1120 1 1678 1680 + 1121 1 1681 1682 + 1122 1 1681 1683 + 1123 1 1684 1685 + 1124 1 1684 1686 + 1125 1 1687 1688 + 1126 1 1687 1689 + 1127 1 1690 1691 + 1128 1 1690 1692 + 1129 1 1693 1694 + 1130 1 1693 1695 + 1131 1 1696 1697 + 1132 1 1696 1698 + 1133 1 1699 1700 + 1134 1 1699 1701 + 1135 1 1702 1703 + 1136 1 1702 1704 + 1137 1 1705 1706 + 1138 1 1705 1707 + 1139 1 1708 1709 + 1140 1 1708 1710 + 1141 1 1711 1712 + 1142 1 1711 1713 + 1143 1 1714 1715 + 1144 1 1714 1716 + 1145 1 1717 1718 + 1146 1 1717 1719 + 1147 1 1720 1721 + 1148 1 1720 1722 + 1149 1 1723 1724 + 1150 1 1723 1725 + 1151 1 1726 1727 + 1152 1 1726 1728 
+ 1153 1 1729 1730 + 1154 1 1729 1731 + 1155 1 1732 1733 + 1156 1 1732 1734 + 1157 1 1735 1736 + 1158 1 1735 1737 + 1159 1 1738 1739 + 1160 1 1738 1740 + 1161 1 1741 1742 + 1162 1 1741 1743 + 1163 1 1744 1745 + 1164 1 1744 1746 + 1165 1 1747 1748 + 1166 1 1747 1749 + 1167 1 1750 1751 + 1168 1 1750 1752 + 1169 1 1753 1754 + 1170 1 1753 1755 + 1171 1 1756 1757 + 1172 1 1756 1758 + 1173 1 1759 1760 + 1174 1 1759 1761 + 1175 1 1762 1763 + 1176 1 1762 1764 + 1177 1 1765 1766 + 1178 1 1765 1767 + 1179 1 1768 1769 + 1180 1 1768 1770 + 1181 1 1771 1772 + 1182 1 1771 1773 + 1183 1 1774 1775 + 1184 1 1774 1776 + 1185 1 1777 1778 + 1186 1 1777 1779 + 1187 1 1780 1781 + 1188 1 1780 1782 + 1189 1 1783 1784 + 1190 1 1783 1785 + 1191 1 1786 1787 + 1192 1 1786 1788 + 1193 1 1789 1790 + 1194 1 1789 1791 + 1195 1 1792 1793 + 1196 1 1792 1794 + 1197 1 1795 1796 + 1198 1 1795 1797 + 1199 1 1798 1799 + 1200 1 1798 1800 + 1201 1 1801 1802 + 1202 1 1801 1803 + 1203 1 1804 1805 + 1204 1 1804 1806 + 1205 1 1807 1808 + 1206 1 1807 1809 + 1207 1 1810 1811 + 1208 1 1810 1812 + 1209 1 1813 1814 + 1210 1 1813 1815 + 1211 1 1816 1817 + 1212 1 1816 1818 + 1213 1 1819 1820 + 1214 1 1819 1821 + 1215 1 1822 1823 + 1216 1 1822 1824 + 1217 1 1825 1826 + 1218 1 1825 1827 + 1219 1 1828 1829 + 1220 1 1828 1830 + 1221 1 1831 1832 + 1222 1 1831 1833 + 1223 1 1834 1835 + 1224 1 1834 1836 + 1225 1 1837 1838 + 1226 1 1837 1839 + 1227 1 1840 1841 + 1228 1 1840 1842 + 1229 1 1843 1844 + 1230 1 1843 1845 + 1231 1 1846 1847 + 1232 1 1846 1848 + 1233 1 1849 1850 + 1234 1 1849 1851 + 1235 1 1852 1853 + 1236 1 1852 1854 + 1237 1 1855 1856 + 1238 1 1855 1857 + 1239 1 1858 1859 + 1240 1 1858 1860 + 1241 1 1861 1862 + 1242 1 1861 1863 + 1243 1 1864 1865 + 1244 1 1864 1866 + 1245 1 1867 1868 + 1246 1 1867 1869 + 1247 1 1870 1871 + 1248 1 1870 1872 + 1249 1 1873 1874 + 1250 1 1873 1875 + 1251 1 1876 1877 + 1252 1 1876 1878 + 1253 1 1879 1880 + 1254 1 1879 1881 + 1255 1 1882 1883 + 1256 1 1882 1884 + 1257 1 1885 1886 + 
1258 1 1885 1887 + 1259 1 1888 1889 + 1260 1 1888 1890 + 1261 1 1891 1892 + 1262 1 1891 1893 + 1263 1 1894 1895 + 1264 1 1894 1896 + 1265 1 1897 1898 + 1266 1 1897 1899 + 1267 1 1900 1901 + 1268 1 1900 1902 + 1269 1 1903 1904 + 1270 1 1903 1905 + 1271 1 1906 1907 + 1272 1 1906 1908 + 1273 1 1909 1910 + 1274 1 1909 1911 + 1275 1 1912 1913 + 1276 1 1912 1914 + 1277 1 1915 1916 + 1278 1 1915 1917 + 1279 1 1918 1919 + 1280 1 1918 1920 + 1281 1 1921 1922 + 1282 1 1921 1923 + 1283 1 1924 1925 + 1284 1 1924 1926 + 1285 1 1927 1928 + 1286 1 1927 1929 + 1287 1 1930 1931 + 1288 1 1930 1932 + 1289 1 1933 1934 + 1290 1 1933 1935 + 1291 1 1936 1937 + 1292 1 1936 1938 + 1293 1 1939 1940 + 1294 1 1939 1941 + 1295 1 1942 1943 + 1296 1 1942 1944 + 1297 1 1945 1946 + 1298 1 1945 1947 + 1299 1 1948 1949 + 1300 1 1948 1950 + 1301 1 1951 1952 + 1302 1 1951 1953 + 1303 1 1954 1955 + 1304 1 1954 1956 + 1305 1 1957 1958 + 1306 1 1957 1959 + 1307 1 1960 1961 + 1308 1 1960 1962 + 1309 1 1963 1964 + 1310 1 1963 1965 + 1311 1 1966 1967 + 1312 1 1966 1968 + 1313 1 1969 1970 + 1314 1 1969 1971 + 1315 1 1972 1973 + 1316 1 1972 1974 + 1317 1 1975 1976 + 1318 1 1975 1977 + 1319 1 1978 1979 + 1320 1 1978 1980 + 1321 1 1981 1982 + 1322 1 1981 1983 + 1323 1 1984 1985 + 1324 1 1984 1986 + 1325 1 1987 1988 + 1326 1 1987 1989 + 1327 1 1990 1991 + 1328 1 1990 1992 + 1329 1 1993 1994 + 1330 1 1993 1995 + 1331 1 1996 1997 + 1332 1 1996 1998 + 1333 1 1999 2000 + 1334 1 1999 2001 + 1335 1 2002 2003 + 1336 1 2002 2004 + 1337 1 2005 2006 + 1338 1 2005 2007 + 1339 1 2008 2009 + 1340 1 2008 2010 + 1341 1 2011 2012 + 1342 1 2011 2013 + 1343 1 2014 2015 + 1344 1 2014 2016 + 1345 1 2017 2018 + 1346 1 2017 2019 + 1347 1 2020 2021 + 1348 1 2020 2022 + 1349 1 2023 2024 + 1350 1 2023 2025 + 1351 1 2026 2027 + 1352 1 2026 2028 + 1353 1 2029 2030 + 1354 1 2029 2031 + 1355 1 2032 2033 + 1356 1 2032 2034 + 1357 1 2035 2036 + 1358 1 2035 2037 + 1359 1 2038 2039 + 1360 1 2038 2040 + 1361 1 2041 2042 + 1362 1 2041 2043 + 1363 
1 2044 2045 + 1364 1 2044 2046 + 1365 1 2047 2048 + 1366 1 2047 2049 + 1367 1 2050 2051 + 1368 1 2050 2052 + 1369 1 2053 2054 + 1370 1 2053 2055 + 1371 1 2056 2057 + 1372 1 2056 2058 + 1373 1 2059 2060 + 1374 1 2059 2061 + 1375 1 2062 2063 + 1376 1 2062 2064 + 1377 1 2065 2066 + 1378 1 2065 2067 + 1379 1 2068 2069 + 1380 1 2068 2070 + 1381 1 2071 2072 + 1382 1 2071 2073 + 1383 1 2074 2075 + 1384 1 2074 2076 + 1385 1 2077 2078 + 1386 1 2077 2079 + 1387 1 2080 2081 + 1388 1 2080 2082 + 1389 1 2083 2084 + 1390 1 2083 2085 + 1391 1 2086 2087 + 1392 1 2086 2088 + 1393 1 2089 2090 + 1394 1 2089 2091 + 1395 1 2092 2093 + 1396 1 2092 2094 + 1397 1 2095 2096 + 1398 1 2095 2097 + 1399 1 2098 2099 + 1400 1 2098 2100 + 1401 1 2101 2102 + 1402 1 2101 2103 + 1403 1 2104 2105 + 1404 1 2104 2106 + 1405 1 2107 2108 + 1406 1 2107 2109 + 1407 1 2110 2111 + 1408 1 2110 2112 + 1409 1 2113 2114 + 1410 1 2113 2115 + 1411 1 2116 2117 + 1412 1 2116 2118 + 1413 1 2119 2120 + 1414 1 2119 2121 + 1415 1 2122 2123 + 1416 1 2122 2124 + 1417 1 2125 2126 + 1418 1 2125 2127 + 1419 1 2128 2129 + 1420 1 2128 2130 + 1421 1 2131 2132 + 1422 1 2131 2133 + 1423 1 2134 2135 + 1424 1 2134 2136 + 1425 1 2137 2138 + 1426 1 2137 2139 + 1427 1 2140 2141 + 1428 1 2140 2142 + 1429 1 2143 2144 + 1430 1 2143 2145 + 1431 1 2146 2147 + 1432 1 2146 2148 + 1433 1 2149 2150 + 1434 1 2149 2151 + 1435 1 2152 2153 + 1436 1 2152 2154 + 1437 1 2155 2156 + 1438 1 2155 2157 + 1439 1 2158 2159 + 1440 1 2158 2160 + 1441 1 2161 2162 + 1442 1 2161 2163 + 1443 1 2164 2165 + 1444 1 2164 2166 + 1445 1 2167 2168 + 1446 1 2167 2169 + 1447 1 2170 2171 + 1448 1 2170 2172 + 1449 1 2173 2174 + 1450 1 2173 2175 + 1451 1 2176 2177 + 1452 1 2176 2178 + 1453 1 2179 2180 + 1454 1 2179 2181 + 1455 1 2182 2183 + 1456 1 2182 2184 + 1457 1 2185 2186 + 1458 1 2185 2187 + 1459 1 2188 2189 + 1460 1 2188 2190 + 1461 1 2191 2192 + 1462 1 2191 2193 + 1463 1 2194 2195 + 1464 1 2194 2196 + 1465 1 2197 2198 + 1466 1 2197 2199 + 1467 1 2200 2201 + 1468 1 
2200 2202 + 1469 1 2203 2204 + 1470 1 2203 2205 + 1471 1 2206 2207 + 1472 1 2206 2208 + 1473 1 2209 2210 + 1474 1 2209 2211 + 1475 1 2212 2213 + 1476 1 2212 2214 + 1477 1 2215 2216 + 1478 1 2215 2217 + 1479 1 2218 2219 + 1480 1 2218 2220 + 1481 1 2221 2222 + 1482 1 2221 2223 + 1483 1 2224 2225 + 1484 1 2224 2226 + 1485 1 2227 2228 + 1486 1 2227 2229 + 1487 1 2230 2231 + 1488 1 2230 2232 + 1489 1 2233 2234 + 1490 1 2233 2235 + 1491 1 2236 2237 + 1492 1 2236 2238 + 1493 1 2239 2240 + 1494 1 2239 2241 + 1495 1 2242 2243 + 1496 1 2242 2244 + 1497 1 2245 2246 + 1498 1 2245 2247 + 1499 1 2248 2249 + 1500 1 2248 2250 + 1501 1 2251 2252 + 1502 1 2251 2253 + 1503 1 2254 2255 + 1504 1 2254 2256 + 1505 1 2257 2258 + 1506 1 2257 2259 + 1507 1 2260 2261 + 1508 1 2260 2262 + 1509 1 2263 2264 + 1510 1 2263 2265 + 1511 1 2266 2267 + 1512 1 2266 2268 + 1513 1 2269 2270 + 1514 1 2269 2271 + 1515 1 2272 2273 + 1516 1 2272 2274 + 1517 1 2275 2276 + 1518 1 2275 2277 + 1519 1 2278 2279 + 1520 1 2278 2280 + 1521 1 2281 2282 + 1522 1 2281 2283 + 1523 1 2284 2285 + 1524 1 2284 2286 + 1525 1 2287 2288 + 1526 1 2287 2289 + 1527 1 2290 2291 + 1528 1 2290 2292 + 1529 1 2293 2294 + 1530 1 2293 2295 + 1531 1 2296 2297 + 1532 1 2296 2298 + 1533 1 2299 2300 + 1534 1 2299 2301 + 1535 1 2302 2303 + 1536 1 2302 2304 + 1537 1 2305 2306 + 1538 1 2305 2307 + 1539 1 2308 2309 + 1540 1 2308 2310 + 1541 1 2311 2312 + 1542 1 2311 2313 + 1543 1 2314 2315 + 1544 1 2314 2316 + 1545 1 2317 2318 + 1546 1 2317 2319 + 1547 1 2320 2321 + 1548 1 2320 2322 + 1549 1 2323 2324 + 1550 1 2323 2325 + 1551 1 2326 2327 + 1552 1 2326 2328 + 1553 1 2329 2330 + 1554 1 2329 2331 + 1555 1 2332 2333 + 1556 1 2332 2334 + 1557 1 2335 2336 + 1558 1 2335 2337 + 1559 1 2338 2339 + 1560 1 2338 2340 + 1561 1 2341 2342 + 1562 1 2341 2343 + 1563 1 2344 2345 + 1564 1 2344 2346 + 1565 1 2347 2348 + 1566 1 2347 2349 + 1567 1 2350 2351 + 1568 1 2350 2352 + 1569 1 2353 2354 + 1570 1 2353 2355 + 1571 1 2356 2357 + 1572 1 2356 2358 + 1573 1 2359 
2360 + 1574 1 2359 2361 + 1575 1 2362 2363 + 1576 1 2362 2364 + 1577 1 2365 2366 + 1578 1 2365 2367 + 1579 1 2368 2369 + 1580 1 2368 2370 + 1581 1 2371 2372 + 1582 1 2371 2373 + 1583 1 2374 2375 + 1584 1 2374 2376 + 1585 1 2377 2378 + 1586 1 2377 2379 + 1587 1 2380 2381 + 1588 1 2380 2382 + 1589 1 2383 2384 + 1590 1 2383 2385 + 1591 1 2386 2387 + 1592 1 2386 2388 + 1593 1 2389 2390 + 1594 1 2389 2391 + 1595 1 2392 2393 + 1596 1 2392 2394 + 1597 1 2395 2396 + 1598 1 2395 2397 + 1599 1 2398 2399 + 1600 1 2398 2400 + 1601 1 2401 2402 + 1602 1 2401 2403 + 1603 1 2404 2405 + 1604 1 2404 2406 + 1605 1 2407 2408 + 1606 1 2407 2409 + 1607 1 2410 2411 + 1608 1 2410 2412 + 1609 1 2413 2414 + 1610 1 2413 2415 + 1611 1 2416 2417 + 1612 1 2416 2418 + 1613 1 2419 2420 + 1614 1 2419 2421 + 1615 1 2422 2423 + 1616 1 2422 2424 + 1617 1 2425 2426 + 1618 1 2425 2427 + 1619 1 2428 2429 + 1620 1 2428 2430 + 1621 1 2431 2432 + 1622 1 2431 2433 + 1623 1 2434 2435 + 1624 1 2434 2436 + 1625 1 2437 2438 + 1626 1 2437 2439 + 1627 1 2440 2441 + 1628 1 2440 2442 + 1629 1 2443 2444 + 1630 1 2443 2445 + 1631 1 2446 2447 + 1632 1 2446 2448 + 1633 1 2449 2450 + 1634 1 2449 2451 + 1635 1 2452 2453 + 1636 1 2452 2454 + 1637 1 2455 2456 + 1638 1 2455 2457 + 1639 1 2458 2459 + 1640 1 2458 2460 + 1641 1 2461 2462 + 1642 1 2461 2463 + 1643 1 2464 2465 + 1644 1 2464 2466 + 1645 1 2467 2468 + 1646 1 2467 2469 + 1647 1 2470 2471 + 1648 1 2470 2472 + 1649 1 2473 2474 + 1650 1 2473 2475 + 1651 1 2476 2477 + 1652 1 2476 2478 + 1653 1 2479 2480 + 1654 1 2479 2481 + 1655 1 2482 2483 + 1656 1 2482 2484 + 1657 1 2485 2486 + 1658 1 2485 2487 + 1659 1 2488 2489 + 1660 1 2488 2490 + 1661 1 2491 2492 + 1662 1 2491 2493 + 1663 1 2494 2495 + 1664 1 2494 2496 + 1665 1 2497 2498 + 1666 1 2497 2499 + 1667 1 2500 2501 + 1668 1 2500 2502 + 1669 1 2503 2504 + 1670 1 2503 2505 + 1671 1 2506 2507 + 1672 1 2506 2508 + 1673 1 2509 2510 + 1674 1 2509 2511 + 1675 1 2512 2513 + 1676 1 2512 2514 + 1677 1 2515 2516 + 1678 1 2515 2517 
+ 1679 1 2518 2519 + 1680 1 2518 2520 + 1681 1 2521 2522 + 1682 1 2521 2523 + 1683 1 2524 2525 + 1684 1 2524 2526 + 1685 1 2527 2528 + 1686 1 2527 2529 + 1687 1 2530 2531 + 1688 1 2530 2532 + 1689 1 2533 2534 + 1690 1 2533 2535 + 1691 1 2536 2537 + 1692 1 2536 2538 + 1693 1 2539 2540 + 1694 1 2539 2541 + 1695 1 2542 2543 + 1696 1 2542 2544 + 1697 1 2545 2546 + 1698 1 2545 2547 + 1699 1 2548 2549 + 1700 1 2548 2550 + 1701 1 2551 2552 + 1702 1 2551 2553 + 1703 1 2554 2555 + 1704 1 2554 2556 + 1705 1 2557 2558 + 1706 1 2557 2559 + 1707 1 2560 2561 + 1708 1 2560 2562 + 1709 1 2563 2564 + 1710 1 2563 2565 + 1711 1 2566 2567 + 1712 1 2566 2568 + 1713 1 2569 2570 + 1714 1 2569 2571 + 1715 1 2572 2573 + 1716 1 2572 2574 + 1717 1 2575 2576 + 1718 1 2575 2577 + 1719 1 2578 2579 + 1720 1 2578 2580 + 1721 1 2581 2582 + 1722 1 2581 2583 + 1723 1 2584 2585 + 1724 1 2584 2586 + 1725 1 2587 2588 + 1726 1 2587 2589 + 1727 1 2590 2591 + 1728 1 2590 2592 + 1729 1 2593 2594 + 1730 1 2593 2595 + 1731 1 2596 2597 + 1732 1 2596 2598 + 1733 1 2599 2600 + 1734 1 2599 2601 + 1735 1 2602 2603 + 1736 1 2602 2604 + 1737 1 2605 2606 + 1738 1 2605 2607 + 1739 1 2608 2609 + 1740 1 2608 2610 + 1741 1 2611 2612 + 1742 1 2611 2613 + 1743 1 2614 2615 + 1744 1 2614 2616 + 1745 1 2617 2618 + 1746 1 2617 2619 + 1747 1 2620 2621 + 1748 1 2620 2622 + 1749 1 2623 2624 + 1750 1 2623 2625 + 1751 1 2626 2627 + 1752 1 2626 2628 + 1753 1 2629 2630 + 1754 1 2629 2631 + 1755 1 2632 2633 + 1756 1 2632 2634 + 1757 1 2635 2636 + 1758 1 2635 2637 + 1759 1 2638 2639 + 1760 1 2638 2640 + 1761 1 2641 2642 + 1762 1 2641 2643 + 1763 1 2644 2645 + 1764 1 2644 2646 + 1765 1 2647 2648 + 1766 1 2647 2649 + 1767 1 2650 2651 + 1768 1 2650 2652 + 1769 1 2653 2654 + 1770 1 2653 2655 + 1771 1 2656 2657 + 1772 1 2656 2658 + 1773 1 2659 2660 + 1774 1 2659 2661 + 1775 1 2662 2663 + 1776 1 2662 2664 + 1777 1 2665 2666 + 1778 1 2665 2667 + 1779 1 2668 2669 + 1780 1 2668 2670 + 1781 1 2671 2672 + 1782 1 2671 2673 + 1783 1 2674 2675 + 
1784 1 2674 2676 + 1785 1 2677 2678 + 1786 1 2677 2679 + 1787 1 2680 2681 + 1788 1 2680 2682 + 1789 1 2683 2684 + 1790 1 2683 2685 + 1791 1 2686 2687 + 1792 1 2686 2688 + 1793 1 2689 2690 + 1794 1 2689 2691 + 1795 1 2692 2693 + 1796 1 2692 2694 + 1797 1 2695 2696 + 1798 1 2695 2697 + 1799 1 2698 2699 + 1800 1 2698 2700 + 1801 1 2701 2702 + 1802 1 2701 2703 + 1803 1 2704 2705 + 1804 1 2704 2706 + 1805 1 2707 2708 + 1806 1 2707 2709 + 1807 1 2710 2711 + 1808 1 2710 2712 + 1809 1 2713 2714 + 1810 1 2713 2715 + 1811 1 2716 2717 + 1812 1 2716 2718 + 1813 1 2719 2720 + 1814 1 2719 2721 + 1815 1 2722 2723 + 1816 1 2722 2724 + 1817 1 2725 2726 + 1818 1 2725 2727 + 1819 1 2728 2729 + 1820 1 2728 2730 + 1821 1 2731 2732 + 1822 1 2731 2733 + 1823 1 2734 2735 + 1824 1 2734 2736 + 1825 1 2737 2738 + 1826 1 2737 2739 + 1827 1 2740 2741 + 1828 1 2740 2742 + 1829 1 2743 2744 + 1830 1 2743 2745 + 1831 1 2746 2747 + 1832 1 2746 2748 + 1833 1 2749 2750 + 1834 1 2749 2751 + 1835 1 2752 2753 + 1836 1 2752 2754 + 1837 1 2755 2756 + 1838 1 2755 2757 + 1839 1 2758 2759 + 1840 1 2758 2760 + 1841 1 2761 2762 + 1842 1 2761 2763 + 1843 1 2764 2765 + 1844 1 2764 2766 + 1845 1 2767 2768 + 1846 1 2767 2769 + 1847 1 2770 2771 + 1848 1 2770 2772 + 1849 1 2773 2774 + 1850 1 2773 2775 + 1851 1 2776 2777 + 1852 1 2776 2778 + 1853 1 2779 2780 + 1854 1 2779 2781 + 1855 1 2782 2783 + 1856 1 2782 2784 + 1857 1 2785 2786 + 1858 1 2785 2787 + 1859 1 2788 2789 + 1860 1 2788 2790 + 1861 1 2791 2792 + 1862 1 2791 2793 + 1863 1 2794 2795 + 1864 1 2794 2796 + 1865 1 2797 2798 + 1866 1 2797 2799 + 1867 1 2800 2801 + 1868 1 2800 2802 + 1869 1 2803 2804 + 1870 1 2803 2805 + 1871 1 2806 2807 + 1872 1 2806 2808 + 1873 1 2809 2810 + 1874 1 2809 2811 + 1875 1 2812 2813 + 1876 1 2812 2814 + 1877 1 2815 2816 + 1878 1 2815 2817 + 1879 1 2818 2819 + 1880 1 2818 2820 + 1881 1 2821 2822 + 1882 1 2821 2823 + 1883 1 2824 2825 + 1884 1 2824 2826 + 1885 1 2827 2828 + 1886 1 2827 2829 + 1887 1 2830 2831 + 1888 1 2830 2832 + 1889 
1 2833 2834 + 1890 1 2833 2835 + 1891 1 2836 2837 + 1892 1 2836 2838 + 1893 1 2839 2840 + 1894 1 2839 2841 + 1895 1 2842 2843 + 1896 1 2842 2844 + 1897 1 2845 2846 + 1898 1 2845 2847 + 1899 1 2848 2849 + 1900 1 2848 2850 + 1901 1 2851 2852 + 1902 1 2851 2853 + 1903 1 2854 2855 + 1904 1 2854 2856 + 1905 1 2857 2858 + 1906 1 2857 2859 + 1907 1 2860 2861 + 1908 1 2860 2862 + 1909 1 2863 2864 + 1910 1 2863 2865 + 1911 1 2866 2867 + 1912 1 2866 2868 + 1913 1 2869 2870 + 1914 1 2869 2871 + 1915 1 2872 2873 + 1916 1 2872 2874 + 1917 1 2875 2876 + 1918 1 2875 2877 + 1919 1 2878 2879 + 1920 1 2878 2880 + 1921 1 2881 2882 + 1922 1 2881 2883 + 1923 1 2884 2885 + 1924 1 2884 2886 + 1925 1 2887 2888 + 1926 1 2887 2889 + 1927 1 2890 2891 + 1928 1 2890 2892 + 1929 1 2893 2894 + 1930 1 2893 2895 + 1931 1 2896 2897 + 1932 1 2896 2898 + 1933 1 2899 2900 + 1934 1 2899 2901 + 1935 1 2902 2903 + 1936 1 2902 2904 + 1937 1 2905 2906 + 1938 1 2905 2907 + 1939 1 2908 2909 + 1940 1 2908 2910 + 1941 1 2911 2912 + 1942 1 2911 2913 + 1943 1 2914 2915 + 1944 1 2914 2916 + 1945 1 2917 2918 + 1946 1 2917 2919 + 1947 1 2920 2921 + 1948 1 2920 2922 + 1949 1 2923 2924 + 1950 1 2923 2925 + 1951 1 2926 2927 + 1952 1 2926 2928 + 1953 1 2929 2930 + 1954 1 2929 2931 + 1955 1 2932 2933 + 1956 1 2932 2934 + 1957 1 2935 2936 + 1958 1 2935 2937 + 1959 1 2938 2939 + 1960 1 2938 2940 + 1961 1 2941 2942 + 1962 1 2941 2943 + 1963 1 2944 2945 + 1964 1 2944 2946 + 1965 1 2947 2948 + 1966 1 2947 2949 + 1967 1 2950 2951 + 1968 1 2950 2952 + 1969 1 2953 2954 + 1970 1 2953 2955 + 1971 1 2956 2957 + 1972 1 2956 2958 + 1973 1 2959 2960 + 1974 1 2959 2961 + 1975 1 2962 2963 + 1976 1 2962 2964 + 1977 1 2965 2966 + 1978 1 2965 2967 + 1979 1 2968 2969 + 1980 1 2968 2970 + 1981 1 2971 2972 + 1982 1 2971 2973 + 1983 1 2974 2975 + 1984 1 2974 2976 + 1985 1 2977 2978 + 1986 1 2977 2979 + 1987 1 2980 2981 + 1988 1 2980 2982 + 1989 1 2983 2984 + 1990 1 2983 2985 + 1991 1 2986 2987 + 1992 1 2986 2988 + 1993 1 2989 2990 + 1994 1 
2989 2991 + 1995 1 2992 2993 + 1996 1 2992 2994 + 1997 1 2995 2996 + 1998 1 2995 2997 + 1999 1 2998 2999 + 2000 1 2998 3000 + 2001 1 3001 3002 + 2002 1 3001 3003 + 2003 1 3004 3005 + 2004 1 3004 3006 + 2005 1 3007 3008 + 2006 1 3007 3009 + 2007 1 3010 3011 + 2008 1 3010 3012 + 2009 1 3013 3014 + 2010 1 3013 3015 + 2011 1 3016 3017 + 2012 1 3016 3018 + 2013 1 3019 3020 + 2014 1 3019 3021 + 2015 1 3022 3023 + 2016 1 3022 3024 + 2017 1 3025 3026 + 2018 1 3025 3027 + 2019 1 3028 3029 + 2020 1 3028 3030 + 2021 1 3031 3032 + 2022 1 3031 3033 + 2023 1 3034 3035 + 2024 1 3034 3036 + 2025 1 3037 3038 + 2026 1 3037 3039 + 2027 1 3040 3041 + 2028 1 3040 3042 + 2029 1 3043 3044 + 2030 1 3043 3045 + 2031 1 3046 3047 + 2032 1 3046 3048 + 2033 1 3049 3050 + 2034 1 3049 3051 + 2035 1 3052 3053 + 2036 1 3052 3054 + 2037 1 3055 3056 + 2038 1 3055 3057 + 2039 1 3058 3059 + 2040 1 3058 3060 + 2041 1 3061 3062 + 2042 1 3061 3063 + 2043 1 3064 3065 + 2044 1 3064 3066 + 2045 1 3067 3068 + 2046 1 3067 3069 + 2047 1 3070 3071 + 2048 1 3070 3072 + 2049 1 3073 3074 + 2050 1 3073 3075 + 2051 1 3076 3077 + 2052 1 3076 3078 + 2053 1 3079 3080 + 2054 1 3079 3081 + 2055 1 3082 3083 + 2056 1 3082 3084 + 2057 1 3085 3086 + 2058 1 3085 3087 + 2059 1 3088 3089 + 2060 1 3088 3090 + 2061 1 3091 3092 + 2062 1 3091 3093 + 2063 1 3094 3095 + 2064 1 3094 3096 + 2065 1 3097 3098 + 2066 1 3097 3099 + 2067 1 3100 3101 + 2068 1 3100 3102 + 2069 1 3103 3104 + 2070 1 3103 3105 + 2071 1 3106 3107 + 2072 1 3106 3108 + 2073 1 3109 3110 + 2074 1 3109 3111 + 2075 1 3112 3113 + 2076 1 3112 3114 + 2077 1 3115 3116 + 2078 1 3115 3117 + 2079 1 3118 3119 + 2080 1 3118 3120 + 2081 1 3121 3122 + 2082 1 3121 3123 + 2083 1 3124 3125 + 2084 1 3124 3126 + 2085 1 3127 3128 + 2086 1 3127 3129 + 2087 1 3130 3131 + 2088 1 3130 3132 + 2089 1 3133 3134 + 2090 1 3133 3135 + 2091 1 3136 3137 + 2092 1 3136 3138 + 2093 1 3139 3140 + 2094 1 3139 3141 + 2095 1 3142 3143 + 2096 1 3142 3144 + 2097 1 3145 3146 + 2098 1 3145 3147 + 2099 1 3148 
3149 + 2100 1 3148 3150 + 2101 1 3151 3152 + 2102 1 3151 3153 + 2103 1 3154 3155 + 2104 1 3154 3156 + 2105 1 3157 3158 + 2106 1 3157 3159 + 2107 1 3160 3161 + 2108 1 3160 3162 + 2109 1 3163 3164 + 2110 1 3163 3165 + 2111 1 3166 3167 + 2112 1 3166 3168 + 2113 1 3169 3170 + 2114 1 3169 3171 + 2115 1 3172 3173 + 2116 1 3172 3174 + 2117 1 3175 3176 + 2118 1 3175 3177 + 2119 1 3178 3179 + 2120 1 3178 3180 + 2121 1 3181 3182 + 2122 1 3181 3183 + 2123 1 3184 3185 + 2124 1 3184 3186 + 2125 1 3187 3188 + 2126 1 3187 3189 + 2127 1 3190 3191 + 2128 1 3190 3192 + 2129 1 3193 3194 + 2130 1 3193 3195 + 2131 1 3196 3197 + 2132 1 3196 3198 + 2133 1 3199 3200 + 2134 1 3199 3201 + 2135 1 3202 3203 + 2136 1 3202 3204 + 2137 1 3205 3206 + 2138 1 3205 3207 + 2139 1 3208 3209 + 2140 1 3208 3210 + 2141 1 3211 3212 + 2142 1 3211 3213 + 2143 1 3214 3215 + 2144 1 3214 3216 + 2145 1 3217 3218 + 2146 1 3217 3219 + 2147 1 3220 3221 + 2148 1 3220 3222 + 2149 1 3223 3224 + 2150 1 3223 3225 + 2151 1 3226 3227 + 2152 1 3226 3228 + 2153 1 3229 3230 + 2154 1 3229 3231 + 2155 1 3232 3233 + 2156 1 3232 3234 + 2157 1 3235 3236 + 2158 1 3235 3237 + 2159 1 3238 3239 + 2160 1 3238 3240 + 2161 1 3241 3242 + 2162 1 3241 3243 + 2163 1 3244 3245 + 2164 1 3244 3246 + 2165 1 3247 3248 + 2166 1 3247 3249 + 2167 1 3250 3251 + 2168 1 3250 3252 + 2169 1 3253 3254 + 2170 1 3253 3255 + 2171 1 3256 3257 + 2172 1 3256 3258 + 2173 1 3259 3260 + 2174 1 3259 3261 + 2175 1 3262 3263 + 2176 1 3262 3264 + 2177 1 3265 3266 + 2178 1 3265 3267 + 2179 1 3268 3269 + 2180 1 3268 3270 + 2181 1 3271 3272 + 2182 1 3271 3273 + 2183 1 3274 3275 + 2184 1 3274 3276 + 2185 1 3277 3278 + 2186 1 3277 3279 + 2187 1 3280 3281 + 2188 1 3280 3282 + 2189 1 3283 3284 + 2190 1 3283 3285 + 2191 1 3286 3287 + 2192 1 3286 3288 + 2193 1 3289 3290 + 2194 1 3289 3291 + 2195 1 3292 3293 + 2196 1 3292 3294 + 2197 1 3295 3296 + 2198 1 3295 3297 + 2199 1 3298 3299 + 2200 1 3298 3300 + 2201 1 3301 3302 + 2202 1 3301 3303 + 2203 1 3304 3305 + 2204 1 3304 3306 
+ 2205 1 3307 3308 + 2206 1 3307 3309 + 2207 1 3310 3311 + 2208 1 3310 3312 + 2209 1 3313 3314 + 2210 1 3313 3315 + 2211 1 3316 3317 + 2212 1 3316 3318 + 2213 1 3319 3320 + 2214 1 3319 3321 + 2215 1 3322 3323 + 2216 1 3322 3324 + 2217 1 3325 3326 + 2218 1 3325 3327 + 2219 1 3328 3329 + 2220 1 3328 3330 + 2221 1 3331 3332 + 2222 1 3331 3333 + 2223 1 3334 3335 + 2224 1 3334 3336 + 2225 1 3337 3338 + 2226 1 3337 3339 + 2227 1 3340 3341 + 2228 1 3340 3342 + 2229 1 3343 3344 + 2230 1 3343 3345 + 2231 1 3346 3347 + 2232 1 3346 3348 + 2233 1 3349 3350 + 2234 1 3349 3351 + 2235 1 3352 3353 + 2236 1 3352 3354 + 2237 1 3355 3356 + 2238 1 3355 3357 + 2239 1 3358 3359 + 2240 1 3358 3360 + 2241 1 3361 3362 + 2242 1 3361 3363 + 2243 1 3364 3365 + 2244 1 3364 3366 + 2245 1 3367 3368 + 2246 1 3367 3369 + 2247 1 3370 3371 + 2248 1 3370 3372 + 2249 1 3373 3374 + 2250 1 3373 3375 + 2251 1 3376 3377 + 2252 1 3376 3378 + 2253 1 3379 3380 + 2254 1 3379 3381 + 2255 1 3382 3383 + 2256 1 3382 3384 + 2257 1 3385 3386 + 2258 1 3385 3387 + 2259 1 3388 3389 + 2260 1 3388 3390 + 2261 1 3391 3392 + 2262 1 3391 3393 + 2263 1 3394 3395 + 2264 1 3394 3396 + 2265 1 3397 3398 + 2266 1 3397 3399 + 2267 1 3400 3401 + 2268 1 3400 3402 + 2269 1 3403 3404 + 2270 1 3403 3405 + 2271 1 3406 3407 + 2272 1 3406 3408 + 2273 1 3409 3410 + 2274 1 3409 3411 + 2275 1 3412 3413 + 2276 1 3412 3414 + 2277 1 3415 3416 + 2278 1 3415 3417 + 2279 1 3418 3419 + 2280 1 3418 3420 + 2281 1 3421 3422 + 2282 1 3421 3423 + 2283 1 3424 3425 + 2284 1 3424 3426 + 2285 1 3427 3428 + 2286 1 3427 3429 + 2287 1 3430 3431 + 2288 1 3430 3432 + 2289 1 3433 3434 + 2290 1 3433 3435 + 2291 1 3436 3437 + 2292 1 3436 3438 + 2293 1 3439 3440 + 2294 1 3439 3441 + 2295 1 3442 3443 + 2296 1 3442 3444 + 2297 1 3445 3446 + 2298 1 3445 3447 + 2299 1 3448 3449 + 2300 1 3448 3450 + 2301 1 3451 3452 + 2302 1 3451 3453 + 2303 1 3454 3455 + 2304 1 3454 3456 + 2305 1 3457 3458 + 2306 1 3457 3459 + 2307 1 3460 3461 + 2308 1 3460 3462 + 2309 1 3463 3464 + 
2310 1 3463 3465 + 2311 1 3466 3467 + 2312 1 3466 3468 + 2313 1 3469 3470 + 2314 1 3469 3471 + 2315 1 3472 3473 + 2316 1 3472 3474 + 2317 1 3475 3476 + 2318 1 3475 3477 + 2319 1 3478 3479 + 2320 1 3478 3480 + 2321 1 3481 3482 + 2322 1 3481 3483 + 2323 1 3484 3485 + 2324 1 3484 3486 + 2325 1 3487 3488 + 2326 1 3487 3489 + 2327 1 3490 3491 + 2328 1 3490 3492 + 2329 1 3493 3494 + 2330 1 3493 3495 + 2331 1 3496 3497 + 2332 1 3496 3498 + 2333 1 3499 3500 + 2334 1 3499 3501 + 2335 1 3502 3503 + 2336 1 3502 3504 + 2337 1 3505 3506 + 2338 1 3505 3507 + 2339 1 3508 3509 + 2340 1 3508 3510 + 2341 1 3511 3512 + 2342 1 3511 3513 + 2343 1 3514 3515 + 2344 1 3514 3516 + 2345 1 3517 3518 + 2346 1 3517 3519 + 2347 1 3520 3521 + 2348 1 3520 3522 + 2349 1 3523 3524 + 2350 1 3523 3525 + 2351 1 3526 3527 + 2352 1 3526 3528 + 2353 1 3529 3530 + 2354 1 3529 3531 + 2355 1 3532 3533 + 2356 1 3532 3534 + 2357 1 3535 3536 + 2358 1 3535 3537 + 2359 1 3538 3539 + 2360 1 3538 3540 + 2361 1 3541 3542 + 2362 1 3541 3543 + 2363 1 3544 3545 + 2364 1 3544 3546 + 2365 1 3547 3548 + 2366 1 3547 3549 + 2367 1 3550 3551 + 2368 1 3550 3552 + 2369 1 3553 3554 + 2370 1 3553 3555 + 2371 1 3556 3557 + 2372 1 3556 3558 + 2373 1 3559 3560 + 2374 1 3559 3561 + 2375 1 3562 3563 + 2376 1 3562 3564 + 2377 1 3565 3566 + 2378 1 3565 3567 + 2379 1 3568 3569 + 2380 1 3568 3570 + 2381 1 3571 3572 + 2382 1 3571 3573 + 2383 1 3574 3575 + 2384 1 3574 3576 + 2385 1 3577 3578 + 2386 1 3577 3579 + 2387 1 3580 3581 + 2388 1 3580 3582 + 2389 1 3583 3584 + 2390 1 3583 3585 + 2391 1 3586 3587 + 2392 1 3586 3588 + 2393 1 3589 3590 + 2394 1 3589 3591 + 2395 1 3592 3593 + 2396 1 3592 3594 + 2397 1 3595 3596 + 2398 1 3595 3597 + 2399 1 3598 3599 + 2400 1 3598 3600 + 2401 1 3601 3602 + 2402 1 3601 3603 + 2403 1 3604 3605 + 2404 1 3604 3606 + 2405 1 3607 3608 + 2406 1 3607 3609 + 2407 1 3610 3611 + 2408 1 3610 3612 + 2409 1 3613 3614 + 2410 1 3613 3615 + 2411 1 3616 3617 + 2412 1 3616 3618 + 2413 1 3619 3620 + 2414 1 3619 3621 + 2415 
1 3622 3623 + 2416 1 3622 3624 + 2417 1 3625 3626 + 2418 1 3625 3627 + 2419 1 3628 3629 + 2420 1 3628 3630 + 2421 1 3631 3632 + 2422 1 3631 3633 + 2423 1 3634 3635 + 2424 1 3634 3636 + 2425 1 3637 3638 + 2426 1 3637 3639 + 2427 1 3640 3641 + 2428 1 3640 3642 + 2429 1 3643 3644 + 2430 1 3643 3645 + 2431 1 3646 3647 + 2432 1 3646 3648 + 2433 1 3649 3650 + 2434 1 3649 3651 + 2435 1 3652 3653 + 2436 1 3652 3654 + 2437 1 3655 3656 + 2438 1 3655 3657 + 2439 1 3658 3659 + 2440 1 3658 3660 + 2441 1 3661 3662 + 2442 1 3661 3663 + 2443 1 3664 3665 + 2444 1 3664 3666 + 2445 1 3667 3668 + 2446 1 3667 3669 + 2447 1 3670 3671 + 2448 1 3670 3672 + 2449 1 3673 3674 + 2450 1 3673 3675 + 2451 1 3676 3677 + 2452 1 3676 3678 + 2453 1 3679 3680 + 2454 1 3679 3681 + 2455 1 3682 3683 + 2456 1 3682 3684 + 2457 1 3685 3686 + 2458 1 3685 3687 + 2459 1 3688 3689 + 2460 1 3688 3690 + 2461 1 3691 3692 + 2462 1 3691 3693 + 2463 1 3694 3695 + 2464 1 3694 3696 + 2465 1 3697 3698 + 2466 1 3697 3699 + 2467 1 3700 3701 + 2468 1 3700 3702 + 2469 1 3703 3704 + 2470 1 3703 3705 + 2471 1 3706 3707 + 2472 1 3706 3708 + 2473 1 3709 3710 + 2474 1 3709 3711 + 2475 1 3712 3713 + 2476 1 3712 3714 + 2477 1 3715 3716 + 2478 1 3715 3717 + 2479 1 3718 3719 + 2480 1 3718 3720 + 2481 1 3721 3722 + 2482 1 3721 3723 + 2483 1 3724 3725 + 2484 1 3724 3726 + 2485 1 3727 3728 + 2486 1 3727 3729 + 2487 1 3730 3731 + 2488 1 3730 3732 + 2489 1 3733 3734 + 2490 1 3733 3735 + 2491 1 3736 3737 + 2492 1 3736 3738 + 2493 1 3739 3740 + 2494 1 3739 3741 + 2495 1 3742 3743 + 2496 1 3742 3744 + 2497 1 3745 3746 + 2498 1 3745 3747 + 2499 1 3748 3749 + 2500 1 3748 3750 + 2501 1 3751 3752 + 2502 1 3751 3753 + 2503 1 3754 3755 + 2504 1 3754 3756 + 2505 1 3757 3758 + 2506 1 3757 3759 + 2507 1 3760 3761 + 2508 1 3760 3762 + 2509 1 3763 3764 + 2510 1 3763 3765 + 2511 1 3766 3767 + 2512 1 3766 3768 + 2513 1 3769 3770 + 2514 1 3769 3771 + 2515 1 3772 3773 + 2516 1 3772 3774 + 2517 1 3775 3776 + 2518 1 3775 3777 + 2519 1 3778 3779 + 2520 1 
3778 3780 + 2521 1 3781 3782 + 2522 1 3781 3783 + 2523 1 3784 3785 + 2524 1 3784 3786 + 2525 1 3787 3788 + 2526 1 3787 3789 + 2527 1 3790 3791 + 2528 1 3790 3792 + 2529 1 3793 3794 + 2530 1 3793 3795 + 2531 1 3796 3797 + 2532 1 3796 3798 + 2533 1 3799 3800 + 2534 1 3799 3801 + 2535 1 3802 3803 + 2536 1 3802 3804 + 2537 1 3805 3806 + 2538 1 3805 3807 + 2539 1 3808 3809 + 2540 1 3808 3810 + 2541 1 3811 3812 + 2542 1 3811 3813 + 2543 1 3814 3815 + 2544 1 3814 3816 + 2545 1 3817 3818 + 2546 1 3817 3819 + 2547 1 3820 3821 + 2548 1 3820 3822 + 2549 1 3823 3824 + 2550 1 3823 3825 + 2551 1 3826 3827 + 2552 1 3826 3828 + 2553 1 3829 3830 + 2554 1 3829 3831 + 2555 1 3832 3833 + 2556 1 3832 3834 + 2557 1 3835 3836 + 2558 1 3835 3837 + 2559 1 3838 3839 + 2560 1 3838 3840 + 2561 1 3841 3842 + 2562 1 3841 3843 + 2563 1 3844 3845 + 2564 1 3844 3846 + 2565 1 3847 3848 + 2566 1 3847 3849 + 2567 1 3850 3851 + 2568 1 3850 3852 + 2569 1 3853 3854 + 2570 1 3853 3855 + 2571 1 3856 3857 + 2572 1 3856 3858 + 2573 1 3859 3860 + 2574 1 3859 3861 + 2575 1 3862 3863 + 2576 1 3862 3864 + 2577 1 3865 3866 + 2578 1 3865 3867 + 2579 1 3868 3869 + 2580 1 3868 3870 + 2581 1 3871 3872 + 2582 1 3871 3873 + 2583 1 3874 3875 + 2584 1 3874 3876 + 2585 1 3877 3878 + 2586 1 3877 3879 + 2587 1 3880 3881 + 2588 1 3880 3882 + 2589 1 3883 3884 + 2590 1 3883 3885 + 2591 1 3886 3887 + 2592 1 3886 3888 + 2593 1 3889 3890 + 2594 1 3889 3891 + 2595 1 3892 3893 + 2596 1 3892 3894 + 2597 1 3895 3896 + 2598 1 3895 3897 + 2599 1 3898 3899 + 2600 1 3898 3900 + 2601 1 3901 3902 + 2602 1 3901 3903 + 2603 1 3904 3905 + 2604 1 3904 3906 + 2605 1 3907 3908 + 2606 1 3907 3909 + 2607 1 3910 3911 + 2608 1 3910 3912 + 2609 1 3913 3914 + 2610 1 3913 3915 + 2611 1 3916 3917 + 2612 1 3916 3918 + 2613 1 3919 3920 + 2614 1 3919 3921 + 2615 1 3922 3923 + 2616 1 3922 3924 + 2617 1 3925 3926 + 2618 1 3925 3927 + 2619 1 3928 3929 + 2620 1 3928 3930 + 2621 1 3931 3932 + 2622 1 3931 3933 + 2623 1 3934 3935 + 2624 1 3934 3936 + 2625 1 3937 
3938 + 2626 1 3937 3939 + 2627 1 3940 3941 + 2628 1 3940 3942 + 2629 1 3943 3944 + 2630 1 3943 3945 + 2631 1 3946 3947 + 2632 1 3946 3948 + 2633 1 3949 3950 + 2634 1 3949 3951 + 2635 1 3952 3953 + 2636 1 3952 3954 + 2637 1 3955 3956 + 2638 1 3955 3957 + 2639 1 3958 3959 + 2640 1 3958 3960 + 2641 1 3961 3962 + 2642 1 3961 3963 + 2643 1 3964 3965 + 2644 1 3964 3966 + 2645 1 3967 3968 + 2646 1 3967 3969 + 2647 1 3970 3971 + 2648 1 3970 3972 + 2649 1 3973 3974 + 2650 1 3973 3975 + 2651 1 3976 3977 + 2652 1 3976 3978 + 2653 1 3979 3980 + 2654 1 3979 3981 + 2655 1 3982 3983 + 2656 1 3982 3984 + 2657 1 3985 3986 + 2658 1 3985 3987 + 2659 1 3988 3989 + 2660 1 3988 3990 + 2661 1 3991 3992 + 2662 1 3991 3993 + 2663 1 3994 3995 + 2664 1 3994 3996 + 2665 1 3997 3998 + 2666 1 3997 3999 + 2667 1 4000 4001 + 2668 1 4000 4002 + 2669 1 4003 4004 + 2670 1 4003 4005 + 2671 1 4006 4007 + 2672 1 4006 4008 + 2673 1 4009 4010 + 2674 1 4009 4011 + 2675 1 4012 4013 + 2676 1 4012 4014 + 2677 1 4015 4016 + 2678 1 4015 4017 + 2679 1 4018 4019 + 2680 1 4018 4020 + 2681 1 4021 4022 + 2682 1 4021 4023 + 2683 1 4024 4025 + 2684 1 4024 4026 + 2685 1 4027 4028 + 2686 1 4027 4029 + 2687 1 4030 4031 + 2688 1 4030 4032 + 2689 1 4033 4034 + 2690 1 4033 4035 + 2691 1 4036 4037 + 2692 1 4036 4038 + 2693 1 4039 4040 + 2694 1 4039 4041 + 2695 1 4042 4043 + 2696 1 4042 4044 + 2697 1 4045 4046 + 2698 1 4045 4047 + 2699 1 4048 4049 + 2700 1 4048 4050 + 2701 1 4051 4052 + 2702 1 4051 4053 + 2703 1 4054 4055 + 2704 1 4054 4056 + 2705 1 4057 4058 + 2706 1 4057 4059 + 2707 1 4060 4061 + 2708 1 4060 4062 + 2709 1 4063 4064 + 2710 1 4063 4065 + 2711 1 4066 4067 + 2712 1 4066 4068 + 2713 1 4069 4070 + 2714 1 4069 4071 + 2715 1 4072 4073 + 2716 1 4072 4074 + 2717 1 4075 4076 + 2718 1 4075 4077 + 2719 1 4078 4079 + 2720 1 4078 4080 + 2721 1 4081 4082 + 2722 1 4081 4083 + 2723 1 4084 4085 + 2724 1 4084 4086 + 2725 1 4087 4088 + 2726 1 4087 4089 + 2727 1 4090 4091 + 2728 1 4090 4092 + 2729 1 4093 4094 + 2730 1 4093 4095 
+ 2731 1 4096 4097 + 2732 1 4096 4098 + 2733 1 4099 4100 + 2734 1 4099 4101 + 2735 1 4102 4103 + 2736 1 4102 4104 + 2737 1 4105 4106 + 2738 1 4105 4107 + 2739 1 4108 4109 + 2740 1 4108 4110 + 2741 1 4111 4112 + 2742 1 4111 4113 + 2743 1 4114 4115 + 2744 1 4114 4116 + 2745 1 4117 4118 + 2746 1 4117 4119 + 2747 1 4120 4121 + 2748 1 4120 4122 + 2749 1 4123 4124 + 2750 1 4123 4125 + 2751 1 4126 4127 + 2752 1 4126 4128 + 2753 1 4129 4130 + 2754 1 4129 4131 + 2755 1 4132 4133 + 2756 1 4132 4134 + 2757 1 4135 4136 + 2758 1 4135 4137 + 2759 1 4138 4139 + 2760 1 4138 4140 + 2761 1 4141 4142 + 2762 1 4141 4143 + 2763 1 4144 4145 + 2764 1 4144 4146 + 2765 1 4147 4148 + 2766 1 4147 4149 + 2767 1 4150 4151 + 2768 1 4150 4152 + 2769 1 4153 4154 + 2770 1 4153 4155 + 2771 1 4156 4157 + 2772 1 4156 4158 + 2773 1 4159 4160 + 2774 1 4159 4161 + 2775 1 4162 4163 + 2776 1 4162 4164 + 2777 1 4165 4166 + 2778 1 4165 4167 + 2779 1 4168 4169 + 2780 1 4168 4170 + 2781 1 4171 4172 + 2782 1 4171 4173 + 2783 1 4174 4175 + 2784 1 4174 4176 + 2785 1 4177 4178 + 2786 1 4177 4179 + 2787 1 4180 4181 + 2788 1 4180 4182 + 2789 1 4183 4184 + 2790 1 4183 4185 + 2791 1 4186 4187 + 2792 1 4186 4188 + 2793 1 4189 4190 + 2794 1 4189 4191 + 2795 1 4192 4193 + 2796 1 4192 4194 + 2797 1 4195 4196 + 2798 1 4195 4197 + 2799 1 4198 4199 + 2800 1 4198 4200 + 2801 1 4201 4202 + 2802 1 4201 4203 + 2803 1 4204 4205 + 2804 1 4204 4206 + 2805 1 4207 4208 + 2806 1 4207 4209 + 2807 1 4210 4211 + 2808 1 4210 4212 + 2809 1 4213 4214 + 2810 1 4213 4215 + 2811 1 4216 4217 + 2812 1 4216 4218 + 2813 1 4219 4220 + 2814 1 4219 4221 + 2815 1 4222 4223 + 2816 1 4222 4224 + 2817 1 4225 4226 + 2818 1 4225 4227 + 2819 1 4228 4229 + 2820 1 4228 4230 + 2821 1 4231 4232 + 2822 1 4231 4233 + 2823 1 4234 4235 + 2824 1 4234 4236 + 2825 1 4237 4238 + 2826 1 4237 4239 + 2827 1 4240 4241 + 2828 1 4240 4242 + 2829 1 4243 4244 + 2830 1 4243 4245 + 2831 1 4246 4247 + 2832 1 4246 4248 + 2833 1 4249 4250 + 2834 1 4249 4251 + 2835 1 4252 4253 + 
2836 1 4252 4254 + 2837 1 4255 4256 + 2838 1 4255 4257 + 2839 1 4258 4259 + 2840 1 4258 4260 + 2841 1 4261 4262 + 2842 1 4261 4263 + 2843 1 4264 4265 + 2844 1 4264 4266 + 2845 1 4267 4268 + 2846 1 4267 4269 + 2847 1 4270 4271 + 2848 1 4270 4272 + 2849 1 4273 4274 + 2850 1 4273 4275 + 2851 1 4276 4277 + 2852 1 4276 4278 + 2853 1 4279 4280 + 2854 1 4279 4281 + 2855 1 4282 4283 + 2856 1 4282 4284 + 2857 1 4285 4286 + 2858 1 4285 4287 + 2859 1 4288 4289 + 2860 1 4288 4290 + 2861 1 4291 4292 + 2862 1 4291 4293 + 2863 1 4294 4295 + 2864 1 4294 4296 + 2865 1 4297 4298 + 2866 1 4297 4299 + 2867 1 4300 4301 + 2868 1 4300 4302 + 2869 1 4303 4304 + 2870 1 4303 4305 + 2871 1 4306 4307 + 2872 1 4306 4308 + 2873 1 4309 4310 + 2874 1 4309 4311 + 2875 1 4312 4313 + 2876 1 4312 4314 + 2877 1 4315 4316 + 2878 1 4315 4317 + 2879 1 4318 4319 + 2880 1 4318 4320 + 2881 1 4321 4322 + 2882 1 4321 4323 + 2883 1 4324 4325 + 2884 1 4324 4326 + 2885 1 4327 4328 + 2886 1 4327 4329 + 2887 1 4330 4331 + 2888 1 4330 4332 + 2889 1 4333 4334 + 2890 1 4333 4335 + 2891 1 4336 4337 + 2892 1 4336 4338 + 2893 1 4339 4340 + 2894 1 4339 4341 + 2895 1 4342 4343 + 2896 1 4342 4344 + 2897 1 4345 4346 + 2898 1 4345 4347 + 2899 1 4348 4349 + 2900 1 4348 4350 + 2901 1 4351 4352 + 2902 1 4351 4353 + 2903 1 4354 4355 + 2904 1 4354 4356 + 2905 1 4357 4358 + 2906 1 4357 4359 + 2907 1 4360 4361 + 2908 1 4360 4362 + 2909 1 4363 4364 + 2910 1 4363 4365 + 2911 1 4366 4367 + 2912 1 4366 4368 + 2913 1 4369 4370 + 2914 1 4369 4371 + 2915 1 4372 4373 + 2916 1 4372 4374 + 2917 1 4375 4376 + 2918 1 4375 4377 + 2919 1 4378 4379 + 2920 1 4378 4380 + 2921 1 4381 4382 + 2922 1 4381 4383 + 2923 1 4384 4385 + 2924 1 4384 4386 + 2925 1 4387 4388 + 2926 1 4387 4389 + 2927 1 4390 4391 + 2928 1 4390 4392 + 2929 1 4393 4394 + 2930 1 4393 4395 + 2931 1 4396 4397 + 2932 1 4396 4398 + 2933 1 4399 4400 + 2934 1 4399 4401 + 2935 1 4402 4403 + 2936 1 4402 4404 + 2937 1 4405 4406 + 2938 1 4405 4407 + 2939 1 4408 4409 + 2940 1 4408 4410 + 2941 
1 4411 4412 + 2942 1 4411 4413 + 2943 1 4414 4415 + 2944 1 4414 4416 + 2945 1 4417 4418 + 2946 1 4417 4419 + 2947 1 4420 4421 + 2948 1 4420 4422 + 2949 1 4423 4424 + 2950 1 4423 4425 + 2951 1 4426 4427 + 2952 1 4426 4428 + 2953 1 4429 4430 + 2954 1 4429 4431 + 2955 1 4432 4433 + 2956 1 4432 4434 + 2957 1 4435 4436 + 2958 1 4435 4437 + 2959 1 4438 4439 + 2960 1 4438 4440 + 2961 1 4441 4442 + 2962 1 4441 4443 + 2963 1 4444 4445 + 2964 1 4444 4446 + 2965 1 4447 4448 + 2966 1 4447 4449 + 2967 1 4450 4451 + 2968 1 4450 4452 + 2969 1 4453 4454 + 2970 1 4453 4455 + 2971 1 4456 4457 + 2972 1 4456 4458 + 2973 1 4459 4460 + 2974 1 4459 4461 + 2975 1 4462 4463 + 2976 1 4462 4464 + 2977 1 4465 4466 + 2978 1 4465 4467 + 2979 1 4468 4469 + 2980 1 4468 4470 + 2981 1 4471 4472 + 2982 1 4471 4473 + 2983 1 4474 4475 + 2984 1 4474 4476 + 2985 1 4477 4478 + 2986 1 4477 4479 + 2987 1 4480 4481 + 2988 1 4480 4482 + 2989 1 4483 4484 + 2990 1 4483 4485 + 2991 1 4486 4487 + 2992 1 4486 4488 + 2993 1 4489 4490 + 2994 1 4489 4491 + 2995 1 4492 4493 + 2996 1 4492 4494 + 2997 1 4495 4496 + 2998 1 4495 4497 + 2999 1 4498 4499 + 3000 1 4498 4500 + +Angles + + 1 1 2 1 3 + 2 1 5 4 6 + 3 1 8 7 9 + 4 1 11 10 12 + 5 1 14 13 15 + 6 1 17 16 18 + 7 1 20 19 21 + 8 1 23 22 24 + 9 1 26 25 27 + 10 1 29 28 30 + 11 1 32 31 33 + 12 1 35 34 36 + 13 1 38 37 39 + 14 1 41 40 42 + 15 1 44 43 45 + 16 1 47 46 48 + 17 1 50 49 51 + 18 1 53 52 54 + 19 1 56 55 57 + 20 1 59 58 60 + 21 1 62 61 63 + 22 1 65 64 66 + 23 1 68 67 69 + 24 1 71 70 72 + 25 1 74 73 75 + 26 1 77 76 78 + 27 1 80 79 81 + 28 1 83 82 84 + 29 1 86 85 87 + 30 1 89 88 90 + 31 1 92 91 93 + 32 1 95 94 96 + 33 1 98 97 99 + 34 1 101 100 102 + 35 1 104 103 105 + 36 1 107 106 108 + 37 1 110 109 111 + 38 1 113 112 114 + 39 1 116 115 117 + 40 1 119 118 120 + 41 1 122 121 123 + 42 1 125 124 126 + 43 1 128 127 129 + 44 1 131 130 132 + 45 1 134 133 135 + 46 1 137 136 138 + 47 1 140 139 141 + 48 1 143 142 144 + 49 1 146 145 147 + 50 1 149 148 150 + 51 1 152 151 153 + 
52 1 155 154 156 + 53 1 158 157 159 + 54 1 161 160 162 + 55 1 164 163 165 + 56 1 167 166 168 + 57 1 170 169 171 + 58 1 173 172 174 + 59 1 176 175 177 + 60 1 179 178 180 + 61 1 182 181 183 + 62 1 185 184 186 + 63 1 188 187 189 + 64 1 191 190 192 + 65 1 194 193 195 + 66 1 197 196 198 + 67 1 200 199 201 + 68 1 203 202 204 + 69 1 206 205 207 + 70 1 209 208 210 + 71 1 212 211 213 + 72 1 215 214 216 + 73 1 218 217 219 + 74 1 221 220 222 + 75 1 224 223 225 + 76 1 227 226 228 + 77 1 230 229 231 + 78 1 233 232 234 + 79 1 236 235 237 + 80 1 239 238 240 + 81 1 242 241 243 + 82 1 245 244 246 + 83 1 248 247 249 + 84 1 251 250 252 + 85 1 254 253 255 + 86 1 257 256 258 + 87 1 260 259 261 + 88 1 263 262 264 + 89 1 266 265 267 + 90 1 269 268 270 + 91 1 272 271 273 + 92 1 275 274 276 + 93 1 278 277 279 + 94 1 281 280 282 + 95 1 284 283 285 + 96 1 287 286 288 + 97 1 290 289 291 + 98 1 293 292 294 + 99 1 296 295 297 + 100 1 299 298 300 + 101 1 302 301 303 + 102 1 305 304 306 + 103 1 308 307 309 + 104 1 311 310 312 + 105 1 314 313 315 + 106 1 317 316 318 + 107 1 320 319 321 + 108 1 323 322 324 + 109 1 326 325 327 + 110 1 329 328 330 + 111 1 332 331 333 + 112 1 335 334 336 + 113 1 338 337 339 + 114 1 341 340 342 + 115 1 344 343 345 + 116 1 347 346 348 + 117 1 350 349 351 + 118 1 353 352 354 + 119 1 356 355 357 + 120 1 359 358 360 + 121 1 362 361 363 + 122 1 365 364 366 + 123 1 368 367 369 + 124 1 371 370 372 + 125 1 374 373 375 + 126 1 377 376 378 + 127 1 380 379 381 + 128 1 383 382 384 + 129 1 386 385 387 + 130 1 389 388 390 + 131 1 392 391 393 + 132 1 395 394 396 + 133 1 398 397 399 + 134 1 401 400 402 + 135 1 404 403 405 + 136 1 407 406 408 + 137 1 410 409 411 + 138 1 413 412 414 + 139 1 416 415 417 + 140 1 419 418 420 + 141 1 422 421 423 + 142 1 425 424 426 + 143 1 428 427 429 + 144 1 431 430 432 + 145 1 434 433 435 + 146 1 437 436 438 + 147 1 440 439 441 + 148 1 443 442 444 + 149 1 446 445 447 + 150 1 449 448 450 + 151 1 452 451 453 + 152 1 455 454 456 + 153 1 458 457 459 + 154 1 
461 460 462 + 155 1 464 463 465 + 156 1 467 466 468 + 157 1 470 469 471 + 158 1 473 472 474 + 159 1 476 475 477 + 160 1 479 478 480 + 161 1 482 481 483 + 162 1 485 484 486 + 163 1 488 487 489 + 164 1 491 490 492 + 165 1 494 493 495 + 166 1 497 496 498 + 167 1 500 499 501 + 168 1 503 502 504 + 169 1 506 505 507 + 170 1 509 508 510 + 171 1 512 511 513 + 172 1 515 514 516 + 173 1 518 517 519 + 174 1 521 520 522 + 175 1 524 523 525 + 176 1 527 526 528 + 177 1 530 529 531 + 178 1 533 532 534 + 179 1 536 535 537 + 180 1 539 538 540 + 181 1 542 541 543 + 182 1 545 544 546 + 183 1 548 547 549 + 184 1 551 550 552 + 185 1 554 553 555 + 186 1 557 556 558 + 187 1 560 559 561 + 188 1 563 562 564 + 189 1 566 565 567 + 190 1 569 568 570 + 191 1 572 571 573 + 192 1 575 574 576 + 193 1 578 577 579 + 194 1 581 580 582 + 195 1 584 583 585 + 196 1 587 586 588 + 197 1 590 589 591 + 198 1 593 592 594 + 199 1 596 595 597 + 200 1 599 598 600 + 201 1 602 601 603 + 202 1 605 604 606 + 203 1 608 607 609 + 204 1 611 610 612 + 205 1 614 613 615 + 206 1 617 616 618 + 207 1 620 619 621 + 208 1 623 622 624 + 209 1 626 625 627 + 210 1 629 628 630 + 211 1 632 631 633 + 212 1 635 634 636 + 213 1 638 637 639 + 214 1 641 640 642 + 215 1 644 643 645 + 216 1 647 646 648 + 217 1 650 649 651 + 218 1 653 652 654 + 219 1 656 655 657 + 220 1 659 658 660 + 221 1 662 661 663 + 222 1 665 664 666 + 223 1 668 667 669 + 224 1 671 670 672 + 225 1 674 673 675 + 226 1 677 676 678 + 227 1 680 679 681 + 228 1 683 682 684 + 229 1 686 685 687 + 230 1 689 688 690 + 231 1 692 691 693 + 232 1 695 694 696 + 233 1 698 697 699 + 234 1 701 700 702 + 235 1 704 703 705 + 236 1 707 706 708 + 237 1 710 709 711 + 238 1 713 712 714 + 239 1 716 715 717 + 240 1 719 718 720 + 241 1 722 721 723 + 242 1 725 724 726 + 243 1 728 727 729 + 244 1 731 730 732 + 245 1 734 733 735 + 246 1 737 736 738 + 247 1 740 739 741 + 248 1 743 742 744 + 249 1 746 745 747 + 250 1 749 748 750 + 251 1 752 751 753 + 252 1 755 754 756 + 253 1 758 757 759 + 254 1 
761 760 762 + 255 1 764 763 765 + 256 1 767 766 768 + 257 1 770 769 771 + 258 1 773 772 774 + 259 1 776 775 777 + 260 1 779 778 780 + 261 1 782 781 783 + 262 1 785 784 786 + 263 1 788 787 789 + 264 1 791 790 792 + 265 1 794 793 795 + 266 1 797 796 798 + 267 1 800 799 801 + 268 1 803 802 804 + 269 1 806 805 807 + 270 1 809 808 810 + 271 1 812 811 813 + 272 1 815 814 816 + 273 1 818 817 819 + 274 1 821 820 822 + 275 1 824 823 825 + 276 1 827 826 828 + 277 1 830 829 831 + 278 1 833 832 834 + 279 1 836 835 837 + 280 1 839 838 840 + 281 1 842 841 843 + 282 1 845 844 846 + 283 1 848 847 849 + 284 1 851 850 852 + 285 1 854 853 855 + 286 1 857 856 858 + 287 1 860 859 861 + 288 1 863 862 864 + 289 1 866 865 867 + 290 1 869 868 870 + 291 1 872 871 873 + 292 1 875 874 876 + 293 1 878 877 879 + 294 1 881 880 882 + 295 1 884 883 885 + 296 1 887 886 888 + 297 1 890 889 891 + 298 1 893 892 894 + 299 1 896 895 897 + 300 1 899 898 900 + 301 1 902 901 903 + 302 1 905 904 906 + 303 1 908 907 909 + 304 1 911 910 912 + 305 1 914 913 915 + 306 1 917 916 918 + 307 1 920 919 921 + 308 1 923 922 924 + 309 1 926 925 927 + 310 1 929 928 930 + 311 1 932 931 933 + 312 1 935 934 936 + 313 1 938 937 939 + 314 1 941 940 942 + 315 1 944 943 945 + 316 1 947 946 948 + 317 1 950 949 951 + 318 1 953 952 954 + 319 1 956 955 957 + 320 1 959 958 960 + 321 1 962 961 963 + 322 1 965 964 966 + 323 1 968 967 969 + 324 1 971 970 972 + 325 1 974 973 975 + 326 1 977 976 978 + 327 1 980 979 981 + 328 1 983 982 984 + 329 1 986 985 987 + 330 1 989 988 990 + 331 1 992 991 993 + 332 1 995 994 996 + 333 1 998 997 999 + 334 1 1001 1000 1002 + 335 1 1004 1003 1005 + 336 1 1007 1006 1008 + 337 1 1010 1009 1011 + 338 1 1013 1012 1014 + 339 1 1016 1015 1017 + 340 1 1019 1018 1020 + 341 1 1022 1021 1023 + 342 1 1025 1024 1026 + 343 1 1028 1027 1029 + 344 1 1031 1030 1032 + 345 1 1034 1033 1035 + 346 1 1037 1036 1038 + 347 1 1040 1039 1041 + 348 1 1043 1042 1044 + 349 1 1046 1045 1047 + 350 1 1049 1048 1050 + 351 1 1052 
1051 1053 + 352 1 1055 1054 1056 + 353 1 1058 1057 1059 + 354 1 1061 1060 1062 + 355 1 1064 1063 1065 + 356 1 1067 1066 1068 + 357 1 1070 1069 1071 + 358 1 1073 1072 1074 + 359 1 1076 1075 1077 + 360 1 1079 1078 1080 + 361 1 1082 1081 1083 + 362 1 1085 1084 1086 + 363 1 1088 1087 1089 + 364 1 1091 1090 1092 + 365 1 1094 1093 1095 + 366 1 1097 1096 1098 + 367 1 1100 1099 1101 + 368 1 1103 1102 1104 + 369 1 1106 1105 1107 + 370 1 1109 1108 1110 + 371 1 1112 1111 1113 + 372 1 1115 1114 1116 + 373 1 1118 1117 1119 + 374 1 1121 1120 1122 + 375 1 1124 1123 1125 + 376 1 1127 1126 1128 + 377 1 1130 1129 1131 + 378 1 1133 1132 1134 + 379 1 1136 1135 1137 + 380 1 1139 1138 1140 + 381 1 1142 1141 1143 + 382 1 1145 1144 1146 + 383 1 1148 1147 1149 + 384 1 1151 1150 1152 + 385 1 1154 1153 1155 + 386 1 1157 1156 1158 + 387 1 1160 1159 1161 + 388 1 1163 1162 1164 + 389 1 1166 1165 1167 + 390 1 1169 1168 1170 + 391 1 1172 1171 1173 + 392 1 1175 1174 1176 + 393 1 1178 1177 1179 + 394 1 1181 1180 1182 + 395 1 1184 1183 1185 + 396 1 1187 1186 1188 + 397 1 1190 1189 1191 + 398 1 1193 1192 1194 + 399 1 1196 1195 1197 + 400 1 1199 1198 1200 + 401 1 1202 1201 1203 + 402 1 1205 1204 1206 + 403 1 1208 1207 1209 + 404 1 1211 1210 1212 + 405 1 1214 1213 1215 + 406 1 1217 1216 1218 + 407 1 1220 1219 1221 + 408 1 1223 1222 1224 + 409 1 1226 1225 1227 + 410 1 1229 1228 1230 + 411 1 1232 1231 1233 + 412 1 1235 1234 1236 + 413 1 1238 1237 1239 + 414 1 1241 1240 1242 + 415 1 1244 1243 1245 + 416 1 1247 1246 1248 + 417 1 1250 1249 1251 + 418 1 1253 1252 1254 + 419 1 1256 1255 1257 + 420 1 1259 1258 1260 + 421 1 1262 1261 1263 + 422 1 1265 1264 1266 + 423 1 1268 1267 1269 + 424 1 1271 1270 1272 + 425 1 1274 1273 1275 + 426 1 1277 1276 1278 + 427 1 1280 1279 1281 + 428 1 1283 1282 1284 + 429 1 1286 1285 1287 + 430 1 1289 1288 1290 + 431 1 1292 1291 1293 + 432 1 1295 1294 1296 + 433 1 1298 1297 1299 + 434 1 1301 1300 1302 + 435 1 1304 1303 1305 + 436 1 1307 1306 1308 + 437 1 1310 1309 1311 + 438 1 
1313 1312 1314 + 439 1 1316 1315 1317 + 440 1 1319 1318 1320 + 441 1 1322 1321 1323 + 442 1 1325 1324 1326 + 443 1 1328 1327 1329 + 444 1 1331 1330 1332 + 445 1 1334 1333 1335 + 446 1 1337 1336 1338 + 447 1 1340 1339 1341 + 448 1 1343 1342 1344 + 449 1 1346 1345 1347 + 450 1 1349 1348 1350 + 451 1 1352 1351 1353 + 452 1 1355 1354 1356 + 453 1 1358 1357 1359 + 454 1 1361 1360 1362 + 455 1 1364 1363 1365 + 456 1 1367 1366 1368 + 457 1 1370 1369 1371 + 458 1 1373 1372 1374 + 459 1 1376 1375 1377 + 460 1 1379 1378 1380 + 461 1 1382 1381 1383 + 462 1 1385 1384 1386 + 463 1 1388 1387 1389 + 464 1 1391 1390 1392 + 465 1 1394 1393 1395 + 466 1 1397 1396 1398 + 467 1 1400 1399 1401 + 468 1 1403 1402 1404 + 469 1 1406 1405 1407 + 470 1 1409 1408 1410 + 471 1 1412 1411 1413 + 472 1 1415 1414 1416 + 473 1 1418 1417 1419 + 474 1 1421 1420 1422 + 475 1 1424 1423 1425 + 476 1 1427 1426 1428 + 477 1 1430 1429 1431 + 478 1 1433 1432 1434 + 479 1 1436 1435 1437 + 480 1 1439 1438 1440 + 481 1 1442 1441 1443 + 482 1 1445 1444 1446 + 483 1 1448 1447 1449 + 484 1 1451 1450 1452 + 485 1 1454 1453 1455 + 486 1 1457 1456 1458 + 487 1 1460 1459 1461 + 488 1 1463 1462 1464 + 489 1 1466 1465 1467 + 490 1 1469 1468 1470 + 491 1 1472 1471 1473 + 492 1 1475 1474 1476 + 493 1 1478 1477 1479 + 494 1 1481 1480 1482 + 495 1 1484 1483 1485 + 496 1 1487 1486 1488 + 497 1 1490 1489 1491 + 498 1 1493 1492 1494 + 499 1 1496 1495 1497 + 500 1 1499 1498 1500 + 501 1 1502 1501 1503 + 502 1 1505 1504 1506 + 503 1 1508 1507 1509 + 504 1 1511 1510 1512 + 505 1 1514 1513 1515 + 506 1 1517 1516 1518 + 507 1 1520 1519 1521 + 508 1 1523 1522 1524 + 509 1 1526 1525 1527 + 510 1 1529 1528 1530 + 511 1 1532 1531 1533 + 512 1 1535 1534 1536 + 513 1 1538 1537 1539 + 514 1 1541 1540 1542 + 515 1 1544 1543 1545 + 516 1 1547 1546 1548 + 517 1 1550 1549 1551 + 518 1 1553 1552 1554 + 519 1 1556 1555 1557 + 520 1 1559 1558 1560 + 521 1 1562 1561 1563 + 522 1 1565 1564 1566 + 523 1 1568 1567 1569 + 524 1 1571 1570 1572 + 525 
1 1574 1573 1575 + 526 1 1577 1576 1578 + 527 1 1580 1579 1581 + 528 1 1583 1582 1584 + 529 1 1586 1585 1587 + 530 1 1589 1588 1590 + 531 1 1592 1591 1593 + 532 1 1595 1594 1596 + 533 1 1598 1597 1599 + 534 1 1601 1600 1602 + 535 1 1604 1603 1605 + 536 1 1607 1606 1608 + 537 1 1610 1609 1611 + 538 1 1613 1612 1614 + 539 1 1616 1615 1617 + 540 1 1619 1618 1620 + 541 1 1622 1621 1623 + 542 1 1625 1624 1626 + 543 1 1628 1627 1629 + 544 1 1631 1630 1632 + 545 1 1634 1633 1635 + 546 1 1637 1636 1638 + 547 1 1640 1639 1641 + 548 1 1643 1642 1644 + 549 1 1646 1645 1647 + 550 1 1649 1648 1650 + 551 1 1652 1651 1653 + 552 1 1655 1654 1656 + 553 1 1658 1657 1659 + 554 1 1661 1660 1662 + 555 1 1664 1663 1665 + 556 1 1667 1666 1668 + 557 1 1670 1669 1671 + 558 1 1673 1672 1674 + 559 1 1676 1675 1677 + 560 1 1679 1678 1680 + 561 1 1682 1681 1683 + 562 1 1685 1684 1686 + 563 1 1688 1687 1689 + 564 1 1691 1690 1692 + 565 1 1694 1693 1695 + 566 1 1697 1696 1698 + 567 1 1700 1699 1701 + 568 1 1703 1702 1704 + 569 1 1706 1705 1707 + 570 1 1709 1708 1710 + 571 1 1712 1711 1713 + 572 1 1715 1714 1716 + 573 1 1718 1717 1719 + 574 1 1721 1720 1722 + 575 1 1724 1723 1725 + 576 1 1727 1726 1728 + 577 1 1730 1729 1731 + 578 1 1733 1732 1734 + 579 1 1736 1735 1737 + 580 1 1739 1738 1740 + 581 1 1742 1741 1743 + 582 1 1745 1744 1746 + 583 1 1748 1747 1749 + 584 1 1751 1750 1752 + 585 1 1754 1753 1755 + 586 1 1757 1756 1758 + 587 1 1760 1759 1761 + 588 1 1763 1762 1764 + 589 1 1766 1765 1767 + 590 1 1769 1768 1770 + 591 1 1772 1771 1773 + 592 1 1775 1774 1776 + 593 1 1778 1777 1779 + 594 1 1781 1780 1782 + 595 1 1784 1783 1785 + 596 1 1787 1786 1788 + 597 1 1790 1789 1791 + 598 1 1793 1792 1794 + 599 1 1796 1795 1797 + 600 1 1799 1798 1800 + 601 1 1802 1801 1803 + 602 1 1805 1804 1806 + 603 1 1808 1807 1809 + 604 1 1811 1810 1812 + 605 1 1814 1813 1815 + 606 1 1817 1816 1818 + 607 1 1820 1819 1821 + 608 1 1823 1822 1824 + 609 1 1826 1825 1827 + 610 1 1829 1828 1830 + 611 1 1832 1831 1833 + 
612 1 1835 1834 1836 + 613 1 1838 1837 1839 + 614 1 1841 1840 1842 + 615 1 1844 1843 1845 + 616 1 1847 1846 1848 + 617 1 1850 1849 1851 + 618 1 1853 1852 1854 + 619 1 1856 1855 1857 + 620 1 1859 1858 1860 + 621 1 1862 1861 1863 + 622 1 1865 1864 1866 + 623 1 1868 1867 1869 + 624 1 1871 1870 1872 + 625 1 1874 1873 1875 + 626 1 1877 1876 1878 + 627 1 1880 1879 1881 + 628 1 1883 1882 1884 + 629 1 1886 1885 1887 + 630 1 1889 1888 1890 + 631 1 1892 1891 1893 + 632 1 1895 1894 1896 + 633 1 1898 1897 1899 + 634 1 1901 1900 1902 + 635 1 1904 1903 1905 + 636 1 1907 1906 1908 + 637 1 1910 1909 1911 + 638 1 1913 1912 1914 + 639 1 1916 1915 1917 + 640 1 1919 1918 1920 + 641 1 1922 1921 1923 + 642 1 1925 1924 1926 + 643 1 1928 1927 1929 + 644 1 1931 1930 1932 + 645 1 1934 1933 1935 + 646 1 1937 1936 1938 + 647 1 1940 1939 1941 + 648 1 1943 1942 1944 + 649 1 1946 1945 1947 + 650 1 1949 1948 1950 + 651 1 1952 1951 1953 + 652 1 1955 1954 1956 + 653 1 1958 1957 1959 + 654 1 1961 1960 1962 + 655 1 1964 1963 1965 + 656 1 1967 1966 1968 + 657 1 1970 1969 1971 + 658 1 1973 1972 1974 + 659 1 1976 1975 1977 + 660 1 1979 1978 1980 + 661 1 1982 1981 1983 + 662 1 1985 1984 1986 + 663 1 1988 1987 1989 + 664 1 1991 1990 1992 + 665 1 1994 1993 1995 + 666 1 1997 1996 1998 + 667 1 2000 1999 2001 + 668 1 2003 2002 2004 + 669 1 2006 2005 2007 + 670 1 2009 2008 2010 + 671 1 2012 2011 2013 + 672 1 2015 2014 2016 + 673 1 2018 2017 2019 + 674 1 2021 2020 2022 + 675 1 2024 2023 2025 + 676 1 2027 2026 2028 + 677 1 2030 2029 2031 + 678 1 2033 2032 2034 + 679 1 2036 2035 2037 + 680 1 2039 2038 2040 + 681 1 2042 2041 2043 + 682 1 2045 2044 2046 + 683 1 2048 2047 2049 + 684 1 2051 2050 2052 + 685 1 2054 2053 2055 + 686 1 2057 2056 2058 + 687 1 2060 2059 2061 + 688 1 2063 2062 2064 + 689 1 2066 2065 2067 + 690 1 2069 2068 2070 + 691 1 2072 2071 2073 + 692 1 2075 2074 2076 + 693 1 2078 2077 2079 + 694 1 2081 2080 2082 + 695 1 2084 2083 2085 + 696 1 2087 2086 2088 + 697 1 2090 2089 2091 + 698 1 2093 2092 2094 
+ 699 1 2096 2095 2097 + 700 1 2099 2098 2100 + 701 1 2102 2101 2103 + 702 1 2105 2104 2106 + 703 1 2108 2107 2109 + 704 1 2111 2110 2112 + 705 1 2114 2113 2115 + 706 1 2117 2116 2118 + 707 1 2120 2119 2121 + 708 1 2123 2122 2124 + 709 1 2126 2125 2127 + 710 1 2129 2128 2130 + 711 1 2132 2131 2133 + 712 1 2135 2134 2136 + 713 1 2138 2137 2139 + 714 1 2141 2140 2142 + 715 1 2144 2143 2145 + 716 1 2147 2146 2148 + 717 1 2150 2149 2151 + 718 1 2153 2152 2154 + 719 1 2156 2155 2157 + 720 1 2159 2158 2160 + 721 1 2162 2161 2163 + 722 1 2165 2164 2166 + 723 1 2168 2167 2169 + 724 1 2171 2170 2172 + 725 1 2174 2173 2175 + 726 1 2177 2176 2178 + 727 1 2180 2179 2181 + 728 1 2183 2182 2184 + 729 1 2186 2185 2187 + 730 1 2189 2188 2190 + 731 1 2192 2191 2193 + 732 1 2195 2194 2196 + 733 1 2198 2197 2199 + 734 1 2201 2200 2202 + 735 1 2204 2203 2205 + 736 1 2207 2206 2208 + 737 1 2210 2209 2211 + 738 1 2213 2212 2214 + 739 1 2216 2215 2217 + 740 1 2219 2218 2220 + 741 1 2222 2221 2223 + 742 1 2225 2224 2226 + 743 1 2228 2227 2229 + 744 1 2231 2230 2232 + 745 1 2234 2233 2235 + 746 1 2237 2236 2238 + 747 1 2240 2239 2241 + 748 1 2243 2242 2244 + 749 1 2246 2245 2247 + 750 1 2249 2248 2250 + 751 1 2252 2251 2253 + 752 1 2255 2254 2256 + 753 1 2258 2257 2259 + 754 1 2261 2260 2262 + 755 1 2264 2263 2265 + 756 1 2267 2266 2268 + 757 1 2270 2269 2271 + 758 1 2273 2272 2274 + 759 1 2276 2275 2277 + 760 1 2279 2278 2280 + 761 1 2282 2281 2283 + 762 1 2285 2284 2286 + 763 1 2288 2287 2289 + 764 1 2291 2290 2292 + 765 1 2294 2293 2295 + 766 1 2297 2296 2298 + 767 1 2300 2299 2301 + 768 1 2303 2302 2304 + 769 1 2306 2305 2307 + 770 1 2309 2308 2310 + 771 1 2312 2311 2313 + 772 1 2315 2314 2316 + 773 1 2318 2317 2319 + 774 1 2321 2320 2322 + 775 1 2324 2323 2325 + 776 1 2327 2326 2328 + 777 1 2330 2329 2331 + 778 1 2333 2332 2334 + 779 1 2336 2335 2337 + 780 1 2339 2338 2340 + 781 1 2342 2341 2343 + 782 1 2345 2344 2346 + 783 1 2348 2347 2349 + 784 1 2351 2350 2352 + 785 1 2354 2353 
2355 + 786 1 2357 2356 2358 + 787 1 2360 2359 2361 + 788 1 2363 2362 2364 + 789 1 2366 2365 2367 + 790 1 2369 2368 2370 + 791 1 2372 2371 2373 + 792 1 2375 2374 2376 + 793 1 2378 2377 2379 + 794 1 2381 2380 2382 + 795 1 2384 2383 2385 + 796 1 2387 2386 2388 + 797 1 2390 2389 2391 + 798 1 2393 2392 2394 + 799 1 2396 2395 2397 + 800 1 2399 2398 2400 + 801 1 2402 2401 2403 + 802 1 2405 2404 2406 + 803 1 2408 2407 2409 + 804 1 2411 2410 2412 + 805 1 2414 2413 2415 + 806 1 2417 2416 2418 + 807 1 2420 2419 2421 + 808 1 2423 2422 2424 + 809 1 2426 2425 2427 + 810 1 2429 2428 2430 + 811 1 2432 2431 2433 + 812 1 2435 2434 2436 + 813 1 2438 2437 2439 + 814 1 2441 2440 2442 + 815 1 2444 2443 2445 + 816 1 2447 2446 2448 + 817 1 2450 2449 2451 + 818 1 2453 2452 2454 + 819 1 2456 2455 2457 + 820 1 2459 2458 2460 + 821 1 2462 2461 2463 + 822 1 2465 2464 2466 + 823 1 2468 2467 2469 + 824 1 2471 2470 2472 + 825 1 2474 2473 2475 + 826 1 2477 2476 2478 + 827 1 2480 2479 2481 + 828 1 2483 2482 2484 + 829 1 2486 2485 2487 + 830 1 2489 2488 2490 + 831 1 2492 2491 2493 + 832 1 2495 2494 2496 + 833 1 2498 2497 2499 + 834 1 2501 2500 2502 + 835 1 2504 2503 2505 + 836 1 2507 2506 2508 + 837 1 2510 2509 2511 + 838 1 2513 2512 2514 + 839 1 2516 2515 2517 + 840 1 2519 2518 2520 + 841 1 2522 2521 2523 + 842 1 2525 2524 2526 + 843 1 2528 2527 2529 + 844 1 2531 2530 2532 + 845 1 2534 2533 2535 + 846 1 2537 2536 2538 + 847 1 2540 2539 2541 + 848 1 2543 2542 2544 + 849 1 2546 2545 2547 + 850 1 2549 2548 2550 + 851 1 2552 2551 2553 + 852 1 2555 2554 2556 + 853 1 2558 2557 2559 + 854 1 2561 2560 2562 + 855 1 2564 2563 2565 + 856 1 2567 2566 2568 + 857 1 2570 2569 2571 + 858 1 2573 2572 2574 + 859 1 2576 2575 2577 + 860 1 2579 2578 2580 + 861 1 2582 2581 2583 + 862 1 2585 2584 2586 + 863 1 2588 2587 2589 + 864 1 2591 2590 2592 + 865 1 2594 2593 2595 + 866 1 2597 2596 2598 + 867 1 2600 2599 2601 + 868 1 2603 2602 2604 + 869 1 2606 2605 2607 + 870 1 2609 2608 2610 + 871 1 2612 2611 2613 + 872 1 2615 
2614 2616 + 873 1 2618 2617 2619 + 874 1 2621 2620 2622 + 875 1 2624 2623 2625 + 876 1 2627 2626 2628 + 877 1 2630 2629 2631 + 878 1 2633 2632 2634 + 879 1 2636 2635 2637 + 880 1 2639 2638 2640 + 881 1 2642 2641 2643 + 882 1 2645 2644 2646 + 883 1 2648 2647 2649 + 884 1 2651 2650 2652 + 885 1 2654 2653 2655 + 886 1 2657 2656 2658 + 887 1 2660 2659 2661 + 888 1 2663 2662 2664 + 889 1 2666 2665 2667 + 890 1 2669 2668 2670 + 891 1 2672 2671 2673 + 892 1 2675 2674 2676 + 893 1 2678 2677 2679 + 894 1 2681 2680 2682 + 895 1 2684 2683 2685 + 896 1 2687 2686 2688 + 897 1 2690 2689 2691 + 898 1 2693 2692 2694 + 899 1 2696 2695 2697 + 900 1 2699 2698 2700 + 901 1 2702 2701 2703 + 902 1 2705 2704 2706 + 903 1 2708 2707 2709 + 904 1 2711 2710 2712 + 905 1 2714 2713 2715 + 906 1 2717 2716 2718 + 907 1 2720 2719 2721 + 908 1 2723 2722 2724 + 909 1 2726 2725 2727 + 910 1 2729 2728 2730 + 911 1 2732 2731 2733 + 912 1 2735 2734 2736 + 913 1 2738 2737 2739 + 914 1 2741 2740 2742 + 915 1 2744 2743 2745 + 916 1 2747 2746 2748 + 917 1 2750 2749 2751 + 918 1 2753 2752 2754 + 919 1 2756 2755 2757 + 920 1 2759 2758 2760 + 921 1 2762 2761 2763 + 922 1 2765 2764 2766 + 923 1 2768 2767 2769 + 924 1 2771 2770 2772 + 925 1 2774 2773 2775 + 926 1 2777 2776 2778 + 927 1 2780 2779 2781 + 928 1 2783 2782 2784 + 929 1 2786 2785 2787 + 930 1 2789 2788 2790 + 931 1 2792 2791 2793 + 932 1 2795 2794 2796 + 933 1 2798 2797 2799 + 934 1 2801 2800 2802 + 935 1 2804 2803 2805 + 936 1 2807 2806 2808 + 937 1 2810 2809 2811 + 938 1 2813 2812 2814 + 939 1 2816 2815 2817 + 940 1 2819 2818 2820 + 941 1 2822 2821 2823 + 942 1 2825 2824 2826 + 943 1 2828 2827 2829 + 944 1 2831 2830 2832 + 945 1 2834 2833 2835 + 946 1 2837 2836 2838 + 947 1 2840 2839 2841 + 948 1 2843 2842 2844 + 949 1 2846 2845 2847 + 950 1 2849 2848 2850 + 951 1 2852 2851 2853 + 952 1 2855 2854 2856 + 953 1 2858 2857 2859 + 954 1 2861 2860 2862 + 955 1 2864 2863 2865 + 956 1 2867 2866 2868 + 957 1 2870 2869 2871 + 958 1 2873 2872 2874 + 959 1 
2876 2875 2877 + 960 1 2879 2878 2880 + 961 1 2882 2881 2883 + 962 1 2885 2884 2886 + 963 1 2888 2887 2889 + 964 1 2891 2890 2892 + 965 1 2894 2893 2895 + 966 1 2897 2896 2898 + 967 1 2900 2899 2901 + 968 1 2903 2902 2904 + 969 1 2906 2905 2907 + 970 1 2909 2908 2910 + 971 1 2912 2911 2913 + 972 1 2915 2914 2916 + 973 1 2918 2917 2919 + 974 1 2921 2920 2922 + 975 1 2924 2923 2925 + 976 1 2927 2926 2928 + 977 1 2930 2929 2931 + 978 1 2933 2932 2934 + 979 1 2936 2935 2937 + 980 1 2939 2938 2940 + 981 1 2942 2941 2943 + 982 1 2945 2944 2946 + 983 1 2948 2947 2949 + 984 1 2951 2950 2952 + 985 1 2954 2953 2955 + 986 1 2957 2956 2958 + 987 1 2960 2959 2961 + 988 1 2963 2962 2964 + 989 1 2966 2965 2967 + 990 1 2969 2968 2970 + 991 1 2972 2971 2973 + 992 1 2975 2974 2976 + 993 1 2978 2977 2979 + 994 1 2981 2980 2982 + 995 1 2984 2983 2985 + 996 1 2987 2986 2988 + 997 1 2990 2989 2991 + 998 1 2993 2992 2994 + 999 1 2996 2995 2997 + 1000 1 2999 2998 3000 + 1001 1 3002 3001 3003 + 1002 1 3005 3004 3006 + 1003 1 3008 3007 3009 + 1004 1 3011 3010 3012 + 1005 1 3014 3013 3015 + 1006 1 3017 3016 3018 + 1007 1 3020 3019 3021 + 1008 1 3023 3022 3024 + 1009 1 3026 3025 3027 + 1010 1 3029 3028 3030 + 1011 1 3032 3031 3033 + 1012 1 3035 3034 3036 + 1013 1 3038 3037 3039 + 1014 1 3041 3040 3042 + 1015 1 3044 3043 3045 + 1016 1 3047 3046 3048 + 1017 1 3050 3049 3051 + 1018 1 3053 3052 3054 + 1019 1 3056 3055 3057 + 1020 1 3059 3058 3060 + 1021 1 3062 3061 3063 + 1022 1 3065 3064 3066 + 1023 1 3068 3067 3069 + 1024 1 3071 3070 3072 + 1025 1 3074 3073 3075 + 1026 1 3077 3076 3078 + 1027 1 3080 3079 3081 + 1028 1 3083 3082 3084 + 1029 1 3086 3085 3087 + 1030 1 3089 3088 3090 + 1031 1 3092 3091 3093 + 1032 1 3095 3094 3096 + 1033 1 3098 3097 3099 + 1034 1 3101 3100 3102 + 1035 1 3104 3103 3105 + 1036 1 3107 3106 3108 + 1037 1 3110 3109 3111 + 1038 1 3113 3112 3114 + 1039 1 3116 3115 3117 + 1040 1 3119 3118 3120 + 1041 1 3122 3121 3123 + 1042 1 3125 3124 3126 + 1043 1 3128 3127 3129 + 1044 1 
3131 3130 3132 + 1045 1 3134 3133 3135 + 1046 1 3137 3136 3138 + 1047 1 3140 3139 3141 + 1048 1 3143 3142 3144 + 1049 1 3146 3145 3147 + 1050 1 3149 3148 3150 + 1051 1 3152 3151 3153 + 1052 1 3155 3154 3156 + 1053 1 3158 3157 3159 + 1054 1 3161 3160 3162 + 1055 1 3164 3163 3165 + 1056 1 3167 3166 3168 + 1057 1 3170 3169 3171 + 1058 1 3173 3172 3174 + 1059 1 3176 3175 3177 + 1060 1 3179 3178 3180 + 1061 1 3182 3181 3183 + 1062 1 3185 3184 3186 + 1063 1 3188 3187 3189 + 1064 1 3191 3190 3192 + 1065 1 3194 3193 3195 + 1066 1 3197 3196 3198 + 1067 1 3200 3199 3201 + 1068 1 3203 3202 3204 + 1069 1 3206 3205 3207 + 1070 1 3209 3208 3210 + 1071 1 3212 3211 3213 + 1072 1 3215 3214 3216 + 1073 1 3218 3217 3219 + 1074 1 3221 3220 3222 + 1075 1 3224 3223 3225 + 1076 1 3227 3226 3228 + 1077 1 3230 3229 3231 + 1078 1 3233 3232 3234 + 1079 1 3236 3235 3237 + 1080 1 3239 3238 3240 + 1081 1 3242 3241 3243 + 1082 1 3245 3244 3246 + 1083 1 3248 3247 3249 + 1084 1 3251 3250 3252 + 1085 1 3254 3253 3255 + 1086 1 3257 3256 3258 + 1087 1 3260 3259 3261 + 1088 1 3263 3262 3264 + 1089 1 3266 3265 3267 + 1090 1 3269 3268 3270 + 1091 1 3272 3271 3273 + 1092 1 3275 3274 3276 + 1093 1 3278 3277 3279 + 1094 1 3281 3280 3282 + 1095 1 3284 3283 3285 + 1096 1 3287 3286 3288 + 1097 1 3290 3289 3291 + 1098 1 3293 3292 3294 + 1099 1 3296 3295 3297 + 1100 1 3299 3298 3300 + 1101 1 3302 3301 3303 + 1102 1 3305 3304 3306 + 1103 1 3308 3307 3309 + 1104 1 3311 3310 3312 + 1105 1 3314 3313 3315 + 1106 1 3317 3316 3318 + 1107 1 3320 3319 3321 + 1108 1 3323 3322 3324 + 1109 1 3326 3325 3327 + 1110 1 3329 3328 3330 + 1111 1 3332 3331 3333 + 1112 1 3335 3334 3336 + 1113 1 3338 3337 3339 + 1114 1 3341 3340 3342 + 1115 1 3344 3343 3345 + 1116 1 3347 3346 3348 + 1117 1 3350 3349 3351 + 1118 1 3353 3352 3354 + 1119 1 3356 3355 3357 + 1120 1 3359 3358 3360 + 1121 1 3362 3361 3363 + 1122 1 3365 3364 3366 + 1123 1 3368 3367 3369 + 1124 1 3371 3370 3372 + 1125 1 3374 3373 3375 + 1126 1 3377 3376 3378 + 1127 1 3380 
3379 3381 + 1128 1 3383 3382 3384 + 1129 1 3386 3385 3387 + 1130 1 3389 3388 3390 + 1131 1 3392 3391 3393 + 1132 1 3395 3394 3396 + 1133 1 3398 3397 3399 + 1134 1 3401 3400 3402 + 1135 1 3404 3403 3405 + 1136 1 3407 3406 3408 + 1137 1 3410 3409 3411 + 1138 1 3413 3412 3414 + 1139 1 3416 3415 3417 + 1140 1 3419 3418 3420 + 1141 1 3422 3421 3423 + 1142 1 3425 3424 3426 + 1143 1 3428 3427 3429 + 1144 1 3431 3430 3432 + 1145 1 3434 3433 3435 + 1146 1 3437 3436 3438 + 1147 1 3440 3439 3441 + 1148 1 3443 3442 3444 + 1149 1 3446 3445 3447 + 1150 1 3449 3448 3450 + 1151 1 3452 3451 3453 + 1152 1 3455 3454 3456 + 1153 1 3458 3457 3459 + 1154 1 3461 3460 3462 + 1155 1 3464 3463 3465 + 1156 1 3467 3466 3468 + 1157 1 3470 3469 3471 + 1158 1 3473 3472 3474 + 1159 1 3476 3475 3477 + 1160 1 3479 3478 3480 + 1161 1 3482 3481 3483 + 1162 1 3485 3484 3486 + 1163 1 3488 3487 3489 + 1164 1 3491 3490 3492 + 1165 1 3494 3493 3495 + 1166 1 3497 3496 3498 + 1167 1 3500 3499 3501 + 1168 1 3503 3502 3504 + 1169 1 3506 3505 3507 + 1170 1 3509 3508 3510 + 1171 1 3512 3511 3513 + 1172 1 3515 3514 3516 + 1173 1 3518 3517 3519 + 1174 1 3521 3520 3522 + 1175 1 3524 3523 3525 + 1176 1 3527 3526 3528 + 1177 1 3530 3529 3531 + 1178 1 3533 3532 3534 + 1179 1 3536 3535 3537 + 1180 1 3539 3538 3540 + 1181 1 3542 3541 3543 + 1182 1 3545 3544 3546 + 1183 1 3548 3547 3549 + 1184 1 3551 3550 3552 + 1185 1 3554 3553 3555 + 1186 1 3557 3556 3558 + 1187 1 3560 3559 3561 + 1188 1 3563 3562 3564 + 1189 1 3566 3565 3567 + 1190 1 3569 3568 3570 + 1191 1 3572 3571 3573 + 1192 1 3575 3574 3576 + 1193 1 3578 3577 3579 + 1194 1 3581 3580 3582 + 1195 1 3584 3583 3585 + 1196 1 3587 3586 3588 + 1197 1 3590 3589 3591 + 1198 1 3593 3592 3594 + 1199 1 3596 3595 3597 + 1200 1 3599 3598 3600 + 1201 1 3602 3601 3603 + 1202 1 3605 3604 3606 + 1203 1 3608 3607 3609 + 1204 1 3611 3610 3612 + 1205 1 3614 3613 3615 + 1206 1 3617 3616 3618 + 1207 1 3620 3619 3621 + 1208 1 3623 3622 3624 + 1209 1 3626 3625 3627 + 1210 1 3629 3628 
3630 + 1211 1 3632 3631 3633 + 1212 1 3635 3634 3636 + 1213 1 3638 3637 3639 + 1214 1 3641 3640 3642 + 1215 1 3644 3643 3645 + 1216 1 3647 3646 3648 + 1217 1 3650 3649 3651 + 1218 1 3653 3652 3654 + 1219 1 3656 3655 3657 + 1220 1 3659 3658 3660 + 1221 1 3662 3661 3663 + 1222 1 3665 3664 3666 + 1223 1 3668 3667 3669 + 1224 1 3671 3670 3672 + 1225 1 3674 3673 3675 + 1226 1 3677 3676 3678 + 1227 1 3680 3679 3681 + 1228 1 3683 3682 3684 + 1229 1 3686 3685 3687 + 1230 1 3689 3688 3690 + 1231 1 3692 3691 3693 + 1232 1 3695 3694 3696 + 1233 1 3698 3697 3699 + 1234 1 3701 3700 3702 + 1235 1 3704 3703 3705 + 1236 1 3707 3706 3708 + 1237 1 3710 3709 3711 + 1238 1 3713 3712 3714 + 1239 1 3716 3715 3717 + 1240 1 3719 3718 3720 + 1241 1 3722 3721 3723 + 1242 1 3725 3724 3726 + 1243 1 3728 3727 3729 + 1244 1 3731 3730 3732 + 1245 1 3734 3733 3735 + 1246 1 3737 3736 3738 + 1247 1 3740 3739 3741 + 1248 1 3743 3742 3744 + 1249 1 3746 3745 3747 + 1250 1 3749 3748 3750 + 1251 1 3752 3751 3753 + 1252 1 3755 3754 3756 + 1253 1 3758 3757 3759 + 1254 1 3761 3760 3762 + 1255 1 3764 3763 3765 + 1256 1 3767 3766 3768 + 1257 1 3770 3769 3771 + 1258 1 3773 3772 3774 + 1259 1 3776 3775 3777 + 1260 1 3779 3778 3780 + 1261 1 3782 3781 3783 + 1262 1 3785 3784 3786 + 1263 1 3788 3787 3789 + 1264 1 3791 3790 3792 + 1265 1 3794 3793 3795 + 1266 1 3797 3796 3798 + 1267 1 3800 3799 3801 + 1268 1 3803 3802 3804 + 1269 1 3806 3805 3807 + 1270 1 3809 3808 3810 + 1271 1 3812 3811 3813 + 1272 1 3815 3814 3816 + 1273 1 3818 3817 3819 + 1274 1 3821 3820 3822 + 1275 1 3824 3823 3825 + 1276 1 3827 3826 3828 + 1277 1 3830 3829 3831 + 1278 1 3833 3832 3834 + 1279 1 3836 3835 3837 + 1280 1 3839 3838 3840 + 1281 1 3842 3841 3843 + 1282 1 3845 3844 3846 + 1283 1 3848 3847 3849 + 1284 1 3851 3850 3852 + 1285 1 3854 3853 3855 + 1286 1 3857 3856 3858 + 1287 1 3860 3859 3861 + 1288 1 3863 3862 3864 + 1289 1 3866 3865 3867 + 1290 1 3869 3868 3870 + 1291 1 3872 3871 3873 + 1292 1 3875 3874 3876 + 1293 1 3878 3877 3879 + 
1294 1 3881 3880 3882 + 1295 1 3884 3883 3885 + 1296 1 3887 3886 3888 + 1297 1 3890 3889 3891 + 1298 1 3893 3892 3894 + 1299 1 3896 3895 3897 + 1300 1 3899 3898 3900 + 1301 1 3902 3901 3903 + 1302 1 3905 3904 3906 + 1303 1 3908 3907 3909 + 1304 1 3911 3910 3912 + 1305 1 3914 3913 3915 + 1306 1 3917 3916 3918 + 1307 1 3920 3919 3921 + 1308 1 3923 3922 3924 + 1309 1 3926 3925 3927 + 1310 1 3929 3928 3930 + 1311 1 3932 3931 3933 + 1312 1 3935 3934 3936 + 1313 1 3938 3937 3939 + 1314 1 3941 3940 3942 + 1315 1 3944 3943 3945 + 1316 1 3947 3946 3948 + 1317 1 3950 3949 3951 + 1318 1 3953 3952 3954 + 1319 1 3956 3955 3957 + 1320 1 3959 3958 3960 + 1321 1 3962 3961 3963 + 1322 1 3965 3964 3966 + 1323 1 3968 3967 3969 + 1324 1 3971 3970 3972 + 1325 1 3974 3973 3975 + 1326 1 3977 3976 3978 + 1327 1 3980 3979 3981 + 1328 1 3983 3982 3984 + 1329 1 3986 3985 3987 + 1330 1 3989 3988 3990 + 1331 1 3992 3991 3993 + 1332 1 3995 3994 3996 + 1333 1 3998 3997 3999 + 1334 1 4001 4000 4002 + 1335 1 4004 4003 4005 + 1336 1 4007 4006 4008 + 1337 1 4010 4009 4011 + 1338 1 4013 4012 4014 + 1339 1 4016 4015 4017 + 1340 1 4019 4018 4020 + 1341 1 4022 4021 4023 + 1342 1 4025 4024 4026 + 1343 1 4028 4027 4029 + 1344 1 4031 4030 4032 + 1345 1 4034 4033 4035 + 1346 1 4037 4036 4038 + 1347 1 4040 4039 4041 + 1348 1 4043 4042 4044 + 1349 1 4046 4045 4047 + 1350 1 4049 4048 4050 + 1351 1 4052 4051 4053 + 1352 1 4055 4054 4056 + 1353 1 4058 4057 4059 + 1354 1 4061 4060 4062 + 1355 1 4064 4063 4065 + 1356 1 4067 4066 4068 + 1357 1 4070 4069 4071 + 1358 1 4073 4072 4074 + 1359 1 4076 4075 4077 + 1360 1 4079 4078 4080 + 1361 1 4082 4081 4083 + 1362 1 4085 4084 4086 + 1363 1 4088 4087 4089 + 1364 1 4091 4090 4092 + 1365 1 4094 4093 4095 + 1366 1 4097 4096 4098 + 1367 1 4100 4099 4101 + 1368 1 4103 4102 4104 + 1369 1 4106 4105 4107 + 1370 1 4109 4108 4110 + 1371 1 4112 4111 4113 + 1372 1 4115 4114 4116 + 1373 1 4118 4117 4119 + 1374 1 4121 4120 4122 + 1375 1 4124 4123 4125 + 1376 1 4127 4126 4128 + 1377 1 
4130 4129 4131 + 1378 1 4133 4132 4134 + 1379 1 4136 4135 4137 + 1380 1 4139 4138 4140 + 1381 1 4142 4141 4143 + 1382 1 4145 4144 4146 + 1383 1 4148 4147 4149 + 1384 1 4151 4150 4152 + 1385 1 4154 4153 4155 + 1386 1 4157 4156 4158 + 1387 1 4160 4159 4161 + 1388 1 4163 4162 4164 + 1389 1 4166 4165 4167 + 1390 1 4169 4168 4170 + 1391 1 4172 4171 4173 + 1392 1 4175 4174 4176 + 1393 1 4178 4177 4179 + 1394 1 4181 4180 4182 + 1395 1 4184 4183 4185 + 1396 1 4187 4186 4188 + 1397 1 4190 4189 4191 + 1398 1 4193 4192 4194 + 1399 1 4196 4195 4197 + 1400 1 4199 4198 4200 + 1401 1 4202 4201 4203 + 1402 1 4205 4204 4206 + 1403 1 4208 4207 4209 + 1404 1 4211 4210 4212 + 1405 1 4214 4213 4215 + 1406 1 4217 4216 4218 + 1407 1 4220 4219 4221 + 1408 1 4223 4222 4224 + 1409 1 4226 4225 4227 + 1410 1 4229 4228 4230 + 1411 1 4232 4231 4233 + 1412 1 4235 4234 4236 + 1413 1 4238 4237 4239 + 1414 1 4241 4240 4242 + 1415 1 4244 4243 4245 + 1416 1 4247 4246 4248 + 1417 1 4250 4249 4251 + 1418 1 4253 4252 4254 + 1419 1 4256 4255 4257 + 1420 1 4259 4258 4260 + 1421 1 4262 4261 4263 + 1422 1 4265 4264 4266 + 1423 1 4268 4267 4269 + 1424 1 4271 4270 4272 + 1425 1 4274 4273 4275 + 1426 1 4277 4276 4278 + 1427 1 4280 4279 4281 + 1428 1 4283 4282 4284 + 1429 1 4286 4285 4287 + 1430 1 4289 4288 4290 + 1431 1 4292 4291 4293 + 1432 1 4295 4294 4296 + 1433 1 4298 4297 4299 + 1434 1 4301 4300 4302 + 1435 1 4304 4303 4305 + 1436 1 4307 4306 4308 + 1437 1 4310 4309 4311 + 1438 1 4313 4312 4314 + 1439 1 4316 4315 4317 + 1440 1 4319 4318 4320 + 1441 1 4322 4321 4323 + 1442 1 4325 4324 4326 + 1443 1 4328 4327 4329 + 1444 1 4331 4330 4332 + 1445 1 4334 4333 4335 + 1446 1 4337 4336 4338 + 1447 1 4340 4339 4341 + 1448 1 4343 4342 4344 + 1449 1 4346 4345 4347 + 1450 1 4349 4348 4350 + 1451 1 4352 4351 4353 + 1452 1 4355 4354 4356 + 1453 1 4358 4357 4359 + 1454 1 4361 4360 4362 + 1455 1 4364 4363 4365 + 1456 1 4367 4366 4368 + 1457 1 4370 4369 4371 + 1458 1 4373 4372 4374 + 1459 1 4376 4375 4377 + 1460 1 4379 
4378 4380 + 1461 1 4382 4381 4383 + 1462 1 4385 4384 4386 + 1463 1 4388 4387 4389 + 1464 1 4391 4390 4392 + 1465 1 4394 4393 4395 + 1466 1 4397 4396 4398 + 1467 1 4400 4399 4401 + 1468 1 4403 4402 4404 + 1469 1 4406 4405 4407 + 1470 1 4409 4408 4410 + 1471 1 4412 4411 4413 + 1472 1 4415 4414 4416 + 1473 1 4418 4417 4419 + 1474 1 4421 4420 4422 + 1475 1 4424 4423 4425 + 1476 1 4427 4426 4428 + 1477 1 4430 4429 4431 + 1478 1 4433 4432 4434 + 1479 1 4436 4435 4437 + 1480 1 4439 4438 4440 + 1481 1 4442 4441 4443 + 1482 1 4445 4444 4446 + 1483 1 4448 4447 4449 + 1484 1 4451 4450 4452 + 1485 1 4454 4453 4455 + 1486 1 4457 4456 4458 + 1487 1 4460 4459 4461 + 1488 1 4463 4462 4464 + 1489 1 4466 4465 4467 + 1490 1 4469 4468 4470 + 1491 1 4472 4471 4473 + 1492 1 4475 4474 4476 + 1493 1 4478 4477 4479 + 1494 1 4481 4480 4482 + 1495 1 4484 4483 4485 + 1496 1 4487 4486 4488 + 1497 1 4490 4489 4491 + 1498 1 4493 4492 4494 + 1499 1 4496 4495 4497 + 1500 1 4499 4498 4500 diff --git a/examples/python/funcs.py b/examples/python/funcs.py index 2f4830676e..f38aca53f2 100644 --- a/examples/python/funcs.py +++ b/examples/python/funcs.py @@ -1,9 +1,10 @@ # Python function that implements a loop of short runs # calls back to LAMMPS via "lmp" instance # lammps() must be called with ptr=lmpptr for this to work +from __future__ import print_function def loop(N,cut0,thresh,lmpptr): - print "LOOP ARGS",N,cut0,thresh,lmpptr + print("LOOP ARGS",N,cut0,thresh,lmpptr) from lammps import lammps lmp = lammps(ptr=lmpptr) natoms = lmp.get_natoms() @@ -12,11 +13,12 @@ def loop(N,cut0,thresh,lmpptr): cut = cut0 + i*0.1 lmp.set_variable("cut",cut) # set a variable in LAMMPS + lmp.command("pair_style lj/cut ${cut}") # LAMMPS command #lmp.command("pair_style lj/cut %d" % cut) # LAMMPS command option lmp.command("pair_coeff * * 1.0 1.0") # ditto lmp.command("run 10") # ditto pe = lmp.extract_compute("thermo_pe",0,0) # extract total PE from LAMMPS - print "PE",pe/natoms,thresh + print("PE",pe/natoms,thresh) 
if pe/natoms < thresh: return diff --git a/examples/python/in.fix_python b/examples/python/in.fix_python new file mode 100644 index 0000000000..c98029a63b --- /dev/null +++ b/examples/python/in.fix_python @@ -0,0 +1,50 @@ +# 3d Lennard-Jones melt + +units lj +atom_style atomic + +lattice fcc 0.8442 +region box block 0 10 0 10 0 10 +create_box 1 box +create_atoms 1 box +mass 1 1.0 + +velocity all create 3.0 87287 + +pair_style lj/cut 2.5 +pair_coeff 1 1 1.0 1.0 2.5 + +neighbor 0.3 bin +neigh_modify every 20 delay 0 check no + +python end_of_step_callback here """ +from __future__ import print_function +from lammps import lammps + +def end_of_step_callback(lmp): + L = lammps(ptr=lmp) + t = L.extract_global("ntimestep", 0) + print("### END OF STEP ###", t) + +def post_force_callback(lmp, v): + L = lammps(ptr=lmp) + t = L.extract_global("ntimestep", 0) + print("### POST_FORCE ###", t) +""" + +fix 1 all nve +fix 2 all python 50 end_of_step end_of_step_callback +fix 3 all python 50 post_force post_force_callback + +#dump id all atom 50 dump.melt + +#dump 2 all image 25 image.*.jpg type type & +# axes yes 0.8 0.02 view 60 -30 +#dump_modify 2 pad 3 + +#dump 3 all movie 25 movie.mpg type type & +# axes yes 0.8 0.02 view 60 -30 +#dump_modify 3 pad 3 + +thermo 50 +run 250 diff --git a/examples/python/in.pair_python_coulomb b/examples/python/in.pair_python_coulomb new file mode 100644 index 0000000000..7eb8599ac2 --- /dev/null +++ b/examples/python/in.pair_python_coulomb @@ -0,0 +1,42 @@ +units real +atom_style full + +read_data data.spce + +pair_style hybrid/overlay coul/cut 12.0 python 12.0 + +pair_coeff * * coul/cut +pair_coeff * * python py_pot.LJCutSPCE OW NULL + +bond_style harmonic +angle_style harmonic +dihedral_style none +improper_style none + +bond_coeff 1 1000.00 1.000 +angle_coeff 1 100.0 109.47 + +special_bonds lj/coul 0.0 0.0 1.0 + +neighbor 2.0 bin + +fix 1 all shake 0.0001 20 0 b 1 a 1 +fix 2 all nvt temp 300.0 300.0 100.0 + +# create combined lj/coul table 
for all atom types +# generate tabulated potential from python variant +pair_write 1 1 2000 rsq 0.1 12 spce.table OW-OW -0.8472 -0.8472 +pair_write 1 2 2000 rsq 0.1 12 spce.table OW-HW -0.8472 0.4236 +pair_write 2 2 2000 rsq 0.1 12 spce.table HW-HW 0.4236 0.4236 + +# switch to tabulated potential +pair_style table linear 2000 pppm +pair_coeff 1 1 spce.table OW-OW +pair_coeff 1 2 spce.table OW-HW +pair_coeff 2 2 spce.table HW-HW + +thermo 10 +run 100 + +shell rm spce.table + diff --git a/examples/python/in.pair_python_hybrid b/examples/python/in.pair_python_hybrid new file mode 100644 index 0000000000..5d5157ae6d --- /dev/null +++ b/examples/python/in.pair_python_hybrid @@ -0,0 +1,63 @@ +# 3d Lennard-Jones hybrid + +units lj +atom_style atomic + +lattice fcc 0.8442 +region box block 0 10 0 10 0 10 +create_box 2 box +create_atoms 1 box +mass * 1.0 +region half block -0.1 4.9 0 10 0 10 +set region half type 2 + +velocity all create 3.0 87287 + +pair_style hybrid lj/cut 2.5 python 2.5 +pair_coeff * * python py_pot.LJCutMelt lj NULL +pair_coeff * 2 lj/cut 1.0 1.0 + +neighbor 0.3 bin +neigh_modify every 20 delay 0 check no + +fix 1 all nve + +thermo 50 +run 250 + +write_data hybrid.data +write_restart hybrid.restart + +clear + +read_restart hybrid.restart + +pair_style hybrid lj/cut 2.5 python 2.5 +pair_coeff * * python py_pot.LJCutMelt lj NULL +pair_coeff * 2 lj/cut 1.0 1.0 + +fix 1 all nve + +thermo 50 +run 250 + +clear + +units lj +atom_style atomic + +read_data hybrid.data + +pair_style hybrid lj/cut 2.5 python 2.5 +pair_coeff * * python py_pot.LJCutMelt lj NULL +pair_coeff * 2 lj/cut 1.0 1.0 + +neighbor 0.3 bin +neigh_modify every 20 delay 0 check no + +fix 1 all nve + +thermo 50 +run 250 + +shell rm hybrid.data hybrid.restart diff --git a/examples/python/in.pair_python_long b/examples/python/in.pair_python_long new file mode 100644 index 0000000000..a600e824df --- /dev/null +++ b/examples/python/in.pair_python_long @@ -0,0 +1,38 @@ +units real +atom_style full + 
+read_data data.spce + +pair_style python 12.0 +pair_coeff * * py_pot.LJCutSPCE OW HW + +bond_style harmonic +angle_style harmonic +dihedral_style none +improper_style none + +bond_coeff 1 1000.00 1.000 +angle_coeff 1 100.0 109.47 + +special_bonds lj/coul 0.0 0.0 1.0 + +neighbor 2.0 bin + +fix 1 all shake 0.0001 20 0 b 1 a 1 +fix 2 all nvt temp 300.0 300.0 100.0 + +# create only lj/cut table for the oxygen atoms from python +shell rm -f spce.table +pair_write 1 1 2000 rsq 0.1 12 spce.table OW-OW + +# switch to tabulated potential with long-range coulomb as overlay +pair_style hybrid/overlay coul/long 12.0 table linear 2000 +kspace_style pppm 1.0e-6 +pair_coeff * * coul/long +pair_coeff 1 1 table spce.table OW-OW + +thermo 10 +run 100 + +shell rm spce.table + diff --git a/examples/python/in.pair_python_melt b/examples/python/in.pair_python_melt new file mode 100644 index 0000000000..3f775e6651 --- /dev/null +++ b/examples/python/in.pair_python_melt @@ -0,0 +1,58 @@ +# 3d Lennard-Jones melt + +units lj +atom_style atomic + +lattice fcc 0.8442 +region box block 0 10 0 10 0 10 +create_box 1 box +create_atoms 1 box +mass * 1.0 + +velocity all create 3.0 87287 + +pair_style python 2.5 +pair_coeff * * py_pot.LJCutMelt lj + +neighbor 0.3 bin +neigh_modify every 20 delay 0 check no + +fix 1 all nve + +thermo 50 +run 250 + +write_data melt.data +write_restart melt.restart + +clear + +read_restart melt.restart + +pair_style python 2.5 +pair_coeff * * py_pot.LJCutMelt lj + +fix 1 all nve + +thermo 50 +run 250 + +clear + +units lj +atom_style atomic + +read_data melt.data + +pair_style python 2.5 +pair_coeff * * py_pot.LJCutMelt lj + +neighbor 0.3 bin +neigh_modify every 20 delay 0 check no + +fix 1 all nve + +thermo 50 +run 250 + +shell rm melt.data melt.restart diff --git a/examples/python/in.pair_python_spce b/examples/python/in.pair_python_spce new file mode 100644 index 0000000000..5bd9e1e23a --- /dev/null +++ b/examples/python/in.pair_python_spce @@ -0,0 +1,28 @@ +units 
real +atom_style full + +read_data data.spce + +pair_style hybrid/overlay coul/long 12.0 python 12.0 +kspace_style pppm 1.0e-6 + +pair_coeff * * coul/long +pair_coeff * * python py_pot.LJCutSPCE OW NULL + +bond_style harmonic +angle_style harmonic +dihedral_style none +improper_style none + +bond_coeff 1 1000.00 1.000 +angle_coeff 1 100.0 109.47 + +special_bonds lj/coul 0.0 0.0 1.0 + +neighbor 2.0 bin + +fix 1 all shake 0.0001 20 0 b 1 a 1 +fix 2 all nvt temp 300.0 300.0 100.0 + +thermo 10 +run 100 diff --git a/examples/python/in.pair_python_table b/examples/python/in.pair_python_table new file mode 100644 index 0000000000..761a6efd78 --- /dev/null +++ b/examples/python/in.pair_python_table @@ -0,0 +1,32 @@ +# 3d Lennard-Jones melt + +units lj +atom_style atomic + +lattice fcc 0.8442 +region box block 0 10 0 10 0 10 +create_box 1 box +create_atoms 1 box +mass * 1.0 + +velocity all create 3.0 87287 + +pair_style python 2.5 +pair_coeff * * py_pot.LJCutMelt lj + +# generate tabulated potential from python variant +pair_write 1 1 2000 rsq 0.01 2.5 lj_1_1.table LJ + +pair_style table linear 2000 +pair_coeff 1 1 lj_1_1.table LJ + +neighbor 0.3 bin +neigh_modify every 20 delay 0 check no + +fix 1 all nve + +thermo 50 +run 250 + +shell rm lj_1_1.table + diff --git a/examples/python/in.python b/examples/python/in.python index cb2013fdd3..c5aa504d43 100644 --- a/examples/python/in.python +++ b/examples/python/in.python @@ -25,13 +25,14 @@ run 10 # example of catching a syntax error python simple here """ +from __future__ import print_function + def simple(): - import exceptions - print "Inside simple function" + print("Inside simple function") try: foo += 1 - except Exception, e: - print "FOO error:",e + except Exception as e: + print("FOO error:", e) """ python simple invoke diff --git a/examples/python/log.4May17.pair_python_coulomb.1 b/examples/python/log.4May17.pair_python_coulomb.1 new file mode 100644 index 0000000000..97826eda47 --- /dev/null +++ 
b/examples/python/log.4May17.pair_python_coulomb.1 @@ -0,0 +1,178 @@ +LAMMPS (4 May 2017) + using 1 OpenMP thread(s) per MPI task +units real +atom_style full + +read_data data.spce + orthogonal box = (0.02645 0.02645 0.02641) to (35.5328 35.5328 35.4736) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 4500 atoms + scanning bonds ... + 2 = max bonds/atom + scanning angles ... + 1 = max angles/atom + reading bonds ... + 3000 bonds + reading angles ... + 1500 angles + 2 = max # of 1-2 neighbors + 1 = max # of 1-3 neighbors + 1 = max # of 1-4 neighbors + 2 = max # of special neighbors + +pair_style hybrid/overlay python 12.0 coul/long 12.0 +kspace_style pppm 1.0e-6 + +pair_coeff * * coul/long +pair_coeff * * python potentials.LJCutSPCE OW NULL + +pair_modify table 0 + +bond_style harmonic +angle_style harmonic +dihedral_style none +improper_style none + +bond_coeff 1 1000.00 1.000 +angle_coeff 1 100.0 109.47 + +special_bonds lj/coul 0.0 0.0 1.0 + 2 = max # of 1-2 neighbors + 1 = max # of 1-3 neighbors + 2 = max # of special neighbors + +neighbor 2.0 bin + +fix 1 all shake 0.0001 20 0 b 1 a 1 + 0 = # of size 2 clusters + 0 = # of size 3 clusters + 0 = # of size 4 clusters + 1500 = # of frozen angles +fix 2 all nvt temp 300.0 300.0 100.0 + +# create combined lj/coul table for all atom types +# generate tabulated potential from python variant +pair_write 1 1 2000 rsq 0.1 12 spce.table OW-OW -0.8472 -0.8472 +PPPM initialization ... +WARNING: Using polynomial approximation for long-range coulomb (../kspace.cpp:321) + G vector (1/distance) = 0.279652 + grid = 40 40 40 + stencil order = 5 + estimated absolute RMS force accuracy = 0.000394206 + estimated relative force accuracy = 1.18714e-06 + using double precision FFTs + 3d grid and FFT values/proc = 103823 64000 +Neighbor list info ... 
+ update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 14 + ghost atom cutoff = 14 + binsize = 7, bins = 6 6 6 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair python, perpetual, skip from (2) + attributes: half, newton on + pair build: skip + stencil: none + bin: none + (2) pair coul/long, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard +pair_write 1 2 2000 rsq 0.1 12 spce.table OW-HW -0.8472 0.4236 +PPPM initialization ... +WARNING: Using polynomial approximation for long-range coulomb (../kspace.cpp:321) + G vector (1/distance) = 0.279652 + grid = 40 40 40 + stencil order = 5 + estimated absolute RMS force accuracy = 0.000394206 + estimated relative force accuracy = 1.18714e-06 + using double precision FFTs + 3d grid and FFT values/proc = 103823 64000 +pair_write 2 2 2000 rsq 0.1 12 spce.table HW-HW 0.4236 0.4236 +PPPM initialization ... +WARNING: Using polynomial approximation for long-range coulomb (../kspace.cpp:321) + G vector (1/distance) = 0.279652 + grid = 40 40 40 + stencil order = 5 + estimated absolute RMS force accuracy = 0.000394206 + estimated relative force accuracy = 1.18714e-06 + using double precision FFTs + 3d grid and FFT values/proc = 103823 64000 + +# switch to tabulated potential +pair_style table linear 2000 pppm +pair_coeff 1 1 spce.table OW-OW +pair_coeff 1 2 spce.table OW-HW +pair_coeff 2 2 spce.table HW-HW + +thermo 10 +run 100 +PPPM initialization ... +WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321) + G vector (1/distance) = 0.279652 + grid = 40 40 40 + stencil order = 5 + estimated absolute RMS force accuracy = 0.000394674 + estimated relative force accuracy = 1.18855e-06 + using double precision FFTs + 3d grid and FFT values/proc = 103823 64000 +Neighbor list info ... 
+ update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 14 + ghost atom cutoff = 14 + binsize = 7, bins = 6 6 6 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair table, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 35.26 | 35.26 | 35.26 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 0 -100272.97 0 -100272.97 -1282.0708 + 10 120.61568 -101350.63 0 -100272.39 -4077.5051 + 20 136.11379 -101465.43 0 -100248.65 -5136.5677 + 30 137.01602 -101455.3 0 -100230.46 -5347.8311 + 40 153.424 -101582.46 0 -100210.93 -5223.1676 + 50 167.73654 -101686.24 0 -100186.77 -4468.6687 + 60 163.11642 -101618.16 0 -100159.99 -3291.7815 + 70 169.64512 -101647.89 0 -100131.35 -2611.638 + 80 182.9979 -101737.01 0 -100101.11 -2390.6293 + 90 191.33873 -101778.71 0 -100068.24 -2239.386 + 100 194.7458 -101775.84 0 -100034.92 -1951.9128 +Loop time of 7.60221 on 1 procs for 100 steps with 4500 atoms + +Performance: 1.137 ns/day, 21.117 hours/ns, 13.154 timesteps/s +99.7% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 5.7401 | 5.7401 | 5.7401 | 0.0 | 75.51 +Bond | 0.00017881 | 0.00017881 | 0.00017881 | 0.0 | 0.00 +Kspace | 1.5387 | 1.5387 | 1.5387 | 0.0 | 20.24 +Neigh | 0.2299 | 0.2299 | 0.2299 | 0.0 | 3.02 +Comm | 0.024311 | 0.024311 | 0.024311 | 0.0 | 0.32 +Output | 0.00057936 | 0.00057936 | 0.00057936 | 0.0 | 0.01 +Modify | 0.063158 | 0.063158 | 0.063158 | 0.0 | 0.83 +Other | | 0.005243 | | | 0.07 + +Nlocal: 4500 ave 4500 max 4500 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 21216 ave 21216 max 21216 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 2.60177e+06 ave 2.60177e+06 max 2.60177e+06 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of 
neighbors = 2601766 +Ave neighs/atom = 578.17 +Ave special neighs/atom = 2 +Neighbor list builds = 3 +Dangerous builds = 0 + +shell rm spce.table + +Total wall time: 0:00:07 diff --git a/examples/python/log.4May17.pair_python_coulomb.g++.1 b/examples/python/log.4May17.pair_python_coulomb.g++.1 new file mode 100644 index 0000000000..b08d4b939c --- /dev/null +++ b/examples/python/log.4May17.pair_python_coulomb.g++.1 @@ -0,0 +1,138 @@ +LAMMPS (4 May 2017) + using 1 OpenMP thread(s) per MPI task +units real +atom_style full + +read_data data.spce + orthogonal box = (0.02645 0.02645 0.02641) to (35.5328 35.5328 35.4736) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 4500 atoms + scanning bonds ... + 2 = max bonds/atom + scanning angles ... + 1 = max angles/atom + reading bonds ... + 3000 bonds + reading angles ... + 1500 angles + 2 = max # of 1-2 neighbors + 1 = max # of 1-3 neighbors + 1 = max # of 1-4 neighbors + 2 = max # of special neighbors + +pair_style hybrid/overlay coul/cut 12.0 python 12.0 + +pair_coeff * * coul/cut +pair_coeff * * python py_pot.LJCutSPCE OW NULL + +bond_style harmonic +angle_style harmonic +dihedral_style none +improper_style none + +bond_coeff 1 1000.00 1.000 +angle_coeff 1 100.0 109.47 + +special_bonds lj/coul 0.0 0.0 1.0 + 2 = max # of 1-2 neighbors + 1 = max # of 1-3 neighbors + 2 = max # of special neighbors + +neighbor 2.0 bin + +fix 1 all shake 0.0001 20 0 b 1 a 1 + 0 = # of size 2 clusters + 0 = # of size 3 clusters + 0 = # of size 4 clusters + 1500 = # of frozen angles +fix 2 all nvt temp 300.0 300.0 100.0 + +# create combined lj/coul table for all atom types +# generate tabulated potential from python variant +pair_write 1 1 2000 rsq 0.1 12 spce.table OW-OW -0.8472 -0.8472 +Neighbor list info ... 
+ update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 14 + ghost atom cutoff = 14 + binsize = 7, bins = 6 6 6 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair coul/cut, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard + (2) pair python, perpetual, skip from (1) + attributes: half, newton on + pair build: skip + stencil: none + bin: none +pair_write 1 2 2000 rsq 0.1 12 spce.table OW-HW -0.8472 0.4236 +pair_write 2 2 2000 rsq 0.1 12 spce.table HW-HW 0.4236 0.4236 + +# switch to tabulated potential +pair_style table linear 2000 pppm +pair_coeff 1 1 spce.table OW-OW +pair_coeff 1 2 spce.table OW-HW +pair_coeff 2 2 spce.table HW-HW + +thermo 10 +run 100 +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 14 + ghost atom cutoff = 14 + binsize = 7, bins = 6 6 6 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair table, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 25.08 | 25.08 | 25.08 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 0 -18284.922 0 -18284.922 -2080.7739 + 10 146.83806 -19552.072 0 -18239.421 -4865.31 + 20 183.15761 -18706.872 0 -17069.543 -4865.6695 + 30 205.96203 -18901.541 0 -17060.354 -4454.8634 + 40 241.62768 -18323.117 0 -16163.099 -3269.1475 + 50 265.98384 -19883.562 0 -17505.813 -2788.5194 + 60 274.01897 -21320.575 0 -18870.996 -2387.0708 + 70 288.7601 -19849.269 0 -17267.913 -1235.818 + 80 300.64724 -20958.602 0 -18270.981 -1714.7988 + 90 304.19113 -21580.4 0 -18861.099 -2144.1614 + 100 304.22027 -21239.014 0 -18519.452 -2092.6759 +Loop time of 6.01861 on 1 procs for 100 steps with 4500 atoms + +Performance: 1.436 ns/day, 16.718 hours/ns, 16.615 timesteps/s +99.7% 
CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 5.698 | 5.698 | 5.698 | 0.0 | 94.67 +Bond | 0.0001626 | 0.0001626 | 0.0001626 | 0.0 | 0.00 +Neigh | 0.23235 | 0.23235 | 0.23235 | 0.0 | 3.86 +Comm | 0.018961 | 0.018961 | 0.018961 | 0.0 | 0.32 +Output | 0.00058126 | 0.00058126 | 0.00058126 | 0.0 | 0.01 +Modify | 0.063452 | 0.063452 | 0.063452 | 0.0 | 1.05 +Other | | 0.005146 | | | 0.09 + +Nlocal: 4500 ave 4500 max 4500 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 21285 ave 21285 max 21285 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 2.59766e+06 ave 2.59766e+06 max 2.59766e+06 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 2597662 +Ave neighs/atom = 577.258 +Ave special neighs/atom = 2 +Neighbor list builds = 3 +Dangerous builds = 0 + +shell rm spce.table + +Total wall time: 0:00:06 diff --git a/examples/python/log.4May17.pair_python_coulomb.g++.4 b/examples/python/log.4May17.pair_python_coulomb.g++.4 new file mode 100644 index 0000000000..b002d5c1ab --- /dev/null +++ b/examples/python/log.4May17.pair_python_coulomb.g++.4 @@ -0,0 +1,138 @@ +LAMMPS (4 May 2017) + using 1 OpenMP thread(s) per MPI task +units real +atom_style full + +read_data data.spce + orthogonal box = (0.02645 0.02645 0.02641) to (35.5328 35.5328 35.4736) + 2 by 2 by 1 MPI processor grid + reading atoms ... + 4500 atoms + scanning bonds ... + 2 = max bonds/atom + scanning angles ... + 1 = max angles/atom + reading bonds ... + 3000 bonds + reading angles ... 
+ 1500 angles + 2 = max # of 1-2 neighbors + 1 = max # of 1-3 neighbors + 1 = max # of 1-4 neighbors + 2 = max # of special neighbors + +pair_style hybrid/overlay coul/cut 12.0 python 12.0 + +pair_coeff * * coul/cut +pair_coeff * * python py_pot.LJCutSPCE OW NULL + +bond_style harmonic +angle_style harmonic +dihedral_style none +improper_style none + +bond_coeff 1 1000.00 1.000 +angle_coeff 1 100.0 109.47 + +special_bonds lj/coul 0.0 0.0 1.0 + 2 = max # of 1-2 neighbors + 1 = max # of 1-3 neighbors + 2 = max # of special neighbors + +neighbor 2.0 bin + +fix 1 all shake 0.0001 20 0 b 1 a 1 + 0 = # of size 2 clusters + 0 = # of size 3 clusters + 0 = # of size 4 clusters + 1500 = # of frozen angles +fix 2 all nvt temp 300.0 300.0 100.0 + +# create combined lj/coul table for all atom types +# generate tabulated potential from python variant +pair_write 1 1 2000 rsq 0.1 12 spce.table OW-OW -0.8472 -0.8472 +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 14 + ghost atom cutoff = 14 + binsize = 7, bins = 6 6 6 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair coul/cut, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard + (2) pair python, perpetual, skip from (1) + attributes: half, newton on + pair build: skip + stencil: none + bin: none +pair_write 1 2 2000 rsq 0.1 12 spce.table OW-HW -0.8472 0.4236 +pair_write 2 2 2000 rsq 0.1 12 spce.table HW-HW 0.4236 0.4236 + +# switch to tabulated potential +pair_style table linear 2000 pppm +pair_coeff 1 1 spce.table OW-OW +pair_coeff 1 2 spce.table OW-HW +pair_coeff 2 2 spce.table HW-HW + +thermo 10 +run 100 +Neighbor list info ... 
+ update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 14 + ghost atom cutoff = 14 + binsize = 7, bins = 6 6 6 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair table, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 9.962 | 9.963 | 9.963 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 0 -18284.922 0 -18284.922 -2080.7739 + 10 146.83806 -19552.072 0 -18239.421 -4865.31 + 20 183.15761 -18706.872 0 -17069.543 -4865.6695 + 30 205.96203 -18901.541 0 -17060.354 -4454.8634 + 40 241.62768 -18323.117 0 -16163.099 -3269.1475 + 50 265.98384 -19883.562 0 -17505.813 -2788.5194 + 60 274.01897 -21320.575 0 -18870.996 -2387.0708 + 70 288.7601 -19849.269 0 -17267.913 -1235.818 + 80 300.64724 -20958.602 0 -18270.981 -1714.7988 + 90 304.19113 -21580.4 0 -18861.099 -2144.1614 + 100 304.22027 -21239.014 0 -18519.452 -2092.6759 +Loop time of 1.7361 on 4 procs for 100 steps with 4500 atoms + +Performance: 4.977 ns/day, 4.823 hours/ns, 57.600 timesteps/s +99.2% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 1.4424 | 1.5149 | 1.6066 | 5.3 | 87.26 +Bond | 8.9407e-05 | 0.00010258 | 0.00012374 | 0.0 | 0.01 +Neigh | 0.064205 | 0.064241 | 0.064295 | 0.0 | 3.70 +Comm | 0.023643 | 0.1155 | 0.18821 | 19.2 | 6.65 +Output | 0.00038004 | 0.00042355 | 0.00054145 | 0.0 | 0.02 +Modify | 0.037507 | 0.037787 | 0.038042 | 0.1 | 2.18 +Other | | 0.003148 | | | 0.18 + +Nlocal: 1125 ave 1162 max 1098 min +Histogram: 1 1 0 0 0 1 0 0 0 1 +Nghost: 12267.8 ave 12302 max 12238 min +Histogram: 2 0 0 0 0 0 0 0 1 1 +Neighs: 649416 ave 681458 max 630541 min +Histogram: 1 0 2 0 0 0 0 0 0 1 + +Total # of neighbors = 2597662 +Ave neighs/atom = 577.258 +Ave special 
neighs/atom = 2 +Neighbor list builds = 3 +Dangerous builds = 0 + +shell rm spce.table + +Total wall time: 0:00:01 diff --git a/examples/python/log.4May17.pair_python_hybrid.g++.1 b/examples/python/log.4May17.pair_python_hybrid.g++.1 new file mode 100644 index 0000000000..718f794a57 --- /dev/null +++ b/examples/python/log.4May17.pair_python_hybrid.g++.1 @@ -0,0 +1,250 @@ +LAMMPS (4 May 2017) + using 1 OpenMP thread(s) per MPI task +# 3d Lennard-Jones hybrid + +units lj +atom_style atomic + +lattice fcc 0.8442 +Lattice spacing in x,y,z = 1.6796 1.6796 1.6796 +region box block 0 10 0 10 0 10 +create_box 2 box +Created orthogonal box = (0 0 0) to (16.796 16.796 16.796) + 1 by 1 by 1 MPI processor grid +create_atoms 1 box +Created 4000 atoms +mass * 1.0 +region half block -0.1 4.9 0 10 0 10 +set region half type 2 + 2000 settings made for type + +velocity all create 3.0 87287 + +pair_style hybrid lj/cut 2.5 python 2.5 +pair_coeff * * python py_pot.LJCutMelt lj NULL +pair_coeff * 2 lj/cut 1.0 1.0 + +neighbor 0.3 bin +neigh_modify every 20 delay 0 check no + +fix 1 all nve + +thermo 50 +run 250 +Neighbor list info ... 
+ update every 20 steps, delay 0 steps, check no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 2.8 + ghost atom cutoff = 2.8 + binsize = 1.4, bins = 12 12 12 + 3 neighbor lists, perpetual/occasional/extra = 3 0 0 + (1) pair lj/cut, perpetual, skip from (3) + attributes: half, newton on + pair build: skip + stencil: none + bin: none + (2) pair python, perpetual, skip from (3) + attributes: half, newton on + pair build: skip + stencil: none + bin: none + (3) neighbor class addition, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 4.446 | 4.446 | 4.446 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 3 -6.7733681 0 -2.2744931 -3.7033504 + 50 1.6758903 -4.7955425 0 -2.2823355 5.670064 + 100 1.6458363 -4.7492704 0 -2.2811332 5.8691042 + 150 1.6324555 -4.7286791 0 -2.280608 5.9589514 + 200 1.6630725 -4.7750988 0 -2.2811136 5.7364886 + 250 1.6275257 -4.7224992 0 -2.281821 5.9567365 +Loop time of 10.0384 on 1 procs for 250 steps with 4000 atoms + +Performance: 10758.705 tau/day, 24.904 timesteps/s +98.8% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 9.913 | 9.913 | 9.913 | 0.0 | 98.75 +Neigh | 0.095569 | 0.095569 | 0.095569 | 0.0 | 0.95 +Comm | 0.012686 | 0.012686 | 0.012686 | 0.0 | 0.13 +Output | 0.00027537 | 0.00027537 | 0.00027537 | 0.0 | 0.00 +Modify | 0.01386 | 0.01386 | 0.01386 | 0.0 | 0.14 +Other | | 0.003027 | | | 0.03 + +Nlocal: 4000 ave 4000 max 4000 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 5499 ave 5499 max 5499 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 85978 ave 85978 max 85978 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 85978 +Ave neighs/atom = 21.4945 +Neighbor list builds = 12 +Dangerous builds not checked + +write_data 
hybrid.data +write_restart hybrid.restart + +clear + using 1 OpenMP thread(s) per MPI task + +read_restart hybrid.restart + orthogonal box = (0 0 0) to (16.796 16.796 16.796) + 1 by 1 by 1 MPI processor grid + 4000 atoms + +pair_style hybrid lj/cut 2.5 python 2.5 +pair_coeff * * python py_pot.LJCutMelt lj NULL +pair_coeff * 2 lj/cut 1.0 1.0 + +fix 1 all nve + +thermo 50 +run 250 +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 2.8 + ghost atom cutoff = 2.8 + binsize = 1.4, bins = 12 12 12 + 3 neighbor lists, perpetual/occasional/extra = 3 0 0 + (1) pair lj/cut, perpetual, skip from (3) + attributes: half, newton on + pair build: skip + stencil: none + bin: none + (2) pair python, perpetual, skip from (3) + attributes: half, newton on + pair build: skip + stencil: none + bin: none + (3) neighbor class addition, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 4.245 | 4.245 | 4.245 Mbytes +Step Temp E_pair E_mol TotEng Press + 250 1.6275257 -4.7224992 0 -2.281821 5.9567365 + 300 1.645592 -4.7496711 0 -2.2819002 5.8734193 + 350 1.6514972 -4.7580756 0 -2.2814491 5.810167 + 400 1.6540555 -4.7622999 0 -2.281837 5.8200413 + 450 1.6264734 -4.7200865 0 -2.2809863 5.9546991 + 500 1.6366891 -4.7350979 0 -2.2806781 5.9369284 +Loop time of 10.0803 on 1 procs for 250 steps with 4000 atoms + +Performance: 10713.932 tau/day, 24.801 timesteps/s +98.7% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 9.8479 | 9.8479 | 9.8479 | 0.0 | 97.69 +Neigh | 0.20002 | 0.20002 | 0.20002 | 0.0 | 1.98 +Comm | 0.01437 | 0.01437 | 0.01437 | 0.0 | 0.14 +Output | 0.00024033 | 0.00024033 | 0.00024033 | 0.0 | 0.00 +Modify | 
0.013422 | 0.013422 | 0.013422 | 0.0 | 0.13 +Other | | 0.004348 | | | 0.04 + +Nlocal: 4000 ave 4000 max 4000 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 5472 ave 5472 max 5472 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 86930 ave 86930 max 86930 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 86930 +Ave neighs/atom = 21.7325 +Neighbor list builds = 25 +Dangerous builds = 25 + +clear + using 1 OpenMP thread(s) per MPI task + +units lj +atom_style atomic + +read_data hybrid.data + orthogonal box = (0 0 0) to (16.796 16.796 16.796) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 4000 atoms + reading velocities ... + 4000 velocities + +pair_style hybrid lj/cut 2.5 python 2.5 +pair_coeff * * python py_pot.LJCutMelt lj NULL +pair_coeff * 2 lj/cut 1.0 1.0 + +neighbor 0.3 bin +neigh_modify every 20 delay 0 check no + +fix 1 all nve + +thermo 50 +run 250 +Neighbor list info ... + update every 20 steps, delay 0 steps, check no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 2.8 + ghost atom cutoff = 2.8 + binsize = 1.4, bins = 12 12 12 + 3 neighbor lists, perpetual/occasional/extra = 3 0 0 + (1) pair lj/cut, perpetual, skip from (3) + attributes: half, newton on + pair build: skip + stencil: none + bin: none + (2) pair python, perpetual, skip from (3) + attributes: half, newton on + pair build: skip + stencil: none + bin: none + (3) neighbor class addition, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 3.745 | 3.745 | 3.745 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 1.6275257 -4.7224992 0 -2.281821 5.9567365 + 50 1.6454666 -4.7497515 0 -2.2821686 5.8729175 + 100 1.6512008 -4.7582693 0 -2.2820874 5.8090548 + 150 1.6537193 -4.7627023 0 -2.2827434 5.8177704 + 200 1.6258731 -4.7205017 0 -2.2823017 5.952511 + 250 1.6370862 -4.7373176 0 -2.2823022 5.925807 +Loop time of 9.93686 on 1 procs for 250 
steps with 4000 atoms + +Performance: 10868.626 tau/day, 25.159 timesteps/s +98.8% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 9.8119 | 9.8119 | 9.8119 | 0.0 | 98.74 +Neigh | 0.096041 | 0.096041 | 0.096041 | 0.0 | 0.97 +Comm | 0.01243 | 0.01243 | 0.01243 | 0.0 | 0.13 +Output | 0.00028133 | 0.00028133 | 0.00028133 | 0.0 | 0.00 +Modify | 0.013261 | 0.013261 | 0.013261 | 0.0 | 0.13 +Other | | 0.002994 | | | 0.03 + +Nlocal: 4000 ave 4000 max 4000 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 5487 ave 5487 max 5487 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 86831 ave 86831 max 86831 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 86831 +Ave neighs/atom = 21.7078 +Neighbor list builds = 12 +Dangerous builds not checked + +shell rm hybrid.data hybrid.restart +Total wall time: 0:00:30 diff --git a/examples/python/log.4May17.pair_python_hybrid.g++.4 b/examples/python/log.4May17.pair_python_hybrid.g++.4 new file mode 100644 index 0000000000..32d9fc1740 --- /dev/null +++ b/examples/python/log.4May17.pair_python_hybrid.g++.4 @@ -0,0 +1,250 @@ +LAMMPS (4 May 2017) + using 1 OpenMP thread(s) per MPI task +# 3d Lennard-Jones hybrid + +units lj +atom_style atomic + +lattice fcc 0.8442 +Lattice spacing in x,y,z = 1.6796 1.6796 1.6796 +region box block 0 10 0 10 0 10 +create_box 2 box +Created orthogonal box = (0 0 0) to (16.796 16.796 16.796) + 1 by 2 by 2 MPI processor grid +create_atoms 1 box +Created 4000 atoms +mass * 1.0 +region half block -0.1 4.9 0 10 0 10 +set region half type 2 + 2000 settings made for type + +velocity all create 3.0 87287 + +pair_style hybrid lj/cut 2.5 python 2.5 +pair_coeff * * python py_pot.LJCutMelt lj NULL +pair_coeff * 2 lj/cut 1.0 1.0 + +neighbor 0.3 bin +neigh_modify every 20 delay 0 check no + +fix 1 all nve + +thermo 50 +run 250 +Neighbor list info ... 
+ update every 20 steps, delay 0 steps, check no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 2.8 + ghost atom cutoff = 2.8 + binsize = 1.4, bins = 12 12 12 + 3 neighbor lists, perpetual/occasional/extra = 3 0 0 + (1) pair lj/cut, perpetual, skip from (3) + attributes: half, newton on + pair build: skip + stencil: none + bin: none + (2) pair python, perpetual, skip from (3) + attributes: half, newton on + pair build: skip + stencil: none + bin: none + (3) neighbor class addition, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 3.953 | 3.953 | 3.953 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 3 -6.7733681 0 -2.2744931 -3.7033504 + 50 1.6754119 -4.7947589 0 -2.2822693 5.6615925 + 100 1.6503357 -4.756014 0 -2.2811293 5.8050524 + 150 1.6596605 -4.7699432 0 -2.2810749 5.7830138 + 200 1.6371874 -4.7365462 0 -2.2813789 5.9246674 + 250 1.6323462 -4.7292021 0 -2.2812949 5.9762238 +Loop time of 2.71748 on 4 procs for 250 steps with 4000 atoms + +Performance: 39742.745 tau/day, 91.997 timesteps/s +98.4% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 2.4777 | 2.5639 | 2.6253 | 3.9 | 94.35 +Neigh | 0.024626 | 0.025331 | 0.02598 | 0.3 | 0.93 +Comm | 0.061933 | 0.12297 | 0.20987 | 18.0 | 4.53 +Output | 0.00026131 | 0.00027591 | 0.00031352 | 0.0 | 0.01 +Modify | 0.0036087 | 0.0036573 | 0.0037553 | 0.1 | 0.13 +Other | | 0.001337 | | | 0.05 + +Nlocal: 1000 ave 1010 max 982 min +Histogram: 1 0 0 0 0 0 1 0 0 2 +Nghost: 2703.75 ave 2713 max 2689 min +Histogram: 1 0 0 0 0 0 0 2 0 1 +Neighs: 21469.8 ave 22167 max 20546 min +Histogram: 1 0 0 0 0 1 1 0 0 1 + +Total # of neighbors = 85879 +Ave neighs/atom = 21.4698 +Neighbor list builds = 12 +Dangerous builds not checked + 
+write_data hybrid.data +write_restart hybrid.restart + +clear + using 1 OpenMP thread(s) per MPI task + +read_restart hybrid.restart + orthogonal box = (0 0 0) to (16.796 16.796 16.796) + 1 by 2 by 2 MPI processor grid + 4000 atoms + +pair_style hybrid lj/cut 2.5 python 2.5 +pair_coeff * * python py_pot.LJCutMelt lj NULL +pair_coeff * 2 lj/cut 1.0 1.0 + +fix 1 all nve + +thermo 50 +run 250 +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 2.8 + ghost atom cutoff = 2.8 + binsize = 1.4, bins = 12 12 12 + 3 neighbor lists, perpetual/occasional/extra = 3 0 0 + (1) pair lj/cut, perpetual, skip from (3) + attributes: half, newton on + pair build: skip + stencil: none + bin: none + (2) pair python, perpetual, skip from (3) + attributes: half, newton on + pair build: skip + stencil: none + bin: none + (3) neighbor class addition, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 3.612 | 3.612 | 3.612 Mbytes +Step Temp E_pair E_mol TotEng Press + 250 1.6323462 -4.7292062 0 -2.2812991 5.9762168 + 300 1.6451788 -4.7488091 0 -2.2816578 5.8375485 + 350 1.6171909 -4.7064928 0 -2.2813129 6.0094235 + 400 1.6388136 -4.7387093 0 -2.2811035 5.9331084 + 450 1.6431295 -4.7452215 0 -2.2811435 5.8929898 + 500 1.643316 -4.7454222 0 -2.2810644 5.8454817 +Loop time of 2.75827 on 4 procs for 250 steps with 4000 atoms + +Performance: 39155.038 tau/day, 90.637 timesteps/s +98.3% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 2.3631 | 2.5412 | 2.6672 | 7.2 | 92.13 +Neigh | 0.050358 | 0.052316 | 0.053312 | 0.5 | 1.90 +Comm | 0.032793 | 0.15893 | 0.33904 | 29.1 | 5.76 +Output | 0.00018525 | 0.00020212 | 0.00024509 | 
0.0 | 0.01 +Modify | 0.0034482 | 0.0035321 | 0.0036578 | 0.1 | 0.13 +Other | | 0.002039 | | | 0.07 + +Nlocal: 1000 ave 1012 max 983 min +Histogram: 1 0 0 0 0 0 2 0 0 1 +Nghost: 2699 ave 2706 max 2693 min +Histogram: 1 1 0 0 0 0 1 0 0 1 +Neighs: 21802 ave 22700 max 21236 min +Histogram: 1 1 0 1 0 0 0 0 0 1 + +Total # of neighbors = 87208 +Ave neighs/atom = 21.802 +Neighbor list builds = 25 +Dangerous builds = 25 + +clear + using 1 OpenMP thread(s) per MPI task + +units lj +atom_style atomic + +read_data hybrid.data + orthogonal box = (0 0 0) to (16.796 16.796 16.796) + 1 by 2 by 2 MPI processor grid + reading atoms ... + 4000 atoms + reading velocities ... + 4000 velocities + +pair_style hybrid lj/cut 2.5 python 2.5 +pair_coeff * * python py_pot.LJCutMelt lj NULL +pair_coeff * 2 lj/cut 1.0 1.0 + +neighbor 0.3 bin +neigh_modify every 20 delay 0 check no + +fix 1 all nve + +thermo 50 +run 250 +Neighbor list info ... + update every 20 steps, delay 0 steps, check no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 2.8 + ghost atom cutoff = 2.8 + binsize = 1.4, bins = 12 12 12 + 3 neighbor lists, perpetual/occasional/extra = 3 0 0 + (1) pair lj/cut, perpetual, skip from (3) + attributes: half, newton on + pair build: skip + stencil: none + bin: none + (2) pair python, perpetual, skip from (3) + attributes: half, newton on + pair build: skip + stencil: none + bin: none + (3) neighbor class addition, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 3.112 | 3.112 | 3.112 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 1.6323462 -4.7292062 0 -2.2812991 5.9762168 + 50 1.6450626 -4.7488948 0 -2.2819177 5.8370409 + 100 1.6169004 -4.7066969 0 -2.2819526 6.0082546 + 150 1.6384234 -4.7389689 0 -2.2819482 5.9315273 + 200 1.6428814 -4.7460743 0 -2.2823683 5.8888228 + 250 1.6432631 -4.7466603 0 -2.2823818 5.8398819 +Loop time of 
2.71936 on 4 procs for 250 steps with 4000 atoms + +Performance: 39715.257 tau/day, 91.933 timesteps/s +98.4% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 2.3769 | 2.5432 | 2.6447 | 6.6 | 93.52 +Neigh | 0.024088 | 0.025093 | 0.025748 | 0.4 | 0.92 +Comm | 0.044614 | 0.14598 | 0.31339 | 27.5 | 5.37 +Output | 0.00026488 | 0.00028872 | 0.00034189 | 0.0 | 0.01 +Modify | 0.0034099 | 0.0035709 | 0.0036535 | 0.2 | 0.13 +Other | | 0.001215 | | | 0.04 + +Nlocal: 1000 ave 1013 max 989 min +Histogram: 1 0 0 1 0 1 0 0 0 1 +Nghost: 2695.5 ave 2706 max 2682 min +Histogram: 1 0 0 0 0 0 2 0 0 1 +Neighs: 21792 ave 22490 max 21457 min +Histogram: 2 0 1 0 0 0 0 0 0 1 + +Total # of neighbors = 87168 +Ave neighs/atom = 21.792 +Neighbor list builds = 12 +Dangerous builds not checked + +shell rm hybrid.data hybrid.restart +Total wall time: 0:00:08 diff --git a/examples/python/log.4May17.pair_python_long.g++.1 b/examples/python/log.4May17.pair_python_long.g++.1 new file mode 100644 index 0000000000..e2d7cf1bde --- /dev/null +++ b/examples/python/log.4May17.pair_python_long.g++.1 @@ -0,0 +1,146 @@ +LAMMPS (4 May 2017) + using 1 OpenMP thread(s) per MPI task +units real +atom_style full + +read_data data.spce + orthogonal box = (0.02645 0.02645 0.02641) to (35.5328 35.5328 35.4736) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 4500 atoms + scanning bonds ... + 2 = max bonds/atom + scanning angles ... + 1 = max angles/atom + reading bonds ... + 3000 bonds + reading angles ... 
+ 1500 angles + 2 = max # of 1-2 neighbors + 1 = max # of 1-3 neighbors + 1 = max # of 1-4 neighbors + 2 = max # of special neighbors + +pair_style python 12.0 +pair_coeff * * py_pot.LJCutSPCE OW HW + +bond_style harmonic +angle_style harmonic +dihedral_style none +improper_style none + +bond_coeff 1 1000.00 1.000 +angle_coeff 1 100.0 109.47 + +special_bonds lj/coul 0.0 0.0 1.0 + 2 = max # of 1-2 neighbors + 1 = max # of 1-3 neighbors + 2 = max # of special neighbors + +neighbor 2.0 bin + +fix 1 all shake 0.0001 20 0 b 1 a 1 + 0 = # of size 2 clusters + 0 = # of size 3 clusters + 0 = # of size 4 clusters + 1500 = # of frozen angles +fix 2 all nvt temp 300.0 300.0 100.0 + +# create only lj/cut table for the oxygen atoms from python +shell rm -f spce.table +WARNING: Shell command 'rm' failed with error 'No such file or directory' (../input.cpp:1285) +WARNING: Shell command 'rm' failed with error 'No such file or directory' (../input.cpp:1285) +pair_write 1 1 2000 rsq 0.1 12 spce.table OW-OW +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 14 + ghost atom cutoff = 14 + binsize = 7, bins = 6 6 6 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair python, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard + +# switch to tabulated potential with long-range coulomb as overlay +pair_style hybrid/overlay coul/long 12.0 table linear 2000 +kspace_style pppm 1.0e-6 +pair_coeff * * coul/long +pair_coeff 1 1 table spce.table OW-OW + +thermo 10 +run 100 +PPPM initialization ... 
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321) + G vector (1/distance) = 0.279652 + grid = 40 40 40 + stencil order = 5 + estimated absolute RMS force accuracy = 0.000394674 + estimated relative force accuracy = 1.18855e-06 + using double precision FFTs + 3d grid and FFT values/proc = 103823 64000 +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 14 + ghost atom cutoff = 14 + binsize = 7, bins = 6 6 6 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair coul/long, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard + (2) pair table, perpetual, skip from (1) + attributes: half, newton on + pair build: skip + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 36.47 | 36.47 | 36.47 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 0 -16690.032 0 -16690.032 -1268.9538 + 10 120.58553 -17767.504 0 -16689.536 -4063.8589 + 20 136.11736 -17882.557 0 -16665.742 -5124.6758 + 30 137.00764 -17872.318 0 -16647.545 -5337.2022 + 40 153.38868 -17999.269 0 -16628.059 -5213.6001 + 50 167.70342 -18103.06 0 -16603.883 -4460.6632 + 60 163.07134 -18034.856 0 -16577.088 -3285.0037 + 70 169.59286 -18064.636 0 -16548.57 -2606.407 + 80 182.92893 -18153.499 0 -16518.215 -2385.5152 + 90 191.2793 -18195.356 0 -16485.425 -2235.3701 + 100 194.68587 -18192.458 0 -16452.073 -1948.3746 +Loop time of 7.90705 on 1 procs for 100 steps with 4500 atoms + +Performance: 1.093 ns/day, 21.964 hours/ns, 12.647 timesteps/s +99.6% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 6.0343 | 6.0343 | 6.0343 | 0.0 | 76.32 +Bond | 0.00019622 | 0.00019622 | 0.00019622 | 0.0 | 0.00 +Kspace | 1.5311 | 1.5311 | 1.5311 | 0.0 | 19.36 +Neigh | 
0.246 | 0.246 | 0.246 | 0.0 | 3.11 +Comm | 0.023937 | 0.023937 | 0.023937 | 0.0 | 0.30 +Output | 0.00060368 | 0.00060368 | 0.00060368 | 0.0 | 0.01 +Modify | 0.065543 | 0.065543 | 0.065543 | 0.0 | 0.83 +Other | | 0.005364 | | | 0.07 + +Nlocal: 4500 ave 4500 max 4500 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 21216 ave 21216 max 21216 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 2.60177e+06 ave 2.60177e+06 max 2.60177e+06 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 2601769 +Ave neighs/atom = 578.171 +Ave special neighs/atom = 2 +Neighbor list builds = 3 +Dangerous builds = 0 + +shell rm spce.table + +Total wall time: 0:00:08 diff --git a/examples/python/log.4May17.pair_python_long.g++.4 b/examples/python/log.4May17.pair_python_long.g++.4 new file mode 100644 index 0000000000..35347da713 --- /dev/null +++ b/examples/python/log.4May17.pair_python_long.g++.4 @@ -0,0 +1,146 @@ +LAMMPS (4 May 2017) + using 1 OpenMP thread(s) per MPI task +units real +atom_style full + +read_data data.spce + orthogonal box = (0.02645 0.02645 0.02641) to (35.5328 35.5328 35.4736) + 2 by 2 by 1 MPI processor grid + reading atoms ... + 4500 atoms + scanning bonds ... + 2 = max bonds/atom + scanning angles ... + 1 = max angles/atom + reading bonds ... + 3000 bonds + reading angles ... 
+ 1500 angles + 2 = max # of 1-2 neighbors + 1 = max # of 1-3 neighbors + 1 = max # of 1-4 neighbors + 2 = max # of special neighbors + +pair_style python 12.0 +pair_coeff * * py_pot.LJCutSPCE OW HW + +bond_style harmonic +angle_style harmonic +dihedral_style none +improper_style none + +bond_coeff 1 1000.00 1.000 +angle_coeff 1 100.0 109.47 + +special_bonds lj/coul 0.0 0.0 1.0 + 2 = max # of 1-2 neighbors + 1 = max # of 1-3 neighbors + 2 = max # of special neighbors + +neighbor 2.0 bin + +fix 1 all shake 0.0001 20 0 b 1 a 1 + 0 = # of size 2 clusters + 0 = # of size 3 clusters + 0 = # of size 4 clusters + 1500 = # of frozen angles +fix 2 all nvt temp 300.0 300.0 100.0 + +# create only lj/cut table for the oxygen atoms from python +shell rm -f spce.table +WARNING: Shell command 'rm' failed with error 'No such file or directory' (../input.cpp:1285) +WARNING: Shell command 'rm' failed with error 'No such file or directory' (../input.cpp:1285) +pair_write 1 1 2000 rsq 0.1 12 spce.table OW-OW +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 14 + ghost atom cutoff = 14 + binsize = 7, bins = 6 6 6 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair python, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard + +# switch to tabulated potential with long-range coulomb as overlay +pair_style hybrid/overlay coul/long 12.0 table linear 2000 +kspace_style pppm 1.0e-6 +pair_coeff * * coul/long +pair_coeff 1 1 table spce.table OW-OW + +thermo 10 +run 100 +PPPM initialization ... 
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321) + G vector (1/distance) = 0.279652 + grid = 40 40 40 + stencil order = 5 + estimated absolute RMS force accuracy = 0.000394674 + estimated relative force accuracy = 1.18855e-06 + using double precision FFTs + 3d grid and FFT values/proc = 34263 16000 +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 14 + ghost atom cutoff = 14 + binsize = 7, bins = 6 6 6 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair coul/long, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard + (2) pair table, perpetual, skip from (1) + attributes: half, newton on + pair build: skip + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 13.45 | 13.45 | 13.45 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 0 -16690.032 0 -16690.032 -1268.9538 + 10 120.58553 -17767.504 0 -16689.536 -4063.8589 + 20 136.11736 -17882.557 0 -16665.742 -5124.6758 + 30 137.00764 -17872.318 0 -16647.545 -5337.2022 + 40 153.38868 -17999.269 0 -16628.059 -5213.6001 + 50 167.70342 -18103.06 0 -16603.883 -4460.6632 + 60 163.07134 -18034.856 0 -16577.088 -3285.0037 + 70 169.59286 -18064.636 0 -16548.57 -2606.407 + 80 182.92893 -18153.499 0 -16518.215 -2385.5152 + 90 191.2793 -18195.356 0 -16485.425 -2235.3701 + 100 194.68587 -18192.458 0 -16452.073 -1948.3746 +Loop time of 2.36748 on 4 procs for 100 steps with 4500 atoms + +Performance: 3.649 ns/day, 6.576 hours/ns, 42.239 timesteps/s +99.4% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 1.5309 | 1.5977 | 1.6926 | 4.7 | 67.49 +Bond | 9.9182e-05 | 0.00012749 | 0.00016403 | 0.0 | 0.01 +Kspace | 0.52158 | 0.61232 | 0.67676 | 7.3 | 25.86 +Neigh | 
0.066937 | 0.06702 | 0.067093 | 0.0 | 2.83 +Comm | 0.035882 | 0.039862 | 0.042244 | 1.2 | 1.68 +Output | 0.0004003 | 0.00044602 | 0.00057578 | 0.0 | 0.02 +Modify | 0.046088 | 0.046227 | 0.046315 | 0.0 | 1.95 +Other | | 0.003775 | | | 0.16 + +Nlocal: 1125 ave 1154 max 1092 min +Histogram: 1 0 0 0 1 0 0 1 0 1 +Nghost: 12256.2 ave 12296 max 12213 min +Histogram: 1 0 1 0 0 0 0 0 1 1 +Neighs: 650442 ave 678831 max 626373 min +Histogram: 1 0 0 0 2 0 0 0 0 1 + +Total # of neighbors = 2601769 +Ave neighs/atom = 578.171 +Ave special neighs/atom = 2 +Neighbor list builds = 3 +Dangerous builds = 0 + +shell rm spce.table + +Total wall time: 0:00:02 diff --git a/examples/python/log.4May17.pair_python_melt.g++.1 b/examples/python/log.4May17.pair_python_melt.g++.1 new file mode 100644 index 0000000000..d234ce93b5 --- /dev/null +++ b/examples/python/log.4May17.pair_python_melt.g++.1 @@ -0,0 +1,214 @@ +LAMMPS (4 May 2017) + using 1 OpenMP thread(s) per MPI task +# 3d Lennard-Jones melt + +units lj +atom_style atomic + +lattice fcc 0.8442 +Lattice spacing in x,y,z = 1.6796 1.6796 1.6796 +region box block 0 10 0 10 0 10 +create_box 1 box +Created orthogonal box = (0 0 0) to (16.796 16.796 16.796) + 1 by 1 by 1 MPI processor grid +create_atoms 1 box +Created 4000 atoms +mass * 1.0 + +velocity all create 3.0 87287 + +pair_style python 2.5 +pair_coeff * * py_pot.LJCutMelt lj + +neighbor 0.3 bin +neigh_modify every 20 delay 0 check no + +fix 1 all nve + +thermo 50 +run 250 +Neighbor list info ... 
+ update every 20 steps, delay 0 steps, check no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 2.8 + ghost atom cutoff = 2.8 + binsize = 1.4, bins = 12 12 12 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair python, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 3.184 | 3.184 | 3.184 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 3 -6.7733681 0 -2.2744931 -3.7033504 + 50 1.6758903 -4.7955425 0 -2.2823355 5.670064 + 100 1.6458363 -4.7492704 0 -2.2811332 5.8691042 + 150 1.6324555 -4.7286791 0 -2.280608 5.9589514 + 200 1.6630725 -4.7750988 0 -2.2811136 5.7364886 + 250 1.6275257 -4.7224992 0 -2.281821 5.9567365 +Loop time of 20.9283 on 1 procs for 250 steps with 4000 atoms + +Performance: 5160.475 tau/day, 11.946 timesteps/s +98.6% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 20.809 | 20.809 | 20.809 | 0.0 | 99.43 +Neigh | 0.088638 | 0.088638 | 0.088638 | 0.0 | 0.42 +Comm | 0.013424 | 0.013424 | 0.013424 | 0.0 | 0.06 +Output | 0.0002737 | 0.0002737 | 0.0002737 | 0.0 | 0.00 +Modify | 0.014334 | 0.014334 | 0.014334 | 0.0 | 0.07 +Other | | 0.003089 | | | 0.01 + +Nlocal: 4000 ave 4000 max 4000 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 5499 ave 5499 max 5499 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 151513 ave 151513 max 151513 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 151513 +Ave neighs/atom = 37.8783 +Neighbor list builds = 12 +Dangerous builds not checked + +write_data melt.data +write_restart melt.restart + +clear + using 1 OpenMP thread(s) per MPI task + +read_restart melt.restart + orthogonal box = (0 0 0) to (16.796 16.796 16.796) + 1 by 1 by 1 MPI processor grid + 4000 atoms + +pair_style python 2.5 +pair_coeff 
* * py_pot.LJCutMelt lj + +fix 1 all nve + +thermo 50 +run 250 +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 2.8 + ghost atom cutoff = 2.8 + binsize = 1.4, bins = 12 12 12 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair python, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 3.36 | 3.36 | 3.36 Mbytes +Step Temp E_pair E_mol TotEng Press + 250 1.6275257 -4.7224992 0 -2.281821 5.9567365 + 300 1.645592 -4.7496711 0 -2.2819002 5.8734193 + 350 1.6514972 -4.7580756 0 -2.2814491 5.810167 + 400 1.6540555 -4.7622999 0 -2.281837 5.8200413 + 450 1.6264734 -4.7200865 0 -2.2809863 5.9546991 + 500 1.6366891 -4.7350979 0 -2.2806781 5.9369284 +Loop time of 21.1422 on 1 procs for 250 steps with 4000 atoms + +Performance: 5108.279 tau/day, 11.825 timesteps/s +98.5% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 20.925 | 20.925 | 20.925 | 0.0 | 98.97 +Neigh | 0.18452 | 0.18452 | 0.18452 | 0.0 | 0.87 +Comm | 0.014836 | 0.014836 | 0.014836 | 0.0 | 0.07 +Output | 0.00027108 | 0.00027108 | 0.00027108 | 0.0 | 0.00 +Modify | 0.01366 | 0.01366 | 0.01366 | 0.0 | 0.06 +Other | | 0.004355 | | | 0.02 + +Nlocal: 4000 ave 4000 max 4000 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 5472 ave 5472 max 5472 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 151513 ave 151513 max 151513 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 151513 +Ave neighs/atom = 37.8783 +Neighbor list builds = 25 +Dangerous builds = 25 + +clear + using 1 OpenMP thread(s) per MPI task + +units lj +atom_style atomic + +read_data melt.data + orthogonal box = (0 0 0) to (16.796 16.796 16.796) + 1 by 1 by 1 MPI 
processor grid + reading atoms ... + 4000 atoms + reading velocities ... + 4000 velocities + +pair_style python 2.5 +pair_coeff * * py_pot.LJCutMelt lj + +neighbor 0.3 bin +neigh_modify every 20 delay 0 check no + +fix 1 all nve + +thermo 50 +run 250 +Neighbor list info ... + update every 20 steps, delay 0 steps, check no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 2.8 + ghost atom cutoff = 2.8 + binsize = 1.4, bins = 12 12 12 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair python, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 2.86 | 2.86 | 2.86 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 1.6275257 -4.7224992 0 -2.281821 5.9567365 + 50 1.6454666 -4.7497515 0 -2.2821686 5.8729175 + 100 1.6512008 -4.7582693 0 -2.2820874 5.8090548 + 150 1.6537193 -4.7627023 0 -2.2827434 5.8177704 + 200 1.6258731 -4.7205017 0 -2.2823017 5.952511 + 250 1.6370862 -4.7373176 0 -2.2823022 5.925807 +Loop time of 21.1026 on 1 procs for 250 steps with 4000 atoms + +Performance: 5117.845 tau/day, 11.847 timesteps/s +98.7% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 20.984 | 20.984 | 20.984 | 0.0 | 99.44 +Neigh | 0.088639 | 0.088639 | 0.088639 | 0.0 | 0.42 +Comm | 0.012881 | 0.012881 | 0.012881 | 0.0 | 0.06 +Output | 0.00028563 | 0.00028563 | 0.00028563 | 0.0 | 0.00 +Modify | 0.013523 | 0.013523 | 0.013523 | 0.0 | 0.06 +Other | | 0.003033 | | | 0.01 + +Nlocal: 4000 ave 4000 max 4000 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 5487 ave 5487 max 5487 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 151490 ave 151490 max 151490 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 151490 +Ave neighs/atom = 37.8725 +Neighbor list builds = 12 +Dangerous 
builds not checked + +shell rm melt.data melt.restart +Total wall time: 0:01:05 diff --git a/examples/python/log.4May17.pair_python_melt.g++.4 b/examples/python/log.4May17.pair_python_melt.g++.4 new file mode 100644 index 0000000000..58dae340bd --- /dev/null +++ b/examples/python/log.4May17.pair_python_melt.g++.4 @@ -0,0 +1,214 @@ +LAMMPS (4 May 2017) + using 1 OpenMP thread(s) per MPI task +# 3d Lennard-Jones melt + +units lj +atom_style atomic + +lattice fcc 0.8442 +Lattice spacing in x,y,z = 1.6796 1.6796 1.6796 +region box block 0 10 0 10 0 10 +create_box 1 box +Created orthogonal box = (0 0 0) to (16.796 16.796 16.796) + 1 by 2 by 2 MPI processor grid +create_atoms 1 box +Created 4000 atoms +mass * 1.0 + +velocity all create 3.0 87287 + +pair_style python 2.5 +pair_coeff * * py_pot.LJCutMelt lj + +neighbor 0.3 bin +neigh_modify every 20 delay 0 check no + +fix 1 all nve + +thermo 50 +run 250 +Neighbor list info ... + update every 20 steps, delay 0 steps, check no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 2.8 + ghost atom cutoff = 2.8 + binsize = 1.4, bins = 12 12 12 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair python, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 2.69 | 2.69 | 2.69 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 3 -6.7733681 0 -2.2744931 -3.7033504 + 50 1.6754119 -4.7947589 0 -2.2822693 5.6615925 + 100 1.6503357 -4.756014 0 -2.2811293 5.8050524 + 150 1.6596605 -4.7699432 0 -2.2810749 5.7830138 + 200 1.6371874 -4.7365462 0 -2.2813789 5.9246674 + 250 1.6323462 -4.7292021 0 -2.2812949 5.9762238 +Loop time of 5.65922 on 4 procs for 250 steps with 4000 atoms + +Performance: 19083.895 tau/day, 44.176 timesteps/s +98.3% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total 
+--------------------------------------------------------------- +Pair | 5.4529 | 5.5207 | 5.5575 | 1.7 | 97.55 +Neigh | 0.023164 | 0.023376 | 0.023883 | 0.2 | 0.41 +Comm | 0.073318 | 0.1099 | 0.17804 | 12.2 | 1.94 +Output | 0.00023365 | 0.00026143 | 0.00030684 | 0.0 | 0.00 +Modify | 0.0036483 | 0.0037143 | 0.003896 | 0.2 | 0.07 +Other | | 0.001274 | | | 0.02 + +Nlocal: 1000 ave 1010 max 982 min +Histogram: 1 0 0 0 0 0 1 0 0 2 +Nghost: 2703.75 ave 2713 max 2689 min +Histogram: 1 0 0 0 0 0 0 2 0 1 +Neighs: 37915.5 ave 39239 max 36193 min +Histogram: 1 0 0 0 0 1 1 0 0 1 + +Total # of neighbors = 151662 +Ave neighs/atom = 37.9155 +Neighbor list builds = 12 +Dangerous builds not checked + +write_data melt.data +write_restart melt.restart + +clear + using 1 OpenMP thread(s) per MPI task + +read_restart melt.restart + orthogonal box = (0 0 0) to (16.796 16.796 16.796) + 1 by 2 by 2 MPI processor grid + 4000 atoms + +pair_style python 2.5 +pair_coeff * * py_pot.LJCutMelt lj + +fix 1 all nve + +thermo 50 +run 250 +Neighbor list info ... 
+ update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 2.8 + ghost atom cutoff = 2.8 + binsize = 1.4, bins = 12 12 12 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair python, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 2.815 | 2.816 | 2.816 Mbytes +Step Temp E_pair E_mol TotEng Press + 250 1.6323462 -4.7292062 0 -2.2812991 5.9762168 + 300 1.6451788 -4.7488091 0 -2.2816578 5.8375485 + 350 1.6171909 -4.7064928 0 -2.2813129 6.0094235 + 400 1.6388136 -4.7387093 0 -2.2811035 5.9331084 + 450 1.6431295 -4.7452215 0 -2.2811435 5.8929898 + 500 1.643316 -4.7454222 0 -2.2810644 5.8454817 +Loop time of 5.70169 on 4 procs for 250 steps with 4000 atoms + +Performance: 18941.760 tau/day, 43.847 timesteps/s +98.3% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 5.3919 | 5.4905 | 5.6136 | 3.7 | 96.30 +Neigh | 0.046791 | 0.047817 | 0.048795 | 0.3 | 0.84 +Comm | 0.034221 | 0.1575 | 0.25635 | 22.1 | 2.76 +Output | 0.00020409 | 0.00023448 | 0.00026131 | 0.0 | 0.00 +Modify | 0.0035028 | 0.0035674 | 0.0036926 | 0.1 | 0.06 +Other | | 0.002079 | | | 0.04 + +Nlocal: 1000 ave 1012 max 983 min +Histogram: 1 0 0 0 0 0 2 0 0 1 +Nghost: 2699 ave 2706 max 2693 min +Histogram: 1 1 0 0 0 0 1 0 0 1 +Neighs: 37930.8 ave 39292 max 36264 min +Histogram: 1 0 0 0 1 0 0 1 0 1 + +Total # of neighbors = 151723 +Ave neighs/atom = 37.9308 +Neighbor list builds = 25 +Dangerous builds = 25 + +clear + using 1 OpenMP thread(s) per MPI task + +units lj +atom_style atomic + +read_data melt.data + orthogonal box = (0 0 0) to (16.796 16.796 16.796) + 1 by 2 by 2 MPI processor grid + reading atoms ... + 4000 atoms + reading velocities ... 
+ 4000 velocities + +pair_style python 2.5 +pair_coeff * * py_pot.LJCutMelt lj + +neighbor 0.3 bin +neigh_modify every 20 delay 0 check no + +fix 1 all nve + +thermo 50 +run 250 +Neighbor list info ... + update every 20 steps, delay 0 steps, check no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 2.8 + ghost atom cutoff = 2.8 + binsize = 1.4, bins = 12 12 12 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair python, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 2.315 | 2.316 | 2.316 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 1.6323462 -4.7292062 0 -2.2812991 5.9762168 + 50 1.6450626 -4.7488948 0 -2.2819177 5.8370409 + 100 1.6169004 -4.7066969 0 -2.2819526 6.0082546 + 150 1.6384234 -4.7389689 0 -2.2819482 5.9315273 + 200 1.6428814 -4.7460743 0 -2.2823683 5.8888228 + 250 1.6432631 -4.7466603 0 -2.2823818 5.8398819 +Loop time of 5.69568 on 4 procs for 250 steps with 4000 atoms + +Performance: 18961.751 tau/day, 43.893 timesteps/s +98.3% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 5.4041 | 5.5245 | 5.6139 | 3.2 | 96.99 +Neigh | 0.022658 | 0.022986 | 0.023398 | 0.2 | 0.40 +Comm | 0.053521 | 0.14309 | 0.26385 | 20.2 | 2.51 +Output | 0.00027037 | 0.00029504 | 0.00033665 | 0.0 | 0.01 +Modify | 0.0035288 | 0.0035585 | 0.0035827 | 0.0 | 0.06 +Other | | 0.001275 | | | 0.02 + +Nlocal: 1000 ave 1013 max 989 min +Histogram: 1 0 0 1 0 1 0 0 0 1 +Nghost: 2695.5 ave 2706 max 2682 min +Histogram: 1 0 0 0 0 0 2 0 0 1 +Neighs: 37927.2 ave 39002 max 36400 min +Histogram: 1 0 0 0 1 0 0 0 0 2 + +Total # of neighbors = 151709 +Ave neighs/atom = 37.9273 +Neighbor list builds = 12 +Dangerous builds not checked + +shell rm melt.data melt.restart +Total wall 
time: 0:00:17 diff --git a/examples/python/log.4May17.pair_python_spce.g++.1 b/examples/python/log.4May17.pair_python_spce.g++.1 new file mode 100644 index 0000000000..540c06853f --- /dev/null +++ b/examples/python/log.4May17.pair_python_spce.g++.1 @@ -0,0 +1,122 @@ +LAMMPS (4 May 2017) + using 1 OpenMP thread(s) per MPI task +units real +atom_style full + +read_data data.spce + orthogonal box = (0.02645 0.02645 0.02641) to (35.5328 35.5328 35.4736) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 4500 atoms + scanning bonds ... + 2 = max bonds/atom + scanning angles ... + 1 = max angles/atom + reading bonds ... + 3000 bonds + reading angles ... + 1500 angles + 2 = max # of 1-2 neighbors + 1 = max # of 1-3 neighbors + 1 = max # of 1-4 neighbors + 2 = max # of special neighbors + +pair_style hybrid/overlay coul/long 12.0 python 12.0 +kspace_style pppm 1.0e-6 + +pair_coeff * * coul/long +pair_coeff * * python py_pot.LJCutSPCE OW NULL + +bond_style harmonic +angle_style harmonic +dihedral_style none +improper_style none + +bond_coeff 1 1000.00 1.000 +angle_coeff 1 100.0 109.47 + +special_bonds lj/coul 0.0 0.0 1.0 + 2 = max # of 1-2 neighbors + 1 = max # of 1-3 neighbors + 2 = max # of special neighbors + +neighbor 2.0 bin + +fix 1 all shake 0.0001 20 0 b 1 a 1 + 0 = # of size 2 clusters + 0 = # of size 3 clusters + 0 = # of size 4 clusters + 1500 = # of frozen angles +fix 2 all nvt temp 300.0 300.0 100.0 + +thermo 10 +run 100 +PPPM initialization ... +WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321) + G vector (1/distance) = 0.279652 + grid = 40 40 40 + stencil order = 5 + estimated absolute RMS force accuracy = 0.000394674 + estimated relative force accuracy = 1.18855e-06 + using double precision FFTs + 3d grid and FFT values/proc = 103823 64000 +Neighbor list info ... 
+ update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 14 + ghost atom cutoff = 14 + binsize = 7, bins = 6 6 6 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair coul/long, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard + (2) pair python, perpetual, skip from (1) + attributes: half, newton on + pair build: skip + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 41.05 | 41.05 | 41.05 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 0 -16692.369 0 -16692.369 -1289.222 + 10 120.56861 -17769.719 0 -16691.902 -4082.7098 + 20 136.08014 -17884.591 0 -16668.109 -5140.7824 + 30 136.97316 -17874.351 0 -16649.887 -5351.3571 + 40 153.37285 -18001.493 0 -16630.424 -5227.0601 + 50 167.70414 -18105.435 0 -16606.252 -4473.2089 + 60 163.08253 -18037.29 0 -16579.422 -3295.8963 + 70 169.60395 -18067.078 0 -16550.912 -2615.7026 + 80 182.94811 -18155.978 0 -16520.523 -2393.3156 + 90 191.29902 -18197.887 0 -16487.779 -2242.7104 + 100 194.70949 -18195.021 0 -16454.425 -1955.2916 +Loop time of 23.5385 on 1 procs for 100 steps with 4500 atoms + +Performance: 0.367 ns/day, 65.385 hours/ns, 4.248 timesteps/s +98.9% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 21.642 | 21.642 | 21.642 | 0.0 | 91.94 +Bond | 0.00021696 | 0.00021696 | 0.00021696 | 0.0 | 0.00 +Kspace | 1.5436 | 1.5436 | 1.5436 | 0.0 | 6.56 +Neigh | 0.25623 | 0.25623 | 0.25623 | 0.0 | 1.09 +Comm | 0.024325 | 0.024325 | 0.024325 | 0.0 | 0.10 +Output | 0.00064301 | 0.00064301 | 0.00064301 | 0.0 | 0.00 +Modify | 0.065919 | 0.065919 | 0.065919 | 0.0 | 0.28 +Other | | 0.005401 | | | 0.02 + +Nlocal: 4500 ave 4500 max 4500 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 21216 ave 21216 max 21216 min 
+Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 2.60176e+06 ave 2.60176e+06 max 2.60176e+06 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 2601762 +Ave neighs/atom = 578.169 +Ave special neighs/atom = 2 +Neighbor list builds = 3 +Dangerous builds = 0 +Total wall time: 0:00:24 diff --git a/examples/python/log.4May17.pair_python_spce.g++.4 b/examples/python/log.4May17.pair_python_spce.g++.4 new file mode 100644 index 0000000000..332c079ec1 --- /dev/null +++ b/examples/python/log.4May17.pair_python_spce.g++.4 @@ -0,0 +1,122 @@ +LAMMPS (4 May 2017) + using 1 OpenMP thread(s) per MPI task +units real +atom_style full + +read_data data.spce + orthogonal box = (0.02645 0.02645 0.02641) to (35.5328 35.5328 35.4736) + 2 by 2 by 1 MPI processor grid + reading atoms ... + 4500 atoms + scanning bonds ... + 2 = max bonds/atom + scanning angles ... + 1 = max angles/atom + reading bonds ... + 3000 bonds + reading angles ... + 1500 angles + 2 = max # of 1-2 neighbors + 1 = max # of 1-3 neighbors + 1 = max # of 1-4 neighbors + 2 = max # of special neighbors + +pair_style hybrid/overlay coul/long 12.0 python 12.0 +kspace_style pppm 1.0e-6 + +pair_coeff * * coul/long +pair_coeff * * python py_pot.LJCutSPCE OW NULL + +bond_style harmonic +angle_style harmonic +dihedral_style none +improper_style none + +bond_coeff 1 1000.00 1.000 +angle_coeff 1 100.0 109.47 + +special_bonds lj/coul 0.0 0.0 1.0 + 2 = max # of 1-2 neighbors + 1 = max # of 1-3 neighbors + 2 = max # of special neighbors + +neighbor 2.0 bin + +fix 1 all shake 0.0001 20 0 b 1 a 1 + 0 = # of size 2 clusters + 0 = # of size 3 clusters + 0 = # of size 4 clusters + 1500 = # of frozen angles +fix 2 all nvt temp 300.0 300.0 100.0 + +thermo 10 +run 100 +PPPM initialization ... 
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321) + G vector (1/distance) = 0.279652 + grid = 40 40 40 + stencil order = 5 + estimated absolute RMS force accuracy = 0.000394674 + estimated relative force accuracy = 1.18855e-06 + using double precision FFTs + 3d grid and FFT values/proc = 34263 16000 +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 14 + ghost atom cutoff = 14 + binsize = 7, bins = 6 6 6 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair coul/long, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard + (2) pair python, perpetual, skip from (1) + attributes: half, newton on + pair build: skip + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 14.59 | 14.59 | 14.59 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 0 -16692.369 0 -16692.369 -1289.222 + 10 120.56861 -17769.719 0 -16691.902 -4082.7098 + 20 136.08014 -17884.591 0 -16668.109 -5140.7824 + 30 136.97316 -17874.351 0 -16649.887 -5351.3571 + 40 153.37285 -18001.493 0 -16630.424 -5227.0601 + 50 167.70414 -18105.435 0 -16606.252 -4473.2089 + 60 163.08253 -18037.29 0 -16579.422 -3295.8963 + 70 169.60395 -18067.078 0 -16550.912 -2615.7026 + 80 182.94811 -18155.978 0 -16520.523 -2393.3156 + 90 191.29902 -18197.887 0 -16487.779 -2242.7104 + 100 194.70949 -18195.021 0 -16454.425 -1955.2916 +Loop time of 6.4942 on 4 procs for 100 steps with 4500 atoms + +Performance: 1.330 ns/day, 18.039 hours/ns, 15.398 timesteps/s +98.7% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 5.4084 | 5.572 | 5.8013 | 7.2 | 85.80 +Bond | 0.00012994 | 0.0001421 | 0.00016356 | 0.0 | 0.00 +Kspace | 0.52942 | 0.75773 | 0.92078 | 19.5 | 11.67 +Neigh | 
0.071055 | 0.07116 | 0.071278 | 0.0 | 1.10 +Comm | 0.040311 | 0.041255 | 0.041817 | 0.3 | 0.64 +Output | 0.00040603 | 0.00048071 | 0.00058675 | 0.0 | 0.01 +Modify | 0.047507 | 0.047629 | 0.047772 | 0.1 | 0.73 +Other | | 0.003771 | | | 0.06 + +Nlocal: 1125 ave 1154 max 1092 min +Histogram: 1 0 0 0 1 0 0 1 0 1 +Nghost: 12256.2 ave 12296 max 12213 min +Histogram: 1 0 1 0 0 0 0 0 1 1 +Neighs: 650440 ave 678828 max 626375 min +Histogram: 1 0 0 0 2 0 0 0 0 1 + +Total # of neighbors = 2601762 +Ave neighs/atom = 578.169 +Ave special neighs/atom = 2 +Neighbor list builds = 3 +Dangerous builds = 0 +Total wall time: 0:00:06 diff --git a/examples/python/log.4May17.pair_python_table.g++.1 b/examples/python/log.4May17.pair_python_table.g++.1 new file mode 100644 index 0000000000..c594a8e90a --- /dev/null +++ b/examples/python/log.4May17.pair_python_table.g++.1 @@ -0,0 +1,99 @@ +LAMMPS (4 May 2017) + using 1 OpenMP thread(s) per MPI task +# 3d Lennard-Jones melt + +units lj +atom_style atomic + +lattice fcc 0.8442 +Lattice spacing in x,y,z = 1.6796 1.6796 1.6796 +region box block 0 10 0 10 0 10 +create_box 1 box +Created orthogonal box = (0 0 0) to (16.796 16.796 16.796) + 1 by 1 by 1 MPI processor grid +create_atoms 1 box +Created 4000 atoms +mass * 1.0 + +velocity all create 3.0 87287 + +pair_style python 2.5 +pair_coeff * * py_pot.LJCutMelt lj + +# generate tabulated potential from python variant +pair_write 1 1 2000 rsq 0.01 2.5 lj_1_1.table LJ +Neighbor list info ... 
+ update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 2.8 + ghost atom cutoff = 2.8 + binsize = 1.4, bins = 12 12 12 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair python, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard + +pair_style table linear 2000 +pair_coeff 1 1 lj_1_1.table LJ +WARNING: 2 of 2000 force values in table are inconsistent with -dE/dr. + Should only be flagged at inflection points (../pair_table.cpp:476) + +neighbor 0.3 bin +neigh_modify every 20 delay 0 check no + +fix 1 all nve + +thermo 50 +run 250 +Neighbor list info ... + update every 20 steps, delay 0 steps, check no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 2.8 + ghost atom cutoff = 2.8 + binsize = 1.4, bins = 12 12 12 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair table, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 3.184 | 3.184 | 3.184 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 3 -6.7733629 0 -2.2744879 -3.7032813 + 50 1.6758731 -4.7953067 0 -2.2821255 5.6706553 + 100 1.6458118 -4.7490281 0 -2.2809276 5.8697466 + 150 1.632425 -4.7284533 0 -2.2804279 5.9595684 + 200 1.6631578 -4.7749889 0 -2.2808759 5.7365839 + 250 1.6277062 -4.7224727 0 -2.2815238 5.9572913 +Loop time of 0.996739 on 1 procs for 250 steps with 4000 atoms + +Performance: 108353.298 tau/day, 250.818 timesteps/s +99.8% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.87985 | 0.87985 | 0.87985 | 0.0 | 88.27 +Neigh | 0.08799 | 0.08799 | 0.08799 | 0.0 | 8.83 +Comm | 0.012301 | 0.012301 | 0.012301 | 0.0 | 1.23 +Output | 
0.00013161 | 0.00013161 | 0.00013161 | 0.0 | 0.01 +Modify | 0.013656 | 0.013656 | 0.013656 | 0.0 | 1.37 +Other | | 0.002808 | | | 0.28 + +Nlocal: 4000 ave 4000 max 4000 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 5500 ave 5500 max 5500 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 151496 ave 151496 max 151496 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 151496 +Ave neighs/atom = 37.874 +Neighbor list builds = 12 +Dangerous builds not checked + +shell rm lj_1_1.table + +Total wall time: 0:00:01 diff --git a/examples/python/log.4May17.pair_python_table.g++.4 b/examples/python/log.4May17.pair_python_table.g++.4 new file mode 100644 index 0000000000..e509fc7f6a --- /dev/null +++ b/examples/python/log.4May17.pair_python_table.g++.4 @@ -0,0 +1,99 @@ +LAMMPS (4 May 2017) + using 1 OpenMP thread(s) per MPI task +# 3d Lennard-Jones melt + +units lj +atom_style atomic + +lattice fcc 0.8442 +Lattice spacing in x,y,z = 1.6796 1.6796 1.6796 +region box block 0 10 0 10 0 10 +create_box 1 box +Created orthogonal box = (0 0 0) to (16.796 16.796 16.796) + 1 by 2 by 2 MPI processor grid +create_atoms 1 box +Created 4000 atoms +mass * 1.0 + +velocity all create 3.0 87287 + +pair_style python 2.5 +pair_coeff * * py_pot.LJCutMelt lj + +# generate tabulated potential from python variant +pair_write 1 1 2000 rsq 0.01 2.5 lj_1_1.table LJ +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 2.8 + ghost atom cutoff = 2.8 + binsize = 1.4, bins = 12 12 12 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair python, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard + +pair_style table linear 2000 +pair_coeff 1 1 lj_1_1.table LJ +WARNING: 2 of 2000 force values in table are inconsistent with -dE/dr. 
+ Should only be flagged at inflection points (../pair_table.cpp:476) + +neighbor 0.3 bin +neigh_modify every 20 delay 0 check no + +fix 1 all nve + +thermo 50 +run 250 +Neighbor list info ... + update every 20 steps, delay 0 steps, check no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 2.8 + ghost atom cutoff = 2.8 + binsize = 1.4, bins = 12 12 12 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair table, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 2.69 | 2.69 | 2.69 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 3 -6.7733629 0 -2.2744879 -3.7032813 + 50 1.675395 -4.7945736 0 -2.2821094 5.6620623 + 100 1.6503067 -4.7558145 0 -2.2809733 5.8055967 + 150 1.6595852 -4.7697199 0 -2.2809644 5.7837898 + 200 1.6371471 -4.7363942 0 -2.2812874 5.924977 + 250 1.6315623 -4.7278268 0 -2.2810951 5.9807196 +Loop time of 0.291846 on 4 procs for 250 steps with 4000 atoms + +Performance: 370058.286 tau/day, 856.616 timesteps/s +99.4% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.22586 | 0.23364 | 0.24085 | 1.3 | 80.06 +Neigh | 0.022808 | 0.023235 | 0.023602 | 0.2 | 7.96 +Comm | 0.022573 | 0.030065 | 0.038092 | 3.9 | 10.30 +Output | 0.00013423 | 0.00014067 | 0.00015759 | 0.0 | 0.05 +Modify | 0.0035079 | 0.0035501 | 0.0036008 | 0.1 | 1.22 +Other | | 0.001211 | | | 0.42 + +Nlocal: 1000 ave 1010 max 981 min +Histogram: 1 0 0 0 0 0 1 0 0 2 +Nghost: 2703 ave 2715 max 2688 min +Histogram: 1 0 0 0 0 1 1 0 0 1 +Neighs: 37915.2 ave 39191 max 36151 min +Histogram: 1 0 0 0 0 1 0 1 0 1 + +Total # of neighbors = 151661 +Ave neighs/atom = 37.9153 +Neighbor list builds = 12 +Dangerous builds not checked + +shell rm lj_1_1.table + +Total wall time: 0:00:00 diff --git 
a/examples/python/py_pot.py b/examples/python/py_pot.py
new file mode 100644
index 0000000000..5699bd082c
--- /dev/null
+++ b/examples/python/py_pot.py
@@ -0,0 +1,77 @@
+# Example potentials for the LAMMPS "pair_style python" command.
+# A class from this module is selected in the input script, e.g.
+#   pair_coeff * * py_pot.LJCutMelt lj
+# where the trailing words name the atom types (see map_coeff()).
+
+from __future__ import print_function
+
+class LAMMPSPairPotential(object):
+    """Bookkeeping shared by all example potentials.
+
+    pmap  maps a numeric atom type to the name given to map_coeff()
+    units unit style the coefficients are valid for (see check_units())
+    """
+    def __init__(self):
+        self.pmap=dict()
+        self.units='lj'
+    def map_coeff(self,name,ltype):
+        """Remember that atom type ltype is called name."""
+        self.pmap[ltype]=name
+    def check_units(self,units):
+        """Raise an Exception unless units matches self.units."""
+        if (units != self.units):
+           raise Exception("Conflicting units: %s vs. %s" % (self.units,units))
+
+class LJCutPotential(LAMMPSPairPotential):
+    """12-6 Lennard-Jones formulas shared by the LJCut* examples.
+
+    Subclasses set self.coeff[name_i][name_j] to the 4-tuple
+    (48*eps*sig**12, 24*eps*sig**6, 4*eps*sig**12, 4*eps*sig**6).
+    Looking up a type or pair without an entry raises KeyError.
+    """
+    def __init__(self):
+        super(LJCutPotential,self).__init__()
+        self.coeff = dict()
+
+    def compute_force(self,rsq,itype,jtype):
+        """Return F/r, i.e. (-dE/dr)/r, at squared distance rsq."""
+        coeff = self.coeff[self.pmap[itype]][self.pmap[jtype]]
+        r2inv  = 1.0/rsq
+        r6inv  = r2inv*r2inv*r2inv
+        lj1 = coeff[0]
+        lj2 = coeff[1]
+        return (r6inv * (lj1*r6inv - lj2))*r2inv
+
+    def compute_energy(self,rsq,itype,jtype):
+        """Return the pair energy E at squared distance rsq."""
+        coeff = self.coeff[self.pmap[itype]][self.pmap[jtype]]
+        r2inv  = 1.0/rsq
+        r6inv  = r2inv*r2inv*r2inv
+        lj3 = coeff[2]
+        lj4 = coeff[3]
+        return (r6inv * (lj3*r6inv - lj4))
+
+
+class LJCutMelt(LJCutPotential):
+    """Reduced-unit LJ fluid with eps = sig = 1 for the type named 'lj'."""
+    def __init__(self):
+        super(LJCutMelt,self).__init__()
+        # set coeffs: 48*eps*sig**12, 24*eps*sig**6,
+        #              4*eps*sig**12,  4*eps*sig**6
+        self.units = 'lj'
+        self.coeff = {'lj'  : {'lj'  : (48.0,24.0,4.0,4.0)}}
+
+
+class LJCutSPCE(LJCutPotential):
+    """LJ part of SPC/E water in 'real' units: only OW-OW pairs interact."""
+    def __init__(self):
+        super(LJCutSPCE,self).__init__()
+        self.units='real'
+        # SPCE oxygen LJ parameters in real units
+        eps=0.15535
+        sig=3.166
+        self.coeff = {'OW'  : {'OW'  : (48.0*eps*sig**12,24.0*eps*sig**6,
+                                         4.0*eps*sig**12, 4.0*eps*sig**6),
+                                'HW'  : (0.0,0.0, 0.0,0.0)},
+                      'HW'  : {'OW'  : (0.0,0.0, 0.0,0.0),
+                                'HW'  : (0.0,0.0, 0.0,0.0)}}
diff --git a/lib/Install.py b/lib/Install.py new file mode 100644 index 0000000000..18b426f928 --- /dev/null +++ b/lib/Install.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python + +# install.py tool to do a generic build of a library +# soft linked to by many of the lib/Install.py files +# used to automate the steps described in the corresponding lib/README + +import sys,commands,os + +# help message + +help = """ +Syntax: python Install.py -m machine -e suffix + specify -m and optionally -e, order does not matter + -m = peform a clean followed by "make -f Makefile.machine" + machine = suffix of a lib/Makefile.* file + -e = set EXTRAMAKE variable in Makefile.machine to Makefile.lammps.suffix + does not alter existing Makefile.machine +""" + +# print error message or help + +def error(str=None): + if not str: print help + else: print "ERROR",str + sys.exit() + +# parse args + +args = sys.argv[1:] +nargs = len(args) +if nargs == 0: error() + +machine = None +extraflag = 0 + +iarg = 0 +while iarg < nargs: + if args[iarg] == "-m": + if iarg+2 > nargs: error() + machine = args[iarg+1] + iarg += 2 + elif args[iarg] == "-e": + if iarg+2 > nargs: error() + extraflag = 1 + suffix = args[iarg+1] + iarg += 2 + else: error() + +# set lib from working dir + +cwd = os.getcwd() +lib = os.path.basename(cwd) + +# create Makefile.auto as copy of Makefile.machine +# reset EXTRAMAKE if requested + +if not os.path.exists("Makefile.%s" % machine): + error("lib/%s/Makefile.%s does not exist" % (lib,machine)) + +lines = open("Makefile.%s" % machine,'r').readlines() +fp = open("Makefile.auto",'w') + +for line in lines: + words = line.split() + if len(words) == 3 and extraflag and \ + words[0] == "EXTRAMAKE" and words[1] == '=': + line = line.replace(words[2],"Makefile.lammps.%s" % suffix) + print >>fp,line, + +fp.close() + +# make the library via Makefile.auto + +print "Building lib%s.a ..."
% lib +cmd = "make -f Makefile.auto clean; make -f Makefile.auto" +txt = commands.getoutput(cmd) +print txt + +if os.path.exists("lib%s.a" % lib): print "Build was successful" +else: error("Build of lib/%s/lib%s.a was NOT successful" % (lib,lib)) +if not os.path.exists("Makefile.lammps"): + print "lib/%s/Makefile.lammps was NOT created" % lib diff --git a/lib/README b/lib/README index 72ebb0a5f7..3c8f46dd0a 100644 --- a/lib/README +++ b/lib/README @@ -33,14 +33,16 @@ kokkos Kokkos package for GPU and many-core acceleration from Kokkos development team (Sandia) linalg set of BLAS and LAPACK routines needed by USER-ATC package from Axel Kohlmeyer (Temple U) -poems POEMS rigid-body integration package, POEMS package - from Rudranarayan Mukherjee (RPI) meam modified embedded atom method (MEAM) potential, MEAM package from Greg Wagner (Sandia) molfile hooks to VMD molfile plugins, used by the USER-MOLFILE package from Axel Kohlmeyer (Temple U) and the VMD development team mscg hooks to the MSCG library, used by fix_mscg command from Jacob Wagner and Greg Voth group (U Chicago) +netcdf hooks to a NetCDF library installed on your system + from Lars Pastewka (Karlsruhe Institute of Technology) +poems POEMS rigid-body integration package, POEMS package + from Rudranarayan Mukherjee (RPI) python hooks to the system Python library, used by the PYTHON package from the LAMMPS development team qmmm quantum mechanics/molecular mechanics coupling interface diff --git a/lib/atc/Install.py b/lib/atc/Install.py new file mode 100644 index 0000000000..18b426f928 --- /dev/null +++ b/lib/atc/Install.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python + +# install.py tool to do a generic build of a library +# soft linked to by many of the lib/Install.py files +# used to automate the steps described in the corresponding lib/README + +import sys,commands,os + +# help message + +help = """ +Syntax: python Install.py -m machine -e suffix + specify -m and optionally -e, order does not matter + -m = 
peform a clean followed by "make -f Makefile.machine" + machine = suffix of a lib/Makefile.* file + -e = set EXTRAMAKE variable in Makefile.machine to Makefile.lammps.suffix + does not alter existing Makefile.machine +""" + +# print error message or help + +def error(str=None): + if not str: print help + else: print "ERROR",str + sys.exit() + +# parse args + +args = sys.argv[1:] +nargs = len(args) +if nargs == 0: error() + +machine = None +extraflag = 0 + +iarg = 0 +while iarg < nargs: + if args[iarg] == "-m": + if iarg+2 > nargs: error() + machine = args[iarg+1] + iarg += 2 + elif args[iarg] == "-e": + if iarg+2 > nargs: error() + extraflag = 1 + suffix = args[iarg+1] + iarg += 2 + else: error() + +# set lib from working dir + +cwd = os.getcwd() +lib = os.path.basename(cwd) + +# create Makefile.auto as copy of Makefile.machine +# reset EXTRAMAKE if requested + +if not os.path.exists("Makefile.%s" % machine): + error("lib/%s/Makefile.%s does not exist" % (lib,machine)) + +lines = open("Makefile.%s" % machine,'r').readlines() +fp = open("Makefile.auto",'w') + +for line in lines: + words = line.split() + if len(words) == 3 and extraflag and \ + words[0] == "EXTRAMAKE" and words[1] == '=': + line = line.replace(words[2],"Makefile.lammps.%s" % suffix) + print >>fp,line, + +fp.close() + +# make the library via Makefile.auto + +print "Building lib%s.a ..." % lib +cmd = "make -f Makefile.auto clean; make -f Makefile.auto" +txt = commands.getoutput(cmd) +print txt + +if os.path.exists("lib%s.a" % lib): print "Build was successful" +else: error("Build of lib/%s/lib%s.a was NOT successful" % (lib,lib)) +if not os.path.exists("Makefile.lammps"): + print "lib/%s/Makefile.lammps was NOT created" % lib diff --git a/lib/atc/README b/lib/atc/README index 106c303dd1..d3adfdafe4 100644 --- a/lib/atc/README +++ b/lib/atc/README @@ -15,6 +15,11 @@ links against when using the USER-ATC package. 
This library must be built with a C++ compiler, before LAMMPS is built, so LAMMPS can link against it. +You can type "make lib-atc" from the src directory to see help on how +to build this library via make commands, or you can do the same thing +by typing "python Install.py" from within this directory, or you can +do it manually by following the instructions below. + Build the library using one of the provided Makefile.* files or create your own, specific to your compiler and system. For example: @@ -44,16 +49,16 @@ user-atc_SYSINC = leave blank for this package user-atc_SYSLIB = BLAS and LAPACK libraries needed by this package user-atc_SYSPATH = path(s) to where those libraries are -You have several choices for these settings: +You have 3 choices for these settings: -If the 2 libraries are already installed on your system, the settings -in Makefile.lammps.installed should work. +a) If the 2 libraries are already installed on your system, the +settings in Makefile.lammps.installed should work. -If they are not, you can install them yourself, and speficy the -appropriate settings accordingly. +b) If they are not, you can install them yourself, and specify the +appropriate settings accordingly in a Makefile.lammps.* file +and set the EXTRAMAKE setting in Makefile.* to that file. -If you want to use the minimalist version of these libraries provided -with LAMMPS in lib/linalg, then the settings in Makefile.lammps.linalg -should work. Note that in this case you also need to build the -linear-algebra in lib/linalg; see the lib/linalg/README for more -details. +c) Use the minimalist version of these libraries provided with LAMMPS +in lib/linalg, by using Makefile.lammps.linalg. In this case you also +need to build the library in lib/linalg; see the lib/linalg/README +file for more details. 
diff --git a/lib/awpmd/Install.py b/lib/awpmd/Install.py new file mode 100644 index 0000000000..18b426f928 --- /dev/null +++ b/lib/awpmd/Install.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python + +# install.py tool to do a generic build of a library +# soft linked to by many of the lib/Install.py files +# used to automate the steps described in the corresponding lib/README + +import sys,commands,os + +# help message + +help = """ +Syntax: python Install.py -m machine -e suffix + specify -m and optionally -e, order does not matter + -m = peform a clean followed by "make -f Makefile.machine" + machine = suffix of a lib/Makefile.* file + -e = set EXTRAMAKE variable in Makefile.machine to Makefile.lammps.suffix + does not alter existing Makefile.machine +""" + +# print error message or help + +def error(str=None): + if not str: print help + else: print "ERROR",str + sys.exit() + +# parse args + +args = sys.argv[1:] +nargs = len(args) +if nargs == 0: error() + +machine = None +extraflag = 0 + +iarg = 0 +while iarg < nargs: + if args[iarg] == "-m": + if iarg+2 > nargs: error() + machine = args[iarg+1] + iarg += 2 + elif args[iarg] == "-e": + if iarg+2 > nargs: error() + extraflag = 1 + suffix = args[iarg+1] + iarg += 2 + else: error() + +# set lib from working dir + +cwd = os.getcwd() +lib = os.path.basename(cwd) + +# create Makefile.auto as copy of Makefile.machine +# reset EXTRAMAKE if requested + +if not os.path.exists("Makefile.%s" % machine): + error("lib/%s/Makefile.%s does not exist" % (lib,machine)) + +lines = open("Makefile.%s" % machine,'r').readlines() +fp = open("Makefile.auto",'w') + +for line in lines: + words = line.split() + if len(words) == 3 and extraflag and \ + words[0] == "EXTRAMAKE" and words[1] == '=': + line = line.replace(words[2],"Makefile.lammps.%s" % suffix) + print >>fp,line, + +fp.close() + +# make the library via Makefile.auto + +print "Building lib%s.a ..." 
% lib +cmd = "make -f Makefile.auto clean; make -f Makefile.auto" +txt = commands.getoutput(cmd) +print txt + +if os.path.exists("lib%s.a" % lib): print "Build was successful" +else: error("Build of lib/%s/lib%s.a was NOT successful" % (lib,lib)) +if not os.path.exists("Makefile.lammps"): + print "lib/%s/Makefile.lammps was NOT created" % lib diff --git a/lib/awpmd/README b/lib/awpmd/README index 3c02480419..20e142f74c 100644 --- a/lib/awpmd/README +++ b/lib/awpmd/README @@ -19,6 +19,11 @@ links against when using the USER-AWPMD package. This library must be built with a C++ compiler, before LAMMPS is built, so LAMMPS can link against it. +You can type "make lib-awpmd" from the src directory to see help on +how to build this library via make commands, or you can do the same +thing by typing "python Install.py" from within this directory, or you +can do it manually by following the instructions below. + Build the library using one of the provided Makefile.* files or create your own, specific to your compiler and system. For example: @@ -47,16 +52,16 @@ user-awpmd_SYSINC = leave blank for this package user-awpmd_SYSLIB = BLAS and LAPACK libraries needed by this package user-awpmd_SYSPATH = path(s) to where those libraries are -You have several choices for these settings: +You have 3 choices for these settings: -If the 2 libraries are already installed on your system, the settings -in Makefile.lammps.installed should work. +a) If the 2 libraries are already installed on your system, the +settings in Makefile.lammps.installed should work. -If they are not, you can install them yourself, and speficy the -appropriate settings accordingly. +b) If they are not, you can install them yourself, and specify the +appropriate settings accordingly in a Makefile.lammps.* file +and set the EXTRAMAKE setting in Makefile.* to that file. 
-If you want to use the minimalist version of these libraries provided -with LAMMPS in lib/linalg, then the settings in Makefile.lammps.linalg -should work. Note that in this case you also need to build the -linear-algebra in lib/linalg; see the lib/linalg/README for more -details. +c) Use the minimalist version of these libraries provided with LAMMPS +in lib/linalg, by using Makefile.lammps.linalg. In this case you also +need to build the library in lib/linalg; see the lib/linalg/README +file for more details. diff --git a/lib/colvars/Install.py b/lib/colvars/Install.py new file mode 100644 index 0000000000..18b426f928 --- /dev/null +++ b/lib/colvars/Install.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python + +# install.py tool to do a generic build of a library +# soft linked to by many of the lib/Install.py files +# used to automate the steps described in the corresponding lib/README + +import sys,commands,os + +# help message + +help = """ +Syntax: python Install.py -m machine -e suffix + specify -m and optionally -e, order does not matter + -m = perform a clean followed by "make -f Makefile.machine" + machine = suffix of a lib/Makefile.* file + -e = set EXTRAMAKE variable in Makefile.machine to Makefile.lammps.suffix + does not alter existing Makefile.machine +""" + +# print error message or help + +def error(str=None): + if not str: print help + else: print "ERROR",str + sys.exit() + +# parse args + +args = sys.argv[1:] +nargs = len(args) +if nargs == 0: error() + +machine = None +extraflag = 0 + +iarg = 0 +while iarg < nargs: + if args[iarg] == "-m": + if iarg+2 > nargs: error() + machine = args[iarg+1] + iarg += 2 + elif args[iarg] == "-e": + if iarg+2 > nargs: error() + extraflag = 1 + suffix = args[iarg+1] + iarg += 2 + else: error() + +# set lib from working dir + +cwd = os.getcwd() +lib = os.path.basename(cwd) + +# create Makefile.auto as copy of Makefile.machine +# reset EXTRAMAKE if requested + +if not os.path.exists("Makefile.%s" % machine): +
error("lib/%s/Makefile.%s does not exist" % (lib,machine)) + +lines = open("Makefile.%s" % machine,'r').readlines() +fp = open("Makefile.auto",'w') + +for line in lines: + words = line.split() + if len(words) == 3 and extraflag and \ + words[0] == "EXTRAMAKE" and words[1] == '=': + line = line.replace(words[2],"Makefile.lammps.%s" % suffix) + print >>fp,line, + +fp.close() + +# make the library via Makefile.auto + +print "Building lib%s.a ..." % lib +cmd = "make -f Makefile.auto clean; make -f Makefile.auto" +txt = commands.getoutput(cmd) +print txt + +if os.path.exists("lib%s.a" % lib): print "Build was successful" +else: error("Build of lib/%s/lib%s.a was NOT successful" % (lib,lib)) +if not os.path.exists("Makefile.lammps"): + print "lib/%s/Makefile.lammps was NOT created" % lib diff --git a/lib/colvars/README b/lib/colvars/README index d6efc333a5..a5e5938b20 100644 --- a/lib/colvars/README +++ b/lib/colvars/README @@ -35,6 +35,11 @@ links against when using the USER-COLVARS package. This library must be built with a C++ compiler, before LAMMPS is built, so LAMMPS can link against it. +You can type "make lib-colvars" from the src directory to see help on +how to build this library via make commands, or you can do the same +thing by typing "python Install.py" from within this directory, or you +can do it manually by following the instructions below. + Build the library using one of the provided Makefile.* files or create your own, specific to your compiler and system. 
For example: diff --git a/lib/gpu/Install.py b/lib/gpu/Install.py new file mode 100644 index 0000000000..d396be5e1a --- /dev/null +++ b/lib/gpu/Install.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python + +# Install.py tool to build the GPU library +# used to automate the steps described in the README file in this dir + +import sys,os,re,commands + +# help message + +help = """ +Syntax: python Install.py -i isuffix -h hdir -a arch -p precision -e esuffix -m -o osuffix + specify one or more options, order does not matter + copies an existing Makefile.isuffix in lib/gpu to Makefile.auto + optionally edits these variables in Makefile.auto: + CUDA_HOME, CUDA_ARCH, CUDA_PRECISION, EXTRAMAKE + optionally uses Makefile.auto to build the GPU library -> libgpu.a + and to copy a Makefile.lammps.esuffix -> Makefile.lammps + optionally copies Makefile.auto to a new Makefile.osuffix + + -i = use Makefile.isuffix as starting point, copy to Makefile.auto + default isuffix = linux + -h = set CUDA_HOME variable in Makefile.auto to hdir + hdir = path to NVIDIA Cuda software, e.g. /usr/local/cuda + -a = set CUDA_ARCH variable in Makefile.auto to arch + use arch = 35
for K40 (Tesla) + use arch = 37 for dual K80 (Tesla) + use arch = 60 for P100 (Pascal) + -p = set CUDA_PRECISION variable in Makefile.auto to precision + use precision = double or mixed or single + -e = set EXTRAMAKE variable in Makefile.auto to Makefile.lammps.esuffix + -m = make the GPU library using Makefile.auto + first performs a "make clean" + produces libgpu.a if successful + also copies EXTRAMAKE file -> Makefile.lammps + -e can set which Makefile.lammps.esuffix file is copied + -o = copy final Makefile.auto to Makefile.osuffix +""" + +# print error message or help + +def error(str=None): + if not str: print help + else: print "ERROR",str + sys.exit() + +# parse args + +args = sys.argv[1:] +nargs = len(args) +if nargs == 0: error() + +isuffix = "linux" +hflag = aflag = pflag = eflag = 0 +makeflag = 0 +outflag = 0 + +iarg = 0 +while iarg < nargs: + if args[iarg] == "-i": + if iarg+2 > nargs: error() + isuffix = args[iarg+1] + iarg += 2 + elif args[iarg] == "-h": + if iarg+2 > nargs: error() + hflag = 1 + hdir = args[iarg+1] + iarg += 2 + elif args[iarg] == "-a": + if iarg+2 > nargs: error() + aflag = 1 + arch = args[iarg+1] + iarg += 2 + elif args[iarg] == "-p": + if iarg+2 > nargs: error() + pflag = 1 + precision = args[iarg+1] + iarg += 2 + elif args[iarg] == "-e": + if iarg+2 > nargs: error() + eflag = 1 + lmpsuffix = args[iarg+1] + iarg += 2 + elif args[iarg] == "-m": + makeflag = 1 + iarg += 1 + elif args[iarg] == "-o": + if iarg+2 > nargs: error() + outflag = 1 + osuffix = args[iarg+1] + iarg += 2 + else: error() + +if pflag: + if precision == "double": precstr = "-D_DOUBLE_DOUBLE" + elif precision == "mixed": precstr = "-D_SINGLE_DOUBLE" + elif precision == "single": precstr = "-D_SINGLE_SINGLE" + else: error("Invalid precision setting") + +# create Makefile.auto +# reset EXTRAMAKE, CUDA_HOME, CUDA_ARCH, CUDA_PRECISION if requested + +if not os.path.exists("Makefile.%s" % isuffix): + error("lib/gpu/Makefile.%s does not exist" % isuffix) + +lines = 
open("Makefile.%s" % isuffix,'r').readlines() +fp = open("Makefile.auto",'w') + +for line in lines: + words = line.split() + if len(words) != 3: + print >>fp,line, + continue + + if hflag and words[0] == "CUDA_HOME" and words[1] == '=': + line = line.replace(words[2],hdir) + if aflag and words[0] == "CUDA_ARCH" and words[1] == '=': + line = line.replace(words[2],"-arch=sm_%s" % arch) + if pflag and words[0] == "CUDA_PRECISION" and words[1] == '=': + line = line.replace(words[2],precstr) + if eflag and words[0] == "EXTRAMAKE" and words[1] == '=': + line = line.replace(words[2],"Makefile.lammps.%s" % lmpsuffix) + + print >>fp,line, + +fp.close() + +# perform make +# make operations copies EXTRAMAKE file to Makefile.lammps + +if makeflag: + print "Building libgpu.a ..." + cmd = "rm -f libgpu.a" + commands.getoutput(cmd) + cmd = "make -f Makefile.auto clean; make -f Makefile.auto" + commands.getoutput(cmd) + if not os.path.exists("libgpu.a"): + error("Build of lib/gpu/libgpu.a was NOT successful") + if not os.path.exists("Makefile.lammps"): + error("lib/gpu/Makefile.lammps was NOT created") + +# copy new Makefile.auto to Makefile.osuffix + +if outflag: + print "Creating new Makefile.%s" % osuffix + cmd = "cp Makefile.auto Makefile.%s" % osuffix + commands.getoutput(cmd) diff --git a/lib/gpu/Nvidia.makefile b/lib/gpu/Nvidia.makefile index e02849cfed..660544cfaa 100644 --- a/lib/gpu/Nvidia.makefile +++ b/lib/gpu/Nvidia.makefile @@ -43,8 +43,8 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_ans.o \ $(OBJ_DIR)/lal_coul_long.o $(OBJ_DIR)/lal_coul_long_ext.o \ $(OBJ_DIR)/lal_morse.o $(OBJ_DIR)/lal_morse_ext.o \ $(OBJ_DIR)/lal_charmm_long.o $(OBJ_DIR)/lal_charmm_long_ext.o \ - $(OBJ_DIR)/lal_cg_cmm.o $(OBJ_DIR)/lal_cg_cmm_ext.o \ - $(OBJ_DIR)/lal_cg_cmm_long.o $(OBJ_DIR)/lal_cg_cmm_long_ext.o \ + $(OBJ_DIR)/lal_lj_sdk.o $(OBJ_DIR)/lal_lj_sdk_ext.o \ + $(OBJ_DIR)/lal_lj_sdk_long.o $(OBJ_DIR)/lal_lj_sdk_long_ext.o \ $(OBJ_DIR)/lal_eam.o $(OBJ_DIR)/lal_eam_ext.o \ 
$(OBJ_DIR)/lal_eam_fs_ext.o $(OBJ_DIR)/lal_eam_alloy_ext.o \ $(OBJ_DIR)/lal_buck.o $(OBJ_DIR)/lal_buck_ext.o \ @@ -98,8 +98,8 @@ CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \ $(OBJ_DIR)/coul_long.cubin $(OBJ_DIR)/coul_long_cubin.h \ $(OBJ_DIR)/morse.cubin $(OBJ_DIR)/morse_cubin.h \ $(OBJ_DIR)/charmm_long.cubin $(OBJ_DIR)/charmm_long_cubin.h \ - $(OBJ_DIR)/cg_cmm.cubin $(OBJ_DIR)/cg_cmm_cubin.h \ - $(OBJ_DIR)/cg_cmm_long.cubin $(OBJ_DIR)/cg_cmm_long_cubin.h \ + $(OBJ_DIR)/lj_sdk.cubin $(OBJ_DIR)/lj_sdk_cubin.h \ + $(OBJ_DIR)/lj_sdk_long.cubin $(OBJ_DIR)/lj_sdk_long_cubin.h \ $(OBJ_DIR)/eam.cubin $(OBJ_DIR)/eam_cubin.h \ $(OBJ_DIR)/buck.cubin $(OBJ_DIR)/buck_cubin.h \ $(OBJ_DIR)/buck_coul_long.cubin $(OBJ_DIR)/buck_coul_long_cubin.h \ @@ -391,29 +391,29 @@ $(OBJ_DIR)/lal_lj_expand.o: $(ALL_H) lal_lj_expand.h lal_lj_expand.cpp $(OBJ_DIR $(OBJ_DIR)/lal_lj_expand_ext.o: $(ALL_H) lal_lj_expand.h lal_lj_expand_ext.cpp lal_base_atomic.h $(CUDR) -o $@ -c lal_lj_expand_ext.cpp -I$(OBJ_DIR) -$(OBJ_DIR)/cg_cmm.cubin: lal_cg_cmm.cu lal_precision.h lal_preprocessor.h - $(CUDA) --cubin -DNV_KERNEL -o $@ lal_cg_cmm.cu +$(OBJ_DIR)/lj_sdk.cubin: lal_lj_sdk.cu lal_precision.h lal_preprocessor.h + $(CUDA) --cubin -DNV_KERNEL -o $@ lal_lj_sdk.cu -$(OBJ_DIR)/cg_cmm_cubin.h: $(OBJ_DIR)/cg_cmm.cubin $(OBJ_DIR)/cg_cmm.cubin - $(BIN2C) -c -n cg_cmm $(OBJ_DIR)/cg_cmm.cubin > $(OBJ_DIR)/cg_cmm_cubin.h +$(OBJ_DIR)/lj_sdk_cubin.h: $(OBJ_DIR)/lj_sdk.cubin $(OBJ_DIR)/lj_sdk.cubin + $(BIN2C) -c -n lj_sdk $(OBJ_DIR)/lj_sdk.cubin > $(OBJ_DIR)/lj_sdk_cubin.h -$(OBJ_DIR)/lal_cg_cmm.o: $(ALL_H) lal_cg_cmm.h lal_cg_cmm.cpp $(OBJ_DIR)/cg_cmm_cubin.h $(OBJ_DIR)/lal_base_atomic.o - $(CUDR) -o $@ -c lal_cg_cmm.cpp -I$(OBJ_DIR) +$(OBJ_DIR)/lal_lj_sdk.o: $(ALL_H) lal_lj_sdk.h lal_lj_sdk.cpp $(OBJ_DIR)/lj_sdk_cubin.h $(OBJ_DIR)/lal_base_atomic.o + $(CUDR) -o $@ -c lal_lj_sdk.cpp -I$(OBJ_DIR) -$(OBJ_DIR)/lal_cg_cmm_ext.o: $(ALL_H) lal_cg_cmm.h lal_cg_cmm_ext.cpp lal_base_atomic.h - $(CUDR) -o $@ -c 
lal_cg_cmm_ext.cpp -I$(OBJ_DIR) +$(OBJ_DIR)/lal_lj_sdk_ext.o: $(ALL_H) lal_lj_sdk.h lal_lj_sdk_ext.cpp lal_base_atomic.h + $(CUDR) -o $@ -c lal_lj_sdk_ext.cpp -I$(OBJ_DIR) -$(OBJ_DIR)/cg_cmm_long.cubin: lal_cg_cmm_long.cu lal_precision.h lal_preprocessor.h - $(CUDA) --cubin -DNV_KERNEL -o $@ lal_cg_cmm_long.cu +$(OBJ_DIR)/lj_sdk_long.cubin: lal_lj_sdk_long.cu lal_precision.h lal_preprocessor.h + $(CUDA) --cubin -DNV_KERNEL -o $@ lal_lj_sdk_long.cu -$(OBJ_DIR)/cg_cmm_long_cubin.h: $(OBJ_DIR)/cg_cmm_long.cubin $(OBJ_DIR)/cg_cmm_long.cubin - $(BIN2C) -c -n cg_cmm_long $(OBJ_DIR)/cg_cmm_long.cubin > $(OBJ_DIR)/cg_cmm_long_cubin.h +$(OBJ_DIR)/lj_sdk_long_cubin.h: $(OBJ_DIR)/lj_sdk_long.cubin $(OBJ_DIR)/lj_sdk_long.cubin + $(BIN2C) -c -n lj_sdk_long $(OBJ_DIR)/lj_sdk_long.cubin > $(OBJ_DIR)/lj_sdk_long_cubin.h -$(OBJ_DIR)/lal_cg_cmm_long.o: $(ALL_H) lal_cg_cmm_long.h lal_cg_cmm_long.cpp $(OBJ_DIR)/cg_cmm_long_cubin.h $(OBJ_DIR)/lal_base_atomic.o - $(CUDR) -o $@ -c lal_cg_cmm_long.cpp -I$(OBJ_DIR) +$(OBJ_DIR)/lal_lj_sdk_long.o: $(ALL_H) lal_lj_sdk_long.h lal_lj_sdk_long.cpp $(OBJ_DIR)/lj_sdk_long_cubin.h $(OBJ_DIR)/lal_base_atomic.o + $(CUDR) -o $@ -c lal_lj_sdk_long.cpp -I$(OBJ_DIR) -$(OBJ_DIR)/lal_cg_cmm_long_ext.o: $(ALL_H) lal_cg_cmm_long.h lal_cg_cmm_long_ext.cpp lal_base_charge.h - $(CUDR) -o $@ -c lal_cg_cmm_long_ext.cpp -I$(OBJ_DIR) +$(OBJ_DIR)/lal_lj_sdk_long_ext.o: $(ALL_H) lal_lj_sdk_long.h lal_lj_sdk_long_ext.cpp lal_base_charge.h + $(CUDR) -o $@ -c lal_lj_sdk_long_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/eam.cubin: lal_eam.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_eam.cu diff --git a/lib/gpu/Opencl.makefile b/lib/gpu/Opencl.makefile index 7ef1dfba0c..4a59595313 100644 --- a/lib/gpu/Opencl.makefile +++ b/lib/gpu/Opencl.makefile @@ -32,8 +32,8 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_answer.o \ $(OBJ_DIR)/lal_coul_long.o $(OBJ_DIR)/lal_coul_long_ext.o \ $(OBJ_DIR)/lal_morse.o $(OBJ_DIR)/lal_morse_ext.o \ 
$(OBJ_DIR)/lal_charmm_long.o $(OBJ_DIR)/lal_charmm_long_ext.o \ - $(OBJ_DIR)/lal_cg_cmm.o $(OBJ_DIR)/lal_cg_cmm_ext.o \ - $(OBJ_DIR)/lal_cg_cmm_long.o $(OBJ_DIR)/lal_cg_cmm_long_ext.o \ + $(OBJ_DIR)/lal_lj_sdk.o $(OBJ_DIR)/lal_lj_sdk_ext.o \ + $(OBJ_DIR)/lal_lj_sdk_long.o $(OBJ_DIR)/lal_lj_sdk_long_ext.o \ $(OBJ_DIR)/lal_eam.o $(OBJ_DIR)/lal_eam_ext.o \ $(OBJ_DIR)/lal_eam_fs_ext.o $(OBJ_DIR)/lal_eam_alloy_ext.o \ $(OBJ_DIR)/lal_buck.o $(OBJ_DIR)/lal_buck_ext.o \ @@ -75,8 +75,8 @@ KERS = $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/atom_cl.h \ $(OBJ_DIR)/lj_coul_long_cl.h $(OBJ_DIR)/lj_dsf_cl.h \ $(OBJ_DIR)/lj_class2_long_cl.h \ $(OBJ_DIR)/coul_long_cl.h $(OBJ_DIR)/morse_cl.h \ - $(OBJ_DIR)/charmm_long_cl.h $(OBJ_DIR)/cg_cmm_cl.h \ - $(OBJ_DIR)/cg_cmm_long_cl.h $(OBJ_DIR)/neighbor_gpu_cl.h \ + $(OBJ_DIR)/charmm_long_cl.h $(OBJ_DIR)/lj_sdk_cl.h \ + $(OBJ_DIR)/lj_sdk_long_cl.h $(OBJ_DIR)/neighbor_gpu_cl.h \ $(OBJ_DIR)/eam_cl.h $(OBJ_DIR)/buck_cl.h \ $(OBJ_DIR)/buck_coul_cl.h $(OBJ_DIR)/buck_coul_long_cl.h \ $(OBJ_DIR)/table_cl.h $(OBJ_DIR)/yukawa_cl.h \ @@ -273,23 +273,23 @@ $(OBJ_DIR)/lal_lj_expand.o: $(ALL_H) lal_lj_expand.h lal_lj_expand.cpp $(OBJ_DI $(OBJ_DIR)/lal_lj_expand_ext.o: $(ALL_H) lal_lj_expand.h lal_lj_expand_ext.cpp lal_base_atomic.h $(OCL) -o $@ -c lal_lj_expand_ext.cpp -I$(OBJ_DIR) -$(OBJ_DIR)/cg_cmm_cl.h: lal_cg_cmm.cu $(PRE1_H) - $(BSH) ./geryon/file_to_cstr.sh cg_cmm $(PRE1_H) lal_cg_cmm.cu $(OBJ_DIR)/cg_cmm_cl.h; +$(OBJ_DIR)/lj_sdk_cl.h: lal_lj_sdk.cu $(PRE1_H) + $(BSH) ./geryon/file_to_cstr.sh lj_sdk $(PRE1_H) lal_lj_sdk.cu $(OBJ_DIR)/lj_sdk_cl.h; -$(OBJ_DIR)/lal_cg_cmm.o: $(ALL_H) lal_cg_cmm.h lal_cg_cmm.cpp $(OBJ_DIR)/cg_cmm_cl.h $(OBJ_DIR)/cg_cmm_cl.h $(OBJ_DIR)/lal_base_atomic.o - $(OCL) -o $@ -c lal_cg_cmm.cpp -I$(OBJ_DIR) +$(OBJ_DIR)/lal_lj_sdk.o: $(ALL_H) lal_lj_sdk.h lal_lj_sdk.cpp $(OBJ_DIR)/lj_sdk_cl.h $(OBJ_DIR)/lj_sdk_cl.h $(OBJ_DIR)/lal_base_atomic.o + $(OCL) -o $@ -c lal_lj_sdk.cpp -I$(OBJ_DIR) -$(OBJ_DIR)/lal_cg_cmm_ext.o: $(ALL_H) 
lal_cg_cmm.h lal_cg_cmm_ext.cpp lal_base_atomic.h - $(OCL) -o $@ -c lal_cg_cmm_ext.cpp -I$(OBJ_DIR) +$(OBJ_DIR)/lal_lj_sdk_ext.o: $(ALL_H) lal_lj_sdk.h lal_lj_sdk_ext.cpp lal_base_atomic.h + $(OCL) -o $@ -c lal_lj_sdk_ext.cpp -I$(OBJ_DIR) -$(OBJ_DIR)/cg_cmm_long_cl.h: lal_cg_cmm_long.cu $(PRE1_H) - $(BSH) ./geryon/file_to_cstr.sh cg_cmm_long $(PRE1_H) lal_cg_cmm_long.cu $(OBJ_DIR)/cg_cmm_long_cl.h; +$(OBJ_DIR)/lj_sdk_long_cl.h: lal_lj_sdk_long.cu $(PRE1_H) + $(BSH) ./geryon/file_to_cstr.sh lj_sdk_long $(PRE1_H) lal_lj_sdk_long.cu $(OBJ_DIR)/lj_sdk_long_cl.h; -$(OBJ_DIR)/lal_cg_cmm_long.o: $(ALL_H) lal_cg_cmm_long.h lal_cg_cmm_long.cpp $(OBJ_DIR)/cg_cmm_long_cl.h $(OBJ_DIR)/cg_cmm_long_cl.h $(OBJ_DIR)/lal_base_atomic.o - $(OCL) -o $@ -c lal_cg_cmm_long.cpp -I$(OBJ_DIR) +$(OBJ_DIR)/lal_lj_sdk_long.o: $(ALL_H) lal_lj_sdk_long.h lal_lj_sdk_long.cpp $(OBJ_DIR)/lj_sdk_long_cl.h $(OBJ_DIR)/lj_sdk_long_cl.h $(OBJ_DIR)/lal_base_atomic.o + $(OCL) -o $@ -c lal_lj_sdk_long.cpp -I$(OBJ_DIR) -$(OBJ_DIR)/lal_cg_cmm_long_ext.o: $(ALL_H) lal_cg_cmm_long.h lal_cg_cmm_long_ext.cpp lal_base_charge.h - $(OCL) -o $@ -c lal_cg_cmm_long_ext.cpp -I$(OBJ_DIR) +$(OBJ_DIR)/lal_lj_sdk_long_ext.o: $(ALL_H) lal_lj_sdk_long.h lal_lj_sdk_long_ext.cpp lal_base_charge.h + $(OCL) -o $@ -c lal_lj_sdk_long_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/eam_cl.h: lal_eam.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh eam $(PRE1_H) lal_eam.cu $(OBJ_DIR)/eam_cl.h; diff --git a/lib/gpu/README b/lib/gpu/README index 45c8ce49ba..b26897e885 100644 --- a/lib/gpu/README +++ b/lib/gpu/README @@ -17,6 +17,11 @@ links against when using the GPU package. This library must be built with a C++ compiler, before LAMMPS is built, so LAMMPS can link against it. +You can type "make lib-gpu" from the src directory to see help on how +to build this library via make commands, or you can do the same thing +by typing "python Install.py" from within this directory, or you can +do it manually by following the instructions below. 
+ Build the library using one of the provided Makefile.* files or create your own, specific to your compiler and system. For example: @@ -164,9 +169,9 @@ this directory). The gpu library supports 3 precision modes as determined by the CUDA_PRECISION variable: - CUDA_PREC = -D_SINGLE_SINGLE # Single precision for all calculations - CUDA_PREC = -D_DOUBLE_DOUBLE # Double precision for all calculations - CUDA_PREC = -D_SINGLE_DOUBLE # Accumulation of forces, etc. in double + CUDA_PRECISION = -D_SINGLE_SINGLE # Single precision for all calculations + CUDA_PRECISION = -D_DOUBLE_DOUBLE # Double precision for all calculations + CUDA_PRECISION = -D_SINGLE_DOUBLE # Accumulation of forces, etc. in double NOTE: PPPM acceleration can only be run on GPUs with compute capability>=1.1. You will get the error "GPU library not compiled for this accelerator." diff --git a/lib/gpu/lal_cg_cmm.cpp b/lib/gpu/lal_lj_sdk.cpp similarity index 85% rename from lib/gpu/lal_cg_cmm.cpp rename to lib/gpu/lal_lj_sdk.cpp index d361e32b09..618555e38a 100644 --- a/lib/gpu/lal_cg_cmm.cpp +++ b/lib/gpu/lal_lj_sdk.cpp @@ -1,5 +1,5 @@ /*************************************************************************** - cg_cmm.cpp + lj_sdk.cpp ------------------- W. 
Michael Brown (ORNL) @@ -14,14 +14,14 @@ ***************************************************************************/ #if defined(USE_OPENCL) -#include "cg_cmm_cl.h" +#include "lj_sdk_cl.h" #elif defined(USE_CUDART) -const char *cg_cmm=0; +const char *lj_sdk=0; #else -#include "cg_cmm_cubin.h" +#include "lj_sdk_cubin.h" #endif -#include "lal_cg_cmm.h" +#include "lal_lj_sdk.h" #include using namespace LAMMPS_AL; #define CGCMMT CGCMM @@ -53,33 +53,33 @@ int CGCMMT::init(const int ntypes, double **host_cutsq, const double gpu_split, FILE *_screen) { int success; success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, - _screen,cg_cmm,"k_cg_cmm"); + _screen,lj_sdk,"k_lj_sdk"); if (success!=0) return success; // If atom type constants fit in shared memory use fast kernel - int cmm_types=ntypes; + int sdk_types=ntypes; shared_types=false; int max_shared_types=this->device->max_shared_types(); - if (cmm_types<=max_shared_types && this->_block_size>=max_shared_types) { - cmm_types=max_shared_types; + if (sdk_types<=max_shared_types && this->_block_size>=max_shared_types) { + sdk_types=max_shared_types; shared_types=true; } - _cmm_types=cmm_types; + _sdk_types=sdk_types; // Allocate a host write buffer for data initialization - UCL_H_Vec host_write(cmm_types*cmm_types*32,*(this->ucl_device), + UCL_H_Vec host_write(sdk_types*sdk_types*32,*(this->ucl_device), UCL_WRITE_ONLY); - for (int i=0; iucl_device),UCL_READ_ONLY); - this->atom->type_pack4(ntypes,cmm_types,lj1,host_write,host_cutsq, + lj1.alloc(sdk_types*sdk_types,*(this->ucl_device),UCL_READ_ONLY); + this->atom->type_pack4(ntypes,sdk_types,lj1,host_write,host_cutsq, host_cg_type,host_lj1,host_lj2); - lj3.alloc(cmm_types*cmm_types,*(this->ucl_device),UCL_READ_ONLY); - this->atom->type_pack4(ntypes,cmm_types,lj3,host_write,host_lj3,host_lj4, + lj3.alloc(sdk_types*sdk_types,*(this->ucl_device),UCL_READ_ONLY); + this->atom->type_pack4(ntypes,sdk_types,lj3,host_write,host_lj3,host_lj4, host_offset); 
UCL_H_Vec dview; @@ -143,7 +143,7 @@ void CGCMMT::loop(const bool _eflag, const bool _vflag) { } else { this->k_pair.set_size(GX,BX); this->k_pair.run(&this->atom->x, &lj1, &lj3, - &_cmm_types, &sp_lj, &this->nbor->dev_nbor, + &_sdk_types, &sp_lj, &this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->ans->force, &this->ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom); diff --git a/lib/gpu/lal_cg_cmm.cu b/lib/gpu/lal_lj_sdk.cu similarity index 97% rename from lib/gpu/lal_cg_cmm.cu rename to lib/gpu/lal_lj_sdk.cu index 70d2ab6092..01b2cdd18d 100644 --- a/lib/gpu/lal_cg_cmm.cu +++ b/lib/gpu/lal_lj_sdk.cu @@ -1,5 +1,5 @@ // ************************************************************************** -// cg_cmm.cu +// lj_sdk.cu // ------------------- // W. Michael Brown (ORNL) // @@ -24,7 +24,7 @@ texture pos_tex; #define pos_tex x_ #endif -__kernel void k_cg_cmm(const __global numtyp4 *restrict x_, +__kernel void k_lj_sdk(const __global numtyp4 *restrict x_, const __global numtyp4 *restrict lj1, const __global numtyp4 *restrict lj3, const int lj_types, @@ -116,7 +116,7 @@ __kernel void k_cg_cmm(const __global numtyp4 *restrict x_, } // if ii } -__kernel void k_cg_cmm_fast(const __global numtyp4 *restrict x_, +__kernel void k_lj_sdk_fast(const __global numtyp4 *restrict x_, const __global numtyp4 *restrict lj1_in, const __global numtyp4 *restrict lj3_in, const __global numtyp *restrict sp_lj_in, diff --git a/lib/gpu/lal_cg_cmm.h b/lib/gpu/lal_lj_sdk.h similarity index 97% rename from lib/gpu/lal_cg_cmm.h rename to lib/gpu/lal_lj_sdk.h index b7895b5898..ac2b9aafe3 100644 --- a/lib/gpu/lal_cg_cmm.h +++ b/lib/gpu/lal_lj_sdk.h @@ -1,5 +1,5 @@ /*************************************************************************** - cg_cmm.h + lj_sdk.h ------------------- W. 
Michael Brown (ORNL) @@ -67,7 +67,7 @@ class CGCMM : public BaseAtomic { bool shared_types; /// Number of atom types - int _cmm_types; + int _sdk_types; private: bool _allocated; diff --git a/lib/gpu/lal_cg_cmm_ext.cpp b/lib/gpu/lal_lj_sdk_ext.cpp similarity index 93% rename from lib/gpu/lal_cg_cmm_ext.cpp rename to lib/gpu/lal_lj_sdk_ext.cpp index b6fc110b15..386106161e 100644 --- a/lib/gpu/lal_cg_cmm_ext.cpp +++ b/lib/gpu/lal_lj_sdk_ext.cpp @@ -1,5 +1,5 @@ /*************************************************************************** - cg_cmm.h + lj_sdk.h ------------------- W. Michael Brown (ORNL) @@ -17,7 +17,7 @@ #include #include -#include "lal_cg_cmm.h" +#include "lal_lj_sdk.h" using namespace std; using namespace LAMMPS_AL; @@ -27,7 +27,7 @@ static CGCMM CMMMF; // --------------------------------------------------------------------------- // Allocate memory on host and device and copy constants to device // --------------------------------------------------------------------------- -int cmm_gpu_init(const int ntypes, double **cutsq, int **cg_types, +int sdk_gpu_init(const int ntypes, double **cutsq, int **cg_types, double **host_lj1, double **host_lj2, double **host_lj3, double **host_lj4, double **offset, double *special_lj, const int inum, const int nall, const int max_nbors, @@ -89,11 +89,11 @@ int cmm_gpu_init(const int ntypes, double **cutsq, int **cg_types, return init_ok; } -void cmm_gpu_clear() { +void sdk_gpu_clear() { CMMMF.clear(); } -int** cmm_gpu_compute_n(const int ago, const int inum_full, +int** sdk_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial, tagint **special, const bool eflag, const bool vflag, @@ -105,7 +105,7 @@ int** cmm_gpu_compute_n(const int ago, const int inum_full, vatom, host_start, ilist, jnum, cpu_time, success); } -void cmm_gpu_compute(const int ago, const int inum_full, const int nall, +void sdk_gpu_compute(const int 
ago, const int inum_full, const int nall, double **host_x, int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, @@ -114,7 +114,7 @@ void cmm_gpu_compute(const int ago, const int inum_full, const int nall, firstneigh,eflag,vflag,eatom,vatom,host_start,cpu_time,success); } -double cmm_gpu_bytes() { +double sdk_gpu_bytes() { return CMMMF.host_memory_usage(); } diff --git a/lib/gpu/lal_cg_cmm_long.cpp b/lib/gpu/lal_lj_sdk_long.cpp similarity index 96% rename from lib/gpu/lal_cg_cmm_long.cpp rename to lib/gpu/lal_lj_sdk_long.cpp index 14b5b7622c..46caf6bd36 100644 --- a/lib/gpu/lal_cg_cmm_long.cpp +++ b/lib/gpu/lal_lj_sdk_long.cpp @@ -1,5 +1,5 @@ /*************************************************************************** - cg_cmm_long.cpp + lj_sdk_long.cpp ------------------- W. Michael Brown (ORNL) @@ -14,14 +14,14 @@ ***************************************************************************/ #if defined(USE_OPENCL) -#include "cg_cmm_long_cl.h" +#include "lj_sdk_long_cl.h" #elif defined(USE_CUDART) -const char *cg_cmm_long=0; +const char *lj_sdk_long=0; #else -#include "cg_cmm_long_cubin.h" +#include "lj_sdk_long_cubin.h" #endif -#include "lal_cg_cmm_long.h" +#include "lal_lj_sdk_long.h" #include using namespace LAMMPS_AL; #define CGCMMLongT CGCMMLong @@ -58,7 +58,7 @@ int CGCMMLongT::init(const int ntypes, double **host_cutsq, const double g_ewald) { int success; success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, - _screen,cg_cmm_long,"k_cg_cmm_long"); + _screen,lj_sdk_long,"k_lj_sdk_long"); if (success!=0) return success; diff --git a/lib/gpu/lal_cg_cmm_long.cu b/lib/gpu/lal_lj_sdk_long.cu similarity index 98% rename from lib/gpu/lal_cg_cmm_long.cu rename to lib/gpu/lal_lj_sdk_long.cu index f6942d1809..5ff64b2254 100644 --- a/lib/gpu/lal_cg_cmm_long.cu +++ b/lib/gpu/lal_lj_sdk_long.cu @@ -1,5 +1,5 @@ // 
************************************************************************** -// cg_cmm_long.cu +// lj_sdk_long.cu // ------------------- // W. Michael Brown (ORNL) // @@ -29,7 +29,7 @@ texture q_tex; #define q_tex q_ #endif -__kernel void k_cg_cmm_long(const __global numtyp4 *restrict x_, +__kernel void k_lj_sdk_long(const __global numtyp4 *restrict x_, const __global numtyp4 *restrict lj1, const __global numtyp4 *restrict lj3, const int lj_types, @@ -154,7 +154,7 @@ __kernel void k_cg_cmm_long(const __global numtyp4 *restrict x_, } // if ii } -__kernel void k_cg_cmm_long_fast(const __global numtyp4 *restrict x_, +__kernel void k_lj_sdk_long_fast(const __global numtyp4 *restrict x_, const __global numtyp4 *restrict lj1_in, const __global numtyp4 *restrict lj3_in, const __global numtyp *restrict sp_lj_in, diff --git a/lib/gpu/lal_cg_cmm_long.h b/lib/gpu/lal_lj_sdk_long.h similarity index 98% rename from lib/gpu/lal_cg_cmm_long.h rename to lib/gpu/lal_lj_sdk_long.h index aa0cbfbaf0..f56687cd7d 100644 --- a/lib/gpu/lal_cg_cmm_long.h +++ b/lib/gpu/lal_lj_sdk_long.h @@ -1,5 +1,5 @@ /*************************************************************************** - cg_cmm_long.h + lj_sdk_long.h ------------------- W. Michael Brown (ORNL) diff --git a/lib/gpu/lal_cg_cmm_long_ext.cpp b/lib/gpu/lal_lj_sdk_long_ext.cpp similarity index 93% rename from lib/gpu/lal_cg_cmm_long_ext.cpp rename to lib/gpu/lal_lj_sdk_long_ext.cpp index ee0a0269e5..08390d3eeb 100644 --- a/lib/gpu/lal_cg_cmm_long_ext.cpp +++ b/lib/gpu/lal_lj_sdk_long_ext.cpp @@ -1,5 +1,5 @@ /*************************************************************************** - cg_cmm_long.h + lj_sdk_long.h ------------------- W. 
Michael Brown (ORNL) @@ -17,7 +17,7 @@ #include #include -#include "lal_cg_cmm_long.h" +#include "lal_lj_sdk_long.h" using namespace std; using namespace LAMMPS_AL; @@ -27,7 +27,7 @@ static CGCMMLong CMMLMF; // --------------------------------------------------------------------------- // Allocate memory on host and device and copy constants to device // --------------------------------------------------------------------------- -int cmml_gpu_init(const int ntypes, double **cutsq, int **cg_type, +int sdkl_gpu_init(const int ntypes, double **cutsq, int **cg_type, double **host_lj1, double **host_lj2, double **host_lj3, double **host_lj4, double **offset, double *special_lj, const int inum, const int nall, const int max_nbors, @@ -93,11 +93,11 @@ int cmml_gpu_init(const int ntypes, double **cutsq, int **cg_type, return init_ok; } -void cmml_gpu_clear() { +void sdkl_gpu_clear() { CMMLMF.clear(); } -int** cmml_gpu_compute_n(const int ago, const int inum_full, +int** sdkl_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial, tagint **special, const bool eflag, const bool vflag, @@ -111,7 +111,7 @@ int** cmml_gpu_compute_n(const int ago, const int inum_full, host_q,boxlo,prd); } -void cmml_gpu_compute(const int ago, const int inum_full, const int nall, +void sdkl_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, @@ -122,7 +122,7 @@ void cmml_gpu_compute(const int ago, const int inum_full, const int nall, host_q,nlocal,boxlo,prd); } -double cmml_gpu_bytes() { +double sdkl_gpu_bytes() { return CMMLMF.host_memory_usage(); } diff --git a/lib/h5md/Install.py b/lib/h5md/Install.py new file mode 100644 index 0000000000..18b426f928 --- /dev/null +++ b/lib/h5md/Install.py @@ -0,0 +1,82 @@ +#!/usr/bin/env 
python + +# install.py tool to do a generic build of a library +# soft linked to by many of the lib/Install.py files +# used to automate the steps described in the corresponding lib/README + +import sys,commands,os + +# help message + +help = """ +Syntax: python Install.py -m machine -e suffix + specify -m and optionally -e, order does not matter + -m = perform a clean followed by "make -f Makefile.machine" + machine = suffix of a lib/Makefile.* file + -e = set EXTRAMAKE variable in Makefile.machine to Makefile.lammps.suffix + does not alter existing Makefile.machine +""" + +# print error message or help + +def error(str=None): + if not str: print help + else: print "ERROR",str + sys.exit() + +# parse args + +args = sys.argv[1:] +nargs = len(args) +if nargs == 0: error() + +machine = None +extraflag = 0 + +iarg = 0 +while iarg < nargs: + if args[iarg] == "-m": + if iarg+2 > nargs: error() + machine = args[iarg+1] + iarg += 2 + elif args[iarg] == "-e": + if iarg+2 > nargs: error() + extraflag = 1 + suffix = args[iarg+1] + iarg += 2 + else: error() + +# set lib from working dir + +cwd = os.getcwd() +lib = os.path.basename(cwd) + +# create Makefile.auto as copy of Makefile.machine +# reset EXTRAMAKE if requested + +if not os.path.exists("Makefile.%s" % machine): + error("lib/%s/Makefile.%s does not exist" % (lib,machine)) + +lines = open("Makefile.%s" % machine,'r').readlines() +fp = open("Makefile.auto",'w') + +for line in lines: + words = line.split() + if len(words) == 3 and extraflag and \ + words[0] == "EXTRAMAKE" and words[1] == '=': + line = line.replace(words[2],"Makefile.lammps.%s" % suffix) + print >>fp,line, + +fp.close() + +# make the library via Makefile.auto + +print "Building lib%s.a ..."
% lib +cmd = "make -f Makefile.auto clean; make -f Makefile.auto" +txt = commands.getoutput(cmd) +print txt + +if os.path.exists("lib%s.a" % lib): print "Build was successful" +else: error("Build of lib/%s/lib%s.a was NOT successful" % (lib,lib)) +if not os.path.exists("Makefile.lammps"): + print "lib/%s/Makefile.lammps was NOT created" % lib diff --git a/lib/h5md/Makefile b/lib/h5md/Makefile.h5cc similarity index 95% rename from lib/h5md/Makefile rename to lib/h5md/Makefile.h5cc index 085d21ff69..bd3e8a9784 100644 --- a/lib/h5md/Makefile +++ b/lib/h5md/Makefile.h5cc @@ -19,7 +19,7 @@ build/ch5md.o: src/ch5md.c | build $(CC) $(INC) $(CFLAGS) -c $< -o $@ Makefile.lammps: - cp Makefile.lammps.empty $@ + cp $(EXTRAMAKE) $@ .PHONY: all lib clean diff --git a/lib/h5md/README b/lib/h5md/README index 62a4979cba..fb7d82bfcc 100644 --- a/lib/h5md/README +++ b/lib/h5md/README @@ -3,6 +3,11 @@ LAMMPS under its own BSD license; see below. This library is used when the USER-H5MD package is included in a LAMMPS build and the dump h5md command is invoked in a LAMMPS input script. +You can type "make lib-h5md" from the src directory to see help on how +to build this library via make commands, or you can do the same thing +by typing "python Install.py" from within this directory, or you can +do it manually by following the instructions below. + --------------------- ch5md : Read and write H5MD files in C @@ -17,8 +22,14 @@ molecular data, whose development is found at . ch5md is developped by Pierre de Buyl and is released under the 3-clause BSD license that can be found in the file LICENSE. -To use the h5md dump style in lammps, execute make in this directory then 'make -yes-user-h5md' in the src directory of lammps. Rebuild lammps. +To use the h5md dump style in lammps, execute +make -f Makefile.h5cc +in this directory then +make yes-user-h5md +in the src directory of LAMMPS to rebuild LAMMPS. + +Note that you must have the h5cc compiler installed to use +Makefile.h5cc. 
It should be part of the HDF5 installation. If HDF5 is not in a standard system location, edit Makefile.lammps accordingly. diff --git a/lib/kokkos/CHANGELOG.md b/lib/kokkos/CHANGELOG.md index 4a96e24418..c6fe991b97 100644 --- a/lib/kokkos/CHANGELOG.md +++ b/lib/kokkos/CHANGELOG.md @@ -1,5 +1,28 @@ # Change Log +## [2.03.00](https://github.com/kokkos/kokkos/tree/2.03.00) (2017-04-25) +[Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.15...2.03.00) + +**Implemented enhancements:** + +- UnorderedMap: make it accept Devices or MemorySpaces [\#711](https://github.com/kokkos/kokkos/issues/711) +- sort to accept DynamicView and \[begin,end\) indices [\#691](https://github.com/kokkos/kokkos/issues/691) +- ENABLE Macros should only be used via \#ifdef or \#if defined [\#675](https://github.com/kokkos/kokkos/issues/675) +- Remove impl/Kokkos\_Synchronic\_\* [\#666](https://github.com/kokkos/kokkos/issues/666) +- Turning off IVDEP for Intel 14. [\#638](https://github.com/kokkos/kokkos/issues/638) +- Using an installed Kokkos in a target application using CMake [\#633](https://github.com/kokkos/kokkos/issues/633) +- Create Kokkos Bill of Materials [\#632](https://github.com/kokkos/kokkos/issues/632) +- MDRangePolicy and tagged evaluators [\#547](https://github.com/kokkos/kokkos/issues/547) +- Add PGI support [\#289](https://github.com/kokkos/kokkos/issues/289) + +**Fixed bugs:** + +- Output from PerTeam fails [\#733](https://github.com/kokkos/kokkos/issues/733) +- Cuda: architecture flag not added to link line [\#688](https://github.com/kokkos/kokkos/issues/688) +- Getting large chunks of memory for a thread team in a universal way [\#664](https://github.com/kokkos/kokkos/issues/664) +- Kokkos RNG normal\(\) function hangs for small seed value [\#655](https://github.com/kokkos/kokkos/issues/655) +- Kokkos Tests Errors on Shepard/HSW Builds [\#644](https://github.com/kokkos/kokkos/issues/644) + ## [2.02.15](https://github.com/kokkos/kokkos/tree/2.02.15) (2017-02-10) [Full
Changelog](https://github.com/kokkos/kokkos/compare/2.02.07...2.02.15) diff --git a/lib/kokkos/CMakeLists.txt b/lib/kokkos/CMakeLists.txt index 16854c839a..1c820660ae 100644 --- a/lib/kokkos/CMakeLists.txt +++ b/lib/kokkos/CMakeLists.txt @@ -98,10 +98,10 @@ TRIBITS_ADD_OPTION_AND_DEFINE( ) TRIBITS_ADD_OPTION_AND_DEFINE( - Kokkos_ENABLE_QTHREAD - KOKKOS_HAVE_QTHREAD - "Enable QTHREAD support in Kokkos." - "${TPL_ENABLE_QTHREAD}" + Kokkos_ENABLE_Qthreads + KOKKOS_HAVE_QTHREADS + "Enable Qthreads support in Kokkos." + "${TPL_ENABLE_QTHREADS}" ) TRIBITS_ADD_OPTION_AND_DEFINE( @@ -110,7 +110,7 @@ TRIBITS_ADD_OPTION_AND_DEFINE( "Enable C++11 support in Kokkos." "${${PROJECT_NAME}_ENABLE_CXX11}" ) - + TRIBITS_ADD_OPTION_AND_DEFINE( Kokkos_ENABLE_HWLOC KOKKOS_HAVE_HWLOC @@ -213,4 +213,3 @@ TRIBITS_EXCLUDE_FILES( ) TRIBITS_PACKAGE_POSTPROCESS() - diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos index 9d00c19027..5b094dba8c 100644 --- a/lib/kokkos/Makefile.kokkos +++ b/lib/kokkos/Makefile.kokkos @@ -1,39 +1,38 @@ -# Default settings common options +# Default settings common options. 
#LAMMPS specific settings: KOKKOS_PATH=../../lib/kokkos CXXFLAGS=$(CCFLAGS) -#Options: OpenMP,Serial,Pthreads,Cuda +# Options: Cuda,OpenMP,Pthreads,Qthreads,Serial KOKKOS_DEVICES ?= "OpenMP" #KOKKOS_DEVICES ?= "Pthreads" -#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal61,ARMv80,ARMv81,ARMv8-ThunderX,BGQ,Power7,Power8,Power9,KNL,BDW,SKX +# Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,ARMv80,ARMv81,ARMv8-ThunderX,BGQ,Power7,Power8,Power9,KNL,BDW,SKX KOKKOS_ARCH ?= "" -#Options: yes,no +# Options: yes,no KOKKOS_DEBUG ?= "no" -#Options: hwloc,librt,experimental_memkind +# Options: hwloc,librt,experimental_memkind KOKKOS_USE_TPLS ?= "" -#Options: c++11,c++1z +# Options: c++11,c++1z KOKKOS_CXX_STANDARD ?= "c++11" -#Options: aggressive_vectorization,disable_profiling +# Options: aggressive_vectorization,disable_profiling KOKKOS_OPTIONS ?= "" -#Default settings specific options -#Options: force_uvm,use_ldg,rdc,enable_lambda +# Default settings specific options. +# Options: force_uvm,use_ldg,rdc,enable_lambda KOKKOS_CUDA_OPTIONS ?= "enable_lambda" -# Check for general settings - +# Check for general settings. KOKKOS_INTERNAL_ENABLE_DEBUG := $(strip $(shell echo $(KOKKOS_DEBUG) | grep "yes" | wc -l)) KOKKOS_INTERNAL_ENABLE_CXX11 := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++11" | wc -l)) KOKKOS_INTERNAL_ENABLE_CXX1Z := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++1z" | wc -l)) -# Check for external libraries +# Check for external libraries. KOKKOS_INTERNAL_USE_HWLOC := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "hwloc" | wc -l)) KOKKOS_INTERNAL_USE_LIBRT := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "librt" | wc -l)) KOKKOS_INTERNAL_USE_MEMKIND := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "experimental_memkind" | wc -l)) -# Check for advanced settings +# Check for advanced settings. 
KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "aggressive_vectorization" | wc -l)) KOKKOS_INTERNAL_DISABLE_PROFILING := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "disable_profiling" | wc -l)) KOKKOS_INTERNAL_CUDA_USE_LDG := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "use_ldg" | wc -l)) @@ -41,21 +40,21 @@ KOKKOS_INTERNAL_CUDA_USE_UVM := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | gr KOKKOS_INTERNAL_CUDA_USE_RELOC := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "rdc" | wc -l)) KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "enable_lambda" | wc -l)) -# Check for Kokkos Host Execution Spaces one of which must be on - +# Check for Kokkos Host Execution Spaces one of which must be on. KOKKOS_INTERNAL_USE_OPENMP := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMP | wc -l)) KOKKOS_INTERNAL_USE_PTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Pthread | wc -l)) +KOKKOS_INTERNAL_USE_QTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Qthreads | wc -l)) KOKKOS_INTERNAL_USE_SERIAL := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Serial | wc -l)) -KOKKOS_INTERNAL_USE_QTHREAD := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Qthread | wc -l)) ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0) ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0) - KOKKOS_INTERNAL_USE_SERIAL := 1 +ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 0) + KOKKOS_INTERNAL_USE_SERIAL := 1 +endif endif endif -# Check for other Execution Spaces - +# Check for other Execution Spaces. KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l)) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) @@ -64,27 +63,25 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) KOKKOS_INTERNAL_COMPILER_NVCC_VERSION := $(shell nvcc --version 2>&1 | grep release | cut -d' ' -f5 | cut -d',' -f1 | tr -d .) endif -# Check OS - +# Check OS. 
KOKKOS_OS := $(shell uname -s) KOKKOS_INTERNAL_OS_CYGWIN := $(shell uname -s | grep CYGWIN | wc -l) KOKKOS_INTERNAL_OS_LINUX := $(shell uname -s | grep Linux | wc -l) KOKKOS_INTERNAL_OS_DARWIN := $(shell uname -s | grep Darwin | wc -l) -# Check compiler - -KOKKOS_INTERNAL_COMPILER_INTEL := $(shell $(CXX) --version 2>&1 | grep "Intel Corporation" | wc -l) -KOKKOS_INTERNAL_COMPILER_PGI := $(shell $(CXX) --version 2>&1 | grep PGI | wc -l) -KOKKOS_INTERNAL_COMPILER_XL := $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l) -KOKKOS_INTERNAL_COMPILER_CRAY := $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l) -KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(CXX) --version 2>&1 | grep "nvcc" | wc -l) +# Check compiler. +KOKKOS_INTERNAL_COMPILER_INTEL := $(shell $(CXX) --version 2>&1 | grep "Intel Corporation" | wc -l) +KOKKOS_INTERNAL_COMPILER_PGI := $(shell $(CXX) --version 2>&1 | grep PGI | wc -l) +KOKKOS_INTERNAL_COMPILER_XL := $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l) +KOKKOS_INTERNAL_COMPILER_CRAY := $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l) +KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(CXX) --version 2>&1 | grep "nvcc" | wc -l) ifneq ($(OMPI_CXX),) KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(OMPI_CXX) --version 2>&1 | grep "nvcc" | wc -l) endif ifneq ($(MPICH_CXX),) KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(MPICH_CXX) --version 2>&1 | grep "nvcc" | wc -l) endif -KOKKOS_INTERNAL_COMPILER_CLANG := $(shell $(CXX) --version 2>&1 | grep "clang" | wc -l) +KOKKOS_INTERNAL_COMPILER_CLANG := $(shell $(CXX) --version 2>&1 | grep "clang" | wc -l) ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 2) KOKKOS_INTERNAL_COMPILER_CLANG = 1 @@ -95,17 +92,17 @@ endif ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) KOKKOS_INTERNAL_COMPILER_CLANG_VERSION := $(shell clang --version | grep version | cut -d ' ' -f3 | tr -d '.') + ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_CLANG_VERSION) -lt 400; echo $$?),0) - $(error Compiling Cuda code 
directly with Clang requires version 4.0.0 or higher) + $(error Compiling Cuda code directly with Clang requires version 4.0.0 or higher) endif KOKKOS_INTERNAL_CUDA_USE_LAMBDA := 1 endif endif - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - KOKKOS_INTERNAL_OPENMP_FLAG := -mp + KOKKOS_INTERNAL_OPENMP_FLAG := -mp else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp @@ -114,7 +111,7 @@ else KOKKOS_INTERNAL_OPENMP_FLAG := -qsmp=omp else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - # OpenMP is turned on by default in Cray compiler environment + # OpenMP is turned on by default in Cray compiler environment. KOKKOS_INTERNAL_OPENMP_FLAG := else KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp @@ -138,9 +135,9 @@ else endif endif -# Check for Kokkos Architecture settings +# Check for Kokkos Architecture settings. -#Intel based +# Intel based. KOKKOS_INTERNAL_USE_ARCH_KNC := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNC | wc -l)) KOKKOS_INTERNAL_USE_ARCH_SNB := $(strip $(shell echo $(KOKKOS_ARCH) | grep SNB | wc -l)) KOKKOS_INTERNAL_USE_ARCH_HSW := $(strip $(shell echo $(KOKKOS_ARCH) | grep HSW | wc -l)) @@ -148,8 +145,8 @@ KOKKOS_INTERNAL_USE_ARCH_BDW := $(strip $(shell echo $(KOKKOS_ARCH) | grep BDW | KOKKOS_INTERNAL_USE_ARCH_SKX := $(strip $(shell echo $(KOKKOS_ARCH) | grep SKX | wc -l)) KOKKOS_INTERNAL_USE_ARCH_KNL := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNL | wc -l)) -#NVIDIA based -NVCC_WRAPPER := $(KOKKOS_PATH)/config/nvcc_wrapper +# NVIDIA based. 
+NVCC_WRAPPER := $(KOKKOS_PATH)/config/nvcc_wrapper KOKKOS_INTERNAL_USE_ARCH_KEPLER30 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler30 | wc -l)) KOKKOS_INTERNAL_USE_ARCH_KEPLER32 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler32 | wc -l)) KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler35 | wc -l)) @@ -170,46 +167,46 @@ KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_AR + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc)) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0) -KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \ - + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \ - + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \ - + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \ - + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \ - + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \ - + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \ - + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \ - + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc)) + KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell | wc -l)) + KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler | wc -l)) + KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \ + + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \ + + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \ + + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \ + + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \ + + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \ + + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \ + + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \ + + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc)) endif -#ARM based +# ARM based. 
KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv80 | wc -l)) KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv81 | wc -l)) KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8-ThunderX | wc -l)) -#IBM based +# IBM based. KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l)) KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power7 | wc -l)) KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power8 | wc -l)) KOKKOS_INTERNAL_USE_ARCH_POWER9 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power9 | wc -l)) KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc)) -#AMD based +# AMD based. KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l)) -#Any AVX? +# Any AVX? 
KOKKOS_INTERNAL_USE_ARCH_AVX := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc )) KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW) | bc )) KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc )) KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc )) -# Decide what ISA level we are able to support -KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc )) -KOKKOS_INTERNAL_USE_ISA_KNC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc )) -KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc )) +# Decide what ISA level we are able to support. +KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc )) +KOKKOS_INTERNAL_USE_ISA_KNC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc )) +KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc )) -#Incompatible flags? +# Incompatible flags? 
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)>1" | bc )) KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc)) @@ -220,7 +217,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIGPU), 1) $(error Defined Multiple GPU architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) ) endif -#Generating the list of Flags +# Generating the list of Flags. KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src @@ -233,98 +230,96 @@ KOKKOS_CXXFLAGS = KOKKOS_LIBS = -lkokkos -ldl KOKKOS_LDFLAGS = -L$(shell pwd) -KOKKOS_SRC = +KOKKOS_SRC = KOKKOS_HEADERS = -#Generating the KokkosCore_config.h file +# Generating the KokkosCore_config.h file. 
tmp := $(shell echo "/* ---------------------------------------------" > KokkosCore_config.tmp) tmp := $(shell echo "Makefile constructed configuration:" >> KokkosCore_config.tmp) tmp := $(shell date >> KokkosCore_config.tmp) tmp := $(shell echo "----------------------------------------------*/" >> KokkosCore_config.tmp) - tmp := $(shell echo "/* Execution Spaces */" >> KokkosCore_config.tmp) + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + tmp := $(shell echo "\#define KOKKOS_HAVE_CUDA 1" >> KokkosCore_config.tmp ) +endif + ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) - tmp := $(shell echo '\#define KOKKOS_HAVE_OPENMP 1' >> KokkosCore_config.tmp) + tmp := $(shell echo '\#define KOKKOS_HAVE_OPENMP 1' >> KokkosCore_config.tmp) endif ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) - tmp := $(shell echo "\#define KOKKOS_HAVE_PTHREAD 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_HAVE_PTHREAD 1" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) + tmp := $(shell echo "\#define KOKKOS_HAVE_QTHREADS 1" >> KokkosCore_config.tmp ) endif ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) - tmp := $(shell echo "\#define KOKKOS_HAVE_SERIAL 1" >> KokkosCore_config.tmp ) -endif - -ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - tmp := $(shell echo "\#define KOKKOS_HAVE_CUDA 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_HAVE_SERIAL 1" >> KokkosCore_config.tmp ) endif ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1) - tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_USE_ISA_X86_64" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_USE_ISA_X86_64" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp ) endif ifeq ($(KOKKOS_INTERNAL_USE_ISA_KNC), 1) - tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> 
KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_USE_ISA_KNC" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_USE_ISA_KNC" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp ) endif ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1) - tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_USE_ISA_POWERPCLE" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp ) -endif - -ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1) - KOKKOS_CPPFLAGS += -I$(QTHREAD_PATH)/include - KOKKOS_LDFLAGS += -L$(QTHREAD_PATH)/lib - tmp := $(shell echo "\#define KOKKOS_HAVE_QTHREAD 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_USE_ISA_POWERPCLE" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp ) endif tmp := $(shell echo "/* General Settings */" >> KokkosCore_config.tmp) ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG) - tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG) + tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp ) endif ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Z_FLAG) - tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_HAVE_CXX1Z 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Z_FLAG) + tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_HAVE_CXX1Z 1" >> KokkosCore_config.tmp ) endif ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1) ifeq 
($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) - KOKKOS_CXXFLAGS += -lineinfo + KOKKOS_CXXFLAGS += -lineinfo endif - KOKKOS_CXXFLAGS += -g - KOKKOS_LDFLAGS += -g -ldl - tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_HAVE_DEBUG 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -g + KOKKOS_LDFLAGS += -g -ldl + tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_HAVE_DEBUG 1" >> KokkosCore_config.tmp ) endif ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1) - KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include - KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib - KOKKOS_LIBS += -lhwloc - tmp := $(shell echo "\#define KOKKOS_HAVE_HWLOC 1" >> KokkosCore_config.tmp ) + KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include + KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib + KOKKOS_LIBS += -lhwloc + tmp := $(shell echo "\#define KOKKOS_HAVE_HWLOC 1" >> KokkosCore_config.tmp ) endif ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1) - tmp := $(shell echo "\#define KOKKOS_USE_LIBRT 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define PREC_TIMER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_USE_LIBRT 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define PREC_TIMER 1" >> KokkosCore_config.tmp ) tmp := $(shell echo "\#define KOKKOSP_ENABLE_RTLIB 1" >> KokkosCore_config.tmp ) - KOKKOS_LIBS += -lrt + KOKKOS_LIBS += -lrt endif ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1) KOKKOS_CPPFLAGS += -I$(MEMKIND_PATH)/include - KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib - KOKKOS_LIBS += -lmemkind + KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib + KOKKOS_LIBS += -lmemkind tmp := $(shell echo "\#define KOKKOS_HAVE_HBWSPACE 1" >> KokkosCore_config.tmp ) endif @@ -341,262 +336,286 @@ endif tmp := $(shell echo "/* Cuda Settings */" >> KokkosCore_config.tmp) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1) - tmp := $(shell echo "\#define 
KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp ) endif ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1) - tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_USE_CUDA_UVM 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_USE_CUDA_UVM 1" >> KokkosCore_config.tmp ) endif ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1) - tmp := $(shell echo "\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += --relocatable-device-code=true - KOKKOS_LDFLAGS += --relocatable-device-code=true + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += --relocatable-device-code=true + KOKKOS_LDFLAGS += --relocatable-device-code=true endif ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -gt 70; echo $$?),0) - tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += -expt-extended-lambda + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -expt-extended-lambda else $(warning Warning: Cuda Lambda support was requested but NVCC version is too low. This requires NVCC for Cuda version 7.5 or higher. Disabling Lambda support now.) endif endif + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp ) endif endif + endif -#Add Architecture flags +# Add Architecture flags. 
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp ) - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - KOKKOS_CXXFLAGS += - KOKKOS_LDFLAGS += + tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp ) + + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + KOKKOS_CXXFLAGS += + KOKKOS_LDFLAGS += + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + KOKKOS_CXXFLAGS += + KOKKOS_LDFLAGS += else - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - KOKKOS_CXXFLAGS += - KOKKOS_LDFLAGS += - else - KOKKOS_CXXFLAGS += -march=armv8-a - KOKKOS_LDFLAGS += -march=armv8-a - endif + KOKKOS_CXXFLAGS += -march=armv8-a + KOKKOS_LDFLAGS += -march=armv8-a endif + endif endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV81 1" >> KokkosCore_config.tmp ) - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - KOKKOS_CXXFLAGS += - KOKKOS_LDFLAGS += + tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV81 1" >> KokkosCore_config.tmp ) + + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + KOKKOS_CXXFLAGS += + KOKKOS_LDFLAGS += + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + KOKKOS_CXXFLAGS += + KOKKOS_LDFLAGS += else - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - KOKKOS_CXXFLAGS += - KOKKOS_LDFLAGS += - else - KOKKOS_CXXFLAGS += -march=armv8.1-a - KOKKOS_LDFLAGS += -march=armv8.1-a - endif + KOKKOS_CXXFLAGS += -march=armv8.1-a + KOKKOS_LDFLAGS += -march=armv8.1-a endif + endif endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV8_THUNDERX 1" >> KokkosCore_config.tmp ) - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - KOKKOS_CXXFLAGS += - KOKKOS_LDFLAGS += + tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV8_THUNDERX 1" >> KokkosCore_config.tmp ) + + ifeq 
($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + KOKKOS_CXXFLAGS += + KOKKOS_LDFLAGS += + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + KOKKOS_CXXFLAGS += + KOKKOS_LDFLAGS += else - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - KOKKOS_CXXFLAGS += - KOKKOS_LDFLAGS += - else - KOKKOS_CXXFLAGS += -march=armv8-a -mtune=thunderx - KOKKOS_LDFLAGS += -march=armv8-a -mtune=thunderx - endif + KOKKOS_CXXFLAGS += -march=armv8-a -mtune=thunderx + KOKKOS_LDFLAGS += -march=armv8-a -mtune=thunderx endif + endif endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_AVX 1" >> KokkosCore_config.tmp ) - ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) - KOKKOS_CXXFLAGS += -mavx - KOKKOS_LDFLAGS += -mavx - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_AVX 1" >> KokkosCore_config.tmp ) - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - KOKKOS_CXXFLAGS += -tp=sandybridge - KOKKOS_LDFLAGS += -tp=sandybridge - else - # Assume that this is a really a GNU compiler - KOKKOS_CXXFLAGS += -mavx - KOKKOS_LDFLAGS += -mavx - endif - endif - endif + ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) + KOKKOS_CXXFLAGS += -mavx + KOKKOS_LDFLAGS += -mavx + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + KOKKOS_CXXFLAGS += -tp=sandybridge + KOKKOS_LDFLAGS += -tp=sandybridge + else + # Assume that this is a really a GNU compiler. 
+ KOKKOS_CXXFLAGS += -mavx + KOKKOS_LDFLAGS += -mavx + endif + endif + endif endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_POWER8 1" >> KokkosCore_config.tmp ) - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_POWER8 1" >> KokkosCore_config.tmp ) - else - # Assume that this is a really a GNU compiler or it could be XL on P8 - KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8 - KOKKOS_LDFLAGS += -mcpu=power8 -mtune=power8 - endif + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + + else + # Assume that this is really a GNU compiler or it could be XL on P8. + KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8 + KOKKOS_LDFLAGS += -mcpu=power8 -mtune=power8 + endif endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_POWER9 1" >> KokkosCore_config.tmp ) - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_POWER9 1" >> KokkosCore_config.tmp ) - else - # Assume that this is a really a GNU compiler or it could be XL on P9 - KOKKOS_CXXFLAGS += -mcpu=power9 -mtune=power9 - KOKKOS_LDFLAGS += -mcpu=power9 -mtune=power9 - endif + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + + else + # Assume that this is really a GNU compiler or it could be XL on P9.
+ KOKKOS_CXXFLAGS += -mcpu=power9 -mtune=power9 + KOKKOS_LDFLAGS += -mcpu=power9 -mtune=power9 + endif endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp ) - ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) - KOKKOS_CXXFLAGS += -xCORE-AVX2 - KOKKOS_LDFLAGS += -xCORE-AVX2 - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp ) - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - KOKKOS_CXXFLAGS += -tp=haswell - KOKKOS_LDFLAGS += -tp=haswell - else - # Assume that this is a really a GNU compiler - KOKKOS_CXXFLAGS += -march=core-avx2 -mtune=core-avx2 - KOKKOS_LDFLAGS += -march=core-avx2 -mtune=core-avx2 - endif - endif - endif + ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) + KOKKOS_CXXFLAGS += -xCORE-AVX2 + KOKKOS_LDFLAGS += -xCORE-AVX2 + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + KOKKOS_CXXFLAGS += -tp=haswell + KOKKOS_LDFLAGS += -tp=haswell + else + # Assume that this is a really a GNU compiler. 
+ KOKKOS_CXXFLAGS += -march=core-avx2 -mtune=core-avx2 + KOKKOS_LDFLAGS += -march=core-avx2 -mtune=core-avx2 + endif + endif + endif endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512MIC 1" >> KokkosCore_config.tmp ) - ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) - KOKKOS_CXXFLAGS += -xMIC-AVX512 - KOKKOS_LDFLAGS += -xMIC-AVX512 - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512MIC 1" >> KokkosCore_config.tmp ) - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) + KOKKOS_CXXFLAGS += -xMIC-AVX512 + KOKKOS_LDFLAGS += -xMIC-AVX512 + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - else - # Asssume that this is really a GNU compiler - KOKKOS_CXXFLAGS += -march=knl - KOKKOS_LDFLAGS += -march=knl - endif - endif - endif + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + + else + # Assume that this is really a GNU compiler. + KOKKOS_CXXFLAGS += -march=knl + KOKKOS_LDFLAGS += -march=knl + endif + endif + endif endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512XEON 1" >> KokkosCore_config.tmp ) - ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) - KOKKOS_CXXFLAGS += -xCORE-AVX512 - KOKKOS_LDFLAGS += -xCORE-AVX512 - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512XEON 1" >> KokkosCore_config.tmp ) - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) + KOKKOS_CXXFLAGS += -xCORE-AVX512 + KOKKOS_LDFLAGS += -xCORE-AVX512 + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - else - # Nothing here yet - KOKKOS_CXXFLAGS += -march=skylake-avx512 - KOKKOS_LDFLAGS += -march=skylake-avx512 - endif - endif - endif + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + + else + # Nothing here yet.
+ KOKKOS_CXXFLAGS += -march=skylake-avx512 + KOKKOS_LDFLAGS += -march=skylake-avx512 + endif + endif + endif endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_KNC 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += -mmic - KOKKOS_LDFLAGS += -mmic + tmp := $(shell echo "\#define KOKKOS_ARCH_KNC 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -mmic + KOKKOS_LDFLAGS += -mmic endif -#Figure out the architecture flag for Cuda +# Figure out the architecture flag for Cuda. ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG=-arch endif ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG=-x cuda --cuda-gpu-arch + KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG=--cuda-gpu-arch + KOKKOS_CXXFLAGS += -x cuda endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER30 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_30 + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER30 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_30 + KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_30 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER32 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_32 + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER32 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_32 + KOKKOS_LDFLAGS += 
$(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_32 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER35 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_35 + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER35 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_35 + KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_35 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER37 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_37 + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER37 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_37 + KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_37 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL50 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_50 + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL50 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_50 + KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_50 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define 
KOKKOS_ARCH_MAXWELL52 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_52 + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL52 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_52 + KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_52 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL53 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_53 + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL53 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_53 + KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_53 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL61 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_61 + tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL61 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_61 + KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_61 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL60 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_60 + tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp ) + tmp := $(shell 
echo "\#define KOKKOS_ARCH_PASCAL60 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_60 + KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_60 endif + endif - + KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h) ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h) -KOKKOS_INTERNAL_NEW_CONFIG := $(strip $(shell diff KokkosCore_config.h KokkosCore_config.tmp | grep define | wc -l)) + KOKKOS_INTERNAL_NEW_CONFIG := $(strip $(shell diff KokkosCore_config.h KokkosCore_config.tmp | grep define | wc -l)) else -KOKKOS_INTERNAL_NEW_CONFIG := 1 + KOKKOS_INTERNAL_NEW_CONFIG := 1 endif ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0) - tmp := $(shell cp KokkosCore_config.tmp KokkosCore_config.h) + tmp := $(shell cp KokkosCore_config.tmp KokkosCore_config.h) endif KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp) @@ -609,53 +628,57 @@ KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.cpp) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp) - KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) - KOKKOS_CXXFLAGS += -I$(CUDA_PATH)/include - KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64 - KOKKOS_LIBS += -lcudart -lcuda -endif - -ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) - KOKKOS_LIBS += -lpthread - KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp) - KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp) -endif - -ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1) - KOKKOS_LIBS += -lqthread - KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Qthread/*.cpp) - KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Qthread/*.hpp) + KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp) + KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) + KOKKOS_CXXFLAGS += -I$(CUDA_PATH)/include + KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64 + KOKKOS_LIBS += -lcudart 
-lcuda endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) - KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp) - KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp) - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) - KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMP_FLAG) - else - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG) - endif - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG) + KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp) + KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp) + + ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) + KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMP_FLAG) + else + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG) + endif + + KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG) endif -#Explicitly set the GCC Toolchain for Clang +ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) + KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp) + KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp) + KOKKOS_LIBS += -lpthread +endif + +ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) + KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.cpp) + KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.hpp) + KOKKOS_CPPFLAGS += -I$(QTHREADS_PATH)/include + KOKKOS_LDFLAGS += -L$(QTHREADS_PATH)/lib + KOKKOS_LIBS += -lqthread +endif + +# Explicitly set the GCC Toolchain for Clang. 
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_INTERNAL_GCC_PATH = $(shell which g++) - KOKKOS_INTERNAL_GCC_TOOLCHAIN = $(KOKKOS_INTERNAL_GCC_PATH:/bin/g++=) - KOKKOS_CXXFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN) -DKOKKOS_CUDA_CLANG_WORKAROUND -DKOKKOS_CUDA_USE_LDG_INTRINSIC - KOKKOS_LDFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN) + KOKKOS_INTERNAL_GCC_PATH = $(shell which g++) + KOKKOS_INTERNAL_GCC_TOOLCHAIN = $(KOKKOS_INTERNAL_GCC_PATH:/bin/g++=) + KOKKOS_CXXFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN) -DKOKKOS_CUDA_CLANG_WORKAROUND -DKOKKOS_CUDA_USE_LDG_INTRINSIC + KOKKOS_LDFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN) endif -#With Cygwin functions such as fdopen and fileno are not defined -#when strict ansi is enabled. strict ansi gets enabled with --std=c++11 -#though. So we hard undefine it here. Not sure if that has any bad side effects -#This is needed for gtest actually, not for Kokkos itself! +# With Cygwin functions such as fdopen and fileno are not defined +# when strict ansi is enabled. strict ansi gets enabled with --std=c++11 +# though. So we hard undefine it here. Not sure if that has any bad side effects +# This is needed for gtest actually, not for Kokkos itself! ifeq ($(KOKKOS_INTERNAL_OS_CYGWIN), 1) KOKKOS_CXXFLAGS += -U__STRICT_ANSI__ endif -# Setting up dependencies +# Setting up dependencies. 
KokkosCore_config.h: diff --git a/lib/kokkos/Makefile.targets b/lib/kokkos/Makefile.targets index a48a5f6eb7..54cacb741b 100644 --- a/lib/kokkos/Makefile.targets +++ b/lib/kokkos/Makefile.targets @@ -18,6 +18,8 @@ Kokkos_Serial_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_ $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp +Kokkos_HostThreadTeam.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp Kokkos_spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp Kokkos_Profiling_Interface.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp @@ -43,11 +45,11 @@ Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokk $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp endif -ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1) -Kokkos_QthreadExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthread/Kokkos_QthreadExec.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthread/Kokkos_QthreadExec.cpp -Kokkos_Qthread_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp +ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) +Kokkos_QthreadsExec.o: $(KOKKOS_CPP_DEPENDS) 
$(KOKKOS_PATH)/core/src/Qthreads/Kokkos_QthreadsExec.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthreads/Kokkos_QthreadsExec.cpp +Kokkos_Qthreads_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthreads/Kokkos_Qthreads_Task.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthreads/Kokkos_Qthreads_Task.cpp endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) @@ -59,4 +61,3 @@ endif Kokkos_HBWSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp - diff --git a/lib/kokkos/README b/lib/kokkos/README index 7ebde23a1f..257a2e5db4 100644 --- a/lib/kokkos/README +++ b/lib/kokkos/README @@ -45,31 +45,39 @@ Primary tested compilers on X86 are: GCC 4.8.4 GCC 4.9.2 GCC 5.1.0 + GCC 5.2.0 Intel 14.0.4 Intel 15.0.2 Intel 16.0.1 Intel 17.0.098 + Intel 17.1.132 Clang 3.5.2 Clang 3.6.1 + Clang 3.7.1 + Clang 3.8.1 Clang 3.9.0 + PGI 17.1 Primary tested compilers on Power 8 are: GCC 5.4.0 (OpenMP,Serial) IBM XL 13.1.3 (OpenMP, Serial) (There is a workaround in place to avoid a compiler bug) Primary tested compilers on Intel KNL are: + GCC 6.2.0 Intel 16.2.181 (with gcc 4.7.2) Intel 17.0.098 (with gcc 4.7.2) + Intel 17.1.132 (with gcc 4.9.3) + Intel 17.2.174 (with gcc 4.9.3) + Intel 18.0.061 (beta) (with gcc 4.9.3) Secondary tested compilers are: - CUDA 7.0 (with gcc 4.7.2) - CUDA 7.5 (with gcc 4.7.2) + CUDA 7.0 (with gcc 4.8.4) + CUDA 7.5 (with gcc 4.8.4) CUDA 8.0 (with gcc 5.3.0 on X86 and gcc 5.4.0 on Power8) CUDA/Clang 8.0 using Clang/Trunk compiler Other compilers working: X86: - PGI 15.4 Cygwin 2.1.0 64bit with gcc 4.9.3 Known non-working combinations: diff --git a/lib/kokkos/algorithms/cmake/Dependencies.cmake b/lib/kokkos/algorithms/cmake/Dependencies.cmake index 1d71d8af34..c36b62523f 100644 --- a/lib/kokkos/algorithms/cmake/Dependencies.cmake +++ 
b/lib/kokkos/algorithms/cmake/Dependencies.cmake @@ -1,5 +1,5 @@ TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( - LIB_REQUIRED_PACKAGES KokkosCore + LIB_REQUIRED_PACKAGES KokkosCore KokkosContainers LIB_OPTIONAL_TPLS Pthread CUDA HWLOC TEST_OPTIONAL_TPLS CUSPARSE ) diff --git a/lib/kokkos/algorithms/src/Kokkos_Random.hpp b/lib/kokkos/algorithms/src/Kokkos_Random.hpp index a0d666183c..4b58d9d7c3 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Random.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp @@ -547,7 +547,7 @@ namespace Kokkos { KOKKOS_INLINE_FUNCTION Random_XorShift64 (uint64_t state, int state_idx = 0) - : state_(state),state_idx_(state_idx){} + : state_(state==0?uint64_t(1318319):state),state_idx_(state_idx){} KOKKOS_INLINE_FUNCTION uint32_t urand() { @@ -719,6 +719,9 @@ namespace Kokkos { } void init(uint64_t seed, int num_states) { + if(seed==0) + seed = uint64_t(1318319); + num_states_ = num_states; locks_ = lock_type("Kokkos::Random_XorShift64::locks",num_states_); @@ -974,8 +977,9 @@ namespace Kokkos { inline void init(uint64_t seed, int num_states) { + if(seed==0) + seed = uint64_t(1318319); num_states_ = num_states; - locks_ = int_view_type("Kokkos::Random_XorShift1024::locks",num_states_); state_ = state_data_type("Kokkos::Random_XorShift1024::state",num_states_); p_ = int_view_type("Kokkos::Random_XorShift1024::p",num_states_); diff --git a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp index 5b8c65fee1..237de751fe 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp @@ -53,69 +53,122 @@ namespace Kokkos { namespace Impl { - template + template< class DstViewType , class SrcViewType + , int Rank = DstViewType::Rank > struct CopyOp; - template - struct CopyOp { - template + template< class DstViewType , class SrcViewType > + struct CopyOp { KOKKOS_INLINE_FUNCTION - static void copy(DstType& dst, size_t i_dst, - SrcType& src, size_t i_src ) { + static void 
copy(DstViewType const& dst, size_t i_dst, + SrcViewType const& src, size_t i_src ) { dst(i_dst) = src(i_src); } }; - template - struct CopyOp { - template + template< class DstViewType , class SrcViewType > + struct CopyOp { KOKKOS_INLINE_FUNCTION - static void copy(DstType& dst, size_t i_dst, - SrcType& src, size_t i_src ) { - for(int j = 0;j< (int) dst.dimension_1(); j++) + static void copy(DstViewType const& dst, size_t i_dst, + SrcViewType const& src, size_t i_src ) { + for(int j = 0;j< (int) dst.extent(1); j++) dst(i_dst,j) = src(i_src,j); } }; - template - struct CopyOp { - template + template< class DstViewType , class SrcViewType > + struct CopyOp { KOKKOS_INLINE_FUNCTION - static void copy(DstType& dst, size_t i_dst, - SrcType& src, size_t i_src ) { - for(int j = 0; j +//---------------------------------------------------------------------------- + +template< class KeyViewType + , class BinSortOp + , class Space = typename KeyViewType::device_type + , class SizeType = typename KeyViewType::memory_space::size_type + > class BinSort { - - public: - template - struct bin_sort_sort_functor { - typedef ExecutionSpace execution_space; - typedef typename ValuesViewType::non_const_type values_view_type; - typedef typename ValuesViewType::const_type const_values_view_type; - Kokkos::View > values; - values_view_type sorted_values; - typename PermuteViewType::const_type sort_order; - bin_sort_sort_functor(const_values_view_type values_, values_view_type sorted_values_, PermuteViewType sort_order_): - values(values_),sorted_values(sorted_values_),sort_order(sort_order_) {} + + template< class DstViewType , class SrcViewType > + struct copy_functor { + + typedef typename SrcViewType::const_type src_view_type ; + + typedef Impl::CopyOp< DstViewType , src_view_type > copy_op ; + + DstViewType dst_values ; + src_view_type src_values ; + int dst_offset ; + + copy_functor( DstViewType const & dst_values_ + , int const & dst_offset_ + , SrcViewType const & src_values_ + ) 
+ : dst_values( dst_values_ ) + , src_values( src_values_ ) + , dst_offset( dst_offset_ ) + {} KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - //printf("Sort: %i %i\n",i,sort_order(i)); - CopyOp::copy(sorted_values,i,values,sort_order(i)); + void operator() (const int& i) const { + // printf("copy: dst(%i) src(%i)\n",i+dst_offset,i); + copy_op::copy(dst_values,i+dst_offset,src_values,i); } }; - typedef ExecutionSpace execution_space; + template< class DstViewType + , class PermuteViewType + , class SrcViewType + > + struct copy_permute_functor { + + // If a Kokkos::View then can generate constant random access + // otherwise can only use the constant type. + + typedef typename std::conditional + < Kokkos::is_view< SrcViewType >::value + , Kokkos::View< typename SrcViewType::const_data_type + , typename SrcViewType::array_layout + , typename SrcViewType::device_type + , Kokkos::MemoryTraits + > + , typename SrcViewType::const_type + >::type src_view_type ; + + typedef typename PermuteViewType::const_type perm_view_type ; + + typedef Impl::CopyOp< DstViewType , src_view_type > copy_op ; + + DstViewType dst_values ; + perm_view_type sort_order ; + src_view_type src_values ; + + copy_permute_functor( DstViewType const & dst_values_ + , PermuteViewType const & sort_order_ + , SrcViewType const & src_values_ + ) + : dst_values( dst_values_ ) + , sort_order( sort_order_ ) + , src_values( src_values_ ) + {} + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + // printf("copy_permute: dst(%i) src(%i)\n",i,sort_order(i)); + copy_op::copy(dst_values,i,src_values,sort_order(i)); + } + }; + + typedef typename Space::execution_space execution_space; typedef BinSortOp bin_op_type; struct bin_count_tag {}; @@ -124,84 +177,137 @@ public: struct bin_sort_bins_tag {}; public: + typedef SizeType size_type; typedef size_type value_type; - typedef Kokkos::View offset_type; - typedef Kokkos::View bin_count_type; + typedef Kokkos::View offset_type; + 
typedef Kokkos::View bin_count_type; + typedef typename KeyViewType::const_type const_key_view_type ; - typedef Kokkos::View const_key_view_type; - typedef Kokkos::View > const_rnd_key_view_type; + // If a Kokkos::View then can generate constant random access + // otherwise can only use the constant type. + + typedef typename std::conditional + < Kokkos::is_view< KeyViewType >::value + , Kokkos::View< typename KeyViewType::const_data_type, + typename KeyViewType::array_layout, + typename KeyViewType::device_type, + Kokkos::MemoryTraits > + , const_key_view_type + >::type const_rnd_key_view_type; typedef typename KeyViewType::non_const_value_type non_const_key_scalar; typedef typename KeyViewType::const_value_type const_key_scalar; + typedef Kokkos::View > bin_count_atomic_type ; + private: + const_key_view_type keys; const_rnd_key_view_type keys_rnd; public: - BinSortOp bin_op; - offset_type bin_offsets; + BinSortOp bin_op ; + offset_type bin_offsets ; + bin_count_atomic_type bin_count_atomic ; + bin_count_type bin_count_const ; + offset_type sort_order ; - Kokkos::View > bin_count_atomic; - bin_count_type bin_count_const; - - offset_type sort_order; - - bool sort_within_bins; + int range_begin ; + int range_end ; + bool sort_within_bins ; public: - // Constructor: takes the keys, the binning_operator and optionally whether to sort within bins (default false) - BinSort(const_key_view_type keys_, BinSortOp bin_op_, - bool sort_within_bins_ = false) - :keys(keys_),keys_rnd(keys_), bin_op(bin_op_) { + BinSort() {} - bin_count_atomic = Kokkos::View("Kokkos::SortImpl::BinSortFunctor::bin_count",bin_op.max_bins()); + //---------------------------------------- + // Constructor: takes the keys, the binning_operator and optionally whether to sort within bins (default false) + BinSort( const_key_view_type keys_ + , int range_begin_ + , int range_end_ + , BinSortOp bin_op_ + , bool sort_within_bins_ = false + ) + : keys(keys_) + , keys_rnd(keys_) + , bin_op(bin_op_) + , 
bin_offsets() + , bin_count_atomic() + , bin_count_const() + , sort_order() + , range_begin( range_begin_ ) + , range_end( range_end_ ) + , sort_within_bins( sort_within_bins_ ) + { + bin_count_atomic = Kokkos::View("Kokkos::SortImpl::BinSortFunctor::bin_count",bin_op.max_bins()); bin_count_const = bin_count_atomic; bin_offsets = offset_type("Kokkos::SortImpl::BinSortFunctor::bin_offsets",bin_op.max_bins()); - sort_order = offset_type("PermutationVector",keys.dimension_0()); - sort_within_bins = sort_within_bins_; + sort_order = offset_type("PermutationVector",range_end-range_begin); } + BinSort( const_key_view_type keys_ + , BinSortOp bin_op_ + , bool sort_within_bins_ = false + ) + : BinSort( keys_ , 0 , keys_.extent(0), bin_op_ , sort_within_bins_ ) {} + + //---------------------------------------- // Create the permutation vector, the bin_offset array and the bin_count array. Can be called again if keys changed void create_permute_vector() { - Kokkos::parallel_for (Kokkos::RangePolicy (0,keys.dimension_0()),*this); - Kokkos::parallel_scan(Kokkos::RangePolicy (0,bin_op.max_bins()) ,*this); + const size_t len = range_end - range_begin ; + Kokkos::parallel_for (Kokkos::RangePolicy (0,len),*this); + Kokkos::parallel_scan(Kokkos::RangePolicy (0,bin_op.max_bins()) ,*this); Kokkos::deep_copy(bin_count_atomic,0); - Kokkos::parallel_for (Kokkos::RangePolicy (0,keys.dimension_0()),*this); + Kokkos::parallel_for (Kokkos::RangePolicy (0,len),*this); if(sort_within_bins) - Kokkos::parallel_for (Kokkos::RangePolicy(0,bin_op.max_bins()) ,*this); + Kokkos::parallel_for (Kokkos::RangePolicy(0,bin_op.max_bins()) ,*this); } // Sort a view with respect ot the first dimension using the permutation array template - void sort(ValuesViewType values) { - ValuesViewType sorted_values = ValuesViewType("Copy", - values.dimension_0(), - values.dimension_1(), - values.dimension_2(), - values.dimension_3(), - values.dimension_4(), - values.dimension_5(), - values.dimension_6(), - 
values.dimension_7()); + void sort( ValuesViewType const & values) + { + typedef + Kokkos::View< typename ValuesViewType::data_type, + typename ValuesViewType::array_layout, + typename ValuesViewType::device_type > + scratch_view_type ; - parallel_for(values.dimension_0(), - bin_sort_sort_functor >(values,sorted_values,sort_order)); + const size_t len = range_end - range_begin ; - deep_copy(values,sorted_values); + scratch_view_type + sorted_values("Scratch", + len, + values.extent(1), + values.extent(2), + values.extent(3), + values.extent(4), + values.extent(5), + values.extent(6), + values.extent(7)); + + { + copy_permute_functor< scratch_view_type /* DstViewType */ + , offset_type /* PermuteViewType */ + , ValuesViewType /* SrcViewType */ + > + functor( sorted_values , sort_order , values ); + + parallel_for( Kokkos::RangePolicy(0,len),functor); + } + + { + copy_functor< ValuesViewType , scratch_view_type > + functor( values , range_begin , sorted_values ); + + parallel_for( Kokkos::RangePolicy(0,len),functor); + } } // Get the permutation vector @@ -217,9 +323,11 @@ public: bin_count_type get_bin_count() const {return bin_count_const;} public: + KOKKOS_INLINE_FUNCTION void operator() (const bin_count_tag& tag, const int& i) const { - bin_count_atomic(bin_op.bin(keys,i))++; + const int j = range_begin + i ; + bin_count_atomic(bin_op.bin(keys,j))++; } KOKKOS_INLINE_FUNCTION @@ -232,10 +340,11 @@ public: KOKKOS_INLINE_FUNCTION void operator() (const bin_binning_tag& tag, const int& i) const { - const int bin = bin_op.bin(keys,i); + const int j = range_begin + i ; + const int bin = bin_op.bin(keys,j); const int count = bin_count_atomic(bin)++; - sort_order(bin_offsets(bin) + count) = i; + sort_order(bin_offsets(bin) + count) = j ; } KOKKOS_INLINE_FUNCTION @@ -262,13 +371,19 @@ public: } }; +//---------------------------------------------------------------------------- + template struct BinOp1D { - const int max_bins_; - const double mul_; + int max_bins_; + double 
mul_; typename KeyViewType::const_value_type range_; typename KeyViewType::const_value_type min_; + BinOp1D():max_bins_(0),mul_(0.0), + range_(typename KeyViewType::const_value_type()), + min_(typename KeyViewType::const_value_type()) {} + //Construct BinOp with number of bins, minimum value and maxuimum value BinOp1D(int max_bins__, typename KeyViewType::const_value_type min, typename KeyViewType::const_value_type max ) @@ -302,12 +417,14 @@ struct BinOp3D { typename KeyViewType::non_const_value_type range_[3]; typename KeyViewType::non_const_value_type min_[3]; + BinOp3D() {} + BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[], typename KeyViewType::const_value_type max[] ) { - max_bins_[0] = max_bins__[0]+1; - max_bins_[1] = max_bins__[1]+1; - max_bins_[2] = max_bins__[2]+1; + max_bins_[0] = max_bins__[0]; + max_bins_[1] = max_bins__[1]; + max_bins_[2] = max_bins__[2]; mul_[0] = 1.0*max_bins__[0]/(max[0]-min[0]); mul_[1] = 1.0*max_bins__[1]/(max[1]-min[1]); mul_[2] = 1.0*max_bins__[2]/(max[2]-min[2]); @@ -364,7 +481,7 @@ bool try_std_sort(ViewType view) { possible = possible && (ViewType::Rank == 1); possible = possible && (stride[0] == 1); if(possible) { - std::sort(view.ptr_on_device(),view.ptr_on_device()+view.dimension_0()); + std::sort(view.data(),view.data()+view.extent(0)); } return possible; } @@ -386,7 +503,8 @@ struct min_max_functor { } template -void sort(ViewType view, bool always_use_kokkos_sort = false) { +void sort( ViewType const & view , bool const always_use_kokkos_sort = false) +{ if(!always_use_kokkos_sort) { if(Impl::try_std_sort(view)) return; } @@ -394,14 +512,37 @@ void sort(ViewType view, bool always_use_kokkos_sort = false) { Kokkos::Experimental::MinMaxScalar result; Kokkos::Experimental::MinMax reducer(result); - parallel_reduce(Kokkos::RangePolicy(0,view.dimension_0()), + parallel_reduce(Kokkos::RangePolicy(0,view.extent(0)), Impl::min_max_functor(view),reducer); if(result.min_val == result.max_val) return; - 
BinSort bin_sort(view,CompType(view.dimension_0()/2,result.min_val,result.max_val),true); + BinSort bin_sort(view,CompType(view.extent(0)/2,result.min_val,result.max_val),true); bin_sort.create_permute_vector(); bin_sort.sort(view); } +template +void sort( ViewType view + , size_t const begin + , size_t const end + ) +{ + typedef Kokkos::RangePolicy range_policy ; + typedef BinOp1D CompType; + + Kokkos::Experimental::MinMaxScalar result; + Kokkos::Experimental::MinMax reducer(result); + + parallel_reduce( range_policy( begin , end ) + , Impl::min_max_functor(view),reducer ); + + if(result.min_val == result.max_val) return; + + BinSort + bin_sort(view,begin,end,CompType((end-begin)/2,result.min_val,result.max_val),true); + + bin_sort.create_permute_vector(); + bin_sort.sort(view); +} } #endif diff --git a/lib/kokkos/algorithms/unit_tests/TestSort.hpp b/lib/kokkos/algorithms/unit_tests/TestSort.hpp index 03e4fb691e..61ffa6f43a 100644 --- a/lib/kokkos/algorithms/unit_tests/TestSort.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestSort.hpp @@ -44,6 +44,7 @@ #include #include +#include #include #include @@ -192,17 +193,81 @@ void test_3D_sort(unsigned int n) { double epsilon = 1e-10; unsigned int equal_sum = (ratio > (1.0-epsilon)) && (ratio < (1.0+epsilon)) ? 
1 : 0; - printf("3D Sort Sum: %f %f Fails: %u\n",sum_before,sum_after,sort_fails); + if ( sort_fails ) + printf("3D Sort Sum: %f %f Fails: %u\n",sum_before,sum_after,sort_fails); + ASSERT_EQ(sort_fails,0); ASSERT_EQ(equal_sum,1); } +//---------------------------------------------------------------------------- + +template +void test_dynamic_view_sort(unsigned int n ) +{ + typedef typename ExecutionSpace::memory_space memory_space ; + typedef Kokkos::Experimental::DynamicView KeyDynamicViewType; + typedef Kokkos::View KeyViewType; + + const size_t upper_bound = 2 * n ; + + typename KeyDynamicViewType::memory_pool + pool( memory_space() , 2 * n * sizeof(KeyType) ); + + KeyDynamicViewType keys("Keys",pool,upper_bound); + + keys.resize_serial(n); + + KeyViewType keys_view("KeysTmp", n ); + + // Test sorting array with all numbers equal + Kokkos::deep_copy(keys_view,KeyType(1)); + Kokkos::Experimental::deep_copy(keys,keys_view); + Kokkos::sort(keys, 0 /* begin */ , n /* end */ ); + + Kokkos::Random_XorShift64_Pool g(1931); + Kokkos::fill_random(keys_view,g,Kokkos::Random_XorShift64_Pool::generator_type::MAX_URAND); + + Kokkos::Experimental::deep_copy(keys,keys_view); + + double sum_before = 0.0; + double sum_after = 0.0; + unsigned int sort_fails = 0; + + Kokkos::parallel_reduce(n,sum(keys_view),sum_before); + + Kokkos::sort(keys, 0 /* begin */ , n /* end */ ); + + Kokkos::Experimental::deep_copy( keys_view , keys ); + + Kokkos::parallel_reduce(n,sum(keys_view),sum_after); + Kokkos::parallel_reduce(n-1,is_sorted_struct(keys_view),sort_fails); + + double ratio = sum_before/sum_after; + double epsilon = 1e-10; + unsigned int equal_sum = (ratio > (1.0-epsilon)) && (ratio < (1.0+epsilon)) ? 
1 : 0; + + if ( sort_fails != 0 || equal_sum != 1 ) { + std::cout << " N = " << n + << " ; sum_before = " << sum_before + << " ; sum_after = " << sum_after + << " ; ratio = " << ratio + << std::endl ; + } + + ASSERT_EQ(sort_fails,0); + ASSERT_EQ(equal_sum,1); +} + +//---------------------------------------------------------------------------- + template void test_sort(unsigned int N) { test_1D_sort(N*N*N, true); test_1D_sort(N*N*N, false); test_3D_sort(N); + test_dynamic_view_sort(N*N); } } diff --git a/lib/kokkos/bin/nvcc_wrapper b/lib/kokkos/bin/nvcc_wrapper index cb206cf88b..09fa5d500a 100755 --- a/lib/kokkos/bin/nvcc_wrapper +++ b/lib/kokkos/bin/nvcc_wrapper @@ -140,6 +140,9 @@ do #strip of pedantic because it produces endless warnings about #LINE added by the preprocessor -pedantic|-Wpedantic|-ansi) ;; + #strip of -Woverloaded-virtual to avoid "cc1: warning: command line option ‘-Woverloaded-virtual’ is valid for C++/ObjC++ but not for C" + -Woverloaded-virtual) + ;; #strip -Xcompiler because we add it -Xcompiler) if [ $first_xcompiler_arg -eq 1 ]; then @@ -190,7 +193,7 @@ do object_files_xlinker="$object_files_xlinker -Xlinker $1" ;; #Handle object files which always need to use "-Xlinker": -x cu applies to all input files, so give them to linker, except if only linking - *.dylib) + @*|*.dylib) object_files="$object_files -Xlinker $1" object_files_xlinker="$object_files_xlinker -Xlinker $1" ;; diff --git a/lib/kokkos/cmake/deps/QTHREAD.cmake b/lib/kokkos/cmake/deps/QTHREADS.cmake similarity index 98% rename from lib/kokkos/cmake/deps/QTHREAD.cmake rename to lib/kokkos/cmake/deps/QTHREADS.cmake index 994b72b200..c312f2590b 100644 --- a/lib/kokkos/cmake/deps/QTHREAD.cmake +++ b/lib/kokkos/cmake/deps/QTHREADS.cmake @@ -63,8 +63,7 @@ # Source: https://code.google.com/p/qthreads # -TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( QTHREAD +TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( QTHREADS REQUIRED_HEADERS qthread.h REQUIRED_LIBS_NAMES "qthread" ) - diff --git 
a/lib/kokkos/cmake/tpls/FindTPLQTHREAD.cmake b/lib/kokkos/cmake/tpls/FindTPLQTHREADS.cmake similarity index 98% rename from lib/kokkos/cmake/tpls/FindTPLQTHREAD.cmake rename to lib/kokkos/cmake/tpls/FindTPLQTHREADS.cmake index 994b72b200..c312f2590b 100644 --- a/lib/kokkos/cmake/tpls/FindTPLQTHREAD.cmake +++ b/lib/kokkos/cmake/tpls/FindTPLQTHREADS.cmake @@ -63,8 +63,7 @@ # Source: https://code.google.com/p/qthreads # -TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( QTHREAD +TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( QTHREADS REQUIRED_HEADERS qthread.h REQUIRED_LIBS_NAMES "qthread" ) - diff --git a/lib/kokkos/config/kokkos_dev/config-core-all.sh b/lib/kokkos/config/kokkos_dev/config-core-all.sh index fa588c778f..d4fb25a8e1 100755 --- a/lib/kokkos/config/kokkos_dev/config-core-all.sh +++ b/lib/kokkos/config/kokkos_dev/config-core-all.sh @@ -6,7 +6,7 @@ #----------------------------------------------------------------------------- # Building on 'kokkos-dev.sandia.gov' with enabled capabilities: # -# Cuda, OpenMP, Threads, Qthread, hwloc +# Cuda, OpenMP, Threads, Qthreads, hwloc # # module loaded on 'kokkos-dev.sandia.gov' for this build # @@ -82,13 +82,13 @@ CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" #----------------------------------------------------------------------------- -# Qthread +# Qthreads -QTHREAD_BASE_DIR="/home/projects/qthreads/2014-07-08/host/gnu/4.7.3" +QTHREADS_BASE_DIR="/home/projects/qthreads/2014-07-08/host/gnu/4.7.3" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_QTHREAD:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREAD_INCLUDE_DIRS:FILEPATH=${QTHREAD_BASE_DIR}/include" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREAD_LIBRARY_DIRS:FILEPATH=${QTHREAD_BASE_DIR}/lib" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_QTHREADS:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREADS_INCLUDE_DIRS:FILEPATH=${QTHREADS_BASE_DIR}/include" 
+CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREADS_LIBRARY_DIRS:FILEPATH=${QTHREADS_BASE_DIR}/lib" #----------------------------------------------------------------------------- # C++11 @@ -108,6 +108,3 @@ rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} - -#----------------------------------------------------------------------------- - diff --git a/lib/kokkos/config/master_history.txt b/lib/kokkos/config/master_history.txt index 446cbb0216..9eaecb5031 100644 --- a/lib/kokkos/config/master_history.txt +++ b/lib/kokkos/config/master_history.txt @@ -4,4 +4,5 @@ tag: 2.01.10 date: 09:27:2016 master: e4119325 develop: e6cda11e tag: 2.02.00 date: 10:30:2016 master: 6c90a581 develop: ca3dd56e tag: 2.02.01 date: 11:01:2016 master: 9c698c86 develop: b0072304 tag: 2.02.07 date: 12:16:2016 master: 4b4cc4ba develop: 382c0966 -tag: 2.02.15 date: 02:10:2017 master: 8c64cd93 develop: 28dea8b6 +tag: 2.02.15 date: 02:10:2017 master: 8c64cd93 develop: 28dea8b6 +tag: 2.03.00 date: 04:25:2017 master: 120d9ce7 develop: 015ba641 diff --git a/lib/kokkos/config/test_all_sandia b/lib/kokkos/config/test_all_sandia index 2c15e951ba..6909606643 100755 --- a/lib/kokkos/config/test_all_sandia +++ b/lib/kokkos/config/test_all_sandia @@ -6,29 +6,29 @@ set -o pipefail -# Determine current machine +# Determine current machine. MACHINE="" HOSTNAME=$(hostname) PROCESSOR=`uname -p` if [[ "$HOSTNAME" =~ (white|ride).* ]]; then - MACHINE=white + MACHINE=white elif [[ "$HOSTNAME" =~ .*bowman.* ]]; then - MACHINE=bowman + MACHINE=bowman elif [[ "$HOSTNAME" =~ node.* ]]; then # Warning: very generic name - if [[ "$PROCESSOR" = "aarch64" ]]; then - MACHINE=sullivan - else - MACHINE=shepard - fi + if [[ "$PROCESSOR" = "aarch64" ]]; then + MACHINE=sullivan + else + MACHINE=shepard + fi elif [[ "$HOSTNAME" =~ apollo ]]; then - MACHINE=apollo + MACHINE=apollo elif [ ! 
-z "$SEMS_MODULEFILES_ROOT" ]; then - MACHINE=sems + MACHINE=sems else - echo "Unrecognized machine" >&2 - exit 1 + echo "Unrecognized machine" >&2 + exit 1 fi GCC_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial" @@ -45,10 +45,11 @@ CLANG_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limi INTEL_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" CUDA_WARNING_FLAGS="" -# Default. Machine specific can override +# Default. Machine specific can override. DEBUG=False ARGS="" CUSTOM_BUILD_LIST="" +QTHREADS_PATH="" DRYRUN=False BUILD_ONLY=False declare -i NUM_JOBS_TO_RUN_IN_PARALLEL=3 @@ -60,86 +61,90 @@ PRINT_HELP=False OPT_FLAG="" KOKKOS_OPTIONS="" - # -# Handle arguments +# Handle arguments. # while [[ $# > 0 ]] do -key="$1" -case $key in ---kokkos-path*) -KOKKOS_PATH="${key#*=}" -;; ---build-list*) -CUSTOM_BUILD_LIST="${key#*=}" -;; ---debug*) -DEBUG=True -;; ---build-only*) -BUILD_ONLY=True -;; ---test-script*) -TEST_SCRIPT=True -;; ---skip-hwloc*) -SKIP_HWLOC=True -;; ---num*) -NUM_JOBS_TO_RUN_IN_PARALLEL="${key#*=}" -;; ---dry-run*) -DRYRUN=True -;; ---spot-check*) -SPOT_CHECK=True -;; ---arch*) -ARCH_FLAG="--arch=${key#*=}" -;; ---opt-flag*) -OPT_FLAG="${key#*=}" -;; ---with-cuda-options*) -KOKKOS_CUDA_OPTIONS="--with-cuda-options=${key#*=}" -;; ---help*) -PRINT_HELP=True -;; -*) -# args, just append -ARGS="$ARGS $1" -;; -esac -shift + key="$1" + + case $key in + --kokkos-path*) + KOKKOS_PATH="${key#*=}" + ;; + --qthreads-path*) + QTHREADS_PATH="${key#*=}" + ;; + --build-list*) + CUSTOM_BUILD_LIST="${key#*=}" + ;; + --debug*) + DEBUG=True + ;; + --build-only*) + BUILD_ONLY=True + ;; + --test-script*) + TEST_SCRIPT=True + ;; + --skip-hwloc*) + SKIP_HWLOC=True + ;; + --num*) + NUM_JOBS_TO_RUN_IN_PARALLEL="${key#*=}" + ;; + --dry-run*) + DRYRUN=True + ;; + --spot-check*) + SPOT_CHECK=True + ;; + --arch*) + ARCH_FLAG="--arch=${key#*=}" + ;; + --opt-flag*) + OPT_FLAG="${key#*=}" + ;; + 
--with-cuda-options*) + KOKKOS_CUDA_OPTIONS="--with-cuda-options=${key#*=}" + ;; + --help*) + PRINT_HELP=True + ;; + *) + # args, just append + ARGS="$ARGS $1" + ;; + esac + + shift done SCRIPT_KOKKOS_ROOT=$( cd "$( dirname "$0" )" && cd .. && pwd ) -# set kokkos path +# Set kokkos path. if [ -z "$KOKKOS_PATH" ]; then - KOKKOS_PATH=$SCRIPT_KOKKOS_ROOT + KOKKOS_PATH=$SCRIPT_KOKKOS_ROOT else - # Ensure KOKKOS_PATH is abs path - KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd ) + # Ensure KOKKOS_PATH is abs path. + KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd ) fi # -# Machine specific config +# Machine specific config. # if [ "$MACHINE" = "sems" ]; then - source /projects/sems/modulefiles/utils/sems-modules-init.sh + source /projects/sems/modulefiles/utils/sems-modules-init.sh - BASE_MODULE_LIST="sems-env,kokkos-env,sems-/,kokkos-hwloc/1.10.1/base" - CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-/,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base" - CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-/,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base" + BASE_MODULE_LIST="sems-env,kokkos-env,sems-/,kokkos-hwloc/1.10.1/base" + CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-/,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base" + CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-/,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base" - if [ -z "$ARCH_FLAG" ]; then - ARCH_FLAG="" - fi + if [ -z "$ARCH_FLAG" ]; then + ARCH_FLAG="" + fi if [ "$SPOT_CHECK" = "True" ]; then # Format: (compiler module-list build-list exe-name warning-flag) @@ -153,120 +158,118 @@ if [ "$MACHINE" = "sems" ]; then # Format: (compiler module-list build-list exe-name warning-flag) COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" "gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "gcc/4.9.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "gcc/5.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" "intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" "intel/15.0.2 
$BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" "intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "clang/3.7.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "clang/3.8.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" - "clang/3.9.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" "cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" "cuda/8.0.44 $CUDA8_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" ) fi - elif [ "$MACHINE" = "white" ]; then - source /etc/profile.d/modules.sh - SKIP_HWLOC=True - export SLURM_TASKS_PER_NODE=32 + source /etc/profile.d/modules.sh + SKIP_HWLOC=True + export SLURM_TASKS_PER_NODE=32 - BASE_MODULE_LIST="/" - IBM_MODULE_LIST="/xl/" - CUDA_MODULE_LIST="/,gcc/5.4.0" + BASE_MODULE_LIST="/" + IBM_MODULE_LIST="/xl/" + CUDA_MODULE_LIST="/,gcc/5.4.0" - # Don't do pthread on white - GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial" + # Don't do pthread on white. 
+ GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial" - # Format: (compiler module-list build-list exe-name warning-flag) - COMPILERS=("gcc/5.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "ibm/13.1.3 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS" - "cuda/8.0.44 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/config/nvcc_wrapper $CUDA_WARNING_FLAGS" - ) - if [ -z "$ARCH_FLAG" ]; then - ARCH_FLAG="--arch=Power8,Kepler37" - fi - NUM_JOBS_TO_RUN_IN_PARALLEL=2 + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("gcc/5.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "ibm/13.1.3 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS" + "cuda/8.0.44 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/config/nvcc_wrapper $CUDA_WARNING_FLAGS" + ) + + if [ -z "$ARCH_FLAG" ]; then + ARCH_FLAG="--arch=Power8,Kepler37" + fi + + NUM_JOBS_TO_RUN_IN_PARALLEL=2 elif [ "$MACHINE" = "bowman" ]; then - source /etc/profile.d/modules.sh - SKIP_HWLOC=True - export SLURM_TASKS_PER_NODE=32 + source /etc/profile.d/modules.sh + SKIP_HWLOC=True + export SLURM_TASKS_PER_NODE=32 - BASE_MODULE_LIST="/compilers/" + BASE_MODULE_LIST="/compilers/" - OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial" + OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial" - # Format: (compiler module-list build-list exe-name warning-flag) - COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" - "intel/17.0.098 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" - ) + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/17.0.098 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + ) - if [ -z "$ARCH_FLAG" ]; then - ARCH_FLAG="--arch=KNL" - fi + if [ -z "$ARCH_FLAG" ]; then + ARCH_FLAG="--arch=KNL" + fi - NUM_JOBS_TO_RUN_IN_PARALLEL=2 + NUM_JOBS_TO_RUN_IN_PARALLEL=2 
elif [ "$MACHINE" = "sullivan" ]; then - source /etc/profile.d/modules.sh - SKIP_HWLOC=True - export SLURM_TASKS_PER_NODE=96 + source /etc/profile.d/modules.sh + SKIP_HWLOC=True + export SLURM_TASKS_PER_NODE=96 - BASE_MODULE_LIST="/" + BASE_MODULE_LIST="/" - # Format: (compiler module-list build-list exe-name warning-flag) - COMPILERS=("gcc/5.3.0 $BASE_MODULE_LIST $ARM_GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS") + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("gcc/5.3.0 $BASE_MODULE_LIST $ARM_GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS") - if [ -z "$ARCH_FLAG" ]; then - ARCH_FLAG="--arch=ARMv8-ThunderX" - fi + if [ -z "$ARCH_FLAG" ]; then + ARCH_FLAG="--arch=ARMv8-ThunderX" + fi - NUM_JOBS_TO_RUN_IN_PARALLEL=2 + NUM_JOBS_TO_RUN_IN_PARALLEL=2 elif [ "$MACHINE" = "shepard" ]; then - source /etc/profile.d/modules.sh - SKIP_HWLOC=True - export SLURM_TASKS_PER_NODE=32 + source /etc/profile.d/modules.sh + SKIP_HWLOC=True + export SLURM_TASKS_PER_NODE=32 - BASE_MODULE_LIST="/compilers/" + BASE_MODULE_LIST="/compilers/" - OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial" + OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial" - # Format: (compiler module-list build-list exe-name warning-flag) - COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" - "intel/17.0.098 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" - ) + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/17.0.098 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + ) - if [ -z "$ARCH_FLAG" ]; then - ARCH_FLAG="--arch=HSW" - fi - NUM_JOBS_TO_RUN_IN_PARALLEL=2 + if [ -z "$ARCH_FLAG" ]; then + ARCH_FLAG="--arch=HSW" + fi + NUM_JOBS_TO_RUN_IN_PARALLEL=2 elif [ "$MACHINE" = "apollo" ]; then - source /projects/sems/modulefiles/utils/sems-modules-init.sh - module use /home/projects/modulefiles/local/x86-64 - 
module load kokkos-env + source /projects/sems/modulefiles/utils/sems-modules-init.sh + module use /home/projects/modulefiles/local/x86-64 + module load kokkos-env - module load sems-git - module load sems-tex - module load sems-cmake/3.5.2 - module load sems-gdb + module load sems-git + module load sems-tex + module load sems-cmake/3.5.2 + module load sems-gdb - SKIP_HWLOC=True + SKIP_HWLOC=True - BASE_MODULE_LIST="sems-env,kokkos-env,sems-/,kokkos-hwloc/1.10.1/base" - CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-/,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base" - CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-/,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base" + BASE_MODULE_LIST="sems-env,kokkos-env,sems-/,kokkos-hwloc/1.10.1/base" + CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-/,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base" + CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-/,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base" - CLANG_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,/,cuda/8.0.44" - NVCC_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,/,sems-gcc/5.3.0" + CLANG_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,/,cuda/8.0.44" + NVCC_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,/,sems-gcc/5.3.0" - BUILD_LIST_CUDA_NVCC="Cuda_Serial,Cuda_OpenMP" - BUILD_LIST_CUDA_CLANG="Cuda_Serial,Cuda_Pthread" - BUILD_LIST_CLANG="Serial,Pthread,OpenMP" + BUILD_LIST_CUDA_NVCC="Cuda_Serial,Cuda_OpenMP" + BUILD_LIST_CUDA_CLANG="Cuda_Serial,Cuda_Pthread" + BUILD_LIST_CLANG="Serial,Pthread,OpenMP" if [ "$SPOT_CHECK" = "True" ]; then # Format: (compiler module-list build-list exe-name warning-flag) @@ -297,17 +300,17 @@ elif [ "$MACHINE" = "apollo" ]; then ) fi - if [ -z "$ARCH_FLAG" ]; then - ARCH_FLAG="--arch=SNB,Kepler35" - fi - NUM_JOBS_TO_RUN_IN_PARALLEL=2 + if [ -z "$ARCH_FLAG" ]; then + ARCH_FLAG="--arch=SNB,Kepler35" + fi + + NUM_JOBS_TO_RUN_IN_PARALLEL=2 + else - echo "Unhandled machine $MACHINE" >&2 - exit 1 + echo "Unhandled machine $MACHINE" >&2 + 
exit 1 fi - - export OMP_NUM_THREADS=4 declare -i NUM_RESULTS_TO_KEEP=7 @@ -315,119 +318,149 @@ declare -i NUM_RESULTS_TO_KEEP=7 RESULT_ROOT_PREFIX=TestAll if [ "$PRINT_HELP" = "True" ]; then -echo "test_all_sandia :" -echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory" -echo " Defaults to root repo containing this script" -echo "--debug: Run tests in debug. Defaults to False" -echo "--test-script: Test this script, not Kokkos" -echo "--skip-hwloc: Do not do hwloc tests" -echo "--num=N: Number of jobs to run in parallel" -echo "--spot-check: Minimal test set to issue pull request" -echo "--dry-run: Just print what would be executed" -echo "--build-only: Just do builds, don't run anything" -echo "--opt-flag=FLAG: Optimization flag (default: -O3)" -echo "--arch=ARCHITECTURE: overwrite architecture flags" -echo "--with-cuda-options=OPT: set KOKKOS_CUDA_OPTIONS" -echo "--build-list=BUILD,BUILD,BUILD..." -echo " Provide a comma-separated list of builds instead of running all builds" -echo " Valid items:" -echo " OpenMP, Pthread, Serial, OpenMP_Serial, Pthread_Serial" -echo " Cuda_OpenMP, Cuda_Pthread, Cuda_Serial" -echo "" + echo "test_all_sandia :" + echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory" + echo " Defaults to root repo containing this script" + echo "--debug: Run tests in debug. Defaults to False" + echo "--test-script: Test this script, not Kokkos" + echo "--skip-hwloc: Do not do hwloc tests" + echo "--num=N: Number of jobs to run in parallel" + echo "--spot-check: Minimal test set to issue pull request" + echo "--dry-run: Just print what would be executed" + echo "--build-only: Just do builds, don't run anything" + echo "--opt-flag=FLAG: Optimization flag (default: -O3)" + echo "--arch=ARCHITECTURE: overwrite architecture flags" + echo "--with-cuda-options=OPT: set KOKKOS_CUDA_OPTIONS" + echo "--build-list=BUILD,BUILD,BUILD..." 
+ echo " Provide a comma-separated list of builds instead of running all builds" + echo " Valid items:" + echo " OpenMP, Pthread, Qthreads, Serial, OpenMP_Serial, Pthread_Serial" + echo " Qthreads_Serial, Cuda_OpenMP, Cuda_Pthread, Cuda_Serial" + echo "" -echo "ARGS: list of expressions matching compilers to test" -echo " supported compilers sems" -for COMPILER_DATA in "${COMPILERS[@]}"; do + echo "ARGS: list of expressions matching compilers to test" + echo " supported compilers sems" + for COMPILER_DATA in "${COMPILERS[@]}"; do ARR=($COMPILER_DATA) COMPILER=${ARR[0]} echo " $COMPILER" -done -echo "" + done + echo "" -echo "Examples:" -echo " Run all tests" -echo " % test_all_sandia" -echo "" -echo " Run all gcc tests" -echo " % test_all_sandia gcc" -echo "" -echo " Run all gcc/4.7.2 and all intel tests" -echo " % test_all_sandia gcc/4.7.2 intel" -echo "" -echo " Run all tests in debug" -echo " % test_all_sandia --debug" -echo "" -echo " Run gcc/4.7.2 and only do OpenMP and OpenMP_Serial builds" -echo " % test_all_sandia gcc/4.7.2 --build-list=OpenMP,OpenMP_Serial" -echo "" -echo "If you want to kill the tests, do:" -echo " hit ctrl-z" -echo " % kill -9 %1" -echo -exit 0 + echo "Examples:" + echo " Run all tests" + echo " % test_all_sandia" + echo "" + echo " Run all gcc tests" + echo " % test_all_sandia gcc" + echo "" + echo " Run all gcc/4.7.2 and all intel tests" + echo " % test_all_sandia gcc/4.7.2 intel" + echo "" + echo " Run all tests in debug" + echo " % test_all_sandia --debug" + echo "" + echo " Run gcc/4.7.2 and only do OpenMP and OpenMP_Serial builds" + echo " % test_all_sandia gcc/4.7.2 --build-list=OpenMP,OpenMP_Serial" + echo "" + echo "If you want to kill the tests, do:" + echo " hit ctrl-z" + echo " % kill -9 %1" + echo + exit 0 fi -# set build type +# Set build type. 
if [ "$DEBUG" = "True" ]; then - BUILD_TYPE=debug + BUILD_TYPE=debug else - BUILD_TYPE=release + BUILD_TYPE=release fi -# If no args provided, do all compilers +# If no args provided, do all compilers. if [ -z "$ARGS" ]; then - ARGS='?' + ARGS='?' fi -# Process args to figure out which compilers to test +# Process args to figure out which compilers to test. COMPILERS_TO_TEST="" + for ARG in $ARGS; do - for COMPILER_DATA in "${COMPILERS[@]}"; do - ARR=($COMPILER_DATA) - COMPILER=${ARR[0]} - if [[ "$COMPILER" = $ARG* ]]; then - if [[ "$COMPILERS_TO_TEST" != *${COMPILER}* ]]; then - COMPILERS_TO_TEST="$COMPILERS_TO_TEST $COMPILER" - else - echo "Tried to add $COMPILER twice" - fi - fi - done + for COMPILER_DATA in "${COMPILERS[@]}"; do + ARR=($COMPILER_DATA) + COMPILER=${ARR[0]} + + if [[ "$COMPILER" = $ARG* ]]; then + if [[ "$COMPILERS_TO_TEST" != *${COMPILER}* ]]; then + COMPILERS_TO_TEST="$COMPILERS_TO_TEST $COMPILER" + else + echo "Tried to add $COMPILER twice" + fi + fi + done done +# Check if Qthreads build requested. +HAVE_QTHREADS_BUILD="False" +if [ -n "$CUSTOM_BUILD_LIST" ]; then + if [[ "$CUSTOM_BUILD_LIST" = *Qthreads* ]]; then + HAVE_QTHREADS_BUILD="True" + fi +else + for COMPILER_DATA in "${COMPILERS[@]}"; do + ARR=($COMPILER_DATA) + BUILD_LIST=${ARR[2]} + if [[ "$BUILD_LIST" = *Qthreads* ]]; then + HAVE_QTHREADS_BUILD="True" + fi + done +fi + +# Ensure Qthreads path is set if Qthreads build is requested. +if [ "$HAVE_QTHREADS_BUILD" = "True" ]; then + if [ -z "$QTHREADS_PATH" ]; then + echo "Need to supply Qthreads path (--qthreads-path) when testing Qthreads backend." >&2 + exit 1 + else + # Strip trailing slashes from path. + QTHREADS_PATH=$(echo $QTHREADS_PATH | sed 's/\/*$//') + fi +fi + # -# Functions +# Functions. 
# # get_compiler_name get_compiler_name() { - echo $1 | cut -d/ -f1 + echo $1 | cut -d/ -f1 } # get_compiler_version get_compiler_version() { - echo $1 | cut -d/ -f2 + echo $1 | cut -d/ -f2 } -# Do not call directly +# Do not call directly. get_compiler_data() { - local compiler=$1 - local item=$2 - local compiler_name=$(get_compiler_name $compiler) - local compiler_vers=$(get_compiler_version $compiler) + local compiler=$1 + local item=$2 + local compiler_name=$(get_compiler_name $compiler) + local compiler_vers=$(get_compiler_version $compiler) - local compiler_data - for compiler_data in "${COMPILERS[@]}" ; do - local arr=($compiler_data) - if [ "$compiler" = "${arr[0]}" ]; then - echo "${arr[$item]}" | tr , ' ' | sed -e "s//$compiler_name/g" -e "s//$compiler_vers/g" - return 0 - fi - done + local compiler_data + for compiler_data in "${COMPILERS[@]}" ; do + local arr=($compiler_data) - # Not found - echo "Unreconized compiler $compiler" >&2 - exit 1 + if [ "$compiler" = "${arr[0]}" ]; then + echo "${arr[$item]}" | tr , ' ' | sed -e "s//$compiler_name/g" -e "s//$compiler_vers/g" + return 0 + fi + done + + # Not found. + echo "Unreconized compiler $compiler" >&2 + exit 1 } # @@ -435,227 +468,232 @@ get_compiler_data() { # get_compiler_modules() { - get_compiler_data $1 1 + get_compiler_data $1 1 } get_compiler_build_list() { - get_compiler_data $1 2 + get_compiler_data $1 2 } get_compiler_exe_name() { - get_compiler_data $1 3 + get_compiler_data $1 3 } get_compiler_warning_flags() { - get_compiler_data $1 4 + get_compiler_data $1 4 } run_cmd() { - echo "RUNNING: $*" - if [ "$DRYRUN" != "True" ]; then - eval "$* 2>&1" - fi + echo "RUNNING: $*" + if [ "$DRYRUN" != "True" ]; then + eval "$* 2>&1" + fi } # report_and_log_test_results report_and_log_test_result() { - # Use sane var names - local success=$1; local desc=$2; local comment=$3; + # Use sane var names. 
+ local success=$1; local desc=$2; local comment=$3; - if [ "$success" = "0" ]; then - echo " PASSED $desc" - echo $comment > $PASSED_DIR/$desc - else - # For failures, comment should be the name of the phase that failed - echo " FAILED $desc" >&2 - echo $comment > $FAILED_DIR/$desc - cat ${desc}.${comment}.log - fi + if [ "$success" = "0" ]; then + echo " PASSED $desc" + echo $comment > $PASSED_DIR/$desc + else + # For failures, comment should be the name of the phase that failed. + echo " FAILED $desc" >&2 + echo $comment > $FAILED_DIR/$desc + cat ${desc}.${comment}.log + fi } setup_env() { - local compiler=$1 - local compiler_modules=$(get_compiler_modules $compiler) + local compiler=$1 + local compiler_modules=$(get_compiler_modules $compiler) - module purge + module purge - local mod - for mod in $compiler_modules; do - echo "Loading module $mod" - module load $mod 2>&1 - # It is ridiculously hard to check for the success of a loaded - # module. Module does not return error codes and piping to grep - # causes module to run in a subshell. - module list 2>&1 | grep "$mod" >& /dev/null || return 1 - done + local mod + for mod in $compiler_modules; do + echo "Loading module $mod" + module load $mod 2>&1 + # It is ridiculously hard to check for the success of a loaded + # module. Module does not return error codes and piping to grep + # causes module to run in a subshell. + module list 2>&1 | grep "$mod" >& /dev/null || return 1 + done - return 0 + return 0 } # single_build_and_test single_build_and_test() { - # Use sane var names - local compiler=$1; local build=$2; local build_type=$3; + # Use sane var names. 
+ local compiler=$1; local build=$2; local build_type=$3; - # set up env - mkdir -p $ROOT_DIR/$compiler/"${build}-$build_type" - cd $ROOT_DIR/$compiler/"${build}-$build_type" - local desc=$(echo "${compiler}-${build}-${build_type}" | sed 's:/:-:g') - setup_env $compiler >& ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; } + # Set up env. + mkdir -p $ROOT_DIR/$compiler/"${build}-$build_type" + cd $ROOT_DIR/$compiler/"${build}-$build_type" + local desc=$(echo "${compiler}-${build}-${build_type}" | sed 's:/:-:g') + setup_env $compiler >& ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; } - # Set up flags - local compiler_warning_flags=$(get_compiler_warning_flags $compiler) - local compiler_exe=$(get_compiler_exe_name $compiler) + # Set up flags. + local compiler_warning_flags=$(get_compiler_warning_flags $compiler) + local compiler_exe=$(get_compiler_exe_name $compiler) + if [[ "$build_type" = hwloc* ]]; then + local extra_args=--with-hwloc=$(dirname $(dirname $(which hwloc-info))) + fi + + if [[ "$build" = *Qthreads* ]]; then if [[ "$build_type" = hwloc* ]]; then - local extra_args=--with-hwloc=$(dirname $(dirname $(which hwloc-info))) - fi - - if [[ "$OPT_FLAG" = "" ]]; then - OPT_FLAG="-O3" - fi - - if [[ "$build_type" = *debug* ]]; then - local extra_args="$extra_args --debug" - local cxxflags="-g $compiler_warning_flags" + local extra_args="$extra_args --qthreads-path=${QTHREADS_PATH}_hwloc" else - local cxxflags="$OPT_FLAG $compiler_warning_flags" + local extra_args="$extra_args --qthreads-path=$QTHREADS_PATH" fi + fi - if [[ "$compiler" == cuda* ]]; then - cxxflags="--keep --keep-dir=$(pwd) $cxxflags" - export TMPDIR=$(pwd) + if [[ "$OPT_FLAG" = "" ]]; then + OPT_FLAG="-O3" + fi + + if [[ "$build_type" = *debug* ]]; then + local extra_args="$extra_args --debug" + local cxxflags="-g $compiler_warning_flags" + else + local cxxflags="$OPT_FLAG $compiler_warning_flags" + fi + + if [[ 
"$KOKKOS_CUDA_OPTIONS" != "" ]]; then + local extra_args="$extra_args $KOKKOS_CUDA_OPTIONS" + fi + + echo " Starting job $desc" + + local comment="no_comment" + + if [ "$TEST_SCRIPT" = "True" ]; then + local rand=$[ 1 + $[ RANDOM % 10 ]] + sleep $rand + + if [ $rand -gt 5 ]; then + run_cmd ls fake_problem >& ${desc}.configure.log || { report_and_log_test_result 1 $desc configure && return 0; } fi + else + run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$build $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; } + local -i build_start_time=$(date +%s) + run_cmd make build-test >& ${desc}.build.log || { report_and_log_test_result 1 ${desc} build && return 0; } + local -i build_end_time=$(date +%s) + comment="build_time=$(($build_end_time-$build_start_time))" - if [[ "$KOKKOS_CUDA_OPTIONS" != "" ]]; then - local extra_args="$extra_args $KOKKOS_CUDA_OPTIONS" + if [[ "$BUILD_ONLY" == False ]]; then + run_cmd make test >& ${desc}.test.log || { report_and_log_test_result 1 ${desc} test && return 0; } + local -i run_end_time=$(date +%s) + comment="$comment run_time=$(($run_end_time-$build_end_time))" fi + fi - echo " Starting job $desc" + report_and_log_test_result 0 $desc "$comment" - local comment="no_comment" - - if [ "$TEST_SCRIPT" = "True" ]; then - local rand=$[ 1 + $[ RANDOM % 10 ]] - sleep $rand - if [ $rand -gt 5 ]; then - run_cmd ls fake_problem >& ${desc}.configure.log || { report_and_log_test_result 1 $desc configure && return 0; } - fi - else - run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$build $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; } - local -i build_start_time=$(date +%s) - run_cmd make build-test >& ${desc}.build.log || { report_and_log_test_result 1 ${desc} build && return 0; 
} - local -i build_end_time=$(date +%s) - comment="build_time=$(($build_end_time-$build_start_time))" - if [[ "$BUILD_ONLY" == False ]]; then - run_cmd make test >& ${desc}.test.log || { report_and_log_test_result 1 ${desc} test && return 0; } - local -i run_end_time=$(date +%s) - comment="$comment run_time=$(($run_end_time-$build_end_time))" - fi - fi - - report_and_log_test_result 0 $desc "$comment" - - return 0 + return 0 } # wait_for_jobs wait_for_jobs() { - local -i max_jobs=$1 - local -i num_active_jobs=$(jobs | wc -l) - while [ $num_active_jobs -ge $max_jobs ] - do - sleep 1 - num_active_jobs=$(jobs | wc -l) - jobs >& /dev/null - done + local -i max_jobs=$1 + local -i num_active_jobs=$(jobs | wc -l) + while [ $num_active_jobs -ge $max_jobs ] + do + sleep 1 + num_active_jobs=$(jobs | wc -l) + jobs >& /dev/null + done } # run_in_background run_in_background() { - local compiler=$1 + local compiler=$1 - local -i num_jobs=$NUM_JOBS_TO_RUN_IN_PARALLEL - # don't override command line input - # if [[ "$BUILD_ONLY" == True ]]; then - # num_jobs=8 - # else - if [[ "$compiler" == cuda* ]]; then - num_jobs=1 - fi - # fi - wait_for_jobs $num_jobs + local -i num_jobs=$NUM_JOBS_TO_RUN_IN_PARALLEL + # Don't override command line input. + # if [[ "$BUILD_ONLY" == True ]]; then + # num_jobs=8 + # else + if [[ "$compiler" == cuda* ]]; then + num_jobs=1 + fi + # fi + wait_for_jobs $num_jobs - single_build_and_test $* & + single_build_and_test $* & } # build_and_test_all build_and_test_all() { - # Get compiler data - local compiler=$1 - if [ -z "$CUSTOM_BUILD_LIST" ]; then - local compiler_build_list=$(get_compiler_build_list $compiler) - else - local compiler_build_list=$(echo "$CUSTOM_BUILD_LIST" | tr , ' ') + # Get compiler data. + local compiler=$1 + if [ -z "$CUSTOM_BUILD_LIST" ]; then + local compiler_build_list=$(get_compiler_build_list $compiler) + else + local compiler_build_list=$(echo "$CUSTOM_BUILD_LIST" | tr , ' ') + fi + + # Do builds. 
+ local build + for build in $compiler_build_list + do + run_in_background $compiler $build $BUILD_TYPE + + # If not cuda, do a hwloc test too. + if [[ "$compiler" != cuda* && "$SKIP_HWLOC" == False ]]; then + run_in_background $compiler $build "hwloc-$BUILD_TYPE" fi + done - # do builds - local build - for build in $compiler_build_list - do - run_in_background $compiler $build $BUILD_TYPE - - # If not cuda, do a hwloc test too - if [[ "$compiler" != cuda* && "$SKIP_HWLOC" == False ]]; then - run_in_background $compiler $build "hwloc-$BUILD_TYPE" - fi - done - - return 0 + return 0 } get_test_root_dir() { - local existing_results=$(find . -maxdepth 1 -name "$RESULT_ROOT_PREFIX*" | sort) - local -i num_existing_results=$(echo $existing_results | tr ' ' '\n' | wc -l) - local -i num_to_delete=${num_existing_results}-${NUM_RESULTS_TO_KEEP} + local existing_results=$(find . -maxdepth 1 -name "$RESULT_ROOT_PREFIX*" | sort) + local -i num_existing_results=$(echo $existing_results | tr ' ' '\n' | wc -l) + local -i num_to_delete=${num_existing_results}-${NUM_RESULTS_TO_KEEP} - if [ $num_to_delete -gt 0 ]; then - /bin/rm -rf $(echo $existing_results | tr ' ' '\n' | head -n $num_to_delete) - fi + if [ $num_to_delete -gt 0 ]; then + /bin/rm -rf $(echo $existing_results | tr ' ' '\n' | head -n $num_to_delete) + fi - echo $(pwd)/${RESULT_ROOT_PREFIX}_$(date +"%Y-%m-%d_%H.%M.%S") + echo $(pwd)/${RESULT_ROOT_PREFIX}_$(date +"%Y-%m-%d_%H.%M.%S") } wait_summarize_and_exit() { - wait_for_jobs 1 + wait_for_jobs 1 - echo "#######################################################" - echo "PASSED TESTS" - echo "#######################################################" + echo "#######################################################" + echo "PASSED TESTS" + echo "#######################################################" - local passed_test - for passed_test in $(\ls -1 $PASSED_DIR | sort) - do - echo $passed_test $(cat $PASSED_DIR/$passed_test) - done + local passed_test + for passed_test in 
$(\ls -1 $PASSED_DIR | sort) + do + echo $passed_test $(cat $PASSED_DIR/$passed_test) + done - echo "#######################################################" - echo "FAILED TESTS" - echo "#######################################################" + echo "#######################################################" + echo "FAILED TESTS" + echo "#######################################################" - local failed_test - local -i rv=0 - for failed_test in $(\ls -1 $FAILED_DIR | sort) - do - echo $failed_test "("$(cat $FAILED_DIR/$failed_test)" failed)" - rv=$rv+1 - done + local failed_test + local -i rv=0 + for failed_test in $(\ls -1 $FAILED_DIR | sort) + do + echo $failed_test "("$(cat $FAILED_DIR/$failed_test)" failed)" + rv=$rv+1 + done - exit $rv + exit $rv } # -# Main +# Main. # ROOT_DIR=$(get_test_root_dir) @@ -669,8 +707,8 @@ mkdir -p $FAILED_DIR echo "Going to test compilers: " $COMPILERS_TO_TEST for COMPILER in $COMPILERS_TO_TEST; do - echo "Testing compiler $COMPILER" - build_and_test_all $COMPILER + echo "Testing compiler $COMPILER" + build_and_test_all $COMPILER done wait_summarize_and_exit diff --git a/lib/kokkos/containers/src/Kokkos_DynamicView.hpp b/lib/kokkos/containers/src/Kokkos_DynamicView.hpp index 3277c007d0..53e0eab693 100644 --- a/lib/kokkos/containers/src/Kokkos_DynamicView.hpp +++ b/lib/kokkos/containers/src/Kokkos_DynamicView.hpp @@ -60,7 +60,7 @@ class DynamicView : public Kokkos::ViewTraits< DataType , P ... > { public: - typedef ViewTraits< DataType , P ... > traits ; + typedef Kokkos::ViewTraits< DataType , P ... > traits ; private: @@ -123,30 +123,41 @@ public: enum { Rank = 1 }; - KOKKOS_INLINE_FUNCTION constexpr size_t size() const + KOKKOS_INLINE_FUNCTION + size_t size() const noexcept { - return - Kokkos::Impl::MemorySpaceAccess - < Kokkos::Impl::ActiveExecutionMemorySpace - , typename traits::memory_space - >::accessible - ? 
// Runtime size is at the end of the chunk pointer array - (*reinterpret_cast( m_chunks + m_chunk_max )) - << m_chunk_shift - : 0 ; + uintptr_t n = 0 ; + + if ( Kokkos::Impl::MemorySpaceAccess + < Kokkos::Impl::ActiveExecutionMemorySpace + , typename traits::memory_space + >::accessible ) { + n = *reinterpret_cast( m_chunks + m_chunk_max ); + } +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + else { + Kokkos::Impl::DeepCopy< Kokkos::HostSpace + , typename traits::memory_space + , Kokkos::HostSpace::execution_space > + ( & n + , reinterpret_cast( m_chunks + m_chunk_max ) + , sizeof(uintptr_t) ); + } +#endif + return n << m_chunk_shift ; } template< typename iType > - KOKKOS_INLINE_FUNCTION constexpr + KOKKOS_INLINE_FUNCTION size_t extent( const iType & r ) const { return r == 0 ? size() : 1 ; } template< typename iType > - KOKKOS_INLINE_FUNCTION constexpr + KOKKOS_INLINE_FUNCTION size_t extent_int( const iType & r ) const { return r == 0 ? size() : 1 ; } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return size(); } + KOKKOS_INLINE_FUNCTION size_t dimension_0() const { return size(); } KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return 1 ; } KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return 1 ; } KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return 1 ; } @@ -270,10 +281,18 @@ public: } /** \brief Resizing in serial can grow or shrink the array size, */ + template< typename IntType > inline - void resize_serial( size_t n ) + typename std::enable_if + < std::is_integral::value && + Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace + , typename traits::memory_space + >::accessible + >::type + resize_serial( IntType const & n ) { - DynamicView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); + typedef typename traits::value_type value_type ; + typedef value_type * pointer_type ; const uintptr_t NC = ( n + m_chunk_mask ) >> m_chunk_shift ; @@ -286,8 +305,8 @@ 
public: if ( *pc < NC ) { while ( *pc < NC ) { - m_chunks[*pc] = - m_pool.allocate( sizeof(traits::value_type) << m_chunk_shift ); + m_chunks[*pc] = reinterpret_cast + ( m_pool.allocate( sizeof(value_type) << m_chunk_shift ) ); ++*pc ; } } @@ -295,12 +314,90 @@ public: while ( NC + 1 <= *pc ) { --*pc ; m_pool.deallocate( m_chunks[*pc] - , sizeof(traits::value_type) << m_chunk_shift ); + , sizeof(value_type) << m_chunk_shift ); m_chunks[*pc] = 0 ; } } } + //---------------------------------------- + + struct ResizeSerial { + memory_pool m_pool ; + typename traits::value_type ** m_chunks ; + uintptr_t * m_pc ; + uintptr_t m_nc ; + unsigned m_chunk_shift ; + + KOKKOS_INLINE_FUNCTION + void operator()( int ) const + { + typedef typename traits::value_type value_type ; + typedef value_type * pointer_type ; + + if ( *m_pc < m_nc ) { + while ( *m_pc < m_nc ) { + m_chunks[*m_pc] = reinterpret_cast + ( m_pool.allocate( sizeof(value_type) << m_chunk_shift ) ); + ++*m_pc ; + } + } + else { + while ( m_nc + 1 <= *m_pc ) { + --*m_pc ; + m_pool.deallocate( m_chunks[*m_pc] + , sizeof(value_type) << m_chunk_shift ); + m_chunks[*m_pc] = 0 ; + } + } + } + + ResizeSerial( memory_pool const & arg_pool + , typename traits::value_type ** arg_chunks + , uintptr_t * arg_pc + , uintptr_t arg_nc + , unsigned arg_chunk_shift + ) + : m_pool( arg_pool ) + , m_chunks( arg_chunks ) + , m_pc( arg_pc ) + , m_nc( arg_nc ) + , m_chunk_shift( arg_chunk_shift ) + {} + }; + + template< typename IntType > + inline + typename std::enable_if + < std::is_integral::value && + ! 
Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace + , typename traits::memory_space + >::accessible + >::type + resize_serial( IntType const & n ) + { + const uintptr_t NC = ( n + m_chunk_mask ) >> m_chunk_shift ; + + if ( m_chunk_max < NC ) { + Kokkos::abort("DynamicView::resize_serial exceeded maximum size"); + } + + // Must dispatch kernel + + typedef Kokkos::RangePolicy< typename traits::execution_space > Range ; + + uintptr_t * const pc = + reinterpret_cast( m_chunks + m_chunk_max ); + + Kokkos::Impl::ParallelFor + closure( ResizeSerial( m_pool, m_chunks, pc, NC, m_chunk_shift ) + , Range(0,1) ); + + closure.execute(); + + traits::execution_space::fence(); + } + //---------------------------------------------------------------------- ~DynamicView() = default ; @@ -311,15 +408,17 @@ public: DynamicView & operator = ( const DynamicView & ) = default ; template< class RT , class ... RP > - KOKKOS_INLINE_FUNCTION DynamicView( const DynamicView & rhs ) : m_pool( rhs.m_pool ) , m_track( rhs.m_track ) - , m_chunks( rhs.m_chunks ) + , m_chunks( (typename traits::value_type **) rhs.m_chunks ) , m_chunk_shift( rhs.m_chunk_shift ) , m_chunk_mask( rhs.m_chunk_mask ) , m_chunk_max( rhs.m_chunk_max ) { + typedef typename DynamicView::traits SrcTraits ; + typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , void > Mapping ; + static_assert( Mapping::is_assignable , "Incompatible DynamicView copy construction" ); } //---------------------------------------------------------------------- @@ -400,8 +499,6 @@ public: , m_chunk_mask( ( 1 << m_chunk_shift ) - 1 ) , m_chunk_max( ( arg_size_max + m_chunk_mask ) >> m_chunk_shift ) { - DynamicView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); - // A functor to deallocate all of the chunks upon final destruction typedef typename traits::memory_space memory_space ; diff --git a/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp b/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp index 
8646d27792..193f1bc334 100644 --- a/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp +++ b/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp @@ -230,16 +230,17 @@ public: typedef typename Impl::remove_const::type value_type; typedef typename Impl::add_const::type const_value_type; - typedef Device execution_space; + typedef Device device_type; + typedef typename Device::execution_space execution_space; typedef Hasher hasher_type; typedef EqualTo equal_to_type; typedef uint32_t size_type; //map_types - typedef UnorderedMap declared_map_type; - typedef UnorderedMap insertable_map_type; - typedef UnorderedMap modifiable_map_type; - typedef UnorderedMap const_map_type; + typedef UnorderedMap declared_map_type; + typedef UnorderedMap insertable_map_type; + typedef UnorderedMap modifiable_map_type; + typedef UnorderedMap const_map_type; static const bool is_set = std::is_same::value; static const bool has_const_key = std::is_same::value; @@ -264,18 +265,18 @@ private: typedef typename Impl::if_c< is_set, int, declared_value_type>::type impl_value_type; typedef typename Impl::if_c< is_insertable_map - , View< key_type *, execution_space> - , View< const key_type *, execution_space, MemoryTraits > + , View< key_type *, device_type> + , View< const key_type *, device_type, MemoryTraits > >::type key_type_view; typedef typename Impl::if_c< is_insertable_map || is_modifiable_map - , View< impl_value_type *, execution_space> - , View< const impl_value_type *, execution_space, MemoryTraits > + , View< impl_value_type *, device_type> + , View< const impl_value_type *, device_type, MemoryTraits > >::type value_type_view; typedef typename Impl::if_c< is_insertable_map - , View< size_type *, execution_space> - , View< const size_type *, execution_space, MemoryTraits > + , View< size_type *, device_type> + , View< const size_type *, device_type, MemoryTraits > >::type size_type_view; typedef typename Impl::if_c< is_insertable_map @@ -285,7 +286,7 @@ private: enum { modified_idx = 
0, erasable_idx = 1, failed_insert_idx = 2 }; enum { num_scalars = 3 }; - typedef View< int[num_scalars], LayoutLeft, execution_space> scalars_view; + typedef View< int[num_scalars], LayoutLeft, device_type> scalars_view; public: //! \name Public member functions @@ -757,7 +758,7 @@ public: Kokkos::deep_copy(tmp.m_available_indexes, src.m_available_indexes); - typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, typename SDevice::memory_space > raw_deep_copy; + typedef Kokkos::Impl::DeepCopy< typename device_type::memory_space, typename SDevice::memory_space > raw_deep_copy; raw_deep_copy(tmp.m_hash_lists.ptr_on_device(), src.m_hash_lists.ptr_on_device(), sizeof(size_type)*src.m_hash_lists.dimension_0()); raw_deep_copy(tmp.m_next_index.ptr_on_device(), src.m_next_index.ptr_on_device(), sizeof(size_type)*src.m_next_index.dimension_0()); @@ -781,21 +782,21 @@ private: // private member functions void set_flag(int flag) const { - typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy; + typedef Kokkos::Impl::DeepCopy< typename device_type::memory_space, Kokkos::HostSpace > raw_deep_copy; const int true_ = true; raw_deep_copy(m_scalars.ptr_on_device() + flag, &true_, sizeof(int)); } void reset_flag(int flag) const { - typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy; + typedef Kokkos::Impl::DeepCopy< typename device_type::memory_space, Kokkos::HostSpace > raw_deep_copy; const int false_ = false; raw_deep_copy(m_scalars.ptr_on_device() + flag, &false_, sizeof(int)); } bool get_flag(int flag) const { - typedef Kokkos::Impl::DeepCopy< Kokkos::HostSpace, typename execution_space::memory_space > raw_deep_copy; + typedef Kokkos::Impl::DeepCopy< Kokkos::HostSpace, typename device_type::memory_space > raw_deep_copy; int result = false; raw_deep_copy(&result, m_scalars.ptr_on_device() + flag, sizeof(int)); return result; diff --git 
a/lib/kokkos/containers/unit_tests/CMakeLists.txt b/lib/kokkos/containers/unit_tests/CMakeLists.txt index b9d860f32f..0c59c616d6 100644 --- a/lib/kokkos/containers/unit_tests/CMakeLists.txt +++ b/lib/kokkos/containers/unit_tests/CMakeLists.txt @@ -3,38 +3,49 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) -SET(SOURCES - UnitTestMain.cpp - TestCuda.cpp - ) - SET(LIBRARIES kokkoscore) IF(Kokkos_ENABLE_Pthread) - LIST( APPEND SOURCES - TestThreads.cpp - ) -ENDIF() - -IF(Kokkos_ENABLE_Serial) - LIST( APPEND SOURCES - TestSerial.cpp - ) -ENDIF() - -IF(Kokkos_ENABLE_OpenMP) - LIST( APPEND SOURCES - TestOpenMP.cpp - ) -ENDIF() - - TRIBITS_ADD_EXECUTABLE_AND_TEST( - UnitTest - SOURCES ${SOURCES} + UnitTest_Threads + SOURCES TestThreads.cpp UnitTestMain.cpp COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " TESTONLYLIBS kokkos_gtest ) - +ENDIF() + +IF(Kokkos_ENABLE_Serial) +TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Serial + SOURCES TestSerial.cpp UnitTestMain.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest + ) +ENDIF() + +IF(Kokkos_ENABLE_OpenMP) +TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_OpenMP + SOURCES TestOpenMP.cpp UnitTestMain.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest + ) +ENDIF() + +IF(Kokkos_ENABLE_Cuda) +TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Cuda + SOURCES TestCuda.cpp UnitTestMain.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest + ) +ENDIF() + diff --git a/lib/kokkos/containers/unit_tests/TestDynamicView.hpp b/lib/kokkos/containers/unit_tests/TestDynamicView.hpp index 7e3ca005f4..beb07bd791 100644 --- a/lib/kokkos/containers/unit_tests/TestDynamicView.hpp +++ b/lib/kokkos/containers/unit_tests/TestDynamicView.hpp @@ -64,6 
+64,7 @@ struct TestDynamicView typedef Kokkos::Experimental::MemoryPool memory_pool_type; typedef Kokkos::Experimental::DynamicView view_type; + typedef typename view_type::const_type const_view_type ; typedef typename Kokkos::TeamPolicy::member_type member_type ; typedef double value_type; @@ -136,6 +137,8 @@ struct TestDynamicView view_type da("A",pool,arg_total_size); + const_view_type ca(da); + // printf("TestDynamicView::run(%d) construct test functor\n",arg_total_size); TestDynamicView functor(da,arg_total_size); diff --git a/lib/kokkos/core/cmake/Dependencies.cmake b/lib/kokkos/core/cmake/Dependencies.cmake index ae9a20c50e..8d9872725e 100644 --- a/lib/kokkos/core/cmake/Dependencies.cmake +++ b/lib/kokkos/core/cmake/Dependencies.cmake @@ -1,6 +1,6 @@ TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( - LIB_OPTIONAL_TPLS Pthread CUDA HWLOC QTHREAD DLlib + LIB_OPTIONAL_TPLS Pthread CUDA HWLOC QTHREADS DLlib TEST_OPTIONAL_TPLS CUSPARSE ) -TRIBITS_TPL_TENTATIVELY_ENABLE(DLlib) \ No newline at end of file +TRIBITS_TPL_TENTATIVELY_ENABLE(DLlib) diff --git a/lib/kokkos/core/cmake/KokkosCore_config.h.in b/lib/kokkos/core/cmake/KokkosCore_config.h.in index 9359b5a32b..a71e60f207 100644 --- a/lib/kokkos/core/cmake/KokkosCore_config.h.in +++ b/lib/kokkos/core/cmake/KokkosCore_config.h.in @@ -30,7 +30,7 @@ #cmakedefine KOKKOS_HAVE_PTHREAD #cmakedefine KOKKOS_HAVE_SERIAL -#cmakedefine KOKKOS_HAVE_QTHREAD +#cmakedefine KOKKOS_HAVE_QTHREADS #cmakedefine KOKKOS_HAVE_Winthread #cmakedefine KOKKOS_HAVE_OPENMP #cmakedefine KOKKOS_HAVE_HWLOC diff --git a/lib/kokkos/core/perf_test/Makefile b/lib/kokkos/core/perf_test/Makefile index 85f869971a..3a0ad2d4c1 100644 --- a/lib/kokkos/core/perf_test/Makefile +++ b/lib/kokkos/core/perf_test/Makefile @@ -60,4 +60,3 @@ clean: kokkos-clean gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc - diff --git a/lib/kokkos/core/perf_test/PerfTestCuda.cpp 
b/lib/kokkos/core/perf_test/PerfTestCuda.cpp index 7386ecef20..65ce61fb53 100644 --- a/lib/kokkos/core/perf_test/PerfTestCuda.cpp +++ b/lib/kokkos/core/perf_test/PerfTestCuda.cpp @@ -52,6 +52,8 @@ #include +#include + #include #include #include @@ -72,6 +74,14 @@ class cuda : public ::testing::Test { } }; +//TEST_F( cuda, mdrange_lr ) { +// EXPECT_NO_THROW( (run_test_mdrange( 5, 8, "Kokkos::Cuda" )) ); +//} + +//TEST_F( cuda, mdrange_ll ) { +// EXPECT_NO_THROW( (run_test_mdrange( 5, 8, "Kokkos::Cuda" )) ); +//} + TEST_F( cuda, hexgrad ) { EXPECT_NO_THROW( run_test_hexgrad< Kokkos::Cuda >( 10 , 20, "Kokkos::Cuda" ) ); diff --git a/lib/kokkos/core/perf_test/PerfTestDriver.hpp b/lib/kokkos/core/perf_test/PerfTestDriver.hpp index 7b6cfc5b5c..4732c3275a 100644 --- a/lib/kokkos/core/perf_test/PerfTestDriver.hpp +++ b/lib/kokkos/core/perf_test/PerfTestDriver.hpp @@ -60,6 +60,342 @@ namespace Test { enum { NUMBER_OF_TRIALS = 5 }; +template< class DeviceType , class LayoutType > +void run_test_mdrange( int exp_beg , int exp_end, const char deviceTypeName[], int range_offset = 0, int tile_offset = 0 ) +// exp_beg = 6 => 2^6 = 64 is starting range length +{ +#define MDRANGE_PERFORMANCE_OUTPUT_VERBOSE 0 + + std::string label_mdrange ; + label_mdrange.append( "\"MDRange< double , " ); + label_mdrange.append( deviceTypeName ); + label_mdrange.append( " >\"" ); + + std::string label_range_col2 ; + label_range_col2.append( "\"RangeColTwo< double , " ); + label_range_col2.append( deviceTypeName ); + label_range_col2.append( " >\"" ); + + std::string label_range_col_all ; + label_range_col_all.append( "\"RangeColAll< double , " ); + label_range_col_all.append( deviceTypeName ); + label_range_col_all.append( " >\"" ); + + if ( std::is_same::value) { + std::cout << "--------------------------------------------------------------\n" + << "Performance tests for MDRange Layout Right" + << "\n--------------------------------------------------------------" << std::endl; + } else { + 
std::cout << "--------------------------------------------------------------\n" + << "Performance tests for MDRange Layout Left" + << "\n--------------------------------------------------------------" << std::endl; + } + + + for (int i = exp_beg ; i < exp_end ; ++i) { + const int range_length = (1<= min_bnd ) { + int tmid = min_bnd; + while ( tmid < tfast ) { + t0 = min_bnd; + t1 = tmid; + t2 = tfast; + int t2_rev = min_bnd; + int t1_rev = tmid; + int t0_rev = tfast; + +#if defined(KOKKOS_HAVE_CUDA) + //Note: Product of tile sizes must be < 1024 for Cuda + if ( t0*t1*t2 >= 1024 ) { + printf(" Exceeded Cuda tile limits; onto next range set\n\n"); + break; + } +#endif + + // Run 1 with tiles LayoutRight style + double seconds_1 = 0; + { seconds_1 = MultiDimRangePerf3D< DeviceType , double , LayoutType >::test_multi_index(range_length,range_length,range_length, t0, t1, t2) ; } + +#if MDRANGE_PERFORMANCE_OUTPUT_VERBOSE + std::cout << label_mdrange + << " , " << t0 << " , " << t1 << " , " << t2 + << " , " << seconds_1 + << std::endl ; +#endif + + if ( counter == 1 ) { + seconds_min = seconds_1; + t0_min = t0; + t1_min = t1; + t2_min = t2; + } + else { + if ( seconds_1 < seconds_min ) + { + seconds_min = seconds_1; + t0_min = t0; + t1_min = t1; + t2_min = t2; + } + } + + // Run 2 with tiles LayoutLeft style - reverse order of tile dims + double seconds_1rev = 0; + { seconds_1rev = MultiDimRangePerf3D< DeviceType , double , LayoutType >::test_multi_index(range_length,range_length,range_length, t0_rev, t1_rev, t2_rev) ; } + +#if MDRANGE_PERFORMANCE_OUTPUT_VERBOSE + std::cout << label_mdrange + << " , " << t0_rev << " , " << t1_rev << " , " << t2_rev + << " , " << seconds_1rev + << std::endl ; +#endif + + if ( seconds_1rev < seconds_min ) + { + seconds_min = seconds_1rev; + t0_min = t0_rev; + t1_min = t1_rev; + t2_min = t2_rev; + } + + ++counter; + tmid <<= 1; + } //end inner while + tfast >>=1; + } //end outer while + + std::cout << "\n" + << 
"--------------------------------------------------------------\n" + << label_mdrange + << "\n Min values " + << "\n Range length per dim (3D): " << range_length + << "\n TileDims: " << t0_min << " , " << t1_min << " , " << t2_min + << "\n Min time: " << seconds_min + << "\n---------------------------------------------------------------" + << std::endl ; + } //end scope + +#if !defined(KOKKOS_HAVE_CUDA) + double seconds_min_c = 0.0; + int t0c_min = 0, t1c_min = 0, t2c_min = 0; + int counter = 1; + { + int min_bnd = 8; + // Test 1_c: MDRange with 0 for 'inner' tile dim; this case will utilize the full span in that direction, should be similar to Collapse<2> + if ( std::is_same::value ) { + for ( unsigned int T0 = min_bnd; T0 < static_cast(range_length); T0<<=1 ) { + for ( unsigned int T1 = min_bnd; T1 < static_cast(range_length); T1<<=1 ) { + double seconds_c = 0; + { seconds_c = MultiDimRangePerf3D< DeviceType , double , LayoutType >::test_multi_index(range_length,range_length,range_length, T0, T1, 0) ; } + +#if MDRANGE_PERFORMANCE_OUTPUT_VERBOSE + std::cout << " MDRange LR with '0' tile - collapse-like \n" + << label_mdrange + << " , " << T0 << " , " << T1 << " , " << range_length + << " , " << seconds_c + << std::endl ; +#endif + + t2c_min = range_length; + if ( counter == 1 ) { + seconds_min_c = seconds_c; + t0c_min = T0; + t1c_min = T1; + } + else { + if ( seconds_c < seconds_min_c ) + { + seconds_min_c = seconds_c; + t0c_min = T0; + t1c_min = T1; + } + } + ++counter; + } + } + } + else { + for ( unsigned int T1 = min_bnd; T1 <= static_cast(range_length); T1<<=1 ) { + for ( unsigned int T2 = min_bnd; T2 <= static_cast(range_length); T2<<=1 ) { + double seconds_c = 0; + { seconds_c = MultiDimRangePerf3D< DeviceType , double , LayoutType >::test_multi_index(range_length,range_length,range_length, 0, T1, T2) ; } + +#if MDRANGE_PERFORMANCE_OUTPUT_VERBOSE + std::cout << " MDRange LL with '0' tile - collapse-like \n" + << label_mdrange + << " , " < style: " + << "\n 
Min values " + << "\n Range length per dim (3D): " << range_length + << "\n TileDims: " << t0c_min << " , " << t1c_min << " , " << t2c_min + << "\n Min time: " << seconds_min_c + << "\n---------------------------------------------------------------" + << std::endl ; + } //end scope test 2 +#endif + + + // Test 2: RangePolicy Collapse2 style + double seconds_2 = 0; + { seconds_2 = RangePolicyCollapseTwo< DeviceType , double , LayoutType >::test_index_collapse_two(range_length,range_length,range_length) ; } + std::cout << label_range_col2 + << " , " << range_length + << " , " << seconds_2 + << std::endl ; + + + // Test 3: RangePolicy Collapse all style - not necessary, always slow + /* + double seconds_3 = 0; + { seconds_3 = RangePolicyCollapseAll< DeviceType , double , LayoutType >::test_collapse_all(range_length,range_length,range_length) ; } + std::cout << label_range_col_all + << " , " << range_length + << " , " << seconds_3 + << "\n---------------------------------------------------------------" + << std::endl ; + */ + + // Compare fastest times... 
will never be collapse all so ignore it + // seconds_min = tiled MDRange + // seconds_min_c = collapse<2>-like MDRange (tiledim = span for fast dim) - only for non-Cuda, else tile too long + // seconds_2 = collapse<2>-style RangePolicy + // seconds_3 = collapse<3>-style RangePolicy + +#if !defined(KOKKOS_HAVE_CUDA) + if ( seconds_min < seconds_min_c ) { + if ( seconds_min < seconds_2 ) { + std::cout << "--------------------------------------------------------------\n" + << " Fastest run: MDRange tiled\n" + << " Time: " << seconds_min + << " Difference: " << seconds_2 - seconds_min + << " Other times: \n" + << " MDrange collapse-like (tiledim = span on fast dim) type: " << seconds_min_c << "\n" + << " Collapse2 Range Policy: " << seconds_2 << "\n" + << "\n--------------------------------------------------------------" + << "\n--------------------------------------------------------------" + //<< "\n\n" + << std::endl; + } + else if ( seconds_min > seconds_2 ) { + std::cout << " Fastest run: Collapse2 RangePolicy\n" + << " Time: " << seconds_2 + << " Difference: " << seconds_min - seconds_2 + << " Other times: \n" + << " MDrange Tiled: " << seconds_min << "\n" + << " MDrange collapse-like (tiledim = span on fast dim) type: " << seconds_min_c << "\n" + << "\n--------------------------------------------------------------" + << "\n--------------------------------------------------------------" + //<< "\n\n" + << std::endl; + } + } + else if ( seconds_min > seconds_min_c ) { + if ( seconds_min_c < seconds_2 ) { + std::cout << "--------------------------------------------------------------\n" + << " Fastest run: MDRange collapse-like (tiledim = span on fast dim) type\n" + << " Time: " << seconds_min_c + << " Difference: " << seconds_2 - seconds_min_c + << " Other times: \n" + << " MDrange Tiled: " << seconds_min << "\n" + << " Collapse2 Range Policy: " << seconds_2 << "\n" + << "\n--------------------------------------------------------------" + << 
"\n--------------------------------------------------------------" + //<< "\n\n" + << std::endl; + } + else if ( seconds_min_c > seconds_2 ) { + std::cout << " Fastest run: Collapse2 RangePolicy\n" + << " Time: " << seconds_2 + << " Difference: " << seconds_min_c - seconds_2 + << " Other times: \n" + << " MDrange Tiled: " << seconds_min << "\n" + << " MDrange collapse-like (tiledim = span on fast dim) type: " << seconds_min_c << "\n" + << "\n--------------------------------------------------------------" + << "\n--------------------------------------------------------------" + //<< "\n\n" + << std::endl; + } + } // end else if +#else + if ( seconds_min < seconds_2 ) { + std::cout << "--------------------------------------------------------------\n" + << " Fastest run: MDRange tiled\n" + << " Time: " << seconds_min + << " Difference: " << seconds_2 - seconds_min + << " Other times: \n" + << " Collapse2 Range Policy: " << seconds_2 << "\n" + << "\n--------------------------------------------------------------" + << "\n--------------------------------------------------------------" + //<< "\n\n" + << std::endl; + } + else if ( seconds_min > seconds_2 ) { + std::cout << " Fastest run: Collapse2 RangePolicy\n" + << " Time: " << seconds_2 + << " Difference: " << seconds_min - seconds_2 + << " Other times: \n" + << " MDrange Tiled: " << seconds_min << "\n" + << "\n--------------------------------------------------------------" + << "\n--------------------------------------------------------------" + //<< "\n\n" + << std::endl; + } +#endif + + } //end for + +#undef MDRANGE_PERFORMANCE_OUTPUT_VERBOSE + +} template< class DeviceType > diff --git a/lib/kokkos/core/perf_test/PerfTestHost.cpp b/lib/kokkos/core/perf_test/PerfTestHost.cpp index 606177ca50..831d581109 100644 --- a/lib/kokkos/core/perf_test/PerfTestHost.cpp +++ b/lib/kokkos/core/perf_test/PerfTestHost.cpp @@ -66,6 +66,8 @@ const char TestHostDeviceName[] = "Kokkos::Serial" ; #include +#include + #include #include 
#include @@ -102,6 +104,14 @@ protected: } }; +//TEST_F( host, mdrange_lr ) { +// EXPECT_NO_THROW( (run_test_mdrange (5, 8, TestHostDeviceName) ) ); +//} + +//TEST_F( host, mdrange_ll ) { +// EXPECT_NO_THROW( (run_test_mdrange (5, 8, TestHostDeviceName) ) ); +//} + TEST_F( host, hexgrad ) { EXPECT_NO_THROW(run_test_hexgrad< TestHostDevice>( 10, 20, TestHostDeviceName )); } diff --git a/lib/kokkos/core/perf_test/PerfTestMDRange.hpp b/lib/kokkos/core/perf_test/PerfTestMDRange.hpp new file mode 100644 index 0000000000..d910b513c6 --- /dev/null +++ b/lib/kokkos/core/perf_test/PerfTestMDRange.hpp @@ -0,0 +1,564 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +namespace Test { +template< class DeviceType + , typename ScalarType = double + , typename TestLayout = Kokkos::LayoutRight + > +struct MultiDimRangePerf3D +{ + typedef DeviceType execution_space; + typedef typename execution_space::size_type size_type; + + using iterate_type = Kokkos::Experimental::Iterate; + + typedef Kokkos::View view_type; + typedef typename view_type::HostMirror host_view_type; + + view_type A; + view_type B; + const long irange; + const long jrange; + const long krange; + + MultiDimRangePerf3D(const view_type & A_, const view_type & B_, const long &irange_, const long &jrange_, const long &krange_) + : A(A_), B(B_), irange(irange_), jrange(jrange_), krange(krange_) + {} + + KOKKOS_INLINE_FUNCTION + void operator()(const long i, const long j, const long k) const + { + A(i,j,k) = 0.25*(ScalarType)( B(i+2,j,k) + B(i+1,j,k) + + B(i,j+2,k) + B(i,j+1,k) + + B(i,j,k+2) + B(i,j,k+1) + + B(i,j,k) ); + } + + + struct InitZeroTag {}; +// struct InitViewTag {}; + + struct Init + { + + Init(const view_type & input_, const long &irange_, const long &jrange_, const long &krange_) + : input(input_), irange(irange_), jrange(jrange_), krange(krange_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const long i, const long j, const long k) const + { + input(i,j,k) 
= 1.0; + } + + KOKKOS_INLINE_FUNCTION + void operator()(const InitZeroTag&, const long i, const long j, const long k) const + { + input(i,j,k) = 0; + } + + view_type input; + const long irange; + const long jrange; + const long krange; + }; + + + static double test_multi_index(const unsigned int icount, const unsigned int jcount, const unsigned int kcount, const unsigned int Ti = 1, const unsigned int Tj = 1, const unsigned int Tk = 1, const long iter = 1) + { + //This test performs multidim range over all dims + view_type Atest("Atest", icount, jcount, kcount); + view_type Btest("Btest", icount+2, jcount+2, kcount+2); + typedef MultiDimRangePerf3D FunctorType; + + double dt_min = 0; + + // LayoutRight + if ( std::is_same::value ) { + Kokkos::Experimental::MDRangePolicy, execution_space > policy_initA({{0,0,0}},{{icount,jcount,kcount}},{{Ti,Tj,Tk}}); + Kokkos::Experimental::MDRangePolicy, execution_space > policy_initB({{0,0,0}},{{icount+2,jcount+2,kcount+2}},{{Ti,Tj,Tk}}); + + typedef typename Kokkos::Experimental::MDRangePolicy, execution_space > MDRangeType; + using tile_type = typename MDRangeType::tile_type; + using point_type = typename MDRangeType::point_type; + + Kokkos::Experimental::MDRangePolicy, execution_space > policy(point_type{{0,0,0}},point_type{{icount,jcount,kcount}},tile_type{{Ti,Tj,Tk}} ); + + Kokkos::Experimental::md_parallel_for( policy_initA, Init(Atest, icount, jcount, kcount) ); + execution_space::fence(); + Kokkos::Experimental::md_parallel_for( policy_initB, Init(Btest, icount+2, jcount+2, kcount+2) ); + execution_space::fence(); + + for (int i = 0; i < iter; ++i) + { + Kokkos::Timer timer; + Kokkos::Experimental::md_parallel_for( policy, FunctorType(Atest, Btest, icount, jcount, kcount) ); + execution_space::fence(); + const double dt = timer.seconds(); + if ( 0 == i ) dt_min = dt ; + else dt_min = dt < dt_min ? 
dt : dt_min ; + + //Correctness check - only the first run + if ( 0 == i ) + { + long numErrors = 0; + host_view_type Ahost("Ahost", icount, jcount, kcount); + Kokkos::deep_copy(Ahost, Atest); + host_view_type Bhost("Bhost", icount+2, jcount+2, kcount+2); + Kokkos::deep_copy(Bhost, Btest); + + // On KNL, this may vectorize - add print statement to prevent + // Also, compare against epsilon, as vectorization can change bitwise answer + for ( long l = 0; l < static_cast(icount); ++l ) { + for ( long j = 0; j < static_cast(jcount); ++j ) { + for ( long k = 0; k < static_cast(kcount); ++k ) { + ScalarType check = 0.25*(ScalarType)( Bhost(l+2,j,k) + Bhost(l+1,j,k) + + Bhost(l,j+2,k) + Bhost(l,j+1,k) + + Bhost(l,j,k+2) + Bhost(l,j,k+1) + + Bhost(l,j,k) ); + if ( Ahost(l,j,k) - check != 0 ) { + ++numErrors; + std::cout << " Correctness error at index: " << l << ","<, execution_space > policy_initA({{0,0,0}},{{icount,jcount,kcount}},{{Ti,Tj,Tk}}); + Kokkos::Experimental::MDRangePolicy, execution_space > policy_initB({{0,0,0}},{{icount+2,jcount+2,kcount+2}},{{Ti,Tj,Tk}}); + + //typedef typename Kokkos::Experimental::MDRangePolicy, execution_space > MDRangeType; + //using tile_type = typename MDRangeType::tile_type; + //using point_type = typename MDRangeType::point_type; + //Kokkos::Experimental::MDRangePolicy, execution_space > policy(point_type{{0,0,0}},point_type{{icount,jcount,kcount}},tile_type{{Ti,Tj,Tk}} ); + Kokkos::Experimental::MDRangePolicy, execution_space > policy({{0,0,0}},{{icount,jcount,kcount}},{{Ti,Tj,Tk}} ); + + Kokkos::Experimental::md_parallel_for( policy_initA, Init(Atest, icount, jcount, kcount) ); + execution_space::fence(); + Kokkos::Experimental::md_parallel_for( policy_initB, Init(Btest, icount+2, jcount+2, kcount+2) ); + execution_space::fence(); + + for (int i = 0; i < iter; ++i) + { + Kokkos::Timer timer; + Kokkos::Experimental::md_parallel_for( policy, FunctorType(Atest, Btest, icount, jcount, kcount) ); + execution_space::fence(); + const 
double dt = timer.seconds(); + if ( 0 == i ) dt_min = dt ; + else dt_min = dt < dt_min ? dt : dt_min ; + + //Correctness check - only the first run + if ( 0 == i ) + { + long numErrors = 0; + host_view_type Ahost("Ahost", icount, jcount, kcount); + Kokkos::deep_copy(Ahost, Atest); + host_view_type Bhost("Bhost", icount+2, jcount+2, kcount+2); + Kokkos::deep_copy(Bhost, Btest); + + // On KNL, this may vectorize - add print statement to prevent + // Also, compare against epsilon, as vectorization can change bitwise answer + for ( long l = 0; l < static_cast(icount); ++l ) { + for ( long j = 0; j < static_cast(jcount); ++j ) { + for ( long k = 0; k < static_cast(kcount); ++k ) { + ScalarType check = 0.25*(ScalarType)( Bhost(l+2,j,k) + Bhost(l+1,j,k) + + Bhost(l,j+2,k) + Bhost(l,j+1,k) + + Bhost(l,j,k+2) + Bhost(l,j,k+1) + + Bhost(l,j,k) ); + if ( Ahost(l,j,k) - check != 0 ) { + ++numErrors; + std::cout << " Correctness error at index: " << l << ","< +struct RangePolicyCollapseTwo +{ + // RangePolicy for 3D range, but will collapse only 2 dims => like Rank<2> for multi-dim; unroll 2 dims in one-dim + + typedef DeviceType execution_space; + typedef typename execution_space::size_type size_type; + typedef TestLayout layout; + + using iterate_type = Kokkos::Experimental::Iterate; + + typedef Kokkos::View view_type; + typedef typename view_type::HostMirror host_view_type; + + view_type A; + view_type B; + const long irange; + const long jrange; + const long krange; + + RangePolicyCollapseTwo(view_type & A_, const view_type & B_, const long &irange_, const long &jrange_, const long &krange_) + : A(A_), B(B_) , irange(irange_), jrange(jrange_), krange(krange_) + {} + + KOKKOS_INLINE_FUNCTION + void operator()(const long r) const + { + if ( std::is_same::value ) + { +//id(i,j,k) = k + j*Nk + i*Nk*Nj = k + Nk*(j + i*Nj) = k + Nk*r +//r = j + i*Nj + long i = int(r / jrange); + long j = int( r - i*jrange); + for (int k = 0; k < krange; ++k) { + A(i,j,k) = 0.25*(ScalarType)( 
B(i+2,j,k) + B(i+1,j,k) + + B(i,j+2,k) + B(i,j+1,k) + + B(i,j,k+2) + B(i,j,k+1) + + B(i,j,k) ); + } + } + else if ( std::is_same::value ) + { +//id(i,j,k) = i + j*Ni + k*Ni*Nj = i + Ni*(j + k*Nj) = i + Ni*r +//r = j + k*Nj + long k = int(r / jrange); + long j = int( r - k*jrange); + for (int i = 0; i < irange; ++i) { + A(i,j,k) = 0.25*(ScalarType)( B(i+2,j,k) + B(i+1,j,k) + + B(i,j+2,k) + B(i,j+1,k) + + B(i,j,k+2) + B(i,j,k+1) + + B(i,j,k) ); + } + } + } + + + struct Init + { + view_type input; + const long irange; + const long jrange; + const long krange; + + Init(const view_type & input_, const long &irange_, const long &jrange_, const long &krange_) + : input(input_), irange(irange_), jrange(jrange_), krange(krange_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const long r) const + { + if ( std::is_same::value ) + { + long i = int(r / jrange); + long j = int( r - i*jrange); + for (int k = 0; k < krange; ++k) { + input(i,j,k) = 1; + } + } + else if ( std::is_same::value ) + { + long k = int(r / jrange); + long j = int( r - k*jrange); + for (int i = 0; i < irange; ++i) { + input(i,j,k) = 1; + } + } + } + }; + + + static double test_index_collapse_two(const unsigned int icount, const unsigned int jcount, const unsigned int kcount, const long iter = 1) + { + // This test refers to collapsing two dims while using the RangePolicy + view_type Atest("Atest", icount, jcount, kcount); + view_type Btest("Btest", icount+2, jcount+2, kcount+2); + typedef RangePolicyCollapseTwo FunctorType; + + long collapse_index_rangeA = 0; + long collapse_index_rangeB = 0; + if ( std::is_same::value ) { + collapse_index_rangeA = icount*jcount; + collapse_index_rangeB = (icount+2)*(jcount+2); +// std::cout << " LayoutRight " << std::endl; + } else if ( std::is_same::value ) { + collapse_index_rangeA = kcount*jcount; + collapse_index_rangeB = (kcount+2)*(jcount+2); +// std::cout << " LayoutLeft " << std::endl; + } else { + std::cout << " LayoutRight or LayoutLeft required - will pass 0 
as range instead " << std::endl; + exit(-1); + } + + Kokkos::RangePolicy policy(0, (collapse_index_rangeA) ); + Kokkos::RangePolicy policy_initB(0, (collapse_index_rangeB) ); + + double dt_min = 0; + + Kokkos::parallel_for( policy, Init(Atest,icount,jcount,kcount) ); + execution_space::fence(); + Kokkos::parallel_for( policy_initB, Init(Btest,icount+2,jcount+2,kcount+2) ); + execution_space::fence(); + + for (int i = 0; i < iter; ++i) + { + Kokkos::Timer timer; + Kokkos::parallel_for(policy, FunctorType(Atest, Btest, icount, jcount, kcount)); + execution_space::fence(); + const double dt = timer.seconds(); + if ( 0 == i ) dt_min = dt ; + else dt_min = dt < dt_min ? dt : dt_min ; + + //Correctness check - first iteration only + if ( 0 == i ) + { + long numErrors = 0; + host_view_type Ahost("Ahost", icount, jcount, kcount); + Kokkos::deep_copy(Ahost, Atest); + host_view_type Bhost("Bhost", icount+2, jcount+2, kcount+2); + Kokkos::deep_copy(Bhost, Btest); + + // On KNL, this may vectorize - add print statement to prevent + // Also, compare against epsilon, as vectorization can change bitwise answer + for ( long l = 0; l < static_cast(icount); ++l ) { + for ( long j = 0; j < static_cast(jcount); ++j ) { + for ( long k = 0; k < static_cast(kcount); ++k ) { + ScalarType check = 0.25*(ScalarType)( Bhost(l+2,j,k) + Bhost(l+1,j,k) + + Bhost(l,j+2,k) + Bhost(l,j+1,k) + + Bhost(l,j,k+2) + Bhost(l,j,k+1) + + Bhost(l,j,k) ); + if ( Ahost(l,j,k) - check != 0 ) { + ++numErrors; + std::cout << " Correctness error at index: " << l << ","< +struct RangePolicyCollapseAll +{ + // RangePolicy for 3D range, but will collapse all dims + + typedef DeviceType execution_space; + typedef typename execution_space::size_type size_type; + typedef TestLayout layout; + + typedef Kokkos::View view_type; + typedef typename view_type::HostMirror host_view_type; + + view_type A; + view_type B; + const long irange; + const long jrange; + const long krange; + + RangePolicyCollapseAll(view_type & A_, 
const view_type & B_, const long &irange_, const long &jrange_, const long &krange_) + : A(A_), B(B_), irange(irange_), jrange(jrange_), krange(krange_) + {} + + KOKKOS_INLINE_FUNCTION + void operator()(const long r) const + { + if ( std::is_same::value ) + { + long i = int(r / (jrange*krange)); + long j = int(( r - i*jrange*krange)/krange); + long k = int(r - i*jrange*krange - j*krange); + A(i,j,k) = 0.25*(ScalarType)( B(i+2,j,k) + B(i+1,j,k) + + B(i,j+2,k) + B(i,j+1,k) + + B(i,j,k+2) + B(i,j,k+1) + + B(i,j,k) ); + } + else if ( std::is_same::value ) + { + long k = int(r / (irange*jrange)); + long j = int(( r - k*irange*jrange)/irange); + long i = int(r - k*irange*jrange - j*irange); + A(i,j,k) = 0.25*(ScalarType)( B(i+2,j,k) + B(i+1,j,k) + + B(i,j+2,k) + B(i,j+1,k) + + B(i,j,k+2) + B(i,j,k+1) + + B(i,j,k) ); + } + } + + + struct Init + { + view_type input; + const long irange; + const long jrange; + const long krange; + + Init(const view_type & input_, const long &irange_, const long &jrange_, const long &krange_) + : input(input_), irange(irange_), jrange(jrange_), krange(krange_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const long r) const + { + if ( std::is_same::value ) + { + long i = int(r / (jrange*krange)); + long j = int(( r - i*jrange*krange)/krange); + long k = int(r - i*jrange*krange - j*krange); + input(i,j,k) = 1; + } + else if ( std::is_same::value ) + { + long k = int(r / (irange*jrange)); + long j = int(( r - k*irange*jrange)/irange); + long i = int(r - k*irange*jrange - j*irange); + input(i,j,k) = 1; + } + } + }; + + + static double test_collapse_all(const unsigned int icount, const unsigned int jcount, const unsigned int kcount, const long iter = 1) + { + //This test refers to collapsing all dims using the RangePolicy + view_type Atest("Atest", icount, jcount, kcount); + view_type Btest("Btest", icount+2, jcount+2, kcount+2); + typedef RangePolicyCollapseAll FunctorType; + + const long flat_index_range = icount*jcount*kcount; + 
Kokkos::RangePolicy policy(0, flat_index_range ); + Kokkos::RangePolicy policy_initB(0, (icount+2)*(jcount+2)*(kcount+2) ); + + double dt_min = 0; + + Kokkos::parallel_for( policy, Init(Atest,icount,jcount,kcount) ); + execution_space::fence(); + Kokkos::parallel_for( policy_initB, Init(Btest,icount+2,jcount+2,kcount+2) ); + execution_space::fence(); + + for (int i = 0; i < iter; ++i) + { + Kokkos::Timer timer; + Kokkos::parallel_for(policy, FunctorType(Atest, Btest, icount, jcount, kcount)); + execution_space::fence(); + const double dt = timer.seconds(); + if ( 0 == i ) dt_min = dt ; + else dt_min = dt < dt_min ? dt : dt_min ; + + //Correctness check - first iteration only + if ( 0 == i ) + { + long numErrors = 0; + host_view_type Ahost("Ahost", icount, jcount, kcount); + Kokkos::deep_copy(Ahost, Atest); + host_view_type Bhost("Bhost", icount+2, jcount+2, kcount+2); + Kokkos::deep_copy(Bhost, Btest); + + // On KNL, this may vectorize - add print statement to prevent + // Also, compare against epsilon, as vectorization can change bitwise answer + for ( long l = 0; l < static_cast(icount); ++l ) { + for ( long j = 0; j < static_cast(jcount); ++j ) { + for ( long k = 0; k < static_cast(kcount); ++k ) { + ScalarType check = 0.25*(ScalarType)( Bhost(l+2,j,k) + Bhost(l+1,j,k) + + Bhost(l,j+2,k) + Bhost(l,j+1,k) + + Bhost(l,j,k+2) + Bhost(l,j,k+1) + + Bhost(l,j,k) ); + if ( Ahost(l,j,k) - check != 0 ) { + ++numErrors; + std::cout << " Callapse ALL Correctness error at index: " << l << ","< +#include +#include + +#include + +/* only compile this file if CUDA is enabled for Kokkos */ +#if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA ) + +#include + +//#include +// Including the file above, leads to following type of errors: +// /home/ndellin/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp(84): error: incomplete type is not allowed +// As a result, recreate cuda_parallel_launch and associated code + +#if defined(KOKKOS_ENABLE_PROFILING) +#include +#include +#endif + 
+namespace Kokkos { namespace Experimental { namespace Impl { + +// ------------------------------------------------------------------ // + +template< class DriverType > +__global__ +static void cuda_parallel_launch( const DriverType driver ) +{ + driver(); +} + +template< class DriverType > +struct CudaLaunch +{ + inline + CudaLaunch( const DriverType & driver + , const dim3 & grid + , const dim3 & block + ) + { + cuda_parallel_launch< DriverType ><<< grid , block >>>(driver); + } + +}; + +// ------------------------------------------------------------------ // +template< int N , typename RP , typename Functor , typename Tag > +struct apply_impl; + +//Rank 2 +// Specializations for void tag type +template< typename RP , typename Functor > +struct apply_impl<2,RP,Functor,void > +{ + using index_type = typename RP::index_type; + + __device__ + apply_impl( const RP & rp_ , const Functor & f_ ) + : m_rp(rp_) + , m_func(f_) + {} + + inline __device__ + void exec_range() const + { +// LL + if (RP::inner_direction == RP::Left) { + /* + index_type offset_1 = blockIdx.y*m_rp.m_tile[1] + threadIdx.y; + index_type offset_0 = blockIdx.x*m_rp.m_tile[0] + threadIdx.x; + + for ( index_type j = offset_1; j < m_rp.m_upper[1], threadIdx.y < m_rp.m_tile[1]; j += (gridDim.y*m_rp.m_tile[1]) ) { + for ( index_type i = offset_0; i < m_rp.m_upper[0], threadIdx.x < m_rp.m_tile[0]; i += (gridDim.x*m_rp.m_tile[0]) ) { + m_func(i, j); + } } +*/ + for ( index_type tile_id1 = blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) { + const index_type offset_1 = tile_id1*m_rp.m_tile[1] + threadIdx.y; + if ( offset_1 < m_rp.m_upper[1] && threadIdx.y < m_rp.m_tile[1] ) { + + for ( index_type tile_id0 = blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) { + const index_type offset_0 = tile_id0*m_rp.m_tile[0] + threadIdx.x; + if ( offset_0 < m_rp.m_upper[0] && threadIdx.x < m_rp.m_tile[0] ) { + m_func(offset_0 , offset_1); + } + } + } + } + } +// LR + else { +/* + 
index_type offset_1 = blockIdx.y*m_rp.m_tile[1] + threadIdx.y; + index_type offset_0 = blockIdx.x*m_rp.m_tile[0] + threadIdx.x; + + for ( index_type i = offset_0; i < m_rp.m_upper[0], threadIdx.x < m_rp.m_tile[0]; i += (gridDim.x*m_rp.m_tile[0]) ) { + for ( index_type j = offset_1; j < m_rp.m_upper[1], threadIdx.y < m_rp.m_tile[1]; j += (gridDim.y*m_rp.m_tile[1]) ) { + m_func(i, j); + } } +*/ + for ( index_type tile_id0 = blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) { + const index_type offset_0 = tile_id0*m_rp.m_tile[0] + threadIdx.x; + if ( offset_0 < m_rp.m_upper[0] && threadIdx.x < m_rp.m_tile[0] ) { + + for ( index_type tile_id1 = blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) { + const index_type offset_1 = tile_id1*m_rp.m_tile[1] + threadIdx.y; + if ( offset_1 < m_rp.m_upper[1] && threadIdx.y < m_rp.m_tile[1] ) { + m_func(offset_0 , offset_1); + } + } + } + } + } + + } //end exec_range + +private: + const RP & m_rp; + const Functor & m_func; + +}; + +// Specializations for tag type +template< typename RP , typename Functor , typename Tag > +struct apply_impl<2,RP,Functor,Tag> +{ + using index_type = typename RP::index_type; + + inline __device__ + apply_impl( const RP & rp_ , const Functor & f_ ) + : m_rp(rp_) + , m_func(f_) + {} + + inline __device__ + void exec_range() const + { + if (RP::inner_direction == RP::Left) { + // Loop over size maxnumblocks until full range covered +/* + index_type offset_1 = blockIdx.y*m_rp.m_tile[1] + threadIdx.y; + index_type offset_0 = blockIdx.x*m_rp.m_tile[0] + threadIdx.x; + + for ( index_type j = offset_1; j < m_rp.m_upper[1], threadIdx.y < m_rp.m_tile[1]; j += (gridDim.y*m_rp.m_tile[1]) ) { + for ( index_type i = offset_0; i < m_rp.m_upper[0], threadIdx.x < m_rp.m_tile[0]; i += (gridDim.x*m_rp.m_tile[0]) ) { + m_func(Tag(), i, j); + } } +*/ + for ( index_type tile_id1 = blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) { + const index_type offset_1 = 
tile_id1*m_rp.m_tile[1] + threadIdx.y; + if ( offset_1 < m_rp.m_upper[1] && threadIdx.y < m_rp.m_tile[1] ) { + + for ( index_type tile_id0 = blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) { + const index_type offset_0 = tile_id0*m_rp.m_tile[0] + threadIdx.x; + if ( offset_0 < m_rp.m_upper[0] && threadIdx.x < m_rp.m_tile[0] ) { + m_func(Tag(), offset_0 , offset_1); + } + } + } + } + } + else { +/* + index_type offset_1 = blockIdx.y*m_rp.m_tile[1] + threadIdx.y; + index_type offset_0 = blockIdx.x*m_rp.m_tile[0] + threadIdx.x; + + for ( index_type i = offset_0; i < m_rp.m_upper[0], threadIdx.x < m_rp.m_tile[0]; i += (gridDim.x*m_rp.m_tile[0]) ) { + for ( index_type j = offset_1; j < m_rp.m_upper[1], threadIdx.y < m_rp.m_tile[1]; j += (gridDim.y*m_rp.m_tile[1]) ) { + m_func(Tag(), i, j); + } } +*/ + for ( index_type tile_id0 = blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) { + const index_type offset_0 = tile_id0*m_rp.m_tile[0] + threadIdx.x; + if ( offset_0 < m_rp.m_upper[0] && threadIdx.x < m_rp.m_tile[0] ) { + + for ( index_type tile_id1 = blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) { + const index_type offset_1 = tile_id1*m_rp.m_tile[1] + threadIdx.y; + if ( offset_1 < m_rp.m_upper[1] && threadIdx.y < m_rp.m_tile[1] ) { + m_func(Tag(), offset_0 , offset_1); + } + } + } + } + } + + } //end exec_range + +private: + const RP & m_rp; + const Functor & m_func; +}; + + +//Rank 3 +// Specializations for void tag type +template< typename RP , typename Functor > +struct apply_impl<3,RP,Functor,void > +{ + using index_type = typename RP::index_type; + + __device__ + apply_impl( const RP & rp_ , const Functor & f_ ) + : m_rp(rp_) + , m_func(f_) + {} + + inline __device__ + void exec_range() const + { +// LL + if (RP::inner_direction == RP::Left) { + for ( index_type tile_id2 = blockIdx.z; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.z ) { + const index_type offset_2 = tile_id2*m_rp.m_tile[2] + threadIdx.z; + 
if ( offset_2 < m_rp.m_upper[2] && threadIdx.z < m_rp.m_tile[2] ) { + + for ( index_type tile_id1 = blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) { + const index_type offset_1 = tile_id1*m_rp.m_tile[1] + threadIdx.y; + if ( offset_1 < m_rp.m_upper[1] && threadIdx.y < m_rp.m_tile[1] ) { + + for ( index_type tile_id0 = blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) { + const index_type offset_0 = tile_id0*m_rp.m_tile[0] + threadIdx.x; + if ( offset_0 < m_rp.m_upper[0] && threadIdx.x < m_rp.m_tile[0] ) { + m_func(offset_0 , offset_1 , offset_2); + } + } + } + } + } + } + } +// LR + else { + for ( index_type tile_id0 = blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) { + const index_type offset_0 = tile_id0*m_rp.m_tile[0] + threadIdx.x; + if ( offset_0 < m_rp.m_upper[0] && threadIdx.x < m_rp.m_tile[0] ) { + + for ( index_type tile_id1 = blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) { + const index_type offset_1 = tile_id1*m_rp.m_tile[1] + threadIdx.y; + if ( offset_1 < m_rp.m_upper[1] && threadIdx.y < m_rp.m_tile[1] ) { + + for ( index_type tile_id2 = blockIdx.z; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.z ) { + const index_type offset_2 = tile_id2*m_rp.m_tile[2] + threadIdx.z; + if ( offset_2 < m_rp.m_upper[2] && threadIdx.z < m_rp.m_tile[2] ) { + m_func(offset_0 , offset_1 , offset_2); + } + } + } + } + } + } + } + + } //end exec_range + +private: + const RP & m_rp; + const Functor & m_func; +}; + +// Specializations for tag type +template< typename RP , typename Functor , typename Tag > +struct apply_impl<3,RP,Functor,Tag> +{ + using index_type = typename RP::index_type; + + inline __device__ + apply_impl( const RP & rp_ , const Functor & f_ ) + : m_rp(rp_) + , m_func(f_) + {} + + inline __device__ + void exec_range() const + { + if (RP::inner_direction == RP::Left) { + for ( index_type tile_id2 = blockIdx.z; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.z ) { + const 
index_type offset_2 = tile_id2*m_rp.m_tile[2] + threadIdx.z; + if ( offset_2 < m_rp.m_upper[2] && threadIdx.z < m_rp.m_tile[2] ) { + + for ( index_type tile_id1 = blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) { + const index_type offset_1 = tile_id1*m_rp.m_tile[1] + threadIdx.y; + if ( offset_1 < m_rp.m_upper[1] && threadIdx.y < m_rp.m_tile[1] ) { + + for ( index_type tile_id0 = blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) { + const index_type offset_0 = tile_id0*m_rp.m_tile[0] + threadIdx.x; + if ( offset_0 < m_rp.m_upper[0] && threadIdx.x < m_rp.m_tile[0] ) { + m_func(Tag(), offset_0 , offset_1 , offset_2); + } + } + } + } + } + } + } + else { + for ( index_type tile_id0 = blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) { + const index_type offset_0 = tile_id0*m_rp.m_tile[0] + threadIdx.x; + if ( offset_0 < m_rp.m_upper[0] && threadIdx.x < m_rp.m_tile[0] ) { + + for ( index_type tile_id1 = blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) { + const index_type offset_1 = tile_id1*m_rp.m_tile[1] + threadIdx.y; + if ( offset_1 < m_rp.m_upper[1] && threadIdx.y < m_rp.m_tile[1] ) { + + for ( index_type tile_id2 = blockIdx.z; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.z ) { + const index_type offset_2 = tile_id2*m_rp.m_tile[2] + threadIdx.z; + if ( offset_2 < m_rp.m_upper[2] && threadIdx.z < m_rp.m_tile[2] ) { + m_func(Tag(), offset_0 , offset_1 , offset_2); + } + } + } + } + } + } + } + + } //end exec_range + +private: + const RP & m_rp; + const Functor & m_func; +}; + + +//Rank 4 +// Specializations for void tag type +template< typename RP , typename Functor > +struct apply_impl<4,RP,Functor,void > +{ + using index_type = typename RP::index_type; + + __device__ + apply_impl( const RP & rp_ , const Functor & f_ ) + : m_rp(rp_) + , m_func(f_) + {} + + static constexpr index_type max_blocks = 65535; + + inline __device__ + void exec_range() const + { +// LL + if (RP::inner_direction 
== RP::Left) { + const index_type temp0 = m_rp.m_tile_end[0]; + const index_type temp1 = m_rp.m_tile_end[1]; + const index_type numbl0 = ( temp0 <= max_blocks ? temp0 : max_blocks ) ; + const index_type numbl1 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl0 ) : + ( temp1 <= max_blocks ? temp1 : max_blocks ) ); + + const index_type tile_id0 = blockIdx.x % numbl0; + const index_type tile_id1 = blockIdx.x / numbl0; + const index_type thr_id0 = threadIdx.x % m_rp.m_tile[0]; + const index_type thr_id1 = threadIdx.x / m_rp.m_tile[0]; + + for ( index_type tile_id3 = blockIdx.z; tile_id3 < m_rp.m_tile_end[3]; tile_id3 += gridDim.z ) { + const index_type offset_3 = tile_id3*m_rp.m_tile[3] + threadIdx.z; + if ( offset_3 < m_rp.m_upper[3] && threadIdx.z < m_rp.m_tile[3] ) { + + for ( index_type tile_id2 = blockIdx.y; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.y ) { + const index_type offset_2 = tile_id2*m_rp.m_tile[2] + threadIdx.y; + if ( offset_2 < m_rp.m_upper[2] && threadIdx.y < m_rp.m_tile[2] ) { + + for ( index_type j = tile_id1 ; j < m_rp.m_tile_end[1]; j += numbl1 ) { + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { + + for ( index_type i = tile_id0 ; i < m_rp.m_tile_end[0]; i += numbl0 ) { + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { + m_func(offset_0 , offset_1 , offset_2 , offset_3); + } + } + } + } + } + } + } + } + } +// LR + else { + const index_type temp0 = m_rp.m_tile_end[0]; + const index_type temp1 = m_rp.m_tile_end[1]; + const index_type numbl1 = ( temp1 <= max_blocks ? temp1 : max_blocks ) ; + const index_type numbl0 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl1 ) : + ( temp0 <= max_blocks ? 
temp0 : max_blocks ) ); + + const index_type tile_id0 = blockIdx.x / numbl1; + const index_type tile_id1 = blockIdx.x % numbl1; + const index_type thr_id0 = threadIdx.x / m_rp.m_tile[1]; + const index_type thr_id1 = threadIdx.x % m_rp.m_tile[1]; + + for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) { + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { + + for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) { + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { + + for ( index_type tile_id2 = blockIdx.y; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.y ) { + const index_type offset_2 = tile_id2*m_rp.m_tile[2] + threadIdx.y; + if ( offset_2 < m_rp.m_upper[2] && threadIdx.y < m_rp.m_tile[2] ) { + + for ( index_type tile_id3 = blockIdx.z; tile_id3 < m_rp.m_tile_end[3]; tile_id3 += gridDim.z ) { + const index_type offset_3 = tile_id3*m_rp.m_tile[3] + threadIdx.z; + if ( offset_3 < m_rp.m_upper[3] && threadIdx.z < m_rp.m_tile[3] ) { + m_func(offset_0 , offset_1 , offset_2 , offset_3); + } + } + } + } + } + } + } + } + } + + } //end exec_range + +private: + const RP & m_rp; + const Functor & m_func; +}; + +// Specializations for tag type +template< typename RP , typename Functor , typename Tag > +struct apply_impl<4,RP,Functor,Tag> +{ + using index_type = typename RP::index_type; + + inline __device__ + apply_impl( const RP & rp_ , const Functor & f_ ) + : m_rp(rp_) + , m_func(f_) + {} + + static constexpr index_type max_blocks = 65535; + + inline __device__ + void exec_range() const + { + if (RP::inner_direction == RP::Left) { + const index_type temp0 = m_rp.m_tile_end[0]; + const index_type temp1 = m_rp.m_tile_end[1]; + const index_type numbl0 = ( temp0 <= max_blocks ? temp0 : max_blocks ) ; + const index_type numbl1 = ( temp0*temp1 > max_blocks ? 
index_type( max_blocks / numbl0 ) : + ( temp1 <= max_blocks ? temp1 : max_blocks ) ); + + const index_type tile_id0 = blockIdx.x % numbl0; + const index_type tile_id1 = blockIdx.x / numbl0; + const index_type thr_id0 = threadIdx.x % m_rp.m_tile[0]; + const index_type thr_id1 = threadIdx.x / m_rp.m_tile[0]; + + for ( index_type tile_id3 = blockIdx.z; tile_id3 < m_rp.m_tile_end[3]; tile_id3 += gridDim.z ) { + const index_type offset_3 = tile_id3*m_rp.m_tile[3] + threadIdx.z; + if ( offset_3 < m_rp.m_upper[3] && threadIdx.z < m_rp.m_tile[3] ) { + + for ( index_type tile_id2 = blockIdx.y; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.y ) { + const index_type offset_2 = tile_id2*m_rp.m_tile[2] + threadIdx.y; + if ( offset_2 < m_rp.m_upper[2] && threadIdx.y < m_rp.m_tile[2] ) { + + for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) { + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { + + for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) { + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { + m_func(Tag(), offset_0 , offset_1 , offset_2 , offset_3); + } + } + } + } + } + } + } + } + } + else { + const index_type temp0 = m_rp.m_tile_end[0]; + const index_type temp1 = m_rp.m_tile_end[1]; + const index_type numbl1 = ( temp1 <= max_blocks ? temp1 : max_blocks ) ; + const index_type numbl0 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl1 ) : + ( temp0 <= max_blocks ? 
temp0 : max_blocks ) ); + + const index_type tile_id0 = blockIdx.x / numbl1; + const index_type tile_id1 = blockIdx.x % numbl1; + const index_type thr_id0 = threadIdx.x / m_rp.m_tile[1]; + const index_type thr_id1 = threadIdx.x % m_rp.m_tile[1]; + + for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) { + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { + + for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) { + /* offset_1 must follow the strided loop index j (as offset_0 follows i); tile_id1 is only this block's starting tile, so using it here would recompute the same offset on every iteration */ + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { + + for ( index_type tile_id2 = blockIdx.y; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.y ) { + const index_type offset_2 = tile_id2*m_rp.m_tile[2] + threadIdx.y; + if ( offset_2 < m_rp.m_upper[2] && threadIdx.y < m_rp.m_tile[2] ) { + + for ( index_type tile_id3 = blockIdx.z; tile_id3 < m_rp.m_tile_end[3]; tile_id3 += gridDim.z ) { + const index_type offset_3 = tile_id3*m_rp.m_tile[3] + threadIdx.z; + if ( offset_3 < m_rp.m_upper[3] && threadIdx.z < m_rp.m_tile[3] ) { + m_func(Tag() , offset_0 , offset_1 , offset_2 , offset_3); + } + } + } + } + } + } + } + } + } + + } //end exec_range + +private: + const RP & m_rp; + const Functor & m_func; +}; + + +//Rank 5 +// Specializations for void tag type +template< typename RP , typename Functor > +struct apply_impl<5,RP,Functor,void > +{ + using index_type = typename RP::index_type; + + __device__ + apply_impl( const RP & rp_ , const Functor & f_ ) + : m_rp(rp_) + , m_func(f_) + {} + + static constexpr index_type max_blocks = 65535; + + inline __device__ + void exec_range() const + { +// LL + if (RP::inner_direction == RP::Left) { + + index_type temp0 = m_rp.m_tile_end[0]; + index_type temp1 = m_rp.m_tile_end[1]; + const index_type numbl0 = ( temp0 <= max_blocks ? temp0 : max_blocks ) ; + const index_type numbl1 = ( temp0*temp1 > max_blocks ? 
index_type( max_blocks / numbl0 ) : + ( temp1 <= max_blocks ? temp1 : max_blocks ) ); + + const index_type tile_id0 = blockIdx.x % numbl0; + const index_type tile_id1 = blockIdx.x / numbl0; + const index_type thr_id0 = threadIdx.x % m_rp.m_tile[0]; + const index_type thr_id1 = threadIdx.x / m_rp.m_tile[0]; + + temp0 = m_rp.m_tile_end[2]; + temp1 = m_rp.m_tile_end[3]; + const index_type numbl2 = ( temp0 <= max_blocks ? temp0 : max_blocks ) ; + const index_type numbl3 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl2 ) : + ( temp1 <= max_blocks ? temp1 : max_blocks ) ); + + const index_type tile_id2 = blockIdx.y % numbl2; + const index_type tile_id3 = blockIdx.y / numbl2; + const index_type thr_id2 = threadIdx.y % m_rp.m_tile[2]; + const index_type thr_id3 = threadIdx.y / m_rp.m_tile[2]; + + for ( index_type tile_id4 = blockIdx.z; tile_id4 < m_rp.m_tile_end[4]; tile_id4 += gridDim.z ) { + const index_type offset_4 = tile_id4*m_rp.m_tile[4] + threadIdx.z; + if ( offset_4 < m_rp.m_upper[4] && threadIdx.z < m_rp.m_tile[4] ) { + + for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) { + const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3; + if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) { + + for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) { + const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2; + if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) { + + for ( index_type j = tile_id1 ; j < m_rp.m_tile_end[1]; j += numbl1 ) { + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { + + for ( index_type i = tile_id0 ; i < m_rp.m_tile_end[0]; i += numbl0 ) { + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { + m_func(offset_0 , offset_1 , offset_2 , offset_3, offset_4); + } + } + } + } + } + } + } + } + } + } + } +// LR + else { + index_type temp0 = 
m_rp.m_tile_end[0]; + index_type temp1 = m_rp.m_tile_end[1]; + const index_type numbl1 = ( temp1 <= max_blocks ? temp1 : max_blocks ) ; + const index_type numbl0 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl1 ) : + ( temp0 <= max_blocks ? temp0 : max_blocks ) ); + + const index_type tile_id0 = blockIdx.x / numbl1; + const index_type tile_id1 = blockIdx.x % numbl1; + const index_type thr_id0 = threadIdx.x / m_rp.m_tile[1]; + const index_type thr_id1 = threadIdx.x % m_rp.m_tile[1]; + + temp0 = m_rp.m_tile_end[2]; + temp1 = m_rp.m_tile_end[3]; + const index_type numbl3 = ( temp1 <= max_blocks ? temp1 : max_blocks ) ; + const index_type numbl2 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl3 ) : + ( temp0 <= max_blocks ? temp0 : max_blocks ) ); + + const index_type tile_id2 = blockIdx.y / numbl3; + const index_type tile_id3 = blockIdx.y % numbl3; + const index_type thr_id2 = threadIdx.y / m_rp.m_tile[3]; + const index_type thr_id3 = threadIdx.y % m_rp.m_tile[3]; + + for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) { + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { + + for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) { + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { + + for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) { + const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2; + if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) { + + for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) { + const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3; + if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) { + + for ( index_type tile_id4 = blockIdx.z; tile_id4 < m_rp.m_tile_end[4]; tile_id4 += gridDim.z ) { + const index_type offset_4 = tile_id4*m_rp.m_tile[4] + threadIdx.z; + if ( offset_4 < 
m_rp.m_upper[4] && threadIdx.z < m_rp.m_tile[4] ) { + m_func(offset_0 , offset_1 , offset_2 , offset_3 , offset_4); + } + } + } + } + } + } + } + } + } + } + } + + } //end exec_range + +private: + const RP & m_rp; + const Functor & m_func; +}; + +// Specializations for tag type +template< typename RP , typename Functor , typename Tag > +struct apply_impl<5,RP,Functor,Tag> +{ + using index_type = typename RP::index_type; + + __device__ + apply_impl( const RP & rp_ , const Functor & f_ ) + : m_rp(rp_) + , m_func(f_) + {} + + static constexpr index_type max_blocks = 65535; + + inline __device__ + void exec_range() const + { +// LL + if (RP::inner_direction == RP::Left) { + index_type temp0 = m_rp.m_tile_end[0]; + index_type temp1 = m_rp.m_tile_end[1]; + const index_type numbl0 = ( temp0 <= max_blocks ? temp0 : max_blocks ) ; + const index_type numbl1 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl0 ) : + ( temp1 <= max_blocks ? temp1 : max_blocks ) ); + + const index_type tile_id0 = blockIdx.x % numbl0; + const index_type tile_id1 = blockIdx.x / numbl0; + const index_type thr_id0 = threadIdx.x % m_rp.m_tile[0]; + const index_type thr_id1 = threadIdx.x / m_rp.m_tile[0]; + + temp0 = m_rp.m_tile_end[2]; + temp1 = m_rp.m_tile_end[3]; + const index_type numbl2 = ( temp0 <= max_blocks ? temp0 : max_blocks ) ; + const index_type numbl3 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl2 ) : + ( temp1 <= max_blocks ? 
temp1 : max_blocks ) ); + + const index_type tile_id2 = blockIdx.y % numbl2; + const index_type tile_id3 = blockIdx.y / numbl2; + const index_type thr_id2 = threadIdx.y % m_rp.m_tile[2]; + const index_type thr_id3 = threadIdx.y / m_rp.m_tile[2]; + + for ( index_type tile_id4 = blockIdx.z; tile_id4 < m_rp.m_tile_end[4]; tile_id4 += gridDim.z ) { + const index_type offset_4 = tile_id4*m_rp.m_tile[4] + threadIdx.z; + if ( offset_4 < m_rp.m_upper[4] && threadIdx.z < m_rp.m_tile[4] ) { + + for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) { + const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3; + if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) { + + for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) { + const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2; + if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) { + + for ( index_type j = tile_id1 ; j < m_rp.m_tile_end[1]; j += numbl1 ) { + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { + + for ( index_type i = tile_id0 ; i < m_rp.m_tile_end[0]; i += numbl0 ) { + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { + m_func(Tag() , offset_0 , offset_1 , offset_2 , offset_3, offset_4); + } + } + } + } + } + } + } + } + } + } + } +// LR + else { + index_type temp0 = m_rp.m_tile_end[0]; + index_type temp1 = m_rp.m_tile_end[1]; + const index_type numbl1 = ( temp1 <= max_blocks ? temp1 : max_blocks ) ; + const index_type numbl0 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl1 ) : + ( temp0 <= max_blocks ? 
temp0 : max_blocks ) ); + + const index_type tile_id0 = blockIdx.x / numbl1; + const index_type tile_id1 = blockIdx.x % numbl1; + const index_type thr_id0 = threadIdx.x / m_rp.m_tile[1]; + const index_type thr_id1 = threadIdx.x % m_rp.m_tile[1]; + + temp0 = m_rp.m_tile_end[2]; + temp1 = m_rp.m_tile_end[3]; + const index_type numbl3 = ( temp1 <= max_blocks ? temp1 : max_blocks ) ; + const index_type numbl2 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl3 ) : + ( temp0 <= max_blocks ? temp0 : max_blocks ) ); + + const index_type tile_id2 = blockIdx.y / numbl3; + const index_type tile_id3 = blockIdx.y % numbl3; + const index_type thr_id2 = threadIdx.y / m_rp.m_tile[3]; + const index_type thr_id3 = threadIdx.y % m_rp.m_tile[3]; + + for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) { + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { + + for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) { + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { + + for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) { + const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2; + if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) { + + for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) { + const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3; + if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) { + + for ( index_type tile_id4 = blockIdx.z; tile_id4 < m_rp.m_tile_end[4]; tile_id4 += gridDim.z ) { + const index_type offset_4 = tile_id4*m_rp.m_tile[4] + threadIdx.z; + if ( offset_4 < m_rp.m_upper[4] && threadIdx.z < m_rp.m_tile[4] ) { + m_func(Tag() , offset_0 , offset_1 , offset_2 , offset_3 , offset_4); + } + } + } + } + } + } + } + } + } + } + } + + } //end exec_range + +private: + const RP & m_rp; + const Functor & m_func; +}; + + +//Rank 6 
+// Specializations for void tag type +template< typename RP , typename Functor > +struct apply_impl<6,RP,Functor,void > +{ + using index_type = typename RP::index_type; + + __device__ + apply_impl( const RP & rp_ , const Functor & f_ ) + : m_rp(rp_) + , m_func(f_) + {} + + static constexpr index_type max_blocks = 65535; + + inline __device__ + void exec_range() const + { +// LL + if (RP::inner_direction == RP::Left) { + index_type temp0 = m_rp.m_tile_end[0]; + index_type temp1 = m_rp.m_tile_end[1]; + const index_type numbl0 = ( temp0 <= max_blocks ? temp0 : max_blocks ) ; + const index_type numbl1 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl0 ) : + ( temp1 <= max_blocks ? temp1 : max_blocks ) ); + + const index_type tile_id0 = blockIdx.x % numbl0; + const index_type tile_id1 = blockIdx.x / numbl0; + const index_type thr_id0 = threadIdx.x % m_rp.m_tile[0]; + const index_type thr_id1 = threadIdx.x / m_rp.m_tile[0]; + + temp0 = m_rp.m_tile_end[2]; + temp1 = m_rp.m_tile_end[3]; + const index_type numbl2 = ( temp0 <= max_blocks ? temp0 : max_blocks ) ; + const index_type numbl3 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl2 ) : + ( temp1 <= max_blocks ? temp1 : max_blocks ) ); + + const index_type tile_id2 = blockIdx.y % numbl2; + const index_type tile_id3 = blockIdx.y / numbl2; + const index_type thr_id2 = threadIdx.y % m_rp.m_tile[2]; + const index_type thr_id3 = threadIdx.y / m_rp.m_tile[2]; + + temp0 = m_rp.m_tile_end[4]; + temp1 = m_rp.m_tile_end[5]; + const index_type numbl4 = ( temp0 <= max_blocks ? temp0 : max_blocks ) ; + const index_type numbl5 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl4 ) : + ( temp1 <= max_blocks ? 
temp1 : max_blocks ) ); + + const index_type tile_id4 = blockIdx.z % numbl4; + const index_type tile_id5 = blockIdx.z / numbl4; + const index_type thr_id4 = threadIdx.z % m_rp.m_tile[4]; + const index_type thr_id5 = threadIdx.z / m_rp.m_tile[4]; + + for ( index_type n = tile_id5; n < m_rp.m_tile_end[5]; n += numbl5 ) { + const index_type offset_5 = n*m_rp.m_tile[5] + thr_id5; + if ( offset_5 < m_rp.m_upper[5] && thr_id5 < m_rp.m_tile[5] ) { + + for ( index_type m = tile_id4; m < m_rp.m_tile_end[4]; m += numbl4 ) { + const index_type offset_4 = m*m_rp.m_tile[4] + thr_id4; + if ( offset_4 < m_rp.m_upper[4] && thr_id4 < m_rp.m_tile[4] ) { + + for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) { + const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3; + if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) { + + for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) { + const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2; + if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) { + + for ( index_type j = tile_id1 ; j < m_rp.m_tile_end[1]; j += numbl1 ) { + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { + + for ( index_type i = tile_id0 ; i < m_rp.m_tile_end[0]; i += numbl0 ) { + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { + m_func(offset_0 , offset_1 , offset_2 , offset_3, offset_4, offset_5); + } + } + } + } + } + } + } + } + } + } + } + } + } +// LR + else { + index_type temp0 = m_rp.m_tile_end[0]; + index_type temp1 = m_rp.m_tile_end[1]; + const index_type numbl1 = ( temp1 <= max_blocks ? temp1 : max_blocks ) ; + const index_type numbl0 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl1 ) : + ( temp0 <= max_blocks ? 
temp0 : max_blocks ) ); + + const index_type tile_id0 = blockIdx.x / numbl1; + const index_type tile_id1 = blockIdx.x % numbl1; + const index_type thr_id0 = threadIdx.x / m_rp.m_tile[1]; + const index_type thr_id1 = threadIdx.x % m_rp.m_tile[1]; + + temp0 = m_rp.m_tile_end[2]; + temp1 = m_rp.m_tile_end[3]; + const index_type numbl3 = ( temp1 <= max_blocks ? temp1 : max_blocks ) ; + const index_type numbl2 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl3 ) : + ( temp0 <= max_blocks ? temp0 : max_blocks ) ); + + const index_type tile_id2 = blockIdx.y / numbl3; + const index_type tile_id3 = blockIdx.y % numbl3; + const index_type thr_id2 = threadIdx.y / m_rp.m_tile[3]; + const index_type thr_id3 = threadIdx.y % m_rp.m_tile[3]; + + temp0 = m_rp.m_tile_end[4]; + temp1 = m_rp.m_tile_end[5]; + const index_type numbl5 = ( temp1 <= max_blocks ? temp1 : max_blocks ) ; + const index_type numbl4 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl5 ) : + ( temp0 <= max_blocks ? 
temp0 : max_blocks ) ); + + const index_type tile_id4 = blockIdx.z / numbl5; + const index_type tile_id5 = blockIdx.z % numbl5; + const index_type thr_id4 = threadIdx.z / m_rp.m_tile[5]; + const index_type thr_id5 = threadIdx.z % m_rp.m_tile[5]; + + for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) { + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { + + for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) { + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { + + for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) { + const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2; + if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) { + + for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) { + const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3; + if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) { + + for ( index_type m = tile_id4; m < m_rp.m_tile_end[4]; m += numbl4 ) { + const index_type offset_4 = m*m_rp.m_tile[4] + thr_id4; + if ( offset_4 < m_rp.m_upper[4] && thr_id4 < m_rp.m_tile[4] ) { + + for ( index_type n = tile_id5; n < m_rp.m_tile_end[5]; n += numbl5 ) { + const index_type offset_5 = n*m_rp.m_tile[5] + thr_id5; + if ( offset_5 < m_rp.m_upper[5] && thr_id5 < m_rp.m_tile[5] ) { + m_func(offset_0 , offset_1 , offset_2 , offset_3 , offset_4 , offset_5); + } + } + } + } + } + } + } + } + } + } + } + } + } + + } //end exec_range + +private: + const RP & m_rp; + const Functor & m_func; +}; + +// Specializations for tag type +template< typename RP , typename Functor , typename Tag > +struct apply_impl<6,RP,Functor,Tag> +{ + using index_type = typename RP::index_type; + + __device__ + apply_impl( const RP & rp_ , const Functor & f_ ) + : m_rp(rp_) + , m_func(f_) + {} + + static constexpr index_type max_blocks = 65535; + + inline 
__device__ + void exec_range() const + { +// LL + if (RP::inner_direction == RP::Left) { + index_type temp0 = m_rp.m_tile_end[0]; + index_type temp1 = m_rp.m_tile_end[1]; + const index_type numbl0 = ( temp0 <= max_blocks ? temp0 : max_blocks ) ; + const index_type numbl1 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl0 ) : + ( temp1 <= max_blocks ? temp1 : max_blocks ) ); + + const index_type tile_id0 = blockIdx.x % numbl0; + const index_type tile_id1 = blockIdx.x / numbl0; + const index_type thr_id0 = threadIdx.x % m_rp.m_tile[0]; + const index_type thr_id1 = threadIdx.x / m_rp.m_tile[0]; + + temp0 = m_rp.m_tile_end[2]; + temp1 = m_rp.m_tile_end[3]; + const index_type numbl2 = ( temp0 <= max_blocks ? temp0 : max_blocks ) ; + const index_type numbl3 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl2 ) : + ( temp1 <= max_blocks ? temp1 : max_blocks ) ); + + const index_type tile_id2 = blockIdx.y % numbl2; + const index_type tile_id3 = blockIdx.y / numbl2; + const index_type thr_id2 = threadIdx.y % m_rp.m_tile[2]; + const index_type thr_id3 = threadIdx.y / m_rp.m_tile[2]; + + temp0 = m_rp.m_tile_end[4]; + temp1 = m_rp.m_tile_end[5]; + const index_type numbl4 = ( temp0 <= max_blocks ? temp0 : max_blocks ) ; + const index_type numbl5 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl4 ) : + ( temp1 <= max_blocks ? 
temp1 : max_blocks ) ); + + const index_type tile_id4 = blockIdx.z % numbl4; + const index_type tile_id5 = blockIdx.z / numbl4; + const index_type thr_id4 = threadIdx.z % m_rp.m_tile[4]; + const index_type thr_id5 = threadIdx.z / m_rp.m_tile[4]; + + for ( index_type n = tile_id5; n < m_rp.m_tile_end[5]; n += numbl5 ) { + const index_type offset_5 = n*m_rp.m_tile[5] + thr_id5; + if ( offset_5 < m_rp.m_upper[5] && thr_id5 < m_rp.m_tile[5] ) { + + for ( index_type m = tile_id4; m < m_rp.m_tile_end[4]; m += numbl4 ) { + const index_type offset_4 = m*m_rp.m_tile[4] + thr_id4; + if ( offset_4 < m_rp.m_upper[4] && thr_id4 < m_rp.m_tile[4] ) { + + for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) { + const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3; + if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) { + + for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) { + const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2; + if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) { + + for ( index_type j = tile_id1 ; j < m_rp.m_tile_end[1]; j += numbl1 ) { + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { + + for ( index_type i = tile_id0 ; i < m_rp.m_tile_end[0]; i += numbl0 ) { + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { + m_func(Tag() , offset_0 , offset_1 , offset_2 , offset_3, offset_4, offset_5); + } + } + } + } + } + } + } + } + } + } + } + } + } +// LR + else { + index_type temp0 = m_rp.m_tile_end[0]; + index_type temp1 = m_rp.m_tile_end[1]; + const index_type numbl1 = ( temp1 <= max_blocks ? temp1 : max_blocks ) ; + const index_type numbl0 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl1 ) : + ( temp0 <= max_blocks ? 
temp0 : max_blocks ) ); + + const index_type tile_id0 = blockIdx.x / numbl1; + const index_type tile_id1 = blockIdx.x % numbl1; + const index_type thr_id0 = threadIdx.x / m_rp.m_tile[1]; + const index_type thr_id1 = threadIdx.x % m_rp.m_tile[1]; + + temp0 = m_rp.m_tile_end[2]; + temp1 = m_rp.m_tile_end[3]; + const index_type numbl3 = ( temp1 <= max_blocks ? temp1 : max_blocks ) ; + const index_type numbl2 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl3 ) : + ( temp0 <= max_blocks ? temp0 : max_blocks ) ); + + const index_type tile_id2 = blockIdx.y / numbl3; + const index_type tile_id3 = blockIdx.y % numbl3; + const index_type thr_id2 = threadIdx.y / m_rp.m_tile[3]; + const index_type thr_id3 = threadIdx.y % m_rp.m_tile[3]; + + temp0 = m_rp.m_tile_end[4]; + temp1 = m_rp.m_tile_end[5]; + const index_type numbl5 = ( temp1 <= max_blocks ? temp1 : max_blocks ) ; + const index_type numbl4 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl5 ) : + ( temp0 <= max_blocks ? 
temp0 : max_blocks ) ); + + const index_type tile_id4 = blockIdx.z / numbl5; + const index_type tile_id5 = blockIdx.z % numbl5; + const index_type thr_id4 = threadIdx.z / m_rp.m_tile[5]; + const index_type thr_id5 = threadIdx.z % m_rp.m_tile[5]; + + for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) { + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { + + for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) { + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { + + for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) { + const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2; + if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) { + + for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) { + const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3; + if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) { + + for ( index_type m = tile_id4; m < m_rp.m_tile_end[4]; m += numbl4 ) { + const index_type offset_4 = m*m_rp.m_tile[4] + thr_id4; + if ( offset_4 < m_rp.m_upper[4] && thr_id4 < m_rp.m_tile[4] ) { + + for ( index_type n = tile_id5; n < m_rp.m_tile_end[5]; n += numbl5 ) { + const index_type offset_5 = n*m_rp.m_tile[5] + thr_id5; + if ( offset_5 < m_rp.m_upper[5] && thr_id5 < m_rp.m_tile[5] ) { + m_func(Tag() , offset_0 , offset_1 , offset_2 , offset_3 , offset_4 , offset_5); + } + } + } + } + } + } + } + } + } + } + } + } + } + + } //end exec_range + +private: + const RP & m_rp; + const Functor & m_func; +}; + +// ---------------------------------------------------------------------------------- + +template < typename RP + , typename Functor + , typename Tag + > +struct DeviceIterateTile +{ + using index_type = typename RP::index_type; + using array_index_type = typename RP::array_index_type; + using point_type = typename RP::point_type; + + 
struct VoidDummy {}; + typedef typename std::conditional< std::is_same::value, VoidDummy, Tag>::type usable_tag; + + DeviceIterateTile( const RP & rp, const Functor & func ) + : m_rp{rp} + , m_func{func} + {} + +private: + inline __device__ + void apply() const + { + apply_impl(m_rp,m_func).exec_range(); + } //end apply + +public: + + inline + __device__ + void operator()(void) const + { + this-> apply(); + } + + inline + void execute() const + { + const array_index_type maxblocks = 65535; //not true for blockIdx.x for newer archs + if ( RP::rank == 2 ) + { + const dim3 block( m_rp.m_tile[0] , m_rp.m_tile[1] , 1); + const dim3 grid( + std::min( ( m_rp.m_upper[0] - m_rp.m_lower[0] + block.x - 1 ) / block.x , maxblocks ) + , std::min( ( m_rp.m_upper[1] - m_rp.m_lower[1] + block.y - 1 ) / block.y , maxblocks ) + , 1 + ); + CudaLaunch< DeviceIterateTile >( *this , grid , block ); + } + else if ( RP::rank == 3 ) + { + const dim3 block( m_rp.m_tile[0] , m_rp.m_tile[1] , m_rp.m_tile[2] ); + const dim3 grid( + std::min( ( m_rp.m_upper[0] - m_rp.m_lower[0] + block.x - 1 ) / block.x , maxblocks ) + , std::min( ( m_rp.m_upper[1] - m_rp.m_lower[1] + block.y - 1 ) / block.y , maxblocks ) + , std::min( ( m_rp.m_upper[2] - m_rp.m_lower[2] + block.z - 1 ) / block.z , maxblocks ) + ); + CudaLaunch< DeviceIterateTile >( *this , grid , block ); + } + else if ( RP::rank == 4 ) + { + // id0,id1 encoded within threadIdx.x; id2 to threadIdx.y; id3 to threadIdx.z + const dim3 block( m_rp.m_tile[0]*m_rp.m_tile[1] , m_rp.m_tile[2] , m_rp.m_tile[3] ); + const dim3 grid( + std::min( static_cast( m_rp.m_tile_end[0] * m_rp.m_tile_end[1] ) + , static_cast(maxblocks) ) + , std::min( ( m_rp.m_upper[2] - m_rp.m_lower[2] + block.y - 1 ) / block.y , maxblocks ) + , std::min( ( m_rp.m_upper[3] - m_rp.m_lower[3] + block.z - 1 ) / block.z , maxblocks ) + ); + CudaLaunch< DeviceIterateTile >( *this , grid , block ); + } + else if ( RP::rank == 5 ) + { + // id0,id1 encoded within threadIdx.x; id2,id3 to 
threadIdx.y; id4 to threadIdx.z + const dim3 block( m_rp.m_tile[0]*m_rp.m_tile[1] , m_rp.m_tile[2]*m_rp.m_tile[3] , m_rp.m_tile[4] ); + const dim3 grid( + std::min( static_cast( m_rp.m_tile_end[0] * m_rp.m_tile_end[1] ) + , static_cast(maxblocks) ) + , std::min( static_cast( m_rp.m_tile_end[2] * m_rp.m_tile_end[3] ) + , static_cast(maxblocks) ) + , std::min( ( m_rp.m_upper[4] - m_rp.m_lower[4] + block.z - 1 ) / block.z , maxblocks ) + ); + CudaLaunch< DeviceIterateTile >( *this , grid , block ); + } + else if ( RP::rank == 6 ) + { + // id0,id1 encoded within threadIdx.x; id2,id3 to threadIdx.y; id4,id5 to threadIdx.z + const dim3 block( m_rp.m_tile[0]*m_rp.m_tile[1] , m_rp.m_tile[2]*m_rp.m_tile[3] , m_rp.m_tile[4]*m_rp.m_tile[5] ); + const dim3 grid( + std::min( static_cast( m_rp.m_tile_end[0] * m_rp.m_tile_end[1] ) + , static_cast(maxblocks) ) + , std::min( static_cast( m_rp.m_tile_end[2] * m_rp.m_tile_end[3] ) + , static_cast(maxblocks) ) + , std::min( static_cast( m_rp.m_tile_end[4] * m_rp.m_tile_end[5] ) + , static_cast(maxblocks) ) + ); + CudaLaunch< DeviceIterateTile >( *this , grid , block ); + } + else + { + printf("Kokkos::MDRange Error: Exceeded rank bounds with Cuda\n"); + Kokkos::abort("Aborting"); + } + + } //end execute + +protected: + const RP m_rp; + const Functor m_func; +}; + +} } } //end namespace Kokkos::Experimental::Impl + +#endif +#endif diff --git a/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp b/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp index 0a0f41686b..a273db998b 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp @@ -131,6 +131,7 @@ namespace Impl { int* atomic; int* scratch; int* threadid; + int n; }; } } @@ -250,6 +251,7 @@ struct CudaParallelLaunch< DriverType , true > { locks.atomic = atomic_lock_array_cuda_space_ptr(false); locks.scratch = scratch_lock_array_cuda_space_ptr(false); locks.threadid = threadid_lock_array_cuda_space_ptr(false); + locks.n = 
Kokkos::Cuda::concurrency(); cudaMemcpyToSymbol( kokkos_impl_cuda_lock_arrays , & locks , sizeof(CudaLockArraysStruct) ); #endif @@ -292,6 +294,7 @@ struct CudaParallelLaunch< DriverType , false > { locks.atomic = atomic_lock_array_cuda_space_ptr(false); locks.scratch = scratch_lock_array_cuda_space_ptr(false); locks.threadid = threadid_lock_array_cuda_space_ptr(false); + locks.n = Kokkos::Cuda::concurrency(); cudaMemcpyToSymbol( kokkos_impl_cuda_lock_arrays , & locks , sizeof(CudaLockArraysStruct) ); #endif diff --git a/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp index 91a3c92138..303b3fa4f6 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -59,7 +59,7 @@ #include #include -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) #include #endif @@ -184,7 +184,7 @@ void * CudaUVMSpace::allocate( const size_t arg_alloc_size ) const enum { max_uvm_allocations = 65536 }; - if ( arg_alloc_size > 0 ) + if ( arg_alloc_size > 0 ) { Kokkos::Impl::num_uvm_allocations++; @@ -193,7 +193,7 @@ void * CudaUVMSpace::allocate( const size_t arg_alloc_size ) const } CUDA_SAFE_CALL( cudaMallocManaged( &ptr, arg_alloc_size , cudaMemAttachGlobal ) ); - } + } return ptr ; } @@ -375,7 +375,7 @@ deallocate( SharedAllocationRecord< void , void > * arg_rec ) SharedAllocationRecord< Kokkos::CudaSpace , void >:: ~SharedAllocationRecord() { - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { SharedAllocationHeader header ; @@ -395,7 +395,7 @@ SharedAllocationRecord< Kokkos::CudaSpace , void >:: SharedAllocationRecord< Kokkos::CudaUVMSpace , void >:: ~SharedAllocationRecord() { - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::fence(); //Make sure I can access the label ... 
Kokkos::Profiling::deallocateData( @@ -412,7 +412,7 @@ SharedAllocationRecord< Kokkos::CudaUVMSpace , void >:: SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >:: ~SharedAllocationRecord() { - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::deallocateData( Kokkos::Profiling::SpaceHandle(Kokkos::CudaHostPinnedSpace::name()),RecordBase::m_alloc_ptr->m_label, @@ -442,7 +442,7 @@ SharedAllocationRecord( const Kokkos::CudaSpace & arg_space , m_tex_obj( 0 ) , m_space( arg_space ) { - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::allocateData(Kokkos::Profiling::SpaceHandle(arg_space.name()),arg_label,data(),arg_alloc_size); } @@ -479,7 +479,7 @@ SharedAllocationRecord( const Kokkos::CudaUVMSpace & arg_space , m_tex_obj( 0 ) , m_space( arg_space ) { - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::allocateData(Kokkos::Profiling::SpaceHandle(arg_space.name()),arg_label,data(),arg_alloc_size); } @@ -510,7 +510,7 @@ SharedAllocationRecord( const Kokkos::CudaHostPinnedSpace & arg_space ) , m_space( arg_space ) { - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::allocateData(Kokkos::Profiling::SpaceHandle(arg_space.name()),arg_label,data(),arg_alloc_size); } @@ -745,14 +745,14 @@ print_records( std::ostream & s , const Kokkos::CudaSpace & space , bool detail //Formatting dependent on sizeof(uintptr_t) const char * format_string; - if (sizeof(uintptr_t) == sizeof(unsigned long)) { + if (sizeof(uintptr_t) == sizeof(unsigned long)) { format_string = "Cuda addr( 0x%.12lx ) list( 0x%.12lx 0x%.12lx ) extent[ 0x%.12lx + %.8ld ] count(%d) dealloc(0x%.12lx) %s\n"; } - else if (sizeof(uintptr_t) == sizeof(unsigned long long)) { 
+ else if (sizeof(uintptr_t) == sizeof(unsigned long long)) { format_string = "Cuda addr( 0x%.12llx ) list( 0x%.12llx 0x%.12llx ) extent[ 0x%.12llx + %.8ld ] count(%d) dealloc(0x%.12llx) %s\n"; } - snprintf( buffer , 256 + snprintf( buffer , 256 , format_string , reinterpret_cast( r ) , reinterpret_cast( r->m_prev ) @@ -776,14 +776,14 @@ print_records( std::ostream & s , const Kokkos::CudaSpace & space , bool detail //Formatting dependent on sizeof(uintptr_t) const char * format_string; - if (sizeof(uintptr_t) == sizeof(unsigned long)) { + if (sizeof(uintptr_t) == sizeof(unsigned long)) { format_string = "Cuda [ 0x%.12lx + %ld ] %s\n"; } - else if (sizeof(uintptr_t) == sizeof(unsigned long long)) { + else if (sizeof(uintptr_t) == sizeof(unsigned long long)) { format_string = "Cuda [ 0x%.12llx + %ld ] %s\n"; } - snprintf( buffer , 256 + snprintf( buffer , 256 , format_string , reinterpret_cast< uintptr_t >( r->data() ) , r->size() @@ -883,6 +883,7 @@ void init_lock_arrays_cuda_space() { locks.atomic = atomic_lock_array_cuda_space_ptr(false); locks.scratch = scratch_lock_array_cuda_space_ptr(false); locks.threadid = threadid_lock_array_cuda_space_ptr(false); + locks.n = Kokkos::Cuda::concurrency(); cudaMemcpyToSymbol( kokkos_impl_cuda_lock_arrays , & locks , sizeof(CudaLockArraysStruct) ); init_lock_array_kernel_atomic<<<(CUDA_SPACE_ATOMIC_MASK+255)/256,256>>>(); init_lock_array_kernel_scratch_threadid<<<(Kokkos::Cuda::concurrency()+255)/256,256>>>(Kokkos::Cuda::concurrency()); diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp index eeea97049f..44d908d102 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 
2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -505,18 +505,18 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count ) std::cout << "Kokkos::Cuda::initialize WARNING: Cuda is allocating into UVMSpace by default" << std::endl; std::cout << " without setting CUDA_LAUNCH_BLOCKING=1." << std::endl; std::cout << " The code must call Cuda::fence() after each kernel" << std::endl; - std::cout << " or will likely crash when accessing data on the host." << std::endl; + std::cout << " or will likely crash when accessing data on the host." 
<< std::endl; } const char * env_force_device_alloc = getenv("CUDA_MANAGED_FORCE_DEVICE_ALLOC"); bool force_device_alloc; if (env_force_device_alloc == 0) force_device_alloc=false; else force_device_alloc=atoi(env_force_device_alloc)!=0; - + const char * env_visible_devices = getenv("CUDA_VISIBLE_DEVICES"); bool visible_devices_one=true; if (env_visible_devices == 0) visible_devices_one=false; - + if(!visible_devices_one && !force_device_alloc) { std::cout << "Kokkos::Cuda::initialize WARNING: Cuda is allocating into UVMSpace by default" << std::endl; std::cout << " without setting CUDA_MANAGED_FORCE_DEVICE_ALLOC=1 or " << std::endl; @@ -536,6 +536,7 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count ) locks.atomic = atomic_lock_array_cuda_space_ptr(false); locks.scratch = scratch_lock_array_cuda_space_ptr(false); locks.threadid = threadid_lock_array_cuda_space_ptr(false); + locks.n = Kokkos::Cuda::concurrency(); cudaMemcpyToSymbol( kokkos_impl_cuda_lock_arrays , & locks , sizeof(CudaLockArraysStruct) ); #endif } @@ -620,9 +621,9 @@ void CudaInternal::finalize() was_finalized = 1; if ( 0 != m_scratchSpace || 0 != m_scratchFlags ) { - atomic_lock_array_cuda_space_ptr(false); - scratch_lock_array_cuda_space_ptr(false); - threadid_lock_array_cuda_space_ptr(false); + atomic_lock_array_cuda_space_ptr(true); + scratch_lock_array_cuda_space_ptr(true); + threadid_lock_array_cuda_space_ptr(true); if ( m_stream ) { for ( size_type i = 1 ; i < m_streamCount ; ++i ) { @@ -700,7 +701,7 @@ void Cuda::initialize( const Cuda::SelectDevice config , size_t num_instances ) { Impl::CudaInternal::singleton().initialize( config.cuda_device_id , num_instances ); - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) Kokkos::Profiling::initialize(); #endif } @@ -739,7 +740,7 @@ void Cuda::finalize() { Impl::CudaInternal::singleton().finalize(); - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) Kokkos::Profiling::finalize(); #endif 
} diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp index fa29d732f4..56e6a3c1e3 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp @@ -61,7 +61,7 @@ #include #include -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) #include #include #endif @@ -586,13 +586,35 @@ public: void operator()(void) const { // Iterate this block through the league + int threadid = 0; + if ( m_scratch_size[1]>0 ) { + __shared__ int base_thread_id; + if (threadIdx.x==0 && threadIdx.y==0 ) { + threadid = ((blockIdx.x*blockDim.z + threadIdx.z) * blockDim.x * blockDim.y) % kokkos_impl_cuda_lock_arrays.n; + threadid = ((threadid + blockDim.x * blockDim.y-1)/(blockDim.x * blockDim.y)) * blockDim.x * blockDim.y; + if(threadid > kokkos_impl_cuda_lock_arrays.n) threadid-=blockDim.x * blockDim.y; + int done = 0; + while (!done) { + done = (0 == atomicCAS(&kokkos_impl_cuda_lock_arrays.atomic[threadid],0,1)); + if(!done) { + threadid += blockDim.x * blockDim.y; + if(threadid > kokkos_impl_cuda_lock_arrays.n) threadid = 0; + } + } + base_thread_id = threadid; + } + __syncthreads(); + threadid = base_thread_id; + } + + for ( int league_rank = blockIdx.x ; league_rank < m_league_size ; league_rank += gridDim.x ) { this-> template exec_team< WorkTag >( typename Policy::member_type( kokkos_impl_cuda_shared_memory() , m_shmem_begin , m_shmem_size - , m_scratch_ptr[1] + , (void*) ( ((char*)m_scratch_ptr[1]) + threadid/(blockDim.x*blockDim.y) * m_scratch_size[1]) , m_scratch_size[1] , league_rank , m_league_size ) ); @@ -946,11 +968,32 @@ public: __device__ inline void operator() () const { - run(Kokkos::Impl::if_c::select(1,1.0) ); + int threadid = 0; + if ( m_scratch_size[1]>0 ) { + __shared__ int base_thread_id; + if (threadIdx.x==0 && threadIdx.y==0 ) { + threadid = ((blockIdx.x*blockDim.z + threadIdx.z) * blockDim.x * blockDim.y) % 
kokkos_impl_cuda_lock_arrays.n; + threadid = ((threadid + blockDim.x * blockDim.y-1)/(blockDim.x * blockDim.y)) * blockDim.x * blockDim.y; + if(threadid > kokkos_impl_cuda_lock_arrays.n) threadid-=blockDim.x * blockDim.y; + int done = 0; + while (!done) { + done = (0 == atomicCAS(&kokkos_impl_cuda_lock_arrays.atomic[threadid],0,1)); + if(!done) { + threadid += blockDim.x * blockDim.y; + if(threadid > kokkos_impl_cuda_lock_arrays.n) threadid = 0; + } + } + base_thread_id = threadid; + } + __syncthreads(); + threadid = base_thread_id; + } + + run(Kokkos::Impl::if_c::select(1,1.0), threadid ); } __device__ inline - void run(const DummySHMEMReductionType&) const + void run(const DummySHMEMReductionType&, const int& threadid) const { const integral_nonzero_constant< size_type , ValueTraits::StaticValueSize / sizeof(size_type) > word_count( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) / sizeof(size_type) ); @@ -964,7 +1007,7 @@ public: ( Member( kokkos_impl_cuda_shared_memory() + m_team_begin , m_shmem_begin , m_shmem_size - , m_scratch_ptr[1] + , (void*) ( ((char*)m_scratch_ptr[1]) + threadid/(blockDim.x*blockDim.y) * m_scratch_size[1]) , m_scratch_size[1] , league_rank , m_league_size ) @@ -992,7 +1035,7 @@ public: } __device__ inline - void run(const DummyShflReductionType&) const + void run(const DummyShflReductionType&, const int& threadid) const { value_type value; ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , &value); @@ -1003,7 +1046,7 @@ public: ( Member( kokkos_impl_cuda_shared_memory() + m_team_begin , m_shmem_begin , m_shmem_size - , m_scratch_ptr[1] + , (void*) ( ((char*)m_scratch_ptr[1]) + threadid/(blockDim.x*blockDim.y) * m_scratch_size[1]) , m_scratch_size[1] , league_rank , m_league_size ) @@ -1128,9 +1171,9 @@ public: Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelReduce< Cuda > requested too much L0 scratch memory")); } - if ( m_team_size > - 
Kokkos::Impl::cuda_get_max_block_size< ParallelReduce > - ( arg_functor , arg_policy.vector_length(), arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / arg_policy.vector_length()) { + if ( unsigned(m_team_size) > + unsigned(Kokkos::Impl::cuda_get_max_block_size< ParallelReduce > + ( arg_functor , arg_policy.vector_length(), arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / arg_policy.vector_length())) { Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelReduce< Cuda > requested too large team size.")); } @@ -1621,14 +1664,25 @@ void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct& - loop_boundaries, const Lambda & lambda, ValueType& result) { +void parallel_reduce + ( Impl::ThreadVectorRangeBoundariesStruct + const & loop_boundaries + , Lambda const & lambda + , ValueType & result ) +{ #ifdef __CUDA_ARCH__ result = ValueType(); @@ -1636,52 +1690,42 @@ void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct 1) - result += shfl_down(result, 1,loop_boundaries.increment); - if (loop_boundaries.increment > 2) - result += shfl_down(result, 2,loop_boundaries.increment); - if (loop_boundaries.increment > 4) - result += shfl_down(result, 4,loop_boundaries.increment); - if (loop_boundaries.increment > 8) - result += shfl_down(result, 8,loop_boundaries.increment); - if (loop_boundaries.increment > 16) - result += shfl_down(result, 16,loop_boundaries.increment); + Impl::cuda_intra_warp_vector_reduce( + Impl::Reducer< ValueType , Impl::ReduceSum< ValueType > >( & result ) ); - result = shfl(result,0,loop_boundaries.increment); #endif } -/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. +/** \brief Intra-thread vector parallel_reduce. 
* - * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of - * val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result. - * The input value of init_result is used as initializer for temporary variables of ValueType. Therefore - * the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or - * '1 for *'). This functionality requires C++11 support.*/ + * Calls lambda(iType i, ValueType & val) for each i=[0..N). + * + * The range [0..N) is mapped to all vector lanes of + * the calling thread and a reduction of val is performed + * using JoinType::operator()(ValueType& val, const ValueType& update) + * and output into result. + * + * The input value of result must be the identity value for the + * reduction operation; e.g., ( 0 , += ) or ( 1 , *= ). + */ template< typename iType, class Lambda, typename ValueType, class JoinType > KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct& - loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) { - +void parallel_reduce + ( Impl::ThreadVectorRangeBoundariesStruct + const & loop_boundaries + , Lambda const & lambda + , JoinType const & join + , ValueType & result ) +{ #ifdef __CUDA_ARCH__ - ValueType result = init_result; for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { lambda(i,result); } - if (loop_boundaries.increment > 1) - join( result, shfl_down(result, 1,loop_boundaries.increment)); - if (loop_boundaries.increment > 2) - join( result, shfl_down(result, 2,loop_boundaries.increment)); - if (loop_boundaries.increment > 4) - join( result, shfl_down(result, 4,loop_boundaries.increment)); - if (loop_boundaries.increment > 8) - join( result, shfl_down(result, 8,loop_boundaries.increment)); - if (loop_boundaries.increment > 16) - join( result, shfl_down(result, 16,loop_boundaries.increment)); 
+ Impl::cuda_intra_warp_vector_reduce( + Impl::Reducer< ValueType , JoinType >( join , & result ) ); - init_result = shfl(result,0,loop_boundaries.increment); #endif } diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp index ad9cca26ce..79b3867ba2 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp @@ -55,15 +55,163 @@ #include #include #include + //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { namespace Impl { +//---------------------------------------------------------------------------- +template< typename T > +__device__ inline +void cuda_shfl( T & out , T const & in , int lane , + typename std::enable_if< sizeof(int) == sizeof(T) , int >::type width ) +{ + *reinterpret_cast(&out) = + __shfl( *reinterpret_cast(&in) , lane , width ); +} -//Shfl based reductions +template< typename T > +__device__ inline +void cuda_shfl( T & out , T const & in , int lane , + typename std::enable_if + < ( sizeof(int) < sizeof(T) ) && ( 0 == ( sizeof(T) % sizeof(int) ) ) + , int >::type width ) +{ + enum : int { N = sizeof(T) / sizeof(int) }; + + for ( int i = 0 ; i < N ; ++i ) { + reinterpret_cast(&out)[i] = + __shfl( reinterpret_cast(&in)[i] , lane , width ); + } +} + +//---------------------------------------------------------------------------- + +template< typename T > +__device__ inline +void cuda_shfl_down( T & out , T const & in , int delta , + typename std::enable_if< sizeof(int) == sizeof(T) , int >::type width ) +{ + *reinterpret_cast(&out) = + __shfl_down( *reinterpret_cast(&in) , delta , width ); +} + +template< typename T > +__device__ inline +void cuda_shfl_down( T & out , T const & in , int delta , + typename std::enable_if + < ( sizeof(int) < sizeof(T) ) && ( 0 == ( sizeof(T) % sizeof(int) ) ) + , int 
>::type width ) +{ + enum : int { N = sizeof(T) / sizeof(int) }; + + for ( int i = 0 ; i < N ; ++i ) { + reinterpret_cast(&out)[i] = + __shfl_down( reinterpret_cast(&in)[i] , delta , width ); + } +} + +//---------------------------------------------------------------------------- + +template< typename T > +__device__ inline +void cuda_shfl_up( T & out , T const & in , int delta , + typename std::enable_if< sizeof(int) == sizeof(T) , int >::type width ) +{ + *reinterpret_cast(&out) = + __shfl_up( *reinterpret_cast(&in) , delta , width ); +} + +template< typename T > +__device__ inline +void cuda_shfl_up( T & out , T const & in , int delta , + typename std::enable_if + < ( sizeof(int) < sizeof(T) ) && ( 0 == ( sizeof(T) % sizeof(int) ) ) + , int >::type width ) +{ + enum : int { N = sizeof(T) / sizeof(int) }; + + for ( int i = 0 ; i < N ; ++i ) { + reinterpret_cast(&out)[i] = + __shfl_up( reinterpret_cast(&in)[i] , delta , width ); + } +} + +//---------------------------------------------------------------------------- +/** \brief Reduce within a warp over blockDim.x, the "vector" dimension. + * + * This will be called within a nested, intra-team parallel operation. + * Use shuffle operations to avoid conflicts with shared memory usage. + * + * Requires: + * blockDim.x is power of 2 + * blockDim.x <= 32 (one warp) + * + * Cannot use "butterfly" pattern because floating point + * addition is non-associative. Therefore, must broadcast + * the final result. 
+ */ +template< class Reducer > +__device__ inline +void cuda_intra_warp_vector_reduce( Reducer const & reducer ) +{ + static_assert( + std::is_reference< typename Reducer::reference_type >::value , "" ); + + if ( 1 < blockDim.x ) { + + typename Reducer::value_type tmp ; + + for ( int i = blockDim.x ; ( i >>= 1 ) ; ) { + + cuda_shfl_down( tmp , reducer.reference() , i , blockDim.x ); + + if ( threadIdx.x < i ) { reducer.join( reducer.data() , & tmp ); } + } + + // Broadcast from root "lane" to all other "lanes" + + cuda_shfl( reducer.reference() , reducer.reference() , 0 , blockDim.x ); + } +} + +/** \brief Inclusive scan over blockDim.x, the "vector" dimension. + * + * This will be called within a nested, intra-team parallel operation. + * Use shuffle operations to avoid conflicts with shared memory usage. + * + * Algorithm is concurrent bottom-up reductions in triangular pattern + * where each CUDA thread is the root of a reduction tree from the + * zeroth CUDA thread to itself. + * + * Requires: + * blockDim.x is power of 2 + * blockDim.x <= 32 (one warp) + */ +template< typename ValueType > +__device__ inline +void cuda_intra_warp_vector_inclusive_scan( ValueType & local ) +{ + ValueType tmp ; + + // Bottom up: + // [t] += [t-1] if t >= 1 + // [t] += [t-2] if t >= 2 + // [t] += [t-4] if t >= 4 + // ... 
+ + for ( int i = 1 ; i < blockDim.x ; i <<= 1 ) { + + cuda_shfl_up( tmp , local , i , blockDim.x ); + + if ( i <= threadIdx.x ) { local += tmp ; } + } +} + +//---------------------------------------------------------------------------- /* * Algorithmic constraints: * (a) threads with same threadIdx.y have same value @@ -98,7 +246,10 @@ inline void cuda_inter_warp_reduction( ValueType& value, const int max_active_thread = blockDim.y) { #define STEP_WIDTH 4 - __shared__ char sh_result[sizeof(ValueType)*STEP_WIDTH]; + // Depending on the ValueType _shared__ memory must be aligned up to 8byte boundaries + // The reason not to use ValueType directly is that for types with constructors it + // could lead to race conditions + __shared__ double sh_result[(sizeof(ValueType)+7)/8*STEP_WIDTH]; ValueType* result = (ValueType*) & sh_result; const unsigned step = 32 / blockDim.x; unsigned shift = STEP_WIDTH; diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp index c96b8b7d40..cf3e55d50c 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp @@ -91,7 +91,7 @@ void TaskQueueSpecialization< Kokkos::Cuda >::driver // Loop by priority and then type for ( int i = 0 ; i < Queue::NumQueue && end == task.ptr ; ++i ) { for ( int j = 0 ; j < 2 && end == task.ptr ; ++j ) { - task.ptr = Queue::pop_task( & queue->m_ready[i][j] ); + task.ptr = Queue::pop_ready_task( & queue->m_ready[i][j] ); } } diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp index 479294f307..a13e37837d 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp @@ -61,6 +61,8 @@ void set_cuda_task_base_apply_function_pointer } +template< class > class TaskExec ; + template<> class TaskQueueSpecialization< Kokkos::Cuda > { @@ -69,6 +71,7 @@ public: using execution_space = Kokkos::Cuda ; using memory_space = 
Kokkos::CudaUVMSpace ; using queue_type = TaskQueue< execution_space > ; + using member_type = TaskExec< Kokkos::Cuda > ; static void iff_single_thread_recursive_execute( queue_type * const ) {} @@ -79,13 +82,15 @@ public: static void execute( queue_type * const ); - template< typename FunctorType > + template< typename TaskType > static - void proc_set_apply( TaskBase::function_type * ptr ) + typename TaskType::function_type + get_function_pointer() { - using TaskType = TaskBase< execution_space - , typename FunctorType::value_type - , FunctorType > ; + using function_type = typename TaskType::function_type ; + + function_type * const ptr = + (function_type*) cuda_internal_scratch_unified( sizeof(function_type) ); CUDA_SAFE_CALL( cudaDeviceSynchronize() ); @@ -93,6 +98,8 @@ public: CUDA_SAFE_CALL( cudaGetLastError() ); CUDA_SAFE_CALL( cudaDeviceSynchronize() ); + + return *ptr ; } }; @@ -435,18 +442,26 @@ void parallel_reduce // blockDim.y == team_size // threadIdx.x == position in vec // threadIdx.y == member number -template< typename ValueType, typename iType, class Lambda > +template< typename iType, class Closure > KOKKOS_INLINE_FUNCTION void parallel_scan (const Impl::TeamThreadRangeBoundariesStruct >& loop_boundaries, - const Lambda & lambda) { + const Closure & closure ) +{ + // Extract value_type from closure - ValueType accum = 0 ; - ValueType val, y, local_total; + using value_type = + typename Kokkos::Impl::FunctorAnalysis + < Kokkos::Impl::FunctorPatternInterface::SCAN + , void + , Closure >::value_type ; + + value_type accum = 0 ; + value_type val, y, local_total; for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { val = 0; - lambda(i,val,false); + closure(i,val,false); // intra-blockDim.y exclusive scan on 'val' // accum = accumulated, sum in total for this iteration @@ -458,7 +473,7 @@ void parallel_scan } // pass accum to all threads - local_total = shfl_warp_broadcast(val, + local_total = 
shfl_warp_broadcast(val, threadIdx.x+Impl::CudaTraits::WarpSize-blockDim.x, Impl::CudaTraits::WarpSize); @@ -467,7 +482,7 @@ void parallel_scan if ( threadIdx.y == 0 ) { val = 0 ; } val += accum; - lambda(i,val,true); + closure(i,val,true); accum += local_total; } } @@ -478,18 +493,26 @@ void parallel_scan // blockDim.y == team_size // threadIdx.x == position in vec // threadIdx.y == member number -template< typename iType, class Lambda, typename ValueType > +template< typename iType, class Closure > KOKKOS_INLINE_FUNCTION void parallel_scan (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, - const Lambda & lambda) + const Closure & closure ) { - ValueType accum = 0 ; - ValueType val, y, local_total; + // Extract value_type from closure + + using value_type = + typename Kokkos::Impl::FunctorAnalysis + < Kokkos::Impl::FunctorPatternInterface::SCAN + , void + , Closure >::value_type ; + + value_type accum = 0 ; + value_type val, y, local_total; for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { val = 0; - lambda(i,val,false); + closure(i,val,false); // intra-blockDim.x exclusive scan on 'val' // accum = accumulated, sum in total for this iteration @@ -501,14 +524,14 @@ void parallel_scan } // pass accum to all threads - local_total = shfl_warp_broadcast(val, blockDim.x-1, blockDim.x); + local_total = shfl_warp_broadcast(val, blockDim.x-1, blockDim.x); // make EXCLUSIVE scan by shifting values over one val = Kokkos::shfl_up(val, 1, blockDim.x); if ( threadIdx.x == 0 ) { val = 0 ; } val += accum; - lambda(i,val,true); + closure(i,val,true); accum += local_total; } } diff --git a/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp b/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp index 4e1ce855c5..a450ca36ae 100644 --- a/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp +++ b/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp @@ -44,36 +44,47 @@ #ifndef KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP #define 
KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP -#include -#include #include -#if defined(KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION) && defined(KOKKOS_ENABLE_PRAGMA_IVDEP) && !defined(__CUDA_ARCH__) -#define KOKKOS_IMPL_MDRANGE_IVDEP +#include +#include +#include + +#if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA ) +#include #endif namespace Kokkos { namespace Experimental { +// ------------------------------------------------------------------ // + enum class Iterate { Default, // Default for the device Left, // Left indices stride fastest Right, // Right indices stride fastest - Flat, // Do not tile, only valid for inner direction }; template struct default_outer_direction { using type = Iterate; + #if defined( KOKKOS_ENABLE_CUDA) + static constexpr Iterate value = Iterate::Left; + #else static constexpr Iterate value = Iterate::Right; + #endif }; template struct default_inner_direction { using type = Iterate; + #if defined( KOKKOS_ENABLE_CUDA) + static constexpr Iterate value = Iterate::Left; + #else static constexpr Iterate value = Iterate::Right; + #endif }; @@ -86,7 +97,7 @@ struct Rank { static_assert( N != 0u, "Kokkos Error: rank 0 undefined"); static_assert( N != 1u, "Kokkos Error: rank 1 is not a multi-dimensional range"); - static_assert( N < 4u, "Kokkos Error: Unsupported rank..."); + static_assert( N < 7u, "Kokkos Error: Unsupported rank..."); using iteration_pattern = Rank; @@ -96,498 +107,236 @@ struct Rank }; - // multi-dimensional iteration pattern template struct MDRangePolicy + : public Kokkos::Impl::PolicyTraits { + using traits = Kokkos::Impl::PolicyTraits; using range_policy = RangePolicy; - static_assert( !std::is_same::value + using impl_range_policy = RangePolicy< typename traits::execution_space + , typename traits::schedule_type + , typename traits::index_type + > ; + + static_assert( !std::is_same::value , "Kokkos Error: MD iteration pattern not defined" ); - using iteration_pattern = typename range_policy::iteration_pattern; - using 
work_tag = typename range_policy::work_tag; + using iteration_pattern = typename traits::iteration_pattern; + using work_tag = typename traits::work_tag; static constexpr int rank = iteration_pattern::rank; static constexpr int outer_direction = static_cast ( - (iteration_pattern::outer_direction != Iterate::Default && iteration_pattern::outer_direction != Iterate::Flat) + (iteration_pattern::outer_direction != Iterate::Default) ? iteration_pattern::outer_direction - : default_outer_direction< typename range_policy::execution_space>::value ); + : default_outer_direction< typename traits::execution_space>::value ); static constexpr int inner_direction = static_cast ( iteration_pattern::inner_direction != Iterate::Default ? iteration_pattern::inner_direction - : default_inner_direction< typename range_policy::execution_space>::value ) ; + : default_inner_direction< typename traits::execution_space>::value ) ; // Ugly ugly workaround intel 14 not handling scoped enum correctly - static constexpr int Flat = static_cast( Iterate::Flat ); static constexpr int Right = static_cast( Iterate::Right ); + static constexpr int Left = static_cast( Iterate::Left ); + using index_type = typename traits::index_type; + using array_index_type = long; + using point_type = Kokkos::Array; //was index_type + using tile_type = Kokkos::Array; + // If point_type or tile_type is not templated on a signed integral type (if it is unsigned), + // then if user passes in intializer_list of runtime-determined values of + // signed integral type that are not const will receive a compiler error due + // to an invalid case for implicit conversion - + // "conversion from integer or unscoped enumeration type to integer type that cannot represent all values of the original, except where source is a constant expression whose value can be stored exactly in the target type" + // This would require the user to either pass a matching index_type parameter + // as template parameter to the MDRangePolicy or 
static_cast the individual values - using size_type = typename range_policy::index_type; - using index_type = typename std::make_signed::type; - - - template - MDRangePolicy( std::initializer_list upper_corner ) + MDRangePolicy( point_type const& lower, point_type const& upper, tile_type const& tile = tile_type{} ) + : m_lower(lower) + , m_upper(upper) + , m_tile(tile) + , m_num_tiles(1) { - static_assert( std::is_integral::value, "Kokkos Error: corner defined with non-integral type" ); - - // TODO check size of lists equal to rank - // static_asserts on initializer_list.size() require c++14 - - //static_assert( upper_corner.size() == rank, "Kokkos Error: upper_corner has incorrect rank" ); - - const auto u = upper_corner.begin(); - - m_num_tiles = 1; - for (int i=0; i(0); - m_dim[i] = static_cast(u[i]); - if (inner_direction != Flat) { - // default tile size to 4 - m_tile[i] = 4; - } else { - m_tile[i] = 1; + // Host + if ( true + #if defined(KOKKOS_ENABLE_CUDA) + && !std::is_same< typename traits::execution_space, Kokkos::Cuda >::value + #endif + ) + { + index_type span; + for (int i=0; i 0)) ) + { + m_tile[i] = 2; + } + else { + m_tile[i] = span; + } + } + m_tile_end[i] = static_cast((span + m_tile[i] - 1) / m_tile[i]); + m_num_tiles *= m_tile_end[i]; } - m_tile_dim[i] = (m_dim[i] + (m_tile[i] - 1)) / m_tile[i]; - m_num_tiles *= m_tile_dim[i]; } - } - - template - MDRangePolicy( std::initializer_list corner_a - , std::initializer_list corner_b - ) - { - static_assert( std::is_integral::value, "Kokkos Error: corner A defined with non-integral type" ); - static_assert( std::is_integral::value, "Kokkos Error: corner B defined with non-integral type" ); - - // TODO check size of lists equal to rank - // static_asserts on initializer_list.size() require c++14 - //static_assert( corner_a.size() == rank, "Kokkos Error: corner_a has incorrect rank" ); - //static_assert( corner_b.size() == rank, "Kokkos Error: corner_b has incorrect rank" ); - - - using A = typename 
std::make_signed::type; - using B = typename std::make_signed::type; - - const auto a = [=](int i) { return static_cast(corner_a.begin()[i]); }; - const auto b = [=](int i) { return static_cast(corner_b.begin()[i]); }; - - m_num_tiles = 1; - for (int i=0; i(a(i) <= b(i) ? a(i) : b(i)); - m_dim[i] = static_cast(a(i) <= b(i) ? b(i) - a(i) : a(i) - b(i)); - if (inner_direction != Flat) { - // default tile size to 4 - m_tile[i] = 4; - } else { - m_tile[i] = 1; + #if defined(KOKKOS_ENABLE_CUDA) + else // Cuda + { + index_type span; + for (int i=0; i 0)) ) + { + m_tile[i] = 2; + } + else { + m_tile[i] = 16; + } + } + m_tile_end[i] = static_cast((span + m_tile[i] - 1) / m_tile[i]); + m_num_tiles *= m_tile_end[i]; + } + index_type total_tile_size_check = 1; + for (int i=0; i= 1024 ) { // improve this check - 1024,1024,64 max per dim (Kepler), but product num_threads < 1024; more restrictions pending register limit + printf(" Tile dimensions exceed Cuda limits\n"); + Kokkos::abort(" Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims"); + //Kokkos::Impl::throw_runtime_exception( " Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims"); } - m_tile_dim[i] = (m_dim[i] + (m_tile[i] - 1)) / m_tile[i]; - m_num_tiles *= m_tile_dim[i]; } + #endif } - template - MDRangePolicy( std::initializer_list corner_a - , std::initializer_list corner_b - , std::initializer_list tile - ) + + template < typename LT , typename UT , typename TT = array_index_type > + MDRangePolicy( std::initializer_list const& lower, std::initializer_list const& upper, std::initializer_list const& tile = {} ) { - static_assert( std::is_integral::value, "Kokkos Error: corner A defined with non-integral type" ); - static_assert( std::is_integral::value, "Kokkos Error: corner B defined with non-integral type" ); - static_assert( std::is_integral::value, "Kokkos Error: tile defined with non-integral type" 
); - static_assert( inner_direction != Flat, "Kokkos Error: tiling not support with flat iteration" ); +#if 0 + // This should work, less duplicated code but not yet extensively tested + point_type lower_tmp, upper_tmp; + tile_type tile_tmp; + for ( auto i = 0; i < rank; ++i ) { + lower_tmp[i] = static_cast(lower.begin()[i]); + upper_tmp[i] = static_cast(upper.begin()[i]); + tile_tmp[i] = static_cast(tile.begin()[i]); + } - // TODO check size of lists equal to rank - // static_asserts on initializer_list.size() require c++14 - //static_assert( corner_a.size() == rank, "Kokkos Error: corner_a has incorrect rank" ); - //static_assert( corner_b.size() == rank, "Kokkos Error: corner_b has incorrect rank" ); - //static_assert( tile.size() == rank, "Kokkos Error: tile has incorrect rank" ); + MDRangePolicy( lower_tmp, upper_tmp, tile_tmp ); - using A = typename std::make_signed::type; - using B = typename std::make_signed::type; +#else + if(m_lower.size()!=rank || m_upper.size() != rank) + Kokkos::abort("MDRangePolicy: Constructor initializer lists have wrong size"); - const auto a = [=](int i) { return static_cast(corner_a.begin()[i]); }; - const auto b = [=](int i) { return static_cast(corner_b.begin()[i]); }; - const auto t = tile.begin(); + for ( auto i = 0; i < rank; ++i ) { + m_lower[i] = static_cast(lower.begin()[i]); + m_upper[i] = static_cast(upper.begin()[i]); + if(tile.size()==rank) + m_tile[i] = static_cast(tile.begin()[i]); + else + m_tile[i] = 0; + } m_num_tiles = 1; - for (int i=0; i(a(i) <= b(i) ? a(i) : b(i)); - m_dim[i] = static_cast(a(i) <= b(i) ? b(i) - a(i) : a(i) - b(i)); - m_tile[i] = static_cast(t[i] > (T)0 ? 
t[i] : (T)1 ); - m_tile_dim[i] = (m_dim[i] + (m_tile[i] - 1)) / m_tile[i]; - m_num_tiles *= m_tile_dim[i]; + + + // Host + if ( true + #if defined(KOKKOS_ENABLE_CUDA) + && !std::is_same< typename traits::execution_space, Kokkos::Cuda >::value + #endif + ) + { + index_type span; + for (int i=0; i 0)) ) + { + m_tile[i] = 2; + } + else { + m_tile[i] = span; + } + } + m_tile_end[i] = static_cast((span + m_tile[i] - 1) / m_tile[i]); + m_num_tiles *= m_tile_end[i]; + } } + #if defined(KOKKOS_ENABLE_CUDA) + else // Cuda + { + index_type span; + for (int i=0; i 0)) ) + { + m_tile[i] = 2; + } + else { + m_tile[i] = 16; + } + } + m_tile_end[i] = static_cast((span + m_tile[i] - 1) / m_tile[i]); + m_num_tiles *= m_tile_end[i]; + } + index_type total_tile_size_check = 1; + for (int i=0; i= 1024 ) { // improve this check - 1024,1024,64 max per dim (Kepler), but product num_threads < 1024; more restrictions pending register limit + printf(" Tile dimensions exceed Cuda limits\n"); + Kokkos::abort(" Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims"); + //Kokkos::Impl::throw_runtime_exception( " Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims"); + } + } + #endif +#endif } - index_type m_offset[rank]; - index_type m_dim[rank]; - int m_tile[rank]; - index_type m_tile_dim[rank]; - size_type m_num_tiles; // product of tile dims + + point_type m_lower; + point_type m_upper; + tile_type m_tile; + point_type m_tile_end; + index_type m_num_tiles; }; +// ------------------------------------------------------------------ // -namespace Impl { - -// Serial, Threads, OpenMP -// use enable_if to overload for Cuda -template < typename MDRange, typename Functor, typename Enable = void > -struct MDForFunctor -{ - using work_tag = typename MDRange::work_tag; - using index_type = typename MDRange::index_type; - using size_type = typename MDRange::size_type; - - MDRange m_range; - 
Functor m_func; - - KOKKOS_INLINE_FUNCTION - MDForFunctor( MDRange const& range, Functor const& f ) - : m_range(range) - , m_func( f ) - {} - - KOKKOS_INLINE_FUNCTION - MDForFunctor( MDRange const& range, Functor && f ) - : m_range(range) - , m_func( std::forward(f) ) - {} - - KOKKOS_INLINE_FUNCTION - MDForFunctor( MDRange && range, Functor const& f ) - : m_range( std::forward(range) ) - , m_func( f ) - {} - - KOKKOS_INLINE_FUNCTION - MDForFunctor( MDRange && range, Functor && f ) - : m_range( std::forward(range) ) - , m_func( std::forward(f) ) - {} - - - KOKKOS_INLINE_FUNCTION - MDForFunctor( MDForFunctor const& ) = default; - - KOKKOS_INLINE_FUNCTION - MDForFunctor& operator=( MDForFunctor const& ) = default; - - KOKKOS_INLINE_FUNCTION - MDForFunctor( MDForFunctor && ) = default; - - KOKKOS_INLINE_FUNCTION - MDForFunctor& operator=( MDForFunctor && ) = default; - - // Rank-2, Flat, No Tag - template - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<( std::is_integral::value - && std::is_same::value - && MDRange::rank == 2 - && MDRange::inner_direction == MDRange::Flat - )>::type - operator()(Idx t) const - { - if ( MDRange::outer_direction == MDRange::Right ) { - m_func( m_range.m_offset[0] + ( t / m_range.m_dim[1] ) - , m_range.m_offset[1] + ( t % m_range.m_dim[1] ) ); - } else { - m_func( m_range.m_offset[0] + ( t % m_range.m_dim[0] ) - , m_range.m_offset[1] + ( t / m_range.m_dim[0] ) ); - } - } - - // Rank-2, Flat, Tag - template - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<( std::is_integral::value - && !std::is_same::value - && MDRange::rank == 2 - && MDRange::inner_direction == MDRange::Flat - )>::type - operator()(Idx t) const - { - if ( MDRange::outer_direction == MDRange::Right ) { - m_func( work_tag{}, m_range.m_offset[0] + ( t / m_range.m_dim[1] ) - , m_range.m_offset[1] + ( t % m_range.m_dim[1] ) ); - } else { - m_func( work_tag{}, m_range.m_offset[0] + ( t % m_range.m_dim[0] ) - , m_range.m_offset[1] + ( t / m_range.m_dim[0] ) ); - } 
- } - - // Rank-2, Not Flat, No Tag - template - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<( std::is_integral::value - && std::is_same::value - && MDRange::rank == 2 - && MDRange::inner_direction != MDRange::Flat - )>::type - operator()(Idx t) const - { - index_type t0, t1; - if ( MDRange::outer_direction == MDRange::Right ) { - t0 = t / m_range.m_tile_dim[1]; - t1 = t % m_range.m_tile_dim[1]; - } else { - t0 = t % m_range.m_tile_dim[0]; - t1 = t / m_range.m_tile_dim[0]; - } - - const index_type b0 = t0 * m_range.m_tile[0] + m_range.m_offset[0]; - const index_type b1 = t1 * m_range.m_tile[1] + m_range.m_offset[1]; - - const index_type e0 = b0 + m_range.m_tile[0] <= (m_range.m_dim[0] + m_range.m_offset[0] ) ? b0 + m_range.m_tile[0] : ( m_range.m_dim[0] + m_range.m_offset[0] ); - const index_type e1 = b1 + m_range.m_tile[1] <= (m_range.m_dim[1] + m_range.m_offset[1] ) ? b1 + m_range.m_tile[1] : ( m_range.m_dim[1] + m_range.m_offset[1] ); - - if ( MDRange::inner_direction == MDRange::Right ) { - for (int i0=b0; i0 - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<( std::is_integral::value - && !std::is_same::value - && MDRange::rank == 2 - && MDRange::inner_direction != MDRange::Flat - )>::type - operator()(Idx t) const - { - work_tag tag; - - index_type t0, t1; - if ( MDRange::outer_direction == MDRange::Right ) { - t0 = t / m_range.m_tile_dim[1]; - t1 = t % m_range.m_tile_dim[1]; - } else { - t0 = t % m_range.m_tile_dim[0]; - t1 = t / m_range.m_tile_dim[0]; - } - - const index_type b0 = t0 * m_range.m_tile[0] + m_range.m_offset[0]; - const index_type b1 = t1 * m_range.m_tile[1] + m_range.m_offset[1]; - - const index_type e0 = b0 + m_range.m_tile[0] <= (m_range.m_dim[0] + m_range.m_offset[0] ) ? b0 + m_range.m_tile[0] : ( m_range.m_dim[0] + m_range.m_offset[0] ); - const index_type e1 = b1 + m_range.m_tile[1] <= (m_range.m_dim[1] + m_range.m_offset[1] ) ? 
b1 + m_range.m_tile[1] : ( m_range.m_dim[1] + m_range.m_offset[1] ); - - if ( MDRange::inner_direction == MDRange::Right ) { - for (int i0=b0; i0 - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<( std::is_integral::value - && std::is_same::value - && MDRange::rank == 3 - && MDRange::inner_direction == MDRange::Flat - )>::type - operator()(Idx t) const - { - if ( MDRange::outer_direction == MDRange::Right ) { - const int64_t tmp_prod = m_range.m_dim[1]*m_range.m_dim[2]; - m_func( m_range.m_offset[0] + ( t / tmp_prod ) - , m_range.m_offset[1] + ( (t % tmp_prod) / m_range.m_dim[2] ) - , m_range.m_offset[2] + ( (t % tmp_prod) % m_range.m_dim[2] ) - ); - } else { - const int64_t tmp_prod = m_range.m_dim[0]*m_range.m_dim[1]; - m_func( m_range.m_offset[0] + ( (t % tmp_prod) % m_range.m_dim[0] ) - , m_range.m_offset[1] + ( (t % tmp_prod) / m_range.m_dim[0] ) - , m_range.m_offset[2] + ( t / tmp_prod ) - ); - } - } - - // Rank-3, Flat, Tag - template - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<( std::is_integral::value - && !std::is_same::value - && MDRange::rank == 3 - && MDRange::inner_direction == MDRange::Flat - )>::type - operator()(Idx t) const - { - if ( MDRange::outer_direction == MDRange::Right ) { - const int64_t tmp_prod = m_range.m_dim[1]*m_range.m_dim[2]; - m_func( work_tag{} - , m_range.m_offset[0] + ( t / tmp_prod ) - , m_range.m_offset[1] + ( (t % tmp_prod) / m_range.m_dim[2] ) - , m_range.m_offset[2] + ( (t % tmp_prod) % m_range.m_dim[2] ) - ); - } else { - const int64_t tmp_prod = m_range.m_dim[0]*m_range.m_dim[1]; - m_func( work_tag{} - , m_range.m_offset[0] + ( (t % tmp_prod) % m_range.m_dim[0] ) - , m_range.m_offset[1] + ( (t % tmp_prod) / m_range.m_dim[0] ) - , m_range.m_offset[2] + ( t / tmp_prod ) - ); - } - } - - // Rank-3, Not Flat, No Tag - template - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<( std::is_integral::value - && std::is_same::value - && MDRange::rank == 3 - && MDRange::inner_direction != MDRange::Flat - 
)>::type - operator()(Idx t) const - { - index_type t0, t1, t2; - if ( MDRange::outer_direction == MDRange::Right ) { - const index_type tmp_prod = ( m_range.m_tile_dim[1]*m_range.m_tile_dim[2]); - t0 = t / tmp_prod; - t1 = ( t % tmp_prod ) / m_range.m_tile_dim[2]; - t2 = ( t % tmp_prod ) % m_range.m_tile_dim[2]; - } else { - const index_type tmp_prod = ( m_range.m_tile_dim[0]*m_range.m_tile_dim[1]); - t0 = ( t % tmp_prod ) % m_range.m_tile_dim[0]; - t1 = ( t % tmp_prod ) / m_range.m_tile_dim[0]; - t2 = t / tmp_prod; - } - - const index_type b0 = t0 * m_range.m_tile[0] + m_range.m_offset[0]; - const index_type b1 = t1 * m_range.m_tile[1] + m_range.m_offset[1]; - const index_type b2 = t2 * m_range.m_tile[2] + m_range.m_offset[2]; - - const index_type e0 = b0 + m_range.m_tile[0] <= (m_range.m_dim[0] + m_range.m_offset[0] ) ? b0 + m_range.m_tile[0] : ( m_range.m_dim[0] + m_range.m_offset[0] ); - const index_type e1 = b1 + m_range.m_tile[1] <= (m_range.m_dim[1] + m_range.m_offset[1] ) ? b1 + m_range.m_tile[1] : ( m_range.m_dim[1] + m_range.m_offset[1] ); - const index_type e2 = b2 + m_range.m_tile[2] <= (m_range.m_dim[2] + m_range.m_offset[2] ) ? 
b2 + m_range.m_tile[2] : ( m_range.m_dim[2] + m_range.m_offset[2] ); - - if ( MDRange::inner_direction == MDRange::Right ) { - for (int i0=b0; i0 - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<( std::is_integral::value - && !std::is_same::value - && MDRange::rank == 3 - && MDRange::inner_direction != MDRange::Flat - )>::type - operator()(Idx t) const - { - work_tag tag; - - index_type t0, t1, t2; - if ( MDRange::outer_direction == MDRange::Right ) { - const index_type tmp_prod = ( m_range.m_tile_dim[1]*m_range.m_tile_dim[2]); - t0 = t / tmp_prod; - t1 = ( t % tmp_prod ) / m_range.m_tile_dim[2]; - t2 = ( t % tmp_prod ) % m_range.m_tile_dim[2]; - } else { - const index_type tmp_prod = ( m_range.m_tile_dim[0]*m_range.m_tile_dim[1]); - t0 = ( t % tmp_prod ) % m_range.m_tile_dim[0]; - t1 = ( t % tmp_prod ) / m_range.m_tile_dim[0]; - t2 = t / tmp_prod; - } - - const index_type b0 = t0 * m_range.m_tile[0] + m_range.m_offset[0]; - const index_type b1 = t1 * m_range.m_tile[1] + m_range.m_offset[1]; - const index_type b2 = t2 * m_range.m_tile[2] + m_range.m_offset[2]; - - const index_type e0 = b0 + m_range.m_tile[0] <= (m_range.m_dim[0] + m_range.m_offset[0] ) ? b0 + m_range.m_tile[0] : ( m_range.m_dim[0] + m_range.m_offset[0] ); - const index_type e1 = b1 + m_range.m_tile[1] <= (m_range.m_dim[1] + m_range.m_offset[1] ) ? b1 + m_range.m_tile[1] : ( m_range.m_dim[1] + m_range.m_offset[1] ); - const index_type e2 = b2 + m_range.m_tile[2] <= (m_range.m_dim[2] + m_range.m_offset[2] ) ? 
b2 + m_range.m_tile[2] : ( m_range.m_dim[2] + m_range.m_offset[2] ); - - if ( MDRange::inner_direction == MDRange::Right ) { - for (int i0=b0; i0 +// ------------------------------------------------------------------ // +//md_parallel_for +// ------------------------------------------------------------------ // +template void md_parallel_for( MDRange const& range , Functor const& f , const std::string& str = "" + , typename std::enable_if<( true + #if defined( KOKKOS_ENABLE_CUDA) + && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value + #endif + ) >::type* = 0 ) { - Impl::MDForFunctor g(range, f); + Impl::MDFunctor g(range, f); - using range_policy = typename MDRange::range_policy; + //using range_policy = typename MDRange::range_policy; + using range_policy = typename MDRange::impl_range_policy; Kokkos::parallel_for( range_policy(0, range.m_num_tiles).set_chunk_size(1), g, str ); } @@ -596,15 +345,132 @@ template void md_parallel_for( const std::string& str , MDRange const& range , Functor const& f + , typename std::enable_if<( true + #if defined( KOKKOS_ENABLE_CUDA) + && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value + #endif + ) >::type* = 0 ) { - Impl::MDForFunctor g(range, f); + Impl::MDFunctor g(range, f); - using range_policy = typename MDRange::range_policy; + //using range_policy = typename MDRange::range_policy; + using range_policy = typename MDRange::impl_range_policy; Kokkos::parallel_for( range_policy(0, range.m_num_tiles).set_chunk_size(1), g, str ); } +// Cuda specialization +#if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA ) +template +void md_parallel_for( const std::string& str + , MDRange const& range + , Functor const& f + , typename std::enable_if<( true + #if defined( KOKKOS_ENABLE_CUDA) + && std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value + #endif + ) >::type* = 0 + ) +{ + Impl::DeviceIterateTile closure(range, f); + 
closure.execute(); +} + +template +void md_parallel_for( MDRange const& range + , Functor const& f + , const std::string& str = "" + , typename std::enable_if<( true + #if defined( KOKKOS_ENABLE_CUDA) + && std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value + #endif + ) >::type* = 0 + ) +{ + Impl::DeviceIterateTile closure(range, f); + closure.execute(); +} +#endif +// ------------------------------------------------------------------ // + +// ------------------------------------------------------------------ // +//md_parallel_reduce +// ------------------------------------------------------------------ // +template +void md_parallel_reduce( MDRange const& range + , Functor const& f + , ValueType & v + , const std::string& str = "" + , typename std::enable_if<( true + #if defined( KOKKOS_ENABLE_CUDA) + && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value + #endif + ) >::type* = 0 + ) +{ + Impl::MDFunctor g(range, f, v); + + //using range_policy = typename MDRange::range_policy; + using range_policy = typename MDRange::impl_range_policy; + Kokkos::parallel_reduce( str, range_policy(0, range.m_num_tiles).set_chunk_size(1), g, v ); +} + +template +void md_parallel_reduce( const std::string& str + , MDRange const& range + , Functor const& f + , ValueType & v + , typename std::enable_if<( true + #if defined( KOKKOS_ENABLE_CUDA) + && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value + #endif + ) >::type* = 0 + ) +{ + Impl::MDFunctor g(range, f, v); + + //using range_policy = typename MDRange::range_policy; + using range_policy = typename MDRange::impl_range_policy; + + Kokkos::parallel_reduce( str, range_policy(0, range.m_num_tiles).set_chunk_size(1), g, v ); +} + +// Cuda - parallel_reduce not implemented yet +/* +template +void md_parallel_reduce( MDRange const& range + , Functor const& f + , ValueType & v + , const std::string& str = "" + , typename std::enable_if<( 
true + #if defined( KOKKOS_ENABLE_CUDA) + && std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value + #endif + ) >::type* = 0 + ) +{ + Impl::DeviceIterateTile closure(range, f, v); + closure.execute(); +} + +template +void md_parallel_reduce( const std::string& str + , MDRange const& range + , Functor const& f + , ValueType & v + , typename std::enable_if<( true + #if defined( KOKKOS_ENABLE_CUDA) + && std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value + #endif + ) >::type* = 0 + ) +{ + Impl::DeviceIterateTile closure(range, f, v); + closure.execute(); +} +*/ + }} // namespace Kokkos::Experimental #endif //KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP diff --git a/lib/kokkos/core/src/Kokkos_Array.hpp b/lib/kokkos/core/src/Kokkos_Array.hpp index 8deb5142c4..abb263b7cc 100644 --- a/lib/kokkos/core/src/Kokkos_Array.hpp +++ b/lib/kokkos/core/src/Kokkos_Array.hpp @@ -59,8 +59,14 @@ template< class T = void , class Proxy = void > struct Array { -private: - T m_elem[N]; +public: + /** + * The elements of this C array shall not be accessed directly. The data + * member has to be declared public to enable aggregate initialization as for + * std::array. We mark it as private in the documentation. 
+ * @private + */ + T m_internal_implementation_private_member_data[N]; public: typedef T & reference ; @@ -78,25 +84,32 @@ public: KOKKOS_INLINE_FUNCTION reference operator[]( const iType & i ) { - static_assert( std::is_integral::value , "Must be integral argument" ); - return m_elem[i]; + static_assert( ( std::is_integral::value || std::is_enum::value ) , "Must be integral argument" ); + return m_internal_implementation_private_member_data[i]; } template< typename iType > KOKKOS_INLINE_FUNCTION const_reference operator[]( const iType & i ) const { - static_assert( std::is_integral::value , "Must be integral argument" ); - return m_elem[i]; + static_assert( ( std::is_integral::value || std::is_enum::value ) , "Must be integral argument" ); + return m_internal_implementation_private_member_data[i]; } - KOKKOS_INLINE_FUNCTION pointer data() { return & m_elem[0] ; } - KOKKOS_INLINE_FUNCTION const_pointer data() const { return & m_elem[0] ; } + KOKKOS_INLINE_FUNCTION pointer data() + { + return & m_internal_implementation_private_member_data[0]; + } + KOKKOS_INLINE_FUNCTION const_pointer data() const + { + return & m_internal_implementation_private_member_data[0]; + } - ~Array() = default ; - Array() = default ; - Array( const Array & ) = default ; - Array & operator = ( const Array & ) = default ; + // Do not default unless move and move-assignment are also defined + // ~Array() = default ; + // Array() = default ; + // Array( const Array & ) = default ; + // Array & operator = ( const Array & ) = default ; // Some supported compilers are not sufficiently C++11 compliant // for default move constructor and move assignment operator. 
@@ -124,7 +137,7 @@ public: KOKKOS_INLINE_FUNCTION value_type operator[]( const iType & ) { - static_assert( std::is_integral::value , "Must be integer argument" ); + static_assert( ( std::is_integral::value || std::is_enum::value ) , "Must be integer argument" ); return value_type(); } @@ -132,7 +145,7 @@ public: KOKKOS_INLINE_FUNCTION value_type operator[]( const iType & ) const { - static_assert( std::is_integral::value , "Must be integer argument" ); + static_assert( ( std::is_integral::value || std::is_enum::value ) , "Must be integer argument" ); return value_type(); } @@ -181,7 +194,7 @@ public: KOKKOS_INLINE_FUNCTION reference operator[]( const iType & i ) { - static_assert( std::is_integral::value , "Must be integral argument" ); + static_assert( ( std::is_integral::value || std::is_enum::value ) , "Must be integral argument" ); return m_elem[i]; } @@ -189,7 +202,7 @@ public: KOKKOS_INLINE_FUNCTION const_reference operator[]( const iType & i ) const { - static_assert( std::is_integral::value , "Must be integral argument" ); + static_assert( ( std::is_integral::value || std::is_enum::value ) , "Must be integral argument" ); return m_elem[i]; } @@ -250,7 +263,7 @@ public: KOKKOS_INLINE_FUNCTION reference operator[]( const iType & i ) { - static_assert( std::is_integral::value , "Must be integral argument" ); + static_assert( ( std::is_integral::value || std::is_enum::value ) , "Must be integral argument" ); return m_elem[i*m_stride]; } @@ -258,7 +271,7 @@ public: KOKKOS_INLINE_FUNCTION const_reference operator[]( const iType & i ) const { - static_assert( std::is_integral::value , "Must be integral argument" ); + static_assert( ( std::is_integral::value || std::is_enum::value ) , "Must be integral argument" ); return m_elem[i*m_stride]; } diff --git a/lib/kokkos/core/src/Kokkos_Concepts.hpp b/lib/kokkos/core/src/Kokkos_Concepts.hpp index 3f9bdea40d..cfcdabf95e 100644 --- a/lib/kokkos/core/src/Kokkos_Concepts.hpp +++ b/lib/kokkos/core/src/Kokkos_Concepts.hpp 
@@ -102,6 +102,7 @@ KOKKOS_IMPL_IS_CONCEPT( memory_traits ) KOKKOS_IMPL_IS_CONCEPT( execution_space ) KOKKOS_IMPL_IS_CONCEPT( execution_policy ) KOKKOS_IMPL_IS_CONCEPT( array_layout ) +KOKKOS_IMPL_IS_CONCEPT( reducer ) namespace Impl { diff --git a/lib/kokkos/core/src/Kokkos_Core.hpp b/lib/kokkos/core/src/Kokkos_Core.hpp index 6d92f4bf61..16c1bce902 100644 --- a/lib/kokkos/core/src/Kokkos_Core.hpp +++ b/lib/kokkos/core/src/Kokkos_Core.hpp @@ -57,6 +57,10 @@ #include #endif +#if defined( KOKKOS_ENABLE_QTHREADS ) +#include +#endif + #if defined( KOKKOS_ENABLE_PTHREAD ) #include #endif @@ -76,6 +80,7 @@ #include +#include //---------------------------------------------------------------------------- @@ -105,6 +110,9 @@ void finalize_all(); void fence(); +/** \brief Print "Bill of Materials" */ +void print_configuration( std::ostream & , const bool detail = false ); + } // namespace Kokkos //---------------------------------------------------------------------------- @@ -159,4 +167,3 @@ void * kokkos_realloc( void * arg_alloc , const size_t arg_alloc_size ) //---------------------------------------------------------------------------- #endif - diff --git a/lib/kokkos/core/src/Kokkos_Core_fwd.hpp b/lib/kokkos/core/src/Kokkos_Core_fwd.hpp index e7e6a49d37..4029bf599c 100644 --- a/lib/kokkos/core/src/Kokkos_Core_fwd.hpp +++ b/lib/kokkos/core/src/Kokkos_Core_fwd.hpp @@ -63,7 +63,7 @@ namespace Kokkos { struct AUTO_t { KOKKOS_INLINE_FUNCTION - constexpr const AUTO_t & operator()() const { return *this ; } + constexpr const AUTO_t & operator()() const { return *this; } }; namespace { @@ -73,46 +73,49 @@ constexpr AUTO_t AUTO = Kokkos::AUTO_t(); struct InvalidType {}; -} +} // namespace Kokkos -//---------------------------------------------------------------------------- //---------------------------------------------------------------------------- // Forward declarations for class inter-relationships namespace Kokkos { -class HostSpace ; ///< Memory space for main process 
and CPU execution spaces +class HostSpace; ///< Memory space for main process and CPU execution spaces #ifdef KOKKOS_ENABLE_HBWSPACE namespace Experimental { -class HBWSpace ; /// Memory space for hbw_malloc from memkind (e.g. for KNL processor) +class HBWSpace; /// Memory space for hbw_malloc from memkind (e.g. for KNL processor) } #endif #if defined( KOKKOS_ENABLE_SERIAL ) -class Serial ; ///< Execution space main process on CPU -#endif // defined( KOKKOS_ENABLE_SERIAL ) +class Serial; ///< Execution space main process on CPU. +#endif + +#if defined( KOKKOS_ENABLE_QTHREADS ) +class Qthreads; ///< Execution space with Qthreads back-end. +#endif #if defined( KOKKOS_ENABLE_PTHREAD ) -class Threads ; ///< Execution space with pthreads back-end +class Threads; ///< Execution space with pthreads back-end. #endif #if defined( KOKKOS_ENABLE_OPENMP ) -class OpenMP ; ///< OpenMP execution space +class OpenMP; ///< OpenMP execution space. #endif #if defined( KOKKOS_ENABLE_CUDA ) -class CudaSpace ; ///< Memory space on Cuda GPU -class CudaUVMSpace ; ///< Memory space on Cuda GPU with UVM -class CudaHostPinnedSpace ; ///< Memory space on Host accessible to Cuda GPU -class Cuda ; ///< Execution space for Cuda GPU +class CudaSpace; ///< Memory space on Cuda GPU +class CudaUVMSpace; ///< Memory space on Cuda GPU with UVM +class CudaHostPinnedSpace; ///< Memory space on Host accessible to Cuda GPU +class Cuda; ///< Execution space for Cuda GPU #endif template struct Device; + } // namespace Kokkos -//---------------------------------------------------------------------------- //---------------------------------------------------------------------------- // Set the default execution space. 
@@ -122,60 +125,66 @@ struct Device; namespace Kokkos { -#if defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA ) - typedef Cuda DefaultExecutionSpace ; -#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) - typedef OpenMP DefaultExecutionSpace ; -#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) - typedef Threads DefaultExecutionSpace ; -#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) - typedef Serial DefaultExecutionSpace ; +#if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA ) + typedef Cuda DefaultExecutionSpace; +#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) + typedef OpenMP DefaultExecutionSpace; +#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) + typedef Threads DefaultExecutionSpace; +//#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS ) +// typedef Qthreads DefaultExecutionSpace; +#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) + typedef Serial DefaultExecutionSpace; #else -# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::Cuda, Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads." +# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::Cuda, Kokkos::OpenMP, Kokkos::Threads, Kokkos::Qthreads, or Kokkos::Serial." 
#endif -#if defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) - typedef OpenMP DefaultHostExecutionSpace ; -#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) - typedef Threads DefaultHostExecutionSpace ; -#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) - typedef Serial DefaultHostExecutionSpace ; -#elif defined ( KOKKOS_ENABLE_OPENMP ) - typedef OpenMP DefaultHostExecutionSpace ; -#elif defined ( KOKKOS_ENABLE_PTHREAD ) - typedef Threads DefaultHostExecutionSpace ; -#elif defined ( KOKKOS_ENABLE_SERIAL ) - typedef Serial DefaultHostExecutionSpace ; +#if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) + typedef OpenMP DefaultHostExecutionSpace; +#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) + typedef Threads DefaultHostExecutionSpace; +//#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS ) +// typedef Qthreads DefaultHostExecutionSpace; +#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) + typedef Serial DefaultHostExecutionSpace; +#elif defined( KOKKOS_ENABLE_OPENMP ) + typedef OpenMP DefaultHostExecutionSpace; +#elif defined( KOKKOS_ENABLE_PTHREAD ) + typedef Threads DefaultHostExecutionSpace; +//#elif defined( KOKKOS_ENABLE_QTHREADS ) +// typedef Qthreads DefaultHostExecutionSpace; +#elif defined( KOKKOS_ENABLE_SERIAL ) + typedef Serial DefaultHostExecutionSpace; #else -# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads." +# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::OpenMP, Kokkos::Threads, Kokkos::Qthreads, or Kokkos::Serial." #endif } // namespace Kokkos -//---------------------------------------------------------------------------- //---------------------------------------------------------------------------- // Detect the active execution space and define its memory space. 
// This is used to verify whether a running kernel can access // a given memory space. namespace Kokkos { + namespace Impl { -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) && defined (KOKKOS_ENABLE_CUDA) -typedef Kokkos::CudaSpace ActiveExecutionMemorySpace ; +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) && defined( KOKKOS_ENABLE_CUDA ) +typedef Kokkos::CudaSpace ActiveExecutionMemorySpace; #elif defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) -typedef Kokkos::HostSpace ActiveExecutionMemorySpace ; +typedef Kokkos::HostSpace ActiveExecutionMemorySpace; #else -typedef void ActiveExecutionMemorySpace ; +typedef void ActiveExecutionMemorySpace; #endif -template< class ActiveSpace , class MemorySpace > +template< class ActiveSpace, class MemorySpace > struct VerifyExecutionCanAccessMemorySpace { enum {value = 0}; }; template< class Space > -struct VerifyExecutionCanAccessMemorySpace< Space , Space > +struct VerifyExecutionCanAccessMemorySpace< Space, Space > { enum {value = 1}; KOKKOS_INLINE_FUNCTION static void verify(void) {} @@ -183,33 +192,33 @@ struct VerifyExecutionCanAccessMemorySpace< Space , Space > }; } // namespace Impl + } // namespace Kokkos -#define KOKKOS_RESTRICT_EXECUTION_TO_DATA( DATA_SPACE , DATA_PTR ) \ +#define KOKKOS_RESTRICT_EXECUTION_TO_DATA( DATA_SPACE, DATA_PTR ) \ Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< \ - Kokkos::Impl::ActiveExecutionMemorySpace , DATA_SPACE >::verify( DATA_PTR ) + Kokkos::Impl::ActiveExecutionMemorySpace, DATA_SPACE >::verify( DATA_PTR ) #define KOKKOS_RESTRICT_EXECUTION_TO_( DATA_SPACE ) \ Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< \ - Kokkos::Impl::ActiveExecutionMemorySpace , DATA_SPACE >::verify() + Kokkos::Impl::ActiveExecutionMemorySpace, DATA_SPACE >::verify() //---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- namespace Kokkos { void fence(); } 
-//---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { + namespace Impl { template< class Functor , class Policy , class EnableFunctor = void - , class EnablePolicy = void + , class EnablePolicy = void > struct FunctorPolicyExecutionSpace; @@ -220,18 +229,18 @@ struct FunctorPolicyExecutionSpace; /// /// This is an implementation detail of parallel_for. Users should /// skip this and go directly to the nonmember function parallel_for. -template< class FunctorType , class ExecPolicy , class ExecutionSpace = - typename Impl::FunctorPolicyExecutionSpace< FunctorType , ExecPolicy >::execution_space - > class ParallelFor ; +template< class FunctorType, class ExecPolicy, class ExecutionSpace = + typename Impl::FunctorPolicyExecutionSpace< FunctorType, ExecPolicy >::execution_space + > class ParallelFor; /// \class ParallelReduce /// \brief Implementation detail of parallel_reduce. /// /// This is an implementation detail of parallel_reduce. Users should /// skip this and go directly to the nonmember function parallel_reduce. -template< class FunctorType , class ExecPolicy , class ReducerType = InvalidType, class ExecutionSpace = - typename Impl::FunctorPolicyExecutionSpace< FunctorType , ExecPolicy >::execution_space - > class ParallelReduce ; +template< class FunctorType, class ExecPolicy, class ReducerType = InvalidType, class ExecutionSpace = + typename Impl::FunctorPolicyExecutionSpace< FunctorType, ExecPolicy >::execution_space + > class ParallelReduce; /// \class ParallelScan /// \brief Implementation detail of parallel_scan. @@ -239,10 +248,12 @@ template< class FunctorType , class ExecPolicy , class ReducerType = InvalidType /// This is an implementation detail of parallel_scan. Users should /// skip this and go directly to the documentation of the nonmember /// template function Kokkos::parallel_scan. 
-template< class FunctorType , class ExecPolicy , class ExecutionSapce = - typename Impl::FunctorPolicyExecutionSpace< FunctorType , ExecPolicy >::execution_space - > class ParallelScan ; +template< class FunctorType, class ExecPolicy, class ExecutionSapce = + typename Impl::FunctorPolicyExecutionSpace< FunctorType, ExecPolicy >::execution_space + > class ParallelScan; + +} // namespace Impl + +} // namespace Kokkos -}} #endif /* #ifndef KOKKOS_CORE_FWD_HPP */ - diff --git a/lib/kokkos/core/src/Kokkos_Cuda.hpp b/lib/kokkos/core/src/Kokkos_Cuda.hpp index afccdb6c52..433cac5e51 100644 --- a/lib/kokkos/core/src/Kokkos_Cuda.hpp +++ b/lib/kokkos/core/src/Kokkos_Cuda.hpp @@ -62,7 +62,6 @@ #include #include -#include /*--------------------------------------------------------------------------*/ @@ -295,6 +294,7 @@ struct VerifyExecutionCanAccessMemorySpace #include #include +#include //---------------------------------------------------------------------------- #endif /* #if defined( KOKKOS_ENABLE_CUDA ) */ diff --git a/lib/kokkos/core/src/Kokkos_HBWSpace.hpp b/lib/kokkos/core/src/Kokkos_HBWSpace.hpp index d6bf8dcdf4..fc39ce0e5b 100644 --- a/lib/kokkos/core/src/Kokkos_HBWSpace.hpp +++ b/lib/kokkos/core/src/Kokkos_HBWSpace.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -44,14 +44,16 @@ #ifndef KOKKOS_HBWSPACE_HPP #define KOKKOS_HBWSPACE_HPP - #include /*--------------------------------------------------------------------------*/ + #ifdef KOKKOS_ENABLE_HBWSPACE namespace Kokkos { + namespace Experimental { + namespace Impl { /// \brief Initialize lock array for arbitrary size atomics. @@ -67,7 +69,7 @@ void init_lock_array_hbw_space(); /// This function tries to aquire the lock for the hash value derived /// from the provided ptr. If the lock is successfully aquired the /// function returns true. Otherwise it returns false. -bool lock_address_hbw_space(void* ptr); +bool lock_address_hbw_space( void* ptr ); /// \brief Release lock for the address /// @@ -75,13 +77,16 @@ bool lock_address_hbw_space(void* ptr); /// from the provided ptr. This function should only be called /// after previously successfully aquiring a lock with /// lock_address. -void unlock_address_hbw_space(void* ptr); +void unlock_address_hbw_space( void* ptr ); } // namespace Impl -} // neamspace Experimental + +} // namespace Experimental + } // namespace Kokkos namespace Kokkos { + namespace Experimental { /// \class HBWSpace @@ -91,10 +96,9 @@ namespace Experimental { /// memory means the usual CPU-accessible memory. class HBWSpace { public: - //! Tag this class as a kokkos memory space - typedef HBWSpace memory_space ; - typedef size_t size_type ; + typedef HBWSpace memory_space; + typedef size_t size_type; /// \typedef execution_space /// \brief Default execution space for this memory space. @@ -103,21 +107,25 @@ public: /// useful for things like initializing a View (which happens in /// parallel using the View's default execution space). 
#if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) - typedef Kokkos::OpenMP execution_space ; + typedef Kokkos::OpenMP execution_space; #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) - typedef Kokkos::Threads execution_space ; + typedef Kokkos::Threads execution_space; +//#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS ) +// typedef Kokkos::Qthreads execution_space; #elif defined( KOKKOS_ENABLE_OPENMP ) - typedef Kokkos::OpenMP execution_space ; + typedef Kokkos::OpenMP execution_space; #elif defined( KOKKOS_ENABLE_PTHREAD ) - typedef Kokkos::Threads execution_space ; + typedef Kokkos::Threads execution_space; +//#elif defined( KOKKOS_ENABLE_QTHREADS ) +// typedef Kokkos::Qthreads execution_space; #elif defined( KOKKOS_ENABLE_SERIAL ) - typedef Kokkos::Serial execution_space ; + typedef Kokkos::Serial execution_space; #else -# error "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices." +# error "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Threads, Kokkos::Qhreads, or Kokkos::Serial. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices." #endif //! 
This memory space preferred device_type - typedef Kokkos::Device device_type; + typedef Kokkos::Device< execution_space, memory_space > device_type; /*--------------------------------*/ /* Functions unique to the HBWSpace */ @@ -129,72 +137,73 @@ public: /**\brief Default memory space instance */ HBWSpace(); - HBWSpace( const HBWSpace & rhs ) = default ; - HBWSpace & operator = ( const HBWSpace & ) = default ; - ~HBWSpace() = default ; + HBWSpace( const HBWSpace & rhs ) = default; + HBWSpace & operator = ( const HBWSpace & ) = default; + ~HBWSpace() = default; /**\brief Non-default memory space instance to choose allocation mechansim, if available */ - enum AllocationMechanism { STD_MALLOC , POSIX_MEMALIGN , POSIX_MMAP , INTEL_MM_ALLOC }; + enum AllocationMechanism { STD_MALLOC, POSIX_MEMALIGN, POSIX_MMAP, INTEL_MM_ALLOC }; explicit HBWSpace( const AllocationMechanism & ); /**\brief Allocate untracked memory in the space */ - void * allocate( const size_t arg_alloc_size ) const ; + void * allocate( const size_t arg_alloc_size ) const; /**\brief Deallocate untracked memory in the space */ - void deallocate( void * const arg_alloc_ptr - , const size_t arg_alloc_size ) const ; + void deallocate( void * const arg_alloc_ptr + , const size_t arg_alloc_size ) const; /**\brief Return Name of the MemorySpace */ static constexpr const char* name(); private: - AllocationMechanism m_alloc_mech ; + AllocationMechanism m_alloc_mech; static constexpr const char* m_name = "HBW"; - friend class Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void > ; + friend class Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::HBWSpace, void >; }; } // namespace Experimental + } // namespace Kokkos -//---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { + namespace Impl { template<> -class SharedAllocationRecord< 
Kokkos::Experimental::HBWSpace , void > - : public SharedAllocationRecord< void , void > +class SharedAllocationRecord< Kokkos::Experimental::HBWSpace, void > + : public SharedAllocationRecord< void, void > { private: - friend Kokkos::Experimental::HBWSpace ; + friend Kokkos::Experimental::HBWSpace; - typedef SharedAllocationRecord< void , void > RecordBase ; + typedef SharedAllocationRecord< void, void > RecordBase; - SharedAllocationRecord( const SharedAllocationRecord & ) = delete ; - SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ; + SharedAllocationRecord( const SharedAllocationRecord & ) = delete; + SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete; static void deallocate( RecordBase * ); /**\brief Root record for tracked allocations from this HBWSpace instance */ - static RecordBase s_root_record ; + static RecordBase s_root_record; - const Kokkos::Experimental::HBWSpace m_space ; + const Kokkos::Experimental::HBWSpace m_space; protected: ~SharedAllocationRecord(); - SharedAllocationRecord() = default ; + SharedAllocationRecord() = default; - SharedAllocationRecord( const Kokkos::Experimental::HBWSpace & arg_space - , const std::string & arg_label - , const size_t arg_alloc_size - , const RecordBase::function_type arg_dealloc = & deallocate + SharedAllocationRecord( const Kokkos::Experimental::HBWSpace & arg_space + , const std::string & arg_label + , const size_t arg_alloc_size + , const RecordBase::function_type arg_dealloc = & deallocate ); public: @@ -206,23 +215,23 @@ public: } KOKKOS_INLINE_FUNCTION static - SharedAllocationRecord * allocate( const Kokkos::Experimental::HBWSpace & arg_space - , const std::string & arg_label - , const size_t arg_alloc_size + SharedAllocationRecord * allocate( const Kokkos::Experimental::HBWSpace & arg_space + , const std::string & arg_label + , const size_t arg_alloc_size ) { #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - return new 
SharedAllocationRecord( arg_space , arg_label , arg_alloc_size ); + return new SharedAllocationRecord( arg_space, arg_label, arg_alloc_size ); #else - return (SharedAllocationRecord *) 0 ; + return (SharedAllocationRecord *) 0; #endif } /**\brief Allocate tracked memory in the space */ static void * allocate_tracked( const Kokkos::Experimental::HBWSpace & arg_space - , const std::string & arg_label - , const size_t arg_alloc_size ); + , const std::string & arg_label + , const size_t arg_alloc_size ); /**\brief Reallocate tracked memory in the space */ static @@ -233,88 +242,93 @@ public: static void deallocate_tracked( void * const arg_alloc_ptr ); - static SharedAllocationRecord * get_record( void * arg_alloc_ptr ); - static void print_records( std::ostream & , const Kokkos::Experimental::HBWSpace & , bool detail = false ); + static void print_records( std::ostream &, const Kokkos::Experimental::HBWSpace &, bool detail = false ); }; } // namespace Impl + } // namespace Kokkos - -//---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { + namespace Impl { -static_assert( Kokkos::Impl::MemorySpaceAccess< Kokkos::Experimental::HBWSpace , Kokkos::Experimental::HBWSpace >::assignable , "" ); +static_assert( Kokkos::Impl::MemorySpaceAccess< Kokkos::Experimental::HBWSpace, Kokkos::Experimental::HBWSpace >::assignable, "" ); template<> -struct MemorySpaceAccess< Kokkos::HostSpace , Kokkos::Experimental::HBWSpace > { +struct MemorySpaceAccess< Kokkos::HostSpace, Kokkos::Experimental::HBWSpace > { enum { assignable = true }; enum { accessible = true }; enum { deepcopy = true }; }; template<> -struct MemorySpaceAccess< Kokkos::Experimental::HBWSpace , Kokkos::HostSpace> { +struct MemorySpaceAccess< Kokkos::Experimental::HBWSpace, Kokkos::HostSpace > { enum { assignable = false }; enum { accessible = true }; enum { deepcopy = true }; }; -}} +} // namespace 
Impl + +} // namespace Kokkos -//---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { + namespace Impl { - -template -struct DeepCopy { - DeepCopy( void * dst , const void * src , size_t n ) { - memcpy( dst , src , n ); +template< class ExecutionSpace > +struct DeepCopy< Experimental::HBWSpace, Experimental::HBWSpace, ExecutionSpace > { + DeepCopy( void * dst, const void * src, size_t n ) { + memcpy( dst, src, n ); } - DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) { + + DeepCopy( const ExecutionSpace& exec, void * dst, const void * src, size_t n ) { exec.fence(); - memcpy( dst , src , n ); + memcpy( dst, src, n ); } }; -template -struct DeepCopy { - DeepCopy( void * dst , const void * src , size_t n ) { - memcpy( dst , src , n ); +template< class ExecutionSpace > +struct DeepCopy< HostSpace, Experimental::HBWSpace, ExecutionSpace > { + DeepCopy( void * dst, const void * src, size_t n ) { + memcpy( dst, src, n ); } - DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) { + + DeepCopy( const ExecutionSpace& exec, void * dst, const void * src, size_t n ) { exec.fence(); - memcpy( dst , src , n ); + memcpy( dst, src, n ); } }; -template -struct DeepCopy { - DeepCopy( void * dst , const void * src , size_t n ) { - memcpy( dst , src , n ); +template< class ExecutionSpace > +struct DeepCopy< Experimental::HBWSpace, HostSpace, ExecutionSpace > { + DeepCopy( void * dst, const void * src, size_t n ) { + memcpy( dst, src, n ); } - DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) { + + DeepCopy( const ExecutionSpace& exec, void * dst, const void * src, size_t n ) { exec.fence(); - memcpy( dst , src , n ); + memcpy( dst, src, n ); } }; } // namespace Impl + } // namespace Kokkos namespace Kokkos { + namespace Impl { template<> -struct 
VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::Experimental::HBWSpace > +struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace, Kokkos::Experimental::HBWSpace > { enum { value = true }; inline static void verify( void ) { } @@ -322,7 +336,7 @@ struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::Experime }; template<> -struct VerifyExecutionCanAccessMemorySpace< Kokkos::Experimental::HBWSpace , Kokkos::HostSpace > +struct VerifyExecutionCanAccessMemorySpace< Kokkos::Experimental::HBWSpace, Kokkos::HostSpace > { enum { value = true }; inline static void verify( void ) { } @@ -330,8 +344,9 @@ struct VerifyExecutionCanAccessMemorySpace< Kokkos::Experimental::HBWSpace , Kok }; } // namespace Impl + } // namespace Kokkos #endif -#endif /* #define KOKKOS_HBWSPACE_HPP */ +#endif // #define KOKKOS_HBWSPACE_HPP diff --git a/lib/kokkos/core/src/Kokkos_HostSpace.hpp b/lib/kokkos/core/src/Kokkos_HostSpace.hpp index e79de462bf..82006665ce 100644 --- a/lib/kokkos/core/src/Kokkos_HostSpace.hpp +++ b/lib/kokkos/core/src/Kokkos_HostSpace.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -60,6 +60,7 @@ /*--------------------------------------------------------------------------*/ namespace Kokkos { + namespace Impl { /// \brief Initialize lock array for arbitrary size atomics. @@ -83,9 +84,10 @@ bool lock_address_host_space(void* ptr); /// from the provided ptr. This function should only be called /// after previously successfully aquiring a lock with /// lock_address. -void unlock_address_host_space(void* ptr); +void unlock_address_host_space( void* ptr ); } // namespace Impl + } // namespace Kokkos namespace Kokkos { @@ -97,10 +99,9 @@ namespace Kokkos { /// memory means the usual CPU-accessible memory. class HostSpace { public: - //! Tag this class as a kokkos memory space - typedef HostSpace memory_space ; - typedef size_t size_type ; + typedef HostSpace memory_space; + typedef size_t size_type; /// \typedef execution_space /// \brief Default execution space for this memory space. @@ -109,21 +110,25 @@ public: /// useful for things like initializing a View (which happens in /// parallel using the View's default execution space). 
#if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) - typedef Kokkos::OpenMP execution_space ; + typedef Kokkos::OpenMP execution_space; #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) - typedef Kokkos::Threads execution_space ; + typedef Kokkos::Threads execution_space; +//#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS ) +// typedef Kokkos::Qthreads execution_space; #elif defined( KOKKOS_ENABLE_OPENMP ) - typedef Kokkos::OpenMP execution_space ; + typedef Kokkos::OpenMP execution_space; #elif defined( KOKKOS_ENABLE_PTHREAD ) - typedef Kokkos::Threads execution_space ; + typedef Kokkos::Threads execution_space; +//#elif defined( KOKKOS_ENABLE_QTHREADS ) +// typedef Kokkos::Qthreads execution_space; #elif defined( KOKKOS_ENABLE_SERIAL ) - typedef Kokkos::Serial execution_space ; + typedef Kokkos::Serial execution_space; #else -# error "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices." +# error "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Threads, Kokkos::Qthreads, or Kokkos::Serial. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices." #endif //! 
This memory space preferred device_type - typedef Kokkos::Device device_type; + typedef Kokkos::Device< execution_space, memory_space > device_type; /*--------------------------------*/ /* Functions unique to the HostSpace */ @@ -135,61 +140,57 @@ public: /**\brief Default memory space instance */ HostSpace(); - HostSpace( HostSpace && rhs ) = default ; - HostSpace( const HostSpace & rhs ) = default ; - HostSpace & operator = ( HostSpace && ) = default ; - HostSpace & operator = ( const HostSpace & ) = default ; - ~HostSpace() = default ; + HostSpace( HostSpace && rhs ) = default; + HostSpace( const HostSpace & rhs ) = default; + HostSpace & operator = ( HostSpace && ) = default; + HostSpace & operator = ( const HostSpace & ) = default; + ~HostSpace() = default; /**\brief Non-default memory space instance to choose allocation mechansim, if available */ - enum AllocationMechanism { STD_MALLOC , POSIX_MEMALIGN , POSIX_MMAP , INTEL_MM_ALLOC }; + enum AllocationMechanism { STD_MALLOC, POSIX_MEMALIGN, POSIX_MMAP, INTEL_MM_ALLOC }; explicit HostSpace( const AllocationMechanism & ); /**\brief Allocate untracked memory in the space */ - void * allocate( const size_t arg_alloc_size ) const ; + void * allocate( const size_t arg_alloc_size ) const; /**\brief Deallocate untracked memory in the space */ - void deallocate( void * const arg_alloc_ptr - , const size_t arg_alloc_size ) const ; + void deallocate( void * const arg_alloc_ptr + , const size_t arg_alloc_size ) const; /**\brief Return Name of the MemorySpace */ static constexpr const char* name(); private: - - AllocationMechanism m_alloc_mech ; + AllocationMechanism m_alloc_mech; static constexpr const char* m_name = "Host"; - friend class Kokkos::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > ; + friend class Kokkos::Impl::SharedAllocationRecord< Kokkos::HostSpace, void >; }; } // namespace Kokkos -//---------------------------------------------------------------------------- 
//---------------------------------------------------------------------------- namespace Kokkos { + namespace Impl { -static_assert( Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace , Kokkos::HostSpace >::assignable , "" ); - +static_assert( Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace, Kokkos::HostSpace >::assignable, "" ); template< typename S > struct HostMirror { private: - // If input execution space can access HostSpace then keep it. // Example: Kokkos::OpenMP can access, Kokkos::Cuda cannot enum { keep_exe = Kokkos::Impl::MemorySpaceAccess - < typename S::execution_space::memory_space , Kokkos::HostSpace > - ::accessible }; + < typename S::execution_space::memory_space, Kokkos::HostSpace >::accessible }; // If HostSpace can access memory space then keep it. // Example: Cannot access Kokkos::CudaSpace, can access Kokkos::CudaUVMSpace enum { keep_mem = Kokkos::Impl::MemorySpaceAccess - < Kokkos::HostSpace , typename S::memory_space >::accessible }; + < Kokkos::HostSpace, typename S::memory_space >::accessible }; public: @@ -202,42 +203,41 @@ public: , typename S::memory_space > , Kokkos::HostSpace >::type - >::type Space ; + >::type Space; }; } // namespace Impl + } // namespace Kokkos -//---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { + namespace Impl { template<> -class SharedAllocationRecord< Kokkos::HostSpace , void > - : public SharedAllocationRecord< void , void > +class SharedAllocationRecord< Kokkos::HostSpace, void > + : public SharedAllocationRecord< void, void > { private: + friend Kokkos::HostSpace; - friend Kokkos::HostSpace ; + typedef SharedAllocationRecord< void, void > RecordBase; - typedef SharedAllocationRecord< void , void > RecordBase ; - - SharedAllocationRecord( const SharedAllocationRecord & ) = delete ; - SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ; + 
SharedAllocationRecord( const SharedAllocationRecord & ) = delete; + SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete; static void deallocate( RecordBase * ); /**\brief Root record for tracked allocations from this HostSpace instance */ - static RecordBase s_root_record ; + static RecordBase s_root_record; - const Kokkos::HostSpace m_space ; + const Kokkos::HostSpace m_space; protected: - ~SharedAllocationRecord(); - SharedAllocationRecord() = default ; + SharedAllocationRecord() = default; SharedAllocationRecord( const Kokkos::HostSpace & arg_space , const std::string & arg_label @@ -249,22 +249,23 @@ public: inline std::string get_label() const - { - return std::string( RecordBase::head()->m_label ); - } + { + return std::string( RecordBase::head()->m_label ); + } KOKKOS_INLINE_FUNCTION static SharedAllocationRecord * allocate( const Kokkos::HostSpace & arg_space , const std::string & arg_label , const size_t arg_alloc_size ) - { + { #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - return new SharedAllocationRecord( arg_space , arg_label , arg_alloc_size ); + return new SharedAllocationRecord( arg_space, arg_label, arg_alloc_size ); #else - return (SharedAllocationRecord *) 0 ; + return (SharedAllocationRecord *) 0; #endif - } + } + /**\brief Allocate tracked memory in the space */ static @@ -281,37 +282,37 @@ public: static void deallocate_tracked( void * const arg_alloc_ptr ); - static SharedAllocationRecord * get_record( void * arg_alloc_ptr ); - static void print_records( std::ostream & , const Kokkos::HostSpace & , bool detail = false ); + static void print_records( std::ostream &, const Kokkos::HostSpace &, bool detail = false ); }; } // namespace Impl + } // namespace Kokkos -//---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { + namespace Impl { -template< class DstSpace, class SrcSpace, class 
ExecutionSpace = typename DstSpace::execution_space> struct DeepCopy ; +template< class DstSpace, class SrcSpace, class ExecutionSpace = typename DstSpace::execution_space > struct DeepCopy; -template -struct DeepCopy { - DeepCopy( void * dst , const void * src , size_t n ) { - memcpy( dst , src , n ); +template< class ExecutionSpace > +struct DeepCopy< HostSpace, HostSpace, ExecutionSpace > { + DeepCopy( void * dst, const void * src, size_t n ) { + memcpy( dst, src, n ); } - DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) { + + DeepCopy( const ExecutionSpace& exec, void * dst, const void * src, size_t n ) { exec.fence(); - memcpy( dst , src , n ); + memcpy( dst, src, n ); } }; } // namespace Impl + } // namespace Kokkos - -#endif /* #define KOKKOS_HOSTSPACE_HPP */ - +#endif // #define KOKKOS_HOSTSPACE_HPP diff --git a/lib/kokkos/core/src/Kokkos_Macros.hpp b/lib/kokkos/core/src/Kokkos_Macros.hpp index 52845b9e09..c138b08c94 100644 --- a/lib/kokkos/core/src/Kokkos_Macros.hpp +++ b/lib/kokkos/core/src/Kokkos_Macros.hpp @@ -45,22 +45,20 @@ #define KOKKOS_MACROS_HPP //---------------------------------------------------------------------------- -/** Pick up configure/build options via #define macros: +/** Pick up configure / build options via #define macros: * * KOKKOS_ENABLE_CUDA Kokkos::Cuda execution and memory spaces * KOKKOS_ENABLE_PTHREAD Kokkos::Threads execution space - * KOKKOS_ENABLE_QTHREAD Kokkos::Qthread execution space - * KOKKOS_ENABLE_OPENMP Kokkos::OpenMP execution space - * KOKKOS_ENABLE_HWLOC HWLOC library is available - * KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK insert array bounds checks, is expensive! - * - * KOKKOS_ENABLE_MPI negotiate MPI/execution space interactions - * - * KOKKOS_ENABLE_CUDA_UVM Use CUDA UVM for Cuda memory space + * KOKKOS_ENABLE_QTHREADS Kokkos::Qthreads execution space + * KOKKOS_ENABLE_OPENMP Kokkos::OpenMP execution space + * KOKKOS_ENABLE_HWLOC HWLOC library is available. 
+ * KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK Insert array bounds checks, is expensive! + * KOKKOS_ENABLE_MPI Negotiate MPI/execution space interactions. + * KOKKOS_ENABLE_CUDA_UVM Use CUDA UVM for Cuda memory space. */ #ifndef KOKKOS_DONT_INCLUDE_CORE_CONFIG_H -#include + #include #endif #include @@ -86,7 +84,7 @@ * KOKKOS_ENABLE_INTEL_ATOMICS * KOKKOS_ENABLE_OPENMP_ATOMICS * - * A suite of 'KOKKOS_HAVE_PRAGMA_...' are defined for internal use. + * A suite of 'KOKKOS_ENABLE_PRAGMA_...' are defined for internal use. * * Macros for marking functions to run in an execution space: * @@ -98,64 +96,63 @@ //---------------------------------------------------------------------------- #if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ ) + // Compiling with a CUDA compiler. + // + // Include to pick up the CUDA_VERSION macro defined as: + // CUDA_VERSION = ( MAJOR_VERSION * 1000 ) + ( MINOR_VERSION * 10 ) + // + // When generating device code the __CUDA_ARCH__ macro is defined as: + // __CUDA_ARCH__ = ( MAJOR_CAPABILITY * 100 ) + ( MINOR_CAPABILITY * 10 ) -/* Compiling with a CUDA compiler. - * - * Include to pick up the CUDA_VERSION macro defined as: - * CUDA_VERSION = ( MAJOR_VERSION * 1000 ) + ( MINOR_VERSION * 10 ) - * - * When generating device code the __CUDA_ARCH__ macro is defined as: - * __CUDA_ARCH__ = ( MAJOR_CAPABILITY * 100 ) + ( MINOR_CAPABILITY * 10 ) - */ + #include + #include -#include -#include - -#if ! defined( CUDA_VERSION ) -#error "#include did not define CUDA_VERSION" -#endif - -#if ( CUDA_VERSION < 7000 ) -// CUDA supports C++11 in device code starting with -// version 7.0. This includes auto type and device code internal -// lambdas. -#error "Cuda version 7.0 or greater required" -#endif - -#if defined( __CUDA_ARCH__ ) && ( __CUDA_ARCH__ < 300 ) -/* Compiling with CUDA compiler for device code. 
*/ -#error "Cuda device capability >= 3.0 is required" -#endif - -#ifdef KOKKOS_ENABLE_CUDA_LAMBDA -#if ( CUDA_VERSION < 7050 ) - // CUDA supports C++11 lambdas generated in host code to be given - // to the device starting with version 7.5. But the release candidate (7.5.6) - // still identifies as 7.0 - #error "Cuda version 7.5 or greater required for host-to-device Lambda support" -#endif -#if ( CUDA_VERSION < 8000 ) && defined(__NVCC__) - #define KOKKOS_LAMBDA [=]__device__ -#else - #define KOKKOS_LAMBDA [=]__host__ __device__ - #if defined( KOKKOS_ENABLE_CXX1Z ) - #define KOKKOS_CLASS_LAMBDA [=,*this] __host__ __device__ + #if !defined( CUDA_VERSION ) + #error "#include did not define CUDA_VERSION." #endif -#endif -#define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA 1 -#endif -#endif /* #if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ ) */ + #if ( CUDA_VERSION < 7000 ) + // CUDA supports C++11 in device code starting with version 7.0. + // This includes auto type and device code internal lambdas. + #error "Cuda version 7.0 or greater required." + #endif -#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) + #if defined( __CUDA_ARCH__ ) && ( __CUDA_ARCH__ < 300 ) + // Compiling with CUDA compiler for device code. + #error "Cuda device capability >= 3.0 is required." + #endif + + #ifdef KOKKOS_ENABLE_CUDA_LAMBDA + #if ( CUDA_VERSION < 7050 ) + // CUDA supports C++11 lambdas generated in host code to be given + // to the device starting with version 7.5. But the release candidate (7.5.6) + // still identifies as 7.0. + #error "Cuda version 7.5 or greater required for host-to-device Lambda support." 
+ #endif + + #if ( CUDA_VERSION < 8000 ) && defined( __NVCC__ ) + #define KOKKOS_LAMBDA [=]__device__ + #else + #define KOKKOS_LAMBDA [=]__host__ __device__ + + #if defined( KOKKOS_ENABLE_CXX1Z ) + #define KOKKOS_CLASS_LAMBDA [=,*this] __host__ __device__ + #endif + #endif + + #define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA 1 + #endif +#endif // #if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ ) + +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) // Cuda version 8.0 still needs the functor wrapper - #if (KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA /* && (CUDA_VERSION < 8000) */ ) && defined(__NVCC__) + #if /* ( CUDA_VERSION < 8000 ) && */ defined( __NVCC__ ) #define KOKKOS_IMPL_NEED_FUNCTOR_WRAPPER #endif #endif -/*--------------------------------------------------------------------------*/ -/* Language info: C++, CUDA, OPENMP */ +//---------------------------------------------------------------------------- +// Language info: C++, CUDA, OPENMP #if defined( KOKKOS_ENABLE_CUDA ) // Compiling Cuda code to 'ptx' @@ -163,20 +160,17 @@ #define KOKKOS_FORCEINLINE_FUNCTION __device__ __host__ __forceinline__ #define KOKKOS_INLINE_FUNCTION __device__ __host__ inline #define KOKKOS_FUNCTION __device__ __host__ -#endif /* #if defined( __CUDA_ARCH__ ) */ +#endif // #if defined( __CUDA_ARCH__ ) #if defined( _OPENMP ) + // Compiling with OpenMP. + // The value of _OPENMP is an integer value YYYYMM + // where YYYY and MM are the year and month designation + // of the supported OpenMP API version. +#endif // #if defined( _OPENMP ) - /* Compiling with OpenMP. - * The value of _OPENMP is an integer value YYYYMM - * where YYYY and MM are the year and month designation - * of the supported OpenMP API version. 
- */ - -#endif /* #if defined( _OPENMP ) */ - -/*--------------------------------------------------------------------------*/ -/* Mapping compiler built-ins to KOKKOS_COMPILER_*** macros */ +//---------------------------------------------------------------------------- +// Mapping compiler built-ins to KOKKOS_COMPILER_*** macros #if defined( __NVCC__ ) // NVIDIA compiler is being used. @@ -184,29 +178,28 @@ // Host code is compiled again with another compiler. // Device code is compile to 'ptx'. #define KOKKOS_COMPILER_NVCC __NVCC__ - #else -#if ! defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) - #if !defined (KOKKOS_ENABLE_CUDA) // Compiling with clang for Cuda does not work with LAMBDAs either - // CUDA (including version 6.5) does not support giving lambdas as - // arguments to global functions. Thus its not currently possible - // to dispatch lambdas from the host. - #define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA 1 + #if !defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) + #if !defined( KOKKOS_ENABLE_CUDA ) // Compiling with clang for Cuda does not work with LAMBDAs either + // CUDA (including version 6.5) does not support giving lambdas as + // arguments to global functions. Thus its not currently possible + // to dispatch lambdas from the host. + #define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA 1 #endif #endif -#endif /* #if defined( __NVCC__ ) */ +#endif // #if defined( __NVCC__ ) -#if !defined (KOKKOS_LAMBDA) +#if !defined( KOKKOS_LAMBDA ) #define KOKKOS_LAMBDA [=] #endif -#if defined( KOKKOS_ENABLE_CXX1Z ) && !defined (KOKKOS_CLASS_LAMBDA) +#if defined( KOKKOS_ENABLE_CXX1Z ) && !defined( KOKKOS_CLASS_LAMBDA ) #define KOKKOS_CLASS_LAMBDA [=,*this] #endif -//#if ! defined( __CUDA_ARCH__ ) /* Not compiling Cuda code to 'ptx'. */ +//#if !defined( __CUDA_ARCH__ ) // Not compiling Cuda code to 'ptx'. -/* Intel compiler for host code */ +// Intel compiler for host code. 
#if defined( __INTEL_COMPILER ) #define KOKKOS_COMPILER_INTEL __INTEL_COMPILER @@ -218,7 +211,7 @@ #define KOKKOS_COMPILER_INTEL __ECC #endif -/* CRAY compiler for host code */ +// CRAY compiler for host code #if defined( _CRAYC ) #define KOKKOS_COMPILER_CRAYC _CRAYC #endif @@ -234,50 +227,53 @@ #define KOKKOS_COMPILER_APPLECC __APPLE_CC__ #endif -#if defined (__clang__) && !defined (KOKKOS_COMPILER_INTEL) +#if defined( __clang__ ) && !defined( KOKKOS_COMPILER_INTEL ) #define KOKKOS_COMPILER_CLANG __clang_major__*100+__clang_minor__*10+__clang_patchlevel__ #endif -#if ! defined( __clang__ ) && ! defined( KOKKOS_COMPILER_INTEL ) &&defined( __GNUC__ ) +#if !defined( __clang__ ) && !defined( KOKKOS_COMPILER_INTEL ) &&defined( __GNUC__ ) #define KOKKOS_COMPILER_GNU __GNUC__*100+__GNUC_MINOR__*10+__GNUC_PATCHLEVEL__ + #if ( 472 > KOKKOS_COMPILER_GNU ) #error "Compiling with GCC version earlier than 4.7.2 is not supported." #endif #endif -#if defined( __PGIC__ ) && ! defined( __GNUC__ ) +#if defined( __PGIC__ ) && !defined( __GNUC__ ) #define KOKKOS_COMPILER_PGI __PGIC__*100+__PGIC_MINOR__*10+__PGIC_PATCHLEVEL__ + #if ( 1540 > KOKKOS_COMPILER_PGI ) #error "Compiling with PGI version earlier than 15.4 is not supported." #endif #endif -//#endif /* #if ! 
defined( __CUDA_ARCH__ ) */ +//#endif // #if !defined( __CUDA_ARCH__ ) -/*--------------------------------------------------------------------------*/ -/*--------------------------------------------------------------------------*/ -/* Intel compiler macros */ +//---------------------------------------------------------------------------- +// Intel compiler macros #if defined( KOKKOS_COMPILER_INTEL ) - #define KOKKOS_ENABLE_PRAGMA_UNROLL 1 - #define KOKKOS_ENABLE_PRAGMA_IVDEP 1 #define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1 #define KOKKOS_ENABLE_PRAGMA_VECTOR 1 #define KOKKOS_ENABLE_PRAGMA_SIMD 1 + #if ( __INTEL_COMPILER > 1400 ) + #define KOKKOS_ENABLE_PRAGMA_IVDEP 1 + #endif + #define KOKKOS_RESTRICT __restrict__ #ifndef KOKKOS_ALIGN - #define KOKKOS_ALIGN(size) __attribute__((aligned(size))) + #define KOKKOS_ALIGN(size) __attribute__((aligned(size))) #endif #ifndef KOKKOS_ALIGN_PTR - #define KOKKOS_ALIGN_PTR(size) __attribute__((align_value(size))) + #define KOKKOS_ALIGN_PTR(size) __attribute__((align_value(size))) #endif #ifndef KOKKOS_ALIGN_SIZE - #define KOKKOS_ALIGN_SIZE 64 + #define KOKKOS_ALIGN_SIZE 64 #endif #if ( 1400 > KOKKOS_COMPILER_INTEL ) @@ -287,12 +283,13 @@ #warning "Compiling with Intel version 13.x probably works but is not officially supported. Official minimal version is 14.0." #endif #endif - #if ! defined( KOKKOS_ENABLE_ASM ) && ! defined( _WIN32 ) + + #if !defined( KOKKOS_ENABLE_ASM ) && !defined( _WIN32 ) #define KOKKOS_ENABLE_ASM 1 #endif - #if ! 
defined( KOKKOS_FORCEINLINE_FUNCTION ) - #if !defined (_WIN32) + #if !defined( KOKKOS_FORCEINLINE_FUNCTION ) + #if !defined( _WIN32 ) #define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline)) #else #define KOKKOS_FORCEINLINE_FUNCTION inline @@ -302,192 +299,170 @@ #if defined( __MIC__ ) // Compiling for Xeon Phi #endif - #endif -/*--------------------------------------------------------------------------*/ -/* Cray compiler macros */ +//---------------------------------------------------------------------------- +// Cray compiler macros #if defined( KOKKOS_COMPILER_CRAYC ) - - #endif -/*--------------------------------------------------------------------------*/ -/* IBM Compiler macros */ +//---------------------------------------------------------------------------- +// IBM Compiler macros #if defined( KOKKOS_COMPILER_IBM ) - #define KOKKOS_ENABLE_PRAGMA_UNROLL 1 //#define KOKKOS_ENABLE_PRAGMA_IVDEP 1 //#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1 //#define KOKKOS_ENABLE_PRAGMA_VECTOR 1 //#define KOKKOS_ENABLE_PRAGMA_SIMD 1 - #endif -/*--------------------------------------------------------------------------*/ -/* CLANG compiler macros */ +//---------------------------------------------------------------------------- +// CLANG compiler macros #if defined( KOKKOS_COMPILER_CLANG ) - //#define KOKKOS_ENABLE_PRAGMA_UNROLL 1 //#define KOKKOS_ENABLE_PRAGMA_IVDEP 1 //#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1 //#define KOKKOS_ENABLE_PRAGMA_VECTOR 1 //#define KOKKOS_ENABLE_PRAGMA_SIMD 1 - #if ! 
defined( KOKKOS_FORCEINLINE_FUNCTION ) + #if !defined( KOKKOS_FORCEINLINE_FUNCTION ) #define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline)) #endif - #endif -/*--------------------------------------------------------------------------*/ -/* GNU Compiler macros */ +//---------------------------------------------------------------------------- +// GNU Compiler macros #if defined( KOKKOS_COMPILER_GNU ) - //#define KOKKOS_ENABLE_PRAGMA_UNROLL 1 //#define KOKKOS_ENABLE_PRAGMA_IVDEP 1 //#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1 //#define KOKKOS_ENABLE_PRAGMA_VECTOR 1 //#define KOKKOS_ENABLE_PRAGMA_SIMD 1 - #if ! defined( KOKKOS_FORCEINLINE_FUNCTION ) + #if !defined( KOKKOS_FORCEINLINE_FUNCTION ) #define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline)) #endif - #if ! defined( KOKKOS_ENABLE_ASM ) && ! defined( __PGIC__ ) && \ - ( defined( __amd64 ) || \ - defined( __amd64__ ) || \ - defined( __x86_64 ) || \ - defined( __x86_64__ ) ) + #if !defined( KOKKOS_ENABLE_ASM ) && !defined( __PGIC__ ) && \ + ( defined( __amd64 ) || defined( __amd64__ ) || \ + defined( __x86_64 ) || defined( __x86_64__ ) ) #define KOKKOS_ENABLE_ASM 1 #endif - #endif -/*--------------------------------------------------------------------------*/ +//---------------------------------------------------------------------------- #if defined( KOKKOS_COMPILER_PGI ) - #define KOKKOS_ENABLE_PRAGMA_UNROLL 1 #define KOKKOS_ENABLE_PRAGMA_IVDEP 1 //#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1 #define KOKKOS_ENABLE_PRAGMA_VECTOR 1 //#define KOKKOS_ENABLE_PRAGMA_SIMD 1 - #endif -/*--------------------------------------------------------------------------*/ +//---------------------------------------------------------------------------- #if defined( KOKKOS_COMPILER_NVCC ) - - #if defined(__CUDA_ARCH__ ) + #if defined( __CUDA_ARCH__ ) #define KOKKOS_ENABLE_PRAGMA_UNROLL 1 #endif - #endif //---------------------------------------------------------------------------- -/** Define function 
marking macros if compiler specific macros are undefined: */ +// Define function marking macros if compiler specific macros are undefined: -#if ! defined( KOKKOS_FORCEINLINE_FUNCTION ) -#define KOKKOS_FORCEINLINE_FUNCTION inline +#if !defined( KOKKOS_FORCEINLINE_FUNCTION ) + #define KOKKOS_FORCEINLINE_FUNCTION inline #endif -#if ! defined( KOKKOS_INLINE_FUNCTION ) -#define KOKKOS_INLINE_FUNCTION inline +#if !defined( KOKKOS_INLINE_FUNCTION ) + #define KOKKOS_INLINE_FUNCTION inline #endif -#if ! defined( KOKKOS_FUNCTION ) -#define KOKKOS_FUNCTION /**/ -#endif - - -//---------------------------------------------------------------------------- -///** Define empty macro for restrict if necessary: */ - -#if ! defined(KOKKOS_RESTRICT) -#define KOKKOS_RESTRICT +#if !defined( KOKKOS_FUNCTION ) + #define KOKKOS_FUNCTION /**/ #endif //---------------------------------------------------------------------------- -/** Define Macro for alignment: */ -#if ! defined KOKKOS_ALIGN_SIZE -#define KOKKOS_ALIGN_SIZE 16 -#endif +// Define empty macro for restrict if necessary: -#if ! defined(KOKKOS_ALIGN) -#define KOKKOS_ALIGN(size) __attribute__((aligned(size))) -#endif - -#if ! defined(KOKKOS_ALIGN_PTR) -#define KOKKOS_ALIGN_PTR(size) __attribute__((aligned(size))) +#if !defined( KOKKOS_RESTRICT ) + #define KOKKOS_RESTRICT #endif //---------------------------------------------------------------------------- -/** Determine the default execution space for parallel dispatch. - * There is zero or one default execution space specified. - */ - -#if 1 < ( ( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA ) ? 1 : 0 ) + \ - ( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) ? 1 : 0 ) + \ - ( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) ? 1 : 0 ) + \ - ( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) ? 
1 : 0 ) ) - -#error "More than one KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_* specified" ; +// Define Macro for alignment: +#if !defined KOKKOS_ALIGN_SIZE + #define KOKKOS_ALIGN_SIZE 16 #endif -/** If default is not specified then chose from enabled execution spaces. - * Priority: CUDA, OPENMP, THREADS, SERIAL - */ -#if defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA ) -#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) -#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) -#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) -#elif defined ( KOKKOS_ENABLE_CUDA ) -#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA -#elif defined ( KOKKOS_ENABLE_OPENMP ) -#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP -#elif defined ( KOKKOS_ENABLE_PTHREAD ) -#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS +#if !defined( KOKKOS_ALIGN ) + #define KOKKOS_ALIGN(size) __attribute__((aligned(size))) +#endif + +#if !defined( KOKKOS_ALIGN_PTR ) + #define KOKKOS_ALIGN_PTR(size) __attribute__((aligned(size))) +#endif + +//---------------------------------------------------------------------------- +// Determine the default execution space for parallel dispatch. +// There is zero or one default execution space specified. + +#if 1 < ( ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA ) ? 1 : 0 ) + \ + ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) ? 1 : 0 ) + \ + ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) ? 1 : 0 ) + \ + ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS ) ? 1 : 0 ) + \ + ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) ? 1 : 0 ) ) + #error "More than one KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_* specified." +#endif + +// If default is not specified then chose from enabled execution spaces. 
+// Priority: CUDA, OPENMP, THREADS, QTHREADS, SERIAL +#if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA ) +#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) +#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) +//#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS ) +#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) +#elif defined( KOKKOS_ENABLE_CUDA ) + #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA +#elif defined( KOKKOS_ENABLE_OPENMP ) + #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP +#elif defined( KOKKOS_ENABLE_PTHREAD ) + #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS +//#elif defined( KOKKOS_ENABLE_QTHREADS ) +// #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS #else -#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL + #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL #endif //---------------------------------------------------------------------------- -/** Determine for what space the code is being compiled: */ +// Determine for what space the code is being compiled: -#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) && defined (KOKKOS_ENABLE_CUDA) -#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA +#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) && defined( KOKKOS_ENABLE_CUDA ) + #define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA #else -#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + #define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST #endif -//---------------------------------------------------------------------------- //---------------------------------------------------------------------------- #if ( defined( _POSIX_C_SOURCE ) && _POSIX_C_SOURCE >= 200112L ) || \ ( defined( _XOPEN_SOURCE ) && _XOPEN_SOURCE >= 600 ) -#if defined(KOKKOS_ENABLE_PERFORMANCE_POSIX_MEMALIGN) -#define KOKKOS_ENABLE_POSIX_MEMALIGN 1 -#endif + #if defined( KOKKOS_ENABLE_PERFORMANCE_POSIX_MEMALIGN ) + #define KOKKOS_ENABLE_POSIX_MEMALIGN 1 + #endif #endif //---------------------------------------------------------------------------- 
-//---------------------------------------------------------------------------- - -/**Enable Profiling by default**/ +// Enable Profiling by default #ifndef KOKKOS_ENABLE_PROFILING -#define KOKKOS_ENABLE_PROFILING 1 + #define KOKKOS_ENABLE_PROFILING 1 #endif -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #ifndef KOKKOS_MACROS_HPP */ - +#endif // #ifndef KOKKOS_MACROS_HPP diff --git a/lib/kokkos/core/src/Kokkos_MemoryPool.hpp b/lib/kokkos/core/src/Kokkos_MemoryPool.hpp index 2d45926e76..eadad10b49 100644 --- a/lib/kokkos/core/src/Kokkos_MemoryPool.hpp +++ b/lib/kokkos/core/src/Kokkos_MemoryPool.hpp @@ -1294,6 +1294,7 @@ public: KOKKOS_INLINE_FUNCTION size_t get_min_block_size() const { return MIN_BLOCK_SIZE; } + KOKKOS_INLINE_FUNCTION size_t get_mem_size() const { return m_data_size; } private: diff --git a/lib/kokkos/core/src/Kokkos_OpenMP.hpp b/lib/kokkos/core/src/Kokkos_OpenMP.hpp index a337d1a9d4..c0c43b92f4 100644 --- a/lib/kokkos/core/src/Kokkos_OpenMP.hpp +++ b/lib/kokkos/core/src/Kokkos_OpenMP.hpp @@ -66,7 +66,6 @@ #include #include -#include /*--------------------------------------------------------------------------*/ namespace Kokkos { @@ -196,6 +195,7 @@ struct VerifyExecutionCanAccessMemorySpace #include #include +#include /*--------------------------------------------------------------------------*/ #endif /* #if defined( KOKKOS_ENABLE_OPENMP ) && defined( _OPENMP ) */ diff --git a/lib/kokkos/core/src/Kokkos_Pair.hpp b/lib/kokkos/core/src/Kokkos_Pair.hpp index 83436826f4..067767f2f8 100644 --- a/lib/kokkos/core/src/Kokkos_Pair.hpp +++ b/lib/kokkos/core/src/Kokkos_Pair.hpp @@ -78,16 +78,14 @@ struct pair /// This calls the default constructors of T1 and T2. It won't /// compile if those default constructors are not defined and /// public. 
- KOKKOS_FORCEINLINE_FUNCTION - pair() - : first(), second() - {} + KOKKOS_FORCEINLINE_FUNCTION constexpr + pair() = default ; /// \brief Constructor that takes both elements of the pair. /// /// This calls the copy constructors of T1 and T2. It won't compile /// if those copy constructors are not defined and public. - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_FORCEINLINE_FUNCTION constexpr pair(first_type const& f, second_type const& s) : first(f), second(s) {} @@ -97,7 +95,7 @@ struct pair /// This calls the copy constructors of T1 and T2. It won't compile /// if those copy constructors are not defined and public. template - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_FORCEINLINE_FUNCTION constexpr pair( const pair &p) : first(p.first), second(p.second) {} @@ -107,7 +105,7 @@ struct pair /// This calls the copy constructors of T1 and T2. It won't compile /// if those copy constructors are not defined and public. template - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_FORCEINLINE_FUNCTION constexpr pair( const volatile pair &p) : first(p.first), second(p.second) {} @@ -183,7 +181,7 @@ struct pair /// /// This calls the copy constructors of T1 and T2. It won't compile /// if those copy constructors are not defined and public. - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_FORCEINLINE_FUNCTION constexpr pair(first_type f, second_type s) : first(f), second(s) {} @@ -193,7 +191,7 @@ struct pair /// This calls the copy constructors of T1 and T2. It won't compile /// if those copy constructors are not defined and public. template - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_FORCEINLINE_FUNCTION constexpr pair( const pair &p) : first(p.first), second(p.second) {} @@ -247,7 +245,7 @@ struct pair /// /// This calls the copy constructors of T1 and T2. It won't compile /// if those copy constructors are not defined and public. 
- KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_FORCEINLINE_FUNCTION constexpr pair(first_type const& f, second_type s) : first(f), second(s) {} @@ -257,7 +255,7 @@ struct pair /// This calls the copy constructors of T1 and T2. It won't compile /// if those copy constructors are not defined and public. template - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_FORCEINLINE_FUNCTION constexpr pair( const pair &p) : first(p.first), second(p.second) {} @@ -311,7 +309,7 @@ struct pair /// /// This calls the copy constructors of T1 and T2. It won't compile /// if those copy constructors are not defined and public. - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_FORCEINLINE_FUNCTION constexpr pair(first_type f, second_type const& s) : first(f), second(s) {} @@ -321,7 +319,7 @@ struct pair /// This calls the copy constructors of T1 and T2. It won't compile /// if those copy constructors are not defined and public. template - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_FORCEINLINE_FUNCTION constexpr pair( const pair &p) : first(p.first), second(p.second) {} @@ -366,31 +364,31 @@ bool operator== (const pair& lhs, const pair& rhs) //! Inequality operator for Kokkos::pair. template -KOKKOS_FORCEINLINE_FUNCTION +KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator!= (const pair& lhs, const pair& rhs) { return !(lhs==rhs); } //! Less-than operator for Kokkos::pair. template -KOKKOS_FORCEINLINE_FUNCTION +KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator< (const pair& lhs, const pair& rhs) { return lhs.first -KOKKOS_FORCEINLINE_FUNCTION +KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator<= (const pair& lhs, const pair& rhs) { return !(rhs -KOKKOS_FORCEINLINE_FUNCTION +KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator> (const pair& lhs, const pair& rhs) { return rhs -KOKKOS_FORCEINLINE_FUNCTION +KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator>= (const pair& lhs, const pair& rhs) { return !(lhs= (const pair& lhs, const pair& rhs) /// This is a "nonmember constructor" for Kokkos::pair. 
It works just /// like std::make_pair. template -KOKKOS_FORCEINLINE_FUNCTION +KOKKOS_FORCEINLINE_FUNCTION constexpr pair make_pair (T1 x, T2 y) { return ( pair(x,y) ); } @@ -460,23 +458,21 @@ struct pair first_type first; enum { second = 0 }; - KOKKOS_FORCEINLINE_FUNCTION - pair() - : first() - {} + KOKKOS_FORCEINLINE_FUNCTION constexpr + pair() = default ; - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_FORCEINLINE_FUNCTION constexpr pair(const first_type & f) : first(f) {} - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_FORCEINLINE_FUNCTION constexpr pair(const first_type & f, int) : first(f) {} template - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_FORCEINLINE_FUNCTION constexpr pair( const pair &p) : first(p.first) {} @@ -495,32 +491,32 @@ struct pair // template -KOKKOS_FORCEINLINE_FUNCTION +KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator== (const pair& lhs, const pair& rhs) { return lhs.first==rhs.first; } template -KOKKOS_FORCEINLINE_FUNCTION +KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator!= (const pair& lhs, const pair& rhs) { return !(lhs==rhs); } template -KOKKOS_FORCEINLINE_FUNCTION +KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator< (const pair& lhs, const pair& rhs) { return lhs.first -KOKKOS_FORCEINLINE_FUNCTION +KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator<= (const pair& lhs, const pair& rhs) { return !(rhs -KOKKOS_FORCEINLINE_FUNCTION +KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator> (const pair& lhs, const pair& rhs) { return rhs -KOKKOS_FORCEINLINE_FUNCTION +KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator>= (const pair& lhs, const pair& rhs) { return !(lhs= (const pair& lhs, const pair& rhs) #endif //KOKKOS_PAIR_HPP + diff --git a/lib/kokkos/core/src/Kokkos_Parallel.hpp b/lib/kokkos/core/src/Kokkos_Parallel.hpp index 64b1502bcc..e412e608b2 100644 --- a/lib/kokkos/core/src/Kokkos_Parallel.hpp +++ b/lib/kokkos/core/src/Kokkos_Parallel.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// 
+// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -52,13 +52,14 @@ #include #include -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) #include #include #endif #include #include +#include #include #ifdef KOKKOS_DEBUG @@ -175,7 +176,7 @@ void parallel_for( const ExecPolicy & policy , typename Impl::enable_if< ! Impl::is_integral< ExecPolicy >::value >::type * = 0 ) { -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) uint64_t kpID = 0; if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::beginParallelFor("" == str ? typeid(FunctorType).name() : str, 0, &kpID); @@ -185,10 +186,10 @@ void parallel_for( const ExecPolicy & policy Kokkos::Impl::shared_allocation_tracking_claim_and_disable(); Impl::ParallelFor< FunctorType , ExecPolicy > closure( functor , policy ); Kokkos::Impl::shared_allocation_tracking_release_and_enable(); - + closure.execute(); -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::endParallelFor(kpID); } @@ -207,20 +208,20 @@ void parallel_for( const size_t work_count execution_space ; typedef RangePolicy< execution_space > policy ; -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) uint64_t kpID = 0; if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::beginParallelFor("" == str ? 
typeid(FunctorType).name() : str, 0, &kpID); } #endif - + Kokkos::Impl::shared_allocation_tracking_claim_and_disable(); Impl::ParallelFor< FunctorType , policy > closure( functor , policy(0,work_count) ); Kokkos::Impl::shared_allocation_tracking_release_and_enable(); closure.execute(); -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::endParallelFor(kpID); } @@ -417,7 +418,7 @@ void parallel_scan( const ExecutionPolicy & policy , typename Impl::enable_if< ! Impl::is_integral< ExecutionPolicy >::value >::type * = 0 ) { -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) uint64_t kpID = 0; if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::beginParallelScan("" == str ? typeid(FunctorType).name() : str, 0, &kpID); @@ -430,7 +431,7 @@ void parallel_scan( const ExecutionPolicy & policy closure.execute(); -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::endParallelScan(kpID); } @@ -450,20 +451,20 @@ void parallel_scan( const size_t work_count typedef Kokkos::RangePolicy< execution_space > policy ; -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) uint64_t kpID = 0; if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::beginParallelScan("" == str ? 
typeid(FunctorType).name() : str, 0, &kpID); } #endif - + Kokkos::Impl::shared_allocation_tracking_claim_and_disable(); Impl::ParallelScan< FunctorType , policy > closure( functor , policy(0,work_count) ); Kokkos::Impl::shared_allocation_tracking_release_and_enable(); closure.execute(); -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::endParallelScan(kpID); } diff --git a/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp b/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp index a3649b4422..900dce19fe 100644 --- a/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp +++ b/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp @@ -1094,7 +1094,7 @@ namespace Impl { const PolicyType& policy, const FunctorType& functor, ReturnType& return_value) { - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) uint64_t kpID = 0; if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::beginParallelReduce("" == label ? typeid(FunctorType).name() : label, 0, &kpID); @@ -1116,7 +1116,7 @@ namespace Impl { Kokkos::Impl::shared_allocation_tracking_release_and_enable(); closure.execute(); - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::endParallelReduce(kpID); } diff --git a/lib/kokkos/core/src/Kokkos_Qthread.hpp b/lib/kokkos/core/src/Kokkos_Qthreads.hpp similarity index 72% rename from lib/kokkos/core/src/Kokkos_Qthread.hpp rename to lib/kokkos/core/src/Kokkos_Qthreads.hpp index c58518b065..0507552c3f 100644 --- a/lib/kokkos/core/src/Kokkos_Qthread.hpp +++ b/lib/kokkos/core/src/Kokkos_Qthreads.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. 
Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,57 +36,75 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ -#ifndef KOKKOS_QTHREAD_HPP -#define KOKKOS_QTHREAD_HPP +#ifndef KOKKOS_QTHREADS_HPP +#define KOKKOS_QTHREADS_HPP + +#include + +#ifdef KOKKOS_ENABLE_QTHREADS + +// Defines to enable experimental Qthreads functionality. +#define QTHREAD_LOCAL_PRIORITY +#define CLONED_TASKS + +#include #include #include -#include -#include -#include + #include -#include +#include +#include +//#include +//#include +//#include // Uncomment when Tasking working. +#include #include +#include /*--------------------------------------------------------------------------*/ namespace Kokkos { + namespace Impl { -class QthreadExec ; + +class QthreadsExec; + } // namespace Impl + } // namespace Kokkos /*--------------------------------------------------------------------------*/ namespace Kokkos { -/** \brief Execution space supported by Qthread */ -class Qthread { +/** \brief Execution space supported by Qthreads */ +class Qthreads { public: //! \name Type declarations that all Kokkos devices must provide. //@{ //! Tag this class as an execution space - typedef Qthread execution_space ; - typedef Kokkos::HostSpace memory_space ; + typedef Qthreads execution_space; + typedef Kokkos::HostSpace memory_space; //! 
This execution space preferred device_type - typedef Kokkos::Device device_type; + typedef Kokkos::Device< execution_space, memory_space > device_type; - typedef Kokkos::LayoutRight array_layout ; - typedef memory_space::size_type size_type ; + typedef Kokkos::LayoutRight array_layout; + typedef memory_space::size_type size_type; - typedef ScratchMemorySpace< Qthread > scratch_memory_space ; + typedef ScratchMemorySpace< Qthreads > scratch_memory_space; //@} /*------------------------------------------------------------------------*/ /** \brief Initialization will construct one or more instances */ - static Qthread & instance( int = 0 ); + static Qthreads & instance( int = 0 ); /** \brief Set the execution space to a "sleep" state. * @@ -100,14 +118,14 @@ public: bool sleep(); /** \brief Wake from the sleep state. - * + * * \return True if enters or is in the "ready" state. * False if functions are currently executing. */ static bool wake(); /** \brief Wait until all dispatched functions to complete. - * + * * The parallel_for or parallel_reduce dispatch of a functor may * return asynchronously, before the functor completes. This * method does not return until all dispatched functors on this @@ -128,26 +146,24 @@ public: static void finalize(); /** \brief Print configuration information to the given output stream. 
*/ - static void print_configuration( std::ostream & , const bool detail = false ); + static void print_configuration( std::ostream &, const bool detail = false ); - int shepherd_size() const ; - int shepherd_worker_size() const ; + int shepherd_size() const; + int shepherd_worker_size() const; }; -/*--------------------------------------------------------------------------*/ - } // namespace Kokkos -/*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ namespace Kokkos { + namespace Impl { template<> -struct MemorySpaceAccess - < Kokkos::Qthread::memory_space - , Kokkos::Qthread::scratch_memory_space +struct MemorySpaceAccess + < Kokkos::Qthreads::memory_space + , Kokkos::Qthreads::scratch_memory_space > { enum { assignable = false }; @@ -157,27 +173,26 @@ struct MemorySpaceAccess template<> struct VerifyExecutionCanAccessMemorySpace - < Kokkos::Qthread::memory_space - , Kokkos::Qthread::scratch_memory_space + < Kokkos::Qthreads::memory_space + , Kokkos::Qthreads::scratch_memory_space > { enum { value = true }; - inline static void verify( void ) { } - inline static void verify( const void * ) { } + inline static void verify( void ) {} + inline static void verify( const void * ) {} }; } // namespace Impl + } // namespace Kokkos -/*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ -#include -#include -#include +#include +#include +//#include // Uncomment when Tasking working. +//#include // Uncomment when Tasking working. 
-#endif /* #define KOKKOS_QTHREAD_HPP */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- +#endif // #define KOKKOS_ENABLE_QTHREADS +#endif // #define KOKKOS_QTHREADS_HPP diff --git a/lib/kokkos/core/src/Kokkos_Serial.hpp b/lib/kokkos/core/src/Kokkos_Serial.hpp index f262535910..72710e8167 100644 --- a/lib/kokkos/core/src/Kokkos_Serial.hpp +++ b/lib/kokkos/core/src/Kokkos_Serial.hpp @@ -56,6 +56,8 @@ #include #include #include +#include +#include #include #include @@ -138,30 +140,15 @@ public: static void initialize( unsigned threads_count = 1 , unsigned use_numa_count = 0 , unsigned use_cores_per_numa = 0 , - bool allow_asynchronous_threadpool = false) { - (void) threads_count; - (void) use_numa_count; - (void) use_cores_per_numa; - (void) allow_asynchronous_threadpool; + bool allow_asynchronous_threadpool = false); - // Init the array of locks used for arbitrarily sized atomics - Impl::init_lock_array_host_space(); - #if (KOKKOS_ENABLE_PROFILING) - Kokkos::Profiling::initialize(); - #endif - } - - static int is_initialized() { return 1 ; } + static int is_initialized(); /** \brief Return the maximum amount of concurrency. */ static int concurrency() {return 1;}; //! Free any resources being consumed by the device. - static void finalize() { - #if (KOKKOS_ENABLE_PROFILING) - Kokkos::Profiling::finalize(); - #endif - } + static void finalize(); //! Print configuration information to the given output stream. 
static void print_configuration( std::ostream & , const bool /* detail */ = false ) {} @@ -177,10 +164,6 @@ public: inline static unsigned max_hardware_threads() { return thread_pool_size(0); } //-------------------------------------------------------------------------- - - static void * scratch_memory_resize( unsigned reduce_size , unsigned shared_size ); - - //-------------------------------------------------------------------------- }; } // namespace Kokkos @@ -192,7 +175,7 @@ namespace Kokkos { namespace Impl { template<> -struct MemorySpaceAccess +struct MemorySpaceAccess < Kokkos::Serial::memory_space , Kokkos::Serial::scratch_memory_space > @@ -213,22 +196,6 @@ struct VerifyExecutionCanAccessMemorySpace inline static void verify( const void * ) { } }; -namespace SerialImpl { - -struct Sentinel { - - void * m_scratch ; - unsigned m_reduce_end ; - unsigned m_shared_end ; - - Sentinel(); - ~Sentinel(); - static Sentinel & singleton(); -}; - -inline -unsigned align( unsigned n ); -} } // namespace Impl } // namespace Kokkos @@ -238,89 +205,26 @@ unsigned align( unsigned n ); namespace Kokkos { namespace Impl { -class SerialTeamMember { -private: - typedef Kokkos::ScratchMemorySpace< Kokkos::Serial > scratch_memory_space ; - const scratch_memory_space m_space ; - const int m_league_rank ; - const int m_league_size ; +// Resize thread team data scratch memory +void serial_resize_thread_team_data( size_t pool_reduce_bytes + , size_t team_reduce_bytes + , size_t team_shared_bytes + , size_t thread_local_bytes ); - SerialTeamMember & operator = ( const SerialTeamMember & ); +HostThreadTeamData * serial_get_thread_team_data(); -public: +} /* namespace Impl */ +} /* namespace Kokkos */ - KOKKOS_INLINE_FUNCTION - const scratch_memory_space & team_shmem() const { return m_space ; } - KOKKOS_INLINE_FUNCTION - const scratch_memory_space & team_scratch(int) const - { return m_space ; } - - KOKKOS_INLINE_FUNCTION - const scratch_memory_space & thread_scratch(int) const - { 
return m_space ; } - - KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank ; } - KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size ; } - KOKKOS_INLINE_FUNCTION int team_rank() const { return 0 ; } - KOKKOS_INLINE_FUNCTION int team_size() const { return 1 ; } - - KOKKOS_INLINE_FUNCTION void team_barrier() const {} - - template - KOKKOS_INLINE_FUNCTION - void team_broadcast(const ValueType& , const int& ) const {} - - template< class ValueType, class JoinOp > - KOKKOS_INLINE_FUNCTION - ValueType team_reduce( const ValueType & value , const JoinOp & ) const - { - return value ; - } - - /** \brief Intra-team exclusive prefix sum with team_rank() ordering - * with intra-team non-deterministic ordering accumulation. - * - * The global inter-team accumulation value will, at the end of the - * league's parallel execution, be the scan's total. - * Parallel execution ordering of the league's teams is non-deterministic. - * As such the base value for each team's scan operation is similarly - * non-deterministic. - */ - template< typename Type > - KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value , Type * const global_accum ) const - { - const Type tmp = global_accum ? *global_accum : Type(0) ; - if ( global_accum ) { *global_accum += value ; } - return tmp ; - } - - /** \brief Intra-team exclusive prefix sum with team_rank() ordering. 
- * - * The highest rank thread can compute the reduction total as - * reduction_total = dev.team_scan( value ) + value ; - */ - template< typename Type > - KOKKOS_INLINE_FUNCTION Type team_scan( const Type & ) const - { return Type(0); } - - //---------------------------------------- - // Execution space specific: - - SerialTeamMember( int arg_league_rank - , int arg_league_size - , int arg_shared_size - ); -}; - -} // namespace Impl +namespace Kokkos { +namespace Impl { /* * < Kokkos::Serial , WorkArgTag > * < WorkArgTag , Impl::enable_if< std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value >::type > * */ -namespace Impl { template< class ... Properties > class TeamPolicyInternal< Kokkos::Serial , Properties ... >:public PolicyTraits { @@ -441,14 +345,11 @@ public: return p; }; - typedef Impl::SerialTeamMember member_type ; + typedef Impl::HostThreadTeamMember< Kokkos::Serial > member_type ; }; } /* namespace Impl */ } /* namespace Kokkos */ -/*--------------------------------------------------------------------------*/ -/*--------------------------------------------------------------------------*/ - /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ /* Parallel patterns for Kokkos::Serial with RangePolicy */ @@ -521,11 +422,12 @@ private: typedef Kokkos::Impl::if_c< std::is_same::value, FunctorType, ReducerType> ReducerConditional; typedef typename ReducerConditional::type ReducerTypeFwd; - typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTag > ValueTraits ; + typedef FunctorAnalysis< FunctorPatternInterface::REDUCE , Policy , FunctorType > Analysis ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ; - typedef typename ValueTraits::pointer_type pointer_type ; - typedef typename ValueTraits::reference_type reference_type ; + typedef typename Analysis::pointer_type pointer_type ; + typedef 
typename Analysis::reference_type reference_type ; const FunctorType m_functor ; const Policy m_policy ; @@ -535,34 +437,25 @@ private: template< class TagType > inline typename std::enable_if< std::is_same< TagType , void >::value >::type - exec( pointer_type ptr ) const + exec( reference_type update ) const { - reference_type update = ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , ptr ); - const typename Policy::member_type e = m_policy.end(); for ( typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) { m_functor( i , update ); } - - Kokkos::Impl::FunctorFinal< ReducerTypeFwd , TagType >:: - final( ReducerConditional::select(m_functor , m_reducer) , ptr ); } template< class TagType > inline typename std::enable_if< ! std::is_same< TagType , void >::value >::type - exec( pointer_type ptr ) const + exec( reference_type update ) const { const TagType t{} ; - reference_type update = ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , ptr ); const typename Policy::member_type e = m_policy.end(); for ( typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) { m_functor( t , i , update ); } - - Kokkos::Impl::FunctorFinal< ReducerTypeFwd , TagType >:: - final( ReducerConditional::select(m_functor , m_reducer) , ptr ); } public: @@ -570,10 +463,29 @@ public: inline void execute() const { - pointer_type ptr = (pointer_type) Kokkos::Serial::scratch_memory_resize - ( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) , 0 ); + const size_t pool_reduce_size = + Analysis::value_size( ReducerConditional::select(m_functor , m_reducer) ); + const size_t team_reduce_size = 0 ; // Never shrinks + const size_t team_shared_size = 0 ; // Never shrinks + const size_t thread_local_size = 0 ; // Never shrinks - this-> template exec< WorkTag >( m_result_ptr ? 
m_result_ptr : ptr ); + serial_resize_thread_team_data( pool_reduce_size + , team_reduce_size + , team_shared_size + , thread_local_size ); + + HostThreadTeamData & data = *serial_get_thread_team_data(); + + pointer_type ptr = + m_result_ptr ? m_result_ptr : pointer_type(data.pool_reduce_local()); + + reference_type update = + ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , ptr ); + + this-> template exec< WorkTag >( update ); + + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >:: + final( ReducerConditional::select(m_functor , m_reducer) , ptr ); } template< class HostViewType > @@ -587,7 +499,7 @@ public: : m_functor( arg_functor ) , m_policy( arg_policy ) , m_reducer( InvalidType() ) - , m_result_ptr( arg_result_view.ptr_on_device() ) + , m_result_ptr( arg_result_view.data() ) { static_assert( Kokkos::is_view< HostViewType >::value , "Kokkos::Serial reduce result must be a View" ); @@ -623,11 +535,13 @@ private: typedef Kokkos::RangePolicy< Traits ... > Policy ; typedef typename Policy::work_tag WorkTag ; - typedef Kokkos::Impl::FunctorValueTraits< FunctorType , WorkTag > ValueTraits ; + + typedef FunctorAnalysis< FunctorPatternInterface::SCAN , Policy , FunctorType > Analysis ; + typedef Kokkos::Impl::FunctorValueInit< FunctorType , WorkTag > ValueInit ; - typedef typename ValueTraits::pointer_type pointer_type ; - typedef typename ValueTraits::reference_type reference_type ; + typedef typename Analysis::pointer_type pointer_type ; + typedef typename Analysis::reference_type reference_type ; const FunctorType m_functor ; const Policy m_policy ; @@ -635,10 +549,8 @@ private: template< class TagType > inline typename std::enable_if< std::is_same< TagType , void >::value >::type - exec( pointer_type ptr ) const + exec( reference_type update ) const { - reference_type update = ValueInit::init( m_functor , ptr ); - const typename Policy::member_type e = m_policy.end(); for ( typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) 
{ m_functor( i , update , true ); @@ -648,11 +560,9 @@ private: template< class TagType > inline typename std::enable_if< ! std::is_same< TagType , void >::value >::type - exec( pointer_type ptr ) const + exec( reference_type update ) const { const TagType t{} ; - reference_type update = ValueInit::init( m_functor , ptr ); - const typename Policy::member_type e = m_policy.end(); for ( typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) { m_functor( t , i , update , true ); @@ -664,9 +574,22 @@ public: inline void execute() const { - pointer_type ptr = (pointer_type) - Kokkos::Serial::scratch_memory_resize( ValueTraits::value_size( m_functor ) , 0 ); - this-> template exec< WorkTag >( ptr ); + const size_t pool_reduce_size = Analysis::value_size( m_functor ); + const size_t team_reduce_size = 0 ; // Never shrinks + const size_t team_shared_size = 0 ; // Never shrinks + const size_t thread_local_size = 0 ; // Never shrinks + + serial_resize_thread_team_data( pool_reduce_size + , team_reduce_size + , team_shared_size + , thread_local_size ); + + HostThreadTeamData & data = *serial_get_thread_team_data(); + + reference_type update = + ValueInit::init( m_functor , pointer_type(data.pool_reduce_local()) ); + + this-> template exec< WorkTag >( update ); } inline @@ -696,6 +619,8 @@ class ParallelFor< FunctorType { private: + enum { TEAM_REDUCE_SIZE = 512 }; + typedef TeamPolicyInternal< Kokkos::Serial , Properties ...> Policy ; typedef typename Policy::member_type Member ; @@ -706,21 +631,21 @@ private: template< class TagType > inline typename std::enable_if< std::is_same< TagType , void >::value >::type - exec() const + exec( HostThreadTeamData & data ) const { for ( int ileague = 0 ; ileague < m_league ; ++ileague ) { - m_functor( Member(ileague,m_league,m_shared) ); + m_functor( Member(data,ileague,m_league) ); } } template< class TagType > inline typename std::enable_if< ! 
std::is_same< TagType , void >::value >::type - exec() const + exec( HostThreadTeamData & data ) const { const TagType t{} ; for ( int ileague = 0 ; ileague < m_league ; ++ileague ) { - m_functor( t , Member(ileague,m_league,m_shared) ); + m_functor( t , Member(data,ileague,m_league) ); } } @@ -729,15 +654,28 @@ public: inline void execute() const { - Kokkos::Serial::scratch_memory_resize( 0 , m_shared ); - this-> template exec< typename Policy::work_tag >(); + const size_t pool_reduce_size = 0 ; // Never shrinks + const size_t team_reduce_size = TEAM_REDUCE_SIZE ; + const size_t team_shared_size = m_shared ; + const size_t thread_local_size = 0 ; // Never shrinks + + serial_resize_thread_team_data( pool_reduce_size + , team_reduce_size + , team_shared_size + , thread_local_size ); + + HostThreadTeamData & data = *serial_get_thread_team_data(); + + this->template exec< typename Policy::work_tag >( data ); } ParallelFor( const FunctorType & arg_functor , const Policy & arg_policy ) : m_functor( arg_functor ) , m_league( arg_policy.league_size() ) - , m_shared( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , 1 ) ) + , m_shared( arg_policy.scratch_size(0) + + arg_policy.scratch_size(1) + + FunctorTeamShmemSize< FunctorType >::value( arg_functor , 1 ) ) { } }; @@ -752,18 +690,22 @@ class ParallelReduce< FunctorType { private: + enum { TEAM_REDUCE_SIZE = 512 }; + typedef TeamPolicyInternal< Kokkos::Serial, Properties ... 
> Policy ; + + typedef FunctorAnalysis< FunctorPatternInterface::REDUCE , Policy , FunctorType > Analysis ; + typedef typename Policy::member_type Member ; typedef typename Policy::work_tag WorkTag ; typedef Kokkos::Impl::if_c< std::is_same::value, FunctorType, ReducerType> ReducerConditional; typedef typename ReducerConditional::type ReducerTypeFwd; - typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTag > ValueTraits ; typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ; - typedef typename ValueTraits::pointer_type pointer_type ; - typedef typename ValueTraits::reference_type reference_type ; + typedef typename Analysis::pointer_type pointer_type ; + typedef typename Analysis::reference_type reference_type ; const FunctorType m_functor ; const int m_league ; @@ -774,33 +716,23 @@ private: template< class TagType > inline typename std::enable_if< std::is_same< TagType , void >::value >::type - exec( pointer_type ptr ) const + exec( HostThreadTeamData & data , reference_type update ) const { - reference_type update = ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , ptr ); - for ( int ileague = 0 ; ileague < m_league ; ++ileague ) { - m_functor( Member(ileague,m_league,m_shared) , update ); + m_functor( Member(data,ileague,m_league) , update ); } - - Kokkos::Impl::FunctorFinal< ReducerTypeFwd , TagType >:: - final( ReducerConditional::select(m_functor , m_reducer) , ptr ); } template< class TagType > inline typename std::enable_if< ! 
std::is_same< TagType , void >::value >::type - exec( pointer_type ptr ) const + exec( HostThreadTeamData & data , reference_type update ) const { const TagType t{} ; - reference_type update = ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , ptr ); - for ( int ileague = 0 ; ileague < m_league ; ++ileague ) { - m_functor( t , Member(ileague,m_league,m_shared) , update ); + m_functor( t , Member(data,ileague,m_league) , update ); } - - Kokkos::Impl::FunctorFinal< ReducerTypeFwd , TagType >:: - final( ReducerConditional::select(m_functor , m_reducer) , ptr ); } public: @@ -808,10 +740,31 @@ public: inline void execute() const { - pointer_type ptr = (pointer_type) Kokkos::Serial::scratch_memory_resize - ( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) , m_shared ); + const size_t pool_reduce_size = + Analysis::value_size( ReducerConditional::select(m_functor, m_reducer)); - this-> template exec< WorkTag >( m_result_ptr ? m_result_ptr : ptr ); + const size_t team_reduce_size = TEAM_REDUCE_SIZE ; + const size_t team_shared_size = m_shared ; + const size_t thread_local_size = 0 ; // Never shrinks + + serial_resize_thread_team_data( pool_reduce_size + , team_reduce_size + , team_shared_size + , thread_local_size ); + + + HostThreadTeamData & data = *serial_get_thread_team_data(); + + pointer_type ptr = + m_result_ptr ? 
m_result_ptr : pointer_type(data.pool_reduce_local()); + + reference_type update = + ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , ptr ); + + this-> template exec< WorkTag >( data , update ); + + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >:: + final( ReducerConditional::select(m_functor , m_reducer) , ptr ); } template< class ViewType > @@ -825,8 +778,10 @@ public: : m_functor( arg_functor ) , m_league( arg_policy.league_size() ) , m_reducer( InvalidType() ) - , m_result_ptr( arg_result.ptr_on_device() ) - , m_shared( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< FunctorType >::value( m_functor , 1 ) ) + , m_result_ptr( arg_result.data() ) + , m_shared( arg_policy.scratch_size(0) + + arg_policy.scratch_size(1) + + FunctorTeamShmemSize< FunctorType >::value( m_functor , 1 ) ) { static_assert( Kokkos::is_view< ViewType >::value , "Reduction result on Kokkos::Serial must be a Kokkos::View" ); @@ -838,13 +793,15 @@ public: inline ParallelReduce( const FunctorType & arg_functor - , Policy arg_policy - , const ReducerType& reducer ) - : m_functor( arg_functor ) - , m_league( arg_policy.league_size() ) - , m_reducer( reducer ) - , m_result_ptr( reducer.result_view().data() ) - , m_shared( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , arg_policy.team_size() ) ) + , Policy arg_policy + , const ReducerType& reducer ) + : m_functor( arg_functor ) + , m_league( arg_policy.league_size() ) + , m_reducer( reducer ) + , m_result_ptr( reducer.result_view().data() ) + , m_shared( arg_policy.scratch_size(0) + + arg_policy.scratch_size(1) + + FunctorTeamShmemSize< FunctorType >::value( arg_functor , 1 ) ) { /*static_assert( std::is_same< typename ViewType::memory_space , Kokkos::HostSpace >::value @@ -858,261 +815,6 @@ public: /*--------------------------------------------------------------------------*/ 
/*--------------------------------------------------------------------------*/ -/* Nested parallel patterns for Kokkos::Serial with TeamPolicy */ - -namespace Kokkos { -namespace Impl { - -template -struct TeamThreadRangeBoundariesStruct { - typedef iType index_type; - const iType begin ; - const iType end ; - enum {increment = 1}; - const SerialTeamMember& thread; - - KOKKOS_INLINE_FUNCTION - TeamThreadRangeBoundariesStruct (const SerialTeamMember& arg_thread, const iType& arg_count) - : begin(0) - , end(arg_count) - , thread(arg_thread) - {} - - KOKKOS_INLINE_FUNCTION - TeamThreadRangeBoundariesStruct (const SerialTeamMember& arg_thread, const iType& arg_begin, const iType & arg_end ) - : begin( arg_begin ) - , end( arg_end) - , thread( arg_thread ) - {} -}; - - template - struct ThreadVectorRangeBoundariesStruct { - typedef iType index_type; - enum {start = 0}; - const iType end; - enum {increment = 1}; - - KOKKOS_INLINE_FUNCTION - ThreadVectorRangeBoundariesStruct (const SerialTeamMember& thread, const iType& count): - end( count ) - {} - }; - -} // namespace Impl - -template< typename iType > -KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct -TeamThreadRange( const Impl::SerialTeamMember& thread, const iType & count ) -{ - return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::SerialTeamMember >( thread, count ); -} - -template< typename iType1, typename iType2 > -KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type, - Impl::SerialTeamMember > -TeamThreadRange( const Impl::SerialTeamMember& thread, const iType1 & begin, const iType2 & end ) -{ - typedef typename std::common_type< iType1, iType2 >::type iType; - return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::SerialTeamMember >( thread, iType(begin), iType(end) ); -} - -template -KOKKOS_INLINE_FUNCTION -Impl::ThreadVectorRangeBoundariesStruct - ThreadVectorRange(const Impl::SerialTeamMember& thread, const iType& count) { - 
return Impl::ThreadVectorRangeBoundariesStruct(thread,count); -} - -KOKKOS_INLINE_FUNCTION -Impl::ThreadSingleStruct PerTeam(const Impl::SerialTeamMember& thread) { - return Impl::ThreadSingleStruct(thread); -} - -KOKKOS_INLINE_FUNCTION -Impl::VectorSingleStruct PerThread(const Impl::SerialTeamMember& thread) { - return Impl::VectorSingleStruct(thread); -} - -} // namespace Kokkos - -namespace Kokkos { - - /** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all threads of the the calling thread team. - * This functionality requires C++11 support.*/ -template -KOKKOS_INLINE_FUNCTION -void parallel_for(const Impl::TeamThreadRangeBoundariesStruct& loop_boundaries, const Lambda& lambda) { - for( iType i = loop_boundaries.begin; i < loop_boundaries.end; i+=loop_boundaries.increment) - lambda(i); -} - -/** \brief Inter-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all threads of the the calling thread team and a summation of - * val is performed and put into result. This functionality requires C++11 support.*/ -template< typename iType, class Lambda, typename ValueType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct& loop_boundaries, - const Lambda & lambda, ValueType& result) { - - result = ValueType(); - - for( iType i = loop_boundaries.begin; i < loop_boundaries.end; i+=loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i,tmp); - result+=tmp; - } - - result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd()); -} - -/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of - * val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result. 
- * The input value of init_result is used as initializer for temporary variables of ValueType. Therefore - * the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or - * '1 for *'). This functionality requires C++11 support.*/ -template< typename iType, class Lambda, typename ValueType, class JoinType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct& loop_boundaries, - const Lambda & lambda, const JoinType& join, ValueType& init_result) { - - ValueType result = init_result; - - for( iType i = loop_boundaries.begin; i < loop_boundaries.end; i+=loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i,tmp); - join(result,tmp); - } - - init_result = loop_boundaries.thread.team_reduce(result,Impl::JoinLambdaAdapter(join)); -} - -} //namespace Kokkos - -namespace Kokkos { -/** \brief Intra-thread vector parallel_for. Executes lambda(iType i) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all vector lanes of the the calling thread. - * This functionality requires C++11 support.*/ -template -KOKKOS_INLINE_FUNCTION -void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct& - loop_boundaries, const Lambda& lambda) { - #ifdef KOKKOS_ENABLE_PRAGMA_IVDEP - #pragma ivdep - #endif - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) - lambda(i); -} - -/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a summation of - * val is performed and put into result. 
This functionality requires C++11 support.*/ -template< typename iType, class Lambda, typename ValueType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct& - loop_boundaries, const Lambda & lambda, ValueType& result) { - result = ValueType(); -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP -#pragma ivdep -#endif - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i,tmp); - result+=tmp; - } -} - -/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of - * val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result. - * The input value of init_result is used as initializer for temporary variables of ValueType. Therefore - * the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or - * '1 for *'). This functionality requires C++11 support.*/ -template< typename iType, class Lambda, typename ValueType, class JoinType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct& - loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) { - - ValueType result = init_result; -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP -#pragma ivdep -#endif - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i,tmp); - join(result,tmp); - } - init_result = result; -} - -/** \brief Intra-thread vector parallel exclusive prefix sum. Executes lambda(iType i, ValueType & val, bool final) - * for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all vector lanes in the thread and a scan operation is performed. 
- * Depending on the target execution space the operator might be called twice: once with final=false - * and once with final=true. When final==true val contains the prefix sum value. The contribution of this - * "i" needs to be added to val no matter whether final==true or not. In a serial execution - * (i.e. team_size==1) the operator is only called once with final==true. Scan_val will be set - * to the final sum value over all vector lanes. - * This functionality requires C++11 support.*/ -template< typename iType, class FunctorType > -KOKKOS_INLINE_FUNCTION -void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct& - loop_boundaries, const FunctorType & lambda) { - - typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ; - typedef typename ValueTraits::value_type value_type ; - - value_type scan_val = value_type(); - -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP -#pragma ivdep -#endif - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - lambda(i,scan_val,true); - } -} - -} // namespace Kokkos - -namespace Kokkos { - -template -KOKKOS_INLINE_FUNCTION -void single(const Impl::VectorSingleStruct& , const FunctorType& lambda) { - lambda(); -} - -template -KOKKOS_INLINE_FUNCTION -void single(const Impl::ThreadSingleStruct& , const FunctorType& lambda) { - lambda(); -} - -template -KOKKOS_INLINE_FUNCTION -void single(const Impl::VectorSingleStruct& , const FunctorType& lambda, ValueType& val) { - lambda(val); -} - -template -KOKKOS_INLINE_FUNCTION -void single(const Impl::ThreadSingleStruct& , const FunctorType& lambda, ValueType& val) { - lambda(val); -} -} - -//---------------------------------------------------------------------------- #include diff --git a/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp b/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp index e4271aa188..e25039d236 100644 --- a/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp +++ b/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp @@ -82,6 +82,15 
@@ class Future ; template< typename Space > class TaskScheduler ; +template< typename Space > +void wait( TaskScheduler< Space > const & ); + +template< typename Space > +struct is_scheduler : public std::false_type {}; + +template< typename Space > +struct is_scheduler< TaskScheduler< Space > > : public std::true_type {}; + } // namespace Kokkos #include @@ -109,9 +118,6 @@ namespace Impl { template< typename Space , typename ResultType , typename FunctorType > class TaskBase ; -template< typename Space > -class TaskExec ; - } // namespace Impl } // namespace Kokkos @@ -312,6 +318,19 @@ public: } }; +// Is a Future with the given execution space +template< typename , typename ExecSpace = void > +struct is_future : public std::false_type {}; + +template< typename Arg1 , typename Arg2 , typename ExecSpace > +struct is_future< Future , ExecSpace > + : public std::integral_constant + < bool , + ( std::is_same< ExecSpace , void >::value || + std::is_same< ExecSpace + , typename Future::execution_space >::value ) + > {}; + } // namespace Kokkos //---------------------------------------------------------------------------- @@ -319,18 +338,59 @@ public: namespace Kokkos { -enum TaskType { TaskTeam = Impl::TaskBase::TaskTeam - , TaskSingle = Impl::TaskBase::TaskSingle }; - -enum TaskPriority { TaskHighPriority = 0 - , TaskRegularPriority = 1 - , TaskLowPriority = 2 }; - -template< typename Space > -void wait( TaskScheduler< Space > const & ); +enum class TaskPriority : int { High = 0 + , Regular = 1 + , Low = 2 }; } // namespace Kokkos +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +//---------------------------------------------------------------------------- + +template< int TaskEnum , typename DepFutureType > +struct TaskPolicyData +{ + using execution_space = typename DepFutureType::execution_space ; + using 
scheduler_type = TaskScheduler< execution_space > ; + + enum : int { m_task_type = TaskEnum }; + + scheduler_type const * m_scheduler ; + DepFutureType const m_dependence ; + int m_priority ; + + TaskPolicyData() = delete ; + TaskPolicyData( TaskPolicyData && ) = default ; + TaskPolicyData( TaskPolicyData const & ) = default ; + TaskPolicyData & operator = ( TaskPolicyData && ) = default ; + TaskPolicyData & operator = ( TaskPolicyData const & ) = default ; + + KOKKOS_INLINE_FUNCTION + TaskPolicyData( DepFutureType && arg_future + , Kokkos::TaskPriority const & arg_priority ) + : m_scheduler( 0 ) + , m_dependence( arg_future ) + , m_priority( static_cast( arg_priority ) ) + {} + + KOKKOS_INLINE_FUNCTION + TaskPolicyData( scheduler_type const & arg_scheduler + , Kokkos::TaskPriority const & arg_priority ) + : m_scheduler( & arg_scheduler ) + , m_dependence() + , m_priority( static_cast( arg_priority ) ) + {} +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { @@ -348,52 +408,13 @@ private: queue_type * m_queue ; //---------------------------------------- - // Process optional arguments to spawn and respawn functions - - KOKKOS_INLINE_FUNCTION static - void assign( task_base * const ) {} - - // TaskTeam or TaskSingle - template< typename ... Options > - KOKKOS_INLINE_FUNCTION static - void assign( task_base * const task - , TaskType const & arg - , Options const & ... opts ) - { - task->m_task_type = arg ; - assign( task , opts ... ); - } - - // TaskHighPriority or TaskRegularPriority or TaskLowPriority - template< typename ... Options > - KOKKOS_INLINE_FUNCTION static - void assign( task_base * const task - , TaskPriority const & arg - , Options const & ... opts ) - { - task->m_priority = arg ; - assign( task , opts ... 
); - } - - // Future for a dependence - template< typename A1 , typename A2 , typename ... Options > - KOKKOS_INLINE_FUNCTION static - void assign( task_base * const task - , Future< A1 , A2 > const & arg - , Options const & ... opts ) - { - task->add_dependence( arg.m_task ); - assign( task , opts ... ); - } - - //---------------------------------------- public: - using execution_policy = TaskScheduler ; using execution_space = ExecSpace ; using memory_space = typename queue_type::memory_space ; - using member_type = Kokkos::Impl::TaskExec< ExecSpace > ; + using member_type = + typename Kokkos::Impl::TaskQueueSpecialization< ExecSpace >::member_type ; KOKKOS_INLINE_FUNCTION TaskScheduler() : m_track(), m_queue(0) {} @@ -460,18 +481,13 @@ public: //---------------------------------------- - /**\brief A task spawns a task with options - * - * 1) High, Normal, or Low priority - * 2) With or without dependence - * 3) Team or Serial - */ - template< typename FunctorType , typename ... Options > - KOKKOS_FUNCTION - Future< typename FunctorType::value_type , ExecSpace > - task_spawn( FunctorType const & arg_functor - , Options const & ... arg_options - ) const + template< int TaskEnum , typename DepFutureType , typename FunctorType > + KOKKOS_FUNCTION static + Kokkos::Future< typename FunctorType::value_type , execution_space > + spawn( Impl::TaskPolicyData const & arg_policy + , typename task_base::function_type arg_function + , FunctorType && arg_functor + ) { using value_type = typename FunctorType::value_type ; using future_type = Future< value_type , execution_space > ; @@ -479,11 +495,21 @@ public: , value_type , FunctorType > ; + queue_type * const queue = + arg_policy.m_scheduler ? arg_policy.m_scheduler->m_queue : ( + arg_policy.m_dependence.m_task + ? 
arg_policy.m_dependence.m_task->m_queue + : (queue_type*) 0 ); + + if ( 0 == queue ) { + Kokkos::abort("Kokkos spawn given null Future" ); + } + //---------------------------------------- // Give single-thread back-ends an opportunity to clear // queue of ready tasks before allocating a new task - m_queue->iff_single_thread_recursive_execute(); + queue->iff_single_thread_recursive_execute(); //---------------------------------------- @@ -491,176 +517,129 @@ public: // Allocate task from memory pool f.m_task = - reinterpret_cast< task_type * >(m_queue->allocate(sizeof(task_type))); + reinterpret_cast< task_type * >(queue->allocate(sizeof(task_type))); if ( f.m_task ) { // Placement new construction - new ( f.m_task ) task_type( arg_functor ); + // Reference count starts at two: + // +1 for the matching decrement when task is complete + // +1 for the future + new ( f.m_task ) + task_type( arg_function + , queue + , arg_policy.m_dependence.m_task /* dependence */ + , 2 /* reference count */ + , int(sizeof(task_type)) /* allocation size */ + , int(arg_policy.m_task_type) + , int(arg_policy.m_priority) + , std::move(arg_functor) ); - // Reference count starts at two - // +1 for matching decrement when task is complete - // +1 for future - f.m_task->m_queue = m_queue ; - f.m_task->m_ref_count = 2 ; - f.m_task->m_alloc_size = sizeof(task_type); + // The dependence (if any) is processed immediately + // within the schedule function, as such the dependence's + // reference count does not need to be incremented for + // the assignment. - assign( f.m_task , arg_options... ); - - // Spawning from within the execution space so the - // apply function pointer is guaranteed to be valid - f.m_task->m_apply = task_type::apply ; - - m_queue->schedule( f.m_task ); - // this task may be updated or executed at any moment + queue->schedule_runnable( f.m_task ); + // This task may be updated or executed at any moment, + // even during the call to 'schedule'. 
} return f ; } - /**\brief The host process spawns a task with options - * - * 1) High, Normal, or Low priority - * 2) With or without dependence - * 3) Team or Serial - */ - template< typename FunctorType , typename ... Options > - inline - Future< typename FunctorType::value_type , ExecSpace > - host_spawn( FunctorType const & arg_functor - , Options const & ... arg_options - ) const + template< typename FunctorType , typename A1 , typename A2 > + KOKKOS_FUNCTION static + void + respawn( FunctorType * arg_self + , Future const & arg_dependence + , TaskPriority const & arg_priority + ) { + // Precondition: task is in Executing state + using value_type = typename FunctorType::value_type ; - using future_type = Future< value_type , execution_space > ; using task_type = Impl::TaskBase< execution_space , value_type , FunctorType > ; - if ( m_queue == 0 ) { - Kokkos::abort("Kokkos::TaskScheduler not initialized"); - } + task_type * const task = static_cast< task_type * >( arg_self ); - future_type f ; + task->m_priority = static_cast(arg_priority); - // Allocate task from memory pool - f.m_task = - reinterpret_cast( m_queue->allocate(sizeof(task_type)) ); + task->add_dependence( arg_dependence.m_task ); - if ( f.m_task ) { - - // Placement new construction - new( f.m_task ) task_type( arg_functor ); - - // Reference count starts at two: - // +1 to match decrement when task completes - // +1 for the future - f.m_task->m_queue = m_queue ; - f.m_task->m_ref_count = 2 ; - f.m_task->m_alloc_size = sizeof(task_type); - - assign( f.m_task , arg_options... ); - - // Potentially spawning outside execution space so the - // apply function pointer must be obtained from execution space. - // Required for Cuda execution space function pointer. 
- m_queue->template proc_set_apply< FunctorType >( & f.m_task->m_apply ); - - m_queue->schedule( f.m_task ); - } - return f ; + // Postcondition: task is in Executing-Respawn state } + //---------------------------------------- /**\brief Return a future that is complete * when all input futures are complete. */ template< typename A1 , typename A2 > - KOKKOS_FUNCTION - Future< ExecSpace > - when_all( int narg , Future< A1 , A2 > const * const arg ) const + KOKKOS_FUNCTION static + Future< execution_space > + when_all( Future< A1 , A2 > const arg[] , int narg ) { - static_assert - ( std::is_same< execution_space - , typename Future< A1 , A2 >::execution_space - >::value - , "Future must have same execution space" ); - - using future_type = Future< ExecSpace > ; - using task_base = Kokkos::Impl::TaskBase< ExecSpace , void , void > ; + using future_type = Future< execution_space > ; + using task_base = Kokkos::Impl::TaskBase< execution_space , void , void > ; future_type f ; - size_t const size = sizeof(task_base) + narg * sizeof(task_base*); + if ( narg ) { - f.m_task = - reinterpret_cast< task_base * >( m_queue->allocate( size ) ); - - if ( f.m_task ) { - - new( f.m_task ) task_base(); - - // Reference count starts at two: - // +1 to match decrement when task completes - // +1 for the future - f.m_task->m_queue = m_queue ; - f.m_task->m_ref_count = 2 ; - f.m_task->m_alloc_size = size ; - f.m_task->m_dep_count = narg ; - f.m_task->m_task_type = task_base::Aggregate ; - - task_base ** const dep = f.m_task->aggregate_dependences(); - - // Assign dependences to increment their reference count - // The futures may be destroyed upon returning from this call - // so increment reference count to track this assignment. + queue_type * queue = 0 ; for ( int i = 0 ; i < narg ; ++i ) { - task_base * const t = dep[i] = arg[i].m_task ; + task_base * const t = arg[i].m_task ; if ( 0 != t ) { + // Increment reference count to track subsequent assignment. 
Kokkos::atomic_increment( &(t->m_ref_count) ); + if ( queue == 0 ) { + queue = t->m_queue ; + } + else if ( queue != t->m_queue ) { + Kokkos::abort("Kokkos when_all Futures must be in the same scheduler" ); + } } } - m_queue->schedule( f.m_task ); - // this when_all may be processed at any moment + if ( queue != 0 ) { + + size_t const size = sizeof(task_base) + narg * sizeof(task_base*); + + f.m_task = + reinterpret_cast< task_base * >( queue->allocate( size ) ); + + if ( f.m_task ) { + + // Reference count starts at two: + // +1 to match decrement when task completes + // +1 for the future + new( f.m_task ) task_base( queue + , 2 /* reference count */ + , size /* allocation size */ + , narg /* dependence count */ + ); + + // Assign dependences, reference counts were already incremented + + task_base ** const dep = f.m_task->aggregate_dependences(); + + for ( int i = 0 ; i < narg ; ++i ) { dep[i] = arg[i].m_task ; } + + queue->schedule_aggregate( f.m_task ); + // this when_all may be processed at any moment + } + } } return f ; } - /**\brief An executing task respawns itself with options - * - * 1) High, Normal, or Low priority - * 2) With or without dependence - */ - template< class FunctorType , typename ... Options > - KOKKOS_FUNCTION - void respawn( FunctorType * task_self - , Options const & ... arg_options ) const - { - using value_type = typename FunctorType::value_type ; - using task_type = Impl::TaskBase< execution_space - , value_type - , FunctorType > ; - - task_type * const task = static_cast< task_type * >( task_self ); - - // Reschedule task with no dependences. - m_queue->reschedule( task ); - - // Dependences, if requested, are added here through parsing the arguments. - assign( task , arg_options... 
); - } - //---------------------------------------- - template< typename S > - friend - void Kokkos::wait( Kokkos::TaskScheduler< S > const & ); - - //---------------------------------------- - - inline + KOKKOS_INLINE_FUNCTION int allocation_capacity() const noexcept { return m_queue->m_memory.get_mem_size(); } @@ -676,12 +655,192 @@ public: long allocated_task_count_accum() const noexcept { return m_queue->m_accum_alloc ; } + //---------------------------------------- + + template< typename S > + friend + void Kokkos::wait( Kokkos::TaskScheduler< S > const & ); + }; +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +//---------------------------------------------------------------------------- +// Construct a TaskTeam execution policy + +template< typename T > +Kokkos::Impl::TaskPolicyData + < Kokkos::Impl::TaskBase::TaskTeam + , typename std::conditional< Kokkos::is_future< T >::value , T , + typename Kokkos::Future< typename T::execution_space > >::type + > +KOKKOS_INLINE_FUNCTION +TaskTeam( T const & arg + , TaskPriority const & arg_priority = TaskPriority::Regular + ) +{ + static_assert( Kokkos::is_future::value || + Kokkos::is_scheduler::value + , "Kokkos TaskTeam argument must be Future or TaskScheduler" ); + + return + Kokkos::Impl::TaskPolicyData + < Kokkos::Impl::TaskBase::TaskTeam + , typename std::conditional< Kokkos::is_future< T >::value , T , + typename Kokkos::Future< typename T::execution_space > >::type + >( arg , arg_priority ); +} + +// Construct a TaskSingle execution policy + +template< typename T > +Kokkos::Impl::TaskPolicyData + < Kokkos::Impl::TaskBase::TaskSingle + , typename std::conditional< Kokkos::is_future< T >::value , T , + typename Kokkos::Future< typename T::execution_space > >::type + > +KOKKOS_INLINE_FUNCTION +TaskSingle( T const & arg + , TaskPriority const & 
arg_priority = TaskPriority::Regular + ) +{ + static_assert( Kokkos::is_future::value || + Kokkos::is_scheduler::value + , "Kokkos TaskSingle argument must be Future or TaskScheduler" ); + + return + Kokkos::Impl::TaskPolicyData + < Kokkos::Impl::TaskBase::TaskSingle + , typename std::conditional< Kokkos::is_future< T >::value , T , + typename Kokkos::Future< typename T::execution_space > >::type + >( arg , arg_priority ); +} + +//---------------------------------------------------------------------------- + +/**\brief A host control thread spawns a task with options + * + * 1) Team or Serial + * 2) With scheduler or dependence + * 3) High, Normal, or Low priority + */ +template< int TaskEnum + , typename DepFutureType + , typename FunctorType > +Future< typename FunctorType::value_type + , typename DepFutureType::execution_space > +host_spawn( Impl::TaskPolicyData const & arg_policy + , FunctorType && arg_functor + ) +{ + using exec_space = typename DepFutureType::execution_space ; + using scheduler = TaskScheduler< exec_space > ; + + typedef Impl::TaskBase< exec_space + , typename FunctorType::value_type + , FunctorType + > task_type ; + + static_assert( TaskEnum == task_type::TaskTeam || + TaskEnum == task_type::TaskSingle + , "Kokkos host_spawn requires TaskTeam or TaskSingle" ); + + // May be spawning a Cuda task, must use the specialization + // to query on-device function pointer. 
+ typename task_type::function_type const ptr = + Kokkos::Impl::TaskQueueSpecialization< exec_space >:: + template get_function_pointer< task_type >(); + + return scheduler::spawn( arg_policy , ptr , std::move(arg_functor) ); +} + +/**\brief A task spawns a task with options + * + * 1) Team or Serial + * 2) With scheduler or dependence + * 3) High, Normal, or Low priority + */ +template< int TaskEnum + , typename DepFutureType + , typename FunctorType > +Future< typename FunctorType::value_type + , typename DepFutureType::execution_space > +KOKKOS_INLINE_FUNCTION +task_spawn( Impl::TaskPolicyData const & arg_policy + , FunctorType && arg_functor + ) +{ + using exec_space = typename DepFutureType::execution_space ; + using scheduler = TaskScheduler< exec_space > ; + + typedef Impl::TaskBase< exec_space + , typename FunctorType::value_type + , FunctorType + > task_type ; + +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) && \ + defined( KOKKOS_ENABLE_CUDA ) + + static_assert( ! std::is_same< Kokkos::Cuda , exec_space >::value + , "Error calling Kokkos::task_spawn for Cuda space within Host code" ); + +#endif + + static_assert( TaskEnum == task_type::TaskTeam || + TaskEnum == task_type::TaskSingle + , "Kokkos host_spawn requires TaskTeam or TaskSingle" ); + + typename task_type::function_type const ptr = task_type::apply ; + + return scheduler::spawn( arg_policy , ptr , std::move(arg_functor) ); +} + +/**\brief A task respawns itself with options + * + * 1) With scheduler or dependence + * 2) High, Normal, or Low priority + */ +template< typename FunctorType , typename T > +void +KOKKOS_INLINE_FUNCTION +respawn( FunctorType * arg_self + , T const & arg + , TaskPriority const & arg_priority = TaskPriority::Regular + ) +{ + static_assert( Kokkos::is_future::value || + Kokkos::is_scheduler::value + , "Kokkos respawn argument must be Future or TaskScheduler" ); + + TaskScheduler< typename T::execution_space >:: + respawn( arg_self , arg , arg_priority ); +} + 
+//---------------------------------------------------------------------------- + +template< typename A1 , typename A2 > +KOKKOS_INLINE_FUNCTION +Future< typename Future< A1 , A2 >::execution_space > +when_all( Future< A1 , A2 > const arg[] + , int narg + ) +{ + return TaskScheduler< typename Future::execution_space >:: + when_all( arg , narg ); +} + +//---------------------------------------------------------------------------- +// Wait for all runnable tasks to complete + template< typename ExecSpace > inline -void wait( TaskScheduler< ExecSpace > const & policy ) -{ policy.m_queue->execute(); } +void wait( TaskScheduler< ExecSpace > const & scheduler ) +{ scheduler.m_queue->execute(); } } // namespace Kokkos diff --git a/lib/kokkos/core/src/Kokkos_Threads.hpp b/lib/kokkos/core/src/Kokkos_Threads.hpp index aca482b427..8aa968d053 100644 --- a/lib/kokkos/core/src/Kokkos_Threads.hpp +++ b/lib/kokkos/core/src/Kokkos_Threads.hpp @@ -230,4 +230,3 @@ struct VerifyExecutionCanAccessMemorySpace #endif /* #if defined( KOKKOS_ENABLE_PTHREAD ) */ #endif /* #define KOKKOS_THREADS_HPP */ - diff --git a/lib/kokkos/core/src/Makefile b/lib/kokkos/core/src/Makefile index 316f61fd4d..0668f89c86 100644 --- a/lib/kokkos/core/src/Makefile +++ b/lib/kokkos/core/src/Makefile @@ -31,23 +31,23 @@ KOKKOS_HEADERS_INCLUDE += $(wildcard $(KOKKOS_PATH)/algorithms/src/*.hpp) CONDITIONAL_COPIES = ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - KOKKOS_HEADERS_CUDA += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) - CONDITIONAL_COPIES += copy-cuda + KOKKOS_HEADERS_CUDA += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) + CONDITIONAL_COPIES += copy-cuda endif ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) - KOKKOS_HEADERS_THREADS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp) - CONDITIONAL_COPIES += copy-threads + KOKKOS_HEADERS_THREADS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp) + CONDITIONAL_COPIES += copy-threads endif -ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1) - KOKKOS_HEADERS_QTHREAD += 
$(wildcard $(KOKKOS_PATH)/core/src/Qthread/*.hpp) - CONDITIONAL_COPIES += copy-qthread +ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) + KOKKOS_HEADERS_QTHREADS += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.hpp) + CONDITIONAL_COPIES += copy-qthreads endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) - KOKKOS_HEADERS_OPENMP += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp) - CONDITIONAL_COPIES += copy-openmp + KOKKOS_HEADERS_OPENMP += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp) + CONDITIONAL_COPIES += copy-openmp endif ifeq ($(KOKKOS_OS),CYGWIN) @@ -60,6 +60,12 @@ ifeq ($(KOKKOS_OS),Darwin) COPY_FLAG = endif +ifeq ($(KOKKOS_DEBUG),"no") + KOKKOS_DEBUG_CMAKE = OFF +else + KOKKOS_DEBUG_CMAKE = ON +endif + messages: echo "Start Build" @@ -91,6 +97,7 @@ build-makefile-kokkos: echo "" >> Makefile.kokkos echo "#Internal settings which need to propagated for Kokkos examples" >> Makefile.kokkos echo "KOKKOS_INTERNAL_USE_CUDA = ${KOKKOS_INTERNAL_USE_CUDA}" >> Makefile.kokkos + echo "KOKKOS_INTERNAL_USE_QTHREADS = ${KOKKOS_INTERNAL_USE_QTHREADS}" >> Makefile.kokkos echo "KOKKOS_INTERNAL_USE_OPENMP = ${KOKKOS_INTERNAL_USE_OPENMP}" >> Makefile.kokkos echo "KOKKOS_INTERNAL_USE_PTHREADS = ${KOKKOS_INTERNAL_USE_PTHREADS}" >> Makefile.kokkos echo "" >> Makefile.kokkos @@ -107,7 +114,55 @@ build-makefile-kokkos: > Makefile.kokkos.tmp mv -f Makefile.kokkos.tmp Makefile.kokkos -build-lib: build-makefile-kokkos $(KOKKOS_LINK_DEPENDS) +build-cmake-kokkos: + rm -f kokkos.cmake + echo "#Global Settings used to generate this library" >> kokkos.cmake + echo "set(KOKKOS_PATH $(PREFIX) CACHE PATH \"Kokkos installation path\")" >> kokkos.cmake + echo "set(KOKKOS_DEVICES $(KOKKOS_DEVICES) CACHE STRING \"Kokkos devices list\")" >> kokkos.cmake + echo "set(KOKKOS_ARCH $(KOKKOS_ARCH) CACHE STRING \"Kokkos architecture flags\")" >> kokkos.cmake + echo "set(KOKKOS_DEBUG $(KOKKOS_DEBUG_CMAKE) CACHE BOOL \"Kokkos debug enabled ?)\")" >> kokkos.cmake + echo "set(KOKKOS_USE_TPLS $(KOKKOS_USE_TPLS) CACHE 
STRING \"Kokkos templates list\")" >> kokkos.cmake + echo "set(KOKKOS_CXX_STANDARD $(KOKKOS_CXX_STANDARD) CACHE STRING \"Kokkos C++ standard\")" >> kokkos.cmake + echo "set(KOKKOS_OPTIONS $(KOKKOS_OPTIONS) CACHE STRING \"Kokkos options\")" >> kokkos.cmake + echo "set(KOKKOS_CUDA_OPTIONS $(KOKKOS_CUDA_OPTIONS) CACHE STRING \"Kokkos Cuda options\")" >> kokkos.cmake + echo "if(NOT $ENV{CXX})" >> kokkos.cmake + echo ' message(WARNING "You are currently using compiler $${CMAKE_CXX_COMPILER} while Kokkos was built with $(CXX) ; make sure this is the behavior you intended to be.")' >> kokkos.cmake + echo "endif()" >> kokkos.cmake + echo "if(NOT DEFINED ENV{NVCC_WRAPPER})" >> kokkos.cmake + echo " set(NVCC_WRAPPER \"$(NVCC_WRAPPER)\" CACHE FILEPATH \"Path to command nvcc_wrapper\")" >> kokkos.cmake + echo "else()" >> kokkos.cmake + echo ' set(NVCC_WRAPPER $$ENV{NVCC_WRAPPER} CACHE FILEPATH "Path to command nvcc_wrapper")' >> kokkos.cmake + echo "endif()" >> kokkos.cmake + echo "" >> kokkos.cmake + echo "#Source and Header files of Kokkos relative to KOKKOS_PATH" >> kokkos.cmake + echo "set(KOKKOS_HEADERS \"$(KOKKOS_HEADERS)\" CACHE STRING \"Kokkos headers list\")" >> kokkos.cmake + echo "set(KOKKOS_SRC \"$(KOKKOS_SRC)\" CACHE STRING \"Kokkos source list\")" >> kokkos.cmake + echo "" >> kokkos.cmake + echo "#Variables used in application Makefiles" >> kokkos.cmake + echo "set(KOKKOS_CPP_DEPENDS \"$(KOKKOS_CPP_DEPENDS)\" CACHE STRING \"\")" >> kokkos.cmake + echo "set(KOKKOS_CXXFLAGS \"$(KOKKOS_CXXFLAGS)\" CACHE STRING \"\")" >> kokkos.cmake + echo "set(KOKKOS_CPPFLAGS \"$(KOKKOS_CPPFLAGS)\" CACHE STRING \"\")" >> kokkos.cmake + echo "set(KOKKOS_LINK_DEPENDS \"$(KOKKOS_LINK_DEPENDS)\" CACHE STRING \"\")" >> kokkos.cmake + echo "set(KOKKOS_LIBS \"$(KOKKOS_LIBS)\" CACHE STRING \"\")" >> kokkos.cmake + echo "set(KOKKOS_LDFLAGS \"$(KOKKOS_LDFLAGS)\" CACHE STRING \"\")" >> kokkos.cmake + echo "" >> kokkos.cmake + echo "#Internal settings which need to propagated for Kokkos 
examples" >> kokkos.cmake + echo "set(KOKKOS_INTERNAL_USE_CUDA \"${KOKKOS_INTERNAL_USE_CUDA}\" CACHE STRING \"\")" >> kokkos.cmake + echo "set(KOKKOS_INTERNAL_USE_OPENMP \"${KOKKOS_INTERNAL_USE_OPENMP}\" CACHE STRING \"\")" >> kokkos.cmake + echo "set(KOKKOS_INTERNAL_USE_PTHREADS \"${KOKKOS_INTERNAL_USE_PTHREADS}\" CACHE STRING \"\")" >> kokkos.cmake + echo "mark_as_advanced(KOKKOS_HEADERS KOKKOS_SRC KOKKOS_INTERNAL_USE_CUDA KOKKOS_INTERNAL_USE_OPENMP KOKKOS_INTERNAL_USE_PTHREADS)" >> kokkos.cmake + echo "" >> kokkos.cmake + sed \ + -e 's|$(KOKKOS_PATH)/core/src|$(PREFIX)/include|g' \ + -e 's|$(KOKKOS_PATH)/containers/src|$(PREFIX)/include|g' \ + -e 's|$(KOKKOS_PATH)/algorithms/src|$(PREFIX)/include|g' \ + -e 's|-L$(PWD)|-L$(PREFIX)/lib|g' \ + -e 's|= libkokkos.a|= $(PREFIX)/lib/libkokkos.a|g' \ + -e 's|= KokkosCore_config.h|= $(PREFIX)/include/KokkosCore_config.h|g' kokkos.cmake \ + > kokkos.cmake.tmp + mv -f kokkos.cmake.tmp kokkos.cmake + +build-lib: build-makefile-kokkos build-cmake-kokkos $(KOKKOS_LINK_DEPENDS) mkdir: mkdir -p $(PREFIX) @@ -124,9 +179,9 @@ copy-threads: mkdir mkdir -p $(PREFIX)/include/Threads cp $(COPY_FLAG) $(KOKKOS_HEADERS_THREADS) $(PREFIX)/include/Threads -copy-qthread: mkdir - mkdir -p $(PREFIX)/include/Qthread - cp $(COPY_FLAG) $(KOKKOS_HEADERS_QTHREAD) $(PREFIX)/include/Qthread +copy-qthreads: mkdir + mkdir -p $(PREFIX)/include/Qthreads + cp $(COPY_FLAG) $(KOKKOS_HEADERS_QTHREADS) $(PREFIX)/include/Qthreads copy-openmp: mkdir mkdir -p $(PREFIX)/include/OpenMP @@ -137,6 +192,7 @@ install: mkdir $(CONDITIONAL_COPIES) build-lib cp $(COPY_FLAG) $(KOKKOS_HEADERS_INCLUDE) $(PREFIX)/include cp $(COPY_FLAG) $(KOKKOS_HEADERS_INCLUDE_IMPL) $(PREFIX)/include/impl cp $(COPY_FLAG) Makefile.kokkos $(PREFIX) + cp $(COPY_FLAG) kokkos.cmake $(PREFIX) cp $(COPY_FLAG) libkokkos.a $(PREFIX)/lib cp $(COPY_FLAG) KokkosCore_config.h $(PREFIX)/include diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp 
b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp index a61791ca9c..ecacffb773 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp @@ -46,7 +46,6 @@ #include #include -#include #include #include @@ -107,58 +106,41 @@ private: public: - inline void execute() const { - this->template execute_schedule(); - } - - template - inline - typename std::enable_if< std::is_same::value >::type - execute_schedule() const + inline void execute() const { + enum { is_dynamic = std::is_same< typename Policy::schedule_type::type + , Kokkos::Dynamic >::value }; + OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_for"); OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_for"); #pragma omp parallel { - OpenMPexec & exec = * OpenMPexec::get_thread_omp(); + HostThreadTeamData & data = *OpenMPexec::get_thread_data(); - const WorkRange range( m_policy, exec.pool_rank(), exec.pool_size() ); + data.set_work_partition( m_policy.end() - m_policy.begin() + , m_policy.chunk_size() ); - ParallelFor::template exec_range< WorkTag >( m_functor , range.begin() , range.end() ); - } -/* END #pragma omp parallel */ - } - - template - inline - typename std::enable_if< std::is_same::value >::type - execute_schedule() const - { - OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_for"); - OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_for"); - -#pragma omp parallel - { - OpenMPexec & exec = * OpenMPexec::get_thread_omp(); - - const WorkRange range( m_policy, exec.pool_rank(), exec.pool_size() ); - - exec.set_work_range(range.begin(),range.end(),m_policy.chunk_size()); - exec.reset_steal_target(); - #pragma omp barrier - - long work_index = exec.get_work_index(); - - while(work_index != -1) { - const Member begin = static_cast(work_index) * m_policy.chunk_size(); - const Member end = begin + m_policy.chunk_size() < m_policy.end()?begin+m_policy.chunk_size():m_policy.end(); - ParallelFor::template 
exec_range< WorkTag >( m_functor , begin, end ); - work_index = exec.get_work_index(); + if ( is_dynamic ) { + // Make sure work partition is set before stealing + if ( data.pool_rendezvous() ) data.pool_rendezvous_release(); } + std::pair range(0,0); + + do { + + range = is_dynamic ? data.get_work_stealing_chunk() + : data.get_work_partition(); + + ParallelFor::template + exec_range< WorkTag >( m_functor + , range.first + m_policy.begin() + , range.second + m_policy.begin() ); + + } while ( is_dynamic && 0 <= range.first ); } -/* END #pragma omp parallel */ + // END #pragma omp parallel } inline @@ -193,17 +175,18 @@ private: typedef typename Policy::WorkRange WorkRange ; typedef typename Policy::member_type Member ; + typedef FunctorAnalysis< FunctorPatternInterface::REDUCE , Policy , FunctorType > Analysis ; + typedef Kokkos::Impl::if_c< std::is_same::value, FunctorType, ReducerType> ReducerConditional; typedef typename ReducerConditional::type ReducerTypeFwd; // Static Assert WorkTag void if ReducerType not InvalidType - typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ; typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ; typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTag > ValueJoin ; - typedef typename ValueTraits::pointer_type pointer_type ; - typedef typename ValueTraits::reference_type reference_type ; + typedef typename Analysis::pointer_type pointer_type ; + typedef typename Analysis::reference_type reference_type ; const FunctorType m_functor ; const Policy m_policy ; @@ -247,92 +230,70 @@ private: public: - inline void execute() const { - this->template execute_schedule(); - } - - template - inline - typename std::enable_if< std::is_same::value >::type - execute_schedule() const + inline void execute() const { - OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_reduce"); - OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_reduce"); + enum { is_dynamic = std::is_same< 
typename Policy::schedule_type::type + , Kokkos::Dynamic >::value }; - OpenMPexec::resize_scratch( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) , 0 ); + OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_for"); + OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_for"); + + const size_t pool_reduce_bytes = + Analysis::value_size( ReducerConditional::select(m_functor, m_reducer)); + + OpenMPexec::resize_thread_data( pool_reduce_bytes + , 0 // team_reduce_bytes + , 0 // team_shared_bytes + , 0 // thread_local_bytes + ); #pragma omp parallel { - OpenMPexec & exec = * OpenMPexec::get_thread_omp(); - const WorkRange range( m_policy, exec.pool_rank(), exec.pool_size() ); - ParallelReduce::template exec_range< WorkTag > - ( m_functor , range.begin() , range.end() - , ValueInit::init( ReducerConditional::select(m_functor , m_reducer), exec.scratch_reduce() ) ); - } -/* END #pragma omp parallel */ + HostThreadTeamData & data = *OpenMPexec::get_thread_data(); - // Reduction: + data.set_work_partition( m_policy.end() - m_policy.begin() + , m_policy.chunk_size() ); - const pointer_type ptr = pointer_type( OpenMPexec::pool_rev(0)->scratch_reduce() ); - - for ( int i = 1 ; i < OpenMPexec::pool_size() ; ++i ) { - ValueJoin::join( ReducerConditional::select(m_functor , m_reducer) , ptr , OpenMPexec::pool_rev(i)->scratch_reduce() ); - } - - Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , ptr ); - - if ( m_result_ptr ) { - const int n = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) ); - - for ( int j = 0 ; j < n ; ++j ) { m_result_ptr[j] = ptr[j] ; } - } - } - - template - inline - typename std::enable_if< std::is_same::value >::type - execute_schedule() const - { - OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_reduce"); - OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_reduce"); - - OpenMPexec::resize_scratch( 
ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) , 0 ); - -#pragma omp parallel - { - OpenMPexec & exec = * OpenMPexec::get_thread_omp(); - const WorkRange range( m_policy, exec.pool_rank(), exec.pool_size() ); - - exec.set_work_range(range.begin(),range.end(),m_policy.chunk_size()); - exec.reset_steal_target(); - #pragma omp barrier - - long work_index = exec.get_work_index(); - - reference_type update = ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , exec.scratch_reduce() ); - while(work_index != -1) { - const Member begin = static_cast(work_index) * m_policy.chunk_size(); - const Member end = begin + m_policy.chunk_size() < m_policy.end()?begin+m_policy.chunk_size():m_policy.end(); - ParallelReduce::template exec_range< WorkTag > - ( m_functor , begin,end - , update ); - work_index = exec.get_work_index(); + if ( is_dynamic ) { + // Make sure work partition is set before stealing + if ( data.pool_rendezvous() ) data.pool_rendezvous_release(); } + + reference_type update = + ValueInit::init( ReducerConditional::select(m_functor , m_reducer) + , data.pool_reduce_local() ); + + std::pair range(0,0); + + do { + + range = is_dynamic ? 
data.get_work_stealing_chunk() + : data.get_work_partition(); + + ParallelReduce::template + exec_range< WorkTag >( m_functor + , range.first + m_policy.begin() + , range.second + m_policy.begin() + , update ); + + } while ( is_dynamic && 0 <= range.first ); } -/* END #pragma omp parallel */ +// END #pragma omp parallel // Reduction: - const pointer_type ptr = pointer_type( OpenMPexec::pool_rev(0)->scratch_reduce() ); + const pointer_type ptr = pointer_type( OpenMPexec::get_thread_data(0)->pool_reduce_local() ); for ( int i = 1 ; i < OpenMPexec::pool_size() ; ++i ) { - ValueJoin::join( ReducerConditional::select(m_functor , m_reducer) , ptr , OpenMPexec::pool_rev(i)->scratch_reduce() ); + ValueJoin::join( ReducerConditional::select(m_functor , m_reducer) + , ptr + , OpenMPexec::get_thread_data(i)->pool_reduce_local() ); } Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , ptr ); if ( m_result_ptr ) { - const int n = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) ); + const int n = Analysis::value_count( ReducerConditional::select(m_functor , m_reducer) ); for ( int j = 0 ; j < n ; ++j ) { m_result_ptr[j] = ptr[j] ; } } @@ -394,17 +355,18 @@ private: typedef Kokkos::RangePolicy< Traits ... 
> Policy ; + typedef FunctorAnalysis< FunctorPatternInterface::SCAN , Policy , FunctorType > Analysis ; + typedef typename Policy::work_tag WorkTag ; typedef typename Policy::WorkRange WorkRange ; typedef typename Policy::member_type Member ; - typedef Kokkos::Impl::FunctorValueTraits< FunctorType, WorkTag > ValueTraits ; typedef Kokkos::Impl::FunctorValueInit< FunctorType, WorkTag > ValueInit ; typedef Kokkos::Impl::FunctorValueJoin< FunctorType, WorkTag > ValueJoin ; typedef Kokkos::Impl::FunctorValueOps< FunctorType, WorkTag > ValueOps ; - typedef typename ValueTraits::pointer_type pointer_type ; - typedef typename ValueTraits::reference_type reference_type ; + typedef typename Analysis::pointer_type pointer_type ; + typedef typename Analysis::reference_type reference_type ; const FunctorType m_functor ; const Policy m_policy ; @@ -452,53 +414,63 @@ public: OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_scan"); OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_scan"); - OpenMPexec::resize_scratch( 2 * ValueTraits::value_size( m_functor ) , 0 ); + const int value_count = Analysis::value_count( m_functor ); + const size_t pool_reduce_bytes = 2 * Analysis::value_size( m_functor ); + + OpenMPexec::resize_thread_data( pool_reduce_bytes + , 0 // team_reduce_bytes + , 0 // team_shared_bytes + , 0 // thread_local_bytes + ); #pragma omp parallel { - OpenMPexec & exec = * OpenMPexec::get_thread_omp(); - const WorkRange range( m_policy, exec.pool_rank(), exec.pool_size() ); - const pointer_type ptr = - pointer_type( exec.scratch_reduce() ) + - ValueTraits::value_count( m_functor ); + HostThreadTeamData & data = *OpenMPexec::get_thread_data(); + + const WorkRange range( m_policy, data.pool_rank(), data.pool_size() ); + + reference_type update_sum = + ValueInit::init( m_functor , data.pool_reduce_local() ); + ParallelScan::template exec_range< WorkTag > - ( m_functor , range.begin() , range.end() - , ValueInit::init( m_functor , ptr ) , false ); - } -/* END 
#pragma omp parallel */ + ( m_functor , range.begin() , range.end() , update_sum , false ); - { - const unsigned thread_count = OpenMPexec::pool_size(); - const unsigned value_count = ValueTraits::value_count( m_functor ); + if ( data.pool_rendezvous() ) { - pointer_type ptr_prev = 0 ; + pointer_type ptr_prev = 0 ; - for ( unsigned rank_rev = thread_count ; rank_rev-- ; ) { + const int n = data.pool_size(); - pointer_type ptr = pointer_type( OpenMPexec::pool_rev(rank_rev)->scratch_reduce() ); + for ( int i = 0 ; i < n ; ++i ) { - if ( ptr_prev ) { - for ( unsigned i = 0 ; i < value_count ; ++i ) { ptr[i] = ptr_prev[ i + value_count ] ; } - ValueJoin::join( m_functor , ptr + value_count , ptr ); - } - else { - ValueInit::init( m_functor , ptr ); + pointer_type ptr = (pointer_type) + data.pool_member(i)->pool_reduce_local(); + + if ( i ) { + for ( int j = 0 ; j < value_count ; ++j ) { + ptr[j+value_count] = ptr_prev[j+value_count] ; + } + ValueJoin::join( m_functor , ptr + value_count , ptr_prev ); + } + else { + ValueInit::init( m_functor , ptr + value_count ); + } + + ptr_prev = ptr ; } - ptr_prev = ptr ; + data.pool_rendezvous_release(); } - } -#pragma omp parallel - { - OpenMPexec & exec = * OpenMPexec::get_thread_omp(); - const WorkRange range( m_policy, exec.pool_rank(), exec.pool_size() ); - const pointer_type ptr = pointer_type( exec.scratch_reduce() ); + reference_type update_base = + ValueOps::reference + ( ((pointer_type)data.pool_reduce_local()) + value_count ); + ParallelScan::template exec_range< WorkTag > - ( m_functor , range.begin() , range.end() - , ValueOps::reference( ptr ) , true ); + ( m_functor , range.begin() , range.end() , update_base , true ); } /* END #pragma omp parallel */ + } //---------------------------------------- @@ -530,55 +502,59 @@ class ParallelFor< FunctorType { private: + enum { TEAM_REDUCE_SIZE = 512 }; + typedef Kokkos::Impl::TeamPolicyInternal< Kokkos::OpenMP, Properties ... 
> Policy ; - typedef typename Policy::work_tag WorkTag ; - typedef typename Policy::member_type Member ; + typedef typename Policy::work_tag WorkTag ; + typedef typename Policy::schedule_type::type SchedTag ; + typedef typename Policy::member_type Member ; const FunctorType m_functor ; const Policy m_policy ; const int m_shmem_size ; - template< class TagType, class Schedule > + template< class TagType > inline static - typename std::enable_if< std::is_same< TagType , void >::value && std::is_same::value>::type - exec_team( const FunctorType & functor , Member member ) + typename std::enable_if< ( std::is_same< TagType , void >::value ) >::type + exec_team( const FunctorType & functor + , HostThreadTeamData & data + , const int league_rank_begin + , const int league_rank_end + , const int league_size ) { - for ( ; member.valid_static() ; member.next_static() ) { - functor( member ); + for ( int r = league_rank_begin ; r < league_rank_end ; ) { + + functor( Member( data, r , league_size ) ); + + if ( ++r < league_rank_end ) { + // Don't allow team members to lap one another + // so that they don't overwrite shared memory. + if ( data.team_rendezvous() ) { data.team_rendezvous_release(); } + } } } - template< class TagType, class Schedule > - inline static - typename std::enable_if< (! std::is_same< TagType , void >::value) && std::is_same::value >::type - exec_team( const FunctorType & functor , Member member ) - { - const TagType t{} ; - for ( ; member.valid_static() ; member.next_static() ) { - functor( t , member ); - } - } - template< class TagType, class Schedule > + template< class TagType > inline static - typename std::enable_if< std::is_same< TagType , void >::value && std::is_same::value>::type - exec_team( const FunctorType & functor , Member member ) + typename std::enable_if< ( ! 
std::is_same< TagType , void >::value ) >::type + exec_team( const FunctorType & functor + , HostThreadTeamData & data + , const int league_rank_begin + , const int league_rank_end + , const int league_size ) { - #pragma omp barrier - for ( ; member.valid_dynamic() ; member.next_dynamic() ) { - functor( member ); - } - } + const TagType t{}; - template< class TagType, class Schedule > - inline static - typename std::enable_if< (! std::is_same< TagType , void >::value) && std::is_same::value >::type - exec_team( const FunctorType & functor , Member member ) - { - #pragma omp barrier - const TagType t{} ; - for ( ; member.valid_dynamic() ; member.next_dynamic() ) { - functor( t , member ); + for ( int r = league_rank_begin ; r < league_rank_end ; ) { + + functor( t , Member( data, r , league_size ) ); + + if ( ++r < league_rank_end ) { + // Don't allow team members to lap one another + // so that they don't overwrite shared memory. + if ( data.team_rendezvous() ) { data.team_rendezvous_release(); } + } } } @@ -587,31 +563,75 @@ public: inline void execute() const { + enum { is_dynamic = std::is_same< SchedTag , Kokkos::Dynamic >::value }; + OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_for"); OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_for"); - const size_t team_reduce_size = Policy::member_type::team_reduce_size(); + const size_t pool_reduce_size = 0 ; // Never shrinks + const size_t team_reduce_size = TEAM_REDUCE_SIZE * m_policy.team_size(); + const size_t team_shared_size = m_shmem_size + m_policy.scratch_size(1); + const size_t thread_local_size = 0 ; // Never shrinks - OpenMPexec::resize_scratch( 0 , team_reduce_size + m_shmem_size + m_policy.scratch_size(1)); + OpenMPexec::resize_thread_data( pool_reduce_size + , team_reduce_size + , team_shared_size + , thread_local_size ); #pragma omp parallel { - ParallelFor::template exec_team< WorkTag, typename Policy::schedule_type::type> - ( m_functor - , Member( * OpenMPexec::get_thread_omp(), 
m_policy, m_shmem_size, 0) ); + HostThreadTeamData & data = *OpenMPexec::get_thread_data(); + + const int active = data.organize_team( m_policy.team_size() ); + + if ( active ) { + data.set_work_partition( m_policy.league_size() + , ( 0 < m_policy.chunk_size() + ? m_policy.chunk_size() + : m_policy.team_iter() ) ); + } + + if ( is_dynamic ) { + // Must synchronize to make sure each team has set its + // partition before begining the work stealing loop. + if ( data.pool_rendezvous() ) data.pool_rendezvous_release(); + } + + if ( active ) { + + std::pair range(0,0); + + do { + + range = is_dynamic ? data.get_work_stealing_chunk() + : data.get_work_partition(); + + ParallelFor::template exec_team< WorkTag > + ( m_functor , data + , range.first , range.second , m_policy.league_size() ); + + } while ( is_dynamic && 0 <= range.first ); + } + + data.disband_team(); } -/* END #pragma omp parallel */ +// END #pragma omp parallel } + inline ParallelFor( const FunctorType & arg_functor , const Policy & arg_policy ) : m_functor( arg_functor ) , m_policy( arg_policy ) - , m_shmem_size( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , arg_policy.team_size() ) ) + , m_shmem_size( arg_policy.scratch_size(0) + + arg_policy.scratch_size(1) + + FunctorTeamShmemSize< FunctorType > + ::value( arg_functor , arg_policy.team_size() ) ) {} }; +//---------------------------------------------------------------------------- template< class FunctorType , class ReducerType, class ... Properties > class ParallelReduce< FunctorType @@ -622,20 +642,26 @@ class ParallelReduce< FunctorType { private: + enum { TEAM_REDUCE_SIZE = 512 }; + typedef Kokkos::Impl::TeamPolicyInternal< Kokkos::OpenMP, Properties ... 
> Policy ; - typedef typename Policy::work_tag WorkTag ; - typedef typename Policy::member_type Member ; + typedef FunctorAnalysis< FunctorPatternInterface::REDUCE , Policy , FunctorType > Analysis ; + + typedef typename Policy::work_tag WorkTag ; + typedef typename Policy::schedule_type::type SchedTag ; + typedef typename Policy::member_type Member ; + + typedef Kokkos::Impl::if_c< std::is_same::value + , FunctorType, ReducerType> ReducerConditional; - typedef Kokkos::Impl::if_c< std::is_same::value, FunctorType, ReducerType> ReducerConditional; typedef typename ReducerConditional::type ReducerTypeFwd; - typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTag > ValueTraits ; typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ; typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd , WorkTag > ValueJoin ; - typedef typename ValueTraits::pointer_type pointer_type ; - typedef typename ValueTraits::reference_type reference_type ; + typedef typename Analysis::pointer_type pointer_type ; + typedef typename Analysis::reference_type reference_type ; const FunctorType m_functor ; const Policy m_policy ; @@ -645,22 +671,48 @@ private: template< class TagType > inline static - typename std::enable_if< std::is_same< TagType , void >::value >::type - exec_team( const FunctorType & functor , Member member , reference_type update ) + typename std::enable_if< ( std::is_same< TagType , void >::value ) >::type + exec_team( const FunctorType & functor + , HostThreadTeamData & data + , reference_type & update + , const int league_rank_begin + , const int league_rank_end + , const int league_size ) { - for ( ; member.valid_static() ; member.next_static() ) { - functor( member , update ); + for ( int r = league_rank_begin ; r < league_rank_end ; ) { + + functor( Member( data, r , league_size ) , update ); + + if ( ++r < league_rank_end ) { + // Don't allow team members to lap one another + // so that they don't overwrite shared memory. 
+ if ( data.team_rendezvous() ) { data.team_rendezvous_release(); } + } } } + template< class TagType > inline static - typename std::enable_if< ! std::is_same< TagType , void >::value >::type - exec_team( const FunctorType & functor , Member member , reference_type update ) + typename std::enable_if< ( ! std::is_same< TagType , void >::value ) >::type + exec_team( const FunctorType & functor + , HostThreadTeamData & data + , reference_type & update + , const int league_rank_begin + , const int league_rank_end + , const int league_size ) { - const TagType t{} ; - for ( ; member.valid_static() ; member.next_static() ) { - functor( t , member , update ); + const TagType t{}; + + for ( int r = league_rank_begin ; r < league_rank_end ; ) { + + functor( t , Member( data, r , league_size ) , update ); + + if ( ++r < league_rank_end ) { + // Don't allow team members to lap one another + // so that they don't overwrite shared memory. + if ( data.team_rendezvous() ) { data.team_rendezvous_release(); } + } } } @@ -669,44 +721,89 @@ public: inline void execute() const { + enum { is_dynamic = std::is_same< SchedTag , Kokkos::Dynamic >::value }; + OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_reduce"); + OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_reduce"); - const size_t team_reduce_size = Policy::member_type::team_reduce_size(); + const size_t pool_reduce_size = + Analysis::value_size( ReducerConditional::select(m_functor, m_reducer)); - OpenMPexec::resize_scratch( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) , team_reduce_size + m_shmem_size ); + const size_t team_reduce_size = TEAM_REDUCE_SIZE * m_policy.team_size(); + const size_t team_shared_size = m_shmem_size + m_policy.scratch_size(1); + const size_t thread_local_size = 0 ; // Never shrinks + + OpenMPexec::resize_thread_data( pool_reduce_size + , team_reduce_size + , team_shared_size + , thread_local_size ); #pragma omp parallel { - OpenMPexec & exec = * 
OpenMPexec::get_thread_omp(); + HostThreadTeamData & data = *OpenMPexec::get_thread_data(); - ParallelReduce::template exec_team< WorkTag > - ( m_functor - , Member( exec , m_policy , m_shmem_size, 0 ) - , ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , exec.scratch_reduce() ) ); + const int active = data.organize_team( m_policy.team_size() ); + + if ( active ) { + data.set_work_partition( m_policy.league_size() + , ( 0 < m_policy.chunk_size() + ? m_policy.chunk_size() + : m_policy.team_iter() ) ); + } + + if ( is_dynamic ) { + // Must synchronize to make sure each team has set its + // partition before begining the work stealing loop. + if ( data.pool_rendezvous() ) data.pool_rendezvous_release(); + } + + if ( active ) { + reference_type update = + ValueInit::init( ReducerConditional::select(m_functor , m_reducer) + , data.pool_reduce_local() ); + + std::pair range(0,0); + + do { + + range = is_dynamic ? data.get_work_stealing_chunk() + : data.get_work_partition(); + + ParallelReduce::template exec_team< WorkTag > + ( m_functor , data , update + , range.first , range.second , m_policy.league_size() ); + + } while ( is_dynamic && 0 <= range.first ); + } else { + ValueInit::init( ReducerConditional::select(m_functor , m_reducer) + , data.pool_reduce_local() ); + } + + data.disband_team(); } -/* END #pragma omp parallel */ +// END #pragma omp parallel - { - const pointer_type ptr = pointer_type( OpenMPexec::pool_rev(0)->scratch_reduce() ); + // Reduction: - int max_active_threads = OpenMPexec::pool_size(); - if( max_active_threads > m_policy.league_size()* m_policy.team_size() ) - max_active_threads = m_policy.league_size()* m_policy.team_size(); + const pointer_type ptr = pointer_type( OpenMPexec::get_thread_data(0)->pool_reduce_local() ); - for ( int i = 1 ; i < max_active_threads ; ++i ) { - ValueJoin::join( ReducerConditional::select(m_functor , m_reducer) , ptr , OpenMPexec::pool_rev(i)->scratch_reduce() ); - } + for ( int i = 1 ; i < 
OpenMPexec::pool_size() ; ++i ) { + ValueJoin::join( ReducerConditional::select(m_functor , m_reducer) + , ptr + , OpenMPexec::get_thread_data(i)->pool_reduce_local() ); + } - Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , ptr ); + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , ptr ); - if ( m_result_ptr ) { - const int n = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) ); + if ( m_result_ptr ) { + const int n = Analysis::value_count( ReducerConditional::select(m_functor , m_reducer) ); - for ( int j = 0 ; j < n ; ++j ) { m_result_ptr[j] = ptr[j] ; } - } + for ( int j = 0 ; j < n ; ++j ) { m_result_ptr[j] = ptr[j] ; } } } + //---------------------------------------- + template< class ViewType > inline ParallelReduce( const FunctorType & arg_functor , @@ -720,7 +817,10 @@ public: , m_policy( arg_policy ) , m_reducer( InvalidType() ) , m_result_ptr( arg_result.ptr_on_device() ) - , m_shmem_size( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , arg_policy.team_size() ) ) + , m_shmem_size( arg_policy.scratch_size(0) + + arg_policy.scratch_size(1) + + FunctorTeamShmemSize< FunctorType > + ::value( arg_functor , arg_policy.team_size() ) ) {} inline @@ -731,7 +831,10 @@ public: , m_policy( arg_policy ) , m_reducer( reducer ) , m_result_ptr( reducer.result_view().data() ) - , m_shmem_size( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , arg_policy.team_size() ) ) + , m_shmem_size( arg_policy.scratch_size(0) + + arg_policy.scratch_size(1) + + FunctorTeamShmemSize< FunctorType > + ::value( arg_functor , arg_policy.team_size() ) ) { /*static_assert( std::is_same< typename ViewType::memory_space , Kokkos::HostSpace >::value diff --git 
a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp index 5b3e9873e1..9144d8c279 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp @@ -46,6 +46,7 @@ #if defined( KOKKOS_ENABLE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG ) #include +#include //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -55,105 +56,46 @@ namespace Impl { template class TaskQueue< Kokkos::OpenMP > ; -//---------------------------------------------------------------------------- +class HostThreadTeamDataSingleton : private HostThreadTeamData { +private: -TaskExec< Kokkos::OpenMP >:: -TaskExec() - : m_self_exec( 0 ) - , m_team_exec( 0 ) - , m_sync_mask( 0 ) - , m_sync_value( 0 ) - , m_sync_step( 0 ) - , m_group_rank( 0 ) - , m_team_rank( 0 ) - , m_team_size( 1 ) -{ -} + HostThreadTeamDataSingleton() : HostThreadTeamData() + { + Kokkos::OpenMP::memory_space space ; + const size_t num_pool_reduce_bytes = 32 ; + const size_t num_team_reduce_bytes = 32 ; + const size_t num_team_shared_bytes = 1024 ; + const size_t num_thread_local_bytes = 1024 ; + const size_t alloc_bytes = + HostThreadTeamData::scratch_size( num_pool_reduce_bytes + , num_team_reduce_bytes + , num_team_shared_bytes + , num_thread_local_bytes ); -TaskExec< Kokkos::OpenMP >:: -TaskExec( Kokkos::Impl::OpenMPexec & arg_exec , int const arg_team_size ) - : m_self_exec( & arg_exec ) - , m_team_exec( arg_exec.pool_rev(arg_exec.pool_rank_rev() / arg_team_size) ) - , m_sync_mask( 0 ) - , m_sync_value( 0 ) - , m_sync_step( 0 ) - , m_group_rank( arg_exec.pool_rank_rev() / arg_team_size ) - , m_team_rank( arg_exec.pool_rank_rev() % arg_team_size ) - , m_team_size( arg_team_size ) -{ - // This team spans - // m_self_exec->pool_rev( team_size * group_rank ) - // m_self_exec->pool_rev( team_size * ( group_rank + 1 ) - 1 ) + 
HostThreadTeamData::scratch_assign + ( space.allocate( alloc_bytes ) + , alloc_bytes + , num_pool_reduce_bytes + , num_team_reduce_bytes + , num_team_shared_bytes + , num_thread_local_bytes ); + } - int64_t volatile * const sync = (int64_t *) m_self_exec->scratch_reduce(); + ~HostThreadTeamDataSingleton() + { + Kokkos::OpenMP::memory_space space ; + space.deallocate( HostThreadTeamData::scratch_buffer() + , HostThreadTeamData::scratch_bytes() ); + } - sync[0] = int64_t(0) ; - sync[1] = int64_t(0) ; +public: - for ( int i = 0 ; i < m_team_size ; ++i ) { - m_sync_value |= int64_t(1) << (8*i); - m_sync_mask |= int64_t(3) << (8*i); - } - - Kokkos::memory_fence(); -} - -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - -void TaskExec< Kokkos::OpenMP >::team_barrier_impl() const -{ - if ( m_team_exec->scratch_reduce_size() < int(2 * sizeof(int64_t)) ) { - Kokkos::abort("TaskQueue scratch_reduce memory too small"); - } - - // Use team shared memory to synchronize. - // Alternate memory locations between barriers to avoid a sequence - // of barriers overtaking one another. 
- - int64_t volatile * const sync = - ((int64_t *) m_team_exec->scratch_reduce()) + ( m_sync_step & 0x01 ); - - // This team member sets one byte within the sync variable - int8_t volatile * const sync_self = - ((int8_t *) sync) + m_team_rank ; - -#if 0 -fprintf( stdout - , "barrier group(%d) member(%d) step(%d) wait(%lx) : before(%lx)\n" - , m_group_rank - , m_team_rank - , m_sync_step - , m_sync_value - , *sync - ); -fflush(stdout); -#endif - - *sync_self = int8_t( m_sync_value & 0x03 ); // signal arrival - - while ( m_sync_value != *sync ); // wait for team to arrive - -#if 0 -fprintf( stdout - , "barrier group(%d) member(%d) step(%d) wait(%lx) : after(%lx)\n" - , m_group_rank - , m_team_rank - , m_sync_step - , m_sync_value - , *sync - ); -fflush(stdout); -#endif - - ++m_sync_step ; - - if ( 0 == ( 0x01 & m_sync_step ) ) { // Every other step - m_sync_value ^= m_sync_mask ; - if ( 1000 < m_sync_step ) m_sync_step = 0 ; - } -} - -#endif + static HostThreadTeamData & singleton() + { + static HostThreadTeamDataSingleton s ; + return s ; + } +}; //---------------------------------------------------------------------------- @@ -163,123 +105,165 @@ void TaskQueueSpecialization< Kokkos::OpenMP >::execute using execution_space = Kokkos::OpenMP ; using queue_type = TaskQueue< execution_space > ; using task_root_type = TaskBase< execution_space , void , void > ; - using PoolExec = Kokkos::Impl::OpenMPexec ; - using Member = TaskExec< execution_space > ; + using Member = Impl::HostThreadTeamMember< execution_space > ; - task_root_type * const end = (task_root_type *) task_root_type::EndTag ; + static task_root_type * const end = + (task_root_type *) task_root_type::EndTag ; - // Required: team_size <= 8 + HostThreadTeamData & team_data_single = + HostThreadTeamDataSingleton::singleton(); - const int team_size = PoolExec::pool_size(2); // Threads per core - // const int team_size = PoolExec::pool_size(1); // Threads per NUMA + const int team_size = 
Impl::OpenMPexec::pool_size(2); // Threads per core + // const int team_size = Impl::OpenMPexec::pool_size(1); // Threads per NUMA + +#if 0 +fprintf(stdout,"TaskQueue execute %d\n", team_size ); +fflush(stdout); +#endif - if ( 8 < team_size ) { - Kokkos::abort("TaskQueue unsupported team size"); - } #pragma omp parallel { - PoolExec & self = *PoolExec::get_thread_omp(); + Impl::HostThreadTeamData & self = *Impl::OpenMPexec::get_thread_data(); - Member single_exec ; - Member team_exec( self , team_size ); + // Organizing threads into a team performs a barrier across the + // entire pool to insure proper initialization of the team + // rendezvous mechanism before a team rendezvous can be performed. - // Team shared memory - task_root_type * volatile * const task_shared = - (task_root_type **) team_exec.m_team_exec->scratch_thread(); + if ( self.organize_team( team_size ) ) { -// Barrier across entire OpenMP thread pool to insure initialization -#pragma omp barrier - - // Loop until all queues are empty and no tasks in flight - - do { - - task_root_type * task = 0 ; - - // Each team lead attempts to acquire either a thread team task - // or a single thread task for the team. - - if ( 0 == team_exec.team_rank() ) { - - task = 0 < *((volatile int *) & queue->m_ready_count) ? 
end : 0 ; - - // Loop by priority and then type - for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { - for ( int j = 0 ; j < 2 && end == task ; ++j ) { - task = queue_type::pop_task( & queue->m_ready[i][j] ); - } - } - } - - // Team lead broadcast acquired task to team members: - - if ( 1 < team_exec.team_size() ) { - - if ( 0 == team_exec.team_rank() ) *task_shared = task ; - - // Fence to be sure task_shared is stored before the barrier - Kokkos::memory_fence(); - - // Whole team waits for every team member to reach this statement - team_exec.team_barrier(); - - // Fence to be sure task_shared is stored - Kokkos::memory_fence(); - - task = *task_shared ; - } + Member single_exec( team_data_single ); + Member team_exec( self ); #if 0 -fprintf( stdout - , "\nexecute group(%d) member(%d) task_shared(0x%lx) task(0x%lx)\n" - , team_exec.m_group_rank - , team_exec.m_team_rank - , uintptr_t(task_shared) - , uintptr_t(task) +fprintf(stdout,"TaskQueue pool(%d of %d) team(%d of %d) league(%d of %d) running\n" + , self.pool_rank() + , self.pool_size() + , team_exec.team_rank() + , team_exec.team_size() + , team_exec.league_rank() + , team_exec.league_size() ); fflush(stdout); #endif - if ( 0 == task ) break ; // 0 == m_ready_count + // Loop until all queues are empty and no tasks in flight - if ( end == task ) { - // All team members wait for whole team to reach this statement. - // Is necessary to prevent task_shared from being updated - // before it is read by all threads. 
- team_exec.team_barrier(); - } - else if ( task_root_type::TaskTeam == task->m_task_type ) { - // Thread Team Task - (*task->m_apply)( task , & team_exec ); + task_root_type * task = 0 ; - // The m_apply function performs a barrier - - if ( 0 == team_exec.team_rank() ) { - // team member #0 completes the task, which may delete the task - queue->complete( task ); - } - } - else { - // Single Thread Task + do { + // Each team lead attempts to acquire either a thread team task + // or a single thread task for the team. if ( 0 == team_exec.team_rank() ) { - (*task->m_apply)( task , & single_exec ); + bool leader_loop = false ; - queue->complete( task ); + do { + + if ( 0 != task && end != task ) { + // team member #0 completes the previously executed task, + // completion may delete the task + queue->complete( task ); + } + + // If 0 == m_ready_count then set task = 0 + + task = 0 < *((volatile int *) & queue->m_ready_count) ? end : 0 ; + + // Attempt to acquire a task + // Loop by priority and then type + for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { + for ( int j = 0 ; j < 2 && end == task ; ++j ) { + task = queue_type::pop_ready_task( & queue->m_ready[i][j] ); + } + } + + // If still tasks are still executing + // and no task could be acquired + // then continue this leader loop + leader_loop = end == task ; + + if ( ( ! leader_loop ) && + ( 0 != task ) && + ( task_root_type::TaskSingle == task->m_task_type ) ) { + + // if a single thread task then execute now + +#if 0 +fprintf(stdout,"TaskQueue pool(%d of %d) executing single task 0x%lx\n" + , self.pool_rank() + , self.pool_size() + , int64_t(task) + ); +fflush(stdout); +#endif + + (*task->m_apply)( task , & single_exec ); + + leader_loop = true ; + } + } while ( leader_loop ); } - // All team members wait for whole team to reach this statement. - // Not necessary to complete the task. - // Is necessary to prevent task_shared from being updated - // before it is read by all threads. 
- team_exec.team_barrier(); - } - } while(1); + // Team lead either found 0 == m_ready_count or a team task + // Team lead broadcast acquired task: + + team_exec.team_broadcast( task , 0); + + if ( 0 != task ) { // Thread Team Task + +#if 0 +fprintf(stdout,"TaskQueue pool(%d of %d) team((%d of %d) league(%d of %d) executing team task 0x%lx\n" + , self.pool_rank() + , self.pool_size() + , team_exec.team_rank() + , team_exec.team_size() + , team_exec.league_rank() + , team_exec.league_size() + , int64_t(task) + ); +fflush(stdout); +#endif + + (*task->m_apply)( task , & team_exec ); + + // The m_apply function performs a barrier + } + } while( 0 != task ); + +#if 0 +fprintf(stdout,"TaskQueue pool(%d of %d) team(%d of %d) league(%d of %d) ending\n" + , self.pool_rank() + , self.pool_size() + , team_exec.team_rank() + , team_exec.team_size() + , team_exec.league_rank() + , team_exec.league_size() + ); +fflush(stdout); +#endif + + } + + self.disband_team(); + +#if 0 +fprintf(stdout,"TaskQueue pool(%d of %d) disbanded\n" + , self.pool_rank() + , self.pool_size() + ); +fflush(stdout); +#endif + } // END #pragma omp parallel +#if 0 +fprintf(stdout,"TaskQueue execute %d end\n", team_size ); +fflush(stdout); +#endif + } void TaskQueueSpecialization< Kokkos::OpenMP >:: @@ -289,13 +273,16 @@ void TaskQueueSpecialization< Kokkos::OpenMP >:: using execution_space = Kokkos::OpenMP ; using queue_type = TaskQueue< execution_space > ; using task_root_type = TaskBase< execution_space , void , void > ; - using Member = TaskExec< execution_space > ; + using Member = Impl::HostThreadTeamMember< execution_space > ; if ( 1 == omp_get_num_threads() ) { task_root_type * const end = (task_root_type *) task_root_type::EndTag ; - Member single_exec ; + HostThreadTeamData & team_data_single = + HostThreadTeamDataSingleton::singleton(); + + Member single_exec( team_data_single ); task_root_type * task = end ; @@ -306,7 +293,7 @@ void TaskQueueSpecialization< Kokkos::OpenMP >:: // Loop by priority 
and then type for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { for ( int j = 0 ; j < 2 && end == task ; ++j ) { - task = queue_type::pop_task( & queue->m_ready[i][j] ); + task = queue_type::pop_ready_task( & queue->m_ready[i][j] ); } } diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp index 15dbb77c26..3cfdf790bf 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp @@ -60,6 +60,7 @@ public: using execution_space = Kokkos::OpenMP ; using queue_type = Kokkos::Impl::TaskQueue< execution_space > ; using task_base_type = Kokkos::Impl::TaskBase< execution_space , void , void > ; + using member_type = Kokkos::Impl::HostThreadTeamMember< execution_space > ; // Must specify memory space using memory_space = Kokkos::HostSpace ; @@ -70,296 +71,19 @@ public: // Must provide task queue execution function static void execute( queue_type * const ); - // Must provide mechanism to set function pointer in - // execution space from the host process. 
- template< typename FunctorType > + template< typename TaskType > static - void proc_set_apply( task_base_type::function_type * ptr ) - { - using TaskType = TaskBase< Kokkos::OpenMP - , typename FunctorType::value_type - , FunctorType - > ; - *ptr = TaskType::apply ; - } + typename TaskType::function_type + get_function_pointer() { return TaskType::apply ; } }; extern template class TaskQueue< Kokkos::OpenMP > ; -//---------------------------------------------------------------------------- - -template<> -class TaskExec< Kokkos::OpenMP > -{ -private: - - TaskExec( TaskExec && ) = delete ; - TaskExec( TaskExec const & ) = delete ; - TaskExec & operator = ( TaskExec && ) = delete ; - TaskExec & operator = ( TaskExec const & ) = delete ; - - - using PoolExec = Kokkos::Impl::OpenMPexec ; - - friend class Kokkos::Impl::TaskQueue< Kokkos::OpenMP > ; - friend class Kokkos::Impl::TaskQueueSpecialization< Kokkos::OpenMP > ; - - PoolExec * const m_self_exec ; ///< This thread's thread pool data structure - PoolExec * const m_team_exec ; ///< Team thread's thread pool data structure - int64_t m_sync_mask ; - int64_t mutable m_sync_value ; - int mutable m_sync_step ; - int m_group_rank ; ///< Which "team" subset of thread pool - int m_team_rank ; ///< Which thread within a team - int m_team_size ; - - TaskExec(); - TaskExec( PoolExec & arg_exec , int arg_team_size ); - - void team_barrier_impl() const ; - -public: - -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - void * team_shared() const - { return m_team_exec ? m_team_exec->scratch_thread() : (void*) 0 ; } - - int team_shared_size() const - { return m_team_exec ? m_team_exec->scratch_thread_size() : 0 ; } - - /**\brief Whole team enters this function call - * before any teeam member returns from - * this function call. 
- */ - void team_barrier() const { if ( 1 < m_team_size ) team_barrier_impl(); } -#else - KOKKOS_INLINE_FUNCTION void team_barrier() const {} - KOKKOS_INLINE_FUNCTION void * team_shared() const { return 0 ; } - KOKKOS_INLINE_FUNCTION int team_shared_size() const { return 0 ; } -#endif - - KOKKOS_INLINE_FUNCTION - int team_rank() const { return m_team_rank ; } - - KOKKOS_INLINE_FUNCTION - int team_size() const { return m_team_size ; } -}; - }} /* namespace Kokkos::Impl */ //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- -namespace Kokkos { - -template -KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct > -TeamThreadRange - ( Impl::TaskExec< Kokkos::OpenMP > & thread, const iType & count ) -{ - return Impl::TeamThreadRangeBoundariesStruct >(thread,count); -} - -template -KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type, - Impl::TaskExec< Kokkos::OpenMP > > -TeamThreadRange - ( Impl:: TaskExec< Kokkos::OpenMP > & thread, const iType1 & begin, const iType2 & end ) -{ - typedef typename std::common_type::type iType; - return Impl::TeamThreadRangeBoundariesStruct >(thread, begin, end); -} - -template -KOKKOS_INLINE_FUNCTION -Impl::ThreadVectorRangeBoundariesStruct > -ThreadVectorRange - ( Impl::TaskExec< Kokkos::OpenMP > & thread - , const iType & count ) -{ - return Impl::ThreadVectorRangeBoundariesStruct >(thread,count); -} - -/** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all threads of the the calling thread team. - * This functionality requires C++11 support. 
-*/ -template -KOKKOS_INLINE_FUNCTION -void parallel_for - ( const Impl::TeamThreadRangeBoundariesStruct >& loop_boundaries - , const Lambda& lambda - ) -{ - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - lambda(i); - } -} - -template -KOKKOS_INLINE_FUNCTION -void parallel_reduce - ( const Impl::TeamThreadRangeBoundariesStruct >& loop_boundaries - , const Lambda& lambda - , ValueType& initialized_result) -{ - int team_rank = loop_boundaries.thread.team_rank(); // member num within the team - ValueType result = initialized_result; - - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - lambda(i, result); - } - - if ( 1 < loop_boundaries.thread.team_size() ) { - - ValueType *shared = (ValueType*) loop_boundaries.thread.team_shared(); - - loop_boundaries.thread.team_barrier(); - shared[team_rank] = result; - - loop_boundaries.thread.team_barrier(); - - // reduce across threads to thread 0 - if (team_rank == 0) { - for (int i = 1; i < loop_boundaries.thread.team_size(); i++) { - shared[0] += shared[i]; - } - } - - loop_boundaries.thread.team_barrier(); - - // broadcast result - initialized_result = shared[0]; - } - else { - initialized_result = result ; - } -} - -template< typename iType, class Lambda, typename ValueType, class JoinType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce - (const Impl::TeamThreadRangeBoundariesStruct >& loop_boundaries, - const Lambda & lambda, - const JoinType & join, - ValueType& initialized_result) -{ - int team_rank = loop_boundaries.thread.team_rank(); // member num within the team - ValueType result = initialized_result; - - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - lambda(i, result); - } - - if ( 1 < loop_boundaries.thread.team_size() ) { - ValueType *shared = (ValueType*) loop_boundaries.thread.team_shared(); - - loop_boundaries.thread.team_barrier(); - shared[team_rank] = result; - - 
loop_boundaries.thread.team_barrier(); - - // reduce across threads to thread 0 - if (team_rank == 0) { - for (int i = 1; i < loop_boundaries.thread.team_size(); i++) { - join(shared[0], shared[i]); - } - } - - loop_boundaries.thread.team_barrier(); - - // broadcast result - initialized_result = shared[0]; - } - else { - initialized_result = result ; - } -} - -// placeholder for future function -template< typename iType, class Lambda, typename ValueType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce - (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, - const Lambda & lambda, - ValueType& initialized_result) -{ -} - -// placeholder for future function -template< typename iType, class Lambda, typename ValueType, class JoinType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce - (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, - const Lambda & lambda, - const JoinType & join, - ValueType& initialized_result) -{ -} - -template< typename ValueType, typename iType, class Lambda > -KOKKOS_INLINE_FUNCTION -void parallel_scan - (const Impl::TeamThreadRangeBoundariesStruct >& loop_boundaries, - const Lambda & lambda) -{ - ValueType accum = 0 ; - ValueType val, local_total; - ValueType *shared = (ValueType*) loop_boundaries.thread.team_shared(); - int team_size = loop_boundaries.thread.team_size(); - int team_rank = loop_boundaries.thread.team_rank(); // member num within the team - - // Intra-member scan - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - local_total = 0; - lambda(i,local_total,false); - val = accum; - lambda(i,val,true); - accum += local_total; - } - - shared[team_rank] = accum; - loop_boundaries.thread.team_barrier(); - - // Member 0 do scan on accumulated totals - if (team_rank == 0) { - for( iType i = 1; i < team_size; i+=1) { - shared[i] += shared[i-1]; - } - accum = 0; // Member 0 set accum to 0 in preparation for inter-member scan - } - - 
loop_boundaries.thread.team_barrier(); - - // Inter-member scan adding in accumulated totals - if (team_rank != 0) { accum = shared[team_rank-1]; } - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - local_total = 0; - lambda(i,local_total,false); - val = accum; - lambda(i,val,true); - accum += local_total; - } -} - -// placeholder for future function -template< typename iType, class Lambda, typename ValueType > -KOKKOS_INLINE_FUNCTION -void parallel_scan - (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, - const Lambda & lambda) -{ -} - - -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - #endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ #endif /* #ifndef KOKKOS_IMPL_OPENMP_TASK_HPP */ diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp index 34cf581a47..2d50c6e548 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp @@ -86,7 +86,7 @@ int OpenMPexec::m_map_rank[ OpenMPexec::MAX_THREAD_COUNT ] = { 0 }; int OpenMPexec::m_pool_topo[ 4 ] = { 0 }; -OpenMPexec * OpenMPexec::m_pool[ OpenMPexec::MAX_THREAD_COUNT ] = { 0 }; +HostThreadTeamData * OpenMPexec::m_pool[ OpenMPexec::MAX_THREAD_COUNT ] = { 0 }; void OpenMPexec::verify_is_process( const char * const label ) { @@ -113,67 +113,110 @@ void OpenMPexec::verify_initialized( const char * const label ) } -void OpenMPexec::clear_scratch() +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +void OpenMPexec::clear_thread_data() { + const size_t member_bytes = + sizeof(int64_t) * + 
HostThreadTeamData::align_to_int64( sizeof(HostThreadTeamData) ); + + const int old_alloc_bytes = + m_pool[0] ? ( member_bytes + m_pool[0]->scratch_bytes() ) : 0 ; + + Kokkos::HostSpace space ; + #pragma omp parallel { - const int rank_rev = m_map_rank[ omp_get_thread_num() ]; - typedef Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > Record ; - if ( m_pool[ rank_rev ] ) { - Record * const r = Record::get_record( m_pool[ rank_rev ] ); - m_pool[ rank_rev ] = 0 ; - Record::decrement( r ); + const int rank = m_map_rank[ omp_get_thread_num() ]; + + if ( 0 != m_pool[rank] ) { + + m_pool[rank]->disband_pool(); + + space.deallocate( m_pool[rank] , old_alloc_bytes ); + + m_pool[rank] = 0 ; } } /* END #pragma omp parallel */ } -void OpenMPexec::resize_scratch( size_t reduce_size , size_t thread_size ) +void OpenMPexec::resize_thread_data( size_t pool_reduce_bytes + , size_t team_reduce_bytes + , size_t team_shared_bytes + , size_t thread_local_bytes ) { - enum { ALIGN_MASK = Kokkos::Impl::MEMORY_ALIGNMENT - 1 }; - enum { ALLOC_EXEC = ( sizeof(OpenMPexec) + ALIGN_MASK ) & ~ALIGN_MASK }; + const size_t member_bytes = + sizeof(int64_t) * + HostThreadTeamData::align_to_int64( sizeof(HostThreadTeamData) ); - const size_t old_reduce_size = m_pool[0] ? m_pool[0]->m_scratch_reduce_end : 0 ; - const size_t old_thread_size = m_pool[0] ? m_pool[0]->m_scratch_thread_end - m_pool[0]->m_scratch_reduce_end : 0 ; + HostThreadTeamData * root = m_pool[0] ; - reduce_size = ( reduce_size + ALIGN_MASK ) & ~ALIGN_MASK ; - thread_size = ( thread_size + ALIGN_MASK ) & ~ALIGN_MASK ; + const size_t old_pool_reduce = root ? root->pool_reduce_bytes() : 0 ; + const size_t old_team_reduce = root ? root->team_reduce_bytes() : 0 ; + const size_t old_team_shared = root ? root->team_shared_bytes() : 0 ; + const size_t old_thread_local = root ? root->thread_local_bytes() : 0 ; + const size_t old_alloc_bytes = root ? 
( member_bytes + root->scratch_bytes() ) : 0 ; - // Requesting allocation and old allocation is too small: + // Allocate if any of the old allocation is tool small: - const bool allocate = ( old_reduce_size < reduce_size ) || - ( old_thread_size < thread_size ); - - if ( allocate ) { - if ( reduce_size < old_reduce_size ) { reduce_size = old_reduce_size ; } - if ( thread_size < old_thread_size ) { thread_size = old_thread_size ; } - } - - const size_t alloc_size = allocate ? ALLOC_EXEC + reduce_size + thread_size : 0 ; - const int pool_size = m_pool_topo[0] ; + const bool allocate = ( old_pool_reduce < pool_reduce_bytes ) || + ( old_team_reduce < team_reduce_bytes ) || + ( old_team_shared < team_shared_bytes ) || + ( old_thread_local < thread_local_bytes ); if ( allocate ) { - clear_scratch(); + if ( pool_reduce_bytes < old_pool_reduce ) { pool_reduce_bytes = old_pool_reduce ; } + if ( team_reduce_bytes < old_team_reduce ) { team_reduce_bytes = old_team_reduce ; } + if ( team_shared_bytes < old_team_shared ) { team_shared_bytes = old_team_shared ; } + if ( thread_local_bytes < old_thread_local ) { thread_local_bytes = old_thread_local ; } + + const size_t alloc_bytes = + member_bytes + + HostThreadTeamData::scratch_size( pool_reduce_bytes + , team_reduce_bytes + , team_shared_bytes + , thread_local_bytes ); + + const int pool_size = omp_get_max_threads(); + + Kokkos::HostSpace space ; #pragma omp parallel { - const int rank_rev = m_map_rank[ omp_get_thread_num() ]; - const int rank = pool_size - ( rank_rev + 1 ); + const int rank = m_map_rank[ omp_get_thread_num() ]; - typedef Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > Record ; + if ( 0 != m_pool[rank] ) { - Record * const r = Record::allocate( Kokkos::HostSpace() - , "openmp_scratch" - , alloc_size ); + m_pool[rank]->disband_pool(); - Record::increment( r ); + space.deallocate( m_pool[rank] , old_alloc_bytes ); + } - m_pool[ rank_rev ] = reinterpret_cast( r->data() ); + void * 
const ptr = space.allocate( alloc_bytes ); - new ( m_pool[ rank_rev ] ) OpenMPexec( rank , ALLOC_EXEC , reduce_size , thread_size ); + m_pool[ rank ] = new( ptr ) HostThreadTeamData(); + + m_pool[ rank ]-> + scratch_assign( ((char *)ptr) + member_bytes + , alloc_bytes + , pool_reduce_bytes + , team_reduce_bytes + , team_shared_bytes + , thread_local_bytes ); } /* END #pragma omp parallel */ + + HostThreadTeamData::organize_pool( m_pool , pool_size ); } } @@ -197,14 +240,14 @@ void OpenMP::initialize( unsigned thread_count , // Before any other call to OMP query the maximum number of threads // and save the value for re-initialization unit testing. - //Using omp_get_max_threads(); is problematic in conjunction with - //Hwloc on Intel (essentially an initial call to the OpenMP runtime - //without a parallel region before will set a process mask for a single core - //The runtime will than bind threads for a parallel region to other cores on the - //entering the first parallel region and make the process mask the aggregate of - //the thread masks. The intend seems to be to make serial code run fast, if you - //compile with OpenMP enabled but don't actually use parallel regions or so - //static int omp_max_threads = omp_get_max_threads(); + // Using omp_get_max_threads(); is problematic in conjunction with + // Hwloc on Intel (essentially an initial call to the OpenMP runtime + // without a parallel region before will set a process mask for a single core + // The runtime will than bind threads for a parallel region to other cores on the + // entering the first parallel region and make the process mask the aggregate of + // the thread masks. 
The intend seems to be to make serial code run fast, if you + // compile with OpenMP enabled but don't actually use parallel regions or so + // static int omp_max_threads = omp_get_max_threads(); int nthreads = 0; #pragma omp parallel { @@ -268,8 +311,6 @@ void OpenMP::initialize( unsigned thread_count , // Call to 'bind_this_thread' is not thread safe so place this whole block in a critical region. // Call to 'new' may not be thread safe as well. - // Reverse the rank for threads so that the scan operation reduces to the highest rank thread. - const unsigned omp_rank = omp_get_thread_num(); const unsigned thread_r = Impl::s_using_hwloc && Kokkos::hwloc::can_bind_threads() ? Kokkos::hwloc::bind_this_thread( thread_count , threads_coord ) @@ -286,7 +327,19 @@ void OpenMP::initialize( unsigned thread_count , Impl::OpenMPexec::m_pool_topo[1] = Impl::s_using_hwloc ? thread_count / use_numa_count : thread_count; Impl::OpenMPexec::m_pool_topo[2] = Impl::s_using_hwloc ? thread_count / ( use_numa_count * use_cores_per_numa ) : 1; - Impl::OpenMPexec::resize_scratch( 1024 , 1024 ); + // New, unified host thread team data: + { + size_t pool_reduce_bytes = 32 * thread_count ; + size_t team_reduce_bytes = 32 * thread_count ; + size_t team_shared_bytes = 1024 * thread_count ; + size_t thread_local_bytes = 1024 ; + + Impl::OpenMPexec::resize_thread_data( pool_reduce_bytes + , team_reduce_bytes + , team_shared_bytes + , thread_local_bytes + ); + } } } @@ -309,7 +362,7 @@ void OpenMP::initialize( unsigned thread_count , // Init the array for used for arbitrarily sized atomics Impl::init_lock_array_host_space(); - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) Kokkos::Profiling::initialize(); #endif } @@ -321,7 +374,8 @@ void OpenMP::finalize() Impl::OpenMPexec::verify_initialized( "OpenMP::finalize" ); Impl::OpenMPexec::verify_is_process( "OpenMP::finalize" ); - Impl::OpenMPexec::clear_scratch(); + // New, unified host thread team data: + 
Impl::OpenMPexec::clear_thread_data(); Impl::OpenMPexec::m_pool_topo[0] = 0 ; Impl::OpenMPexec::m_pool_topo[1] = 0 ; @@ -333,7 +387,7 @@ void OpenMP::finalize() hwloc::unbind_this_thread(); } - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) Kokkos::Profiling::finalize(); #endif } diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp index 63f7234da3..39ace31319 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp @@ -44,13 +44,22 @@ #ifndef KOKKOS_OPENMPEXEC_HPP #define KOKKOS_OPENMPEXEC_HPP +#include + #include -#include +#include #include + #include #include #include + +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + namespace Kokkos { namespace Impl { @@ -60,41 +69,19 @@ namespace Impl { class OpenMPexec { public: + friend class Kokkos::OpenMP ; + enum { MAX_THREAD_COUNT = 4096 }; private: - static OpenMPexec * m_pool[ MAX_THREAD_COUNT ]; // Indexed by: m_pool_rank_rev - static int m_pool_topo[ 4 ]; static int m_map_rank[ MAX_THREAD_COUNT ]; - friend class Kokkos::OpenMP ; + static HostThreadTeamData * m_pool[ MAX_THREAD_COUNT ]; - int const m_pool_rank ; - int const m_pool_rank_rev ; - int const m_scratch_exec_end ; - int const m_scratch_reduce_end ; - int const m_scratch_thread_end ; - - int volatile m_barrier_state ; - - // Members for dynamic scheduling - // Which thread am I stealing from currently - int m_current_steal_target; - // This thread's owned work_range - Kokkos::pair m_work_range KOKKOS_ALIGN(16); - // Team Offset if one thread determines work_range for others - long m_team_work_index; - - // Is this thread stealing (i.e. 
its owned work_range is exhausted - bool m_stealing; - - OpenMPexec(); - OpenMPexec( const OpenMPexec & ); - OpenMPexec & operator = ( const OpenMPexec & ); - - static void clear_scratch(); + static + void clear_thread_data(); public: @@ -108,47 +95,9 @@ public: inline static int pool_size( int depth = 0 ) { return m_pool_topo[ depth ]; } - inline static - OpenMPexec * pool_rev( int pool_rank_rev ) { return m_pool[ pool_rank_rev ]; } - - inline int pool_rank() const { return m_pool_rank ; } - inline int pool_rank_rev() const { return m_pool_rank_rev ; } - - inline long team_work_index() const { return m_team_work_index ; } - - inline int scratch_reduce_size() const - { return m_scratch_reduce_end - m_scratch_exec_end ; } - - inline int scratch_thread_size() const - { return m_scratch_thread_end - m_scratch_reduce_end ; } - - inline void * scratch_reduce() const { return ((char *) this) + m_scratch_exec_end ; } - inline void * scratch_thread() const { return ((char *) this) + m_scratch_reduce_end ; } - - inline - void state_wait( int state ) - { Impl::spinwait( m_barrier_state , state ); } - - inline - void state_set( int state ) { m_barrier_state = state ; } - - ~OpenMPexec() {} - - OpenMPexec( const int arg_poolRank - , const int arg_scratch_exec_size - , const int arg_scratch_reduce_size - , const int arg_scratch_thread_size ) - : m_pool_rank( arg_poolRank ) - , m_pool_rank_rev( pool_size() - ( arg_poolRank + 1 ) ) - , m_scratch_exec_end( arg_scratch_exec_size ) - , m_scratch_reduce_end( m_scratch_exec_end + arg_scratch_reduce_size ) - , m_scratch_thread_end( m_scratch_reduce_end + arg_scratch_thread_size ) - , m_barrier_state(0) - {} - static void finalize(); - static void initialize( const unsigned team_count , + static void initialize( const unsigned team_count , const unsigned threads_per_team , const unsigned numa_count , const unsigned cores_per_numa ); @@ -156,133 +105,20 @@ public: static void verify_is_process( const char * const ); static void 
verify_initialized( const char * const ); - static void resize_scratch( size_t reduce_size , size_t thread_size ); + + static + void resize_thread_data( size_t pool_reduce_bytes + , size_t team_reduce_bytes + , size_t team_shared_bytes + , size_t thread_local_bytes ); inline static - OpenMPexec * get_thread_omp() { return m_pool[ m_map_rank[ omp_get_thread_num() ] ]; } - - /* Dynamic Scheduling related functionality */ - // Initialize the work range for this thread - inline void set_work_range(const long& begin, const long& end, const long& chunk_size) { - m_work_range.first = (begin+chunk_size-1)/chunk_size; - m_work_range.second = end>0?(end+chunk_size-1)/chunk_size:m_work_range.first; - } - - // Claim and index from this thread's range from the beginning - inline long get_work_index_begin () { - Kokkos::pair work_range_new = m_work_range; - Kokkos::pair work_range_old = work_range_new; - if(work_range_old.first>=work_range_old.second) - return -1; - - work_range_new.first+=1; - - bool success = false; - while(!success) { - work_range_new = Kokkos::atomic_compare_exchange(&m_work_range,work_range_old,work_range_new); - success = ( (work_range_new == work_range_old) || - (work_range_new.first>=work_range_new.second)); - work_range_old = work_range_new; - work_range_new.first+=1; - } - if(work_range_old.first work_range_new = m_work_range; - Kokkos::pair work_range_old = work_range_new; - if(work_range_old.first>=work_range_old.second) - return -1; - work_range_new.second-=1; - bool success = false; - while(!success) { - work_range_new = Kokkos::atomic_compare_exchange(&m_work_range,work_range_old,work_range_new); - success = ( (work_range_new == work_range_old) || - (work_range_new.first>=work_range_new.second) ); - work_range_old = work_range_new; - work_range_new.second-=1; - } - if(work_range_old.first=m_pool_topo[0]) - m_current_steal_target = 0;//m_pool_topo[0]-1; - m_stealing = false; - } - - // Get a steal target; start with my-rank + 1 and go round robin, 
until arriving at this threads rank - // Returns -1 fi no active steal target available - inline int get_steal_target() { - while(( m_pool[m_current_steal_target]->m_work_range.second <= - m_pool[m_current_steal_target]->m_work_range.first ) && - (m_current_steal_target!=m_pool_rank) ) { - m_current_steal_target = (m_current_steal_target+1)%m_pool_topo[0]; - } - if(m_current_steal_target == m_pool_rank) - return -1; - else - return m_current_steal_target; - } - - inline int get_steal_target(int team_size) { - - while(( m_pool[m_current_steal_target]->m_work_range.second <= - m_pool[m_current_steal_target]->m_work_range.first ) && - (m_current_steal_target!=m_pool_rank_rev) ) { - if(m_current_steal_target + team_size < m_pool_topo[0]) - m_current_steal_target = (m_current_steal_target+team_size); - else - m_current_steal_target = 0; - } - - if(m_current_steal_target == m_pool_rank_rev) - return -1; - else - return m_current_steal_target; - } - - inline long steal_work_index (int team_size = 0) { - long index = -1; - int steal_target = team_size>0?get_steal_target(team_size):get_steal_target(); - while ( (steal_target != -1) && (index == -1)) { - index = m_pool[steal_target]->get_work_index_end(); - if(index == -1) - steal_target = team_size>0?get_steal_target(team_size):get_steal_target(); - } - return index; - } - - // Get a work index. 
Claim from owned range until its exhausted, then steal from other thread - inline long get_work_index (int team_size = 0) { - long work_index = -1; - if(!m_stealing) work_index = get_work_index_begin(); - - if( work_index == -1) { - memory_fence(); - m_stealing = true; - work_index = steal_work_index(team_size); - } - m_team_work_index = work_index; - memory_fence(); - return work_index; - } + HostThreadTeamData * get_thread_data() noexcept + { return m_pool[ m_map_rank[ omp_get_thread_num() ] ]; } + inline static + HostThreadTeamData * get_thread_data( int i ) noexcept + { return m_pool[i]; } }; } // namespace Impl @@ -294,356 +130,6 @@ public: namespace Kokkos { namespace Impl { -class OpenMPexecTeamMember { -public: - - enum { TEAM_REDUCE_SIZE = 512 }; - - /** \brief Thread states for team synchronization */ - enum { Active = 0 , Rendezvous = 1 }; - - typedef Kokkos::OpenMP execution_space ; - typedef execution_space::scratch_memory_space scratch_memory_space ; - - Impl::OpenMPexec & m_exec ; - scratch_memory_space m_team_shared ; - int m_team_scratch_size[2] ; - int m_team_base_rev ; - int m_team_rank_rev ; - int m_team_rank ; - int m_team_size ; - int m_league_rank ; - int m_league_end ; - int m_league_size ; - - int m_chunk_size; - int m_league_chunk_end; - Impl::OpenMPexec & m_team_lead_exec ; - int m_invalid_thread; - int m_team_alloc; - - // Fan-in team threads, root of the fan-in which does not block returns true - inline - bool team_fan_in() const - { - memory_fence(); - for ( int n = 1 , j ; ( ( j = m_team_rank_rev + n ) < m_team_size ) && ! ( m_team_rank_rev & n ) ; n <<= 1 ) { - - m_exec.pool_rev( m_team_base_rev + j )->state_wait( Active ); - } - - if ( m_team_rank_rev ) { - m_exec.state_set( Rendezvous ); - memory_fence(); - m_exec.state_wait( Rendezvous ); - } - - return 0 == m_team_rank_rev ; - } - - inline - void team_fan_out() const - { - memory_fence(); - for ( int n = 1 , j ; ( ( j = m_team_rank_rev + n ) < m_team_size ) && ! 
( m_team_rank_rev & n ) ; n <<= 1 ) { - m_exec.pool_rev( m_team_base_rev + j )->state_set( Active ); - memory_fence(); - } - } - -public: - - KOKKOS_INLINE_FUNCTION - const execution_space::scratch_memory_space& team_shmem() const - { return m_team_shared.set_team_thread_mode(0,1,0) ; } - - KOKKOS_INLINE_FUNCTION - const execution_space::scratch_memory_space& team_scratch(int) const - { return m_team_shared.set_team_thread_mode(0,1,0) ; } - - KOKKOS_INLINE_FUNCTION - const execution_space::scratch_memory_space& thread_scratch(int) const - { return m_team_shared.set_team_thread_mode(0,team_size(),team_rank()) ; } - - KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank ; } - KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size ; } - KOKKOS_INLINE_FUNCTION int team_rank() const { return m_team_rank ; } - KOKKOS_INLINE_FUNCTION int team_size() const { return m_team_size ; } - - KOKKOS_INLINE_FUNCTION void team_barrier() const -#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - {} -#else - { - if ( 1 < m_team_size && !m_invalid_thread) { - team_fan_in(); - team_fan_out(); - } - } -#endif - - template - KOKKOS_INLINE_FUNCTION - void team_broadcast(ValueType& value, const int& thread_id) const - { -#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - { } -#else - // Make sure there is enough scratch space: - typedef typename if_c< sizeof(ValueType) < TEAM_REDUCE_SIZE - , ValueType , void >::type type ; - - type volatile * const shared_value = - ((type*) m_exec.pool_rev( m_team_base_rev )->scratch_thread()); - - if ( team_rank() == thread_id ) *shared_value = value; - memory_fence(); - team_barrier(); // Wait for 'thread_id' to write - value = *shared_value ; - team_barrier(); // Wait for team members to read -#endif - } - - template< class ValueType, class JoinOp > - KOKKOS_INLINE_FUNCTION ValueType - team_reduce( const ValueType & value - , const JoinOp & op_in ) const - #if ! 
defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - { return ValueType(); } - #else - { - memory_fence(); - typedef ValueType value_type; - const JoinLambdaAdapter op(op_in); - #endif -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - // Make sure there is enough scratch space: - typedef typename if_c< sizeof(value_type) < TEAM_REDUCE_SIZE - , value_type , void >::type type ; - - type * const local_value = ((type*) m_exec.scratch_thread()); - - // Set this thread's contribution - *local_value = value ; - - // Fence to make sure the base team member has access: - memory_fence(); - - if ( team_fan_in() ) { - // The last thread to synchronize returns true, all other threads wait for team_fan_out() - type * const team_value = ((type*) m_exec.pool_rev( m_team_base_rev )->scratch_thread()); - - // Join to the team value: - for ( int i = 1 ; i < m_team_size ; ++i ) { - op.join( *team_value , *((type*) m_exec.pool_rev( m_team_base_rev + i )->scratch_thread()) ); - } - memory_fence(); - - // The base team member may "lap" the other team members, - // copy to their local value before proceeding. - for ( int i = 1 ; i < m_team_size ; ++i ) { - *((type*) m_exec.pool_rev( m_team_base_rev + i )->scratch_thread()) = *team_value ; - } - - // Fence to make sure all team members have access - memory_fence(); - } - - team_fan_out(); - - return *((type volatile const *)local_value); - } -#endif - /** \brief Intra-team exclusive prefix sum with team_rank() ordering - * with intra-team non-deterministic ordering accumulation. - * - * The global inter-team accumulation value will, at the end of the - * league's parallel execution, be the scan's total. - * Parallel execution ordering of the league's teams is non-deterministic. - * As such the base value for each team's scan operation is similarly - * non-deterministic. - */ - template< typename ArgType > - KOKKOS_INLINE_FUNCTION ArgType team_scan( const ArgType & value , ArgType * const global_accum ) const -#if ! 
defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - { return ArgType(); } -#else - { - // Make sure there is enough scratch space: - typedef typename if_c< sizeof(ArgType) < TEAM_REDUCE_SIZE , ArgType , void >::type type ; - - volatile type * const work_value = ((type*) m_exec.scratch_thread()); - - *work_value = value ; - - memory_fence(); - - if ( team_fan_in() ) { - // The last thread to synchronize returns true, all other threads wait for team_fan_out() - // m_team_base[0] == highest ranking team member - // m_team_base[ m_team_size - 1 ] == lowest ranking team member - // - // 1) copy from lower to higher rank, initialize lowest rank to zero - // 2) prefix sum from lowest to highest rank, skipping lowest rank - - type accum = 0 ; - - if ( global_accum ) { - for ( int i = m_team_size ; i-- ; ) { - type & val = *((type*) m_exec.pool_rev( m_team_base_rev + i )->scratch_thread()); - accum += val ; - } - accum = atomic_fetch_add( global_accum , accum ); - } - - for ( int i = m_team_size ; i-- ; ) { - type & val = *((type*) m_exec.pool_rev( m_team_base_rev + i )->scratch_thread()); - const type offset = accum ; - accum += val ; - val = offset ; - } - - memory_fence(); - } - - team_fan_out(); - - return *work_value ; - } -#endif - - /** \brief Intra-team exclusive prefix sum with team_rank() ordering. - * - * The highest rank thread can compute the reduction total as - * reduction_total = dev.team_scan( value ) + value ; - */ - template< typename Type > - KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value ) const - { return this-> template team_scan( value , 0 ); } - - //---------------------------------------- - // Private for the driver - -private: - - typedef execution_space::scratch_memory_space space ; - -public: - - template< class ... 
Properties > - inline - OpenMPexecTeamMember( Impl::OpenMPexec & exec - , const TeamPolicyInternal< OpenMP, Properties ...> & team - , const int shmem_size_L1 - , const int shmem_size_L2 - ) - : m_exec( exec ) - , m_team_shared(0,0) - , m_team_scratch_size{ shmem_size_L1 , shmem_size_L2 } - , m_team_base_rev(0) - , m_team_rank_rev(0) - , m_team_rank(0) - , m_team_size( team.team_size() ) - , m_league_rank(0) - , m_league_end(0) - , m_league_size( team.league_size() ) - , m_chunk_size( team.chunk_size()>0?team.chunk_size():team.team_iter() ) - , m_league_chunk_end(0) - , m_team_lead_exec( *exec.pool_rev( team.team_alloc() * (m_exec.pool_rank_rev()/team.team_alloc()) )) - , m_team_alloc( team.team_alloc()) - { - const int pool_rank_rev = m_exec.pool_rank_rev(); - const int pool_team_rank_rev = pool_rank_rev % team.team_alloc(); - const int pool_league_rank_rev = pool_rank_rev / team.team_alloc(); - const int pool_num_teams = OpenMP::thread_pool_size(0)/team.team_alloc(); - const int chunks_per_team = ( team.league_size() + m_chunk_size*pool_num_teams-1 ) / (m_chunk_size*pool_num_teams); - int league_iter_end = team.league_size() - pool_league_rank_rev * chunks_per_team * m_chunk_size; - int league_iter_begin = league_iter_end - chunks_per_team * m_chunk_size; - if (league_iter_begin < 0) league_iter_begin = 0; - if (league_iter_end>team.league_size()) league_iter_end = team.league_size(); - - if ((team.team_alloc()>m_team_size)? 
- (pool_team_rank_rev >= m_team_size): - (m_exec.pool_size() - pool_num_teams*m_team_size > m_exec.pool_rank()) - ) - m_invalid_thread = 1; - else - m_invalid_thread = 0; - - m_team_rank_rev = pool_team_rank_rev ; - if ( pool_team_rank_rev < m_team_size && !m_invalid_thread ) { - m_team_base_rev = team.team_alloc() * pool_league_rank_rev ; - m_team_rank_rev = pool_team_rank_rev ; - m_team_rank = m_team_size - ( m_team_rank_rev + 1 ); - m_league_end = league_iter_end ; - m_league_rank = league_iter_begin ; - new( (void*) &m_team_shared ) space( ( (char*) m_exec.pool_rev(m_team_base_rev)->scratch_thread() ) + TEAM_REDUCE_SIZE , m_team_scratch_size[0] , - ( (char*) m_exec.pool_rev(m_team_base_rev)->scratch_thread() ) + TEAM_REDUCE_SIZE + m_team_scratch_size[0], - 0 ); - } - - if ( (m_team_rank_rev == 0) && (m_invalid_thread == 0) ) { - m_exec.set_work_range(m_league_rank,m_league_end,m_chunk_size); - m_exec.reset_steal_target(m_team_size); - } - } - - bool valid_static() const - { - return m_league_rank < m_league_end ; - } - - void next_static() - { - if ( m_league_rank < m_league_end ) { - team_barrier(); - new( (void*) &m_team_shared ) space( ( (char*) m_exec.pool_rev(m_team_base_rev)->scratch_thread() ) + TEAM_REDUCE_SIZE , m_team_scratch_size[0] , - ( (char*) m_exec.pool_rev(m_team_base_rev)->scratch_thread() ) + TEAM_REDUCE_SIZE + m_team_scratch_size[0], - 0); - } - m_league_rank++; - } - - bool valid_dynamic() { - if(m_invalid_thread) - return false; - if ((m_league_rank < m_league_chunk_end) && (m_league_rank < m_league_size)) { - return true; - } - - if ( m_team_rank_rev == 0 ) { - m_team_lead_exec.get_work_index(m_team_alloc); - } - team_barrier(); - - long work_index = m_team_lead_exec.team_work_index(); - - m_league_rank = work_index * m_chunk_size; - m_league_chunk_end = (work_index +1 ) * m_chunk_size; - - if(m_league_chunk_end > m_league_size) m_league_chunk_end = m_league_size; - - if(m_league_rank>=0) - return true; - return false; - } - - void 
next_dynamic() { - if(m_invalid_thread) - return; - - if ( m_league_rank < m_league_chunk_end ) { - team_barrier(); - new( (void*) &m_team_shared ) space( ( (char*) m_exec.pool_rev(m_team_base_rev)->scratch_thread() ) + TEAM_REDUCE_SIZE , m_team_scratch_size[0] , - ( (char*) m_exec.pool_rev(m_team_base_rev)->scratch_thread() ) + TEAM_REDUCE_SIZE + m_team_scratch_size[0], - 0); - } - m_league_rank++; - } - - static inline int team_reduce_size() { return TEAM_REDUCE_SIZE ; } -}; - template< class ... Properties > class TeamPolicyInternal< Kokkos::OpenMP, Properties ... >: public PolicyTraits { @@ -671,8 +157,11 @@ public: template< class FunctorType > inline static - int team_size_max( const FunctorType & ) - { return traits::execution_space::thread_pool_size(1); } + int team_size_max( const FunctorType & ) { + int pool_size = traits::execution_space::thread_pool_size(1); + int max_host_team_size = Impl::HostThreadTeamData::max_team_members; + return pool_size inline static @@ -702,7 +191,8 @@ private: , const int team_size_request ) { const int pool_size = traits::execution_space::thread_pool_size(0); - const int team_max = traits::execution_space::thread_pool_size(1); + const int max_host_team_size = Impl::HostThreadTeamData::max_team_members; + const int team_max = pool_size member_type ; }; } // namespace Impl @@ -850,216 +340,6 @@ int OpenMP::thread_pool_rank() #endif } -template< typename iType > -KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct< iType, Impl::OpenMPexecTeamMember > -TeamThreadRange( const Impl::OpenMPexecTeamMember& thread, const iType& count ) { - return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::OpenMPexecTeamMember >( thread, count ); -} - -template< typename iType1, typename iType2 > -KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type, - Impl::OpenMPexecTeamMember > -TeamThreadRange( const Impl::OpenMPexecTeamMember& thread, const iType1& begin, const 
iType2& end ) { - typedef typename std::common_type< iType1, iType2 >::type iType; - return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::OpenMPexecTeamMember >( thread, iType(begin), iType(end) ); -} - -template -KOKKOS_INLINE_FUNCTION -Impl::ThreadVectorRangeBoundariesStruct -ThreadVectorRange(const Impl::OpenMPexecTeamMember& thread, const iType& count) { - return Impl::ThreadVectorRangeBoundariesStruct(thread,count); -} - -KOKKOS_INLINE_FUNCTION -Impl::ThreadSingleStruct PerTeam(const Impl::OpenMPexecTeamMember& thread) { - return Impl::ThreadSingleStruct(thread); -} - -KOKKOS_INLINE_FUNCTION -Impl::VectorSingleStruct PerThread(const Impl::OpenMPexecTeamMember& thread) { - return Impl::VectorSingleStruct(thread); -} - } // namespace Kokkos -namespace Kokkos { - - /** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all threads of the the calling thread team. - * This functionality requires C++11 support.*/ -template -KOKKOS_INLINE_FUNCTION -void parallel_for(const Impl::TeamThreadRangeBoundariesStruct& loop_boundaries, const Lambda& lambda) { - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) - lambda(i); -} - -/** \brief Inter-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all threads of the the calling thread team and a summation of - * val is performed and put into result. 
This functionality requires C++11 support.*/ -template< typename iType, class Lambda, typename ValueType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct& loop_boundaries, - const Lambda & lambda, ValueType& result) { - - result = ValueType(); - - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i,tmp); - result+=tmp; - } - - result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd()); -} - -/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of - * val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result. - * The input value of init_result is used as initializer for temporary variables of ValueType. Therefore - * the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or - * '1 for *'). This functionality requires C++11 support.*/ -template< typename iType, class Lambda, typename ValueType, class JoinType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct& loop_boundaries, - const Lambda & lambda, const JoinType& join, ValueType& init_result) { - - ValueType result = init_result; - - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i,tmp); - join(result,tmp); - } - - init_result = loop_boundaries.thread.team_reduce(result,join); -} - -} //namespace Kokkos - -namespace Kokkos { -/** \brief Intra-thread vector parallel_for. Executes lambda(iType i) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all vector lanes of the the calling thread. 
- * This functionality requires C++11 support.*/ -template -KOKKOS_INLINE_FUNCTION -void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct& - loop_boundaries, const Lambda& lambda) { - #ifdef KOKKOS_ENABLE_PRAGMA_IVDEP - #pragma ivdep - #endif - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) - lambda(i); -} - -/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a summation of - * val is performed and put into result. This functionality requires C++11 support.*/ -template< typename iType, class Lambda, typename ValueType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct& - loop_boundaries, const Lambda & lambda, ValueType& result) { - result = ValueType(); -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP -#pragma ivdep -#endif - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i,tmp); - result+=tmp; - } -} - -/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of - * val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result. - * The input value of init_result is used as initializer for temporary variables of ValueType. Therefore - * the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or - * '1 for *'). 
This functionality requires C++11 support.*/ -template< typename iType, class Lambda, typename ValueType, class JoinType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct& - loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) { - - ValueType result = init_result; -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP -#pragma ivdep -#endif - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i,tmp); - join(result,tmp); - } - init_result = result; -} - -/** \brief Intra-thread vector parallel exclusive prefix sum. Executes lambda(iType i, ValueType & val, bool final) - * for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all vector lanes in the thread and a scan operation is performed. - * Depending on the target execution space the operator might be called twice: once with final=false - * and once with final=true. When final==true val contains the prefix sum value. The contribution of this - * "i" needs to be added to val no matter whether final==true or not. In a serial execution - * (i.e. team_size==1) the operator is only called once with final==true. Scan_val will be set - * to the final sum value over all vector lanes. 
- * This functionality requires C++11 support.*/ -template< typename iType, class FunctorType > -KOKKOS_INLINE_FUNCTION -void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct& - loop_boundaries, const FunctorType & lambda) { - - typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ; - typedef typename ValueTraits::value_type value_type ; - - value_type scan_val = value_type(); - -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP -#pragma ivdep -#endif - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - lambda(i,scan_val,true); - } -} - -} // namespace Kokkos - -namespace Kokkos { - -template -KOKKOS_INLINE_FUNCTION -void single(const Impl::VectorSingleStruct& single_struct, const FunctorType& lambda) { - lambda(); -} - -template -KOKKOS_INLINE_FUNCTION -void single(const Impl::ThreadSingleStruct& single_struct, const FunctorType& lambda) { - if(single_struct.team_member.team_rank()==0) lambda(); -} - -template -KOKKOS_INLINE_FUNCTION -void single(const Impl::VectorSingleStruct& single_struct, const FunctorType& lambda, ValueType& val) { - lambda(val); -} - -template -KOKKOS_INLINE_FUNCTION -void single(const Impl::ThreadSingleStruct& single_struct, const FunctorType& lambda, ValueType& val) { - if(single_struct.team_member.team_rank()==0) { - lambda(val); - } - single_struct.team_member.team_broadcast(val,0); -} -} - #endif /* #ifndef KOKKOS_OPENMPEXEC_HPP */ diff --git a/lib/kokkos/core/src/Qthread/Kokkos_QthreadExec.cpp b/lib/kokkos/core/src/Qthread/Kokkos_QthreadExec.cpp deleted file mode 100644 index b4df5e35bb..0000000000 --- a/lib/kokkos/core/src/Qthread/Kokkos_QthreadExec.cpp +++ /dev/null @@ -1,511 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. 
Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include - -#if defined( KOKKOS_ENABLE_QTHREAD ) - -#include -#include -#include -#include -#include -#include -#include -#include - -// Defines to enable experimental Qthread functionality - -#define QTHREAD_LOCAL_PRIORITY -#define CLONED_TASKS - -#include - -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { -namespace { - -enum { MAXIMUM_QTHREAD_WORKERS = 1024 }; - -/** s_exec is indexed by the reverse rank of the workers - * for faster fan-in / fan-out lookups - * [ n - 1 , n - 2 , ... , 0 ] - */ -QthreadExec * s_exec[ MAXIMUM_QTHREAD_WORKERS ]; - -int s_number_shepherds = 0 ; -int s_number_workers_per_shepherd = 0 ; -int s_number_workers = 0 ; - -inline -QthreadExec ** worker_exec() -{ - return s_exec + s_number_workers - ( qthread_shep() * s_number_workers_per_shepherd + qthread_worker_local(NULL) + 1 ); -} - -const int s_base_size = QthreadExec::align_alloc( sizeof(QthreadExec) ); - -int s_worker_reduce_end = 0 ; /* End of worker reduction memory */ -int s_worker_shared_end = 0 ; /* Total of worker scratch memory */ -int s_worker_shared_begin = 0 ; /* Beginning of worker shared memory */ - -QthreadExecFunctionPointer volatile s_active_function = 0 ; -const void * volatile s_active_function_arg = 0 ; - -} /* namespace */ -} /* namespace Impl */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- - -namespace Kokkos { - -int Qthread::is_initialized() -{ - return Impl::s_number_workers != 0 ; -} - -int Qthread::concurrency() -{ - return Impl::s_number_workers_per_shepherd ; -} - -int Qthread::in_parallel() -{ - return Impl::s_active_function != 0 ; -} - -void Qthread::initialize( int thread_count ) -{ - // Environment variable: QTHREAD_NUM_SHEPHERDS - // Environment variable: QTHREAD_NUM_WORKERS_PER_SHEP - 
// Environment variable: QTHREAD_HWPAR - - { - char buffer[256]; - snprintf(buffer,sizeof(buffer),"QTHREAD_HWPAR=%d",thread_count); - putenv(buffer); - } - - const bool ok_init = ( QTHREAD_SUCCESS == qthread_initialize() ) && - ( thread_count == qthread_num_shepherds() * qthread_num_workers_local(NO_SHEPHERD) ) && - ( thread_count == qthread_num_workers() ); - - bool ok_symmetry = true ; - - if ( ok_init ) { - Impl::s_number_shepherds = qthread_num_shepherds(); - Impl::s_number_workers_per_shepherd = qthread_num_workers_local(NO_SHEPHERD); - Impl::s_number_workers = Impl::s_number_shepherds * Impl::s_number_workers_per_shepherd ; - - for ( int i = 0 ; ok_symmetry && i < Impl::s_number_shepherds ; ++i ) { - ok_symmetry = ( Impl::s_number_workers_per_shepherd == qthread_num_workers_local(i) ); - } - } - - if ( ! ok_init || ! ok_symmetry ) { - std::ostringstream msg ; - - msg << "Kokkos::Qthread::initialize(" << thread_count << ") FAILED" ; - msg << " : qthread_num_shepherds = " << qthread_num_shepherds(); - msg << " : qthread_num_workers_per_shepherd = " << qthread_num_workers_local(NO_SHEPHERD); - msg << " : qthread_num_workers = " << qthread_num_workers(); - - if ( ! 
ok_symmetry ) { - msg << " : qthread_num_workers_local = {" ; - for ( int i = 0 ; i < Impl::s_number_shepherds ; ++i ) { - msg << " " << qthread_num_workers_local(i) ; - } - msg << " }" ; - } - - Impl::s_number_workers = 0 ; - Impl::s_number_shepherds = 0 ; - Impl::s_number_workers_per_shepherd = 0 ; - - if ( ok_init ) { qthread_finalize(); } - - Kokkos::Impl::throw_runtime_exception( msg.str() ); - } - - Impl::QthreadExec::resize_worker_scratch( 256 , 256 ); - - // Init the array for used for arbitrarily sized atomics - Impl::init_lock_array_host_space(); - -} - -void Qthread::finalize() -{ - Impl::QthreadExec::clear_workers(); - - if ( Impl::s_number_workers ) { - qthread_finalize(); - } - - Impl::s_number_workers = 0 ; - Impl::s_number_shepherds = 0 ; - Impl::s_number_workers_per_shepherd = 0 ; -} - -void Qthread::print_configuration( std::ostream & s , const bool detail ) -{ - s << "Kokkos::Qthread {" - << " num_shepherds(" << Impl::s_number_shepherds << ")" - << " num_workers_per_shepherd(" << Impl::s_number_workers_per_shepherd << ")" - << " }" << std::endl ; -} - -Qthread & Qthread::instance( int ) -{ - static Qthread q ; - return q ; -} - -void Qthread::fence() -{ -} - -int Qthread::shepherd_size() const { return Impl::s_number_shepherds ; } -int Qthread::shepherd_worker_size() const { return Impl::s_number_workers_per_shepherd ; } - -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { -namespace { - -aligned_t driver_exec_all( void * arg ) -{ - QthreadExec & exec = **worker_exec(); - - (*s_active_function)( exec , s_active_function_arg ); - -/* - fprintf( stdout - , "QthreadExec driver worker(%d:%d) shepherd(%d:%d) shepherd_worker(%d:%d) done\n" - , exec.worker_rank() - , exec.worker_size() - , exec.shepherd_rank() - , exec.shepherd_size() - , exec.shepherd_worker_rank() - , exec.shepherd_worker_size() - ); - fflush(stdout); -*/ - - return 0 ; -} - -aligned_t 
driver_resize_worker_scratch( void * arg ) -{ - static volatile int lock_begin = 0 ; - static volatile int lock_end = 0 ; - - QthreadExec ** const exec = worker_exec(); - - //---------------------------------------- - // Serialize allocation for thread safety - - while ( ! atomic_compare_exchange_strong( & lock_begin , 0 , 1 ) ); // Spin wait to claim lock - - const bool ok = 0 == *exec ; - - if ( ok ) { *exec = (QthreadExec *) malloc( s_base_size + s_worker_shared_end ); } - - lock_begin = 0 ; // release lock - - if ( ok ) { new( *exec ) QthreadExec(); } - - //---------------------------------------- - // Wait for all calls to complete to insure that each worker has executed. - - if ( s_number_workers == 1 + atomic_fetch_add( & lock_end , 1 ) ) { lock_end = 0 ; } - - while ( lock_end ); - -/* - fprintf( stdout - , "QthreadExec resize worker(%d:%d) shepherd(%d:%d) shepherd_worker(%d:%d) done\n" - , (**exec).worker_rank() - , (**exec).worker_size() - , (**exec).shepherd_rank() - , (**exec).shepherd_size() - , (**exec).shepherd_worker_rank() - , (**exec).shepherd_worker_size() - ); - fflush(stdout); -*/ - - //---------------------------------------- - - if ( ! 
ok ) { - fprintf( stderr , "Kokkos::QthreadExec resize failed\n" ); - fflush( stderr ); - } - - return 0 ; -} - -void verify_is_process( const char * const label , bool not_active = false ) -{ - const bool not_process = 0 != qthread_shep() || 0 != qthread_worker_local(NULL); - const bool is_active = not_active && ( s_active_function || s_active_function_arg ); - - if ( not_process || is_active ) { - std::string msg( label ); - msg.append( " : FAILED" ); - if ( not_process ) msg.append(" : not called by main process"); - if ( is_active ) msg.append(" : parallel execution in progress"); - Kokkos::Impl::throw_runtime_exception( msg ); - } -} - -} - -int QthreadExec::worker_per_shepherd() -{ - return s_number_workers_per_shepherd ; -} - -QthreadExec::QthreadExec() -{ - const int shepherd_rank = qthread_shep(); - const int shepherd_worker_rank = qthread_worker_local(NULL); - const int worker_rank = shepherd_rank * s_number_workers_per_shepherd + shepherd_worker_rank ; - - m_worker_base = s_exec ; - m_shepherd_base = s_exec + s_number_workers_per_shepherd * ( ( s_number_shepherds - ( shepherd_rank + 1 ) ) ); - m_scratch_alloc = ( (unsigned char *) this ) + s_base_size ; - m_reduce_end = s_worker_reduce_end ; - m_shepherd_rank = shepherd_rank ; - m_shepherd_size = s_number_shepherds ; - m_shepherd_worker_rank = shepherd_worker_rank ; - m_shepherd_worker_size = s_number_workers_per_shepherd ; - m_worker_rank = worker_rank ; - m_worker_size = s_number_workers ; - m_worker_state = QthreadExec::Active ; -} - -void QthreadExec::clear_workers() -{ - for ( int iwork = 0 ; iwork < s_number_workers ; ++iwork ) { - QthreadExec * const exec = s_exec[iwork] ; - s_exec[iwork] = 0 ; - free( exec ); - } -} - -void QthreadExec::shared_reset( Qthread::scratch_memory_space & space ) -{ - new( & space ) - Qthread::scratch_memory_space( - ((unsigned char *) (**m_shepherd_base).m_scratch_alloc ) + s_worker_shared_begin , - s_worker_shared_end - s_worker_shared_begin - ); -} - -void 
QthreadExec::resize_worker_scratch( const int reduce_size , const int shared_size ) -{ - const int exec_all_reduce_alloc = align_alloc( reduce_size ); - const int shepherd_scan_alloc = align_alloc( 8 ); - const int shepherd_shared_end = exec_all_reduce_alloc + shepherd_scan_alloc + align_alloc( shared_size ); - - if ( s_worker_reduce_end < exec_all_reduce_alloc || - s_worker_shared_end < shepherd_shared_end ) { - -/* - fprintf( stdout , "QthreadExec::resize\n"); - fflush(stdout); -*/ - - // Clear current worker memory before allocating new worker memory - clear_workers(); - - // Increase the buffers to an aligned allocation - s_worker_reduce_end = exec_all_reduce_alloc ; - s_worker_shared_begin = exec_all_reduce_alloc + shepherd_scan_alloc ; - s_worker_shared_end = shepherd_shared_end ; - - // Need to query which shepherd this main 'process' is running... - - const int main_shep = qthread_shep(); - - // Have each worker resize its memory for proper first-touch -#if 0 - for ( int jshep = 0 ; jshep < s_number_shepherds ; ++jshep ) { - for ( int i = jshep != main_shep ? 0 : 1 ; i < s_number_workers_per_shepherd ; ++i ) { - qthread_fork_to( driver_resize_worker_scratch , NULL , NULL , jshep ); - }} -#else - // If this function is used before the 'qthread.task_policy' unit test - // the 'qthread.task_policy' unit test fails with a seg-fault within libqthread.so. - for ( int jshep = 0 ; jshep < s_number_shepherds ; ++jshep ) { - const int num_clone = jshep != main_shep ? 
s_number_workers_per_shepherd : s_number_workers_per_shepherd - 1 ; - - if ( num_clone ) { - const int ret = qthread_fork_clones_to_local_priority - ( driver_resize_worker_scratch /* function */ - , NULL /* function data block */ - , NULL /* pointer to return value feb */ - , jshep /* shepherd number */ - , num_clone - 1 /* number of instances - 1 */ - ); - - assert(ret == QTHREAD_SUCCESS); - } - } -#endif - - driver_resize_worker_scratch( NULL ); - - // Verify all workers allocated - - bool ok = true ; - for ( int iwork = 0 ; ok && iwork < s_number_workers ; ++iwork ) { ok = 0 != s_exec[iwork] ; } - - if ( ! ok ) { - std::ostringstream msg ; - msg << "Kokkos::Impl::QthreadExec::resize : FAILED for workers {" ; - for ( int iwork = 0 ; iwork < s_number_workers ; ++iwork ) { - if ( 0 == s_exec[iwork] ) { msg << " " << ( s_number_workers - ( iwork + 1 ) ); } - } - msg << " }" ; - Kokkos::Impl::throw_runtime_exception( msg.str() ); - } - } -} - -void QthreadExec::exec_all( Qthread & , QthreadExecFunctionPointer func , const void * arg ) -{ - verify_is_process("QthreadExec::exec_all(...)",true); - -/* - fprintf( stdout , "QthreadExec::exec_all\n"); - fflush(stdout); -*/ - - s_active_function = func ; - s_active_function_arg = arg ; - - // Need to query which shepherd this main 'process' is running... - - const int main_shep = qthread_shep(); - -#if 0 - for ( int jshep = 0 , iwork = 0 ; jshep < s_number_shepherds ; ++jshep ) { - for ( int i = jshep != main_shep ? 0 : 1 ; i < s_number_workers_per_shepherd ; ++i , ++iwork ) { - qthread_fork_to( driver_exec_all , NULL , NULL , jshep ); - }} -#else - // If this function is used before the 'qthread.task_policy' unit test - // the 'qthread.task_policy' unit test fails with a seg-fault within libqthread.so. - for ( int jshep = 0 ; jshep < s_number_shepherds ; ++jshep ) { - const int num_clone = jshep != main_shep ? 
s_number_workers_per_shepherd : s_number_workers_per_shepherd - 1 ; - - if ( num_clone ) { - const int ret = qthread_fork_clones_to_local_priority - ( driver_exec_all /* function */ - , NULL /* function data block */ - , NULL /* pointer to return value feb */ - , jshep /* shepherd number */ - , num_clone - 1 /* number of instances - 1 */ - ); - - assert(ret == QTHREAD_SUCCESS); - } - } -#endif - - driver_exec_all( NULL ); - - s_active_function = 0 ; - s_active_function_arg = 0 ; -} - -void * QthreadExec::exec_all_reduce_result() -{ - return s_exec[0]->m_scratch_alloc ; -} - -} /* namespace Impl */ -} /* namespace Kokkos */ - -namespace Kokkos { -namespace Impl { - -QthreadTeamPolicyMember::QthreadTeamPolicyMember() - : m_exec( **worker_exec() ) - , m_team_shared(0,0) - , m_team_size( 1 ) - , m_team_rank( 0 ) - , m_league_size(1) - , m_league_end(1) - , m_league_rank(0) -{ - m_exec.shared_reset( m_team_shared ); -} - -QthreadTeamPolicyMember::QthreadTeamPolicyMember( const QthreadTeamPolicyMember::TaskTeam & ) - : m_exec( **worker_exec() ) - , m_team_shared(0,0) - , m_team_size( s_number_workers_per_shepherd ) - , m_team_rank( m_exec.shepherd_worker_rank() ) - , m_league_size(1) - , m_league_end(1) - , m_league_rank(0) -{ - m_exec.shared_reset( m_team_shared ); -} - -} /* namespace Impl */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- - -#endif /* #if defined( KOKKOS_ENABLE_QTHREAD ) */ - diff --git a/lib/kokkos/core/src/Qthread/Kokkos_QthreadExec.hpp b/lib/kokkos/core/src/Qthread/Kokkos_QthreadExec.hpp deleted file mode 100644 index f948eb2903..0000000000 --- a/lib/kokkos/core/src/Qthread/Kokkos_QthreadExec.hpp +++ /dev/null @@ -1,620 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. 
Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_QTHREADEXEC_HPP -#define KOKKOS_QTHREADEXEC_HPP - -#include - -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -//---------------------------------------------------------------------------- - -class QthreadExec ; - -typedef void (*QthreadExecFunctionPointer)( QthreadExec & , const void * ); - -class QthreadExec { -private: - - enum { Inactive = 0 , Active = 1 }; - - const QthreadExec * const * m_worker_base ; - const QthreadExec * const * m_shepherd_base ; - - void * m_scratch_alloc ; ///< Scratch memory [ reduce , team , shared ] - int m_reduce_end ; ///< End of scratch reduction memory - - int m_shepherd_rank ; - int m_shepherd_size ; - - int m_shepherd_worker_rank ; - int m_shepherd_worker_size ; - - /* - * m_worker_rank = m_shepherd_rank * m_shepherd_worker_size + m_shepherd_worker_rank - * m_worker_size = m_shepherd_size * m_shepherd_worker_size - */ - int m_worker_rank ; - int m_worker_size ; - - int mutable volatile m_worker_state ; - - - friend class Kokkos::Qthread ; - - ~QthreadExec(); - QthreadExec( const QthreadExec & ); - QthreadExec & operator = ( const QthreadExec & ); - -public: - - QthreadExec(); - - /** Execute the input function on all available Qthread workers */ - static void exec_all( Qthread & , QthreadExecFunctionPointer , const void * ); - - //---------------------------------------- - /** Barrier across all workers participating in the 'exec_all' */ - void exec_all_barrier() const - { - const int rev_rank = m_worker_size - ( m_worker_rank + 1 ); - - int n , j ; - - for ( n = 1 ; ( ! 
( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) { - Impl::spinwait( m_worker_base[j]->m_worker_state , QthreadExec::Active ); - } - - if ( rev_rank ) { - m_worker_state = QthreadExec::Inactive ; - Impl::spinwait( m_worker_state , QthreadExec::Inactive ); - } - - for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) { - m_worker_base[j]->m_worker_state = QthreadExec::Active ; - } - } - - /** Barrier across workers within the shepherd with rank < team_rank */ - void shepherd_barrier( const int team_size ) const - { - if ( m_shepherd_worker_rank < team_size ) { - - const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 ); - - int n , j ; - - for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) { - Impl::spinwait( m_shepherd_base[j]->m_worker_state , QthreadExec::Active ); - } - - if ( rev_rank ) { - m_worker_state = QthreadExec::Inactive ; - Impl::spinwait( m_worker_state , QthreadExec::Inactive ); - } - - for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) { - m_shepherd_base[j]->m_worker_state = QthreadExec::Active ; - } - } - } - - //---------------------------------------- - /** Reduce across all workers participating in the 'exec_all' */ - template< class FunctorType , class ReducerType , class ArgTag > - inline - void exec_all_reduce( const FunctorType & func, const ReducerType & reduce ) const - { - typedef Kokkos::Impl::if_c< std::is_same::value, FunctorType, ReducerType > ReducerConditional; - typedef typename ReducerConditional::type ReducerTypeFwd; - typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, ArgTag > ValueJoin ; - - const int rev_rank = m_worker_size - ( m_worker_rank + 1 ); - - int n , j ; - - for ( n = 1 ; ( ! 
( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) { - const QthreadExec & fan = *m_worker_base[j]; - - Impl::spinwait( fan.m_worker_state , QthreadExec::Active ); - - ValueJoin::join( ReducerConditional::select(func , reduce) , m_scratch_alloc , fan.m_scratch_alloc ); - } - - if ( rev_rank ) { - m_worker_state = QthreadExec::Inactive ; - Impl::spinwait( m_worker_state , QthreadExec::Inactive ); - } - - for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) { - m_worker_base[j]->m_worker_state = QthreadExec::Active ; - } - } - - //---------------------------------------- - /** Scall across all workers participating in the 'exec_all' */ - template< class FunctorType , class ArgTag > - inline - void exec_all_scan( const FunctorType & func ) const - { - typedef Kokkos::Impl::FunctorValueInit< FunctorType , ArgTag > ValueInit ; - typedef Kokkos::Impl::FunctorValueJoin< FunctorType , ArgTag > ValueJoin ; - typedef Kokkos::Impl::FunctorValueOps< FunctorType , ArgTag > ValueOps ; - - const int rev_rank = m_worker_size - ( m_worker_rank + 1 ); - - int n , j ; - - for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) { - Impl::spinwait( m_worker_base[j]->m_worker_state , QthreadExec::Active ); - } - - if ( rev_rank ) { - m_worker_state = QthreadExec::Inactive ; - Impl::spinwait( m_worker_state , QthreadExec::Inactive ); - } - else { - // Root thread scans across values before releasing threads - // Worker data is in reverse order, so m_worker_base[0] is the - // highest ranking thread. - - // Copy from lower ranking to higher ranking worker. - for ( int i = 1 ; i < m_worker_size ; ++i ) { - ValueOps::copy( func - , m_worker_base[i-1]->m_scratch_alloc - , m_worker_base[i]->m_scratch_alloc - ); - } - - ValueInit::init( func , m_worker_base[m_worker_size-1]->m_scratch_alloc ); - - // Join from lower ranking to higher ranking worker. 
- // Value at m_worker_base[n-1] is zero so skip adding it to m_worker_base[n-2]. - for ( int i = m_worker_size - 1 ; --i > 0 ; ) { - ValueJoin::join( func , m_worker_base[i-1]->m_scratch_alloc , m_worker_base[i]->m_scratch_alloc ); - } - } - - for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) { - m_worker_base[j]->m_worker_state = QthreadExec::Active ; - } - } - - //---------------------------------------- - - template< class Type> - inline - volatile Type * shepherd_team_scratch_value() const - { return (volatile Type*)(((unsigned char *) m_scratch_alloc) + m_reduce_end); } - - template< class Type > - inline - void shepherd_broadcast( Type & value , const int team_size , const int team_rank ) const - { - if ( m_shepherd_base ) { - Type * const shared_value = m_shepherd_base[0]->shepherd_team_scratch_value(); - if ( m_shepherd_worker_rank == team_rank ) { *shared_value = value ; } - memory_fence(); - shepherd_barrier( team_size ); - value = *shared_value ; - } - } - - template< class Type > - inline - Type shepherd_reduce( const int team_size , const Type & value ) const - { - *shepherd_team_scratch_value() = value ; - - memory_fence(); - - const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 ); - - int n , j ; - - for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) { - Impl::spinwait( m_shepherd_base[j]->m_worker_state , QthreadExec::Active ); - } - - if ( rev_rank ) { - m_worker_state = QthreadExec::Inactive ; - Impl::spinwait( m_worker_state , QthreadExec::Inactive ); - } - else { - Type & accum = * m_shepherd_base[0]->shepherd_team_scratch_value(); - for ( int i = 1 ; i < n ; ++i ) { - accum += * m_shepherd_base[i]->shepherd_team_scratch_value(); - } - for ( int i = 1 ; i < n ; ++i ) { - * m_shepherd_base[i]->shepherd_team_scratch_value() = accum ; - } - - memory_fence(); - } - - for ( n = 1 ; ( ! 
( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) { - m_shepherd_base[j]->m_worker_state = QthreadExec::Active ; - } - - return *shepherd_team_scratch_value(); - } - - template< class JoinOp > - inline - typename JoinOp::value_type - shepherd_reduce( const int team_size - , const typename JoinOp::value_type & value - , const JoinOp & op ) const - { - typedef typename JoinOp::value_type Type ; - - *shepherd_team_scratch_value() = value ; - - memory_fence(); - - const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 ); - - int n , j ; - - for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) { - Impl::spinwait( m_shepherd_base[j]->m_worker_state , QthreadExec::Active ); - } - - if ( rev_rank ) { - m_worker_state = QthreadExec::Inactive ; - Impl::spinwait( m_worker_state , QthreadExec::Inactive ); - } - else { - volatile Type & accum = * m_shepherd_base[0]->shepherd_team_scratch_value(); - for ( int i = 1 ; i < team_size ; ++i ) { - op.join( accum , * m_shepherd_base[i]->shepherd_team_scratch_value() ); - } - for ( int i = 1 ; i < team_size ; ++i ) { - * m_shepherd_base[i]->shepherd_team_scratch_value() = accum ; - } - - memory_fence(); - } - - for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) { - m_shepherd_base[j]->m_worker_state = QthreadExec::Active ; - } - - return *shepherd_team_scratch_value(); - } - - template< class Type > - inline - Type shepherd_scan( const int team_size - , const Type & value - , Type * const global_value = 0 ) const - { - *shepherd_team_scratch_value() = value ; - - memory_fence(); - - const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 ); - - int n , j ; - - for ( n = 1 ; ( ! 
( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) { - Impl::spinwait( m_shepherd_base[j]->m_worker_state , QthreadExec::Active ); - } - - if ( rev_rank ) { - m_worker_state = QthreadExec::Inactive ; - Impl::spinwait( m_worker_state , QthreadExec::Inactive ); - } - else { - // Root thread scans across values before releasing threads - // Worker data is in reverse order, so m_shepherd_base[0] is the - // highest ranking thread. - - // Copy from lower ranking to higher ranking worker. - - Type accum = * m_shepherd_base[0]->shepherd_team_scratch_value(); - for ( int i = 1 ; i < team_size ; ++i ) { - const Type tmp = * m_shepherd_base[i]->shepherd_team_scratch_value(); - accum += tmp ; - * m_shepherd_base[i-1]->shepherd_team_scratch_value() = tmp ; - } - - * m_shepherd_base[team_size-1]->shepherd_team_scratch_value() = - global_value ? atomic_fetch_add( global_value , accum ) : 0 ; - - // Join from lower ranking to higher ranking worker. - for ( int i = team_size ; --i ; ) { - * m_shepherd_base[i-1]->shepherd_team_scratch_value() += * m_shepherd_base[i]->shepherd_team_scratch_value(); - } - - memory_fence(); - } - - for ( n = 1 ; ( ! 
( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) { - m_shepherd_base[j]->m_worker_state = QthreadExec::Active ; - } - - return *shepherd_team_scratch_value(); - } - - //---------------------------------------- - - static inline - int align_alloc( int size ) - { - enum { ALLOC_GRAIN = 1 << 6 /* power of two, 64bytes */}; - enum { ALLOC_GRAIN_MASK = ALLOC_GRAIN - 1 }; - return ( size + ALLOC_GRAIN_MASK ) & ~ALLOC_GRAIN_MASK ; - } - - void shared_reset( Qthread::scratch_memory_space & ); - - void * exec_all_reduce_value() const { return m_scratch_alloc ; } - - static void * exec_all_reduce_result(); - - static void resize_worker_scratch( const int reduce_size , const int shared_size ); - static void clear_workers(); - - //---------------------------------------- - - inline int worker_rank() const { return m_worker_rank ; } - inline int worker_size() const { return m_worker_size ; } - inline int shepherd_worker_rank() const { return m_shepherd_worker_rank ; } - inline int shepherd_worker_size() const { return m_shepherd_worker_size ; } - inline int shepherd_rank() const { return m_shepherd_rank ; } - inline int shepherd_size() const { return m_shepherd_size ; } - - static int worker_per_shepherd(); -}; - -} /* namespace Impl */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -class QthreadTeamPolicyMember { -private: - - typedef Kokkos::Qthread execution_space ; - typedef execution_space::scratch_memory_space scratch_memory_space ; - - - Impl::QthreadExec & m_exec ; - scratch_memory_space m_team_shared ; - const int m_team_size ; - const int m_team_rank ; - const int m_league_size ; - const int m_league_end ; - int m_league_rank ; - -public: - - KOKKOS_INLINE_FUNCTION - const scratch_memory_space & team_shmem() const { return m_team_shared ; } - - KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank ; } - KOKKOS_INLINE_FUNCTION 
int league_size() const { return m_league_size ; } - KOKKOS_INLINE_FUNCTION int team_rank() const { return m_team_rank ; } - KOKKOS_INLINE_FUNCTION int team_size() const { return m_team_size ; } - - KOKKOS_INLINE_FUNCTION void team_barrier() const -#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - {} -#else - { m_exec.shepherd_barrier( m_team_size ); } -#endif - - template< typename Type > - KOKKOS_INLINE_FUNCTION Type team_broadcast( const Type & value , int rank ) const -#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - { return Type(); } -#else - { return m_exec.template shepherd_broadcast( value , m_team_size , rank ); } -#endif - - template< typename Type > - KOKKOS_INLINE_FUNCTION Type team_reduce( const Type & value ) const -#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - { return Type(); } -#else - { return m_exec.template shepherd_reduce( m_team_size , value ); } -#endif - - template< typename JoinOp > - KOKKOS_INLINE_FUNCTION typename JoinOp::value_type - team_reduce( const typename JoinOp::value_type & value - , const JoinOp & op ) const -#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - { return typename JoinOp::value_type(); } -#else - { return m_exec.template shepherd_reduce( m_team_size , value , op ); } -#endif - - /** \brief Intra-team exclusive prefix sum with team_rank() ordering. - * - * The highest rank thread can compute the reduction total as - * reduction_total = dev.team_scan( value ) + value ; - */ - template< typename Type > - KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value ) const -#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - { return Type(); } -#else - { return m_exec.template shepherd_scan( m_team_size , value ); } -#endif - - /** \brief Intra-team exclusive prefix sum with team_rank() ordering - * with intra-team non-deterministic ordering accumulation. 
- * - * The global inter-team accumulation value will, at the end of the - * league's parallel execution, be the scan's total. - * Parallel execution ordering of the league's teams is non-deterministic. - * As such the base value for each team's scan operation is similarly - * non-deterministic. - */ - template< typename Type > - KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value , Type * const global_accum ) const -#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - { return Type(); } -#else - { return m_exec.template shepherd_scan( m_team_size , value , global_accum ); } -#endif - - //---------------------------------------- - // Private driver for task-team parallel - - struct TaskTeam {}; - - QthreadTeamPolicyMember(); - explicit QthreadTeamPolicyMember( const TaskTeam & ); - - //---------------------------------------- - // Private for the driver ( for ( member_type i(exec,team); i ; i.next_team() ) { ... } - - // Initialize - template< class ... Properties > - QthreadTeamPolicyMember( Impl::QthreadExec & exec - , const Kokkos::Impl::TeamPolicyInternal & team ) - : m_exec( exec ) - , m_team_shared(0,0) - , m_team_size( team.m_team_size ) - , m_team_rank( exec.shepherd_worker_rank() ) - , m_league_size( team.m_league_size ) - , m_league_end( team.m_league_size - team.m_shepherd_iter * ( exec.shepherd_size() - ( exec.shepherd_rank() + 1 ) ) ) - , m_league_rank( m_league_end > team.m_shepherd_iter ? m_league_end - team.m_shepherd_iter : 0 ) - { - m_exec.shared_reset( m_team_shared ); - } - - // Continue - operator bool () const { return m_league_rank < m_league_end ; } - - // iterate - void next_team() { ++m_league_rank ; m_exec.shared_reset( m_team_shared ); } -}; - - -template< class ... Properties > -class TeamPolicyInternal< Kokkos::Qthread , Properties ... > - : public PolicyTraits< Properties... > -{ -private: - - const int m_league_size ; - const int m_team_size ; - const int m_shepherd_iter ; - -public: - - //! 
Tag this class as a kokkos execution policy - typedef TeamPolicyInternal execution_policy ; - typedef Qthread execution_space ; - typedef PolicyTraits< Properties ... > traits ; - - //---------------------------------------- - - template< class FunctorType > - inline static - int team_size_max( const FunctorType & ) - { return Qthread::instance().shepherd_worker_size(); } - - template< class FunctorType > - static int team_size_recommended( const FunctorType & f ) - { return team_size_max( f ); } - - template< class FunctorType > - inline static - int team_size_recommended( const FunctorType & f , const int& ) - { return team_size_max( f ); } - - //---------------------------------------- - - inline int team_size() const { return m_team_size ; } - inline int league_size() const { return m_league_size ; } - - // One active team per shepherd - TeamPolicyInternal( Kokkos::Qthread & q - , const int league_size - , const int team_size - , const int /* vector_length */ = 0 - ) - : m_league_size( league_size ) - , m_team_size( team_size < q.shepherd_worker_size() - ? team_size : q.shepherd_worker_size() ) - , m_shepherd_iter( ( league_size + q.shepherd_size() - 1 ) / q.shepherd_size() ) - { - } - - // One active team per shepherd - TeamPolicyInternal( const int league_size - , const int team_size - , const int /* vector_length */ = 0 - ) - : m_league_size( league_size ) - , m_team_size( team_size < Qthread::instance().shepherd_worker_size() - ? 
team_size : Qthread::instance().shepherd_worker_size() ) - , m_shepherd_iter( ( league_size + Qthread::instance().shepherd_size() - 1 ) / Qthread::instance().shepherd_size() ) - { - } - - typedef Impl::QthreadTeamPolicyMember member_type ; - - friend class Impl::QthreadTeamPolicyMember ; -}; - -} /* namespace Impl */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #define KOKKOS_QTHREADEXEC_HPP */ - diff --git a/lib/kokkos/core/src/Qthreads/Kokkos_QthreadsExec.cpp b/lib/kokkos/core/src/Qthreads/Kokkos_QthreadsExec.cpp new file mode 100644 index 0000000000..1b92494084 --- /dev/null +++ b/lib/kokkos/core/src/Qthreads/Kokkos_QthreadsExec.cpp @@ -0,0 +1,519 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +#if defined( KOKKOS_ENABLE_QTHREADS ) + +#include +#include +#include +#include +#include + +#include +#include +#include + +// Defines to enable experimental Qthreads functionality. 
+//#define QTHREAD_LOCAL_PRIORITY +//#define CLONED_TASKS + +//#include + +//---------------------------------------------------------------------------- + +namespace Kokkos { + +namespace Impl { + +namespace { + +enum { MAXIMUM_QTHREADS_WORKERS = 1024 }; + +/** s_exec is indexed by the reverse rank of the workers + * for faster fan-in / fan-out lookups + * [ n - 1, n - 2, ..., 0 ] + */ +QthreadsExec * s_exec[ MAXIMUM_QTHREADS_WORKERS ]; + +int s_number_shepherds = 0; +int s_number_workers_per_shepherd = 0; +int s_number_workers = 0; + +inline +QthreadsExec ** worker_exec() +{ + return s_exec + s_number_workers - ( qthread_shep() * s_number_workers_per_shepherd + qthread_worker_local( NULL ) + 1 ); +} + +const int s_base_size = QthreadsExec::align_alloc( sizeof(QthreadsExec) ); + +int s_worker_reduce_end = 0; // End of worker reduction memory. +int s_worker_shared_end = 0; // Total of worker scratch memory. +int s_worker_shared_begin = 0; // Beginning of worker shared memory. + +QthreadsExecFunctionPointer volatile s_active_function = 0; +const void * volatile s_active_function_arg = 0; + +} // namespace + +} // namespace Impl + +} // namespace Kokkos + +//---------------------------------------------------------------------------- + +namespace Kokkos { + +int Qthreads::is_initialized() +{ + return Impl::s_number_workers != 0; +} + +int Qthreads::concurrency() +{ + return Impl::s_number_workers_per_shepherd; +} + +int Qthreads::in_parallel() +{ + return Impl::s_active_function != 0; +} + +void Qthreads::initialize( int thread_count ) +{ + // Environment variable: QTHREAD_NUM_SHEPHERDS + // Environment variable: QTHREAD_NUM_WORKERS_PER_SHEP + // Environment variable: QTHREAD_HWPAR + + { + char buffer[256]; + snprintf( buffer, sizeof(buffer), "QTHREAD_HWPAR=%d", thread_count ); + putenv( buffer ); + } + + const bool ok_init = ( QTHREAD_SUCCESS == qthread_initialize() ) && + ( thread_count == qthread_num_shepherds() * qthread_num_workers_local( NO_SHEPHERD ) ) && + ( 
thread_count == qthread_num_workers() ); + + bool ok_symmetry = true; + + if ( ok_init ) { + Impl::s_number_shepherds = qthread_num_shepherds(); + Impl::s_number_workers_per_shepherd = qthread_num_workers_local( NO_SHEPHERD ); + Impl::s_number_workers = Impl::s_number_shepherds * Impl::s_number_workers_per_shepherd; + + for ( int i = 0; ok_symmetry && i < Impl::s_number_shepherds; ++i ) { + ok_symmetry = ( Impl::s_number_workers_per_shepherd == qthread_num_workers_local( i ) ); + } + } + + if ( ! ok_init || ! ok_symmetry ) { + std::ostringstream msg; + + msg << "Kokkos::Qthreads::initialize(" << thread_count << ") FAILED"; + msg << " : qthread_num_shepherds = " << qthread_num_shepherds(); + msg << " : qthread_num_workers_per_shepherd = " << qthread_num_workers_local( NO_SHEPHERD ); + msg << " : qthread_num_workers = " << qthread_num_workers(); + + if ( ! ok_symmetry ) { + msg << " : qthread_num_workers_local = {"; + for ( int i = 0; i < Impl::s_number_shepherds; ++i ) { + msg << " " << qthread_num_workers_local( i ); + } + msg << " }"; + } + + Impl::s_number_workers = 0; + Impl::s_number_shepherds = 0; + Impl::s_number_workers_per_shepherd = 0; + + if ( ok_init ) { qthread_finalize(); } + + Kokkos::Impl::throw_runtime_exception( msg.str() ); + } + + Impl::QthreadsExec::resize_worker_scratch( 256, 256 ); + + // Init the array for used for arbitrarily sized atomics. 
+ Impl::init_lock_array_host_space(); + +} + +void Qthreads::finalize() +{ + Impl::QthreadsExec::clear_workers(); + + if ( Impl::s_number_workers ) { + qthread_finalize(); + } + + Impl::s_number_workers = 0; + Impl::s_number_shepherds = 0; + Impl::s_number_workers_per_shepherd = 0; +} + +void Qthreads::print_configuration( std::ostream & s, const bool detail ) +{ + s << "Kokkos::Qthreads {" + << " num_shepherds(" << Impl::s_number_shepherds << ")" + << " num_workers_per_shepherd(" << Impl::s_number_workers_per_shepherd << ")" + << " }" << std::endl; +} + +Qthreads & Qthreads::instance( int ) +{ + static Qthreads q; + return q; +} + +void Qthreads::fence() +{ +} + +int Qthreads::shepherd_size() const { return Impl::s_number_shepherds; } +int Qthreads::shepherd_worker_size() const { return Impl::s_number_workers_per_shepherd; } + +} // namespace Kokkos + +//---------------------------------------------------------------------------- + +namespace Kokkos { + +namespace Impl { + +namespace { + +aligned_t driver_exec_all( void * arg ) +{ + QthreadsExec & exec = **worker_exec(); + + (*s_active_function)( exec, s_active_function_arg ); + +/* + fprintf( stdout + , "QthreadsExec driver worker(%d:%d) shepherd(%d:%d) shepherd_worker(%d:%d) done\n" + , exec.worker_rank() + , exec.worker_size() + , exec.shepherd_rank() + , exec.shepherd_size() + , exec.shepherd_worker_rank() + , exec.shepherd_worker_size() + ); + fflush(stdout); +*/ + + return 0; +} + +aligned_t driver_resize_worker_scratch( void * arg ) +{ + static volatile int lock_begin = 0; + static volatile int lock_end = 0; + + QthreadsExec ** const exec = worker_exec(); + + //---------------------------------------- + // Serialize allocation for thread safety. + + while ( ! atomic_compare_exchange_strong( & lock_begin, 0, 1 ) ); // Spin wait to claim lock. + + const bool ok = 0 == *exec; + + if ( ok ) { *exec = (QthreadsExec *) malloc( s_base_size + s_worker_shared_end ); } + + lock_begin = 0; // Release lock. 
+ + if ( ok ) { new( *exec ) QthreadsExec(); } + + //---------------------------------------- + // Wait for all calls to complete to insure that each worker has executed. + + if ( s_number_workers == 1 + atomic_fetch_add( & lock_end, 1 ) ) { lock_end = 0; } + + while ( lock_end ); + +/* + fprintf( stdout + , "QthreadsExec resize worker(%d:%d) shepherd(%d:%d) shepherd_worker(%d:%d) done\n" + , (**exec).worker_rank() + , (**exec).worker_size() + , (**exec).shepherd_rank() + , (**exec).shepherd_size() + , (**exec).shepherd_worker_rank() + , (**exec).shepherd_worker_size() + ); + fflush(stdout); +*/ + + //---------------------------------------- + + if ( ! ok ) { + fprintf( stderr, "Kokkos::QthreadsExec resize failed\n" ); + fflush( stderr ); + } + + return 0; +} + +void verify_is_process( const char * const label, bool not_active = false ) +{ + const bool not_process = 0 != qthread_shep() || 0 != qthread_worker_local( NULL ); + const bool is_active = not_active && ( s_active_function || s_active_function_arg ); + + if ( not_process || is_active ) { + std::string msg( label ); + msg.append( " : FAILED" ); + if ( not_process ) msg.append(" : not called by main process"); + if ( is_active ) msg.append(" : parallel execution in progress"); + Kokkos::Impl::throw_runtime_exception( msg ); + } +} + +} // namespace + +int QthreadsExec::worker_per_shepherd() +{ + return s_number_workers_per_shepherd; +} + +QthreadsExec::QthreadsExec() +{ + const int shepherd_rank = qthread_shep(); + const int shepherd_worker_rank = qthread_worker_local( NULL ); + const int worker_rank = shepherd_rank * s_number_workers_per_shepherd + shepherd_worker_rank; + + m_worker_base = s_exec; + m_shepherd_base = s_exec + s_number_workers_per_shepherd * ( ( s_number_shepherds - ( shepherd_rank + 1 ) ) ); + m_scratch_alloc = ( (unsigned char *) this ) + s_base_size; + m_reduce_end = s_worker_reduce_end; + m_shepherd_rank = shepherd_rank; + m_shepherd_size = s_number_shepherds; + m_shepherd_worker_rank = 
shepherd_worker_rank; + m_shepherd_worker_size = s_number_workers_per_shepherd; + m_worker_rank = worker_rank; + m_worker_size = s_number_workers; + m_worker_state = QthreadsExec::Active; +} + +void QthreadsExec::clear_workers() +{ + for ( int iwork = 0; iwork < s_number_workers; ++iwork ) { + QthreadsExec * const exec = s_exec[iwork]; + s_exec[iwork] = 0; + free( exec ); + } +} + +void QthreadsExec::shared_reset( Qthreads::scratch_memory_space & space ) +{ + new( & space ) + Qthreads::scratch_memory_space( + ((unsigned char *) (**m_shepherd_base).m_scratch_alloc ) + s_worker_shared_begin, + s_worker_shared_end - s_worker_shared_begin + ); +} + +void QthreadsExec::resize_worker_scratch( const int reduce_size, const int shared_size ) +{ + const int exec_all_reduce_alloc = align_alloc( reduce_size ); + const int shepherd_scan_alloc = align_alloc( 8 ); + const int shepherd_shared_end = exec_all_reduce_alloc + shepherd_scan_alloc + align_alloc( shared_size ); + + if ( s_worker_reduce_end < exec_all_reduce_alloc || + s_worker_shared_end < shepherd_shared_end ) { + +/* + fprintf( stdout, "QthreadsExec::resize\n"); + fflush(stdout); +*/ + + // Clear current worker memory before allocating new worker memory. + clear_workers(); + + // Increase the buffers to an aligned allocation. + s_worker_reduce_end = exec_all_reduce_alloc; + s_worker_shared_begin = exec_all_reduce_alloc + shepherd_scan_alloc; + s_worker_shared_end = shepherd_shared_end; + + // Need to query which shepherd this main 'process' is running. + + const int main_shep = qthread_shep(); + + // Have each worker resize its memory for proper first-touch. +#if 0 + for ( int jshep = 0; jshep < s_number_shepherds; ++jshep ) { + for ( int i = jshep != main_shep ? 
0 : 1; i < s_number_workers_per_shepherd; ++i ) { + qthread_fork_to( driver_resize_worker_scratch, NULL, NULL, jshep ); + } + } +#else + // If this function is used before the 'qthreads.task_policy' unit test, + // the 'qthreads.task_policy' unit test fails with a seg-fault within libqthread.so. + for ( int jshep = 0; jshep < s_number_shepherds; ++jshep ) { + const int num_clone = jshep != main_shep ? s_number_workers_per_shepherd : s_number_workers_per_shepherd - 1; + + if ( num_clone ) { + const int ret = qthread_fork_clones_to_local_priority + ( driver_resize_worker_scratch // Function + , NULL // Function data block + , NULL // Pointer to return value feb + , jshep // Shepherd number + , num_clone - 1 // Number of instances - 1 + ); + + assert( ret == QTHREAD_SUCCESS ); + } + } +#endif + + driver_resize_worker_scratch( NULL ); + + // Verify all workers allocated. + + bool ok = true; + for ( int iwork = 0; ok && iwork < s_number_workers; ++iwork ) { ok = 0 != s_exec[iwork]; } + + if ( ! ok ) { + std::ostringstream msg; + msg << "Kokkos::Impl::QthreadsExec::resize : FAILED for workers {"; + for ( int iwork = 0; iwork < s_number_workers; ++iwork ) { + if ( 0 == s_exec[iwork] ) { msg << " " << ( s_number_workers - ( iwork + 1 ) ); } + } + msg << " }"; + Kokkos::Impl::throw_runtime_exception( msg.str() ); + } + } +} + +void QthreadsExec::exec_all( Qthreads &, QthreadsExecFunctionPointer func, const void * arg ) +{ + verify_is_process("QthreadsExec::exec_all(...)",true); + +/* + fprintf( stdout, "QthreadsExec::exec_all\n"); + fflush(stdout); +*/ + + s_active_function = func; + s_active_function_arg = arg; + + // Need to query which shepherd this main 'process' is running. + + const int main_shep = qthread_shep(); + +#if 0 + for ( int jshep = 0, iwork = 0; jshep < s_number_shepherds; ++jshep ) { + for ( int i = jshep != main_shep ? 
0 : 1; i < s_number_workers_per_shepherd; ++i, ++iwork ) { + qthread_fork_to( driver_exec_all, NULL, NULL, jshep ); + } + } +#else + // If this function is used before the 'qthreads.task_policy' unit test, + // the 'qthreads.task_policy' unit test fails with a seg-fault within libqthread.so. + for ( int jshep = 0; jshep < s_number_shepherds; ++jshep ) { + const int num_clone = jshep != main_shep ? s_number_workers_per_shepherd : s_number_workers_per_shepherd - 1; + + if ( num_clone ) { + const int ret = qthread_fork_clones_to_local_priority + ( driver_exec_all // Function + , NULL // Function data block + , NULL // Pointer to return value feb + , jshep // Shepherd number + , num_clone - 1 // Number of instances - 1 + ); + + assert(ret == QTHREAD_SUCCESS); + } + } +#endif + + driver_exec_all( NULL ); + + s_active_function = 0; + s_active_function_arg = 0; +} + +void * QthreadsExec::exec_all_reduce_result() +{ + return s_exec[0]->m_scratch_alloc; +} + +} // namespace Impl + +} // namespace Kokkos + +namespace Kokkos { + +namespace Impl { + +QthreadsTeamPolicyMember::QthreadsTeamPolicyMember() + : m_exec( **worker_exec() ) + , m_team_shared( 0, 0 ) + , m_team_size( 1 ) + , m_team_rank( 0 ) + , m_league_size( 1 ) + , m_league_end( 1 ) + , m_league_rank( 0 ) +{ + m_exec.shared_reset( m_team_shared ); +} + +QthreadsTeamPolicyMember::QthreadsTeamPolicyMember( const QthreadsTeamPolicyMember::TaskTeam & ) + : m_exec( **worker_exec() ) + , m_team_shared( 0, 0 ) + , m_team_size( s_number_workers_per_shepherd ) + , m_team_rank( m_exec.shepherd_worker_rank() ) + , m_league_size( 1 ) + , m_league_end( 1 ) + , m_league_rank( 0 ) +{ + m_exec.shared_reset( m_team_shared ); +} + +} // namespace Impl + +} // namespace Kokkos + +#endif // #if defined( KOKKOS_ENABLE_QTHREADS ) diff --git a/lib/kokkos/core/src/Qthreads/Kokkos_QthreadsExec.hpp b/lib/kokkos/core/src/Qthreads/Kokkos_QthreadsExec.hpp new file mode 100644 index 0000000000..64856eb99e --- /dev/null +++ 
b/lib/kokkos/core/src/Qthreads/Kokkos_QthreadsExec.hpp @@ -0,0 +1,640 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_QTHREADSEXEC_HPP +#define KOKKOS_QTHREADSEXEC_HPP + +#include + +//---------------------------------------------------------------------------- + +namespace Kokkos { + +namespace Impl { + +class QthreadsExec; + +typedef void (*QthreadsExecFunctionPointer)( QthreadsExec &, const void * ); + +class QthreadsExec { +private: + enum { Inactive = 0, Active = 1 }; + + const QthreadsExec * const * m_worker_base; + const QthreadsExec * const * m_shepherd_base; + + void * m_scratch_alloc; ///< Scratch memory [ reduce, team, shared ] + int m_reduce_end; ///< End of scratch reduction memory + + int m_shepherd_rank; + int m_shepherd_size; + + int m_shepherd_worker_rank; + int m_shepherd_worker_size; + + /* + * m_worker_rank = m_shepherd_rank * m_shepherd_worker_size + m_shepherd_worker_rank + * m_worker_size = m_shepherd_size * m_shepherd_worker_size + */ + int m_worker_rank; + int m_worker_size; + + int mutable volatile m_worker_state; + + friend class Kokkos::Qthreads; + + ~QthreadsExec(); + QthreadsExec( const QthreadsExec & ); + QthreadsExec & operator = ( const QthreadsExec & ); + +public: + QthreadsExec(); + + /** Execute the input function on all available Qthreads workers. */ + static void exec_all( Qthreads &, QthreadsExecFunctionPointer, const void * ); + + /** Barrier across all workers participating in the 'exec_all'. */ + void exec_all_barrier() const + { + const int rev_rank = m_worker_size - ( m_worker_rank + 1 ); + + int n, j; + + for ( n = 1; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ); n <<= 1 ) { + Impl::spinwait_while_equal( m_worker_base[j]->m_worker_state, QthreadsExec::Active ); + } + + if ( rev_rank ) { + m_worker_state = QthreadsExec::Inactive; + Impl::spinwait_while_equal( m_worker_state, QthreadsExec::Inactive ); + } + + for ( n = 1; ( ! 
( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ); n <<= 1 ) { + m_worker_base[j]->m_worker_state = QthreadsExec::Active; + } + } + + /** Barrier across workers within the shepherd with rank < team_rank. */ + void shepherd_barrier( const int team_size ) const + { + if ( m_shepherd_worker_rank < team_size ) { + + const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 ); + + int n, j; + + for ( n = 1; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ); n <<= 1 ) { + Impl::spinwait_while_equal( m_shepherd_base[j]->m_worker_state, QthreadsExec::Active ); + } + + if ( rev_rank ) { + m_worker_state = QthreadsExec::Inactive; + Impl::spinwait_while_equal( m_worker_state, QthreadsExec::Inactive ); + } + + for ( n = 1; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ); n <<= 1 ) { + m_shepherd_base[j]->m_worker_state = QthreadsExec::Active; + } + } + } + + /** Reduce across all workers participating in the 'exec_all'. */ + template< class FunctorType, class ReducerType, class ArgTag > + inline + void exec_all_reduce( const FunctorType & func, const ReducerType & reduce ) const + { + typedef Kokkos::Impl::if_c< std::is_same::value, FunctorType, ReducerType > ReducerConditional; + typedef typename ReducerConditional::type ReducerTypeFwd; + typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, ArgTag > ValueJoin; + + const int rev_rank = m_worker_size - ( m_worker_rank + 1 ); + + int n, j; + + for ( n = 1; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ); n <<= 1 ) { + const QthreadsExec & fan = *m_worker_base[j]; + + Impl::spinwait_while_equal( fan.m_worker_state, QthreadsExec::Active ); + + ValueJoin::join( ReducerConditional::select( func, reduce ), m_scratch_alloc, fan.m_scratch_alloc ); + } + + if ( rev_rank ) { + m_worker_state = QthreadsExec::Inactive; + Impl::spinwait_while_equal( m_worker_state, QthreadsExec::Inactive ); + } + + for ( n = 1; ( ! 
( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ); n <<= 1 ) { + m_worker_base[j]->m_worker_state = QthreadsExec::Active; + } + } + + /** Scan across all workers participating in the 'exec_all'. */ + template< class FunctorType, class ArgTag > + inline + void exec_all_scan( const FunctorType & func ) const + { + typedef Kokkos::Impl::FunctorValueInit< FunctorType, ArgTag > ValueInit; + typedef Kokkos::Impl::FunctorValueJoin< FunctorType, ArgTag > ValueJoin; + typedef Kokkos::Impl::FunctorValueOps< FunctorType, ArgTag > ValueOps; + + const int rev_rank = m_worker_size - ( m_worker_rank + 1 ); + + int n, j; + + for ( n = 1; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ); n <<= 1 ) { + Impl::spinwait_while_equal( m_worker_base[j]->m_worker_state, QthreadsExec::Active ); + } + + if ( rev_rank ) { + m_worker_state = QthreadsExec::Inactive; + Impl::spinwait_while_equal( m_worker_state, QthreadsExec::Inactive ); + } + else { + // Root thread scans across values before releasing threads. + // Worker data is in reverse order, so m_worker_base[0] is the + // highest ranking thread. + + // Copy from lower ranking to higher ranking worker. + for ( int i = 1; i < m_worker_size; ++i ) { + ValueOps::copy( func + , m_worker_base[i-1]->m_scratch_alloc + , m_worker_base[i]->m_scratch_alloc + ); + } + + ValueInit::init( func, m_worker_base[m_worker_size-1]->m_scratch_alloc ); + + // Join from lower ranking to higher ranking worker. + // Value at m_worker_base[n-1] is zero so skip adding it to m_worker_base[n-2]. + for ( int i = m_worker_size - 1; --i > 0; ) { + ValueJoin::join( func, m_worker_base[i-1]->m_scratch_alloc, m_worker_base[i]->m_scratch_alloc ); + } + } + + for ( n = 1; ( ! 
( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ); n <<= 1 ) { + m_worker_base[j]->m_worker_state = QthreadsExec::Active; + } + } + + //---------------------------------------- + + template< class Type > + inline + volatile Type * shepherd_team_scratch_value() const + { return (volatile Type*)( ( (unsigned char *) m_scratch_alloc ) + m_reduce_end ); } + + template< class Type > + inline + void shepherd_broadcast( Type & value, const int team_size, const int team_rank ) const + { + if ( m_shepherd_base ) { + Type * const shared_value = m_shepherd_base[0]->shepherd_team_scratch_value(); + if ( m_shepherd_worker_rank == team_rank ) { *shared_value = value; } + memory_fence(); + shepherd_barrier( team_size ); + value = *shared_value; + } + } + + template< class Type > + inline + Type shepherd_reduce( const int team_size, const Type & value ) const + { + volatile Type * const shared_value = shepherd_team_scratch_value(); + *shared_value = value; +// *shepherd_team_scratch_value() = value; + + memory_fence(); + + const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 ); + + int n, j; + + for ( n = 1; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ); n <<= 1 ) { + Impl::spinwait_while_equal( m_shepherd_base[j]->m_worker_state, QthreadsExec::Active ); + } + + if ( rev_rank ) { + m_worker_state = QthreadsExec::Inactive; + Impl::spinwait_while_equal( m_worker_state, QthreadsExec::Inactive ); + } + else { + Type & accum = *m_shepherd_base[0]->shepherd_team_scratch_value(); + for ( int i = 1; i < n; ++i ) { + accum += *m_shepherd_base[i]->shepherd_team_scratch_value(); + } + for ( int i = 1; i < n; ++i ) { + *m_shepherd_base[i]->shepherd_team_scratch_value() = accum; + } + + memory_fence(); + } + + for ( n = 1; ( ! 
( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ); n <<= 1 ) { + m_shepherd_base[j]->m_worker_state = QthreadsExec::Active; + } + + return *shepherd_team_scratch_value(); + } + + template< class JoinOp > + inline + typename JoinOp::value_type + shepherd_reduce( const int team_size + , const typename JoinOp::value_type & value + , const JoinOp & op ) const + { + typedef typename JoinOp::value_type Type; + + volatile Type * const shared_value = shepherd_team_scratch_value(); + *shared_value = value; +// *shepherd_team_scratch_value() = value; + + memory_fence(); + + const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 ); + + int n, j; + + for ( n = 1; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ); n <<= 1 ) { + Impl::spinwait_while_equal( m_shepherd_base[j]->m_worker_state, QthreadsExec::Active ); + } + + if ( rev_rank ) { + m_worker_state = QthreadsExec::Inactive; + Impl::spinwait_while_equal( m_worker_state, QthreadsExec::Inactive ); + } + else { + volatile Type & accum = *m_shepherd_base[0]->shepherd_team_scratch_value(); + for ( int i = 1; i < team_size; ++i ) { + op.join( accum, *m_shepherd_base[i]->shepherd_team_scratch_value() ); + } + for ( int i = 1; i < team_size; ++i ) { + *m_shepherd_base[i]->shepherd_team_scratch_value() = accum; + } + + memory_fence(); + } + + for ( n = 1; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ); n <<= 1 ) { + m_shepherd_base[j]->m_worker_state = QthreadsExec::Active; + } + + return *shepherd_team_scratch_value(); + } + + template< class Type > + inline + Type shepherd_scan( const int team_size + , const Type & value + , Type * const global_value = 0 ) const + { + *shepherd_team_scratch_value() = value; + + memory_fence(); + + const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 ); + + int n, j; + + for ( n = 1; ( ! 
( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ); n <<= 1 ) { + Impl::spinwait_while_equal( m_shepherd_base[j]->m_worker_state, QthreadsExec::Active ); + } + + if ( rev_rank ) { + m_worker_state = QthreadsExec::Inactive; + Impl::spinwait_while_equal( m_worker_state, QthreadsExec::Inactive ); + } + else { + // Root thread scans across values before releasing threads. + // Worker data is in reverse order, so m_shepherd_base[0] is the + // highest ranking thread. + + // Copy from lower ranking to higher ranking worker. + + Type accum = *m_shepherd_base[0]->shepherd_team_scratch_value(); + for ( int i = 1; i < team_size; ++i ) { + const Type tmp = *m_shepherd_base[i]->shepherd_team_scratch_value(); + accum += tmp; + *m_shepherd_base[i-1]->shepherd_team_scratch_value() = tmp; + } + + *m_shepherd_base[team_size-1]->shepherd_team_scratch_value() = + global_value ? atomic_fetch_add( global_value, accum ) : 0; + + // Join from lower ranking to higher ranking worker. + for ( int i = team_size; --i; ) { + *m_shepherd_base[i-1]->shepherd_team_scratch_value() += *m_shepherd_base[i]->shepherd_team_scratch_value(); + } + + memory_fence(); + } + + for ( n = 1; ( ! 
( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ); n <<= 1 ) { + m_shepherd_base[j]->m_worker_state = QthreadsExec::Active; + } + + return *shepherd_team_scratch_value(); + } + + //---------------------------------------- + + static inline + int align_alloc( int size ) + { + enum { ALLOC_GRAIN = 1 << 6 /* power of two, 64bytes */ }; + enum { ALLOC_GRAIN_MASK = ALLOC_GRAIN - 1 }; + return ( size + ALLOC_GRAIN_MASK ) & ~ALLOC_GRAIN_MASK; + } + + void shared_reset( Qthreads::scratch_memory_space & ); + + void * exec_all_reduce_value() const { return m_scratch_alloc; } + + static void * exec_all_reduce_result(); + + static void resize_worker_scratch( const int reduce_size, const int shared_size ); + static void clear_workers(); + + //---------------------------------------- + + inline int worker_rank() const { return m_worker_rank; } + inline int worker_size() const { return m_worker_size; } + inline int shepherd_worker_rank() const { return m_shepherd_worker_rank; } + inline int shepherd_worker_size() const { return m_shepherd_worker_size; } + inline int shepherd_rank() const { return m_shepherd_rank; } + inline int shepherd_size() const { return m_shepherd_size; } + + static int worker_per_shepherd(); +}; + +} // namespace Impl + +} // namespace Kokkos + +//---------------------------------------------------------------------------- + +namespace Kokkos { + +namespace Impl { + +class QthreadsTeamPolicyMember { +private: + typedef Kokkos::Qthreads execution_space; + typedef execution_space::scratch_memory_space scratch_memory_space; + + Impl::QthreadsExec & m_exec; + scratch_memory_space m_team_shared; + const int m_team_size; + const int m_team_rank; + const int m_league_size; + const int m_league_end; + int m_league_rank; + +public: + KOKKOS_INLINE_FUNCTION + const scratch_memory_space & team_shmem() const { return m_team_shared; } + + KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank; } + KOKKOS_INLINE_FUNCTION int league_size() const { 
return m_league_size; } + KOKKOS_INLINE_FUNCTION int team_rank() const { return m_team_rank; } + KOKKOS_INLINE_FUNCTION int team_size() const { return m_team_size; } + + KOKKOS_INLINE_FUNCTION void team_barrier() const +#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + {} +#else + { m_exec.shepherd_barrier( m_team_size ); } +#endif + + template< typename Type > + KOKKOS_INLINE_FUNCTION Type team_broadcast( const Type & value, int rank ) const +#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { return Type(); } +#else + { return m_exec.template shepherd_broadcast( value, m_team_size, rank ); } +#endif + + template< typename Type > + KOKKOS_INLINE_FUNCTION Type team_reduce( const Type & value ) const +#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { return Type(); } +#else + { return m_exec.template shepherd_reduce( m_team_size, value ); } +#endif + + template< typename JoinOp > + KOKKOS_INLINE_FUNCTION typename JoinOp::value_type + team_reduce( const typename JoinOp::value_type & value + , const JoinOp & op ) const +#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { return typename JoinOp::value_type(); } +#else + { return m_exec.template shepherd_reduce( m_team_size, value, op ); } +#endif + + /** \brief Intra-team exclusive prefix sum with team_rank() ordering. + * + * The highest rank thread can compute the reduction total as + * reduction_total = dev.team_scan( value ) + value; + */ + template< typename Type > + KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value ) const +#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { return Type(); } +#else + { return m_exec.template shepherd_scan( m_team_size, value ); } +#endif + + /** \brief Intra-team exclusive prefix sum with team_rank() ordering + * with intra-team non-deterministic ordering accumulation. + * + * The global inter-team accumulation value will, at the end of the league's + * parallel execution, be the scan's total. 
Parallel execution ordering of + * the league's teams is non-deterministic. As such the base value for each + * team's scan operation is similarly non-deterministic. + */ + template< typename Type > + KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value, Type * const global_accum ) const +#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { return Type(); } +#else + { return m_exec.template shepherd_scan( m_team_size, value, global_accum ); } +#endif + + //---------------------------------------- + // Private driver for task-team parallel. + + struct TaskTeam {}; + + QthreadsTeamPolicyMember(); + explicit QthreadsTeamPolicyMember( const TaskTeam & ); + + //---------------------------------------- + // Private for the driver ( for ( member_type i( exec, team ); i; i.next_team() ) { ... } + + // Initialize. + template< class ... Properties > + QthreadsTeamPolicyMember( Impl::QthreadsExec & exec + , const Kokkos::Impl::TeamPolicyInternal< Qthreads, Properties... > & team ) + : m_exec( exec ) + , m_team_shared( 0, 0 ) + , m_team_size( team.m_team_size ) + , m_team_rank( exec.shepherd_worker_rank() ) + , m_league_size( team.m_league_size ) + , m_league_end( team.m_league_size - team.m_shepherd_iter * ( exec.shepherd_size() - ( exec.shepherd_rank() + 1 ) ) ) + , m_league_rank( m_league_end > team.m_shepherd_iter ? m_league_end - team.m_shepherd_iter : 0 ) + { + m_exec.shared_reset( m_team_shared ); + } + + // Continue. + operator bool () const { return m_league_rank < m_league_end; } + + // Iterate. + void next_team() { ++m_league_rank; m_exec.shared_reset( m_team_shared ); } +}; + +template< class ... Properties > +class TeamPolicyInternal< Kokkos::Qthreads, Properties ... > + : public PolicyTraits< Properties... > +{ +private: + const int m_league_size; + const int m_team_size; + const int m_shepherd_iter; + +public: + //! Tag this class as a kokkos execution policy. 
+ typedef TeamPolicyInternal execution_policy; + typedef Qthreads execution_space; + typedef PolicyTraits< Properties ... > traits; + + //---------------------------------------- + + template< class FunctorType > + inline static + int team_size_max( const FunctorType & ) + { return Qthreads::instance().shepherd_worker_size(); } + + template< class FunctorType > + static int team_size_recommended( const FunctorType & f ) + { return team_size_max( f ); } + + template< class FunctorType > + inline static + int team_size_recommended( const FunctorType & f, const int& ) + { return team_size_max( f ); } + + //---------------------------------------- + + inline int team_size() const { return m_team_size; } + inline int league_size() const { return m_league_size; } + + // One active team per shepherd. + TeamPolicyInternal( Kokkos::Qthreads & q + , const int league_size + , const int team_size + , const int /* vector_length */ = 0 + ) + : m_league_size( league_size ) + , m_team_size( team_size < q.shepherd_worker_size() + ? team_size : q.shepherd_worker_size() ) + , m_shepherd_iter( ( league_size + q.shepherd_size() - 1 ) / q.shepherd_size() ) + {} + + // TODO: Make sure this is correct. + // One active team per shepherd. + TeamPolicyInternal( Kokkos::Qthreads & q + , const int league_size + , const Kokkos::AUTO_t & /* team_size_request */ + , const int /* vector_length */ = 0 + ) + : m_league_size( league_size ) + , m_team_size( q.shepherd_worker_size() ) + , m_shepherd_iter( ( league_size + q.shepherd_size() - 1 ) / q.shepherd_size() ) + {} + + // One active team per shepherd. + TeamPolicyInternal( const int league_size + , const int team_size + , const int /* vector_length */ = 0 + ) + : m_league_size( league_size ) + , m_team_size( team_size < Qthreads::instance().shepherd_worker_size() + ? 
team_size : Qthreads::instance().shepherd_worker_size() ) + , m_shepherd_iter( ( league_size + Qthreads::instance().shepherd_size() - 1 ) / Qthreads::instance().shepherd_size() ) + {} + + // TODO: Make sure this is correct. + // One active team per shepherd. + TeamPolicyInternal( const int league_size + , const Kokkos::AUTO_t & /* team_size_request */ + , const int /* vector_length */ = 0 + ) + : m_league_size( league_size ) + , m_team_size( Qthreads::instance().shepherd_worker_size() ) + , m_shepherd_iter( ( league_size + Qthreads::instance().shepherd_size() - 1 ) / Qthreads::instance().shepherd_size() ) + {} + + // TODO: Doesn't do anything yet. Fix this. + /** \brief set chunk_size to a discrete value*/ + inline TeamPolicyInternal set_chunk_size(typename traits::index_type chunk_size_) const { + TeamPolicyInternal p = *this; +// p.m_chunk_size = chunk_size_; + return p; + } + + typedef Impl::QthreadsTeamPolicyMember member_type; + + friend class Impl::QthreadsTeamPolicyMember; +}; + +} // namespace Impl + +} // namespace Kokkos + +//---------------------------------------------------------------------------- + +#endif // #define KOKKOS_QTHREADSEXEC_HPP diff --git a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_Parallel.hpp b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Parallel.hpp similarity index 86% rename from lib/kokkos/core/src/Qthread/Kokkos_Qthread_Parallel.hpp rename to lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Parallel.hpp index cb5b180948..9f99607540 100644 --- a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_Parallel.hpp +++ b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Parallel.hpp @@ -41,8 +41,8 @@ //@HEADER */ -#ifndef KOKKOS_QTHREAD_PARALLEL_HPP -#define KOKKOS_QTHREAD_PARALLEL_HPP +#ifndef KOKKOS_QTHREADS_PARALLEL_HPP +#define KOKKOS_QTHREADS_PARALLEL_HPP #include @@ -51,7 +51,7 @@ #include #include -#include +#include //---------------------------------------------------------------------------- @@ -63,7 +63,7 @@ namespace Impl { template< class 
FunctorType , class ... Traits > class ParallelFor< FunctorType , Kokkos::RangePolicy< Traits ... > - , Kokkos::Qthread + , Kokkos::Qthreads > { private: @@ -99,7 +99,7 @@ private: } // Function is called once by every concurrent thread. - static void exec( QthreadExec & exec , const void * arg ) + static void exec( QthreadsExec & exec , const void * arg ) { const ParallelFor & self = * ((const ParallelFor *) arg ); @@ -116,7 +116,7 @@ public: inline void execute() const { - Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelFor::exec , this ); + Impl::QthreadsExec::exec_all( Qthreads::instance() , & ParallelFor::exec , this ); } @@ -134,7 +134,7 @@ template< class FunctorType , class ReducerType , class ... Traits > class ParallelReduce< FunctorType , Kokkos::RangePolicy< Traits ... > , ReducerType - , Kokkos::Qthread + , Kokkos::Qthreads > { private: @@ -186,7 +186,7 @@ private: } } - static void exec( QthreadExec & exec , const void * arg ) + static void exec( QthreadsExec & exec , const void * arg ) { const ParallelReduce & self = * ((const ParallelReduce *) arg ); @@ -205,10 +205,10 @@ public: inline void execute() const { - QthreadExec::resize_worker_scratch( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) , 0 ); - Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelReduce::exec , this ); + QthreadsExec::resize_worker_scratch( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) , 0 ); + Impl::QthreadsExec::exec_all( Qthreads::instance() , & ParallelReduce::exec , this ); - const pointer_type data = (pointer_type) QthreadExec::exec_all_reduce_result(); + const pointer_type data = (pointer_type) QthreadsExec::exec_all_reduce_result(); Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , data ); @@ -246,11 +246,11 @@ public: template< class FunctorType , class ... 
Properties > class ParallelFor< FunctorType , TeamPolicy< Properties ... > - , Kokkos::Qthread > + , Kokkos::Qthreads > { private: - typedef Kokkos::Impl::TeamPolicyInternal< Kokkos::Qthread , Properties ... > Policy ; + typedef Kokkos::Impl::TeamPolicyInternal< Kokkos::Qthreads , Properties ... > Policy ; typedef typename Policy::member_type Member ; typedef typename Policy::work_tag WorkTag ; @@ -282,7 +282,7 @@ private: } } - static void exec( QthreadExec & exec , const void * arg ) + static void exec( QthreadsExec & exec , const void * arg ) { const ParallelFor & self = * ((const ParallelFor *) arg ); @@ -297,10 +297,10 @@ public: inline void execute() const { - QthreadExec::resize_worker_scratch + QthreadsExec::resize_worker_scratch ( /* reduction memory */ 0 , /* team shared memory */ FunctorTeamShmemSize< FunctorType >::value( m_functor , m_policy.team_size() ) ); - Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelFor::exec , this ); + Impl::QthreadsExec::exec_all( Qthreads::instance() , & ParallelFor::exec , this ); } ParallelFor( const FunctorType & arg_functor , @@ -316,12 +316,12 @@ template< class FunctorType , class ReducerType , class ... Properties > class ParallelReduce< FunctorType , TeamPolicy< Properties... > , ReducerType - , Kokkos::Qthread + , Kokkos::Qthreads > { private: - typedef Kokkos::Impl::TeamPolicyInternal< Kokkos::Qthread , Properties ... > Policy ; + typedef Kokkos::Impl::TeamPolicyInternal< Kokkos::Qthreads , Properties ... 
> Policy ; typedef typename Policy::work_tag WorkTag ; typedef typename Policy::member_type Member ; @@ -365,7 +365,7 @@ private: } } - static void exec( QthreadExec & exec , const void * arg ) + static void exec( QthreadsExec & exec , const void * arg ) { const ParallelReduce & self = * ((const ParallelReduce *) arg ); @@ -383,13 +383,13 @@ public: inline void execute() const { - QthreadExec::resize_worker_scratch + QthreadsExec::resize_worker_scratch ( /* reduction memory */ ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) , /* team shared memory */ FunctorTeamShmemSize< FunctorType >::value( m_functor , m_policy.team_size() ) ); - Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelReduce::exec , this ); + Impl::QthreadsExec::exec_all( Qthreads::instance() , & ParallelReduce::exec , this ); - const pointer_type data = (pointer_type) QthreadExec::exec_all_reduce_result(); + const pointer_type data = (pointer_type) QthreadsExec::exec_all_reduce_result(); Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer), data ); @@ -429,7 +429,7 @@ public: template< class FunctorType , class ... Traits > class ParallelScan< FunctorType , Kokkos::RangePolicy< Traits ... 
> - , Kokkos::Qthread + , Kokkos::Qthreads > { private: @@ -474,7 +474,7 @@ private: } } - static void exec( QthreadExec & exec , const void * arg ) + static void exec( QthreadsExec & exec , const void * arg ) { const ParallelScan & self = * ((const ParallelScan *) arg ); @@ -497,8 +497,8 @@ public: inline void execute() const { - QthreadExec::resize_worker_scratch( ValueTraits::value_size( m_functor ) , 0 ); - Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelScan::exec , this ); + QthreadsExec::resize_worker_scratch( ValueTraits::value_size( m_functor ) , 0 ); + Impl::QthreadsExec::exec_all( Qthreads::instance() , & ParallelScan::exec , this ); } ParallelScan( const FunctorType & arg_functor @@ -521,37 +521,37 @@ namespace Kokkos { template< typename iType > KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct< iType, Impl::QthreadTeamPolicyMember > -TeamThreadRange( const Impl::QthreadTeamPolicyMember& thread, const iType& count ) +Impl::TeamThreadRangeBoundariesStruct< iType, Impl::QthreadsTeamPolicyMember > +TeamThreadRange( const Impl::QthreadsTeamPolicyMember& thread, const iType& count ) { - return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::QthreadTeamPolicyMember >( thread, count ); + return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::QthreadsTeamPolicyMember >( thread, count ); } template< typename iType1, typename iType2 > KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type, - Impl::QthreadTeamPolicyMember > -TeamThreadRange( const Impl::QthreadTeamPolicyMember& thread, const iType1 & begin, const iType2 & end ) + Impl::QthreadsTeamPolicyMember > +TeamThreadRange( const Impl::QthreadsTeamPolicyMember& thread, const iType1 & begin, const iType2 & end ) { typedef typename std::common_type< iType1, iType2 >::type iType; - return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::QthreadTeamPolicyMember >( thread, iType(begin), iType(end) ); + return 
Impl::TeamThreadRangeBoundariesStruct< iType, Impl::QthreadsTeamPolicyMember >( thread, iType(begin), iType(end) ); } template KOKKOS_INLINE_FUNCTION -Impl::ThreadVectorRangeBoundariesStruct - ThreadVectorRange(const Impl::QthreadTeamPolicyMember& thread, const iType& count) { - return Impl::ThreadVectorRangeBoundariesStruct(thread,count); +Impl::ThreadVectorRangeBoundariesStruct + ThreadVectorRange(const Impl::QthreadsTeamPolicyMember& thread, const iType& count) { + return Impl::ThreadVectorRangeBoundariesStruct(thread,count); } KOKKOS_INLINE_FUNCTION -Impl::ThreadSingleStruct PerTeam(const Impl::QthreadTeamPolicyMember& thread) { - return Impl::ThreadSingleStruct(thread); +Impl::ThreadSingleStruct PerTeam(const Impl::QthreadsTeamPolicyMember& thread) { + return Impl::ThreadSingleStruct(thread); } KOKKOS_INLINE_FUNCTION -Impl::VectorSingleStruct PerThread(const Impl::QthreadTeamPolicyMember& thread) { - return Impl::VectorSingleStruct(thread); +Impl::VectorSingleStruct PerThread(const Impl::QthreadsTeamPolicyMember& thread) { + return Impl::VectorSingleStruct(thread); } /** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1. 
@@ -560,7 +560,7 @@ Impl::VectorSingleStruct PerThread(const Impl::Qt * This functionality requires C++11 support.*/ template KOKKOS_INLINE_FUNCTION -void parallel_for(const Impl::TeamThreadRangeBoundariesStruct& loop_boundaries, const Lambda& lambda) { +void parallel_for(const Impl::TeamThreadRangeBoundariesStruct& loop_boundaries, const Lambda& lambda) { for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) lambda(i); } @@ -571,7 +571,7 @@ void parallel_for(const Impl::TeamThreadRangeBoundariesStruct KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct& loop_boundaries, +void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct& loop_boundaries, const Lambda & lambda, ValueType& result) { result = ValueType(); @@ -595,7 +595,7 @@ void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct& loop_boundaries, +void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct& loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) { ValueType result = init_result; @@ -615,7 +615,7 @@ void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct KOKKOS_INLINE_FUNCTION -void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct& +void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct& loop_boundaries, const Lambda& lambda) { #ifdef KOKKOS_ENABLE_PRAGMA_IVDEP #pragma ivdep @@ -630,7 +630,7 @@ void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct& +void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct& loop_boundaries, const Lambda & lambda, ValueType& result) { result = ValueType(); #ifdef KOKKOS_ENABLE_PRAGMA_IVDEP @@ -652,7 +652,7 @@ void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct KOKKOS_INLINE_FUNCTION -void 
parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct& +void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct& loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) { ValueType result = init_result; @@ -679,7 +679,7 @@ void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct KOKKOS_INLINE_FUNCTION -void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct& +void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct& loop_boundaries, const FunctorType & lambda) { typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ; @@ -697,25 +697,25 @@ void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct KOKKOS_INLINE_FUNCTION -void single(const Impl::VectorSingleStruct& single_struct, const FunctorType& lambda) { +void single(const Impl::VectorSingleStruct& single_struct, const FunctorType& lambda) { lambda(); } template KOKKOS_INLINE_FUNCTION -void single(const Impl::ThreadSingleStruct& single_struct, const FunctorType& lambda) { +void single(const Impl::ThreadSingleStruct& single_struct, const FunctorType& lambda) { if(single_struct.team_member.team_rank()==0) lambda(); } template KOKKOS_INLINE_FUNCTION -void single(const Impl::VectorSingleStruct& single_struct, const FunctorType& lambda, ValueType& val) { +void single(const Impl::VectorSingleStruct& single_struct, const FunctorType& lambda, ValueType& val) { lambda(val); } template KOKKOS_INLINE_FUNCTION -void single(const Impl::ThreadSingleStruct& single_struct, const FunctorType& lambda, ValueType& val) { +void single(const Impl::ThreadSingleStruct& single_struct, const FunctorType& lambda, ValueType& val) { if(single_struct.team_member.team_rank()==0) { lambda(val); } @@ -724,4 +724,4 @@ void single(const Impl::ThreadSingleStruct& singl } // namespace Kokkos -#endif /* #define KOKKOS_QTHREAD_PARALLEL_HPP */ +#endif /* #define KOKKOS_QTHREADS_PARALLEL_HPP */ diff --git 
a/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Task.cpp b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Task.cpp new file mode 100644 index 0000000000..614a2c03f0 --- /dev/null +++ b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Task.cpp @@ -0,0 +1,320 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +#if defined( KOKKOS_ENABLE_QTHREADS ) && defined( KOKKOS_ENABLE_TASKPOLICY ) + +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template class TaskQueue< Kokkos::Qthreads > ; + +//---------------------------------------------------------------------------- + +TaskExec< Kokkos::Qthreads >::TaskExec() + : m_self_exec( 0 ), + m_team_exec( 0 ), + m_sync_mask( 0 ), + m_sync_value( 0 ), + m_sync_step( 0 ), + m_group_rank( 0 ), + m_team_rank( 0 ), + m_team_size( 1 ) +{} + +TaskExec< Kokkos::Qthreads >:: +TaskExec( Kokkos::Impl::QthreadsExec & arg_exec, int const arg_team_size ) + : m_self_exec( & arg_exec ), + m_team_exec( arg_exec.pool_rev(arg_exec.pool_rank_rev() / arg_team_size) ), + m_sync_mask( 0 ), + m_sync_value( 0 ), + m_sync_step( 0 ), + m_group_rank( arg_exec.pool_rank_rev() / arg_team_size ), + m_team_rank( arg_exec.pool_rank_rev() % arg_team_size ), + m_team_size( arg_team_size ) +{ + // This team spans + // m_self_exec->pool_rev( team_size * group_rank ) + // m_self_exec->pool_rev( team_size * ( group_rank + 1 ) - 1 ) + + int64_t volatile * const sync = (int64_t *) 
m_self_exec->scratch_reduce(); + + sync[0] = int64_t(0) ; + sync[1] = int64_t(0) ; + + for ( int i = 0 ; i < m_team_size ; ++i ) { + m_sync_value |= int64_t(1) << (8*i); + m_sync_mask |= int64_t(3) << (8*i); + } + + Kokkos::memory_fence(); +} + +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + +void TaskExec< Kokkos::Qthreads >::team_barrier() const +{ + if ( 1 < m_team_size ) { + + if ( m_team_exec->scratch_reduce_size() < int(2 * sizeof(int64_t)) ) { + Kokkos::abort("TaskQueue scratch_reduce memory too small"); + } + + // Use team shared memory to synchronize. + // Alternate memory locations between barriers to avoid a sequence + // of barriers overtaking one another. + + int64_t volatile * const sync = + ((int64_t *) m_team_exec->scratch_reduce()) + ( m_sync_step & 0x01 ); + + // This team member sets one byte within the sync variable + int8_t volatile * const sync_self = + ((int8_t *) sync) + m_team_rank ; + +#if 0 +fprintf( stdout, + "barrier group(%d) member(%d) step(%d) wait(%lx) : before(%lx)\n", + m_group_rank, + m_team_rank, + m_sync_step, + m_sync_value, + *sync + ); +fflush(stdout); +#endif + + *sync_self = int8_t( m_sync_value & 0x03 ); // signal arrival + + while ( m_sync_value != *sync ); // wait for team to arrive + +#if 0 +fprintf( stdout, + "barrier group(%d) member(%d) step(%d) wait(%lx) : after(%lx)\n", + m_group_rank, + m_team_rank, + m_sync_step, + m_sync_value, + *sync + ); +fflush(stdout); +#endif + + ++m_sync_step ; + + if ( 0 == ( 0x01 & m_sync_step ) ) { // Every other step + m_sync_value ^= m_sync_mask ; + if ( 1000 < m_sync_step ) m_sync_step = 0 ; + } + } +} + +#endif + +//---------------------------------------------------------------------------- + +void TaskQueueSpecialization< Kokkos::Qthreads >::execute + ( TaskQueue< Kokkos::Qthreads > * const queue ) +{ + using execution_space = Kokkos::Qthreads ; + using queue_type = TaskQueue< execution_space > ; + using task_root_type = TaskBase< execution_space, void, void > ; + 
using PoolExec = Kokkos::Impl::QthreadsExec ; + using Member = TaskExec< execution_space > ; + + task_root_type * const end = (task_root_type *) task_root_type::EndTag ; + + // Required: team_size <= 8 + + const int team_size = PoolExec::pool_size(2); // Threads per core + // const int team_size = PoolExec::pool_size(1); // Threads per NUMA + + if ( 8 < team_size ) { + Kokkos::abort("TaskQueue unsupported team size"); + } + +#pragma omp parallel + { + PoolExec & self = *PoolExec::get_thread_omp(); + + Member single_exec ; + Member team_exec( self, team_size ); + + // Team shared memory + task_root_type * volatile * const task_shared = + (task_root_type **) team_exec.m_team_exec->scratch_thread(); + +// Barrier across entire Qthreads thread pool to insure initialization +#pragma omp barrier + + // Loop until all queues are empty and no tasks in flight + + do { + + // Each team lead attempts to acquire either a thread team task + // or collection of single thread tasks for the team. + + if ( 0 == team_exec.team_rank() ) { + + task_root_type * tmp = + 0 < *((volatile int *) & queue->m_ready_count) ? 
end : 0 ; + + // Loop by priority and then type + for ( int i = 0 ; i < queue_type::NumQueue && end == tmp ; ++i ) { + for ( int j = 0 ; j < 2 && end == tmp ; ++j ) { + tmp = queue_type::pop_task( & queue->m_ready[i][j] ); + } + } + + *task_shared = tmp ; + + // Fence to be sure shared_task_array is stored + Kokkos::memory_fence(); + } + + // Whole team waits for every team member to reach this statement + team_exec.team_barrier(); + + Kokkos::memory_fence(); + + task_root_type * const task = *task_shared ; + +#if 0 +fprintf( stdout, + "\nexecute group(%d) member(%d) task_shared(0x%lx) task(0x%lx)\n", + team_exec.m_group_rank, + team_exec.m_team_rank, + uintptr_t(task_shared), + uintptr_t(task) + ); +fflush(stdout); +#endif + + if ( 0 == task ) break ; // 0 == m_ready_count + + if ( end == task ) { + team_exec.team_barrier(); + } + else if ( task_root_type::TaskTeam == task->m_task_type ) { + // Thread Team Task + (*task->m_apply)( task, & team_exec ); + + // The m_apply function performs a barrier + + if ( 0 == team_exec.team_rank() ) { + // team member #0 completes the task, which may delete the task + queue->complete( task ); + } + } + else { + // Single Thread Task + + if ( 0 == team_exec.team_rank() ) { + + (*task->m_apply)( task, & single_exec ); + + queue->complete( task ); + } + + // All team members wait for whole team to reach this statement. + // Not necessary to complete the task. + // Is necessary to prevent task_shared from being updated + // before it is read by all threads. 
+ team_exec.team_barrier(); + } + } while(1); + } +// END #pragma omp parallel + +} + +void TaskQueueSpecialization< Kokkos::Qthreads >:: + iff_single_thread_recursive_execute + ( TaskQueue< Kokkos::Qthreads > * const queue ) +{ + using execution_space = Kokkos::Qthreads ; + using queue_type = TaskQueue< execution_space > ; + using task_root_type = TaskBase< execution_space, void, void > ; + using Member = TaskExec< execution_space > ; + + if ( 1 == omp_get_num_threads() ) { + + task_root_type * const end = (task_root_type *) task_root_type::EndTag ; + + Member single_exec ; + + task_root_type * task = end ; + + do { + + task = end ; + + // Loop by priority and then type + for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { + for ( int j = 0 ; j < 2 && end == task ; ++j ) { + task = queue_type::pop_task( & queue->m_ready[i][j] ); + } + } + + if ( end == task ) break ; + + (*task->m_apply)( task, & single_exec ); + + queue->complete( task ); + + } while(1); + } +} + +}} /* namespace Kokkos::Impl */ + +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_QTHREADS ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */ + + diff --git a/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Task.hpp b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Task.hpp new file mode 100644 index 0000000000..836452dde9 --- /dev/null +++ b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Task.hpp @@ -0,0 +1,156 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_QTHREADS_TASK_HPP +#define KOKKOS_IMPL_QTHREADS_TASK_HPP + +#if defined( KOKKOS_ENABLE_TASKPOLICY ) + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template<> +class TaskQueueSpecialization< Kokkos::Qthreads > +{ +public: + + using execution_space = Kokkos::Qthreads ; + using queue_type = Kokkos::Impl::TaskQueue< execution_space > ; + using task_base_type = Kokkos::Impl::TaskBase< execution_space, void, void > ; + + // Must specify memory space + using memory_space = Kokkos::HostSpace ; + + static + void iff_single_thread_recursive_execute( queue_type * const ); + + // Must provide task queue execution function + static void execute( queue_type * const ); + + // Must provide mechanism to set function pointer in + // execution space from the host process. 
+ template< typename FunctorType > + static + void proc_set_apply( task_base_type::function_type * ptr ) + { + using TaskType = TaskBase< execution_space, + typename FunctorType::value_type, + FunctorType + > ; + *ptr = TaskType::apply ; + } +}; + +extern template class TaskQueue< Kokkos::Qthreads > ; + +//---------------------------------------------------------------------------- + +template<> +class TaskExec< Kokkos::Qthreads > +{ +private: + + TaskExec( TaskExec && ) = delete ; + TaskExec( TaskExec const & ) = delete ; + TaskExec & operator = ( TaskExec && ) = delete ; + TaskExec & operator = ( TaskExec const & ) = delete ; + + + using PoolExec = Kokkos::Impl::QthreadsExec ; + + friend class Kokkos::Impl::TaskQueue< Kokkos::Qthreads > ; + friend class Kokkos::Impl::TaskQueueSpecialization< Kokkos::Qthreads > ; + + PoolExec * const m_self_exec ; ///< This thread's thread pool data structure + PoolExec * const m_team_exec ; ///< Team thread's thread pool data structure + int64_t m_sync_mask ; + int64_t mutable m_sync_value ; + int mutable m_sync_step ; + int m_group_rank ; ///< Which "team" subset of thread pool + int m_team_rank ; ///< Which thread within a team + int m_team_size ; + + TaskExec(); + TaskExec( PoolExec & arg_exec, int arg_team_size ); + +public: + +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + void * team_shared() const + { return m_team_exec ? m_team_exec->scratch_thread() : (void*) 0 ; } + + int team_shared_size() const + { return m_team_exec ? m_team_exec->scratch_thread_size() : 0 ; } + + /**\brief Whole team enters this function call + * before any teeam member returns from + * this function call. 
+ */ + void team_barrier() const ; +#else + KOKKOS_INLINE_FUNCTION void team_barrier() const {} + KOKKOS_INLINE_FUNCTION void * team_shared() const { return 0 ; } + KOKKOS_INLINE_FUNCTION int team_shared_size() const { return 0 ; } +#endif + + KOKKOS_INLINE_FUNCTION + int team_rank() const { return m_team_rank ; } + + KOKKOS_INLINE_FUNCTION + int team_size() const { return m_team_size ; } +}; + +}} /* namespace Kokkos::Impl */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ +#endif /* #ifndef KOKKOS_IMPL_QTHREADS_TASK_HPP */ + diff --git a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskPolicy.cpp.old similarity index 91% rename from lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp rename to lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskPolicy.cpp.old index 50444177ce..aa159cff6a 100644 --- a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp +++ b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskPolicy.cpp.old @@ -41,11 +41,11 @@ //@HEADER */ -// Experimental unified task-data parallel manycore LDRD +// Experimental unified task-data parallel manycore LDRD. 
#include -#if defined( KOKKOS_ENABLE_QTHREAD ) +#if defined( KOKKOS_ENABLE_QTHREADS ) #include @@ -56,17 +56,15 @@ #include #include -#include +#include #if defined( KOKKOS_ENABLE_TASKDAG ) -//---------------------------------------------------------------------------- - namespace Kokkos { namespace Experimental { namespace Impl { -typedef TaskMember< Kokkos::Qthread , void , void > Task ; +typedef TaskMember< Kokkos::Qthreads , void , void > Task ; namespace { @@ -173,16 +171,16 @@ Task::TaskMember( const function_dealloc_type arg_dealloc void Task::throw_error_add_dependence() const { - std::cerr << "TaskMember< Qthread >::add_dependence ERROR" + std::cerr << "TaskMember< Qthreads >::add_dependence ERROR" << " state(" << m_state << ")" << " dep_size(" << m_dep_size << ")" << std::endl ; - throw std::runtime_error("TaskMember< Qthread >::add_dependence ERROR"); + throw std::runtime_error("TaskMember< Qthreads >::add_dependence ERROR"); } void Task::throw_error_verify_type() { - throw std::runtime_error("TaskMember< Qthread >::verify_type ERROR"); + throw std::runtime_error("TaskMember< Qthreads >::verify_type ERROR"); } //---------------------------------------------------------------------------- @@ -190,7 +188,7 @@ void Task::throw_error_verify_type() #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) void Task::assign( Task ** const lhs , Task * rhs , const bool no_throw ) { - static const char msg_error_header[] = "Kokkos::Impl::TaskManager::assign ERROR" ; + static const char msg_error_header[] = "Kokkos::Impl::TaskManager::assign ERROR" ; static const char msg_error_count[] = ": negative reference count" ; static const char msg_error_complete[] = ": destroy task that is not complete" ; static const char msg_error_dependences[] = ": destroy task that has dependences" ; @@ -294,7 +292,7 @@ fflush(stdout); assign( & m_dep[i] , 0 ); } - // Set qthread FEB to full so that dependent tasks are allowed to execute. 
+ // Set Qthreads FEB to full so that dependent tasks are allowed to execute. // This 'task' may be deleted immediately following this function call. qthread_fill( & m_qfeb ); @@ -319,10 +317,10 @@ aligned_t Task::qthread_func( void * arg ) ); if ( task->m_apply_team && ! task->m_apply_single ) { - Kokkos::Impl::QthreadTeamPolicyMember::TaskTeam task_team_tag ; + Kokkos::Impl::QthreadsTeamPolicyMember::TaskTeam task_team_tag ; // Initialize team size and rank with shephered info - Kokkos::Impl::QthreadTeamPolicyMember member( task_team_tag ); + Kokkos::Impl::QthreadsTeamPolicyMember member( task_team_tag ); (*task->m_apply_team)( task , member ); @@ -344,7 +342,7 @@ fflush(stdout); } else if ( task->m_apply_team && task->m_apply_single == reinterpret_cast(1) ) { // Team hard-wired to one, no cloning - Kokkos::Impl::QthreadTeamPolicyMember member ; + Kokkos::Impl::QthreadsTeamPolicyMember member ; (*task->m_apply_team)( task , member ); task->closeout(); } @@ -384,8 +382,8 @@ void Task::schedule() // Increment active task count before spawning. Kokkos::atomic_increment( m_active_count ); - // spawn in qthread. must malloc the precondition array and give to qthread. - // qthread will eventually free this allocation so memory will not be leaked. + // spawn in Qthreads. must malloc the precondition array and give to Qthreads. + // Qthreads will eventually free this allocation so memory will not be leaked. // concern with thread safety of malloc, does this need to be guarded? aligned_t ** qprecon = (aligned_t **) malloc( ( m_dep_size + 1 ) * sizeof(aligned_t *) ); @@ -393,7 +391,7 @@ void Task::schedule() qprecon[0] = reinterpret_cast( uintptr_t(m_dep_size) ); for ( int i = 0 ; i < m_dep_size ; ++i ) { - qprecon[i+1] = & m_dep[i]->m_qfeb ; // Qthread precondition flag + qprecon[i+1] = & m_dep[i]->m_qfeb ; // Qthreads precondition flag } if ( m_apply_team && ! 
m_apply_single ) { @@ -446,7 +444,7 @@ fflush(stdout); namespace Kokkos { namespace Experimental { -TaskPolicy< Kokkos::Qthread >:: +TaskPolicy< Kokkos::Qthreads >:: TaskPolicy ( const unsigned /* arg_task_max_count */ , const unsigned /* arg_task_max_size */ @@ -462,7 +460,7 @@ TaskPolicy if ( m_team_size != 1 && m_team_size != num_worker_per_shepherd ) { std::ostringstream msg ; - msg << "Kokkos::Experimental::TaskPolicy< Kokkos::Qthread >( " + msg << "Kokkos::Experimental::TaskPolicy< Kokkos::Qthreads >( " << "default_depedence = " << arg_task_default_dependence_capacity << " , team_size = " << arg_task_team_size << " ) ERROR, valid team_size arguments are { (omitted) , 1 , " << num_worker_per_shepherd << " }" ; @@ -470,14 +468,14 @@ TaskPolicy } } -TaskPolicy< Kokkos::Qthread >::member_type & -TaskPolicy< Kokkos::Qthread >::member_single() +TaskPolicy< Kokkos::Qthreads >::member_type & +TaskPolicy< Kokkos::Qthreads >::member_single() { static member_type s ; return s ; } -void wait( Kokkos::Experimental::TaskPolicy< Kokkos::Qthread > & policy ) +void wait( Kokkos::Experimental::TaskPolicy< Kokkos::Qthreads > & policy ) { volatile int * const active_task_count = & policy.m_active_count ; while ( *active_task_count ) qthread_yield(); @@ -486,6 +484,5 @@ void wait( Kokkos::Experimental::TaskPolicy< Kokkos::Qthread > & policy ) } // namespace Experimental } // namespace Kokkos -#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ -#endif /* #if defined( KOKKOS_ENABLE_QTHREAD ) */ - +#endif // #if defined( KOKKOS_ENABLE_TASKDAG ) +#endif // #if defined( KOKKOS_ENABLE_QTHREADS ) diff --git a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.hpp b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskPolicy.hpp.old similarity index 90% rename from lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.hpp rename to lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskPolicy.hpp.old index 565dbf7e61..1e5a4dc593 100644 --- 
a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.hpp +++ b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskPolicy.hpp.old @@ -43,15 +43,15 @@ // Experimental unified task-data parallel manycore LDRD -#ifndef KOKKOS_QTHREAD_TASKSCHEDULER_HPP -#define KOKKOS_QTHREAD_TASKSCHEDULER_HPP +#ifndef KOKKOS_QTHREADS_TASKSCHEDULER_HPP +#define KOKKOS_QTHREADS_TASKSCHEDULER_HPP #include #include #include //---------------------------------------------------------------------------- -// Defines to enable experimental Qthread functionality +// Defines to enable experimental Qthreads functionality #define QTHREAD_LOCAL_PRIORITY #define CLONED_TASKS @@ -63,7 +63,7 @@ //---------------------------------------------------------------------------- -#include +#include #include #include @@ -78,13 +78,13 @@ namespace Experimental { namespace Impl { template<> -class TaskMember< Kokkos::Qthread , void , void > +class TaskMember< Kokkos::Qthreads , void , void > { public: typedef TaskMember * (* function_verify_type) ( TaskMember * ); typedef void (* function_single_type) ( TaskMember * ); - typedef void (* function_team_type) ( TaskMember * , Kokkos::Impl::QthreadTeamPolicyMember & ); + typedef void (* function_team_type) ( TaskMember * , Kokkos::Impl::QthreadsTeamPolicyMember & ); typedef void (* function_dealloc_type)( TaskMember * ); private: @@ -94,7 +94,7 @@ private: const function_single_type m_apply_single ; ///< Apply function const function_team_type m_apply_team ; ///< Apply function int volatile * const m_active_count ; ///< Count of active tasks on this policy - aligned_t m_qfeb ; ///< Qthread full/empty bit + aligned_t m_qfeb ; ///< Qthreads full/empty bit TaskMember ** const m_dep ; ///< Dependences const int m_dep_capacity ; ///< Capacity of dependences int m_dep_size ; ///< Actual count of dependences @@ -129,7 +129,7 @@ protected : ~TaskMember(); - // Used by TaskMember< Qthread , ResultType , void > + // Used by TaskMember< Qthreads , ResultType , void > 
TaskMember( const function_verify_type arg_verify , const function_dealloc_type arg_dealloc , const function_single_type arg_apply_single @@ -139,7 +139,7 @@ protected : , const unsigned arg_dependence_capacity ); - // Used for TaskMember< Qthread , void , void > + // Used for TaskMember< Qthreads , void , void > TaskMember( const function_dealloc_type arg_dealloc , const function_single_type arg_apply_single , const function_team_type arg_apply_team @@ -175,15 +175,15 @@ public: /* Inheritence Requirements on task types: * typedef FunctorType::value_type value_type ; * class DerivedTaskType - * : public TaskMember< Qthread , value_type , FunctorType > + * : public TaskMember< Qthreads , value_type , FunctorType > * { ... }; - * class TaskMember< Qthread , value_type , FunctorType > - * : public TaskMember< Qthread , value_type , void > + * class TaskMember< Qthreads , value_type , FunctorType > + * : public TaskMember< Qthreads , value_type , void > * , public Functor * { ... }; * If value_type != void - * class TaskMember< Qthread , value_type , void > - * : public TaskMember< Qthread , void , void > + * class TaskMember< Qthreads , value_type , void > + * : public TaskMember< Qthreads , void , void > * * Allocate space for DerivedTaskType followed by TaskMember*[ dependence_capacity ] * @@ -300,10 +300,10 @@ public: KOKKOS_INLINE_FUNCTION static void apply_single( typename std::enable_if< ! std::is_same< ResultType , void >::value , TaskMember * >::type t ) { - typedef TaskMember< Kokkos::Qthread , ResultType , FunctorType > derived_type ; + typedef TaskMember< Kokkos::Qthreads , ResultType , FunctorType > derived_type ; - // TaskMember< Kokkos::Qthread , ResultType , FunctorType > - // : public TaskMember< Kokkos::Qthread , ResultType , void > + // TaskMember< Kokkos::Qthreads , ResultType , FunctorType > + // : public TaskMember< Kokkos::Qthreads , ResultType , void > // , public FunctorType // { ... 
}; @@ -316,10 +316,10 @@ public: KOKKOS_INLINE_FUNCTION static void apply_single( typename std::enable_if< std::is_same< ResultType , void >::value , TaskMember * >::type t ) { - typedef TaskMember< Kokkos::Qthread , ResultType , FunctorType > derived_type ; + typedef TaskMember< Kokkos::Qthreads , ResultType , FunctorType > derived_type ; - // TaskMember< Kokkos::Qthread , ResultType , FunctorType > - // : public TaskMember< Kokkos::Qthread , ResultType , void > + // TaskMember< Kokkos::Qthreads , ResultType , FunctorType > + // : public TaskMember< Kokkos::Qthreads , ResultType , void > // , public FunctorType // { ... }; @@ -333,9 +333,9 @@ public: template< class FunctorType , class ResultType > KOKKOS_INLINE_FUNCTION static void apply_team( typename std::enable_if< ! std::is_same< ResultType , void >::value , TaskMember * >::type t - , Kokkos::Impl::QthreadTeamPolicyMember & member ) + , Kokkos::Impl::QthreadsTeamPolicyMember & member ) { - typedef TaskMember< Kokkos::Qthread , ResultType , FunctorType > derived_type ; + typedef TaskMember< Kokkos::Qthreads , ResultType , FunctorType > derived_type ; derived_type & m = * static_cast< derived_type * >( t ); @@ -345,9 +345,9 @@ public: template< class FunctorType , class ResultType > KOKKOS_INLINE_FUNCTION static void apply_team( typename std::enable_if< std::is_same< ResultType , void >::value , TaskMember * >::type t - , Kokkos::Impl::QthreadTeamPolicyMember & member ) + , Kokkos::Impl::QthreadsTeamPolicyMember & member ) { - typedef TaskMember< Kokkos::Qthread , ResultType , FunctorType > derived_type ; + typedef TaskMember< Kokkos::Qthreads , ResultType , FunctorType > derived_type ; derived_type & m = * static_cast< derived_type * >( t ); @@ -356,7 +356,7 @@ public: }; //---------------------------------------------------------------------------- -/** \brief Base class for tasks with a result value in the Qthread execution space. 
+/** \brief Base class for tasks with a result value in the Qthreads execution space. * * The FunctorType must be void because this class is accessed by the * Future class for the task and result value. @@ -365,8 +365,8 @@ public: * can correctly static_cast from the 'root class' to this class. */ template < class ResultType > -class TaskMember< Kokkos::Qthread , ResultType , void > - : public TaskMember< Kokkos::Qthread , void , void > +class TaskMember< Kokkos::Qthreads , ResultType , void > + : public TaskMember< Kokkos::Qthreads , void , void > { public: @@ -379,7 +379,7 @@ public: protected: - typedef TaskMember< Kokkos::Qthread , void , void > task_root_type ; + typedef TaskMember< Kokkos::Qthreads , void , void > task_root_type ; typedef task_root_type::function_dealloc_type function_dealloc_type ; typedef task_root_type::function_single_type function_single_type ; typedef task_root_type::function_team_type function_team_type ; @@ -404,16 +404,16 @@ protected: }; template< class ResultType , class FunctorType > -class TaskMember< Kokkos::Qthread , ResultType , FunctorType > - : public TaskMember< Kokkos::Qthread , ResultType , void > +class TaskMember< Kokkos::Qthreads , ResultType , FunctorType > + : public TaskMember< Kokkos::Qthreads , ResultType , void > , public FunctorType { public: typedef FunctorType functor_type ; - typedef TaskMember< Kokkos::Qthread , void , void > task_root_type ; - typedef TaskMember< Kokkos::Qthread , ResultType , void > task_base_type ; + typedef TaskMember< Kokkos::Qthreads , void , void > task_root_type ; + typedef TaskMember< Kokkos::Qthreads , ResultType , void > task_base_type ; typedef task_root_type::function_dealloc_type function_dealloc_type ; typedef task_root_type::function_single_type function_single_type ; typedef task_root_type::function_team_type function_team_type ; @@ -447,16 +447,16 @@ public: namespace Kokkos { namespace Experimental { -void wait( TaskPolicy< Kokkos::Qthread > & ); +void wait( TaskPolicy< 
Kokkos::Qthreads > & ); template<> -class TaskPolicy< Kokkos::Qthread > +class TaskPolicy< Kokkos::Qthreads > { public: - typedef Kokkos::Qthread execution_space ; + typedef Kokkos::Qthreads execution_space ; typedef TaskPolicy execution_policy ; - typedef Kokkos::Impl::QthreadTeamPolicyMember member_type ; + typedef Kokkos::Impl::QthreadsTeamPolicyMember member_type ; private: @@ -650,7 +650,7 @@ public: static member_type & member_single(); - friend void wait( TaskPolicy< Kokkos::Qthread > & ); + friend void wait( TaskPolicy< Kokkos::Qthreads > & ); }; } /* namespace Experimental */ @@ -660,5 +660,5 @@ public: //---------------------------------------------------------------------------- #endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ -#endif /* #define KOKKOS_QTHREAD_TASK_HPP */ +#endif /* #define KOKKOS_QTHREADS_TASK_HPP */ diff --git a/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskQueue.hpp b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskQueue.hpp new file mode 100644 index 0000000000..55235cd6d2 --- /dev/null +++ b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskQueue.hpp @@ -0,0 +1,319 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#if defined( KOKKOS_ENABLE_TASKPOLICY ) + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +/** \brief Manage task allocation, deallocation, and scheduling. + * + * Task execution is handled here directly for the Qthread implementation. 
+ */ +template<> +class TaskQueue< Kokkos::Qthread > { +private: + + using execution_space = Kokkos::Qthread ; + using memory_space = Kokkos::HostSpace + using device_type = Kokkos::Device< execution_space, memory_space > ; + using memory_pool = Kokkos::Experimental::MemoryPool< device_type > ; + using task_root_type = Kokkos::Impl::TaskBase< execution_space, void, void > ; + + friend class Kokkos::TaskScheduler< execution_space > ; + + struct Destroy { + TaskQueue * m_queue ; + void destroy_shared_allocation(); + }; + + //---------------------------------------- + + enum : int { TASK_STATE_NULL = 0, ///< Does not exist + TASK_STATE_CONSTRUCTING = 1, ///< Is under construction + TASK_STATE_WAITING = 2, ///< Is waiting for execution + TASK_STATE_EXECUTING = 4, ///< Is executing + TASK_STATE_RESPAWN = 8, ///< Requested respawn + TASK_STATE_COMPLETE = 16 ///< Execution is complete + }; + + // Queue is organized as [ priority ][ type ] + + memory_pool m_memory ; + unsigned m_team_size ; // Number of threads in a team + long m_accum_alloc ; // Accumulated number of allocations + int m_count_alloc ; // Current number of allocations + int m_max_alloc ; // Maximum number of allocations + int m_ready_count ; // Number of ready or executing + + //---------------------------------------- + + ~TaskQueue(); + TaskQueue() = delete ; + TaskQueue( TaskQueue && ) = delete ; + TaskQueue( TaskQueue const & ) = delete ; + TaskQueue & operator = ( TaskQueue && ) = delete ; + TaskQueue & operator = ( TaskQueue const & ) = delete ; + + TaskQueue + ( const memory_space & arg_space, + unsigned const arg_memory_pool_capacity, + unsigned const arg_memory_pool_superblock_capacity_log2 + ); + + // Schedule a task + // Precondition: + // task is not executing + // task->m_next is the dependence or zero + // Postcondition: + // task->m_next is linked list membership + KOKKOS_FUNCTION + void schedule( task_root_type * const ); + + // Reschedule a task + // Precondition: + // task is in Executing 
state + // task->m_next == LockTag + // Postcondition: + // task is in Executing-Respawn state + // task->m_next == 0 (no dependence) + KOKKOS_FUNCTION + void reschedule( task_root_type * ); + + // Complete a task + // Precondition: + // task is not executing + // task->m_next == LockTag => task is complete + // task->m_next != LockTag => task is respawn + // Postcondition: + // task->m_wait == LockTag => task is complete + // task->m_wait != LockTag => task is waiting + KOKKOS_FUNCTION + void complete( task_root_type * ); + +public: + + // If and only if the execution space is a single thread + // then execute ready tasks. + KOKKOS_INLINE_FUNCTION + void iff_single_thread_recursive_execute() + { +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + specialization::iff_single_thread_recursive_execute( this ); +#endif + } + + void execute() { specialization::execute( this ); } + + template< typename FunctorType > + void proc_set_apply( typename task_root_type::function_type * ptr ) + { + specialization::template proc_set_apply< FunctorType >( ptr ); + } + + // Assign task pointer with reference counting of assigned tasks + template< typename LV, typename RV > + KOKKOS_FUNCTION static + void assign( TaskBase< execution_space, LV, void > ** const lhs, + TaskBase< execution_space, RV, void > * const rhs ) + { + using task_lhs = TaskBase< execution_space, LV, void > ; +#if 0 + { + printf( "assign( 0x%lx { 0x%lx %d %d }, 0x%lx { 0x%lx %d %d } )\n", + uintptr_t( lhs ? *lhs : 0 ), + uintptr_t( lhs && *lhs ? (*lhs)->m_next : 0 ), + int( lhs && *lhs ? (*lhs)->m_task_type : 0 ), + int( lhs && *lhs ? (*lhs)->m_ref_count : 0 ), + uintptr_t(rhs), + uintptr_t( rhs ? rhs->m_next : 0 ), + int( rhs ? rhs->m_task_type : 0 ), + int( rhs ? 
rhs->m_ref_count : 0 ) + ); + fflush( stdout ); + } +#endif + + if ( *lhs ) + { + const int count = Kokkos::atomic_fetch_add( &((*lhs)->m_ref_count), -1 ); + + if ( ( 1 == count ) && ( (*lhs)->m_state == TASK_STATE_COMPLETE ) ) { + // Reference count is zero and task is complete, deallocate. + (*lhs)->m_queue->deallocate( *lhs, (*lhs)->m_alloc_size ); + } + else if ( count <= 1 ) { + Kokkos::abort("TaskScheduler task has negative reference count or is incomplete" ); + } + + // GEM: Should I check that there are no dependences here? Can the state + // be set to complete while there are still dependences? + } + + if ( rhs ) { Kokkos::atomic_fetch_add( &(rhs->m_ref_count), 1 ); } + + // Force write of *lhs + + *static_cast< task_lhs * volatile * >(lhs) = rhs ; + + Kokkos::memory_fence(); + } + + KOKKOS_FUNCTION + size_t allocate_block_size( size_t n ); ///< Actual block size allocated + + KOKKOS_FUNCTION + void * allocate( size_t n ); ///< Allocate from the memory pool + + KOKKOS_FUNCTION + void deallocate( void * p, size_t n ); ///< Deallocate to the memory pool +}; + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template<> +class TaskBase< Kokkos::Qthread, void, void > +{ +public: + + enum : int16_t { TaskTeam = TaskBase< void, void, void >::TaskTeam, + TaskSingle = TaskBase< void, void, void >::TaskSingle, + Aggregate = TaskBase< void, void, void >::Aggregate }; + + enum : uintptr_t { LockTag = TaskBase< void, void, void >::LockTag, + EndTag = TaskBase< void, void, void >::EndTag }; + + using execution_space = Kokkos::Qthread ; + using queue_type = TaskQueue< execution_space > ; + + template< typename > friend class Kokkos::TaskScheduler ; + + typedef void (* function_type) ( TaskBase *, void * ); + + // sizeof(TaskBase) == 48 + + function_type m_apply ; ///< 
Apply function pointer + queue_type * m_queue ; ///< Queue in which this task resides + TaskBase * m_dep ; ///< Dependence + int32_t m_ref_count ; ///< Reference count + int32_t m_alloc_size ; ///< Allocation size + int32_t m_dep_count ; ///< Aggregate's number of dependences + int16_t m_task_type ; ///< Type of task + int16_t m_priority ; ///< Priority of runnable task + aligned_t m_qfeb ; ///< Qthread full/empty bit + int m_state ; ///< State of the task + + TaskBase( TaskBase && ) = delete ; + TaskBase( const TaskBase & ) = delete ; + TaskBase & operator = ( TaskBase && ) = delete ; + TaskBase & operator = ( const TaskBase & ) = delete ; + + KOKKOS_INLINE_FUNCTION ~TaskBase() = default ; + + KOKKOS_INLINE_FUNCTION + constexpr TaskBase() noexcept + : m_apply(0), + m_queue(0), + m_dep(0), + m_ref_count(0), + m_alloc_size(0), + m_dep_count(0), + m_task_type( TaskSingle ), + m_priority( 1 /* TaskRegularPriority */ ), + m_qfeb(0), + m_state( queue_type::TASK_STATE_CONSTRUCTING ) + { + qthread_empty( & m_qfeb ); // Set to full when complete + } + + //---------------------------------------- + + static aligned_t qthread_func( void * arg ); + + KOKKOS_INLINE_FUNCTION + TaskBase ** aggregate_dependences() + { return reinterpret_cast( this + 1 ); } + + KOKKOS_INLINE_FUNCTION + void requested_respawn() + { return m_state == queue_type::TASK_STATE_RESPAWN; } + + KOKKOS_INLINE_FUNCTION + void add_dependence( TaskBase* dep ) + { + // Assign dependence to m_dep. It will be processed in the subsequent + // call to schedule. Error if the dependence is reset. + if ( 0 != Kokkos::atomic_exchange( & m_dep, dep ) ) { + Kokkos::abort("TaskScheduler ERROR: resetting task dependence"); + } + + if ( 0 != dep ) { + // The future may be destroyed upon returning from this call + // so increment reference count to track this assignment. 
+ Kokkos::atomic_fetch_add( &(dep->m_ref_count), 1 ); + } + } + + using get_return_type = void ; + + KOKKOS_INLINE_FUNCTION + get_return_type get() const {} +}; + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ diff --git a/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskQueue_impl.hpp b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskQueue_impl.hpp new file mode 100644 index 0000000000..4a9190c731 --- /dev/null +++ b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskQueue_impl.hpp @@ -0,0 +1,436 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#if defined( KOKKOS_ENABLE_TASKPOLICY ) + +namespace Kokkos { +namespace Impl { + +//---------------------------------------------------------------------------- + +template< typename ExecSpace > +void TaskQueue< ExecSpace >::Destroy::destroy_shared_allocation() +{ + m_queue->~TaskQueue(); +} + +//---------------------------------------------------------------------------- + +template< typename ExecSpace > +TaskQueue< ExecSpace >::TaskQueue + ( const TaskQueue< ExecSpace >::memory_space & arg_space, + unsigned const arg_memory_pool_capacity, + unsigned const arg_memory_pool_superblock_capacity_log2 ) + : m_memory( arg_space, + arg_memory_pool_capacity, + arg_memory_pool_superblock_capacity_log2 ) + m_team_size( unsigned( qthread_num_workers_local(NO_SHEPHERD) ) ), + m_accum_alloc(0), + m_count_alloc(0), + m_max_alloc(0), + m_ready_count(0) +{} + +//---------------------------------------------------------------------------- + +template< typename ExecSpace > +TaskQueue< ExecSpace >::~TaskQueue() +{ + // Verify that ready count is zero. 
+ if ( 0 != m_ready_count ) { + Kokkos::abort("TaskQueue::~TaskQueue ERROR: has ready or executing tasks"); + } +} + +//---------------------------------------------------------------------------- + +template< typename ExecSpace > +KOKKOS_FUNCTION +size_t TaskQueue< ExecSpace >::allocate_block_size( size_t n ) +{ + return m_memory.allocate_block_size( n ); +} + +//---------------------------------------------------------------------------- + +template< typename ExecSpace > +KOKKOS_FUNCTION +void * TaskQueue< ExecSpace >::allocate( size_t n ) +{ + void * const p = m_memory.allocate(n); + + if ( p ) { + Kokkos::atomic_increment( & m_accum_alloc ); + Kokkos::atomic_increment( & m_count_alloc ); + + if ( m_max_alloc < m_count_alloc ) m_max_alloc = m_count_alloc ; + } + + return p ; +} + +//---------------------------------------------------------------------------- + +template< typename ExecSpace > +KOKKOS_FUNCTION +void TaskQueue< ExecSpace >::deallocate( void * p, size_t n ) +{ + m_memory.deallocate( p, n ); + Kokkos::atomic_decrement( & m_count_alloc ); +} + +//---------------------------------------------------------------------------- + +template< typename ExecSpace > +KOKKOS_FUNCTION +void TaskQueue< ExecSpace >::schedule + ( TaskQueue< ExecSpace >::task_root_type * const task ) +{ +#if 0 + printf( "schedule( 0x%lx { %d %d %d }\n", + uintptr_t(task), + task->m_task_type, + task->m_priority, + task->m_ref_count ); +#endif + + // The task has been constructed and is waiting to be executed. + task->m_state = TASK_STATE_WAITING ; + + if ( task->m_task_type != task_root_type::Aggregate ) { + // Scheduling a single or team task. + + // Increment active task count before spawning. + Kokkos::atomic_increment( m_ready_count ); + + if ( task->m_dep == 0 ) { + // Schedule a task with no dependences. 
+ + if ( task_root_type::TaskTeam == task->m_task_type && m_team_size > 1 ) { + // If more than one shepherd spawn on a shepherd other than this shepherd + const int num_shepherd = qthread_num_shepherds(); + const int this_shepherd = qthread_shep(); + int spawn_shepherd = ( this_shepherd + 1 ) % num_shepherd ; + +#if 0 + fprintf( stdout, + "worker(%d.%d) task 0x%.12lx spawning on shepherd(%d) clone(%d)\n", + qthread_shep(), + qthread_worker_local(NULL), + reinterpret_cast(this), + spawn_shepherd, + m_team_size - 1 + ); + fflush(stdout); +#endif + + qthread_spawn_cloneable( + & task_root_type::qthread_func, + task, + 0, + NULL, + 0, // no depenedences + 0, // dependences array + spawn_shepherd, + unsigned( QTHREAD_SPAWN_SIMPLE | QTHREAD_SPAWN_LOCAL_PRIORITY ), + m_team_size - 1 + ); + } + else { + qthread_spawn( + & task_root_type::qthread_func, + task, + 0, + NULL, + 0, // no depenedences + 0, // dependences array + NO_SHEPHERD, + QTHREAD_SPAWN_SIMPLE /* allows optimization for non-blocking task */ + ); + } + } + else if ( task->m_dep->m_task_type != task_root_type::Aggregate ) + // Malloc the precondition array to pass to qthread_spawn(). For + // non-aggregate tasks, it is a single pointer since there are no + // dependences. Qthreads will eventually free this allocation so memory will + // not be leaked. Is malloc thread-safe? Should this call be guarded? The + // memory can't be allocated from the pool allocator because Qthreads frees + // it using free(). 
+ aligned_t ** qprecon = (aligned_t **) malloc( sizeof(aligned_t *) ); + + *qprecon = reinterpret_cast( uintptr_t(m_dep_size) ); + + if ( task->m_task_type == task_root_type::TaskTeam && m_team_size > 1) { + // If more than one shepherd spawn on a shepherd other than this shepherd + const int num_shepherd = qthread_num_shepherds(); + const int this_shepherd = qthread_shep(); + int spawn_shepherd = ( this_shepherd + 1 ) % num_shepherd ; + +#if 0 + fprintf( stdout, + "worker(%d.%d) task 0x%.12lx spawning on shepherd(%d) clone(%d)\n", + qthread_shep(), + qthread_worker_local(NULL), + reinterpret_cast(this), + spawn_shepherd, + m_team_size - 1 + ); + fflush(stdout); +#endif + + qthread_spawn_cloneable( + & Task::qthread_func, + this, + 0, + NULL, + m_dep_size, + qprecon, /* dependences */ + spawn_shepherd, + unsigned( QTHREAD_SPAWN_SIMPLE | QTHREAD_SPAWN_LOCAL_PRIORITY ), + m_team_size - 1 + ); + } + else { + qthread_spawn( + & Task::qthread_func, /* function */ + this, /* function argument */ + 0, + NULL, + m_dep_size, + qprecon, /* dependences */ + NO_SHEPHERD, + QTHREAD_SPAWN_SIMPLE /* allows optimization for non-blocking task */ + ); + } + } + else { + // GEM: How do I handle an aggregate (when_all) task? 
+ } +} + +//---------------------------------------------------------------------------- + +template< typename ExecSpace > +KOKKOS_FUNCTION +void TaskQueue< ExecSpace >::reschedule( task_root_type * task ) +{ + // Precondition: + // task is in Executing state + // task->m_next == LockTag + // + // Postcondition: + // task is in Executing-Respawn state + // task->m_next == 0 (no dependence) + + task_root_type * const zero = (task_root_type *) 0 ; + task_root_type * const lock = (task_root_type *) task_root_type::LockTag ; + + if ( lock != Kokkos::atomic_exchange( & task->m_next, zero ) ) { + Kokkos::abort("TaskScheduler::respawn ERROR: already respawned"); + } +} + +//---------------------------------------------------------------------------- + +template< typename ExecSpace > +KOKKOS_FUNCTION +void TaskQueue< ExecSpace >::complete + ( TaskQueue< ExecSpace >::task_root_type * task ) +{ + // Complete a runnable task that has finished executing + // or a when_all task when all of its dependeneces are complete. + + task_root_type * const zero = (task_root_type *) 0 ; + task_root_type * const lock = (task_root_type *) task_root_type::LockTag ; + task_root_type * const end = (task_root_type *) task_root_type::EndTag ; + +#if 0 + printf( "complete( 0x%lx { 0x%lx 0x%lx %d %d %d }\n", + uintptr_t(task), + uintptr_t(task->m_wait), + uintptr_t(task->m_next), + task->m_task_type, + task->m_priority, + task->m_ref_count + ); + fflush( stdout ); +#endif + + const bool runnable = task_root_type::Aggregate != task->m_task_type ; + + //---------------------------------------- + + if ( runnable && lock != task->m_next ) { + // Is a runnable task has finished executing and requested respawn. + // Schedule the task for subsequent execution. + + schedule( task ); + } + //---------------------------------------- + else { + // Is either an aggregate or a runnable task that executed + // and did not respawn. Transition this task to complete. 
+ + // If 'task' is an aggregate then any of the runnable tasks that + // it depends upon may be attempting to complete this 'task'. + // Must only transition a task once to complete status. + // This is controled by atomically locking the wait queue. + + // Stop other tasks from adding themselves to this task's wait queue + // by locking the head of this task's wait queue. + + task_root_type * x = Kokkos::atomic_exchange( & task->m_wait, lock ); + + if ( x != (task_root_type *) lock ) { + + // This thread has transitioned this 'task' to complete. + // 'task' is no longer in a queue and is not executing + // so decrement the reference count from 'task's creation. + // If no other references to this 'task' then it will be deleted. + + TaskQueue::assign( & task, zero ); + + // This thread has exclusive access to the wait list so + // the concurrency-safe pop_task function is not needed. + // Schedule the tasks that have been waiting on the input 'task', + // which may have been deleted. + + while ( x != end ) { + + // Set x->m_next = zero <= no dependence + + task_root_type * const next = + (task_root_type *) Kokkos::atomic_exchange( & x->m_next, zero ); + + schedule( x ); + + x = next ; + } + } + } + + if ( runnable ) { + // A runnable task was popped from a ready queue and executed. + // If respawned into a ready queue then the ready count was incremented + // so decrement whether respawned or not. + Kokkos::atomic_decrement( & m_ready_count ); + } +} + +//---------------------------------------------------------------------------- + +template<> +aligned_t +TaskBase< Kokkos::Qthreads, void, void >::qthread_func( void * arg ) +{ + using execution_space = Kokkos::Qthreads ; + using task_root_type = TaskBase< execution_space , void , void > ; + using Member = Kokkos::Impl::QthreadsTeamPolicyMember; + + task_root_type * const task = reinterpret_cast< task_root_type * >( arg ); + + // First member of the team change state to executing. 
+ // Use compare-exchange to avoid race condition with a respawn. + Kokkos::atomic_compare_exchange_strong( & task->m_state, + queue_type::TASK_STATE_WAITING, + queue_type::TASK_STATE_EXECUTING + ); + + if ( task_root_type::TaskTeam == task->m_task_type ) + { + if ( 1 < task->m_queue->m_team_size ) { + // Team task with team size of more than 1. + Member::TaskTeam task_team_tag ; + + // Initialize team size and rank with shephered info + Member member( task_team_tag ); + + (*task->m_apply)( task , & member ); + +#if 0 + fprintf( stdout, + "worker(%d.%d) task 0x%.12lx executed by member(%d:%d)\n", + qthread_shep(), + qthread_worker_local(NULL), + reinterpret_cast(task), + member.team_rank(), + member.team_size() + ); + fflush(stdout); +#endif + + member.team_barrier(); + if ( member.team_rank() == 0 ) task->closeout(); + member.team_barrier(); + } + else { + // Team task with team size of 1. + Member member ; + (*task->m_apply)( task , & member ); + task->closeout(); + } + } + else { + (*task->m_apply)( task ); + task->closeout(); + } + +#if 0 +fprintf( stdout + , "worker(%d.%d) task 0x%.12lx return\n" + , qthread_shep() + , qthread_worker_local(NULL) + , reinterpret_cast(task) + ); +fflush(stdout); +#endif + + return 0 ; +} + +} /* namespace Impl */ +} /* namespace Kokkos */ + + +#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ + diff --git a/lib/kokkos/core/src/Qthread/README b/lib/kokkos/core/src/Qthreads/README similarity index 99% rename from lib/kokkos/core/src/Qthread/README rename to lib/kokkos/core/src/Qthreads/README index 6e6c86a9ef..e35b1f698e 100644 --- a/lib/kokkos/core/src/Qthread/README +++ b/lib/kokkos/core/src/Qthreads/README @@ -22,4 +22,3 @@ sh autogen.sh # install make install - diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp index 0f69be9ed4..b1f53489f4 100644 --- a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp +++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp 
@@ -264,7 +264,7 @@ void ThreadsExec::execute_sleep( ThreadsExec & exec , const void * ) const int rank_rev = exec.m_pool_size - ( exec.m_pool_rank + 1 ); for ( int i = 0 ; i < n ; ++i ) { - Impl::spinwait( exec.m_pool_base[ rank_rev + (1<m_pool_state , ThreadsExec::Active ); + Impl::spinwait_while_equal( exec.m_pool_base[ rank_rev + (1<m_pool_state , ThreadsExec::Active ); } exec.m_pool_state = ThreadsExec::Inactive ; @@ -308,7 +308,7 @@ void ThreadsExec::fence() { if ( s_thread_pool_size[0] ) { // Wait for the root thread to complete: - Impl::spinwait( s_threads_exec[0]->m_pool_state , ThreadsExec::Active ); + Impl::spinwait_while_equal( s_threads_exec[0]->m_pool_state , ThreadsExec::Active ); } s_current_function = 0 ; @@ -724,7 +724,7 @@ void ThreadsExec::initialize( unsigned thread_count , // Init the array for used for arbitrarily sized atomics Impl::init_lock_array_host_space(); - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) Kokkos::Profiling::initialize(); #endif } @@ -777,7 +777,7 @@ void ThreadsExec::finalize() s_threads_process.m_pool_fan_size = 0 ; s_threads_process.m_pool_state = ThreadsExec::Inactive ; - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) Kokkos::Profiling::finalize(); #endif } diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp index 385dd492d0..a6db02ebac 100644 --- a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp +++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp @@ -187,13 +187,13 @@ public: // Fan-in reduction with highest ranking thread as the root for ( int i = 0 ; i < m_pool_fan_size ; ++i ) { // Wait: Active -> Rendezvous - Impl::spinwait( m_pool_base[ rev_rank + (1<m_pool_state , ThreadsExec::Active ); + Impl::spinwait_while_equal( m_pool_base[ rev_rank + (1<m_pool_state , ThreadsExec::Active ); } if ( rev_rank ) { m_pool_state = ThreadsExec::Rendezvous ; // Wait: Rendezvous -> Active - Impl::spinwait( 
m_pool_state , ThreadsExec::Rendezvous ); + Impl::spinwait_while_equal( m_pool_state , ThreadsExec::Rendezvous ); } else { // Root thread does the reduction and broadcast @@ -229,13 +229,13 @@ public: // Fan-in reduction with highest ranking thread as the root for ( int i = 0 ; i < m_pool_fan_size ; ++i ) { // Wait: Active -> Rendezvous - Impl::spinwait( m_pool_base[ rev_rank + (1<m_pool_state , ThreadsExec::Active ); + Impl::spinwait_while_equal( m_pool_base[ rev_rank + (1<m_pool_state , ThreadsExec::Active ); } if ( rev_rank ) { m_pool_state = ThreadsExec::Rendezvous ; // Wait: Rendezvous -> Active - Impl::spinwait( m_pool_state , ThreadsExec::Rendezvous ); + Impl::spinwait_while_equal( m_pool_state , ThreadsExec::Rendezvous ); } else { // Root thread does the reduction and broadcast @@ -264,7 +264,7 @@ public: ThreadsExec & fan = *m_pool_base[ rev_rank + ( 1 << i ) ] ; - Impl::spinwait( fan.m_pool_state , ThreadsExec::Active ); + Impl::spinwait_while_equal( fan.m_pool_state , ThreadsExec::Active ); Join::join( f , reduce_memory() , fan.reduce_memory() ); } @@ -280,7 +280,7 @@ public: const int rev_rank = m_pool_size - ( m_pool_rank + 1 ); for ( int i = 0 ; i < m_pool_fan_size ; ++i ) { - Impl::spinwait( m_pool_base[rev_rank+(1<m_pool_state , ThreadsExec::Active ); + Impl::spinwait_while_equal( m_pool_base[rev_rank+(1<m_pool_state , ThreadsExec::Active ); } } @@ -312,7 +312,7 @@ public: ThreadsExec & fan = *m_pool_base[ rev_rank + (1< ReductionAvailable (or ScanAvailable) - Impl::spinwait( fan.m_pool_state , ThreadsExec::Active ); + Impl::spinwait_while_equal( fan.m_pool_state , ThreadsExec::Active ); Join::join( f , work_value , fan.reduce_memory() ); } @@ -330,8 +330,8 @@ public: // Wait: Active -> ReductionAvailable // Wait: ReductionAvailable -> ScanAvailable - Impl::spinwait( th.m_pool_state , ThreadsExec::Active ); - Impl::spinwait( th.m_pool_state , ThreadsExec::ReductionAvailable ); + Impl::spinwait_while_equal( th.m_pool_state , ThreadsExec::Active ); + 
Impl::spinwait_while_equal( th.m_pool_state , ThreadsExec::ReductionAvailable ); Join::join( f , work_value + count , ((scalar_type *)th.reduce_memory()) + count ); } @@ -342,7 +342,7 @@ public: // Wait for all threads to complete inclusive scan // Wait: ScanAvailable -> Rendezvous - Impl::spinwait( m_pool_state , ThreadsExec::ScanAvailable ); + Impl::spinwait_while_equal( m_pool_state , ThreadsExec::ScanAvailable ); } //-------------------------------- @@ -350,7 +350,7 @@ public: for ( int i = 0 ; i < m_pool_fan_size ; ++i ) { ThreadsExec & fan = *m_pool_base[ rev_rank + (1< ScanAvailable - Impl::spinwait( fan.m_pool_state , ThreadsExec::ReductionAvailable ); + Impl::spinwait_while_equal( fan.m_pool_state , ThreadsExec::ReductionAvailable ); // Set: ScanAvailable -> Rendezvous fan.m_pool_state = ThreadsExec::Rendezvous ; } @@ -377,13 +377,13 @@ public: // Wait for all threads to copy previous thread's inclusive scan value // Wait for all threads: Rendezvous -> ScanCompleted for ( int i = 0 ; i < m_pool_fan_size ; ++i ) { - Impl::spinwait( m_pool_base[ rev_rank + (1<m_pool_state , ThreadsExec::Rendezvous ); + Impl::spinwait_while_equal( m_pool_base[ rev_rank + (1<m_pool_state , ThreadsExec::Rendezvous ); } if ( rev_rank ) { // Set: ScanAvailable -> ScanCompleted m_pool_state = ThreadsExec::ScanCompleted ; // Wait: ScanCompleted -> Active - Impl::spinwait( m_pool_state , ThreadsExec::ScanCompleted ); + Impl::spinwait_while_equal( m_pool_state , ThreadsExec::ScanCompleted ); } // Set: ScanCompleted -> Active for ( int i = 0 ; i < m_pool_fan_size ; ++i ) { @@ -410,7 +410,7 @@ public: // Fan-in reduction with highest ranking thread as the root for ( int i = 0 ; i < m_pool_fan_size ; ++i ) { // Wait: Active -> Rendezvous - Impl::spinwait( m_pool_base[ rev_rank + (1<m_pool_state , ThreadsExec::Active ); + Impl::spinwait_while_equal( m_pool_base[ rev_rank + (1<m_pool_state , ThreadsExec::Active ); } for ( unsigned i = 0 ; i < count ; ++i ) { work_value[i+count] = 
work_value[i]; } @@ -418,7 +418,7 @@ public: if ( rev_rank ) { m_pool_state = ThreadsExec::Rendezvous ; // Wait: Rendezvous -> Active - Impl::spinwait( m_pool_state , ThreadsExec::Rendezvous ); + Impl::spinwait_while_equal( m_pool_state , ThreadsExec::Rendezvous ); } else { // Root thread does the thread-scan before releasing threads diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp index b9edb64551..7014954281 100644 --- a/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp +++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp @@ -49,6 +49,7 @@ #include #include #include +#include #include @@ -103,13 +104,13 @@ public: // Wait for fan-in threads for ( n = 1 ; ( ! ( m_team_rank_rev & n ) ) && ( ( j = m_team_rank_rev + n ) < m_team_size ) ; n <<= 1 ) { - Impl::spinwait( m_team_base[j]->state() , ThreadsExec::Active ); + Impl::spinwait_while_equal( m_team_base[j]->state() , ThreadsExec::Active ); } // If not root then wait for release if ( m_team_rank_rev ) { m_exec->state() = ThreadsExec::Rendezvous ; - Impl::spinwait( m_exec->state() , ThreadsExec::Rendezvous ); + Impl::spinwait_while_equal( m_exec->state() , ThreadsExec::Rendezvous ); } return ! 
m_team_rank_rev ; @@ -350,6 +351,10 @@ public: const int team_rank_rev = pool_rank_rev % team.team_alloc(); const size_t pool_league_size = m_exec->pool_size() / team.team_alloc() ; const size_t pool_league_rank_rev = pool_rank_rev / team.team_alloc() ; + if(pool_league_rank_rev >= pool_league_size) { + m_invalid_thread = 1; + return; + } const size_t pool_league_rank = pool_league_size - ( pool_league_rank_rev + 1 ); const int pool_num_teams = m_exec->pool_size()/team.team_alloc(); @@ -505,7 +510,8 @@ private: , const int team_size_request ) { const int pool_size = traits::execution_space::thread_pool_size(0); - const int team_max = traits::execution_space::thread_pool_size(1); + const int max_host_team_size = Impl::HostThreadTeamData::max_team_members; + const int team_max = pool_size inline static - int team_size_max( const FunctorType & ) - { return traits::execution_space::thread_pool_size(1); } + int team_size_max( const FunctorType & ) { + int pool_size = traits::execution_space::thread_pool_size(1); + int max_host_team_size = Impl::HostThreadTeamData::max_team_members; + return pool_size static int team_size_recommended( const FunctorType & ) @@ -819,9 +829,7 @@ void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct KOKKOS_INLINE_FUNCTION void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct& - loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) { + loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& result ) { - ValueType result = init_result; #ifdef KOKKOS_ENABLE_PRAGMA_IVDEP #pragma ivdep #endif for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i,tmp); - join(result,tmp); + lambda(i,result); } - init_result = result; } /** \brief Intra-thread vector parallel exclusive prefix sum. 
Executes lambda(iType i, ValueType & val, bool final) diff --git a/lib/kokkos/core/src/impl/KokkosExp_Host_IterateTile.hpp b/lib/kokkos/core/src/impl/KokkosExp_Host_IterateTile.hpp new file mode 100644 index 0000000000..c4db3e15ef --- /dev/null +++ b/lib/kokkos/core/src/impl/KokkosExp_Host_IterateTile.hpp @@ -0,0 +1,2356 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_HOST_EXP_ITERATE_TILE_HPP +#define KOKKOS_HOST_EXP_ITERATE_TILE_HPP + +#include +#include +#include + +#include + +#if defined(KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION) && defined(KOKKOS_HAVE_PRAGMA_IVDEP) && !defined(__CUDA_ARCH__) +#define KOKKOS_MDRANGE_IVDEP +#endif + + +#ifdef KOKKOS_MDRANGE_IVDEP + #define KOKKOS_ENABLE_IVDEP_MDRANGE _Pragma("ivdep") +#else + #define KOKKOS_ENABLE_IVDEP_MDRANGE +#endif + + + +namespace Kokkos { namespace Experimental { namespace Impl { + +// Temporary, for testing new loop macros +#define KOKKOS_ENABLE_NEW_LOOP_MACROS 1 + + +#define LOOP_1L(type, tile) \ + KOKKOS_ENABLE_IVDEP_MDRANGE \ + for( type i0=0; i0(tile[0]); ++i0) + +#define LOOP_2L(type, tile) \ + for( type i1=0; i1(tile[1]); ++i1) \ + LOOP_1L(type, tile) + +#define LOOP_3L(type, tile) \ + for( type i2=0; i2(tile[2]); ++i2) \ + LOOP_2L(type, tile) + +#define LOOP_4L(type, tile) \ + for( type i3=0; i3(tile[3]); ++i3) \ + LOOP_3L(type, tile) + +#define LOOP_5L(type, tile) \ + for( type i4=0; i4(tile[4]); ++i4) \ + LOOP_4L(type, tile) + +#define LOOP_6L(type, tile) \ + for( type i5=0; i5(tile[5]); ++i5) \ + LOOP_5L(type, tile) + +#define LOOP_7L(type, tile) \ + for( type i6=0; i6(tile[6]); ++i6) \ + LOOP_6L(type, tile) + +#define LOOP_8L(type, tile) \ + for( 
type i7=0; i7(tile[7]); ++i7) \ + LOOP_7L(type, tile) + + +#define LOOP_1R(type, tile) \ + KOKKOS_ENABLE_IVDEP_MDRANGE \ + for ( type i0=0; i0(tile[0]); ++i0 ) + +#define LOOP_2R(type, tile) \ + LOOP_1R(type, tile) \ + for ( type i1=0; i1(tile[1]); ++i1 ) + +#define LOOP_3R(type, tile) \ + LOOP_2R(type, tile) \ + for ( type i2=0; i2(tile[2]); ++i2 ) + +#define LOOP_4R(type, tile) \ + LOOP_3R(type, tile) \ + for ( type i3=0; i3(tile[3]); ++i3 ) + +#define LOOP_5R(type, tile) \ + LOOP_4R(type, tile) \ + for ( type i4=0; i4(tile[4]); ++i4 ) + +#define LOOP_6R(type, tile) \ + LOOP_5R(type, tile) \ + for ( type i5=0; i5(tile[5]); ++i5 ) + +#define LOOP_7R(type, tile) \ + LOOP_6R(type, tile) \ + for ( type i6=0; i6(tile[6]); ++i6 ) + +#define LOOP_8R(type, tile) \ + LOOP_7R(type, tile) \ + for ( type i7=0; i7(tile[7]); ++i7 ) + + +#define LOOP_ARGS_1 i0 + m_offset[0] +#define LOOP_ARGS_2 LOOP_ARGS_1, i1 + m_offset[1] +#define LOOP_ARGS_3 LOOP_ARGS_2, i2 + m_offset[2] +#define LOOP_ARGS_4 LOOP_ARGS_3, i3 + m_offset[3] +#define LOOP_ARGS_5 LOOP_ARGS_4, i4 + m_offset[4] +#define LOOP_ARGS_6 LOOP_ARGS_5, i5 + m_offset[5] +#define LOOP_ARGS_7 LOOP_ARGS_6, i6 + m_offset[6] +#define LOOP_ARGS_8 LOOP_ARGS_7, i7 + m_offset[7] + + + +// New Loop Macros... +// parallel_for, non-tagged +#define APPLY( func, ... ) \ + func( __VA_ARGS__ ); + +// LayoutRight +// d = 0 to start +#define LOOP_R_1( func, type, m_offset, extent, d, ... ) \ + KOKKOS_ENABLE_IVDEP_MDRANGE \ + for( type i0 = (type)0; i0 < static_cast(extent[d]); ++i0) { \ + APPLY( func, __VA_ARGS__, i0 + m_offset[d] ) \ + } + +#define LOOP_R_2( func, type, m_offset, extent, d, ... ) \ + for( type i1 = (type)0; i1 < static_cast(extent[d]); ++i1) { \ + LOOP_R_1( func, type, m_offset, extent, d+1 , __VA_ARGS__, i1 + m_offset[d] ) \ + } + +#define LOOP_R_3( func, type, m_offset, extent, d, ... 
) \ + for( type i2 = (type)0; i2 < static_cast(extent[d]); ++i2) { \ + LOOP_R_2( func, type, m_offset, extent, d+1 , __VA_ARGS__, i2 + m_offset[d] ) \ + } + +#define LOOP_R_4( func, type, m_offset, extent, d, ... ) \ + for( type i3 = (type)0; i3 < static_cast(extent[d]); ++i3) { \ + LOOP_R_3( func, type, m_offset, extent, d+1 , __VA_ARGS__, i3 + m_offset[d] ) \ + } + +#define LOOP_R_5( func, type, m_offset, extent, d, ... ) \ + for( type i4 = (type)0; i4 < static_cast(extent[d]); ++i4) { \ + LOOP_R_4( func, type, m_offset, extent, d+1 , __VA_ARGS__, i4 + m_offset[d] ) \ + } + +#define LOOP_R_6( func, type, m_offset, extent, d, ... ) \ + for( type i5 = (type)0; i5 < static_cast(extent[d]); ++i5) { \ + LOOP_R_5( func, type, m_offset, extent, d+1 , __VA_ARGS__, i5 + m_offset[d] ) \ + } + +#define LOOP_R_7( func, type, m_offset, extent, d, ... ) \ + for( type i6 = (type)0; i6 < static_cast(extent[d]); ++i6) { \ + LOOP_R_6( func, type, m_offset, extent, d+1 , __VA_ARGS__, i6 + m_offset[d] ) \ + } + +#define LOOP_R_8( func, type, m_offset, extent, d, ... ) \ + for( type i7 = (type)0; i7 < static_cast(extent[d]); ++i7) { \ + LOOP_R_7( func, type, m_offset, extent, d+1 , __VA_ARGS__, i7 + m_offset[d] ) \ + } + +//LayoutLeft +// d = rank-1 to start +#define LOOP_L_1( func, type, m_offset, extent, d, ... ) \ + KOKKOS_ENABLE_IVDEP_MDRANGE \ + for( type i0 = (type)0; i0 < static_cast(extent[d]); ++i0) { \ + APPLY( func, i0 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define LOOP_L_2( func, type, m_offset, extent, d, ... ) \ + for( type i1 = (type)0; i1 < static_cast(extent[d]); ++i1) { \ + LOOP_L_1( func, type, m_offset, extent, d-1, i1 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define LOOP_L_3( func, type, m_offset, extent, d, ... ) \ + for( type i2 = (type)0; i2 < static_cast(extent[d]); ++i2) { \ + LOOP_L_2( func, type, m_offset, extent, d-1, i2 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define LOOP_L_4( func, type, m_offset, extent, d, ... 
) \ + for( type i3 = (type)0; i3 < static_cast(extent[d]); ++i3) { \ + LOOP_L_3( func, type, m_offset, extent, d-1, i3 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define LOOP_L_5( func, type, m_offset, extent, d, ... ) \ + for( type i4 = (type)0; i4 < static_cast(extent[d]); ++i4) { \ + LOOP_L_4( func, type, m_offset, extent, d-1, i4 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define LOOP_L_6( func, type, m_offset, extent, d, ... ) \ + for( type i5 = (type)0; i5 < static_cast(extent[d]); ++i5) { \ + LOOP_L_5( func, type, m_offset, extent, d-1, i5 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define LOOP_L_7( func, type, m_offset, extent, d, ... ) \ + for( type i6 = (type)0; i6 < static_cast(extent[d]); ++i6) { \ + LOOP_L_6( func, type, m_offset, extent, d-1, i6 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define LOOP_L_8( func, type, m_offset, extent, d, ... ) \ + for( type i7 = (type)0; i7 < static_cast(extent[d]); ++i7) { \ + LOOP_L_7( func, type, m_offset, extent, d-1, i7 + m_offset[d] , __VA_ARGS__ ) \ + } + +// Left vs Right +// TODO: rank not necessary to pass through, can hardcode the values +#define LOOP_LAYOUT_1( func, type, is_left, m_offset, extent, rank ) \ + KOKKOS_ENABLE_IVDEP_MDRANGE \ + for( type i0 = (type)0; i0 < static_cast(extent[0]); ++i0) { \ + APPLY( func, i0 + m_offset[0] ) \ + } + +#define LOOP_LAYOUT_2( func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i1 = (type)0; i1 < static_cast(extent[rank-1]); ++i1) { \ + LOOP_L_1( func, type, m_offset, extent, rank-2, i1 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i1 = (type)0; i1 < static_cast(extent[0]); ++i1) { \ + LOOP_R_1( func, type, m_offset, extent, 1 , i1 + m_offset[0] ) \ + } \ + } + +#define LOOP_LAYOUT_3( func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i2 = (type)0; i2 < static_cast(extent[rank-1]); ++i2) { \ + LOOP_L_2( func, type, m_offset, extent, rank-2, i2 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i2 = (type)0; 
i2 < static_cast(extent[0]); ++i2) { \ + LOOP_R_2( func, type, m_offset, extent, 1 , i2 + m_offset[0] ) \ + } \ + } + +#define LOOP_LAYOUT_4( func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i3 = (type)0; i3 < static_cast(extent[rank-1]); ++i3) { \ + LOOP_L_3( func, type, m_offset, extent, rank-2, i3 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i3 = (type)0; i3 < static_cast(extent[0]); ++i3) { \ + LOOP_R_3( func, type, m_offset, extent, 1 , i3 + m_offset[0] ) \ + } \ + } + +#define LOOP_LAYOUT_5( func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i4 = (type)0; i4 < static_cast(extent[rank-1]); ++i4) { \ + LOOP_L_4( func, type, m_offset, extent, rank-2, i4 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i4 = (type)0; i4 < static_cast(extent[0]); ++i4) { \ + LOOP_R_4( func, type, m_offset, extent, 1 , i4 + m_offset[0] ) \ + } \ + } + +#define LOOP_LAYOUT_6( func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i5 = (type)0; i5 < static_cast(extent[rank-1]); ++i5) { \ + LOOP_L_5( func, type, m_offset, extent, rank-2, i5 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i5 = (type)0; i5 < static_cast(extent[0]); ++i5) { \ + LOOP_R_5( func, type, m_offset, extent, 1 , i5 + m_offset[0] ) \ + } \ + } + +#define LOOP_LAYOUT_7( func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i6 = (type)0; i6 < static_cast(extent[rank-1]); ++i6) { \ + LOOP_L_6( func, type, m_offset, extent, rank-2, i6 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i6 = (type)0; i6 < static_cast(extent[0]); ++i6) { \ + LOOP_R_6( func, type, m_offset, extent, 1 , i6 + m_offset[0] ) \ + } \ + } + +#define LOOP_LAYOUT_8( func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i7 = (type)0; i7 < static_cast(extent[rank-1]); ++i7) { \ + LOOP_L_7( func, type, m_offset, extent, rank-2, i7 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + 
for( type i7 = (type)0; i7 < static_cast(extent[0]); ++i7) { \ + LOOP_R_7( func, type, m_offset, extent, 1 , i7 + m_offset[0] ) \ + } \ + } + +// Partial vs Full Tile +#define TILE_LOOP_1( func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_1( func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_1( func, type, is_left, m_offset, extent_partial, rank ) } + +#define TILE_LOOP_2( func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_2( func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_2( func, type, is_left, m_offset, extent_partial, rank ) } + +#define TILE_LOOP_3( func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_3( func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_3( func, type, is_left, m_offset, extent_partial, rank ) } + +#define TILE_LOOP_4( func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_4( func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_4( func, type, is_left, m_offset, extent_partial, rank ) } + +#define TILE_LOOP_5( func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_5( func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_5( func, type, is_left, m_offset, extent_partial, rank ) } + +#define TILE_LOOP_6( func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_6( func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_6( func, type, is_left, m_offset, extent_partial, rank ) } + +#define TILE_LOOP_7( func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_7( func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_7( func, type, is_left, m_offset, extent_partial, 
rank ) } + +#define TILE_LOOP_8( func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_8( func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_8( func, type, is_left, m_offset, extent_partial, rank ) } + + +// parallel_reduce, non-tagged +// Reduction version +#define APPLY_REDUX( val, func, ... ) \ + func( __VA_ARGS__, val ); + +// LayoutRight +// d = 0 to start +#define LOOP_R_1_REDUX( val, func, type, m_offset, extent, d, ... ) \ + KOKKOS_ENABLE_IVDEP_MDRANGE \ + for( type i0 = (type)0; i0 < static_cast(extent[d]); ++i0) { \ + APPLY_REDUX( val, func, __VA_ARGS__, i0 + m_offset[d] ) \ + } + +#define LOOP_R_2_REDUX( val, func, type, m_offset, extent, d, ... ) \ + for( type i1 = (type)0; i1 < static_cast(extent[d]); ++i1) { \ + LOOP_R_1_REDUX( val, func, type, m_offset, extent, d+1 , __VA_ARGS__, i1 + m_offset[d] ) \ + } + +#define LOOP_R_3_REDUX( val, func, type, m_offset, extent, d, ... ) \ + for( type i2 = (type)0; i2 < static_cast(extent[d]); ++i2) { \ + LOOP_R_2_REDUX( val, func, type, m_offset, extent, d+1 , __VA_ARGS__, i2 + m_offset[d] ) \ + } + +#define LOOP_R_4_REDUX( val, func, type, m_offset, extent, d, ... ) \ + for( type i3 = (type)0; i3 < static_cast(extent[d]); ++i3) { \ + LOOP_R_3_REDUX( val, func, type, m_offset, extent, d+1 , __VA_ARGS__, i3 + m_offset[d] ) \ + } + +#define LOOP_R_5_REDUX( val, func, type, m_offset, extent, d, ... ) \ + for( type i4 = (type)0; i4 < static_cast(extent[d]); ++i4) { \ + LOOP_R_4_REDUX( val, func, type, m_offset, extent, d+1 , __VA_ARGS__, i4 + m_offset[d] ) \ + } + +#define LOOP_R_6_REDUX( val, func, type, m_offset, extent, d, ... ) \ + for( type i5 = (type)0; i5 < static_cast(extent[d]); ++i5) { \ + LOOP_R_5_REDUX( val, func, type, m_offset, extent, d+1 , __VA_ARGS__, i5 + m_offset[d] ) \ + } + +#define LOOP_R_7_REDUX( val, func, type, m_offset, extent, d, ... 
) \ + for( type i6 = (type)0; i6 < static_cast(extent[d]); ++i6) { \ + LOOP_R_6_REDUX( val, func, type, m_offset, extent, d+1 , __VA_ARGS__, i6 + m_offset[d] ) \ + } + +#define LOOP_R_8_REDUX( val, func, type, m_offset, extent, d, ... ) \ + for( type i7 = (type)0; i7 < static_cast(extent[d]); ++i7) { \ + LOOP_R_7_REDUX( val, func, type, m_offset, extent, d+1 , __VA_ARGS__, i7 + m_offset[d] ) \ + } + +//LayoutLeft +// d = rank-1 to start +#define LOOP_L_1_REDUX( val, func, type, m_offset, extent, d, ... ) \ + KOKKOS_ENABLE_IVDEP_MDRANGE \ + for( type i0 = (type)0; i0 < static_cast(extent[d]); ++i0) { \ + APPLY_REDUX( val, func, i0 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define LOOP_L_2_REDUX( val, func, type, m_offset, extent, d, ... ) \ + for( type i1 = (type)0; i1 < static_cast(extent[d]); ++i1) { \ + LOOP_L_1_REDUX( val, func, type, m_offset, extent, d-1, i1 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define LOOP_L_3_REDUX( val, func, type, m_offset, extent, d, ... ) \ + for( type i2 = (type)0; i2 < static_cast(extent[d]); ++i2) { \ + LOOP_L_2_REDUX( val, func, type, m_offset, extent, d-1, i2 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define LOOP_L_4_REDUX( val, func, type, m_offset, extent, d, ... ) \ + for( type i3 = (type)0; i3 < static_cast(extent[d]); ++i3) { \ + LOOP_L_3_REDUX( val, func, type, m_offset, extent, d-1, i3 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define LOOP_L_5_REDUX( val, func, type, m_offset, extent, d, ... ) \ + for( type i4 = (type)0; i4 < static_cast(extent[d]); ++i4) { \ + LOOP_L_4_REDUX( val, func, type, m_offset, extent, d-1, i4 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define LOOP_L_6_REDUX( val, func, type, m_offset, extent, d, ... ) \ + for( type i5 = (type)0; i5 < static_cast(extent[d]); ++i5) { \ + LOOP_L_5_REDUX( val, func, type, m_offset, extent, d-1, i5 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define LOOP_L_7_REDUX( val, func, type, m_offset, extent, d, ... 
) \ + for( type i6 = (type)0; i6 < static_cast(extent[d]); ++i6) { \ + LOOP_L_6_REDUX( val, func, type, m_offset, extent, d-1, i6 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define LOOP_L_8_REDUX( val, func, type, m_offset, extent, d, ... ) \ + for( type i7 = (type)0; i7 < static_cast(extent[d]); ++i7) { \ + LOOP_L_7_REDUX( val, func, type, m_offset, extent, d-1, i7 + m_offset[d] , __VA_ARGS__ ) \ + } + +// Left vs Right +#define LOOP_LAYOUT_1_REDUX( val, func, type, is_left, m_offset, extent, rank ) \ + KOKKOS_ENABLE_IVDEP_MDRANGE \ + for( type i0 = (type)0; i0 < static_cast(extent[0]); ++i0) { \ + APPLY_REDUX( val, func, i0 + m_offset[0] ) \ + } + +#define LOOP_LAYOUT_2_REDUX( val, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i1 = (type)0; i1 < static_cast(extent[rank-1]); ++i1) { \ + LOOP_L_1_REDUX( val, func, type, m_offset, extent, rank-2, i1 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i1 = (type)0; i1 < static_cast(extent[0]); ++i1) { \ + LOOP_R_1_REDUX( val, func, type, m_offset, extent, 1 , i1 + m_offset[0] ) \ + } \ + } + +#define LOOP_LAYOUT_3_REDUX( val, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i2 = (type)0; i2 < static_cast(extent[rank-1]); ++i2) { \ + LOOP_L_2_REDUX( val, func, type, m_offset, extent, rank-2, i2 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i2 = (type)0; i2 < static_cast(extent[0]); ++i2) { \ + LOOP_R_2_REDUX( val, func, type, m_offset, extent, 1 , i2 + m_offset[0] ) \ + } \ + } + +#define LOOP_LAYOUT_4_REDUX( val, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i3 = (type)0; i3 < static_cast(extent[rank-1]); ++i3) { \ + LOOP_L_3_REDUX( val, func, type, m_offset, extent, rank-2, i3 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i3 = (type)0; i3 < static_cast(extent[0]); ++i3) { \ + LOOP_R_3_REDUX( val, func, type, m_offset, extent, 1 , i3 + m_offset[0] ) \ + } \ + } + +#define LOOP_LAYOUT_5_REDUX( val, 
func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i4 = (type)0; i4 < static_cast(extent[rank-1]); ++i4) { \ + LOOP_L_4_REDUX( val, func, type, m_offset, extent, rank-2, i4 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i4 = (type)0; i4 < static_cast(extent[0]); ++i4) { \ + LOOP_R_4_REDUX( val, func, type, m_offset, extent, 1 , i4 + m_offset[0] ) \ + } \ + } + +#define LOOP_LAYOUT_6_REDUX( val, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i5 = (type)0; i5 < static_cast(extent[rank-1]); ++i5) { \ + LOOP_L_5_REDUX( val, func, type, m_offset, extent, rank-2, i5 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i5 = (type)0; i5 < static_cast(extent[0]); ++i5) { \ + LOOP_R_5_REDUX( val, func, type, m_offset, extent, 1 , i5 + m_offset[0] ) \ + } \ + } + +#define LOOP_LAYOUT_7_REDUX( val, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i6 = (type)0; i6 < static_cast(extent[rank-1]); ++i6) { \ + LOOP_L_6_REDUX( val, func, type, m_offset, extent, rank-2, i6 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i6 = (type)0; i6 < static_cast(extent[0]); ++i6) { \ + LOOP_R_6_REDUX( val, func, type, m_offset, extent, 1 , i6 + m_offset[0] ) \ + } \ + } + +#define LOOP_LAYOUT_8_REDUX( val, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i7 = (type)0; i7 < static_cast(extent[rank-1]); ++i7) { \ + LOOP_L_7_REDUX( val, func, type, m_offset, extent, rank-2, i7 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i7 = (type)0; i7 < static_cast(extent[0]); ++i7) { \ + LOOP_R_7_REDUX( val, func, type, m_offset, extent, 1 , i7 + m_offset[0] ) \ + } \ + } + +// Partial vs Full Tile +#define TILE_LOOP_1_REDUX( val, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_1_REDUX( val, func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_1_REDUX( val, func, type, is_left, 
m_offset, extent_partial, rank ) } + +#define TILE_LOOP_2_REDUX( val, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_2_REDUX( val, func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_2_REDUX( val, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TILE_LOOP_3_REDUX( val, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_3_REDUX( val, func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_3_REDUX( val, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TILE_LOOP_4_REDUX( val, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_4_REDUX( val, func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_4_REDUX( val, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TILE_LOOP_5_REDUX( val, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_5_REDUX( val, func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_5_REDUX( val, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TILE_LOOP_6_REDUX( val, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_6_REDUX( val, func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_6_REDUX( val, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TILE_LOOP_7_REDUX( val, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_7_REDUX( val, func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_7_REDUX( val, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TILE_LOOP_8_REDUX( val, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_8_REDUX( val, func, type, is_left, m_offset, 
extent_full, rank ) } \ + else { LOOP_LAYOUT_8_REDUX( val, func, type, is_left, m_offset, extent_partial, rank ) } +// end New Loop Macros + + +// tagged macros +#define TAGGED_APPLY( tag, func, ... ) \ + func( tag, __VA_ARGS__ ); + +// LayoutRight +// d = 0 to start +#define TAGGED_LOOP_R_1( tag, func, type, m_offset, extent, d, ... ) \ + KOKKOS_ENABLE_IVDEP_MDRANGE \ + for( type i0 = (type)0; i0 < static_cast(extent[d]); ++i0) { \ + TAGGED_APPLY( tag, func, __VA_ARGS__, i0 + m_offset[d] ) \ + } + +#define TAGGED_LOOP_R_2( tag, func, type, m_offset, extent, d, ... ) \ + for( type i1 = (type)0; i1 < static_cast(extent[d]); ++i1) { \ + TAGGED_LOOP_R_1( tag, func, type, m_offset, extent, d+1 , __VA_ARGS__, i1 + m_offset[d] ) \ + } + +#define TAGGED_LOOP_R_3( tag, func, type, m_offset, extent, d, ... ) \ + for( type i2 = (type)0; i2 < static_cast(extent[d]); ++i2) { \ + TAGGED_LOOP_R_2( tag, func, type, m_offset, extent, d+1 , __VA_ARGS__, i2 + m_offset[d] ) \ + } + +#define TAGGED_LOOP_R_4( tag, func, type, m_offset, extent, d, ... ) \ + for( type i3 = (type)0; i3 < static_cast(extent[d]); ++i3) { \ + TAGGED_LOOP_R_3( tag, func, type, m_offset, extent, d+1 , __VA_ARGS__, i3 + m_offset[d] ) \ + } + +#define TAGGED_LOOP_R_5( tag, func, type, m_offset, extent, d, ... ) \ + for( type i4 = (type)0; i4 < static_cast(extent[d]); ++i4) { \ + TAGGED_LOOP_R_4( tag, func, type, m_offset, extent, d+1 , __VA_ARGS__, i4 + m_offset[d] ) \ + } + +#define TAGGED_LOOP_R_6( tag, func, type, m_offset, extent, d, ... ) \ + for( type i5 = (type)0; i5 < static_cast(extent[d]); ++i5) { \ + TAGGED_LOOP_R_5( tag, func, type, m_offset, extent, d+1 , __VA_ARGS__, i5 + m_offset[d] ) \ + } + +#define TAGGED_LOOP_R_7( tag, func, type, m_offset, extent, d, ... ) \ + for( type i6 = (type)0; i6 < static_cast(extent[d]); ++i6) { \ + TAGGED_LOOP_R_6( tag, func, type, m_offset, extent, d+1 , __VA_ARGS__, i6 + m_offset[d] ) \ + } + +#define TAGGED_LOOP_R_8( tag, func, type, m_offset, extent, d, ... 
) \ + for( type i7 = (type)0; i7 < static_cast(extent[d]); ++i7) { \ + TAGGED_LOOP_R_7( tag, func, type, m_offset, extent, d+1 , __VA_ARGS__, i7 + m_offset[d] ) \ + } + +//LayoutLeft +// d = rank-1 to start +#define TAGGED_LOOP_L_1( tag, func, type, m_offset, extent, d, ... ) \ + KOKKOS_ENABLE_IVDEP_MDRANGE \ + for( type i0 = (type)0; i0 < static_cast(extent[d]); ++i0) { \ + TAGGED_APPLY( tag, func, i0 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define TAGGED_LOOP_L_2( tag, func, type, m_offset, extent, d, ... ) \ + for( type i1 = (type)0; i1 < static_cast(extent[d]); ++i1) { \ + TAGGED_LOOP_L_1( tag, func, type, m_offset, extent, d-1, i1 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define TAGGED_LOOP_L_3( tag, func, type, m_offset, extent, d, ... ) \ + for( type i2 = (type)0; i2 < static_cast(extent[d]); ++i2) { \ + TAGGED_LOOP_L_2( tag, func, type, m_offset, extent, d-1, i2 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define TAGGED_LOOP_L_4( tag, func, type, m_offset, extent, d, ... ) \ + for( type i3 = (type)0; i3 < static_cast(extent[d]); ++i3) { \ + TAGGED_LOOP_L_3( tag, func, type, m_offset, extent, d-1, i3 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define TAGGED_LOOP_L_5( tag, func, type, m_offset, extent, d, ... ) \ + for( type i4 = (type)0; i4 < static_cast(extent[d]); ++i4) { \ + TAGGED_LOOP_L_4( tag, func, type, m_offset, extent, d-1, i4 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define TAGGED_LOOP_L_6( tag, func, type, m_offset, extent, d, ... ) \ + for( type i5 = (type)0; i5 < static_cast(extent[d]); ++i5) { \ + TAGGED_LOOP_L_5( tag, func, type, m_offset, extent, d-1, i5 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define TAGGED_LOOP_L_7( tag, func, type, m_offset, extent, d, ... ) \ + for( type i6 = (type)0; i6 < static_cast(extent[d]); ++i6) { \ + TAGGED_LOOP_L_6( tag, func, type, m_offset, extent, d-1, i6 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define TAGGED_LOOP_L_8( tag, func, type, m_offset, extent, d, ... 
) \ + for( type i7 = (type)0; i7 < static_cast(extent[d]); ++i7) { \ + TAGGED_LOOP_L_7( tag, func, type, m_offset, extent, d-1, i7 + m_offset[d] , __VA_ARGS__ ) \ + } + +// Left vs Right +// TODO: rank not necessary to pass through, can hardcode the values +#define TAGGED_LOOP_LAYOUT_1( tag, func, type, is_left, m_offset, extent, rank ) \ + KOKKOS_ENABLE_IVDEP_MDRANGE \ + for( type i0 = (type)0; i0 < static_cast(extent[0]); ++i0) { \ + TAGGED_APPLY( tag, func, i0 + m_offset[0] ) \ + } + +#define TAGGED_LOOP_LAYOUT_2( tag, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i1 = (type)0; i1 < static_cast(extent[rank-1]); ++i1) { \ + TAGGED_LOOP_L_1( tag, func, type, m_offset, extent, rank-2, i1 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i1 = (type)0; i1 < static_cast(extent[0]); ++i1) { \ + TAGGED_LOOP_R_1( tag, func, type, m_offset, extent, 1 , i1 + m_offset[0] ) \ + } \ + } + +#define TAGGED_LOOP_LAYOUT_3( tag, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i2 = (type)0; i2 < static_cast(extent[rank-1]); ++i2) { \ + TAGGED_LOOP_L_2( tag, func, type, m_offset, extent, rank-2, i2 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i2 = (type)0; i2 < static_cast(extent[0]); ++i2) { \ + TAGGED_LOOP_R_2( tag, func, type, m_offset, extent, 1 , i2 + m_offset[0] ) \ + } \ + } + +#define TAGGED_LOOP_LAYOUT_4( tag, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i3 = (type)0; i3 < static_cast(extent[rank-1]); ++i3) { \ + TAGGED_LOOP_L_3( tag, func, type, m_offset, extent, rank-2, i3 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i3 = (type)0; i3 < static_cast(extent[0]); ++i3) { \ + TAGGED_LOOP_R_3( tag, func, type, m_offset, extent, 1 , i3 + m_offset[0] ) \ + } \ + } + +#define TAGGED_LOOP_LAYOUT_5( tag, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i4 = (type)0; i4 < static_cast(extent[rank-1]); ++i4) { \ + 
TAGGED_LOOP_L_4( tag, func, type, m_offset, extent, rank-2, i4 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i4 = (type)0; i4 < static_cast(extent[0]); ++i4) { \ + TAGGED_LOOP_R_4( tag, func, type, m_offset, extent, 1 , i4 + m_offset[0] ) \ + } \ + } + +#define TAGGED_LOOP_LAYOUT_6( tag, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i5 = (type)0; i5 < static_cast(extent[rank-1]); ++i5) { \ + TAGGED_LOOP_L_5( tag, func, type, m_offset, extent, rank-2, i5 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i5 = (type)0; i5 < static_cast(extent[0]); ++i5) { \ + TAGGED_LOOP_R_5( tag, func, type, m_offset, extent, 1 , i5 + m_offset[0] ) \ + } \ + } + +#define TAGGED_LOOP_LAYOUT_7( tag, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i6 = (type)0; i6 < static_cast(extent[rank-1]); ++i6) { \ + TAGGED_LOOP_L_6( tag, func, type, m_offset, extent, rank-2, i6 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i6 = (type)0; i6 < static_cast(extent[0]); ++i6) { \ + TAGGED_LOOP_R_6( tag, func, type, m_offset, extent, 1 , i6 + m_offset[0] ) \ + } \ + } + +#define TAGGED_LOOP_LAYOUT_8( tag, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i7 = (type)0; i7 < static_cast(extent[rank-1]); ++i7) { \ + TAGGED_LOOP_L_7( tag, func, type, m_offset, extent, rank-2, i7 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i7 = (type)0; i7 < static_cast(extent[0]); ++i7) { \ + TAGGED_LOOP_R_7( tag, func, type, m_offset, extent, 1 , i7 + m_offset[0] ) \ + } \ + } + +// Partial vs Full Tile +#define TAGGED_TILE_LOOP_1( tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_1( tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_1( tag, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TAGGED_TILE_LOOP_2( tag, func, type, is_left, cond, m_offset, extent_full, 
extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_2( tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_2( tag, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TAGGED_TILE_LOOP_3( tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_3( tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_3( tag, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TAGGED_TILE_LOOP_4( tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_4( tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_4( tag, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TAGGED_TILE_LOOP_5( tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_5( tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_5( tag, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TAGGED_TILE_LOOP_6( tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_6( tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_6( tag, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TAGGED_TILE_LOOP_7( tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_7( tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_7( tag, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TAGGED_TILE_LOOP_8( tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_8( tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_8( tag, func, type, is_left, m_offset, extent_partial, 
rank ) } + + +// parallel_reduce, tagged +// Reduction version +#define TAGGED_APPLY_REDUX( val, tag, func, ... ) \ + func( tag, __VA_ARGS__, val ); + +// LayoutRight +// d = 0 to start +#define TAGGED_LOOP_R_1_REDUX( val, tag, func, type, m_offset, extent, d, ... ) \ + KOKKOS_ENABLE_IVDEP_MDRANGE \ + for( type i0 = (type)0; i0 < static_cast(extent[d]); ++i0) { \ + TAGGED_APPLY_REDUX( val, tag, func, __VA_ARGS__, i0 + m_offset[d] ) \ + } + +#define TAGGED_LOOP_R_2_REDUX( val, tag, func, type, m_offset, extent, d, ... ) \ + for( type i1 = (type)0; i1 < static_cast(extent[d]); ++i1) { \ + TAGGED_LOOP_R_1_REDUX( val, tag, func, type, m_offset, extent, d+1 , __VA_ARGS__, i1 + m_offset[d] ) \ + } + +#define TAGGED_LOOP_R_3_REDUX( val, tag, func, type, m_offset, extent, d, ... ) \ + for( type i2 = (type)0; i2 < static_cast(extent[d]); ++i2) { \ + TAGGED_LOOP_R_2_REDUX( val, tag, func, type, m_offset, extent, d+1 , __VA_ARGS__, i2 + m_offset[d] ) \ + } + +#define TAGGED_LOOP_R_4_REDUX( val, tag, func, type, m_offset, extent, d, ... ) \ + for( type i3 = (type)0; i3 < static_cast(extent[d]); ++i3) { \ + TAGGED_LOOP_R_3_REDUX( val, tag, func, type, m_offset, extent, d+1 , __VA_ARGS__, i3 + m_offset[d] ) \ + } + +#define TAGGED_LOOP_R_5_REDUX( val, tag, func, type, m_offset, extent, d, ... ) \ + for( type i4 = (type)0; i4 < static_cast(extent[d]); ++i4) { \ + TAGGED_LOOP_R_4_REDUX( val, tag, func, type, m_offset, extent, d+1 , __VA_ARGS__, i4 + m_offset[d] ) \ + } + +#define TAGGED_LOOP_R_6_REDUX( val, tag, func, type, m_offset, extent, d, ... ) \ + for( type i5 = (type)0; i5 < static_cast(extent[d]); ++i5) { \ + TAGGED_LOOP_R_5_REDUX( val, tag, func, type, m_offset, extent, d+1 , __VA_ARGS__, i5 + m_offset[d] ) \ + } + +#define TAGGED_LOOP_R_7_REDUX( val, tag, func, type, m_offset, extent, d, ... 
) \ + for( type i6 = (type)0; i6 < static_cast(extent[d]); ++i6) { \ + TAGGED_LOOP_R_6_REDUX( val, tag, func, type, m_offset, extent, d+1 , __VA_ARGS__, i6 + m_offset[d] ) \ + } + +#define TAGGED_LOOP_R_8_REDUX( val, tag, func, type, m_offset, extent, d, ... ) \ + for( type i7 = (type)0; i7 < static_cast(extent[d]); ++i7) { \ + TAGGED_LOOP_R_7_REDUX( val, tag, func, type, m_offset, extent, d+1 , __VA_ARGS__, i7 + m_offset[d] ) \ + } + +//LayoutLeft +// d = rank-1 to start +#define TAGGED_LOOP_L_1_REDUX( val, tag, func, type, m_offset, extent, d, ... ) \ + KOKKOS_ENABLE_IVDEP_MDRANGE \ + for( type i0 = (type)0; i0 < static_cast(extent[d]); ++i0) { \ + TAGGED_APPLY_REDUX( val, tag, func, i0 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define TAGGED_LOOP_L_2_REDUX( val, tag, func, type, m_offset, extent, d, ... ) \ + for( type i1 = (type)0; i1 < static_cast(extent[d]); ++i1) { \ + TAGGED_LOOP_L_1_REDUX( val, tag, func, type, m_offset, extent, d-1, i1 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define TAGGED_LOOP_L_3_REDUX( val, tag, func, type, m_offset, extent, d, ... ) \ + for( type i2 = (type)0; i2 < static_cast(extent[d]); ++i2) { \ + TAGGED_LOOP_L_2_REDUX( val, tag, func, type, m_offset, extent, d-1, i2 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define TAGGED_LOOP_L_4_REDUX( val, tag, func, type, m_offset, extent, d, ... ) \ + for( type i3 = (type)0; i3 < static_cast(extent[d]); ++i3) { \ + TAGGED_LOOP_L_3_REDUX( val, tag, func, type, m_offset, extent, d-1, i3 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define TAGGED_LOOP_L_5_REDUX( val, tag, func, type, m_offset, extent, d, ... ) \ + for( type i4 = (type)0; i4 < static_cast(extent[d]); ++i4) { \ + TAGGED_LOOP_L_4_REDUX( val, tag, func, type, m_offset, extent, d-1, i4 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define TAGGED_LOOP_L_6_REDUX( val, tag, func, type, m_offset, extent, d, ... 
) \ + for( type i5 = (type)0; i5 < static_cast(extent[d]); ++i5) { \ + TAGGED_LOOP_L_5_REDUX( val, tag, func, type, m_offset, extent, d-1, i5 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define TAGGED_LOOP_L_7_REDUX( val, tag, func, type, m_offset, extent, d, ... ) \ + for( type i6 = (type)0; i6 < static_cast(extent[d]); ++i6) { \ + TAGGED_LOOP_L_6_REDUX( val, tag, func, type, m_offset, extent, d-1, i6 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define TAGGED_LOOP_L_8_REDUX( val, tag, func, type, m_offset, extent, d, ... ) \ + for( type i7 = (type)0; i7 < static_cast(extent[d]); ++i7) { \ + TAGGED_LOOP_L_7_REDUX( val, tag, func, type, m_offset, extent, d-1, i7 + m_offset[d] , __VA_ARGS__ ) \ + } + +// Left vs Right +#define TAGGED_LOOP_LAYOUT_1_REDUX( val, tag, func, type, is_left, m_offset, extent, rank ) \ + KOKKOS_ENABLE_IVDEP_MDRANGE \ + for( type i0 = (type)0; i0 < static_cast(extent[0]); ++i0) { \ + TAGGED_APPLY_REDUX( val, tag, func, i0 + m_offset[0] ) \ + } + +#define TAGGED_LOOP_LAYOUT_2_REDUX( val, tag, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i1 = (type)0; i1 < static_cast(extent[rank-1]); ++i1) { \ + TAGGED_LOOP_L_1_REDUX( val, tag, func, type, m_offset, extent, rank-2, i1 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i1 = (type)0; i1 < static_cast(extent[0]); ++i1) { \ + TAGGED_LOOP_R_1_REDUX( val, tag, func, type, m_offset, extent, 1 , i1 + m_offset[0] ) \ + } \ + } + +#define TAGGED_LOOP_LAYOUT_3_REDUX( val, tag, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i2 = (type)0; i2 < static_cast(extent[rank-1]); ++i2) { \ + TAGGED_LOOP_L_2_REDUX( val, tag, func, type, m_offset, extent, rank-2, i2 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i2 = (type)0; i2 < static_cast(extent[0]); ++i2) { \ + TAGGED_LOOP_R_2_REDUX( val, tag, func, type, m_offset, extent, 1 , i2 + m_offset[0] ) \ + } \ + } + +#define TAGGED_LOOP_LAYOUT_4_REDUX( val, tag, func, type, is_left, m_offset, 
extent, rank ) \ + if (is_left) { \ + for( type i3 = (type)0; i3 < static_cast(extent[rank-1]); ++i3) { \ + TAGGED_LOOP_L_3_REDUX( val, tag, func, type, m_offset, extent, rank-2, i3 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i3 = (type)0; i3 < static_cast(extent[0]); ++i3) { \ + TAGGED_LOOP_R_3_REDUX( val, tag, func, type, m_offset, extent, 1 , i3 + m_offset[0] ) \ + } \ + } + +#define TAGGED_LOOP_LAYOUT_5_REDUX( val, tag, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i4 = (type)0; i4 < static_cast(extent[rank-1]); ++i4) { \ + TAGGED_LOOP_L_4_REDUX( val, tag, func, type, m_offset, extent, rank-2, i4 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i4 = (type)0; i4 < static_cast(extent[0]); ++i4) { \ + TAGGED_LOOP_R_4_REDUX( val, tag, func, type, m_offset, extent, 1 , i4 + m_offset[0] ) \ + } \ + } + +#define TAGGED_LOOP_LAYOUT_6_REDUX( val, tag, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i5 = (type)0; i5 < static_cast(extent[rank-1]); ++i5) { \ + TAGGED_LOOP_L_5_REDUX( val, tag, func, type, m_offset, extent, rank-2, i5 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i5 = (type)0; i5 < static_cast(extent[0]); ++i5) { \ + TAGGED_LOOP_R_5_REDUX( val, tag, func, type, m_offset, extent, 1 , i5 + m_offset[0] ) \ + } \ + } + +#define TAGGED_LOOP_LAYOUT_7_REDUX( val, tag, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i6 = (type)0; i6 < static_cast(extent[rank-1]); ++i6) { \ + TAGGED_LOOP_L_6_REDUX( val, tag, func, type, m_offset, extent, rank-2, i6 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i6 = (type)0; i6 < static_cast(extent[0]); ++i6) { \ + TAGGED_LOOP_R_6_REDUX( val, tag, func, type, m_offset, extent, 1 , i6 + m_offset[0] ) \ + } \ + } + +#define TAGGED_LOOP_LAYOUT_8_REDUX( val, tag, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i7 = (type)0; i7 < static_cast(extent[rank-1]); ++i7) { \ + 
TAGGED_LOOP_L_7_REDUX( val, tag, func, type, m_offset, extent, rank-2, i7 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i7 = (type)0; i7 < static_cast(extent[0]); ++i7) { \ + TAGGED_LOOP_R_7_REDUX( val, tag, func, type, m_offset, extent, 1 , i7 + m_offset[0] ) \ + } \ + } + +// Partial vs Full Tile +#define TAGGED_TILE_LOOP_1_REDUX( val, tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_1_REDUX( val, tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_1_REDUX( val, tag, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TAGGED_TILE_LOOP_2_REDUX( val, tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_2_REDUX( val, tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_2_REDUX( val, tag, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TAGGED_TILE_LOOP_3_REDUX( val, tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_3_REDUX( val, tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_3_REDUX( val, tag, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TAGGED_TILE_LOOP_4_REDUX( val, tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_4_REDUX( val, tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_4_REDUX( val, tag, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TAGGED_TILE_LOOP_5_REDUX( val, tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_5_REDUX( val, tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_5_REDUX( val, tag, func, type, is_left, m_offset, extent_partial, rank ) } + +#define 
TAGGED_TILE_LOOP_6_REDUX( val, tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_6_REDUX( val, tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_6_REDUX( val, tag, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TAGGED_TILE_LOOP_7_REDUX( val, tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_7_REDUX( val, tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_7_REDUX( val, tag, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TAGGED_TILE_LOOP_8_REDUX( val, tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_8_REDUX( val, tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_8_REDUX( val, tag, func, type, is_left, m_offset, extent_partial, rank ) } + +// end tagged macros + + + + +// Structs for calling loops +template < int Rank, bool IsLeft, typename IType, typename Tagged, typename Enable = void > +struct Tile_Loop_Type; + +template < bool IsLeft, typename IType > +struct Tile_Loop_Type<1, IsLeft, IType, void, void > +{ + template < typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_1( func, IType, IsLeft, cond, offset, a, b, 1 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_1_REDUX( value, func, IType, IsLeft, cond, offset, a, b, 1 ); + } +}; + +template < bool IsLeft, typename IType > +struct Tile_Loop_Type<2, IsLeft, IType, void, void> +{ + template < typename Func, typename Offset, typename ExtentA, 
typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_2( func, IType, IsLeft, cond, offset, a, b, 2 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_2_REDUX( value, func, IType, IsLeft, cond, offset, a, b, 2 ); + } +}; + +template < bool IsLeft, typename IType > +struct Tile_Loop_Type<3, IsLeft, IType, void, void> +{ + template < typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_3( func, IType, IsLeft, cond, offset, a, b, 3 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_3_REDUX( value, func, IType, IsLeft, cond, offset, a, b, 3 ); + } +}; + +template < bool IsLeft, typename IType > +struct Tile_Loop_Type<4, IsLeft, IType, void, void> +{ + template < typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_4( func, IType, IsLeft, cond, offset, a, b, 4 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_4_REDUX( value, func, IType, IsLeft, cond, offset, a, b, 4 ); + } +}; + +template < bool IsLeft, typename IType > +struct Tile_Loop_Type<5, IsLeft, IType, void, void> +{ + template < typename Func, typename Offset, typename 
ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_5( func, IType, IsLeft, cond, offset, a, b, 5 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_5_REDUX( value, func, IType, IsLeft, cond, offset, a, b, 5 ); + } +}; + +template < bool IsLeft, typename IType > +struct Tile_Loop_Type<6, IsLeft, IType, void, void> +{ + template < typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_6( func, IType, IsLeft, cond, offset, a, b, 6 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_6_REDUX( value, func, IType, IsLeft, cond, offset, a, b, 6 ); + } +}; + +template < bool IsLeft, typename IType > +struct Tile_Loop_Type<7, IsLeft, IType, void, void> +{ + template < typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_7( func, IType, IsLeft, cond, offset, a, b, 7 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_7_REDUX( value, func, IType, IsLeft, cond, offset, a, b, 7 ); + } +}; + +template < bool IsLeft, typename IType > +struct Tile_Loop_Type<8, IsLeft, IType, void, void> +{ + template < typename Func, typename Offset, 
typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_8( func, IType, IsLeft, cond, offset, a, b, 8 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_8_REDUX( value, func, IType, IsLeft, cond, offset, a, b, 8 ); + } +}; + +// tagged versions + +template < bool IsLeft, typename IType, typename Tagged > +struct Tile_Loop_Type<1, IsLeft, IType, Tagged, typename std::enable_if< !std::is_same::value>::type > +{ + template < typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_1( Tagged(), func, IType, IsLeft, cond, offset, a, b, 1 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_1_REDUX( value, Tagged(), func, IType, IsLeft, cond, offset, a, b, 1 ); + } +}; + +template < bool IsLeft, typename IType, typename Tagged > +struct Tile_Loop_Type<2, IsLeft, IType, Tagged, typename std::enable_if< !std::is_same::value>::type> +{ + template < typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_2( Tagged(), func, IType, IsLeft, cond, offset, a, b, 2 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + 
TAGGED_TILE_LOOP_2_REDUX( value, Tagged(), func, IType, IsLeft, cond, offset, a, b, 2 ); + } +}; + +template < bool IsLeft, typename IType, typename Tagged > +struct Tile_Loop_Type<3, IsLeft, IType, Tagged, typename std::enable_if< !std::is_same::value>::type> +{ + template < typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_3( Tagged(), func, IType, IsLeft, cond, offset, a, b, 3 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_3_REDUX( value, Tagged(), func, IType, IsLeft, cond, offset, a, b, 3 ); + } +}; + +template < bool IsLeft, typename IType, typename Tagged > +struct Tile_Loop_Type<4, IsLeft, IType, Tagged, typename std::enable_if< !std::is_same::value>::type> +{ + template < typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_4( Tagged(), func, IType, IsLeft, cond, offset, a, b, 4 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_4_REDUX( value, Tagged(), func, IType, IsLeft, cond, offset, a, b, 4 ); + } +}; + +template < bool IsLeft, typename IType, typename Tagged > +struct Tile_Loop_Type<5, IsLeft, IType, Tagged, typename std::enable_if< !std::is_same::value>::type> +{ + template < typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + 
TAGGED_TILE_LOOP_5( Tagged(), func, IType, IsLeft, cond, offset, a, b, 5 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_5_REDUX( value, Tagged(), func, IType, IsLeft, cond, offset, a, b, 5 ); + } +}; + +template < bool IsLeft, typename IType, typename Tagged > +struct Tile_Loop_Type<6, IsLeft, IType, Tagged, typename std::enable_if< !std::is_same::value>::type> +{ + template < typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_6( Tagged(), func, IType, IsLeft, cond, offset, a, b, 6 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_6_REDUX( value, Tagged(), func, IType, IsLeft, cond, offset, a, b, 6 ); + } +}; + +template < bool IsLeft, typename IType, typename Tagged > +struct Tile_Loop_Type<7, IsLeft, IType, Tagged, typename std::enable_if< !std::is_same::value>::type> +{ + template < typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_7( Tagged(), func, IType, IsLeft, cond, offset, a, b, 7 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_7_REDUX( value, Tagged(), func, IType, IsLeft, cond, offset, a, b, 7 ); + } +}; + +template < bool IsLeft, typename IType, typename 
Tagged > +struct Tile_Loop_Type<8, IsLeft, IType, Tagged, typename std::enable_if< !std::is_same::value>::type> +{ + template < typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_8( Tagged(), func, IType, IsLeft, cond, offset, a, b, 8 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_8_REDUX( value, Tagged(), func, IType, IsLeft, cond, offset, a, b, 8 ); + } +}; +// end Structs for calling loops + + +template +using is_void = std::is_same< T , void >; + +template < typename RP + , typename Functor + , typename Tag = void + , typename ValueType = void + , typename Enable = void + > +struct HostIterateTile; + +//For ParallelFor +template < typename RP + , typename Functor + , typename Tag + , typename ValueType + > +struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_if< is_void::value >::type > +{ + using index_type = typename RP::index_type; + using point_type = typename RP::point_type; + + using value_type = ValueType; + + inline + HostIterateTile( RP const& rp, Functor const& func ) + : m_rp(rp) + , m_func(func) + { + } + + inline + bool check_iteration_bounds( point_type& partial_tile , point_type& offset ) const { + bool is_full_tile = true; + + for ( int i = 0; i < RP::rank; ++i ) { + if ((offset[i] + m_rp.m_tile[i]) <= m_rp.m_upper[i]) { + partial_tile[i] = m_rp.m_tile[i] ; + } + else { + is_full_tile = false ; + partial_tile[i] = (m_rp.m_upper[i] - 1 - offset[i]) == 0 ? 1 + : (m_rp.m_upper[i] - m_rp.m_tile[i]) > 0 ? 
(m_rp.m_upper[i] - offset[i]) + : (m_rp.m_upper[i] - m_rp.m_lower[i]) ; // when single tile encloses range + } + } + + return is_full_tile ; + } // end check bounds + + + template + struct RankTag + { + typedef RankTag type; + enum { value = (int)Rank }; + }; + +#if KOKKOS_ENABLE_NEW_LOOP_MACROS + template + inline + void + operator()(IType tile_idx) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + Tile_Loop_Type< RP::rank, (RP::inner_direction == RP::Left), index_type, Tag >::apply( m_func, full_tile, m_offset, m_rp.m_tile, m_tiledims ); + + } + +#else + template + inline + void + operator()(IType tile_idx) const + { operator_impl( tile_idx , RankTag() ); } + // added due to compiler error when using sfinae to choose operator based on rank w/ cuda+serial + + template + inline + void operator_impl( IType tile_idx , const RankTag<2> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_2L(index_type, m_tiledims) { + apply( LOOP_ARGS_2 ); + } + } else { +// #pragma simd + LOOP_2L(index_type, m_tiledims) { + apply( LOOP_ARGS_2 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_2R(index_type, m_tiledims) { + 
apply( LOOP_ARGS_2 ); + } + } else { +// #pragma simd + LOOP_2R(index_type, m_tiledims) { + apply( LOOP_ARGS_2 ); + } + } + } // end RP::Right + + } //end op() rank == 2 + + + template + inline + void operator_impl( IType tile_idx , const RankTag<3> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_3L(index_type, m_tiledims) { + apply( LOOP_ARGS_3 ); + } + } else { +// #pragma simd + LOOP_3L(index_type, m_tiledims) { + apply( LOOP_ARGS_3 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_3R(index_type, m_tiledims) { + apply( LOOP_ARGS_3 ); + } + } else { +// #pragma simd + LOOP_3R(index_type, m_tiledims) { + apply( LOOP_ARGS_3 ); + } + } + } // end RP::Right + + } //end op() rank == 3 + + + template + inline + void operator_impl( IType tile_idx , const RankTag<4> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_4L(index_type, m_tiledims) { + apply( LOOP_ARGS_4 ); + } + } else { +// #pragma simd + LOOP_4L(index_type, m_tiledims) { + apply( LOOP_ARGS_4 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// 
#pragma simd + LOOP_4R(index_type, m_tiledims) { + apply( LOOP_ARGS_4 ); + } + } else { +// #pragma simd + LOOP_4R(index_type, m_tiledims) { + apply( LOOP_ARGS_4 ); + } + } + } // end RP::Right + + } //end op() rank == 4 + + + template + inline + void operator_impl( IType tile_idx , const RankTag<5> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_5L(index_type, m_tiledims) { + apply( LOOP_ARGS_5 ); + } + } else { +// #pragma simd + LOOP_5L(index_type, m_tiledims) { + apply( LOOP_ARGS_5 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_5R(index_type, m_tiledims) { + apply( LOOP_ARGS_5 ); + } + } else { +// #pragma simd + LOOP_5R(index_type, m_tiledims) { + apply( LOOP_ARGS_5 ); + } + } + } // end RP::Right + + } //end op() rank == 5 + + + template + inline + void operator_impl( IType tile_idx , const RankTag<6> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_6L(index_type, m_tiledims) { + apply( LOOP_ARGS_6 ); + } + } else { +// #pragma simd + LOOP_6L(index_type, m_tiledims) { + apply( LOOP_ARGS_6 ); + } + } + } // 
end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_6R(index_type, m_tiledims) { + apply( LOOP_ARGS_6 ); + } + } else { +// #pragma simd + LOOP_6R(index_type, m_tiledims) { + apply( LOOP_ARGS_6 ); + } + } + } // end RP::Right + + } //end op() rank == 6 + + + template + inline + void operator_impl( IType tile_idx , const RankTag<7> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_7L(index_type, m_tiledims) { + apply( LOOP_ARGS_7 ); + } + } else { +// #pragma simd + LOOP_7L(index_type, m_tiledims) { + apply( LOOP_ARGS_7 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_7R(index_type, m_tiledims) { + apply( LOOP_ARGS_7 ); + } + } else { +// #pragma simd + LOOP_7R(index_type, m_tiledims) { + apply( LOOP_ARGS_7 ); + } + } + } // end RP::Right + + } //end op() rank == 7 + + + template + inline + void operator_impl( IType tile_idx , const RankTag<8> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_8L(index_type, m_tiledims) { + apply( LOOP_ARGS_8 ); + } + } else { +// #pragma simd + LOOP_8L(index_type, 
m_tiledims) { + apply( LOOP_ARGS_8 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_8R(index_type, m_tiledims) { + apply( LOOP_ARGS_8 ); + } + } else { +// #pragma simd + LOOP_8R(index_type, m_tiledims) { + apply( LOOP_ARGS_8 ); + } + } + } // end RP::Right + + } //end op() rank == 8 +#endif + + + template + typename std::enable_if<( sizeof...(Args) == RP::rank && std::is_same::value), void>::type + apply(Args &&... args) const + { + m_func(args...); + } + + template + typename std::enable_if<( sizeof...(Args) == RP::rank && !std::is_same::value), void>::type + apply(Args &&... args) const + { + m_func( m_tag, args...); + } + + + RP const& m_rp; + Functor const& m_func; + typename std::conditional< std::is_same::value,int,Tag>::type m_tag; +// value_type & m_v; + +}; + + +// ValueType: For reductions +template < typename RP + , typename Functor + , typename Tag + , typename ValueType + > +struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_if< !is_void::value >::type > +{ + using index_type = typename RP::index_type; + using point_type = typename RP::point_type; + + using value_type = ValueType; + + inline + HostIterateTile( RP const& rp, Functor const& func, value_type & v ) + : m_rp(rp) //Cuda 7.0 does not like braces... 
+ , m_func(func) + , m_v(v) // use with non-void ValueType struct + { +// Errors due to braces rather than parenthesis for init (with cuda 7.0) +// /home/ndellin/kokkos/core/src/impl/KokkosExp_Host_IterateTile.hpp:1216:98: error: too many braces around initializer for ‘int’ [-fpermissive] +// /home/ndellin/kokkos/core/src/impl/KokkosExp_Host_IterateTile.hpp:1216:98: error: aggregate value used where an integer was expected + } + + inline + bool check_iteration_bounds( point_type& partial_tile , point_type& offset ) const { + bool is_full_tile = true; + + for ( int i = 0; i < RP::rank; ++i ) { + if ((offset[i] + m_rp.m_tile[i]) <= m_rp.m_upper[i]) { + partial_tile[i] = m_rp.m_tile[i] ; + } + else { + is_full_tile = false ; + partial_tile[i] = (m_rp.m_upper[i] - 1 - offset[i]) == 0 ? 1 + : (m_rp.m_upper[i] - m_rp.m_tile[i]) > 0 ? (m_rp.m_upper[i] - offset[i]) + : (m_rp.m_upper[i] - m_rp.m_lower[i]) ; // when single tile encloses range + } + } + + return is_full_tile ; + } // end check bounds + + + template + struct RankTag + { + typedef RankTag type; + enum { value = (int)Rank }; + }; + + +#if KOKKOS_ENABLE_NEW_LOOP_MACROS + template + inline + void + operator()(IType tile_idx) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + Tile_Loop_Type< RP::rank, (RP::inner_direction == RP::Left), index_type, Tag >::apply( m_v, m_func, full_tile, m_offset, m_rp.m_tile, m_tiledims ); + + } + +#else + template + inline + void + operator()(IType tile_idx) const + { operator_impl( tile_idx , RankTag() ); } + // added due to compiler error when using sfinae to choose operator based on rank + + + template + 
inline + void operator_impl( IType tile_idx , const RankTag<2> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_2L(index_type, m_tiledims) { + apply( LOOP_ARGS_2 ); + } + } else { +// #pragma simd + LOOP_2L(index_type, m_tiledims) { + apply( LOOP_ARGS_2 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_2R(index_type, m_tiledims) { + apply( LOOP_ARGS_2 ); + } + } else { +// #pragma simd + LOOP_2R(index_type, m_tiledims) { + apply( LOOP_ARGS_2 ); + } + } + } // end RP::Right + + } //end op() rank == 2 + + + template + inline + void operator_impl( IType tile_idx , const RankTag<3> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_3L(index_type, m_tiledims) { + apply( LOOP_ARGS_3 ); + } + } else { +// #pragma simd + LOOP_3L(index_type, m_tiledims) { + apply( LOOP_ARGS_3 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_3R(index_type, m_tiledims) { + apply( LOOP_ARGS_3 ); + } + } else { +// #pragma simd + LOOP_3R(index_type, m_tiledims) { + apply( LOOP_ARGS_3 ); + } + } + } // end RP::Right 
+ + } //end op() rank == 3 + + + template + inline + void operator_impl( IType tile_idx , const RankTag<4> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_4L(index_type, m_tiledims) { + apply( LOOP_ARGS_4 ); + } + } else { +// #pragma simd + LOOP_4L(index_type, m_tiledims) { + apply( LOOP_ARGS_4 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_4R(index_type, m_tiledims) { + apply( LOOP_ARGS_4 ); + } + } else { +// #pragma simd + LOOP_4R(index_type, m_tiledims) { + apply( LOOP_ARGS_4 ); + } + } + } // end RP::Right + + } //end op() rank == 4 + + + template + inline + void operator_impl( IType tile_idx , const RankTag<5> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_5L(index_type, m_tiledims) { + apply( LOOP_ARGS_5 ); + } + } else { +// #pragma simd + LOOP_5L(index_type, m_tiledims) { + apply( LOOP_ARGS_5 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_5R(index_type, m_tiledims) { + apply( LOOP_ARGS_5 ); + } + } else { +// #pragma simd + LOOP_5R(index_type, m_tiledims) { + apply( 
LOOP_ARGS_5 ); + } + } + } // end RP::Right + + } //end op() rank == 5 + + + template + inline + void operator_impl( IType tile_idx , const RankTag<6> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_6L(index_type, m_tiledims) { + apply( LOOP_ARGS_6 ); + } + } else { +// #pragma simd + LOOP_6L(index_type, m_tiledims) { + apply( LOOP_ARGS_6 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_6R(index_type, m_tiledims) { + apply( LOOP_ARGS_6 ); + } + } else { +// #pragma simd + LOOP_6R(index_type, m_tiledims) { + apply( LOOP_ARGS_6 ); + } + } + } // end RP::Right + + } //end op() rank == 6 + + + template + inline + void operator_impl( IType tile_idx , const RankTag<7> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_7L(index_type, m_tiledims) { + apply( LOOP_ARGS_7 ); + } + } else { +// #pragma simd + LOOP_7L(index_type, m_tiledims) { + apply( LOOP_ARGS_7 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_7R(index_type, m_tiledims) { + apply( LOOP_ARGS_7 ); + } + } else { +// #pragma simd 
+ LOOP_7R(index_type, m_tiledims) { + apply( LOOP_ARGS_7 ); + } + } + } // end RP::Right + + } //end op() rank == 7 + + + template + inline + void operator_impl( IType tile_idx , const RankTag<8> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_8L(index_type, m_tiledims) { + apply( LOOP_ARGS_8 ); + } + } else { +// #pragma simd + LOOP_8L(index_type, m_tiledims) { + apply( LOOP_ARGS_8 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_8R(index_type, m_tiledims) { + apply( LOOP_ARGS_8 ); + } + } else { +// #pragma simd + LOOP_8R(index_type, m_tiledims) { + apply( LOOP_ARGS_8 ); + } + } + } // end RP::Right + + } //end op() rank == 8 +#endif + + + template + typename std::enable_if<( sizeof...(Args) == RP::rank && std::is_same::value), void>::type + apply(Args &&... args) const + { + m_func(args... , m_v); + } + + template + typename std::enable_if<( sizeof...(Args) == RP::rank && !std::is_same::value), void>::type + apply(Args &&... args) const + { + m_func( m_tag, args... 
, m_v); + } + + + RP const& m_rp; + Functor const& m_func; + value_type & m_v; + typename std::conditional< std::is_same::value,int,Tag>::type m_tag; + +}; + + +// ------------------------------------------------------------------ // + +// MDFunctor - wraps the range_policy and functor to pass to IterateTile +// Serial, Threads, OpenMP +// Cuda uses DeviceIterateTile directly within md_parallel_for +// ParallelReduce +template < typename MDRange, typename Functor, typename ValueType = void > +struct MDFunctor +{ + using range_policy = MDRange; + using functor_type = Functor; + using value_type = ValueType; + using work_tag = typename range_policy::work_tag; + using index_type = typename range_policy::index_type; + using iterate_type = typename Kokkos::Experimental::Impl::HostIterateTile< MDRange + , Functor + , work_tag + , value_type + >; + + + inline + MDFunctor( MDRange const& range, Functor const& f, ValueType & v ) + : m_range( range ) + , m_func( f ) + {} + + inline + MDFunctor( MDFunctor const& ) = default; + + inline + MDFunctor& operator=( MDFunctor const& ) = default; + + inline + MDFunctor( MDFunctor && ) = default; + + inline + MDFunctor& operator=( MDFunctor && ) = default; + +// KOKKOS_FORCEINLINE_FUNCTION //Caused cuda warning - __host__ warning + inline + void operator()(index_type t, value_type & v) const + { + iterate_type(m_range, m_func, v)(t); + } + + MDRange m_range; + Functor m_func; +}; + +// ParallelFor +template < typename MDRange, typename Functor > +struct MDFunctor< MDRange, Functor, void > +{ + using range_policy = MDRange; + using functor_type = Functor; + using work_tag = typename range_policy::work_tag; + using index_type = typename range_policy::index_type; + using iterate_type = typename Kokkos::Experimental::Impl::HostIterateTile< MDRange + , Functor + , work_tag + , void + >; + + + inline + MDFunctor( MDRange const& range, Functor const& f ) + : m_range( range ) + , m_func( f ) + {} + + inline + MDFunctor( MDFunctor const& ) = 
default; + + inline + MDFunctor& operator=( MDFunctor const& ) = default; + + inline + MDFunctor( MDFunctor && ) = default; + + inline + MDFunctor& operator=( MDFunctor && ) = default; + + inline + void operator()(index_type t) const + { + iterate_type(m_range, m_func)(t); + } + + MDRange m_range; + Functor m_func; +}; + +#undef KOKKOS_ENABLE_NEW_LOOP_MACROS + +} } } //end namespace Kokkos::Experimental::Impl + + +#endif diff --git a/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp b/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp index 0ffbc0548a..7d7fd3d133 100644 --- a/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp @@ -56,12 +56,13 @@ int bit_scan_forward( unsigned i ) { #if defined( __CUDA_ARCH__ ) return __ffs(i) - 1; -#elif defined( __GNUC__ ) || defined( __GNUG__ ) - return __builtin_ffs(i) - 1; -#elif defined( __INTEL_COMPILER ) +#elif defined( KOKKOS_COMPILER_INTEL ) return _bit_scan_forward(i); +#elif defined( KOKKOS_COMPILER_IBM ) + return __cnttz4(i); +#elif defined( KOKKOS_COMPILER_GNU ) || defined( __GNUC__ ) || defined( __GNUG__ ) + return __builtin_ffs(i) - 1; #else - unsigned t = 1u; int r = 0; while ( i && ( i & t == 0 ) ) @@ -79,10 +80,12 @@ int bit_scan_reverse( unsigned i ) enum { shift = static_cast( sizeof(unsigned) * CHAR_BIT - 1 ) }; #if defined( __CUDA_ARCH__ ) return shift - __clz(i); +#elif defined( KOKKOS_COMPILER_INTEL ) + return _bit_scan_reverse(i); +#elif defined( KOKKOS_COMPILER_IBM ) + return shift - __cntlz4(i); #elif defined( __GNUC__ ) || defined( __GNUG__ ) return shift - __builtin_clz(i); -#elif defined( __INTEL_COMPILER ) - return _bit_scan_reverse(i); #else unsigned t = 1u << shift; int r = 0; @@ -101,10 +104,12 @@ int bit_count( unsigned i ) { #if defined( __CUDA_ARCH__ ) return __popc(i); -#elif defined( __GNUC__ ) || defined( __GNUG__ ) - return __builtin_popcount(i); #elif defined ( __INTEL_COMPILER ) return _popcnt32(i); +#elif defined( KOKKOS_COMPILER_IBM ) + return __popcnt4(i); 
+#elif defined( __GNUC__ ) || defined( __GNUG__ ) + return __builtin_popcount(i); #else // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetNaive i = i - ( ( i >> 1 ) & ~0u / 3u ); // temp diff --git a/lib/kokkos/core/src/impl/Kokkos_Core.cpp b/lib/kokkos/core/src/impl/Kokkos_Core.cpp index cd38eaa9da..7c38430c44 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Core.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Core.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -147,7 +147,7 @@ setenv("MEMKIND_HBW_NODES", "1", 0); } #endif -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) Kokkos::Profiling::initialize(); #endif } @@ -155,7 +155,7 @@ setenv("MEMKIND_HBW_NODES", "1", 0); void finalize_internal( const bool all_spaces = false ) { -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) Kokkos::Profiling::finalize(); #endif @@ -449,5 +449,323 @@ void fence() Impl::fence_internal(); } +void print_configuration( std::ostream & out , const bool detail ) +{ + std::ostringstream msg; + + msg << "Compiler:" << std::endl; +#ifdef KOKKOS_COMPILER_APPLECC + msg << " KOKKOS_COMPILER_APPLECC: " << KOKKOS_COMPILER_APPLECC << std::endl; +#endif +#ifdef KOKKOS_COMPILER_CLANG + msg << " KOKKOS_COMPILER_CLANG: " << KOKKOS_COMPILER_CLANG << std::endl; +#endif +#ifdef KOKKOS_COMPILER_CRAYC + msg << " KOKKOS_COMPILER_CRAYC: " << KOKKOS_COMPILER_CRAYC << std::endl; +#endif +#ifdef KOKKOS_COMPILER_GNU + msg << " KOKKOS_COMPILER_GNU: " << KOKKOS_COMPILER_GNU << std::endl; +#endif +#ifdef KOKKOS_COMPILER_IBM + msg << " KOKKOS_COMPILER_IBM: " << KOKKOS_COMPILER_IBM << std::endl; +#endif +#ifdef KOKKOS_COMPILER_INTEL + msg << " KOKKOS_COMPILER_INTEL: " << KOKKOS_COMPILER_INTEL << std::endl; +#endif +#ifdef KOKKOS_COMPILER_NVCC + msg << " KOKKOS_COMPILER_NVCC: " << KOKKOS_COMPILER_NVCC << std::endl; +#endif +#ifdef KOKKOS_COMPILER_PGI + msg << " KOKKOS_COMPILER_PGI: " << KOKKOS_COMPILER_PGI << std::endl; +#endif + + + msg << "Architecture:" << std::endl; +#ifdef KOKKOS_ENABLE_ISA_KNC + msg << " KOKKOS_ENABLE_ISA_KNC: yes" << std::endl; +#else + msg << " KOKKOS_ENABLE_ISA_KNC: no" << std::endl; +#endif +#ifdef KOKKOS_ENABLE_ISA_POWERPCLE + msg << " KOKKOS_ENABLE_ISA_POWERPCLE: yes" << std::endl; +#else + msg << " KOKKOS_ENABLE_ISA_POWERPCLE: no" << std::endl; +#endif +#ifdef 
KOKKOS_ENABLE_ISA_X86_64 + msg << " KOKKOS_ENABLE_ISA_X86_64: yes" << std::endl; +#else + msg << " KOKKOS_ENABLE_ISA_X86_64: no" << std::endl; +#endif + + + msg << "Devices:" << std::endl; + msg << " KOKKOS_ENABLE_CUDA: "; +#ifdef KOKKOS_ENABLE_CUDA + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_OPENMP: "; +#ifdef KOKKOS_ENABLE_OPENMP + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_PTHREAD: "; +#ifdef KOKKOS_ENABLE_PTHREAD + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_STDTHREAD: "; +#ifdef KOKKOS_ENABLE_STDTHREAD + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_WINTHREAD: "; +#ifdef KOKKOS_ENABLE_WINTHREAD + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_QTHREADS: "; +#ifdef KOKKOS_ENABLE_QTHREADS + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_SERIAL: "; +#ifdef KOKKOS_ENABLE_SERIAL + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + + + msg << "Default Device:" << std::endl; + msg << " KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA: "; +#ifdef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP: "; +#ifdef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS: "; +#ifdef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS: "; +#ifdef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL: "; +#ifdef 
KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + + + msg << "Atomics:" << std::endl; + msg << " KOKKOS_ENABLE_CUDA_ATOMICS: "; +#ifdef KOKKOS_ENABLE_CUDA_ATOMICS + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_GNU_ATOMICS: "; +#ifdef KOKKOS_ENABLE_GNU_ATOMICS + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_INTEL_ATOMICS: "; +#ifdef KOKKOS_ENABLE_INTEL_ATOMICS + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_OPENMP_ATOMICS: "; +#ifdef KOKKOS_ENABLE_OPENMP_ATOMICS + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_WINDOWS_ATOMICS: "; +#ifdef KOKKOS_ENABLE_WINDOWS_ATOMICS + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + + + msg << "Vectorization:" << std::endl; + msg << " KOKKOS_ENABLE_PRAGMA_IVDEP: "; +#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_PRAGMA_LOOPCOUNT: "; +#ifdef KOKKOS_ENABLE_PRAGMA_LOOPCOUNT + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_PRAGMA_SIMD: "; +#ifdef KOKKOS_ENABLE_PRAGMA_SIMD + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_PRAGMA_UNROLL: "; +#ifdef KOKKOS_ENABLE_PRAGMA_UNROLL + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_PRAGMA_VECTOR: "; +#ifdef KOKKOS_ENABLE_PRAGMA_VECTOR + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + + msg << "Memory:" << std::endl; + msg << " KOKKOS_ENABLE_HBWSPACE: "; +#ifdef KOKKOS_ENABLE_HBWSPACE + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_INTEL_MM_ALLOC: "; +#ifdef KOKKOS_ENABLE_INTEL_MM_ALLOC + msg << "yes" << std::endl; +#else + 
msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_POSIX_MEMALIGN: "; +#ifdef KOKKOS_ENABLE_POSIX_MEMALIGN + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + + + msg << "Options:" << std::endl; + msg << " KOKKOS_ENABLE_ASM: "; +#ifdef KOKKOS_ENABLE_ASM + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_CXX1Z: "; +#ifdef KOKKOS_ENABLE_CXX1Z + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK: "; +#ifdef KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_HWLOC: "; +#ifdef KOKKOS_ENABLE_HWLOC + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_LIBRT: "; +#ifdef KOKKOS_ENABLE_LIBRT + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_MPI: "; +#ifdef KOKKOS_ENABLE_MPI + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_PROFILING: "; +#ifdef KOKKOS_ENABLE_PROFILING + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + +#ifdef KOKKOS_ENABLE_CUDA + msg << "Cuda Options:" << std::endl; + msg << " KOKKOS_ENABLE_CUDA_LAMBDA: "; +#ifdef KOKKOS_ENABLE_CUDA_LAMBDA + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_CUDA_LDG_INTRINSIC: "; +#ifdef KOKKOS_ENABLE_CUDA_LDG_INTRINSIC + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE: "; +#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_CUDA_UVM: "; +#ifdef KOKKOS_ENABLE_CUDA_UVM + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_CUSPARSE: "; +#ifdef KOKKOS_ENABLE_CUSPARSE + msg << "yes" << std::endl; +#else 
+ msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA: "; +#ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + +#endif + + msg << "\nRuntime Configuration:" << std::endl; +#ifdef KOKKOS_ENABLE_CUDA + Cuda::print_configuration(msg, detail); +#endif +#ifdef KOKKOS_ENABLE_OPENMP + OpenMP::print_configuration(msg, detail); +#endif +#if defined( KOKKOS_ENABLE_PTHREAD ) || defined( WINTHREAD ) + Threads::print_configuration(msg, detail); +#endif +#ifdef KOKKOS_ENABLE_QTHREADS + Qthreads::print_configuration(msg, detail); +#endif +#ifdef KOKKOS_ENABLE_SERIAL + Serial::print_configuration(msg, detail); +#endif + + out << msg.str() << std::endl; +} + } // namespace Kokkos diff --git a/lib/kokkos/core/src/impl/Kokkos_FunctorAnalysis.hpp b/lib/kokkos/core/src/impl/Kokkos_FunctorAnalysis.hpp new file mode 100644 index 0000000000..b425b3f19f --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_FunctorAnalysis.hpp @@ -0,0 +1,653 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_FUNCTORANALYSIS_HPP +#define KOKKOS_FUNCTORANALYSIS_HPP + +#include +#include +#include +#include +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +struct FunctorPatternInterface { + struct FOR {}; + struct REDUCE {}; + struct SCAN {}; +}; + +/** \brief Query Functor and execution policy argument tag for value type. + * + * If 'value_type' is not explicitly declared in the functor + * then attempt to deduce the type from FunctorType::operator() + * interface used by the pattern and policy. + * + * For the REDUCE pattern generate a Reducer and finalization function + * derived from what is available within the functor. 
+ */ +template< typename PatternInterface , class Policy , class Functor > +struct FunctorAnalysis { +private: + + using FOR = FunctorPatternInterface::FOR ; + using REDUCE = FunctorPatternInterface::REDUCE ; + using SCAN = FunctorPatternInterface::SCAN ; + + //---------------------------------------- + + struct VOID {}; + + template< typename P = Policy , typename = std::false_type > + struct has_work_tag + { + using type = void ; + using wtag = VOID ; + }; + + template< typename P > + struct has_work_tag + < P , typename std::is_same< typename P::work_tag , void >::type > + { + using type = typename P::work_tag ; + using wtag = typename P::work_tag ; + }; + + using Tag = typename has_work_tag<>::type ; + using WTag = typename has_work_tag<>::wtag ; + + //---------------------------------------- + // Check for Functor::value_type, which is either a simple type T or T[] + + template< typename F , typename = std::false_type > + struct has_value_type { using type = void ; }; + + template< typename F > + struct has_value_type + < F , typename std::is_same< typename F::value_type , void >::type > + { + using type = typename F::value_type ; + + static_assert( ! std::is_reference< type >::value && + std::rank< type >::value <= 1 && + std::extent< type >::value == 0 + , "Kokkos Functor::value_type is T or T[]" ); + }; + + //---------------------------------------- + // If Functor::value_type does not exist then evaluate operator(), + // depending upon the pattern and whether the policy has a work tag, + // to determine the reduction or scan value_type. 
+ + template< typename F + , typename P = PatternInterface + , typename V = typename has_value_type::type + , bool T = std::is_same< Tag , void >::value + > + struct deduce_value_type { using type = V ; }; + + template< typename F > + struct deduce_value_type< F , REDUCE , void , true > { + + template< typename M , typename A > + KOKKOS_INLINE_FUNCTION static + A deduce( void (Functor::*)( M , A & ) const ); + + using type = decltype( deduce( & F::operator() ) ); + }; + + template< typename F > + struct deduce_value_type< F , REDUCE , void , false > { + + template< typename M , typename A > + KOKKOS_INLINE_FUNCTION static + A deduce( void (Functor::*)( WTag , M , A & ) const ); + + template< typename M , typename A > + KOKKOS_INLINE_FUNCTION static + A deduce( void (Functor::*)( WTag const & , M , A & ) const ); + + using type = decltype( deduce( & F::operator() ) ); + }; + + template< typename F > + struct deduce_value_type< F , SCAN , void , true > { + + template< typename M , typename A , typename I > + KOKKOS_INLINE_FUNCTION static + A deduce( void (Functor::*)( M , A & , I ) const ); + + using type = decltype( deduce( & F::operator() ) ); + }; + + template< typename F > + struct deduce_value_type< F , SCAN , void , false > { + + template< typename M , typename A , typename I > + KOKKOS_INLINE_FUNCTION static + A deduce( void (Functor::*)( WTag , M , A & , I ) const ); + + template< typename M , typename A , typename I > + KOKKOS_INLINE_FUNCTION static + A deduce( void (Functor::*)( WTag const & , M , A & , I ) const ); + + using type = decltype( deduce( & F::operator() ) ); + }; + + //---------------------------------------- + + using candidate_type = typename deduce_value_type< Functor >::type ; + + enum { candidate_is_void = std::is_same< candidate_type , void >::value + , candidate_is_array = std::rank< candidate_type >::value == 1 }; + + //---------------------------------------- + +public: + + using value_type = typename std::remove_extent< candidate_type 
>::type ; + + static_assert( ! std::is_const< value_type >::value + , "Kokkos functor operator reduce argument cannot be const" ); + +private: + + // Stub to avoid defining a type 'void &' + using ValueType = typename + std::conditional< candidate_is_void , VOID , value_type >::type ; + +public: + + using pointer_type = typename + std::conditional< candidate_is_void , void , ValueType * >::type ; + + using reference_type = typename + std::conditional< candidate_is_array , ValueType * , typename + std::conditional< ! candidate_is_void , ValueType & , void > + ::type >::type ; + +private: + + template< bool IsArray , class FF > + KOKKOS_INLINE_FUNCTION static + typename std::enable_if< IsArray , unsigned >::type + get_length( FF const & f ) { return f.value_count ; } + + template< bool IsArray , class FF > + KOKKOS_INLINE_FUNCTION static + typename std::enable_if< ! IsArray , unsigned >::type + get_length( FF const & ) { return 1 ; } + +public: + + enum { StaticValueSize = ! candidate_is_void && + ! candidate_is_array + ? sizeof(ValueType) : 0 }; + + KOKKOS_FORCEINLINE_FUNCTION static + unsigned value_count( const Functor & f ) + { return FunctorAnalysis::template get_length< candidate_is_array >(f); } + + KOKKOS_FORCEINLINE_FUNCTION static + unsigned value_size( const Functor & f ) + { return FunctorAnalysis::template get_length< candidate_is_array >(f) * sizeof(ValueType); } + + //---------------------------------------- + + template< class Unknown > + KOKKOS_FORCEINLINE_FUNCTION static + unsigned value_count( const Unknown & ) + { return 1 ; } + + template< class Unknown > + KOKKOS_FORCEINLINE_FUNCTION static + unsigned value_size( const Unknown & ) + { return sizeof(ValueType); } + +private: + + enum INTERFACE : int + { DISABLE = 0 + , NO_TAG_NOT_ARRAY = 1 + , NO_TAG_IS_ARRAY = 2 + , HAS_TAG_NOT_ARRAY = 3 + , HAS_TAG_IS_ARRAY = 4 + , DEDUCED = + ! std::is_same< PatternInterface , REDUCE >::value ? DISABLE : ( + std::is_same::value + ? (candidate_is_array ? 
NO_TAG_IS_ARRAY : NO_TAG_NOT_ARRAY) + : (candidate_is_array ? HAS_TAG_IS_ARRAY : HAS_TAG_NOT_ARRAY) ) + }; + + //---------------------------------------- + // parallel_reduce join operator + + template< class F , INTERFACE > + struct has_join_function ; + + template< class F > + struct has_join_function< F , NO_TAG_NOT_ARRAY > + { + typedef volatile ValueType & vref_type ; + typedef volatile const ValueType & cvref_type ; + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( vref_type , cvref_type ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( vref_type , cvref_type ) ); + + KOKKOS_INLINE_FUNCTION static + void join( F const & f + , ValueType volatile * dst + , ValueType volatile const * src ) + { f.join( *dst , *src ); } + }; + + template< class F > + struct has_join_function< F , NO_TAG_IS_ARRAY > + { + typedef volatile ValueType * vref_type ; + typedef volatile const ValueType * cvref_type ; + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( vref_type , cvref_type ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( vref_type , cvref_type ) ); + + KOKKOS_INLINE_FUNCTION static + void join( F const & f + , ValueType volatile * dst + , ValueType volatile const * src ) + { f.join( dst , src ); } + }; + + template< class F > + struct has_join_function< F , HAS_TAG_NOT_ARRAY > + { + typedef volatile ValueType & vref_type ; + typedef volatile const ValueType & cvref_type ; + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( WTag , vref_type , cvref_type ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( WTag , vref_type , cvref_type ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( WTag const & , vref_type , cvref_type ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( WTag const & , vref_type , cvref_type ) ); + + KOKKOS_INLINE_FUNCTION static + void join( F const & f + , ValueType volatile * dst + , ValueType volatile const * src ) + { f.join( 
WTag() , *dst , *src ); } + }; + + template< class F > + struct has_join_function< F , HAS_TAG_IS_ARRAY > + { + typedef volatile ValueType * vref_type ; + typedef volatile const ValueType * cvref_type ; + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( WTag , vref_type , cvref_type ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( WTag , vref_type , cvref_type ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( WTag const & , vref_type , cvref_type ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( WTag const & , vref_type , cvref_type ) ); + + KOKKOS_INLINE_FUNCTION static + void join( F const & f + , ValueType volatile * dst + , ValueType volatile const * src ) + { f.join( WTag() , dst , src ); } + }; + + + template< class F = Functor + , INTERFACE = DEDUCED + , typename = void > + struct DeduceJoin + { + KOKKOS_INLINE_FUNCTION static + void join( F const & f + , ValueType volatile * dst + , ValueType volatile const * src ) + { + const int n = FunctorAnalysis::value_count( f ); + for ( int i = 0 ; i < n ; ++i ) dst[i] += src[i]; + } + }; + + template< class F > + struct DeduceJoin< F , DISABLE , void > + { + KOKKOS_INLINE_FUNCTION static + void join( F const & + , ValueType volatile * + , ValueType volatile const * ) {} + }; + + template< class F , INTERFACE I > + struct DeduceJoin< F , I , + decltype( has_join_function::enable_if( & F::join ) ) > + : public has_join_function {}; + + //---------------------------------------- + + template< class , INTERFACE > + struct has_init_function ; + + template< class F > + struct has_init_function< F , NO_TAG_NOT_ARRAY > + { + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( ValueType & ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( ValueType & ) ); + + KOKKOS_INLINE_FUNCTION static + void init( F const & f , ValueType * dst ) + { f.init( *dst ); } + }; + + template< class F > + struct has_init_function< F , NO_TAG_IS_ARRAY > 
+ { + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( ValueType * ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( ValueType * ) ); + + KOKKOS_INLINE_FUNCTION static + void init( F const & f , ValueType * dst ) + { f.init( dst ); } + }; + + template< class F > + struct has_init_function< F , HAS_TAG_NOT_ARRAY > + { + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( WTag , ValueType & ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( WTag const & , ValueType & ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( WTag , ValueType & ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( WTag const & , ValueType & ) ); + + KOKKOS_INLINE_FUNCTION static + void init( F const & f , ValueType * dst ) + { f.init( WTag(), *dst ); } + }; + + template< class F > + struct has_init_function< F , HAS_TAG_IS_ARRAY > + { + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( WTag , ValueType * ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( WTag const & , ValueType * ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( WTag , ValueType * ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( WTag const & , ValueType * ) ); + + KOKKOS_INLINE_FUNCTION static + void init( F const & f , ValueType * dst ) + { f.init( WTag(), dst ); } + }; + + template< class F = Functor + , INTERFACE = DEDUCED + , typename = void > + struct DeduceInit + { + KOKKOS_INLINE_FUNCTION static + void init( F const & , ValueType * dst ) { new(dst) ValueType(); } + }; + + template< class F > + struct DeduceInit< F , DISABLE , void > + { + KOKKOS_INLINE_FUNCTION static + void init( F const & , ValueType * ) {} + }; + + template< class F , INTERFACE I > + struct DeduceInit< F , I , + decltype( has_init_function::enable_if( & F::init ) ) > + : public has_init_function {}; + + //---------------------------------------- + +public: + + struct Reducer + { + private: + + 
Functor const & m_functor ; + ValueType * const m_result ; + int const m_length ; + + public: + + using reducer = Reducer ; + using value_type = FunctorAnalysis::value_type ; + using memory_space = void ; + using reference_type = FunctorAnalysis::reference_type ; + + KOKKOS_INLINE_FUNCTION + void join( ValueType volatile * dst + , ValueType volatile const * src ) const noexcept + { DeduceJoin<>::join( m_functor , dst , src ); } + + KOKKOS_INLINE_FUNCTION + void init( ValueType * dst ) const noexcept + { DeduceInit<>::init( m_functor , dst ); } + + KOKKOS_INLINE_FUNCTION explicit + constexpr Reducer( Functor const & arg_functor + , ValueType * arg_value = 0 + , int arg_length = 0 ) noexcept + : m_functor( arg_functor ), m_result(arg_value), m_length(arg_length) {} + + KOKKOS_INLINE_FUNCTION + constexpr int length() const noexcept { return m_length ; } + + KOKKOS_INLINE_FUNCTION + ValueType & operator[]( int i ) const noexcept + { return m_result[i]; } + + private: + + template< bool IsArray > + constexpr + typename std::enable_if< IsArray , ValueType * >::type + ref() const noexcept { return m_result ; } + + template< bool IsArray > + constexpr + typename std::enable_if< ! 
IsArray , ValueType & >::type + ref() const noexcept { return *m_result ; } + + public: + + KOKKOS_INLINE_FUNCTION + auto result() const noexcept + -> decltype( Reducer::template ref< candidate_is_array >() ) + { return Reducer::template ref< candidate_is_array >(); } + }; + + //---------------------------------------- + +private: + + template< class , INTERFACE > + struct has_final_function ; + + // No tag, not array + template< class F > + struct has_final_function< F , NO_TAG_NOT_ARRAY > + { + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( ValueType & ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( ValueType & ) ); + + KOKKOS_INLINE_FUNCTION static + void final( F const & f , ValueType * dst ) + { f.final( *dst ); } + }; + + // No tag, is array + template< class F > + struct has_final_function< F , NO_TAG_IS_ARRAY > + { + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( ValueType * ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( ValueType * ) ); + + KOKKOS_INLINE_FUNCTION static + void final( F const & f , ValueType * dst ) + { f.final( dst ); } + }; + + // Has tag, not array + template< class F > + struct has_final_function< F , HAS_TAG_NOT_ARRAY > + { + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( WTag , ValueType & ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( WTag const & , ValueType & ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( WTag , ValueType & ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( WTag const & , ValueType & ) ); + + KOKKOS_INLINE_FUNCTION static + void final( F const & f , ValueType * dst ) + { f.final( WTag(), *dst ); } + }; + + // Has tag, is array + template< class F > + struct has_final_function< F , HAS_TAG_IS_ARRAY > + { + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( WTag , ValueType * ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( WTag const & , ValueType 
* ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( WTag , ValueType * ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( WTag const & , ValueType * ) ); + + KOKKOS_INLINE_FUNCTION static + void final( F const & f , ValueType * dst ) + { f.final( WTag(), dst ); } + }; + + template< class F = Functor + , INTERFACE = DEDUCED + , typename = void > + struct DeduceFinal + { + KOKKOS_INLINE_FUNCTION + static void final( F const & , ValueType * ) {} + }; + + template< class F , INTERFACE I > + struct DeduceFinal< F , I , + decltype( has_final_function::enable_if( & F::final ) ) > + : public has_final_function {}; + +public: + + static void final( Functor const & f , ValueType * result ) + { DeduceFinal<>::final( f , result ); } + +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* KOKKOS_FUNCTORANALYSIS_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp b/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp index 96d30d0c4a..eb1f5ce96c 100644 --- a/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -62,7 +62,7 @@ #include #endif -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) #include #endif @@ -198,7 +198,7 @@ void * HBWSpace::allocate( const size_t arg_alloc_size ) const case STD_MALLOC: msg << "STD_MALLOC" ; break ; } msg << " ]( " << arg_alloc_size << " ) FAILED" ; - if ( ptr == NULL ) { msg << " NULL" ; } + if ( ptr == NULL ) { msg << " NULL" ; } else { msg << " NOT ALIGNED " << ptr ; } std::cerr << msg.str() << std::endl ; @@ -218,7 +218,7 @@ void HBWSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_s if ( m_alloc_mech == STD_MALLOC ) { void * alloc_ptr = *(reinterpret_cast(arg_alloc_ptr) -1); memkind_free(MEMKIND_TYPE, alloc_ptr ); - } + } } } @@ -249,7 +249,7 @@ deallocate( SharedAllocationRecord< void , void > * arg_rec ) SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >:: ~SharedAllocationRecord() { - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::deallocateData( Kokkos::Profiling::SpaceHandle(Kokkos::Experimental::HBWSpace::name()),RecordBase::m_alloc_ptr->m_label, @@ -278,7 +278,7 @@ SharedAllocationRecord( const Kokkos::Experimental::HBWSpace & arg_space ) , m_space( arg_space ) { - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::allocateData(Kokkos::Profiling::SpaceHandle(arg_space.name()),arg_label,data(),arg_alloc_size); } @@ -297,7 +297,7 @@ SharedAllocationRecord( const Kokkos::Experimental::HBWSpace & arg_space void * SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >:: allocate_tracked( const Kokkos::Experimental::HBWSpace & arg_space - , const std::string & arg_alloc_label + , const std::string & arg_alloc_label , const size_t arg_alloc_size ) { if ( ! 
arg_alloc_size ) return (void *) 0 ; diff --git a/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp b/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp index 3cd603728e..67be86c9a3 100644 --- a/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,14 +36,14 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ #include #include -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) #include #endif /*--------------------------------------------------------------------------*/ @@ -292,7 +292,7 @@ void * HostSpace::allocate( const size_t arg_alloc_size ) const case INTEL_MM_ALLOC: msg << "INTEL_MM_ALLOC" ; break ; } msg << " ]( " << arg_alloc_size << " ) FAILED" ; - if ( ptr == NULL ) { msg << " NULL" ; } + if ( ptr == NULL ) { msg << " NULL" ; } else { msg << " NOT ALIGNED " << ptr ; } std::cerr << msg.str() << std::endl ; @@ -312,7 +312,7 @@ void HostSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_ if ( m_alloc_mech == STD_MALLOC ) { void * alloc_ptr = *(reinterpret_cast(arg_alloc_ptr) -1); free( alloc_ptr ); - } + } #if defined( KOKKOS_ENABLE_INTEL_MM_ALLOC ) else if ( m_alloc_mech == INTEL_MM_ALLOC ) { @@ -359,7 +359,7 @@ deallocate( SharedAllocationRecord< void , void > * arg_rec ) SharedAllocationRecord< Kokkos::HostSpace , void >:: 
~SharedAllocationRecord() { - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::deallocateData( Kokkos::Profiling::SpaceHandle(Kokkos::HostSpace::name()),RecordBase::m_alloc_ptr->m_label, @@ -388,7 +388,7 @@ SharedAllocationRecord( const Kokkos::HostSpace & arg_space ) , m_space( arg_space ) { -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::allocateData(Kokkos::Profiling::SpaceHandle(arg_space.name()),arg_label,data(),arg_alloc_size); } @@ -406,7 +406,7 @@ SharedAllocationRecord( const Kokkos::HostSpace & arg_space void * SharedAllocationRecord< Kokkos::HostSpace , void >:: allocate_tracked( const Kokkos::HostSpace & arg_space - , const std::string & arg_alloc_label + , const std::string & arg_alloc_label , const size_t arg_alloc_size ) { if ( ! arg_alloc_size ) return (void *) 0 ; diff --git a/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.cpp b/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.cpp new file mode 100644 index 0000000000..ac200209c7 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.cpp @@ -0,0 +1,463 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include +#include +#include +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +void HostThreadTeamData::organize_pool + ( HostThreadTeamData * members[] , const int size ) +{ + bool ok = true ; + + // Verify not already a member of a pool: + for ( int rank = 0 ; rank < size && ok ; ++rank ) { + ok = ( 0 != members[rank] ) && ( 0 == members[rank]->m_pool_scratch ); + } + + if ( ok ) { + + int64_t * const root_scratch = members[0]->m_scratch ; + + for ( int i = m_pool_rendezvous ; i < m_pool_reduce ; ++i ) { + root_scratch[i] = 0 ; + } + + { + HostThreadTeamData ** const pool = + (HostThreadTeamData **) (root_scratch + m_pool_members); + + // team size == 1, league size == pool_size + + for ( int rank = 0 ; rank < size ; ++rank ) { + HostThreadTeamData * const mem = members[ rank ] ; + mem->m_pool_scratch = root_scratch ; + mem->m_team_scratch = mem->m_scratch ; + mem->m_pool_rank = rank ; + mem->m_pool_size = size ; + mem->m_team_base = rank ; + mem->m_team_rank = 0 ; + mem->m_team_size = 1 ; + mem->m_team_alloc = 1 ; + mem->m_league_rank = rank ; + mem->m_league_size = size ; + mem->m_pool_rendezvous_step = 0 ; + mem->m_team_rendezvous_step = 0 ; + pool[ rank ] = mem ; + } + } + + Kokkos::memory_fence(); + } + else { + Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::HostThreadTeamData::organize_pool ERROR pool already exists"); + } +} + +void HostThreadTeamData::disband_pool() +{ + m_work_range.first = -1 ; + m_work_range.second = -1 ; + m_pool_scratch = 0 ; + m_team_scratch = 0 ; + m_pool_rank = 0 ; + m_pool_size = 1 ; + m_team_base = 0 ; + m_team_rank = 0 ; + m_team_size = 1 ; + m_team_alloc = 1 ; + m_league_rank = 0 ; + m_league_size = 1 ; + 
m_pool_rendezvous_step = 0 ; + m_team_rendezvous_step = 0 ; +} + +int HostThreadTeamData::organize_team( const int team_size ) +{ + // Pool is initialized + const bool ok_pool = 0 != m_pool_scratch ; + + // Team is not set + const bool ok_team = + m_team_scratch == m_scratch && + m_team_base == m_pool_rank && + m_team_rank == 0 && + m_team_size == 1 && + m_team_alloc == 1 && + m_league_rank == m_pool_rank && + m_league_size == m_pool_size ; + + if ( ok_pool && ok_team ) { + + if ( team_size <= 0 ) return 0 ; // No teams to organize + + if ( team_size == 1 ) return 1 ; // Already organized in teams of one + + HostThreadTeamData * const * const pool = + (HostThreadTeamData **) (m_pool_scratch + m_pool_members); + + // "league_size" in this context is the number of concurrent teams + // that the pool can accommodate. Excess threads are idle. + const int league_size = m_pool_size / team_size ; + const int team_alloc_size = m_pool_size / league_size ; + const int team_alloc_rank = m_pool_rank % team_alloc_size ; + const int league_rank = m_pool_rank / team_alloc_size ; + const int team_base_rank = league_rank * team_alloc_size ; + + m_team_scratch = pool[ team_base_rank ]->m_scratch ; + m_team_base = team_base_rank ; + // This needs to check overflow, if m_pool_size % team_alloc_size !=0 + // there are two corner cases: + // (i) if team_alloc_size == team_size there might be a non-full + // zombi team around (for example m_pool_size = 5 and team_size = 2 + // (ii) if team_alloc > team_size then the last team might have less + // threads than the others + m_team_rank = ( team_base_rank + team_size <= m_pool_size ) && + ( team_alloc_rank < team_size ) ? 
+ team_alloc_rank : -1; + m_team_size = team_size ; + m_team_alloc = team_alloc_size ; + m_league_rank = league_rank ; + m_league_size = league_size ; + m_team_rendezvous_step = 0 ; + + if ( team_base_rank == m_pool_rank ) { + // Initialize team's rendezvous memory + for ( int i = m_team_rendezvous ; i < m_pool_reduce ; ++i ) { + m_scratch[i] = 0 ; + } + // Make sure team's rendezvous memory initialized + // is written before proceeding. + Kokkos::memory_fence(); + } + + // Organizing threads into a team performs a barrier across the + // entire pool to insure proper initialization of the team + // rendezvous mechanism before a team rendezvous can be performed. + + if ( pool_rendezvous() ) { + pool_rendezvous_release(); + } + } + else { + Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::HostThreadTeamData::organize_team ERROR"); + } + + return 0 <= m_team_rank ; +} + +void HostThreadTeamData::disband_team() +{ + m_team_scratch = m_scratch ; + m_team_base = m_pool_rank ; + m_team_rank = 0 ; + m_team_size = 1 ; + m_team_alloc = 1 ; + m_league_rank = m_pool_rank ; + m_league_size = m_pool_size ; + m_team_rendezvous_step = 0 ; +} + +//---------------------------------------------------------------------------- +/* pattern for rendezvous + * + * if ( rendezvous() ) { + * ... all other threads are still in team_rendezvous() ... + * rendezvous_release(); + * ... all other threads are released from team_rendezvous() ... 
+ * } + */ + +int HostThreadTeamData::rendezvous( int64_t * const buffer + , int & rendezvous_step + , int const size + , int const rank ) noexcept +{ + enum : int { shift_byte = 3 }; + enum : int { size_byte = ( 01 << shift_byte ) }; // == 8 + enum : int { mask_byte = size_byte - 1 }; + + enum : int { shift_mem_cycle = 2 }; + enum : int { size_mem_cycle = ( 01 << shift_mem_cycle ) }; // == 4 + enum : int { mask_mem_cycle = size_mem_cycle - 1 }; + + // Cycle step values: 1 <= step <= size_val_cycle + // An odd multiple of memory cycle so that when a memory location + // is reused it has a different value. + // Must be representable within a single byte: size_val_cycle < 16 + + enum : int { size_val_cycle = 3 * size_mem_cycle }; + + // Requires: + // Called by rank = [ 0 .. size ) + // buffer aligned to int64_t[4] + + // A sequence of rendezvous uses four cycled locations in memory + // and non-equal cycled synchronization values to + // 1) prevent rendezvous from overtaking one another and + // 2) give each spin wait location an int64_t[4] span + // so that it has its own cache line. + + const int step = ( rendezvous_step % size_val_cycle ) + 1 ; + + rendezvous_step = step ; + + // The leading int64_t[4] span is for thread 0 to write + // and all other threads to read spin-wait. + // sync_offset is the index into this array for this step. + + const int sync_offset = ( step & mask_mem_cycle ) + size_mem_cycle ; + + union { + int64_t full ; + int8_t byte[8] ; + } value ; + + if ( rank ) { + + const int group_begin = rank << shift_byte ; // == rank * size_byte + + if ( group_begin < size ) { + + // This thread waits for threads + // [ group_begin .. group_begin + 8 ) + // [ rank*8 .. rank*8 + 8 ) + // to write to their designated bytes. + + const int end = group_begin + size_byte < size + ? 
size_byte : size - group_begin ; + + value.full = 0 ; + for ( int i = 0 ; i < end ; ++i ) value.byte[i] = int8_t( step ); + + store_fence(); // This should not be needed but fixes #742 + + spinwait_until_equal( buffer[ (rank << shift_mem_cycle) + sync_offset ] + , value.full ); + } + + { + // This thread sets its designated byte. + // ( rank % size_byte ) + + // ( ( rank / size_byte ) * size_byte * size_mem_cycle ) + + // ( sync_offset * size_byte ) + const int offset = ( rank & mask_byte ) + + ( ( rank & ~mask_byte ) << shift_mem_cycle ) + + ( sync_offset << shift_byte ); + + // All of this thread's previous memory stores must be complete before + // this thread stores the step value at this thread's designated byte + // in the shared synchronization array. + + Kokkos::memory_fence(); + + ((volatile int8_t*) buffer)[ offset ] = int8_t( step ); + + // Memory fence to push the previous store out + Kokkos::memory_fence(); + } + + // Wait for thread 0 to release all other threads + + spinwait_until_equal( buffer[ step & mask_mem_cycle ] , int64_t(step) ); + + } + else { + // Thread 0 waits for threads [1..7] + // to write to their designated bytes. + + const int end = size_byte < size ? 8 : size ; + + value.full = 0 ; + for ( int i = 1 ; i < end ; ++i ) value.byte[i] = int8_t( step ); + + spinwait_until_equal( buffer[ sync_offset ], value.full ); + } + + return rank ? 
0 : 1 ; +} + +void HostThreadTeamData:: + rendezvous_release( int64_t * const buffer + , int const rendezvous_step ) noexcept +{ + enum : int { shift_mem_cycle = 2 }; + enum : int { size_mem_cycle = ( 01 << shift_mem_cycle ) }; // == 4 + enum : int { mask_mem_cycle = size_mem_cycle - 1 }; + + // Requires: + // Called after team_rendezvous + // Called only by true == team_rendezvous(root) + + // Memory fence to be sure all previous writes are complete: + Kokkos::memory_fence(); + + ((volatile int64_t*) buffer)[ rendezvous_step & mask_mem_cycle ] = + int64_t( rendezvous_step ); + + // Memory fence to push the store out + Kokkos::memory_fence(); +} + +//---------------------------------------------------------------------------- + +int HostThreadTeamData::get_work_stealing() noexcept +{ + pair_int_t w( -1 , -1 ); + + if ( 1 == m_team_size || team_rendezvous() ) { + + // Attempt first from beginning of my work range + for ( int attempt = m_work_range.first < m_work_range.second ; attempt ; ) { + + // Query and attempt to update m_work_range + // from: [ w.first , w.second ) + // to: [ w.first + 1 , w.second ) = w_new + // + // If w is invalid then is just a query. + + const pair_int_t w_new( w.first + 1 , w.second ); + + w = Kokkos::atomic_compare_exchange( & m_work_range, w, w_new ); + + if ( w.first < w.second ) { + // m_work_range is viable + + // If steal is successful then don't repeat attempt to steal + attempt = ! 
( w_new.first == w.first + 1 && + w_new.second == w.second ); + } + else { + // m_work_range is not viable + w.first = -1 ; + w.second = -1 ; + + attempt = 0 ; + } + } + + if ( w.first == -1 && m_steal_rank != m_pool_rank ) { + + HostThreadTeamData * const * const pool = + (HostThreadTeamData**)( m_pool_scratch + m_pool_members ); + + // Attempt from begining failed, try to steal from end of neighbor + + pair_int_t volatile * steal_range = + & ( pool[ m_steal_rank ]->m_work_range ); + + for ( int attempt = true ; attempt ; ) { + + // Query and attempt to update steal_work_range + // from: [ w.first , w.second ) + // to: [ w.first , w.second - 1 ) = w_new + // + // If w is invalid then is just a query. + + const pair_int_t w_new( w.first , w.second - 1 ); + + w = Kokkos::atomic_compare_exchange( steal_range, w, w_new ); + + if ( w.first < w.second ) { + // steal_work_range is viable + + // If steal is successful then don't repeat attempt to steal + attempt = ! ( w_new.first == w.first && + w_new.second == w.second - 1 ); + } + else { + // steal_work_range is not viable, move to next member + w.first = -1 ; + w.second = -1 ; + + // We need to figure out whether the next team is active + // m_steal_rank + m_team_alloc could be the next base_rank to steal from + // but only if there are another m_team_size threads available so that that + // base rank has a full team. + m_steal_rank = m_steal_rank + m_team_alloc + m_team_size <= m_pool_size ? 
+ m_steal_rank + m_team_alloc : 0; + + steal_range = & ( pool[ m_steal_rank ]->m_work_range ); + + // If tried all other members then don't repeat attempt to steal + attempt = m_steal_rank != m_pool_rank ; + } + } + + if ( w.first != -1 ) w.first = w.second - 1 ; + } + + if ( 1 < m_team_size ) { + // Must share the work index + *((int volatile *) team_reduce()) = w.first ; + + team_rendezvous_release(); + } + } + else if ( 1 < m_team_size ) { + w.first = *((int volatile *) team_reduce()); + } + + // May exit because successfully stole work and w is good. + // May exit because no work left to steal and w = (-1,-1). + +#if 0 +fprintf(stdout,"HostThreadTeamData::get_work_stealing() pool(%d of %d) %d\n" + , m_pool_rank , m_pool_size , w.first ); +fflush(stdout); +#endif + + return w.first ; +} + +} // namespace Impl +} // namespace Kokkos + diff --git a/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp b/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp new file mode 100644 index 0000000000..6b5918eaef --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp @@ -0,0 +1,1090 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_HOSTTHREADTEAM_HPP +#define KOKKOS_IMPL_HOSTTHREADTEAM_HPP + +#include +#include +#include +#include +#include +#include +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< class HostExecSpace > +class HostThreadTeamMember ; + +class HostThreadTeamData { +public: + + template< class > friend class HostThreadTeamMember ; + + // Assume upper bounds on number of threads: + // pool size <= 1024 threads + // pool rendezvous <= ( 1024 / 8 ) * 4 + 4 = 2052 + // team size <= 64 threads + // team rendezvous <= ( 64 / 8 ) * 4 + 4 = 36 + + enum : int { max_pool_members = 1024 }; + enum : int { max_team_members = 64 }; + enum : int { max_pool_rendezvous = ( max_pool_members / 8 ) * 4 + 4 }; + enum : int { max_team_rendezvous = ( max_team_members / 8 ) * 4 + 4 }; + +private: + + // per-thread scratch memory buffer chunks: + // + // [ pool_members ] = [ m_pool_members .. m_pool_rendezvous ) + // [ pool_rendezvous ] = [ m_pool_rendezvous .. m_team_rendezvous ) + // [ team_rendezvous ] = [ m_team_rendezvous .. m_pool_reduce ) + // [ pool_reduce ] = [ m_pool_reduce .. m_team_reduce ) + // [ team_reduce ] = [ m_team_reduce .. m_team_shared ) + // [ team_shared ] = [ m_team_shared .. m_thread_local ) + // [ thread_local ] = [ m_thread_local .. 
m_scratch_size ) + + enum : int { m_pool_members = 0 }; + enum : int { m_pool_rendezvous = m_pool_members + max_pool_members }; + enum : int { m_team_rendezvous = m_pool_rendezvous + max_pool_rendezvous }; + enum : int { m_pool_reduce = m_team_rendezvous + max_team_rendezvous }; + + using pair_int_t = Kokkos::pair ; + + pair_int_t m_work_range ; + int64_t m_work_end ; + int64_t * m_scratch ; // per-thread buffer + int64_t * m_pool_scratch ; // == pool[0]->m_scratch + int64_t * m_team_scratch ; // == pool[ 0 + m_team_base ]->m_scratch + int m_pool_rank ; + int m_pool_size ; + int m_team_reduce ; + int m_team_shared ; + int m_thread_local ; + int m_scratch_size ; + int m_team_base ; + int m_team_rank ; + int m_team_size ; + int m_team_alloc ; + int m_league_rank ; + int m_league_size ; + int m_work_chunk ; + int m_steal_rank ; // work stealing rank + int mutable m_pool_rendezvous_step ; + int mutable m_team_rendezvous_step ; + + HostThreadTeamData * team_member( int r ) const noexcept + { return ((HostThreadTeamData**)(m_pool_scratch+m_pool_members))[m_team_base+r]; } + + // Rendezvous pattern: + // if ( rendezvous(root) ) { + // ... only root thread here while all others wait ... + // rendezvous_release(); + // } + // else { + // ... all other threads release here ... + // } + // + // Requires: buffer[ ( max_threads / 8 ) * 4 + 4 ]; 0 == max_threads % 8 + // + static + int rendezvous( int64_t * const buffer + , int & rendezvous_step + , int const size + , int const rank ) noexcept ; + + static + void rendezvous_release( int64_t * const buffer + , int const rendezvous_step ) noexcept ; + +public: + + inline + int team_rendezvous( int const root ) const noexcept + { + return 1 == m_team_size ? 1 : + rendezvous( m_team_scratch + m_team_rendezvous + , m_team_rendezvous_step + , m_team_size + , ( m_team_rank + m_team_size - root ) % m_team_size ); + } + + inline + int team_rendezvous() const noexcept + { + return 1 == m_team_size ? 
1 : + rendezvous( m_team_scratch + m_team_rendezvous + , m_team_rendezvous_step + , m_team_size + , m_team_rank ); + } + + inline + void team_rendezvous_release() const noexcept + { + if ( 1 < m_team_size ) { + rendezvous_release( m_team_scratch + m_team_rendezvous + , m_team_rendezvous_step ); + } + } + + inline + int pool_rendezvous() const noexcept + { + return 1 == m_pool_size ? 1 : + rendezvous( m_pool_scratch + m_pool_rendezvous + , m_pool_rendezvous_step + , m_pool_size + , m_pool_rank ); + } + + inline + void pool_rendezvous_release() const noexcept + { + if ( 1 < m_pool_size ) { + rendezvous_release( m_pool_scratch + m_pool_rendezvous + , m_pool_rendezvous_step ); + } + } + + //---------------------------------------- + + constexpr HostThreadTeamData() noexcept + : m_work_range(-1,-1) + , m_work_end(0) + , m_scratch(0) + , m_pool_scratch(0) + , m_team_scratch(0) + , m_pool_rank(0) + , m_pool_size(1) + , m_team_reduce(0) + , m_team_shared(0) + , m_thread_local(0) + , m_scratch_size(0) + , m_team_base(0) + , m_team_rank(0) + , m_team_size(1) + , m_team_alloc(1) + , m_league_rank(0) + , m_league_size(1) + , m_work_chunk(0) + , m_steal_rank(0) + , m_pool_rendezvous_step(0) + , m_team_rendezvous_step(0) + {} + + //---------------------------------------- + // Organize array of members into a pool. + // The 0th member is the root of the pool. + // Requires: members are not already in a pool. + // Requires: called by one thread. + // Pool members are ordered as "close" - sorted by NUMA and then CORE + // Each thread is its own team with team_size == 1. + static void organize_pool( HostThreadTeamData * members[] + , const int size ); + + // Called by each thread within the pool + void disband_pool(); + + //---------------------------------------- + // Each thread within a pool organizes itself into a team. + // Must be called by all threads of the pool. 
+ // Organizing threads into a team performs a barrier across the + // entire pool to insure proper initialization of the team + // rendezvous mechanism before a team rendezvous can be performed. + // + // Return true if a valid member of a team. + // Return false if not a member and thread should be idled. + int organize_team( const int team_size ); + + // Each thread within a pool disbands itself from current team. + // Each thread becomes its own team with team_size == 1. + // Must be called by all threads of the pool. + void disband_team(); + + //---------------------------------------- + + constexpr int pool_rank() const { return m_pool_rank ; } + constexpr int pool_size() const { return m_pool_size ; } + + HostThreadTeamData * pool_member( int r ) const noexcept + { return ((HostThreadTeamData**)(m_pool_scratch+m_pool_members))[r]; } + + //---------------------------------------- + +private: + + enum : int { mask_to_16 = 0x0f }; // align to 16 bytes + enum : int { shift_to_8 = 3 }; // size to 8 bytes + +public: + + static constexpr int align_to_int64( int n ) + { return ( ( n + mask_to_16 ) & ~mask_to_16 ) >> shift_to_8 ; } + + constexpr int pool_reduce_bytes() const + { return m_scratch_size ? 
sizeof(int64_t) * ( m_team_reduce - m_pool_reduce ) : 0 ; } + + constexpr int team_reduce_bytes() const + { return sizeof(int64_t) * ( m_team_shared - m_team_reduce ); } + + constexpr int team_shared_bytes() const + { return sizeof(int64_t) * ( m_thread_local - m_team_shared ); } + + constexpr int thread_local_bytes() const + { return sizeof(int64_t) * ( m_scratch_size - m_thread_local ); } + + constexpr int scratch_bytes() const + { return sizeof(int64_t) * m_scratch_size ; } + + // Memory chunks: + + int64_t * scratch_buffer() const noexcept + { return m_scratch ; } + + int64_t * pool_reduce() const noexcept + { return m_pool_scratch + m_pool_reduce ; } + + int64_t * pool_reduce_local() const noexcept + { return m_scratch + m_pool_reduce ; } + + int64_t * team_reduce() const noexcept + { return m_team_scratch + m_team_reduce ; } + + int64_t * team_reduce_local() const noexcept + { return m_scratch + m_team_reduce ; } + + int64_t * team_shared() const noexcept + { return m_team_scratch + m_team_shared ; } + + int64_t * local_scratch() const noexcept + { return m_scratch + m_thread_local ; } + + // Given: + // pool_reduce_size = number bytes for pool reduce + // team_reduce_size = number bytes for team reduce + // team_shared_size = number bytes for team shared memory + // thread_local_size = number bytes for thread local memory + // Return: + // total number of bytes that must be allocated + static + size_t scratch_size( int pool_reduce_size + , int team_reduce_size + , int team_shared_size + , int thread_local_size ) + { + pool_reduce_size = align_to_int64( pool_reduce_size ); + team_reduce_size = align_to_int64( team_reduce_size ); + team_shared_size = align_to_int64( team_shared_size ); + thread_local_size = align_to_int64( thread_local_size ); + + const size_t total_bytes = ( + m_pool_reduce + + pool_reduce_size + + team_reduce_size + + team_shared_size + + thread_local_size ) * sizeof(int64_t); + + return total_bytes ; + } + + // Given: + // alloc_ptr = 
pointer to allocated memory + // alloc_size = number bytes of allocated memory + // pool_reduce_size = number bytes for pool reduce/scan operations + // team_reduce_size = number bytes for team reduce/scan operations + // team_shared_size = number bytes for team-shared memory + // thread_local_size = number bytes for thread-local memory + // Effect: + // records the buffer and the offset of each region within it + void scratch_assign( void * const alloc_ptr + , size_t const alloc_size + , int pool_reduce_size + , int team_reduce_size + , int team_shared_size + , int /* thread_local_size */ ) + { + pool_reduce_size = align_to_int64( pool_reduce_size ); + team_reduce_size = align_to_int64( team_reduce_size ); + team_shared_size = align_to_int64( team_shared_size ); + // thread_local_size = align_to_int64( thread_local_size ); + + m_scratch = (int64_t *) alloc_ptr ; + m_team_reduce = m_pool_reduce + pool_reduce_size ; + m_team_shared = m_team_reduce + team_reduce_size ; + m_thread_local = m_team_shared + team_shared_size ; + m_scratch_size = align_to_int64( alloc_size ); + +#if 0 +fprintf(stdout,"HostThreadTeamData::scratch_assign { %d %d %d %d %d %d %d }\n" + , int(m_pool_members) + , int(m_pool_rendezvous) + , int(m_pool_reduce) + , int(m_team_reduce) + , int(m_team_shared) + , int(m_thread_local) + , int(m_scratch_size) + ); +fflush(stdout); +#endif + + } + + //---------------------------------------- + // Get a work index within the range. + // First try to steal from beginning of own team's partition. + // If that fails then try to steal from end of another team's partition. + int get_work_stealing() noexcept ; + + //---------------------------------------- + // Set the initial work partitioning of [ 0 .. 
length ) among the teams + // with granularity of chunk + + void set_work_partition( int64_t const length + , int const chunk ) noexcept + { + // Minimum chunk size to insure that + // m_work_end < std::numeric_limits::max() * m_work_chunk + + int const chunk_min = ( length + std::numeric_limits::max() ) + / std::numeric_limits::max(); + + m_work_end = length ; + m_work_chunk = std::max( chunk , chunk_min ); + + // Number of work chunks and partitioning of that number: + int const num = ( m_work_end + m_work_chunk - 1 ) / m_work_chunk ; + int const part = ( num + m_league_size - 1 ) / m_league_size ; + + m_work_range.first = part * m_league_rank ; + m_work_range.second = m_work_range.first + part ; + + // Steal from next team, round robin + // The next team is offset by m_team_alloc if it fits in the pool. + + m_steal_rank = m_team_base + m_team_alloc + m_team_size <= m_pool_size ? + m_team_base + m_team_alloc : 0 ; + } + + std::pair get_work_partition() noexcept + { + return std::pair + ( m_work_range.first * m_work_chunk + , m_work_range.second * m_work_chunk < m_work_end + ? m_work_range.second * m_work_chunk : m_work_end ); + } + + std::pair get_work_stealing_chunk() noexcept + { + std::pair x(-1,-1); + + const int i = get_work_stealing(); + + if ( 0 <= i ) { + x.first = m_work_chunk * i ; + x.second = x.first + m_work_chunk < m_work_end + ? 
x.first + m_work_chunk : m_work_end ; + } + + return x ; + } +}; + +//---------------------------------------------------------------------------- + +template< class HostExecSpace > +class HostThreadTeamMember { +public: + + using scratch_memory_space = typename HostExecSpace::scratch_memory_space ; + +private: + + scratch_memory_space m_scratch ; + HostThreadTeamData & m_data ; + int const m_league_rank ; + int const m_league_size ; + +public: + + constexpr HostThreadTeamMember( HostThreadTeamData & arg_data ) noexcept + : m_scratch( arg_data.team_shared() , arg_data.team_shared_bytes() ) + , m_data( arg_data ) + , m_league_rank(0) + , m_league_size(1) + {} + + constexpr HostThreadTeamMember( HostThreadTeamData & arg_data + , int const arg_league_rank + , int const arg_league_size + ) noexcept + : m_scratch( arg_data.team_shared() + , arg_data.team_shared_bytes() + , arg_data.team_shared() + , arg_data.team_shared_bytes() ) + , m_data( arg_data ) + , m_league_rank( arg_league_rank ) + , m_league_size( arg_league_size ) + {} + + ~HostThreadTeamMember() = default ; + HostThreadTeamMember() = delete ; + HostThreadTeamMember( HostThreadTeamMember && ) = default ; + HostThreadTeamMember( HostThreadTeamMember const & ) = default ; + HostThreadTeamMember & operator = ( HostThreadTeamMember && ) = default ; + HostThreadTeamMember & operator = ( HostThreadTeamMember const & ) = default ; + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + int team_rank() const noexcept { return m_data.m_team_rank ; } + + KOKKOS_INLINE_FUNCTION + int team_size() const noexcept { return m_data.m_team_size ; } + + KOKKOS_INLINE_FUNCTION + int league_rank() const noexcept { return m_league_rank ; } + + KOKKOS_INLINE_FUNCTION + int league_size() const noexcept { return m_league_size ; } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + const scratch_memory_space & team_shmem() const + { return m_scratch.set_team_thread_mode(0,1,0); } + + 
KOKKOS_INLINE_FUNCTION + const scratch_memory_space & team_scratch(int) const + { return m_scratch.set_team_thread_mode(0,1,0); } + + KOKKOS_INLINE_FUNCTION + const scratch_memory_space & thread_scratch(int) const + { return m_scratch.set_team_thread_mode(0,m_data.m_team_size,m_data.m_team_rank); } + + //---------------------------------------- + // Team collectives + + KOKKOS_INLINE_FUNCTION void team_barrier() const noexcept +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { + if ( m_data.team_rendezvous() ) m_data.team_rendezvous_release(); + } +#else + {} +#endif + + template< class Closure > + KOKKOS_INLINE_FUNCTION + void team_barrier( Closure const & f ) const noexcept + { + if ( m_data.team_rendezvous() ) { + + // All threads have entered 'team_rendezvous' + // only this thread returned from 'team_rendezvous' + // with a return value of 'true' + + f(); + + m_data.team_rendezvous_release(); + } + } + + //-------------------------------------------------------------------------- + + template< typename T > + KOKKOS_INLINE_FUNCTION + void team_broadcast( T & value , const int source_team_rank ) const noexcept +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { + if ( 1 < m_data.m_team_size ) { + T volatile * const shared_value = (T*) m_data.team_reduce(); + + // Don't overwrite shared memory until all threads arrive + + if ( m_data.team_rendezvous( source_team_rank ) ) { + // All threads have entered 'team_rendezvous' + // only this thread returned from 'team_rendezvous' + // with a return value of 'true' + + *shared_value = value ; + + m_data.team_rendezvous_release(); + // This thread released all other threads from 'team_rendezvous' + // with a return value of 'false' + } + else { + value = *shared_value ; + } + } + } +#else + { Kokkos::abort("HostThreadTeamMember team_broadcast\n"); } +#endif + + //-------------------------------------------------------------------------- + + template< class Closure , typename T > + 
KOKKOS_INLINE_FUNCTION + void team_broadcast( Closure const & f , T & value , const int source_team_rank) const noexcept + { + T volatile * const shared_value = (T*) m_data.team_reduce(); + + // Don't overwrite shared memory until all threads arrive + + if ( m_data.team_rendezvous(source_team_rank) ) { + + // All threads have entered 'team_rendezvous' + // only this thread returned from 'team_rendezvous' + // with a return value of 'true' + + f( value ); + + if ( 1 < m_data.m_team_size ) { *shared_value = value ; } + + m_data.team_rendezvous_release(); + // This thread released all other threads from 'team_rendezvous' + // with a return value of 'false' + } + else { + value = *shared_value ; + } + } + + //-------------------------------------------------------------------------- + // team_reduce( Sum(result) ); + // team_reduce( Min(result) ); + // team_reduce( Max(result) ); + + template< typename ReducerType > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< is_reducer< ReducerType >::value >::type + team_reduce( ReducerType const & reducer ) const noexcept +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { + if ( 1 < m_data.m_team_size ) { + + using value_type = typename ReducerType::value_type ; + + if ( 0 != m_data.m_team_rank ) { + // Non-root copies to their local buffer: + reducer.copy( (value_type*) m_data.team_reduce_local() + , reducer.data() ); + } + + // Root does not overwrite shared memory until all threads arrive + // and copy to their local buffer. 
+ + if ( m_data.team_rendezvous() ) { + // All threads have entered 'team_rendezvous' + // only this thread returned from 'team_rendezvous' + // with a return value of 'true' + // + // This thread sums contributed values + for ( int i = 1 ; i < m_data.m_team_size ; ++i ) { + value_type * const src = + (value_type*) m_data.team_member(i)->team_reduce_local(); + + reducer.join( reducer.data() , src ); + } + + // Copy result to root member's buffer: + reducer.copy( (value_type*) m_data.team_reduce() , reducer.data() ); + + m_data.team_rendezvous_release(); + // This thread released all other threads from 'team_rendezvous' + // with a return value of 'false' + } + else { + // Copy from root member's buffer: + reducer.copy( reducer.data() , (value_type*) m_data.team_reduce() ); + } + } + } +#else + { Kokkos::abort("HostThreadTeamMember team_reduce\n"); } +#endif + + //-------------------------------------------------------------------------- + + template< typename ValueType , class JoinOp > + KOKKOS_INLINE_FUNCTION + ValueType + team_reduce( ValueType const & value + , JoinOp const & join ) const noexcept +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { + if ( 0 != m_data.m_team_rank ) { + // Non-root copies to their local buffer: + *((ValueType*) m_data.team_reduce_local()) = value ; + } + + // Root does not overwrite shared memory until all threads arrive + // and copy to their local buffer. 
+ + if ( m_data.team_rendezvous() ) { + const Impl::Reducer< ValueType , JoinOp > reducer( join ); + + // All threads have entered 'team_rendezvous' + // only this thread returned from 'team_rendezvous' + // with a return value of 'true' + // + // This thread sums contributed values + + ValueType * const dst = (ValueType*) m_data.team_reduce_local(); + + *dst = value ; + + for ( int i = 1 ; i < m_data.m_team_size ; ++i ) { + ValueType * const src = + (ValueType*) m_data.team_member(i)->team_reduce_local(); + + reducer.join( dst , src ); + } + + m_data.team_rendezvous_release(); + // This thread released all other threads from 'team_rendezvous' + // with a return value of 'false' + } + + return *((ValueType*) m_data.team_reduce()); + } +#else + { Kokkos::abort("HostThreadTeamMember team_reduce\n"); return ValueType(); } +#endif + + + template< typename T > + KOKKOS_INLINE_FUNCTION + T team_scan( T const & value , T * const global = 0 ) const noexcept +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { + if ( 0 != m_data.m_team_rank ) { + // Non-root copies to their local buffer: + ((T*) m_data.team_reduce_local())[1] = value ; + } + + // Root does not overwrite shared memory until all threads arrive + // and copy to their local buffer. + + if ( m_data.team_rendezvous() ) { + // All threads have entered 'team_rendezvous' + // only this thread returned from 'team_rendezvous' + // with a return value of 'true' + // + // This thread scans contributed values + + { + T * prev = (T*) m_data.team_reduce_local(); + + prev[0] = 0 ; + prev[1] = value ; + + for ( int i = 1 ; i < m_data.m_team_size ; ++i ) { + T * const ptr = (T*) m_data.team_member(i)->team_reduce_local(); + + ptr[0] = prev[0] + prev[1] ; + + prev = ptr ; + } + } + + // If adding to global value then atomic_fetch_add to that value + // and sum previous value to every entry of the scan. 
+ if ( global ) { + T * prev = (T*) m_data.team_reduce_local(); + + { + T * ptr = (T*) m_data.team_member( m_data.m_team_size - 1 )->team_reduce_local(); + prev[0] = Kokkos::atomic_fetch_add( global , ptr[0] + ptr[1] ); + } + + for ( int i = 1 ; i < m_data.m_team_size ; ++i ) { + T * ptr = (T*) m_data.team_member(i)->team_reduce_local(); + ptr[0] += prev[0] ; + } + } + + m_data.team_rendezvous_release(); + } + + return ((T*) m_data.team_reduce_local())[0]; + } +#else + { Kokkos::abort("HostThreadTeamMember team_scan\n"); return T(); } +#endif + +}; + + +}} /* namespace Kokkos::Impl */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +template +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct > +TeamThreadRange( Impl::HostThreadTeamMember const & member + , iType const & count ) +{ + return + Impl::TeamThreadRangeBoundariesStruct + >(member,0,count); +} + +template +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct + < typename std::common_type< iType1, iType2 >::type + , Impl::HostThreadTeamMember > +TeamThreadRange( Impl::HostThreadTeamMember const & member + , iType1 const & begin , iType2 const & end ) +{ + return + Impl::TeamThreadRangeBoundariesStruct + < typename std::common_type< iType1, iType2 >::type + , Impl::HostThreadTeamMember >( member , begin , end ); +} + +template +KOKKOS_INLINE_FUNCTION +Impl::ThreadVectorRangeBoundariesStruct > +ThreadVectorRange + ( Impl::HostThreadTeamMember const & member + , const iType & count ) +{ + return Impl::ThreadVectorRangeBoundariesStruct >(member,count); +} + +//---------------------------------------------------------------------------- +/** \brief Inter-thread parallel_for. + * + * Executes lambda(iType i) for each i=[0..N) + * + * The range [0..N) is mapped to all threads of the the calling thread team. 
+*/ +template +KOKKOS_INLINE_FUNCTION +void parallel_for + ( Impl::TeamThreadRangeBoundariesStruct > const & loop_boundaries + , Closure const & closure + ) +{ + for( iType i = loop_boundaries.start + ; i < loop_boundaries.end + ; i += loop_boundaries.increment ) { + closure (i); + } +} + +template +KOKKOS_INLINE_FUNCTION +void parallel_for + ( Impl::ThreadVectorRangeBoundariesStruct > const & loop_boundaries + , Closure const & closure + ) +{ + #ifdef KOKKOS_ENABLE_PRAGMA_IVDEP + #pragma ivdep + #endif + for( iType i = loop_boundaries.start + ; i < loop_boundaries.end + ; i += loop_boundaries.increment ) { + closure (i); + } +} + +//---------------------------------------------------------------------------- + +template< typename iType, class Space, class Closure, class Reducer > +KOKKOS_INLINE_FUNCTION +typename std::enable_if< Kokkos::is_reducer< Reducer >::value >::type +parallel_reduce + ( Impl::TeamThreadRangeBoundariesStruct > + const & loop_boundaries + , Closure const & closure + , Reducer const & reducer + ) +{ + reducer.init( reducer.data() ); + + for( iType i = loop_boundaries.start + ; i < loop_boundaries.end + ; i += loop_boundaries.increment ) { + closure( i , reducer.reference() ); + } + + loop_boundaries.thread.team_reduce( reducer ); +} + +template< typename iType, class Space, typename Closure, typename ValueType > +KOKKOS_INLINE_FUNCTION +typename std::enable_if< ! 
Kokkos::is_reducer::value >::type +parallel_reduce + ( Impl::TeamThreadRangeBoundariesStruct > + const & loop_boundaries + , Closure const & closure + , ValueType & result + ) +{ + Impl::Reducer< ValueType , Impl::ReduceSum< ValueType > > reducer( & result ); + + reducer.init( reducer.data() ); + + for( iType i = loop_boundaries.start + ; i < loop_boundaries.end + ; i += loop_boundaries.increment ) { + closure( i , reducer.reference() ); + } + + loop_boundaries.thread.team_reduce( reducer ); +} + +template< typename iType, class Space + , class Closure, class Joiner , typename ValueType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce + ( Impl::TeamThreadRangeBoundariesStruct > + const & loop_boundaries + , Closure const & closure + , Joiner const & joiner + , ValueType & result + ) +{ + Impl::Reducer< ValueType , Joiner > reducer( joiner , & result ); + + reducer.init( reducer.data() ); + + for( iType i = loop_boundaries.start + ; i < loop_boundaries.end + ; i += loop_boundaries.increment ) { + closure( i , reducer.reference() ); + } + + loop_boundaries.thread.team_reduce( reducer ); +} + +//---------------------------------------------------------------------------- +/** \brief Inter-thread vector parallel_reduce. + * + * Executes lambda(iType i, ValueType & val) for each i=[0..N) + * + * The range [0..N) is mapped to all threads of the + * calling thread team and a summation of val is + * performed and put into result. + */ +template< typename iType, class Space , class Lambda, typename ValueType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce + (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, + const Lambda & lambda, + ValueType& result) +{ + result = ValueType(); +#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP +#pragma ivdep +#endif + for( iType i = loop_boundaries.start ; + i < loop_boundaries.end ; + i += loop_boundaries.increment) { + lambda(i,result); + } +} + +/** \brief Intra-thread vector parallel_reduce. 
+ * + * Executes lambda(iType i, ValueType & val) for each i=[0..N) + * + * The range [0..N) is mapped to all vector lanes of the + * calling thread and a reduction of val is performed using + * JoinType(ValueType& val, const ValueType& update) + * and put into result. + * The input value of result is used as initializer for + * temporary variables of ValueType. Therefore the input + * value should be the neutral element with respect to the + * join operation (e.g. '0 for +-' or '1 for *'). + */ +template< typename iType, class Space + , class Lambda, class JoinType , typename ValueType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce + (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, + const Lambda & lambda, + const JoinType & join, + ValueType& result) +{ +#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP +#pragma ivdep +#endif + for( iType i = loop_boundaries.start ; + i < loop_boundaries.end ; + i += loop_boundaries.increment ) { + lambda(i,result); + } +} + +//---------------------------------------------------------------------------- + +template< typename iType, class Space, class Closure > +KOKKOS_INLINE_FUNCTION +void parallel_scan + ( Impl::TeamThreadRangeBoundariesStruct > const & loop_boundaries + , Closure const & closure + ) +{ + // Extract ValueType from the closure + + using value_type = + typename Kokkos::Impl::FunctorAnalysis + < Kokkos::Impl::FunctorPatternInterface::SCAN + , void + , Closure >::value_type ; + + value_type accum = 0 ; + + // Intra-member scan + for ( iType i = loop_boundaries.start + ; i < loop_boundaries.end + ; i += loop_boundaries.increment ) { + closure(i,accum,false); + } + + // 'accum' output is the exclusive prefix sum + accum = loop_boundaries.thread.team_scan(accum); + + for ( iType i = loop_boundaries.start + ; i < loop_boundaries.end + ; i += loop_boundaries.increment ) { + closure(i,accum,true); + } +} + + +template< typename iType, class Space, class ClosureType > +KOKKOS_INLINE_FUNCTION +void 
parallel_scan + ( Impl::ThreadVectorRangeBoundariesStruct > const & loop_boundaries + , ClosureType const & closure + ) +{ + using value_type = typename + Kokkos::Impl::FunctorAnalysis + < Impl::FunctorPatternInterface::SCAN + , void + , ClosureType >::value_type ; + + value_type scan_val = value_type(); + +#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP +#pragma ivdep +#endif + for ( iType i = loop_boundaries.start + ; i < loop_boundaries.end + ; i += loop_boundaries.increment ) { + closure(i,scan_val,true); + } +} + +//---------------------------------------------------------------------------- + +template< class Space > +KOKKOS_INLINE_FUNCTION +Impl::ThreadSingleStruct > +PerTeam(const Impl::HostThreadTeamMember & member ) +{ + return Impl::ThreadSingleStruct >(member); +} + +template< class Space > +KOKKOS_INLINE_FUNCTION +Impl::VectorSingleStruct > +PerThread(const Impl::HostThreadTeamMember & member) +{ + return Impl::VectorSingleStruct >(member); +} + +template< class Space , class FunctorType > +KOKKOS_INLINE_FUNCTION +void single( const Impl::ThreadSingleStruct< Impl::HostThreadTeamMember > & single , const FunctorType & functor ) +{ + if ( single.team_member.team_rank() == 0 ) functor(); + // 'single' does not perform a barrier. 
+ // single.team_member.team_barrier( functor ); +} + +template< class Space , class FunctorType , typename ValueType > +KOKKOS_INLINE_FUNCTION +void single( const Impl::ThreadSingleStruct< Impl::HostThreadTeamMember > & single , const FunctorType & functor , ValueType & val ) +{ + single.team_member.team_broadcast( functor , val , 0 ); +} + +template< class Space , class FunctorType > +KOKKOS_INLINE_FUNCTION +void single( const Impl::VectorSingleStruct< Impl::HostThreadTeamMember > & , const FunctorType & functor ) +{ + functor(); +} + +template< class Space , class FunctorType , typename ValueType > +KOKKOS_INLINE_FUNCTION +void single( const Impl::VectorSingleStruct< Impl::HostThreadTeamMember > & , const FunctorType & functor , ValueType & val ) +{ + functor(val); +} + +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #ifndef KOKKOS_IMPL_HOSTTHREADTEAM_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp b/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp index 84cf536bb7..7489018ac6 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp @@ -52,6 +52,10 @@ void memory_fence() { #if defined( __CUDA_ARCH__ ) __threadfence(); +#elif defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) + asm volatile ( + "mfence" ::: "memory" + ); #elif defined( KOKKOS_ENABLE_GNU_ATOMICS ) || \ ( defined( KOKKOS_COMPILER_NVCC ) && defined( KOKKOS_ENABLE_INTEL_ATOMICS ) ) __sync_synchronize(); @@ -76,8 +80,8 @@ void store_fence() { #if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) asm volatile ( - "sfence" ::: "memory" - ); + "sfence" ::: "memory" + ); #else memory_fence(); #endif @@ -93,8 +97,8 @@ void load_fence() { #if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) asm volatile ( - "lfence" ::: 
"memory" - ); + "lfence" ::: "memory" + ); #else memory_fence(); #endif diff --git a/lib/kokkos/core/src/impl/Kokkos_OldMacros.hpp b/lib/kokkos/core/src/impl/Kokkos_OldMacros.hpp index da95c943fe..5852efb011 100644 --- a/lib/kokkos/core/src/impl/Kokkos_OldMacros.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_OldMacros.hpp @@ -129,8 +129,8 @@ #endif #ifdef KOKKOS_HAVE_CUDA_RDC -#ifndef KOKKOS_ENABLE_CUDA_RDC -#define KOKKOS_ENABLE_CUDA_RDC KOKKOS_HAVE_CUDA_RDC +#ifndef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE +#define KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE KOKKOS_HAVE_CUDA_RDC #endif #endif @@ -242,9 +242,9 @@ #endif #endif -#ifdef KOKKOS_HAVE_QTHREAD -#ifndef KOKKOS_ENABLE_QTHREAD -#define KOKKOS_ENABLE_QTHREAD KOKKOS_HAVE_QTHREAD +#ifdef KOKKOS_HAVE_QTHREADS +#ifndef KOKKOS_ENABLE_QTHREADS +#define KOKKOS_ENABLE_QTHREADS KOKKOS_HAVE_QTHREADS #endif #endif diff --git a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp index 99c5df4db3..0c006a8c00 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp @@ -43,7 +43,7 @@ #include -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) #include namespace Kokkos { @@ -84,21 +84,21 @@ namespace Kokkos { (*endScanCallee)(kernelID); } } - + void beginParallelReduce(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) { if(NULL != beginReduceCallee) { Kokkos::fence(); (*beginReduceCallee)(kernelPrefix.c_str(), devID, kernelID); } } - + void endParallelReduce(const uint64_t kernelID) { if(NULL != endReduceCallee) { Kokkos::fence(); (*endReduceCallee)(kernelID); } } - + void pushRegion(const std::string& kName) { if( NULL != pushRegionCallee ) { diff --git a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp index 3d6a389252..139a20d8f9 100644 --- 
a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp @@ -50,7 +50,7 @@ #include #include -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) #include #include #include @@ -59,7 +59,7 @@ #define KOKKOSP_INTERFACE_VERSION 20150628 -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) namespace Kokkos { namespace Profiling { diff --git a/lib/kokkos/core/src/impl/Kokkos_Reducer.hpp b/lib/kokkos/core/src/impl/Kokkos_Reducer.hpp new file mode 100644 index 0000000000..b3ed5f1514 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Reducer.hpp @@ -0,0 +1,317 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_REDUCER_HPP +#define KOKKOS_IMPL_REDUCER_HPP + +#include + +//---------------------------------------------------------------------------- +/* Reducer abstraction: + * 1) Provides 'join' operation + * 2) Provides 'init' operation + * 3) Provides 'copy' operation + * 4) Optionally provides result value in a memory space + * + * Created from: + * 1) Functor::operator()( destination , source ) + * 2) Functor::{ join , init ) + */ +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< typename value_type > +struct ReduceSum +{ + KOKKOS_INLINE_FUNCTION static + void copy( value_type & dest + , value_type const & src ) noexcept + { dest = src ; } + + KOKKOS_INLINE_FUNCTION static + void init( value_type & dest ) noexcept + { new( &dest ) value_type(); } + + KOKKOS_INLINE_FUNCTION static + void join( value_type volatile & dest + , value_type const volatile & src ) noexcept + { dest += src ; } + + KOKKOS_INLINE_FUNCTION static + void join( value_type & dest + , value_type const & src ) noexcept + { dest += src ; } +}; + +template< typename T + , class ReduceOp = ReduceSum< T > + , typename MemorySpace = void > +struct Reducer + : private ReduceOp + , private 
integral_nonzero_constant + < int , ( std::rank::value == 1 ? std::extent::value : 1 )> +{ +private: + + // Determine if T is simple array + + enum : int { rank = std::rank::value }; + + static_assert( rank <= 1 , "Kokkos::Impl::Reducer type is at most rank-one" ); + + using length_t = + integral_nonzero_constant::value : 1 )> ; + +public: + + using reducer = Reducer ; + using memory_space = MemorySpace ; + using value_type = typename std::remove_extent::type ; + using reference_type = + typename std::conditional< ( rank != 0 ) + , value_type * + , value_type & + >::type ; +private: + + //-------------------------------------------------------------------------- + // Determine what functions 'ReduceOp' provides: + // copy( destination , source ) + // init( destination ) + // + // operator()( destination , source ) + // join( destination , source ) + // + // Provide defaults for missing optional operations + + template< class R , typename = void> + struct COPY { + KOKKOS_INLINE_FUNCTION static + void copy( R const & + , value_type * dst + , value_type const * src ) { *dst = *src ; } + }; + + template< class R > + struct COPY< R , decltype( ((R*)0)->copy( *((value_type*)0) + , *((value_type const *)0) ) ) > + { + KOKKOS_INLINE_FUNCTION static + void copy( R const & r + , value_type * dst + , value_type const * src ) { r.copy( *dst , *src ); } + }; + + template< class R , typename = void > + struct INIT { + KOKKOS_INLINE_FUNCTION static + void init( R const & , value_type * dst ) { new(dst) value_type(); } + }; + + template< class R > + struct INIT< R , decltype( ((R*)0)->init( *((value_type*)0 ) ) ) > + { + KOKKOS_INLINE_FUNCTION static + void init( R const & r , value_type * dst ) { r.init( *dst ); } + }; + + template< class R , typename V , typename = void > struct JOIN + { + // If no join function then try operator() + KOKKOS_INLINE_FUNCTION static + void join( R const & r , V * dst , V const * src ) + { r.operator()(*dst,*src); } + }; + + template< class R , 
typename V > + struct JOIN< R , V , decltype( ((R*)0)->join ( *((V *)0) , *((V const *)0) ) ) > + { + // If has join function use it + KOKKOS_INLINE_FUNCTION static + void join( R const & r , V * dst , V const * src ) + { r.join(*dst,*src); } + }; + + //-------------------------------------------------------------------------- + + value_type * const m_result ; + + template< int Rank > + KOKKOS_INLINE_FUNCTION + static constexpr + typename std::enable_if< ( 0 != Rank ) , reference_type >::type + ref( value_type * p ) noexcept { return p ; } + + template< int Rank > + KOKKOS_INLINE_FUNCTION + static constexpr + typename std::enable_if< ( 0 == Rank ) , reference_type >::type + ref( value_type * p ) noexcept { return *p ; } + +public: + + //-------------------------------------------------------------------------- + + KOKKOS_INLINE_FUNCTION + constexpr int length() const noexcept + { return length_t::value ; } + + KOKKOS_INLINE_FUNCTION + value_type * data() const noexcept + { return m_result ; } + + KOKKOS_INLINE_FUNCTION + reference_type reference() const noexcept + { return Reducer::template ref< rank >( m_result ); } + + //-------------------------------------------------------------------------- + + KOKKOS_INLINE_FUNCTION + void copy( value_type * const dest + , value_type const * const src ) const noexcept + { + for ( int i = 0 ; i < length() ; ++i ) { + Reducer::template COPY::copy( (ReduceOp &) *this , dest + i , src + i ); + } + } + + KOKKOS_INLINE_FUNCTION + void init( value_type * dest ) const noexcept + { + for ( int i = 0 ; i < length() ; ++i ) { + Reducer::template INIT::init( (ReduceOp &) *this , dest + i ); + } + } + + KOKKOS_INLINE_FUNCTION + void join( value_type * const dest + , value_type const * const src ) const noexcept + { + for ( int i = 0 ; i < length() ; ++i ) { + Reducer::template JOIN::join( (ReduceOp &) *this , dest + i , src + i ); + } + } + + KOKKOS_INLINE_FUNCTION + void join( value_type volatile * const dest + , value_type volatile 
const * const src ) const noexcept + { + for ( int i = 0 ; i < length() ; ++i ) { + Reducer::template JOIN::join( (ReduceOp &) *this , dest + i , src + i ); + } + } + + //-------------------------------------------------------------------------- + + template< typename ArgT > + KOKKOS_INLINE_FUNCTION explicit + constexpr Reducer + ( ArgT * arg_value + , typename std::enable_if + < std::is_same::value && + std::is_default_constructible< ReduceOp >::value + , int >::type arg_length = 1 + ) noexcept + : ReduceOp(), length_t( arg_length ), m_result( arg_value ) {} + + KOKKOS_INLINE_FUNCTION explicit + constexpr Reducer( ReduceOp const & arg_op + , value_type * arg_value = 0 + , int arg_length = 1 ) noexcept + : ReduceOp( arg_op ), length_t( arg_length ), m_result( arg_value ) {} + + KOKKOS_INLINE_FUNCTION explicit + constexpr Reducer( ReduceOp && arg_op + , value_type * arg_value = 0 + , int arg_length = 1 ) noexcept + : ReduceOp( arg_op ), length_t( arg_length ), m_result( arg_value ) {} + + Reducer( Reducer const & ) = default ; + Reducer( Reducer && ) = default ; + Reducer & operator = ( Reducer const & ) = default ; + Reducer & operator = ( Reducer && ) = default ; +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +template< typename ValueType > +constexpr +Impl::Reducer< ValueType , Impl::ReduceSum< ValueType > > +Sum( ValueType & arg_value ) +{ + static_assert( std::is_trivial::value + , "Kokkos reducer requires trivial value type" ); + return Impl::Reducer< ValueType , Impl::ReduceSum< ValueType > >( & arg_value ); +} + +template< typename ValueType > +constexpr +Impl::Reducer< ValueType[] , Impl::ReduceSum< ValueType > > +Sum( ValueType * arg_value , int arg_length ) +{ + static_assert( std::is_trivial::value + , "Kokkos reducer requires trivial value type" ); + return 
Impl::Reducer< ValueType[] , Impl::ReduceSum< ValueType > >( arg_value , arg_length ); +} + +//---------------------------------------------------------------------------- + +template< typename ValueType , class JoinType > +Impl::Reducer< ValueType , JoinType > +reducer( ValueType & value , JoinType const & lambda ) +{ + return Impl::Reducer< ValueType , JoinType >( lambda , & value ); +} + +} // namespace Kokkos + +#endif /* #ifndef KOKKOS_IMPL_REDUCER_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial.cpp b/lib/kokkos/core/src/impl/Kokkos_Serial.cpp index 76161c10f1..7949613306 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Serial.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Serial.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -53,63 +53,126 @@ namespace Kokkos { namespace Impl { -namespace SerialImpl { +namespace { -Sentinel::Sentinel() : m_scratch(0), m_reduce_end(0), m_shared_end(0) {} +HostThreadTeamData g_serial_thread_team_data ; -Sentinel::~Sentinel() -{ - if ( m_scratch ) { free( m_scratch ); } - m_scratch = 0 ; - m_reduce_end = 0 ; - m_shared_end = 0 ; } -Sentinel & Sentinel::singleton() +// Resize thread team data scratch memory +void serial_resize_thread_team_data( size_t pool_reduce_bytes + , size_t team_reduce_bytes + , size_t team_shared_bytes + , size_t thread_local_bytes ) { - static Sentinel s ; return s ; + if ( pool_reduce_bytes < 512 ) pool_reduce_bytes = 512 ; + if ( team_reduce_bytes < 512 ) team_reduce_bytes = 512 ; + + const size_t old_pool_reduce = g_serial_thread_team_data.pool_reduce_bytes(); + const size_t old_team_reduce = g_serial_thread_team_data.team_reduce_bytes(); + const size_t old_team_shared = g_serial_thread_team_data.team_shared_bytes(); + const size_t old_thread_local = g_serial_thread_team_data.thread_local_bytes(); + const size_t old_alloc_bytes = g_serial_thread_team_data.scratch_bytes(); + + // Allocate if any of the old allocation is tool small: + + const bool allocate = ( old_pool_reduce < pool_reduce_bytes ) || + ( old_team_reduce < team_reduce_bytes ) || + ( old_team_shared < team_shared_bytes ) || + ( old_thread_local < thread_local_bytes ); + + if ( allocate ) { + + Kokkos::HostSpace space ; + + if ( old_alloc_bytes ) { + g_serial_thread_team_data.disband_team(); + g_serial_thread_team_data.disband_pool(); + + space.deallocate( g_serial_thread_team_data.scratch_buffer() + , g_serial_thread_team_data.scratch_bytes() ); + } + + if ( pool_reduce_bytes < old_pool_reduce ) { pool_reduce_bytes = old_pool_reduce ; } + if ( team_reduce_bytes < old_team_reduce ) { team_reduce_bytes = old_team_reduce ; } + if ( 
team_shared_bytes < old_team_shared ) { team_shared_bytes = old_team_shared ; } + if ( thread_local_bytes < old_thread_local ) { thread_local_bytes = old_thread_local ; } + + const size_t alloc_bytes = + HostThreadTeamData::scratch_size( pool_reduce_bytes + , team_reduce_bytes + , team_shared_bytes + , thread_local_bytes ); + + void * const ptr = space.allocate( alloc_bytes ); + + g_serial_thread_team_data. + scratch_assign( ((char *)ptr) + , alloc_bytes + , pool_reduce_bytes + , team_reduce_bytes + , team_shared_bytes + , thread_local_bytes ); + + HostThreadTeamData * pool[1] = { & g_serial_thread_team_data }; + + g_serial_thread_team_data.organize_pool( pool , 1 ); + g_serial_thread_team_data.organize_team(1); + } } -inline -unsigned align( unsigned n ) +// Get thread team data structure for omp_get_thread_num() +HostThreadTeamData * serial_get_thread_team_data() { - enum { ALIGN = 0x0100 /* 256 */ , MASK = ALIGN - 1 }; - return ( n + MASK ) & ~MASK ; + return & g_serial_thread_team_data ; } -} // namespace - -SerialTeamMember::SerialTeamMember( int arg_league_rank - , int arg_league_size - , int arg_shared_size - ) - : m_space( ((char *) SerialImpl::Sentinel::singleton().m_scratch) + SerialImpl::Sentinel::singleton().m_reduce_end - , arg_shared_size ) - , m_league_rank( arg_league_rank ) - , m_league_size( arg_league_size ) -{} - } // namespace Impl +} // namespace Kokkos -void * Serial::scratch_memory_resize( unsigned reduce_size , unsigned shared_size ) +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { + +int Serial::is_initialized() { - static Impl::SerialImpl::Sentinel & s = Impl::SerialImpl::Sentinel::singleton(); + return 1 ; +} - reduce_size = Impl::SerialImpl::align( reduce_size ); - shared_size = Impl::SerialImpl::align( shared_size ); +void Serial::initialize( unsigned threads_count + , unsigned use_numa_count + , unsigned use_cores_per_numa + , bool allow_asynchronous_threadpool ) +{ + (void) 
threads_count; + (void) use_numa_count; + (void) use_cores_per_numa; + (void) allow_asynchronous_threadpool; - if ( ( s.m_reduce_end < reduce_size ) || - ( s.m_shared_end < s.m_reduce_end + shared_size ) ) { + // Init the array of locks used for arbitrarily sized atomics + Impl::init_lock_array_host_space(); + #if defined(KOKKOS_ENABLE_PROFILING) + Kokkos::Profiling::initialize(); + #endif +} - if ( s.m_scratch ) { free( s.m_scratch ); } +void Serial::finalize() +{ + if ( Impl::g_serial_thread_team_data.scratch_buffer() ) { + Impl::g_serial_thread_team_data.disband_team(); + Impl::g_serial_thread_team_data.disband_pool(); - if ( s.m_reduce_end < reduce_size ) s.m_reduce_end = reduce_size ; - if ( s.m_shared_end < s.m_reduce_end + shared_size ) s.m_shared_end = s.m_reduce_end + shared_size ; + Kokkos::HostSpace space ; - s.m_scratch = malloc( s.m_shared_end ); + space.deallocate( Impl::g_serial_thread_team_data.scratch_buffer() + , Impl::g_serial_thread_team_data.scratch_bytes() ); + + Impl::g_serial_thread_team_data.scratch_assign( (void*) 0, 0, 0, 0, 0, 0 ); } - return s.m_scratch ; + #if defined(KOKKOS_ENABLE_PROFILING) + Kokkos::Profiling::finalize(); + #endif } } // namespace Kokkos diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp index 19f3abe71a..d22d604fbc 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp @@ -62,11 +62,13 @@ void TaskQueueSpecialization< Kokkos::Serial >::execute using execution_space = Kokkos::Serial ; using queue_type = TaskQueue< execution_space > ; using task_root_type = TaskBase< execution_space , void , void > ; - using Member = TaskExec< execution_space > ; + using Member = Impl::HostThreadTeamMember< execution_space > ; task_root_type * const end = (task_root_type *) task_root_type::EndTag ; - Member exec ; + Impl::HostThreadTeamData * const data = Impl::serial_get_thread_team_data(); + + Member exec( *data 
); // Loop until all queues are empty while ( 0 < queue->m_ready_count ) { @@ -75,13 +77,13 @@ void TaskQueueSpecialization< Kokkos::Serial >::execute for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { for ( int j = 0 ; j < 2 && end == task ; ++j ) { - task = queue_type::pop_task( & queue->m_ready[i][j] ); + task = queue_type::pop_ready_task( & queue->m_ready[i][j] ); } } if ( end != task ) { - // pop_task resulted in lock == task->m_next + // pop_ready_task resulted in lock == task->m_next // In the executing state (*task->m_apply)( task , & exec ); @@ -113,11 +115,13 @@ void TaskQueueSpecialization< Kokkos::Serial > :: using execution_space = Kokkos::Serial ; using queue_type = TaskQueue< execution_space > ; using task_root_type = TaskBase< execution_space , void , void > ; - using Member = TaskExec< execution_space > ; + using Member = Impl::HostThreadTeamMember< execution_space > ; task_root_type * const end = (task_root_type *) task_root_type::EndTag ; - Member exec ; + Impl::HostThreadTeamData * const data = Impl::serial_get_thread_team_data(); + + Member exec( *data ); // Loop until no runnable task @@ -129,7 +133,7 @@ void TaskQueueSpecialization< Kokkos::Serial > :: for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { for ( int j = 0 ; j < 2 && end == task ; ++j ) { - task = queue_type::pop_task( & queue->m_ready[i][j] ); + task = queue_type::pop_ready_task( & queue->m_ready[i][j] ); } } diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp index 178305c5d3..ac7f17c0ea 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp @@ -65,6 +65,7 @@ public: using memory_space = Kokkos::HostSpace ; using queue_type = Kokkos::Impl::TaskQueue< execution_space > ; using task_base_type = Kokkos::Impl::TaskBase< execution_space , void , void > ; + using member_type = Kokkos::Impl::HostThreadTeamMember< execution_space > ; 
static void iff_single_thread_recursive_execute( queue_type * const ); @@ -72,237 +73,19 @@ public: static void execute( queue_type * const ); - template< typename FunctorType > + template< typename TaskType > static - void proc_set_apply( task_base_type::function_type * ptr ) - { - using TaskType = TaskBase< Kokkos::Serial - , typename FunctorType::value_type - , FunctorType - > ; - *ptr = TaskType::apply ; - } + typename TaskType::function_type + get_function_pointer() { return TaskType::apply ; } }; extern template class TaskQueue< Kokkos::Serial > ; -//---------------------------------------------------------------------------- - -template<> -class TaskExec< Kokkos::Serial > -{ -public: - - KOKKOS_INLINE_FUNCTION void team_barrier() const {} - KOKKOS_INLINE_FUNCTION int team_rank() const { return 0 ; } - KOKKOS_INLINE_FUNCTION int team_size() const { return 1 ; } -}; - -template -struct TeamThreadRangeBoundariesStruct > -{ - typedef iType index_type; - const iType start ; - const iType end ; - enum {increment = 1}; - //const TaskExec< Kokkos::Serial > & thread; - TaskExec< Kokkos::Serial > & thread; - - KOKKOS_INLINE_FUNCTION - TeamThreadRangeBoundariesStruct - //( const TaskExec< Kokkos::Serial > & arg_thread, const iType& arg_count) - ( TaskExec< Kokkos::Serial > & arg_thread, const iType& arg_count) - : start(0) - , end(arg_count) - , thread(arg_thread) - {} - - KOKKOS_INLINE_FUNCTION - TeamThreadRangeBoundariesStruct - //( const TaskExec< Kokkos::Serial > & arg_thread - ( TaskExec< Kokkos::Serial > & arg_thread - , const iType& arg_start - , const iType & arg_end - ) - : start( arg_start ) - , end( arg_end) - , thread( arg_thread ) - {} -}; - -//---------------------------------------------------------------------------- - -template -struct ThreadVectorRangeBoundariesStruct > -{ - typedef iType index_type; - const iType start ; - const iType end ; - enum {increment = 1}; - TaskExec< Kokkos::Serial > & thread; - - KOKKOS_INLINE_FUNCTION - 
ThreadVectorRangeBoundariesStruct - ( TaskExec< Kokkos::Serial > & arg_thread, const iType& arg_count) - : start( 0 ) - , end(arg_count) - , thread(arg_thread) - {} -}; - }} /* namespace Kokkos::Impl */ //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- -namespace Kokkos { - -// OMP version needs non-const TaskExec -template< typename iType > -KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Serial > > -TeamThreadRange( Impl::TaskExec< Kokkos::Serial > & thread, const iType & count ) -{ - return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Serial > >( thread, count ); -} - -// OMP version needs non-const TaskExec -template< typename iType1, typename iType2 > -KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type, - Impl::TaskExec< Kokkos::Serial > > -TeamThreadRange( Impl::TaskExec< Kokkos::Serial > & thread, const iType1 & start, const iType2 & end ) -{ - typedef typename std::common_type< iType1, iType2 >::type iType; - return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Serial > >( - thread, iType(start), iType(end) ); -} - -// OMP version needs non-const TaskExec -template -KOKKOS_INLINE_FUNCTION -Impl::ThreadVectorRangeBoundariesStruct > -ThreadVectorRange - ( Impl::TaskExec< Kokkos::Serial > & thread - , const iType & count ) -{ - return Impl::ThreadVectorRangeBoundariesStruct >(thread,count); -} - - /** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all threads of the the calling thread team. 
- * This functionality requires C++11 support.*/ -template -KOKKOS_INLINE_FUNCTION -void parallel_for(const Impl::TeamThreadRangeBoundariesStruct >& loop_boundaries, const Lambda& lambda) { - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) - lambda(i); -} - -template< typename iType, class Lambda, typename ValueType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce - (const Impl::TeamThreadRangeBoundariesStruct >& loop_boundaries, - const Lambda & lambda, - ValueType& initialized_result) -{ - - ValueType result = initialized_result; - - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) - lambda(i, result); - - initialized_result = result; -} - -template< typename iType, class Lambda, typename ValueType, class JoinType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce - (const Impl::TeamThreadRangeBoundariesStruct >& loop_boundaries, - const Lambda & lambda, - const JoinType & join, - ValueType& initialized_result) -{ - ValueType result = initialized_result; - - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) - lambda(i, result); - - initialized_result = result; -} - -template< typename iType, class Lambda, typename ValueType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce - (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, - const Lambda & lambda, - ValueType& initialized_result) -{ - initialized_result = ValueType(); -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP -#pragma ivdep -#endif - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i,tmp); - initialized_result+=tmp; - } -} - -template< typename iType, class Lambda, typename ValueType, class JoinType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce - (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, - const Lambda & lambda, - const JoinType & join, - ValueType& 
initialized_result) -{ - ValueType result = initialized_result; -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP -#pragma ivdep -#endif - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i,tmp); - join(result,tmp); - } - initialized_result = result; -} - -template< typename ValueType, typename iType, class Lambda > -KOKKOS_INLINE_FUNCTION -void parallel_scan - (const Impl::TeamThreadRangeBoundariesStruct >& loop_boundaries, - const Lambda & lambda) -{ - ValueType accum = 0 ; - ValueType val, local_total; - - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - local_total = 0; - lambda(i,local_total,false); - val = accum; - lambda(i,val,true); - accum += local_total; - } - -} - -// placeholder for future function -template< typename iType, class Lambda, typename ValueType > -KOKKOS_INLINE_FUNCTION -void parallel_scan - (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, - const Lambda & lambda) -{ -} - -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - #endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ #endif /* #ifndef KOKKOS_IMPL_SERIAL_TASK_HPP */ diff --git a/lib/kokkos/core/src/impl/Kokkos_Synchronic.hpp b/lib/kokkos/core/src/impl/Kokkos_Synchronic.hpp deleted file mode 100644 index b2aea14df4..0000000000 --- a/lib/kokkos/core/src/impl/Kokkos_Synchronic.hpp +++ /dev/null @@ -1,693 +0,0 @@ -/* - -Copyright (c) 2014, NVIDIA Corporation -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. - -2. 
Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef KOKKOS_SYNCHRONIC_HPP -#define KOKKOS_SYNCHRONIC_HPP - -#include - -#include -#include -#include -#include -#include - -namespace Kokkos { -namespace Impl { - -enum notify_hint { - notify_all, - notify_one, - notify_none -}; -enum expect_hint { - expect_urgent, - expect_delay -}; - -namespace Details { - -template -bool __synchronic_spin_wait_for_update(S const& arg, T const& nval, int attempts) noexcept { - int i = 0; - for(;i < __SYNCHRONIC_SPIN_RELAX(attempts); ++i) - if(__builtin_expect(arg.load(std::memory_order_relaxed) != nval,1)) - return true; - else - __synchronic_relax(); - for(;i < attempts; ++i) - if(__builtin_expect(arg.load(std::memory_order_relaxed) != nval,1)) - return true; - else - __synchronic_yield(); - return false; -} - -struct __exponential_backoff { - __exponential_backoff(int arg_maximum=512) : maximum(arg_maximum), microseconds(8), x(123456789), y(362436069), z(521288629) { - } - static inline void sleep_for(std::chrono::microseconds const& time) { - auto t = 
time.count(); - if(__builtin_expect(t > 75,0)) { - portable_sleep(time); - } - else if(__builtin_expect(t > 25,0)) - __synchronic_yield(); - else - __synchronic_relax(); - } - void sleep_for_step() { - sleep_for(step()); - } - std::chrono::microseconds step() { - float const f = ranfu(); - int const t = int(microseconds * f); - if(__builtin_expect(f >= 0.95f,0)) - microseconds = 8; - else - microseconds = (std::min)(microseconds>>1,maximum); - return std::chrono::microseconds(t); - } -private : - int maximum, microseconds, x, y, z; - int xorshf96() { - int t; - x ^= x << 16; x ^= x >> 5; x ^= x << 1; - t = x; x = y; y = z; z = t ^ x ^ y; - return z; - } - float ranfu() { - return (float)(xorshf96()&(~0UL>>1)) / (float)(~0UL>>1); - } -}; - -template -struct __synchronic_base { - -protected: - std::atomic atom; - - void notify(notify_hint = notify_all) noexcept { - } - void notify(notify_hint = notify_all) volatile noexcept { - } - -public : - __synchronic_base() noexcept = default; - constexpr __synchronic_base(T v) noexcept : atom(v) { } - __synchronic_base(const __synchronic_base&) = delete; - ~__synchronic_base() { } - __synchronic_base& operator=(const __synchronic_base&) = delete; - __synchronic_base& operator=(const __synchronic_base&) volatile = delete; - - void expect_update(T val, expect_hint = expect_urgent) const noexcept { - if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A)) - return; - __exponential_backoff b; - while(atom.load(std::memory_order_relaxed) == val) { - __do_backoff(b); - if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B)) - return; - } - } - void expect_update(T val, expect_hint = expect_urgent) const volatile noexcept { - if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A)) - return; - __exponential_backoff b; - while(atom.load(std::memory_order_relaxed) == val) { - __do_backoff(b); - if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B)) - 
return; - } - } - - template - void expect_update_until(T val, std::chrono::time_point const& then, expect_hint = expect_urgent) const { - if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A)) - return; - __exponential_backoff b; - std::chrono::milliseconds remains = then - std::chrono::high_resolution_clock::now(); - while(remains > std::chrono::milliseconds::zero() && atom.load(std::memory_order_relaxed) == val) { - __do_backoff(b); - if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B)) - return; - remains = then - std::chrono::high_resolution_clock::now(); - } - } - template - void expect_update_until(T val, std::chrono::time_point const& then, expect_hint = expect_urgent) const volatile { - if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A)) - return; - __exponential_backoff b; - std::chrono::milliseconds remains = then - std::chrono::high_resolution_clock::now(); - while(remains > std::chrono::milliseconds::zero() && atom.load(std::memory_order_relaxed) == val) { - __do_backoff(b); - if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B)) - return; - remains = then - std::chrono::high_resolution_clock::now(); - } - } -}; - -#ifdef __SYNCHRONIC_COMPATIBLE -template -struct __synchronic_base::type> { - -public: - std::atomic atom; - - void notify(notify_hint hint = notify_all) noexcept { - if(__builtin_expect(hint == notify_none,1)) - return; - auto const x = count.fetch_add(0,std::memory_order_acq_rel); - if(__builtin_expect(x,0)) { - if(__builtin_expect(hint == notify_all,1)) - __synchronic_wake_all(&atom); - else - __synchronic_wake_one(&atom); - } - } - void notify(notify_hint hint = notify_all) volatile noexcept { - if(__builtin_expect(hint == notify_none,1)) - return; - auto const x = count.fetch_add(0,std::memory_order_acq_rel); - if(__builtin_expect(x,0)) { - if(__builtin_expect(hint == notify_all,1)) - __synchronic_wake_all_volatile(&atom); - else - 
__synchronic_wake_one_volatile(&atom); - } - } - -public : - __synchronic_base() noexcept : count(0) { } - constexpr __synchronic_base(T v) noexcept : atom(v), count(0) { } - __synchronic_base(const __synchronic_base&) = delete; - ~__synchronic_base() { } - __synchronic_base& operator=(const __synchronic_base&) = delete; - __synchronic_base& operator=(const __synchronic_base&) volatile = delete; - - void expect_update(T val, expect_hint = expect_urgent) const noexcept { - if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1)) - return; - while(__builtin_expect(atom.load(std::memory_order_relaxed) == val,1)) { - count.fetch_add(1,std::memory_order_release); - __synchronic_wait(&atom,val); - count.fetch_add(-1,std::memory_order_acquire); - } - } - void expect_update(T val, expect_hint = expect_urgent) const volatile noexcept { - if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1)) - return; - while(__builtin_expect(atom.load(std::memory_order_relaxed) == val,1)) { - count.fetch_add(1,std::memory_order_release); - __synchronic_wait_volatile(&atom,val); - count.fetch_add(-1,std::memory_order_acquire); - } - } - - template - void expect_update_until(T val, std::chrono::time_point const& then, expect_hint = expect_urgent) const { - if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1)) - return; - std::chrono::milliseconds remains = then - std::chrono::high_resolution_clock::now(); - while(__builtin_expect(remains > std::chrono::milliseconds::zero() && atom.load(std::memory_order_relaxed) == val,1)) { - count.fetch_add(1,std::memory_order_release); - __synchronic_wait_timed(&atom,val,remains); - count.fetch_add(-1,std::memory_order_acquire); - remains = then - std::chrono::high_resolution_clock::now(); - } - } - template - void expect_update_until(T val, std::chrono::time_point const& then, expect_hint = expect_urgent) const volatile { - 
if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1)) - return; - std::chrono::milliseconds remains = then - std::chrono::high_resolution_clock::now(); - while(__builtin_expect(remains > std::chrono::milliseconds::zero() && atom.load(std::memory_order_relaxed) == val,1)) { - count.fetch_add(1,std::memory_order_release); - __synchronic_wait_timed_volatile(&atom,val,remains); - count.fetch_add(-1,std::memory_order_acquire); - remains = then - std::chrono::high_resolution_clock::now(); - } - } -private: - mutable std::atomic count; -}; -#endif - -template -struct __synchronic : public __synchronic_base { - - __synchronic() noexcept = default; - constexpr __synchronic(T v) noexcept : __synchronic_base(v) { } - __synchronic(const __synchronic&) = delete; - __synchronic& operator=(const __synchronic&) = delete; - __synchronic& operator=(const __synchronic&) volatile = delete; -}; - -template -struct __synchronic::value>::type> : public __synchronic_base { - - T fetch_add(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { - auto const t = this->atom.fetch_add(v,m); - this->notify(n); - return t; - } - T fetch_add(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { - auto const t = this->atom.fetch_add(v,m); - this->notify(n); - return t; - } - T fetch_sub(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { - auto const t = this->atom.fetch_sub(v,m); - this->notify(n); - return t; - } - T fetch_sub(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { - auto const t = this->atom.fetch_sub(v,m); - this->notify(n); - return t; - } - T fetch_and(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { - auto const t = this->atom.fetch_and(v,m); - this->notify(n); - return t; - } - T fetch_and(T v, 
std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { - auto const t = this->atom.fetch_and(v,m); - this->notify(n); - return t; - } - T fetch_or(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { - auto const t = this->atom.fetch_or(v,m); - this->notify(n); - return t; - } - T fetch_or(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { - auto const t = this->atom.fetch_or(v,m); - this->notify(n); - return t; - } - T fetch_xor(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { - auto const t = this->atom.fetch_xor(v,m); - this->notify(n); - return t; - } - T fetch_xor(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { - auto const t = this->atom.fetch_xor(v,m); - this->notify(n); - return t; - } - - __synchronic() noexcept = default; - constexpr __synchronic(T v) noexcept : __synchronic_base(v) { } - __synchronic(const __synchronic&) = delete; - __synchronic& operator=(const __synchronic&) = delete; - __synchronic& operator=(const __synchronic&) volatile = delete; - - T operator=(T v) volatile noexcept { - auto const t = this->atom = v; - this->notify(); - return t; - } - T operator=(T v) noexcept { - auto const t = this->atom = v; - this->notify(); - return t; - } - T operator++(int) volatile noexcept { - auto const t = ++this->atom; - this->notify(); - return t; - } - T operator++(int) noexcept { - auto const t = ++this->atom; - this->notify(); - return t; - } - T operator--(int) volatile noexcept { - auto const t = --this->atom; - this->notify(); - return t; - } - T operator--(int) noexcept { - auto const t = --this->atom; - this->notify(); - return t; - } - T operator++() volatile noexcept { - auto const t = this->atom++; - this->notify(); - return t; - } - T operator++() noexcept { - auto const t = this->atom++; - this->notify(); - return t; - 
} - T operator--() volatile noexcept { - auto const t = this->atom--; - this->notify(); - return t; - } - T operator--() noexcept { - auto const t = this->atom--; - this->notify(); - return t; - } - T operator+=(T v) volatile noexcept { - auto const t = this->atom += v; - this->notify(); - return t; - } - T operator+=(T v) noexcept { - auto const t = this->atom += v; - this->notify(); - return t; - } - T operator-=(T v) volatile noexcept { - auto const t = this->atom -= v; - this->notify(); - return t; - } - T operator-=(T v) noexcept { - auto const t = this->atom -= v; - this->notify(); - return t; - } - T operator&=(T v) volatile noexcept { - auto const t = this->atom &= v; - this->notify(); - return t; - } - T operator&=(T v) noexcept { - auto const t = this->atom &= v; - this->notify(); - return t; - } - T operator|=(T v) volatile noexcept { - auto const t = this->atom |= v; - this->notify(); - return t; - } - T operator|=(T v) noexcept { - auto const t = this->atom |= v; - this->notify(); - return t; - } - T operator^=(T v) volatile noexcept { - auto const t = this->atom ^= v; - this->notify(); - return t; - } - T operator^=(T v) noexcept { - auto const t = this->atom ^= v; - this->notify(); - return t; - } -}; - -template -struct __synchronic : public __synchronic_base { - - T* fetch_add(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { - auto const t = this->atom.fetch_add(v,m); - this->notify(n); - return t; - } - T* fetch_add(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { - auto const t = this->atom.fetch_add(v,m); - this->notify(n); - return t; - } - T* fetch_sub(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { - auto const t = this->atom.fetch_sub(v,m); - this->notify(n); - return t; - } - T* fetch_sub(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = 
notify_all) noexcept { - auto const t = this->atom.fetch_sub(v,m); - this->notify(n); - return t; - } - - __synchronic() noexcept = default; - constexpr __synchronic(T* v) noexcept : __synchronic_base(v) { } - __synchronic(const __synchronic&) = delete; - __synchronic& operator=(const __synchronic&) = delete; - __synchronic& operator=(const __synchronic&) volatile = delete; - - T* operator=(T* v) volatile noexcept { - auto const t = this->atom = v; - this->notify(); - return t; - } - T* operator=(T* v) noexcept { - auto const t = this->atom = v; - this->notify(); - return t; - } - T* operator++(int) volatile noexcept { - auto const t = ++this->atom; - this->notify(); - return t; - } - T* operator++(int) noexcept { - auto const t = ++this->atom; - this->notify(); - return t; - } - T* operator--(int) volatile noexcept { - auto const t = --this->atom; - this->notify(); - return t; - } - T* operator--(int) noexcept { - auto const t = --this->atom; - this->notify(); - return t; - } - T* operator++() volatile noexcept { - auto const t = this->atom++; - this->notify(); - return t; - } - T* operator++() noexcept { - auto const t = this->atom++; - this->notify(); - return t; - } - T* operator--() volatile noexcept { - auto const t = this->atom--; - this->notify(); - return t; - } - T* operator--() noexcept { - auto const t = this->atom--; - this->notify(); - return t; - } - T* operator+=(ptrdiff_t v) volatile noexcept { - auto const t = this->atom += v; - this->notify(); - return t; - } - T* operator+=(ptrdiff_t v) noexcept { - auto const t = this->atom += v; - this->notify(); - return t; - } - T* operator-=(ptrdiff_t v) volatile noexcept { - auto const t = this->atom -= v; - this->notify(); - return t; - } - T* operator-=(ptrdiff_t v) noexcept { - auto const t = this->atom -= v; - this->notify(); - return t; - } -}; - -} //namespace Details - -template -struct synchronic : public Details::__synchronic { - - bool is_lock_free() const volatile noexcept { return 
this->atom.is_lock_free(); } - bool is_lock_free() const noexcept { return this->atom.is_lock_free(); } - void store(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { - this->atom.store(v,m); - this->notify(n); - } - void store(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { - this->atom.store(v,m); - this->notify(n); - } - T load(std::memory_order m = std::memory_order_seq_cst) const volatile noexcept { return this->atom.load(m); } - T load(std::memory_order m = std::memory_order_seq_cst) const noexcept { return this->atom.load(m); } - - operator T() const volatile noexcept { return (T)this->atom; } - operator T() const noexcept { return (T)this->atom; } - - T exchange(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { - auto const t = this->atom.exchange(v,m); - this->notify(n); - return t; - } - T exchange(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { - auto const t = this->atom.exchange(v,m); - this->notify(n); - return t; - } - bool compare_exchange_weak(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) volatile noexcept { - auto const t = this->atom.compare_exchange_weak(r,v,m1,m2); - this->notify(n); - return t; - } - bool compare_exchange_weak(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) noexcept { - auto const t = this->atom.compare_exchange_weak(r,v,m1, m2); - this->notify(n); - return t; - } - bool compare_exchange_strong(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) volatile noexcept { - auto const t = this->atom.compare_exchange_strong(r,v,m1,m2); - this->notify(n); - return t; - } - bool compare_exchange_strong(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) noexcept { - auto const t = 
this->atom.compare_exchange_strong(r,v,m1,m2); - this->notify(n); - return t; - } - bool compare_exchange_weak(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { - auto const t = this->atom.compare_exchange_weak(r,v,m); - this->notify(n); - return t; - } - bool compare_exchange_weak(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { - auto const t = this->atom.compare_exchange_weak(r,v,m); - this->notify(n); - return t; - } - bool compare_exchange_strong(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { - auto const t = this->atom.compare_exchange_strong(r,v,m); - this->notify(n); - return t; - } - bool compare_exchange_strong(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { - auto const t = this->atom.compare_exchange_strong(r,v,m); - this->notify(n); - return t; - } - - synchronic() noexcept = default; - constexpr synchronic(T val) noexcept : Details::__synchronic(val) { } - synchronic(const synchronic&) = delete; - ~synchronic() { } - synchronic& operator=(const synchronic&) = delete; - synchronic& operator=(const synchronic&) volatile = delete; - T operator=(T val) noexcept { - return Details::__synchronic::operator=(val); - } - T operator=(T val) volatile noexcept { - return Details::__synchronic::operator=(val); - } - - T load_when_not_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const noexcept { - Details::__synchronic::expect_update(val,h); - return load(order); - } - T load_when_not_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const volatile noexcept { - Details::__synchronic::expect_update(val,h); - return load(order); - } - T load_when_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const 
noexcept { - for(T nval = load(std::memory_order_relaxed); nval != val; nval = load(std::memory_order_relaxed)) - Details::__synchronic::expect_update(nval,h); - return load(order); - } - T load_when_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const volatile noexcept { - for(T nval = load(std::memory_order_relaxed); nval != val; nval = load(std::memory_order_relaxed)) - expect_update(nval,h); - return load(order); - } - template - void expect_update_for(T val, std::chrono::duration const& delta, expect_hint h = expect_urgent) const { - Details::__synchronic::expect_update_until(val, std::chrono::high_resolution_clock::now() + delta,h); - } - template < class Rep, class Period> - void expect_update_for(T val, std::chrono::duration const& delta, expect_hint h = expect_urgent) const volatile { - Details::__synchronic::expect_update_until(val, std::chrono::high_resolution_clock::now() + delta,h); - } -}; - -#include - -typedef synchronic synchronic_char; -typedef synchronic synchronic_schar; -typedef synchronic synchronic_uchar; -typedef synchronic synchronic_short; -typedef synchronic synchronic_ushort; -typedef synchronic synchronic_int; -typedef synchronic synchronic_uint; -typedef synchronic synchronic_long; -typedef synchronic synchronic_ulong; -typedef synchronic synchronic_llong; -typedef synchronic synchronic_ullong; -//typedef synchronic synchronic_char16_t; -//typedef synchronic synchronic_char32_t; -typedef synchronic synchronic_wchar_t; - -typedef synchronic synchronic_int_least8_t; -typedef synchronic synchronic_uint_least8_t; -typedef synchronic synchronic_int_least16_t; -typedef synchronic synchronic_uint_least16_t; -typedef synchronic synchronic_int_least32_t; -typedef synchronic synchronic_uint_least32_t; -//typedef synchronic synchronic_int_least_64_t; -typedef synchronic synchronic_uint_least64_t; -typedef synchronic synchronic_int_fast8_t; -typedef synchronic synchronic_uint_fast8_t; -typedef 
synchronic synchronic_int_fast16_t; -typedef synchronic synchronic_uint_fast16_t; -typedef synchronic synchronic_int_fast32_t; -typedef synchronic synchronic_uint_fast32_t; -typedef synchronic synchronic_int_fast64_t; -typedef synchronic synchronic_uint_fast64_t; -typedef synchronic synchronic_intptr_t; -typedef synchronic synchronic_uintptr_t; -typedef synchronic synchronic_size_t; -typedef synchronic synchronic_ptrdiff_t; -typedef synchronic synchronic_intmax_t; -typedef synchronic synchronic_uintmax_t; - -} -} - -#endif //__SYNCHRONIC_H diff --git a/lib/kokkos/core/src/impl/Kokkos_Synchronic_Config.hpp b/lib/kokkos/core/src/impl/Kokkos_Synchronic_Config.hpp deleted file mode 100644 index 0a6dd6e715..0000000000 --- a/lib/kokkos/core/src/impl/Kokkos_Synchronic_Config.hpp +++ /dev/null @@ -1,169 +0,0 @@ -/* - -Copyright (c) 2014, NVIDIA Corporation -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
-IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef KOKKOS_SYNCHRONIC_CONFIG_H -#define KOKKOS_SYNCHRONIC_CONFIG_H - -#include -#include - -namespace Kokkos { -namespace Impl { - -//the default yield function used inside the implementation is the Standard one -#define __synchronic_yield std::this_thread::yield -#define __synchronic_relax __synchronic_yield - -#if defined(_MSC_VER) - //this is a handy GCC optimization that I use inside the implementation - #define __builtin_expect(condition,common) condition - #if _MSC_VER <= 1800 - //using certain keywords that VC++ temporarily doesn't support - #define _ALLOW_KEYWORD_MACROS - #define noexcept - #define constexpr - #endif - //yes, I define multiple assignment operators - #pragma warning(disable:4522) - //I don't understand how Windows is so bad at timing functions, but is OK - //with straight-up yield loops - #define __do_backoff(b) __synchronic_yield() -#else -#define __do_backoff(b) b.sleep_for_step() -#endif - -//certain platforms have efficient support for spin-waiting built into the operating system -#if defined(__linux__) || (defined(_WIN32_WINNT) && _WIN32_WINNT >= 0x0602) -#if defined(_WIN32_WINNT) -#include -#include - //the combination of WaitOnAddress and WakeByAddressAll is supported on Windows 8.1+ - #define __synchronic_wait(x,v) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),-1) - #define __synchronic_wait_timed(x,v,t) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),std::chrono::duration_cast(t).count()) - #define 
__synchronic_wake_one(x) WakeByAddressSingle((PVOID)x) - #define __synchronic_wake_all(x) WakeByAddressAll((PVOID)x) - #define __synchronic_wait_volatile(x,v) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),-1) - #define __synchronic_wait_timed_volatile(x,v,t) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),std::chrono::duration_cast(t).count()) - #define __synchronic_wake_one_volatile(x) WakeByAddressSingle((PVOID)x) - #define __synchronic_wake_all_volatile(x) WakeByAddressAll((PVOID)x) - #define __SYNCHRONIC_COMPATIBLE(x) (std::is_pod::value && (sizeof(x) <= 8)) - - inline void native_sleep(unsigned long microseconds) - { - // What to do if microseconds is < 1000? - Sleep(microseconds / 1000); - } - - inline void native_yield() - { - SwitchToThread(); - } -#elif defined(__linux__) - #include - #include - #include - #include - #include - #include - #include - #include - template < class Rep, class Period> - inline timespec to_timespec(std::chrono::duration const& delta) { - struct timespec ts; - ts.tv_sec = static_cast(std::chrono::duration_cast(delta).count()); - assert(!ts.tv_sec); - ts.tv_nsec = static_cast(std::chrono::duration_cast(delta).count()); - return ts; - } - inline long futex(void const* addr1, int op, int val1) { - return syscall(SYS_futex, addr1, op, val1, 0, 0, 0); - } - inline long futex(void const* addr1, int op, int val1, struct timespec timeout) { - return syscall(SYS_futex, addr1, op, val1, &timeout, 0, 0); - } - inline void native_sleep(unsigned long microseconds) - { - usleep(microseconds); - } - inline void native_yield() - { - pthread_yield(); - } - - //the combination of SYS_futex(WAIT) and SYS_futex(WAKE) is supported on all recent Linux distributions - #define __synchronic_wait(x,v) futex(x, FUTEX_WAIT_PRIVATE, v) - #define __synchronic_wait_timed(x,v,t) futex(x, FUTEX_WAIT_PRIVATE, v, to_timespec(t)) - #define __synchronic_wake_one(x) futex(x, FUTEX_WAKE_PRIVATE, 1) - #define __synchronic_wake_all(x) futex(x, FUTEX_WAKE_PRIVATE, INT_MAX) - 
#define __synchronic_wait_volatile(x,v) futex(x, FUTEX_WAIT, v) - #define __synchronic_wait_volatile_timed(x,v,t) futex(x, FUTEX_WAIT, v, to_timespec(t)) - #define __synchronic_wake_one_volatile(x) futex(x, FUTEX_WAKE, 1) - #define __synchronic_wake_all_volatile(x) futex(x, FUTEX_WAKE, INT_MAX) - #define __SYNCHRONIC_COMPATIBLE(x) (std::is_integral::value && (sizeof(x) <= 4)) - - //the yield function on Linux is better replaced by sched_yield, which is tuned for spin-waiting - #undef __synchronic_yield - #define __synchronic_yield sched_yield - - //for extremely short wait times, just let another hyper-thread run - #undef __synchronic_relax - #define __synchronic_relax() asm volatile("rep; nop" ::: "memory") - -#endif -#endif - -#ifdef _GLIBCXX_USE_NANOSLEEP -inline void portable_sleep(std::chrono::microseconds const& time) -{ std::this_thread::sleep_for(time); } -#else -inline void portable_sleep(std::chrono::microseconds const& time) -{ native_sleep(time.count()); } -#endif - -#ifdef _GLIBCXX_USE_SCHED_YIELD -inline void portable_yield() -{ std::this_thread::yield(); } -#else -inline void portable_yield() -{ native_yield(); } -#endif - -//this is the number of times we initially spin, on the first wait attempt -#define __SYNCHRONIC_SPIN_COUNT_A 16 - -//this is how decide to yield instead of just spinning, 'c' is the current trip count -//#define __SYNCHRONIC_SPIN_YIELD(c) true -#define __SYNCHRONIC_SPIN_RELAX(c) (c>>3) - -//this is the number of times we normally spin, on every subsequent wait attempt -#define __SYNCHRONIC_SPIN_COUNT_B 8 - -} -} - -#endif //__SYNCHRONIC_CONFIG_H diff --git a/lib/kokkos/core/src/impl/Kokkos_Synchronic_n3998.hpp b/lib/kokkos/core/src/impl/Kokkos_Synchronic_n3998.hpp deleted file mode 100644 index facc8d6d8e..0000000000 --- a/lib/kokkos/core/src/impl/Kokkos_Synchronic_n3998.hpp +++ /dev/null @@ -1,162 +0,0 @@ -/* - -Copyright (c) 2014, NVIDIA Corporation -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef KOKKOS_SYNCHRONIC_N3998_HPP -#define KOKKOS_SYNCHRONIC_N3998_HPP - -#include -#include - -/* -In the section below, a synchronization point represents a point at which a -thread may block until a given synchronization condition has been reached or -at which it may notify other threads that a synchronization condition has -been achieved. -*/ -namespace Kokkos { namespace Impl { - - /* - A latch maintains an internal counter that is initialized when the latch - is created. The synchronization condition is reached when the counter is - decremented to 0. Threads may block at a synchronization point waiting - for the condition to be reached. When the condition is reached, any such - blocked threads will be released. 
- */ - struct latch { - latch(int val) : count(val), released(false) { } - latch(const latch&) = delete; - latch& operator=(const latch&) = delete; - ~latch( ) { } - void arrive( ) { - __arrive( ); - } - void arrive_and_wait( ) { - if(!__arrive( )) - wait( ); - } - void wait( ) { - while(!released.load_when_not_equal(false,std::memory_order_acquire)) - ; - } - bool try_wait( ) { - return released.load(std::memory_order_acquire); - } - private: - bool __arrive( ) { - if(count.fetch_add(-1,std::memory_order_release)!=1) - return false; - released.store(true,std::memory_order_release); - return true; - } - std::atomic count; - synchronic released; - }; - - /* - A barrier is created with an initial value representing the number of threads - that can arrive at the synchronization point. When that many threads have - arrived, the synchronization condition is reached and the threads are - released. The barrier will then reset, and may be reused for a new cycle, in - which the same set of threads may arrive again at the synchronization point. - The same set of threads shall arrive at the barrier in each cycle, otherwise - the behaviour is undefined. 
- */ - struct barrier { - barrier(int val) : expected(val), arrived(0), nexpected(val), epoch(0) { } - barrier(const barrier&) = delete; - barrier& operator=(const barrier&) = delete; - ~barrier() { } - void arrive_and_wait() { - int const myepoch = epoch.load(std::memory_order_relaxed); - if(!__arrive(myepoch)) - while(epoch.load_when_not_equal(myepoch,std::memory_order_acquire) == myepoch) - ; - } - void arrive_and_drop() { - nexpected.fetch_add(-1,std::memory_order_relaxed); - __arrive(epoch.load(std::memory_order_relaxed)); - } - private: - bool __arrive(int const myepoch) { - int const myresult = arrived.fetch_add(1,std::memory_order_acq_rel) + 1; - if(__builtin_expect(myresult == expected,0)) { - expected = nexpected.load(std::memory_order_relaxed); - arrived.store(0,std::memory_order_relaxed); - epoch.store(myepoch+1,std::memory_order_release); - return true; - } - return false; - } - int expected; - std::atomic arrived, nexpected; - synchronic epoch; - }; - - /* - A notifying barrier behaves as a barrier, but is constructed with a callable - completion function that is invoked after all threads have arrived at the - synchronization point, and before the synchronization condition is reached. - The completion may modify the set of threads that arrives at the barrier in - each cycle. 
- */ - struct notifying_barrier { - template - notifying_barrier(int val, T && f) : expected(val), arrived(0), nexpected(val), epoch(0), completion(std::forward(f)) { } - notifying_barrier(const notifying_barrier&) = delete; - notifying_barrier& operator=(const notifying_barrier&) = delete; - ~notifying_barrier( ) { } - void arrive_and_wait() { - int const myepoch = epoch.load(std::memory_order_relaxed); - if(!__arrive(myepoch)) - while(epoch.load_when_not_equal(myepoch,std::memory_order_acquire) == myepoch) - ; - } - void arrive_and_drop() { - nexpected.fetch_add(-1,std::memory_order_relaxed); - __arrive(epoch.load(std::memory_order_relaxed)); - } - private: - bool __arrive(int const myepoch) { - int const myresult = arrived.fetch_add(1,std::memory_order_acq_rel) + 1; - if(__builtin_expect(myresult == expected,0)) { - int const newexpected = completion(); - expected = newexpected ? newexpected : nexpected.load(std::memory_order_relaxed); - arrived.store(0,std::memory_order_relaxed); - epoch.store(myepoch+1,std::memory_order_release); - return true; - } - return false; - } - int expected; - std::atomic arrived, nexpected; - synchronic epoch; - std::function completion; - }; -}} - -#endif //__N3998_H diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp index afa01d0cde..b514df3517 100644 --- a/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp @@ -76,9 +76,6 @@ namespace Impl { template< typename Space , typename ResultType , typename FunctorType > class TaskBase ; -template< typename Space > -class TaskExec ; - } /* namespace Impl */ } /* namespace Kokkos */ @@ -149,8 +146,8 @@ private: // task->m_next is the dependence or zero // Postcondition: // task->m_next is linked list membership - KOKKOS_FUNCTION - void schedule( task_root_type * const ); + KOKKOS_FUNCTION void schedule_runnable( task_root_type * const ); + KOKKOS_FUNCTION void schedule_aggregate( 
task_root_type * const ); // Reschedule a task // Precondition: @@ -178,7 +175,7 @@ private: , task_root_type * const ); KOKKOS_FUNCTION - static task_root_type * pop_task( task_root_type * volatile * const ); + static task_root_type * pop_ready_task( task_root_type * volatile * const ); KOKKOS_FUNCTION static void decrement( task_root_type * task ); @@ -368,6 +365,7 @@ public: int16_t m_task_type ; ///< Type of task int16_t m_priority ; ///< Priority of runnable task + TaskBase() = delete ; TaskBase( TaskBase && ) = delete ; TaskBase( const TaskBase & ) = delete ; TaskBase & operator = ( TaskBase && ) = delete ; @@ -375,17 +373,43 @@ public: KOKKOS_INLINE_FUNCTION ~TaskBase() = default ; + // Constructor for a runnable task KOKKOS_INLINE_FUNCTION - constexpr TaskBase() noexcept - : m_apply(0) - , m_queue(0) - , m_wait(0) - , m_next(0) - , m_ref_count(0) - , m_alloc_size(0) - , m_dep_count(0) - , m_task_type( TaskSingle ) - , m_priority( 1 /* TaskRegularPriority */ ) + constexpr TaskBase( function_type arg_apply + , queue_type * arg_queue + , TaskBase * arg_dependence + , int arg_ref_count + , int arg_alloc_size + , int arg_task_type + , int arg_priority + ) noexcept + : m_apply( arg_apply ) + , m_queue( arg_queue ) + , m_wait( 0 ) + , m_next( arg_dependence ) + , m_ref_count( arg_ref_count ) + , m_alloc_size( arg_alloc_size ) + , m_dep_count( 0 ) + , m_task_type( arg_task_type ) + , m_priority( arg_priority ) + {} + + // Constructor for an aggregate task + KOKKOS_INLINE_FUNCTION + constexpr TaskBase( queue_type * arg_queue + , int arg_ref_count + , int arg_alloc_size + , int arg_dep_count + ) noexcept + : m_apply( 0 ) + , m_queue( arg_queue ) + , m_wait( 0 ) + , m_next( 0 ) + , m_ref_count( arg_ref_count ) + , m_alloc_size( arg_alloc_size ) + , m_dep_count( arg_dep_count ) + , m_task_type( Aggregate ) + , m_priority( 0 ) {} //---------------------------------------- @@ -406,9 +430,13 @@ public: KOKKOS_INLINE_FUNCTION void add_dependence( TaskBase* dep ) { + // 
Precondition: lock == m_next + + TaskBase * const lock = (TaskBase *) LockTag ; + // Assign dependence to m_next. It will be processed in the subsequent // call to schedule. Error if the dependence is reset. - if ( 0 != Kokkos::atomic_exchange( & m_next, dep ) ) { + if ( lock != Kokkos::atomic_exchange( & m_next, dep ) ) { Kokkos::abort("TaskScheduler ERROR: resetting task dependence"); } @@ -431,8 +459,13 @@ class TaskBase< ExecSpace , ResultType , void > { private: - static_assert( sizeof(TaskBase) == 48 , "" ); + using root_type = TaskBase ; + using function_type = typename root_type::function_type ; + using queue_type = typename root_type::queue_type ; + static_assert( sizeof(root_type) == 48 , "" ); + + TaskBase() = delete ; TaskBase( TaskBase && ) = delete ; TaskBase( const TaskBase & ) = delete ; TaskBase & operator = ( TaskBase && ) = delete ; @@ -444,9 +477,24 @@ public: KOKKOS_INLINE_FUNCTION ~TaskBase() = default ; + // Constructor for runnable task KOKKOS_INLINE_FUNCTION - TaskBase() - : TaskBase< ExecSpace , void , void >() + constexpr TaskBase( function_type arg_apply + , queue_type * arg_queue + , root_type * arg_dependence + , int arg_ref_count + , int arg_alloc_size + , int arg_task_type + , int arg_priority + ) + : root_type( arg_apply + , arg_queue + , arg_dependence + , arg_ref_count + , arg_alloc_size + , arg_task_type + , arg_priority + ) , m_result() {} @@ -471,11 +519,14 @@ private: public: - using root_type = TaskBase< ExecSpace , void , void > ; - using base_type = TaskBase< ExecSpace , ResultType , void > ; - using member_type = TaskExec< ExecSpace > ; - using functor_type = FunctorType ; - using result_type = ResultType ; + using root_type = TaskBase< ExecSpace , void , void > ; + using base_type = TaskBase< ExecSpace , ResultType , void > ; + using specialization = TaskQueueSpecialization< ExecSpace > ; + using function_type = typename root_type::function_type ; + using queue_type = typename root_type::queue_type ; + using member_type = 
typename specialization::member_type ; + using functor_type = FunctorType ; + using result_type = ResultType ; template< typename Type > KOKKOS_INLINE_FUNCTION static @@ -522,13 +573,30 @@ public: if ( 0 == member->team_rank() && !(task->requested_respawn()) ) { // Did not respawn, destroy the functor to free memory. static_cast(task)->~functor_type(); - // Cannot destroy the task until its dependences have been processed. + // Cannot destroy and deallocate the task until its dependences + // have been processed. } } + // Constructor for runnable task KOKKOS_INLINE_FUNCTION - TaskBase( functor_type const & arg_functor ) - : base_type() + constexpr TaskBase( function_type arg_apply + , queue_type * arg_queue + , root_type * arg_dependence + , int arg_ref_count + , int arg_alloc_size + , int arg_task_type + , int arg_priority + , FunctorType && arg_functor + ) + : base_type( arg_apply + , arg_queue + , arg_dependence + , arg_ref_count + , arg_alloc_size + , arg_task_type + , arg_priority + ) , functor_type( arg_functor ) {} diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp index fefbbad8bd..23f5d3cd30 100644 --- a/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp @@ -170,6 +170,7 @@ bool TaskQueue< ExecSpace >::push_task ) { // Push task into a concurrently pushed and popped queue. + // The queue can be either a ready task queue or a waiting task queue. // The queue is a linked list where 'task->m_next' form the links. // Fail the push attempt if the queue is locked; // otherwise retry until the push succeeds. 
@@ -227,13 +228,12 @@ bool TaskQueue< ExecSpace >::push_task template< typename ExecSpace > KOKKOS_FUNCTION typename TaskQueue< ExecSpace >::task_root_type * -TaskQueue< ExecSpace >::pop_task +TaskQueue< ExecSpace >::pop_ready_task ( TaskQueue< ExecSpace >::task_root_type * volatile * const queue ) { - // Pop task from a concurrently pushed and popped queue. + // Pop task from a concurrently pushed and popped ready task queue. // The queue is a linked list where 'task->m_next' form the links. - task_root_type * const zero = (task_root_type *) 0 ; task_root_type * const lock = (task_root_type *) task_root_type::LockTag ; task_root_type * const end = (task_root_type *) task_root_type::EndTag ; @@ -252,85 +252,201 @@ TaskQueue< ExecSpace >::pop_task // (1) lock, (2) end, or (3) a valid task. // Thus zero will never appear in the queue. // - // If queue is locked then just read by guaranteeing - // the CAS will fail. + // If queue is locked then just read by guaranteeing the CAS will fail. if ( lock == task ) task = 0 ; task_root_type * const x = task ; - task = Kokkos::atomic_compare_exchange(queue,task,lock); + task = Kokkos::atomic_compare_exchange(queue,x,lock); - if ( x == task ) break ; // CAS succeeded and queue is locked - } + if ( x == task ) { + // CAS succeeded and queue is locked + // + // This thread has locked the queue and removed 'task' from the queue. + // Extract the next entry of the queue from 'task->m_next' + // and mark 'task' as popped from a queue by setting + // 'task->m_next = lock'. + // + // Place the next entry in the head of the queue, + // which also unlocks the queue. + // + // This thread has exclusive access to + // the queue and the popped task's m_next. - if ( end != task ) { + *queue = task->m_next ; task->m_next = lock ; - // This thread has locked the queue and removed 'task' from the queue. 
- // Extract the next entry of the queue from 'task->m_next' - // and mark 'task' as popped from a queue by setting - // 'task->m_next = lock'. + Kokkos::memory_fence(); - task_root_type * const next = - Kokkos::atomic_exchange( & task->m_next , lock ); +#if 0 + printf( "pop_ready_task( 0x%lx 0x%lx { 0x%lx 0x%lx %d %d %d } )\n" + , uintptr_t(queue) + , uintptr_t(task) + , uintptr_t(task->m_wait) + , uintptr_t(task->m_next) + , int(task->m_task_type) + , int(task->m_priority) + , int(task->m_ref_count) ); +#endif - // Place the next entry in the head of the queue, - // which also unlocks the queue. - - task_root_type * const unlock = - Kokkos::atomic_exchange( queue , next ); - - if ( next == zero || next == lock || lock != unlock ) { - Kokkos::abort("TaskQueue::pop_task ERROR"); + return task ; } } -#if 0 - if ( end != task ) { - printf( "pop_task( 0x%lx 0x%lx { 0x%lx 0x%lx %d %d %d } )\n" - , uintptr_t(queue) - , uintptr_t(task) - , uintptr_t(task->m_wait) - , uintptr_t(task->m_next) - , int(task->m_task_type) - , int(task->m_priority) - , int(task->m_ref_count) ); - } -#endif - - return task ; + return end ; } //---------------------------------------------------------------------------- template< typename ExecSpace > KOKKOS_FUNCTION -void TaskQueue< ExecSpace >::schedule +void TaskQueue< ExecSpace >::schedule_runnable ( TaskQueue< ExecSpace >::task_root_type * const task ) { - // Schedule a runnable or when_all task upon construction / spawn + // Schedule a runnable task upon construction / spawn // and upon completion of other tasks that 'task' is waiting on. 
- - // Precondition on runnable task state: - // task is either constructing or executing + // + // Precondition: + // - called by a single thread for the input task + // - calling thread has exclusive access to the task + // - task is not a member of a queue + // - if runnable then task is either constructing or respawning // // Constructing state: // task->m_wait == 0 - // task->m_next == dependence - // Executing-respawn state: - // task->m_wait == head of linked list - // task->m_next == dependence + // task->m_next == dependence or 0 + // Respawn state: + // task->m_wait == head of linked list: 'end' or valid task + // task->m_next == dependence or 0 // // Task state transition: - // Constructing -> Waiting - // Executing-respawn -> Waiting + // Constructing -> Waiting + // Respawn -> Waiting // // Postcondition on task state: - // task->m_wait == head of linked list - // task->m_next == member of linked list + // task->m_wait == head of linked list (queue) + // task->m_next == member of linked list (queue) #if 0 - printf( "schedule( 0x%lx { 0x%lx 0x%lx %d %d %d }\n" + printf( "schedule_runnable( 0x%lx { 0x%lx 0x%lx %d %d %d }\n" + , uintptr_t(task) + , uintptr_t(task->m_wait) + , uintptr_t(task->m_next) + , task->m_task_type + , task->m_priority + , task->m_ref_count ); +#endif + + task_root_type * const zero = (task_root_type *) 0 ; + task_root_type * const lock = (task_root_type *) task_root_type::LockTag ; + task_root_type * const end = (task_root_type *) task_root_type::EndTag ; + + bool respawn = false ; + + //---------------------------------------- + + if ( zero == task->m_wait ) { + // Task in Constructing state + // - Transition to Waiting state + // Preconditions: + // - call occurs exclusively within a single thread + + task->m_wait = end ; + // Task in Waiting state + } + else if ( lock != task->m_wait ) { + // Task in Executing state with Respawn request + // - Update dependence + // - Transition to Waiting state + respawn = true ; + } + else { + 
// Task in Complete state + Kokkos::abort("TaskQueue::schedule_runnable ERROR: task is complete"); + } + + //---------------------------------------- + // Scheduling a runnable task which may have a dependency 'dep'. + // Extract dependence, if any, from task->m_next. + // If 'dep' is not null then attempt to push 'task' + // into the wait queue of 'dep'. + // If the push succeeds then 'task' may be + // processed or executed by another thread at any time. + // If the push fails then 'dep' is complete and 'task' + // is ready to execute. + + // Exclusive access so don't need an atomic exchange + // task_root_type * dep = Kokkos::atomic_exchange( & task->m_next , zero ); + task_root_type * dep = task->m_next ; task->m_next = zero ; + + const bool is_ready = + ( 0 == dep ) || ( ! push_task( & dep->m_wait , task ) ); + + if ( ( 0 != dep ) && respawn ) { + // Reference count for dep was incremented when + // respawn assigned dependency to task->m_next + // so that if dep completed prior to the + // above push_task dep would not be destroyed. + // dep reference count can now be decremented, + // which may deallocate the task. + TaskQueue::assign( & dep , (task_root_type *)0 ); + } + + if ( is_ready ) { + + // No dependence or 'dep' is complete so push task into ready queue. + // Increment the ready count before pushing into ready queue + // to track number of ready + executing tasks. + // The ready count will be decremented when the task is complete. + + Kokkos::atomic_increment( & m_ready_count ); + + task_root_type * volatile * const ready_queue = + & m_ready[ task->m_priority ][ task->m_task_type ]; + + // A push_task fails if the ready queue is locked. + // A ready queue is only locked during a push or pop; + // i.e., it is never permanently locked. + // Retry push to ready queue until it succeeds. + // When the push succeeds then 'task' may be + // processed or executed by another thread at any time. + + while ( ! 
push_task( ready_queue , task ) ); + } + + //---------------------------------------- + // Postcondition: + // - A runnable 'task' was pushed into a wait or ready queue. + // - Concurrent execution may have already popped 'task' + // from a queue and processed it as appropriate. +} + +template< typename ExecSpace > +KOKKOS_FUNCTION +void TaskQueue< ExecSpace >::schedule_aggregate + ( TaskQueue< ExecSpace >::task_root_type * const task ) +{ + // Schedule an aggregate task upon construction + // and upon completion of other tasks that 'task' is waiting on. + // + // Precondition: + // - called by a single thread for the input task + // - calling thread has exclusive access to the task + // - task is not a member of a queue + // + // Constructing state: + // task->m_wait == 0 + // task->m_next == dependence or 0 + // + // Task state transition: + // Constructing -> Waiting + // + // Postcondition on task state: + // task->m_wait == head of linked list (queue) + // task->m_next == member of linked list (queue) + +#if 0 + printf( "schedule_aggregate( 0x%lx { 0x%lx 0x%lx %d %d %d }\n" , uintptr_t(task) , uintptr_t(task->m_wait) , uintptr_t(task->m_next) @@ -344,134 +460,85 @@ void TaskQueue< ExecSpace >::schedule task_root_type * const end = (task_root_type *) task_root_type::EndTag ; //---------------------------------------- - { - // If Constructing then task->m_wait == 0 - // Change to waiting by task->m_wait = EndTag - task_root_type * const init = - Kokkos::atomic_compare_exchange( & task->m_wait , zero , end ); + if ( zero == task->m_wait ) { + // Task in Constructing state + // - Transition to Waiting state + // Preconditions: + // - call occurs exclusively within a single thread - // Precondition - - if ( lock == init ) { - Kokkos::abort("TaskQueue::schedule ERROR: task is complete"); - } - - // if ( init == 0 ) Constructing -> Waiting - // else Executing-Respawn -> Waiting + task->m_wait = end ; + // Task in Waiting state } + else if ( lock == task->m_wait ) { + 
// Task in Complete state + Kokkos::abort("TaskQueue::schedule_aggregate ERROR: task is complete"); + } + //---------------------------------------- + // Scheduling a 'when_all' task with multiple dependences. + // This scheduling may be called when the 'when_all' is + // (1) created or + // (2) being removed from a completed task's wait list. - if ( task_root_type::Aggregate != task->m_task_type ) { + task_root_type ** const aggr = task->aggregate_dependences(); - // Scheduling a runnable task which may have a depencency 'dep'. - // Extract dependence, if any, from task->m_next. - // If 'dep' is not null then attempt to push 'task' - // into the wait queue of 'dep'. - // If the push succeeds then 'task' may be - // processed or executed by another thread at any time. - // If the push fails then 'dep' is complete and 'task' - // is ready to execute. + // Assume the 'when_all' is complete until a dependence is + // found that is not complete. - task_root_type * dep = Kokkos::atomic_exchange( & task->m_next , zero ); + bool is_complete = true ; - const bool is_ready = - ( 0 == dep ) || ( ! push_task( & dep->m_wait , task ) ); + for ( int i = task->m_dep_count ; 0 < i && is_complete ; ) { - // Reference count for dep was incremented when assigned - // to task->m_next so that if it completed prior to the - // above push_task dep would not be destroyed. - // dep reference count can now be decremented, - // which may deallocate the task. - TaskQueue::assign( & dep , (task_root_type *)0 ); + --i ; - if ( is_ready ) { + // Loop dependences looking for an incomplete task. + // Add this task to the incomplete task's wait queue. - // No dependence or 'dep' is complete so push task into ready queue. - // Increment the ready count before pushing into ready queue - // to track number of ready + executing tasks. - // The ready count will be decremented when the task is complete. + // Remove a task 'x' from the dependence list. 
+ // The reference count of 'x' was incremented when + // it was assigned into the dependence list. - Kokkos::atomic_increment( & m_ready_count ); + // Exclusive access so don't need an atomic exchange + // task_root_type * x = Kokkos::atomic_exchange( aggr + i , zero ); + task_root_type * x = aggr[i] ; aggr[i] = zero ; - task_root_type * volatile * const queue = - & m_ready[ task->m_priority ][ task->m_task_type ]; + if ( x ) { - // A push_task fails if the ready queue is locked. - // A ready queue is only locked during a push or pop; - // i.e., it is never permanently locked. - // Retry push to ready queue until it succeeds. - // When the push succeeds then 'task' may be - // processed or executed by another thread at any time. + // If x->m_wait is not locked then push succeeds + // and the aggregate is not complete. + // If the push succeeds then this when_all 'task' may be + // processed by another thread at any time. + // For example, 'x' may be completed by another + // thread and then re-schedule this when_all 'task'. - while ( ! push_task( queue , task ) ); + is_complete = ! push_task( & x->m_wait , task ); + + // Decrement reference count which had been incremented + // when 'x' was added to the dependence list. + + TaskQueue::assign( & x , zero ); } } - //---------------------------------------- - else { - // Scheduling a 'when_all' task with multiple dependences. - // This scheduling may be called when the 'when_all' is - // (1) created or - // (2) being removed from a completed task's wait list. - task_root_type ** const aggr = task->aggregate_dependences(); + if ( is_complete ) { + // The when_all 'task' was not added to a wait queue because + // all dependences were complete so this aggregate is complete. + // Complete the when_all 'task' to schedule other tasks + // that are waiting for the when_all 'task' to complete. - // Assume the 'when_all' is complete until a dependence is - // found that is not complete. 
+ task->m_next = lock ; - bool is_complete = true ; + complete( task ); - for ( int i = task->m_dep_count ; 0 < i && is_complete ; ) { - - --i ; - - // Loop dependences looking for an incomplete task. - // Add this task to the incomplete task's wait queue. - - // Remove a task 'x' from the dependence list. - // The reference count of 'x' was incremented when - // it was assigned into the dependence list. - - task_root_type * x = Kokkos::atomic_exchange( aggr + i , zero ); - - if ( x ) { - - // If x->m_wait is not locked then push succeeds - // and the aggregate is not complete. - // If the push succeeds then this when_all 'task' may be - // processed by another thread at any time. - // For example, 'x' may be completeed by another - // thread and then re-schedule this when_all 'task'. - - is_complete = ! push_task( & x->m_wait , task ); - - // Decrement reference count which had been incremented - // when 'x' was added to the dependence list. - - TaskQueue::assign( & x , zero ); - } - } - - if ( is_complete ) { - // The when_all 'task' was not added to a wait queue because - // all dependences were complete so this aggregate is complete. - // Complete the when_all 'task' to schedule other tasks - // that are waiting for the when_all 'task' to complete. - - task->m_next = lock ; - - complete( task ); - - // '*task' may have been deleted upon completion - } + // '*task' may have been deleted upon completion } + //---------------------------------------- // Postcondition: - // A runnable 'task' was pushed into a wait or ready queue. - // An aggregate 'task' was either pushed to a wait queue - // or completed. - // Concurrent execution may have already popped 'task' - // from a queue and processed it as appropriate. + // - An aggregate 'task' was either pushed to a wait queue or completed. + // - Concurrent execution may have already popped 'task' + // from a queue and processed it as appropriate. 
} //---------------------------------------------------------------------------- @@ -529,7 +596,7 @@ void TaskQueue< ExecSpace >::complete // Is a runnable task has finished executing and requested respawn. // Schedule the task for subsequent execution. - schedule( task ); + schedule_runnable( task ); } //---------------------------------------- else { @@ -556,18 +623,22 @@ void TaskQueue< ExecSpace >::complete TaskQueue::assign( & task , zero ); // This thread has exclusive access to the wait list so - // the concurrency-safe pop_task function is not needed. + // the concurrency-safe pop_ready_task function is not needed. // Schedule the tasks that have been waiting on the input 'task', // which may have been deleted. while ( x != end ) { + // Have exclusive access to 'x' until it is scheduled + // Set x->m_next = zero <= no dependence, not a respawn - // Set x->m_next = zero <= no dependence + task_root_type * const next = x->m_next ; x->m_next = 0 ; - task_root_type * const next = - (task_root_type *) Kokkos::atomic_exchange( & x->m_next , zero ); - - schedule( x ); + if ( task_root_type::Aggregate != x->m_task_type ) { + schedule_runnable( x ); + } + else { + schedule_aggregate( x ); + } x = next ; } diff --git a/lib/kokkos/core/src/impl/Kokkos_Utilities.hpp b/lib/kokkos/core/src/impl/Kokkos_Utilities.hpp index ff503cb273..d72cde03fd 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Utilities.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Utilities.hpp @@ -45,6 +45,7 @@ #define KOKKOS_CORE_IMPL_UTILITIES_HPP #include +#include #include //---------------------------------------------------------------------------- diff --git a/lib/kokkos/core/src/impl/Kokkos_spinwait.cpp b/lib/kokkos/core/src/impl/Kokkos_spinwait.cpp index ad1b6dce39..93ff6c48a7 100644 --- a/lib/kokkos/core/src/impl/Kokkos_spinwait.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_spinwait.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // 
Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,52 +36,144 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ #include + #include +#include +#include + /*--------------------------------------------------------------------------*/ -#if ( KOKKOS_ENABLE_ASM ) - #if defined( __arm__ ) || defined( __aarch64__ ) - /* No-operation instruction to idle the thread. */ - #define YIELD asm volatile("nop") +#if !defined( _WIN32 ) + #if defined( KOKKOS_ENABLE_ASM ) + #if defined( __arm__ ) || defined( __aarch64__ ) + /* No-operation instruction to idle the thread. 
*/ + #define KOKKOS_INTERNAL_PAUSE + #else + /* Pause instruction to prevent excess processor bus usage */ + #define KOKKOS_INTERNAL_PAUSE asm volatile("pause\n":::"memory") + #endif + #define KOKKOS_INTERNAL_NOP2 asm volatile("nop\n" "nop\n") + #define KOKKOS_INTERNAL_NOP4 KOKKOS_INTERNAL_NOP2; KOKKOS_INTERNAL_NOP2 + #define KOKKOS_INTERNAL_NOP8 KOKKOS_INTERNAL_NOP4; KOKKOS_INTERNAL_NOP4; + #define KOKKOS_INTERNAL_NOP16 KOKKOS_INTERNAL_NOP8; KOKKOS_INTERNAL_NOP8; + #define KOKKOS_INTERNAL_NOP32 KOKKOS_INTERNAL_NOP16; KOKKOS_INTERNAL_NOP16; + namespace { + inline void kokkos_internal_yield( const unsigned i ) noexcept { + switch (Kokkos::Impl::bit_scan_reverse((i >> 2)+1u)) { + case 0u: KOKKOS_INTERNAL_NOP2; break; + case 1u: KOKKOS_INTERNAL_NOP4; break; + case 2u: KOKKOS_INTERNAL_NOP8; break; + case 3u: KOKKOS_INTERNAL_NOP16; break; + default: KOKKOS_INTERNAL_NOP32; + } + KOKKOS_INTERNAL_PAUSE; + } + } #else - /* Pause instruction to prevent excess processor bus usage */ - #define YIELD asm volatile("pause\n":::"memory") + #include + namespace { + inline void kokkos_internal_yield( const unsigned ) noexcept { + sched_yield(); + } + } + #endif +#else // defined( _WIN32 ) + #if defined ( KOKKOS_ENABLE_WINTHREAD ) + #include + namespace { + inline void kokkos_internal_yield( const unsigned ) noexcept { + Sleep(0); + } + } + #elif defined( _MSC_VER ) + #define NOMINMAX + #include + #include + namespace { + inline void kokkos_internal_yield( const unsigned ) noexcept { + YieldProcessor(); + } + } + #else + #define KOKKOS_INTERNAL_PAUSE __asm__ __volatile__("pause\n":::"memory") + #define KOKKOS_INTERNAL_NOP2 __asm__ __volatile__("nop\n" "nop") + #define KOKKOS_INTERNAL_NOP4 KOKKOS_INTERNAL_NOP2; KOKKOS_INTERNAL_NOP2 + #define KOKKOS_INTERNAL_NOP8 KOKKOS_INTERNAL_NOP4; KOKKOS_INTERNAL_NOP4; + #define KOKKOS_INTERNAL_NOP16 KOKKOS_INTERNAL_NOP8; KOKKOS_INTERNAL_NOP8; + #define KOKKOS_INTERNAL_NOP32 KOKKOS_INTERNAL_NOP16; KOKKOS_INTERNAL_NOP16; + namespace { + inline void 
kokkos_internal_yield( const unsigned i ) noexcept { + switch (Kokkos::Impl::bit_scan_reverse((i >> 2)+1u)) { + case 0: KOKKOS_INTERNAL_NOP2; break; + case 1: KOKKOS_INTERNAL_NOP4; break; + case 2: KOKKOS_INTERNAL_NOP8; break; + case 3: KOKKOS_INTERNAL_NOP16; break; + default: KOKKOS_INTERNAL_NOP32; + } + KOKKOS_INTERNAL_PAUSE; + } + } #endif -#elif defined ( KOKKOS_ENABLE_WINTHREAD ) - #include - #define YIELD Sleep(0) -#elif defined ( _WIN32) && defined (_MSC_VER) - /* Windows w/ Visual Studio */ - #define NOMINMAX - #include - #include -#define YIELD YieldProcessor(); -#elif defined ( _WIN32 ) - /* Windows w/ Intel*/ - #define YIELD __asm__ __volatile__("pause\n":::"memory") -#else - #include - #define YIELD sched_yield() #endif + /*--------------------------------------------------------------------------*/ namespace Kokkos { namespace Impl { #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) -void spinwait( volatile int & flag , const int value ) + +void spinwait_while_equal( volatile int32_t & flag , const int32_t value ) { + Kokkos::store_fence(); + unsigned i = 0; while ( value == flag ) { - YIELD ; + kokkos_internal_yield(i); + ++i; } + Kokkos::load_fence(); } + +void spinwait_until_equal( volatile int32_t & flag , const int32_t value ) +{ + Kokkos::store_fence(); + unsigned i = 0; + while ( value != flag ) { + kokkos_internal_yield(i); + ++i; + } + Kokkos::load_fence(); +} + +void spinwait_while_equal( volatile int64_t & flag , const int64_t value ) +{ + Kokkos::store_fence(); + unsigned i = 0; + while ( value == flag ) { + kokkos_internal_yield(i); + ++i; + } + Kokkos::load_fence(); +} + +void spinwait_until_equal( volatile int64_t & flag , const int64_t value ) +{ + Kokkos::store_fence(); + unsigned i = 0; + while ( value != flag ) { + kokkos_internal_yield(i); + ++i; + } + Kokkos::load_fence(); +} + #endif } /* namespace Impl */ diff --git a/lib/kokkos/core/src/impl/Kokkos_spinwait.hpp b/lib/kokkos/core/src/impl/Kokkos_spinwait.hpp index 
cc87771fae..6e34b8a943 100644 --- a/lib/kokkos/core/src/impl/Kokkos_spinwait.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_spinwait.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -47,14 +47,30 @@ #include +#include + namespace Kokkos { namespace Impl { #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) -void spinwait( volatile int & flag , const int value ); + +void spinwait_while_equal( volatile int32_t & flag , const int32_t value ); +void spinwait_until_equal( volatile int32_t & flag , const int32_t value ); + +void spinwait_while_equal( volatile int64_t & flag , const int64_t value ); +void spinwait_until_equal( volatile int64_t & flag , const int64_t value ); #else + KOKKOS_INLINE_FUNCTION -void spinwait( volatile int & , const int ) {} +void spinwait_while_equal( volatile int32_t & , const int32_t ) {} +KOKKOS_INLINE_FUNCTION +void spinwait_until_equal( volatile int32_t & , const int32_t ) {} + +KOKKOS_INLINE_FUNCTION +void spinwait_while_equal( volatile int64_t & , const int64_t ) {} +KOKKOS_INLINE_FUNCTION +void spinwait_until_equal( volatile int64_t & , const int64_t ) {} + #endif } /* namespace Impl */ diff --git a/lib/kokkos/core/unit_test/CMakeLists.txt b/lib/kokkos/core/unit_test/CMakeLists.txt index 795657fe87..caf6c50129 100644 --- a/lib/kokkos/core/unit_test/CMakeLists.txt +++ 
b/lib/kokkos/core/unit_test/CMakeLists.txt @@ -115,10 +115,31 @@ IF(Kokkos_ENABLE_OpenMP) ) ENDIF() -IF(Kokkos_ENABLE_QTHREAD) +IF(Kokkos_ENABLE_Qthreads) TRIBITS_ADD_EXECUTABLE_AND_TEST( - UnitTest_Qthread - SOURCES UnitTestMain.cpp TestQthread.cpp + UnitTest_Qthreads + SOURCES + UnitTestMain.cpp + qthreads/TestQthreads_Atomics.cpp + qthreads/TestQthreads_Other.cpp + qthreads/TestQthreads_Reductions.cpp + qthreads/TestQthreads_SubView_a.cpp + qthreads/TestQthreads_SubView_b.cpp + qthreads/TestQthreads_SubView_c01.cpp + qthreads/TestQthreads_SubView_c02.cpp + qthreads/TestQthreads_SubView_c03.cpp + qthreads/TestQthreads_SubView_c04.cpp + qthreads/TestQthreads_SubView_c05.cpp + qthreads/TestQthreads_SubView_c06.cpp + qthreads/TestQthreads_SubView_c07.cpp + qthreads/TestQthreads_SubView_c08.cpp + qthreads/TestQthreads_SubView_c09.cpp + qthreads/TestQthreads_SubView_c10.cpp + qthreads/TestQthreads_SubView_c11.cpp + qthreads/TestQthreads_SubView_c12.cpp + qthreads/TestQthreads_Team.cpp + qthreads/TestQthreads_ViewAPI_a.cpp + qthreads/TestQthreads_ViewAPI_b.cpp COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " @@ -194,4 +215,3 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( FAIL_REGULAR_EXPRESSION " FAILED " TESTONLYLIBS kokkos_gtest ) - diff --git a/lib/kokkos/core/unit_test/Makefile b/lib/kokkos/core/unit_test/Makefile index cc59825fba..d93830a28d 100644 --- a/lib/kokkos/core/unit_test/Makefile +++ b/lib/kokkos/core/unit_test/Makefile @@ -6,6 +6,7 @@ vpath %.cpp ${KOKKOS_PATH}/core/unit_test vpath %.cpp ${KOKKOS_PATH}/core/unit_test/serial vpath %.cpp ${KOKKOS_PATH}/core/unit_test/threads vpath %.cpp ${KOKKOS_PATH}/core/unit_test/openmp +vpath %.cpp ${KOKKOS_PATH}/core/unit_test/qthreads vpath %.cpp ${KOKKOS_PATH}/core/unit_test/cuda TEST_HEADERS = $(wildcard $(KOKKOS_PATH)/core/unit_test/*.hpp) @@ -35,15 +36,15 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) OBJ_CUDA = TestCuda_Other.o TestCuda_Reductions_a.o TestCuda_Reductions_b.o TestCuda_Atomics.o TestCuda_Team.o 
TestCuda_Spaces.o OBJ_CUDA += TestCuda_SubView_a.o TestCuda_SubView_b.o ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) - OBJ_OPENMP += TestCuda_SubView_c_all.o + OBJ_OPENMP += TestCuda_SubView_c_all.o else OBJ_CUDA += TestCuda_SubView_c01.o TestCuda_SubView_c02.o TestCuda_SubView_c03.o - OBJ_CUDA += TestCuda_SubView_c04.o TestCuda_SubView_c05.o TestCuda_SubView_c06.o - OBJ_CUDA += TestCuda_SubView_c07.o TestCuda_SubView_c08.o TestCuda_SubView_c09.o + OBJ_CUDA += TestCuda_SubView_c04.o TestCuda_SubView_c05.o TestCuda_SubView_c06.o + OBJ_CUDA += TestCuda_SubView_c07.o TestCuda_SubView_c08.o TestCuda_SubView_c09.o OBJ_CUDA += TestCuda_SubView_c10.o TestCuda_SubView_c11.o TestCuda_SubView_c12.o endif - OBJ_CUDA += TestCuda_ViewAPI_a.o TestCuda_ViewAPI_b.o TestCuda_ViewAPI_c.o TestCuda_ViewAPI_d.o - OBJ_CUDA += TestCuda_ViewAPI_e.o TestCuda_ViewAPI_f.o TestCuda_ViewAPI_g.o TestCuda_ViewAPI_h.o + OBJ_CUDA += TestCuda_ViewAPI_a.o TestCuda_ViewAPI_b.o TestCuda_ViewAPI_c.o TestCuda_ViewAPI_d.o + OBJ_CUDA += TestCuda_ViewAPI_e.o TestCuda_ViewAPI_f.o TestCuda_ViewAPI_g.o TestCuda_ViewAPI_h.o OBJ_CUDA += TestCuda_ViewAPI_s.o OBJ_CUDA += UnitTestMain.o gtest-all.o TARGETS += KokkosCore_UnitTest_Cuda @@ -51,13 +52,13 @@ endif endif ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) - OBJ_THREADS = TestThreads_Other.o TestThreads_Reductions.o TestThreads_Atomics.o TestThreads_Team.o - OBJ_THREADS += TestThreads_SubView_a.o TestThreads_SubView_b.o + OBJ_THREADS = TestThreads_Other.o TestThreads_Reductions.o TestThreads_Atomics.o TestThreads_Team.o + OBJ_THREADS += TestThreads_SubView_a.o TestThreads_SubView_b.o OBJ_THREADS += TestThreads_SubView_c01.o TestThreads_SubView_c02.o TestThreads_SubView_c03.o - OBJ_THREADS += TestThreads_SubView_c04.o TestThreads_SubView_c05.o TestThreads_SubView_c06.o - OBJ_THREADS += TestThreads_SubView_c07.o TestThreads_SubView_c08.o TestThreads_SubView_c09.o + OBJ_THREADS += TestThreads_SubView_c04.o TestThreads_SubView_c05.o TestThreads_SubView_c06.o + OBJ_THREADS 
+= TestThreads_SubView_c07.o TestThreads_SubView_c08.o TestThreads_SubView_c09.o OBJ_THREADS += TestThreads_SubView_c10.o TestThreads_SubView_c11.o TestThreads_SubView_c12.o - OBJ_THREADS += TestThreads_ViewAPI_a.o TestThreads_ViewAPI_b.o UnitTestMain.o gtest-all.o + OBJ_THREADS += TestThreads_ViewAPI_a.o TestThreads_ViewAPI_b.o UnitTestMain.o gtest-all.o TARGETS += KokkosCore_UnitTest_Threads TEST_TARGETS += test-threads endif @@ -66,11 +67,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) OBJ_OPENMP = TestOpenMP_Other.o TestOpenMP_Reductions.o TestOpenMP_Atomics.o TestOpenMP_Team.o OBJ_OPENMP += TestOpenMP_SubView_a.o TestOpenMP_SubView_b.o ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) - OBJ_OPENMP += TestOpenMP_SubView_c_all.o + OBJ_OPENMP += TestOpenMP_SubView_c_all.o else OBJ_OPENMP += TestOpenMP_SubView_c01.o TestOpenMP_SubView_c02.o TestOpenMP_SubView_c03.o - OBJ_OPENMP += TestOpenMP_SubView_c04.o TestOpenMP_SubView_c05.o TestOpenMP_SubView_c06.o - OBJ_OPENMP += TestOpenMP_SubView_c07.o TestOpenMP_SubView_c08.o TestOpenMP_SubView_c09.o + OBJ_OPENMP += TestOpenMP_SubView_c04.o TestOpenMP_SubView_c05.o TestOpenMP_SubView_c06.o + OBJ_OPENMP += TestOpenMP_SubView_c07.o TestOpenMP_SubView_c08.o TestOpenMP_SubView_c09.o OBJ_OPENMP += TestOpenMP_SubView_c10.o TestOpenMP_SubView_c11.o TestOpenMP_SubView_c12.o endif OBJ_OPENMP += TestOpenMP_ViewAPI_a.o TestOpenMP_ViewAPI_b.o UnitTestMain.o gtest-all.o @@ -78,26 +79,36 @@ endif TEST_TARGETS += test-openmp endif -ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) - OBJ_SERIAL = TestSerial_Other.o TestSerial_Reductions.o TestSerial_Atomics.o TestSerial_Team.o - OBJ_SERIAL += TestSerial_SubView_a.o TestSerial_SubView_b.o +ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) + OBJ_QTHREADS = TestQthreads_Other.o TestQthreads_Reductions.o TestQthreads_Atomics.o TestQthreads_Team.o + OBJ_QTHREADS += TestQthreads_SubView_a.o TestQthreads_SubView_b.o ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) - OBJ_OPENMP += TestSerial_SubView_c_all.o + OBJ_QTHREADS += 
TestQthreads_SubView_c_all.o else - OBJ_SERIAL += TestSerial_SubView_c01.o TestSerial_SubView_c02.o TestSerial_SubView_c03.o - OBJ_SERIAL += TestSerial_SubView_c04.o TestSerial_SubView_c05.o TestSerial_SubView_c06.o - OBJ_SERIAL += TestSerial_SubView_c07.o TestSerial_SubView_c08.o TestSerial_SubView_c09.o - OBJ_SERIAL += TestSerial_SubView_c10.o TestSerial_SubView_c11.o TestSerial_SubView_c12.o + OBJ_QTHREADS += TestQthreads_SubView_c01.o TestQthreads_SubView_c02.o TestQthreads_SubView_c03.o + OBJ_QTHREADS += TestQthreads_SubView_c04.o TestQthreads_SubView_c05.o TestQthreads_SubView_c06.o + OBJ_QTHREADS += TestQthreads_SubView_c07.o TestQthreads_SubView_c08.o TestQthreads_SubView_c09.o + OBJ_QTHREADS += TestQthreads_SubView_c10.o TestQthreads_SubView_c11.o TestQthreads_SubView_c12.o endif - OBJ_SERIAL += TestSerial_ViewAPI_a.o TestSerial_ViewAPI_b.o UnitTestMain.o gtest-all.o - TARGETS += KokkosCore_UnitTest_Serial - TEST_TARGETS += test-serial + OBJ_QTHREADS += TestQthreads_ViewAPI_a.o TestQthreads_ViewAPI_b.o UnitTestMain.o gtest-all.o + TARGETS += KokkosCore_UnitTest_Qthreads + TEST_TARGETS += test-qthreads endif -ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1) - OBJ_QTHREAD = TestQthread.o UnitTestMain.o gtest-all.o - TARGETS += KokkosCore_UnitTest_Qthread - TEST_TARGETS += test-qthread +ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) + OBJ_SERIAL = TestSerial_Other.o TestSerial_Reductions.o TestSerial_Atomics.o TestSerial_Team.o + OBJ_SERIAL += TestSerial_SubView_a.o TestSerial_SubView_b.o +ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) + OBJ_OPENMP += TestSerial_SubView_c_all.o +else + OBJ_SERIAL += TestSerial_SubView_c01.o TestSerial_SubView_c02.o TestSerial_SubView_c03.o + OBJ_SERIAL += TestSerial_SubView_c04.o TestSerial_SubView_c05.o TestSerial_SubView_c06.o + OBJ_SERIAL += TestSerial_SubView_c07.o TestSerial_SubView_c08.o TestSerial_SubView_c09.o + OBJ_SERIAL += TestSerial_SubView_c10.o TestSerial_SubView_c11.o TestSerial_SubView_c12.o +endif + OBJ_SERIAL += 
TestSerial_ViewAPI_a.o TestSerial_ViewAPI_b.o UnitTestMain.o gtest-all.o + TARGETS += KokkosCore_UnitTest_Serial + TEST_TARGETS += test-serial endif OBJ_HWLOC = TestHWLOC.o UnitTestMain.o gtest-all.o @@ -115,10 +126,6 @@ TARGETS += ${INITTESTS_TARGETS} INITTESTS_TEST_TARGETS := $(addprefix test-default-init-,${INITTESTS_NUMBERS}) TEST_TARGETS += ${INITTESTS_TEST_TARGETS} -OBJ_SYNCHRONIC = TestSynchronic.o UnitTestMain.o gtest-all.o -TARGETS += KokkosCore_UnitTest_Synchronic -TEST_TARGETS += test-synchronic - KokkosCore_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_Cuda @@ -131,8 +138,8 @@ KokkosCore_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS) KokkosCore_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_Serial -KokkosCore_UnitTest_Qthread: $(OBJ_QTHREAD) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_QTHREAD) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_Qthread +KokkosCore_UnitTest_Qthreads: $(OBJ_QTHREADS) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_QTHREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_Qthreads KokkosCore_UnitTest_HWLOC: $(OBJ_HWLOC) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_HWLOC) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_HWLOC @@ -146,9 +153,6 @@ KokkosCore_UnitTest_Default: $(OBJ_DEFAULT) $(KOKKOS_LINK_DEPENDS) ${INITTESTS_TARGETS}: KokkosCore_UnitTest_DefaultDeviceTypeInit_%: TestDefaultDeviceTypeInit_%.o UnitTestMain.o gtest-all.o $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) TestDefaultDeviceTypeInit_$*.o UnitTestMain.o gtest-all.o $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_DefaultDeviceTypeInit_$* -KokkosCore_UnitTest_Synchronic: $(OBJ_SYNCHRONIC) 
$(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_SYNCHRONIC) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_Synchronic - test-cuda: KokkosCore_UnitTest_Cuda ./KokkosCore_UnitTest_Cuda @@ -161,8 +165,8 @@ test-openmp: KokkosCore_UnitTest_OpenMP test-serial: KokkosCore_UnitTest_Serial ./KokkosCore_UnitTest_Serial -test-qthread: KokkosCore_UnitTest_Qthread - ./KokkosCore_UnitTest_Qthread +test-qthreads: KokkosCore_UnitTest_Qthreads + ./KokkosCore_UnitTest_Qthreads test-hwloc: KokkosCore_UnitTest_HWLOC ./KokkosCore_UnitTest_HWLOC @@ -176,9 +180,6 @@ test-default: KokkosCore_UnitTest_Default ${INITTESTS_TEST_TARGETS}: test-default-init-%: KokkosCore_UnitTest_DefaultDeviceTypeInit_% ./KokkosCore_UnitTest_DefaultDeviceTypeInit_$* -test-synchronic: KokkosCore_UnitTest_Synchronic - ./KokkosCore_UnitTest_Synchronic - build_all: $(TARGETS) test: $(TEST_TARGETS) @@ -193,4 +194,3 @@ clean: kokkos-clean gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc - diff --git a/lib/kokkos/core/unit_test/TestAggregate.hpp b/lib/kokkos/core/unit_test/TestAggregate.hpp index d22837f3ed..f09cc5018c 100644 --- a/lib/kokkos/core/unit_test/TestAggregate.hpp +++ b/lib/kokkos/core/unit_test/TestAggregate.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -50,8 +50,6 @@ #include #include -/*--------------------------------------------------------------------------*/ - #include namespace Test { @@ -59,51 +57,68 @@ namespace Test { template< class DeviceType > void TestViewAggregate() { - typedef Kokkos::Array value_type ; + typedef Kokkos::Array< double, 32 > value_type; + typedef Kokkos::Experimental::Impl::ViewDataAnalysis< value_type *, Kokkos::LayoutLeft, value_type > analysis_1d; - typedef Kokkos::Experimental::Impl:: - ViewDataAnalysis< value_type * , Kokkos::LayoutLeft , value_type > - analysis_1d ; + static_assert( std::is_same< typename analysis_1d::specialize, Kokkos::Array<> >::value, "" ); - static_assert( std::is_same< typename analysis_1d::specialize , Kokkos::Array<> >::value , "" ); + typedef Kokkos::ViewTraits< value_type **, DeviceType > a32_traits; + typedef Kokkos::ViewTraits< typename a32_traits::scalar_array_type, DeviceType > flat_traits; + static_assert( std::is_same< typename a32_traits::specialize, Kokkos::Array<> >::value, "" ); + static_assert( std::is_same< typename a32_traits::value_type, value_type >::value, "" ); + static_assert( a32_traits::rank == 2, "" ); + static_assert( a32_traits::rank_dynamic == 2, "" ); - typedef Kokkos::ViewTraits< value_type ** , DeviceType > a32_traits ; - typedef Kokkos::ViewTraits< typename a32_traits::scalar_array_type , DeviceType > flat_traits ; + static_assert( std::is_same< typename flat_traits::specialize, void >::value, "" ); + static_assert( flat_traits::rank == 3, "" ); + static_assert( flat_traits::rank_dynamic == 2, "" ); + static_assert( flat_traits::dimension::N2 == 32, "" ); - static_assert( std::is_same< typename a32_traits::specialize , Kokkos::Array<> >::value , "" ); - static_assert( std::is_same< typename a32_traits::value_type , value_type >::value , "" ); - static_assert( a32_traits::rank == 2 , "" ); 
- static_assert( a32_traits::rank_dynamic == 2 , "" ); + typedef Kokkos::View< Kokkos::Array< double, 32 > **, DeviceType > a32_type; + typedef typename a32_type::array_type a32_flat_type; - static_assert( std::is_same< typename flat_traits::specialize , void >::value , "" ); - static_assert( flat_traits::rank == 3 , "" ); - static_assert( flat_traits::rank_dynamic == 2 , "" ); - static_assert( flat_traits::dimension::N2 == 32 , "" ); + static_assert( std::is_same< typename a32_type::value_type, value_type >::value, "" ); + static_assert( std::is_same< typename a32_type::pointer_type, double * >::value, "" ); + static_assert( a32_type::Rank == 2, "" ); + static_assert( a32_flat_type::Rank == 3, "" ); - - typedef Kokkos::View< Kokkos::Array ** , DeviceType > a32_type ; - - typedef typename a32_type::array_type a32_flat_type ; - - static_assert( std::is_same< typename a32_type::value_type , value_type >::value , "" ); - static_assert( std::is_same< typename a32_type::pointer_type , double * >::value , "" ); - static_assert( a32_type::Rank == 2 , "" ); - static_assert( a32_flat_type::Rank == 3 , "" ); - - a32_type x("test",4,5); + a32_type x( "test", 4, 5 ); a32_flat_type y( x ); - ASSERT_EQ( x.extent(0) , 4 ); - ASSERT_EQ( x.extent(1) , 5 ); - ASSERT_EQ( y.extent(0) , 4 ); - ASSERT_EQ( y.extent(1) , 5 ); - ASSERT_EQ( y.extent(2) , 32 ); + ASSERT_EQ( x.extent( 0 ), 4 ); + ASSERT_EQ( x.extent( 1 ), 5 ); + ASSERT_EQ( y.extent( 0 ), 4 ); + ASSERT_EQ( y.extent( 1 ), 5 ); + ASSERT_EQ( y.extent( 2 ), 32 ); + + // Initialize arrays from brace-init-list as for std::array. + // + // Comment: Clang will issue the following warning if we don't use double + // braces here (one for initializing the Kokkos::Array and one for + // initializing the sub-aggreagate C-array data member), + // + // warning: suggest braces around initialization of subobject + // + // but single brace syntax would be valid as well. 
+ Kokkos::Array< float, 2 > aggregate_initialization_syntax_1 = { { 1.41, 3.14 } }; + ASSERT_FLOAT_EQ( aggregate_initialization_syntax_1[0], 1.41 ); + ASSERT_FLOAT_EQ( aggregate_initialization_syntax_1[1], 3.14 ); + + Kokkos::Array< int, 3 > aggregate_initialization_syntax_2{ { 0, 1, 2 } }; // since C++11 + for ( int i = 0; i < 3; ++i ) { + ASSERT_EQ( aggregate_initialization_syntax_2[i], i ); + } + + // Note that this is a valid initialization. + Kokkos::Array< double, 3 > initialized_with_one_argument_missing = { { 255, 255 } }; + for (int i = 0; i < 2; ++i) { + ASSERT_DOUBLE_EQ( initialized_with_one_argument_missing[i], 255 ); + } + // But the following line would not compile +// Kokkos::Array< double, 3 > initialized_with_too_many{ { 1, 2, 3, 4 } }; } -} - -/*--------------------------------------------------------------------------*/ -/*--------------------------------------------------------------------------*/ +} // namespace Test #endif /* #ifndef TEST_AGGREGATE_HPP */ diff --git a/lib/kokkos/core/unit_test/TestAtomic.hpp b/lib/kokkos/core/unit_test/TestAtomic.hpp index e948723574..ff77b8dca6 100644 --- a/lib/kokkos/core/unit_test/TestAtomic.hpp +++ b/lib/kokkos/core/unit_test/TestAtomic.hpp @@ -45,116 +45,129 @@ namespace TestAtomic { -// Struct for testing arbitrary size atomics +// Struct for testing arbitrary size atomics. 
-template +template< int N > struct SuperScalar { double val[N]; KOKKOS_INLINE_FUNCTION SuperScalar() { - for(int i=0; i -std::ostream& operator<<(std::ostream& os, const SuperScalar& dt) +template< int N > +std::ostream & operator<<( std::ostream & os, const SuperScalar< N > & dt ) { - os << "{ "; - for(int i=0;i +template< class T, class DEVICE_TYPE > struct ZeroFunctor { typedef DEVICE_TYPE execution_space; - typedef typename Kokkos::View type; - typedef typename Kokkos::View::HostMirror h_type; + typedef typename Kokkos::View< T, execution_space > type; + typedef typename Kokkos::View< T, execution_space >::HostMirror h_type; + type data; + KOKKOS_INLINE_FUNCTION - void operator()(int) const { + void operator()( int ) const { data() = 0; } }; @@ -163,47 +176,53 @@ struct ZeroFunctor { //--------------atomic_fetch_add--------------------- //--------------------------------------------------- -template -struct AddFunctor{ +template< class T, class DEVICE_TYPE > +struct AddFunctor { typedef DEVICE_TYPE execution_space; - typedef Kokkos::View type; + typedef Kokkos::View< T, execution_space > type; + type data; KOKKOS_INLINE_FUNCTION - void operator()(int) const { - Kokkos::atomic_fetch_add(&data(),(T)1); + void operator()( int ) const { + Kokkos::atomic_fetch_add( &data(), (T) 1 ); } }; -template -T AddLoop(int loop) { - struct ZeroFunctor f_zero; - typename ZeroFunctor::type data("Data"); - typename ZeroFunctor::h_type h_data("HData"); +template< class T, class execution_space > +T AddLoop( int loop ) { + struct ZeroFunctor< T, execution_space > f_zero; + typename ZeroFunctor< T, execution_space >::type data( "Data" ); + typename ZeroFunctor< T, execution_space >::h_type h_data( "HData" ); + f_zero.data = data; - Kokkos::parallel_for(1,f_zero); + Kokkos::parallel_for( 1, f_zero ); execution_space::fence(); - struct AddFunctor f_add; + struct AddFunctor< T, execution_space > f_add; + f_add.data = data; - Kokkos::parallel_for(loop,f_add); + Kokkos::parallel_for( 
loop, f_add ); execution_space::fence(); - Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy( h_data, data ); T val = h_data(); + return val; } -template -T AddLoopSerial(int loop) { +template< class T > +T AddLoopSerial( int loop ) { T* data = new T[1]; data[0] = 0; - for(int i=0;i -struct CASFunctor{ +template< class T, class DEVICE_TYPE > +struct CASFunctor { typedef DEVICE_TYPE execution_space; - typedef Kokkos::View type; + typedef Kokkos::View< T, execution_space > type; + type data; KOKKOS_INLINE_FUNCTION - void operator()(int) const { - T old = data(); - T newval, assumed; - do { - assumed = old; - newval = assumed + (T)1; - old = Kokkos::atomic_compare_exchange(&data(), assumed, newval); - } - while( old != assumed ); + void operator()( int ) const { + T old = data(); + T newval, assumed; + + do { + assumed = old; + newval = assumed + (T) 1; + old = Kokkos::atomic_compare_exchange( &data(), assumed, newval ); + } while( old != assumed ); } }; -template -T CASLoop(int loop) { - struct ZeroFunctor f_zero; - typename ZeroFunctor::type data("Data"); - typename ZeroFunctor::h_type h_data("HData"); +template< class T, class execution_space > +T CASLoop( int loop ) { + struct ZeroFunctor< T, execution_space > f_zero; + typename ZeroFunctor< T, execution_space >::type data( "Data" ); + typename ZeroFunctor< T, execution_space >::h_type h_data( "HData" ); + f_zero.data = data; - Kokkos::parallel_for(1,f_zero); + Kokkos::parallel_for( 1, f_zero ); execution_space::fence(); - struct CASFunctor f_cas; + struct CASFunctor< T, execution_space > f_cas; + f_cas.data = data; - Kokkos::parallel_for(loop,f_cas); + Kokkos::parallel_for( loop, f_cas ); execution_space::fence(); - Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy( h_data, data ); T val = h_data(); return val; } -template -T CASLoopSerial(int loop) { +template< class T > +T CASLoopSerial( int loop ) { T* data = new T[1]; data[0] = 0; - for(int i=0;i -struct ExchFunctor{ +template< class T, class DEVICE_TYPE 
> +struct ExchFunctor { typedef DEVICE_TYPE execution_space; - typedef Kokkos::View type; + typedef Kokkos::View< T, execution_space > type; + type data, data2; KOKKOS_INLINE_FUNCTION - void operator()(int i) const { - T old = Kokkos::atomic_exchange(&data(),(T)i); - Kokkos::atomic_fetch_add(&data2(),old); + void operator()( int i ) const { + T old = Kokkos::atomic_exchange( &data(), (T) i ); + Kokkos::atomic_fetch_add( &data2(), old ); } }; -template -T ExchLoop(int loop) { - struct ZeroFunctor f_zero; - typename ZeroFunctor::type data("Data"); - typename ZeroFunctor::h_type h_data("HData"); +template< class T, class execution_space > +T ExchLoop( int loop ) { + struct ZeroFunctor< T, execution_space > f_zero; + typename ZeroFunctor< T, execution_space >::type data( "Data" ); + typename ZeroFunctor< T, execution_space >::h_type h_data( "HData" ); + f_zero.data = data; - Kokkos::parallel_for(1,f_zero); + Kokkos::parallel_for( 1, f_zero ); execution_space::fence(); - typename ZeroFunctor::type data2("Data"); - typename ZeroFunctor::h_type h_data2("HData"); + typename ZeroFunctor< T, execution_space >::type data2( "Data" ); + typename ZeroFunctor< T, execution_space >::h_type h_data2( "HData" ); + f_zero.data = data2; - Kokkos::parallel_for(1,f_zero); + Kokkos::parallel_for( 1, f_zero ); execution_space::fence(); - struct ExchFunctor f_exch; + struct ExchFunctor< T, execution_space > f_exch; + f_exch.data = data; f_exch.data2 = data2; - Kokkos::parallel_for(loop,f_exch); + Kokkos::parallel_for( loop, f_exch ); execution_space::fence(); - Kokkos::deep_copy(h_data,data); - Kokkos::deep_copy(h_data2,data2); + Kokkos::deep_copy( h_data, data ); + Kokkos::deep_copy( h_data2, data2 ); T val = h_data() + h_data2(); return val; } -template -T ExchLoopSerial(typename std::conditional >::value,int,void>::type loop) { +template< class T > +T ExchLoopSerial( typename std::conditional< !std::is_same< T, Kokkos::complex >::value, int, void >::type loop ) { T* data = new T[1]; T* 
data2 = new T[1]; data[0] = 0; data2[0] = 0; - for(int i=0;i -T ExchLoopSerial(typename std::conditional >::value,int,void>::type loop) { +template< class T > +T ExchLoopSerial( typename std::conditional< std::is_same< T, Kokkos::complex >::value, int, void >::type loop ) { T* data = new T[1]; T* data2 = new T[1]; data[0] = 0; data2[0] = 0; - for(int i=0;ireal() = (static_cast(i)); - data->imag() = 0; - *data2+=old; + + for ( int i = 0; i < loop; i++ ) { + T old = *data; + data->real() = ( static_cast( i ) ); + data->imag() = 0; + *data2 += old; } T val = *data2 + *data; delete [] data; delete [] data2; + return val; } -template -T LoopVariant(int loop, int test) { - switch (test) { - case 1: return AddLoop(loop); - case 2: return CASLoop(loop); - case 3: return ExchLoop(loop); +template< class T, class DeviceType > +T LoopVariant( int loop, int test ) { + switch ( test ) { + case 1: return AddLoop< T, DeviceType >( loop ); + case 2: return CASLoop< T, DeviceType >( loop ); + case 3: return ExchLoop< T, DeviceType >( loop ); } + return 0; } -template -T LoopVariantSerial(int loop, int test) { - switch (test) { - case 1: return AddLoopSerial(loop); - case 2: return CASLoopSerial(loop); - case 3: return ExchLoopSerial(loop); +template< class T > +T LoopVariantSerial( int loop, int test ) { + switch ( test ) { + case 1: return AddLoopSerial< T >( loop ); + case 2: return CASLoopSerial< T >( loop ); + case 3: return ExchLoopSerial< T >( loop ); } + return 0; } -template -bool Loop(int loop, int test) +template< class T, class DeviceType > +bool Loop( int loop, int test ) { - T res = LoopVariant(loop,test); - T resSerial = LoopVariantSerial(loop,test); + T res = LoopVariant< T, DeviceType >( loop, test ); + T resSerial = LoopVariantSerial< T >( loop, test ); bool passed = true; @@ -387,16 +420,14 @@ bool Loop(int loop, int test) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = " << test << " FAILED : " << resSerial << " != 
" << res - << std::endl ; + << std::endl; } - - return passed ; -} - + return passed; } +} // namespace TestAtomic diff --git a/lib/kokkos/core/unit_test/TestAtomicOperations.hpp b/lib/kokkos/core/unit_test/TestAtomicOperations.hpp index 7f15190451..e3ceca404f 100644 --- a/lib/kokkos/core/unit_test/TestAtomicOperations.hpp +++ b/lib/kokkos/core/unit_test/TestAtomicOperations.hpp @@ -49,14 +49,16 @@ namespace TestAtomicOperations { //--------------zero_functor--------------------- //----------------------------------------------- -template +template< class T, class DEVICE_TYPE > struct ZeroFunctor { typedef DEVICE_TYPE execution_space; - typedef typename Kokkos::View type; - typedef typename Kokkos::View::HostMirror h_type; + typedef typename Kokkos::View< T, execution_space > type; + typedef typename Kokkos::View< T, execution_space >::HostMirror h_type; + type data; + KOKKOS_INLINE_FUNCTION - void operator()(int) const { + void operator()( int ) const { data() = 0; } }; @@ -65,78 +67,84 @@ struct ZeroFunctor { //--------------init_functor--------------------- //----------------------------------------------- -template +template< class T, class DEVICE_TYPE > struct InitFunctor { typedef DEVICE_TYPE execution_space; - typedef typename Kokkos::View type; - typedef typename Kokkos::View::HostMirror h_type; + typedef typename Kokkos::View< T, execution_space > type; + typedef typename Kokkos::View< T, execution_space >::HostMirror h_type; + type data; - T init_value ; + T init_value; + KOKKOS_INLINE_FUNCTION - void operator()(int) const { + void operator()( int ) const { data() = init_value; } - InitFunctor(T _init_value) : init_value(_init_value) {} + InitFunctor( T _init_value ) : init_value( _init_value ) {} }; - //--------------------------------------------------- //--------------atomic_fetch_max--------------------- //--------------------------------------------------- -template -struct MaxFunctor{ +template< class T, class DEVICE_TYPE > +struct MaxFunctor { 
typedef DEVICE_TYPE execution_space; - typedef Kokkos::View type; + typedef Kokkos::View< T, execution_space > type; + type data; T i0; T i1; KOKKOS_INLINE_FUNCTION - void operator()(int) const { - //Kokkos::atomic_fetch_max(&data(),(T)1); - Kokkos::atomic_fetch_max(&data(),(T)i1); + void operator()( int ) const { + //Kokkos::atomic_fetch_max( &data(), (T) 1 ); + Kokkos::atomic_fetch_max( &data(), (T) i1 ); } - MaxFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} + MaxFunctor( T _i0, T _i1 ) : i0( _i0 ), i1( _i1 ) {} }; -template -T MaxAtomic(T i0 , T i1) { - struct InitFunctor f_init(i0); - typename InitFunctor::type data("Data"); - typename InitFunctor::h_type h_data("HData"); +template< class T, class execution_space > +T MaxAtomic( T i0, T i1 ) { + struct InitFunctor< T, execution_space > f_init( i0 ); + typename InitFunctor< T, execution_space >::type data( "Data" ); + typename InitFunctor< T, execution_space >::h_type h_data( "HData" ); + f_init.data = data; - Kokkos::parallel_for(1,f_init); + Kokkos::parallel_for( 1, f_init ); execution_space::fence(); - struct MaxFunctor f(i0,i1); + struct MaxFunctor< T, execution_space > f( i0, i1 ); + f.data = data; - Kokkos::parallel_for(1,f); + Kokkos::parallel_for( 1, f ); execution_space::fence(); - Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy( h_data, data ); T val = h_data(); + return val; } -template -T MaxAtomicCheck(T i0 , T i1) { +template< class T > +T MaxAtomicCheck( T i0, T i1 ) { T* data = new T[1]; data[0] = 0; - *data = (i0 > i1 ? i0 : i1) ; + *data = ( i0 > i1 ? 
i0 : i1 ); T val = *data; delete [] data; + return val; } -template -bool MaxAtomicTest(T i0, T i1) +template< class T, class DeviceType > +bool MaxAtomicTest( T i0, T i1 ) { - T res = MaxAtomic(i0,i1); - T resSerial = MaxAtomicCheck(i0,i1); + T res = MaxAtomic< T, DeviceType >( i0, i1 ); + T resSerial = MaxAtomicCheck( i0, i1 ); bool passed = true; @@ -144,71 +152,77 @@ bool MaxAtomicTest(T i0, T i1) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = MaxAtomicTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } //--------------------------------------------------- //--------------atomic_fetch_min--------------------- //--------------------------------------------------- -template -struct MinFunctor{ +template< class T, class DEVICE_TYPE > +struct MinFunctor { typedef DEVICE_TYPE execution_space; - typedef Kokkos::View type; + typedef Kokkos::View< T, execution_space > type; + type data; T i0; T i1; KOKKOS_INLINE_FUNCTION - void operator()(int) const { - Kokkos::atomic_fetch_min(&data(),(T)i1); + void operator()( int ) const { + Kokkos::atomic_fetch_min( &data(), (T) i1 ); } - MinFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} + + MinFunctor( T _i0, T _i1 ) : i0( _i0 ), i1( _i1 ) {} }; -template -T MinAtomic(T i0 , T i1) { - struct InitFunctor f_init(i0); - typename InitFunctor::type data("Data"); - typename InitFunctor::h_type h_data("HData"); +template< class T, class execution_space > +T MinAtomic( T i0, T i1 ) { + struct InitFunctor< T, execution_space > f_init( i0 ); + typename InitFunctor< T, execution_space >::type data( "Data" ); + typename InitFunctor< T, execution_space >::h_type h_data( "HData" ); + f_init.data = data; - Kokkos::parallel_for(1,f_init); + Kokkos::parallel_for( 1, f_init ); execution_space::fence(); - struct MinFunctor f(i0,i1); + struct MinFunctor< T, execution_space > f( i0, i1 ); + f.data = data; - 
Kokkos::parallel_for(1,f); + Kokkos::parallel_for( 1, f ); execution_space::fence(); - Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy( h_data, data ); T val = h_data(); + return val; } -template -T MinAtomicCheck(T i0 , T i1) { +template< class T > +T MinAtomicCheck( T i0, T i1 ) { T* data = new T[1]; data[0] = 0; - *data = (i0 < i1 ? i0 : i1) ; + *data = ( i0 < i1 ? i0 : i1 ); T val = *data; delete [] data; + return val; } -template -bool MinAtomicTest(T i0, T i1) +template< class T, class DeviceType > +bool MinAtomicTest( T i0, T i1 ) { - T res = MinAtomic(i0,i1); - T resSerial = MinAtomicCheck(i0,i1); + T res = MinAtomic< T, DeviceType >( i0, i1 ); + T resSerial = MinAtomicCheck< T >( i0, i1 ); bool passed = true; @@ -216,55 +230,60 @@ bool MinAtomicTest(T i0, T i1) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = MinAtomicTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } //--------------------------------------------------- //--------------atomic_increment--------------------- //--------------------------------------------------- -template -struct IncFunctor{ +template< class T, class DEVICE_TYPE > +struct IncFunctor { typedef DEVICE_TYPE execution_space; - typedef Kokkos::View type; + typedef Kokkos::View< T, execution_space > type; + type data; T i0; KOKKOS_INLINE_FUNCTION - void operator()(int) const { - Kokkos::atomic_increment(&data()); + void operator()( int ) const { + Kokkos::atomic_increment( &data() ); } - IncFunctor( T _i0 ) : i0(_i0) {} + + IncFunctor( T _i0 ) : i0( _i0 ) {} }; -template -T IncAtomic(T i0) { - struct InitFunctor f_init(i0); - typename InitFunctor::type data("Data"); - typename InitFunctor::h_type h_data("HData"); +template< class T, class execution_space > +T IncAtomic( T i0 ) { + struct InitFunctor< T, execution_space > f_init( i0 ); + typename InitFunctor< T, execution_space >::type data( "Data" ); + 
typename InitFunctor< T, execution_space >::h_type h_data( "HData" ); + f_init.data = data; - Kokkos::parallel_for(1,f_init); + Kokkos::parallel_for( 1, f_init ); execution_space::fence(); - struct IncFunctor f(i0); + struct IncFunctor< T, execution_space > f( i0 ); + f.data = data; - Kokkos::parallel_for(1,f); + Kokkos::parallel_for( 1, f ); execution_space::fence(); - Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy( h_data, data ); T val = h_data(); + return val; } -template -T IncAtomicCheck(T i0) { +template< class T > +T IncAtomicCheck( T i0 ) { T* data = new T[1]; data[0] = 0; @@ -272,14 +291,15 @@ T IncAtomicCheck(T i0) { T val = *data; delete [] data; + return val; } -template -bool IncAtomicTest(T i0) +template< class T, class DeviceType > +bool IncAtomicTest( T i0 ) { - T res = IncAtomic(i0); - T resSerial = IncAtomicCheck(i0); + T res = IncAtomic< T, DeviceType >( i0 ); + T resSerial = IncAtomicCheck< T >( i0 ); bool passed = true; @@ -287,55 +307,60 @@ bool IncAtomicTest(T i0) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = IncAtomicTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } //--------------------------------------------------- //--------------atomic_decrement--------------------- //--------------------------------------------------- -template -struct DecFunctor{ +template< class T, class DEVICE_TYPE > +struct DecFunctor { typedef DEVICE_TYPE execution_space; - typedef Kokkos::View type; + typedef Kokkos::View< T, execution_space > type; + type data; T i0; KOKKOS_INLINE_FUNCTION - void operator()(int) const { - Kokkos::atomic_decrement(&data()); + void operator()( int ) const { + Kokkos::atomic_decrement( &data() ); } - DecFunctor( T _i0 ) : i0(_i0) {} + + DecFunctor( T _i0 ) : i0( _i0 ) {} }; -template -T DecAtomic(T i0) { - struct InitFunctor f_init(i0); - typename InitFunctor::type data("Data"); - typename 
InitFunctor::h_type h_data("HData"); +template< class T, class execution_space > +T DecAtomic( T i0 ) { + struct InitFunctor< T, execution_space > f_init( i0 ); + typename InitFunctor< T, execution_space >::type data( "Data" ); + typename InitFunctor< T, execution_space >::h_type h_data( "HData" ); + f_init.data = data; - Kokkos::parallel_for(1,f_init); + Kokkos::parallel_for( 1, f_init ); execution_space::fence(); - struct DecFunctor f(i0); + struct DecFunctor< T, execution_space > f( i0 ); + f.data = data; - Kokkos::parallel_for(1,f); + Kokkos::parallel_for( 1, f ); execution_space::fence(); - Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy( h_data, data ); T val = h_data(); + return val; } -template -T DecAtomicCheck(T i0) { +template< class T > +T DecAtomicCheck( T i0 ) { T* data = new T[1]; data[0] = 0; @@ -343,14 +368,15 @@ T DecAtomicCheck(T i0) { T val = *data; delete [] data; + return val; } -template -bool DecAtomicTest(T i0) +template< class T, class DeviceType > +bool DecAtomicTest( T i0 ) { - T res = DecAtomic(i0); - T resSerial = DecAtomicCheck(i0); + T res = DecAtomic< T, DeviceType >( i0 ); + T resSerial = DecAtomicCheck< T >( i0 ); bool passed = true; @@ -358,71 +384,77 @@ bool DecAtomicTest(T i0) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = DecAtomicTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } //--------------------------------------------------- //--------------atomic_fetch_mul--------------------- //--------------------------------------------------- -template -struct MulFunctor{ +template< class T, class DEVICE_TYPE > +struct MulFunctor { typedef DEVICE_TYPE execution_space; - typedef Kokkos::View type; + typedef Kokkos::View< T, execution_space > type; + type data; T i0; T i1; KOKKOS_INLINE_FUNCTION - void operator()(int) const { - Kokkos::atomic_fetch_mul(&data(),(T)i1); + void operator()( int ) const { + 
Kokkos::atomic_fetch_mul( &data(), (T) i1 ); } - MulFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} + + MulFunctor( T _i0, T _i1 ) : i0( _i0 ), i1( _i1 ) {} }; -template -T MulAtomic(T i0 , T i1) { - struct InitFunctor f_init(i0); - typename InitFunctor::type data("Data"); - typename InitFunctor::h_type h_data("HData"); +template< class T, class execution_space > +T MulAtomic( T i0, T i1 ) { + struct InitFunctor< T, execution_space > f_init( i0 ); + typename InitFunctor< T, execution_space >::type data( "Data" ); + typename InitFunctor< T, execution_space >::h_type h_data( "HData" ); + f_init.data = data; - Kokkos::parallel_for(1,f_init); + Kokkos::parallel_for( 1, f_init ); execution_space::fence(); - struct MulFunctor f(i0,i1); + struct MulFunctor< T, execution_space > f( i0, i1 ); + f.data = data; - Kokkos::parallel_for(1,f); + Kokkos::parallel_for( 1, f ); execution_space::fence(); - Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy( h_data, data ); T val = h_data(); + return val; } -template -T MulAtomicCheck(T i0 , T i1) { +template< class T > +T MulAtomicCheck( T i0, T i1 ) { T* data = new T[1]; data[0] = 0; - *data = i0*i1 ; + *data = i0*i1; T val = *data; delete [] data; + return val; } -template -bool MulAtomicTest(T i0, T i1) +template< class T, class DeviceType > +bool MulAtomicTest( T i0, T i1 ) { - T res = MulAtomic(i0,i1); - T resSerial = MulAtomicCheck(i0,i1); + T res = MulAtomic< T, DeviceType >( i0, i1 ); + T resSerial = MulAtomicCheck< T >( i0, i1 ); bool passed = true; @@ -430,71 +462,77 @@ bool MulAtomicTest(T i0, T i1) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = MulAtomicTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } //--------------------------------------------------- //--------------atomic_fetch_div--------------------- //--------------------------------------------------- -template -struct DivFunctor{ 
+template< class T, class DEVICE_TYPE > +struct DivFunctor { typedef DEVICE_TYPE execution_space; - typedef Kokkos::View type; + typedef Kokkos::View< T, execution_space > type; + type data; T i0; T i1; KOKKOS_INLINE_FUNCTION - void operator()(int) const { - Kokkos::atomic_fetch_div(&data(),(T)i1); + void operator()( int ) const { + Kokkos::atomic_fetch_div( &data(), (T) i1 ); } - DivFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} + + DivFunctor( T _i0, T _i1 ) : i0( _i0 ), i1( _i1 ) {} }; -template -T DivAtomic(T i0 , T i1) { - struct InitFunctor f_init(i0); - typename InitFunctor::type data("Data"); - typename InitFunctor::h_type h_data("HData"); +template< class T, class execution_space > +T DivAtomic( T i0, T i1 ) { + struct InitFunctor< T, execution_space > f_init( i0 ); + typename InitFunctor< T, execution_space >::type data( "Data" ); + typename InitFunctor< T, execution_space >::h_type h_data( "HData" ); + f_init.data = data; - Kokkos::parallel_for(1,f_init); + Kokkos::parallel_for( 1, f_init ); execution_space::fence(); - struct DivFunctor f(i0,i1); + struct DivFunctor< T, execution_space > f( i0, i1 ); + f.data = data; - Kokkos::parallel_for(1,f); + Kokkos::parallel_for( 1, f ); execution_space::fence(); - Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy( h_data, data ); T val = h_data(); + return val; } -template -T DivAtomicCheck(T i0 , T i1) { +template< class T > +T DivAtomicCheck( T i0, T i1 ) { T* data = new T[1]; data[0] = 0; - *data = i0/i1 ; + *data = i0 / i1; T val = *data; delete [] data; + return val; } -template -bool DivAtomicTest(T i0, T i1) +template< class T, class DeviceType > +bool DivAtomicTest( T i0, T i1 ) { - T res = DivAtomic(i0,i1); - T resSerial = DivAtomicCheck(i0,i1); + T res = DivAtomic< T, DeviceType >( i0, i1 ); + T resSerial = DivAtomicCheck< T >( i0, i1 ); bool passed = true; @@ -502,71 +540,77 @@ bool DivAtomicTest(T i0, T i1) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( 
test = DivAtomicTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } //--------------------------------------------------- //--------------atomic_fetch_mod--------------------- //--------------------------------------------------- -template -struct ModFunctor{ +template< class T, class DEVICE_TYPE > +struct ModFunctor { typedef DEVICE_TYPE execution_space; - typedef Kokkos::View type; + typedef Kokkos::View< T, execution_space > type; + type data; T i0; T i1; KOKKOS_INLINE_FUNCTION - void operator()(int) const { - Kokkos::atomic_fetch_mod(&data(),(T)i1); + void operator()( int ) const { + Kokkos::atomic_fetch_mod( &data(), (T) i1 ); } - ModFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} + + ModFunctor( T _i0, T _i1 ) : i0( _i0 ), i1( _i1 ) {} }; -template -T ModAtomic(T i0 , T i1) { - struct InitFunctor f_init(i0); - typename InitFunctor::type data("Data"); - typename InitFunctor::h_type h_data("HData"); +template< class T, class execution_space > +T ModAtomic( T i0, T i1 ) { + struct InitFunctor< T, execution_space > f_init( i0 ); + typename InitFunctor< T, execution_space >::type data( "Data" ); + typename InitFunctor< T, execution_space >::h_type h_data( "HData" ); + f_init.data = data; - Kokkos::parallel_for(1,f_init); + Kokkos::parallel_for( 1, f_init ); execution_space::fence(); - struct ModFunctor f(i0,i1); + struct ModFunctor< T, execution_space > f( i0, i1 ); + f.data = data; - Kokkos::parallel_for(1,f); + Kokkos::parallel_for( 1, f ); execution_space::fence(); - Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy( h_data, data ); T val = h_data(); + return val; } -template -T ModAtomicCheck(T i0 , T i1) { +template< class T > +T ModAtomicCheck( T i0, T i1 ) { T* data = new T[1]; data[0] = 0; - *data = i0%i1 ; + *data = i0 % i1; T val = *data; delete [] data; + return val; } -template -bool ModAtomicTest(T i0, T i1) +template< class T, class DeviceType > +bool ModAtomicTest( T i0, T i1 
) { - T res = ModAtomic(i0,i1); - T resSerial = ModAtomicCheck(i0,i1); + T res = ModAtomic< T, DeviceType >( i0, i1 ); + T resSerial = ModAtomicCheck< T >( i0, i1 ); bool passed = true; @@ -574,71 +618,77 @@ bool ModAtomicTest(T i0, T i1) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = ModAtomicTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } //--------------------------------------------------- //--------------atomic_fetch_and--------------------- //--------------------------------------------------- -template -struct AndFunctor{ +template< class T, class DEVICE_TYPE > +struct AndFunctor { typedef DEVICE_TYPE execution_space; - typedef Kokkos::View type; + typedef Kokkos::View< T, execution_space > type; + type data; T i0; T i1; KOKKOS_INLINE_FUNCTION - void operator()(int) const { - Kokkos::atomic_fetch_and(&data(),(T)i1); + void operator()( int ) const { + Kokkos::atomic_fetch_and( &data(), (T) i1 ); } - AndFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} + + AndFunctor( T _i0, T _i1 ) : i0( _i0 ), i1( _i1 ) {} }; -template -T AndAtomic(T i0 , T i1) { - struct InitFunctor f_init(i0); - typename InitFunctor::type data("Data"); - typename InitFunctor::h_type h_data("HData"); +template< class T, class execution_space > +T AndAtomic( T i0, T i1 ) { + struct InitFunctor< T, execution_space > f_init( i0 ); + typename InitFunctor< T, execution_space >::type data( "Data" ); + typename InitFunctor< T, execution_space >::h_type h_data( "HData" ); + f_init.data = data; - Kokkos::parallel_for(1,f_init); + Kokkos::parallel_for( 1, f_init ); execution_space::fence(); - struct AndFunctor f(i0,i1); + struct AndFunctor< T, execution_space > f( i0, i1 ); + f.data = data; - Kokkos::parallel_for(1,f); + Kokkos::parallel_for( 1, f ); execution_space::fence(); - Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy( h_data, data ); T val = h_data(); + return val; } 
-template -T AndAtomicCheck(T i0 , T i1) { +template< class T > +T AndAtomicCheck( T i0, T i1 ) { T* data = new T[1]; data[0] = 0; - *data = i0&i1 ; + *data = i0 & i1; T val = *data; delete [] data; + return val; } -template -bool AndAtomicTest(T i0, T i1) +template< class T, class DeviceType > +bool AndAtomicTest( T i0, T i1 ) { - T res = AndAtomic(i0,i1); - T resSerial = AndAtomicCheck(i0,i1); + T res = AndAtomic< T, DeviceType >( i0, i1 ); + T resSerial = AndAtomicCheck< T >( i0, i1 ); bool passed = true; @@ -646,71 +696,77 @@ bool AndAtomicTest(T i0, T i1) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = AndAtomicTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } //--------------------------------------------------- //--------------atomic_fetch_or---------------------- //--------------------------------------------------- -template -struct OrFunctor{ +template< class T, class DEVICE_TYPE > +struct OrFunctor { typedef DEVICE_TYPE execution_space; - typedef Kokkos::View type; + typedef Kokkos::View< T, execution_space > type; + type data; T i0; T i1; KOKKOS_INLINE_FUNCTION - void operator()(int) const { - Kokkos::atomic_fetch_or(&data(),(T)i1); + void operator()( int ) const { + Kokkos::atomic_fetch_or( &data(), (T) i1 ); } - OrFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} + + OrFunctor( T _i0, T _i1 ) : i0( _i0 ), i1( _i1 ) {} }; -template -T OrAtomic(T i0 , T i1) { - struct InitFunctor f_init(i0); - typename InitFunctor::type data("Data"); - typename InitFunctor::h_type h_data("HData"); +template< class T, class execution_space > +T OrAtomic( T i0, T i1 ) { + struct InitFunctor< T, execution_space > f_init( i0 ); + typename InitFunctor< T, execution_space >::type data( "Data" ); + typename InitFunctor< T, execution_space >::h_type h_data( "HData" ); + f_init.data = data; - Kokkos::parallel_for(1,f_init); + Kokkos::parallel_for( 1, f_init ); 
execution_space::fence(); - struct OrFunctor f(i0,i1); + struct OrFunctor< T, execution_space > f( i0, i1 ); + f.data = data; - Kokkos::parallel_for(1,f); + Kokkos::parallel_for( 1, f ); execution_space::fence(); - Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy( h_data, data ); T val = h_data(); + return val; } -template -T OrAtomicCheck(T i0 , T i1) { +template< class T > +T OrAtomicCheck( T i0, T i1 ) { T* data = new T[1]; data[0] = 0; - *data = i0|i1 ; + *data = i0 | i1; T val = *data; delete [] data; + return val; } -template -bool OrAtomicTest(T i0, T i1) +template< class T, class DeviceType > +bool OrAtomicTest( T i0, T i1 ) { - T res = OrAtomic(i0,i1); - T resSerial = OrAtomicCheck(i0,i1); + T res = OrAtomic< T, DeviceType >( i0, i1 ); + T resSerial = OrAtomicCheck< T >( i0, i1 ); bool passed = true; @@ -718,71 +774,77 @@ bool OrAtomicTest(T i0, T i1) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = OrAtomicTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } //--------------------------------------------------- //--------------atomic_fetch_xor--------------------- //--------------------------------------------------- -template -struct XorFunctor{ +template< class T, class DEVICE_TYPE > +struct XorFunctor { typedef DEVICE_TYPE execution_space; - typedef Kokkos::View type; + typedef Kokkos::View< T, execution_space > type; + type data; T i0; T i1; KOKKOS_INLINE_FUNCTION - void operator()(int) const { - Kokkos::atomic_fetch_xor(&data(),(T)i1); + void operator()( int ) const { + Kokkos::atomic_fetch_xor( &data(), (T) i1 ); } - XorFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} + + XorFunctor( T _i0, T _i1 ) : i0( _i0 ), i1( _i1 ) {} }; -template -T XorAtomic(T i0 , T i1) { - struct InitFunctor f_init(i0); - typename InitFunctor::type data("Data"); - typename InitFunctor::h_type h_data("HData"); +template< class T, class execution_space > 
+T XorAtomic( T i0, T i1 ) { + struct InitFunctor< T, execution_space > f_init( i0 ); + typename InitFunctor< T, execution_space >::type data( "Data" ); + typename InitFunctor< T, execution_space >::h_type h_data( "HData" ); + f_init.data = data; - Kokkos::parallel_for(1,f_init); + Kokkos::parallel_for( 1, f_init ); execution_space::fence(); - struct XorFunctor f(i0,i1); + struct XorFunctor< T, execution_space > f( i0, i1 ); + f.data = data; - Kokkos::parallel_for(1,f); + Kokkos::parallel_for( 1, f ); execution_space::fence(); - Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy( h_data, data ); T val = h_data(); + return val; } -template -T XorAtomicCheck(T i0 , T i1) { +template< class T > +T XorAtomicCheck( T i0, T i1 ) { T* data = new T[1]; data[0] = 0; - *data = i0^i1 ; + *data = i0 ^ i1; T val = *data; delete [] data; + return val; } -template -bool XorAtomicTest(T i0, T i1) +template< class T, class DeviceType > +bool XorAtomicTest( T i0, T i1 ) { - T res = XorAtomic(i0,i1); - T resSerial = XorAtomicCheck(i0,i1); + T res = XorAtomic< T, DeviceType >( i0, i1 ); + T resSerial = XorAtomicCheck< T >( i0, i1 ); bool passed = true; @@ -790,71 +852,77 @@ bool XorAtomicTest(T i0, T i1) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = XorAtomicTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } //--------------------------------------------------- //--------------atomic_fetch_lshift--------------------- //--------------------------------------------------- -template -struct LShiftFunctor{ +template< class T, class DEVICE_TYPE > +struct LShiftFunctor { typedef DEVICE_TYPE execution_space; - typedef Kokkos::View type; + typedef Kokkos::View< T, execution_space > type; + type data; T i0; T i1; KOKKOS_INLINE_FUNCTION - void operator()(int) const { - Kokkos::atomic_fetch_lshift(&data(),(T)i1); + void operator()( int ) const { + 
Kokkos::atomic_fetch_lshift( &data(), (T) i1 ); } - LShiftFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} + + LShiftFunctor( T _i0, T _i1 ) : i0( _i0 ), i1( _i1 ) {} }; -template -T LShiftAtomic(T i0 , T i1) { - struct InitFunctor f_init(i0); - typename InitFunctor::type data("Data"); - typename InitFunctor::h_type h_data("HData"); +template< class T, class execution_space > +T LShiftAtomic( T i0, T i1 ) { + struct InitFunctor< T, execution_space > f_init( i0 ); + typename InitFunctor< T, execution_space >::type data( "Data" ); + typename InitFunctor< T, execution_space >::h_type h_data( "HData" ); + f_init.data = data; - Kokkos::parallel_for(1,f_init); + Kokkos::parallel_for( 1, f_init ); execution_space::fence(); - struct LShiftFunctor f(i0,i1); + struct LShiftFunctor< T, execution_space > f( i0, i1 ); + f.data = data; - Kokkos::parallel_for(1,f); + Kokkos::parallel_for( 1, f ); execution_space::fence(); - Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy( h_data, data ); T val = h_data(); + return val; } -template -T LShiftAtomicCheck(T i0 , T i1) { +template< class T > +T LShiftAtomicCheck( T i0, T i1 ) { T* data = new T[1]; data[0] = 0; - *data = i0< -bool LShiftAtomicTest(T i0, T i1) +template< class T, class DeviceType > +bool LShiftAtomicTest( T i0, T i1 ) { - T res = LShiftAtomic(i0,i1); - T resSerial = LShiftAtomicCheck(i0,i1); + T res = LShiftAtomic< T, DeviceType >( i0, i1 ); + T resSerial = LShiftAtomicCheck< T >( i0, i1 ); bool passed = true; @@ -862,71 +930,77 @@ bool LShiftAtomicTest(T i0, T i1) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = LShiftAtomicTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } //--------------------------------------------------- //--------------atomic_fetch_rshift--------------------- //--------------------------------------------------- -template -struct RShiftFunctor{ +template< class T, class 
DEVICE_TYPE > +struct RShiftFunctor { typedef DEVICE_TYPE execution_space; - typedef Kokkos::View type; + typedef Kokkos::View< T, execution_space > type; + type data; T i0; T i1; KOKKOS_INLINE_FUNCTION - void operator()(int) const { - Kokkos::atomic_fetch_rshift(&data(),(T)i1); + void operator()( int ) const { + Kokkos::atomic_fetch_rshift( &data(), (T) i1 ); } - RShiftFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} + + RShiftFunctor( T _i0, T _i1 ) : i0( _i0 ), i1( _i1 ) {} }; -template -T RShiftAtomic(T i0 , T i1) { - struct InitFunctor f_init(i0); - typename InitFunctor::type data("Data"); - typename InitFunctor::h_type h_data("HData"); +template< class T, class execution_space > +T RShiftAtomic( T i0, T i1 ) { + struct InitFunctor< T, execution_space > f_init( i0 ); + typename InitFunctor< T, execution_space >::type data( "Data" ); + typename InitFunctor< T, execution_space >::h_type h_data( "HData" ); + f_init.data = data; - Kokkos::parallel_for(1,f_init); + Kokkos::parallel_for( 1, f_init ); execution_space::fence(); - struct RShiftFunctor f(i0,i1); + struct RShiftFunctor< T, execution_space > f( i0, i1 ); + f.data = data; - Kokkos::parallel_for(1,f); + Kokkos::parallel_for( 1, f ); execution_space::fence(); - Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy( h_data, data ); T val = h_data(); + return val; } -template -T RShiftAtomicCheck(T i0 , T i1) { +template< class T > +T RShiftAtomicCheck( T i0, T i1 ) { T* data = new T[1]; data[0] = 0; - *data = i0>>i1 ; + *data = i0 >> i1; T val = *data; delete [] data; + return val; } -template -bool RShiftAtomicTest(T i0, T i1) +template< class T, class DeviceType > +bool RShiftAtomicTest( T i0, T i1 ) { - T res = RShiftAtomic(i0,i1); - T resSerial = RShiftAtomicCheck(i0,i1); + T res = RShiftAtomic< T, DeviceType >( i0, i1 ); + T resSerial = RShiftAtomicCheck< T >( i0, i1 ); bool passed = true; @@ -934,52 +1008,52 @@ bool RShiftAtomicTest(T i0, T i1) passed = false; std::cout << "Loop<" - << typeid(T).name() + 
<< typeid( T ).name() << ">( test = RShiftAtomicTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } - //--------------------------------------------------- //--------------atomic_test_control------------------ //--------------------------------------------------- -template -bool AtomicOperationsTestIntegralType( int i0 , int i1 , int test ) +template< class T, class DeviceType > +bool AtomicOperationsTestIntegralType( int i0, int i1, int test ) { - switch (test) { - case 1: return MaxAtomicTest( (T)i0 , (T)i1 ); - case 2: return MinAtomicTest( (T)i0 , (T)i1 ); - case 3: return MulAtomicTest( (T)i0 , (T)i1 ); - case 4: return DivAtomicTest( (T)i0 , (T)i1 ); - case 5: return ModAtomicTest( (T)i0 , (T)i1 ); - case 6: return AndAtomicTest( (T)i0 , (T)i1 ); - case 7: return OrAtomicTest( (T)i0 , (T)i1 ); - case 8: return XorAtomicTest( (T)i0 , (T)i1 ); - case 9: return LShiftAtomicTest( (T)i0 , (T)i1 ); - case 10: return RShiftAtomicTest( (T)i0 , (T)i1 ); - case 11: return IncAtomicTest( (T)i0 ); - case 12: return DecAtomicTest( (T)i0 ); + switch ( test ) { + case 1: return MaxAtomicTest< T, DeviceType >( (T) i0, (T) i1 ); + case 2: return MinAtomicTest< T, DeviceType >( (T) i0, (T) i1 ); + case 3: return MulAtomicTest< T, DeviceType >( (T) i0, (T) i1 ); + case 4: return DivAtomicTest< T, DeviceType >( (T) i0, (T) i1 ); + case 5: return ModAtomicTest< T, DeviceType >( (T) i0, (T) i1 ); + case 6: return AndAtomicTest< T, DeviceType >( (T) i0, (T) i1 ); + case 7: return OrAtomicTest< T, DeviceType >( (T) i0, (T) i1 ); + case 8: return XorAtomicTest< T, DeviceType >( (T) i0, (T) i1 ); + case 9: return LShiftAtomicTest< T, DeviceType >( (T) i0, (T) i1 ); + case 10: return RShiftAtomicTest< T, DeviceType >( (T) i0, (T) i1 ); + case 11: return IncAtomicTest< T, DeviceType >( (T) i0 ); + case 12: return DecAtomicTest< T, DeviceType >( (T) i0 ); } + return 0; } -template -bool 
AtomicOperationsTestNonIntegralType( int i0 , int i1 , int test ) +template< class T, class DeviceType > +bool AtomicOperationsTestNonIntegralType( int i0, int i1, int test ) { - switch (test) { - case 1: return MaxAtomicTest( (T)i0 , (T)i1 ); - case 2: return MinAtomicTest( (T)i0 , (T)i1 ); - case 3: return MulAtomicTest( (T)i0 , (T)i1 ); - case 4: return DivAtomicTest( (T)i0 , (T)i1 ); + switch ( test ) { + case 1: return MaxAtomicTest< T, DeviceType >( (T) i0, (T) i1 ); + case 2: return MinAtomicTest< T, DeviceType >( (T) i0, (T) i1 ); + case 3: return MulAtomicTest< T, DeviceType >( (T) i0, (T) i1 ); + case 4: return DivAtomicTest< T, DeviceType >( (T) i0, (T) i1 ); } + return 0; } -} // namespace - +} // namespace TestAtomicOperations diff --git a/lib/kokkos/core/unit_test/TestAtomicViews.hpp b/lib/kokkos/core/unit_test/TestAtomicViews.hpp index 739492d32f..71080e5c82 100644 --- a/lib/kokkos/core/unit_test/TestAtomicViews.hpp +++ b/lib/kokkos/core/unit_test/TestAtomicViews.hpp @@ -49,56 +49,52 @@ namespace TestAtomicViews { //-----------atomic view api tests----------------- //------------------------------------------------- -template< class T , class ... P > -size_t allocation_count( const Kokkos::View & view ) +template< class T, class ... P > +size_t allocation_count( const Kokkos::View< T, P... > & view ) { const size_t card = view.size(); const size_t alloc = view.span(); - const int memory_span = Kokkos::View::required_allocation_size(100); + const int memory_span = Kokkos::View< int* >::required_allocation_size( 100 ); - return (card <= alloc && memory_span == 400) ? alloc : 0 ; + return ( card <= alloc && memory_span == 400 ) ? 
alloc : 0; } -template< class DataType , - class DeviceType , +template< class DataType, + class DeviceType, unsigned Rank = Kokkos::ViewTraits< DataType >::rank > -struct TestViewOperator_LeftAndRight ; +struct TestViewOperator_LeftAndRight; -template< class DataType , class DeviceType > -struct TestViewOperator_LeftAndRight< DataType , DeviceType , 1 > +template< class DataType, class DeviceType > +struct TestViewOperator_LeftAndRight< DataType, DeviceType, 1 > { - typedef typename DeviceType::execution_space execution_space ; - typedef typename DeviceType::memory_space memory_space ; - typedef typename execution_space::size_type size_type ; + typedef typename DeviceType::execution_space execution_space; + typedef typename DeviceType::memory_space memory_space; + typedef typename execution_space::size_type size_type; - typedef int value_type ; + typedef int value_type; KOKKOS_INLINE_FUNCTION - static void join( volatile value_type & update , + static void join( volatile value_type & update, const volatile value_type & input ) - { update |= input ; } + { update |= input; } KOKKOS_INLINE_FUNCTION static void init( value_type & update ) - { update = 0 ; } + { update = 0; } + typedef Kokkos::View< DataType, Kokkos::LayoutLeft, execution_space, Kokkos::MemoryTraits > left_view; - typedef Kokkos:: - View< DataType, Kokkos::LayoutLeft, execution_space, Kokkos::MemoryTraits< Kokkos::Atomic > > left_view ; + typedef Kokkos::View< DataType, Kokkos::LayoutRight, execution_space, Kokkos::MemoryTraits > right_view; - typedef Kokkos:: - View< DataType, Kokkos::LayoutRight, execution_space, Kokkos::MemoryTraits< Kokkos::Atomic > > right_view ; + typedef Kokkos::View< DataType, Kokkos::LayoutStride, execution_space, Kokkos::MemoryTraits > stride_view; - typedef Kokkos:: - View< DataType, Kokkos::LayoutStride, execution_space, Kokkos::MemoryTraits< Kokkos::Atomic >> stride_view ; - - left_view left ; - right_view right ; - stride_view left_stride ; - stride_view right_stride ; - 
long left_alloc ; - long right_alloc ; + left_view left; + right_view right; + stride_view left_stride; + stride_view right_stride; + long left_alloc; + long right_alloc; TestViewOperator_LeftAndRight() : left( "left" ) @@ -111,357 +107,338 @@ struct TestViewOperator_LeftAndRight< DataType , DeviceType , 1 > static void testit() { - TestViewOperator_LeftAndRight driver ; + TestViewOperator_LeftAndRight driver; - int error_flag = 0 ; + int error_flag = 0; - Kokkos::parallel_reduce( 1 , driver , error_flag ); + Kokkos::parallel_reduce( 1, driver, error_flag ); - ASSERT_EQ( error_flag , 0 ); + ASSERT_EQ( error_flag, 0 ); } KOKKOS_INLINE_FUNCTION - void operator()( const size_type , value_type & update ) const + void operator()( const size_type, value_type & update ) const { - for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + for ( unsigned i0 = 0; i0 < unsigned( left.dimension_0() ); ++i0 ) { - // below checks that values match, but unable to check the references - // - should this be able to be checked? - if ( left(i0) != left(i0,0,0,0,0,0,0,0) ) { update |= 3 ; } - if ( right(i0) != right(i0,0,0,0,0,0,0,0) ) { update |= 3 ; } - if ( left(i0) != left_stride(i0) ) { update |= 4 ; } - if ( right(i0) != right_stride(i0) ) { update |= 8 ; } - /* - if ( & left(i0) != & left(i0,0,0,0,0,0,0,0) ) { update |= 3 ; } - if ( & right(i0) != & right(i0,0,0,0,0,0,0,0) ) { update |= 3 ; } - if ( & left(i0) != & left_stride(i0) ) { update |= 4 ; } - if ( & right(i0) != & right_stride(i0) ) { update |= 8 ; } - */ + // Below checks that values match, but unable to check the references. + // Should this be able to be checked? 
+ if ( left( i0 ) != left( i0, 0, 0, 0, 0, 0, 0, 0 ) ) { update |= 3; } + if ( right( i0 ) != right( i0, 0, 0, 0, 0, 0, 0, 0 ) ) { update |= 3; } + if ( left( i0 ) != left_stride( i0 ) ) { update |= 4; } + if ( right( i0 ) != right_stride( i0 ) ) { update |= 8; } +/* + if ( &left( i0 ) != &left( i0, 0, 0, 0, 0, 0, 0, 0 ) ) { update |= 3; } + if ( &right( i0 ) != &right( i0, 0, 0, 0, 0, 0, 0, 0 ) ) { update |= 3; } + if ( &left( i0 ) != &left_stride( i0 ) ) { update |= 4; } + if ( &right( i0 ) != &right_stride( i0 ) ) { update |= 8; } +*/ } } }; - template< typename T, class DeviceType > class TestAtomicViewAPI { public: - typedef DeviceType device ; + typedef DeviceType device; - enum { N0 = 1000 , - N1 = 3 , - N2 = 5 , + enum { N0 = 1000, + N1 = 3, + N2 = 5, N3 = 7 }; - typedef Kokkos::View< T , device > dView0 ; - typedef Kokkos::View< T* , device > dView1 ; - typedef Kokkos::View< T*[N1] , device > dView2 ; - typedef Kokkos::View< T*[N1][N2] , device > dView3 ; - typedef Kokkos::View< T*[N1][N2][N3] , device > dView4 ; - typedef Kokkos::View< const T*[N1][N2][N3] , device > const_dView4 ; - typedef Kokkos::View< T****, device, Kokkos::MemoryUnmanaged > dView4_unmanaged ; - typedef typename dView0::host_mirror_space host ; + typedef Kokkos::View< T, device > dView0; + typedef Kokkos::View< T*, device > dView1; + typedef Kokkos::View< T*[N1], device > dView2; + typedef Kokkos::View< T*[N1][N2], device > dView3; + typedef Kokkos::View< T*[N1][N2][N3], device > dView4; + typedef Kokkos::View< const T*[N1][N2][N3], device > const_dView4; + typedef Kokkos::View< T****, device, Kokkos::MemoryUnmanaged > dView4_unmanaged; + typedef typename dView0::host_mirror_space host; - typedef Kokkos::View< T , device , Kokkos::MemoryTraits< Kokkos::Atomic > > aView0 ; - typedef Kokkos::View< T* , device , Kokkos::MemoryTraits< Kokkos::Atomic > > aView1 ; - typedef Kokkos::View< T*[N1] , device , Kokkos::MemoryTraits< Kokkos::Atomic > > aView2 ; - typedef Kokkos::View< T*[N1][N2] , 
device , Kokkos::MemoryTraits< Kokkos::Atomic > > aView3 ; - typedef Kokkos::View< T*[N1][N2][N3] , device , Kokkos::MemoryTraits< Kokkos::Atomic > > aView4 ; - typedef Kokkos::View< const T*[N1][N2][N3] , device , Kokkos::MemoryTraits< Kokkos::Atomic > > const_aView4 ; + typedef Kokkos::View< T, device, Kokkos::MemoryTraits< Kokkos::Atomic > > aView0; + typedef Kokkos::View< T*, device, Kokkos::MemoryTraits< Kokkos::Atomic > > aView1; + typedef Kokkos::View< T*[N1], device, Kokkos::MemoryTraits< Kokkos::Atomic > > aView2; + typedef Kokkos::View< T*[N1][N2], device, Kokkos::MemoryTraits< Kokkos::Atomic > > aView3; + typedef Kokkos::View< T*[N1][N2][N3], device, Kokkos::MemoryTraits< Kokkos::Atomic > > aView4; + typedef Kokkos::View< const T*[N1][N2][N3], device, Kokkos::MemoryTraits< Kokkos::Atomic > > const_aView4; - typedef Kokkos::View< T****, device, Kokkos::MemoryTraits< Kokkos::Unmanaged | Kokkos::Atomic > > aView4_unmanaged ; + typedef Kokkos::View< T****, device, Kokkos::MemoryTraits< Kokkos::Unmanaged | Kokkos::Atomic > > aView4_unmanaged; - typedef typename aView0::host_mirror_space host_atomic ; + typedef typename aView0::host_mirror_space host_atomic; TestAtomicViewAPI() { - TestViewOperator_LeftAndRight< int[2] , device >::testit(); + TestViewOperator_LeftAndRight< int[2], device >::testit(); run_test_rank0(); run_test_rank4(); run_test_const(); } - static void run_test_rank0() { - dView0 dx , dy ; - aView0 ax , ay , az ; + dView0 dx, dy; + aView0 ax, ay, az; dx = dView0( "dx" ); dy = dView0( "dy" ); - ASSERT_EQ( dx.use_count() , size_t(1) ); - ASSERT_EQ( dy.use_count() , size_t(1) ); + ASSERT_EQ( dx.use_count(), size_t( 1 ) ); + ASSERT_EQ( dy.use_count(), size_t( 1 ) ); - ax = dx ; - ay = dy ; - ASSERT_EQ( dx.use_count() , size_t(2) ); - ASSERT_EQ( dy.use_count() , size_t(2) ); - ASSERT_EQ( dx.use_count() , ax.use_count() ); + ax = dx; + ay = dy; + ASSERT_EQ( dx.use_count(), size_t( 2 ) ); + ASSERT_EQ( dy.use_count(), size_t( 2 ) ); + ASSERT_EQ( 
dx.use_count(), ax.use_count() ); - az = ax ; - ASSERT_EQ( dx.use_count() , size_t(3) ); - ASSERT_EQ( ax.use_count() , size_t(3) ); - ASSERT_EQ( az.use_count() , size_t(3) ); - ASSERT_EQ( az.use_count() , ax.use_count() ); + az = ax; + ASSERT_EQ( dx.use_count(), size_t( 3 ) ); + ASSERT_EQ( ax.use_count(), size_t( 3 ) ); + ASSERT_EQ( az.use_count(), size_t( 3 ) ); + ASSERT_EQ( az.use_count(), ax.use_count() ); } static void run_test_rank4() { - dView4 dx , dy ; - aView4 ax , ay , az ; + dView4 dx, dy; + aView4 ax, ay, az; - dx = dView4( "dx" , N0 ); - dy = dView4( "dy" , N0 ); - ASSERT_EQ( dx.use_count() , size_t(1) ); - ASSERT_EQ( dy.use_count() , size_t(1) ); + dx = dView4( "dx", N0 ); + dy = dView4( "dy", N0 ); + ASSERT_EQ( dx.use_count(), size_t( 1 ) ); + ASSERT_EQ( dy.use_count(), size_t( 1 ) ); - ax = dx ; - ay = dy ; - ASSERT_EQ( dx.use_count() , size_t(2) ); - ASSERT_EQ( dy.use_count() , size_t(2) ); - ASSERT_EQ( dx.use_count() , ax.use_count() ); + ax = dx; + ay = dy; + ASSERT_EQ( dx.use_count(), size_t( 2 ) ); + ASSERT_EQ( dy.use_count(), size_t( 2 ) ); + ASSERT_EQ( dx.use_count(), ax.use_count() ); dView4_unmanaged unmanaged_dx = dx; - ASSERT_EQ( dx.use_count() , size_t(2) ); + ASSERT_EQ( dx.use_count(), size_t( 2 ) ); - az = ax ; - ASSERT_EQ( dx.use_count() , size_t(3) ); - ASSERT_EQ( ax.use_count() , size_t(3) ); - ASSERT_EQ( az.use_count() , size_t(3) ); - ASSERT_EQ( az.use_count() , ax.use_count() ); + az = ax; + ASSERT_EQ( dx.use_count(), size_t( 3 ) ); + ASSERT_EQ( ax.use_count(), size_t( 3 ) ); + ASSERT_EQ( az.use_count(), size_t( 3 ) ); + ASSERT_EQ( az.use_count(), ax.use_count() ); aView4_unmanaged unmanaged_ax = ax; - ASSERT_EQ( ax.use_count() , size_t(3) ); + ASSERT_EQ( ax.use_count(), size_t( 3 ) ); - aView4_unmanaged unmanaged_ax_from_ptr_dx = aView4_unmanaged(dx.data(), - dx.dimension_0(), - dx.dimension_1(), - dx.dimension_2(), - dx.dimension_3()); - ASSERT_EQ( ax.use_count() , size_t(3) ); + aView4_unmanaged unmanaged_ax_from_ptr_dx = + 
aView4_unmanaged( dx.data(), dx.dimension_0(), dx.dimension_1(), dx.dimension_2(), dx.dimension_3() ); + ASSERT_EQ( ax.use_count(), size_t( 3 ) ); - const_aView4 const_ax = ax ; - ASSERT_EQ( ax.use_count() , size_t(4) ); - ASSERT_EQ( const_ax.use_count() , ax.use_count() ); + const_aView4 const_ax = ax; + ASSERT_EQ( ax.use_count(), size_t( 4 ) ); + ASSERT_EQ( const_ax.use_count(), ax.use_count() ); ASSERT_FALSE( ax.data() == 0 ); ASSERT_FALSE( const_ax.data() == 0 ); // referenceable ptr ASSERT_FALSE( unmanaged_ax.data() == 0 ); ASSERT_FALSE( unmanaged_ax_from_ptr_dx.data() == 0 ); ASSERT_FALSE( ay.data() == 0 ); -// ASSERT_NE( ax , ay ); +// ASSERT_NE( ax, ay ); // Above test results in following runtime error from gtest: // Expected: (ax) != (ay), actual: 32-byte object <30-01 D0-A0 D8-7F 00-00 00-31 44-0C 01-00 00-00 E8-03 00-00 00-00 00-00 69-00 00-00 00-00 00-00> vs 32-byte object <80-01 D0-A0 D8-7F 00-00 00-A1 4A-0C 01-00 00-00 E8-03 00-00 00-00 00-00 69-00 00-00 00-00 00-00> - ASSERT_EQ( ax.dimension_0() , unsigned(N0) ); - ASSERT_EQ( ax.dimension_1() , unsigned(N1) ); - ASSERT_EQ( ax.dimension_2() , unsigned(N2) ); - ASSERT_EQ( ax.dimension_3() , unsigned(N3) ); + ASSERT_EQ( ax.dimension_0(), unsigned( N0 ) ); + ASSERT_EQ( ax.dimension_1(), unsigned( N1 ) ); + ASSERT_EQ( ax.dimension_2(), unsigned( N2 ) ); + ASSERT_EQ( ax.dimension_3(), unsigned( N3 ) ); - ASSERT_EQ( ay.dimension_0() , unsigned(N0) ); - ASSERT_EQ( ay.dimension_1() , unsigned(N1) ); - ASSERT_EQ( ay.dimension_2() , unsigned(N2) ); - ASSERT_EQ( ay.dimension_3() , unsigned(N3) ); + ASSERT_EQ( ay.dimension_0(), unsigned( N0 ) ); + ASSERT_EQ( ay.dimension_1(), unsigned( N1 ) ); + ASSERT_EQ( ay.dimension_2(), unsigned( N2 ) ); + ASSERT_EQ( ay.dimension_3(), unsigned( N3 ) ); - ASSERT_EQ( unmanaged_ax_from_ptr_dx.capacity(),unsigned(N0)*unsigned(N1)*unsigned(N2)*unsigned(N3) ); + ASSERT_EQ( unmanaged_ax_from_ptr_dx.capacity(), unsigned( N0 ) * unsigned( N1 ) * unsigned( N2 ) * unsigned( N3 ) ); } - 
typedef T DataType[2] ; + typedef T DataType[2]; static void check_auto_conversion_to_const( - const Kokkos::View< const DataType , device , Kokkos::MemoryTraits< Kokkos::Atomic> > & arg_const , - const Kokkos::View< const DataType , device , Kokkos::MemoryTraits< Kokkos::Atomic> > & arg ) + const Kokkos::View< const DataType, device, Kokkos::MemoryTraits > & arg_const, + const Kokkos::View< const DataType, device, Kokkos::MemoryTraits > & arg ) { ASSERT_TRUE( arg_const == arg ); } static void run_test_const() { - typedef Kokkos::View< DataType , device , Kokkos::MemoryTraits< Kokkos::Atomic> > typeX ; - typedef Kokkos::View< const DataType , device , Kokkos::MemoryTraits< Kokkos::Atomic> > const_typeX ; + typedef Kokkos::View< DataType, device, Kokkos::MemoryTraits > typeX; + typedef Kokkos::View< const DataType, device, Kokkos::MemoryTraits > const_typeX; typeX x( "X" ); - const_typeX xc = x ; + const_typeX xc = x; //ASSERT_TRUE( xc == x ); // const xc is referenceable, non-const x is not //ASSERT_TRUE( x == xc ); - check_auto_conversion_to_const( x , xc ); + check_auto_conversion_to_const( x, xc ); } - }; - //--------------------------------------------------- //-----------initialization functors----------------- //--------------------------------------------------- template struct InitFunctor_Seq { + typedef Kokkos::View< T*, execution_space > view_type; - typedef Kokkos::View< T* , execution_space > view_type ; + view_type input; + const long length; - view_type input ; - const long length ; - - InitFunctor_Seq( view_type & input_ , const long length_ ) - : input(input_) - , length(length_) + InitFunctor_Seq( view_type & input_, const long length_ ) + : input( input_ ) + , length( length_ ) {} KOKKOS_INLINE_FUNCTION void operator()( const long i ) const { if ( i < length ) { - input(i) = (T) i ; + input( i ) = (T) i; } } - }; - template struct InitFunctor_ModTimes { + typedef Kokkos::View< T*, execution_space > view_type; - typedef Kokkos::View< T* , 
execution_space > view_type ; + view_type input; + const long length; + const long remainder; - view_type input ; - const long length ; - const long remainder ; - - InitFunctor_ModTimes( view_type & input_ , const long length_ , const long remainder_ ) - : input(input_) - , length(length_) - , remainder(remainder_) + InitFunctor_ModTimes( view_type & input_, const long length_, const long remainder_ ) + : input( input_ ) + , length( length_ ) + , remainder( remainder_ ) {} KOKKOS_INLINE_FUNCTION void operator()( const long i ) const { if ( i < length ) { - if ( i % (remainder+1) == remainder ) { - input(i) = (T)2 ; + if ( i % ( remainder + 1 ) == remainder ) { + input( i ) = (T) 2; } else { - input(i) = (T)1 ; + input( i ) = (T) 1; } } } }; - template struct InitFunctor_ModShift { + typedef Kokkos::View< T*, execution_space > view_type; - typedef Kokkos::View< T* , execution_space > view_type ; + view_type input; + const long length; + const long remainder; - view_type input ; - const long length ; - const long remainder ; - - InitFunctor_ModShift( view_type & input_ , const long length_ , const long remainder_ ) - : input(input_) - , length(length_) - , remainder(remainder_) + InitFunctor_ModShift( view_type & input_, const long length_, const long remainder_ ) + : input( input_ ) + , length( length_ ) + , remainder( remainder_ ) {} KOKKOS_INLINE_FUNCTION void operator()( const long i ) const { if ( i < length ) { - if ( i % (remainder+1) == remainder ) { - input(i) = 1 ; + if ( i % ( remainder + 1 ) == remainder ) { + input( i ) = 1; } } } }; - //--------------------------------------------------- //-----------atomic view plus-equal------------------ //--------------------------------------------------- template struct PlusEqualAtomicViewFunctor { - - typedef Kokkos::View< T* , execution_space , Kokkos::MemoryTraits< Kokkos::Atomic > > atomic_view_type ; - - typedef Kokkos::View< T* , execution_space > view_type ; + typedef Kokkos::View< T*, execution_space, 
Kokkos::MemoryTraits > atomic_view_type; + typedef Kokkos::View< T*, execution_space > view_type; view_type input; atomic_view_type even_odd_result; const long length; // Wrap the result view in an atomic view, use this for operator - PlusEqualAtomicViewFunctor( const view_type & input_ , view_type & even_odd_result_ , const long length_) - : input(input_) - , even_odd_result(even_odd_result_) - , length(length_) + PlusEqualAtomicViewFunctor( const view_type & input_, view_type & even_odd_result_, const long length_ ) + : input( input_ ) + , even_odd_result( even_odd_result_ ) + , length( length_ ) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()( const long i ) const { if ( i < length ) { if ( i % 2 == 0 ) { - even_odd_result(0) += input(i); + even_odd_result( 0 ) += input( i ); } else { - even_odd_result(1) += input(i); + even_odd_result( 1 ) += input( i ); } } } - }; - -template -T PlusEqualAtomicView(const long input_length) { - - typedef Kokkos::View< T* , execution_space > view_type ; - typedef typename view_type::HostMirror host_view_type ; +template< class T, class execution_space > +T PlusEqualAtomicView( const long input_length ) { + typedef Kokkos::View< T*, execution_space > view_type; + typedef typename view_type::HostMirror host_view_type; const long length = input_length; - view_type input("input_view",length) ; - view_type result_view("result_view",2) ; + view_type input( "input_view", length ); + view_type result_view( "result_view", 2 ); - InitFunctor_Seq init_f( input , length ) ; - Kokkos::parallel_for(Kokkos::RangePolicy(0, length) , init_f ); + InitFunctor_Seq< T, execution_space > init_f( input, length ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), init_f ); - PlusEqualAtomicViewFunctor functor(input, result_view, length); - Kokkos::parallel_for( Kokkos::RangePolicy(0, length), functor); + PlusEqualAtomicViewFunctor< T, execution_space > functor( input, result_view, length ); 
+ Kokkos::parallel_for( Kokkos::RangePolicy( 0, length ), functor ); Kokkos::fence(); - host_view_type h_result_view = Kokkos::create_mirror_view(result_view); - Kokkos::deep_copy(h_result_view, result_view); + host_view_type h_result_view = Kokkos::create_mirror_view( result_view ); + Kokkos::deep_copy( h_result_view, result_view ); - return (T) (h_result_view(0) + h_result_view(1) ) ; + return (T) ( h_result_view( 0 ) + h_result_view( 1 ) ); } -template +template< class T > T PlusEqualAtomicViewCheck( const long input_length ) { - const long N = input_length; T result[2]; + if ( N % 2 == 0 ) { - const long half_sum_end = (N/2) - 1; + const long half_sum_end = ( N / 2 ) - 1; const long full_sum_end = N - 1; - result[0] = half_sum_end*(half_sum_end + 1)/2 ; //even sum - result[1] = ( full_sum_end*(full_sum_end + 1)/2 ) - result[0] ; // odd sum + result[0] = half_sum_end * ( half_sum_end + 1 ) / 2; // Even sum. + result[1] = ( full_sum_end * ( full_sum_end + 1 ) / 2 ) - result[0]; // Odd sum. } else { - const long half_sum_end = (T)(N/2) ; + const long half_sum_end = (T) ( N / 2 ); const long full_sum_end = N - 2; - result[0] = half_sum_end*(half_sum_end - 1)/2 ; //even sum - result[1] = ( full_sum_end*(full_sum_end - 1)/2 ) - result[0] ; // odd sum + result[0] = half_sum_end * ( half_sum_end - 1 ) / 2; // Even sum. + result[1] = ( full_sum_end * ( full_sum_end - 1 ) / 2 ) - result[0]; // Odd sum. 
} - return (T)(result[0] + result[1]); + return (T) ( result[0] + result[1] ); } -template -bool PlusEqualAtomicViewTest(long input_length) +template< class T, class DeviceType > +bool PlusEqualAtomicViewTest( long input_length ) { - T res = PlusEqualAtomicView(input_length); - T resSerial = PlusEqualAtomicViewCheck(input_length); + T res = PlusEqualAtomicView< T, DeviceType >( input_length ); + T resSerial = PlusEqualAtomicViewCheck< T >( input_length ); bool passed = true; @@ -469,104 +446,98 @@ bool PlusEqualAtomicViewTest(long input_length) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = PlusEqualAtomicViewTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } - //--------------------------------------------------- //-----------atomic view minus-equal----------------- //--------------------------------------------------- template struct MinusEqualAtomicViewFunctor { - - typedef Kokkos::View< T* , execution_space , Kokkos::MemoryTraits< Kokkos::Atomic > > atomic_view_type ; - - typedef Kokkos::View< T* , execution_space > view_type ; + typedef Kokkos::View< T*, execution_space, Kokkos::MemoryTraits > atomic_view_type; + typedef Kokkos::View< T*, execution_space > view_type; view_type input; atomic_view_type even_odd_result; const long length; - // Wrap the result view in an atomic view, use this for operator - MinusEqualAtomicViewFunctor( const view_type & input_ , view_type & even_odd_result_ , const long length_) - : input(input_) - , even_odd_result(even_odd_result_) - , length(length_) + // Wrap the result view in an atomic view, use this for operator. 
+ MinusEqualAtomicViewFunctor( const view_type & input_, view_type & even_odd_result_, const long length_ ) + : input( input_ ) + , even_odd_result( even_odd_result_ ) + , length( length_ ) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()( const long i ) const { if ( i < length ) { if ( i % 2 == 0 ) { - even_odd_result(0) -= input(i); + even_odd_result( 0 ) -= input( i ); } else { - even_odd_result(1) -= input(i); + even_odd_result( 1 ) -= input( i ); } } } - }; - -template -T MinusEqualAtomicView(const long input_length) { - - typedef Kokkos::View< T* , execution_space > view_type ; - typedef typename view_type::HostMirror host_view_type ; +template< class T, class execution_space > +T MinusEqualAtomicView( const long input_length ) { + typedef Kokkos::View< T*, execution_space > view_type; + typedef typename view_type::HostMirror host_view_type; const long length = input_length; - view_type input("input_view",length) ; - view_type result_view("result_view",2) ; + view_type input( "input_view", length ); + view_type result_view( "result_view", 2 ); - InitFunctor_Seq init_f( input , length ) ; - Kokkos::parallel_for( Kokkos::RangePolicy(0, length), init_f ); + InitFunctor_Seq< T, execution_space > init_f( input, length ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), init_f ); - MinusEqualAtomicViewFunctor functor(input, result_view,length); - Kokkos::parallel_for( Kokkos::RangePolicy(0, length), functor); + MinusEqualAtomicViewFunctor< T, execution_space > functor( input, result_view, length ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), functor ); Kokkos::fence(); - host_view_type h_result_view = Kokkos::create_mirror_view(result_view); - Kokkos::deep_copy(h_result_view, result_view); + host_view_type h_result_view = Kokkos::create_mirror_view( result_view ); + Kokkos::deep_copy( h_result_view, result_view ); - return (T) (h_result_view(0) + h_result_view(1) ) ; + 
return (T) ( h_result_view( 0 ) + h_result_view( 1 ) ); } -template +template< class T > T MinusEqualAtomicViewCheck( const long input_length ) { - const long N = input_length; T result[2]; + if ( N % 2 == 0 ) { - const long half_sum_end = (N/2) - 1; + const long half_sum_end = ( N / 2 ) - 1; const long full_sum_end = N - 1; - result[0] = -1*( half_sum_end*(half_sum_end + 1)/2 ) ; //even sum - result[1] = -1*( ( full_sum_end*(full_sum_end + 1)/2 ) + result[0] ) ; // odd sum + result[0] = -1 * ( half_sum_end * ( half_sum_end + 1 ) / 2 ); // Even sum. + result[1] = -1 * ( ( full_sum_end * ( full_sum_end + 1 ) / 2 ) + result[0] ); // Odd sum. } else { - const long half_sum_end = (long)(N/2) ; + const long half_sum_end = (long) ( N / 2 ); const long full_sum_end = N - 2; - result[0] = -1*( half_sum_end*(half_sum_end - 1)/2 ) ; //even sum - result[1] = -1*( ( full_sum_end*(full_sum_end - 1)/2 ) + result[0] ) ; // odd sum + result[0] = -1 * ( half_sum_end * ( half_sum_end - 1 ) / 2 ); // Even sum. + result[1] = -1 * ( ( full_sum_end * ( full_sum_end - 1 ) / 2 ) + result[0] ); // Odd sum. 
} - return (result[0] + result[1]); + return ( result[0] + result[1] ); } -template -bool MinusEqualAtomicViewTest(long input_length) +template< class T, class DeviceType > +bool MinusEqualAtomicViewTest( long input_length ) { - T res = MinusEqualAtomicView(input_length); - T resSerial = MinusEqualAtomicViewCheck(input_length); + T res = MinusEqualAtomicView< T, DeviceType >( input_length ); + T resSerial = MinusEqualAtomicViewCheck< T >( input_length ); bool passed = true; @@ -574,83 +545,76 @@ bool MinusEqualAtomicViewTest(long input_length) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = MinusEqualAtomicViewTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } - //--------------------------------------------------- //-----------atomic view times-equal----------------- //--------------------------------------------------- template struct TimesEqualAtomicViewFunctor { - - typedef Kokkos::View< T* , execution_space , Kokkos::MemoryTraits< Kokkos::Atomic > > atomic_view_type ; - - typedef Kokkos::View< T* , execution_space > view_type ; + typedef Kokkos::View< T*, execution_space, Kokkos::MemoryTraits > atomic_view_type; + typedef Kokkos::View< T*, execution_space > view_type; view_type input; atomic_view_type result; const long length; // Wrap the result view in an atomic view, use this for operator - TimesEqualAtomicViewFunctor( const view_type & input_ , view_type & result_ , const long length_) - : input(input_) - , result(result_) - , length(length_) + TimesEqualAtomicViewFunctor( const view_type & input_, view_type & result_, const long length_ ) + : input( input_ ) + , result( result_ ) + , length( length_ ) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()( const long i ) const { if ( i < length && i > 0 ) { - result(0) *= (double)input(i); + result( 0 ) *= (double) input( i ); } } - }; - -template -T 
TimesEqualAtomicView(const long input_length, const long remainder) { - - typedef Kokkos::View< T* , execution_space > view_type ; - typedef typename view_type::HostMirror host_view_type ; +template< class T, class execution_space > +T TimesEqualAtomicView( const long input_length, const long remainder ) { + typedef Kokkos::View< T*, execution_space > view_type; + typedef typename view_type::HostMirror host_view_type; const long length = input_length; - view_type input("input_view",length) ; - view_type result_view("result_view",1) ; - deep_copy(result_view, 1.0); + view_type input( "input_view", length ); + view_type result_view( "result_view", 1 ); + deep_copy( result_view, 1.0 ); - InitFunctor_ModTimes init_f( input , length , remainder ) ; - Kokkos::parallel_for( Kokkos::RangePolicy(0, length), init_f ); + InitFunctor_ModTimes< T, execution_space > init_f( input, length, remainder ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), init_f ); - TimesEqualAtomicViewFunctor functor(input, result_view, length); - Kokkos::parallel_for( Kokkos::RangePolicy(0, length), functor); + TimesEqualAtomicViewFunctor< T, execution_space > functor( input, result_view, length ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), functor ); Kokkos::fence(); - host_view_type h_result_view = Kokkos::create_mirror_view(result_view); - Kokkos::deep_copy(h_result_view, result_view); + host_view_type h_result_view = Kokkos::create_mirror_view( result_view ); + Kokkos::deep_copy( h_result_view, result_view ); - return (T) (h_result_view(0)) ; + return (T) ( h_result_view( 0 ) ); } -template +template< class T > T TimesEqualAtomicViewCheck( const long input_length, const long remainder ) { - - //Analytical result + // Analytical result. 
const long N = input_length; T result = 1.0; for ( long i = 2; i < N; ++i ) { - if ( i % (remainder+1) == remainder ) { + if ( i % ( remainder + 1 ) == remainder ) { result *= 2.0; } else { @@ -658,15 +622,15 @@ T TimesEqualAtomicViewCheck( const long input_length, const long remainder ) { } } - return (T)result; + return (T) result; } -template -bool TimesEqualAtomicViewTest(const long input_length) +template< class T, class DeviceType> +bool TimesEqualAtomicViewTest( const long input_length ) { const long remainder = 23; - T res = TimesEqualAtomicView(input_length, remainder); - T resSerial = TimesEqualAtomicViewCheck(input_length, remainder); + T res = TimesEqualAtomicView< T, DeviceType >( input_length, remainder ); + T resSerial = TimesEqualAtomicViewCheck< T >( input_length, remainder ); bool passed = true; @@ -674,101 +638,93 @@ bool TimesEqualAtomicViewTest(const long input_length) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = TimesEqualAtomicViewTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } - //--------------------------------------------------- //------------atomic view div-equal------------------ //--------------------------------------------------- template struct DivEqualAtomicViewFunctor { - - typedef Kokkos::View< T , execution_space , Kokkos::MemoryTraits< Kokkos::Atomic > > atomic_view_type ; - - typedef Kokkos::View< T* , execution_space > view_type ; - typedef Kokkos::View< T , execution_space > scalar_view_type ; + typedef Kokkos::View< T, execution_space, Kokkos::MemoryTraits > atomic_view_type; + typedef Kokkos::View< T*, execution_space > view_type; + typedef Kokkos::View< T, execution_space > scalar_view_type; view_type input; atomic_view_type result; const long length; - // Wrap the result view in an atomic view, use this for operator - DivEqualAtomicViewFunctor( const view_type & input_ , scalar_view_type & 
result_ , const long length_) - : input(input_) - , result(result_) - , length(length_) + // Wrap the result view in an atomic view, use this for operator. + DivEqualAtomicViewFunctor( const view_type & input_, scalar_view_type & result_, const long length_ ) + : input( input_ ) + , result( result_ ) + , length( length_ ) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()( const long i ) const { if ( i < length && i > 0 ) { - result() /= (double)(input(i)); + result() /= (double) ( input( i ) ); } } - }; - -template -T DivEqualAtomicView(const long input_length, const long remainder) { - - typedef Kokkos::View< T* , execution_space > view_type ; - typedef Kokkos::View< T , execution_space > scalar_view_type ; - typedef typename scalar_view_type::HostMirror host_scalar_view_type ; +template< class T, class execution_space > +T DivEqualAtomicView( const long input_length, const long remainder ) { + typedef Kokkos::View< T*, execution_space > view_type; + typedef Kokkos::View< T, execution_space > scalar_view_type; + typedef typename scalar_view_type::HostMirror host_scalar_view_type; const long length = input_length; - view_type input("input_view",length) ; - scalar_view_type result_view("result_view") ; - Kokkos::deep_copy(result_view, 12121212121); + view_type input( "input_view", length ); + scalar_view_type result_view( "result_view" ); + Kokkos::deep_copy( result_view, 12121212121 ); - InitFunctor_ModTimes init_f( input , length , remainder ) ; - Kokkos::parallel_for( Kokkos::RangePolicy(0, length), init_f ); + InitFunctor_ModTimes< T, execution_space > init_f( input, length, remainder ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), init_f ); - DivEqualAtomicViewFunctor functor(input, result_view, length); - Kokkos::parallel_for( Kokkos::RangePolicy(0, length), functor); + DivEqualAtomicViewFunctor< T, execution_space > functor( input, result_view, length ); + Kokkos::parallel_for( 
Kokkos::RangePolicy< execution_space >( 0, length ), functor ); Kokkos::fence(); - host_scalar_view_type h_result_view = Kokkos::create_mirror_view(result_view); - Kokkos::deep_copy(h_result_view, result_view); + host_scalar_view_type h_result_view = Kokkos::create_mirror_view( result_view ); + Kokkos::deep_copy( h_result_view, result_view ); - return (T) (h_result_view()) ; + return (T) ( h_result_view() ); } -template -T DivEqualAtomicViewCheck( const long input_length , const long remainder ) { - +template< class T > +T DivEqualAtomicViewCheck( const long input_length, const long remainder ) { const long N = input_length; T result = 12121212121.0; for ( long i = 2; i < N; ++i ) { - if ( i % (remainder+1) == remainder ) { + if ( i % ( remainder + 1 ) == remainder ) { result /= 1.0; } else { result /= 2.0; } - } - return (T)result; + return (T) result; } -template -bool DivEqualAtomicViewTest(const long input_length) +template< class T, class DeviceType > +bool DivEqualAtomicViewTest( const long input_length ) { const long remainder = 23; - T res = DivEqualAtomicView(input_length, remainder); - T resSerial = DivEqualAtomicViewCheck(input_length, remainder); + T res = DivEqualAtomicView< T, DeviceType >( input_length, remainder ); + T resSerial = DivEqualAtomicViewCheck< T >( input_length, remainder ); bool passed = true; @@ -776,83 +732,76 @@ bool DivEqualAtomicViewTest(const long input_length) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = DivEqualAtomicViewTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } - //--------------------------------------------------- //------------atomic view mod-equal------------------ //--------------------------------------------------- -template +template< class T, class execution_space > struct ModEqualAtomicViewFunctor { - - typedef Kokkos::View< T , execution_space , Kokkos::MemoryTraits< Kokkos::Atomic > > 
atomic_view_type ; - - typedef Kokkos::View< T* , execution_space > view_type ; - typedef Kokkos::View< T , execution_space > scalar_view_type ; + typedef Kokkos::View< T, execution_space, Kokkos::MemoryTraits > atomic_view_type; + typedef Kokkos::View< T*, execution_space > view_type; + typedef Kokkos::View< T, execution_space > scalar_view_type; view_type input; atomic_view_type result; const long length; - // Wrap the result view in an atomic view, use this for operator - ModEqualAtomicViewFunctor( const view_type & input_ , scalar_view_type & result_ , const long length_) - : input(input_) - , result(result_) - , length(length_) + // Wrap the result view in an atomic view, use this for operator. + ModEqualAtomicViewFunctor( const view_type & input_, scalar_view_type & result_, const long length_ ) + : input( input_ ) + , result( result_ ) + , length( length_ ) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()( const long i ) const { if ( i < length && i > 0 ) { - result() %= (double)(input(i)); + result() %= (double) ( input( i ) ); } } - }; - -template -T ModEqualAtomicView(const long input_length, const long remainder) { - - typedef Kokkos::View< T* , execution_space > view_type ; - typedef Kokkos::View< T , execution_space > scalar_view_type ; - typedef typename scalar_view_type::HostMirror host_scalar_view_type ; +template< class T, class execution_space > +T ModEqualAtomicView( const long input_length, const long remainder ) { + typedef Kokkos::View< T*, execution_space > view_type; + typedef Kokkos::View< T, execution_space > scalar_view_type; + typedef typename scalar_view_type::HostMirror host_scalar_view_type; const long length = input_length; - view_type input("input_view",length) ; - scalar_view_type result_view("result_view") ; - Kokkos::deep_copy(result_view, 12121212121); + view_type input( "input_view", length ); + scalar_view_type result_view( "result_view" ); + Kokkos::deep_copy( result_view, 12121212121 ); - 
InitFunctor_ModTimes init_f( input , length , remainder ) ; - Kokkos::parallel_for( Kokkos::RangePolicy(0, length), init_f ); + InitFunctor_ModTimes< T, execution_space > init_f( input, length, remainder ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), init_f ); - ModEqualAtomicViewFunctor functor(input, result_view, length); - Kokkos::parallel_for( Kokkos::RangePolicy(0, length), functor); + ModEqualAtomicViewFunctor< T, execution_space > functor( input, result_view, length ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), functor ); Kokkos::fence(); - host_scalar_view_type h_result_view = Kokkos::create_mirror_view(result_view); - Kokkos::deep_copy(h_result_view, result_view); + host_scalar_view_type h_result_view = Kokkos::create_mirror_view( result_view ); + Kokkos::deep_copy( h_result_view, result_view ); - return (T) (h_result_view()) ; + return (T) ( h_result_view() ); } -template -T ModEqualAtomicViewCheck( const long input_length , const long remainder ) { - +template< class T > +T ModEqualAtomicViewCheck( const long input_length, const long remainder ) { const long N = input_length; T result = 12121212121; for ( long i = 2; i < N; ++i ) { - if ( i % (remainder+1) == remainder ) { + if ( i % ( remainder + 1 ) == remainder ) { result %= 1; } else { @@ -860,19 +809,18 @@ T ModEqualAtomicViewCheck( const long input_length , const long remainder ) { } } - return (T)result; + return (T) result; } -template -bool ModEqualAtomicViewTest(const long input_length) +template< class T, class DeviceType > +bool ModEqualAtomicViewTest( const long input_length ) { - - static_assert( std::is_integral::value, "ModEqualAtomicView Error: Type must be integral type for this unit test"); + static_assert( std::is_integral< T >::value, "ModEqualAtomicView Error: Type must be integral type for this unit test" ); const long remainder = 23; - T res = ModEqualAtomicView(input_length, remainder); - T resSerial = 
ModEqualAtomicViewCheck(input_length, remainder); + T res = ModEqualAtomicView< T, DeviceType >( input_length, remainder ); + T resSerial = ModEqualAtomicViewCheck< T >( input_length, remainder ); bool passed = true; @@ -880,142 +828,134 @@ bool ModEqualAtomicViewTest(const long input_length) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = ModEqualAtomicViewTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } - //--------------------------------------------------- //------------atomic view rs-equal------------------ //--------------------------------------------------- -template +template< class T, class execution_space > struct RSEqualAtomicViewFunctor { - - typedef Kokkos::View< T**** , execution_space , Kokkos::MemoryTraits< Kokkos::Atomic > > atomic_view_type ; - - typedef Kokkos::View< T* , execution_space > view_type ; - typedef Kokkos::View< T**** , execution_space > result_view_type ; + typedef Kokkos::View< T****, execution_space, Kokkos::MemoryTraits > atomic_view_type; + typedef Kokkos::View< T*, execution_space > view_type; + typedef Kokkos::View< T****, execution_space > result_view_type; const view_type input; atomic_view_type result; const long length; const long value; - // Wrap the result view in an atomic view, use this for operator - RSEqualAtomicViewFunctor( const view_type & input_ , result_view_type & result_ , const long & length_ , const long & value_ ) - : input(input_) - , result(result_) - , length(length_) - , value(value_) + // Wrap the result view in an atomic view, use this for operator. 
+ RSEqualAtomicViewFunctor( const view_type & input_, result_view_type & result_, const long & length_, const long & value_ ) + : input( input_ ) + , result( result_ ) + , length( length_ ) + , value( value_ ) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()( const long i ) const { if ( i < length ) { if ( i % 4 == 0 ) { - result(1,0,0,0) >>= input(i); + result( 1, 0, 0, 0 ) >>= input( i ); } else if ( i % 4 == 1 ) { - result(0,1,0,0) >>= input(i); + result( 0, 1, 0, 0 ) >>= input( i ); } else if ( i % 4 == 2 ) { - result(0,0,1,0) >>= input(i); + result( 0, 0, 1, 0 ) >>= input( i ); } else if ( i % 4 == 3 ) { - result(0,0,0,1) >>= input(i); + result( 0, 0, 0, 1 ) >>= input( i ); } } } - }; - -template -T RSEqualAtomicView(const long input_length, const long value, const long remainder) { - - typedef Kokkos::View< T* , execution_space > view_type ; - typedef Kokkos::View< T**** , execution_space > result_view_type ; - typedef typename result_view_type::HostMirror host_scalar_view_type ; +template< class T, class execution_space > +T RSEqualAtomicView( const long input_length, const long value, const long remainder ) { + typedef Kokkos::View< T*, execution_space > view_type; + typedef Kokkos::View< T****, execution_space > result_view_type; + typedef typename result_view_type::HostMirror host_scalar_view_type; const long length = input_length; - view_type input("input_view",length) ; - result_view_type result_view("result_view",2,2,2,2) ; - host_scalar_view_type h_result_view = Kokkos::create_mirror_view(result_view); - h_result_view(1,0,0,0) = value; - h_result_view(0,1,0,0) = value; - h_result_view(0,0,1,0) = value; - h_result_view(0,0,0,1) = value; - Kokkos::deep_copy( result_view , h_result_view ); + view_type input( "input_view", length ); + result_view_type result_view( "result_view", 2, 2, 2, 2 ); + host_scalar_view_type h_result_view = Kokkos::create_mirror_view( result_view ); + h_result_view( 1, 0, 0, 0 ) = value; + 
h_result_view( 0, 1, 0, 0 ) = value; + h_result_view( 0, 0, 1, 0 ) = value; + h_result_view( 0, 0, 0, 1 ) = value; + Kokkos::deep_copy( result_view, h_result_view ); + InitFunctor_ModShift< T, execution_space > init_f( input, length, remainder ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), init_f ); - InitFunctor_ModShift init_f( input , length , remainder ) ; - Kokkos::parallel_for( Kokkos::RangePolicy(0, length), init_f ); - - RSEqualAtomicViewFunctor functor(input, result_view, length, value); - Kokkos::parallel_for( Kokkos::RangePolicy(0, length), functor); + RSEqualAtomicViewFunctor< T, execution_space > functor( input, result_view, length, value ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), functor ); Kokkos::fence(); - Kokkos::deep_copy(h_result_view, result_view); + Kokkos::deep_copy( h_result_view, result_view ); - return (T) (h_result_view(1,0,0,0)) ; + return (T) ( h_result_view( 1, 0, 0, 0 ) ); } -template +template< class T > T RSEqualAtomicViewCheck( const long input_length, const long value, const long remainder ) { - - T result[4] ; - result[0] = value ; - result[1] = value ; - result[2] = value ; - result[3] = value ; + T result[4]; + result[0] = value; + result[1] = value; + result[2] = value; + result[3] = value; T * input = new T[input_length]; for ( long i = 0; i < input_length; ++i ) { - if ( i % (remainder+1) == remainder ) { - input[i] = 1; - } - else { - input[i] = 0; - } + if ( i % ( remainder + 1 ) == remainder ) { + input[i] = 1; + } + else { + input[i] = 0; + } } for ( long i = 0; i < input_length; ++i ) { - if ( i % 4 == 0 ) { - result[0] >>= input[i]; - } - else if ( i % 4 == 1 ) { - result[1] >>= input[i]; - } - else if ( i % 4 == 2 ) { - result[2] >>= input[i]; - } - else if ( i % 4 == 3 ) { - result[3] >>= input[i]; - } + if ( i % 4 == 0 ) { + result[0] >>= input[i]; + } + else if ( i % 4 == 1 ) { + result[1] >>= input[i]; + } + else if ( i % 4 == 2 ) { + result[2] 
>>= input[i]; + } + else if ( i % 4 == 3 ) { + result[3] >>= input[i]; + } } + delete [] input; - return (T)result[0]; + return (T) result[0]; } -template -bool RSEqualAtomicViewTest(const long input_length) +template< class T, class DeviceType > +bool RSEqualAtomicViewTest( const long input_length ) { - - static_assert( std::is_integral::value, "RSEqualAtomicViewTest: Must be integral type for test"); + static_assert( std::is_integral< T >::value, "RSEqualAtomicViewTest: Must be integral type for test" ); const long remainder = 61042; //prime - 1 - const long value = 1073741825; // 2^30+1 - T res = RSEqualAtomicView(input_length, value, remainder); - T resSerial = RSEqualAtomicViewCheck(input_length, value, remainder); + const long value = 1073741825; // 2^30+1 + T res = RSEqualAtomicView< T, DeviceType >( input_length, value, remainder ); + T resSerial = RSEqualAtomicViewCheck< T >( input_length, value, remainder ); bool passed = true; @@ -1023,142 +963,134 @@ bool RSEqualAtomicViewTest(const long input_length) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = RSEqualAtomicViewTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } - //--------------------------------------------------- //------------atomic view ls-equal------------------ //--------------------------------------------------- template struct LSEqualAtomicViewFunctor { - - typedef Kokkos::View< T**** , execution_space , Kokkos::MemoryTraits< Kokkos::Atomic > > atomic_view_type ; - - typedef Kokkos::View< T* , execution_space > view_type ; - typedef Kokkos::View< T**** , execution_space > result_view_type ; + typedef Kokkos::View< T****, execution_space, Kokkos::MemoryTraits > atomic_view_type; + typedef Kokkos::View< T*, execution_space > view_type; + typedef Kokkos::View< T****, execution_space > result_view_type; view_type input; atomic_view_type result; const long length; const long 
value; - // Wrap the result view in an atomic view, use this for operator - LSEqualAtomicViewFunctor( const view_type & input_ , result_view_type & result_ , const long & length_ , const long & value_ ) - : input(input_) - , result(result_) - , length(length_) - , value(value_) + // Wrap the result view in an atomic view, use this for operator. + LSEqualAtomicViewFunctor( const view_type & input_, result_view_type & result_, const long & length_, const long & value_ ) + : input( input_ ) + , result( result_ ) + , length( length_ ) + , value( value_ ) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()( const long i ) const { if ( i < length ) { if ( i % 4 == 0 ) { - result(1,0,0,0) <<= input(i); + result( 1, 0, 0, 0 ) <<= input( i ); } else if ( i % 4 == 1 ) { - result(0,1,0,0) <<= input(i); + result( 0, 1, 0, 0 ) <<= input( i ); } else if ( i % 4 == 2 ) { - result(0,0,1,0) <<= input(i); + result( 0, 0, 1, 0 ) <<= input( i ); } else if ( i % 4 == 3 ) { - result(0,0,0,1) <<= input(i); + result( 0, 0, 0, 1 ) <<= input( i ); } } } - }; - -template -T LSEqualAtomicView(const long input_length, const long value, const long remainder) { - - typedef Kokkos::View< T* , execution_space > view_type ; - typedef Kokkos::View< T**** , execution_space > result_view_type ; - typedef typename result_view_type::HostMirror host_scalar_view_type ; +template< class T, class execution_space > +T LSEqualAtomicView( const long input_length, const long value, const long remainder ) { + typedef Kokkos::View< T*, execution_space > view_type; + typedef Kokkos::View< T****, execution_space > result_view_type; + typedef typename result_view_type::HostMirror host_scalar_view_type; const long length = input_length; - view_type input("input_view",length) ; - result_view_type result_view("result_view",2,2,2,2) ; - host_scalar_view_type h_result_view = Kokkos::create_mirror_view(result_view); - h_result_view(1,0,0,0) = value; - h_result_view(0,1,0,0) = value; - 
h_result_view(0,0,1,0) = value; - h_result_view(0,0,0,1) = value; - Kokkos::deep_copy( result_view , h_result_view ); + view_type input( "input_view", length ); + result_view_type result_view( "result_view", 2, 2, 2, 2 ); + host_scalar_view_type h_result_view = Kokkos::create_mirror_view( result_view ); + h_result_view( 1, 0, 0, 0 ) = value; + h_result_view( 0, 1, 0, 0 ) = value; + h_result_view( 0, 0, 1, 0 ) = value; + h_result_view( 0, 0, 0, 1 ) = value; + Kokkos::deep_copy( result_view, h_result_view ); - InitFunctor_ModShift init_f( input , length , remainder ) ; - Kokkos::parallel_for( Kokkos::RangePolicy(0, length), init_f ); + InitFunctor_ModShift< T, execution_space > init_f( input, length, remainder ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), init_f ); - LSEqualAtomicViewFunctor functor(input, result_view, length, value); - Kokkos::parallel_for( Kokkos::RangePolicy(0, length), functor); + LSEqualAtomicViewFunctor< T, execution_space > functor( input, result_view, length, value ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), functor ); Kokkos::fence(); - Kokkos::deep_copy(h_result_view, result_view); + Kokkos::deep_copy( h_result_view, result_view ); - return (T) (h_result_view(1,0,0,0)) ; + return (T) ( h_result_view( 1, 0, 0, 0 ) ); } -template +template< class T > T LSEqualAtomicViewCheck( const long input_length, const long value, const long remainder ) { - - T result[4] ; - result[0] = value ; - result[1] = value ; - result[2] = value ; - result[3] = value ; + T result[4]; + result[0] = value; + result[1] = value; + result[2] = value; + result[3] = value; T * input = new T[input_length]; for ( long i = 0; i < input_length; ++i ) { - if ( i % (remainder+1) == remainder ) { - input[i] = 1; - } - else { - input[i] = 0; - } + if ( i % ( remainder + 1 ) == remainder ) { + input[i] = 1; + } + else { + input[i] = 0; + } } for ( long i = 0; i < input_length; ++i ) { - if ( i % 4 == 0 ) { - 
result[0] <<= input[i]; - } - else if ( i % 4 == 1 ) { - result[1] <<= input[i]; - } - else if ( i % 4 == 2 ) { - result[2] <<= input[i]; - } - else if ( i % 4 == 3 ) { - result[3] <<= input[i]; - } + if ( i % 4 == 0 ) { + result[0] <<= input[i]; + } + else if ( i % 4 == 1 ) { + result[1] <<= input[i]; + } + else if ( i % 4 == 2 ) { + result[2] <<= input[i]; + } + else if ( i % 4 == 3 ) { + result[3] <<= input[i]; + } } delete [] input; - return (T)result[0]; + return (T) result[0]; } -template -bool LSEqualAtomicViewTest(const long input_length) +template< class T, class DeviceType > +bool LSEqualAtomicViewTest( const long input_length ) { - - static_assert( std::is_integral::value, "LSEqualAtomicViewTest: Must be integral type for test"); + static_assert( std::is_integral< T >::value, "LSEqualAtomicViewTest: Must be integral type for test" ); const long remainder = 61042; //prime - 1 - const long value = 1; // 2^30+1 - T res = LSEqualAtomicView(input_length, value, remainder); - T resSerial = LSEqualAtomicViewCheck(input_length, value, remainder); + const long value = 1; // 2^30+1 + T res = LSEqualAtomicView< T, DeviceType >( input_length, value, remainder ); + T resSerial = LSEqualAtomicViewCheck< T >( input_length, value, remainder ); bool passed = true; @@ -1166,104 +1098,96 @@ bool LSEqualAtomicViewTest(const long input_length) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = RSEqualAtomicViewTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } - //--------------------------------------------------- //-----------atomic view and-equal----------------- //--------------------------------------------------- -template +template< class T, class execution_space > struct AndEqualAtomicViewFunctor { - - typedef Kokkos::View< T* , execution_space , Kokkos::MemoryTraits< Kokkos::Atomic > > atomic_view_type ; - - typedef Kokkos::View< T* , 
execution_space > view_type ; + typedef Kokkos::View< T*, execution_space, Kokkos::MemoryTraits > atomic_view_type; + typedef Kokkos::View< T*, execution_space > view_type; view_type input; atomic_view_type even_odd_result; const long length; - // Wrap the result view in an atomic view, use this for operator - AndEqualAtomicViewFunctor( const view_type & input_ , view_type & even_odd_result_ , const long length_) - : input(input_) - , even_odd_result(even_odd_result_) - , length(length_) + // Wrap the result view in an atomic view, use this for operator. + AndEqualAtomicViewFunctor( const view_type & input_, view_type & even_odd_result_, const long length_ ) + : input( input_ ) + , even_odd_result( even_odd_result_ ) + , length( length_ ) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()( const long i ) const { if ( i < length ) { if ( i % 2 == 0 ) { - even_odd_result(0) &= input(i); + even_odd_result( 0 ) &= input( i ); } else { - even_odd_result(1) &= input(i); + even_odd_result( 1 ) &= input( i ); } } } - }; - -template -T AndEqualAtomicView(const long input_length) { - - typedef Kokkos::View< T* , execution_space > view_type ; - typedef typename view_type::HostMirror host_view_type ; +template< class T, class execution_space > +T AndEqualAtomicView( const long input_length ) { + typedef Kokkos::View< T*, execution_space > view_type; + typedef typename view_type::HostMirror host_view_type; const long length = input_length; - view_type input("input_view",length) ; - view_type result_view("result_view",2) ; - Kokkos::deep_copy(result_view, 1); + view_type input( "input_view", length ); + view_type result_view( "result_view", 2 ); + Kokkos::deep_copy( result_view, 1 ); - InitFunctor_Seq init_f( input , length ) ; - Kokkos::parallel_for( Kokkos::RangePolicy(0, length), init_f ); + InitFunctor_Seq< T, execution_space > init_f( input, length ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), init_f ); - 
AndEqualAtomicViewFunctor functor(input, result_view,length); - Kokkos::parallel_for( Kokkos::RangePolicy(0, length), functor); + AndEqualAtomicViewFunctor< T, execution_space > functor( input, result_view, length ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), functor ); Kokkos::fence(); - host_view_type h_result_view = Kokkos::create_mirror_view(result_view); - Kokkos::deep_copy(h_result_view, result_view); + host_view_type h_result_view = Kokkos::create_mirror_view( result_view ); + Kokkos::deep_copy( h_result_view, result_view ); - return (T) (h_result_view(0)) ; + return (T) ( h_result_view( 0 ) ); } -template +template< class T > T AndEqualAtomicViewCheck( const long input_length ) { - const long N = input_length; - T result[2] = {1}; + T result[2] = { 1 }; for ( long i = 0; i < N; ++i ) { if ( N % 2 == 0 ) { - result[0] &= (T)i; + result[0] &= (T) i; } else { - result[1] &= (T)i; + result[1] &= (T) i; } } - return (result[0]); + return ( result[0] ); } -template -bool AndEqualAtomicViewTest(long input_length) +template< class T, class DeviceType > +bool AndEqualAtomicViewTest( long input_length ) { + static_assert( std::is_integral< T >::value, "AndEqualAtomicViewTest: Must be integral type for test" ); - static_assert( std::is_integral::value, "AndEqualAtomicViewTest: Must be integral type for test"); - - T res = AndEqualAtomicView(input_length); - T resSerial = AndEqualAtomicViewCheck(input_length); + T res = AndEqualAtomicView< T, DeviceType >( input_length ); + T resSerial = AndEqualAtomicViewCheck< T >( input_length ); bool passed = true; @@ -1271,103 +1195,96 @@ bool AndEqualAtomicViewTest(long input_length) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = AndEqualAtomicViewTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } - //--------------------------------------------------- //-----------atomic 
view or-equal----------------- //--------------------------------------------------- -template +template< class T, class execution_space > struct OrEqualAtomicViewFunctor { - - typedef Kokkos::View< T* , execution_space , Kokkos::MemoryTraits< Kokkos::Atomic > > atomic_view_type ; - - typedef Kokkos::View< T* , execution_space > view_type ; + typedef Kokkos::View< T*, execution_space, Kokkos::MemoryTraits > atomic_view_type; + typedef Kokkos::View< T*, execution_space > view_type; view_type input; atomic_view_type even_odd_result; const long length; - // Wrap the result view in an atomic view, use this for operator - OrEqualAtomicViewFunctor( const view_type & input_ , view_type & even_odd_result_ , const long length_) - : input(input_) - , even_odd_result(even_odd_result_) - , length(length_) + // Wrap the result view in an atomic view, use this for operator. + OrEqualAtomicViewFunctor( const view_type & input_, view_type & even_odd_result_, const long length_ ) + : input( input_ ) + , even_odd_result( even_odd_result_ ) + , length( length_ ) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()( const long i ) const { if ( i < length ) { if ( i % 2 == 0 ) { - even_odd_result(0) |= input(i); + even_odd_result( 0 ) |= input( i ); } else { - even_odd_result(1) |= input(i); + even_odd_result( 1 ) |= input( i ); } } } - }; - -template -T OrEqualAtomicView(const long input_length) { - - typedef Kokkos::View< T* , execution_space > view_type ; - typedef typename view_type::HostMirror host_view_type ; +template< class T, class execution_space > +T OrEqualAtomicView( const long input_length ) { + typedef Kokkos::View< T*, execution_space > view_type; + typedef typename view_type::HostMirror host_view_type; const long length = input_length; - view_type input("input_view",length) ; - view_type result_view("result_view",2) ; + view_type input( "input_view", length ); + view_type result_view( "result_view", 2 ); - InitFunctor_Seq init_f( input , 
length ) ; - Kokkos::parallel_for( Kokkos::RangePolicy(0, length), init_f ); + InitFunctor_Seq< T, execution_space > init_f( input, length ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), init_f ); - OrEqualAtomicViewFunctor functor(input, result_view,length); - Kokkos::parallel_for( Kokkos::RangePolicy(0, length), functor); + OrEqualAtomicViewFunctor< T, execution_space > functor( input, result_view, length ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), functor ); Kokkos::fence(); - host_view_type h_result_view = Kokkos::create_mirror_view(result_view); - Kokkos::deep_copy(h_result_view, result_view); + host_view_type h_result_view = Kokkos::create_mirror_view( result_view ); + Kokkos::deep_copy( h_result_view, result_view ); - return (T) (h_result_view(0)) ; + return (T) ( h_result_view( 0 ) ); } -template +template< class T > T OrEqualAtomicViewCheck( const long input_length ) { const long N = input_length; - T result[2] = {0}; + T result[2] = { 0 }; for ( long i = 0; i < N; ++i ) { if ( i % 2 == 0 ) { - result[0] |= (T)i; + result[0] |= (T) i; } else { - result[1] |= (T)i; + result[1] |= (T) i; } } - return (T)(result[0]); + return (T) ( result[0] ); } -template -bool OrEqualAtomicViewTest(long input_length) +template< class T, class DeviceType > +bool OrEqualAtomicViewTest( long input_length ) { - - static_assert( std::is_integral::value, "OrEqualAtomicViewTest: Must be integral type for test"); + static_assert( std::is_integral< T >::value, "OrEqualAtomicViewTest: Must be integral type for test" ); - T res = OrEqualAtomicView(input_length); - T resSerial = OrEqualAtomicViewCheck(input_length); + T res = OrEqualAtomicView< T, DeviceType >( input_length ); + T resSerial = OrEqualAtomicViewCheck< T >( input_length ); bool passed = true; @@ -1375,103 +1292,95 @@ bool OrEqualAtomicViewTest(long input_length) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test 
= OrEqualAtomicViewTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } - //--------------------------------------------------- //-----------atomic view xor-equal----------------- //--------------------------------------------------- -template +template< class T, class execution_space > struct XOrEqualAtomicViewFunctor { - - typedef Kokkos::View< T* , execution_space , Kokkos::MemoryTraits< Kokkos::Atomic > > atomic_view_type ; - - typedef Kokkos::View< T* , execution_space > view_type ; + typedef Kokkos::View< T*, execution_space, Kokkos::MemoryTraits > atomic_view_type; + typedef Kokkos::View< T*, execution_space > view_type; view_type input; atomic_view_type even_odd_result; const long length; - // Wrap the result view in an atomic view, use this for operator - XOrEqualAtomicViewFunctor( const view_type & input_ , view_type & even_odd_result_ , const long length_) - : input(input_) - , even_odd_result(even_odd_result_) - , length(length_) + // Wrap the result view in an atomic view, use this for operator. 
+ XOrEqualAtomicViewFunctor( const view_type & input_, view_type & even_odd_result_, const long length_ ) + : input( input_ ) + , even_odd_result( even_odd_result_ ) + , length( length_ ) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()( const long i ) const { if ( i < length ) { if ( i % 2 == 0 ) { - even_odd_result(0) ^= input(i); + even_odd_result( 0 ) ^= input( i ); } else { - even_odd_result(1) ^= input(i); + even_odd_result( 1 ) ^= input( i ); } } } - }; - -template -T XOrEqualAtomicView(const long input_length) { - - typedef Kokkos::View< T* , execution_space > view_type ; - typedef typename view_type::HostMirror host_view_type ; +template< class T, class execution_space > +T XOrEqualAtomicView( const long input_length ) { + typedef Kokkos::View< T*, execution_space > view_type; + typedef typename view_type::HostMirror host_view_type; const long length = input_length; - view_type input("input_view",length) ; - view_type result_view("result_view",2) ; + view_type input( "input_view", length ); + view_type result_view( "result_view", 2 ); - InitFunctor_Seq init_f( input , length ) ; - Kokkos::parallel_for( Kokkos::RangePolicy(0, length), init_f ); + InitFunctor_Seq< T, execution_space > init_f( input, length ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), init_f ); - XOrEqualAtomicViewFunctor functor(input, result_view,length); - Kokkos::parallel_for( Kokkos::RangePolicy(0, length), functor); + XOrEqualAtomicViewFunctor< T, execution_space > functor( input, result_view, length ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), functor ); Kokkos::fence(); - host_view_type h_result_view = Kokkos::create_mirror_view(result_view); - Kokkos::deep_copy(h_result_view, result_view); + host_view_type h_result_view = Kokkos::create_mirror_view( result_view ); + Kokkos::deep_copy( h_result_view, result_view ); - return (T) (h_result_view(0)) ; + return (T) ( h_result_view( 0 ) 
); } -template +template< class T > T XOrEqualAtomicViewCheck( const long input_length ) { - const long N = input_length; - T result[2] = {0}; + T result[2] = { 0 }; for ( long i = 0; i < N; ++i ) { if ( i % 2 == 0 ) { - result[0] ^= (T)i; + result[0] ^= (T) i; } else { - result[1] ^= (T)i; + result[1] ^= (T) i; } } - return (T)(result[0]); + return (T) ( result[0] ); } -template -bool XOrEqualAtomicViewTest(long input_length) +template< class T, class DeviceType > +bool XOrEqualAtomicViewTest( long input_length ) { + static_assert( std::is_integral< T >::value, "XOrEqualAtomicViewTest: Must be integral type for test" ); - static_assert( std::is_integral::value, "XOrEqualAtomicViewTest: Must be integral type for test"); - - T res = XOrEqualAtomicView(input_length); - T resSerial = XOrEqualAtomicViewCheck(input_length); + T res = XOrEqualAtomicView< T, DeviceType >( input_length ); + T resSerial = XOrEqualAtomicViewCheck< T >( input_length ); bool passed = true; @@ -1479,54 +1388,52 @@ bool XOrEqualAtomicViewTest(long input_length) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = XOrEqualAtomicViewTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } - // inc/dec? 
- //--------------------------------------------------- //--------------atomic_test_control------------------ //--------------------------------------------------- -template -bool AtomicViewsTestIntegralType( const int length , int test ) +template< class T, class DeviceType > +bool AtomicViewsTestIntegralType( const int length, int test ) { - static_assert( std::is_integral::value, "TestAtomicViews Error: Non-integral type passed into IntegralType tests"); + static_assert( std::is_integral< T >::value, "TestAtomicViews Error: Non-integral type passed into IntegralType tests" ); - switch (test) { - case 1: return PlusEqualAtomicViewTest( length ); - case 2: return MinusEqualAtomicViewTest( length ); - case 3: return RSEqualAtomicViewTest( length ); - case 4: return LSEqualAtomicViewTest( length ); - case 5: return ModEqualAtomicViewTest( length ); - case 6: return AndEqualAtomicViewTest( length ); - case 7: return OrEqualAtomicViewTest( length ); - case 8: return XOrEqualAtomicViewTest( length ); + switch ( test ) { + case 1: return PlusEqualAtomicViewTest< T, DeviceType >( length ); + case 2: return MinusEqualAtomicViewTest< T, DeviceType >( length ); + case 3: return RSEqualAtomicViewTest< T, DeviceType >( length ); + case 4: return LSEqualAtomicViewTest< T, DeviceType >( length ); + case 5: return ModEqualAtomicViewTest< T, DeviceType >( length ); + case 6: return AndEqualAtomicViewTest< T, DeviceType >( length ); + case 7: return OrEqualAtomicViewTest< T, DeviceType >( length ); + case 8: return XOrEqualAtomicViewTest< T, DeviceType >( length ); } + return 0; } - -template -bool AtomicViewsTestNonIntegralType( const int length , int test ) +template< class T, class DeviceType > +bool AtomicViewsTestNonIntegralType( const int length, int test ) { - switch (test) { - case 1: return PlusEqualAtomicViewTest( length ); - case 2: return MinusEqualAtomicViewTest( length ); - case 3: return TimesEqualAtomicViewTest( length ); - case 4: return DivEqualAtomicViewTest( 
length ); + switch ( test ) { + case 1: return PlusEqualAtomicViewTest< T, DeviceType >( length ); + case 2: return MinusEqualAtomicViewTest< T, DeviceType >( length ); + case 3: return TimesEqualAtomicViewTest< T, DeviceType >( length ); + case 4: return DivEqualAtomicViewTest< T, DeviceType >( length ); } + return 0; } -} // namespace - +} // namespace TestAtomicViews diff --git a/lib/kokkos/core/unit_test/TestCXX11.hpp b/lib/kokkos/core/unit_test/TestCXX11.hpp index d6dde5e963..e2ad623d9c 100644 --- a/lib/kokkos/core/unit_test/TestCXX11.hpp +++ b/lib/kokkos/core/unit_test/TestCXX11.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,283 +36,294 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ + #include namespace TestCXX11 { -template -struct FunctorAddTest{ - typedef Kokkos::View view_type; - view_type a_, b_; +template< class DeviceType > +struct FunctorAddTest { + typedef Kokkos::View< double**, DeviceType > view_type; typedef DeviceType execution_space; - FunctorAddTest(view_type & a, view_type &b):a_(a),b_(b) {} + typedef typename Kokkos::TeamPolicy< execution_space >::member_type team_member; + + view_type a_, b_; + + FunctorAddTest( view_type & a, view_type & b ) : a_( a ), b_( b ) {} + KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - b_(i,0) = a_(i,1) + a_(i,2); - b_(i,1) = a_(i,0) - a_(i,3); - b_(i,2) = a_(i,4) + a_(i,0); - b_(i,3) = a_(i,2) - a_(i,1); - b_(i,4) = a_(i,3) + a_(i,4); + void operator() ( const int& i ) const { + b_( i, 0 ) = a_( i, 1 ) + a_( i, 2 ); + b_( i, 1 ) = a_( i, 0 ) - a_( i, 3 ); + b_( i, 2 ) = a_( i, 4 ) + a_( i, 0 ); + b_( i, 3 ) = a_( i, 2 ) - a_( i, 1 ); + b_( i, 4 ) = a_( i, 3 ) + a_( i, 4 ); } - typedef typename Kokkos::TeamPolicy< execution_space >::member_type team_member ; KOKKOS_INLINE_FUNCTION - void operator() (const team_member & dev) const { - const int begin = dev.league_rank() * 4 ; - const int end = begin + 4 ; - for ( int i = begin + dev.team_rank() ; i < end ; i += dev.team_size() ) { - b_(i,0) = a_(i,1) + a_(i,2); - b_(i,1) = a_(i,0) - a_(i,3); - b_(i,2) = a_(i,4) + a_(i,0); - b_(i,3) = a_(i,2) - a_(i,1); - b_(i,4) = a_(i,3) + a_(i,4); + void operator() ( const team_member & dev ) const { + const int begin = dev.league_rank() * 4; + const int end = begin + 4; + for ( int i = begin + dev.team_rank(); i < end; i += dev.team_size() ) { + b_( i, 0 ) = a_( i, 1 ) + a_( i, 2 ); + b_( i, 1 ) = a_( i, 0 ) - a_( i, 3 ); + b_( i, 2 ) = a_( i, 4 ) + a_( i, 0 ); + b_( i, 3 ) = a_( i, 2 ) - a_( i, 1 ); + b_( i, 4 ) = a_( i, 3 ) + a_( i, 4 ); } } }; -template 
+template< class DeviceType, bool PWRTest > double AddTestFunctor() { + typedef Kokkos::TeamPolicy< DeviceType > policy_type; - typedef Kokkos::TeamPolicy policy_type ; + Kokkos::View< double**, DeviceType > a( "A", 100, 5 ); + Kokkos::View< double**, DeviceType > b( "B", 100, 5 ); + typename Kokkos::View< double**, DeviceType >::HostMirror h_a = Kokkos::create_mirror_view( a ); + typename Kokkos::View< double**, DeviceType >::HostMirror h_b = Kokkos::create_mirror_view( b ); - Kokkos::View a("A",100,5); - Kokkos::View b("B",100,5); - typename Kokkos::View::HostMirror h_a = Kokkos::create_mirror_view(a); - typename Kokkos::View::HostMirror h_b = Kokkos::create_mirror_view(b); - - for(int i=0;i<100;i++) { - for(int j=0;j<5;j++) - h_a(i,j) = 0.1*i/(1.1*j+1.0) + 0.5*j; + for ( int i = 0; i < 100; i++ ) { + for ( int j = 0; j < 5; j++ ) { + h_a( i, j ) = 0.1 * i / ( 1.1 * j + 1.0 ) + 0.5 * j; + } } - Kokkos::deep_copy(a,h_a); + Kokkos::deep_copy( a, h_a ); - if(PWRTest==false) - Kokkos::parallel_for(100,FunctorAddTest(a,b)); - else - Kokkos::parallel_for(policy_type(25,Kokkos::AUTO),FunctorAddTest(a,b)); - Kokkos::deep_copy(h_b,b); + if ( PWRTest == false ) { + Kokkos::parallel_for( 100, FunctorAddTest< DeviceType >( a, b ) ); + } + else { + Kokkos::parallel_for( policy_type( 25, Kokkos::AUTO ), FunctorAddTest< DeviceType >( a, b ) ); + } + Kokkos::deep_copy( h_b, b ); double result = 0; - for(int i=0;i<100;i++) { - for(int j=0;j<5;j++) - result += h_b(i,j); + for ( int i = 0; i < 100; i++ ) { + for ( int j = 0; j < 5; j++ ) { + result += h_b( i, j ); } + } return result; } - -#if defined (KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -template +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) +template< class DeviceType, bool PWRTest > double AddTestLambda() { + Kokkos::View< double**, DeviceType > a( "A", 100, 5 ); + Kokkos::View< double**, DeviceType > b( "B", 100, 5 ); + typename Kokkos::View< double**, DeviceType >::HostMirror h_a = Kokkos::create_mirror_view( a ); + 
typename Kokkos::View< double**, DeviceType >::HostMirror h_b = Kokkos::create_mirror_view( b ); - Kokkos::View a("A",100,5); - Kokkos::View b("B",100,5); - typename Kokkos::View::HostMirror h_a = Kokkos::create_mirror_view(a); - typename Kokkos::View::HostMirror h_b = Kokkos::create_mirror_view(b); - - for(int i=0;i<100;i++) { - for(int j=0;j<5;j++) - h_a(i,j) = 0.1*i/(1.1*j+1.0) + 0.5*j; + for ( int i = 0; i < 100; i++ ) { + for ( int j = 0; j < 5; j++ ) { + h_a( i, j ) = 0.1 * i / ( 1.1 * j + 1.0 ) + 0.5 * j; + } } - Kokkos::deep_copy(a,h_a); + Kokkos::deep_copy( a, h_a ); - if(PWRTest==false) { - Kokkos::parallel_for(100,KOKKOS_LAMBDA(const int& i) { - b(i,0) = a(i,1) + a(i,2); - b(i,1) = a(i,0) - a(i,3); - b(i,2) = a(i,4) + a(i,0); - b(i,3) = a(i,2) - a(i,1); - b(i,4) = a(i,3) + a(i,4); + if ( PWRTest == false ) { + Kokkos::parallel_for( 100, KOKKOS_LAMBDA( const int & i ) { + b( i, 0 ) = a( i, 1 ) + a( i, 2 ); + b( i, 1 ) = a( i, 0 ) - a( i, 3 ); + b( i, 2 ) = a( i, 4 ) + a( i, 0 ); + b( i, 3 ) = a( i, 2 ) - a( i, 1 ); + b( i, 4 ) = a( i, 3 ) + a( i, 4 ); }); - } else { - typedef Kokkos::TeamPolicy policy_type ; - typedef typename policy_type::member_type team_member ; + } + else { + typedef Kokkos::TeamPolicy< DeviceType > policy_type; + typedef typename policy_type::member_type team_member; - policy_type policy(25,Kokkos::AUTO); + policy_type policy( 25, Kokkos::AUTO ); - Kokkos::parallel_for(policy,KOKKOS_LAMBDA(const team_member & dev) { - const int begin = dev.league_rank() * 4 ; - const int end = begin + 4 ; - for ( int i = begin + dev.team_rank() ; i < end ; i += dev.team_size() ) { - b(i,0) = a(i,1) + a(i,2); - b(i,1) = a(i,0) - a(i,3); - b(i,2) = a(i,4) + a(i,0); - b(i,3) = a(i,2) - a(i,1); - b(i,4) = a(i,3) + a(i,4); + Kokkos::parallel_for( policy, KOKKOS_LAMBDA( const team_member & dev ) { + const int begin = dev.league_rank() * 4; + const int end = begin + 4; + for ( int i = begin + dev.team_rank(); i < end; i += dev.team_size() ) { + b( i, 0 ) = 
a( i, 1 ) + a( i, 2 ); + b( i, 1 ) = a( i, 0 ) - a( i, 3 ); + b( i, 2 ) = a( i, 4 ) + a( i, 0 ); + b( i, 3 ) = a( i, 2 ) - a( i, 1 ); + b( i, 4 ) = a( i, 3 ) + a( i, 4 ); } }); } - Kokkos::deep_copy(h_b,b); + Kokkos::deep_copy( h_b, b ); double result = 0; - for(int i=0;i<100;i++) { - for(int j=0;j<5;j++) - result += h_b(i,j); + for ( int i = 0; i < 100; i++ ) { + for ( int j = 0; j < 5; j++ ) { + result += h_b( i, j ); } + } return result; } - #else -template +template< class DeviceType, bool PWRTest > double AddTestLambda() { - return AddTestFunctor(); + return AddTestFunctor< DeviceType, PWRTest >(); } #endif - -template -struct FunctorReduceTest{ - typedef Kokkos::View view_type; - view_type a_; +template< class DeviceType > +struct FunctorReduceTest { + typedef Kokkos::View< double**, DeviceType > view_type; typedef DeviceType execution_space; typedef double value_type; - FunctorReduceTest(view_type & a):a_(a) {} + typedef typename Kokkos::TeamPolicy< execution_space >::member_type team_member; + + view_type a_; + + FunctorReduceTest( view_type & a ) : a_( a ) {} KOKKOS_INLINE_FUNCTION - void operator() (const int& i, value_type& sum) const { - sum += a_(i,1) + a_(i,2); - sum += a_(i,0) - a_(i,3); - sum += a_(i,4) + a_(i,0); - sum += a_(i,2) - a_(i,1); - sum += a_(i,3) + a_(i,4); + void operator() ( const int & i, value_type & sum ) const { + sum += a_( i, 1 ) + a_( i, 2 ); + sum += a_( i, 0 ) - a_( i, 3 ); + sum += a_( i, 4 ) + a_( i, 0 ); + sum += a_( i, 2 ) - a_( i, 1 ); + sum += a_( i, 3 ) + a_( i, 4 ); } - typedef typename Kokkos::TeamPolicy< execution_space >::member_type team_member ; - KOKKOS_INLINE_FUNCTION - void operator() (const team_member & dev, value_type& sum) const { - const int begin = dev.league_rank() * 4 ; - const int end = begin + 4 ; - for ( int i = begin + dev.team_rank() ; i < end ; i += dev.team_size() ) { - sum += a_(i,1) + a_(i,2); - sum += a_(i,0) - a_(i,3); - sum += a_(i,4) + a_(i,0); - sum += a_(i,2) - a_(i,1); - sum += a_(i,3) + 
a_(i,4); + void operator() ( const team_member & dev, value_type & sum ) const { + const int begin = dev.league_rank() * 4; + const int end = begin + 4; + for ( int i = begin + dev.team_rank(); i < end; i += dev.team_size() ) { + sum += a_( i, 1 ) + a_( i, 2 ); + sum += a_( i, 0 ) - a_( i, 3 ); + sum += a_( i, 4 ) + a_( i, 0 ); + sum += a_( i, 2 ) - a_( i, 1 ); + sum += a_( i, 3 ) + a_( i, 4 ); } } + KOKKOS_INLINE_FUNCTION - void init(value_type& update) const {update = 0.0;} + void init( value_type & update ) const { update = 0.0; } + KOKKOS_INLINE_FUNCTION - void join(volatile value_type& update, volatile value_type const& input) const {update += input;} + void join( volatile value_type & update, volatile value_type const & input ) const { update += input; } }; -template +template< class DeviceType, bool PWRTest > double ReduceTestFunctor() { + typedef Kokkos::TeamPolicy< DeviceType > policy_type; + typedef Kokkos::View< double**, DeviceType > view_type; + typedef Kokkos::View< double, typename view_type::host_mirror_space, Kokkos::MemoryUnmanaged > unmanaged_result; - typedef Kokkos::TeamPolicy policy_type ; - typedef Kokkos::View view_type ; - typedef Kokkos::View unmanaged_result ; + view_type a( "A", 100, 5 ); + typename view_type::HostMirror h_a = Kokkos::create_mirror_view( a ); - view_type a("A",100,5); - typename view_type::HostMirror h_a = Kokkos::create_mirror_view(a); - - for(int i=0;i<100;i++) { - for(int j=0;j<5;j++) - h_a(i,j) = 0.1*i/(1.1*j+1.0) + 0.5*j; + for ( int i = 0; i < 100; i++ ) { + for ( int j = 0; j < 5; j++ ) { + h_a( i, j ) = 0.1 * i / ( 1.1 * j + 1.0 ) + 0.5 * j; + } } - Kokkos::deep_copy(a,h_a); + Kokkos::deep_copy( a, h_a ); double result = 0.0; - if(PWRTest==false) - Kokkos::parallel_reduce(100,FunctorReduceTest(a), unmanaged_result( & result )); - else - Kokkos::parallel_reduce(policy_type(25,Kokkos::AUTO),FunctorReduceTest(a), unmanaged_result( & result )); + if ( PWRTest == false ) { + Kokkos::parallel_reduce( 100, 
FunctorReduceTest< DeviceType >( a ), unmanaged_result( & result ) ); + } + else { + Kokkos::parallel_reduce( policy_type( 25, Kokkos::AUTO ), FunctorReduceTest< DeviceType >( a ), unmanaged_result( & result ) ); + } return result; } -#if defined (KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -template +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) +template< class DeviceType, bool PWRTest > double ReduceTestLambda() { + typedef Kokkos::TeamPolicy< DeviceType > policy_type; + typedef Kokkos::View< double**, DeviceType > view_type; + typedef Kokkos::View< double, typename view_type::host_mirror_space, Kokkos::MemoryUnmanaged > unmanaged_result; - typedef Kokkos::TeamPolicy policy_type ; - typedef Kokkos::View view_type ; - typedef Kokkos::View unmanaged_result ; + view_type a( "A", 100, 5 ); + typename view_type::HostMirror h_a = Kokkos::create_mirror_view( a ); - view_type a("A",100,5); - typename view_type::HostMirror h_a = Kokkos::create_mirror_view(a); - - for(int i=0;i<100;i++) { - for(int j=0;j<5;j++) - h_a(i,j) = 0.1*i/(1.1*j+1.0) + 0.5*j; + for ( int i = 0; i < 100; i++ ) { + for ( int j = 0; j < 5; j++ ) { + h_a( i, j ) = 0.1 * i / ( 1.1 * j + 1.0 ) + 0.5 * j; + } } - Kokkos::deep_copy(a,h_a); + Kokkos::deep_copy( a, h_a ); double result = 0.0; - if(PWRTest==false) { - Kokkos::parallel_reduce(100,KOKKOS_LAMBDA(const int& i, double& sum) { - sum += a(i,1) + a(i,2); - sum += a(i,0) - a(i,3); - sum += a(i,4) + a(i,0); - sum += a(i,2) - a(i,1); - sum += a(i,3) + a(i,4); + if ( PWRTest == false ) { + Kokkos::parallel_reduce( 100, KOKKOS_LAMBDA( const int & i, double & sum ) { + sum += a( i, 1 ) + a( i, 2 ); + sum += a( i, 0 ) - a( i, 3 ); + sum += a( i, 4 ) + a( i, 0 ); + sum += a( i, 2 ) - a( i, 1 ); + sum += a( i, 3 ) + a( i, 4 ); }, unmanaged_result( & result ) ); - } else { - typedef typename policy_type::member_type team_member ; - Kokkos::parallel_reduce(policy_type(25,Kokkos::AUTO),KOKKOS_LAMBDA(const team_member & dev, double& sum) { - const int begin = 
dev.league_rank() * 4 ; - const int end = begin + 4 ; - for ( int i = begin + dev.team_rank() ; i < end ; i += dev.team_size() ) { - sum += a(i,1) + a(i,2); - sum += a(i,0) - a(i,3); - sum += a(i,4) + a(i,0); - sum += a(i,2) - a(i,1); - sum += a(i,3) + a(i,4); + } + else { + typedef typename policy_type::member_type team_member; + Kokkos::parallel_reduce( policy_type( 25, Kokkos::AUTO ), KOKKOS_LAMBDA( const team_member & dev, double & sum ) { + const int begin = dev.league_rank() * 4; + const int end = begin + 4; + for ( int i = begin + dev.team_rank(); i < end; i += dev.team_size() ) { + sum += a( i, 1 ) + a( i, 2 ); + sum += a( i, 0 ) - a( i, 3 ); + sum += a( i, 4 ) + a( i, 0 ); + sum += a( i, 2 ) - a( i, 1 ); + sum += a( i, 3 ) + a( i, 4 ); } }, unmanaged_result( & result ) ); } return result; } - #else -template +template< class DeviceType, bool PWRTest > double ReduceTestLambda() { - return ReduceTestFunctor(); + return ReduceTestFunctor< DeviceType, PWRTest >(); } #endif -template -double TestVariantLambda(int test) { - switch (test) { - case 1: return AddTestLambda(); - case 2: return AddTestLambda(); - case 3: return ReduceTestLambda(); - case 4: return ReduceTestLambda(); +template< class DeviceType > +double TestVariantLambda( int test ) { + switch ( test ) { + case 1: return AddTestLambda< DeviceType, false >(); + case 2: return AddTestLambda< DeviceType, true >(); + case 3: return ReduceTestLambda< DeviceType, false >(); + case 4: return ReduceTestLambda< DeviceType, true >(); } + return 0; } - -template -double TestVariantFunctor(int test) { - switch (test) { - case 1: return AddTestFunctor(); - case 2: return AddTestFunctor(); - case 3: return ReduceTestFunctor(); - case 4: return ReduceTestFunctor(); +template< class DeviceType > +double TestVariantFunctor( int test ) { + switch ( test ) { + case 1: return AddTestFunctor< DeviceType, false >(); + case 2: return AddTestFunctor< DeviceType, true >(); + case 3: return ReduceTestFunctor< DeviceType, 
false >(); + case 4: return ReduceTestFunctor< DeviceType, true >(); } + return 0; } -template -bool Test(int test) { - +template< class DeviceType > +bool Test( int test ) { #ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA - double res_functor = TestVariantFunctor(test); - double res_lambda = TestVariantLambda(test); + double res_functor = TestVariantFunctor< DeviceType >( test ); + double res_lambda = TestVariantLambda< DeviceType >( test ); - char testnames[5][256] = {" " - ,"AddTest","AddTest TeamPolicy" - ,"ReduceTest","ReduceTest TeamPolicy" + char testnames[5][256] = { " " + , "AddTest", "AddTest TeamPolicy" + , "ReduceTest", "ReduceTest TeamPolicy" }; bool passed = true; @@ -322,13 +333,13 @@ bool Test(int test) { std::cout << "CXX11 ( test = '" << testnames[test] << "' FAILED : " << res_functor << " != " << res_lambda - << std::endl ; + << std::endl; } - return passed ; + return passed; #else return true; #endif } -} +} // namespace TestCXX11 diff --git a/lib/kokkos/core/unit_test/TestCXX11Deduction.hpp b/lib/kokkos/core/unit_test/TestCXX11Deduction.hpp index 359e17a44f..b53b42b8e0 100644 --- a/lib/kokkos/core/unit_test/TestCXX11Deduction.hpp +++ b/lib/kokkos/core/unit_test/TestCXX11Deduction.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,10 +36,11 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ + #include #ifndef TESTCXX11DEDUCTION_HPP @@ -52,43 +53,40 @@ struct TestReductionDeductionTagB {}; template < class ExecSpace > struct TestReductionDeductionFunctor { - // KOKKOS_INLINE_FUNCTION - // void operator()( long i , long & value ) const - // { value += i + 1 ; } + // void operator()( long i, long & value ) const + // { value += i + 1; } KOKKOS_INLINE_FUNCTION - void operator()( TestReductionDeductionTagA , long i , long & value ) const + void operator()( TestReductionDeductionTagA, long i, long & value ) const { value += ( 2 * i + 1 ) + ( 2 * i + 2 ); } KOKKOS_INLINE_FUNCTION - void operator()( const TestReductionDeductionTagB & , const long i , long & value ) const - { value += ( 3 * i + 1 ) + ( 3 * i + 2 ) + ( 3 * i + 3 ) ; } - + void operator()( const TestReductionDeductionTagB &, const long i, long & value ) const + { value += ( 3 * i + 1 ) + ( 3 * i + 2 ) + ( 3 * i + 3 ); } }; template< class ExecSpace > void test_reduction_deduction() { - typedef TestReductionDeductionFunctor< ExecSpace > Functor ; + typedef TestReductionDeductionFunctor< ExecSpace > Functor; - const long N = 50 ; - // const long answer = N % 2 ? ( N * ((N+1)/2 )) : ( (N/2) * (N+1) ); - const long answerA = N % 2 ? ( (2*N) * (((2*N)+1)/2 )) : ( ((2*N)/2) * ((2*N)+1) ); - const long answerB = N % 2 ? ( (3*N) * (((3*N)+1)/2 )) : ( ((3*N)/2) * ((3*N)+1) ); - long result = 0 ; + const long N = 50; + // const long answer = N % 2 ? ( N * ( ( N + 1 ) / 2 ) ) : ( ( N / 2 ) * ( N + 1 ) ); + const long answerA = N % 2 ? ( ( 2 * N ) * ( ( ( 2 * N ) + 1 ) / 2 ) ) : ( ( ( 2 * N ) / 2 ) * ( ( 2 * N ) + 1 ) ); + const long answerB = N % 2 ? 
( ( 3 * N ) * ( ( ( 3 * N ) + 1 ) / 2 ) ) : ( ( ( 3 * N ) / 2 ) * ( ( 3 * N ) + 1 ) ); + long result = 0; - // Kokkos::parallel_reduce( Kokkos::RangePolicy(0,N) , Functor() , result ); - // ASSERT_EQ( answer , result ); - - Kokkos::parallel_reduce( Kokkos::RangePolicy(0,N) , Functor() , result ); - ASSERT_EQ( answerA , result ); - - Kokkos::parallel_reduce( Kokkos::RangePolicy(0,N) , Functor() , result ); - ASSERT_EQ( answerB , result ); + // Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), Functor(), result ); + // ASSERT_EQ( answer, result ); + + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace, TestReductionDeductionTagA >( 0, N ), Functor(), result ); + ASSERT_EQ( answerA, result ); + + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace, TestReductionDeductionTagB >( 0, N ), Functor(), result ); + ASSERT_EQ( answerB, result ); } -} +} // namespace TestCXX11 #endif - diff --git a/lib/kokkos/core/unit_test/TestCompilerMacros.hpp b/lib/kokkos/core/unit_test/TestCompilerMacros.hpp index 5add656a4d..4555438344 100644 --- a/lib/kokkos/core/unit_test/TestCompilerMacros.hpp +++ b/lib/kokkos/core/unit_test/TestCompilerMacros.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -47,17 +47,17 @@ namespace TestCompilerMacros { -template +template< class DEVICE_TYPE > struct AddFunctor { typedef DEVICE_TYPE execution_space; - typedef typename Kokkos::View type; - type a,b; + typedef typename Kokkos::View< int**, execution_space > type; + type a, b; int length; - AddFunctor(type a_, type b_):a(a_),b(b_),length(a.dimension_1()) {} + AddFunctor( type a_, type b_ ) : a( a_ ), b( b_ ), length( a.dimension_1() ) {} KOKKOS_INLINE_FUNCTION - void operator()(int i) const { + void operator()( int i ) const { #ifdef KOKKOS_ENABLE_PRAGMA_UNROLL #pragma unroll #endif @@ -75,21 +75,23 @@ struct AddFunctor { #pragma simd #endif #endif - for(int j=0;j +template< class DeviceType > bool Test() { - typedef typename Kokkos::View type; - type a("A",1024,128); - type b("B",1024,128); + typedef typename Kokkos::View< int**, DeviceType > type; + type a( "A", 1024, 128 ); + type b( "B", 1024, 128 ); - AddFunctor f(a,b); - Kokkos::parallel_for(1024,f); + AddFunctor< DeviceType > f( a, b ); + Kokkos::parallel_for( 1024, f ); DeviceType::fence(); + return true; } -} +} // namespace TestCompilerMacros diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceType.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceType.cpp index 7e08f67e69..f85a35c096 100644 --- a/lib/kokkos/core/unit_test/TestDefaultDeviceType.cpp +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceType.cpp @@ -45,13 +45,10 @@ #include -#if !defined(KOKKOS_ENABLE_CUDA) || defined(__CUDACC__) -//---------------------------------------------------------------------------- +#if !defined( KOKKOS_ENABLE_CUDA ) || defined( __CUDACC__ ) #include - #include - #include #include #include @@ -78,24 +75,25 @@ protected: TEST_F( defaultdevicetype, host_space_access ) { - typedef Kokkos::HostSpace::execution_space host_exec_space ; - typedef Kokkos::Device< host_exec_space , 
Kokkos::HostSpace > device_space ; - typedef Kokkos::Impl::HostMirror< Kokkos::DefaultExecutionSpace >::Space mirror_space ; + typedef Kokkos::HostSpace::execution_space host_exec_space; + typedef Kokkos::Device< host_exec_space, Kokkos::HostSpace > device_space; + typedef Kokkos::Impl::HostMirror< Kokkos::DefaultExecutionSpace >::Space mirror_space; static_assert( - Kokkos::Impl::SpaceAccessibility< host_exec_space , Kokkos::HostSpace >::accessible , "" ); + Kokkos::Impl::SpaceAccessibility< host_exec_space, Kokkos::HostSpace >::accessible, "" ); static_assert( - Kokkos::Impl::SpaceAccessibility< device_space , Kokkos::HostSpace >::accessible , "" ); + Kokkos::Impl::SpaceAccessibility< device_space, Kokkos::HostSpace >::accessible, "" ); static_assert( - Kokkos::Impl::SpaceAccessibility< mirror_space , Kokkos::HostSpace >::accessible , "" ); + Kokkos::Impl::SpaceAccessibility< mirror_space, Kokkos::HostSpace >::accessible, "" ); } -TEST_F( defaultdevicetype, view_api) { - TestViewAPI< double , Kokkos::DefaultExecutionSpace >(); +TEST_F( defaultdevicetype, view_api ) +{ + TestViewAPI< double, Kokkos::DefaultExecutionSpace >(); } -} // namespace test +} // namespace Test #endif diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp index 7778efde30..401da58a58 100644 --- a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. 
-// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -44,376 +44,425 @@ #include #include + #ifdef KOKKOS_ENABLE_OPENMP #include #endif -#if !defined(KOKKOS_ENABLE_CUDA) || defined(__CUDACC__) -//---------------------------------------------------------------------------- +#if !defined( KOKKOS_ENABLE_CUDA ) || defined( __CUDACC__ ) namespace Test { namespace Impl { - char** init_kokkos_args(bool do_threads,bool do_numa,bool do_device,bool do_other, int& nargs, Kokkos::InitArguments& init_args) { - nargs = (do_threads?1:0) + - (do_numa?1:0) + - (do_device?1:0) + - (do_other?4:0); - char** args_kokkos = new char*[nargs]; - for(int i = 0; i < nargs; i++) - args_kokkos[i] = new char[20]; +char** init_kokkos_args( bool do_threads, bool do_numa, bool do_device, bool do_other, int & nargs, Kokkos::InitArguments & init_args ) { + nargs = ( do_threads ? 1 : 0 ) + + ( do_numa ? 1 : 0 ) + + ( do_device ? 1 : 0 ) + + ( do_other ? 
4 : 0 ); - int threads_idx = do_other?1:0; - int numa_idx = (do_other?3:0) + (do_threads?1:0); - int device_idx = (do_other?3:0) + (do_threads?1:0) + (do_numa?1:0); - - - if(do_threads) { - int nthreads = 3; - -#ifdef KOKKOS_ENABLE_OPENMP - if(omp_get_max_threads() < 3) - nthreads = omp_get_max_threads(); -#endif - - if(Kokkos::hwloc::available()) { - if(Kokkos::hwloc::get_available_threads_per_core()<3) - nthreads = Kokkos::hwloc::get_available_threads_per_core() - * Kokkos::hwloc::get_available_numa_count(); - } - -#ifdef KOKKOS_ENABLE_SERIAL - if(std::is_same::value || - std::is_same::value ) { - nthreads = 1; - } -#endif - init_args.num_threads = nthreads; - sprintf(args_kokkos[threads_idx],"--threads=%i",nthreads); - } - - if(do_numa) { - int numa = 1; - if(Kokkos::hwloc::available()) - numa = Kokkos::hwloc::get_available_numa_count(); -#ifdef KOKKOS_ENABLE_SERIAL - if(std::is_same::value || - std::is_same::value ) { - numa = 1; - } -#endif - - init_args.num_numa = numa; - sprintf(args_kokkos[numa_idx],"--numa=%i",numa); - } - - if(do_device) { - - init_args.device_id = 0; - sprintf(args_kokkos[device_idx],"--device=%i",0); - } - - if(do_other) { - sprintf(args_kokkos[0],"--dummyarg=1"); - sprintf(args_kokkos[threads_idx+(do_threads?1:0)],"--dummy2arg"); - sprintf(args_kokkos[threads_idx+(do_threads?1:0)+1],"dummy3arg"); - sprintf(args_kokkos[device_idx+(do_device?1:0)],"dummy4arg=1"); - } - - - return args_kokkos; + char** args_kokkos = new char*[nargs]; + for ( int i = 0; i < nargs; i++ ) { + args_kokkos[i] = new char[20]; } - Kokkos::InitArguments init_initstruct(bool do_threads, bool do_numa, bool do_device) { - Kokkos::InitArguments args; + int threads_idx = do_other ? 1 : 0; + int numa_idx = ( do_other ? 3 : 0 ) + ( do_threads ? 1 : 0 ); + int device_idx = ( do_other ? 3 : 0 ) + ( do_threads ? 1 : 0 ) + ( do_numa ? 
1 : 0 ); - if(do_threads) { - int nthreads = 3; + if ( do_threads ) { + int nthreads = 3; #ifdef KOKKOS_ENABLE_OPENMP - if(omp_get_max_threads() < 3) - nthreads = omp_get_max_threads(); + if ( omp_get_max_threads() < 3 ) + nthreads = omp_get_max_threads(); #endif - if(Kokkos::hwloc::available()) { - if(Kokkos::hwloc::get_available_threads_per_core()<3) - nthreads = Kokkos::hwloc::get_available_threads_per_core() - * Kokkos::hwloc::get_available_numa_count(); - } + if ( Kokkos::hwloc::available() ) { + if ( Kokkos::hwloc::get_available_threads_per_core() < 3 ) + nthreads = Kokkos::hwloc::get_available_threads_per_core() + * Kokkos::hwloc::get_available_numa_count(); + } + #ifdef KOKKOS_ENABLE_SERIAL - if(std::is_same::value || - std::is_same::value ) { - nthreads = 1; - } + if ( std::is_same< Kokkos::Serial, Kokkos::DefaultExecutionSpace >::value || + std::is_same< Kokkos::Serial, Kokkos::DefaultHostExecutionSpace >::value ) { + nthreads = 1; + } #endif - args.num_threads = nthreads; - } - - if(do_numa) { - int numa = 1; - if(Kokkos::hwloc::available()) - numa = Kokkos::hwloc::get_available_numa_count(); -#ifdef KOKKOS_ENABLE_SERIAL - if(std::is_same::value || - std::is_same::value ) { - numa = 1; - } -#endif - args.num_numa = numa; - } - - if(do_device) { - args.device_id = 0; - } - - return args; + init_args.num_threads = nthreads; + sprintf( args_kokkos[threads_idx], "--threads=%i", nthreads ); } - void check_correct_initialization(const Kokkos::InitArguments& argstruct) { - ASSERT_EQ( Kokkos::DefaultExecutionSpace::is_initialized(), 1); - ASSERT_EQ( Kokkos::HostSpace::execution_space::is_initialized(), 1); + if ( do_numa ) { + int numa = 1; + if ( Kokkos::hwloc::available() ) { + numa = Kokkos::hwloc::get_available_numa_count(); + } - //Figure out the number of threads the HostSpace ExecutionSpace should have initialized to - int expected_nthreads = argstruct.num_threads; - if(expected_nthreads<1) { - if(Kokkos::hwloc::available()) { - expected_nthreads = 
Kokkos::hwloc::get_available_numa_count() - * Kokkos::hwloc::get_available_cores_per_numa() - * Kokkos::hwloc::get_available_threads_per_core(); - } else { - #ifdef KOKKOS_ENABLE_OPENMP - if(std::is_same::value) { - expected_nthreads = omp_get_max_threads(); - } else - #endif - expected_nthreads = 1; +#ifdef KOKKOS_ENABLE_SERIAL + if ( std::is_same< Kokkos::Serial, Kokkos::DefaultExecutionSpace >::value || + std::is_same< Kokkos::Serial, Kokkos::DefaultHostExecutionSpace >::value ) { + numa = 1; + } +#endif + init_args.num_numa = numa; + sprintf( args_kokkos[numa_idx], "--numa=%i", numa ); + } + + if ( do_device ) { + init_args.device_id = 0; + sprintf( args_kokkos[device_idx], "--device=%i", 0 ); + } + + if ( do_other ) { + sprintf( args_kokkos[0], "--dummyarg=1" ); + sprintf( args_kokkos[ threads_idx + ( do_threads ? 1 : 0 ) ], "--dummy2arg" ); + sprintf( args_kokkos[ threads_idx + ( do_threads ? 1 : 0 ) + 1 ], "dummy3arg" ); + sprintf( args_kokkos[ device_idx + ( do_device ? 1 : 0 ) ], "dummy4arg=1" ); + } + + return args_kokkos; +} + +Kokkos::InitArguments init_initstruct( bool do_threads, bool do_numa, bool do_device ) { + Kokkos::InitArguments args; + + if ( do_threads ) { + int nthreads = 3; + +#ifdef KOKKOS_ENABLE_OPENMP + if ( omp_get_max_threads() < 3 ) { + nthreads = omp_get_max_threads(); + } +#endif + + if ( Kokkos::hwloc::available() ) { + if ( Kokkos::hwloc::get_available_threads_per_core() < 3 ) { + nthreads = Kokkos::hwloc::get_available_threads_per_core() + * Kokkos::hwloc::get_available_numa_count(); } - #ifdef KOKKOS_ENABLE_SERIAL - if(std::is_same::value || - std::is_same::value ) + } + +#ifdef KOKKOS_ENABLE_SERIAL + if ( std::is_same< Kokkos::Serial, Kokkos::DefaultExecutionSpace >::value || + std::is_same< Kokkos::Serial, Kokkos::DefaultHostExecutionSpace >::value ) { + nthreads = 1; + } +#endif + + args.num_threads = nthreads; + } + + if ( do_numa ) { + int numa = 1; + if ( Kokkos::hwloc::available() ) { + numa = 
Kokkos::hwloc::get_available_numa_count(); + } + +#ifdef KOKKOS_ENABLE_SERIAL + if ( std::is_same< Kokkos::Serial, Kokkos::DefaultExecutionSpace >::value || + std::is_same< Kokkos::Serial, Kokkos::DefaultHostExecutionSpace >::value ) { + numa = 1; + } +#endif + + args.num_numa = numa; + } + + if ( do_device ) { + args.device_id = 0; + } + + return args; +} + +void check_correct_initialization( const Kokkos::InitArguments & argstruct ) { + ASSERT_EQ( Kokkos::DefaultExecutionSpace::is_initialized(), 1 ); + ASSERT_EQ( Kokkos::HostSpace::execution_space::is_initialized(), 1 ); + + // Figure out the number of threads the HostSpace ExecutionSpace should have initialized to. + int expected_nthreads = argstruct.num_threads; + + if ( expected_nthreads < 1 ) { + if ( Kokkos::hwloc::available() ) { + expected_nthreads = Kokkos::hwloc::get_available_numa_count() + * Kokkos::hwloc::get_available_cores_per_numa() + * Kokkos::hwloc::get_available_threads_per_core(); + } + else { +#ifdef KOKKOS_ENABLE_OPENMP + if ( std::is_same< Kokkos::HostSpace::execution_space, Kokkos::OpenMP >::value ) { + expected_nthreads = omp_get_max_threads(); + } + else +#endif expected_nthreads = 1; - #endif } - int expected_numa = argstruct.num_numa; - if(expected_numa<1) { - if(Kokkos::hwloc::available()) { - expected_numa = Kokkos::hwloc::get_available_numa_count(); - } else { - expected_numa = 1; - } - #ifdef KOKKOS_ENABLE_SERIAL - if(std::is_same::value || - std::is_same::value ) - expected_numa = 1; - #endif +#ifdef KOKKOS_ENABLE_SERIAL + if ( std::is_same< Kokkos::DefaultExecutionSpace, Kokkos::Serial >::value || + std::is_same< Kokkos::DefaultHostExecutionSpace, Kokkos::Serial >::value ) { + expected_nthreads = 1; } - ASSERT_EQ(Kokkos::HostSpace::execution_space::thread_pool_size(),expected_nthreads); +#endif + } + + int expected_numa = argstruct.num_numa; + + if ( expected_numa < 1 ) { + if ( Kokkos::hwloc::available() ) { + expected_numa = Kokkos::hwloc::get_available_numa_count(); + } + else 
{ + expected_numa = 1; + } + +#ifdef KOKKOS_ENABLE_SERIAL + if ( std::is_same< Kokkos::DefaultExecutionSpace, Kokkos::Serial >::value || + std::is_same< Kokkos::DefaultHostExecutionSpace, Kokkos::Serial >::value ) + expected_numa = 1; +#endif + } + + ASSERT_EQ( Kokkos::HostSpace::execution_space::thread_pool_size(), expected_nthreads ); + #ifdef KOKKOS_ENABLE_CUDA - if(std::is_same::value) { - int device; - cudaGetDevice( &device ); - int expected_device = argstruct.device_id; - if(argstruct.device_id<0) { - expected_device = 0; - } - ASSERT_EQ(expected_device,device); + if ( std::is_same< Kokkos::DefaultExecutionSpace, Kokkos::Cuda >::value ) { + int device; + cudaGetDevice( &device ); + + int expected_device = argstruct.device_id; + if ( argstruct.device_id < 0 ) { + expected_device = 0; } + + ASSERT_EQ( expected_device, device ); + } #endif - } - - //ToDo: Add check whether correct number of threads are actually started - void test_no_arguments() { - Kokkos::initialize(); - check_correct_initialization(Kokkos::InitArguments()); - Kokkos::finalize(); - } - - void test_commandline_args(int nargs, char** args, const Kokkos::InitArguments& argstruct) { - Kokkos::initialize(nargs,args); - check_correct_initialization(argstruct); - Kokkos::finalize(); - } - - void test_initstruct_args(const Kokkos::InitArguments& args) { - Kokkos::initialize(args); - check_correct_initialization(args); - Kokkos::finalize(); - } } +// TODO: Add check whether correct number of threads are actually started. 
+void test_no_arguments() { + Kokkos::initialize(); + check_correct_initialization( Kokkos::InitArguments() ); + Kokkos::finalize(); +} + +void test_commandline_args( int nargs, char** args, const Kokkos::InitArguments & argstruct ) { + Kokkos::initialize( nargs, args ); + check_correct_initialization( argstruct ); + Kokkos::finalize(); +} + +void test_initstruct_args( const Kokkos::InitArguments & args ) { + Kokkos::initialize( args ); + check_correct_initialization( args ); + Kokkos::finalize(); +} + +} // namespace Impl + class defaultdevicetypeinit : public ::testing::Test { protected: - static void SetUpTestCase() - { - } + static void SetUpTestCase() {} - static void TearDownTestCase() - { - } + static void TearDownTestCase() {} }; #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_01 -TEST_F( defaultdevicetypeinit, no_args) { +TEST_F( defaultdevicetypeinit, no_args ) +{ Impl::test_no_arguments(); } #endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_02 -TEST_F( defaultdevicetypeinit, commandline_args_empty) { +TEST_F( defaultdevicetypeinit, commandline_args_empty ) +{ Kokkos::InitArguments argstruct; int nargs = 0; - char** args = Impl::init_kokkos_args(false,false,false,false,nargs, argstruct); - Impl::test_commandline_args(nargs,args,argstruct); - for(int i = 0; i < nargs; i++) + char** args = Impl::init_kokkos_args( false, false, false, false, nargs, argstruct ); + Impl::test_commandline_args( nargs, args, argstruct ); + + for ( int i = 0; i < nargs; i++ ) { delete [] args[i]; + } delete [] args; } #endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_03 -TEST_F( defaultdevicetypeinit, commandline_args_other) { +TEST_F( defaultdevicetypeinit, commandline_args_other ) +{ Kokkos::InitArguments argstruct; int nargs = 0; - char** args = Impl::init_kokkos_args(false,false,false,true,nargs, argstruct); - Impl::test_commandline_args(nargs,args,argstruct); - for(int i = 0; i < nargs; i++) + char** args = Impl::init_kokkos_args( false, false, false, true, nargs, argstruct ); + 
Impl::test_commandline_args( nargs, args, argstruct ); + + for ( int i = 0; i < nargs; i++ ) { delete [] args[i]; + } delete [] args; } #endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_04 -TEST_F( defaultdevicetypeinit, commandline_args_nthreads) { +TEST_F( defaultdevicetypeinit, commandline_args_nthreads ) +{ Kokkos::InitArguments argstruct; int nargs = 0; - char** args = Impl::init_kokkos_args(true,false,false,false,nargs, argstruct); - Impl::test_commandline_args(nargs,args,argstruct); - for(int i = 0; i < nargs; i++) + char** args = Impl::init_kokkos_args( true, false, false, false, nargs, argstruct ); + Impl::test_commandline_args( nargs, args, argstruct ); + + for ( int i = 0; i < nargs; i++ ) { delete [] args[i]; + } delete [] args; } #endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_05 -TEST_F( defaultdevicetypeinit, commandline_args_nthreads_numa) { +TEST_F( defaultdevicetypeinit, commandline_args_nthreads_numa ) +{ Kokkos::InitArguments argstruct; int nargs = 0; - char** args = Impl::init_kokkos_args(true,true,false,false,nargs, argstruct); - Impl::test_commandline_args(nargs,args,argstruct); - for(int i = 0; i < nargs; i++) + char** args = Impl::init_kokkos_args( true, true, false, false, nargs, argstruct ); + Impl::test_commandline_args( nargs, args, argstruct ); + + for ( int i = 0; i < nargs; i++ ) { delete [] args[i]; + } delete [] args; } #endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_06 -TEST_F( defaultdevicetypeinit, commandline_args_nthreads_numa_device) { +TEST_F( defaultdevicetypeinit, commandline_args_nthreads_numa_device ) +{ Kokkos::InitArguments argstruct; int nargs = 0; - char** args = Impl::init_kokkos_args(true,true,true,false,nargs, argstruct); - Impl::test_commandline_args(nargs,args,argstruct); - for(int i = 0; i < nargs; i++) + char** args = Impl::init_kokkos_args( true, true, true, false, nargs, argstruct ); + Impl::test_commandline_args( nargs, args, argstruct ); + + for ( int i = 0; i < nargs; i++ ) { delete [] args[i]; + } 
delete [] args; } #endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_07 -TEST_F( defaultdevicetypeinit, commandline_args_nthreads_device) { +TEST_F( defaultdevicetypeinit, commandline_args_nthreads_device ) +{ Kokkos::InitArguments argstruct; int nargs = 0; - char** args = Impl::init_kokkos_args(true,false,true,false,nargs, argstruct); - Impl::test_commandline_args(nargs,args,argstruct); - for(int i = 0; i < nargs; i++) + char** args = Impl::init_kokkos_args( true, false, true, false, nargs, argstruct ); + Impl::test_commandline_args( nargs, args, argstruct ); + + for ( int i = 0; i < nargs; i++ ) { delete [] args[i]; + } delete [] args; } #endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_08 -TEST_F( defaultdevicetypeinit, commandline_args_numa_device) { +TEST_F( defaultdevicetypeinit, commandline_args_numa_device ) +{ Kokkos::InitArguments argstruct; int nargs = 0; - char** args = Impl::init_kokkos_args(false,true,true,false,nargs, argstruct); - Impl::test_commandline_args(nargs,args,argstruct); - for(int i = 0; i < nargs; i++) + char** args = Impl::init_kokkos_args( false, true, true, false, nargs, argstruct ); + Impl::test_commandline_args( nargs, args, argstruct ); + + for ( int i = 0; i < nargs; i++ ) { delete [] args[i]; + } delete [] args; } #endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_09 -TEST_F( defaultdevicetypeinit, commandline_args_device) { +TEST_F( defaultdevicetypeinit, commandline_args_device ) +{ Kokkos::InitArguments argstruct; int nargs = 0; - char** args = Impl::init_kokkos_args(false,false,true,false,nargs, argstruct); - Impl::test_commandline_args(nargs,args,argstruct); - for(int i = 0; i < nargs; i++) + char** args = Impl::init_kokkos_args( false, false, true, false, nargs, argstruct ); + Impl::test_commandline_args( nargs, args, argstruct ); + + for ( int i = 0; i < nargs; i++ ) { delete [] args[i]; + } delete [] args; } #endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_10 -TEST_F( defaultdevicetypeinit, 
commandline_args_nthreads_numa_device_other) { +TEST_F( defaultdevicetypeinit, commandline_args_nthreads_numa_device_other ) +{ Kokkos::InitArguments argstruct; int nargs = 0; - char** args = Impl::init_kokkos_args(true,true,true,true,nargs, argstruct); - Impl::test_commandline_args(nargs,args,argstruct); - for(int i = 0; i < nargs; i++) + char** args = Impl::init_kokkos_args( true, true, true, true, nargs, argstruct ); + Impl::test_commandline_args( nargs, args, argstruct ); + + for ( int i = 0; i < nargs; i++ ) { delete [] args[i]; + } delete [] args; } #endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_11 -TEST_F( defaultdevicetypeinit, initstruct_default) { +TEST_F( defaultdevicetypeinit, initstruct_default ) +{ Kokkos::InitArguments args; - Impl::test_initstruct_args(args); + Impl::test_initstruct_args( args ); } #endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_12 -TEST_F( defaultdevicetypeinit, initstruct_nthreads) { - Kokkos::InitArguments args = Impl::init_initstruct(true,false,false); - Impl::test_initstruct_args(args); +TEST_F( defaultdevicetypeinit, initstruct_nthreads ) +{ + Kokkos::InitArguments args = Impl::init_initstruct( true, false, false ); + Impl::test_initstruct_args( args ); } #endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_13 -TEST_F( defaultdevicetypeinit, initstruct_nthreads_numa) { - Kokkos::InitArguments args = Impl::init_initstruct(true,true,false); - Impl::test_initstruct_args(args); +TEST_F( defaultdevicetypeinit, initstruct_nthreads_numa ) +{ + Kokkos::InitArguments args = Impl::init_initstruct( true, true, false ); + Impl::test_initstruct_args( args ); } #endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_14 -TEST_F( defaultdevicetypeinit, initstruct_device) { - Kokkos::InitArguments args = Impl::init_initstruct(false,false,true); - Impl::test_initstruct_args(args); +TEST_F( defaultdevicetypeinit, initstruct_device ) +{ + Kokkos::InitArguments args = Impl::init_initstruct( false, false, true ); + Impl::test_initstruct_args( args ); } 
#endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_15 -TEST_F( defaultdevicetypeinit, initstruct_nthreads_device) { - Kokkos::InitArguments args = Impl::init_initstruct(true,false,true); - Impl::test_initstruct_args(args); +TEST_F( defaultdevicetypeinit, initstruct_nthreads_device ) +{ + Kokkos::InitArguments args = Impl::init_initstruct( true, false, true ); + Impl::test_initstruct_args( args ); } #endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_16 -TEST_F( defaultdevicetypeinit, initstruct_nthreads_numa_device) { - Kokkos::InitArguments args = Impl::init_initstruct(true,true,true); - Impl::test_initstruct_args(args); +TEST_F( defaultdevicetypeinit, initstruct_nthreads_numa_device ) +{ + Kokkos::InitArguments args = Impl::init_initstruct( true, true, true ); + Impl::test_initstruct_args( args ); } #endif - -} // namespace test +} // namespace Test #endif diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceType_a.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceType_a.cpp index dd148a0624..4fdfa95910 100644 --- a/lib/kokkos/core/unit_test/TestDefaultDeviceType_a.cpp +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceType_a.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -45,12 +45,10 @@ #include -#if !defined(KOKKOS_ENABLE_CUDA) || defined(__CUDACC__) -//---------------------------------------------------------------------------- +#if !defined( KOKKOS_ENABLE_CUDA ) || defined( __CUDACC__ ) #include - namespace Test { class defaultdevicetype : public ::testing::Test { @@ -66,11 +64,11 @@ protected: } }; - -TEST_F( defaultdevicetype, reduce_instantiation_a) { +TEST_F( defaultdevicetype, reduce_instantiation_a ) +{ TestReduceCombinatoricalInstantiation<>::execute_a(); } -} // namespace test +} // namespace Test #endif diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceType_b.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceType_b.cpp index c8edfdd5c3..841f34e03d 100644 --- a/lib/kokkos/core/unit_test/TestDefaultDeviceType_b.cpp +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceType_b.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -45,12 +45,10 @@ #include -#if !defined(KOKKOS_ENABLE_CUDA) || defined(__CUDACC__) -//---------------------------------------------------------------------------- +#if !defined( KOKKOS_ENABLE_CUDA ) || defined( __CUDACC__ ) #include - namespace Test { class defaultdevicetype : public ::testing::Test { @@ -66,11 +64,11 @@ protected: } }; - -TEST_F( defaultdevicetype, reduce_instantiation_b) { +TEST_F( defaultdevicetype, reduce_instantiation_b ) +{ TestReduceCombinatoricalInstantiation<>::execute_b(); } -} // namespace test +} // namespace Test #endif diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceType_c.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceType_c.cpp index 405d49a9b8..602863be38 100644 --- a/lib/kokkos/core/unit_test/TestDefaultDeviceType_c.cpp +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceType_c.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -45,12 +45,10 @@ #include -#if !defined(KOKKOS_ENABLE_CUDA) || defined(__CUDACC__) -//---------------------------------------------------------------------------- +#if !defined( KOKKOS_ENABLE_CUDA ) || defined( __CUDACC__ ) #include - namespace Test { class defaultdevicetype : public ::testing::Test { @@ -66,11 +64,11 @@ protected: } }; - -TEST_F( defaultdevicetype, reduce_instantiation_c) { +TEST_F( defaultdevicetype, reduce_instantiation_c ) +{ TestReduceCombinatoricalInstantiation<>::execute_c(); } -} // namespace test +} // namespace Test #endif diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceType_d.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceType_d.cpp index 426cc4f06c..5d3665b905 100644 --- a/lib/kokkos/core/unit_test/TestDefaultDeviceType_d.cpp +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceType_d.cpp @@ -45,13 +45,10 @@ #include -#if !defined(KOKKOS_ENABLE_CUDA) || defined(__CUDACC__) -//---------------------------------------------------------------------------- +#if !defined( KOKKOS_ENABLE_CUDA ) || defined( __CUDACC__ ) #include - #include - #include #include #include @@ -76,162 +73,165 @@ protected: } }; -TEST_F( defaultdevicetype, test_utilities) { +TEST_F( defaultdevicetype, test_utilities ) +{ test_utilities(); } -TEST_F( defaultdevicetype, long_reduce) { - TestReduce< long , Kokkos::DefaultExecutionSpace >( 100000 ); -} - -TEST_F( defaultdevicetype, double_reduce) { - TestReduce< double , Kokkos::DefaultExecutionSpace >( 100000 ); -} - -TEST_F( defaultdevicetype, long_reduce_dynamic ) { - TestReduceDynamic< long , Kokkos::DefaultExecutionSpace >( 100000 ); -} - -TEST_F( defaultdevicetype, double_reduce_dynamic ) { - TestReduceDynamic< double , Kokkos::DefaultExecutionSpace >( 100000 ); -} - -TEST_F( defaultdevicetype, long_reduce_dynamic_view ) { - TestReduceDynamicView< long , 
Kokkos::DefaultExecutionSpace >( 100000 ); -} - - -TEST_F( defaultdevicetype , atomics ) +TEST_F( defaultdevicetype, long_reduce ) { - const int loop_count = 1e4 ; - - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(100,3) ) ); + TestReduce< long, Kokkos::DefaultExecutionSpace >( 100000 ); } -/*TEST_F( defaultdevicetype , view_remap ) +TEST_F( defaultdevicetype, double_reduce ) { - enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 }; + TestReduce< double, Kokkos::DefaultExecutionSpace >( 100000 ); +} - typedef Kokkos::View< double*[N1][N2][N3] , - Kokkos::LayoutRight , - Kokkos::DefaultExecutionSpace > output_type ; +TEST_F( defaultdevicetype, long_reduce_dynamic ) +{ + TestReduceDynamic< long, Kokkos::DefaultExecutionSpace >( 100000 ); +} - typedef Kokkos::View< int**[N2][N3] , - Kokkos::LayoutLeft , - Kokkos::DefaultExecutionSpace > input_type ; +TEST_F( defaultdevicetype, double_reduce_dynamic ) +{ + TestReduceDynamic< double, 
Kokkos::DefaultExecutionSpace >( 100000 ); +} - typedef Kokkos::View< int*[N0][N2][N3] , - Kokkos::LayoutLeft , - Kokkos::DefaultExecutionSpace > diff_type ; +TEST_F( defaultdevicetype, long_reduce_dynamic_view ) +{ + TestReduceDynamicView< long, Kokkos::DefaultExecutionSpace >( 100000 ); +} - output_type output( "output" , N0 ); - input_type input ( "input" , N0 , N1 ); - diff_type diff ( "diff" , N0 ); +TEST_F( defaultdevicetype, atomics ) +{ + const int loop_count = 1e4; - int value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - input(i0,i1,i2,i3) = ++value ; - }}}} + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::DefaultExecutionSpace >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::DefaultExecutionSpace >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::DefaultExecutionSpace >( loop_count, 3 ) ) ); - // Kokkos::deep_copy( diff , input ); // throw with incompatible shape - Kokkos::deep_copy( output , input ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::DefaultExecutionSpace >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::DefaultExecutionSpace >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::DefaultExecutionSpace >( loop_count, 3 ) ) ); - value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - ++value ; - ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) ); - }}}} + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::DefaultExecutionSpace >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::DefaultExecutionSpace >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::DefaultExecutionSpace >( loop_count, 3 ) ) ); + + ASSERT_TRUE( ( 
TestAtomic::Loop< unsigned long int, Kokkos::DefaultExecutionSpace >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::DefaultExecutionSpace >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::DefaultExecutionSpace >( loop_count, 3 ) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::DefaultExecutionSpace >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::DefaultExecutionSpace >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::DefaultExecutionSpace >( loop_count, 3 ) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::DefaultExecutionSpace >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::DefaultExecutionSpace >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::DefaultExecutionSpace >( loop_count, 3 ) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::DefaultExecutionSpace >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::DefaultExecutionSpace >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::DefaultExecutionSpace >( 100, 3 ) ) ); +} + +/*TEST_F( defaultdevicetype, view_remap ) +{ + enum { N0 = 3, N1 = 2, N2 = 8, N3 = 9 }; + + typedef Kokkos::View< double*[N1][N2][N3], + Kokkos::LayoutRight, + Kokkos::DefaultExecutionSpace > output_type; + + typedef Kokkos::View< int**[N2][N3], + Kokkos::LayoutLeft, + Kokkos::DefaultExecutionSpace > input_type; + + typedef Kokkos::View< int*[N0][N2][N3], + Kokkos::LayoutLeft, + Kokkos::DefaultExecutionSpace > diff_type; + + output_type output( "output", N0 ); + input_type input ( "input", N0, N1 ); + diff_type diff ( "diff", N0 ); + + int value = 0; + for ( size_t i3 = 0; i3 < N3; ++i3 ) { + for ( size_t i2 = 0; i2 < N2; ++i2 ) { + for ( size_t i1 = 0; i1 < N1; ++i1 ) { + for ( size_t i0 = 0; i0 < N0; ++i0 ) { + input( i0, i1, i2, i3 ) = ++value; + } + } + } + } + + // Kokkos::deep_copy( diff, 
input ); // Throw with incompatible shape. + Kokkos::deep_copy( output, input ); + + value = 0; + for ( size_t i3 = 0; i3 < N3; ++i3 ) { + for ( size_t i2 = 0; i2 < N2; ++i2 ) { + for ( size_t i1 = 0; i1 < N1; ++i1 ) { + for ( size_t i0 = 0; i0 < N0; ++i0 ) { + ++value; + ASSERT_EQ( value, ( (int) output( i0, i1, i2, i3 ) ) ); + } + } + } + } }*/ -//---------------------------------------------------------------------------- - - -TEST_F( defaultdevicetype , view_aggregate ) +TEST_F( defaultdevicetype, view_aggregate ) { TestViewAggregate< Kokkos::DefaultExecutionSpace >(); } -//---------------------------------------------------------------------------- - -TEST_F( defaultdevicetype , scan ) +TEST_F( defaultdevicetype, scan ) { - TestScan< Kokkos::DefaultExecutionSpace >::test_range( 1 , 1000 ); + TestScan< Kokkos::DefaultExecutionSpace >::test_range( 1, 1000 ); TestScan< Kokkos::DefaultExecutionSpace >( 1000000 ); TestScan< Kokkos::DefaultExecutionSpace >( 10000000 ); Kokkos::DefaultExecutionSpace::fence(); } - -//---------------------------------------------------------------------------- - -TEST_F( defaultdevicetype , compiler_macros ) +TEST_F( defaultdevicetype, compiler_macros ) { ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::DefaultExecutionSpace >() ) ); } - -//---------------------------------------------------------------------------- -TEST_F( defaultdevicetype , cxx11 ) +TEST_F( defaultdevicetype, cxx11 ) { - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(1) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(2) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(3) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(4) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >( 1 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >( 2 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >( 3 ) ) ); + ASSERT_TRUE( ( 
TestCXX11::Test< Kokkos::DefaultExecutionSpace >( 4 ) ) ); } -TEST_F( defaultdevicetype , team_vector ) +#if !defined(KOKKOS_CUDA_CLANG_WORKAROUND) && !defined(KOKKOS_ARCH_PASCAL) +TEST_F( defaultdevicetype, team_vector ) { - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(0) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(1) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(2) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(3) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(4) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(5) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >( 0 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >( 1 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >( 2 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >( 3 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >( 4 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >( 5 ) ) ); +} +#endif + +TEST_F( defaultdevicetype, malloc ) +{ + int* data = (int*) Kokkos::kokkos_malloc( 100 * sizeof( int ) ); + ASSERT_NO_THROW( data = (int*) Kokkos::kokkos_realloc( data, 120 * sizeof( int ) ) ); + Kokkos::kokkos_free( data ); + + int* data2 = (int*) Kokkos::kokkos_malloc( 0 ); + ASSERT_TRUE( data2 == NULL ); + Kokkos::kokkos_free( data2 ); } -TEST_F( defaultdevicetype , malloc ) -{ - int* data = (int*) Kokkos::kokkos_malloc(100*sizeof(int)); - ASSERT_NO_THROW(data = (int*) Kokkos::kokkos_realloc(data,120*sizeof(int))); - Kokkos::kokkos_free(data); - - int* data2 = (int*) Kokkos::kokkos_malloc(0); - ASSERT_TRUE(data2==NULL); - Kokkos::kokkos_free(data2); -} - -} // namespace test +} // namespace Test #endif diff --git a/lib/kokkos/core/unit_test/TestHWLOC.cpp 
b/lib/kokkos/core/unit_test/TestHWLOC.cpp index 1637dec5de..d03d9b816f 100644 --- a/lib/kokkos/core/unit_test/TestHWLOC.cpp +++ b/lib/kokkos/core/unit_test/TestHWLOC.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -44,26 +44,24 @@ #include #include + #include namespace Test { class hwloc : public ::testing::Test { protected: - static void SetUpTestCase() - {} + static void SetUpTestCase() {} - static void TearDownTestCase() - {} + static void TearDownTestCase() {} }; -TEST_F( hwloc, query) +TEST_F( hwloc, query ) { std::cout << " NUMA[" << Kokkos::hwloc::get_available_numa_count() << "]" << " CORE[" << Kokkos::hwloc::get_available_cores_per_numa() << "]" << " PU[" << Kokkos::hwloc::get_available_threads_per_core() << "]" - << std::endl ; -} - + << std::endl; } +} // namespace Test diff --git a/lib/kokkos/core/unit_test/TestMDRange.hpp b/lib/kokkos/core/unit_test/TestMDRange.hpp index 9894d1ce69..1dc349cc12 100644 --- a/lib/kokkos/core/unit_test/TestMDRange.hpp +++ b/lib/kokkos/core/unit_test/TestMDRange.hpp @@ -47,509 +47,1675 @@ #include -/*--------------------------------------------------------------------------*/ - namespace Test { + namespace { template struct TestMDRange_2D { + using DataType = int; + using ViewType = typename Kokkos::View< DataType**, ExecSpace >; + using HostViewType = 
typename ViewType::HostMirror; - using DataType = int ; - using ViewType = typename Kokkos::View< DataType** , ExecSpace > ; - using HostViewType = typename ViewType::HostMirror ; + ViewType input_view; - ViewType input_view ; - - TestMDRange_2D( const DataType N0, const DataType N1 ) : input_view("input_view", N0, N1) {} + TestMDRange_2D( const DataType N0, const DataType N1 ) : input_view( "input_view", N0, N1 ) {} KOKKOS_INLINE_FUNCTION - void operator()( const int i , const int j ) const + void operator()( const int i, const int j ) const { - input_view(i,j) = 1; + input_view( i, j ) = 1; } - - static void test_for2( const int64_t N0, const int64_t N1 ) + KOKKOS_INLINE_FUNCTION + void operator()( const int i, const int j, double &lsum ) const { + lsum += input_view( i, j ) * 2; + } + // tagged operators + struct InitTag {}; + KOKKOS_INLINE_FUNCTION + void operator()( const InitTag &, const int i, const int j ) const + { + input_view( i, j ) = 3; + } + + static void test_reduce2( const int N0, const int N1 ) + { using namespace Kokkos::Experimental; { - using range_type = MDRangePolicy< ExecSpace, Rank<2>, Kokkos::IndexType >; - range_type range( {0,0}, {N0,N1} ); - TestMDRange_2D functor(N0,N1); + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 3, 3 } } ); + + TestMDRange_2D functor( N0, N1 ); md_parallel_for( range, functor ); + double sum = 0.0; + md_parallel_reduce( range, functor, sum ); - HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); - - int counter = 0; - for ( int i=0; i, Kokkos::IndexType >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2, Iterate::Default, Iterate::Default>, Kokkos::IndexType > range_type; 
+ typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0}, {N0,N1} ); - TestMDRange_2D functor(N0,N1); + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 2, 6 } } ); + + TestMDRange_2D functor( N0, N1 ); md_parallel_for( range, functor ); + double sum = 0.0; + md_parallel_reduce( range, functor, sum ); - HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); - - int counter = 0; - for ( int i=0; i, Kokkos::IndexType >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2, Iterate::Left, Iterate::Left>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0}, {N0,N1} ); - TestMDRange_2D functor(N0,N1); + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 2, 6 } } ); + + TestMDRange_2D functor( N0, N1 ); md_parallel_for( range, functor ); + double sum = 0.0; + md_parallel_reduce( range, functor, sum ); - HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); - - int counter = 0; - for ( int i=0; i, Kokkos::IndexType >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2, Iterate::Left, Iterate::Right>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0}, {N0,N1} ); - TestMDRange_2D functor(N0,N1); + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 2, 6 } } ); + + TestMDRange_2D functor( N0, N1 ); md_parallel_for( range, functor ); + double sum = 0.0; + md_parallel_reduce( range, functor, sum ); - HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , 
functor.input_view ); - - int counter = 0; - for ( int i=0; i, Kokkos::IndexType >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2, Iterate::Right, Iterate::Left>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0}, {N0,N1} ); - TestMDRange_2D functor(N0,N1); + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 2, 6 } } ); + + TestMDRange_2D functor( N0, N1 ); md_parallel_for( range, functor ); + double sum = 0.0; + md_parallel_reduce( range, functor, sum ); - HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); - - int counter = 0; - for ( int i=0; i, Kokkos::IndexType >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2, Iterate::Right, Iterate::Right>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0}, {N0,N1}, {3,3} ); - TestMDRange_2D functor(N0,N1); + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 2, 6 } } ); + + TestMDRange_2D functor( N0, N1 ); + + md_parallel_for( range, functor ); + double sum = 0.0; + md_parallel_reduce( range, functor, sum ); + + ASSERT_EQ( sum, 2 * N0 * N1 ); + } + } // end test_reduce2 + + static void test_for2( const int N0, const int N1 ) + { + using namespace Kokkos::Experimental; + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2>, Kokkos::IndexType, InitTag > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 3, 3 } } ); + TestMDRange_2D functor( N0, N1 ); md_parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( 
functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i=0; i, Kokkos::IndexType >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2>, InitTag > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0}, {N0,N1}, {7,7} ); - TestMDRange_2D functor(N0,N1); + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 3, 3 } } ); + TestMDRange_2D functor( N0, N1 ); md_parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i=0; i, Kokkos::IndexType >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2>, InitTag > range_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0}, {N0,N1}, {16,16} ); - TestMDRange_2D functor(N0,N1); + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } } ); + TestMDRange_2D functor( N0, N1 ); md_parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i=0; i, Kokkos::IndexType >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0}, {N0,N1}, {5,16} ); - TestMDRange_2D functor(N0,N1); + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 3, 3 } } ); + TestMDRange_2D functor( N0, N1 ); md_parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view 
); - Kokkos::deep_copy( h_view , functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i=0; i, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 4, 4 } } ); + TestMDRange_2D functor( N0, N1 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + { + if ( h_view( i, j ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( "D D: Errors in test_for2; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2, Iterate::Left, Iterate::Left>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 3, 3 } } ); + TestMDRange_2D functor( N0, N1 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + { + if ( h_view( i, j ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( "L L: Errors in test_for2; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2, Iterate::Left, Iterate::Right>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 
} }, tile_type{ { 7, 7 } } ); + TestMDRange_2D functor( N0, N1 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + { + if ( h_view( i, j ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( "L R: Errors in test_for2; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2, Iterate::Right, Iterate::Left>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 16, 16 } } ); + TestMDRange_2D functor( N0, N1 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + { + if ( h_view( i, j ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( "R L: Errors in test_for2; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2, Iterate::Right, Iterate::Right>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 5, 16 } } ); + TestMDRange_2D functor( N0, N1 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + { + if ( h_view( i, j 
) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( "R R: Errors in test_for2; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + } // end test_for2 +}; // MDRange_2D template struct TestMDRange_3D { + using DataType = int; + using ViewType = typename Kokkos::View< DataType***, ExecSpace >; + using HostViewType = typename ViewType::HostMirror; - using DataType = int ; - using ViewType = typename Kokkos::View< DataType*** , ExecSpace > ; - using HostViewType = typename ViewType::HostMirror ; + ViewType input_view; - ViewType input_view ; - - TestMDRange_3D( const DataType N0, const DataType N1, const DataType N2 ) : input_view("input_view", N0, N1, N2) {} + TestMDRange_3D( const DataType N0, const DataType N1, const DataType N2 ) : input_view( "input_view", N0, N1, N2 ) {} KOKKOS_INLINE_FUNCTION - void operator()( const int i , const int j , const int k ) const + void operator()( const int i, const int j, const int k ) const { - input_view(i,j,k) = 1; + input_view( i, j, k ) = 1; } - static void test_for3( const int64_t N0, const int64_t N1, const int64_t N2 ) + KOKKOS_INLINE_FUNCTION + void operator()( const int i, const int j, const int k, double &lsum ) const + { + lsum += input_view( i, j, k ) * 2; + } + + // tagged operators + struct InitTag {}; + KOKKOS_INLINE_FUNCTION + void operator()( const InitTag &, const int i, const int j, const int k ) const + { + input_view( i, j, k ) = 3; + } + + static void test_reduce3( const int N0, const int N1, const int N2 ) { using namespace Kokkos::Experimental; { - using range_type = MDRangePolicy< ExecSpace, Rank<3>, Kokkos::IndexType >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0,0}, {N0,N1,N2} ); - TestMDRange_3D functor(N0,N1,N2); + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { 
N0, N1, N2 } }, tile_type{ { 3, 3, 3 } } ); + + TestMDRange_3D functor( N0, N1, N2 ); md_parallel_for( range, functor ); + double sum = 0.0; + md_parallel_reduce( range, functor, sum ); - HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); - - int counter = 0; - for ( int i=0; i, Kokkos::IndexType >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3, Iterate::Default, Iterate::Default >, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0,0}, {N0,N1,N2} ); - TestMDRange_3D functor(N0,N1,N2); + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 2, 4, 6 } } ); + + TestMDRange_3D functor( N0, N1, N2 ); md_parallel_for( range, functor ); + double sum = 0.0; + md_parallel_reduce( range, functor, sum ); - HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); - - int counter = 0; - for ( int i=0; i, Kokkos::IndexType >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3, Iterate::Left, Iterate::Left>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0,0}, {N0,N1,N2} ); - TestMDRange_3D functor(N0,N1,N2); + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 2, 4, 6 } } ); + + TestMDRange_3D functor( N0, N1, N2 ); md_parallel_for( range, functor ); + double sum = 0.0; + md_parallel_reduce( range, functor, sum ); - HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); - - int counter = 0; - for ( int i=0; i, Kokkos::IndexType >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3, Iterate::Left, 
Iterate::Right>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0,0}, {N0,N1,N2} ); - TestMDRange_3D functor(N0,N1,N2); + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 2, 4, 6 } } ); + + TestMDRange_3D functor( N0, N1, N2 ); md_parallel_for( range, functor ); + double sum = 0.0; + md_parallel_reduce( range, functor, sum ); - HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); - - int counter = 0; - for ( int i=0; i, Kokkos::IndexType >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3, Iterate::Right, Iterate::Left>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0,0}, {N0,N1,N2} ); - TestMDRange_3D functor(N0,N1,N2); + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 2, 4, 6 } } ); + + TestMDRange_3D functor( N0, N1, N2 ); md_parallel_for( range, functor ); + double sum = 0.0; + md_parallel_reduce( range, functor, sum ); - HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); - - int counter = 0; - for ( int i=0; i, Kokkos::IndexType >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3, Iterate::Right, Iterate::Right>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0,0}, {N0,N1,N2} ); - TestMDRange_3D functor(N0,N1,N2); + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 2, 4, 6 } } ); + + TestMDRange_3D functor( N0, N1, N2 ); + + md_parallel_for( range, functor ); + double sum = 0.0; + md_parallel_reduce( range, functor, 
sum ); + + ASSERT_EQ( sum, 2 * N0 * N1 * N2 ); + } + } // end test_reduce3 + + static void test_for3( const int N0, const int N1, const int N2 ) + { + using namespace Kokkos::Experimental; + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3> > range_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } } ); + TestMDRange_3D functor( N0, N1, N2 ); md_parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i=0; i, Kokkos::IndexType >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3>, Kokkos::IndexType, InitTag > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0,0}, {N0,N1,N2}, {2,4,2} ); - TestMDRange_3D functor(N0,N1,N2); + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 3, 3, 3 } } ); + TestMDRange_3D functor( N0, N1, N2 ); md_parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i=0; i, Kokkos::IndexType >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0,0}, {N0,N1,N2}, {3,5,7} ); - TestMDRange_3D functor(N0,N1,N2); + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 3, 3, 3 } } ); + + TestMDRange_3D functor( N0, N1, N2 ); md_parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( 
functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i=0; i, Kokkos::IndexType >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3, Iterate::Default, Iterate::Default>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0,0}, {N0,N1,N2}, {8,8,8} ); - TestMDRange_3D functor(N0,N1,N2); + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 3, 3, 3 } } ); + TestMDRange_3D functor( N0, N1, N2 ); md_parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i=0; i, Kokkos::IndexType >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3, Iterate::Left, Iterate::Left>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0,0}, {N0,N1,N2}, {2,4,2} ); - TestMDRange_3D functor(N0,N1,N2); + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 2, 4, 2 } } ); + TestMDRange_3D functor( N0, N1, N2 ); md_parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i=0; i, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 3, 5, 7 } } ); + TestMDRange_3D functor( N0, N1, N2 ); + + md_parallel_for( range, functor ); + + HostViewType 
h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + { + if ( h_view( i, j, k ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for3; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3, Iterate::Right, Iterate::Left>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 8, 8, 8 } } ); + TestMDRange_3D functor( N0, N1, N2 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + { + if ( h_view( i, j, k ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for3; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3, Iterate::Right, Iterate::Right>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 2, 4, 2 } } ); + TestMDRange_3D functor( N0, N1, N2 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + { + if ( h_view( i, j, 
k ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for3; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + } // end test_for3 }; -} /* namespace */ -} /* namespace Test */ +template +struct TestMDRange_4D { + using DataType = int; + using ViewType = typename Kokkos::View< DataType****, ExecSpace >; + using HostViewType = typename ViewType::HostMirror; -/*--------------------------------------------------------------------------*/ + ViewType input_view; + TestMDRange_4D( const DataType N0, const DataType N1, const DataType N2, const DataType N3 ) : input_view( "input_view", N0, N1, N2, N3 ) {} + + KOKKOS_INLINE_FUNCTION + void operator()( const int i, const int j, const int k, const int l ) const + { + input_view( i, j, k, l ) = 1; + } + + KOKKOS_INLINE_FUNCTION + void operator()( const int i, const int j, const int k, const int l, double &lsum ) const + { + lsum += input_view( i, j, k, l ) * 2; + } + + // tagged operators + struct InitTag {}; + KOKKOS_INLINE_FUNCTION + void operator()( const InitTag &, const int i, const int j, const int k, const int l ) const + { + input_view( i, j, k, l ) = 3; + } + + static void test_for4( const int N0, const int N1, const int N2, const int N3 ) + { + using namespace Kokkos::Experimental; + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<4> > range_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3 } } ); + TestMDRange_4D functor( N0, N1, N2, N3 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + { + if ( h_view( i, j, k, l ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + 
printf( "Defaults + No Tile: Errors in test_for4; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<4>, Kokkos::IndexType, InitTag > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3 } }, tile_type{ { 3, 11, 3, 3 } } ); + TestMDRange_4D functor( N0, N1, N2, N3 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + { + if ( h_view( i, j, k, l ) != 3 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf("Defaults +m_tile > m_upper dim2 InitTag op(): Errors in test_for4; mismatches = %d\n\n",counter); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<4>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3 } }, tile_type{ { 4, 4, 4, 4 } } ); + + TestMDRange_4D functor( N0, N1, N2, N3 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + { + if ( h_view( i, j, k, l ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for4; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename 
Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<4, Iterate::Default, Iterate::Default>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3 } }, tile_type{ { 4, 4, 4, 4 } } ); + + TestMDRange_4D functor( N0, N1, N2, N3 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + { + if ( h_view( i, j, k, l ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for4; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<4, Iterate::Left, Iterate::Left>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3 } }, tile_type{ { 4, 4, 4, 4 } } ); + + TestMDRange_4D functor( N0, N1, N2, N3 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + { + if ( h_view( i, j, k, l ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for4; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<4, Iterate::Left, Iterate::Right>, Kokkos::IndexType > range_type; + typedef typename 
range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3 } }, tile_type{ { 4, 4, 4, 4 } } ); + + TestMDRange_4D functor( N0, N1, N2, N3 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + { + if ( h_view( i, j, k, l ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for4; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<4, Iterate::Right, Iterate::Left>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3 } }, tile_type{ { 4, 4, 4, 4 } } ); + + TestMDRange_4D functor( N0, N1, N2, N3 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + { + if ( h_view( i, j, k, l ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for4; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<4, Iterate::Right, Iterate::Right>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0 } }, point_type{ 
{ N0, N1, N2, N3 } }, tile_type{ { 4, 4, 4, 4 } } ); + + TestMDRange_4D functor( N0, N1, N2, N3 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + { + if ( h_view( i, j, k, l ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for4; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + } // end test_for4 +}; + +template +struct TestMDRange_5D { + using DataType = int; + using ViewType = typename Kokkos::View< DataType*****, ExecSpace >; + using HostViewType = typename ViewType::HostMirror; + + ViewType input_view; + + TestMDRange_5D( const DataType N0, const DataType N1, const DataType N2, const DataType N3, const DataType N4 ) : input_view( "input_view", N0, N1, N2, N3, N4 ) {} + + KOKKOS_INLINE_FUNCTION + void operator()( const int i, const int j, const int k, const int l, const int m ) const + { + input_view( i, j, k, l, m ) = 1; + } + + KOKKOS_INLINE_FUNCTION + void operator()( const int i, const int j, const int k, const int l, const int m, double &lsum ) const + { + lsum += input_view( i, j, k, l, m ) * 2; + } + + // tagged operators + struct InitTag {}; + KOKKOS_INLINE_FUNCTION + void operator()( const InitTag &, const int i, const int j, const int k, const int l, const int m ) const + { + input_view( i, j, k, l, m ) = 3; + } + + static void test_for5( const int N0, const int N1, const int N2, const int N3, const int N4 ) + { + using namespace Kokkos::Experimental; + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<5> > range_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4 } } ); + TestMDRange_5D functor( N0, N1, 
N2, N3, N4 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + { + if ( h_view( i, j, k, l, m ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( "Defaults + No Tile: Errors in test_for5; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<5>, Kokkos::IndexType, InitTag > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4 } }, tile_type{ { 3, 3, 3, 3, 7 } } ); + TestMDRange_5D functor( N0, N1, N2, N3, N4 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + { + if ( h_view( i, j, k, l, m ) != 3 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( "Defaults + InitTag op(): Errors in test_for5; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<5>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4 } }, tile_type{ { 4, 4, 4, 2, 2 } } ); + + TestMDRange_5D functor( N0, N1, N2, N3, N4 ); + + md_parallel_for( range, functor ); + + 
HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + { + if ( h_view( i, j, k, l, m ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for5; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<5, Iterate::Default, Iterate::Default>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4 } }, tile_type{ { 4, 4, 4, 2, 2 } } ); + + TestMDRange_5D functor( N0, N1, N2, N3, N4 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + { + if ( h_view( i, j, k, l, m ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for5; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<5, Iterate::Left, Iterate::Left>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4 } }, tile_type{ { 4, 4, 4, 2, 2 } } ); + + TestMDRange_5D functor( N0, N1, N2, N3, N4 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = 
Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + { + if ( h_view( i, j, k, l, m ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for5; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<5, Iterate::Left, Iterate::Right>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4 } }, tile_type{ { 4, 4, 4, 2, 2 } } ); + + TestMDRange_5D functor( N0, N1, N2, N3, N4 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + { + if ( h_view( i, j, k, l, m ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for5; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<5, Iterate::Right, Iterate::Left>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4 } }, tile_type{ { 4, 4, 4, 2, 2 } } ); + + TestMDRange_5D functor( N0, N1, N2, N3, N4 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + 
Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + { + if ( h_view( i, j, k, l, m ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for5; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<5, Iterate::Right, Iterate::Right>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4 } }, tile_type{ { 4, 4, 4, 2, 2 } } ); + + TestMDRange_5D functor( N0, N1, N2, N3, N4 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + { + if ( h_view( i, j, k, l, m ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for5; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + } +}; + +template +struct TestMDRange_6D { + using DataType = int; + using ViewType = typename Kokkos::View< DataType******, ExecSpace >; + using HostViewType = typename ViewType::HostMirror; + + ViewType input_view; + + TestMDRange_6D( const DataType N0, const DataType N1, const DataType N2, const DataType N3, const DataType N4, const DataType N5 ) : input_view( "input_view", N0, N1, N2, N3, N4, N5 ) {} + + KOKKOS_INLINE_FUNCTION + void operator()( const int i, const int j, const int k, const int l, const int m, const int n ) const + { + input_view( i, j, k, l, m, n ) = 1; + } + + 
KOKKOS_INLINE_FUNCTION + void operator()( const int i, const int j, const int k, const int l, const int m, const int n, double &lsum ) const + { + lsum += input_view( i, j, k, l, m, n ) * 2; + } + + // tagged operators + struct InitTag {}; + KOKKOS_INLINE_FUNCTION + void operator()( const InitTag &, const int i, const int j, const int k, const int l, const int m, const int n ) const + { + input_view( i, j, k, l, m, n ) = 3; + } + + static void test_for6( const int N0, const int N1, const int N2, const int N3, const int N4, const int N5 ) + { + using namespace Kokkos::Experimental; + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<6> > range_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4, N5 } } ); + TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + for ( int n = 0; n < N5; ++n ) + { + if ( h_view( i, j, k, l, m, n ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( "Defaults + No Tile: Errors in test_for6; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<6>, Kokkos::IndexType, InitTag > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4, N5 } }, tile_type{ { 3, 3, 3, 3, 2, 3 } } ); //tile dims 3,3,3,3,3,3 more than cuda can handle with debugging + TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 ); + + md_parallel_for( 
range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + for ( int n = 0; n < N5; ++n ) + { + if ( h_view( i, j, k, l, m, n ) != 3 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( "Defaults + InitTag op(): Errors in test_for6; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<6>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4, N5 } }, tile_type{ { 4, 4, 4, 2, 2, 2 } } ); + + TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + for ( int n = 0; n < N5; ++n ) + { + if ( h_view( i, j, k, l, m, n ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for6; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<6, Iterate::Default, Iterate::Default>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4, N5 } }, tile_type{ { 4, 4, 4, 2, 2, 2 } } ); + + TestMDRange_6D functor( 
N0, N1, N2, N3, N4, N5 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + for ( int n = 0; n < N5; ++n ) + { + if ( h_view( i, j, k, l, m, n ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for6; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<6, Iterate::Left, Iterate::Left>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4, N5 } }, tile_type{ { 4, 4, 4, 2, 2, 2 } } ); + + TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + for ( int n = 0; n < N5; ++n ) + { + if ( h_view( i, j, k, l, m, n ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for6; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<6, Iterate::Left, Iterate::Right>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4, N5 } }, tile_type{ { 4, 
4, 4, 2, 2, 2 } } ); + + TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + for ( int n = 0; n < N5; ++n ) + { + if ( h_view( i, j, k, l, m, n ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for6; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<6, Iterate::Right, Iterate::Left>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4, N5 } }, tile_type{ { 4, 4, 4, 2, 2, 2 } } ); + + TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + for ( int n = 0; n < N5; ++n ) + { + if ( h_view( i, j, k, l, m, n ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for6; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<6, Iterate::Right, Iterate::Right>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0, 0 } }, 
point_type{ { N0, N1, N2, N3, N4, N5 } }, tile_type{ { 4, 4, 4, 2, 2, 2 } } ); + + TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + for ( int n = 0; n < N5; ++n ) + { + if ( h_view( i, j, k, l, m, n ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for6; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + } +}; + +} // namespace + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/TestMemoryPool.hpp b/lib/kokkos/core/unit_test/TestMemoryPool.hpp index 868e64e9da..925f0e35ed 100644 --- a/lib/kokkos/core/unit_test/TestMemoryPool.hpp +++ b/lib/kokkos/core/unit_test/TestMemoryPool.hpp @@ -156,7 +156,7 @@ struct fill_memory { void operator()( size_type i ) const { if ( i % STRIDE == 0 ) { - *m_pointers[i / STRIDE].ptr = i / STRIDE ; + *m_pointers[i / STRIDE].ptr = i / STRIDE; } } }; @@ -493,12 +493,12 @@ T smallest_power2_ge( T val ) // Find the most significant nonzero bit. int first_nonzero_bit = Kokkos::Impl::bit_scan_reverse( val ); - // If val is an integral power of 2, ceil( log2(val) ) is equal to the + // If val is an integral power of 2, ceil( log2( val ) ) is equal to the // most significant nonzero bit. Otherwise, you need to add 1. 
int lg2_size = first_nonzero_bit + !Kokkos::Impl::is_integral_power_of_two( val ); - return T(1) << T(lg2_size); + return T( 1 ) << T( lg2_size ); } // This test makes allocation requests for multiple sizes and interleaves @@ -547,7 +547,7 @@ void test_mempool2( unsigned base_chunk_size, size_t num_chunk_sizes, phase1_size = ( ( phase1_size + num_chunk_sizes - 1 ) / num_chunk_sizes ) * num_chunk_sizes; - // Make sure the phase 2 size is multiples of (2 * num_chunk_sizes). + // Make sure the phase 2 size is multiples of ( 2 * num_chunk_sizes ). phase2_size = ( ( phase2_size + 2 * num_chunk_sizes - 1 ) / ( 2 * num_chunk_sizes ) ) * 2 * num_chunk_sizes; @@ -567,7 +567,7 @@ void test_mempool2( unsigned base_chunk_size, size_t num_chunk_sizes, // each chunk size. work_view phase1_work( "Phase 1 Work", phase1_size ); typename work_view::HostMirror host_phase1_work = - create_mirror_view(phase1_work); + create_mirror_view( phase1_work ); size_t inner_size = phase1_size / num_chunk_sizes; unsigned chunk_size = base_chunk_size; @@ -589,7 +589,7 @@ void test_mempool2( unsigned base_chunk_size, size_t num_chunk_sizes, // deallocations with an equal number of allocations for each chunk size. work_view phase2_work( "Phase 2 Work", phase2_size ); typename work_view::HostMirror host_phase2_work = - create_mirror_view(phase2_work); + create_mirror_view( phase2_work ); inner_size = half_phase2_size / num_chunk_sizes; chunk_size = base_chunk_size; @@ -614,7 +614,7 @@ void test_mempool2( unsigned base_chunk_size, size_t num_chunk_sizes, // Initialize the phase 3 work view with all deallocations. 
work_view phase3_work( "Phase 3 Work", phase3_size ); typename work_view::HostMirror host_phase3_work = - create_mirror_view(phase3_work); + create_mirror_view( phase3_work ); inner_size = phase3_size / num_chunk_sizes; diff --git a/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp b/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp index 1bb45481c9..6f2ca6a61c 100644 --- a/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp +++ b/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp @@ -48,7 +48,7 @@ #include #include -struct SomeTag{}; +struct SomeTag {}; template< class ExecutionSpace > class TestRangePolicyConstruction { @@ -56,179 +56,194 @@ public: TestRangePolicyConstruction() { test_compile_time_parameters(); } + private: void test_compile_time_parameters() { { Kokkos::Impl::expand_variadic(); - Kokkos::Impl::expand_variadic(1,2,3); + Kokkos::Impl::expand_variadic( 1, 2, 3 ); } + { typedef Kokkos::RangePolicy<> policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same >::value)); - ASSERT_TRUE((std::is_same::value)); + ASSERT_TRUE( ( std::is_same< execution_space, Kokkos::DefaultExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, typename execution_space::size_type >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } - { - typedef Kokkos::RangePolicy policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename 
policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same >::value)); - ASSERT_TRUE((std::is_same::value)); + { + typedef Kokkos::RangePolicy< ExecutionSpace > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, ExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, typename execution_space::size_type >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } - { - typedef Kokkos::RangePolicy > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same >::value)); - ASSERT_TRUE((std::is_same::value)); + { + typedef Kokkos::RangePolicy< ExecutionSpace, Kokkos::Schedule > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, ExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, typename execution_space::size_type >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } - { - typedef Kokkos::RangePolicy,Kokkos::IndexType > policy_t; - typedef typename 
policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same >::value)); - ASSERT_TRUE((std::is_same::value)); + { + typedef Kokkos::RangePolicy< ExecutionSpace, Kokkos::Schedule, Kokkos::IndexType > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, ExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } - { - typedef Kokkos::RangePolicy, ExecutionSpace,Kokkos::Schedule > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same >::value)); - ASSERT_TRUE((std::is_same::value)); + { + typedef Kokkos::RangePolicy< Kokkos::IndexType, ExecutionSpace, Kokkos::Schedule > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, ExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value 
) ); } - { - typedef Kokkos::RangePolicy,Kokkos::IndexType,SomeTag > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same >::value)); - ASSERT_TRUE((std::is_same::value)); + { + typedef Kokkos::RangePolicy< ExecutionSpace, Kokkos::Schedule, Kokkos::IndexType, SomeTag > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, ExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, SomeTag >::value ) ); } - { - typedef Kokkos::RangePolicy,ExecutionSpace,Kokkos::IndexType,SomeTag > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same >::value)); - ASSERT_TRUE((std::is_same::value)); + { + typedef Kokkos::RangePolicy< Kokkos::Schedule, ExecutionSpace, Kokkos::IndexType, SomeTag > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, ExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + 
ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, SomeTag >::value ) ); } - { - typedef Kokkos::RangePolicy,Kokkos::IndexType,ExecutionSpace > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same >::value)); - ASSERT_TRUE((std::is_same::value)); + { + typedef Kokkos::RangePolicy< SomeTag, Kokkos::Schedule, Kokkos::IndexType, ExecutionSpace > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, ExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, SomeTag >::value ) ); } - { - typedef Kokkos::RangePolicy > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same >::value)); - ASSERT_TRUE((std::is_same::value)); + { + typedef Kokkos::RangePolicy< Kokkos::Schedule > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, Kokkos::DefaultExecutionSpace >::value ) 
); + ASSERT_TRUE( ( std::is_same< index_type, typename execution_space::size_type >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } - { - typedef Kokkos::RangePolicy,Kokkos::IndexType > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same >::value)); - ASSERT_TRUE((std::is_same::value)); + { + typedef Kokkos::RangePolicy< Kokkos::Schedule, Kokkos::IndexType > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, Kokkos::DefaultExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } - { - typedef Kokkos::RangePolicy, Kokkos::Schedule > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same >::value)); - ASSERT_TRUE((std::is_same::value)); + { + typedef Kokkos::RangePolicy< Kokkos::IndexType, Kokkos::Schedule > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename 
policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, Kokkos::DefaultExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } - { - typedef Kokkos::RangePolicy,Kokkos::IndexType,SomeTag > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same >::value)); - ASSERT_TRUE((std::is_same::value)); + { + typedef Kokkos::RangePolicy< Kokkos::Schedule, Kokkos::IndexType, SomeTag > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, Kokkos::DefaultExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, SomeTag >::value ) ); } - { - typedef Kokkos::RangePolicy,Kokkos::IndexType,SomeTag > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same >::value)); - ASSERT_TRUE((std::is_same::value)); + { + typedef Kokkos::RangePolicy< Kokkos::Schedule, Kokkos::IndexType, SomeTag > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename 
policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, Kokkos::DefaultExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, SomeTag >::value ) ); } - { - typedef Kokkos::RangePolicy,Kokkos::IndexType > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same >::value)); - ASSERT_TRUE((std::is_same::value)); + { + typedef Kokkos::RangePolicy< SomeTag, Kokkos::Schedule, Kokkos::IndexType > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, Kokkos::DefaultExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, SomeTag >::value ) ); } } }; @@ -240,258 +255,274 @@ public: test_compile_time_parameters(); test_run_time_parameters(); } + private: void test_compile_time_parameters() { { typedef Kokkos::TeamPolicy<> policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type 
index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same >::value)); - ASSERT_TRUE((std::is_same::value)); + ASSERT_TRUE( ( std::is_same< execution_space, Kokkos::DefaultExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, typename execution_space::size_type >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } - { - typedef Kokkos::TeamPolicy policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same >::value)); - ASSERT_TRUE((std::is_same::value)); + { + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, ExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, typename execution_space::size_type >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } - { - typedef Kokkos::TeamPolicy > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same >::value)); - 
ASSERT_TRUE((std::is_same::value)); + { + typedef Kokkos::TeamPolicy< ExecutionSpace, Kokkos::Schedule > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, ExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, typename execution_space::size_type >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } - { - typedef Kokkos::TeamPolicy,Kokkos::IndexType > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same >::value)); - ASSERT_TRUE((std::is_same::value)); + { + typedef Kokkos::TeamPolicy< ExecutionSpace, Kokkos::Schedule, Kokkos::IndexType > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, ExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } - { - typedef Kokkos::TeamPolicy, ExecutionSpace,Kokkos::Schedule > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - 
ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same >::value)); - ASSERT_TRUE((std::is_same::value)); + { + typedef Kokkos::TeamPolicy< Kokkos::IndexType, ExecutionSpace, Kokkos::Schedule > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, ExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } - { - typedef Kokkos::TeamPolicy,Kokkos::IndexType,SomeTag > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same >::value)); - ASSERT_TRUE((std::is_same::value)); + { + typedef Kokkos::TeamPolicy< ExecutionSpace, Kokkos::Schedule, Kokkos::IndexType, SomeTag > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, ExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, SomeTag >::value ) ); } - { - typedef Kokkos::TeamPolicy,ExecutionSpace,Kokkos::IndexType,SomeTag > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type 
index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same >::value)); - ASSERT_TRUE((std::is_same::value)); + { + typedef Kokkos::TeamPolicy< Kokkos::Schedule, ExecutionSpace, Kokkos::IndexType, SomeTag > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, ExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, SomeTag >::value ) ); } - { - typedef Kokkos::TeamPolicy,Kokkos::IndexType,ExecutionSpace > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same >::value)); - ASSERT_TRUE((std::is_same::value)); + { + typedef Kokkos::TeamPolicy< SomeTag, Kokkos::Schedule, Kokkos::IndexType, ExecutionSpace > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, ExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, SomeTag >::value ) ); } - { - typedef Kokkos::TeamPolicy > policy_t; - typedef 
typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same >::value)); - ASSERT_TRUE((std::is_same::value)); + { + typedef Kokkos::TeamPolicy< Kokkos::Schedule > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, Kokkos::DefaultExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, typename execution_space::size_type >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } - { - typedef Kokkos::TeamPolicy,Kokkos::IndexType > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same >::value)); - ASSERT_TRUE((std::is_same::value)); + { + typedef Kokkos::TeamPolicy< Kokkos::Schedule, Kokkos::IndexType > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, Kokkos::DefaultExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value 
) ); } - { - typedef Kokkos::TeamPolicy, Kokkos::Schedule > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same >::value)); - ASSERT_TRUE((std::is_same::value)); + { + typedef Kokkos::TeamPolicy< Kokkos::IndexType, Kokkos::Schedule > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, Kokkos::DefaultExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } - { - typedef Kokkos::TeamPolicy,Kokkos::IndexType,SomeTag > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same >::value)); - ASSERT_TRUE((std::is_same::value)); + { + typedef Kokkos::TeamPolicy< Kokkos::Schedule, Kokkos::IndexType, SomeTag > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, Kokkos::DefaultExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, 
Kokkos::Schedule >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, SomeTag >::value ) ); } - { - typedef Kokkos::TeamPolicy,Kokkos::IndexType,SomeTag > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same >::value)); - ASSERT_TRUE((std::is_same::value)); + { + typedef Kokkos::TeamPolicy< Kokkos::Schedule, Kokkos::IndexType, SomeTag > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, Kokkos::DefaultExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, SomeTag >::value ) ); } - { - typedef Kokkos::TeamPolicy,Kokkos::IndexType > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same::value)); - ASSERT_TRUE((std::is_same >::value)); - ASSERT_TRUE((std::is_same::value)); + { + typedef Kokkos::TeamPolicy< SomeTag, Kokkos::Schedule, Kokkos::IndexType > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, Kokkos::DefaultExecutionSpace >::value ) ); + 
ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, SomeTag >::value ) ); } } - template + template< class policy_t > void test_run_time_parameters_type() { int league_size = 131; - int team_size = 4 0); - ASSERT_EQ (p1.scratch_size(0), 0); + int scratch_size = per_team_scratch + per_thread_scratch * team_size; - policy_t p2 = p1.set_chunk_size(chunk_size); - ASSERT_EQ (p1.league_size() , league_size); - ASSERT_EQ (p1.team_size() , team_size); - ASSERT_TRUE(p1.chunk_size() > 0); - ASSERT_EQ (p1.scratch_size(0), 0); + policy_t p1( league_size, team_size ); + ASSERT_EQ ( p1.league_size(), league_size ); + ASSERT_EQ ( p1.team_size(), team_size ); + ASSERT_TRUE( p1.chunk_size() > 0 ); + ASSERT_EQ ( p1.scratch_size( 0 ), 0 ); - ASSERT_EQ (p2.league_size() , league_size); - ASSERT_EQ (p2.team_size() , team_size); - ASSERT_EQ (p2.chunk_size() , chunk_size); - ASSERT_EQ (p2.scratch_size(0), 0); + policy_t p2 = p1.set_chunk_size( chunk_size ); + ASSERT_EQ ( p1.league_size(), league_size ); + ASSERT_EQ ( p1.team_size(), team_size ); + ASSERT_TRUE( p1.chunk_size() > 0 ); + ASSERT_EQ ( p1.scratch_size( 0 ), 0 ); - policy_t p3 = p2.set_scratch_size(0,Kokkos::PerTeam(per_team_scratch)); - ASSERT_EQ (p2.league_size() , league_size); - ASSERT_EQ (p2.team_size() , team_size); - ASSERT_EQ (p2.chunk_size() , chunk_size); - ASSERT_EQ (p2.scratch_size(0), 0); - ASSERT_EQ (p3.league_size() , league_size); - ASSERT_EQ (p3.team_size() , team_size); - ASSERT_EQ (p3.chunk_size() , chunk_size); - ASSERT_EQ (p3.scratch_size(0), per_team_scratch); + ASSERT_EQ ( p2.league_size(), league_size ); + ASSERT_EQ ( p2.team_size(), team_size ); + ASSERT_EQ ( p2.chunk_size(), chunk_size ); + ASSERT_EQ ( p2.scratch_size( 0 ), 0 ); - policy_t p4 = p2.set_scratch_size(0,Kokkos::PerThread(per_thread_scratch)); - ASSERT_EQ (p2.league_size() , league_size); - ASSERT_EQ (p2.team_size() 
, team_size); - ASSERT_EQ (p2.chunk_size() , chunk_size); - ASSERT_EQ (p2.scratch_size(0), 0); - ASSERT_EQ (p4.league_size() , league_size); - ASSERT_EQ (p4.team_size() , team_size); - ASSERT_EQ (p4.chunk_size() , chunk_size); - ASSERT_EQ (p4.scratch_size(0), per_thread_scratch*team_size); + policy_t p3 = p2.set_scratch_size( 0, Kokkos::PerTeam( per_team_scratch ) ); + ASSERT_EQ ( p2.league_size(), league_size ); + ASSERT_EQ ( p2.team_size(), team_size ); + ASSERT_EQ ( p2.chunk_size(), chunk_size ); + ASSERT_EQ ( p2.scratch_size( 0 ), 0 ); + ASSERT_EQ ( p3.league_size(), league_size ); + ASSERT_EQ ( p3.team_size(), team_size ); + ASSERT_EQ ( p3.chunk_size(), chunk_size ); + ASSERT_EQ ( p3.scratch_size( 0 ), per_team_scratch ); - policy_t p5 = p2.set_scratch_size(0,Kokkos::PerThread(per_thread_scratch),Kokkos::PerTeam(per_team_scratch)); - ASSERT_EQ (p2.league_size() , league_size); - ASSERT_EQ (p2.team_size() , team_size); - ASSERT_EQ (p2.chunk_size() , chunk_size); - ASSERT_EQ (p2.scratch_size(0), 0); - ASSERT_EQ (p5.league_size() , league_size); - ASSERT_EQ (p5.team_size() , team_size); - ASSERT_EQ (p5.chunk_size() , chunk_size); - ASSERT_EQ (p5.scratch_size(0), scratch_size); + policy_t p4 = p2.set_scratch_size( 0, Kokkos::PerThread( per_thread_scratch ) ); + ASSERT_EQ ( p2.league_size(), league_size ); + ASSERT_EQ ( p2.team_size(), team_size ); + ASSERT_EQ ( p2.chunk_size(), chunk_size ); + ASSERT_EQ ( p2.scratch_size( 0 ), 0 ); + ASSERT_EQ ( p4.league_size(), league_size ); + ASSERT_EQ ( p4.team_size(), team_size ); + ASSERT_EQ ( p4.chunk_size(), chunk_size ); + ASSERT_EQ ( p4.scratch_size( 0 ), per_thread_scratch * team_size ); - policy_t p6 = p2.set_scratch_size(0,Kokkos::PerTeam(per_team_scratch),Kokkos::PerThread(per_thread_scratch)); - ASSERT_EQ (p2.league_size() , league_size); - ASSERT_EQ (p2.team_size() , team_size); - ASSERT_EQ (p2.chunk_size() , chunk_size); - ASSERT_EQ (p2.scratch_size(0), 0); - ASSERT_EQ (p6.league_size() , league_size); - 
ASSERT_EQ (p6.team_size() , team_size); - ASSERT_EQ (p6.chunk_size() , chunk_size); - ASSERT_EQ (p6.scratch_size(0), scratch_size); + policy_t p5 = p2.set_scratch_size( 0, Kokkos::PerThread( per_thread_scratch ), Kokkos::PerTeam( per_team_scratch ) ); + ASSERT_EQ ( p2.league_size(), league_size ); + ASSERT_EQ ( p2.team_size(), team_size ); + ASSERT_EQ ( p2.chunk_size(), chunk_size ); + ASSERT_EQ ( p2.scratch_size( 0 ), 0 ); + ASSERT_EQ ( p5.league_size(), league_size ); + ASSERT_EQ ( p5.team_size(), team_size ); + ASSERT_EQ ( p5.chunk_size(), chunk_size ); + ASSERT_EQ ( p5.scratch_size( 0 ), scratch_size ); + + policy_t p6 = p2.set_scratch_size( 0, Kokkos::PerTeam( per_team_scratch ), Kokkos::PerThread( per_thread_scratch ) ); + ASSERT_EQ ( p2.league_size(), league_size ); + ASSERT_EQ ( p2.team_size(), team_size ); + ASSERT_EQ ( p2.chunk_size(), chunk_size ); + ASSERT_EQ ( p2.scratch_size( 0 ), 0 ); + ASSERT_EQ ( p6.league_size(), league_size ); + ASSERT_EQ ( p6.team_size(), team_size ); + ASSERT_EQ ( p6.chunk_size(), chunk_size ); + ASSERT_EQ ( p6.scratch_size( 0 ), scratch_size ); + + policy_t p7 = p3.set_scratch_size( 0, Kokkos::PerTeam( per_team_scratch ), Kokkos::PerThread( per_thread_scratch ) ); + ASSERT_EQ ( p3.league_size(), league_size ); + ASSERT_EQ ( p3.team_size(), team_size ); + ASSERT_EQ ( p3.chunk_size(), chunk_size ); + ASSERT_EQ ( p3.scratch_size( 0 ), per_team_scratch ); + ASSERT_EQ ( p7.league_size(), league_size ); + ASSERT_EQ ( p7.team_size(), team_size ); + ASSERT_EQ ( p7.chunk_size(), chunk_size ); + ASSERT_EQ ( p7.scratch_size( 0 ), scratch_size ); + } - policy_t p7 = p3.set_scratch_size(0,Kokkos::PerTeam(per_team_scratch),Kokkos::PerThread(per_thread_scratch)); - ASSERT_EQ (p3.league_size() , league_size); - ASSERT_EQ (p3.team_size() , team_size); - ASSERT_EQ (p3.chunk_size() , chunk_size); - ASSERT_EQ (p3.scratch_size(0), per_team_scratch); - ASSERT_EQ (p7.league_size() , league_size); - ASSERT_EQ (p7.team_size() , team_size); - ASSERT_EQ 
(p7.chunk_size() , chunk_size); - ASSERT_EQ (p7.scratch_size(0), scratch_size); -} void test_run_time_parameters() { - test_run_time_parameters_type >(); - test_run_time_parameters_type,Kokkos::IndexType > >(); - test_run_time_parameters_type, ExecutionSpace, Kokkos::Schedule > >(); - test_run_time_parameters_type,Kokkos::IndexType,ExecutionSpace,SomeTag > >(); + test_run_time_parameters_type< Kokkos::TeamPolicy >(); + test_run_time_parameters_type< Kokkos::TeamPolicy, Kokkos::IndexType > >(); + test_run_time_parameters_type< Kokkos::TeamPolicy, ExecutionSpace, Kokkos::Schedule > >(); + test_run_time_parameters_type< Kokkos::TeamPolicy, Kokkos::IndexType, ExecutionSpace, SomeTag > >(); } }; diff --git a/lib/kokkos/core/unit_test/TestQthread.cpp b/lib/kokkos/core/unit_test/TestQthread.cpp deleted file mode 100644 index a465f39ca8..0000000000 --- a/lib/kokkos/core/unit_test/TestQthread.cpp +++ /dev/null @@ -1,287 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include - -#include -#include - -//---------------------------------------------------------------------------- - -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -// #include - -namespace Test { - -class qthread : public ::testing::Test { -protected: - static void SetUpTestCase() - { - const unsigned numa_count = Kokkos::hwloc::get_available_numa_count(); - const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); - const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); - - int threads_count = std::max( 1u , numa_count ) - * std::max( 2u , ( cores_per_numa * threads_per_core ) / 2 ); - Kokkos::Qthread::initialize( threads_count ); - Kokkos::Qthread::print_configuration( std::cout , true ); - } - - static void TearDownTestCase() - { - Kokkos::Qthread::finalize(); - } -}; - -TEST_F( qthread , compiler_macros ) -{ - ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Qthread >() ) ); -} - -TEST_F( qthread, view_impl) { - 
test_view_impl< Kokkos::Qthread >(); -} - -TEST_F( qthread, view_api) { - TestViewAPI< double , Kokkos::Qthread >(); -} - -TEST_F( qthread , view_nested_view ) -{ - ::Test::view_nested_view< Kokkos::Qthread >(); -} - -TEST_F( qthread , range_tag ) -{ - TestRange< Kokkos::Qthread , Kokkos::Schedule >::test_for(1000); - TestRange< Kokkos::Qthread , Kokkos::Schedule >::test_reduce(1000); - TestRange< Kokkos::Qthread , Kokkos::Schedule >::test_scan(1000); -} - -TEST_F( qthread , team_tag ) -{ - TestTeamPolicy< Kokkos::Qthread , Kokkos::Schedule >::test_for( 1000 ); - TestTeamPolicy< Kokkos::Qthread , Kokkos::Schedule >::test_reduce( 1000 ); -} - -TEST_F( qthread, long_reduce) { - TestReduce< long , Kokkos::Qthread >( 1000000 ); -} - -TEST_F( qthread, double_reduce) { - TestReduce< double , Kokkos::Qthread >( 1000000 ); -} - -TEST_F( qthread, long_reduce_dynamic ) { - TestReduceDynamic< long , Kokkos::Qthread >( 1000000 ); -} - -TEST_F( qthread, double_reduce_dynamic ) { - TestReduceDynamic< double , Kokkos::Qthread >( 1000000 ); -} - -TEST_F( qthread, long_reduce_dynamic_view ) { - TestReduceDynamicView< long , Kokkos::Qthread >( 1000000 ); -} - -TEST_F( qthread, team_long_reduce) { - TestReduceTeam< long , Kokkos::Qthread , Kokkos::Schedule >( 1000000 ); -} - -TEST_F( qthread, team_double_reduce) { - TestReduceTeam< double , Kokkos::Qthread , Kokkos::Schedule >( 1000000 ); -} - - -TEST_F( qthread , atomics ) -{ - const int loop_count = 1e4 ; - - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) 
); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(100,3) ) ); - -#if defined( KOKKOS_ENABLE_ASM ) - ASSERT_TRUE( ( TestAtomic::Loop ,Kokkos::Qthread>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop ,Kokkos::Qthread>(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop ,Kokkos::Qthread>(100,3) ) ); -#endif - -} - -TEST_F( qthread , view_remap ) -{ - enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 }; - - typedef Kokkos::View< double*[N1][N2][N3] , - Kokkos::LayoutRight , - Kokkos::Qthread > output_type ; - - typedef Kokkos::View< int**[N2][N3] , - Kokkos::LayoutLeft , - Kokkos::Qthread > input_type ; - - typedef Kokkos::View< int*[N0][N2][N3] , - Kokkos::LayoutLeft , - Kokkos::Qthread > diff_type ; - - output_type output( "output" , N0 ); - input_type input ( "input" , N0 , N1 ); - diff_type diff ( "diff" , N0 ); - - int value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - input(i0,i1,i2,i3) = ++value ; - }}}} - - // Kokkos::deep_copy( diff , input ); // throw with incompatible shape - Kokkos::deep_copy( output , input ); - - value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - ++value ; - ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) ); - }}}} -} - 
-//---------------------------------------------------------------------------- - -TEST_F( qthread , view_aggregate ) -{ - TestViewAggregate< Kokkos::Qthread >(); -} - -//---------------------------------------------------------------------------- - -TEST_F( qthread , scan ) -{ - TestScan< Kokkos::Qthread >::test_range( 1 , 1000 ); - TestScan< Kokkos::Qthread >( 1000000 ); - TestScan< Kokkos::Qthread >( 10000000 ); - Kokkos::Qthread::fence(); -} - -TEST_F( qthread, team_shared ) { - TestSharedTeam< Kokkos::Qthread , Kokkos::Schedule >(); -} - -TEST_F( qthread, shmem_size) { - TestShmemSize< Kokkos::Qthread >(); -} - -TEST_F( qthread , team_scan ) -{ - TestScanTeam< Kokkos::Qthread , Kokkos::Schedule >( 10 ); - TestScanTeam< Kokkos::Qthread , Kokkos::Schedule >( 10000 ); -} - -#if 0 /* disable */ -TEST_F( qthread , team_vector ) -{ - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthread >(0) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthread >(1) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthread >(2) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthread >(3) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthread >(4) ) ); -} -#endif - -//---------------------------------------------------------------------------- - -TEST_F( qthread , task_policy ) -{ - TestTaskScheduler::test_task_dep< Kokkos::Qthread >( 10 ); - for ( long i = 0 ; i < 25 ; ++i ) TestTaskScheduler::test_fib< Kokkos::Qthread >(i); - for ( long i = 0 ; i < 35 ; ++i ) TestTaskScheduler::test_fib2< Kokkos::Qthread >(i); -} - -TEST_F( qthread , task_team ) -{ - TestTaskScheduler::test_task_team< Kokkos::Qthread >(1000); -} - -//---------------------------------------------------------------------------- - -} // namespace test - diff --git a/lib/kokkos/core/unit_test/TestRange.hpp b/lib/kokkos/core/unit_test/TestRange.hpp index e342e844c7..90411a57a0 100644 --- a/lib/kokkos/core/unit_test/TestRange.hpp +++ b/lib/kokkos/core/unit_test/TestRange.hpp @@ -1,13 +1,13 @@ /* //@HEADER 
// ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -45,198 +45,204 @@ #include -/*--------------------------------------------------------------------------*/ - namespace Test { + namespace { template< class ExecSpace, class ScheduleType > struct TestRange { + typedef int value_type; ///< typedef required for the parallel_reduce - typedef int value_type ; ///< typedef required for the parallel_reduce + typedef Kokkos::View< int*, ExecSpace > view_type; - typedef Kokkos::View view_type ; - - view_type m_flags ; + view_type m_flags; struct VerifyInitTag {}; struct ResetTag {}; struct VerifyResetTag {}; TestRange( const size_t N ) - : m_flags( Kokkos::ViewAllocateWithoutInitializing("flags"), N ) + : m_flags( Kokkos::ViewAllocateWithoutInitializing( "flags" ), N ) {} static void test_for( const size_t N ) - { - TestRange functor(N); + { + TestRange functor( N ); - typename view_type::HostMirror host_flags = Kokkos::create_mirror_view( functor.m_flags ); + typename view_type::HostMirror host_flags = Kokkos::create_mirror_view( functor.m_flags ); - Kokkos::parallel_for( Kokkos::RangePolicy(0,N) , functor ); - Kokkos::parallel_for( Kokkos::RangePolicy(0,N) , functor ); + Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType >( 0, N ), functor ); + Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType, VerifyInitTag 
>( 0, N ), functor ); - Kokkos::deep_copy( host_flags , functor.m_flags ); + Kokkos::deep_copy( host_flags, functor.m_flags ); - size_t error_count = 0 ; - for ( size_t i = 0 ; i < N ; ++i ) { - if ( int(i) != host_flags(i) ) ++error_count ; - } - ASSERT_EQ( error_count , size_t(0) ); - - Kokkos::parallel_for( Kokkos::RangePolicy(0,N) , functor ); - Kokkos::parallel_for( std::string("TestKernelFor") , Kokkos::RangePolicy(0,N) , functor ); - - Kokkos::deep_copy( host_flags , functor.m_flags ); - - error_count = 0 ; - for ( size_t i = 0 ; i < N ; ++i ) { - if ( int(2*i) != host_flags(i) ) ++error_count ; - } - ASSERT_EQ( error_count , size_t(0) ); + size_t error_count = 0; + for ( size_t i = 0; i < N; ++i ) { + if ( int( i ) != host_flags( i ) ) ++error_count; } + ASSERT_EQ( error_count, size_t( 0 ) ); + + Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType, ResetTag >( 0, N ), functor ); + Kokkos::parallel_for( std::string( "TestKernelFor" ), Kokkos::RangePolicy< ExecSpace, ScheduleType, VerifyResetTag >( 0, N ), functor ); + + Kokkos::deep_copy( host_flags, functor.m_flags ); + + error_count = 0; + for ( size_t i = 0; i < N; ++i ) { + if ( int( 2 * i ) != host_flags( i ) ) ++error_count; + } + ASSERT_EQ( error_count, size_t( 0 ) ); + } KOKKOS_INLINE_FUNCTION void operator()( const int i ) const - { m_flags(i) = i ; } + { m_flags( i ) = i; } KOKKOS_INLINE_FUNCTION - void operator()( const VerifyInitTag & , const int i ) const - { if ( i != m_flags(i) ) { printf("TestRange::test_for error at %d != %d\n",i,m_flags(i)); } } + void operator()( const VerifyInitTag &, const int i ) const + { + if ( i != m_flags( i ) ) { + printf( "TestRange::test_for error at %d != %d\n", i, m_flags( i ) ); + } + } KOKKOS_INLINE_FUNCTION - void operator()( const ResetTag & , const int i ) const - { m_flags(i) = 2 * m_flags(i); } + void operator()( const ResetTag &, const int i ) const + { m_flags( i ) = 2 * m_flags( i ); } KOKKOS_INLINE_FUNCTION - void operator()( const 
VerifyResetTag & , const int i ) const - { if ( 2 * i != m_flags(i) ) { printf("TestRange::test_for error at %d != %d\n",i,m_flags(i)); } } + void operator()( const VerifyResetTag &, const int i ) const + { + if ( 2 * i != m_flags( i ) ) + { + printf( "TestRange::test_for error at %d != %d\n", i, m_flags( i ) ); + } + } //---------------------------------------- struct OffsetTag {}; static void test_reduce( const size_t N ) - { - TestRange functor(N); - int total = 0 ; + { + TestRange functor( N ); + int total = 0; - Kokkos::parallel_for( Kokkos::RangePolicy(0,N) , functor ); + Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType >( 0, N ), functor ); - Kokkos::parallel_reduce( "TestKernelReduce" , Kokkos::RangePolicy(0,N) , functor , total ); - // sum( 0 .. N-1 ) - ASSERT_EQ( size_t((N-1)*(N)/2) , size_t(total) ); + Kokkos::parallel_reduce( "TestKernelReduce", Kokkos::RangePolicy< ExecSpace, ScheduleType >( 0, N ), functor, total ); + // sum( 0 .. N-1 ) + ASSERT_EQ( size_t( ( N - 1 ) * ( N ) / 2 ), size_t( total ) ); - Kokkos::parallel_reduce( Kokkos::RangePolicy(0,N) , functor , total ); - // sum( 1 .. N ) - ASSERT_EQ( size_t((N)*(N+1)/2) , size_t(total) ); - } + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace, ScheduleType, OffsetTag>( 0, N ), functor, total ); + // sum( 1 .. 
N ) + ASSERT_EQ( size_t( ( N ) * ( N + 1 ) / 2 ), size_t( total ) ); + } KOKKOS_INLINE_FUNCTION - void operator()( const int i , value_type & update ) const - { update += m_flags(i); } + void operator()( const int i, value_type & update ) const + { update += m_flags( i ); } KOKKOS_INLINE_FUNCTION - void operator()( const OffsetTag & , const int i , value_type & update ) const - { update += 1 + m_flags(i); } + void operator()( const OffsetTag &, const int i, value_type & update ) const + { update += 1 + m_flags( i ); } //---------------------------------------- static void test_scan( const size_t N ) - { - TestRange functor(N); + { + TestRange functor( N ); - Kokkos::parallel_for( Kokkos::RangePolicy(0,N) , functor ); + Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType >( 0, N ), functor ); - Kokkos::parallel_scan( "TestKernelScan" , Kokkos::RangePolicy(0,N) , functor ); - } + Kokkos::parallel_scan( "TestKernelScan", Kokkos::RangePolicy< ExecSpace, ScheduleType, OffsetTag>( 0, N ), functor ); + } KOKKOS_INLINE_FUNCTION - void operator()( const OffsetTag & , const int i , value_type & update , bool final ) const - { - update += m_flags(i); + void operator()( const OffsetTag &, const int i, value_type & update, bool final ) const + { + update += m_flags( i ); - if ( final ) { - if ( update != (i*(i+1))/2 ) { - printf("TestRange::test_scan error %d : %d != %d\n",i,(i*(i+1))/2,m_flags(i)); - } + if ( final ) { + if ( update != ( i * ( i + 1 ) ) / 2 ) { + printf( "TestRange::test_scan error %d : %d != %d\n", i, ( i * ( i + 1 ) ) / 2, m_flags( i ) ); } } + } - static void test_dynamic_policy( const size_t N ) { - - - typedef Kokkos::RangePolicy > policy_t; + static void test_dynamic_policy( const size_t N ) + { + typedef Kokkos::RangePolicy< ExecSpace, Kokkos::Schedule > policy_t; { - Kokkos::View > count("Count",ExecSpace::concurrency()); - Kokkos::View a("A",N); + Kokkos::View< size_t*, ExecSpace, Kokkos::MemoryTraits > count( "Count", 
ExecSpace::concurrency() ); + Kokkos::View< int*, ExecSpace > a( "A", N ); - Kokkos::parallel_for( policy_t(0,N), - KOKKOS_LAMBDA (const typename policy_t::member_type& i) { - for(int k=0; k<(i(0,N), KOKKOS_LAMBDA(const typename policy_t::member_type& i, int& lsum) { - lsum += ( a(i)!= (i( 0, N ), KOKKOS_LAMBDA( const typename policy_t::member_type & i, int & lsum ) { + lsum += ( a( i ) != ( i < N / 2 ? 1 : 10000 ) ); + }, error ); + ASSERT_EQ( error, 0 ); - if( ( ExecSpace::concurrency()>(int)1) && (N>static_cast(4*ExecSpace::concurrency())) ) { + if ( ( ExecSpace::concurrency() > (int) 1 ) && ( N > static_cast( 4 * ExecSpace::concurrency() ) ) ) { size_t min = N; size_t max = 0; - for(int t=0; tmax) max = count(t); + for ( int t = 0; t < ExecSpace::concurrency(); t++ ) { + if ( count( t ) < min ) min = count( t ); + if ( count( t ) > max ) max = count( t ); } - ASSERT_TRUE(min2) - // ASSERT_TRUE(2*min 2 ) { + // ASSERT_TRUE( 2 * min < max ); + //} } - } { - Kokkos::View > count("Count",ExecSpace::concurrency()); - Kokkos::View a("A",N); + Kokkos::View< size_t*, ExecSpace, Kokkos::MemoryTraits > count( "Count", ExecSpace::concurrency() ); + Kokkos::View< int*, ExecSpace> a( "A", N ); int sum = 0; - Kokkos::parallel_reduce( policy_t(0,N), - KOKKOS_LAMBDA (const typename policy_t::member_type& i, int& lsum) { - for(int k=0; k<(i(0,N), KOKKOS_LAMBDA(const typename policy_t::member_type& i, int& lsum) { - lsum += ( a(i)!= (i( 0, N ), KOKKOS_LAMBDA( const typename policy_t::member_type & i, int & lsum ) { + lsum += ( a( i ) != ( i < N / 2 ? 
1 : 10000 ) ); + }, error ); + ASSERT_EQ( error, 0 ); - if( ( ExecSpace::concurrency()>(int)1) && (N>static_cast(4*ExecSpace::concurrency())) ) { + if ( ( ExecSpace::concurrency() > (int) 1 ) && ( N > static_cast( 4 * ExecSpace::concurrency() ) ) ) { size_t min = N; size_t max = 0; - for(int t=0; tmax) max = count(t); + for ( int t = 0; t < ExecSpace::concurrency(); t++ ) { + if ( count( t ) < min ) min = count( t ); + if ( count( t ) > max ) max = count( t ); } - ASSERT_TRUE(min2) - // ASSERT_TRUE(2*min 2 ) { + // ASSERT_TRUE( 2 * min < max ); + //} } } - } }; -} /* namespace */ -} /* namespace Test */ - -/*--------------------------------------------------------------------------*/ +} // namespace +} // namespace Test diff --git a/lib/kokkos/core/unit_test/TestReduce.hpp b/lib/kokkos/core/unit_test/TestReduce.hpp index 645fc9e31b..7e77dadf62 100644 --- a/lib/kokkos/core/unit_test/TestReduce.hpp +++ b/lib/kokkos/core/unit_test/TestReduce.hpp @@ -48,24 +48,23 @@ #include -/*--------------------------------------------------------------------------*/ - namespace Test { -template< typename ScalarType , class DeviceType > +template< typename ScalarType, class DeviceType > class ReduceFunctor { public: - typedef DeviceType execution_space ; - typedef typename execution_space::size_type size_type ; + typedef DeviceType execution_space; + typedef typename execution_space::size_type size_type; struct value_type { - ScalarType value[3] ; + ScalarType value[3]; }; - const size_type nwork ; + const size_type nwork; - ReduceFunctor( const size_type & arg_nwork ) : nwork( arg_nwork ) {} + ReduceFunctor( const size_type & arg_nwork ) + : nwork( arg_nwork ) {} ReduceFunctor( const ReduceFunctor & rhs ) : nwork( rhs.nwork ) {} @@ -74,66 +73,63 @@ public: KOKKOS_INLINE_FUNCTION void init( value_type & dst ) const { - dst.value[0] = 0 ; - dst.value[1] = 0 ; - dst.value[2] = 0 ; + dst.value[0] = 0; + dst.value[1] = 0; + dst.value[2] = 0; } */ KOKKOS_INLINE_FUNCTION - void join( 
volatile value_type & dst , + void join( volatile value_type & dst, const volatile value_type & src ) const { - dst.value[0] += src.value[0] ; - dst.value[1] += src.value[1] ; - dst.value[2] += src.value[2] ; + dst.value[0] += src.value[0]; + dst.value[1] += src.value[1]; + dst.value[2] += src.value[2]; } KOKKOS_INLINE_FUNCTION - void operator()( size_type iwork , value_type & dst ) const + void operator()( size_type iwork, value_type & dst ) const { - dst.value[0] += 1 ; - dst.value[1] += iwork + 1 ; - dst.value[2] += nwork - iwork ; + dst.value[0] += 1; + dst.value[1] += iwork + 1; + dst.value[2] += nwork - iwork; } }; template< class DeviceType > -class ReduceFunctorFinal : public ReduceFunctor< long , DeviceType > { +class ReduceFunctorFinal : public ReduceFunctor< long, DeviceType > { public: - - typedef typename ReduceFunctor< long , DeviceType >::value_type value_type ; + typedef typename ReduceFunctor< long, DeviceType >::value_type value_type; ReduceFunctorFinal( const size_t n ) - : ReduceFunctor(n) - {} + : ReduceFunctor< long, DeviceType >( n ) {} KOKKOS_INLINE_FUNCTION void final( value_type & dst ) const { - dst.value[0] = - dst.value[0] ; - dst.value[1] = - dst.value[1] ; - dst.value[2] = - dst.value[2] ; + dst.value[0] = -dst.value[0]; + dst.value[1] = -dst.value[1]; + dst.value[2] = -dst.value[2]; } }; -template< typename ScalarType , class DeviceType > +template< typename ScalarType, class DeviceType > class RuntimeReduceFunctor { public: // Required for functor: - typedef DeviceType execution_space ; - typedef ScalarType value_type[] ; - const unsigned value_count ; - + typedef DeviceType execution_space; + typedef ScalarType value_type[]; + const unsigned value_count; // Unit test details: - typedef typename execution_space::size_type size_type ; + typedef typename execution_space::size_type size_type; - const size_type nwork ; + const size_type nwork; - RuntimeReduceFunctor( const size_type arg_nwork , + RuntimeReduceFunctor( const size_type 
arg_nwork, const size_type arg_count ) : value_count( arg_count ) , nwork( arg_nwork ) {} @@ -141,247 +137,251 @@ public: KOKKOS_INLINE_FUNCTION void init( ScalarType dst[] ) const { - for ( unsigned i = 0 ; i < value_count ; ++i ) dst[i] = 0 ; + for ( unsigned i = 0; i < value_count; ++i ) dst[i] = 0; } KOKKOS_INLINE_FUNCTION - void join( volatile ScalarType dst[] , + void join( volatile ScalarType dst[], const volatile ScalarType src[] ) const { - for ( unsigned i = 0 ; i < value_count ; ++i ) dst[i] += src[i] ; + for ( unsigned i = 0; i < value_count; ++i ) dst[i] += src[i]; } KOKKOS_INLINE_FUNCTION - void operator()( size_type iwork , ScalarType dst[] ) const + void operator()( size_type iwork, ScalarType dst[] ) const { - const size_type tmp[3] = { 1 , iwork + 1 , nwork - iwork }; + const size_type tmp[3] = { 1, iwork + 1, nwork - iwork }; - for ( size_type i = 0 ; i < value_count ; ++i ) { + for ( size_type i = 0; i < value_count; ++i ) { dst[i] += tmp[ i % 3 ]; } } }; -template< typename ScalarType , class DeviceType > +template< typename ScalarType, class DeviceType > class RuntimeReduceMinMax { public: // Required for functor: - typedef DeviceType execution_space ; - typedef ScalarType value_type[] ; - const unsigned value_count ; + typedef DeviceType execution_space; + typedef ScalarType value_type[]; + const unsigned value_count; // Unit test details: - typedef typename execution_space::size_type size_type ; + typedef typename execution_space::size_type size_type; - const size_type nwork ; - const ScalarType amin ; - const ScalarType amax ; + const size_type nwork; + const ScalarType amin; + const ScalarType amax; - RuntimeReduceMinMax( const size_type arg_nwork , + RuntimeReduceMinMax( const size_type arg_nwork, const size_type arg_count ) : value_count( arg_count ) , nwork( arg_nwork ) - , amin( std::numeric_limits::min() ) - , amax( std::numeric_limits::max() ) + , amin( std::numeric_limits< ScalarType >::min() ) + , amax( std::numeric_limits< 
ScalarType >::max() ) {} KOKKOS_INLINE_FUNCTION void init( ScalarType dst[] ) const { - for ( unsigned i = 0 ; i < value_count ; ++i ) { - dst[i] = i % 2 ? amax : amin ; + for ( unsigned i = 0; i < value_count; ++i ) { + dst[i] = i % 2 ? amax : amin; } } KOKKOS_INLINE_FUNCTION - void join( volatile ScalarType dst[] , + void join( volatile ScalarType dst[], const volatile ScalarType src[] ) const { - for ( unsigned i = 0 ; i < value_count ; ++i ) { + for ( unsigned i = 0; i < value_count; ++i ) { dst[i] = i % 2 ? ( dst[i] < src[i] ? dst[i] : src[i] ) // min : ( dst[i] > src[i] ? dst[i] : src[i] ); // max } } KOKKOS_INLINE_FUNCTION - void operator()( size_type iwork , ScalarType dst[] ) const + void operator()( size_type iwork, ScalarType dst[] ) const { - const ScalarType tmp[2] = { ScalarType(iwork + 1) - , ScalarType(nwork - iwork) }; + const ScalarType tmp[2] = { ScalarType( iwork + 1 ) + , ScalarType( nwork - iwork ) }; - for ( size_type i = 0 ; i < value_count ; ++i ) { - dst[i] = i % 2 ? ( dst[i] < tmp[i%2] ? dst[i] : tmp[i%2] ) - : ( dst[i] > tmp[i%2] ? dst[i] : tmp[i%2] ); + for ( size_type i = 0; i < value_count; ++i ) { + dst[i] = i % 2 ? ( dst[i] < tmp[i % 2] ? dst[i] : tmp[i % 2] ) + : ( dst[i] > tmp[i % 2] ? 
dst[i] : tmp[i % 2] ); } } }; template< class DeviceType > -class RuntimeReduceFunctorFinal : public RuntimeReduceFunctor< long , DeviceType > { +class RuntimeReduceFunctorFinal : public RuntimeReduceFunctor< long, DeviceType > { public: + typedef RuntimeReduceFunctor< long, DeviceType > base_type; + typedef typename base_type::value_type value_type; + typedef long scalar_type; - typedef RuntimeReduceFunctor< long , DeviceType > base_type ; - typedef typename base_type::value_type value_type ; - typedef long scalar_type ; - - RuntimeReduceFunctorFinal( const size_t theNwork , const size_t count ) : base_type(theNwork,count) {} + RuntimeReduceFunctorFinal( const size_t theNwork, const size_t count ) + : base_type( theNwork, count ) {} KOKKOS_INLINE_FUNCTION void final( value_type dst ) const { - for ( unsigned i = 0 ; i < base_type::value_count ; ++i ) { - dst[i] = - dst[i] ; + for ( unsigned i = 0; i < base_type::value_count; ++i ) { + dst[i] = -dst[i]; } } }; + } // namespace Test namespace { -template< typename ScalarType , class DeviceType > +template< typename ScalarType, class DeviceType > class TestReduce { public: - typedef DeviceType execution_space ; - typedef typename execution_space::size_type size_type ; - - //------------------------------------ + typedef DeviceType execution_space; + typedef typename execution_space::size_type size_type; TestReduce( const size_type & nwork ) { - run_test(nwork); - run_test_final(nwork); + run_test( nwork ); + run_test_final( nwork ); } void run_test( const size_type & nwork ) { - typedef Test::ReduceFunctor< ScalarType , execution_space > functor_type ; - typedef typename functor_type::value_type value_type ; + typedef Test::ReduceFunctor< ScalarType, execution_space > functor_type; + typedef typename functor_type::value_type value_type; enum { Count = 3 }; enum { Repeat = 100 }; value_type result[ Repeat ]; - const unsigned long nw = nwork ; - const unsigned long nsum = nw % 2 ? 
nw * (( nw + 1 )/2 ) - : (nw/2) * ( nw + 1 ); + const unsigned long nw = nwork; + const unsigned long nsum = nw % 2 ? nw * ( ( nw + 1 ) / 2 ) + : ( nw / 2 ) * ( nw + 1 ); - for ( unsigned i = 0 ; i < Repeat ; ++i ) { - Kokkos::parallel_reduce( nwork , functor_type(nwork) , result[i] ); + for ( unsigned i = 0; i < Repeat; ++i ) { + Kokkos::parallel_reduce( nwork, functor_type( nwork ), result[i] ); } - for ( unsigned i = 0 ; i < Repeat ; ++i ) { - for ( unsigned j = 0 ; j < Count ; ++j ) { - const unsigned long correct = 0 == j % 3 ? nw : nsum ; - ASSERT_EQ( (ScalarType) correct , result[i].value[j] ); + for ( unsigned i = 0; i < Repeat; ++i ) { + for ( unsigned j = 0; j < Count; ++j ) { + const unsigned long correct = 0 == j % 3 ? nw : nsum; + ASSERT_EQ( (ScalarType) correct, result[i].value[j] ); } } } void run_test_final( const size_type & nwork ) { - typedef Test::ReduceFunctorFinal< execution_space > functor_type ; - typedef typename functor_type::value_type value_type ; + typedef Test::ReduceFunctorFinal< execution_space > functor_type; + typedef typename functor_type::value_type value_type; enum { Count = 3 }; enum { Repeat = 100 }; value_type result[ Repeat ]; - const unsigned long nw = nwork ; - const unsigned long nsum = nw % 2 ? nw * (( nw + 1 )/2 ) - : (nw/2) * ( nw + 1 ); + const unsigned long nw = nwork; + const unsigned long nsum = nw % 2 ? 
nw * ( ( nw + 1 ) / 2 ) + : ( nw / 2 ) * ( nw + 1 ); - for ( unsigned i = 0 ; i < Repeat ; ++i ) { - if(i%2==0) - Kokkos::parallel_reduce( nwork , functor_type(nwork) , result[i] ); - else - Kokkos::parallel_reduce( "Reduce", nwork , functor_type(nwork) , result[i] ); + for ( unsigned i = 0; i < Repeat; ++i ) { + if ( i % 2 == 0 ) { + Kokkos::parallel_reduce( nwork, functor_type( nwork ), result[i] ); + } + else { + Kokkos::parallel_reduce( "Reduce", nwork, functor_type( nwork ), result[i] ); + } } - for ( unsigned i = 0 ; i < Repeat ; ++i ) { - for ( unsigned j = 0 ; j < Count ; ++j ) { - const unsigned long correct = 0 == j % 3 ? nw : nsum ; - ASSERT_EQ( (ScalarType) correct , - result[i].value[j] ); + for ( unsigned i = 0; i < Repeat; ++i ) { + for ( unsigned j = 0; j < Count; ++j ) { + const unsigned long correct = 0 == j % 3 ? nw : nsum; + ASSERT_EQ( (ScalarType) correct, -result[i].value[j] ); } } } }; -template< typename ScalarType , class DeviceType > +template< typename ScalarType, class DeviceType > class TestReduceDynamic { public: - typedef DeviceType execution_space ; - typedef typename execution_space::size_type size_type ; - - //------------------------------------ + typedef DeviceType execution_space; + typedef typename execution_space::size_type size_type; TestReduceDynamic( const size_type nwork ) { - run_test_dynamic(nwork); - run_test_dynamic_minmax(nwork); - run_test_dynamic_final(nwork); + run_test_dynamic( nwork ); + run_test_dynamic_minmax( nwork ); + run_test_dynamic_final( nwork ); } void run_test_dynamic( const size_type nwork ) { - typedef Test::RuntimeReduceFunctor< ScalarType , execution_space > functor_type ; + typedef Test::RuntimeReduceFunctor< ScalarType, execution_space > functor_type; enum { Count = 3 }; enum { Repeat = 100 }; - ScalarType result[ Repeat ][ Count ] ; + ScalarType result[ Repeat ][ Count ]; - const unsigned long nw = nwork ; - const unsigned long nsum = nw % 2 ? 
nw * (( nw + 1 )/2 ) - : (nw/2) * ( nw + 1 ); + const unsigned long nw = nwork; + const unsigned long nsum = nw % 2 ? nw * ( ( nw + 1 ) / 2 ) + : ( nw / 2 ) * ( nw + 1 ); - for ( unsigned i = 0 ; i < Repeat ; ++i ) { - if(i%2==0) - Kokkos::parallel_reduce( nwork , functor_type(nwork,Count) , result[i] ); - else - Kokkos::parallel_reduce( "Reduce", nwork , functor_type(nwork,Count) , result[i] ); + for ( unsigned i = 0; i < Repeat; ++i ) { + if ( i % 2 == 0 ) { + Kokkos::parallel_reduce( nwork, functor_type( nwork, Count ), result[i] ); + } + else { + Kokkos::parallel_reduce( "Reduce", nwork, functor_type( nwork, Count ), result[i] ); + } } - for ( unsigned i = 0 ; i < Repeat ; ++i ) { - for ( unsigned j = 0 ; j < Count ; ++j ) { - const unsigned long correct = 0 == j % 3 ? nw : nsum ; - ASSERT_EQ( (ScalarType) correct , result[i][j] ); + for ( unsigned i = 0; i < Repeat; ++i ) { + for ( unsigned j = 0; j < Count; ++j ) { + const unsigned long correct = 0 == j % 3 ? nw : nsum; + ASSERT_EQ( (ScalarType) correct, result[i][j] ); } } } void run_test_dynamic_minmax( const size_type nwork ) { - typedef Test::RuntimeReduceMinMax< ScalarType , execution_space > functor_type ; + typedef Test::RuntimeReduceMinMax< ScalarType, execution_space > functor_type; enum { Count = 2 }; enum { Repeat = 100 }; - ScalarType result[ Repeat ][ Count ] ; + ScalarType result[ Repeat ][ Count ]; - for ( unsigned i = 0 ; i < Repeat ; ++i ) { - if(i%2==0) - Kokkos::parallel_reduce( nwork , functor_type(nwork,Count) , result[i] ); - else - Kokkos::parallel_reduce( "Reduce", nwork , functor_type(nwork,Count) , result[i] ); + for ( unsigned i = 0; i < Repeat; ++i ) { + if ( i % 2 == 0 ) { + Kokkos::parallel_reduce( nwork, functor_type( nwork, Count ), result[i] ); + } + else { + Kokkos::parallel_reduce( "Reduce", nwork, functor_type( nwork, Count ), result[i] ); + } } - for ( unsigned i = 0 ; i < Repeat ; ++i ) { - for ( unsigned j = 0 ; j < Count ; ++j ) { + for ( unsigned i = 0; i < Repeat; ++i 
) { + for ( unsigned j = 0; j < Count; ++j ) { if ( nwork == 0 ) { - ScalarType amin( std::numeric_limits::min() ); - ScalarType amax( std::numeric_limits::max() ); - const ScalarType correct = (j%2) ? amax : amin; - ASSERT_EQ( (ScalarType) correct , result[i][j] ); - } else { - const unsigned long correct = j % 2 ? 1 : nwork ; - ASSERT_EQ( (ScalarType) correct , result[i][j] ); + ScalarType amin( std::numeric_limits< ScalarType >::min() ); + ScalarType amax( std::numeric_limits< ScalarType >::max() ); + const ScalarType correct = ( j % 2 ) ? amax : amin; + ASSERT_EQ( (ScalarType) correct, result[i][j] ); + } + else { + const unsigned long correct = j % 2 ? 1 : nwork; + ASSERT_EQ( (ScalarType) correct, result[i][j] ); } } } @@ -389,169 +389,172 @@ public: void run_test_dynamic_final( const size_type nwork ) { - typedef Test::RuntimeReduceFunctorFinal< execution_space > functor_type ; + typedef Test::RuntimeReduceFunctorFinal< execution_space > functor_type; enum { Count = 3 }; enum { Repeat = 100 }; - typename functor_type::scalar_type result[ Repeat ][ Count ] ; + typename functor_type::scalar_type result[ Repeat ][ Count ]; - const unsigned long nw = nwork ; - const unsigned long nsum = nw % 2 ? nw * (( nw + 1 )/2 ) - : (nw/2) * ( nw + 1 ); + const unsigned long nw = nwork; + const unsigned long nsum = nw % 2 ? 
nw * ( ( nw + 1 ) / 2 ) + : ( nw / 2 ) * ( nw + 1 ); - for ( unsigned i = 0 ; i < Repeat ; ++i ) { - if(i%2==0) - Kokkos::parallel_reduce( nwork , functor_type(nwork,Count) , result[i] ); - else - Kokkos::parallel_reduce( "TestKernelReduce" , nwork , functor_type(nwork,Count) , result[i] ); + for ( unsigned i = 0; i < Repeat; ++i ) { + if ( i % 2 == 0 ) { + Kokkos::parallel_reduce( nwork, functor_type( nwork, Count ), result[i] ); + } + else { + Kokkos::parallel_reduce( "TestKernelReduce", nwork, functor_type( nwork, Count ), result[i] ); + } } - for ( unsigned i = 0 ; i < Repeat ; ++i ) { - for ( unsigned j = 0 ; j < Count ; ++j ) { - const unsigned long correct = 0 == j % 3 ? nw : nsum ; - ASSERT_EQ( (ScalarType) correct , - result[i][j] ); + for ( unsigned i = 0; i < Repeat; ++i ) { + for ( unsigned j = 0; j < Count; ++j ) { + const unsigned long correct = 0 == j % 3 ? nw : nsum; + ASSERT_EQ( (ScalarType) correct, -result[i][j] ); } } } }; -template< typename ScalarType , class DeviceType > +template< typename ScalarType, class DeviceType > class TestReduceDynamicView { public: - typedef DeviceType execution_space ; - typedef typename execution_space::size_type size_type ; - - //------------------------------------ + typedef DeviceType execution_space; + typedef typename execution_space::size_type size_type; TestReduceDynamicView( const size_type nwork ) { - run_test_dynamic_view(nwork); + run_test_dynamic_view( nwork ); } void run_test_dynamic_view( const size_type nwork ) { - typedef Test::RuntimeReduceFunctor< ScalarType , execution_space > functor_type ; + typedef Test::RuntimeReduceFunctor< ScalarType, execution_space > functor_type; - typedef Kokkos::View< ScalarType* , DeviceType > result_type ; - typedef typename result_type::HostMirror result_host_type ; + typedef Kokkos::View< ScalarType*, DeviceType > result_type; + typedef typename result_type::HostMirror result_host_type; - const unsigned CountLimit = 23 ; + const unsigned CountLimit = 23; - const 
unsigned long nw = nwork ; - const unsigned long nsum = nw % 2 ? nw * (( nw + 1 )/2 ) - : (nw/2) * ( nw + 1 ); + const unsigned long nw = nwork; + const unsigned long nsum = nw % 2 ? nw * ( ( nw + 1 ) / 2 ) + : ( nw / 2 ) * ( nw + 1 ); - for ( unsigned count = 0 ; count < CountLimit ; ++count ) { + for ( unsigned count = 0; count < CountLimit; ++count ) { - result_type result("result",count); + result_type result( "result", count ); result_host_type host_result = Kokkos::create_mirror( result ); // Test result to host pointer: - std::string str("TestKernelReduce"); - if(count%2==0) - Kokkos::parallel_reduce( nw , functor_type(nw,count) , host_result.ptr_on_device() ); - else - Kokkos::parallel_reduce( str , nw , functor_type(nw,count) , host_result.ptr_on_device() ); + std::string str( "TestKernelReduce" ); + if ( count % 2 == 0 ) { + Kokkos::parallel_reduce( nw, functor_type( nw, count ), host_result.ptr_on_device() ); + } + else { + Kokkos::parallel_reduce( str, nw, functor_type( nw, count ), host_result.ptr_on_device() ); + } - for ( unsigned j = 0 ; j < count ; ++j ) { - const unsigned long correct = 0 == j % 3 ? nw : nsum ; - ASSERT_EQ( host_result(j), (ScalarType) correct ); - host_result(j) = 0 ; + for ( unsigned j = 0; j < count; ++j ) { + const unsigned long correct = 0 == j % 3 ? nw : nsum; + ASSERT_EQ( host_result( j ), (ScalarType) correct ); + host_result( j ) = 0; } } } }; -} + +} // namespace // Computes y^T*A*x -// (modified from kokkos-tutorials/GTC2016/Exercises/ThreeLevelPar ) +// ( modified from kokkos-tutorials/GTC2016/Exercises/ThreeLevelPar ) #if ( ! 
defined( KOKKOS_ENABLE_CUDA ) ) || defined( KOKKOS_ENABLE_CUDA_LAMBDA ) -template< typename ScalarType , class DeviceType > +template< typename ScalarType, class DeviceType > class TestTripleNestedReduce { public: - typedef DeviceType execution_space ; - typedef typename execution_space::size_type size_type ; + typedef DeviceType execution_space; + typedef typename execution_space::size_type size_type; - //------------------------------------ - - TestTripleNestedReduce( const size_type & nrows , const size_type & ncols - , const size_type & team_size , const size_type & vector_length ) + TestTripleNestedReduce( const size_type & nrows, const size_type & ncols + , const size_type & team_size, const size_type & vector_length ) { - run_test( nrows , ncols , team_size, vector_length ); + run_test( nrows, ncols, team_size, vector_length ); } - void run_test( const size_type & nrows , const size_type & ncols + void run_test( const size_type & nrows, const size_type & ncols , const size_type & team_size, const size_type & vector_length ) { //typedef Kokkos::LayoutLeft Layout; typedef Kokkos::LayoutRight Layout; - typedef Kokkos::View ViewVector; - typedef Kokkos::View ViewMatrix; - ViewVector y( "y" , nrows ); - ViewVector x( "x" , ncols ); - ViewMatrix A( "A" , nrows , ncols ); + typedef Kokkos::View< ScalarType*, DeviceType > ViewVector; + typedef Kokkos::View< ScalarType**, Layout, DeviceType > ViewMatrix; + + ViewVector y( "y", nrows ); + ViewVector x( "x", ncols ); + ViewMatrix A( "A", nrows, ncols ); typedef Kokkos::RangePolicy range_policy; - // Initialize y vector - Kokkos::parallel_for( range_policy( 0 , nrows ) , KOKKOS_LAMBDA( const int i ) { y( i ) = 1; } ); + // Initialize y vector. + Kokkos::parallel_for( range_policy( 0, nrows ), KOKKOS_LAMBDA ( const int i ) { y( i ) = 1; } ); - // Initialize x vector - Kokkos::parallel_for( range_policy( 0 , ncols ) , KOKKOS_LAMBDA( const int i ) { x( i ) = 1; } ); + // Initialize x vector. 
+ Kokkos::parallel_for( range_policy( 0, ncols ), KOKKOS_LAMBDA ( const int i ) { x( i ) = 1; } ); - typedef Kokkos::TeamPolicy team_policy; - typedef typename Kokkos::TeamPolicy::member_type member_type; + typedef Kokkos::TeamPolicy< DeviceType > team_policy; + typedef typename Kokkos::TeamPolicy< DeviceType >::member_type member_type; - // Initialize A matrix, note 2D indexing computation - Kokkos::parallel_for( team_policy( nrows , Kokkos::AUTO ) , KOKKOS_LAMBDA( const member_type& teamMember ) { + // Initialize A matrix, note 2D indexing computation. + Kokkos::parallel_for( team_policy( nrows, Kokkos::AUTO ), KOKKOS_LAMBDA ( const member_type & teamMember ) { const int j = teamMember.league_rank(); - Kokkos::parallel_for( Kokkos::TeamThreadRange( teamMember , ncols ) , [&] ( const int i ) { - A( j , i ) = 1; + Kokkos::parallel_for( Kokkos::TeamThreadRange( teamMember, ncols ), [&] ( const int i ) { + A( j, i ) = 1; } ); } ); - // Three level parallelism kernel to force caching of vector x + // Three level parallelism kernel to force caching of vector x. 
ScalarType result = 0.0; int chunk_size = 128; - Kokkos::parallel_reduce( team_policy( nrows/chunk_size , team_size , vector_length ) , KOKKOS_LAMBDA ( const member_type& teamMember , double &update ) { + Kokkos::parallel_reduce( team_policy( nrows / chunk_size, team_size, vector_length ), + KOKKOS_LAMBDA ( const member_type & teamMember, double & update ) { const int row_start = teamMember.league_rank() * chunk_size; const int row_end = row_start + chunk_size; - Kokkos::parallel_for( Kokkos::TeamThreadRange( teamMember , row_start , row_end ) , [&] ( const int i ) { + Kokkos::parallel_for( Kokkos::TeamThreadRange( teamMember, row_start, row_end ), [&] ( const int i ) { ScalarType sum_i = 0.0; - Kokkos::parallel_reduce( Kokkos::ThreadVectorRange( teamMember , ncols ) , [&] ( const int j , ScalarType &innerUpdate ) { - innerUpdate += A( i , j ) * x( j ); - } , sum_i ); - Kokkos::single( Kokkos::PerThread( teamMember ) , [&] () { + Kokkos::parallel_reduce( Kokkos::ThreadVectorRange( teamMember, ncols ), [&] ( const int j, ScalarType &innerUpdate ) { + innerUpdate += A( i, j ) * x( j ); + }, sum_i ); + Kokkos::single( Kokkos::PerThread( teamMember ), [&] () { update += y( i ) * sum_i; } ); } ); - } , result ); + }, result ); - const ScalarType solution= ( ScalarType ) nrows * ( ScalarType ) ncols; - ASSERT_EQ( solution , result ); + const ScalarType solution = (ScalarType) nrows * (ScalarType) ncols; + ASSERT_EQ( solution, result ); } }; -#else /* #if ( ! defined( KOKKOS_ENABLE_CUDA ) ) || defined( KOKKOS_ENABLE_CUDA_LAMBDA ) */ +#else // #if ( ! 
defined( KOKKOS_ENABLE_CUDA ) ) || defined( KOKKOS_ENABLE_CUDA_LAMBDA ) -template< typename ScalarType , class DeviceType > +template< typename ScalarType, class DeviceType > class TestTripleNestedReduce { public: - typedef DeviceType execution_space ; - typedef typename execution_space::size_type size_type ; + typedef DeviceType execution_space; + typedef typename execution_space::size_type size_type; - TestTripleNestedReduce( const size_type & , const size_type - , const size_type & , const size_type ) - { } + TestTripleNestedReduce( const size_type &, const size_type + , const size_type &, const size_type ) + {} }; #endif @@ -559,38 +562,38 @@ public: //-------------------------------------------------------------------------- namespace Test { + namespace ReduceCombinatorical { -template +template< class Scalar, class Space = Kokkos::HostSpace > struct AddPlus { public: - //Required + // Required. typedef AddPlus reducer_type; typedef Scalar value_type; - typedef Kokkos::View > result_view_type; + typedef Kokkos::View< value_type, Space, Kokkos::MemoryTraits > result_view_type; private: result_view_type result; public: + AddPlus( value_type & result_ ) : result( &result_ ) {} - AddPlus(value_type& result_):result(&result_) {} - - //Required + // Required. KOKKOS_INLINE_FUNCTION - void join(value_type& dest, const value_type& src) const { + void join( value_type & dest, const value_type & src ) const { dest += src + 1; } KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dest, const volatile value_type& src) const { + void join( volatile value_type & dest, const volatile value_type & src ) const { dest += src + 1; } - //Optional + // Optional. 
KOKKOS_INLINE_FUNCTION - void init( value_type& val) const { + void init( value_type & val ) const { val = value_type(); } @@ -599,624 +602,651 @@ public: } }; -template +template< int ISTEAM > struct FunctorScalar; template<> -struct FunctorScalar<0>{ - FunctorScalar(Kokkos::View r):result(r) {} - Kokkos::View result; +struct FunctorScalar< 0 > { + Kokkos::View< double > result; + + FunctorScalar( Kokkos::View< double > r ) : result( r ) {} KOKKOS_INLINE_FUNCTION - void operator() (const int& i,double& update) const { - update+=i; + void operator()( const int & i, double & update ) const { + update += i; } }; template<> -struct FunctorScalar<1>{ - FunctorScalar(Kokkos::View r):result(r) {} - Kokkos::View result; - +struct FunctorScalar< 1 > { typedef Kokkos::TeamPolicy<>::member_type team_type; + + Kokkos::View< double > result; + + FunctorScalar( Kokkos::View< double > r ) : result( r ) {} + KOKKOS_INLINE_FUNCTION - void operator() (const team_type& team,double& update) const { - update+=1.0/team.team_size()*team.league_rank(); + void operator()( const team_type & team, double & update ) const { + update += 1.0 / team.team_size() * team.league_rank(); } }; -template +template< int ISTEAM > struct FunctorScalarInit; template<> -struct FunctorScalarInit<0> { - FunctorScalarInit(Kokkos::View r):result(r) {} +struct FunctorScalarInit< 0 > { + Kokkos::View< double > result; - Kokkos::View result; + FunctorScalarInit( Kokkos::View< double > r ) : result( r ) {} KOKKOS_INLINE_FUNCTION - void operator() (const int& i, double& update) const { + void operator()( const int & i, double & update ) const { update += i; } KOKKOS_INLINE_FUNCTION - void init(double& update) const { + void init( double & update ) const { update = 0.0; } }; template<> -struct FunctorScalarInit<1> { - FunctorScalarInit(Kokkos::View r):result(r) {} - - Kokkos::View result; - +struct FunctorScalarInit< 1 > { typedef Kokkos::TeamPolicy<>::member_type team_type; + + Kokkos::View< double > result; + + 
FunctorScalarInit( Kokkos::View< double > r ) : result( r ) {} + KOKKOS_INLINE_FUNCTION - void operator() (const team_type& team,double& update) const { - update+=1.0/team.team_size()*team.league_rank(); + void operator()( const team_type & team, double & update ) const { + update += 1.0 / team.team_size() * team.league_rank(); } KOKKOS_INLINE_FUNCTION - void init(double& update) const { + void init( double & update ) const { update = 0.0; } }; -template +template< int ISTEAM > struct FunctorScalarFinal; - template<> -struct FunctorScalarFinal<0> { - FunctorScalarFinal(Kokkos::View r):result(r) {} - +struct FunctorScalarFinal< 0 > { Kokkos::View result; + + FunctorScalarFinal( Kokkos::View< double > r ) : result( r ) {} + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, double& update) const { + void operator()( const int & i, double & update ) const { update += i; } KOKKOS_INLINE_FUNCTION - void final(double& update) const { + void final( double & update ) const { result() = update; } }; template<> -struct FunctorScalarFinal<1> { - FunctorScalarFinal(Kokkos::View r):result(r) {} - - Kokkos::View result; - +struct FunctorScalarFinal< 1 > { typedef Kokkos::TeamPolicy<>::member_type team_type; + Kokkos::View< double > result; + + FunctorScalarFinal( Kokkos::View< double > r ) : result( r ) {} + KOKKOS_INLINE_FUNCTION - void operator() (const team_type& team, double& update) const { - update+=1.0/team.team_size()*team.league_rank(); + void operator()( const team_type & team, double & update ) const { + update += 1.0 / team.team_size() * team.league_rank(); } + KOKKOS_INLINE_FUNCTION - void final(double& update) const { + void final( double & update ) const { result() = update; } }; -template +template< int ISTEAM > struct FunctorScalarJoin; template<> -struct FunctorScalarJoin<0> { - FunctorScalarJoin(Kokkos::View r):result(r) {} - +struct FunctorScalarJoin< 0 > { Kokkos::View result; + + FunctorScalarJoin( Kokkos::View< double > r ) : result( r ) {} + 
KOKKOS_INLINE_FUNCTION - void operator() (const int& i, double& update) const { + void operator()( const int & i, double & update ) const { update += i; } KOKKOS_INLINE_FUNCTION - void join(volatile double& dst, const volatile double& update) const { + void join( volatile double & dst, const volatile double & update ) const { dst += update; } }; template<> -struct FunctorScalarJoin<1> { - FunctorScalarJoin(Kokkos::View r):result(r) {} - - Kokkos::View result; - +struct FunctorScalarJoin< 1 > { typedef Kokkos::TeamPolicy<>::member_type team_type; + + Kokkos::View< double > result; + + FunctorScalarJoin( Kokkos::View< double > r ) : result( r ) {} + KOKKOS_INLINE_FUNCTION - void operator() (const team_type& team,double& update) const { - update+=1.0/team.team_size()*team.league_rank(); + void operator()( const team_type & team, double & update ) const { + update += 1.0 / team.team_size() * team.league_rank(); } KOKKOS_INLINE_FUNCTION - void join(volatile double& dst, const volatile double& update) const { + void join( volatile double & dst, const volatile double & update ) const { dst += update; } }; -template +template< int ISTEAM > struct FunctorScalarJoinFinal; template<> -struct FunctorScalarJoinFinal<0> { - FunctorScalarJoinFinal(Kokkos::View r):result(r) {} +struct FunctorScalarJoinFinal< 0 > { + Kokkos::View< double > result; + + FunctorScalarJoinFinal( Kokkos::View< double > r ) : result( r ) {} - Kokkos::View result; KOKKOS_INLINE_FUNCTION - void operator() (const int& i, double& update) const { + void operator()( const int & i, double & update ) const { update += i; } KOKKOS_INLINE_FUNCTION - void join(volatile double& dst, const volatile double& update) const { + void join( volatile double & dst, const volatile double & update ) const { dst += update; } KOKKOS_INLINE_FUNCTION - void final(double& update) const { + void final( double & update ) const { result() = update; } }; template<> -struct FunctorScalarJoinFinal<1> { - 
FunctorScalarJoinFinal(Kokkos::View r):result(r) {} - - Kokkos::View result; - +struct FunctorScalarJoinFinal< 1 > { typedef Kokkos::TeamPolicy<>::member_type team_type; + + Kokkos::View< double > result; + + FunctorScalarJoinFinal( Kokkos::View< double > r ) : result( r ) {} + KOKKOS_INLINE_FUNCTION - void operator() (const team_type& team,double& update) const { - update+=1.0/team.team_size()*team.league_rank(); + void operator()( const team_type & team, double & update ) const { + update += 1.0 / team.team_size() * team.league_rank(); } KOKKOS_INLINE_FUNCTION - void join(volatile double& dst, const volatile double& update) const { + void join( volatile double & dst, const volatile double & update ) const { dst += update; } KOKKOS_INLINE_FUNCTION - void final(double& update) const { + void final( double & update ) const { result() = update; } }; -template +template< int ISTEAM > struct FunctorScalarJoinInit; template<> -struct FunctorScalarJoinInit<0> { - FunctorScalarJoinInit(Kokkos::View r):result(r) {} +struct FunctorScalarJoinInit< 0 > { + Kokkos::View< double > result; + + FunctorScalarJoinInit( Kokkos::View< double > r ) : result( r ) {} - Kokkos::View result; KOKKOS_INLINE_FUNCTION - void operator() (const int& i, double& update) const { + void operator()( const int & i, double & update ) const { update += i; } KOKKOS_INLINE_FUNCTION - void join(volatile double& dst, const volatile double& update) const { + void join( volatile double & dst, const volatile double & update ) const { dst += update; } KOKKOS_INLINE_FUNCTION - void init(double& update) const { + void init( double & update ) const { update = 0.0; } }; template<> -struct FunctorScalarJoinInit<1> { - FunctorScalarJoinInit(Kokkos::View r):result(r) {} - - Kokkos::View result; - +struct FunctorScalarJoinInit< 1 > { typedef Kokkos::TeamPolicy<>::member_type team_type; + + Kokkos::View< double > result; + + FunctorScalarJoinInit( Kokkos::View< double > r ) : result( r ) {} + KOKKOS_INLINE_FUNCTION - 
void operator() (const team_type& team,double& update) const { - update+=1.0/team.team_size()*team.league_rank(); + void operator()( const team_type & team, double & update ) const { + update += 1.0 / team.team_size() * team.league_rank(); } KOKKOS_INLINE_FUNCTION - void join(volatile double& dst, const volatile double& update) const { + void join( volatile double & dst, const volatile double & update ) const { dst += update; } KOKKOS_INLINE_FUNCTION - void init(double& update) const { + void init( double & update ) const { update = 0.0; } }; -template +template< int ISTEAM > struct FunctorScalarJoinFinalInit; template<> -struct FunctorScalarJoinFinalInit<0> { - FunctorScalarJoinFinalInit(Kokkos::View r):result(r) {} - +struct FunctorScalarJoinFinalInit< 0 > { Kokkos::View result; + FunctorScalarJoinFinalInit( Kokkos::View< double > r ) : result( r ) {} + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, double& update) const { + void operator()( const int & i, double & update ) const { update += i; } KOKKOS_INLINE_FUNCTION - void join(volatile double& dst, const volatile double& update) const { + void join( volatile double & dst, const volatile double & update ) const { dst += update; } KOKKOS_INLINE_FUNCTION - void final(double& update) const { + void final( double & update ) const { result() = update; } KOKKOS_INLINE_FUNCTION - void init(double& update) const { + void init( double & update ) const { update = 0.0; } }; template<> -struct FunctorScalarJoinFinalInit<1> { - FunctorScalarJoinFinalInit(Kokkos::View r):result(r) {} - - Kokkos::View result; - +struct FunctorScalarJoinFinalInit< 1 > { typedef Kokkos::TeamPolicy<>::member_type team_type; + + Kokkos::View< double > result; + + FunctorScalarJoinFinalInit( Kokkos::View< double > r ) : result( r ) {} + KOKKOS_INLINE_FUNCTION - void operator() (const team_type& team,double& update) const { - update+=1.0/team.team_size()*team.league_rank(); + void operator()( const team_type & team, double & update ) 
const { + update += 1.0 / team.team_size() * team.league_rank(); } KOKKOS_INLINE_FUNCTION - void join(volatile double& dst, const volatile double& update) const { + void join( volatile double & dst, const volatile double & update ) const { dst += update; } KOKKOS_INLINE_FUNCTION - void final(double& update) const { + void final( double & update ) const { result() = update; } KOKKOS_INLINE_FUNCTION - void init(double& update) const { + void init( double & update ) const { update = 0.0; } }; + struct Functor1 { KOKKOS_INLINE_FUNCTION - void operator() (const int& i,double& update) const { - update+=i; + void operator()( const int & i, double & update ) const { + update += i; } }; struct Functor2 { typedef double value_type[]; + const unsigned value_count; - Functor2(unsigned n):value_count(n){} + Functor2( unsigned n ) : value_count( n ) {} KOKKOS_INLINE_FUNCTION - void operator() (const unsigned& i,double update[]) const { - for(unsigned j=0;j +template< class ExecSpace = Kokkos::DefaultExecutionSpace > struct TestReduceCombinatoricalInstantiation { - template - static void CallParallelReduce(Args... args) { - Kokkos::parallel_reduce(args...); + template< class ... Args > + static void CallParallelReduce( Args... args ) { + Kokkos::parallel_reduce( args... ); } - template - static void AddReturnArgument(Args... args) { - Kokkos::View result_view("ResultView"); - double expected_result = 1000.0*999.0/2.0; + template< class ... Args > + static void AddReturnArgument( Args... 
args ) { + Kokkos::View< double, Kokkos::HostSpace > result_view( "ResultView" ); + double expected_result = 1000.0 * 999.0 / 2.0; double value = 0; - Kokkos::parallel_reduce(args...,value); - ASSERT_EQ(expected_result,value); + Kokkos::parallel_reduce( args..., value ); + ASSERT_EQ( expected_result, value ); result_view() = 0; - CallParallelReduce(args...,result_view); - ASSERT_EQ(expected_result,result_view()); + CallParallelReduce( args..., result_view ); + ASSERT_EQ( expected_result, result_view() ); value = 0; - CallParallelReduce(args...,Kokkos::View>(&value)); - ASSERT_EQ(expected_result,value); + CallParallelReduce( args..., Kokkos::View< double, Kokkos::HostSpace, Kokkos::MemoryTraits >( &value ) ); + ASSERT_EQ( expected_result, value ); result_view() = 0; - const Kokkos::View> result_view_const_um = result_view; - CallParallelReduce(args...,result_view_const_um); - ASSERT_EQ(expected_result,result_view_const_um()); + const Kokkos::View< double, Kokkos::HostSpace, Kokkos::MemoryTraits > result_view_const_um = result_view; + CallParallelReduce( args..., result_view_const_um ); + ASSERT_EQ( expected_result, result_view_const_um() ); value = 0; - CallParallelReduce(args...,Test::ReduceCombinatorical::AddPlus(value)); - if((Kokkos::DefaultExecutionSpace::concurrency() > 1) && (ExecSpace::concurrency()>1)) - ASSERT_TRUE(expected_result 1) || (ExecSpace::concurrency()>1)) - ASSERT_TRUE(expected_result<=value); - else - ASSERT_EQ(expected_result,value); + CallParallelReduce( args..., Test::ReduceCombinatorical::AddPlus< double >( value ) ); + if ( ( Kokkos::DefaultExecutionSpace::concurrency() > 1 ) && ( ExecSpace::concurrency() > 1 ) ) { + ASSERT_TRUE( expected_result < value ); + } + else if ( ( Kokkos::DefaultExecutionSpace::concurrency() > 1 ) || ( ExecSpace::concurrency() > 1 ) ) { + ASSERT_TRUE( expected_result <= value ); + } + else { + ASSERT_EQ( expected_result, value ); + } value = 0; - Test::ReduceCombinatorical::AddPlus add(value); - 
CallParallelReduce(args...,add); - if((Kokkos::DefaultExecutionSpace::concurrency() > 1) && (ExecSpace::concurrency()>1)) - ASSERT_TRUE(expected_result 1) || (ExecSpace::concurrency()>1)) - ASSERT_TRUE(expected_result<=value); - else - ASSERT_EQ(expected_result,value); + Test::ReduceCombinatorical::AddPlus< double > add( value ); + CallParallelReduce( args..., add ); + if ( ( Kokkos::DefaultExecutionSpace::concurrency() > 1 ) && ( ExecSpace::concurrency() > 1 ) ) { + ASSERT_TRUE( expected_result < value ); + } + else if ( ( Kokkos::DefaultExecutionSpace::concurrency() > 1 ) || ( ExecSpace::concurrency() > 1 ) ) { + ASSERT_TRUE( expected_result <= value ); + } + else { + ASSERT_EQ( expected_result, value ); + } } - - template - static void AddLambdaRange(void*,Args... args) { - AddReturnArgument(args..., KOKKOS_LAMBDA (const int&i , double& lsum) { + template< class ... Args > + static void AddLambdaRange( void*, Args... args ) { + AddReturnArgument( args..., KOKKOS_LAMBDA ( const int & i, double & lsum ) { lsum += i; }); } - template - static void AddLambdaTeam(void*,Args... args) { - AddReturnArgument(args..., KOKKOS_LAMBDA (const Kokkos::TeamPolicy<>::member_type& team, double& update) { - update+=1.0/team.team_size()*team.league_rank(); + template< class ... Args > + static void AddLambdaTeam( void*, Args... args ) { + AddReturnArgument( args..., KOKKOS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type & team, double & update ) { + update += 1.0 / team.team_size() * team.league_rank(); }); } - template - static void AddLambdaRange(Kokkos::InvalidType,Args... args) { - } + template< class ... Args > + static void AddLambdaRange( Kokkos::InvalidType, Args... args ) {} - template - static void AddLambdaTeam(Kokkos::InvalidType,Args... args) { - } + template< class ... Args > + static void AddLambdaTeam( Kokkos::InvalidType, Args... args ) {} - template - static void AddFunctor(Args... 
args) { - Kokkos::View result_view("FunctorView"); - auto h_r = Kokkos::create_mirror_view(result_view); - Test::ReduceCombinatorical::FunctorScalar functor(result_view); - double expected_result = 1000.0*999.0/2.0; + template< int ISTEAM, class ... Args > + static void AddFunctor( Args... args ) { + Kokkos::View< double > result_view( "FunctorView" ); + auto h_r = Kokkos::create_mirror_view( result_view ); + Test::ReduceCombinatorical::FunctorScalar< ISTEAM > functor( result_view ); + double expected_result = 1000.0 * 999.0 / 2.0; - AddReturnArgument(args..., functor); - AddReturnArgument(args..., Test::ReduceCombinatorical::FunctorScalar(result_view)); - AddReturnArgument(args..., Test::ReduceCombinatorical::FunctorScalarInit(result_view)); - AddReturnArgument(args..., Test::ReduceCombinatorical::FunctorScalarJoin(result_view)); - AddReturnArgument(args..., Test::ReduceCombinatorical::FunctorScalarJoinInit(result_view)); + AddReturnArgument( args..., functor ); + AddReturnArgument( args..., Test::ReduceCombinatorical::FunctorScalar< ISTEAM >( result_view ) ); + AddReturnArgument( args..., Test::ReduceCombinatorical::FunctorScalarInit< ISTEAM >( result_view ) ); + AddReturnArgument( args..., Test::ReduceCombinatorical::FunctorScalarJoin< ISTEAM >( result_view ) ); + AddReturnArgument( args..., Test::ReduceCombinatorical::FunctorScalarJoinInit< ISTEAM >( result_view ) ); h_r() = 0; - Kokkos::deep_copy(result_view,h_r); - CallParallelReduce(args..., Test::ReduceCombinatorical::FunctorScalarFinal(result_view)); - Kokkos::deep_copy(h_r,result_view); - ASSERT_EQ(expected_result,h_r()); + Kokkos::deep_copy( result_view, h_r ); + CallParallelReduce( args..., Test::ReduceCombinatorical::FunctorScalarFinal< ISTEAM >( result_view ) ); + Kokkos::deep_copy( h_r, result_view ); + ASSERT_EQ( expected_result, h_r() ); h_r() = 0; - Kokkos::deep_copy(result_view,h_r); - CallParallelReduce(args..., Test::ReduceCombinatorical::FunctorScalarJoinFinal(result_view)); - 
Kokkos::deep_copy(h_r,result_view); - ASSERT_EQ(expected_result,h_r()); + Kokkos::deep_copy( result_view, h_r ); + CallParallelReduce( args..., Test::ReduceCombinatorical::FunctorScalarJoinFinal< ISTEAM >( result_view ) ); + Kokkos::deep_copy( h_r, result_view ); + ASSERT_EQ( expected_result, h_r() ); h_r() = 0; - Kokkos::deep_copy(result_view,h_r); - CallParallelReduce(args..., Test::ReduceCombinatorical::FunctorScalarJoinFinalInit(result_view)); - Kokkos::deep_copy(h_r,result_view); - ASSERT_EQ(expected_result,h_r()); + Kokkos::deep_copy( result_view, h_r ); + CallParallelReduce( args..., Test::ReduceCombinatorical::FunctorScalarJoinFinalInit< ISTEAM >( result_view ) ); + Kokkos::deep_copy( h_r, result_view ); + ASSERT_EQ( expected_result, h_r() ); } - template - static void AddFunctorLambdaRange(Args... args) { - AddFunctor<0,Args...>(args...); - #ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA - AddLambdaRange(typename std::conditional::value,void*,Kokkos::InvalidType>::type(), args...); - #endif + template< class ... Args > + static void AddFunctorLambdaRange( Args... args ) { + AddFunctor< 0, Args... >( args... ); +#ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA + AddLambdaRange( typename std::conditional< std::is_same::value, void*, Kokkos::InvalidType >::type(), args... ); +#endif } - template - static void AddFunctorLambdaTeam(Args... args) { - AddFunctor<1,Args...>(args...); - #ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA - AddLambdaTeam(typename std::conditional::value,void*,Kokkos::InvalidType>::type(), args...); - #endif + template< class ... Args > + static void AddFunctorLambdaTeam( Args... args ) { + AddFunctor< 1, Args... >( args... ); +#ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA + AddLambdaTeam( typename std::conditional< std::is_same::value, void*, Kokkos::InvalidType >::type(), args... ); +#endif } - template - static void AddPolicy(Args... args) { + template< class ... Args > + static void AddPolicy( Args... 
args ) { int N = 1000; - Kokkos::RangePolicy policy(0,N); + Kokkos::RangePolicy< ExecSpace > policy( 0, N ); - AddFunctorLambdaRange(args...,1000); - AddFunctorLambdaRange(args...,N); - AddFunctorLambdaRange(args...,policy); - AddFunctorLambdaRange(args...,Kokkos::RangePolicy(0,N)); - AddFunctorLambdaRange(args...,Kokkos::RangePolicy >(0,N)); - AddFunctorLambdaRange(args...,Kokkos::RangePolicy >(0,N).set_chunk_size(10)); - AddFunctorLambdaRange(args...,Kokkos::RangePolicy >(0,N).set_chunk_size(10)); + AddFunctorLambdaRange( args..., 1000 ); + AddFunctorLambdaRange( args..., N ); + AddFunctorLambdaRange( args..., policy ); + AddFunctorLambdaRange( args..., Kokkos::RangePolicy< ExecSpace >( 0, N ) ); + AddFunctorLambdaRange( args..., Kokkos::RangePolicy< ExecSpace, Kokkos::Schedule >( 0, N ) ); + AddFunctorLambdaRange( args..., Kokkos::RangePolicy< ExecSpace, Kokkos::Schedule >( 0, N ).set_chunk_size( 10 ) ); + AddFunctorLambdaRange( args..., Kokkos::RangePolicy< ExecSpace, Kokkos::Schedule >( 0, N ).set_chunk_size( 10 ) ); - AddFunctorLambdaTeam(args...,Kokkos::TeamPolicy(N,Kokkos::AUTO)); - AddFunctorLambdaTeam(args...,Kokkos::TeamPolicy >(N,Kokkos::AUTO)); - AddFunctorLambdaTeam(args...,Kokkos::TeamPolicy >(N,Kokkos::AUTO).set_chunk_size(10)); - AddFunctorLambdaTeam(args...,Kokkos::TeamPolicy >(N,Kokkos::AUTO).set_chunk_size(10)); + AddFunctorLambdaTeam( args..., Kokkos::TeamPolicy< ExecSpace >( N, Kokkos::AUTO ) ); + AddFunctorLambdaTeam( args..., Kokkos::TeamPolicy< ExecSpace, Kokkos::Schedule >( N, Kokkos::AUTO ) ); + AddFunctorLambdaTeam( args..., Kokkos::TeamPolicy< ExecSpace, Kokkos::Schedule >( N, Kokkos::AUTO ).set_chunk_size( 10 ) ); + AddFunctorLambdaTeam( args..., Kokkos::TeamPolicy< ExecSpace, Kokkos::Schedule >( N, Kokkos::AUTO ).set_chunk_size( 10 ) ); } - static void execute_a() { AddPolicy(); } static void execute_b() { - std::string s("Std::String"); - AddPolicy(s.c_str()); - AddPolicy("Char Constant"); + std::string s( "Std::String" ); + 
AddPolicy( s.c_str() ); + AddPolicy( "Char Constant" ); } static void execute_c() { - std::string s("Std::String"); - AddPolicy(s); + std::string s( "Std::String" ); + AddPolicy( s ); } }; -template +template< class Scalar, class ExecSpace = Kokkos::DefaultExecutionSpace > struct TestReducers { - struct SumFunctor { - Kokkos::View values; + Kokkos::View< const Scalar*, ExecSpace > values; + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, Scalar& value) const { - value += values(i); + void operator()( const int & i, Scalar & value ) const { + value += values( i ); } }; struct ProdFunctor { - Kokkos::View values; + Kokkos::View< const Scalar*, ExecSpace > values; + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, Scalar& value) const { - value *= values(i); + void operator()( const int & i, Scalar & value ) const { + value *= values( i ); } }; struct MinFunctor { - Kokkos::View values; + Kokkos::View< const Scalar*, ExecSpace > values; + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, Scalar& value) const { - if(values(i) < value) - value = values(i); + void operator()( const int & i, Scalar & value ) const { + if ( values( i ) < value ) value = values( i ); } }; struct MaxFunctor { - Kokkos::View values; + Kokkos::View< const Scalar*, ExecSpace > values; + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, Scalar& value) const { - if(values(i) > value) - value = values(i); + void operator()( const int & i, Scalar & value ) const { + if ( values( i ) > value ) value = values( i ); } }; struct MinLocFunctor { - Kokkos::View values; + Kokkos::View< const Scalar*, ExecSpace > values; + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, - typename Kokkos::Experimental::MinLoc::value_type& value) const { - if(values(i) < value.val) { - value.val = values(i); + void operator()( const int & i, typename Kokkos::Experimental::MinLoc< Scalar, int >::value_type & value ) const { + if ( values( i ) < value.val ) { + value.val = values( i ); 
value.loc = i; } } }; struct MaxLocFunctor { - Kokkos::View values; + Kokkos::View< const Scalar*, ExecSpace > values; + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, - typename Kokkos::Experimental::MaxLoc::value_type& value) const { - if(values(i) > value.val) { - value.val = values(i); + void operator()( const int & i, typename Kokkos::Experimental::MaxLoc< Scalar, int >::value_type & value ) const { + if ( values( i ) > value.val ) { + value.val = values( i ); value.loc = i; } } }; struct MinMaxLocFunctor { - Kokkos::View values; + Kokkos::View< const Scalar*, ExecSpace > values; + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, - typename Kokkos::Experimental::MinMaxLoc::value_type& value) const { - if(values(i) > value.max_val) { - value.max_val = values(i); + void operator()( const int & i, typename Kokkos::Experimental::MinMaxLoc< Scalar, int >::value_type & value ) const { + if ( values( i ) > value.max_val ) { + value.max_val = values( i ); value.max_loc = i; } - if(values(i) < value.min_val) { - value.min_val = values(i); + + if ( values( i ) < value.min_val ) { + value.min_val = values( i ); value.min_loc = i; } } }; struct BAndFunctor { - Kokkos::View values; + Kokkos::View< const Scalar*, ExecSpace > values; + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, Scalar& value) const { - value = value & values(i); + void operator()( const int & i, Scalar & value ) const { + value = value & values( i ); } }; struct BOrFunctor { - Kokkos::View values; + Kokkos::View< const Scalar*, ExecSpace > values; + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, Scalar& value) const { - value = value | values(i); + void operator()( const int & i, Scalar & value ) const { + value = value | values( i ); } }; struct BXorFunctor { - Kokkos::View values; + Kokkos::View< const Scalar*, ExecSpace > values; + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, Scalar& value) const { - value = value ^ values(i); + void operator()( const int & 
i, Scalar & value ) const { + value = value ^ values( i ); } }; struct LAndFunctor { - Kokkos::View values; + Kokkos::View< const Scalar*, ExecSpace > values; + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, Scalar& value) const { - value = value && values(i); + void operator()( const int & i, Scalar & value ) const { + value = value && values( i ); } }; struct LOrFunctor { - Kokkos::View values; + Kokkos::View< const Scalar*, ExecSpace > values; + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, Scalar& value) const { - value = value || values(i); + void operator()( const int & i, Scalar & value ) const { + value = value || values( i ); } }; struct LXorFunctor { - Kokkos::View values; + Kokkos::View< const Scalar*, ExecSpace > values; + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, Scalar& value) const { - value = value ? (!values(i)) : values(i); + void operator()( const int & i, Scalar & value ) const { + value = value ? ( !values( i ) ) : values( i ); } }; - static void test_sum(int N) { - Kokkos::View values("Values",N); - auto h_values = Kokkos::create_mirror_view(values); + static void test_sum( int N ) { + Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); + auto h_values = Kokkos::create_mirror_view( values ); Scalar reference_sum = 0; - for(int i=0; i reducer_scalar(sum_scalar); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_scalar); - ASSERT_EQ(sum_scalar,reference_sum); + Kokkos::Experimental::Sum< Scalar > reducer_scalar( sum_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + + ASSERT_EQ( sum_scalar, reference_sum ); + Scalar sum_scalar_view = reducer_scalar.result_view()(); - ASSERT_EQ(sum_scalar_view,reference_sum); + ASSERT_EQ( sum_scalar_view, reference_sum ); } + { Scalar sum_scalar_init = init; - Kokkos::Experimental::Sum reducer_scalar_init(sum_scalar_init,init); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_scalar_init); - 
ASSERT_EQ(sum_scalar_init,reference_sum); + Kokkos::Experimental::Sum< Scalar > reducer_scalar_init( sum_scalar_init, init ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar_init ); + + ASSERT_EQ( sum_scalar_init, reference_sum ); + Scalar sum_scalar_init_view = reducer_scalar_init.result_view()(); - ASSERT_EQ(sum_scalar_init_view,reference_sum); + ASSERT_EQ( sum_scalar_init_view, reference_sum ); } + { - Kokkos::View sum_view("View"); + Kokkos::View< Scalar, Kokkos::HostSpace> sum_view( "View" ); sum_view() = init; - Kokkos::Experimental::Sum reducer_view(sum_view); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_view); + Kokkos::Experimental::Sum< Scalar > reducer_view( sum_view ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Scalar sum_view_scalar = sum_view(); - ASSERT_EQ(sum_view_scalar,reference_sum); + ASSERT_EQ( sum_view_scalar, reference_sum ); + Scalar sum_view_view = reducer_view.result_view()(); - ASSERT_EQ(sum_view_view,reference_sum); + ASSERT_EQ( sum_view_view, reference_sum ); } + { - Kokkos::View sum_view_init("View"); + Kokkos::View< Scalar, Kokkos::HostSpace > sum_view_init( "View" ); sum_view_init() = init; - Kokkos::Experimental::Sum reducer_view_init(sum_view_init,init); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_view_init); + Kokkos::Experimental::Sum< Scalar > reducer_view_init( sum_view_init, init ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view_init ); + Scalar sum_view_init_scalar = sum_view_init(); - ASSERT_EQ(sum_view_init_scalar,reference_sum); + ASSERT_EQ( sum_view_init_scalar, reference_sum ); + Scalar sum_view_init_view = reducer_view_init.result_view()(); - ASSERT_EQ(sum_view_init_view,reference_sum); + ASSERT_EQ( sum_view_init_view, reference_sum ); } } - static void test_prod(int N) { - Kokkos::View values("Values",N); - auto h_values = Kokkos::create_mirror_view(values); + 
static void test_prod( int N ) { + Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); + auto h_values = Kokkos::create_mirror_view( values ); Scalar reference_prod = 1; - for(int i=0; i::value) + if ( std::is_arithmetic< Scalar >::value ) { Scalar prod_scalar = init; - Kokkos::Experimental::Prod reducer_scalar(prod_scalar); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_scalar); - ASSERT_EQ(prod_scalar,reference_prod); + Kokkos::Experimental::Prod< Scalar > reducer_scalar( prod_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + + ASSERT_EQ( prod_scalar, reference_prod ); + Scalar prod_scalar_view = reducer_scalar.result_view()(); - ASSERT_EQ(prod_scalar_view,reference_prod); - } - { - Scalar prod_scalar_init = init; - Kokkos::Experimental::Prod reducer_scalar_init(prod_scalar_init,init); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_scalar_init); - ASSERT_EQ(prod_scalar_init,reference_prod); - Scalar prod_scalar_init_view = reducer_scalar_init.result_view()(); - ASSERT_EQ(prod_scalar_init_view,reference_prod); + ASSERT_EQ( prod_scalar_view, reference_prod ); } - if(std::is_arithmetic::value) { - Kokkos::View prod_view("View"); - prod_view() = init; - Kokkos::Experimental::Prod reducer_view(prod_view); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_view); - Scalar prod_view_scalar = prod_view(); - ASSERT_EQ(prod_view_scalar,reference_prod); - Scalar prod_view_view = reducer_view.result_view()(); - ASSERT_EQ(prod_view_view,reference_prod); + Scalar prod_scalar_init = init; + Kokkos::Experimental::Prod< Scalar > reducer_scalar_init( prod_scalar_init, init ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar_init ); + + ASSERT_EQ( prod_scalar_init, reference_prod ); + + Scalar prod_scalar_init_view = reducer_scalar_init.result_view()(); + ASSERT_EQ( prod_scalar_init_view, reference_prod ); } + + if ( std::is_arithmetic< Scalar 
>::value ) { - Kokkos::View prod_view_init("View"); + Kokkos::View< Scalar, Kokkos::HostSpace > prod_view( "View" ); + prod_view() = init; + Kokkos::Experimental::Prod< Scalar > reducer_view( prod_view ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + + Scalar prod_view_scalar = prod_view(); + ASSERT_EQ( prod_view_scalar, reference_prod ); + + Scalar prod_view_view = reducer_view.result_view()(); + ASSERT_EQ( prod_view_view, reference_prod ); + } + + { + Kokkos::View< Scalar, Kokkos::HostSpace > prod_view_init( "View" ); prod_view_init() = init; - Kokkos::Experimental::Prod reducer_view_init(prod_view_init,init); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_view_init); + Kokkos::Experimental::Prod< Scalar > reducer_view_init( prod_view_init, init ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view_init ); + Scalar prod_view_init_scalar = prod_view_init(); - ASSERT_EQ(prod_view_init_scalar,reference_prod); + ASSERT_EQ( prod_view_init_scalar, reference_prod ); + Scalar prod_view_init_view = reducer_view_init.result_view()(); - ASSERT_EQ(prod_view_init_view,reference_prod); + ASSERT_EQ( prod_view_init_view, reference_prod ); } } - static void test_min(int N) { - Kokkos::View values("Values",N); - auto h_values = Kokkos::create_mirror_view(values); - Scalar reference_min = std::numeric_limits::max(); - for(int i=0; i values( "Values", N ); + auto h_values = Kokkos::create_mirror_view( values ); + Scalar reference_min = std::numeric_limits< Scalar >::max(); + + for ( int i = 0; i < N; i++ ) { + h_values( i ) = (Scalar) ( rand() % 100000 ); + + if ( h_values( i ) < reference_min ) reference_min = h_values( i ); } - Kokkos::deep_copy(values,h_values); + Kokkos::deep_copy( values, h_values ); MinFunctor f; f.values = values; - Scalar init = std::numeric_limits::max(); + Scalar init = std::numeric_limits< Scalar >::max(); { Scalar min_scalar = init; - Kokkos::Experimental::Min 
reducer_scalar(min_scalar); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_scalar); - ASSERT_EQ(min_scalar,reference_min); + Kokkos::Experimental::Min< Scalar > reducer_scalar( min_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + + ASSERT_EQ( min_scalar, reference_min ); + Scalar min_scalar_view = reducer_scalar.result_view()(); - ASSERT_EQ(min_scalar_view,reference_min); + ASSERT_EQ( min_scalar_view, reference_min ); } + { Scalar min_scalar_init = init; - Kokkos::Experimental::Min reducer_scalar_init(min_scalar_init,init); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_scalar_init); - ASSERT_EQ(min_scalar_init,reference_min); + Kokkos::Experimental::Min< Scalar > reducer_scalar_init( min_scalar_init, init ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar_init ); + + ASSERT_EQ( min_scalar_init, reference_min ); + Scalar min_scalar_init_view = reducer_scalar_init.result_view()(); - ASSERT_EQ(min_scalar_init_view,reference_min); + ASSERT_EQ( min_scalar_init_view, reference_min ); } + { - Kokkos::View min_view("View"); + Kokkos::View< Scalar, Kokkos::HostSpace > min_view( "View" ); min_view() = init; - Kokkos::Experimental::Min reducer_view(min_view); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_view); + Kokkos::Experimental::Min< Scalar > reducer_view( min_view ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Scalar min_view_scalar = min_view(); - ASSERT_EQ(min_view_scalar,reference_min); + ASSERT_EQ( min_view_scalar, reference_min ); + Scalar min_view_view = reducer_view.result_view()(); - ASSERT_EQ(min_view_view,reference_min); + ASSERT_EQ( min_view_view, reference_min ); } + { - Kokkos::View min_view_init("View"); + Kokkos::View< Scalar, Kokkos::HostSpace > min_view_init( "View" ); min_view_init() = init; - Kokkos::Experimental::Min reducer_view_init(min_view_init,init); - 
Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_view_init); + Kokkos::Experimental::Min< Scalar > reducer_view_init( min_view_init, init ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view_init ); + Scalar min_view_init_scalar = min_view_init(); - ASSERT_EQ(min_view_init_scalar,reference_min); + ASSERT_EQ( min_view_init_scalar, reference_min ); + Scalar min_view_init_view = reducer_view_init.result_view()(); - ASSERT_EQ(min_view_init_view,reference_min); + ASSERT_EQ( min_view_init_view, reference_min ); } } - static void test_max(int N) { - Kokkos::View values("Values",N); - auto h_values = Kokkos::create_mirror_view(values); - Scalar reference_max = std::numeric_limits::min(); - for(int i=0; ireference_max) - reference_max = h_values(i); + static void test_max( int N ) { + Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); + auto h_values = Kokkos::create_mirror_view( values ); + Scalar reference_max = std::numeric_limits< Scalar >::min(); + + for ( int i = 0; i < N; i++ ) { + h_values( i ) = (Scalar) ( rand() % 100000 + 1 ); + + if ( h_values( i ) > reference_max ) reference_max = h_values( i ); } - Kokkos::deep_copy(values,h_values); + Kokkos::deep_copy( values, h_values ); MaxFunctor f; f.values = values; - Scalar init = std::numeric_limits::min(); + Scalar init = std::numeric_limits< Scalar >::min(); { Scalar max_scalar = init; - Kokkos::Experimental::Max reducer_scalar(max_scalar); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_scalar); - ASSERT_EQ(max_scalar,reference_max); + Kokkos::Experimental::Max< Scalar > reducer_scalar( max_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + + ASSERT_EQ( max_scalar, reference_max ); + Scalar max_scalar_view = reducer_scalar.result_view()(); - ASSERT_EQ(max_scalar_view,reference_max); + ASSERT_EQ( max_scalar_view, reference_max ); } + { Scalar max_scalar_init = init; - Kokkos::Experimental::Max 
reducer_scalar_init(max_scalar_init,init); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_scalar_init); - ASSERT_EQ(max_scalar_init,reference_max); + Kokkos::Experimental::Max< Scalar > reducer_scalar_init( max_scalar_init, init ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar_init ); + + ASSERT_EQ( max_scalar_init, reference_max ); + Scalar max_scalar_init_view = reducer_scalar_init.result_view()(); - ASSERT_EQ(max_scalar_init_view,reference_max); + ASSERT_EQ( max_scalar_init_view, reference_max ); } + { - Kokkos::View max_view("View"); + Kokkos::View< Scalar, Kokkos::HostSpace > max_view( "View" ); max_view() = init; - Kokkos::Experimental::Max reducer_view(max_view); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_view); + Kokkos::Experimental::Max< Scalar > reducer_view( max_view ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Scalar max_view_scalar = max_view(); - ASSERT_EQ(max_view_scalar,reference_max); + ASSERT_EQ( max_view_scalar, reference_max ); + Scalar max_view_view = reducer_view.result_view()(); - ASSERT_EQ(max_view_view,reference_max); + ASSERT_EQ( max_view_view, reference_max ); } + { - Kokkos::View max_view_init("View"); + Kokkos::View< Scalar, Kokkos::HostSpace > max_view_init( "View" ); max_view_init() = init; - Kokkos::Experimental::Max reducer_view_init(max_view_init,init); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_view_init); + Kokkos::Experimental::Max< Scalar > reducer_view_init( max_view_init, init ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view_init ); + Scalar max_view_init_scalar = max_view_init(); - ASSERT_EQ(max_view_init_scalar,reference_max); + ASSERT_EQ( max_view_init_scalar, reference_max ); + Scalar max_view_init_view = reducer_view_init.result_view()(); - ASSERT_EQ(max_view_init_view,reference_max); + ASSERT_EQ( max_view_init_view, reference_max ); } } - 
static void test_minloc(int N) { - Kokkos::View values("Values",N); - auto h_values = Kokkos::create_mirror_view(values); - Scalar reference_min = std::numeric_limits::max(); + static void test_minloc( int N ) { + typedef typename Kokkos::Experimental::MinLoc< Scalar, int >::value_type value_type; + + Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); + auto h_values = Kokkos::create_mirror_view( values ); + Scalar reference_min = std::numeric_limits< Scalar >::max(); int reference_loc = -1; - for(int i=0; i::epsilon(); + } + else if ( h_values( i ) == reference_min ) { + // Make min unique. + h_values( i ) += std::numeric_limits< Scalar >::epsilon(); } } - Kokkos::deep_copy(values,h_values); + Kokkos::deep_copy( values, h_values ); MinLocFunctor f; - typedef typename Kokkos::Experimental::MinLoc::value_type value_type; f.values = values; - Scalar init = std::numeric_limits::max(); - + Scalar init = std::numeric_limits< Scalar >::max(); { value_type min_scalar; - Kokkos::Experimental::MinLoc reducer_scalar(min_scalar); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_scalar); - ASSERT_EQ(min_scalar.val,reference_min); - ASSERT_EQ(min_scalar.loc,reference_loc); + Kokkos::Experimental::MinLoc< Scalar, int > reducer_scalar( min_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + + ASSERT_EQ( min_scalar.val, reference_min ); + ASSERT_EQ( min_scalar.loc, reference_loc ); + value_type min_scalar_view = reducer_scalar.result_view()(); - ASSERT_EQ(min_scalar_view.val,reference_min); - ASSERT_EQ(min_scalar_view.loc,reference_loc); + ASSERT_EQ( min_scalar_view.val, reference_min ); + ASSERT_EQ( min_scalar_view.loc, reference_loc ); } + { value_type min_scalar_init; - Kokkos::Experimental::MinLoc reducer_scalar_init(min_scalar_init,init); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_scalar_init); - ASSERT_EQ(min_scalar_init.val,reference_min); - ASSERT_EQ(min_scalar_init.loc,reference_loc); + 
Kokkos::Experimental::MinLoc< Scalar, int > reducer_scalar_init( min_scalar_init, init ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar_init ); + + ASSERT_EQ( min_scalar_init.val, reference_min ); + ASSERT_EQ( min_scalar_init.loc, reference_loc ); + value_type min_scalar_init_view = reducer_scalar_init.result_view()(); - ASSERT_EQ(min_scalar_init_view.val,reference_min); - ASSERT_EQ(min_scalar_init_view.loc,reference_loc); + ASSERT_EQ( min_scalar_init_view.val, reference_min ); + ASSERT_EQ( min_scalar_init_view.loc, reference_loc ); } + { - Kokkos::View min_view("View"); - Kokkos::Experimental::MinLoc reducer_view(min_view); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_view); + Kokkos::View< value_type, Kokkos::HostSpace > min_view( "View" ); + Kokkos::Experimental::MinLoc< Scalar, int > reducer_view( min_view ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + value_type min_view_scalar = min_view(); - ASSERT_EQ(min_view_scalar.val,reference_min); - ASSERT_EQ(min_view_scalar.loc,reference_loc); + ASSERT_EQ( min_view_scalar.val, reference_min ); + ASSERT_EQ( min_view_scalar.loc, reference_loc ); + value_type min_view_view = reducer_view.result_view()(); - ASSERT_EQ(min_view_view.val,reference_min); - ASSERT_EQ(min_view_view.loc,reference_loc); + ASSERT_EQ( min_view_view.val, reference_min ); + ASSERT_EQ( min_view_view.loc, reference_loc ); } + { - Kokkos::View min_view_init("View"); - Kokkos::Experimental::MinLoc reducer_view_init(min_view_init,init); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_view_init); + Kokkos::View< value_type, Kokkos::HostSpace > min_view_init( "View" ); + Kokkos::Experimental::MinLoc< Scalar, int > reducer_view_init( min_view_init, init ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view_init ); + value_type min_view_init_scalar = min_view_init(); - 
ASSERT_EQ(min_view_init_scalar.val,reference_min); - ASSERT_EQ(min_view_init_scalar.loc,reference_loc); + ASSERT_EQ( min_view_init_scalar.val, reference_min ); + ASSERT_EQ( min_view_init_scalar.loc, reference_loc ); + value_type min_view_init_view = reducer_view_init.result_view()(); - ASSERT_EQ(min_view_init_view.val,reference_min); - ASSERT_EQ(min_view_init_view.loc,reference_loc); + ASSERT_EQ( min_view_init_view.val, reference_min ); + ASSERT_EQ( min_view_init_view.loc, reference_loc ); } } - static void test_maxloc(int N) { - Kokkos::View values("Values",N); - auto h_values = Kokkos::create_mirror_view(values); - Scalar reference_max = std::numeric_limits::min(); + static void test_maxloc( int N ) { + typedef typename Kokkos::Experimental::MaxLoc< Scalar, int >::value_type value_type; + + Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); + auto h_values = Kokkos::create_mirror_view( values ); + Scalar reference_max = std::numeric_limits< Scalar >::min(); int reference_loc = -1; - for(int i=0; ireference_max) { - reference_max = h_values(i); + + for ( int i = 0; i < N; i++ ) { + h_values( i ) = (Scalar) ( rand() % 100000 ); + + if ( h_values( i ) > reference_max ) { + reference_max = h_values( i ); reference_loc = i; - } else if (h_values(i) == reference_max) { - // make max unique - h_values(i) -= std::numeric_limits::epsilon(); + } + else if ( h_values( i ) == reference_max ) { + // Make max unique. 
+ h_values( i ) -= std::numeric_limits< Scalar >::epsilon(); } } - Kokkos::deep_copy(values,h_values); + Kokkos::deep_copy( values, h_values ); MaxLocFunctor f; - typedef typename Kokkos::Experimental::MaxLoc::value_type value_type; f.values = values; - Scalar init = std::numeric_limits::min(); - + Scalar init = std::numeric_limits< Scalar >::min(); { value_type max_scalar; - Kokkos::Experimental::MaxLoc reducer_scalar(max_scalar); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_scalar); - ASSERT_EQ(max_scalar.val,reference_max); - ASSERT_EQ(max_scalar.loc,reference_loc); + Kokkos::Experimental::MaxLoc< Scalar, int > reducer_scalar( max_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + + ASSERT_EQ( max_scalar.val, reference_max ); + ASSERT_EQ( max_scalar.loc, reference_loc ); + value_type max_scalar_view = reducer_scalar.result_view()(); - ASSERT_EQ(max_scalar_view.val,reference_max); - ASSERT_EQ(max_scalar_view.loc,reference_loc); + ASSERT_EQ( max_scalar_view.val, reference_max ); + ASSERT_EQ( max_scalar_view.loc, reference_loc ); } + { value_type max_scalar_init; - Kokkos::Experimental::MaxLoc reducer_scalar_init(max_scalar_init,init); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_scalar_init); - ASSERT_EQ(max_scalar_init.val,reference_max); - ASSERT_EQ(max_scalar_init.loc,reference_loc); + Kokkos::Experimental::MaxLoc< Scalar, int > reducer_scalar_init( max_scalar_init, init ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar_init ); + + ASSERT_EQ( max_scalar_init.val, reference_max ); + ASSERT_EQ( max_scalar_init.loc, reference_loc ); + value_type max_scalar_init_view = reducer_scalar_init.result_view()(); - ASSERT_EQ(max_scalar_init_view.val,reference_max); - ASSERT_EQ(max_scalar_init_view.loc,reference_loc); + ASSERT_EQ( max_scalar_init_view.val, reference_max ); + ASSERT_EQ( max_scalar_init_view.loc, reference_loc ); } + { - Kokkos::View 
max_view("View"); - Kokkos::Experimental::MaxLoc reducer_view(max_view); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_view); + Kokkos::View< value_type, Kokkos::HostSpace > max_view( "View" ); + Kokkos::Experimental::MaxLoc< Scalar, int > reducer_view( max_view ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + value_type max_view_scalar = max_view(); - ASSERT_EQ(max_view_scalar.val,reference_max); - ASSERT_EQ(max_view_scalar.loc,reference_loc); + ASSERT_EQ( max_view_scalar.val, reference_max ); + ASSERT_EQ( max_view_scalar.loc, reference_loc ); + value_type max_view_view = reducer_view.result_view()(); - ASSERT_EQ(max_view_view.val,reference_max); - ASSERT_EQ(max_view_view.loc,reference_loc); + ASSERT_EQ( max_view_view.val, reference_max ); + ASSERT_EQ( max_view_view.loc, reference_loc ); } + { - Kokkos::View max_view_init("View"); - Kokkos::Experimental::MaxLoc reducer_view_init(max_view_init,init); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_view_init); + Kokkos::View< value_type, Kokkos::HostSpace > max_view_init( "View" ); + Kokkos::Experimental::MaxLoc< Scalar, int > reducer_view_init( max_view_init, init ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view_init ); + value_type max_view_init_scalar = max_view_init(); - ASSERT_EQ(max_view_init_scalar.val,reference_max); - ASSERT_EQ(max_view_init_scalar.loc,reference_loc); + ASSERT_EQ( max_view_init_scalar.val, reference_max ); + ASSERT_EQ( max_view_init_scalar.loc, reference_loc ); + value_type max_view_init_view = reducer_view_init.result_view()(); - ASSERT_EQ(max_view_init_view.val,reference_max); - ASSERT_EQ(max_view_init_view.loc,reference_loc); + ASSERT_EQ( max_view_init_view.val, reference_max ); + ASSERT_EQ( max_view_init_view.loc, reference_loc ); } } - static void test_minmaxloc(int N) { - Kokkos::View values("Values",N); - auto h_values = Kokkos::create_mirror_view(values); - Scalar 
reference_max = std::numeric_limits::min(); - Scalar reference_min = std::numeric_limits::max(); + static void test_minmaxloc( int N ) { + typedef typename Kokkos::Experimental::MinMaxLoc< Scalar, int >::value_type value_type; + + Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); + auto h_values = Kokkos::create_mirror_view( values ); + Scalar reference_max = std::numeric_limits< Scalar >::min(); + Scalar reference_min = std::numeric_limits< Scalar >::max(); int reference_minloc = -1; int reference_maxloc = -1; - for(int i=0; ireference_max) { - reference_max = h_values(i); + + for ( int i = 0; i < N; i++ ) { + if ( h_values( i ) > reference_max ) { + reference_max = h_values( i ); reference_maxloc = i; - } else if (h_values(i) == reference_max) { - // make max unique - h_values(i) -= std::numeric_limits::epsilon(); + } + else if ( h_values( i ) == reference_max ) { + // Make max unique. + h_values( i ) -= std::numeric_limits< Scalar >::epsilon(); } } - for(int i=0; i::epsilon(); + } + else if ( h_values( i ) == reference_min ) { + // Make min unique. 
+ h_values( i ) += std::numeric_limits< Scalar >::epsilon(); } } - Kokkos::deep_copy(values,h_values); + + Kokkos::deep_copy( values, h_values ); MinMaxLocFunctor f; - typedef typename Kokkos::Experimental::MinMaxLoc::value_type value_type; f.values = values; - Scalar init_min = std::numeric_limits::max(); - Scalar init_max = std::numeric_limits::min(); - + Scalar init_min = std::numeric_limits< Scalar >::max(); + Scalar init_max = std::numeric_limits< Scalar >::min(); { value_type minmax_scalar; - Kokkos::Experimental::MinMaxLoc reducer_scalar(minmax_scalar); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_scalar); - ASSERT_EQ(minmax_scalar.min_val,reference_min); - for(int i=0; i reducer_scalar( minmax_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + + ASSERT_EQ( minmax_scalar.min_val, reference_min ); + + for ( int i = 0; i < N; i++ ) { + if ( ( i == minmax_scalar.min_loc ) && ( h_values( i ) == reference_min ) ) { reference_minloc = i; + } } - ASSERT_EQ(minmax_scalar.min_loc,reference_minloc); - ASSERT_EQ(minmax_scalar.max_val,reference_max); - for(int i=0; i reducer_scalar_init(minmax_scalar_init,init_min,init_max); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_scalar_init); - ASSERT_EQ(minmax_scalar_init.min_val,reference_min); - ASSERT_EQ(minmax_scalar_init.min_loc,reference_minloc); - ASSERT_EQ(minmax_scalar_init.max_val,reference_max); - ASSERT_EQ(minmax_scalar_init.max_loc,reference_maxloc); + Kokkos::Experimental::MinMaxLoc< Scalar, int > reducer_scalar_init( minmax_scalar_init, init_min, init_max ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar_init ); + + ASSERT_EQ( minmax_scalar_init.min_val, reference_min ); + ASSERT_EQ( minmax_scalar_init.min_loc, reference_minloc ); + ASSERT_EQ( minmax_scalar_init.max_val, reference_max ); + ASSERT_EQ( minmax_scalar_init.max_loc, reference_maxloc ); + value_type minmax_scalar_init_view = 
reducer_scalar_init.result_view()(); - ASSERT_EQ(minmax_scalar_init_view.min_val,reference_min); - ASSERT_EQ(minmax_scalar_init_view.min_loc,reference_minloc); - ASSERT_EQ(minmax_scalar_init_view.max_val,reference_max); - ASSERT_EQ(minmax_scalar_init_view.max_loc,reference_maxloc); + ASSERT_EQ( minmax_scalar_init_view.min_val, reference_min ); + ASSERT_EQ( minmax_scalar_init_view.min_loc, reference_minloc ); + ASSERT_EQ( minmax_scalar_init_view.max_val, reference_max ); + ASSERT_EQ( minmax_scalar_init_view.max_loc, reference_maxloc ); } + { - Kokkos::View minmax_view("View"); - Kokkos::Experimental::MinMaxLoc reducer_view(minmax_view); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_view); + Kokkos::View< value_type, Kokkos::HostSpace > minmax_view( "View" ); + Kokkos::Experimental::MinMaxLoc< Scalar, int > reducer_view( minmax_view ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + value_type minmax_view_scalar = minmax_view(); - ASSERT_EQ(minmax_view_scalar.min_val,reference_min); - ASSERT_EQ(minmax_view_scalar.min_loc,reference_minloc); - ASSERT_EQ(minmax_view_scalar.max_val,reference_max); - ASSERT_EQ(minmax_view_scalar.max_loc,reference_maxloc); + ASSERT_EQ( minmax_view_scalar.min_val, reference_min ); + ASSERT_EQ( minmax_view_scalar.min_loc, reference_minloc ); + ASSERT_EQ( minmax_view_scalar.max_val, reference_max ); + ASSERT_EQ( minmax_view_scalar.max_loc, reference_maxloc ); + value_type minmax_view_view = reducer_view.result_view()(); - ASSERT_EQ(minmax_view_view.min_val,reference_min); - ASSERT_EQ(minmax_view_view.min_loc,reference_minloc); - ASSERT_EQ(minmax_view_view.max_val,reference_max); - ASSERT_EQ(minmax_view_view.max_loc,reference_maxloc); + ASSERT_EQ( minmax_view_view.min_val, reference_min ); + ASSERT_EQ( minmax_view_view.min_loc, reference_minloc ); + ASSERT_EQ( minmax_view_view.max_val, reference_max ); + ASSERT_EQ( minmax_view_view.max_loc, reference_maxloc ); } + { - Kokkos::View 
minmax_view_init("View"); - Kokkos::Experimental::MinMaxLoc reducer_view_init(minmax_view_init,init_min,init_max); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_view_init); + Kokkos::View< value_type, Kokkos::HostSpace > minmax_view_init( "View" ); + Kokkos::Experimental::MinMaxLoc< Scalar, int > reducer_view_init( minmax_view_init, init_min, init_max ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view_init ); + value_type minmax_view_init_scalar = minmax_view_init(); - ASSERT_EQ(minmax_view_init_scalar.min_val,reference_min); - ASSERT_EQ(minmax_view_init_scalar.min_loc,reference_minloc); - ASSERT_EQ(minmax_view_init_scalar.max_val,reference_max); - ASSERT_EQ(minmax_view_init_scalar.max_loc,reference_maxloc); + ASSERT_EQ( minmax_view_init_scalar.min_val, reference_min ); + ASSERT_EQ( minmax_view_init_scalar.min_loc, reference_minloc ); + ASSERT_EQ( minmax_view_init_scalar.max_val, reference_max ); + ASSERT_EQ( minmax_view_init_scalar.max_loc, reference_maxloc ); + value_type minmax_view_init_view = reducer_view_init.result_view()(); - ASSERT_EQ(minmax_view_init_view.min_val,reference_min); - ASSERT_EQ(minmax_view_init_view.min_loc,reference_minloc); - ASSERT_EQ(minmax_view_init_view.max_val,reference_max); - ASSERT_EQ(minmax_view_init_view.max_loc,reference_maxloc); + ASSERT_EQ( minmax_view_init_view.min_val, reference_min ); + ASSERT_EQ( minmax_view_init_view.min_loc, reference_minloc ); + ASSERT_EQ( minmax_view_init_view.max_val, reference_max ); + ASSERT_EQ( minmax_view_init_view.max_loc, reference_maxloc ); } } - static void test_BAnd(int N) { - Kokkos::View values("Values",N); - auto h_values = Kokkos::create_mirror_view(values); - Scalar reference_band = Scalar() | (~Scalar()); - for(int i=0; i values( "Values", N ); + auto h_values = Kokkos::create_mirror_view( values ); + Scalar reference_band = Scalar() | ( ~Scalar() ); + + for ( int i = 0; i < N; i++ ) { + h_values( i ) = (Scalar) ( rand() % 100000 + 1 ); 
+ reference_band = reference_band & h_values( i ); } - Kokkos::deep_copy(values,h_values); + Kokkos::deep_copy( values, h_values ); BAndFunctor f; f.values = values; - Scalar init = Scalar() | (~Scalar()); + Scalar init = Scalar() | ( ~Scalar() ); { Scalar band_scalar = init; - Kokkos::Experimental::BAnd reducer_scalar(band_scalar); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_scalar); - ASSERT_EQ(band_scalar,reference_band); + Kokkos::Experimental::BAnd< Scalar > reducer_scalar( band_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + + ASSERT_EQ( band_scalar, reference_band ); Scalar band_scalar_view = reducer_scalar.result_view()(); - ASSERT_EQ(band_scalar_view,reference_band); + + ASSERT_EQ( band_scalar_view, reference_band ); } { - Kokkos::View band_view("View"); + Kokkos::View< Scalar, Kokkos::HostSpace > band_view( "View" ); band_view() = init; - Kokkos::Experimental::BAnd reducer_view(band_view); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_view); + Kokkos::Experimental::BAnd< Scalar > reducer_view( band_view ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Scalar band_view_scalar = band_view(); - ASSERT_EQ(band_view_scalar,reference_band); + ASSERT_EQ( band_view_scalar, reference_band ); + Scalar band_view_view = reducer_view.result_view()(); - ASSERT_EQ(band_view_view,reference_band); + ASSERT_EQ( band_view_view, reference_band ); } } - static void test_BOr(int N) { - Kokkos::View values("Values",N); - auto h_values = Kokkos::create_mirror_view(values); - Scalar reference_bor = Scalar() & (~Scalar()); - for(int i=0; i values( "Values", N ); + auto h_values = Kokkos::create_mirror_view( values ); + Scalar reference_bor = Scalar() & ( ~Scalar() ); + + for ( int i = 0; i < N; i++ ) { + h_values( i ) = (Scalar) ( ( rand() % 100000 + 1 ) * 2 ); + reference_bor = reference_bor | h_values( i ); } - Kokkos::deep_copy(values,h_values); + 
Kokkos::deep_copy( values, h_values ); BOrFunctor f; f.values = values; - Scalar init = Scalar() & (~Scalar()); + Scalar init = Scalar() & ( ~Scalar() ); { Scalar bor_scalar = init; - Kokkos::Experimental::BOr reducer_scalar(bor_scalar); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_scalar); - ASSERT_EQ(bor_scalar,reference_bor); + Kokkos::Experimental::BOr< Scalar > reducer_scalar( bor_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + + ASSERT_EQ( bor_scalar, reference_bor ); + Scalar bor_scalar_view = reducer_scalar.result_view()(); - ASSERT_EQ(bor_scalar_view,reference_bor); + ASSERT_EQ( bor_scalar_view, reference_bor ); } { - Kokkos::View bor_view("View"); + Kokkos::View< Scalar, Kokkos::HostSpace > bor_view( "View" ); bor_view() = init; - Kokkos::Experimental::BOr reducer_view(bor_view); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_view); + Kokkos::Experimental::BOr< Scalar > reducer_view( bor_view ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Scalar bor_view_scalar = bor_view(); - ASSERT_EQ(bor_view_scalar,reference_bor); + ASSERT_EQ( bor_view_scalar, reference_bor ); + Scalar bor_view_view = reducer_view.result_view()(); - ASSERT_EQ(bor_view_view,reference_bor); + ASSERT_EQ( bor_view_view, reference_bor ); } } - static void test_BXor(int N) { - Kokkos::View values("Values",N); - auto h_values = Kokkos::create_mirror_view(values); - Scalar reference_bxor = Scalar() & (~Scalar()); - for(int i=0; i values( "Values", N ); + auto h_values = Kokkos::create_mirror_view( values ); + Scalar reference_bxor = Scalar() & ( ~Scalar() ); + + for ( int i = 0; i < N; i++ ) { + h_values( i ) = (Scalar) ( ( rand() % 100000 + 1 ) * 2 ); + reference_bxor = reference_bxor ^ h_values( i ); } - Kokkos::deep_copy(values,h_values); + Kokkos::deep_copy( values, h_values ); BXorFunctor f; f.values = values; - Scalar init = Scalar() & (~Scalar()); + Scalar 
init = Scalar() & ( ~Scalar() ); { Scalar bxor_scalar = init; - Kokkos::Experimental::BXor reducer_scalar(bxor_scalar); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_scalar); - ASSERT_EQ(bxor_scalar,reference_bxor); + Kokkos::Experimental::BXor< Scalar > reducer_scalar( bxor_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + + ASSERT_EQ( bxor_scalar, reference_bxor ); + Scalar bxor_scalar_view = reducer_scalar.result_view()(); - ASSERT_EQ(bxor_scalar_view,reference_bxor); + ASSERT_EQ( bxor_scalar_view, reference_bxor ); } { - Kokkos::View bxor_view("View"); + Kokkos::View< Scalar, Kokkos::HostSpace > bxor_view( "View" ); bxor_view() = init; - Kokkos::Experimental::BXor reducer_view(bxor_view); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_view); + Kokkos::Experimental::BXor< Scalar > reducer_view( bxor_view ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Scalar bxor_view_scalar = bxor_view(); - ASSERT_EQ(bxor_view_scalar,reference_bxor); + ASSERT_EQ( bxor_view_scalar, reference_bxor ); + Scalar bxor_view_view = reducer_view.result_view()(); - ASSERT_EQ(bxor_view_view,reference_bxor); + ASSERT_EQ( bxor_view_view, reference_bxor ); } } - static void test_LAnd(int N) { - Kokkos::View values("Values",N); - auto h_values = Kokkos::create_mirror_view(values); + static void test_LAnd( int N ) { + Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); + auto h_values = Kokkos::create_mirror_view( values ); Scalar reference_land = 1; - for(int i=0; i reducer_scalar(land_scalar); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_scalar); - ASSERT_EQ(land_scalar,reference_land); + Kokkos::Experimental::LAnd< Scalar > reducer_scalar( land_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + + ASSERT_EQ( land_scalar, reference_land ); + Scalar land_scalar_view = 
reducer_scalar.result_view()(); - ASSERT_EQ(land_scalar_view,reference_land); + ASSERT_EQ( land_scalar_view, reference_land ); } { - Kokkos::View land_view("View"); + Kokkos::View< Scalar, Kokkos::HostSpace > land_view( "View" ); land_view() = init; - Kokkos::Experimental::LAnd reducer_view(land_view); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_view); + Kokkos::Experimental::LAnd< Scalar > reducer_view( land_view ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Scalar land_view_scalar = land_view(); - ASSERT_EQ(land_view_scalar,reference_land); + ASSERT_EQ( land_view_scalar, reference_land ); + Scalar land_view_view = reducer_view.result_view()(); - ASSERT_EQ(land_view_view,reference_land); + ASSERT_EQ( land_view_view, reference_land ); } } - static void test_LOr(int N) { - Kokkos::View values("Values",N); - auto h_values = Kokkos::create_mirror_view(values); + static void test_LOr( int N ) { + Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); + auto h_values = Kokkos::create_mirror_view( values ); Scalar reference_lor = 0; - for(int i=0; i reducer_scalar(lor_scalar); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_scalar); - ASSERT_EQ(lor_scalar,reference_lor); + Kokkos::Experimental::LOr< Scalar > reducer_scalar( lor_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + + ASSERT_EQ( lor_scalar, reference_lor ); + Scalar lor_scalar_view = reducer_scalar.result_view()(); - ASSERT_EQ(lor_scalar_view,reference_lor); + ASSERT_EQ( lor_scalar_view, reference_lor ); } { - Kokkos::View lor_view("View"); + Kokkos::View< Scalar, Kokkos::HostSpace > lor_view( "View" ); lor_view() = init; - Kokkos::Experimental::LOr reducer_view(lor_view); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_view); + Kokkos::Experimental::LOr< Scalar > reducer_view( lor_view ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, 
reducer_view ); + Scalar lor_view_scalar = lor_view(); - ASSERT_EQ(lor_view_scalar,reference_lor); + ASSERT_EQ( lor_view_scalar, reference_lor ); + Scalar lor_view_view = reducer_view.result_view()(); - ASSERT_EQ(lor_view_view,reference_lor); + ASSERT_EQ( lor_view_view, reference_lor ); } } - static void test_LXor(int N) { - Kokkos::View values("Values",N); - auto h_values = Kokkos::create_mirror_view(values); + static void test_LXor( int N ) { + Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); + auto h_values = Kokkos::create_mirror_view( values ); Scalar reference_lxor = 0; - for(int i=0; i reducer_scalar(lxor_scalar); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_scalar); - ASSERT_EQ(lxor_scalar,reference_lxor); + Kokkos::Experimental::LXor< Scalar > reducer_scalar( lxor_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + + ASSERT_EQ( lxor_scalar, reference_lxor ); + Scalar lxor_scalar_view = reducer_scalar.result_view()(); - ASSERT_EQ(lxor_scalar_view,reference_lxor); + ASSERT_EQ( lxor_scalar_view, reference_lxor ); } { - Kokkos::View lxor_view("View"); + Kokkos::View< Scalar, Kokkos::HostSpace > lxor_view( "View" ); lxor_view() = init; - Kokkos::Experimental::LXor reducer_view(lxor_view); - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,N),f,reducer_view); + Kokkos::Experimental::LXor< Scalar > reducer_view( lxor_view ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Scalar lxor_view_scalar = lxor_view(); - ASSERT_EQ(lxor_view_scalar,reference_lxor); + ASSERT_EQ( lxor_view_scalar, reference_lxor ); + Scalar lxor_view_view = reducer_view.result_view()(); - ASSERT_EQ(lxor_view_view,reference_lxor); + ASSERT_EQ( lxor_view_view, reference_lxor ); } } static void execute_float() { - test_sum(10001); - test_prod(35); - test_min(10003); - test_minloc(10003); - test_max(10007); - test_maxloc(10007); - test_minmaxloc(10007); + test_sum( 10001 ); + 
test_prod( 35 ); + test_min( 10003 ); + test_minloc( 10003 ); + test_max( 10007 ); + test_maxloc( 10007 ); + test_minmaxloc( 10007 ); } static void execute_integer() { - test_sum(10001); - test_prod(35); - test_min(10003); - test_minloc(10003); - test_max(10007); - test_maxloc(10007); - test_minmaxloc(10007); - test_BAnd(35); - test_BOr(35); - test_BXor(35); - test_LAnd(35); - test_LOr(35); - test_LXor(35); + test_sum( 10001 ); + test_prod( 35 ); + test_min( 10003 ); + test_minloc( 10003 ); + test_max( 10007 ); + test_maxloc( 10007 ); + test_minmaxloc( 10007 ); + test_BAnd( 35 ); + test_BOr( 35 ); + test_BXor( 35 ); + test_LAnd( 35 ); + test_LOr( 35 ); + test_LXor( 35 ); } static void execute_basic() { - test_sum(10001); - test_prod(35); + test_sum( 10001 ); + test_prod( 35 ); } }; -} - -/*--------------------------------------------------------------------------*/ +} // namespace Test diff --git a/lib/kokkos/core/unit_test/TestScan.hpp b/lib/kokkos/core/unit_test/TestScan.hpp index 1a9811a854..547e034976 100644 --- a/lib/kokkos/core/unit_test/TestScan.hpp +++ b/lib/kokkos/core/unit_test/TestScan.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,82 +36,81 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ -/*--------------------------------------------------------------------------*/ - #include namespace Test { -template< class Device , class WorkSpec = size_t > +template< class Device, class WorkSpec = size_t > struct TestScan { + typedef Device execution_space; + typedef long int value_type; - typedef Device execution_space ; - typedef long int value_type ; - - Kokkos::View > errors; + Kokkos::View< int, Device, Kokkos::MemoryTraits > errors; KOKKOS_INLINE_FUNCTION - void operator()( const int iwork , value_type & update , const bool final_pass ) const + void operator()( const int iwork, value_type & update, const bool final_pass ) const { - const value_type n = iwork + 1 ; - const value_type imbalance = ( (1000 <= n) && (0 == n % 1000) ) ? 1000 : 0 ; + const value_type n = iwork + 1; + const value_type imbalance = ( ( 1000 <= n ) && ( 0 == n % 1000 ) ) ? 1000 : 0; // Insert an artificial load imbalance - for ( value_type i = 0 ; i < imbalance ; ++i ) { ++update ; } + for ( value_type i = 0; i < imbalance; ++i ) { ++update; } - update += n - imbalance ; + update += n - imbalance; if ( final_pass ) { const value_type answer = n & 1 ? 
( n * ( ( n + 1 ) / 2 ) ) : ( ( n / 2 ) * ( n + 1 ) ); if ( answer != update ) { errors()++; - if(errors()<20) - printf("TestScan(%d,%ld) != %ld\n",iwork,update,answer); + + if ( errors() < 20 ) { + printf( "TestScan(%d,%ld) != %ld\n", iwork, update, answer ); + } } } } KOKKOS_INLINE_FUNCTION - void init( value_type & update ) const { update = 0 ; } + void init( value_type & update ) const { update = 0; } KOKKOS_INLINE_FUNCTION - void join( volatile value_type & update , + void join( volatile value_type & update, volatile const value_type & input ) const - { update += input ; } + { update += input; } TestScan( const WorkSpec & N ) - { - Kokkos::View errors_a("Errors"); - Kokkos::deep_copy(errors_a,0); - errors = errors_a; - parallel_scan( N , *this ); - } + { + Kokkos::View< int, Device > errors_a( "Errors" ); + Kokkos::deep_copy( errors_a, 0 ); + errors = errors_a; + + parallel_scan( N , *this ); + } TestScan( const WorkSpec & Start , const WorkSpec & N ) - { - typedef Kokkos::RangePolicy exec_policy ; + { + typedef Kokkos::RangePolicy< execution_space > exec_policy ; - Kokkos::View errors_a("Errors"); - Kokkos::deep_copy(errors_a,0); - errors = errors_a; + Kokkos::View< int, Device > errors_a( "Errors" ); + Kokkos::deep_copy( errors_a, 0 ); + errors = errors_a; - parallel_scan( exec_policy( Start , N ) , *this ); - } - - static void test_range( const WorkSpec & begin , const WorkSpec & end ) - { - for ( WorkSpec i = begin ; i < end ; ++i ) { - (void) TestScan( i ); - } + parallel_scan( exec_policy( Start , N ) , *this ); + } + + static void test_range( const WorkSpec & begin, const WorkSpec & end ) + { + for ( WorkSpec i = begin; i < end; ++i ) { + (void) TestScan( i ); } + } }; -} - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/TestSharedAlloc.hpp b/lib/kokkos/core/unit_test/TestSharedAlloc.hpp index 291f9f60e4..6eca6bb38d 100644 --- a/lib/kokkos/core/unit_test/TestSharedAlloc.hpp +++ b/lib/kokkos/core/unit_test/TestSharedAlloc.hpp @@ -1,13 +1,13 
@@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -54,162 +54,157 @@ namespace Test { struct SharedAllocDestroy { + volatile int * count; - volatile int * count ; - - SharedAllocDestroy() = default ; + SharedAllocDestroy() = default; SharedAllocDestroy( int * arg ) : count( arg ) {} void destroy_shared_allocation() - { - Kokkos::atomic_increment( count ); - } - + { + Kokkos::atomic_increment( count ); + } }; -template< class MemorySpace , class ExecutionSpace > +template< class MemorySpace, class ExecutionSpace > void test_shared_alloc() { #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + typedef const Kokkos::Impl::SharedAllocationHeader Header; + typedef Kokkos::Impl::SharedAllocationTracker Tracker; + typedef Kokkos::Impl::SharedAllocationRecord< void, void > RecordBase; + typedef Kokkos::Impl::SharedAllocationRecord< MemorySpace, void > RecordMemS; + typedef Kokkos::Impl::SharedAllocationRecord< MemorySpace, SharedAllocDestroy > RecordFull; - typedef const Kokkos::Impl::SharedAllocationHeader Header ; - typedef Kokkos::Impl::SharedAllocationTracker Tracker ; - typedef Kokkos::Impl::SharedAllocationRecord< void , void > RecordBase ; - typedef Kokkos::Impl::SharedAllocationRecord< MemorySpace , void > RecordMemS ; - typedef Kokkos::Impl::SharedAllocationRecord< MemorySpace , SharedAllocDestroy > RecordFull ; 
+ static_assert( sizeof( Tracker ) == sizeof( int* ), "SharedAllocationTracker has wrong size!" ); - static_assert( sizeof(Tracker) == sizeof(int*), "SharedAllocationTracker has wrong size!" ); + MemorySpace s; - MemorySpace s ; - - const size_t N = 1200 ; - const size_t size = 8 ; + const size_t N = 1200; + const size_t size = 8; RecordMemS * rarray[ N ]; Header * harray[ N ]; - RecordMemS ** const r = rarray ; - Header ** const h = harray ; + RecordMemS ** const r = rarray; + Header ** const h = harray; + + Kokkos::RangePolicy< ExecutionSpace > range( 0, N ); - Kokkos::RangePolicy< ExecutionSpace > range(0,N); - - //---------------------------------------- { - // Since always executed on host space, leave [=] - Kokkos::parallel_for( range , [=]( size_t i ){ - char name[64] ; - sprintf(name,"test_%.2d",int(i)); + // Since always executed on host space, leave [=] + Kokkos::parallel_for( range, [=] ( size_t i ) { + char name[64]; + sprintf( name, "test_%.2d", int( i ) ); - r[i] = RecordMemS::allocate( s , name , size * ( i + 1 ) ); + r[i] = RecordMemS::allocate( s, name, size * ( i + 1 ) ); h[i] = Header::get_header( r[i]->data() ); - ASSERT_EQ( r[i]->use_count() , 0 ); + ASSERT_EQ( r[i]->use_count(), 0 ); - for ( size_t j = 0 ; j < ( i / 10 ) + 1 ; ++j ) RecordBase::increment( r[i] ); + for ( size_t j = 0; j < ( i / 10 ) + 1; ++j ) RecordBase::increment( r[i] ); - ASSERT_EQ( r[i]->use_count() , ( i / 10 ) + 1 ); - ASSERT_EQ( r[i] , RecordMemS::get_record( r[i]->data() ) ); + ASSERT_EQ( r[i]->use_count(), ( i / 10 ) + 1 ); + ASSERT_EQ( r[i], RecordMemS::get_record( r[i]->data() ) ); }); // Sanity check for the whole set of allocation records to which this record belongs. 
RecordBase::is_sane( r[0] ); - // RecordMemS::print_records( std::cout , s , true ); + // RecordMemS::print_records( std::cout, s, true ); - Kokkos::parallel_for( range , [=]( size_t i ){ - while ( 0 != ( r[i] = static_cast< RecordMemS *>( RecordBase::decrement( r[i] ) ) ) ) { + Kokkos::parallel_for( range, [=] ( size_t i ) { + while ( 0 != ( r[i] = static_cast< RecordMemS * >( RecordBase::decrement( r[i] ) ) ) ) { if ( r[i]->use_count() == 1 ) RecordBase::is_sane( r[i] ); } }); } - //---------------------------------------- + { - int destroy_count = 0 ; - SharedAllocDestroy counter( & destroy_count ); + int destroy_count = 0; + SharedAllocDestroy counter( &destroy_count ); - Kokkos::parallel_for( range , [=]( size_t i ){ - char name[64] ; - sprintf(name,"test_%.2d",int(i)); + Kokkos::parallel_for( range, [=] ( size_t i ) { + char name[64]; + sprintf( name, "test_%.2d", int( i ) ); - RecordFull * rec = RecordFull::allocate( s , name , size * ( i + 1 ) ); + RecordFull * rec = RecordFull::allocate( s, name, size * ( i + 1 ) ); - rec->m_destroy = counter ; + rec->m_destroy = counter; - r[i] = rec ; + r[i] = rec; h[i] = Header::get_header( r[i]->data() ); - ASSERT_EQ( r[i]->use_count() , 0 ); + ASSERT_EQ( r[i]->use_count(), 0 ); - for ( size_t j = 0 ; j < ( i / 10 ) + 1 ; ++j ) RecordBase::increment( r[i] ); + for ( size_t j = 0; j < ( i / 10 ) + 1; ++j ) RecordBase::increment( r[i] ); - ASSERT_EQ( r[i]->use_count() , ( i / 10 ) + 1 ); - ASSERT_EQ( r[i] , RecordMemS::get_record( r[i]->data() ) ); + ASSERT_EQ( r[i]->use_count(), ( i / 10 ) + 1 ); + ASSERT_EQ( r[i], RecordMemS::get_record( r[i]->data() ) ); }); RecordBase::is_sane( r[0] ); - Kokkos::parallel_for( range , [=]( size_t i ){ - while ( 0 != ( r[i] = static_cast< RecordMemS *>( RecordBase::decrement( r[i] ) ) ) ) { + Kokkos::parallel_for( range, [=] ( size_t i ) { + while ( 0 != ( r[i] = static_cast< RecordMemS * >( RecordBase::decrement( r[i] ) ) ) ) { if ( r[i]->use_count() == 1 ) RecordBase::is_sane( r[i] 
); } }); - ASSERT_EQ( destroy_count , int(N) ); + ASSERT_EQ( destroy_count, int( N ) ); } - //---------------------------------------- { - int destroy_count = 0 ; + int destroy_count = 0; { - RecordFull * rec = RecordFull::allocate( s , "test" , size ); + RecordFull * rec = RecordFull::allocate( s, "test", size ); - // ... Construction of the allocated { rec->data() , rec->size() } + // ... Construction of the allocated { rec->data(), rec->size() } - // Copy destruction function object into the allocation record + // Copy destruction function object into the allocation record. rec->m_destroy = SharedAllocDestroy( & destroy_count ); - ASSERT_EQ( rec->use_count() , 0 ); + ASSERT_EQ( rec->use_count(), 0 ); - // Start tracking, increments the use count from 0 to 1 - Tracker track ; + // Start tracking, increments the use count from 0 to 1. + Tracker track; track.assign_allocated_record_to_uninitialized( rec ); - ASSERT_EQ( rec->use_count() , 1 ); - ASSERT_EQ( track.use_count() , 1 ); + ASSERT_EQ( rec->use_count(), 1 ); + ASSERT_EQ( track.use_count(), 1 ); + + // Verify construction / destruction increment. 
+ for ( size_t i = 0; i < N; ++i ) { + ASSERT_EQ( rec->use_count(), 1 ); - // Verify construction / destruction increment - for ( size_t i = 0 ; i < N ; ++i ) { - ASSERT_EQ( rec->use_count() , 1 ); { - Tracker local_tracker ; + Tracker local_tracker; local_tracker.assign_allocated_record_to_uninitialized( rec ); - ASSERT_EQ( rec->use_count() , 2 ); - ASSERT_EQ( local_tracker.use_count() , 2 ); + ASSERT_EQ( rec->use_count(), 2 ); + ASSERT_EQ( local_tracker.use_count(), 2 ); } - ASSERT_EQ( rec->use_count() , 1 ); - ASSERT_EQ( track.use_count() , 1 ); + + ASSERT_EQ( rec->use_count(), 1 ); + ASSERT_EQ( track.use_count(), 1 ); } - Kokkos::parallel_for( range , [=]( size_t i ){ - Tracker local_tracker ; + Kokkos::parallel_for( range, [=] ( size_t i ) { + Tracker local_tracker; local_tracker.assign_allocated_record_to_uninitialized( rec ); - ASSERT_GT( rec->use_count() , 1 ); + ASSERT_GT( rec->use_count(), 1 ); }); - ASSERT_EQ( rec->use_count() , 1 ); - ASSERT_EQ( track.use_count() , 1 ); + ASSERT_EQ( rec->use_count(), 1 ); + ASSERT_EQ( track.use_count(), 1 ); // Destruction of 'track' object deallocates the 'rec' and invokes the destroy function object. } - ASSERT_EQ( destroy_count , 1 ); + ASSERT_EQ( destroy_count, 1 ); } #endif /* #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) */ } - -} - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/TestSynchronic.cpp b/lib/kokkos/core/unit_test/TestSynchronic.cpp deleted file mode 100644 index dc1abbd8b3..0000000000 --- a/lib/kokkos/core/unit_test/TestSynchronic.cpp +++ /dev/null @@ -1,449 +0,0 @@ -/* - -Copyright (c) 2014, NVIDIA Corporation -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. - -2. 
Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -//#undef _WIN32_WINNT -//#define _WIN32_WINNT 0x0602 - -#if defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || \ - defined(__APPLE__) || defined(__ARM_ARCH_8A) || defined(_CRAYC) - -// Skip for now - -#else - -#include - -#ifdef USEOMP -#include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include - -//#include
-//#undef __SYNCHRONIC_COMPATIBLE - -#include -#include - -#include "TestSynchronic.hpp" - -// Uncomment to allow test to dump output -//#define VERBOSE_TEST - -namespace Test { - -unsigned next_table[] = - { - 0, 1, 2, 3, //0-3 - 4, 4, 6, 6, //4-7 - 8, 8, 8, 8, //8-11 - 12, 12, 12, 12, //12-15 - 16, 16, 16, 16, //16-19 - 16, 16, 16, 16, //20-23 - 24, 24, 24, 24, //24-27 - 24, 24, 24, 24, //28-31 - 32, 32, 32, 32, //32-35 - 32, 32, 32, 32, //36-39 - 40, 40, 40, 40, //40-43 - 40, 40, 40, 40, //44-47 - 48, 48, 48, 48, //48-51 - 48, 48, 48, 48, //52-55 - 56, 56, 56, 56, //56-59 - 56, 56, 56, 56, //60-63 - }; - -//change this if you want to allow oversubscription of the system, by default only the range {1-(system size)} is tested -#define FOR_GAUNTLET(x) for(unsigned x = (std::min)(std::thread::hardware_concurrency()*8,unsigned(sizeof(next_table)/sizeof(unsigned))); x; x = next_table[x-1]) - -//set this to override the benchmark of barriers to use OMP barriers instead of n3998 std::barrier -//#define USEOMP - -#if defined(__SYNCHRONIC_COMPATIBLE) - #define PREFIX "futex-" -#else - #define PREFIX "backoff-" -#endif - -//this test uses a custom Mersenne twister to eliminate implementation variation -MersenneTwister mt; - -int dummya = 1, dummyb =1; - -int dummy1 = 1; -std::atomic dummy2(1); -std::atomic dummy3(1); - -double time_item(int const count = (int)1E8) { - - clock_t const start = clock(); - - for(int i = 0;i < count; ++i) - mt.integer(); - - clock_t const end = clock(); - double elapsed_seconds = (end - start) / double(CLOCKS_PER_SEC); - - return elapsed_seconds / count; -} -double time_nil(int const count = (int)1E08) { - - clock_t const start = clock(); - - dummy3 = count; - for(int i = 0;i < (int)1E6; ++i) { - if(dummy1) { - // Do some work while holding the lock - int workunits = dummy3;//(int) (mtc.poissonInterval((float)num_items_critical) + 0.5f); - for (int j = 1; j < workunits; j++) - dummy1 &= j; // Do one work unit - 
dummy2.fetch_add(dummy1,std::memory_order_relaxed); - } - } - - clock_t const end = clock(); - double elapsed_seconds = (end - start) / double(CLOCKS_PER_SEC); - - return elapsed_seconds / count; -} - - -template -void testmutex_inner(mutex_type& m, std::atomic& t,std::atomic& wc,std::atomic& wnc, int const num_iterations, - int const num_items_critical, int const num_items_noncritical, MersenneTwister& mtc, MersenneTwister& mtnc, bool skip) { - - for(int k = 0; k < num_iterations; ++k) { - - if(num_items_noncritical) { - // Do some work without holding the lock - int workunits = num_items_noncritical;//(int) (mtnc.poissonInterval((float)num_items_noncritical) + 0.5f); - for (int i = 1; i < workunits; i++) - mtnc.integer(); // Do one work unit - wnc.fetch_add(workunits,std::memory_order_relaxed); - } - - t.fetch_add(1,std::memory_order_relaxed); - - if(!skip) { - std::unique_lock l(m); - if(num_items_critical) { - // Do some work while holding the lock - int workunits = num_items_critical;//(int) (mtc.poissonInterval((float)num_items_critical) + 0.5f); - for (int i = 1; i < workunits; i++) - mtc.integer(); // Do one work unit - wc.fetch_add(workunits,std::memory_order_relaxed); - } - } - } -} -template -void testmutex_outer(std::map>& results, std::string const& name, double critical_fraction, double critical_duration) { - - std::ostringstream truename; - truename << name << " (f=" << critical_fraction << ",d=" << critical_duration << ")"; - - std::vector& data = results[truename.str()]; - - double const workItemTime = time_item() , - nilTime = time_nil(); - - int const num_items_critical = (critical_duration <= 0 ? 0 : (std::max)( int(critical_duration / workItemTime + 0.5), int(100 * nilTime / workItemTime + 0.5))), - num_items_noncritical = (num_items_critical <= 0 ? 
0 : int( ( 1 - critical_fraction ) * num_items_critical / critical_fraction + 0.5 )); - - FOR_GAUNTLET(num_threads) { - - //Kokkos::Impl::portable_sleep(std::chrono::microseconds(2000000)); - - int const num_iterations = (num_items_critical + num_items_noncritical != 0) ? -#ifdef __SYNCHRONIC_JUST_YIELD - int( 1 / ( 8 * workItemTime ) / (num_items_critical + num_items_noncritical) / num_threads + 0.5 ) : -#else - int( 1 / ( 8 * workItemTime ) / (num_items_critical + num_items_noncritical) / num_threads + 0.5 ) : -#endif -#ifdef WIN32 - int( 1 / workItemTime / (20 * num_threads * num_threads) ); -#else - int( 1 / workItemTime / (200 * num_threads * num_threads) ); -#endif - -#ifdef VERBOSE_TEST - std::cerr << "running " << truename.str() << " #" << num_threads << ", " << num_iterations << " * " << num_items_noncritical << "\n" << std::flush; -#endif - - - std::atomic t[2], wc[2], wnc[2]; - - clock_t start[2], end[2]; - for(int pass = 0; pass < 2; ++pass) { - - t[pass] = 0; - wc[pass] = 0; - wnc[pass] = 0; - - srand(num_threads); - std::vector randomsnc(num_threads), - randomsc(num_threads); - - mutex_type m; - - start[pass] = clock(); -#ifdef USEOMP - omp_set_num_threads(num_threads); - std::atomic _j(0); - #pragma omp parallel - { - int const j = _j.fetch_add(1,std::memory_order_relaxed); - testmutex_inner(m, t[pass], wc[pass], wnc[pass], num_iterations, num_items_critical, num_items_noncritical, randomsc[j], randomsnc[j], pass==0); - num_threads = omp_get_num_threads(); - } -#else - std::vector threads(num_threads); - for(unsigned j = 0; j < num_threads; ++j) - threads[j] = new std::thread([&,j](){ - testmutex_inner(m, t[pass], wc[pass], wnc[pass], num_iterations, num_items_critical, num_items_noncritical, randomsc[j], randomsnc[j], pass==0); - } - ); - for(unsigned j = 0; j < num_threads; ++j) { - threads[j]->join(); - delete threads[j]; - } -#endif - end[pass] = clock(); - } - if(t[0] != t[1]) throw std::string("mismatched iteration counts"); - if(wnc[0] != 
wnc[1]) throw std::string("mismatched work item counts"); - - double elapsed_seconds_0 = (end[0] - start[0]) / double(CLOCKS_PER_SEC), - elapsed_seconds_1 = (end[1] - start[1]) / double(CLOCKS_PER_SEC); - double time = (elapsed_seconds_1 - elapsed_seconds_0 - wc[1]*workItemTime) / num_iterations; - - data.push_back(time); -#ifdef VERBOSE_TEST - std::cerr << truename.str() << " : " << num_threads << "," << elapsed_seconds_1 / num_iterations << " - " << elapsed_seconds_0 / num_iterations << " - " << wc[1]*workItemTime/num_iterations << " = " << time << " \n"; -#endif - } -} - -template -void testbarrier_inner(barrier_type& b, int const num_threads, int const j, std::atomic& t,std::atomic& w, - int const num_iterations_odd, int const num_iterations_even, - int const num_items_noncritical, MersenneTwister& arg_mt, bool skip) { - - for(int k = 0; k < (std::max)(num_iterations_even,num_iterations_odd); ++k) { - - if(k >= (~j & 0x1 ? num_iterations_odd : num_iterations_even )) { - if(!skip) - b.arrive_and_drop(); - break; - } - - if(num_items_noncritical) { - // Do some work without holding the lock - int workunits = (int) (arg_mt.poissonInterval((float)num_items_noncritical) + 0.5f); - for (int i = 1; i < workunits; i++) - arg_mt.integer(); // Do one work unit - w.fetch_add(workunits,std::memory_order_relaxed); - } - - t.fetch_add(1,std::memory_order_relaxed); - - if(!skip) { - int const thiscount = (std::min)(k+1,num_iterations_odd)*((num_threads>>1)+(num_threads&1)) + (std::min)(k+1,num_iterations_even)*(num_threads>>1); - if(t.load(std::memory_order_relaxed) > thiscount) { - std::cerr << "FAILURE: some threads have run ahead of the barrier (" << t.load(std::memory_order_relaxed) << ">" << thiscount << ").\n"; - EXPECT_TRUE(false); - } -#ifdef USEOMP - #pragma omp barrier -#else - b.arrive_and_wait(); -#endif - if(t.load(std::memory_order_relaxed) < thiscount) { - std::cerr << "FAILURE: some threads have fallen behind the barrier (" << t.load(std::memory_order_relaxed) 
<< "<" << thiscount << ").\n"; - EXPECT_TRUE(false); - } - } - } -} -template -void testbarrier_outer(std::map>& results, std::string const& name, double barrier_frequency, double phase_duration, bool randomIterations = false) { - - std::vector& data = results[name]; - - double const workItemTime = time_item(); - int const num_items_noncritical = int( phase_duration / workItemTime + 0.5 ); - - FOR_GAUNTLET(num_threads) { - - int const num_iterations = int( barrier_frequency ); -#ifdef VERBOSE_TEST - std::cerr << "running " << name << " #" << num_threads << ", " << num_iterations << " * " << num_items_noncritical << "\r" << std::flush; -#endif - - srand(num_threads); - - MersenneTwister local_mt; - int const num_iterations_odd = randomIterations ? int(local_mt.poissonInterval((float)num_iterations)+0.5f) : num_iterations, - num_iterations_even = randomIterations ? int(local_mt.poissonInterval((float)num_iterations)+0.5f) : num_iterations; - - std::atomic t[2], w[2]; - std::chrono::time_point start[2], end[2]; - for(int pass = 0; pass < 2; ++pass) { - - t[pass] = 0; - w[pass] = 0; - - srand(num_threads); - std::vector randoms(num_threads); - - barrier_type b(num_threads); - - start[pass] = std::chrono::high_resolution_clock::now(); -#ifdef USEOMP - omp_set_num_threads(num_threads); - std::atomic _j(0); - #pragma omp parallel - { - int const j = _j.fetch_add(1,std::memory_order_relaxed); - testbarrier_inner(b, num_threads, j, t[pass], w[pass], num_iterations_odd, num_iterations_even, num_items_noncritical, randoms[j], pass==0); - num_threads = omp_get_num_threads(); - } -#else - std::vector threads(num_threads); - for(unsigned j = 0; j < num_threads; ++j) - threads[j] = new std::thread([&,j](){ - testbarrier_inner(b, num_threads, j, t[pass], w[pass], num_iterations_odd, num_iterations_even, num_items_noncritical, randoms[j], pass==0); - }); - for(unsigned j = 0; j < num_threads; ++j) { - threads[j]->join(); - delete threads[j]; - } -#endif - end[pass] = 
std::chrono::high_resolution_clock::now(); - } - - if(t[0] != t[1]) throw std::string("mismatched iteration counts"); - if(w[0] != w[1]) throw std::string("mismatched work item counts"); - - int const phases = (std::max)(num_iterations_odd, num_iterations_even); - - std::chrono::duration elapsed_seconds_0 = end[0]-start[0], - elapsed_seconds_1 = end[1]-start[1]; - double const time = (elapsed_seconds_1.count() - elapsed_seconds_0.count()) / phases; - - data.push_back(time); -#ifdef VERBOSE_TEST - std::cerr << name << " : " << num_threads << "," << elapsed_seconds_1.count() / phases << " - " << elapsed_seconds_0.count() / phases << " = " << time << " \n"; -#endif - } -} - -template -struct mutex_tester; -template -struct mutex_tester { - static void run(std::map>& results, std::string const name[], double critical_fraction, double critical_duration) { - testmutex_outer(results, *name, critical_fraction, critical_duration); - } -}; -template -struct mutex_tester { - static void run(std::map>& results, std::string const name[], double critical_fraction, double critical_duration) { - mutex_tester::run(results, name, critical_fraction, critical_duration); - mutex_tester::run(results, ++name, critical_fraction, critical_duration); - } -}; - -TEST( synchronic, main ) -{ - //warm up - time_item(); - - //measure up -#ifdef VERBOSE_TEST - std::cerr << "measuring work item speed...\r"; - std::cerr << "work item speed is " << time_item() << " per item, nil is " << time_nil() << "\n"; -#endif - try { - - std::pair testpoints[] = { {1, 0}, /*{1E-1, 10E-3}, {5E-1, 2E-6}, {3E-1, 50E-9},*/ }; - for(auto x : testpoints ) { - - std::map> results; - - //testbarrier_outer(results, PREFIX"bar 1khz 100us", 1E3, x.second); - - std::string const names[] = { - PREFIX"tkt", PREFIX"mcs", PREFIX"ttas", PREFIX"std" -#ifdef WIN32 - ,PREFIX"srw" -#endif - }; - - //run --> - - mutex_tester< - ticket_mutex, mcs_mutex, ttas_mutex, std::mutex -#ifdef WIN32 - ,srw_mutex -#endif - >::run(results, 
names, x.first, x.second); - - //<-- run - -#ifdef VERBOSE_TEST - std::cout << "threads"; - for(auto & i : results) - std::cout << ",\"" << i.first << '\"'; - std::cout << std::endl; - int j = 0; - FOR_GAUNTLET(num_threads) { - std::cout << num_threads; - for(auto & i : results) - std::cout << ',' << i.second[j]; - std::cout << std::endl; - ++j; - } -#endif - } - } - catch(std::string & e) { - std::cerr << "EXCEPTION : " << e << std::endl; - EXPECT_TRUE( false ); - } -} - -} // namespace Test - -#endif diff --git a/lib/kokkos/core/unit_test/TestSynchronic.hpp b/lib/kokkos/core/unit_test/TestSynchronic.hpp deleted file mode 100644 index f4341b9781..0000000000 --- a/lib/kokkos/core/unit_test/TestSynchronic.hpp +++ /dev/null @@ -1,241 +0,0 @@ -/* - -Copyright (c) 2014, NVIDIA Corporation -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
-IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef TEST_SYNCHRONIC_HPP -#define TEST_SYNCHRONIC_HPP - -#include -#include -#include - -namespace Test { - -template -struct dumb_mutex { - - dumb_mutex () : locked(0) { - } - - void lock() { - while(1) { - bool state = false; - if (locked.compare_exchange_weak(state,true,std::memory_order_acquire)) { - break; - } - while (locked.load(std::memory_order_relaxed)) { - if (!truly) { - Kokkos::Impl::portable_yield(); - } - } - } - } - - void unlock() { - locked.store(false,std::memory_order_release); - } - -private : - std::atomic locked; -}; - -#ifdef WIN32 -#include -#include -#include -struct srw_mutex { - - srw_mutex () { - InitializeSRWLock(&_lock); - } - - void lock() { - AcquireSRWLockExclusive(&_lock); - } - void unlock() { - ReleaseSRWLockExclusive(&_lock); - } - -private : - SRWLOCK _lock; -}; -#endif - -struct ttas_mutex { - - ttas_mutex() : locked(false) { - } - - ttas_mutex(const ttas_mutex&) = delete; - ttas_mutex& operator=(const ttas_mutex&) = delete; - - void lock() { - for(int i = 0;; ++i) { - bool state = false; - if(locked.compare_exchange_weak(state,true,std::memory_order_relaxed,Kokkos::Impl::notify_none)) - break; - locked.expect_update(true); - } - std::atomic_thread_fence(std::memory_order_acquire); - } - void unlock() { - locked.store(false,std::memory_order_release); - } - -private : - Kokkos::Impl::synchronic locked; -}; - -struct ticket_mutex { - - ticket_mutex() : active(0), queue(0) { - } - - ticket_mutex(const 
ticket_mutex&) = delete; - ticket_mutex& operator=(const ticket_mutex&) = delete; - - void lock() { - int const me = queue.fetch_add(1, std::memory_order_relaxed); - while(me != active.load_when_equal(me, std::memory_order_acquire)) - ; - } - - void unlock() { - active.fetch_add(1,std::memory_order_release); - } -private : - Kokkos::Impl::synchronic active; - std::atomic queue; -}; - -struct mcs_mutex { - - mcs_mutex() : head(nullptr) { - } - - mcs_mutex(const mcs_mutex&) = delete; - mcs_mutex& operator=(const mcs_mutex&) = delete; - - struct unique_lock { - - unique_lock(mcs_mutex & arg_m) : m(arg_m), next(nullptr), ready(false) { - - unique_lock * const h = m.head.exchange(this,std::memory_order_acquire); - if(__builtin_expect(h != nullptr,0)) { - h->next.store(this,std::memory_order_seq_cst,Kokkos::Impl::notify_one); - while(!ready.load_when_not_equal(false,std::memory_order_acquire)) - ; - } - } - - unique_lock(const unique_lock&) = delete; - unique_lock& operator=(const unique_lock&) = delete; - - ~unique_lock() { - unique_lock * h = this; - if(__builtin_expect(!m.head.compare_exchange_strong(h,nullptr,std::memory_order_release, std::memory_order_relaxed),0)) { - unique_lock * n = next.load(std::memory_order_relaxed); - while(!n) - n = next.load_when_not_equal(n,std::memory_order_relaxed); - n->ready.store(true,std::memory_order_release,Kokkos::Impl::notify_one); - } - } - - private: - mcs_mutex & m; - Kokkos::Impl::synchronic next; - Kokkos::Impl::synchronic ready; - }; - -private : - std::atomic head; -}; - -} - -namespace std { -template<> -struct unique_lock : Test::mcs_mutex::unique_lock { - unique_lock(Test::mcs_mutex & arg_m) : Test::mcs_mutex::unique_lock(arg_m) { - } - unique_lock(const unique_lock&) = delete; - unique_lock& operator=(const unique_lock&) = delete; -}; - -} - -/* #include */ -#include - -namespace Test { - -//------------------------------------- -// MersenneTwister -//------------------------------------- -#define MT_IA 397 -#define 
MT_LEN 624 - -class MersenneTwister -{ - volatile unsigned long m_buffer[MT_LEN][64/sizeof(unsigned long)]; - volatile int m_index; - -public: - MersenneTwister() { - for (int i = 0; i < MT_LEN; i++) - m_buffer[i][0] = rand(); - m_index = 0; - for (int i = 0; i < MT_LEN * 100; i++) - integer(); - } - unsigned long integer() { - // Indices - int i = m_index; - int i2 = m_index + 1; if (i2 >= MT_LEN) i2 = 0; // wrap-around - int j = m_index + MT_IA; if (j >= MT_LEN) j -= MT_LEN; // wrap-around - - // Twist - unsigned long s = (m_buffer[i][0] & 0x80000000) | (m_buffer[i2][0] & 0x7fffffff); - unsigned long r = m_buffer[j][0] ^ (s >> 1) ^ ((s & 1) * 0x9908B0DF); - m_buffer[m_index][0] = r; - m_index = i2; - - // Swizzle - r ^= (r >> 11); - r ^= (r << 7) & 0x9d2c5680UL; - r ^= (r << 15) & 0xefc60000UL; - r ^= (r >> 18); - return r; - } - float poissonInterval(float ooLambda) { - return -logf(1.0f - integer() * 2.3283e-10f) * ooLambda; - } -}; - -} // namespace Test - -#endif //TEST_HPP diff --git a/lib/kokkos/core/unit_test/TestTaskScheduler.hpp b/lib/kokkos/core/unit_test/TestTaskScheduler.hpp index 1134553980..57e47d4baa 100644 --- a/lib/kokkos/core/unit_test/TestTaskScheduler.hpp +++ b/lib/kokkos/core/unit_test/TestTaskScheduler.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,12 +36,11 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ - #ifndef KOKKOS_UNITTEST_TASKSCHEDULER_HPP #define KOKKOS_UNITTEST_TASKSCHEDULER_HPP @@ -51,9 +50,6 @@ #if defined( KOKKOS_ENABLE_TASKDAG ) -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - namespace TestTaskScheduler { namespace { @@ -61,14 +57,14 @@ namespace { inline long eval_fib( long n ) { - constexpr long mask = 0x03 ; + constexpr long mask = 0x03; - long fib[4] = { 0 , 1 , 1 , 2 }; + long fib[4] = { 0, 1, 1, 2 }; - for ( long i = 2 ; i <= n ; ++i ) { + for ( long i = 2; i <= n; ++i ) { fib[ i & mask ] = fib[ ( i - 1 ) & mask ] + fib[ ( i - 2 ) & mask ]; } - + return fib[ n & mask ]; } @@ -77,100 +73,93 @@ long eval_fib( long n ) template< typename Space > struct TestFib { - typedef Kokkos::TaskScheduler policy_type ; - typedef Kokkos::Future future_type ; - typedef long value_type ; + typedef Kokkos::TaskScheduler< Space > sched_type; + typedef Kokkos::Future< long, Space > future_type; + typedef long value_type; - policy_type policy ; - future_type fib_m1 ; - future_type fib_m2 ; - const value_type n ; + sched_type sched; + future_type fib_m1; + future_type fib_m2; + const value_type n; KOKKOS_INLINE_FUNCTION - TestFib( const policy_type & arg_policy , const value_type arg_n ) - : policy(arg_policy) - , fib_m1() , fib_m2() - , n( arg_n ) - {} + TestFib( const sched_type & arg_sched, const value_type arg_n ) + : sched( arg_sched ), fib_m1(), fib_m2(), n( arg_n ) {} KOKKOS_INLINE_FUNCTION - void operator()( typename policy_type::member_type & , value_type & result ) - { + void operator()( typename sched_type::member_type &, value_type & result ) + { #if 0 - printf( "\nTestFib(%ld) %d %d\n" - , n - , int( ! fib_m1.is_null() ) - , int( ! 
fib_m2.is_null() ) - ); + printf( "\nTestFib(%ld) %d %d\n", n, int( !fib_m1.is_null() ), int( !fib_m2.is_null() ) ); #endif - if ( n < 2 ) { - result = n ; - } - else if ( ! fib_m2.is_null() && ! fib_m1.is_null() ) { - result = fib_m1.get() + fib_m2.get(); + if ( n < 2 ) { + result = n; + } + else if ( !fib_m2.is_null() && !fib_m1.is_null() ) { + result = fib_m1.get() + fib_m2.get(); + } + else { + // Spawn new children and respawn myself to sum their results. + // Spawn lower value at higher priority as it has a shorter + // path to completion. + + fib_m2 = Kokkos::task_spawn( Kokkos::TaskSingle( sched, Kokkos::TaskPriority::High ) + , TestFib( sched, n - 2 ) ); + + fib_m1 = Kokkos::task_spawn( Kokkos::TaskSingle( sched ) + , TestFib( sched, n - 1 ) ); + + Kokkos::Future< Space > dep[] = { fib_m1, fib_m2 }; + Kokkos::Future< Space > fib_all = Kokkos::when_all( dep, 2 ); + + if ( !fib_m2.is_null() && !fib_m1.is_null() && !fib_all.is_null() ) { + // High priority to retire this branch. + Kokkos::respawn( this, fib_all, Kokkos::TaskPriority::High ); } else { - - // Spawn new children and respawn myself to sum their results: - // Spawn lower value at higher priority as it has a shorter - // path to completion. - - fib_m2 = policy.task_spawn( TestFib(policy,n-2) - , Kokkos::TaskSingle - , Kokkos::TaskHighPriority ); - - fib_m1 = policy.task_spawn( TestFib(policy,n-1) - , Kokkos::TaskSingle ); - - Kokkos::Future dep[] = { fib_m1 , fib_m2 }; - - Kokkos::Future fib_all = policy.when_all( 2 , dep ); - - if ( ! fib_m2.is_null() && ! fib_m1.is_null() && ! 
fib_all.is_null() ) { - // High priority to retire this branch - policy.respawn( this , Kokkos::TaskHighPriority , fib_all ); - } - else { #if 1 - printf( "TestFib(%ld) insufficient memory alloc_capacity(%d) task_max(%d) task_accum(%ld)\n" - , n - , policy.allocation_capacity() - , policy.allocated_task_count_max() - , policy.allocated_task_count_accum() - ); + printf( "TestFib(%ld) insufficient memory alloc_capacity(%d) task_max(%d) task_accum(%ld)\n" + , n + , sched.allocation_capacity() + , sched.allocated_task_count_max() + , sched.allocated_task_count_accum() + ); #endif - Kokkos::abort("TestFib insufficient memory"); - } + Kokkos::abort( "TestFib insufficient memory" ); + } } + } - static void run( int i , size_t MemoryCapacity = 16000 ) - { - typedef typename policy_type::memory_space memory_space ; + static void run( int i, size_t MemoryCapacity = 16000 ) + { + typedef typename sched_type::memory_space memory_space; - enum { Log2_SuperBlockSize = 12 }; + enum { Log2_SuperBlockSize = 12 }; - policy_type root_policy( memory_space() , MemoryCapacity , Log2_SuperBlockSize ); + sched_type root_sched( memory_space(), MemoryCapacity, Log2_SuperBlockSize ); - future_type f = root_policy.host_spawn( TestFib(root_policy,i) , Kokkos::TaskSingle ); - Kokkos::wait( root_policy ); - ASSERT_EQ( eval_fib(i) , f.get() ); + future_type f = Kokkos::host_spawn( Kokkos::TaskSingle( root_sched ) + , TestFib( root_sched, i ) ); + + Kokkos::wait( root_sched ); + + ASSERT_EQ( eval_fib( i ), f.get() ); #if 0 - fprintf( stdout , "\nTestFib::run(%d) spawn_size(%d) when_all_size(%d) alloc_capacity(%d) task_max(%d) task_accum(%ld)\n" - , i - , int(root_policy.template spawn_allocation_size()) - , int(root_policy.when_all_allocation_size(2)) - , root_policy.allocation_capacity() - , root_policy.allocated_task_count_max() - , root_policy.allocated_task_count_accum() - ); - fflush( stdout ); + fprintf( stdout, "\nTestFib::run(%d) spawn_size(%d) when_all_size(%d) alloc_capacity(%d) 
task_max(%d) task_accum(%ld)\n" + , i + , int(root_sched.template spawn_allocation_size()) + , int(root_sched.when_all_allocation_size(2)) + , root_sched.allocation_capacity() + , root_sched.allocated_task_count_max() + , root_sched.allocated_task_count_accum() + ); + fflush( stdout ); #endif - } - + } }; } // namespace TestTaskScheduler @@ -181,73 +170,71 @@ namespace TestTaskScheduler { template< class Space > struct TestTaskDependence { + typedef Kokkos::TaskScheduler< Space > sched_type; + typedef Kokkos::Future< Space > future_type; + typedef Kokkos::View< long, Space > accum_type; + typedef void value_type; - typedef Kokkos::TaskScheduler policy_type ; - typedef Kokkos::Future future_type ; - typedef Kokkos::View accum_type ; - typedef void value_type ; - - policy_type m_policy ; - accum_type m_accum ; - long m_count ; + sched_type m_sched; + accum_type m_accum; + long m_count; KOKKOS_INLINE_FUNCTION TestTaskDependence( long n - , const policy_type & arg_policy - , const accum_type & arg_accum ) - : m_policy( arg_policy ) + , const sched_type & arg_sched + , const accum_type & arg_accum ) + : m_sched( arg_sched ) , m_accum( arg_accum ) - , m_count( n ) - {} + , m_count( n ) {} KOKKOS_INLINE_FUNCTION - void operator()( typename policy_type::member_type & ) - { - enum { CHUNK = 8 }; - const int n = CHUNK < m_count ? CHUNK : m_count ; + void operator()( typename sched_type::member_type & ) + { + enum { CHUNK = 8 }; + const int n = CHUNK < m_count ? CHUNK : m_count; - if ( 1 < m_count ) { - future_type f[ CHUNK ] ; + if ( 1 < m_count ) { + future_type f[ CHUNK ]; - const int inc = ( m_count + n - 1 ) / n ; + const int inc = ( m_count + n - 1 ) / n; - for ( int i = 0 ; i < n ; ++i ) { - long begin = i * inc ; - long count = begin + inc < m_count ? 
inc : m_count - begin ; - f[i] = m_policy.task_spawn( TestTaskDependence(count,m_policy,m_accum) , Kokkos::TaskSingle ); - } + for ( int i = 0; i < n; ++i ) { + long begin = i * inc; + long count = begin + inc < m_count ? inc : m_count - begin; + f[i] = Kokkos::task_spawn( Kokkos::TaskSingle( m_sched ) + , TestTaskDependence( count, m_sched, m_accum ) ); + } - m_count = 0 ; + m_count = 0; - m_policy.respawn( this , m_policy.when_all( n , f ) ); - } - else if ( 1 == m_count ) { - Kokkos::atomic_increment( & m_accum() ); - } + Kokkos::respawn( this, Kokkos::when_all( f, n ) ); } + else if ( 1 == m_count ) { + Kokkos::atomic_increment( & m_accum() ); + } + } static void run( int n ) - { - typedef typename policy_type::memory_space memory_space ; + { + typedef typename sched_type::memory_space memory_space; - // enum { MemoryCapacity = 4000 }; // Triggers infinite loop in memory pool - enum { MemoryCapacity = 16000 }; - enum { Log2_SuperBlockSize = 12 }; - policy_type policy( memory_space() , MemoryCapacity , Log2_SuperBlockSize ); + // enum { MemoryCapacity = 4000 }; // Triggers infinite loop in memory pool. 
+ enum { MemoryCapacity = 16000 }; + enum { Log2_SuperBlockSize = 12 }; + sched_type sched( memory_space(), MemoryCapacity, Log2_SuperBlockSize ); - accum_type accum("accum"); + accum_type accum( "accum" ); - typename accum_type::HostMirror host_accum = - Kokkos::create_mirror_view( accum ); + typename accum_type::HostMirror host_accum = Kokkos::create_mirror_view( accum ); - policy.host_spawn( TestTaskDependence(n,policy,accum) , Kokkos::TaskSingle ); + Kokkos::host_spawn( Kokkos::TaskSingle( sched ), TestTaskDependence( n, sched, accum ) ); - Kokkos::wait( policy ); + Kokkos::wait( sched ); - Kokkos::deep_copy( host_accum , accum ); + Kokkos::deep_copy( host_accum, accum ); - ASSERT_EQ( host_accum() , n ); - } + ASSERT_EQ( host_accum(), n ); + } }; } // namespace TestTaskScheduler @@ -258,294 +245,317 @@ namespace TestTaskScheduler { template< class ExecSpace > struct TestTaskTeam { - //enum { SPAN = 8 }; enum { SPAN = 33 }; //enum { SPAN = 1 }; - typedef void value_type ; - typedef Kokkos::TaskScheduler policy_type ; - typedef Kokkos::Future future_type ; - typedef Kokkos::View view_type ; + typedef void value_type; + typedef Kokkos::TaskScheduler< ExecSpace > sched_type; + typedef Kokkos::Future< ExecSpace > future_type; + typedef Kokkos::View< long*, ExecSpace > view_type; - policy_type policy ; - future_type future ; + sched_type sched; + future_type future; - view_type parfor_result ; - view_type parreduce_check ; - view_type parscan_result ; - view_type parscan_check ; - const long nvalue ; + view_type parfor_result; + view_type parreduce_check; + view_type parscan_result; + view_type parscan_check; + const long nvalue; KOKKOS_INLINE_FUNCTION - TestTaskTeam( const policy_type & arg_policy - , const view_type & arg_parfor_result - , const view_type & arg_parreduce_check - , const view_type & arg_parscan_result - , const view_type & arg_parscan_check - , const long arg_nvalue ) - : policy(arg_policy) + TestTaskTeam( const sched_type & arg_sched + , const 
view_type & arg_parfor_result + , const view_type & arg_parreduce_check + , const view_type & arg_parscan_result + , const view_type & arg_parscan_check + , const long arg_nvalue ) + : sched( arg_sched ) , future() , parfor_result( arg_parfor_result ) , parreduce_check( arg_parreduce_check ) , parscan_result( arg_parscan_result ) , parscan_check( arg_parscan_check ) - , nvalue( arg_nvalue ) - {} + , nvalue( arg_nvalue ) {} KOKKOS_INLINE_FUNCTION - void operator()( typename policy_type::member_type & member ) - { - const long end = nvalue + 1 ; - const long begin = 0 < end - SPAN ? end - SPAN : 0 ; + void operator()( typename sched_type::member_type & member ) + { + const long end = nvalue + 1; + const long begin = 0 < end - SPAN ? end - SPAN : 0; - if ( 0 < begin && future.is_null() ) { - if ( member.team_rank() == 0 ) { - future = policy.task_spawn - ( TestTaskTeam( policy , - parfor_result , - parreduce_check, - parscan_result, - parscan_check, - begin - 1 ) - , Kokkos::TaskTeam ); - - assert( ! 
future.is_null() ); - - policy.respawn( this , future ); - } - return ; - } - - Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end) - , [&]( int i ) { parfor_result[i] = i ; } - ); - - // test parallel_reduce without join - - long tot = 0; - long expected = (begin+end-1)*(end-begin)*0.5; - - Kokkos::parallel_reduce( Kokkos::TeamThreadRange(member,begin,end) - , [&]( int i, long &res) { res += parfor_result[i]; } - , tot); - Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end) - , [&]( int i ) { parreduce_check[i] = expected-tot ; } - ); - - // test parallel_reduce with join - - tot = 0; - Kokkos::parallel_reduce( Kokkos::TeamThreadRange(member,begin,end) - , [&]( int i, long &res) { res += parfor_result[i]; } - , [&]( long& val1, const long& val2) { val1 += val2; } - , tot); - Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end) - , [&]( int i ) { parreduce_check[i] += expected-tot ; } - ); - - // test parallel_scan - - // Exclusive scan - Kokkos::parallel_scan( Kokkos::TeamThreadRange(member,begin,end) - , [&]( int i, long &val , const bool final ) { - if ( final ) { parscan_result[i] = val; } - val += i; - } - ); + if ( 0 < begin && future.is_null() ) { if ( member.team_rank() == 0 ) { - for ( long i = begin ; i < end ; ++i ) { - parscan_check[i] = (i*(i-1)-begin*(begin-1))*0.5-parscan_result[i]; - } + future = Kokkos::task_spawn( Kokkos::TaskTeam( sched ) + , TestTaskTeam( sched + , parfor_result + , parreduce_check + , parscan_result + , parscan_check + , begin - 1 ) + ); + + assert( !future.is_null() ); + + Kokkos::respawn( this, future ); } - // Inclusive scan - Kokkos::parallel_scan( Kokkos::TeamThreadRange(member,begin,end) - , [&]( int i, long &val , const bool final ) { - val += i; - if ( final ) { parscan_result[i] = val; } - } - ); - if ( member.team_rank() == 0 ) { - for ( long i = begin ; i < end ; ++i ) { - parscan_check[i] += (i*(i+1)-begin*(begin-1))*0.5-parscan_result[i]; - } - } - // ThreadVectorRange check - /* 
- long result = 0; - expected = (begin+end-1)*(end-begin)*0.5; - Kokkos::parallel_reduce( Kokkos::TeamThreadRange( member , 0 , 1 ) - , [&] ( const int i , long & outerUpdate ) { - long sum_j = 0.0; - Kokkos::parallel_reduce( Kokkos::ThreadVectorRange( member , end - begin ) - , [&] ( const int j , long &innerUpdate ) { - innerUpdate += begin+j; - } , sum_j ); - outerUpdate += sum_j ; - } , result ); - Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end) - , [&]( int i ) { - parreduce_check[i] += result-expected ; - } - ); - */ + return; } + Kokkos::parallel_for( Kokkos::TeamThreadRange( member, begin, end ) + , [&] ( int i ) { parfor_result[i] = i; } + ); + + // Test parallel_reduce without join. + + long tot = 0; + long expected = ( begin + end - 1 ) * ( end - begin ) * 0.5; + + Kokkos::parallel_reduce( Kokkos::TeamThreadRange( member, begin, end ) + , [&] ( int i, long & res ) { res += parfor_result[i]; } + , tot + ); + + Kokkos::parallel_for( Kokkos::TeamThreadRange( member, begin, end ) + , [&] ( int i ) { parreduce_check[i] = expected - tot; } + ); + + // Test parallel_reduce with join. + + tot = 0; + Kokkos::parallel_reduce( Kokkos::TeamThreadRange( member, begin, end ) + , [&] ( int i, long & res ) { res += parfor_result[i]; } +#if 0 + , Kokkos::Sum( tot ) +#else + , [] ( long & dst, const long & src ) { dst += src; } + , tot +#endif + ); + + Kokkos::parallel_for( Kokkos::TeamThreadRange( member, begin, end ) + , [&] ( int i ) { parreduce_check[i] += expected - tot; } + ); + + // Test parallel_scan. + + // Exclusive scan. + Kokkos::parallel_scan( Kokkos::TeamThreadRange( member, begin, end ) + , [&] ( int i, long & val, const bool final ) + { + if ( final ) { parscan_result[i] = val; } + + val += i; + }); + + // Wait for 'parscan_result' before testing it. 
+ member.team_barrier(); + + if ( member.team_rank() == 0 ) { + for ( long i = begin; i < end; ++i ) { + parscan_check[i] = ( i * ( i - 1 ) - begin * ( begin - 1 ) ) * 0.5 - parscan_result[i]; + } + } + + // Don't overwrite 'parscan_result' until it has been tested. + member.team_barrier(); + + // Inclusive scan. + Kokkos::parallel_scan( Kokkos::TeamThreadRange( member, begin, end ) + , [&] ( int i, long & val, const bool final ) + { + val += i; + + if ( final ) { parscan_result[i] = val; } + }); + + // Wait for 'parscan_result' before testing it. + member.team_barrier(); + + if ( member.team_rank() == 0 ) { + for ( long i = begin; i < end; ++i ) { + parscan_check[i] += ( i * ( i + 1 ) - begin * ( begin - 1 ) ) * 0.5 - parscan_result[i]; + } + } + + // ThreadVectorRange check. +/* + long result = 0; + expected = ( begin + end - 1 ) * ( end - begin ) * 0.5; + Kokkos::parallel_reduce( Kokkos::TeamThreadRange( member, 0, 1 ) + , [&] ( const int i, long & outerUpdate ) + { + long sum_j = 0.0; + + Kokkos::parallel_reduce( Kokkos::ThreadVectorRange( member, end - begin ) + , [&] ( const int j, long & innerUpdate ) + { + innerUpdate += begin + j; + }, sum_j ); + + outerUpdate += sum_j; + }, result ); + + Kokkos::parallel_for( Kokkos::TeamThreadRange( member, begin, end ) + , [&] ( int i ) + { + parreduce_check[i] += result - expected; + }); +*/ + } + static void run( long n ) - { - // const unsigned memory_capacity = 10000 ; // causes memory pool infinite loop - // const unsigned memory_capacity = 100000 ; // fails with SPAN=1 for serial and OMP - const unsigned memory_capacity = 400000 ; + { + //const unsigned memory_capacity = 10000; // Causes memory pool infinite loop. + //const unsigned memory_capacity = 100000; // Fails with SPAN=1 for serial and OMP. 
+ const unsigned memory_capacity = 400000; - policy_type root_policy( typename policy_type::memory_space() - , memory_capacity ); + sched_type root_sched( typename sched_type::memory_space(), memory_capacity ); - view_type root_parfor_result("parfor_result",n+1); - view_type root_parreduce_check("parreduce_check",n+1); - view_type root_parscan_result("parscan_result",n+1); - view_type root_parscan_check("parscan_check",n+1); + view_type root_parfor_result( "parfor_result", n + 1 ); + view_type root_parreduce_check( "parreduce_check", n + 1 ); + view_type root_parscan_result( "parscan_result", n + 1 ); + view_type root_parscan_check( "parscan_check", n + 1 ); - typename view_type::HostMirror - host_parfor_result = Kokkos::create_mirror_view( root_parfor_result ); - typename view_type::HostMirror - host_parreduce_check = Kokkos::create_mirror_view( root_parreduce_check ); - typename view_type::HostMirror - host_parscan_result = Kokkos::create_mirror_view( root_parscan_result ); - typename view_type::HostMirror - host_parscan_check = Kokkos::create_mirror_view( root_parscan_check ); + typename view_type::HostMirror + host_parfor_result = Kokkos::create_mirror_view( root_parfor_result ); + typename view_type::HostMirror + host_parreduce_check = Kokkos::create_mirror_view( root_parreduce_check ); + typename view_type::HostMirror + host_parscan_result = Kokkos::create_mirror_view( root_parscan_result ); + typename view_type::HostMirror + host_parscan_check = Kokkos::create_mirror_view( root_parscan_check ); - future_type f = root_policy.host_spawn( - TestTaskTeam( root_policy , - root_parfor_result , - root_parreduce_check , - root_parscan_result, - root_parscan_check, - n ) , - Kokkos::TaskTeam ); + future_type f = Kokkos::host_spawn( Kokkos::TaskTeam( root_sched ) + , TestTaskTeam( root_sched + , root_parfor_result + , root_parreduce_check + , root_parscan_result + , root_parscan_check + , n ) + ); - Kokkos::wait( root_policy ); + Kokkos::wait( root_sched ); - 
Kokkos::deep_copy( host_parfor_result , root_parfor_result ); - Kokkos::deep_copy( host_parreduce_check , root_parreduce_check ); - Kokkos::deep_copy( host_parscan_result , root_parscan_result ); - Kokkos::deep_copy( host_parscan_check , root_parscan_check ); + Kokkos::deep_copy( host_parfor_result, root_parfor_result ); + Kokkos::deep_copy( host_parreduce_check, root_parreduce_check ); + Kokkos::deep_copy( host_parscan_result, root_parscan_result ); + Kokkos::deep_copy( host_parscan_check, root_parscan_check ); - for ( long i = 0 ; i <= n ; ++i ) { - const long answer = i ; - if ( host_parfor_result(i) != answer ) { - std::cerr << "TestTaskTeam::run ERROR parallel_for result(" << i << ") = " - << host_parfor_result(i) << " != " << answer << std::endl ; - } - if ( host_parreduce_check(i) != 0 ) { - std::cerr << "TestTaskTeam::run ERROR parallel_reduce check(" << i << ") = " - << host_parreduce_check(i) << " != 0" << std::endl ; - } - if ( host_parscan_check(i) != 0 ) { - std::cerr << "TestTaskTeam::run ERROR parallel_scan check(" << i << ") = " - << host_parscan_check(i) << " != 0" << std::endl ; - } + for ( long i = 0; i <= n; ++i ) { + const long answer = i; + + if ( host_parfor_result( i ) != answer ) { + std::cerr << "TestTaskTeam::run ERROR parallel_for result(" << i << ") = " + << host_parfor_result( i ) << " != " << answer << std::endl; + } + + if ( host_parreduce_check( i ) != 0 ) { + std::cerr << "TestTaskTeam::run ERROR parallel_reduce check(" << i << ") = " + << host_parreduce_check( i ) << " != 0" << std::endl; + } + + if ( host_parscan_check( i ) != 0 ) { + std::cerr << "TestTaskTeam::run ERROR parallel_scan check(" << i << ") = " + << host_parscan_check( i ) << " != 0" << std::endl; } } + } }; template< class ExecSpace > struct TestTaskTeamValue { - enum { SPAN = 8 }; - typedef long value_type ; - typedef Kokkos::TaskScheduler policy_type ; - typedef Kokkos::Future future_type ; - typedef Kokkos::View view_type ; + typedef long value_type; + typedef 
Kokkos::TaskScheduler< ExecSpace > sched_type; + typedef Kokkos::Future< value_type, ExecSpace > future_type; + typedef Kokkos::View< long*, ExecSpace > view_type; - policy_type policy ; - future_type future ; + sched_type sched; + future_type future; - view_type result ; - const long nvalue ; + view_type result; + const long nvalue; KOKKOS_INLINE_FUNCTION - TestTaskTeamValue( const policy_type & arg_policy - , const view_type & arg_result - , const long arg_nvalue ) - : policy(arg_policy) + TestTaskTeamValue( const sched_type & arg_sched + , const view_type & arg_result + , const long arg_nvalue ) + : sched( arg_sched ) , future() , result( arg_result ) - , nvalue( arg_nvalue ) - {} + , nvalue( arg_nvalue ) {} KOKKOS_INLINE_FUNCTION - void operator()( typename policy_type::member_type const & member + void operator()( typename sched_type::member_type const & member , value_type & final ) - { - const long end = nvalue + 1 ; - const long begin = 0 < end - SPAN ? end - SPAN : 0 ; - - if ( 0 < begin && future.is_null() ) { - if ( member.team_rank() == 0 ) { - - future = policy.task_spawn - ( TestTaskTeamValue( policy , result , begin - 1 ) - , Kokkos::TaskTeam ); - - assert( ! future.is_null() ); - - policy.respawn( this , future ); - } - return ; - } - - Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end) - , [&]( int i ) { result[i] = i + 1 ; } - ); + { + const long end = nvalue + 1; + const long begin = 0 < end - SPAN ? 
end - SPAN : 0; + if ( 0 < begin && future.is_null() ) { if ( member.team_rank() == 0 ) { - final = result[nvalue] ; + future = sched.task_spawn( TestTaskTeamValue( sched, result, begin - 1 ) + , Kokkos::TaskTeam ); + + assert( !future.is_null() ); + + sched.respawn( this , future ); } - Kokkos::memory_fence(); + return; } + Kokkos::parallel_for( Kokkos::TeamThreadRange( member, begin, end ) + , [&] ( int i ) { result[i] = i + 1; } + ); + + if ( member.team_rank() == 0 ) { + final = result[nvalue]; + } + + Kokkos::memory_fence(); + } + static void run( long n ) - { - // const unsigned memory_capacity = 10000 ; // causes memory pool infinite loop - const unsigned memory_capacity = 100000 ; + { + //const unsigned memory_capacity = 10000; // Causes memory pool infinite loop. + const unsigned memory_capacity = 100000; - policy_type root_policy( typename policy_type::memory_space() - , memory_capacity ); + sched_type root_sched( typename sched_type::memory_space() + , memory_capacity ); - view_type root_result("result",n+1); + view_type root_result( "result", n + 1 ); - typename view_type::HostMirror - host_result = Kokkos::create_mirror_view( root_result ); + typename view_type::HostMirror host_result = Kokkos::create_mirror_view( root_result ); - future_type fv = root_policy.host_spawn - ( TestTaskTeamValue( root_policy, root_result, n ) , Kokkos::TaskTeam ); + future_type fv = root_sched.host_spawn( TestTaskTeamValue( root_sched, root_result, n ) + , Kokkos::TaskTeam ); - Kokkos::wait( root_policy ); + Kokkos::wait( root_sched ); - Kokkos::deep_copy( host_result , root_result ); + Kokkos::deep_copy( host_result, root_result ); - if ( fv.get() != n + 1 ) { - std::cerr << "TestTaskTeamValue ERROR future = " - << fv.get() << " != " << n + 1 << std::endl ; - } - for ( long i = 0 ; i <= n ; ++i ) { - const long answer = i + 1 ; - if ( host_result(i) != answer ) { - std::cerr << "TestTaskTeamValue ERROR result(" << i << ") = " - << host_result(i) << " != " << answer << 
std::endl ; - } + if ( fv.get() != n + 1 ) { + std::cerr << "TestTaskTeamValue ERROR future = " + << fv.get() << " != " << n + 1 << std::endl; + } + + for ( long i = 0; i <= n; ++i ) { + const long answer = i + 1; + + if ( host_result( i ) != answer ) { + std::cerr << "TestTaskTeamValue ERROR result(" << i << ") = " + << host_result( i ) << " != " << answer << std::endl; } } + } }; + } // namespace TestTaskScheduler -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ -#endif /* #ifndef KOKKOS_UNITTEST_TASKSCHEDULER_HPP */ - +#endif // #if defined( KOKKOS_ENABLE_TASKDAG ) +#endif // #ifndef KOKKOS_UNITTEST_TASKSCHEDULER_HPP diff --git a/lib/kokkos/core/unit_test/TestTeam.hpp b/lib/kokkos/core/unit_test/TestTeam.hpp index bcf4d3a173..11a523921d 100644 --- a/lib/kokkos/core/unit_test/TestTeam.hpp +++ b/lib/kokkos/core/unit_test/TestTeam.hpp @@ -48,177 +48,169 @@ #include -/*--------------------------------------------------------------------------*/ - namespace Test { + namespace { template< class ExecSpace, class ScheduleType > struct TestTeamPolicy { + typedef typename Kokkos::TeamPolicy< ScheduleType, ExecSpace >::member_type team_member; + typedef Kokkos::View< int**, ExecSpace > view_type; - typedef typename Kokkos::TeamPolicy< ScheduleType, ExecSpace >::member_type team_member ; - typedef Kokkos::View view_type ; - - view_type m_flags ; + view_type m_flags; TestTeamPolicy( const size_t league_size ) - : m_flags( Kokkos::ViewAllocateWithoutInitializing("flags") - , Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( *this ) - , league_size ) - {} + : m_flags( Kokkos::ViewAllocateWithoutInitializing( "flags" ), + Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( *this ), + league_size ) {} struct VerifyInitTag {}; KOKKOS_INLINE_FUNCTION void operator()( const team_member & member ) 
const - { - const int tid = member.team_rank() + member.team_size() * member.league_rank(); + { + const int tid = member.team_rank() + member.team_size() * member.league_rank(); - m_flags( member.team_rank() , member.league_rank() ) = tid ; - } + m_flags( member.team_rank(), member.league_rank() ) = tid; + } KOKKOS_INLINE_FUNCTION - void operator()( const VerifyInitTag & , const team_member & member ) const - { - const int tid = member.team_rank() + member.team_size() * member.league_rank(); + void operator()( const VerifyInitTag &, const team_member & member ) const + { + const int tid = member.team_rank() + member.team_size() * member.league_rank(); - if ( tid != m_flags( member.team_rank() , member.league_rank() ) ) { - printf("TestTeamPolicy member(%d,%d) error %d != %d\n" - , member.league_rank() , member.team_rank() - , tid , m_flags( member.team_rank() , member.league_rank() ) ); - } + if ( tid != m_flags( member.team_rank(), member.league_rank() ) ) { + printf( "TestTeamPolicy member(%d,%d) error %d != %d\n", + member.league_rank(), member.team_rank(), + tid, m_flags( member.team_rank(), member.league_rank() ) ); } + } - // included for test_small_league_size - TestTeamPolicy() - : m_flags() - {} + // Included for test_small_league_size. + TestTeamPolicy() : m_flags() {} + + // Included for test_small_league_size. + struct NoOpTag {}; - // included for test_small_league_size - struct NoOpTag {} ; KOKKOS_INLINE_FUNCTION - void operator()( const NoOpTag & , const team_member & member ) const - {} + void operator()( const NoOpTag &, const team_member & member ) const {} static void test_small_league_size() { - int bs = 8; // batch size (number of elements per batch) int ns = 16; // total number of "problems" to process - // calculate total scratch memory space size + // Calculate total scratch memory space size. 
const int level = 0; int mem_size = 960; - const int num_teams = ns/bs; - const Kokkos::TeamPolicy< ExecSpace, NoOpTag > policy(num_teams, Kokkos::AUTO()); + const int num_teams = ns / bs; + const Kokkos::TeamPolicy< ExecSpace, NoOpTag > policy( num_teams, Kokkos::AUTO() ); - Kokkos::parallel_for ( policy.set_scratch_size(level, Kokkos::PerTeam(mem_size), Kokkos::PerThread(0)) - , TestTeamPolicy() - ); + Kokkos::parallel_for( policy.set_scratch_size( level, Kokkos::PerTeam( mem_size ), Kokkos::PerThread( 0 ) ), + TestTeamPolicy() ); } static void test_for( const size_t league_size ) - { - TestTeamPolicy functor( league_size ); + { + TestTeamPolicy functor( league_size ); - const int team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( functor ); + const int team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( functor ); - Kokkos::parallel_for( Kokkos::TeamPolicy< ScheduleType, ExecSpace >( league_size , team_size ) , functor ); - Kokkos::parallel_for( Kokkos::TeamPolicy< ScheduleType, ExecSpace , VerifyInitTag >( league_size , team_size ) , functor ); + Kokkos::parallel_for( Kokkos::TeamPolicy< ScheduleType, ExecSpace >( league_size, team_size ), functor ); + Kokkos::parallel_for( Kokkos::TeamPolicy< ScheduleType, ExecSpace, VerifyInitTag >( league_size, team_size ), functor ); - test_small_league_size(); - } + test_small_league_size(); + } struct ReduceTag {}; - typedef long value_type ; + typedef long value_type; KOKKOS_INLINE_FUNCTION - void operator()( const team_member & member , value_type & update ) const - { - update += member.team_rank() + member.team_size() * member.league_rank(); - } + void operator()( const team_member & member, value_type & update ) const + { + update += member.team_rank() + member.team_size() * member.league_rank(); + } KOKKOS_INLINE_FUNCTION - void operator()( const ReduceTag & , const team_member & member , value_type & update ) const - { - update += 1 + member.team_rank() + 
member.team_size() * member.league_rank(); - } + void operator()( const ReduceTag &, const team_member & member, value_type & update ) const + { + update += 1 + member.team_rank() + member.team_size() * member.league_rank(); + } static void test_reduce( const size_t league_size ) - { - TestTeamPolicy functor( league_size ); + { + TestTeamPolicy functor( league_size ); - const int team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( functor ); - const long N = team_size * league_size ; + const int team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( functor ); + const long N = team_size * league_size; - long total = 0 ; + long total = 0; - Kokkos::parallel_reduce( Kokkos::TeamPolicy< ScheduleType, ExecSpace >( league_size , team_size ) , functor , total ); - ASSERT_EQ( size_t((N-1)*(N))/2 , size_t(total) ); + Kokkos::parallel_reduce( Kokkos::TeamPolicy< ScheduleType, ExecSpace >( league_size, team_size ), functor, total ); + ASSERT_EQ( size_t( ( N - 1 ) * ( N ) ) / 2, size_t( total ) ); - Kokkos::parallel_reduce( Kokkos::TeamPolicy< ScheduleType, ExecSpace , ReduceTag >( league_size , team_size ) , functor , total ); - ASSERT_EQ( (size_t(N)*size_t(N+1))/2 , size_t(total) ); - } + Kokkos::parallel_reduce( Kokkos::TeamPolicy< ScheduleType, ExecSpace, ReduceTag >( league_size, team_size ), functor, total ); + ASSERT_EQ( ( size_t( N ) * size_t( N + 1 ) ) / 2, size_t( total ) ); + } }; -} -} +} // namespace + +} // namespace Test /*--------------------------------------------------------------------------*/ namespace Test { -template< typename ScalarType , class DeviceType, class ScheduleType > +template< typename ScalarType, class DeviceType, class ScheduleType > class ReduceTeamFunctor { public: - typedef DeviceType execution_space ; - typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ; - typedef typename execution_space::size_type size_type ; + typedef DeviceType execution_space; + typedef 
Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type; + typedef typename execution_space::size_type size_type; struct value_type { - ScalarType value[3] ; + ScalarType value[3]; }; - const size_type nwork ; + const size_type nwork; ReduceTeamFunctor( const size_type & arg_nwork ) : nwork( arg_nwork ) {} - ReduceTeamFunctor( const ReduceTeamFunctor & rhs ) - : nwork( rhs.nwork ) {} + ReduceTeamFunctor( const ReduceTeamFunctor & rhs ) : nwork( rhs.nwork ) {} KOKKOS_INLINE_FUNCTION void init( value_type & dst ) const { - dst.value[0] = 0 ; - dst.value[1] = 0 ; - dst.value[2] = 0 ; + dst.value[0] = 0; + dst.value[1] = 0; + dst.value[2] = 0; } KOKKOS_INLINE_FUNCTION - void join( volatile value_type & dst , - const volatile value_type & src ) const + void join( volatile value_type & dst, const volatile value_type & src ) const { - dst.value[0] += src.value[0] ; - dst.value[1] += src.value[1] ; - dst.value[2] += src.value[2] ; + dst.value[0] += src.value[0]; + dst.value[1] += src.value[1]; + dst.value[2] += src.value[2]; } KOKKOS_INLINE_FUNCTION - void operator()( const typename policy_type::member_type ind , value_type & dst ) const + void operator()( const typename policy_type::member_type ind, value_type & dst ) const { const int thread_rank = ind.team_rank() + ind.team_size() * ind.league_rank(); const int thread_size = ind.team_size() * ind.league_size(); - const int chunk = ( nwork + thread_size - 1 ) / thread_size ; + const int chunk = ( nwork + thread_size - 1 ) / thread_size; - size_type iwork = chunk * thread_rank ; - const size_type iwork_end = iwork + chunk < nwork ? iwork + chunk : nwork ; + size_type iwork = chunk * thread_rank; + const size_type iwork_end = iwork + chunk < nwork ? 
iwork + chunk : nwork; - for ( ; iwork < iwork_end ; ++iwork ) { - dst.value[0] += 1 ; - dst.value[1] += iwork + 1 ; - dst.value[2] += nwork - iwork ; + for ( ; iwork < iwork_end; ++iwork ) { + dst.value[0] += 1; + dst.value[1] += iwork + 1; + dst.value[2] += nwork - iwork; } } }; @@ -227,58 +219,53 @@ public: namespace { -template< typename ScalarType , class DeviceType, class ScheduleType > +template< typename ScalarType, class DeviceType, class ScheduleType > class TestReduceTeam { public: - typedef DeviceType execution_space ; - typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ; - typedef typename execution_space::size_type size_type ; + typedef DeviceType execution_space; + typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type; + typedef typename execution_space::size_type size_type; - //------------------------------------ - - TestReduceTeam( const size_type & nwork ) - { - run_test(nwork); - } + TestReduceTeam( const size_type & nwork ) { run_test( nwork ); } void run_test( const size_type & nwork ) { - typedef Test::ReduceTeamFunctor< ScalarType , execution_space , ScheduleType> functor_type ; - typedef typename functor_type::value_type value_type ; - typedef Kokkos::View< value_type, Kokkos::HostSpace, Kokkos::MemoryUnmanaged > result_type ; + typedef Test::ReduceTeamFunctor< ScalarType, execution_space, ScheduleType> functor_type; + typedef typename functor_type::value_type value_type; + typedef Kokkos::View< value_type, Kokkos::HostSpace, Kokkos::MemoryUnmanaged > result_type; enum { Count = 3 }; enum { Repeat = 100 }; value_type result[ Repeat ]; - const unsigned long nw = nwork ; - const unsigned long nsum = nw % 2 ? nw * (( nw + 1 )/2 ) - : (nw/2) * ( nw + 1 ); + const unsigned long nw = nwork; + const unsigned long nsum = nw % 2 ? 
nw * ( ( nw + 1 ) / 2 ) + : ( nw / 2 ) * ( nw + 1 ); - const unsigned team_size = policy_type::team_size_recommended( functor_type(nwork) ); - const unsigned league_size = ( nwork + team_size - 1 ) / team_size ; + const unsigned team_size = policy_type::team_size_recommended( functor_type( nwork ) ); + const unsigned league_size = ( nwork + team_size - 1 ) / team_size; - policy_type team_exec( league_size , team_size ); + policy_type team_exec( league_size, team_size ); - for ( unsigned i = 0 ; i < Repeat ; ++i ) { + for ( unsigned i = 0; i < Repeat; ++i ) { result_type tmp( & result[i] ); - Kokkos::parallel_reduce( team_exec , functor_type(nwork) , tmp ); + Kokkos::parallel_reduce( team_exec, functor_type( nwork ), tmp ); } execution_space::fence(); - for ( unsigned i = 0 ; i < Repeat ; ++i ) { - for ( unsigned j = 0 ; j < Count ; ++j ) { - const unsigned long correct = 0 == j % 3 ? nw : nsum ; - ASSERT_EQ( (ScalarType) correct , result[i].value[j] ); + for ( unsigned i = 0; i < Repeat; ++i ) { + for ( unsigned j = 0; j < Count; ++j ) { + const unsigned long correct = 0 == j % 3 ? 
nw : nsum; + ASSERT_EQ( (ScalarType) correct, result[i].value[j] ); } } } }; -} +} // namespace /*--------------------------------------------------------------------------*/ @@ -288,53 +275,51 @@ template< class DeviceType, class ScheduleType > class ScanTeamFunctor { public: - typedef DeviceType execution_space ; - typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ; + typedef DeviceType execution_space; + typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type; + typedef long int value_type; - typedef long int value_type ; - Kokkos::View< value_type , execution_space > accum ; - Kokkos::View< value_type , execution_space > total ; + Kokkos::View< value_type, execution_space > accum; + Kokkos::View< value_type, execution_space > total; - ScanTeamFunctor() : accum("accum"), total("total") {} + ScanTeamFunctor() : accum( "accum" ), total( "total" ) {} KOKKOS_INLINE_FUNCTION - void init( value_type & error ) const { error = 0 ; } + void init( value_type & error ) const { error = 0; } KOKKOS_INLINE_FUNCTION - void join( value_type volatile & error , - value_type volatile const & input ) const - { if ( input ) error = 1 ; } + void join( value_type volatile & error, value_type volatile const & input ) const + { if ( input ) error = 1; } struct JoinMax { - typedef long int value_type ; + typedef long int value_type; + KOKKOS_INLINE_FUNCTION - void join( value_type volatile & dst - , value_type volatile const & input ) const - { if ( dst < input ) dst = input ; } + void join( value_type volatile & dst, value_type volatile const & input ) const + { if ( dst < input ) dst = input; } }; KOKKOS_INLINE_FUNCTION - void operator()( const typename policy_type::member_type ind , value_type & error ) const + void operator()( const typename policy_type::member_type ind, value_type & error ) const { if ( 0 == ind.league_rank() && 0 == ind.team_rank() ) { const long int thread_count = ind.league_size() * ind.team_size(); - total() = ( thread_count 
* ( thread_count + 1 ) ) / 2 ; + total() = ( thread_count * ( thread_count + 1 ) ) / 2; } // Team max: - const int long m = ind.team_reduce( (long int) ( ind.league_rank() + ind.team_rank() ) , JoinMax() ); + const int long m = ind.team_reduce( (long int) ( ind.league_rank() + ind.team_rank() ), JoinMax() ); if ( m != ind.league_rank() + ( ind.team_size() - 1 ) ) { - printf("ScanTeamFunctor[%d.%d of %d.%d] reduce_max_answer(%ld) != reduce_max(%ld)\n" - , ind.league_rank(), ind.team_rank() - , ind.league_size(), ind.team_size() - , (long int)(ind.league_rank() + ( ind.team_size() - 1 )) , m ); + printf( "ScanTeamFunctor[%d.%d of %d.%d] reduce_max_answer(%ld) != reduce_max(%ld)\n", + ind.league_rank(), ind.team_rank(), + ind.league_size(), ind.team_size(), + (long int) ( ind.league_rank() + ( ind.team_size() - 1 ) ), m ); } // Scan: const long int answer = - ( ind.league_rank() + 1 ) * ind.team_rank() + - ( ind.team_rank() * ( ind.team_rank() + 1 ) ) / 2 ; + ( ind.league_rank() + 1 ) * ind.team_rank() + ( ind.team_rank() * ( ind.team_rank() + 1 ) ) / 2; const long int result = ind.team_scan( ind.league_rank() + 1 + ind.team_rank() + 1 ); @@ -343,16 +328,17 @@ public: ind.team_scan( ind.league_rank() + 1 + ind.team_rank() + 1 ); if ( answer != result || answer != result2 ) { - printf("ScanTeamFunctor[%d.%d of %d.%d] answer(%ld) != scan_first(%ld) or scan_second(%ld)\n", - ind.league_rank(), ind.team_rank(), - ind.league_size(), ind.team_size(), - answer,result,result2); - error = 1 ; + printf( "ScanTeamFunctor[%d.%d of %d.%d] answer(%ld) != scan_first(%ld) or scan_second(%ld)\n", + ind.league_rank(), ind.team_rank(), + ind.league_size(), ind.team_size(), + answer, result, result2 ); + + error = 1; } const long int thread_rank = ind.team_rank() + ind.team_size() * ind.league_rank(); - ind.team_scan( 1 + thread_rank , accum.ptr_on_device() ); + ind.team_scan( 1 + thread_rank, accum.ptr_on_device() ); } }; @@ -360,47 +346,45 @@ template< class DeviceType, class 
ScheduleType > class TestScanTeam { public: - typedef DeviceType execution_space ; - typedef long int value_type ; + typedef DeviceType execution_space; + typedef long int value_type; + typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type; + typedef Test::ScanTeamFunctor functor_type; - typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ; - typedef Test::ScanTeamFunctor functor_type ; - - //------------------------------------ - - TestScanTeam( const size_t nteam ) - { - run_test(nteam); - } + TestScanTeam( const size_t nteam ) { run_test( nteam ); } void run_test( const size_t nteam ) { - typedef Kokkos::View< long int , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ; - const unsigned REPEAT = 100000 ; + typedef Kokkos::View< long int, Kokkos::HostSpace, Kokkos::MemoryUnmanaged > result_type; + + const unsigned REPEAT = 100000; unsigned Repeat; - if ( nteam == 0 ) - { + + if ( nteam == 0 ) { Repeat = 1; - } else { - Repeat = ( REPEAT + nteam - 1 ) / nteam ; //error here + } + else { + Repeat = ( REPEAT + nteam - 1 ) / nteam; // Error here. 
} - functor_type functor ; + functor_type functor; - policy_type team_exec( nteam , policy_type::team_size_max( functor ) ); + policy_type team_exec( nteam, policy_type::team_size_max( functor ) ); - for ( unsigned i = 0 ; i < Repeat ; ++i ) { - long int accum = 0 ; - long int total = 0 ; - long int error = 0 ; - Kokkos::deep_copy( functor.accum , total ); - Kokkos::parallel_reduce( team_exec , functor , result_type( & error ) ); + for ( unsigned i = 0; i < Repeat; ++i ) { + long int accum = 0; + long int total = 0; + long int error = 0; + Kokkos::deep_copy( functor.accum, total ); + + Kokkos::parallel_reduce( team_exec, functor, result_type( & error ) ); DeviceType::fence(); - Kokkos::deep_copy( accum , functor.accum ); - Kokkos::deep_copy( total , functor.total ); - ASSERT_EQ( error , 0 ); - ASSERT_EQ( total , accum ); + Kokkos::deep_copy( accum, functor.accum ); + Kokkos::deep_copy( total, functor.total ); + + ASSERT_EQ( error, 0 ); + ASSERT_EQ( total, accum ); } execution_space::fence(); @@ -416,18 +400,18 @@ namespace Test { template< class ExecSpace, class ScheduleType > struct SharedTeamFunctor { - typedef ExecSpace execution_space ; - typedef int value_type ; - typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ; + typedef ExecSpace execution_space; + typedef int value_type; + typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type; enum { SHARED_COUNT = 1000 }; - typedef typename ExecSpace::scratch_memory_space shmem_space ; + typedef typename ExecSpace::scratch_memory_space shmem_space; - // tbd: MemoryUnmanaged should be the default for shared memory space - typedef Kokkos::View shared_int_array_type ; + // TBD: MemoryUnmanaged should be the default for shared memory space. + typedef Kokkos::View< int*, shmem_space, Kokkos::MemoryUnmanaged > shared_int_array_type; - // Tell how much shared memory will be required by this functor: + // Tell how much shared memory will be required by this functor. 
inline unsigned team_shmem_size( int team_size ) const { @@ -436,19 +420,26 @@ struct SharedTeamFunctor { } KOKKOS_INLINE_FUNCTION - void operator()( const typename policy_type::member_type & ind , value_type & update ) const + void operator()( const typename policy_type::member_type & ind, value_type & update ) const { - const shared_int_array_type shared_A( ind.team_shmem() , SHARED_COUNT ); - const shared_int_array_type shared_B( ind.team_shmem() , SHARED_COUNT ); + const shared_int_array_type shared_A( ind.team_shmem(), SHARED_COUNT ); + const shared_int_array_type shared_B( ind.team_shmem(), SHARED_COUNT ); - if ((shared_A.ptr_on_device () == NULL && SHARED_COUNT > 0) || - (shared_B.ptr_on_device () == NULL && SHARED_COUNT > 0)) { - printf ("Failed to allocate shared memory of size %lu\n", - static_cast (SHARED_COUNT)); - ++update; // failure to allocate is an error + if ( ( shared_A.ptr_on_device () == NULL && SHARED_COUNT > 0 ) || + ( shared_B.ptr_on_device () == NULL && SHARED_COUNT > 0 ) ) + { + printf ("member( %d/%d , %d/%d ) Failed to allocate shared memory of size %lu\n" + , ind.league_rank() + , ind.league_size() + , ind.team_rank() + , ind.team_size() + , static_cast( SHARED_COUNT ) + ); + + ++update; // Failure to allocate is an error. 
} else { - for ( int i = ind.team_rank() ; i < SHARED_COUNT ; i += ind.team_size() ) { + for ( int i = ind.team_rank(); i < SHARED_COUNT; i += ind.team_size() ) { shared_A[i] = i + ind.league_rank(); shared_B[i] = 2 * i + ind.league_rank(); } @@ -456,12 +447,13 @@ struct SharedTeamFunctor { ind.team_barrier(); if ( ind.team_rank() + 1 == ind.team_size() ) { - for ( int i = 0 ; i < SHARED_COUNT ; ++i ) { + for ( int i = 0; i < SHARED_COUNT; ++i ) { if ( shared_A[i] != i + ind.league_rank() ) { - ++update ; + ++update; } + if ( shared_B[i] != 2 * i + ind.league_rank() ) { - ++update ; + ++update; } } } @@ -469,78 +461,79 @@ struct SharedTeamFunctor { } }; -} +} // namespace Test namespace { template< class ExecSpace, class ScheduleType > struct TestSharedTeam { - - TestSharedTeam() - { run(); } + TestSharedTeam() { run(); } void run() { - typedef Test::SharedTeamFunctor Functor ; - typedef Kokkos::View< typename Functor::value_type , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ; + typedef Test::SharedTeamFunctor Functor; + typedef Kokkos::View< typename Functor::value_type, Kokkos::HostSpace, Kokkos::MemoryUnmanaged > result_type; - const size_t team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( Functor() ); + const size_t team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( Functor() ); - Kokkos::TeamPolicy< ScheduleType, ExecSpace > team_exec( 8192 / team_size , team_size ); + Kokkos::TeamPolicy< ScheduleType, ExecSpace > team_exec( 8192 / team_size, team_size ); - typename Functor::value_type error_count = 0 ; + typename Functor::value_type error_count = 0; - Kokkos::parallel_reduce( team_exec , Functor() , result_type( & error_count ) ); + Kokkos::parallel_reduce( team_exec, Functor(), result_type( & error_count ) ); - ASSERT_EQ( error_count , 0 ); + ASSERT_EQ( error_count, 0 ); } }; -} + +} // namespace namespace Test { -#if defined (KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) +#if defined( 
KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) template< class MemorySpace, class ExecSpace, class ScheduleType > struct TestLambdaSharedTeam { - - TestLambdaSharedTeam() - { run(); } + TestLambdaSharedTeam() { run(); } void run() { - typedef Test::SharedTeamFunctor Functor ; - //typedef Kokkos::View< typename Functor::value_type , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ; - typedef Kokkos::View< typename Functor::value_type , MemorySpace, Kokkos::MemoryUnmanaged > result_type ; + typedef Test::SharedTeamFunctor< ExecSpace, ScheduleType > Functor; + //typedef Kokkos::View< typename Functor::value_type, Kokkos::HostSpace, Kokkos::MemoryUnmanaged > result_type; + typedef Kokkos::View< typename Functor::value_type, MemorySpace, Kokkos::MemoryUnmanaged > result_type; - typedef typename ExecSpace::scratch_memory_space shmem_space ; + typedef typename ExecSpace::scratch_memory_space shmem_space; - // tbd: MemoryUnmanaged should be the default for shared memory space - typedef Kokkos::View shared_int_array_type ; + // TBD: MemoryUnmanaged should be the default for shared memory space. 
+ typedef Kokkos::View< int*, shmem_space, Kokkos::MemoryUnmanaged > shared_int_array_type; const int SHARED_COUNT = 1000; int team_size = 1; + #ifdef KOKKOS_ENABLE_CUDA - if(std::is_same::value) - team_size = 128; + if ( std::is_same< ExecSpace, Kokkos::Cuda >::value ) team_size = 128; #endif - Kokkos::TeamPolicy< ScheduleType, ExecSpace > team_exec( 8192 / team_size , team_size); - team_exec = team_exec.set_scratch_size(0,Kokkos::PerTeam(SHARED_COUNT*2*sizeof(int))); - typename Functor::value_type error_count = 0 ; + Kokkos::TeamPolicy< ScheduleType, ExecSpace > team_exec( 8192 / team_size, team_size ); + team_exec = team_exec.set_scratch_size( 0, Kokkos::PerTeam( SHARED_COUNT * 2 * sizeof( int ) ) ); - Kokkos::parallel_reduce( team_exec , KOKKOS_LAMBDA - ( const typename Kokkos::TeamPolicy< ScheduleType, ExecSpace >::member_type & ind , int & update ) { + typename Functor::value_type error_count = 0; - const shared_int_array_type shared_A( ind.team_shmem() , SHARED_COUNT ); - const shared_int_array_type shared_B( ind.team_shmem() , SHARED_COUNT ); + Kokkos::parallel_reduce( team_exec, KOKKOS_LAMBDA + ( const typename Kokkos::TeamPolicy< ScheduleType, ExecSpace >::member_type & ind, int & update ) + { + const shared_int_array_type shared_A( ind.team_shmem(), SHARED_COUNT ); + const shared_int_array_type shared_B( ind.team_shmem(), SHARED_COUNT ); - if ((shared_A.ptr_on_device () == NULL && SHARED_COUNT > 0) || - (shared_B.ptr_on_device () == NULL && SHARED_COUNT > 0)) { - printf ("Failed to allocate shared memory of size %lu\n", - static_cast (SHARED_COUNT)); - ++update; // failure to allocate is an error - } else { - for ( int i = ind.team_rank() ; i < SHARED_COUNT ; i += ind.team_size() ) { + if ( ( shared_A.ptr_on_device () == NULL && SHARED_COUNT > 0 ) || + ( shared_B.ptr_on_device () == NULL && SHARED_COUNT > 0 ) ) + { + printf( "Failed to allocate shared memory of size %lu\n", + static_cast( SHARED_COUNT ) ); + + ++update; // Failure to allocate is an 
error. + } + else { + for ( int i = ind.team_rank(); i < SHARED_COUNT; i += ind.team_size() ) { shared_A[i] = i + ind.league_rank(); shared_B[i] = 2 * i + ind.league_rank(); } @@ -548,196 +541,213 @@ struct TestLambdaSharedTeam { ind.team_barrier(); if ( ind.team_rank() + 1 == ind.team_size() ) { - for ( int i = 0 ; i < SHARED_COUNT ; ++i ) { + for ( int i = 0; i < SHARED_COUNT; ++i ) { if ( shared_A[i] != i + ind.league_rank() ) { - ++update ; + ++update; } + if ( shared_B[i] != 2 * i + ind.league_rank() ) { - ++update ; + ++update; } } } } }, result_type( & error_count ) ); - ASSERT_EQ( error_count , 0 ); + ASSERT_EQ( error_count, 0 ); } }; #endif -} + +} // namespace Test namespace Test { template< class ExecSpace, class ScheduleType > struct ScratchTeamFunctor { - typedef ExecSpace execution_space ; - typedef int value_type ; - typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ; + typedef ExecSpace execution_space; + typedef int value_type; + typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type; enum { SHARED_TEAM_COUNT = 100 }; enum { SHARED_THREAD_COUNT = 10 }; - typedef typename ExecSpace::scratch_memory_space shmem_space ; + typedef typename ExecSpace::scratch_memory_space shmem_space; - // tbd: MemoryUnmanaged should be the default for shared memory space - typedef Kokkos::View shared_int_array_type ; + // TBD: MemoryUnmanaged should be the default for shared memory space. 
+ typedef Kokkos::View< size_t*, shmem_space, Kokkos::MemoryUnmanaged > shared_int_array_type; KOKKOS_INLINE_FUNCTION - void operator()( const typename policy_type::member_type & ind , value_type & update ) const + void operator()( const typename policy_type::member_type & ind, value_type & update ) const { - const shared_int_array_type scratch_ptr( ind.team_scratch(1) , 3*ind.team_size() ); - const shared_int_array_type scratch_A( ind.team_scratch(1) , SHARED_TEAM_COUNT ); - const shared_int_array_type scratch_B( ind.thread_scratch(1) , SHARED_THREAD_COUNT ); + const shared_int_array_type scratch_ptr( ind.team_scratch( 1 ), 3 * ind.team_size() ); + const shared_int_array_type scratch_A( ind.team_scratch( 1 ), SHARED_TEAM_COUNT ); + const shared_int_array_type scratch_B( ind.thread_scratch( 1 ), SHARED_THREAD_COUNT ); - if ((scratch_ptr.ptr_on_device () == NULL ) || - (scratch_A. ptr_on_device () == NULL && SHARED_TEAM_COUNT > 0) || - (scratch_B. ptr_on_device () == NULL && SHARED_THREAD_COUNT > 0)) { - printf ("Failed to allocate shared memory of size %lu\n", - static_cast (SHARED_TEAM_COUNT)); - ++update; // failure to allocate is an error + if ( ( scratch_ptr.ptr_on_device () == NULL ) || + ( scratch_A. ptr_on_device () == NULL && SHARED_TEAM_COUNT > 0 ) || + ( scratch_B. ptr_on_device () == NULL && SHARED_THREAD_COUNT > 0 ) ) + { + printf( "Failed to allocate shared memory of size %lu\n", + static_cast( SHARED_TEAM_COUNT ) ); + + ++update; // Failure to allocate is an error. 
} else { - Kokkos::parallel_for(Kokkos::TeamThreadRange(ind,0,(int)SHARED_TEAM_COUNT),[&] (const int &i) { + Kokkos::parallel_for( Kokkos::TeamThreadRange( ind, 0, (int) SHARED_TEAM_COUNT ), [&] ( const int & i ) { scratch_A[i] = i + ind.league_rank(); }); - for(int i=0; i struct TestScratchTeam { - - TestScratchTeam() - { run(); } + TestScratchTeam() { run(); } void run() { - typedef Test::ScratchTeamFunctor Functor ; - typedef Kokkos::View< typename Functor::value_type , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ; + typedef Test::ScratchTeamFunctor Functor; + typedef Kokkos::View< typename Functor::value_type, Kokkos::HostSpace, Kokkos::MemoryUnmanaged > result_type; const size_t team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( Functor() ); - Kokkos::TeamPolicy< ScheduleType, ExecSpace > team_exec( 8192 / team_size , team_size ); + Kokkos::TeamPolicy< ScheduleType, ExecSpace > team_exec( 8192 / team_size, team_size ); - typename Functor::value_type error_count = 0 ; + typename Functor::value_type error_count = 0; - int team_scratch_size = Functor::shared_int_array_type::shmem_size(Functor::SHARED_TEAM_COUNT) + - Functor::shared_int_array_type::shmem_size(3*team_size); - int thread_scratch_size = Functor::shared_int_array_type::shmem_size(Functor::SHARED_THREAD_COUNT); - Kokkos::parallel_reduce( team_exec.set_scratch_size(0,Kokkos::PerTeam(team_scratch_size), - Kokkos::PerThread(thread_scratch_size)) , - Functor() , result_type( & error_count ) ); + int team_scratch_size = Functor::shared_int_array_type::shmem_size( Functor::SHARED_TEAM_COUNT ) + + Functor::shared_int_array_type::shmem_size( 3 * team_size ); - ASSERT_EQ( error_count , 0 ); + int thread_scratch_size = Functor::shared_int_array_type::shmem_size( Functor::SHARED_THREAD_COUNT ); + + Kokkos::parallel_reduce( team_exec.set_scratch_size( 0, Kokkos::PerTeam( team_scratch_size ), + Kokkos::PerThread( thread_scratch_size ) ), + Functor(), result_type( & error_count ) 
); + + ASSERT_EQ( error_count, 0 ); } }; -} + +} // namespace namespace Test { -template< class ExecSpace> + +template< class ExecSpace > KOKKOS_INLINE_FUNCTION -int test_team_mulit_level_scratch_loop_body(const typename Kokkos::TeamPolicy::member_type& team) { - Kokkos::View> a_team1(team.team_scratch(0),128); - Kokkos::View> a_thread1(team.thread_scratch(0),16); - Kokkos::View> a_team2(team.team_scratch(0),128); - Kokkos::View> a_thread2(team.thread_scratch(0),16); +int test_team_mulit_level_scratch_loop_body( const typename Kokkos::TeamPolicy::member_type& team ) { + Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits > a_team1( team.team_scratch( 0 ), 128 ); + Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits > a_thread1( team.thread_scratch( 0 ), 16 ); + Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits > a_team2( team.team_scratch( 0 ), 128 ); + Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits > a_thread2( team.thread_scratch( 0 ), 16 ); - Kokkos::View> b_team1(team.team_scratch(1),128000); - Kokkos::View> b_thread1(team.thread_scratch(1),16000); - Kokkos::View> b_team2(team.team_scratch(1),128000); - Kokkos::View> b_thread2(team.thread_scratch(1),16000); + Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits > b_team1( team.team_scratch( 1 ), 128000 ); + Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits > b_thread1( team.thread_scratch( 1 ), 16000 ); + Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits > b_team2( team.team_scratch( 1 ), 128000 ); + Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits > b_thread2( team.thread_scratch( 1 ), 16000 ); - Kokkos::View> a_team3(team.team_scratch(0),128); - Kokkos::View> a_thread3(team.thread_scratch(0),16); - Kokkos::View> b_team3(team.team_scratch(1),128000); - Kokkos::View> b_thread3(team.thread_scratch(1),16000); + Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits > a_team3( team.team_scratch( 0 ), 128 ); + Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits > a_thread3( 
team.thread_scratch( 0 ), 16 ); + Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits > b_team3( team.team_scratch( 1 ), 128000 ); + Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits > b_thread3( team.thread_scratch( 1 ), 16000 ); // The explicit types for 0 and 128 are here to test TeamThreadRange accepting different // types for begin and end. - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,int(0),unsigned(128)), [&] (const int& i) + Kokkos::parallel_for( Kokkos::TeamThreadRange( team, int( 0 ), unsigned( 128 ) ), [&] ( const int & i ) { - a_team1(i) = 1000000 + i; - a_team2(i) = 2000000 + i; - a_team3(i) = 3000000 + i; + a_team1( i ) = 1000000 + i + team.league_rank() * 100000; + a_team2( i ) = 2000000 + i + team.league_rank() * 100000; + a_team3( i ) = 3000000 + i + team.league_rank() * 100000; }); team.team_barrier(); - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16), [&] (const int& i) + + Kokkos::parallel_for( Kokkos::ThreadVectorRange( team, 16 ), [&] ( const int & i ) { - a_thread1(i) = 1000000 + 100000*team.team_rank() + 16-i; - a_thread2(i) = 2000000 + 100000*team.team_rank() + 16-i; - a_thread3(i) = 3000000 + 100000*team.team_rank() + 16-i; + a_thread1( i ) = 1000000 + 100000 * team.team_rank() + 16 - i + team.league_rank() * 100000; + a_thread2( i ) = 2000000 + 100000 * team.team_rank() + 16 - i + team.league_rank() * 100000; + a_thread3( i ) = 3000000 + 100000 * team.team_rank() + 16 - i + team.league_rank() * 100000; }); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128000), [&] (const int& i) + Kokkos::parallel_for( Kokkos::TeamThreadRange( team, 0, 128000 ), [&] ( const int & i ) { - b_team1(i) = 1000000 + i; - b_team2(i) = 2000000 + i; - b_team3(i) = 3000000 + i; + b_team1( i ) = 1000000 + i + team.league_rank() * 100000; + b_team2( i ) = 2000000 + i + team.league_rank() * 100000; + b_team3( i ) = 3000000 + i + team.league_rank() * 100000; }); team.team_barrier(); - 
Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16000), [&] (const int& i) + + Kokkos::parallel_for( Kokkos::ThreadVectorRange( team, 16000 ), [&] ( const int & i ) { - b_thread1(i) = 1000000 + 100000*team.team_rank() + 16-i; - b_thread2(i) = 2000000 + 100000*team.team_rank() + 16-i; - b_thread3(i) = 3000000 + 100000*team.team_rank() + 16-i; + b_thread1( i ) = 1000000 + 100000 * team.team_rank() + 16 - i + team.league_rank() * 100000; + b_thread2( i ) = 2000000 + 100000 * team.team_rank() + 16 - i + team.league_rank() * 100000; + b_thread3( i ) = 3000000 + 100000 * team.team_rank() + 16 - i + team.league_rank() * 100000; }); team.team_barrier(); + int error = 0; - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128), [&] (const int& i) + Kokkos::parallel_for( Kokkos::TeamThreadRange( team, 0, 128 ), [&] ( const int & i ) { - if(a_team1(i) != 1000000 + i) error++; - if(a_team2(i) != 2000000 + i) error++; - if(a_team3(i) != 3000000 + i) error++; + if ( a_team1( i ) != 1000000 + i + team.league_rank() * 100000 ) error++; + if ( a_team2( i ) != 2000000 + i + team.league_rank() * 100000 ) error++; + if ( a_team3( i ) != 3000000 + i + team.league_rank() * 100000 ) error++; }); team.team_barrier(); - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16), [&] (const int& i) + + Kokkos::parallel_for( Kokkos::ThreadVectorRange( team, 16 ), [&] ( const int & i ) { - if(a_thread1(i) != 1000000 + 100000*team.team_rank() + 16-i) error++; - if(a_thread2(i) != 2000000 + 100000*team.team_rank() + 16-i) error++; - if(a_thread3(i) != 3000000 + 100000*team.team_rank() + 16-i) error++; + if ( a_thread1( i ) != 1000000 + 100000 * team.team_rank() + 16 - i + team.league_rank() * 100000 ) error++; + if ( a_thread2( i ) != 2000000 + 100000 * team.team_rank() + 16 - i + team.league_rank() * 100000 ) error++; + if ( a_thread3( i ) != 3000000 + 100000 * team.team_rank() + 16 - i + team.league_rank() * 100000 ) error++; }); - 
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128000), [&] (const int& i) + Kokkos::parallel_for( Kokkos::TeamThreadRange( team, 0, 128000 ), [&] ( const int & i ) { - if(b_team1(i) != 1000000 + i) error++; - if(b_team2(i) != 2000000 + i) error++; - if(b_team3(i) != 3000000 + i) error++; + if ( b_team1( i ) != 1000000 + i + team.league_rank() * 100000 ) error++; + if ( b_team2( i ) != 2000000 + i + team.league_rank() * 100000 ) error++; + if ( b_team3( i ) != 3000000 + i + team.league_rank() * 100000 ) error++; }); team.team_barrier(); - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16000), [&] (const int& i) + + Kokkos::parallel_for( Kokkos::ThreadVectorRange( team, 16000 ), [&] ( const int & i ) { - if(b_thread1(i) != 1000000 + 100000*team.team_rank() + 16-i) error++; - if(b_thread2(i) != 2000000 + 100000*team.team_rank() + 16-i) error++; - if( b_thread3(i) != 3000000 + 100000*team.team_rank() + 16-i) error++; + if ( b_thread1( i ) != 1000000 + 100000 * team.team_rank() + 16 - i + team.league_rank() * 100000 ) error++; + if ( b_thread2( i ) != 2000000 + 100000 * team.team_rank() + 16 - i + team.league_rank() * 100000 ) error++; + if ( b_thread3( i ) != 3000000 + 100000 * team.team_rank() + 16 - i + team.league_rank() * 100000 ) error++; }); return error; @@ -748,93 +758,107 @@ struct TagFor {}; template< class ExecSpace, class ScheduleType > struct ClassNoShmemSizeFunction { - Kokkos::View > errors; + typedef typename Kokkos::TeamPolicy< ExecSpace, ScheduleType >::member_type member_type; + + Kokkos::View< int, ExecSpace, Kokkos::MemoryTraits > errors; KOKKOS_INLINE_FUNCTION - void operator() (const TagFor&, const typename Kokkos::TeamPolicy::member_type& team) const { - int error = test_team_mulit_level_scratch_loop_body(team); + void operator()( const TagFor &, const member_type & team ) const { + int error = test_team_mulit_level_scratch_loop_body< ExecSpace >( team ); errors() += error; } KOKKOS_INLINE_FUNCTION - void operator() (const TagReduce&, 
const typename Kokkos::TeamPolicy::member_type& team, int& error) const { - error += test_team_mulit_level_scratch_loop_body(team); + void operator() ( const TagReduce &, const member_type & team, int & error ) const { + error += test_team_mulit_level_scratch_loop_body< ExecSpace >( team ); } void run() { - Kokkos::View d_errors = Kokkos::View("Errors"); + Kokkos::View< int, ExecSpace > d_errors = Kokkos::View< int, ExecSpace >( "Errors" ); errors = d_errors; - const int per_team0 = 3*Kokkos::View>::shmem_size(128); - const int per_thread0 = 3*Kokkos::View>::shmem_size(16); + const int per_team0 = 3 * Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits >::shmem_size( 128 ); + const int per_thread0 = 3 * Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits >::shmem_size( 16 ); + + const int per_team1 = 3 * Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits >::shmem_size( 128000 ); + const int per_thread1 = 3 * Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits >::shmem_size( 16000 ); - const int per_team1 = 3*Kokkos::View>::shmem_size(128000); - const int per_thread1 = 3*Kokkos::View>::shmem_size(16000); { - Kokkos::TeamPolicy policy(10,8,16); - Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerTeam(per_team0),Kokkos::PerThread(per_thread0)).set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)), - *this); - Kokkos::fence(); - typename Kokkos::View::HostMirror h_errors = Kokkos::create_mirror_view(d_errors); - Kokkos::deep_copy(h_errors,d_errors); - ASSERT_EQ(h_errors(),0); + Kokkos::TeamPolicy< TagFor, ExecSpace, ScheduleType > policy( 10, 8, 16 ); + + Kokkos::parallel_for( policy.set_scratch_size( 0, Kokkos::PerTeam( per_team0 ), Kokkos::PerThread( per_thread0 ) ).set_scratch_size( 1, Kokkos::PerTeam( per_team1 ), Kokkos::PerThread( per_thread1 ) ), *this ); + Kokkos::fence(); + + typename Kokkos::View< int, ExecSpace >::HostMirror h_errors = Kokkos::create_mirror_view( d_errors ); + Kokkos::deep_copy( h_errors, d_errors 
); + ASSERT_EQ( h_errors(), 0 ); } { - int error = 0; - Kokkos::TeamPolicy policy(10,8,16); - Kokkos::parallel_reduce(policy.set_scratch_size(0,Kokkos::PerTeam(per_team0),Kokkos::PerThread(per_thread0)).set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)), - *this,error); - Kokkos::fence(); - ASSERT_EQ(error,0); + int error = 0; + Kokkos::TeamPolicy< TagReduce, ExecSpace, ScheduleType > policy( 10, 8, 16 ); + + Kokkos::parallel_reduce( policy.set_scratch_size( 0, Kokkos::PerTeam( per_team0 ), Kokkos::PerThread( per_thread0 ) ).set_scratch_size( 1, Kokkos::PerTeam( per_team1 ), Kokkos::PerThread( per_thread1 ) ), *this, error ); + Kokkos::fence(); + + ASSERT_EQ( error, 0 ); } }; }; template< class ExecSpace, class ScheduleType > struct ClassWithShmemSizeFunction { - Kokkos::View > errors; + typedef typename Kokkos::TeamPolicy< ExecSpace, ScheduleType >::member_type member_type; + + Kokkos::View< int, ExecSpace, Kokkos::MemoryTraits > errors; KOKKOS_INLINE_FUNCTION - void operator() (const TagFor&, const typename Kokkos::TeamPolicy::member_type& team) const { - int error = test_team_mulit_level_scratch_loop_body(team); + void operator()( const TagFor &, const member_type & team ) const { + int error = test_team_mulit_level_scratch_loop_body< ExecSpace >( team ); errors() += error; } KOKKOS_INLINE_FUNCTION - void operator() (const TagReduce&, const typename Kokkos::TeamPolicy::member_type& team, int& error) const { - error += test_team_mulit_level_scratch_loop_body(team); + void operator() ( const TagReduce &, const member_type & team, int & error ) const { + error += test_team_mulit_level_scratch_loop_body< ExecSpace >( team ); } void run() { - Kokkos::View d_errors = Kokkos::View("Errors"); + Kokkos::View< int, ExecSpace > d_errors = Kokkos::View< int, ExecSpace >( "Errors" ); errors = d_errors; - const int per_team1 = 3*Kokkos::View>::shmem_size(128000); - const int per_thread1 = 3*Kokkos::View>::shmem_size(16000); + const int per_team1 = 3 
* Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits >::shmem_size( 128000 ); + const int per_thread1 = 3 * Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits >::shmem_size( 16000 ); + { - Kokkos::TeamPolicy policy(10,8,16); - Kokkos::parallel_for(policy.set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)), - *this); - Kokkos::fence(); - typename Kokkos::View::HostMirror h_errors= Kokkos::create_mirror_view(d_errors); - Kokkos::deep_copy(h_errors,d_errors); - ASSERT_EQ(h_errors(),0); + Kokkos::TeamPolicy< TagFor, ExecSpace, ScheduleType > policy( 10, 8, 16 ); + + Kokkos::parallel_for( policy.set_scratch_size( 1, Kokkos::PerTeam( per_team1 ), + Kokkos::PerThread( per_thread1 ) ), + *this ); + Kokkos::fence(); + + typename Kokkos::View< int, ExecSpace >::HostMirror h_errors = Kokkos::create_mirror_view( d_errors ); + Kokkos::deep_copy( h_errors, d_errors ); + ASSERT_EQ( h_errors(), 0 ); } { - int error = 0; - Kokkos::TeamPolicy policy(10,8,16); - Kokkos::parallel_reduce(policy.set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)), - *this,error); - Kokkos::fence(); - ASSERT_EQ(error,0); + int error = 0; + Kokkos::TeamPolicy< TagReduce, ExecSpace, ScheduleType > policy( 10, 8, 16 ); + + Kokkos::parallel_reduce( policy.set_scratch_size( 1, Kokkos::PerTeam( per_team1 ), + Kokkos::PerThread( per_thread1 ) ), + *this, error ); + Kokkos::fence(); + + ASSERT_EQ( error, 0 ); } }; - unsigned team_shmem_size(int team_size) const { - const int per_team0 = 3*Kokkos::View>::shmem_size(128); - const int per_thread0 = 3*Kokkos::View>::shmem_size(16); + unsigned team_shmem_size( int team_size ) const { + const int per_team0 = 3 * Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits >::shmem_size( 128 ); + const int per_thread0 = 3 * Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits >::shmem_size( 16 ); return per_team0 + team_size * per_thread0; } }; @@ -842,67 +866,68 @@ struct ClassWithShmemSizeFunction { template< 
class ExecSpace, class ScheduleType > void test_team_mulit_level_scratch_test_lambda() { #ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA - Kokkos::View > errors; - Kokkos::View d_errors("Errors"); + Kokkos::View< int, ExecSpace, Kokkos::MemoryTraits > errors; + Kokkos::View< int, ExecSpace > d_errors( "Errors" ); errors = d_errors; - const int per_team0 = 3*Kokkos::View>::shmem_size(128); - const int per_thread0 = 3*Kokkos::View>::shmem_size(16); + const int per_team0 = 3 * Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits >::shmem_size( 128 ); + const int per_thread0 = 3 * Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits >::shmem_size( 16 ); - const int per_team1 = 3*Kokkos::View>::shmem_size(128000); - const int per_thread1 = 3*Kokkos::View>::shmem_size(16000); + const int per_team1 = 3 * Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits >::shmem_size( 128000 ); + const int per_thread1 = 3 * Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits >::shmem_size( 16000 ); - Kokkos::TeamPolicy policy(10,8,16); - Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerTeam(per_team0),Kokkos::PerThread(per_thread0)).set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)), - KOKKOS_LAMBDA(const typename Kokkos::TeamPolicy::member_type& team) { - int error = test_team_mulit_level_scratch_loop_body(team); + Kokkos::TeamPolicy< ExecSpace, ScheduleType > policy( 10, 8, 16 ); + + Kokkos::parallel_for( policy.set_scratch_size( 0, Kokkos::PerTeam( per_team0 ), Kokkos::PerThread( per_thread0 ) ).set_scratch_size( 1, Kokkos::PerTeam( per_team1 ), Kokkos::PerThread( per_thread1 ) ), + KOKKOS_LAMBDA ( const typename Kokkos::TeamPolicy< ExecSpace >::member_type & team ) + { + int error = test_team_mulit_level_scratch_loop_body< ExecSpace >( team ); errors() += error; }); Kokkos::fence(); - typename Kokkos::View::HostMirror h_errors= Kokkos::create_mirror_view(errors); - Kokkos::deep_copy(h_errors,d_errors); - ASSERT_EQ(h_errors(),0); + + typename 
Kokkos::View< int, ExecSpace >::HostMirror h_errors = Kokkos::create_mirror_view( errors ); + Kokkos::deep_copy( h_errors, d_errors ); + ASSERT_EQ( h_errors(), 0 ); int error = 0; - Kokkos::parallel_reduce(policy.set_scratch_size(0,Kokkos::PerTeam(per_team0),Kokkos::PerThread(per_thread0)).set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)), - KOKKOS_LAMBDA(const typename Kokkos::TeamPolicy::member_type& team, int& count) { - count += test_team_mulit_level_scratch_loop_body(team); - },error); - ASSERT_EQ(error,0); + Kokkos::parallel_reduce( policy.set_scratch_size( 0, Kokkos::PerTeam( per_team0 ), Kokkos::PerThread( per_thread0 ) ).set_scratch_size( 1, Kokkos::PerTeam( per_team1 ), Kokkos::PerThread( per_thread1 ) ), + KOKKOS_LAMBDA ( const typename Kokkos::TeamPolicy< ExecSpace >::member_type & team, int & count ) + { + count += test_team_mulit_level_scratch_loop_body< ExecSpace >( team ); + }, error ); + ASSERT_EQ( error, 0 ); Kokkos::fence(); #endif } - -} +} // namespace Test namespace { + template< class ExecSpace, class ScheduleType > struct TestMultiLevelScratchTeam { - - TestMultiLevelScratchTeam() - { run(); } + TestMultiLevelScratchTeam() { run(); } void run() { #ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA - Test::test_team_mulit_level_scratch_test_lambda(); + Test::test_team_mulit_level_scratch_test_lambda< ExecSpace, ScheduleType >(); #endif - Test::ClassNoShmemSizeFunction c1; + Test::ClassNoShmemSizeFunction< ExecSpace, ScheduleType > c1; c1.run(); - Test::ClassWithShmemSizeFunction c2; + Test::ClassWithShmemSizeFunction< ExecSpace, ScheduleType > c2; c2.run(); - } }; -} + +} // namespace namespace Test { template< class ExecSpace > struct TestShmemSize { - TestShmemSize() { run(); } void run() @@ -915,9 +940,8 @@ struct TestShmemSize { size_t size = view_type::shmem_size( d1, d2, d3 ); - ASSERT_EQ( size, d1 * d2 * d3 * sizeof(long) ); + ASSERT_EQ( size, d1 * d2 * d3 * sizeof( long ) ); } }; -} 
-/*--------------------------------------------------------------------------*/ +} // namespace Test diff --git a/lib/kokkos/core/unit_test/TestTeamVector.hpp b/lib/kokkos/core/unit_test/TestTeamVector.hpp index d9b06c29e4..8d16ac66db 100644 --- a/lib/kokkos/core/unit_test/TestTeamVector.hpp +++ b/lib/kokkos/core/unit_test/TestTeamVector.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -50,36 +50,47 @@ namespace TestTeamVector { struct my_complex { - double re,im; + double re, im; int dummy; + KOKKOS_INLINE_FUNCTION my_complex() { re = 0.0; im = 0.0; dummy = 0; } + KOKKOS_INLINE_FUNCTION - my_complex(const my_complex& src) { + my_complex( const my_complex & src ) { re = src.re; im = src.im; dummy = src.dummy; } KOKKOS_INLINE_FUNCTION - my_complex(const volatile my_complex& src) { + my_complex & operator=( const my_complex & src ) { + re = src.re; + im = src.im; + dummy = src.dummy; + return *this ; + } + + KOKKOS_INLINE_FUNCTION + my_complex( const volatile my_complex & src ) { re = src.re; im = src.im; dummy = src.dummy; } KOKKOS_INLINE_FUNCTION - my_complex(const double& val) { + my_complex( const double & val ) { re = val; im = 0.0; dummy = 0; } + KOKKOS_INLINE_FUNCTION - my_complex& operator += (const my_complex& src) { + my_complex & operator+=( const my_complex & src ) { re += src.re; im += 
src.im; dummy += src.dummy; @@ -87,252 +98,124 @@ struct my_complex { } KOKKOS_INLINE_FUNCTION - void operator += (const volatile my_complex& src) volatile { + void operator+=( const volatile my_complex & src ) volatile { re += src.re; im += src.im; dummy += src.dummy; } + KOKKOS_INLINE_FUNCTION - my_complex& operator *= (const my_complex& src) { - double re_tmp = re*src.re - im*src.im; + my_complex & operator*=( const my_complex & src ) { + double re_tmp = re * src.re - im * src.im; double im_tmp = re * src.im + im * src.re; re = re_tmp; im = im_tmp; dummy *= src.dummy; return *this; } + KOKKOS_INLINE_FUNCTION - void operator *= (const volatile my_complex& src) volatile { - double re_tmp = re*src.re - im*src.im; + void operator*=( const volatile my_complex & src ) volatile { + double re_tmp = re * src.re - im * src.im; double im_tmp = re * src.im + im * src.re; re = re_tmp; im = im_tmp; dummy *= src.dummy; } + KOKKOS_INLINE_FUNCTION - bool operator == (const my_complex& src) { - return (re == src.re) && (im == src.im) && ( dummy == src.dummy ); + bool operator==( const my_complex & src ) { + return ( re == src.re ) && ( im == src.im ) && ( dummy == src.dummy ); } + KOKKOS_INLINE_FUNCTION - bool operator != (const my_complex& src) { - return (re != src.re) || (im != src.im) || ( dummy != src.dummy ); + bool operator!=( const my_complex & src ) { + return ( re != src.re ) || ( im != src.im ) || ( dummy != src.dummy ); } + KOKKOS_INLINE_FUNCTION - bool operator != (const double& val) { - return (re != val) || - (im != 0) || (dummy != 0); + bool operator!=( const double & val ) { + return ( re != val ) || ( im != 0 ) || ( dummy != 0 ); } + KOKKOS_INLINE_FUNCTION - my_complex& operator= (const int& val) { + my_complex & operator=( const int & val ) { re = val; im = 0.0; dummy = 0; return *this; } + KOKKOS_INLINE_FUNCTION - my_complex& operator= (const double& val) { + my_complex & operator=( const double & val ) { re = val; im = 0.0; dummy = 0; return *this; } + 
KOKKOS_INLINE_FUNCTION operator double() { return re; } }; -template +template< typename Scalar, class ExecutionSpace > struct functor_team_for { - typedef Kokkos::TeamPolicy policy_type; + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; typedef ExecutionSpace execution_space; - Kokkos::View flag; - functor_team_for(Kokkos::View flag_):flag(flag_) {} + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; - unsigned team_shmem_size(int team_size) const {return team_size*13*sizeof(Scalar)+8;} + functor_team_for( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} + + unsigned team_shmem_size( int team_size ) const { return team_size * 13 * sizeof( Scalar ) + 8; } KOKKOS_INLINE_FUNCTION - void operator() (typename policy_type::member_type team) const { - - typedef typename ExecutionSpace::scratch_memory_space shmem_space ; - typedef Kokkos::View shared_int; + void operator()( typename policy_type::member_type team ) const { + typedef typename ExecutionSpace::scratch_memory_space shmem_space; + typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int; typedef typename shared_int::size_type size_type; - const size_type shmemSize = team.team_size () * 13; - shared_int values = shared_int (team.team_shmem (), shmemSize); + const size_type shmemSize = team.team_size() * 13; + shared_int values = shared_int( team.team_shmem(), shmemSize ); - if (values.ptr_on_device () == NULL || values.dimension_0 () < shmemSize) { - printf ("FAILED to allocate shared memory of size %u\n", - static_cast (shmemSize)); + if ( values.ptr_on_device() == NULL || values.dimension_0() < shmemSize ) { + printf( "FAILED to allocate shared memory of size %u\n", + static_cast( shmemSize ) ); } else { + // Initialize shared memory. 
+ values( team.team_rank() ) = 0; - // Initialize shared memory - values(team.team_rank ()) = 0; - - // Accumulate value into per thread shared memory - // This is non blocking - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,131),[&] (int i) + // Accumulate value into per thread shared memory. + // This is non blocking. + Kokkos::parallel_for( Kokkos::TeamThreadRange( team, 131 ), [&] ( int i ) { - values(team.team_rank ()) += i - team.league_rank () + team.league_size () + team.team_size (); - }); - // Wait for all memory to be written - team.team_barrier (); - // One thread per team executes the comparison - Kokkos::single(Kokkos::PerTeam(team),[&]() - { - Scalar test = 0; - Scalar value = 0; - for (int i = 0; i < 131; ++i) { - test += i - team.league_rank () + team.league_size () + team.team_size (); - } - for (int i = 0; i < team.team_size (); ++i) { - value += values(i); - } - if (test != value) { - printf ("FAILED team_parallel_for %i %i %f %f\n", - team.league_rank (), team.team_rank (), - static_cast (test), static_cast (value)); - flag() = 1; - } - }); - } - } -}; - -template -struct functor_team_reduce { - typedef Kokkos::TeamPolicy policy_type; - typedef ExecutionSpace execution_space; - - Kokkos::View flag; - functor_team_reduce(Kokkos::View flag_):flag(flag_) {} - - unsigned team_shmem_size(int team_size) const {return team_size*13*sizeof(Scalar)+8;} - - KOKKOS_INLINE_FUNCTION - void operator() (typename policy_type::member_type team) const { - - Scalar value = Scalar(); - Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,131),[&] (int i, Scalar& val) - { - val += i - team.league_rank () + team.league_size () + team.team_size (); - },value); - - team.team_barrier (); - Kokkos::single(Kokkos::PerTeam(team),[&]() - { - Scalar test = 0; - for (int i = 0; i < 131; ++i) { - test += i - team.league_rank () + team.league_size () + team.team_size (); - } - if (test != value) { - if(team.league_rank() == 0) - printf ("FAILED team_parallel_reduce %i %i %f 
%f %lu\n", - team.league_rank (), team.team_rank (), - static_cast (test), static_cast (value),sizeof(Scalar)); - flag() = 1; - } - }); - } -}; - -template -struct functor_team_reduce_join { - typedef Kokkos::TeamPolicy policy_type; - typedef ExecutionSpace execution_space; - - Kokkos::View flag; - functor_team_reduce_join(Kokkos::View flag_):flag(flag_) {} - - unsigned team_shmem_size(int team_size) const {return team_size*13*sizeof(Scalar)+8;} - - KOKKOS_INLINE_FUNCTION - void operator() (typename policy_type::member_type team) const { - - Scalar value = 0; - - Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,131) - , [&] (int i, Scalar& val) - { - val += i - team.league_rank () + team.league_size () + team.team_size (); - } - , [&] (volatile Scalar& val, const volatile Scalar& src) - {val+=src;} - , value - ); - - team.team_barrier (); - Kokkos::single(Kokkos::PerTeam(team),[&]() - { - Scalar test = 0; - for (int i = 0; i < 131; ++i) { - test += i - team.league_rank () + team.league_size () + team.team_size (); - } - if (test != value) { - printf ("FAILED team_vector_parallel_reduce_join %i %i %f %f\n", - team.league_rank (), team.team_rank (), - static_cast (test), static_cast (value)); - flag() = 1; - } - }); - } -}; - -template -struct functor_team_vector_for { - typedef Kokkos::TeamPolicy policy_type; - typedef ExecutionSpace execution_space; - - Kokkos::View flag; - functor_team_vector_for(Kokkos::View flag_):flag(flag_) {} - - unsigned team_shmem_size(int team_size) const {return team_size*13*sizeof(Scalar)+8;} - - KOKKOS_INLINE_FUNCTION - void operator() (typename policy_type::member_type team) const { - - typedef typename ExecutionSpace::scratch_memory_space shmem_space ; - typedef Kokkos::View shared_int; - typedef typename shared_int::size_type size_type; - - const size_type shmemSize = team.team_size () * 13; - shared_int values = shared_int (team.team_shmem (), shmemSize); - - if (values.ptr_on_device () == NULL || values.dimension_0 () < 
shmemSize) { - printf ("FAILED to allocate shared memory of size %u\n", - static_cast (shmemSize)); - } - else { - Kokkos::single(Kokkos::PerThread(team),[&] () - { - values(team.team_rank ()) = 0; + values( team.team_rank() ) += i - team.league_rank() + team.league_size() + team.team_size(); }); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,131),[&] (int i) - { - Kokkos::single(Kokkos::PerThread(team),[&] () - { - values(team.team_rank ()) += i - team.league_rank () + team.league_size () + team.team_size (); - }); - }); + // Wait for all memory to be written. + team.team_barrier(); - team.team_barrier (); - Kokkos::single(Kokkos::PerTeam(team),[&]() + // One thread per team executes the comparison. + Kokkos::single( Kokkos::PerTeam( team ), [&] () { Scalar test = 0; Scalar value = 0; - for (int i = 0; i < 131; ++i) { - test += i - team.league_rank () + team.league_size () + team.team_size (); + + for ( int i = 0; i < 131; ++i ) { + test += i - team.league_rank() + team.league_size() + team.team_size(); } - for (int i = 0; i < team.team_size (); ++i) { - value += values(i); + + for ( int i = 0; i < team.team_size(); ++i ) { + value += values( i ); } - if (test != value) { - printf ("FAILED team_vector_parallel_for %i %i %f %f\n", - team.league_rank (), team.team_rank (), - static_cast (test), static_cast (value)); + + if ( test != value ) { + printf ( "FAILED team_parallel_for %i %i %f %f\n", + team.league_rank(), team.team_rank(), + static_cast( test ), static_cast( value ) ); flag() = 1; } }); @@ -340,164 +223,153 @@ struct functor_team_vector_for { } }; -template -struct functor_team_vector_reduce { - typedef Kokkos::TeamPolicy policy_type; +template< typename Scalar, class ExecutionSpace > +struct functor_team_reduce { + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; typedef ExecutionSpace execution_space; - Kokkos::View flag; - functor_team_vector_reduce(Kokkos::View flag_):flag(flag_) {} + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace 
> flag; - unsigned team_shmem_size(int team_size) const {return team_size*13*sizeof(Scalar)+8;} + functor_team_reduce( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} + + unsigned team_shmem_size( int team_size ) const { return team_size * 13 * sizeof( Scalar ) + 8; } KOKKOS_INLINE_FUNCTION - void operator() (typename policy_type::member_type team) const { - + void operator()( typename policy_type::member_type team ) const { Scalar value = Scalar(); - Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,131),[&] (int i, Scalar& val) - { - val += i - team.league_rank () + team.league_size () + team.team_size (); - },value); - team.team_barrier (); - Kokkos::single(Kokkos::PerTeam(team),[&]() + Kokkos::parallel_reduce( Kokkos::TeamThreadRange( team, 131 ), [&] ( int i, Scalar & val ) + { + val += i - team.league_rank() + team.league_size() + team.team_size(); + }, value ); + + team.team_barrier(); + + Kokkos::single( Kokkos::PerTeam( team ), [&] () { Scalar test = 0; - for (int i = 0; i < 131; ++i) { - test += i - team.league_rank () + team.league_size () + team.team_size (); + + for ( int i = 0; i < 131; ++i ) { + test += i - team.league_rank() + team.league_size() + team.team_size(); } - if (test != value) { - if(team.league_rank() == 0) - printf ("FAILED team_vector_parallel_reduce %i %i %f %f %lu\n", - team.league_rank (), team.team_rank (), - static_cast (test), static_cast (value),sizeof(Scalar)); - flag() = 1; - } - }); - } -}; -template -struct functor_team_vector_reduce_join { - typedef Kokkos::TeamPolicy policy_type; - typedef ExecutionSpace execution_space; + if ( test != value ) { + if ( team.league_rank() == 0 ) { + printf( "FAILED team_parallel_reduce %i %i %f %f %lu\n", + team.league_rank(), team.team_rank(), + static_cast( test ), static_cast( value ), sizeof( Scalar ) ); + } - Kokkos::View flag; - functor_team_vector_reduce_join(Kokkos::View flag_):flag(flag_) {} - - unsigned team_shmem_size(int team_size) const 
{return team_size*13*sizeof(Scalar)+8;} - - KOKKOS_INLINE_FUNCTION - void operator() (typename policy_type::member_type team) const { - - Scalar value = 0; - Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,131) - , [&] (int i, Scalar& val) - { - val += i - team.league_rank () + team.league_size () + team.team_size (); - } - , [&] (volatile Scalar& val, const volatile Scalar& src) - {val+=src;} - , value - ); - - team.team_barrier (); - Kokkos::single(Kokkos::PerTeam(team),[&]() - { - Scalar test = 0; - for (int i = 0; i < 131; ++i) { - test += i - team.league_rank () + team.league_size () + team.team_size (); - } - if (test != value) { - printf ("FAILED team_vector_parallel_reduce_join %i %i %f %f\n", - team.league_rank (), team.team_rank (), - static_cast (test), static_cast (value)); flag() = 1; } }); } }; -template -struct functor_vec_single { - typedef Kokkos::TeamPolicy policy_type; +template< typename Scalar, class ExecutionSpace > +struct functor_team_reduce_join { + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; typedef ExecutionSpace execution_space; - Kokkos::View flag; - functor_vec_single(Kokkos::View flag_):flag(flag_) {} + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; + + functor_team_reduce_join( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} + + unsigned team_shmem_size( int team_size ) const { return team_size * 13 * sizeof( Scalar ) + 8; } KOKKOS_INLINE_FUNCTION - void operator() (typename policy_type::member_type team) const { - - // Warning: this test case intentionally violates permissable semantics - // It is not valid to get references to members of the enclosing region - // inside a parallel_for and write to it. 
+ void operator()( typename policy_type::member_type team ) const { Scalar value = 0; - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,13),[&] (int i) + Kokkos::parallel_reduce( Kokkos::TeamThreadRange( team, 131 ), [&] ( int i, Scalar & val ) { - value = i; // This write is violating Kokkos semantics for nested parallelism + val += i - team.league_rank() + team.league_size() + team.team_size(); + }, + [] ( volatile Scalar & val, const volatile Scalar & src ) { val += src; }, + value + ); + + team.team_barrier(); + + Kokkos::single( Kokkos::PerTeam( team ), [&] () + { + Scalar test = 0; + + for ( int i = 0; i < 131; ++i ) { + test += i - team.league_rank() + team.league_size() + team.team_size(); + } + + if ( test != value ) { + printf( "FAILED team_vector_parallel_reduce_join %i %i %f %f\n", + team.league_rank(), team.team_rank(), + static_cast( test ), static_cast( value ) ); + + flag() = 1; + } }); - - Kokkos::single(Kokkos::PerThread(team),[&] (Scalar& val) - { - val = 1; - },value); - - Scalar value2 = 0; - Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,13), [&] (int i, Scalar& val) - { - val += value; - },value2); - - if(value2!=(value*13)) { - printf("FAILED vector_single broadcast %i %i %f %f\n",team.league_rank(),team.team_rank(),(double) value2,(double) value); - flag()=1; - } } }; -template -struct functor_vec_for { - typedef Kokkos::TeamPolicy policy_type; +template< typename Scalar, class ExecutionSpace > +struct functor_team_vector_for { + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; typedef ExecutionSpace execution_space; - Kokkos::View flag; - functor_vec_for(Kokkos::View flag_):flag(flag_) {} + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; - unsigned team_shmem_size(int team_size) const {return team_size*13*sizeof(Scalar)+8;} + functor_team_vector_for( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} + + unsigned team_shmem_size( int team_size ) const { return team_size * 
13 * sizeof( Scalar ) + 8; } KOKKOS_INLINE_FUNCTION - void operator() (typename policy_type::member_type team) const { + void operator()( typename policy_type::member_type team ) const { + typedef typename ExecutionSpace::scratch_memory_space shmem_space; + typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int; + typedef typename shared_int::size_type size_type; - typedef typename ExecutionSpace::scratch_memory_space shmem_space ; - typedef Kokkos::View shared_int; - shared_int values = shared_int(team.team_shmem(),team.team_size()*13); + const size_type shmemSize = team.team_size() * 13; + shared_int values = shared_int( team.team_shmem(), shmemSize ); - if (values.ptr_on_device () == NULL || - values.dimension_0() < (unsigned) team.team_size() * 13) { - printf ("FAILED to allocate memory of size %i\n", - static_cast (team.team_size () * 13)); - flag() = 1; + if ( values.ptr_on_device() == NULL || values.dimension_0() < shmemSize ) { + printf( "FAILED to allocate shared memory of size %u\n", + static_cast( shmemSize ) ); } else { - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,13), [&] (int i) + team.team_barrier(); + + Kokkos::single( Kokkos::PerThread( team ), [&] () { - values(13*team.team_rank() + i) = i - team.team_rank() - team.league_rank() + team.league_size() + team.team_size(); + values( team.team_rank() ) = 0; }); - Kokkos::single(Kokkos::PerThread(team),[&] () + Kokkos::parallel_for( Kokkos::TeamThreadRange( team, 131 ), [&] ( int i ) + { + Kokkos::single( Kokkos::PerThread( team ), [&] () + { + values( team.team_rank() ) += i - team.league_rank() + team.league_size() + team.team_size(); + }); + }); + + team.team_barrier(); + + Kokkos::single( Kokkos::PerTeam( team ), [&] () { Scalar test = 0; Scalar value = 0; - for (int i = 0; i < 13; ++i) { - test += i - team.team_rank() - team.league_rank() + team.league_size() + team.team_size(); - value += values(13*team.team_rank() + i); + + for ( int i = 0; i < 131; ++i ) { + 
test += i - team.league_rank() + team.league_size() + team.team_size(); } - if (test != value) { - printf ("FAILED vector_par_for %i %i %f %f\n", - team.league_rank (), team.team_rank (), - static_cast (test), static_cast (value)); + + for ( int i = 0; i < team.team_size(); ++i ) { + value += values( i ); + } + + if ( test != value ) { + printf( "FAILED team_vector_parallel_for %i %i %f %f\n", + team.league_rank(), team.team_rank(), + static_cast( test ), static_cast( value ) ); + flag() = 1; } }); @@ -505,169 +377,369 @@ struct functor_vec_for { } }; -template -struct functor_vec_red { - typedef Kokkos::TeamPolicy policy_type; +template< typename Scalar, class ExecutionSpace > +struct functor_team_vector_reduce { + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; typedef ExecutionSpace execution_space; - Kokkos::View flag; - functor_vec_red(Kokkos::View flag_):flag(flag_) {} + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; + functor_team_vector_reduce( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} + + unsigned team_shmem_size( int team_size ) const { return team_size * 13 * sizeof( Scalar ) + 8; } KOKKOS_INLINE_FUNCTION - void operator() (typename policy_type::member_type team) const { - Scalar value = 0; + void operator()( typename policy_type::member_type team ) const { + Scalar value = Scalar(); - Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,13),[&] (int i, Scalar& val) + Kokkos::parallel_reduce( Kokkos::TeamThreadRange( team, 131 ), [&] ( int i, Scalar & val ) { - val += i; - }, value); + val += i - team.league_rank() + team.league_size() + team.team_size(); + }, value ); - Kokkos::single(Kokkos::PerThread(team),[&] () + team.team_barrier(); + + Kokkos::single( Kokkos::PerTeam( team ), [&] () { Scalar test = 0; - for(int i = 0; i < 13; i++) { - test+=i; + + for ( int i = 0; i < 131; ++i ) { + test += i - team.league_rank() + team.league_size() + team.team_size(); } - if(test!=value) { - 
printf("FAILED vector_par_reduce %i %i %f %f\n",team.league_rank(),team.team_rank(),(double) test,(double) value); - flag()=1; + + if ( test != value ) { + if ( team.league_rank() == 0 ) { + printf( "FAILED team_vector_parallel_reduce %i %i %f %f %lu\n", + team.league_rank(), team.team_rank(), + static_cast( test ), static_cast( value ), sizeof( Scalar ) ); + } + + flag() = 1; } }); } }; -template -struct functor_vec_red_join { - typedef Kokkos::TeamPolicy policy_type; +template< typename Scalar, class ExecutionSpace > +struct functor_team_vector_reduce_join { + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; typedef ExecutionSpace execution_space; - Kokkos::View flag; - functor_vec_red_join(Kokkos::View flag_):flag(flag_) {} + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; + + functor_team_vector_reduce_join( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} + + unsigned team_shmem_size( int team_size ) const { return team_size * 13 * sizeof( Scalar ) + 8; } KOKKOS_INLINE_FUNCTION - void operator() (typename policy_type::member_type team) const { - Scalar value = 1; + void operator()( typename policy_type::member_type team ) const { + Scalar value = 0; - Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,13) - , [&] (int i, Scalar& val) - { val *= i; } - , [&] (Scalar& val, const Scalar& src) - {val*=src;} - , value + Kokkos::parallel_reduce( Kokkos::TeamThreadRange( team, 131 ), [&] ( int i, Scalar & val ) + { + val += i - team.league_rank() + team.league_size() + team.team_size(); + }, + [] ( volatile Scalar & val, const volatile Scalar & src ) { val += src; }, + value ); - Kokkos::single(Kokkos::PerThread(team),[&] () + team.team_barrier(); + + Kokkos::single( Kokkos::PerTeam( team ), [&] () { - Scalar test = 1; - for(int i = 0; i < 13; i++) { - test*=i; + Scalar test = 0; + + for ( int i = 0; i < 131; ++i ) { + test += i - team.league_rank() + team.league_size() + team.team_size(); } - 
if(test!=value) { - printf("FAILED vector_par_reduce_join %i %i %f %f\n",team.league_rank(),team.team_rank(),(double) test,(double) value); - flag()=1; + + if ( test != value ) { + printf( "FAILED team_vector_parallel_reduce_join %i %i %f %f\n", + team.league_rank(), team.team_rank(), + static_cast( test ), static_cast( value ) ); + + flag() = 1; } }); } }; -template -struct functor_vec_scan { - typedef Kokkos::TeamPolicy policy_type; +template< typename Scalar, class ExecutionSpace > +struct functor_vec_single { + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; typedef ExecutionSpace execution_space; - Kokkos::View flag; - functor_vec_scan(Kokkos::View flag_):flag(flag_) {} + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; + functor_vec_single( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} KOKKOS_INLINE_FUNCTION - void operator() (typename policy_type::member_type team) const { - Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team,13),[&] (int i, Scalar& val, bool final) + void operator()( typename policy_type::member_type team ) const { + // Warning: this test case intentionally violates permissable semantics. + // It is not valid to get references to members of the enclosing region + // inside a parallel_for and write to it. + Scalar value = 0; + + Kokkos::parallel_for( Kokkos::ThreadVectorRange( team, 13 ), [&] ( int i ) + { + value = i; // This write is violating Kokkos semantics for nested parallelism. 
+ }); + + Kokkos::single( Kokkos::PerThread( team ), [&] ( Scalar & val ) + { + val = 1; + }, value ); + + Scalar value2 = 0; + Kokkos::parallel_reduce( Kokkos::ThreadVectorRange( team, 13 ), [&] ( int i, Scalar & val ) + { + val += value; + }, value2 ); + + if ( value2 != ( value * 13 ) ) { + printf( "FAILED vector_single broadcast %i %i %f %f\n", + team.league_rank(), team.team_rank(), (double) value2, (double) value ); + + flag() = 1; + } + } +}; + +template< typename Scalar, class ExecutionSpace > +struct functor_vec_for { + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; + typedef ExecutionSpace execution_space; + + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; + + functor_vec_for( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} + + unsigned team_shmem_size( int team_size ) const { return team_size * 13 * sizeof( Scalar ) + 8; } + + KOKKOS_INLINE_FUNCTION + void operator()( typename policy_type::member_type team ) const { + typedef typename ExecutionSpace::scratch_memory_space shmem_space; + typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int; + + shared_int values = shared_int( team.team_shmem(), team.team_size() * 13 ); + + if ( values.ptr_on_device() == NULL || values.dimension_0() < (unsigned) team.team_size() * 13 ) { + printf( "FAILED to allocate memory of size %i\n", static_cast( team.team_size() * 13 ) ); + flag() = 1; + } + else { + Kokkos::parallel_for( Kokkos::ThreadVectorRange( team, 13 ), [&] ( int i ) + { + values( 13 * team.team_rank() + i ) = + i - team.team_rank() - team.league_rank() + team.league_size() + team.team_size(); + }); + + Kokkos::single( Kokkos::PerThread( team ), [&] () + { + Scalar test = 0; + Scalar value = 0; + + for ( int i = 0; i < 13; ++i ) { + test += i - team.team_rank() - team.league_rank() + team.league_size() + team.team_size(); + value += values( 13 * team.team_rank() + i ); + } + + if ( test != value ) { + printf( "FAILED 
vector_par_for %i %i %f %f\n", + team.league_rank(), team.team_rank(), + static_cast( test ), static_cast( value ) ); + + flag() = 1; + } + }); + } + } +}; + +template< typename Scalar, class ExecutionSpace > +struct functor_vec_red { + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; + typedef ExecutionSpace execution_space; + + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; + + functor_vec_red( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} + + KOKKOS_INLINE_FUNCTION + void operator()( typename policy_type::member_type team ) const { + Scalar value = 0; + + // When no reducer is given the default is summation. + Kokkos::parallel_reduce( Kokkos::ThreadVectorRange( team, 13 ), [&] ( int i, Scalar & val ) { val += i; - if(final) { + }, value ); + + Kokkos::single( Kokkos::PerThread( team ), [&] () + { + Scalar test = 0; + + for ( int i = 0; i < 13; i++ ) test += i; + + if ( test != value ) { + printf( "FAILED vector_par_reduce %i %i %f %f\n", + team.league_rank(), team.team_rank(), (double) test, (double) value ); + + flag() = 1; + } + }); + } +}; + +template< typename Scalar, class ExecutionSpace > +struct functor_vec_red_join { + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; + typedef ExecutionSpace execution_space; + + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; + + functor_vec_red_join( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} + + KOKKOS_INLINE_FUNCTION + void operator()( typename policy_type::member_type team ) const { + // Must initialize to the identity value for the reduce operation + // for this test: + // ( identity, operation ) = ( 1 , *= ) + Scalar value = 1; + + Kokkos::parallel_reduce( Kokkos::ThreadVectorRange( team, 13 ), [&] ( int i, Scalar & val ) + { + val *= ( i % 5 + 1 ); + }, + [&] ( Scalar & val, const Scalar & src ) { val *= src; }, + value + ); + + Kokkos::single( Kokkos::PerThread( team ), [&] () + { + Scalar 
test = 1; + + for ( int i = 0; i < 13; i++ ) test *= ( i % 5 + 1 ); + + if ( test != value ) { + printf( "FAILED vector_par_reduce_join %i %i %f %f\n", + team.league_rank(), team.team_rank(), (double) test, (double) value ); + + flag() = 1; + } + }); + } +}; + +template< typename Scalar, class ExecutionSpace > +struct functor_vec_scan { + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; + typedef ExecutionSpace execution_space; + + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; + functor_vec_scan( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} + + KOKKOS_INLINE_FUNCTION + void operator()( typename policy_type::member_type team ) const { + Kokkos::parallel_scan( Kokkos::ThreadVectorRange( team, 13 ), [&] ( int i, Scalar & val, bool final ) + { + val += i; + + if ( final ) { Scalar test = 0; - for(int k = 0; k <= i; k++) { - test+=k; - } - if(test!=val) { - printf("FAILED vector_par_scan %i %i %f %f\n",team.league_rank(),team.team_rank(),(double) test,(double) val); - flag()=1; + for ( int k = 0; k <= i; k++ ) test += k; + + if ( test != val ) { + printf( "FAILED vector_par_scan %i %i %f %f\n", + team.league_rank(), team.team_rank(), (double) test, (double) val ); + + flag() = 1; } } }); } }; -template +template< typename Scalar, class ExecutionSpace > struct functor_reduce { typedef double value_type; - typedef Kokkos::TeamPolicy policy_type; + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; typedef ExecutionSpace execution_space; - Kokkos::View flag; - functor_reduce(Kokkos::View flag_):flag(flag_) {} + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; + functor_reduce( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} KOKKOS_INLINE_FUNCTION - void operator() (typename policy_type::member_type team, double& sum) const { + void operator()( typename policy_type::member_type team, double & sum ) const { sum += team.league_rank() * 100 + 
team.thread_rank(); } }; -template -bool test_scalar(int nteams, int team_size, int test) { - Kokkos::View d_flag("flag"); - typename Kokkos::View::HostMirror h_flag("h_flag"); - h_flag() = 0 ; - Kokkos::deep_copy(d_flag,h_flag); - - if(test==0) - Kokkos::parallel_for( std::string("A") , Kokkos::TeamPolicy(nteams,team_size,8), - functor_vec_red(d_flag)); - if(test==1) - Kokkos::parallel_for( Kokkos::TeamPolicy(nteams,team_size,8), - functor_vec_red_join(d_flag)); - if(test==2) - Kokkos::parallel_for( Kokkos::TeamPolicy(nteams,team_size,8), - functor_vec_scan(d_flag)); - if(test==3) - Kokkos::parallel_for( Kokkos::TeamPolicy(nteams,team_size,8), - functor_vec_for(d_flag)); - if(test==4) - Kokkos::parallel_for( "B" , Kokkos::TeamPolicy(nteams,team_size,8), - functor_vec_single(d_flag)); - if(test==5) - Kokkos::parallel_for( Kokkos::TeamPolicy(nteams,team_size), - functor_team_for(d_flag)); - if(test==6) - Kokkos::parallel_for( Kokkos::TeamPolicy(nteams,team_size), - functor_team_reduce(d_flag)); - if(test==7) - Kokkos::parallel_for( Kokkos::TeamPolicy(nteams,team_size), - functor_team_reduce_join(d_flag)); - if(test==8) - Kokkos::parallel_for( Kokkos::TeamPolicy(nteams,team_size,8), - functor_team_vector_for(d_flag)); - if(test==9) - Kokkos::parallel_for( Kokkos::TeamPolicy(nteams,team_size,8), - functor_team_vector_reduce(d_flag)); - if(test==10) - Kokkos::parallel_for( Kokkos::TeamPolicy(nteams,team_size,8), - functor_team_vector_reduce_join(d_flag)); - - Kokkos::deep_copy(h_flag,d_flag); +template< typename Scalar, class ExecutionSpace > +bool test_scalar( int nteams, int team_size, int test ) { + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > d_flag( "flag" ); + typename Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace >::HostMirror h_flag( "h_flag" ); + h_flag() = 0; + Kokkos::deep_copy( d_flag, h_flag ); - return (h_flag() == 0); + if ( test == 0 ) { + Kokkos::parallel_for( std::string( "A" ), Kokkos::TeamPolicy< ExecutionSpace >( nteams, 
team_size, 8 ), + functor_vec_red< Scalar, ExecutionSpace >( d_flag ) ); + } + else if ( test == 1 ) { + Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ), + functor_vec_red_join< Scalar, ExecutionSpace >( d_flag ) ); + } + else if ( test == 2 ) { + Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ), + functor_vec_scan< Scalar, ExecutionSpace >( d_flag ) ); + } + else if ( test == 3 ) { + Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ), + functor_vec_for< Scalar, ExecutionSpace >( d_flag ) ); + } + else if ( test == 4 ) { + Kokkos::parallel_for( "B", Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ), + functor_vec_single< Scalar, ExecutionSpace >( d_flag ) ); + } + else if ( test == 5 ) { + Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size ), + functor_team_for< Scalar, ExecutionSpace >( d_flag ) ); + } + else if ( test == 6 ) { + Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size ), + functor_team_reduce< Scalar, ExecutionSpace >( d_flag ) ); + } + else if ( test == 7 ) { + Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size ), + functor_team_reduce_join< Scalar, ExecutionSpace >( d_flag ) ); + } + else if ( test == 8 ) { + Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ), + functor_team_vector_for< Scalar, ExecutionSpace >( d_flag ) ); + } + else if ( test == 9 ) { + Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ), + functor_team_vector_reduce< Scalar, ExecutionSpace >( d_flag ) ); + } + else if ( test == 10 ) { + Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ), + functor_team_vector_reduce_join< Scalar, ExecutionSpace >( d_flag ) ); + } + + Kokkos::deep_copy( h_flag, d_flag ); + + return ( h_flag() == 0 ); } -template -bool Test(int test) { +template< class ExecutionSpace 
> +bool Test( int test ) { bool passed = true; - passed = passed && test_scalar(317,33,test); - passed = passed && test_scalar(317,33,test); - passed = passed && test_scalar(317,33,test); - passed = passed && test_scalar(317,33,test); - passed = passed && test_scalar(317,33,test); + passed = passed && test_scalar< int, ExecutionSpace >( 317, 33, test ); + passed = passed && test_scalar< long long int, ExecutionSpace >( 317, 33, test ); + passed = passed && test_scalar< float, ExecutionSpace >( 317, 33, test ); + passed = passed && test_scalar< double, ExecutionSpace >( 317, 33, test ); + passed = passed && test_scalar< my_complex, ExecutionSpace >( 317, 33, test ); + return passed; } -} - +} // namespace TestTeamVector diff --git a/lib/kokkos/core/unit_test/TestTemplateMetaFunctions.hpp b/lib/kokkos/core/unit_test/TestTemplateMetaFunctions.hpp index 203c952679..7bcf3f8a32 100644 --- a/lib/kokkos/core/unit_test/TestTemplateMetaFunctions.hpp +++ b/lib/kokkos/core/unit_test/TestTemplateMetaFunctions.hpp @@ -47,152 +47,162 @@ namespace { -template +template< class Scalar, class ExecutionSpace > struct SumPlain { typedef ExecutionSpace execution_space; - typedef typename Kokkos::View type; + typedef typename Kokkos::View< Scalar*, execution_space > type; + type view; - SumPlain(type view_):view(view_) {} + + SumPlain( type view_ ) : view( view_ ) {} KOKKOS_INLINE_FUNCTION - void operator() (int i, Scalar& val) { + void operator() ( int i, Scalar & val ) { val += Scalar(); } }; -template +template< class Scalar, class ExecutionSpace > struct SumInitJoinFinalValueType { typedef ExecutionSpace execution_space; - typedef typename Kokkos::View type; - type view; + typedef typename Kokkos::View< Scalar*, execution_space > type; typedef Scalar value_type; - SumInitJoinFinalValueType(type view_):view(view_) {} + + type view; + + SumInitJoinFinalValueType( type view_ ) : view( view_ ) {} KOKKOS_INLINE_FUNCTION - void init(value_type& val) const { + void init( value_type & val ) 
const { val = value_type(); } KOKKOS_INLINE_FUNCTION - void join(volatile value_type& val, volatile value_type& src) const { + void join( volatile value_type & val, volatile value_type & src ) const { val += src; } KOKKOS_INLINE_FUNCTION - void operator() (int i, value_type& val) const { + void operator()( int i, value_type & val ) const { val += value_type(); } - }; -template +template< class Scalar, class ExecutionSpace > struct SumInitJoinFinalValueType2 { typedef ExecutionSpace execution_space; - typedef typename Kokkos::View type; - type view; + typedef typename Kokkos::View< Scalar*, execution_space > type; typedef Scalar value_type; - SumInitJoinFinalValueType2(type view_):view(view_) {} + + type view; + + SumInitJoinFinalValueType2( type view_ ) : view( view_ ) {} KOKKOS_INLINE_FUNCTION - void init(volatile value_type& val) const { + void init( volatile value_type & val ) const { val = value_type(); } KOKKOS_INLINE_FUNCTION - void join(volatile value_type& val, const volatile value_type& src) const { + void join( volatile value_type & val, const volatile value_type & src ) const { val += src; } KOKKOS_INLINE_FUNCTION - void operator() (int i, value_type& val) const { + void operator()( int i, value_type & val ) const { val += value_type(); } - }; -template +template< class Scalar, class ExecutionSpace > struct SumInitJoinFinalValueTypeArray { typedef ExecutionSpace execution_space; - typedef typename Kokkos::View type; - type view; + typedef typename Kokkos::View< Scalar*, execution_space > type; typedef Scalar value_type[]; + + type view; int n; - SumInitJoinFinalValueTypeArray(type view_, int n_):view(view_),n(n_) {} + + SumInitJoinFinalValueTypeArray( type view_, int n_ ) : view( view_ ), n( n_ ) {} KOKKOS_INLINE_FUNCTION - void init(value_type val) const { - for(int k=0;k +template< class Scalar, class ExecutionSpace > struct SumWrongInitJoinFinalValueType { typedef ExecutionSpace execution_space; - typedef typename Kokkos::View type; - type view; + 
typedef typename Kokkos::View< Scalar*, execution_space > type; typedef Scalar value_type; - SumWrongInitJoinFinalValueType(type view_):view(view_) {} + + type view; + + SumWrongInitJoinFinalValueType( type view_ ) : view( view_ ) {} KOKKOS_INLINE_FUNCTION - void init(double& val) const { + void init( double & val ) const { val = double(); } KOKKOS_INLINE_FUNCTION - void join(volatile value_type& val, const value_type& src) const { + void join( volatile value_type & val, const value_type & src ) const { val += src; } KOKKOS_INLINE_FUNCTION - void operator() (int i, value_type& val) const { + void operator()( int i, value_type & val ) const { val += value_type(); } - }; -template +template< class Scalar, class ExecutionSpace > void TestTemplateMetaFunctions() { - typedef typename Kokkos::View type; - type a("A",100); + typedef typename Kokkos::View< Scalar*, ExecutionSpace > type; + type a( "A", 100 ); /* - int sum_plain_has_init_arg = Kokkos::Impl::FunctorHasInit, Scalar& >::value; - ASSERT_EQ(sum_plain_has_init_arg,0); - int sum_initjoinfinalvaluetype_has_init_arg = Kokkos::Impl::FunctorHasInit, Scalar >::value; - ASSERT_EQ(sum_initjoinfinalvaluetype_has_init_arg,1); - int sum_initjoinfinalvaluetype_has_init_arg2 = Kokkos::Impl::FunctorHasInit, Scalar >::value; - ASSERT_EQ(sum_initjoinfinalvaluetype_has_init_arg2,1); - int sum_wronginitjoinfinalvaluetype_has_init_arg = Kokkos::Impl::FunctorHasInit, Scalar >::value; - ASSERT_EQ(sum_wronginitjoinfinalvaluetype_has_init_arg,0); + int sum_plain_has_init_arg = Kokkos::Impl::FunctorHasInit< SumPlain, Scalar & >::value; + ASSERT_EQ( sum_plain_has_init_arg, 0 ); + int sum_initjoinfinalvaluetype_has_init_arg = Kokkos::Impl::FunctorHasInit< SumInitJoinFinalValueType, Scalar >::value; + ASSERT_EQ( sum_initjoinfinalvaluetype_has_init_arg, 1 ); + int sum_initjoinfinalvaluetype_has_init_arg2 = Kokkos::Impl::FunctorHasInit< SumInitJoinFinalValueType2, Scalar >::value; + ASSERT_EQ( sum_initjoinfinalvaluetype_has_init_arg2, 1 ); + 
int sum_wronginitjoinfinalvaluetype_has_init_arg = Kokkos::Impl::FunctorHasInit< SumWrongInitJoinFinalValueType, Scalar >::value; + ASSERT_EQ( sum_wronginitjoinfinalvaluetype_has_init_arg, 0 ); - //int sum_initjoinfinalvaluetypearray_has_init_arg = Kokkos::Impl::FunctorHasInit, Scalar[] >::value; - //ASSERT_EQ(sum_initjoinfinalvaluetypearray_has_init_arg,1); + //int sum_initjoinfinalvaluetypearray_has_init_arg = Kokkos::Impl::FunctorHasInit< SumInitJoinFinalValueTypeArray, Scalar[] >::value; + //ASSERT_EQ( sum_initjoinfinalvaluetypearray_has_init_arg, 1 ); - //printf("Values Init: %i %i %i\n",sum_plain_has_init_arg,sum_initjoinfinalvaluetype_has_init_arg,sum_wronginitjoinfinalvaluetype_has_init_arg); + //printf( "Values Init: %i %i %i\n", sum_plain_has_init_arg, sum_initjoinfinalvaluetype_has_init_arg, sum_wronginitjoinfinalvaluetype_has_init_arg ); - int sum_plain_has_join_arg = Kokkos::Impl::FunctorHasJoin, Scalar >::value; - ASSERT_EQ(sum_plain_has_join_arg,0); - int sum_initjoinfinalvaluetype_has_join_arg = Kokkos::Impl::FunctorHasJoin, Scalar >::value; - ASSERT_EQ(sum_initjoinfinalvaluetype_has_join_arg,1); - int sum_initjoinfinalvaluetype_has_join_arg2 = Kokkos::Impl::FunctorHasJoin, Scalar >::value; - ASSERT_EQ(sum_initjoinfinalvaluetype_has_join_arg2,1); - int sum_wronginitjoinfinalvaluetype_has_join_arg = Kokkos::Impl::FunctorHasJoin, Scalar >::value; - ASSERT_EQ(sum_wronginitjoinfinalvaluetype_has_join_arg,0); + int sum_plain_has_join_arg = Kokkos::Impl::FunctorHasJoin< SumPlain, Scalar >::value; + ASSERT_EQ( sum_plain_has_join_arg, 0 ); + int sum_initjoinfinalvaluetype_has_join_arg = Kokkos::Impl::FunctorHasJoin< SumInitJoinFinalValueType, Scalar >::value; + ASSERT_EQ( sum_initjoinfinalvaluetype_has_join_arg, 1 ); + int sum_initjoinfinalvaluetype_has_join_arg2 = Kokkos::Impl::FunctorHasJoin< SumInitJoinFinalValueType2, Scalar >::value; + ASSERT_EQ( sum_initjoinfinalvaluetype_has_join_arg2, 1 ); + int sum_wronginitjoinfinalvaluetype_has_join_arg = 
Kokkos::Impl::FunctorHasJoin< SumWrongInitJoinFinalValueType, Scalar >::value; + ASSERT_EQ( sum_wronginitjoinfinalvaluetype_has_join_arg, 0 ); + + //printf( "Values Join: %i %i %i\n", sum_plain_has_join_arg, sum_initjoinfinalvaluetype_has_join_arg, sum_wronginitjoinfinalvaluetype_has_join_arg ); */ - //printf("Values Join: %i %i %i\n",sum_plain_has_join_arg,sum_initjoinfinalvaluetype_has_join_arg,sum_wronginitjoinfinalvaluetype_has_join_arg); } -} +} // namespace diff --git a/lib/kokkos/core/unit_test/TestTile.hpp b/lib/kokkos/core/unit_test/TestTile.hpp index 842131debb..7d096c24c3 100644 --- a/lib/kokkos/core/unit_test/TestTile.hpp +++ b/lib/kokkos/core/unit_test/TestTile.hpp @@ -1,12 +1,12 @@ //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -35,7 +35,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER @@ -47,108 +47,96 @@ namespace TestTile { -template < typename Device , typename TileLayout> +template < typename Device, typename TileLayout > struct ReduceTileErrors { - typedef Device execution_space ; - - typedef Kokkos::View< ptrdiff_t**, TileLayout, Device> array_type; - typedef Kokkos::View< ptrdiff_t[ TileLayout::N0 ][ TileLayout::N1 ], Kokkos::LayoutLeft , Device > tile_type ; - - array_type m_array ; - + typedef Device execution_space; + typedef Kokkos::View< ptrdiff_t**, TileLayout, Device > array_type; + typedef Kokkos::View< ptrdiff_t[ TileLayout::N0 ][ TileLayout::N1 ], Kokkos::LayoutLeft, Device > tile_type; typedef ptrdiff_t value_type; - ReduceTileErrors( array_type a ) - : m_array(a) - {} + array_type m_array; + ReduceTileErrors( array_type a ) : m_array( a ) {} KOKKOS_INLINE_FUNCTION - static void init( value_type & errors ) - { - errors = 0; - } + static void init( value_type & errors ) { errors = 0; } KOKKOS_INLINE_FUNCTION - static void join( volatile value_type & errors , + static void join( volatile value_type & errors, const volatile value_type & src_errors ) { errors += src_errors; } - // Initialize + // Initialize. 
KOKKOS_INLINE_FUNCTION void operator()( size_t iwork ) const { const size_t i = iwork % m_array.dimension_0(); const size_t j = iwork / m_array.dimension_0(); + if ( j < m_array.dimension_1() ) { - m_array(i,j) = & m_array(i,j) - & m_array(0,0); - -// printf("m_array(%d,%d) = %d\n",int(i),int(j),int(m_array(i,j))); + m_array( i, j ) = &m_array( i, j ) - &m_array( 0, 0 ); + //printf( "m_array(%d, %d) = %d\n", int( i ), int( j ), int( m_array( i, j ) ) ); } } // Verify: KOKKOS_INLINE_FUNCTION - void operator()( size_t iwork , value_type & errors ) const + void operator()( size_t iwork, value_type & errors ) const { - const size_t tile_dim0 = ( m_array.dimension_0() + TileLayout::N0 - 1 ) / TileLayout::N0 ; - const size_t tile_dim1 = ( m_array.dimension_1() + TileLayout::N1 - 1 ) / TileLayout::N1 ; + const size_t tile_dim0 = ( m_array.dimension_0() + TileLayout::N0 - 1 ) / TileLayout::N0; + const size_t tile_dim1 = ( m_array.dimension_1() + TileLayout::N1 - 1 ) / TileLayout::N1; - const size_t itile = iwork % tile_dim0 ; - const size_t jtile = iwork / tile_dim0 ; + const size_t itile = iwork % tile_dim0; + const size_t jtile = iwork / tile_dim0; if ( jtile < tile_dim1 ) { + tile_type tile = Kokkos::Experimental::tile_subview( m_array, itile, jtile ); - tile_type tile = Kokkos::Experimental::tile_subview( m_array , itile , jtile ); - - if ( tile(0,0) != ptrdiff_t(( itile + jtile * tile_dim0 ) * TileLayout::N0 * TileLayout::N1 ) ) { - ++errors ; + if ( tile( 0, 0 ) != ptrdiff_t( ( itile + jtile * tile_dim0 ) * TileLayout::N0 * TileLayout::N1 ) ) { + ++errors; } else { + for ( size_t j = 0; j < size_t( TileLayout::N1 ); ++j ) { + for ( size_t i = 0; i < size_t( TileLayout::N0 ); ++i ) { + const size_t iglobal = i + itile * TileLayout::N0; + const size_t jglobal = j + jtile * TileLayout::N1; - for ( size_t j = 0 ; j < size_t(TileLayout::N1) ; ++j ) { - for ( size_t i = 0 ; i < size_t(TileLayout::N0) ; ++i ) { - const size_t iglobal = i + itile * TileLayout::N0 ; - const 
size_t jglobal = j + jtile * TileLayout::N1 ; - - if ( iglobal < m_array.dimension_0() && jglobal < m_array.dimension_1() ) { - if ( tile(i,j) != ptrdiff_t( tile(0,0) + i + j * TileLayout::N0 ) ) ++errors ; - -// printf("tile(%d,%d)(%d,%d) = %d\n",int(itile),int(jtile),int(i),int(j),int(tile(i,j))); + if ( iglobal < m_array.dimension_0() && jglobal < m_array.dimension_1() ) { + if ( tile( i, j ) != ptrdiff_t( tile( 0, 0 ) + i + j * TileLayout::N0 ) ) ++errors; + //printf( "tile(%d, %d)(%d, %d) = %d\n", int( itile ), int( jtile ), int( i ), int( j ), int( tile( i, j ) ) ); + } } } - } } } } }; -template< class Space , unsigned N0 , unsigned N1 > -void test( const size_t dim0 , const size_t dim1 ) +template< class Space, unsigned N0, unsigned N1 > +void test( const size_t dim0, const size_t dim1 ) { - typedef Kokkos::LayoutTileLeft array_layout ; - typedef ReduceTileErrors< Space , array_layout > functor_type ; + typedef Kokkos::LayoutTileLeft< N0, N1 > array_layout; + typedef ReduceTileErrors< Space, array_layout > functor_type; - const size_t tile_dim0 = ( dim0 + N0 - 1 ) / N0 ; - const size_t tile_dim1 = ( dim1 + N1 - 1 ) / N1 ; - - typename functor_type::array_type array("",dim0,dim1); + const size_t tile_dim0 = ( dim0 + N0 - 1 ) / N0; + const size_t tile_dim1 = ( dim1 + N1 - 1 ) / N1; - Kokkos::parallel_for( Kokkos::RangePolicy(0,dim0*dim1) , functor_type( array ) ); + typename functor_type::array_type array( "", dim0, dim1 ); - ptrdiff_t error = 0 ; + Kokkos::parallel_for( Kokkos::RangePolicy< Space, size_t >( 0, dim0 * dim1 ), functor_type( array ) ); - Kokkos::parallel_reduce( Kokkos::RangePolicy(0,tile_dim0*tile_dim1) , functor_type( array ) , error ); + ptrdiff_t error = 0; - EXPECT_EQ( error , ptrdiff_t(0) ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< Space, size_t >( 0, tile_dim0 * tile_dim1 ), functor_type( array ), error ); + + EXPECT_EQ( error, ptrdiff_t( 0 ) ); } -} /* namespace TestTile */ +} // namespace TestTile #endif //TEST_TILE_HPP - diff 
--git a/lib/kokkos/core/unit_test/TestUtilities.hpp b/lib/kokkos/core/unit_test/TestUtilities.hpp index 947be03e39..be4a93b894 100644 --- a/lib/kokkos/core/unit_test/TestUtilities.hpp +++ b/lib/kokkos/core/unit_test/TestUtilities.hpp @@ -49,258 +49,253 @@ #include -/*--------------------------------------------------------------------------*/ - namespace Test { inline void test_utilities() { using namespace Kokkos::Impl; - { - using i = integer_sequence; - using j = make_integer_sequence; - static_assert( std::is_same::value, "Error: make_integer_sequence" ); + { + using i = integer_sequence< int >; + using j = make_integer_sequence< int, 0 >; + + static_assert( std::is_same< i, j >::value, "Error: make_integer_sequence" ); static_assert( i::size() == 0u, "Error: integer_sequence.size()" ); } - { - using i = integer_sequence; - using j = make_integer_sequence; + using i = integer_sequence< int, 0 >; + using j = make_integer_sequence< int, 1 >; - static_assert( std::is_same::value, "Error: make_integer_sequence" ); + static_assert( std::is_same< i, j >::value, "Error: make_integer_sequence" ); static_assert( i::size() == 1u, "Error: integer_sequence.size()" ); - static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 0, i >::value == 0, "Error: integer_sequence_at" ); - static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 0, i{} ) == 0, "Error: at(unsigned, integer_sequence)" ); } - { - using i = integer_sequence; - using j = make_integer_sequence; + using i = integer_sequence< int, 0, 1 >; + using j = make_integer_sequence< int, 2 >; - static_assert( std::is_same::value, "Error: make_integer_sequence" ); + static_assert( std::is_same< i, j >::value, "Error: make_integer_sequence" ); static_assert( i::size() == 2u, "Error: integer_sequence.size()" ); - static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); - static_assert( 
integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 0, i >::value == 0, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 1, i >::value == 1, "Error: integer_sequence_at" ); - static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 0, i{} ) == 0, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 1, i{} ) == 1, "Error: at(unsigned, integer_sequence)" ); } { - using i = integer_sequence; - using j = make_integer_sequence; + using i = integer_sequence< int, 0, 1, 2 >; + using j = make_integer_sequence< int, 3 >; - static_assert( std::is_same::value, "Error: make_integer_sequence" ); + static_assert( std::is_same< i, j >::value, "Error: make_integer_sequence" ); static_assert( i::size() == 3u, "Error: integer_sequence.size()" ); - static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 0, i >::value == 0, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 1, i >::value == 1, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 2, i >::value == 2, "Error: integer_sequence_at" ); - static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 0, i{} ) == 0, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 1, i{} ) == 1, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 2, i{} ) == 2, "Error: at(unsigned, integer_sequence)" ); } { - using i = integer_sequence; - using j = 
make_integer_sequence; + using i = integer_sequence< int, 0, 1, 2, 3 >; + using j = make_integer_sequence< int, 4 >; - static_assert( std::is_same::value, "Error: make_integer_sequence" ); + static_assert( std::is_same< i, j >::value, "Error: make_integer_sequence" ); static_assert( i::size() == 4u, "Error: integer_sequence.size()" ); - static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<3, i>::value == 3, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 0, i >::value == 0, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 1, i >::value == 1, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 2, i >::value == 2, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 3, i >::value == 3, "Error: integer_sequence_at" ); - static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 0, i{} ) == 0, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 1, i{} ) == 1, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 2, i{} ) == 2, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 3, i{} ) == 3, "Error: at(unsigned, integer_sequence)" ); } { - using i = integer_sequence; - using j = make_integer_sequence; + using i = integer_sequence< int, 0, 1, 2, 3, 4 >; + using j = make_integer_sequence< int, 5 >; - static_assert( std::is_same::value, "Error: make_integer_sequence" ); + static_assert( std::is_same< i, j >::value, "Error: make_integer_sequence" ); 
static_assert( i::size() == 5u, "Error: integer_sequence.size()" ); - static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<3, i>::value == 3, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<4, i>::value == 4, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 0, i >::value == 0, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 1, i >::value == 1, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 2, i >::value == 2, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 3, i >::value == 3, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 4, i >::value == 4, "Error: integer_sequence_at" ); - static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 0, i{} ) == 0, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 1, i{} ) == 1, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 2, i{} ) == 2, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 3, i{} ) == 3, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 4, i{} ) == 4, "Error: at(unsigned, integer_sequence)" ); } { - using i = integer_sequence; - using j = make_integer_sequence; + using i = integer_sequence< int, 0, 1, 2, 3, 4, 5 >; + using j = make_integer_sequence< int, 6 >; - static_assert( std::is_same::value, "Error: make_integer_sequence" ); + static_assert( 
std::is_same< i, j >::value, "Error: make_integer_sequence" ); static_assert( i::size() == 6u, "Error: integer_sequence.size()" ); - static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<3, i>::value == 3, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<4, i>::value == 4, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<5, i>::value == 5, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 0, i >::value == 0, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 1, i >::value == 1, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 2, i >::value == 2, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 3, i >::value == 3, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 4, i >::value == 4, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 5, i >::value == 5, "Error: integer_sequence_at" ); - static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(5, i{}) == 5, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 0, i{} ) == 0, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 1, i{} ) == 1, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 2, i{} ) == 2, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 3, i{} ) == 3, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 
4, i{} ) == 4, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 5, i{} ) == 5, "Error: at(unsigned, integer_sequence)" ); } { - using i = integer_sequence; - using j = make_integer_sequence; + using i = integer_sequence< int, 0, 1, 2, 3, 4, 5, 6 >; + using j = make_integer_sequence< int, 7 >; - static_assert( std::is_same::value, "Error: make_integer_sequence" ); + static_assert( std::is_same< i, j >::value, "Error: make_integer_sequence" ); static_assert( i::size() == 7u, "Error: integer_sequence.size()" ); - static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<3, i>::value == 3, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<4, i>::value == 4, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<5, i>::value == 5, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<6, i>::value == 6, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 0, i >::value == 0, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 1, i >::value == 1, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 2, i >::value == 2, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 3, i >::value == 3, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 4, i >::value == 4, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 5, i >::value == 5, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 6, i >::value == 6, "Error: integer_sequence_at" ); - static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(2, i{}) == 2, "Error: at(unsigned, 
integer_sequence)" ); - static_assert( at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(5, i{}) == 5, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(6, i{}) == 6, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 0, i{} ) == 0, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 1, i{} ) == 1, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 2, i{} ) == 2, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 3, i{} ) == 3, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 4, i{} ) == 4, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 5, i{} ) == 5, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 6, i{} ) == 6, "Error: at(unsigned, integer_sequence)" ); } { - using i = integer_sequence; - using j = make_integer_sequence; + using i = integer_sequence< int, 0, 1, 2, 3, 4, 5, 6, 7 >; + using j = make_integer_sequence< int, 8 >; - static_assert( std::is_same::value, "Error: make_integer_sequence" ); + static_assert( std::is_same< i, j >::value, "Error: make_integer_sequence" ); static_assert( i::size() == 8u, "Error: integer_sequence.size()" ); - static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<3, i>::value == 3, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<4, i>::value == 4, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<5, i>::value == 5, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<6, i>::value == 6, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<7, i>::value == 7, "Error: integer_sequence_at" ); + static_assert( 
integer_sequence_at< 0, i >::value == 0, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 1, i >::value == 1, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 2, i >::value == 2, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 3, i >::value == 3, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 4, i >::value == 4, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 5, i >::value == 5, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 6, i >::value == 6, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 7, i >::value == 7, "Error: integer_sequence_at" ); - static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(5, i{}) == 5, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(6, i{}) == 6, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(7, i{}) == 7, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 0, i{} ) == 0, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 1, i{} ) == 1, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 2, i{} ) == 2, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 3, i{} ) == 3, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 4, i{} ) == 4, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 5, i{} ) == 5, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 6, i{} ) == 6, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 7, i{} ) == 7, "Error: at(unsigned, integer_sequence)" ); } { - using i = integer_sequence; - using j = 
make_integer_sequence; + using i = integer_sequence< int, 0, 1, 2, 3, 4, 5, 6, 7, 8 >; + using j = make_integer_sequence< int, 9 >; - static_assert( std::is_same::value, "Error: make_integer_sequence" ); + static_assert( std::is_same< i, j >::value, "Error: make_integer_sequence" ); static_assert( i::size() == 9u, "Error: integer_sequence.size()" ); - static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<3, i>::value == 3, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<4, i>::value == 4, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<5, i>::value == 5, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<6, i>::value == 6, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<7, i>::value == 7, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<8, i>::value == 8, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 0, i >::value == 0, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 1, i >::value == 1, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 2, i >::value == 2, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 3, i >::value == 3, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 4, i >::value == 4, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 5, i >::value == 5, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 6, i >::value == 6, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 7, i >::value == 7, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 8, i >::value == 8, "Error: integer_sequence_at" ); - static_assert( at(0, i{}) == 0, "Error: 
at(unsigned, integer_sequence)" ); - static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(5, i{}) == 5, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(6, i{}) == 6, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(7, i{}) == 7, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(8, i{}) == 8, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 0, i{} ) == 0, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 1, i{} ) == 1, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 2, i{} ) == 2, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 3, i{} ) == 3, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 4, i{} ) == 4, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 5, i{} ) == 5, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 6, i{} ) == 6, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 7, i{} ) == 7, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 8, i{} ) == 8, "Error: at(unsigned, integer_sequence)" ); } { - using i = integer_sequence; - using j = make_integer_sequence; + using i = integer_sequence< int, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 >; + using j = make_integer_sequence< int, 10 >; - static_assert( std::is_same::value, "Error: make_integer_sequence" ); + static_assert( std::is_same< i, j >::value, "Error: make_integer_sequence" ); static_assert( i::size() == 10u, "Error: integer_sequence.size()" ); - static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<2, i>::value == 2, 
"Error: integer_sequence_at" ); - static_assert( integer_sequence_at<3, i>::value == 3, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<4, i>::value == 4, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<5, i>::value == 5, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<6, i>::value == 6, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<7, i>::value == 7, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<8, i>::value == 8, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<9, i>::value == 9, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 0, i >::value == 0, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 1, i >::value == 1, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 2, i >::value == 2, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 3, i >::value == 3, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 4, i >::value == 4, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 5, i >::value == 5, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 6, i >::value == 6, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 7, i >::value == 7, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 8, i >::value == 8, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 9, i >::value == 9, "Error: integer_sequence_at" ); - static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(5, i{}) == 5, "Error: at(unsigned, 
integer_sequence)" ); - static_assert( at(6, i{}) == 6, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(7, i{}) == 7, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(8, i{}) == 8, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(9, i{}) == 9, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 0, i{} ) == 0, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 1, i{} ) == 1, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 2, i{} ) == 2, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 3, i{} ) == 3, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 4, i{} ) == 4, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 5, i{} ) == 5, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 6, i{} ) == 6, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 7, i{} ) == 7, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 8, i{} ) == 8, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 9, i{} ) == 9, "Error: at(unsigned, integer_sequence)" ); } { - using i = make_integer_sequence; - using r = reverse_integer_sequence; - using gr = integer_sequence; + using i = make_integer_sequence< int, 5 >; + using r = reverse_integer_sequence< i >; + using gr = integer_sequence< int, 4, 3, 2, 1, 0 >; - static_assert( std::is_same::value, "Error: reverse_integer_sequence" ); + static_assert( std::is_same< r, gr >::value, "Error: reverse_integer_sequence" ); } { - using s = make_integer_sequence; - using e = exclusive_scan_integer_sequence; - using i = inclusive_scan_integer_sequence; + using s = make_integer_sequence< int, 10 >; + using e = exclusive_scan_integer_sequence< s >; + using i = inclusive_scan_integer_sequence< s >; - using ge = integer_sequence; - using gi = integer_sequence; + using ge = integer_sequence< int, 0, 0, 1, 3, 6, 10, 15, 21, 28, 36 >; + using gi = integer_sequence< int, 0, 1, 3, 6, 10, 15, 21, 
28, 36, 45 >; - static_assert( e::value == 45, "Error: scan value"); - static_assert( i::value == 45, "Error: scan value"); + static_assert( e::value == 45, "Error: scan value" ); + static_assert( i::value == 45, "Error: scan value" ); - static_assert( std::is_same< e::type, ge >::value, "Error: exclusive_scan"); - static_assert( std::is_same< i::type, gi >::value, "Error: inclusive_scan"); + static_assert( std::is_same< e::type, ge >::value, "Error: exclusive_scan" ); + static_assert( std::is_same< i::type, gi >::value, "Error: inclusive_scan" ); } - - } } // namespace Test diff --git a/lib/kokkos/core/unit_test/TestViewAPI.hpp b/lib/kokkos/core/unit_test/TestViewAPI.hpp index a96f31cc12..cbf86dc58c 100644 --- a/lib/kokkos/core/unit_test/TestViewAPI.hpp +++ b/lib/kokkos/core/unit_test/TestViewAPI.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -48,103 +48,92 @@ #include #include -/*--------------------------------------------------------------------------*/ - - -/*--------------------------------------------------------------------------*/ - namespace Test { -template< class T , class ... P > -size_t allocation_count( const Kokkos::View & view ) +template< class T, class ... P > +size_t allocation_count( const Kokkos::View< T, P... 
> & view ) { const size_t card = view.size(); const size_t alloc = view.span(); - const int memory_span = Kokkos::View::required_allocation_size(100); + const int memory_span = Kokkos::View< int* >::required_allocation_size( 100 ); - return (card <= alloc && memory_span == 400) ? alloc : 0 ; + return ( card <= alloc && memory_span == 400 ) ? alloc : 0; } /*--------------------------------------------------------------------------*/ -template< typename T, class DeviceType> +template< typename T, class DeviceType > struct TestViewOperator { - typedef typename DeviceType::execution_space execution_space ; + typedef typename DeviceType::execution_space execution_space; - static const unsigned N = 100 ; - static const unsigned D = 3 ; + static const unsigned N = 100; + static const unsigned D = 3; - typedef Kokkos::View< T*[D] , execution_space > view_type ; + typedef Kokkos::View< T*[D], execution_space > view_type; - const view_type v1 ; - const view_type v2 ; + const view_type v1; + const view_type v2; TestViewOperator() - : v1( "v1" , N ) - , v2( "v2" , N ) + : v1( "v1", N ) + , v2( "v2", N ) {} static void testit() { - Kokkos::parallel_for( N , TestViewOperator() ); + Kokkos::parallel_for( N, TestViewOperator() ); } KOKKOS_INLINE_FUNCTION void operator()( const unsigned i ) const { - const unsigned X = 0 ; - const unsigned Y = 1 ; - const unsigned Z = 2 ; + const unsigned X = 0; + const unsigned Y = 1; + const unsigned Z = 2; - v2(i,X) = v1(i,X); - v2(i,Y) = v1(i,Y); - v2(i,Z) = v1(i,Z); + v2( i, X ) = v1( i, X ); + v2( i, Y ) = v1( i, Y ); + v2( i, Z ) = v1( i, Z ); } }; /*--------------------------------------------------------------------------*/ -template< class DataType , - class DeviceType , +template< class DataType, + class DeviceType, unsigned Rank = Kokkos::ViewTraits< DataType >::rank > -struct TestViewOperator_LeftAndRight ; +struct TestViewOperator_LeftAndRight; -template< class DataType , class DeviceType > -struct TestViewOperator_LeftAndRight< 
DataType , DeviceType , 8 > +template< class DataType, class DeviceType > +struct TestViewOperator_LeftAndRight< DataType, DeviceType, 8 > { - typedef typename DeviceType::execution_space execution_space ; - typedef typename DeviceType::memory_space memory_space ; - typedef typename execution_space::size_type size_type ; + typedef typename DeviceType::execution_space execution_space; + typedef typename DeviceType::memory_space memory_space; + typedef typename execution_space::size_type size_type; - typedef int value_type ; + typedef int value_type; KOKKOS_INLINE_FUNCTION - static void join( volatile value_type & update , + static void join( volatile value_type & update, const volatile value_type & input ) - { update |= input ; } + { update |= input; } KOKKOS_INLINE_FUNCTION static void init( value_type & update ) - { update = 0 ; } + { update = 0; } + typedef Kokkos::View< DataType, Kokkos::LayoutLeft, execution_space > left_view; + typedef Kokkos::View< DataType, Kokkos::LayoutRight, execution_space > right_view; + typedef Kokkos::View< DataType, Kokkos::LayoutStride, execution_space > stride_view; - typedef Kokkos:: - View< DataType, Kokkos::LayoutLeft, execution_space > left_view ; - - typedef Kokkos:: - View< DataType, Kokkos::LayoutRight, execution_space > right_view ; - - typedef Kokkos:: - View< DataType, Kokkos::LayoutStride, execution_space > stride_view ; - - left_view left ; - right_view right ; - stride_view left_stride ; - stride_view right_stride ; - long left_alloc ; - long right_alloc ; + left_view left; + right_view right; + stride_view left_stride; + stride_view right_stride; + long left_alloc; + long right_alloc; TestViewOperator_LeftAndRight() : left( "left" ) @@ -157,93 +146,89 @@ struct TestViewOperator_LeftAndRight< DataType , DeviceType , 8 > static void testit() { - TestViewOperator_LeftAndRight driver ; + TestViewOperator_LeftAndRight driver; - int error_flag = 0 ; + int error_flag = 0; - Kokkos::parallel_reduce( 1 , driver , error_flag ); 
+ Kokkos::parallel_reduce( 1, driver, error_flag ); - ASSERT_EQ( error_flag , 0 ); + ASSERT_EQ( error_flag, 0 ); } KOKKOS_INLINE_FUNCTION - void operator()( const size_type , value_type & update ) const + void operator()( const size_type, value_type & update ) const { - long offset ; + long offset = -1; - offset = -1 ; - for ( unsigned i7 = 0 ; i7 < unsigned(left.dimension_7()) ; ++i7 ) - for ( unsigned i6 = 0 ; i6 < unsigned(left.dimension_6()) ; ++i6 ) - for ( unsigned i5 = 0 ; i5 < unsigned(left.dimension_5()) ; ++i5 ) - for ( unsigned i4 = 0 ; i4 < unsigned(left.dimension_4()) ; ++i4 ) - for ( unsigned i3 = 0 ; i3 < unsigned(left.dimension_3()) ; ++i3 ) - for ( unsigned i2 = 0 ; i2 < unsigned(left.dimension_2()) ; ++i2 ) - for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) - for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + for ( unsigned i7 = 0; i7 < unsigned( left.dimension_7() ); ++i7 ) + for ( unsigned i6 = 0; i6 < unsigned( left.dimension_6() ); ++i6 ) + for ( unsigned i5 = 0; i5 < unsigned( left.dimension_5() ); ++i5 ) + for ( unsigned i4 = 0; i4 < unsigned( left.dimension_4() ); ++i4 ) + for ( unsigned i3 = 0; i3 < unsigned( left.dimension_3() ); ++i3 ) + for ( unsigned i2 = 0; i2 < unsigned( left.dimension_2() ); ++i2 ) + for ( unsigned i1 = 0; i1 < unsigned( left.dimension_1() ); ++i1 ) + for ( unsigned i0 = 0; i0 < unsigned( left.dimension_0() ); ++i0 ) { const long j = & left( i0, i1, i2, i3, i4, i5, i6, i7 ) - & left( 0, 0, 0, 0, 0, 0, 0, 0 ); - if ( j <= offset || left_alloc <= j ) { update |= 1 ; } - offset = j ; + if ( j <= offset || left_alloc <= j ) { update |= 1; } + offset = j; - if ( & left(i0,i1,i2,i3,i4,i5,i6,i7) != - & left_stride(i0,i1,i2,i3,i4,i5,i6,i7) ) { - update |= 4 ; + if ( & left( i0, i1, i2, i3, i4, i5, i6, i7 ) != + & left_stride( i0, i1, i2, i3, i4, i5, i6, i7 ) ) { + update |= 4; } } - offset = -1 ; - for ( unsigned i0 = 0 ; i0 < unsigned(right.dimension_0()) ; ++i0 ) - for ( unsigned i1 = 0 
; i1 < unsigned(right.dimension_1()) ; ++i1 ) - for ( unsigned i2 = 0 ; i2 < unsigned(right.dimension_2()) ; ++i2 ) - for ( unsigned i3 = 0 ; i3 < unsigned(right.dimension_3()) ; ++i3 ) - for ( unsigned i4 = 0 ; i4 < unsigned(right.dimension_4()) ; ++i4 ) - for ( unsigned i5 = 0 ; i5 < unsigned(right.dimension_5()) ; ++i5 ) - for ( unsigned i6 = 0 ; i6 < unsigned(right.dimension_6()) ; ++i6 ) - for ( unsigned i7 = 0 ; i7 < unsigned(right.dimension_7()) ; ++i7 ) + offset = -1; + + for ( unsigned i0 = 0; i0 < unsigned( right.dimension_0() ); ++i0 ) + for ( unsigned i1 = 0; i1 < unsigned( right.dimension_1() ); ++i1 ) + for ( unsigned i2 = 0; i2 < unsigned( right.dimension_2() ); ++i2 ) + for ( unsigned i3 = 0; i3 < unsigned( right.dimension_3() ); ++i3 ) + for ( unsigned i4 = 0; i4 < unsigned( right.dimension_4() ); ++i4 ) + for ( unsigned i5 = 0; i5 < unsigned( right.dimension_5() ); ++i5 ) + for ( unsigned i6 = 0; i6 < unsigned( right.dimension_6() ); ++i6 ) + for ( unsigned i7 = 0; i7 < unsigned( right.dimension_7() ); ++i7 ) { const long j = & right( i0, i1, i2, i3, i4, i5, i6, i7 ) - & right( 0, 0, 0, 0, 0, 0, 0, 0 ); - if ( j <= offset || right_alloc <= j ) { update |= 2 ; } - offset = j ; + if ( j <= offset || right_alloc <= j ) { update |= 2; } + offset = j; - if ( & right(i0,i1,i2,i3,i4,i5,i6,i7) != - & right_stride(i0,i1,i2,i3,i4,i5,i6,i7) ) { - update |= 8 ; + if ( & right( i0, i1, i2, i3, i4, i5, i6, i7 ) != + & right_stride( i0, i1, i2, i3, i4, i5, i6, i7 ) ) { + update |= 8; } } } }; -template< class DataType , class DeviceType > -struct TestViewOperator_LeftAndRight< DataType , DeviceType , 7 > +template< class DataType, class DeviceType > +struct TestViewOperator_LeftAndRight< DataType, DeviceType, 7 > { - typedef typename DeviceType::execution_space execution_space ; - typedef typename DeviceType::memory_space memory_space ; - typedef typename execution_space::size_type size_type ; + typedef typename DeviceType::execution_space execution_space; + 
typedef typename DeviceType::memory_space memory_space; + typedef typename execution_space::size_type size_type; - typedef int value_type ; + typedef int value_type; KOKKOS_INLINE_FUNCTION - static void join( volatile value_type & update , + static void join( volatile value_type & update, const volatile value_type & input ) - { update |= input ; } + { update |= input; } KOKKOS_INLINE_FUNCTION static void init( value_type & update ) - { update = 0 ; } + { update = 0; } + typedef Kokkos::View< DataType, Kokkos::LayoutLeft, execution_space > left_view; + typedef Kokkos::View< DataType, Kokkos::LayoutRight, execution_space > right_view; - typedef Kokkos:: - View< DataType, Kokkos::LayoutLeft, execution_space > left_view ; - - typedef Kokkos:: - View< DataType, Kokkos::LayoutRight, execution_space > right_view ; - - left_view left ; - right_view right ; - long left_alloc ; - long right_alloc ; + left_view left; + right_view right; + long left_alloc; + long right_alloc; TestViewOperator_LeftAndRight() : left( "left" ) @@ -254,81 +239,77 @@ struct TestViewOperator_LeftAndRight< DataType , DeviceType , 7 > static void testit() { - TestViewOperator_LeftAndRight driver ; + TestViewOperator_LeftAndRight driver; - int error_flag = 0 ; + int error_flag = 0; - Kokkos::parallel_reduce( 1 , driver , error_flag ); + Kokkos::parallel_reduce( 1, driver, error_flag ); - ASSERT_EQ( error_flag , 0 ); + ASSERT_EQ( error_flag, 0 ); } KOKKOS_INLINE_FUNCTION - void operator()( const size_type , value_type & update ) const + void operator()( const size_type, value_type & update ) const { - long offset ; + long offset = -1; - offset = -1 ; - for ( unsigned i6 = 0 ; i6 < unsigned(left.dimension_6()) ; ++i6 ) - for ( unsigned i5 = 0 ; i5 < unsigned(left.dimension_5()) ; ++i5 ) - for ( unsigned i4 = 0 ; i4 < unsigned(left.dimension_4()) ; ++i4 ) - for ( unsigned i3 = 0 ; i3 < unsigned(left.dimension_3()) ; ++i3 ) - for ( unsigned i2 = 0 ; i2 < unsigned(left.dimension_2()) ; ++i2 ) - for ( 
unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) - for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + for ( unsigned i6 = 0; i6 < unsigned( left.dimension_6() ); ++i6 ) + for ( unsigned i5 = 0; i5 < unsigned( left.dimension_5() ); ++i5 ) + for ( unsigned i4 = 0; i4 < unsigned( left.dimension_4() ); ++i4 ) + for ( unsigned i3 = 0; i3 < unsigned( left.dimension_3() ); ++i3 ) + for ( unsigned i2 = 0; i2 < unsigned( left.dimension_2() ); ++i2 ) + for ( unsigned i1 = 0; i1 < unsigned( left.dimension_1() ); ++i1 ) + for ( unsigned i0 = 0; i0 < unsigned( left.dimension_0() ); ++i0 ) { const long j = & left( i0, i1, i2, i3, i4, i5, i6 ) - & left( 0, 0, 0, 0, 0, 0, 0 ); - if ( j <= offset || left_alloc <= j ) { update |= 1 ; } - offset = j ; + if ( j <= offset || left_alloc <= j ) { update |= 1; } + offset = j; } - offset = -1 ; - for ( unsigned i0 = 0 ; i0 < unsigned(right.dimension_0()) ; ++i0 ) - for ( unsigned i1 = 0 ; i1 < unsigned(right.dimension_1()) ; ++i1 ) - for ( unsigned i2 = 0 ; i2 < unsigned(right.dimension_2()) ; ++i2 ) - for ( unsigned i3 = 0 ; i3 < unsigned(right.dimension_3()) ; ++i3 ) - for ( unsigned i4 = 0 ; i4 < unsigned(right.dimension_4()) ; ++i4 ) - for ( unsigned i5 = 0 ; i5 < unsigned(right.dimension_5()) ; ++i5 ) - for ( unsigned i6 = 0 ; i6 < unsigned(right.dimension_6()) ; ++i6 ) + offset = -1; + + for ( unsigned i0 = 0; i0 < unsigned( right.dimension_0() ); ++i0 ) + for ( unsigned i1 = 0; i1 < unsigned( right.dimension_1() ); ++i1 ) + for ( unsigned i2 = 0; i2 < unsigned( right.dimension_2() ); ++i2 ) + for ( unsigned i3 = 0; i3 < unsigned( right.dimension_3() ); ++i3 ) + for ( unsigned i4 = 0; i4 < unsigned( right.dimension_4() ); ++i4 ) + for ( unsigned i5 = 0; i5 < unsigned( right.dimension_5() ); ++i5 ) + for ( unsigned i6 = 0; i6 < unsigned( right.dimension_6() ); ++i6 ) { const long j = & right( i0, i1, i2, i3, i4, i5, i6 ) - & right( 0, 0, 0, 0, 0, 0, 0 ); - if ( j <= offset || right_alloc <= j ) { update 
|= 2 ; } - offset = j ; + if ( j <= offset || right_alloc <= j ) { update |= 2; } + offset = j; } } }; -template< class DataType , class DeviceType > -struct TestViewOperator_LeftAndRight< DataType , DeviceType , 6 > +template< class DataType, class DeviceType > +struct TestViewOperator_LeftAndRight< DataType, DeviceType, 6 > { - typedef typename DeviceType::execution_space execution_space ; - typedef typename DeviceType::memory_space memory_space ; - typedef typename execution_space::size_type size_type ; + typedef typename DeviceType::execution_space execution_space; + typedef typename DeviceType::memory_space memory_space; + typedef typename execution_space::size_type size_type; - typedef int value_type ; + typedef int value_type; KOKKOS_INLINE_FUNCTION - static void join( volatile value_type & update , + static void join( volatile value_type & update, const volatile value_type & input ) - { update |= input ; } + { update |= input; } KOKKOS_INLINE_FUNCTION static void init( value_type & update ) - { update = 0 ; } + { update = 0; } + typedef Kokkos::View< DataType, Kokkos::LayoutLeft, execution_space > left_view; + typedef Kokkos::View< DataType, Kokkos::LayoutRight, execution_space > right_view; - typedef Kokkos:: - View< DataType, Kokkos::LayoutLeft, execution_space > left_view ; - - typedef Kokkos:: - View< DataType, Kokkos::LayoutRight, execution_space > right_view ; - - left_view left ; - right_view right ; - long left_alloc ; - long right_alloc ; + left_view left; + right_view right; + long left_alloc; + long right_alloc; TestViewOperator_LeftAndRight() : left( "left" ) @@ -339,84 +320,78 @@ struct TestViewOperator_LeftAndRight< DataType , DeviceType , 6 > static void testit() { - TestViewOperator_LeftAndRight driver ; + TestViewOperator_LeftAndRight driver; - int error_flag = 0 ; + int error_flag = 0; - Kokkos::parallel_reduce( 1 , driver , error_flag ); + Kokkos::parallel_reduce( 1, driver, error_flag ); - ASSERT_EQ( error_flag , 0 ); + ASSERT_EQ( 
error_flag, 0 ); } KOKKOS_INLINE_FUNCTION - void operator()( const size_type , value_type & update ) const + void operator()( const size_type, value_type & update ) const { - long offset ; + long offset = -1; - offset = -1 ; - for ( unsigned i5 = 0 ; i5 < unsigned(left.dimension_5()) ; ++i5 ) - for ( unsigned i4 = 0 ; i4 < unsigned(left.dimension_4()) ; ++i4 ) - for ( unsigned i3 = 0 ; i3 < unsigned(left.dimension_3()) ; ++i3 ) - for ( unsigned i2 = 0 ; i2 < unsigned(left.dimension_2()) ; ++i2 ) - for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) - for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + for ( unsigned i5 = 0; i5 < unsigned( left.dimension_5() ); ++i5 ) + for ( unsigned i4 = 0; i4 < unsigned( left.dimension_4() ); ++i4 ) + for ( unsigned i3 = 0; i3 < unsigned( left.dimension_3() ); ++i3 ) + for ( unsigned i2 = 0; i2 < unsigned( left.dimension_2() ); ++i2 ) + for ( unsigned i1 = 0; i1 < unsigned( left.dimension_1() ); ++i1 ) + for ( unsigned i0 = 0; i0 < unsigned( left.dimension_0() ); ++i0 ) { const long j = & left( i0, i1, i2, i3, i4, i5 ) - & left( 0, 0, 0, 0, 0, 0 ); - if ( j <= offset || left_alloc <= j ) { update |= 1 ; } - offset = j ; + if ( j <= offset || left_alloc <= j ) { update |= 1; } + offset = j; } - offset = -1 ; - for ( unsigned i0 = 0 ; i0 < unsigned(right.dimension_0()) ; ++i0 ) - for ( unsigned i1 = 0 ; i1 < unsigned(right.dimension_1()) ; ++i1 ) - for ( unsigned i2 = 0 ; i2 < unsigned(right.dimension_2()) ; ++i2 ) - for ( unsigned i3 = 0 ; i3 < unsigned(right.dimension_3()) ; ++i3 ) - for ( unsigned i4 = 0 ; i4 < unsigned(right.dimension_4()) ; ++i4 ) - for ( unsigned i5 = 0 ; i5 < unsigned(right.dimension_5()) ; ++i5 ) + offset = -1; + + for ( unsigned i0 = 0; i0 < unsigned( right.dimension_0() ); ++i0 ) + for ( unsigned i1 = 0; i1 < unsigned( right.dimension_1() ); ++i1 ) + for ( unsigned i2 = 0; i2 < unsigned( right.dimension_2() ); ++i2 ) + for ( unsigned i3 = 0; i3 < unsigned( 
right.dimension_3() ); ++i3 ) + for ( unsigned i4 = 0; i4 < unsigned( right.dimension_4() ); ++i4 ) + for ( unsigned i5 = 0; i5 < unsigned( right.dimension_5() ); ++i5 ) { const long j = & right( i0, i1, i2, i3, i4, i5 ) - & right( 0, 0, 0, 0, 0, 0 ); - if ( j <= offset || right_alloc <= j ) { update |= 2 ; } - offset = j ; + if ( j <= offset || right_alloc <= j ) { update |= 2; } + offset = j; } } }; -template< class DataType , class DeviceType > -struct TestViewOperator_LeftAndRight< DataType , DeviceType , 5 > +template< class DataType, class DeviceType > +struct TestViewOperator_LeftAndRight< DataType, DeviceType, 5 > { - typedef typename DeviceType::execution_space execution_space ; - typedef typename DeviceType::memory_space memory_space ; - typedef typename execution_space::size_type size_type ; + typedef typename DeviceType::execution_space execution_space; + typedef typename DeviceType::memory_space memory_space; + typedef typename execution_space::size_type size_type; - typedef int value_type ; + typedef int value_type; KOKKOS_INLINE_FUNCTION - static void join( volatile value_type & update , + static void join( volatile value_type & update, const volatile value_type & input ) - { update |= input ; } + { update |= input; } KOKKOS_INLINE_FUNCTION static void init( value_type & update ) - { update = 0 ; } + { update = 0; } + typedef Kokkos::View< DataType, Kokkos::LayoutLeft, execution_space > left_view; + typedef Kokkos::View< DataType, Kokkos::LayoutRight, execution_space > right_view; + typedef Kokkos::View< DataType, Kokkos::LayoutStride, execution_space > stride_view; - typedef Kokkos:: - View< DataType, Kokkos::LayoutLeft, execution_space > left_view ; - - typedef Kokkos:: - View< DataType, Kokkos::LayoutRight, execution_space > right_view ; - - typedef Kokkos:: - View< DataType, Kokkos::LayoutStride, execution_space > stride_view ; - - left_view left ; - right_view right ; - stride_view left_stride ; - stride_view right_stride ; - long left_alloc ; - 
long right_alloc ; + left_view left; + right_view right; + stride_view left_stride; + stride_view right_stride; + long left_alloc; + long right_alloc; TestViewOperator_LeftAndRight() : left( "left" ) @@ -429,83 +404,79 @@ struct TestViewOperator_LeftAndRight< DataType , DeviceType , 5 > static void testit() { - TestViewOperator_LeftAndRight driver ; + TestViewOperator_LeftAndRight driver; - int error_flag = 0 ; + int error_flag = 0; - Kokkos::parallel_reduce( 1 , driver , error_flag ); + Kokkos::parallel_reduce( 1, driver, error_flag ); - ASSERT_EQ( error_flag , 0 ); + ASSERT_EQ( error_flag, 0 ); } KOKKOS_INLINE_FUNCTION - void operator()( const size_type , value_type & update ) const + void operator()( const size_type, value_type & update ) const { - long offset ; + long offset = -1; - offset = -1 ; - for ( unsigned i4 = 0 ; i4 < unsigned(left.dimension_4()) ; ++i4 ) - for ( unsigned i3 = 0 ; i3 < unsigned(left.dimension_3()) ; ++i3 ) - for ( unsigned i2 = 0 ; i2 < unsigned(left.dimension_2()) ; ++i2 ) - for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) - for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + for ( unsigned i4 = 0; i4 < unsigned( left.dimension_4() ); ++i4 ) + for ( unsigned i3 = 0; i3 < unsigned( left.dimension_3() ); ++i3 ) + for ( unsigned i2 = 0; i2 < unsigned( left.dimension_2() ); ++i2 ) + for ( unsigned i1 = 0; i1 < unsigned( left.dimension_1() ); ++i1 ) + for ( unsigned i0 = 0; i0 < unsigned( left.dimension_0() ); ++i0 ) { const long j = & left( i0, i1, i2, i3, i4 ) - & left( 0, 0, 0, 0, 0 ); - if ( j <= offset || left_alloc <= j ) { update |= 1 ; } - offset = j ; + if ( j <= offset || left_alloc <= j ) { update |= 1; } + offset = j; if ( & left( i0, i1, i2, i3, i4 ) != - & left_stride( i0, i1, i2, i3, i4 ) ) { update |= 4 ; } + & left_stride( i0, i1, i2, i3, i4 ) ) { update |= 4; } } - offset = -1 ; - for ( unsigned i0 = 0 ; i0 < unsigned(right.dimension_0()) ; ++i0 ) - for ( unsigned i1 = 0 ; i1 < 
unsigned(right.dimension_1()) ; ++i1 ) - for ( unsigned i2 = 0 ; i2 < unsigned(right.dimension_2()) ; ++i2 ) - for ( unsigned i3 = 0 ; i3 < unsigned(right.dimension_3()) ; ++i3 ) - for ( unsigned i4 = 0 ; i4 < unsigned(right.dimension_4()) ; ++i4 ) + offset = -1; + + for ( unsigned i0 = 0; i0 < unsigned( right.dimension_0() ); ++i0 ) + for ( unsigned i1 = 0; i1 < unsigned( right.dimension_1() ); ++i1 ) + for ( unsigned i2 = 0; i2 < unsigned( right.dimension_2() ); ++i2 ) + for ( unsigned i3 = 0; i3 < unsigned( right.dimension_3() ); ++i3 ) + for ( unsigned i4 = 0; i4 < unsigned( right.dimension_4() ); ++i4 ) { const long j = & right( i0, i1, i2, i3, i4 ) - & right( 0, 0, 0, 0, 0 ); - if ( j <= offset || right_alloc <= j ) { update |= 2 ; } - offset = j ; + if ( j <= offset || right_alloc <= j ) { update |= 2; } + offset = j; if ( & right( i0, i1, i2, i3, i4 ) != - & right_stride( i0, i1, i2, i3, i4 ) ) { update |= 8 ; } + & right_stride( i0, i1, i2, i3, i4 ) ) { update |= 8; } } } }; -template< class DataType , class DeviceType > -struct TestViewOperator_LeftAndRight< DataType , DeviceType , 4 > +template< class DataType, class DeviceType > +struct TestViewOperator_LeftAndRight< DataType, DeviceType, 4 > { - typedef typename DeviceType::execution_space execution_space ; - typedef typename DeviceType::memory_space memory_space ; - typedef typename execution_space::size_type size_type ; + typedef typename DeviceType::execution_space execution_space; + typedef typename DeviceType::memory_space memory_space; + typedef typename execution_space::size_type size_type; - typedef int value_type ; + typedef int value_type; KOKKOS_INLINE_FUNCTION - static void join( volatile value_type & update , + static void join( volatile value_type & update, const volatile value_type & input ) - { update |= input ; } + { update |= input; } KOKKOS_INLINE_FUNCTION static void init( value_type & update ) - { update = 0 ; } + { update = 0; } + typedef Kokkos::View< DataType, 
Kokkos::LayoutLeft, execution_space > left_view; + typedef Kokkos::View< DataType, Kokkos::LayoutRight, execution_space > right_view; - typedef Kokkos:: - View< DataType, Kokkos::LayoutLeft, execution_space > left_view ; - - typedef Kokkos:: - View< DataType, Kokkos::LayoutRight, execution_space > right_view ; - - left_view left ; - right_view right ; - long left_alloc ; - long right_alloc ; + left_view left; + right_view right; + long left_alloc; + long right_alloc; TestViewOperator_LeftAndRight() : left( "left" ) @@ -516,84 +487,78 @@ struct TestViewOperator_LeftAndRight< DataType , DeviceType , 4 > static void testit() { - TestViewOperator_LeftAndRight driver ; + TestViewOperator_LeftAndRight driver; - int error_flag = 0 ; + int error_flag = 0; - Kokkos::parallel_reduce( 1 , driver , error_flag ); + Kokkos::parallel_reduce( 1, driver, error_flag ); - ASSERT_EQ( error_flag , 0 ); + ASSERT_EQ( error_flag, 0 ); } KOKKOS_INLINE_FUNCTION - void operator()( const size_type , value_type & update ) const + void operator()( const size_type, value_type & update ) const { - long offset ; + long offset = -1; - offset = -1 ; - for ( unsigned i3 = 0 ; i3 < unsigned(left.dimension_3()) ; ++i3 ) - for ( unsigned i2 = 0 ; i2 < unsigned(left.dimension_2()) ; ++i2 ) - for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) - for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + for ( unsigned i3 = 0; i3 < unsigned( left.dimension_3() ); ++i3 ) + for ( unsigned i2 = 0; i2 < unsigned( left.dimension_2() ); ++i2 ) + for ( unsigned i1 = 0; i1 < unsigned( left.dimension_1() ); ++i1 ) + for ( unsigned i0 = 0; i0 < unsigned( left.dimension_0() ); ++i0 ) { const long j = & left( i0, i1, i2, i3 ) - & left( 0, 0, 0, 0 ); - if ( j <= offset || left_alloc <= j ) { update |= 1 ; } - offset = j ; + if ( j <= offset || left_alloc <= j ) { update |= 1; } + offset = j; } - offset = -1 ; - for ( unsigned i0 = 0 ; i0 < unsigned(right.dimension_0()) ; ++i0 ) - for ( unsigned 
i1 = 0 ; i1 < unsigned(right.dimension_1()) ; ++i1 ) - for ( unsigned i2 = 0 ; i2 < unsigned(right.dimension_2()) ; ++i2 ) - for ( unsigned i3 = 0 ; i3 < unsigned(right.dimension_3()) ; ++i3 ) + offset = -1; + + for ( unsigned i0 = 0; i0 < unsigned( right.dimension_0() ); ++i0 ) + for ( unsigned i1 = 0; i1 < unsigned( right.dimension_1() ); ++i1 ) + for ( unsigned i2 = 0; i2 < unsigned( right.dimension_2() ); ++i2 ) + for ( unsigned i3 = 0; i3 < unsigned( right.dimension_3() ); ++i3 ) { const long j = & right( i0, i1, i2, i3 ) - & right( 0, 0, 0, 0 ); - if ( j <= offset || right_alloc <= j ) { update |= 2 ; } - offset = j ; + if ( j <= offset || right_alloc <= j ) { update |= 2; } + offset = j; } } }; -template< class DataType , class DeviceType > -struct TestViewOperator_LeftAndRight< DataType , DeviceType , 3 > +template< class DataType, class DeviceType > +struct TestViewOperator_LeftAndRight< DataType, DeviceType, 3 > { - typedef typename DeviceType::execution_space execution_space ; - typedef typename DeviceType::memory_space memory_space ; - typedef typename execution_space::size_type size_type ; + typedef typename DeviceType::execution_space execution_space; + typedef typename DeviceType::memory_space memory_space; + typedef typename execution_space::size_type size_type; - typedef int value_type ; + typedef int value_type; KOKKOS_INLINE_FUNCTION - static void join( volatile value_type & update , + static void join( volatile value_type & update, const volatile value_type & input ) - { update |= input ; } + { update |= input; } KOKKOS_INLINE_FUNCTION static void init( value_type & update ) - { update = 0 ; } + { update = 0; } + typedef Kokkos::View< DataType, Kokkos::LayoutLeft, execution_space > left_view; + typedef Kokkos::View< DataType, Kokkos::LayoutRight, execution_space > right_view; + typedef Kokkos::View< DataType, Kokkos::LayoutStride, execution_space > stride_view; - typedef Kokkos:: - View< DataType, Kokkos::LayoutLeft, execution_space > left_view 
; - - typedef Kokkos:: - View< DataType, Kokkos::LayoutRight, execution_space > right_view ; - - typedef Kokkos:: - View< DataType, Kokkos::LayoutStride, execution_space > stride_view ; - - left_view left ; - right_view right ; - stride_view left_stride ; - stride_view right_stride ; - long left_alloc ; - long right_alloc ; + left_view left; + right_view right; + stride_view left_stride; + stride_view right_stride; + long left_alloc; + long right_alloc; TestViewOperator_LeftAndRight() - : left( std::string("left") ) - , right( std::string("right") ) + : left( std::string( "left" ) ) + , right( std::string( "right" ) ) , left_stride( left ) , right_stride( right ) , left_alloc( allocation_count( left ) ) @@ -602,85 +567,81 @@ struct TestViewOperator_LeftAndRight< DataType , DeviceType , 3 > static void testit() { - TestViewOperator_LeftAndRight driver ; + TestViewOperator_LeftAndRight driver; - int error_flag = 0 ; + int error_flag = 0; - Kokkos::parallel_reduce( 1 , driver , error_flag ); + Kokkos::parallel_reduce( 1, driver, error_flag ); - ASSERT_EQ( error_flag , 0 ); + ASSERT_EQ( error_flag, 0 ); } KOKKOS_INLINE_FUNCTION - void operator()( const size_type , value_type & update ) const + void operator()( const size_type, value_type & update ) const { - long offset ; + long offset = -1; - offset = -1 ; - for ( unsigned i2 = 0 ; i2 < unsigned(left.dimension_2()) ; ++i2 ) - for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) - for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + for ( unsigned i2 = 0; i2 < unsigned( left.dimension_2() ); ++i2 ) + for ( unsigned i1 = 0; i1 < unsigned( left.dimension_1() ); ++i1 ) + for ( unsigned i0 = 0; i0 < unsigned( left.dimension_0() ); ++i0 ) { const long j = & left( i0, i1, i2 ) - & left( 0, 0, 0 ); - if ( j <= offset || left_alloc <= j ) { update |= 1 ; } - offset = j ; + if ( j <= offset || left_alloc <= j ) { update |= 1; } + offset = j; - if ( & left(i0,i1,i2) != & left_stride(i0,i1,i2) ) { 
update |= 4 ; } + if ( & left( i0, i1, i2 ) != & left_stride( i0, i1, i2 ) ) { update |= 4; } } - offset = -1 ; - for ( unsigned i0 = 0 ; i0 < unsigned(right.dimension_0()) ; ++i0 ) - for ( unsigned i1 = 0 ; i1 < unsigned(right.dimension_1()) ; ++i1 ) - for ( unsigned i2 = 0 ; i2 < unsigned(right.dimension_2()) ; ++i2 ) + offset = -1; + + for ( unsigned i0 = 0; i0 < unsigned( right.dimension_0() ); ++i0 ) + for ( unsigned i1 = 0; i1 < unsigned( right.dimension_1() ); ++i1 ) + for ( unsigned i2 = 0; i2 < unsigned( right.dimension_2() ); ++i2 ) { const long j = & right( i0, i1, i2 ) - & right( 0, 0, 0 ); - if ( j <= offset || right_alloc <= j ) { update |= 2 ; } - offset = j ; + if ( j <= offset || right_alloc <= j ) { update |= 2; } + offset = j; - if ( & right(i0,i1,i2) != & right_stride(i0,i1,i2) ) { update |= 8 ; } + if ( & right( i0, i1, i2 ) != & right_stride( i0, i1, i2 ) ) { update |= 8; } } - for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) - for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) - for ( unsigned i2 = 0 ; i2 < unsigned(left.dimension_2()) ; ++i2 ) + for ( unsigned i0 = 0; i0 < unsigned( left.dimension_0() ); ++i0 ) + for ( unsigned i1 = 0; i1 < unsigned( left.dimension_1() ); ++i1 ) + for ( unsigned i2 = 0; i2 < unsigned( left.dimension_2() ); ++i2 ) { - if ( & left(i0,i1,i2) != & left(i0,i1,i2,0,0,0,0,0) ) { update |= 3 ; } - if ( & right(i0,i1,i2) != & right(i0,i1,i2,0,0,0,0,0) ) { update |= 3 ; } + if ( & left( i0, i1, i2 ) != & left( i0, i1, i2, 0, 0, 0, 0, 0 ) ) { update |= 3; } + if ( & right( i0, i1, i2 ) != & right( i0, i1, i2, 0, 0, 0, 0, 0 ) ) { update |= 3; } } } }; -template< class DataType , class DeviceType > -struct TestViewOperator_LeftAndRight< DataType , DeviceType , 2 > +template< class DataType, class DeviceType > +struct TestViewOperator_LeftAndRight< DataType, DeviceType, 2 > { - typedef typename DeviceType::execution_space execution_space ; - typedef typename DeviceType::memory_space 
memory_space ; - typedef typename execution_space::size_type size_type ; + typedef typename DeviceType::execution_space execution_space; + typedef typename DeviceType::memory_space memory_space; + typedef typename execution_space::size_type size_type; - typedef int value_type ; + typedef int value_type; KOKKOS_INLINE_FUNCTION - static void join( volatile value_type & update , + static void join( volatile value_type & update, const volatile value_type & input ) - { update |= input ; } + { update |= input; } KOKKOS_INLINE_FUNCTION static void init( value_type & update ) - { update = 0 ; } + { update = 0; } + typedef Kokkos::View< DataType, Kokkos::LayoutLeft, execution_space > left_view; + typedef Kokkos::View< DataType, Kokkos::LayoutRight, execution_space > right_view; - typedef Kokkos:: - View< DataType, Kokkos::LayoutLeft, execution_space > left_view ; - - typedef Kokkos:: - View< DataType, Kokkos::LayoutRight, execution_space > right_view ; - - left_view left ; - right_view right ; - long left_alloc ; - long right_alloc ; + left_view left; + right_view right; + long left_alloc; + long right_alloc; TestViewOperator_LeftAndRight() : left( "left" ) @@ -691,83 +652,77 @@ struct TestViewOperator_LeftAndRight< DataType , DeviceType , 2 > static void testit() { - TestViewOperator_LeftAndRight driver ; + TestViewOperator_LeftAndRight driver; - int error_flag = 0 ; + int error_flag = 0; - Kokkos::parallel_reduce( 1 , driver , error_flag ); + Kokkos::parallel_reduce( 1, driver, error_flag ); - ASSERT_EQ( error_flag , 0 ); + ASSERT_EQ( error_flag, 0 ); } KOKKOS_INLINE_FUNCTION - void operator()( const size_type , value_type & update ) const + void operator()( const size_type, value_type & update ) const { - long offset ; + long offset = -1; - offset = -1 ; - for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) - for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + for ( unsigned i1 = 0; i1 < unsigned( left.dimension_1() ); ++i1 ) + for ( 
unsigned i0 = 0; i0 < unsigned( left.dimension_0() ); ++i0 ) { const long j = & left( i0, i1 ) - & left( 0, 0 ); - if ( j <= offset || left_alloc <= j ) { update |= 1 ; } - offset = j ; + if ( j <= offset || left_alloc <= j ) { update |= 1; } + offset = j; } - offset = -1 ; - for ( unsigned i0 = 0 ; i0 < unsigned(right.dimension_0()) ; ++i0 ) - for ( unsigned i1 = 0 ; i1 < unsigned(right.dimension_1()) ; ++i1 ) + offset = -1; + + for ( unsigned i0 = 0; i0 < unsigned( right.dimension_0() ); ++i0 ) + for ( unsigned i1 = 0; i1 < unsigned( right.dimension_1() ); ++i1 ) { const long j = & right( i0, i1 ) - & right( 0, 0 ); - if ( j <= offset || right_alloc <= j ) { update |= 2 ; } - offset = j ; + if ( j <= offset || right_alloc <= j ) { update |= 2; } + offset = j; } - for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) - for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) + for ( unsigned i0 = 0; i0 < unsigned( left.dimension_0() ); ++i0 ) + for ( unsigned i1 = 0; i1 < unsigned( left.dimension_1() ); ++i1 ) { - if ( & left(i0,i1) != & left(i0,i1,0,0,0,0,0,0) ) { update |= 3 ; } - if ( & right(i0,i1) != & right(i0,i1,0,0,0,0,0,0) ) { update |= 3 ; } + if ( & left( i0, i1 ) != & left( i0, i1, 0, 0, 0, 0, 0, 0 ) ) { update |= 3; } + if ( & right( i0, i1 ) != & right( i0, i1, 0, 0, 0, 0, 0, 0 ) ) { update |= 3; } } } }; -template< class DataType , class DeviceType > -struct TestViewOperator_LeftAndRight< DataType , DeviceType , 1 > +template< class DataType, class DeviceType > +struct TestViewOperator_LeftAndRight< DataType, DeviceType, 1 > { - typedef typename DeviceType::execution_space execution_space ; - typedef typename DeviceType::memory_space memory_space ; - typedef typename execution_space::size_type size_type ; + typedef typename DeviceType::execution_space execution_space; + typedef typename DeviceType::memory_space memory_space; + typedef typename execution_space::size_type size_type; - typedef int value_type ; + typedef int 
value_type; KOKKOS_INLINE_FUNCTION - static void join( volatile value_type & update , + static void join( volatile value_type & update, const volatile value_type & input ) - { update |= input ; } + { update |= input; } KOKKOS_INLINE_FUNCTION static void init( value_type & update ) - { update = 0 ; } + { update = 0; } + typedef Kokkos::View< DataType, Kokkos::LayoutLeft, execution_space > left_view; + typedef Kokkos::View< DataType, Kokkos::LayoutRight, execution_space > right_view; + typedef Kokkos::View< DataType, Kokkos::LayoutStride, execution_space > stride_view; - typedef Kokkos:: - View< DataType, Kokkos::LayoutLeft, execution_space > left_view ; - - typedef Kokkos:: - View< DataType, Kokkos::LayoutRight, execution_space > right_view ; - - typedef Kokkos:: - View< DataType, Kokkos::LayoutStride, execution_space > stride_view ; - - left_view left ; - right_view right ; - stride_view left_stride ; - stride_view right_stride ; - long left_alloc ; - long right_alloc ; + left_view left; + right_view right; + stride_view left_stride; + stride_view right_stride; + long left_alloc; + long right_alloc; TestViewOperator_LeftAndRight() : left( "left" ) @@ -780,78 +735,75 @@ struct TestViewOperator_LeftAndRight< DataType , DeviceType , 1 > static void testit() { - TestViewOperator_LeftAndRight driver ; + TestViewOperator_LeftAndRight driver; - int error_flag = 0 ; + int error_flag = 0; - Kokkos::parallel_reduce( 1 , driver , error_flag ); + Kokkos::parallel_reduce( 1, driver, error_flag ); - ASSERT_EQ( error_flag , 0 ); + ASSERT_EQ( error_flag, 0 ); } KOKKOS_INLINE_FUNCTION - void operator()( const size_type , value_type & update ) const + void operator()( const size_type, value_type & update ) const { - for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + for ( unsigned i0 = 0; i0 < unsigned( left.dimension_0() ); ++i0 ) { - if ( & left(i0) != & left(i0,0,0,0,0,0,0,0) ) { update |= 3 ; } - if ( & right(i0) != & right(i0,0,0,0,0,0,0,0) ) { update |= 3 ; } 
- if ( & left(i0) != & left_stride(i0) ) { update |= 4 ; } - if ( & right(i0) != & right_stride(i0) ) { update |= 8 ; } + if ( & left( i0 ) != & left( i0, 0, 0, 0, 0, 0, 0, 0 ) ) { update |= 3; } + if ( & right( i0 ) != & right( i0, 0, 0, 0, 0, 0, 0, 0 ) ) { update |= 3; } + if ( & left( i0 ) != & left_stride( i0 ) ) { update |= 4; } + if ( & right( i0 ) != & right_stride( i0 ) ) { update |= 8; } } } }; -template -struct TestViewMirror { - - template +template< class Layout, class DeviceType > +struct TestViewMirror +{ + template< class MemoryTraits > void static test_mirror() { - Kokkos::View a_org("A",1000); - Kokkos::View a_h = a_org; - auto a_h2 = Kokkos::create_mirror(Kokkos::HostSpace(),a_h); - auto a_d = Kokkos::create_mirror(DeviceType(),a_h); + Kokkos::View< double*, Layout, Kokkos::HostSpace > a_org( "A", 1000 ); + Kokkos::View< double*, Layout, Kokkos::HostSpace, MemoryTraits > a_h = a_org; + auto a_h2 = Kokkos::create_mirror( Kokkos::HostSpace(), a_h ); + auto a_d = Kokkos::create_mirror( DeviceType(), a_h ); - int equal_ptr_h_h2 = (a_h.data() ==a_h2.data())?1:0; - int equal_ptr_h_d = (a_h.data() ==a_d. data())?1:0; - int equal_ptr_h2_d = (a_h2.data()==a_d. data())?1:0; + int equal_ptr_h_h2 = ( a_h.data() == a_h2.data() ) ? 1 : 0; + int equal_ptr_h_d = ( a_h.data() == a_d.data() ) ? 1 : 0; + int equal_ptr_h2_d = ( a_h2.data() == a_d.data() ) ? 
1 : 0; - ASSERT_EQ(equal_ptr_h_h2,0); - ASSERT_EQ(equal_ptr_h_d ,0); - ASSERT_EQ(equal_ptr_h2_d,0); - + ASSERT_EQ( equal_ptr_h_h2, 0 ); + ASSERT_EQ( equal_ptr_h_d, 0 ); + ASSERT_EQ( equal_ptr_h2_d, 0 ); - ASSERT_EQ(a_h.dimension_0(),a_h2.dimension_0()); - ASSERT_EQ(a_h.dimension_0(),a_d .dimension_0()); + ASSERT_EQ( a_h.dimension_0(), a_h2.dimension_0() ); + ASSERT_EQ( a_h.dimension_0(), a_d .dimension_0() ); } - - template + template< class MemoryTraits > void static test_mirror_view() { - Kokkos::View a_org("A",1000); - Kokkos::View a_h = a_org; - auto a_h2 = Kokkos::create_mirror_view(Kokkos::HostSpace(),a_h); - auto a_d = Kokkos::create_mirror_view(DeviceType(),a_h); + Kokkos::View< double*, Layout, Kokkos::HostSpace > a_org( "A", 1000 ); + Kokkos::View< double*, Layout, Kokkos::HostSpace, MemoryTraits > a_h = a_org; + auto a_h2 = Kokkos::create_mirror_view( Kokkos::HostSpace(), a_h ); + auto a_d = Kokkos::create_mirror_view( DeviceType(), a_h ); - int equal_ptr_h_h2 = a_h.data() ==a_h2.data()?1:0; - int equal_ptr_h_d = a_h.data() ==a_d. data()?1:0; - int equal_ptr_h2_d = a_h2.data()==a_d. data()?1:0; + int equal_ptr_h_h2 = a_h.data() == a_h2.data() ? 1 : 0; + int equal_ptr_h_d = a_h.data() == a_d.data() ? 1 : 0; + int equal_ptr_h2_d = a_h2.data() == a_d.data() ? 1 : 0; - int is_same_memspace = std::is_same::value?1:0; - ASSERT_EQ(equal_ptr_h_h2,1); - ASSERT_EQ(equal_ptr_h_d ,is_same_memspace); - ASSERT_EQ(equal_ptr_h2_d ,is_same_memspace); + int is_same_memspace = std::is_same< Kokkos::HostSpace, typename DeviceType::memory_space >::value ? 
1 : 0; + ASSERT_EQ( equal_ptr_h_h2, 1 ); + ASSERT_EQ( equal_ptr_h_d, is_same_memspace ); + ASSERT_EQ( equal_ptr_h2_d, is_same_memspace ); - - ASSERT_EQ(a_h.dimension_0(),a_h2.dimension_0()); - ASSERT_EQ(a_h.dimension_0(),a_d .dimension_0()); - } + ASSERT_EQ( a_h.dimension_0(), a_h2.dimension_0() ); + ASSERT_EQ( a_h.dimension_0(), a_d .dimension_0() ); + } void static testit() { - test_mirror>(); - test_mirror>(); - test_mirror_view>(); - test_mirror_view>(); + test_mirror< Kokkos::MemoryTraits<0> >(); + test_mirror< Kokkos::MemoryTraits >(); + test_mirror_view< Kokkos::MemoryTraits<0> >(); + test_mirror_view< Kokkos::MemoryTraits >(); } }; @@ -861,23 +813,21 @@ template< typename T, class DeviceType > class TestViewAPI { public: - typedef DeviceType device ; + typedef DeviceType device; - enum { N0 = 1000 , - N1 = 3 , - N2 = 5 , + enum { N0 = 1000, + N1 = 3, + N2 = 5, N3 = 7 }; - typedef Kokkos::View< T , device > dView0 ; - typedef Kokkos::View< T* , device > dView1 ; - typedef Kokkos::View< T*[N1] , device > dView2 ; - typedef Kokkos::View< T*[N1][N2] , device > dView3 ; - typedef Kokkos::View< T*[N1][N2][N3] , device > dView4 ; - typedef Kokkos::View< const T*[N1][N2][N3] , device > const_dView4 ; - - typedef Kokkos::View< T****, device, Kokkos::MemoryUnmanaged > dView4_unmanaged ; - - typedef typename dView0::host_mirror_space host ; + typedef Kokkos::View< T, device > dView0; + typedef Kokkos::View< T*, device > dView1; + typedef Kokkos::View< T*[N1], device > dView2; + typedef Kokkos::View< T*[N1][N2], device > dView3; + typedef Kokkos::View< T*[N1][N2][N3], device > dView4; + typedef Kokkos::View< const T*[N1][N2][N3], device > const_dView4; + typedef Kokkos::View< T****, device, Kokkos::MemoryUnmanaged > dView4_unmanaged; + typedef typename dView0::host_mirror_space host; TestViewAPI() { @@ -889,41 +839,38 @@ public: run_test_subview_strided(); run_test_vector(); - TestViewOperator< T , device >::testit(); - TestViewOperator_LeftAndRight< 
int[2][3][4][2][3][4][2][3] , device >::testit(); - TestViewOperator_LeftAndRight< int[2][3][4][2][3][4][2] , device >::testit(); - TestViewOperator_LeftAndRight< int[2][3][4][2][3][4] , device >::testit(); - TestViewOperator_LeftAndRight< int[2][3][4][2][3] , device >::testit(); - TestViewOperator_LeftAndRight< int[2][3][4][2] , device >::testit(); - TestViewOperator_LeftAndRight< int[2][3][4] , device >::testit(); - TestViewOperator_LeftAndRight< int[2][3] , device >::testit(); - TestViewOperator_LeftAndRight< int[2] , device >::testit(); - TestViewMirror::testit(); - TestViewMirror::testit(); - + TestViewOperator< T, device >::testit(); + TestViewOperator_LeftAndRight< int[2][3][4][2][3][4][2][3], device >::testit(); + TestViewOperator_LeftAndRight< int[2][3][4][2][3][4][2], device >::testit(); + TestViewOperator_LeftAndRight< int[2][3][4][2][3][4], device >::testit(); + TestViewOperator_LeftAndRight< int[2][3][4][2][3], device >::testit(); + TestViewOperator_LeftAndRight< int[2][3][4][2], device >::testit(); + TestViewOperator_LeftAndRight< int[2][3][4], device >::testit(); + TestViewOperator_LeftAndRight< int[2][3], device >::testit(); + TestViewOperator_LeftAndRight< int[2], device >::testit(); + TestViewMirror< Kokkos::LayoutLeft, device >::testit(); + TestViewMirror< Kokkos::LayoutRight, device >::testit(); } static void run_test_mirror() { - typedef Kokkos::View< int , host > view_type ; - typedef typename view_type::HostMirror mirror_type ; + typedef Kokkos::View< int, host > view_type; + typedef typename view_type::HostMirror mirror_type; - static_assert( std::is_same< typename view_type::memory_space - , typename mirror_type::memory_space - >::value , "" ); + static_assert( std::is_same< typename view_type::memory_space, typename mirror_type::memory_space >::value, "" ); - view_type a("a"); - mirror_type am = Kokkos::create_mirror_view(a); - mirror_type ax = Kokkos::create_mirror(a); - ASSERT_EQ( & a() , & am() ); + view_type a( "a" ); + mirror_type am 
= Kokkos::create_mirror_view( a ); + mirror_type ax = Kokkos::create_mirror( a ); + ASSERT_EQ( & a(), & am() ); } static void run_test_scalar() { - typedef typename dView0::HostMirror hView0 ; + typedef typename dView0::HostMirror hView0; - dView0 dx , dy ; - hView0 hx , hy ; + dView0 dx, dy; + hView0 hx, hy; dx = dView0( "dx" ); dy = dView0( "dy" ); @@ -931,11 +878,11 @@ public: hx = Kokkos::create_mirror( dx ); hy = Kokkos::create_mirror( dy ); - hx() = 1 ; + hx() = 1; - Kokkos::deep_copy( dx , hx ); - Kokkos::deep_copy( dy , dx ); - Kokkos::deep_copy( hy , dy ); + Kokkos::deep_copy( dx, hx ); + Kokkos::deep_copy( dy, dx ); + Kokkos::deep_copy( hy, dy ); ASSERT_EQ( hx(), hy() ); } @@ -948,11 +895,11 @@ public: // usual "(void)" marker to avoid compiler warnings for unused // variables. - typedef typename dView0::HostMirror hView0 ; - typedef typename dView1::HostMirror hView1 ; - typedef typename dView2::HostMirror hView2 ; - typedef typename dView3::HostMirror hView3 ; - typedef typename dView4::HostMirror hView4 ; + typedef typename dView0::HostMirror hView0; + typedef typename dView1::HostMirror hView1; + typedef typename dView2::HostMirror hView2; + typedef typename dView3::HostMirror hView3; + typedef typename dView4::HostMirror hView4; { hView0 thing; @@ -975,8 +922,8 @@ public: (void) thing; } - dView4 dx , dy , dz ; - hView4 hx , hy , hz ; + dView4 dx, dy, dz; + hView4 hx, hy, hz; ASSERT_TRUE( dx.ptr_on_device() == 0 ); ASSERT_TRUE( dy.ptr_on_device() == 0 ); @@ -984,220 +931,239 @@ public: ASSERT_TRUE( hx.ptr_on_device() == 0 ); ASSERT_TRUE( hy.ptr_on_device() == 0 ); ASSERT_TRUE( hz.ptr_on_device() == 0 ); - ASSERT_EQ( dx.dimension_0() , 0u ); - ASSERT_EQ( dy.dimension_0() , 0u ); - ASSERT_EQ( dz.dimension_0() , 0u ); - ASSERT_EQ( hx.dimension_0() , 0u ); - ASSERT_EQ( hy.dimension_0() , 0u ); - ASSERT_EQ( hz.dimension_0() , 0u ); - ASSERT_EQ( dx.dimension_1() , unsigned(N1) ); - ASSERT_EQ( dy.dimension_1() , unsigned(N1) ); - ASSERT_EQ( dz.dimension_1() 
, unsigned(N1) ); - ASSERT_EQ( hx.dimension_1() , unsigned(N1) ); - ASSERT_EQ( hy.dimension_1() , unsigned(N1) ); - ASSERT_EQ( hz.dimension_1() , unsigned(N1) ); + ASSERT_EQ( dx.dimension_0(), 0u ); + ASSERT_EQ( dy.dimension_0(), 0u ); + ASSERT_EQ( dz.dimension_0(), 0u ); + ASSERT_EQ( hx.dimension_0(), 0u ); + ASSERT_EQ( hy.dimension_0(), 0u ); + ASSERT_EQ( hz.dimension_0(), 0u ); + ASSERT_EQ( dx.dimension_1(), unsigned( N1 ) ); + ASSERT_EQ( dy.dimension_1(), unsigned( N1 ) ); + ASSERT_EQ( dz.dimension_1(), unsigned( N1 ) ); + ASSERT_EQ( hx.dimension_1(), unsigned( N1 ) ); + ASSERT_EQ( hy.dimension_1(), unsigned( N1 ) ); + ASSERT_EQ( hz.dimension_1(), unsigned( N1 ) ); - dx = dView4( "dx" , N0 ); - dy = dView4( "dy" , N0 ); + dx = dView4( "dx", N0 ); + dy = dView4( "dy", N0 ); - ASSERT_EQ( dx.use_count() , size_t(1) ); + ASSERT_EQ( dx.use_count(), size_t( 1 ) ); dView4_unmanaged unmanaged_dx = dx; - ASSERT_EQ( dx.use_count() , size_t(1) ); + ASSERT_EQ( dx.use_count(), size_t( 1 ) ); - dView4_unmanaged unmanaged_from_ptr_dx = dView4_unmanaged(dx.ptr_on_device(), - dx.dimension_0(), - dx.dimension_1(), - dx.dimension_2(), - dx.dimension_3()); + dView4_unmanaged unmanaged_from_ptr_dx = dView4_unmanaged( dx.ptr_on_device(), + dx.dimension_0(), + dx.dimension_1(), + dx.dimension_2(), + dx.dimension_3() ); { - // Destruction of this view should be harmless - const_dView4 unmanaged_from_ptr_const_dx( dx.ptr_on_device() , - dx.dimension_0() , - dx.dimension_1() , - dx.dimension_2() , + // Destruction of this view should be harmless. 
+ const_dView4 unmanaged_from_ptr_const_dx( dx.ptr_on_device(), + dx.dimension_0(), + dx.dimension_1(), + dx.dimension_2(), dx.dimension_3() ); } - const_dView4 const_dx = dx ; - ASSERT_EQ( dx.use_count() , size_t(2) ); + const_dView4 const_dx = dx; + ASSERT_EQ( dx.use_count(), size_t( 2 ) ); { const_dView4 const_dx2; const_dx2 = const_dx; - ASSERT_EQ( dx.use_count() , size_t(3) ); + ASSERT_EQ( dx.use_count(), size_t( 3 ) ); const_dx2 = dy; - ASSERT_EQ( dx.use_count() , size_t(2) ); + ASSERT_EQ( dx.use_count(), size_t( 2 ) ); - const_dView4 const_dx3(dx); - ASSERT_EQ( dx.use_count() , size_t(3) ); - - dView4_unmanaged dx4_unmanaged(dx); - ASSERT_EQ( dx.use_count() , size_t(3) ); + const_dView4 const_dx3( dx ); + ASSERT_EQ( dx.use_count(), size_t( 3 ) ); + + dView4_unmanaged dx4_unmanaged( dx ); + ASSERT_EQ( dx.use_count(), size_t( 3 ) ); } - ASSERT_EQ( dx.use_count() , size_t(2) ); - + ASSERT_EQ( dx.use_count(), size_t( 2 ) ); ASSERT_FALSE( dx.ptr_on_device() == 0 ); ASSERT_FALSE( const_dx.ptr_on_device() == 0 ); ASSERT_FALSE( unmanaged_dx.ptr_on_device() == 0 ); ASSERT_FALSE( unmanaged_from_ptr_dx.ptr_on_device() == 0 ); ASSERT_FALSE( dy.ptr_on_device() == 0 ); - ASSERT_NE( dx , dy ); + ASSERT_NE( dx, dy ); - ASSERT_EQ( dx.dimension_0() , unsigned(N0) ); - ASSERT_EQ( dx.dimension_1() , unsigned(N1) ); - ASSERT_EQ( dx.dimension_2() , unsigned(N2) ); - ASSERT_EQ( dx.dimension_3() , unsigned(N3) ); + ASSERT_EQ( dx.dimension_0(), unsigned( N0 ) ); + ASSERT_EQ( dx.dimension_1(), unsigned( N1 ) ); + ASSERT_EQ( dx.dimension_2(), unsigned( N2 ) ); + ASSERT_EQ( dx.dimension_3(), unsigned( N3 ) ); - ASSERT_EQ( dy.dimension_0() , unsigned(N0) ); - ASSERT_EQ( dy.dimension_1() , unsigned(N1) ); - ASSERT_EQ( dy.dimension_2() , unsigned(N2) ); - ASSERT_EQ( dy.dimension_3() , unsigned(N3) ); + ASSERT_EQ( dy.dimension_0(), unsigned( N0 ) ); + ASSERT_EQ( dy.dimension_1(), unsigned( N1 ) ); + ASSERT_EQ( dy.dimension_2(), unsigned( N2 ) ); + ASSERT_EQ( dy.dimension_3(), unsigned( N3 
) ); - ASSERT_EQ( unmanaged_from_ptr_dx.capacity(),unsigned(N0)*unsigned(N1)*unsigned(N2)*unsigned(N3) ); + ASSERT_EQ( unmanaged_from_ptr_dx.capacity(), unsigned( N0 ) * unsigned( N1 ) * unsigned( N2 ) * unsigned( N3 ) ); hx = Kokkos::create_mirror( dx ); hy = Kokkos::create_mirror( dy ); - // T v1 = hx() ; // Generates compile error as intended - // T v2 = hx(0,0) ; // Generates compile error as intended - // hx(0,0) = v2 ; // Generates compile error as intended + // T v1 = hx(); // Generates compile error as intended. + // T v2 = hx( 0, 0 ); // Generates compile error as intended. + // hx( 0, 0 ) = v2; // Generates compile error as intended. // Testing with asynchronous deep copy with respect to device { - size_t count = 0 ; - for ( size_t ip = 0 ; ip < N0 ; ++ip ) { - for ( size_t i1 = 0 ; i1 < hx.dimension_1() ; ++i1 ) { - for ( size_t i2 = 0 ; i2 < hx.dimension_2() ; ++i2 ) { - for ( size_t i3 = 0 ; i3 < hx.dimension_3() ; ++i3 ) { - hx(ip,i1,i2,i3) = ++count ; - }}}} + size_t count = 0; + for ( size_t ip = 0; ip < N0; ++ip ) + for ( size_t i1 = 0; i1 < hx.dimension_1(); ++i1 ) + for ( size_t i2 = 0; i2 < hx.dimension_2(); ++i2 ) + for ( size_t i3 = 0; i3 < hx.dimension_3(); ++i3 ) + { + hx( ip, i1, i2, i3 ) = ++count; + } - Kokkos::deep_copy(typename hView4::execution_space(), dx , hx ); - Kokkos::deep_copy(typename hView4::execution_space(), dy , dx ); - Kokkos::deep_copy(typename hView4::execution_space(), hy , dy ); + Kokkos::deep_copy( typename hView4::execution_space(), dx, hx ); + Kokkos::deep_copy( typename hView4::execution_space(), dy, dx ); + Kokkos::deep_copy( typename hView4::execution_space(), hy, dy ); - for ( size_t ip = 0 ; ip < N0 ; ++ip ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - { ASSERT_EQ( hx(ip,i1,i2,i3) , hy(ip,i1,i2,i3) ); } - }}}} + for ( size_t ip = 0; ip < N0; ++ip ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) 
+ for ( size_t i3 = 0; i3 < N3; ++i3 ) + { + ASSERT_EQ( hx( ip, i1, i2, i3 ), hy( ip, i1, i2, i3 ) ); + } - Kokkos::deep_copy(typename hView4::execution_space(), dx , T(0) ); - Kokkos::deep_copy(typename hView4::execution_space(), hx , dx ); + Kokkos::deep_copy( typename hView4::execution_space(), dx, T( 0 ) ); + Kokkos::deep_copy( typename hView4::execution_space(), hx, dx ); - for ( size_t ip = 0 ; ip < N0 ; ++ip ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - { ASSERT_EQ( hx(ip,i1,i2,i3) , T(0) ); } - }}}} + for ( size_t ip = 0; ip < N0; ++ip ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i3 = 0; i3 < N3; ++i3 ) + { + ASSERT_EQ( hx( ip, i1, i2, i3 ), T( 0 ) ); + } } - // Testing with asynchronous deep copy with respect to host + // Testing with asynchronous deep copy with respect to host. { - size_t count = 0 ; - for ( size_t ip = 0 ; ip < N0 ; ++ip ) { - for ( size_t i1 = 0 ; i1 < hx.dimension_1() ; ++i1 ) { - for ( size_t i2 = 0 ; i2 < hx.dimension_2() ; ++i2 ) { - for ( size_t i3 = 0 ; i3 < hx.dimension_3() ; ++i3 ) { - hx(ip,i1,i2,i3) = ++count ; - }}}} + size_t count = 0; - Kokkos::deep_copy(typename dView4::execution_space(), dx , hx ); - Kokkos::deep_copy(typename dView4::execution_space(), dy , dx ); - Kokkos::deep_copy(typename dView4::execution_space(), hy , dy ); + for ( size_t ip = 0; ip < N0; ++ip ) + for ( size_t i1 = 0; i1 < hx.dimension_1(); ++i1 ) + for ( size_t i2 = 0; i2 < hx.dimension_2(); ++i2 ) + for ( size_t i3 = 0; i3 < hx.dimension_3(); ++i3 ) + { + hx( ip, i1, i2, i3 ) = ++count; + } - for ( size_t ip = 0 ; ip < N0 ; ++ip ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - { ASSERT_EQ( hx(ip,i1,i2,i3) , hy(ip,i1,i2,i3) ); } - }}}} + Kokkos::deep_copy( typename dView4::execution_space(), dx, hx ); + Kokkos::deep_copy( 
typename dView4::execution_space(), dy, dx ); + Kokkos::deep_copy( typename dView4::execution_space(), hy, dy ); - Kokkos::deep_copy(typename dView4::execution_space(), dx , T(0) ); - Kokkos::deep_copy(typename dView4::execution_space(), hx , dx ); + for ( size_t ip = 0; ip < N0; ++ip ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i3 = 0; i3 < N3; ++i3 ) + { + ASSERT_EQ( hx( ip, i1, i2, i3 ), hy( ip, i1, i2, i3 ) ); + } - for ( size_t ip = 0 ; ip < N0 ; ++ip ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - { ASSERT_EQ( hx(ip,i1,i2,i3) , T(0) ); } - }}}} + Kokkos::deep_copy( typename dView4::execution_space(), dx, T( 0 ) ); + Kokkos::deep_copy( typename dView4::execution_space(), hx, dx ); + + for ( size_t ip = 0; ip < N0; ++ip ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i3 = 0; i3 < N3; ++i3 ) + { + ASSERT_EQ( hx( ip, i1, i2, i3 ), T( 0 ) ); + } } - // Testing with synchronous deep copy + // Testing with synchronous deep copy. 
{ - size_t count = 0 ; - for ( size_t ip = 0 ; ip < N0 ; ++ip ) { - for ( size_t i1 = 0 ; i1 < hx.dimension_1() ; ++i1 ) { - for ( size_t i2 = 0 ; i2 < hx.dimension_2() ; ++i2 ) { - for ( size_t i3 = 0 ; i3 < hx.dimension_3() ; ++i3 ) { - hx(ip,i1,i2,i3) = ++count ; - }}}} + size_t count = 0; - Kokkos::deep_copy( dx , hx ); - Kokkos::deep_copy( dy , dx ); - Kokkos::deep_copy( hy , dy ); + for ( size_t ip = 0; ip < N0; ++ip ) + for ( size_t i1 = 0; i1 < hx.dimension_1(); ++i1 ) + for ( size_t i2 = 0; i2 < hx.dimension_2(); ++i2 ) + for ( size_t i3 = 0; i3 < hx.dimension_3(); ++i3 ) + { + hx( ip, i1, i2, i3 ) = ++count; + } - for ( size_t ip = 0 ; ip < N0 ; ++ip ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - { ASSERT_EQ( hx(ip,i1,i2,i3) , hy(ip,i1,i2,i3) ); } - }}}} + Kokkos::deep_copy( dx, hx ); + Kokkos::deep_copy( dy, dx ); + Kokkos::deep_copy( hy, dy ); - Kokkos::deep_copy( dx , T(0) ); - Kokkos::deep_copy( hx , dx ); + for ( size_t ip = 0; ip < N0; ++ip ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i3 = 0; i3 < N3; ++i3 ) + { + ASSERT_EQ( hx( ip, i1, i2, i3 ), hy( ip, i1, i2, i3 ) ); + } - for ( size_t ip = 0 ; ip < N0 ; ++ip ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - { ASSERT_EQ( hx(ip,i1,i2,i3) , T(0) ); } - }}}} + Kokkos::deep_copy( dx, T( 0 ) ); + Kokkos::deep_copy( hx, dx ); + + for ( size_t ip = 0; ip < N0; ++ip ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i3 = 0; i3 < N3; ++i3 ) + { + ASSERT_EQ( hx( ip, i1, i2, i3 ), T( 0 ) ); + } } - dz = dx ; ASSERT_EQ( dx, dz); ASSERT_NE( dy, dz); - dz = dy ; ASSERT_EQ( dy, dz); ASSERT_NE( dx, dz); + + dz = dx; + ASSERT_EQ( dx, dz ); + ASSERT_NE( dy, dz ); + + dz = dy; + ASSERT_EQ( dy, dz ); + ASSERT_NE( dx, dz ); dx = dView4(); ASSERT_TRUE( 
dx.ptr_on_device() == 0 ); ASSERT_FALSE( dy.ptr_on_device() == 0 ); ASSERT_FALSE( dz.ptr_on_device() == 0 ); + dy = dView4(); ASSERT_TRUE( dx.ptr_on_device() == 0 ); ASSERT_TRUE( dy.ptr_on_device() == 0 ); ASSERT_FALSE( dz.ptr_on_device() == 0 ); + dz = dView4(); ASSERT_TRUE( dx.ptr_on_device() == 0 ); ASSERT_TRUE( dy.ptr_on_device() == 0 ); ASSERT_TRUE( dz.ptr_on_device() == 0 ); } - typedef T DataType[2] ; + typedef T DataType[2]; static void check_auto_conversion_to_const( - const Kokkos::View< const DataType , device > & arg_const , - const Kokkos::View< DataType , device > & arg ) + const Kokkos::View< const DataType, device > & arg_const, + const Kokkos::View< DataType, device > & arg ) { ASSERT_TRUE( arg_const == arg ); } static void run_test_const() { - typedef Kokkos::View< DataType , device > typeX ; - typedef Kokkos::View< const DataType , device > const_typeX ; - typedef Kokkos::View< const DataType , device , Kokkos::MemoryRandomAccess > const_typeR ; + typedef Kokkos::View< DataType, device > typeX; + typedef Kokkos::View< const DataType, device > const_typeX; + typedef Kokkos::View< const DataType, device, Kokkos::MemoryRandomAccess > const_typeR; + typeX x( "X" ); - const_typeX xc = x ; - const_typeR xr = x ; + const_typeX xc = x; + const_typeR xr = x; ASSERT_TRUE( xc == x ); ASSERT_TRUE( x == xc ); @@ -1206,144 +1172,142 @@ public: // an lvalue reference due to retrieving through texture cache // therefore not allowed to query the underlying pointer. #if defined( KOKKOS_ENABLE_CUDA ) - if ( ! std::is_same< typename device::execution_space , Kokkos::Cuda >::value ) + if ( !std::is_same< typename device::execution_space, Kokkos::Cuda >::value ) #endif { ASSERT_TRUE( x.ptr_on_device() == xr.ptr_on_device() ); } - // typeX xf = xc ; // setting non-const from const must not compile + // typeX xf = xc; // Setting non-const from const must not compile. 
- check_auto_conversion_to_const( x , x ); + check_auto_conversion_to_const( x, x ); } static void run_test_subview() { - typedef Kokkos::View< const T , device > sView ; + typedef Kokkos::View< const T, device > sView; dView0 d0( "d0" ); - dView1 d1( "d1" , N0 ); - dView2 d2( "d2" , N0 ); - dView3 d3( "d3" , N0 ); - dView4 d4( "d4" , N0 ); + dView1 d1( "d1", N0 ); + dView2 d2( "d2", N0 ); + dView3 d3( "d3", N0 ); + dView4 d4( "d4", N0 ); - sView s0 = d0 ; - sView s1 = Kokkos::subview( d1 , 1 ); - sView s2 = Kokkos::subview( d2 , 1 , 1 ); - sView s3 = Kokkos::subview( d3 , 1 , 1 , 1 ); - sView s4 = Kokkos::subview( d4 , 1 , 1 , 1 , 1 ); + sView s0 = d0; + sView s1 = Kokkos::subview( d1, 1 ); + sView s2 = Kokkos::subview( d2, 1, 1 ); + sView s3 = Kokkos::subview( d3, 1, 1, 1 ); + sView s4 = Kokkos::subview( d4, 1, 1, 1, 1 ); } static void run_test_subview_strided() { - typedef Kokkos::View< int **** , Kokkos::LayoutLeft , host > view_left_4 ; - typedef Kokkos::View< int **** , Kokkos::LayoutRight , host > view_right_4 ; - typedef Kokkos::View< int ** , Kokkos::LayoutLeft , host > view_left_2 ; - typedef Kokkos::View< int ** , Kokkos::LayoutRight , host > view_right_2 ; + typedef Kokkos::View< int ****, Kokkos::LayoutLeft , host > view_left_4; + typedef Kokkos::View< int ****, Kokkos::LayoutRight, host > view_right_4; + typedef Kokkos::View< int ** , Kokkos::LayoutLeft , host > view_left_2; + typedef Kokkos::View< int ** , Kokkos::LayoutRight, host > view_right_2; - typedef Kokkos::View< int * , Kokkos::LayoutStride , host > view_stride_1 ; - typedef Kokkos::View< int ** , Kokkos::LayoutStride , host > view_stride_2 ; + typedef Kokkos::View< int * , Kokkos::LayoutStride, host > view_stride_1; + typedef Kokkos::View< int **, Kokkos::LayoutStride, host > view_stride_2; - view_left_2 xl2("xl2", 100 , 200 ); - view_right_2 xr2("xr2", 100 , 200 ); - view_stride_1 yl1 = Kokkos::subview( xl2 , 0 , Kokkos::ALL() ); - view_stride_1 yl2 = Kokkos::subview( xl2 , 1 , 
Kokkos::ALL() ); - view_stride_1 yr1 = Kokkos::subview( xr2 , 0 , Kokkos::ALL() ); - view_stride_1 yr2 = Kokkos::subview( xr2 , 1 , Kokkos::ALL() ); + view_left_2 xl2( "xl2", 100, 200 ); + view_right_2 xr2( "xr2", 100, 200 ); + view_stride_1 yl1 = Kokkos::subview( xl2, 0, Kokkos::ALL() ); + view_stride_1 yl2 = Kokkos::subview( xl2, 1, Kokkos::ALL() ); + view_stride_1 yr1 = Kokkos::subview( xr2, 0, Kokkos::ALL() ); + view_stride_1 yr2 = Kokkos::subview( xr2, 1, Kokkos::ALL() ); - ASSERT_EQ( yl1.dimension_0() , xl2.dimension_1() ); - ASSERT_EQ( yl2.dimension_0() , xl2.dimension_1() ); - ASSERT_EQ( yr1.dimension_0() , xr2.dimension_1() ); - ASSERT_EQ( yr2.dimension_0() , xr2.dimension_1() ); + ASSERT_EQ( yl1.dimension_0(), xl2.dimension_1() ); + ASSERT_EQ( yl2.dimension_0(), xl2.dimension_1() ); + ASSERT_EQ( yr1.dimension_0(), xr2.dimension_1() ); + ASSERT_EQ( yr2.dimension_0(), xr2.dimension_1() ); - ASSERT_EQ( & yl1(0) - & xl2(0,0) , 0 ); - ASSERT_EQ( & yl2(0) - & xl2(1,0) , 0 ); - ASSERT_EQ( & yr1(0) - & xr2(0,0) , 0 ); - ASSERT_EQ( & yr2(0) - & xr2(1,0) , 0 ); + ASSERT_EQ( & yl1( 0 ) - & xl2( 0, 0 ), 0 ); + ASSERT_EQ( & yl2( 0 ) - & xl2( 1, 0 ), 0 ); + ASSERT_EQ( & yr1( 0 ) - & xr2( 0, 0 ), 0 ); + ASSERT_EQ( & yr2( 0 ) - & xr2( 1, 0 ), 0 ); - view_left_4 xl4( "xl4" , 10 , 20 , 30 , 40 ); - view_right_4 xr4( "xr4" , 10 , 20 , 30 , 40 ); + view_left_4 xl4( "xl4", 10, 20, 30, 40 ); + view_right_4 xr4( "xr4", 10, 20, 30, 40 ); - view_stride_2 yl4 = Kokkos::subview( xl4 , 1 , Kokkos::ALL() , 2 , Kokkos::ALL() ); - view_stride_2 yr4 = Kokkos::subview( xr4 , 1 , Kokkos::ALL() , 2 , Kokkos::ALL() ); + view_stride_2 yl4 = Kokkos::subview( xl4, 1, Kokkos::ALL(), 2, Kokkos::ALL() ); + view_stride_2 yr4 = Kokkos::subview( xr4, 1, Kokkos::ALL(), 2, Kokkos::ALL() ); - ASSERT_EQ( yl4.dimension_0() , xl4.dimension_1() ); - ASSERT_EQ( yl4.dimension_1() , xl4.dimension_3() ); - ASSERT_EQ( yr4.dimension_0() , xr4.dimension_1() ); - ASSERT_EQ( yr4.dimension_1() , xr4.dimension_3() ); 
+ ASSERT_EQ( yl4.dimension_0(), xl4.dimension_1() ); + ASSERT_EQ( yl4.dimension_1(), xl4.dimension_3() ); + ASSERT_EQ( yr4.dimension_0(), xr4.dimension_1() ); + ASSERT_EQ( yr4.dimension_1(), xr4.dimension_3() ); - ASSERT_EQ( & yl4(4,4) - & xl4(1,4,2,4) , 0 ); - ASSERT_EQ( & yr4(4,4) - & xr4(1,4,2,4) , 0 ); + ASSERT_EQ( & yl4( 4, 4 ) - & xl4( 1, 4, 2, 4 ), 0 ); + ASSERT_EQ( & yr4( 4, 4 ) - & xr4( 1, 4, 2, 4 ), 0 ); } static void run_test_vector() { - static const unsigned Length = 1000 , Count = 8 ; + static const unsigned Length = 1000, Count = 8; - typedef Kokkos::View< T* , Kokkos::LayoutLeft , host > vector_type ; - typedef Kokkos::View< T** , Kokkos::LayoutLeft , host > multivector_type ; + typedef Kokkos::View< T*, Kokkos::LayoutLeft, host > vector_type; + typedef Kokkos::View< T**, Kokkos::LayoutLeft, host > multivector_type; - typedef Kokkos::View< T* , Kokkos::LayoutRight , host > vector_right_type ; - typedef Kokkos::View< T** , Kokkos::LayoutRight , host > multivector_right_type ; + typedef Kokkos::View< T*, Kokkos::LayoutRight, host > vector_right_type; + typedef Kokkos::View< T**, Kokkos::LayoutRight, host > multivector_right_type; - typedef Kokkos::View< const T* , Kokkos::LayoutRight, host > const_vector_right_type ; - typedef Kokkos::View< const T* , Kokkos::LayoutLeft , host > const_vector_type ; - typedef Kokkos::View< const T** , Kokkos::LayoutLeft , host > const_multivector_type ; + typedef Kokkos::View< const T*, Kokkos::LayoutRight, host > const_vector_right_type; + typedef Kokkos::View< const T*, Kokkos::LayoutLeft, host > const_vector_type; + typedef Kokkos::View< const T**, Kokkos::LayoutLeft, host > const_multivector_type; - multivector_type mv = multivector_type( "mv" , Length , Count ); - multivector_right_type mv_right = multivector_right_type( "mv" , Length , Count ); + multivector_type mv = multivector_type( "mv", Length, Count ); + multivector_right_type mv_right = multivector_right_type( "mv", Length, Count ); - vector_type v1 = 
Kokkos::subview( mv , Kokkos::ALL() , 0 ); - vector_type v2 = Kokkos::subview( mv , Kokkos::ALL() , 1 ); - vector_type v3 = Kokkos::subview( mv , Kokkos::ALL() , 2 ); + vector_type v1 = Kokkos::subview( mv, Kokkos::ALL(), 0 ); + vector_type v2 = Kokkos::subview( mv, Kokkos::ALL(), 1 ); + vector_type v3 = Kokkos::subview( mv, Kokkos::ALL(), 2 ); - vector_type rv1 = Kokkos::subview( mv_right , 0 , Kokkos::ALL() ); - vector_type rv2 = Kokkos::subview( mv_right , 1 , Kokkos::ALL() ); - vector_type rv3 = Kokkos::subview( mv_right , 2 , Kokkos::ALL() ); + vector_type rv1 = Kokkos::subview( mv_right, 0, Kokkos::ALL() ); + vector_type rv2 = Kokkos::subview( mv_right, 1, Kokkos::ALL() ); + vector_type rv3 = Kokkos::subview( mv_right, 2, Kokkos::ALL() ); - multivector_type mv1 = Kokkos::subview( mv , std::make_pair( 1 , 998 ) , - std::make_pair( 2 , 5 ) ); + multivector_type mv1 = Kokkos::subview( mv, std::make_pair( 1, 998 ), + std::make_pair( 2, 5 ) ); - multivector_right_type mvr1 = - Kokkos::subview( mv_right , - std::make_pair( 1 , 998 ) , - std::make_pair( 2 , 5 ) ); + multivector_right_type mvr1 = Kokkos::subview( mv_right, std::make_pair( 1, 998 ), + std::make_pair( 2, 5 ) ); - const_vector_type cv1 = Kokkos::subview( mv , Kokkos::ALL(), 0 ); - const_vector_type cv2 = Kokkos::subview( mv , Kokkos::ALL(), 1 ); - const_vector_type cv3 = Kokkos::subview( mv , Kokkos::ALL(), 2 ); + const_vector_type cv1 = Kokkos::subview( mv, Kokkos::ALL(), 0 ); + const_vector_type cv2 = Kokkos::subview( mv, Kokkos::ALL(), 1 ); + const_vector_type cv3 = Kokkos::subview( mv, Kokkos::ALL(), 2 ); - vector_right_type vr1 = Kokkos::subview( mv , Kokkos::ALL() , 0 ); - vector_right_type vr2 = Kokkos::subview( mv , Kokkos::ALL() , 1 ); - vector_right_type vr3 = Kokkos::subview( mv , Kokkos::ALL() , 2 ); + vector_right_type vr1 = Kokkos::subview( mv, Kokkos::ALL(), 0 ); + vector_right_type vr2 = Kokkos::subview( mv, Kokkos::ALL(), 1 ); + vector_right_type vr3 = Kokkos::subview( mv, 
Kokkos::ALL(), 2 ); - const_vector_right_type cvr1 = Kokkos::subview( mv , Kokkos::ALL() , 0 ); - const_vector_right_type cvr2 = Kokkos::subview( mv , Kokkos::ALL() , 1 ); - const_vector_right_type cvr3 = Kokkos::subview( mv , Kokkos::ALL() , 2 ); + const_vector_right_type cvr1 = Kokkos::subview( mv, Kokkos::ALL(), 0 ); + const_vector_right_type cvr2 = Kokkos::subview( mv, Kokkos::ALL(), 1 ); + const_vector_right_type cvr3 = Kokkos::subview( mv, Kokkos::ALL(), 2 ); - ASSERT_TRUE( & v1[0] == & v1(0) ); - ASSERT_TRUE( & v1[0] == & mv(0,0) ); - ASSERT_TRUE( & v2[0] == & mv(0,1) ); - ASSERT_TRUE( & v3[0] == & mv(0,2) ); + ASSERT_TRUE( & v1[0] == & v1( 0 ) ); + ASSERT_TRUE( & v1[0] == & mv( 0, 0 ) ); + ASSERT_TRUE( & v2[0] == & mv( 0, 1 ) ); + ASSERT_TRUE( & v3[0] == & mv( 0, 2 ) ); - ASSERT_TRUE( & cv1[0] == & mv(0,0) ); - ASSERT_TRUE( & cv2[0] == & mv(0,1) ); - ASSERT_TRUE( & cv3[0] == & mv(0,2) ); + ASSERT_TRUE( & cv1[0] == & mv( 0, 0 ) ); + ASSERT_TRUE( & cv2[0] == & mv( 0, 1 ) ); + ASSERT_TRUE( & cv3[0] == & mv( 0, 2 ) ); - ASSERT_TRUE( & vr1[0] == & mv(0,0) ); - ASSERT_TRUE( & vr2[0] == & mv(0,1) ); - ASSERT_TRUE( & vr3[0] == & mv(0,2) ); + ASSERT_TRUE( & vr1[0] == & mv( 0, 0 ) ); + ASSERT_TRUE( & vr2[0] == & mv( 0, 1 ) ); + ASSERT_TRUE( & vr3[0] == & mv( 0, 2 ) ); - ASSERT_TRUE( & cvr1[0] == & mv(0,0) ); - ASSERT_TRUE( & cvr2[0] == & mv(0,1) ); - ASSERT_TRUE( & cvr3[0] == & mv(0,2) ); + ASSERT_TRUE( & cvr1[0] == & mv( 0, 0 ) ); + ASSERT_TRUE( & cvr2[0] == & mv( 0, 1 ) ); + ASSERT_TRUE( & cvr3[0] == & mv( 0, 2 ) ); - ASSERT_TRUE( & mv1(0,0) == & mv( 1 , 2 ) ); - ASSERT_TRUE( & mv1(1,1) == & mv( 2 , 3 ) ); - ASSERT_TRUE( & mv1(3,2) == & mv( 4 , 4 ) ); - ASSERT_TRUE( & mvr1(0,0) == & mv_right( 1 , 2 ) ); - ASSERT_TRUE( & mvr1(1,1) == & mv_right( 2 , 3 ) ); - ASSERT_TRUE( & mvr1(3,2) == & mv_right( 4 , 4 ) ); + ASSERT_TRUE( & mv1( 0, 0 ) == & mv( 1, 2 ) ); + ASSERT_TRUE( & mv1( 1, 1 ) == & mv( 2, 3 ) ); + ASSERT_TRUE( & mv1( 3, 2 ) == & mv( 4, 4 ) ); + ASSERT_TRUE( & 
mvr1( 0, 0 ) == & mv_right( 1, 2 ) ); + ASSERT_TRUE( & mvr1( 1, 1 ) == & mv_right( 2, 3 ) ); + ASSERT_TRUE( & mvr1( 3, 2 ) == & mv_right( 4, 4 ) ); const_vector_type c_cv1( v1 ); typename vector_type::const_type c_cv2( v2 ); @@ -1356,6 +1320,3 @@ public: }; } // namespace Test - -/*--------------------------------------------------------------------------*/ - diff --git a/lib/kokkos/core/unit_test/TestViewMapping.hpp b/lib/kokkos/core/unit_test/TestViewMapping.hpp index 324f02e947..71604bed51 100644 --- a/lib/kokkos/core/unit_test/TestViewMapping.hpp +++ b/lib/kokkos/core/unit_test/TestViewMapping.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -49,1126 +49,1140 @@ #include -/*--------------------------------------------------------------------------*/ - namespace Test { template< class Space > void test_view_mapping() { - typedef typename Space::execution_space ExecSpace ; + typedef typename Space::execution_space ExecSpace; - typedef Kokkos::Experimental::Impl::ViewDimension<> dim_0 ; - typedef Kokkos::Experimental::Impl::ViewDimension<2> dim_s2 ; - typedef Kokkos::Experimental::Impl::ViewDimension<2,3> dim_s2_s3 ; - typedef Kokkos::Experimental::Impl::ViewDimension<2,3,4> dim_s2_s3_s4 ; + typedef Kokkos::Experimental::Impl::ViewDimension<> dim_0; + typedef Kokkos::Experimental::Impl::ViewDimension< 2 > dim_s2; + typedef Kokkos::Experimental::Impl::ViewDimension< 2, 3 > dim_s2_s3; + typedef Kokkos::Experimental::Impl::ViewDimension< 2, 3, 4 > dim_s2_s3_s4; - typedef Kokkos::Experimental::Impl::ViewDimension<0> dim_s0 ; - typedef Kokkos::Experimental::Impl::ViewDimension<0,3> dim_s0_s3 ; - typedef Kokkos::Experimental::Impl::ViewDimension<0,3,4> dim_s0_s3_s4 ; + typedef Kokkos::Experimental::Impl::ViewDimension< 0 > dim_s0; + typedef Kokkos::Experimental::Impl::ViewDimension< 0, 3 > dim_s0_s3; + typedef Kokkos::Experimental::Impl::ViewDimension< 0, 3, 4 > dim_s0_s3_s4; - typedef Kokkos::Experimental::Impl::ViewDimension<0,0> dim_s0_s0 ; - typedef Kokkos::Experimental::Impl::ViewDimension<0,0,4> dim_s0_s0_s4 ; + typedef Kokkos::Experimental::Impl::ViewDimension< 0, 0 > dim_s0_s0; + typedef Kokkos::Experimental::Impl::ViewDimension< 0, 0, 4 > dim_s0_s0_s4; - typedef Kokkos::Experimental::Impl::ViewDimension<0,0,0> dim_s0_s0_s0 ; - typedef Kokkos::Experimental::Impl::ViewDimension<0,0,0,0> dim_s0_s0_s0_s0 ; - typedef Kokkos::Experimental::Impl::ViewDimension<0,0,0,0,0> dim_s0_s0_s0_s0_s0 ; - typedef Kokkos::Experimental::Impl::ViewDimension<0,0,0,0,0,0> 
dim_s0_s0_s0_s0_s0_s0 ; - typedef Kokkos::Experimental::Impl::ViewDimension<0,0,0,0,0,0,0> dim_s0_s0_s0_s0_s0_s0_s0 ; - typedef Kokkos::Experimental::Impl::ViewDimension<0,0,0,0,0,0,0,0> dim_s0_s0_s0_s0_s0_s0_s0_s0 ; + typedef Kokkos::Experimental::Impl::ViewDimension< 0, 0, 0 > dim_s0_s0_s0; + typedef Kokkos::Experimental::Impl::ViewDimension< 0, 0, 0, 0 > dim_s0_s0_s0_s0; + typedef Kokkos::Experimental::Impl::ViewDimension< 0, 0, 0, 0, 0 > dim_s0_s0_s0_s0_s0; + typedef Kokkos::Experimental::Impl::ViewDimension< 0, 0, 0, 0, 0, 0 > dim_s0_s0_s0_s0_s0_s0; + typedef Kokkos::Experimental::Impl::ViewDimension< 0, 0, 0, 0, 0, 0, 0 > dim_s0_s0_s0_s0_s0_s0_s0; + typedef Kokkos::Experimental::Impl::ViewDimension< 0, 0, 0, 0, 0, 0, 0, 0 > dim_s0_s0_s0_s0_s0_s0_s0_s0; - // Fully static dimensions should not be larger than an int - ASSERT_LE( sizeof(dim_0) , sizeof(int) ); - ASSERT_LE( sizeof(dim_s2) , sizeof(int) ); - ASSERT_LE( sizeof(dim_s2_s3) , sizeof(int) ); - ASSERT_LE( sizeof(dim_s2_s3_s4) , sizeof(int) ); + // Fully static dimensions should not be larger than an int. + ASSERT_LE( sizeof( dim_0 ), sizeof( int ) ); + ASSERT_LE( sizeof( dim_s2 ), sizeof( int ) ); + ASSERT_LE( sizeof( dim_s2_s3 ), sizeof( int ) ); + ASSERT_LE( sizeof( dim_s2_s3_s4 ), sizeof( int ) ); - // Rank 1 is size_t - ASSERT_EQ( sizeof(dim_s0) , sizeof(size_t) ); - ASSERT_EQ( sizeof(dim_s0_s3) , sizeof(size_t) ); - ASSERT_EQ( sizeof(dim_s0_s3_s4) , sizeof(size_t) ); + // Rank 1 is size_t. + ASSERT_EQ( sizeof( dim_s0 ), sizeof( size_t ) ); + ASSERT_EQ( sizeof( dim_s0_s3 ), sizeof( size_t ) ); + ASSERT_EQ( sizeof( dim_s0_s3_s4 ), sizeof( size_t ) ); - // Allow for padding - ASSERT_LE( sizeof(dim_s0_s0) , 2 * sizeof(size_t) ); - ASSERT_LE( sizeof(dim_s0_s0_s4) , 2 * sizeof(size_t) ); + // Allow for padding. 
+ ASSERT_LE( sizeof( dim_s0_s0 ), 2 * sizeof( size_t ) ); + ASSERT_LE( sizeof( dim_s0_s0_s4 ), 2 * sizeof( size_t ) ); - ASSERT_LE( sizeof(dim_s0_s0_s0) , 4 * sizeof(size_t) ); - ASSERT_EQ( sizeof(dim_s0_s0_s0_s0) , 4 * sizeof(unsigned) ); - ASSERT_LE( sizeof(dim_s0_s0_s0_s0_s0) , 6 * sizeof(unsigned) ); - ASSERT_EQ( sizeof(dim_s0_s0_s0_s0_s0_s0) , 6 * sizeof(unsigned) ); - ASSERT_LE( sizeof(dim_s0_s0_s0_s0_s0_s0_s0) , 8 * sizeof(unsigned) ); - ASSERT_EQ( sizeof(dim_s0_s0_s0_s0_s0_s0_s0_s0) , 8 * sizeof(unsigned) ); + ASSERT_LE( sizeof( dim_s0_s0_s0 ), 4 * sizeof( size_t ) ); + ASSERT_EQ( sizeof( dim_s0_s0_s0_s0 ), 4 * sizeof( unsigned ) ); + ASSERT_LE( sizeof( dim_s0_s0_s0_s0_s0 ), 6 * sizeof( unsigned ) ); + ASSERT_EQ( sizeof( dim_s0_s0_s0_s0_s0_s0 ), 6 * sizeof( unsigned ) ); + ASSERT_LE( sizeof( dim_s0_s0_s0_s0_s0_s0_s0 ), 8 * sizeof( unsigned ) ); + ASSERT_EQ( sizeof( dim_s0_s0_s0_s0_s0_s0_s0_s0 ), 8 * sizeof( unsigned ) ); - static_assert( int(dim_0::rank) == int(0) , "" ); - static_assert( int(dim_0::rank_dynamic) == int(0) , "" ); - static_assert( int(dim_0::ArgN0) == 1 , "" ); - static_assert( int(dim_0::ArgN1) == 1 , "" ); - static_assert( int(dim_0::ArgN2) == 1 , "" ); + static_assert( int( dim_0::rank ) == int( 0 ), "" ); + static_assert( int( dim_0::rank_dynamic ) == int( 0 ), "" ); + static_assert( int( dim_0::ArgN0 ) == 1, "" ); + static_assert( int( dim_0::ArgN1 ) == 1, "" ); + static_assert( int( dim_0::ArgN2 ) == 1, "" ); - static_assert( int(dim_s2::rank) == int(1) , "" ); - static_assert( int(dim_s2::rank_dynamic) == int(0) , "" ); - static_assert( int(dim_s2::ArgN0) == 2 , "" ); - static_assert( int(dim_s2::ArgN1) == 1 , "" ); + static_assert( int( dim_s2::rank ) == int( 1 ), "" ); + static_assert( int( dim_s2::rank_dynamic ) == int( 0 ), "" ); + static_assert( int( dim_s2::ArgN0 ) == 2, "" ); + static_assert( int( dim_s2::ArgN1 ) == 1, "" ); - static_assert( int(dim_s2_s3::rank) == int(2) , "" ); - static_assert( int(dim_s2_s3::rank_dynamic) 
== int(0) , "" ); - static_assert( int(dim_s2_s3::ArgN0) == 2 , "" ); - static_assert( int(dim_s2_s3::ArgN1) == 3 , "" ); - static_assert( int(dim_s2_s3::ArgN2) == 1 , "" ); + static_assert( int( dim_s2_s3::rank ) == int( 2 ), "" ); + static_assert( int( dim_s2_s3::rank_dynamic ) == int( 0 ), "" ); + static_assert( int( dim_s2_s3::ArgN0 ) == 2, "" ); + static_assert( int( dim_s2_s3::ArgN1 ) == 3, "" ); + static_assert( int( dim_s2_s3::ArgN2 ) == 1, "" ); - static_assert( int(dim_s2_s3_s4::rank) == int(3) , "" ); - static_assert( int(dim_s2_s3_s4::rank_dynamic) == int(0) , "" ); - static_assert( int(dim_s2_s3_s4::ArgN0) == 2 , "" ); - static_assert( int(dim_s2_s3_s4::ArgN1) == 3 , "" ); - static_assert( int(dim_s2_s3_s4::ArgN2) == 4 , "" ); - static_assert( int(dim_s2_s3_s4::ArgN3) == 1 , "" ); + static_assert( int( dim_s2_s3_s4::rank ) == int( 3 ), "" ); + static_assert( int( dim_s2_s3_s4::rank_dynamic ) == int( 0 ), "" ); + static_assert( int( dim_s2_s3_s4::ArgN0 ) == 2, "" ); + static_assert( int( dim_s2_s3_s4::ArgN1 ) == 3, "" ); + static_assert( int( dim_s2_s3_s4::ArgN2 ) == 4, "" ); + static_assert( int( dim_s2_s3_s4::ArgN3 ) == 1, "" ); - static_assert( int(dim_s0::rank) == int(1) , "" ); - static_assert( int(dim_s0::rank_dynamic) == int(1) , "" ); + static_assert( int( dim_s0::rank ) == int( 1 ), "" ); + static_assert( int( dim_s0::rank_dynamic ) == int( 1 ), "" ); - static_assert( int(dim_s0_s3::rank) == int(2) , "" ); - static_assert( int(dim_s0_s3::rank_dynamic) == int(1) , "" ); - static_assert( int(dim_s0_s3::ArgN0) == 0 , "" ); - static_assert( int(dim_s0_s3::ArgN1) == 3 , "" ); + static_assert( int( dim_s0_s3::rank ) == int( 2 ), "" ); + static_assert( int( dim_s0_s3::rank_dynamic ) == int( 1 ), "" ); + static_assert( int( dim_s0_s3::ArgN0 ) == 0, "" ); + static_assert( int( dim_s0_s3::ArgN1 ) == 3, "" ); - static_assert( int(dim_s0_s3_s4::rank) == int(3) , "" ); - static_assert( int(dim_s0_s3_s4::rank_dynamic) == int(1) , "" ); - static_assert( 
int(dim_s0_s3_s4::ArgN0) == 0 , "" ); - static_assert( int(dim_s0_s3_s4::ArgN1) == 3 , "" ); - static_assert( int(dim_s0_s3_s4::ArgN2) == 4 , "" ); + static_assert( int( dim_s0_s3_s4::rank ) == int( 3 ), "" ); + static_assert( int( dim_s0_s3_s4::rank_dynamic ) == int( 1 ), "" ); + static_assert( int( dim_s0_s3_s4::ArgN0 ) == 0, "" ); + static_assert( int( dim_s0_s3_s4::ArgN1 ) == 3, "" ); + static_assert( int( dim_s0_s3_s4::ArgN2 ) == 4, "" ); - static_assert( int(dim_s0_s0_s4::rank) == int(3) , "" ); - static_assert( int(dim_s0_s0_s4::rank_dynamic) == int(2) , "" ); - static_assert( int(dim_s0_s0_s4::ArgN0) == 0 , "" ); - static_assert( int(dim_s0_s0_s4::ArgN1) == 0 , "" ); - static_assert( int(dim_s0_s0_s4::ArgN2) == 4 , "" ); + static_assert( int( dim_s0_s0_s4::rank ) == int( 3 ), "" ); + static_assert( int( dim_s0_s0_s4::rank_dynamic ) == int( 2 ), "" ); + static_assert( int( dim_s0_s0_s4::ArgN0 ) == 0, "" ); + static_assert( int( dim_s0_s0_s4::ArgN1 ) == 0, "" ); + static_assert( int( dim_s0_s0_s4::ArgN2 ) == 4, "" ); - static_assert( int(dim_s0_s0_s0::rank) == int(3) , "" ); - static_assert( int(dim_s0_s0_s0::rank_dynamic) == int(3) , "" ); + static_assert( int( dim_s0_s0_s0::rank ) == int( 3 ), "" ); + static_assert( int( dim_s0_s0_s0::rank_dynamic ) == int( 3 ), "" ); - static_assert( int(dim_s0_s0_s0_s0::rank) == int(4) , "" ); - static_assert( int(dim_s0_s0_s0_s0::rank_dynamic) == int(4) , "" ); + static_assert( int( dim_s0_s0_s0_s0::rank ) == int( 4 ), "" ); + static_assert( int( dim_s0_s0_s0_s0::rank_dynamic ) == int( 4 ), "" ); - static_assert( int(dim_s0_s0_s0_s0_s0::rank) == int(5) , "" ); - static_assert( int(dim_s0_s0_s0_s0_s0::rank_dynamic) == int(5) , "" ); + static_assert( int( dim_s0_s0_s0_s0_s0::rank ) == int( 5 ), "" ); + static_assert( int( dim_s0_s0_s0_s0_s0::rank_dynamic ) == int( 5 ), "" ); - static_assert( int(dim_s0_s0_s0_s0_s0_s0::rank) == int(6) , "" ); - static_assert( int(dim_s0_s0_s0_s0_s0_s0::rank_dynamic) == int(6) , "" ); + 
static_assert( int( dim_s0_s0_s0_s0_s0_s0::rank ) == int( 6 ), "" ); + static_assert( int( dim_s0_s0_s0_s0_s0_s0::rank_dynamic ) == int( 6 ), "" ); - static_assert( int(dim_s0_s0_s0_s0_s0_s0_s0::rank) == int(7) , "" ); - static_assert( int(dim_s0_s0_s0_s0_s0_s0_s0::rank_dynamic) == int(7) , "" ); + static_assert( int( dim_s0_s0_s0_s0_s0_s0_s0::rank ) == int( 7 ), "" ); + static_assert( int( dim_s0_s0_s0_s0_s0_s0_s0::rank_dynamic ) == int( 7 ), "" ); - static_assert( int(dim_s0_s0_s0_s0_s0_s0_s0_s0::rank) == int(8) , "" ); - static_assert( int(dim_s0_s0_s0_s0_s0_s0_s0_s0::rank_dynamic) == int(8) , "" ); + static_assert( int( dim_s0_s0_s0_s0_s0_s0_s0_s0::rank ) == int( 8 ), "" ); + static_assert( int( dim_s0_s0_s0_s0_s0_s0_s0_s0::rank_dynamic ) == int( 8 ), "" ); - dim_s0 d1( 2, 3, 4, 5, 6, 7, 8, 9 ); + dim_s0 d1( 2, 3, 4, 5, 6, 7, 8, 9 ); dim_s0_s0 d2( 2, 3, 4, 5, 6, 7, 8, 9 ); dim_s0_s0_s0 d3( 2, 3, 4, 5, 6, 7, 8, 9 ); dim_s0_s0_s0_s0 d4( 2, 3, 4, 5, 6, 7, 8, 9 ); - ASSERT_EQ( d1.N0 , 2 ); - ASSERT_EQ( d2.N0 , 2 ); - ASSERT_EQ( d3.N0 , 2 ); - ASSERT_EQ( d4.N0 , 2 ); + ASSERT_EQ( d1.N0, 2 ); + ASSERT_EQ( d2.N0, 2 ); + ASSERT_EQ( d3.N0, 2 ); + ASSERT_EQ( d4.N0, 2 ); - ASSERT_EQ( d1.N1 , 1 ); - ASSERT_EQ( d2.N1 , 3 ); - ASSERT_EQ( d3.N1 , 3 ); - ASSERT_EQ( d4.N1 , 3 ); + ASSERT_EQ( d1.N1, 1 ); + ASSERT_EQ( d2.N1, 3 ); + ASSERT_EQ( d3.N1, 3 ); + ASSERT_EQ( d4.N1, 3 ); - ASSERT_EQ( d1.N2 , 1 ); - ASSERT_EQ( d2.N2 , 1 ); - ASSERT_EQ( d3.N2 , 4 ); - ASSERT_EQ( d4.N2 , 4 ); + ASSERT_EQ( d1.N2, 1 ); + ASSERT_EQ( d2.N2, 1 ); + ASSERT_EQ( d3.N2, 4 ); + ASSERT_EQ( d4.N2, 4 ); - ASSERT_EQ( d1.N3 , 1 ); - ASSERT_EQ( d2.N3 , 1 ); - ASSERT_EQ( d3.N3 , 1 ); - ASSERT_EQ( d4.N3 , 5 ); + ASSERT_EQ( d1.N3, 1 ); + ASSERT_EQ( d2.N3, 1 ); + ASSERT_EQ( d3.N3, 1 ); + ASSERT_EQ( d4.N3, 5 ); //---------------------------------------- - typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s0 , Kokkos::LayoutStride > stride_s0_s0_s0 ; + typedef Kokkos::Experimental::Impl::ViewOffset< 
dim_s0_s0_s0, Kokkos::LayoutStride > stride_s0_s0_s0; //---------------------------------------- - // Static dimension + // Static dimension. { - typedef Kokkos::Experimental::Impl::ViewOffset< dim_s2_s3_s4 , Kokkos::LayoutLeft > left_s2_s3_s4 ; + typedef Kokkos::Experimental::Impl::ViewOffset< dim_s2_s3_s4, Kokkos::LayoutLeft > left_s2_s3_s4; - ASSERT_EQ( sizeof(left_s2_s3_s4) , sizeof(dim_s2_s3_s4) ); + ASSERT_EQ( sizeof( left_s2_s3_s4 ), sizeof( dim_s2_s3_s4 ) ); - left_s2_s3_s4 off3 ; + left_s2_s3_s4 off3; - stride_s0_s0_s0 stride3( off3 ); + stride_s0_s0_s0 stride3( off3 ); - ASSERT_EQ( off3.stride_0() , 1 ); - ASSERT_EQ( off3.stride_1() , 2 ); - ASSERT_EQ( off3.stride_2() , 6 ); - ASSERT_EQ( off3.span() , 24 ); + ASSERT_EQ( off3.stride_0(), 1 ); + ASSERT_EQ( off3.stride_1(), 2 ); + ASSERT_EQ( off3.stride_2(), 6 ); + ASSERT_EQ( off3.span(), 24 ); - ASSERT_EQ( off3.stride_0() , stride3.stride_0() ); - ASSERT_EQ( off3.stride_1() , stride3.stride_1() ); - ASSERT_EQ( off3.stride_2() , stride3.stride_2() ); - ASSERT_EQ( off3.span() , stride3.span() ); + ASSERT_EQ( off3.stride_0(), stride3.stride_0() ); + ASSERT_EQ( off3.stride_1(), stride3.stride_1() ); + ASSERT_EQ( off3.stride_2(), stride3.stride_2() ); + ASSERT_EQ( off3.span(), stride3.span() ); - int offset = 0 ; + int offset = 0; - for ( int k = 0 ; k < 4 ; ++k ){ - for ( int j = 0 ; j < 3 ; ++j ){ - for ( int i = 0 ; i < 2 ; ++i , ++offset ){ - ASSERT_EQ( off3(i,j,k) , offset ); - ASSERT_EQ( stride3(i,j,k) , off3(i,j,k) ); - }}} + for ( int k = 0; k < 4; ++k ) + for ( int j = 0; j < 3; ++j ) + for ( int i = 0; i < 2; ++i, ++offset ) + { + ASSERT_EQ( off3( i, j, k ), offset ); + ASSERT_EQ( stride3( i, j, k ), off3( i, j, k ) ); + } } //---------------------------------------- - // Small dimension is unpadded + // Small dimension is unpadded. 
{ - typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4 , Kokkos::LayoutLeft > left_s0_s0_s4 ; + typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4, Kokkos::LayoutLeft > left_s0_s0_s4; - left_s0_s0_s4 dyn_off3( std::integral_constant() + left_s0_s0_s4 dyn_off3( std::integral_constant< unsigned, sizeof( int ) >() , Kokkos::LayoutLeft( 2, 3, 0, 0, 0, 0, 0, 0 ) ); stride_s0_s0_s0 stride3( dyn_off3 ); - ASSERT_EQ( dyn_off3.m_dim.rank , 3 ); - ASSERT_EQ( dyn_off3.m_dim.N0 , 2 ); - ASSERT_EQ( dyn_off3.m_dim.N1 , 3 ); - ASSERT_EQ( dyn_off3.m_dim.N2 , 4 ); - ASSERT_EQ( dyn_off3.m_dim.N3 , 1 ); - ASSERT_EQ( dyn_off3.size() , 2 * 3 * 4 ); + ASSERT_EQ( dyn_off3.m_dim.rank, 3 ); + ASSERT_EQ( dyn_off3.m_dim.N0, 2 ); + ASSERT_EQ( dyn_off3.m_dim.N1, 3 ); + ASSERT_EQ( dyn_off3.m_dim.N2, 4 ); + ASSERT_EQ( dyn_off3.m_dim.N3, 1 ); + ASSERT_EQ( dyn_off3.size(), 2 * 3 * 4 ); const Kokkos::LayoutLeft layout = dyn_off3.layout(); - ASSERT_EQ( layout.dimension[0] , 2 ); - ASSERT_EQ( layout.dimension[1] , 3 ); - ASSERT_EQ( layout.dimension[2] , 4 ); - ASSERT_EQ( layout.dimension[3] , 1 ); - ASSERT_EQ( layout.dimension[4] , 1 ); - ASSERT_EQ( layout.dimension[5] , 1 ); - ASSERT_EQ( layout.dimension[6] , 1 ); - ASSERT_EQ( layout.dimension[7] , 1 ); + ASSERT_EQ( layout.dimension[0], 2 ); + ASSERT_EQ( layout.dimension[1], 3 ); + ASSERT_EQ( layout.dimension[2], 4 ); + ASSERT_EQ( layout.dimension[3], 1 ); + ASSERT_EQ( layout.dimension[4], 1 ); + ASSERT_EQ( layout.dimension[5], 1 ); + ASSERT_EQ( layout.dimension[6], 1 ); + ASSERT_EQ( layout.dimension[7], 1 ); - ASSERT_EQ( stride3.m_dim.rank , 3 ); - ASSERT_EQ( stride3.m_dim.N0 , 2 ); - ASSERT_EQ( stride3.m_dim.N1 , 3 ); - ASSERT_EQ( stride3.m_dim.N2 , 4 ); - ASSERT_EQ( stride3.m_dim.N3 , 1 ); - ASSERT_EQ( stride3.size() , 2 * 3 * 4 ); + ASSERT_EQ( stride3.m_dim.rank, 3 ); + ASSERT_EQ( stride3.m_dim.N0, 2 ); + ASSERT_EQ( stride3.m_dim.N1, 3 ); + ASSERT_EQ( stride3.m_dim.N2, 4 ); + ASSERT_EQ( stride3.m_dim.N3, 1 ); + ASSERT_EQ( 
stride3.size(), 2 * 3 * 4 ); - int offset = 0 ; + int offset = 0; - for ( int k = 0 ; k < 4 ; ++k ){ - for ( int j = 0 ; j < 3 ; ++j ){ - for ( int i = 0 ; i < 2 ; ++i , ++offset ){ - ASSERT_EQ( offset , dyn_off3(i,j,k) ); - ASSERT_EQ( stride3(i,j,k) , dyn_off3(i,j,k) ); - }}} + for ( int k = 0; k < 4; ++k ) + for ( int j = 0; j < 3; ++j ) + for ( int i = 0; i < 2; ++i, ++offset ) + { + ASSERT_EQ( offset, dyn_off3( i, j, k ) ); + ASSERT_EQ( stride3( i, j, k ), dyn_off3( i, j, k ) ); + } - ASSERT_EQ( dyn_off3.span() , offset ); - ASSERT_EQ( stride3.span() , dyn_off3.span() ); + ASSERT_EQ( dyn_off3.span(), offset ); + ASSERT_EQ( stride3.span(), dyn_off3.span() ); } - // Large dimension is likely padded + //---------------------------------------- + // Large dimension is likely padded. { - constexpr int N0 = 2000 ; - constexpr int N1 = 300 ; + constexpr int N0 = 2000; + constexpr int N1 = 300; - typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4 , Kokkos::LayoutLeft > left_s0_s0_s4 ; + typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4, Kokkos::LayoutLeft > left_s0_s0_s4; - left_s0_s0_s4 dyn_off3( std::integral_constant() + left_s0_s0_s4 dyn_off3( std::integral_constant< unsigned, sizeof( int ) >() , Kokkos::LayoutLeft( N0, N1, 0, 0, 0, 0, 0, 0 ) ); stride_s0_s0_s0 stride3( dyn_off3 ); - ASSERT_EQ( dyn_off3.m_dim.rank , 3 ); - ASSERT_EQ( dyn_off3.m_dim.N0 , N0 ); - ASSERT_EQ( dyn_off3.m_dim.N1 , N1 ); - ASSERT_EQ( dyn_off3.m_dim.N2 , 4 ); - ASSERT_EQ( dyn_off3.m_dim.N3 , 1 ); - ASSERT_EQ( dyn_off3.size() , N0 * N1 * 4 ); + ASSERT_EQ( dyn_off3.m_dim.rank, 3 ); + ASSERT_EQ( dyn_off3.m_dim.N0, N0 ); + ASSERT_EQ( dyn_off3.m_dim.N1, N1 ); + ASSERT_EQ( dyn_off3.m_dim.N2, 4 ); + ASSERT_EQ( dyn_off3.m_dim.N3, 1 ); + ASSERT_EQ( dyn_off3.size(), N0 * N1 * 4 ); - ASSERT_EQ( stride3.m_dim.rank , 3 ); - ASSERT_EQ( stride3.m_dim.N0 , N0 ); - ASSERT_EQ( stride3.m_dim.N1 , N1 ); - ASSERT_EQ( stride3.m_dim.N2 , 4 ); - ASSERT_EQ( stride3.m_dim.N3 , 1 ); - ASSERT_EQ( 
stride3.size() , N0 * N1 * 4 ); - ASSERT_EQ( stride3.span() , dyn_off3.span() ); + ASSERT_EQ( stride3.m_dim.rank, 3 ); + ASSERT_EQ( stride3.m_dim.N0, N0 ); + ASSERT_EQ( stride3.m_dim.N1, N1 ); + ASSERT_EQ( stride3.m_dim.N2, 4 ); + ASSERT_EQ( stride3.m_dim.N3, 1 ); + ASSERT_EQ( stride3.size(), N0 * N1 * 4 ); + ASSERT_EQ( stride3.span(), dyn_off3.span() ); - int offset = 0 ; + int offset = 0; - for ( int k = 0 ; k < 4 ; ++k ){ - for ( int j = 0 ; j < N1 ; ++j ){ - for ( int i = 0 ; i < N0 ; ++i ){ - ASSERT_LE( offset , dyn_off3(i,j,k) ); - ASSERT_EQ( stride3(i,j,k) , dyn_off3(i,j,k) ); - offset = dyn_off3(i,j,k) + 1 ; - }}} + for ( int k = 0; k < 4; ++k ) + for ( int j = 0; j < N1; ++j ) + for ( int i = 0; i < N0; ++i ) + { + ASSERT_LE( offset, dyn_off3( i, j, k ) ); + ASSERT_EQ( stride3( i, j, k ), dyn_off3( i, j, k ) ); + offset = dyn_off3( i, j, k ) + 1; + } - ASSERT_LE( offset , dyn_off3.span() ); + ASSERT_LE( offset, dyn_off3.span() ); } //---------------------------------------- - // Static dimension + // Static dimension. 
{ - typedef Kokkos::Experimental::Impl::ViewOffset< dim_s2_s3_s4 , Kokkos::LayoutRight > right_s2_s3_s4 ; + typedef Kokkos::Experimental::Impl::ViewOffset< dim_s2_s3_s4, Kokkos::LayoutRight > right_s2_s3_s4; - ASSERT_EQ( sizeof(right_s2_s3_s4) , sizeof(dim_s2_s3_s4) ); + ASSERT_EQ( sizeof( right_s2_s3_s4 ), sizeof( dim_s2_s3_s4 ) ); - right_s2_s3_s4 off3 ; + right_s2_s3_s4 off3; stride_s0_s0_s0 stride3( off3 ); - ASSERT_EQ( off3.stride_0() , 12 ); - ASSERT_EQ( off3.stride_1() , 4 ); - ASSERT_EQ( off3.stride_2() , 1 ); + ASSERT_EQ( off3.stride_0(), 12 ); + ASSERT_EQ( off3.stride_1(), 4 ); + ASSERT_EQ( off3.stride_2(), 1 ); - ASSERT_EQ( off3.dimension_0() , stride3.dimension_0() ); - ASSERT_EQ( off3.dimension_1() , stride3.dimension_1() ); - ASSERT_EQ( off3.dimension_2() , stride3.dimension_2() ); - ASSERT_EQ( off3.stride_0() , stride3.stride_0() ); - ASSERT_EQ( off3.stride_1() , stride3.stride_1() ); - ASSERT_EQ( off3.stride_2() , stride3.stride_2() ); - ASSERT_EQ( off3.span() , stride3.span() ); + ASSERT_EQ( off3.dimension_0(), stride3.dimension_0() ); + ASSERT_EQ( off3.dimension_1(), stride3.dimension_1() ); + ASSERT_EQ( off3.dimension_2(), stride3.dimension_2() ); + ASSERT_EQ( off3.stride_0(), stride3.stride_0() ); + ASSERT_EQ( off3.stride_1(), stride3.stride_1() ); + ASSERT_EQ( off3.stride_2(), stride3.stride_2() ); + ASSERT_EQ( off3.span(), stride3.span() ); - int offset = 0 ; + int offset = 0; - for ( int i = 0 ; i < 2 ; ++i ){ - for ( int j = 0 ; j < 3 ; ++j ){ - for ( int k = 0 ; k < 4 ; ++k , ++offset ){ - ASSERT_EQ( off3(i,j,k) , offset ); - ASSERT_EQ( off3(i,j,k) , stride3(i,j,k) ); - }}} + for ( int i = 0; i < 2; ++i ) + for ( int j = 0; j < 3; ++j ) + for ( int k = 0; k < 4; ++k, ++offset ) + { + ASSERT_EQ( off3( i, j, k ), offset ); + ASSERT_EQ( off3( i, j, k ), stride3( i, j, k ) ); + } - ASSERT_EQ( off3.span() , offset ); + ASSERT_EQ( off3.span(), offset ); } //---------------------------------------- - // Small dimension is unpadded + // Small 
dimension is unpadded. { - typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4 , Kokkos::LayoutRight > right_s0_s0_s4 ; + typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4, Kokkos::LayoutRight > right_s0_s0_s4; - right_s0_s0_s4 dyn_off3( std::integral_constant() + right_s0_s0_s4 dyn_off3( std::integral_constant< unsigned, sizeof( int ) >() , Kokkos::LayoutRight( 2, 3, 0, 0, 0, 0, 0, 0 ) ); stride_s0_s0_s0 stride3( dyn_off3 ); - ASSERT_EQ( dyn_off3.m_dim.rank , 3 ); - ASSERT_EQ( dyn_off3.m_dim.N0 , 2 ); - ASSERT_EQ( dyn_off3.m_dim.N1 , 3 ); - ASSERT_EQ( dyn_off3.m_dim.N2 , 4 ); - ASSERT_EQ( dyn_off3.m_dim.N3 , 1 ); - ASSERT_EQ( dyn_off3.size() , 2 * 3 * 4 ); + ASSERT_EQ( dyn_off3.m_dim.rank, 3 ); + ASSERT_EQ( dyn_off3.m_dim.N0, 2 ); + ASSERT_EQ( dyn_off3.m_dim.N1, 3 ); + ASSERT_EQ( dyn_off3.m_dim.N2, 4 ); + ASSERT_EQ( dyn_off3.m_dim.N3, 1 ); + ASSERT_EQ( dyn_off3.size(), 2 * 3 * 4 ); - ASSERT_EQ( dyn_off3.dimension_0() , stride3.dimension_0() ); - ASSERT_EQ( dyn_off3.dimension_1() , stride3.dimension_1() ); - ASSERT_EQ( dyn_off3.dimension_2() , stride3.dimension_2() ); - ASSERT_EQ( dyn_off3.stride_0() , stride3.stride_0() ); - ASSERT_EQ( dyn_off3.stride_1() , stride3.stride_1() ); - ASSERT_EQ( dyn_off3.stride_2() , stride3.stride_2() ); - ASSERT_EQ( dyn_off3.span() , stride3.span() ); + ASSERT_EQ( dyn_off3.dimension_0(), stride3.dimension_0() ); + ASSERT_EQ( dyn_off3.dimension_1(), stride3.dimension_1() ); + ASSERT_EQ( dyn_off3.dimension_2(), stride3.dimension_2() ); + ASSERT_EQ( dyn_off3.stride_0(), stride3.stride_0() ); + ASSERT_EQ( dyn_off3.stride_1(), stride3.stride_1() ); + ASSERT_EQ( dyn_off3.stride_2(), stride3.stride_2() ); + ASSERT_EQ( dyn_off3.span(), stride3.span() ); - int offset = 0 ; + int offset = 0; - for ( int i = 0 ; i < 2 ; ++i ){ - for ( int j = 0 ; j < 3 ; ++j ){ - for ( int k = 0 ; k < 4 ; ++k , ++offset ){ - ASSERT_EQ( offset , dyn_off3(i,j,k) ); - ASSERT_EQ( dyn_off3(i,j,k) , stride3(i,j,k) ); - }}} + for ( int i = 0; i < 
2; ++i ) + for ( int j = 0; j < 3; ++j ) + for ( int k = 0; k < 4; ++k, ++offset ) + { + ASSERT_EQ( offset, dyn_off3( i, j, k ) ); + ASSERT_EQ( dyn_off3( i, j, k ), stride3( i, j, k ) ); + } - ASSERT_EQ( dyn_off3.span() , offset ); + ASSERT_EQ( dyn_off3.span(), offset ); } - // Large dimension is likely padded + //---------------------------------------- + // Large dimension is likely padded. { - constexpr int N0 = 2000 ; - constexpr int N1 = 300 ; + constexpr int N0 = 2000; + constexpr int N1 = 300; - typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4 , Kokkos::LayoutRight > right_s0_s0_s4 ; + typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4, Kokkos::LayoutRight > right_s0_s0_s4; - right_s0_s0_s4 dyn_off3( std::integral_constant() + right_s0_s0_s4 dyn_off3( std::integral_constant< unsigned, sizeof( int ) >() , Kokkos::LayoutRight( N0, N1, 0, 0, 0, 0, 0, 0 ) ); stride_s0_s0_s0 stride3( dyn_off3 ); - ASSERT_EQ( dyn_off3.m_dim.rank , 3 ); - ASSERT_EQ( dyn_off3.m_dim.N0 , N0 ); - ASSERT_EQ( dyn_off3.m_dim.N1 , N1 ); - ASSERT_EQ( dyn_off3.m_dim.N2 , 4 ); - ASSERT_EQ( dyn_off3.m_dim.N3 , 1 ); - ASSERT_EQ( dyn_off3.size() , N0 * N1 * 4 ); + ASSERT_EQ( dyn_off3.m_dim.rank, 3 ); + ASSERT_EQ( dyn_off3.m_dim.N0, N0 ); + ASSERT_EQ( dyn_off3.m_dim.N1, N1 ); + ASSERT_EQ( dyn_off3.m_dim.N2, 4 ); + ASSERT_EQ( dyn_off3.m_dim.N3, 1 ); + ASSERT_EQ( dyn_off3.size(), N0 * N1 * 4 ); - ASSERT_EQ( dyn_off3.dimension_0() , stride3.dimension_0() ); - ASSERT_EQ( dyn_off3.dimension_1() , stride3.dimension_1() ); - ASSERT_EQ( dyn_off3.dimension_2() , stride3.dimension_2() ); - ASSERT_EQ( dyn_off3.stride_0() , stride3.stride_0() ); - ASSERT_EQ( dyn_off3.stride_1() , stride3.stride_1() ); - ASSERT_EQ( dyn_off3.stride_2() , stride3.stride_2() ); - ASSERT_EQ( dyn_off3.span() , stride3.span() ); + ASSERT_EQ( dyn_off3.dimension_0(), stride3.dimension_0() ); + ASSERT_EQ( dyn_off3.dimension_1(), stride3.dimension_1() ); + ASSERT_EQ( dyn_off3.dimension_2(), stride3.dimension_2() 
); + ASSERT_EQ( dyn_off3.stride_0(), stride3.stride_0() ); + ASSERT_EQ( dyn_off3.stride_1(), stride3.stride_1() ); + ASSERT_EQ( dyn_off3.stride_2(), stride3.stride_2() ); + ASSERT_EQ( dyn_off3.span(), stride3.span() ); - int offset = 0 ; + int offset = 0; - for ( int i = 0 ; i < N0 ; ++i ){ - for ( int j = 0 ; j < N1 ; ++j ){ - for ( int k = 0 ; k < 4 ; ++k ){ - ASSERT_LE( offset , dyn_off3(i,j,k) ); - ASSERT_EQ( dyn_off3(i,j,k) , stride3(i,j,k) ); - offset = dyn_off3(i,j,k) + 1 ; - }}} + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < 4; ++k ) + { + ASSERT_LE( offset, dyn_off3( i, j, k ) ); + ASSERT_EQ( dyn_off3( i, j, k ), stride3( i, j, k ) ); + offset = dyn_off3( i, j, k ) + 1; + } - ASSERT_LE( offset , dyn_off3.span() ); + ASSERT_LE( offset, dyn_off3.span() ); } //---------------------------------------- - // Subview + // Subview. { // Mapping rank 4 to rank 3 - typedef Kokkos::Experimental::Impl::SubviewExtents<4,3> SubviewExtents ; + typedef Kokkos::Experimental::Impl::SubviewExtents< 4, 3 > SubviewExtents; - constexpr int N0 = 1000 ; - constexpr int N1 = 2000 ; - constexpr int N2 = 3000 ; - constexpr int N3 = 4000 ; + constexpr int N0 = 1000; + constexpr int N1 = 2000; + constexpr int N2 = 3000; + constexpr int N3 = 4000; - Kokkos::Experimental::Impl::ViewDimension dim ; + Kokkos::Experimental::Impl::ViewDimension< N0, N1, N2, N3 > dim; SubviewExtents tmp( dim , N0 / 2 , Kokkos::Experimental::ALL - , std::pair( N2 / 4 , 10 + N2 / 4 ) - , Kokkos::pair( N3 / 4 , 20 + N3 / 4 ) + , std::pair< int, int >( N2 / 4, 10 + N2 / 4 ) + , Kokkos::pair< int, int >( N3 / 4, 20 + N3 / 4 ) ); - ASSERT_EQ( tmp.domain_offset(0) , N0 / 2 ); - ASSERT_EQ( tmp.domain_offset(1) , 0 ); - ASSERT_EQ( tmp.domain_offset(2) , N2 / 4 ); - ASSERT_EQ( tmp.domain_offset(3) , N3 / 4 ); + ASSERT_EQ( tmp.domain_offset( 0 ), N0 / 2 ); + ASSERT_EQ( tmp.domain_offset( 1 ), 0 ); + ASSERT_EQ( tmp.domain_offset( 2 ), N2 / 4 ); + ASSERT_EQ( tmp.domain_offset( 3 
), N3 / 4 ); - ASSERT_EQ( tmp.range_index(0) , 1 ); - ASSERT_EQ( tmp.range_index(1) , 2 ); - ASSERT_EQ( tmp.range_index(2) , 3 ); + ASSERT_EQ( tmp.range_index( 0 ), 1 ); + ASSERT_EQ( tmp.range_index( 1 ), 2 ); + ASSERT_EQ( tmp.range_index( 2 ), 3 ); - ASSERT_EQ( tmp.range_extent(0) , N1 ); - ASSERT_EQ( tmp.range_extent(1) , 10 ); - ASSERT_EQ( tmp.range_extent(2) , 20 ); + ASSERT_EQ( tmp.range_extent( 0 ), N1 ); + ASSERT_EQ( tmp.range_extent( 1 ), 10 ); + ASSERT_EQ( tmp.range_extent( 2 ), 20 ); } - //---------------------------------------- + { - constexpr int N0 = 2000 ; - constexpr int N1 = 300 ; + constexpr int N0 = 2000; + constexpr int N1 = 300; - constexpr int sub_N0 = 1000 ; - constexpr int sub_N1 = 200 ; - constexpr int sub_N2 = 4 ; + constexpr int sub_N0 = 1000; + constexpr int sub_N1 = 200; + constexpr int sub_N2 = 4; - typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4 , Kokkos::LayoutLeft > left_s0_s0_s4 ; + typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4, Kokkos::LayoutLeft > left_s0_s0_s4; - left_s0_s0_s4 dyn_off3( std::integral_constant() + left_s0_s0_s4 dyn_off3( std::integral_constant< unsigned, sizeof( int ) >() , Kokkos::LayoutLeft( N0, N1, 0, 0, 0, 0, 0, 0 ) ); - Kokkos::Experimental::Impl::SubviewExtents< 3 , 3 > + Kokkos::Experimental::Impl::SubviewExtents< 3, 3 > sub( dyn_off3.m_dim - , Kokkos::pair(0,sub_N0) - , Kokkos::pair(0,sub_N1) - , Kokkos::pair(0,sub_N2) + , Kokkos::pair< int, int >( 0, sub_N0 ) + , Kokkos::pair< int, int >( 0, sub_N1 ) + , Kokkos::pair< int, int >( 0, sub_N2 ) ); - stride_s0_s0_s0 stride3( dyn_off3 , sub ); + stride_s0_s0_s0 stride3( dyn_off3, sub ); - ASSERT_EQ( stride3.dimension_0() , sub_N0 ); - ASSERT_EQ( stride3.dimension_1() , sub_N1 ); - ASSERT_EQ( stride3.dimension_2() , sub_N2 ); - ASSERT_EQ( stride3.size() , sub_N0 * sub_N1 * sub_N2 ); + ASSERT_EQ( stride3.dimension_0(), sub_N0 ); + ASSERT_EQ( stride3.dimension_1(), sub_N1 ); + ASSERT_EQ( stride3.dimension_2(), sub_N2 ); + ASSERT_EQ( 
stride3.size(), sub_N0 * sub_N1 * sub_N2 ); - ASSERT_EQ( dyn_off3.stride_0() , stride3.stride_0() ); - ASSERT_EQ( dyn_off3.stride_1() , stride3.stride_1() ); - ASSERT_EQ( dyn_off3.stride_2() , stride3.stride_2() ); - ASSERT_GE( dyn_off3.span() , stride3.span() ); + ASSERT_EQ( dyn_off3.stride_0(), stride3.stride_0() ); + ASSERT_EQ( dyn_off3.stride_1(), stride3.stride_1() ); + ASSERT_EQ( dyn_off3.stride_2(), stride3.stride_2() ); + ASSERT_GE( dyn_off3.span() , stride3.span() ); - for ( int k = 0 ; k < sub_N2 ; ++k ){ - for ( int j = 0 ; j < sub_N1 ; ++j ){ - for ( int i = 0 ; i < sub_N0 ; ++i ){ - ASSERT_EQ( stride3(i,j,k) , dyn_off3(i,j,k) ); - }}} + for ( int k = 0; k < sub_N2; ++k ) + for ( int j = 0; j < sub_N1; ++j ) + for ( int i = 0; i < sub_N0; ++i ) + { + ASSERT_EQ( stride3( i, j, k ), dyn_off3( i, j, k ) ); + } } { - constexpr int N0 = 2000 ; - constexpr int N1 = 300 ; + constexpr int N0 = 2000; + constexpr int N1 = 300; - constexpr int sub_N0 = 1000 ; - constexpr int sub_N1 = 200 ; - constexpr int sub_N2 = 4 ; + constexpr int sub_N0 = 1000; + constexpr int sub_N1 = 200; + constexpr int sub_N2 = 4; - typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4 , Kokkos::LayoutRight > right_s0_s0_s4 ; + typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4, Kokkos::LayoutRight > right_s0_s0_s4; - right_s0_s0_s4 dyn_off3( std::integral_constant() + right_s0_s0_s4 dyn_off3( std::integral_constant< unsigned, sizeof( int ) >() , Kokkos::LayoutRight( N0, N1, 0, 0, 0, 0, 0, 0 ) ); - Kokkos::Experimental::Impl::SubviewExtents< 3 , 3 > + Kokkos::Experimental::Impl::SubviewExtents< 3, 3 > sub( dyn_off3.m_dim - , Kokkos::pair(0,sub_N0) - , Kokkos::pair(0,sub_N1) - , Kokkos::pair(0,sub_N2) + , Kokkos::pair< int, int >( 0, sub_N0 ) + , Kokkos::pair< int, int >( 0, sub_N1 ) + , Kokkos::pair< int, int >( 0, sub_N2 ) ); - stride_s0_s0_s0 stride3( dyn_off3 , sub ); + stride_s0_s0_s0 stride3( dyn_off3, sub ); - ASSERT_EQ( stride3.dimension_0() , sub_N0 ); - ASSERT_EQ( 
stride3.dimension_1() , sub_N1 ); - ASSERT_EQ( stride3.dimension_2() , sub_N2 ); - ASSERT_EQ( stride3.size() , sub_N0 * sub_N1 * sub_N2 ); + ASSERT_EQ( stride3.dimension_0(), sub_N0 ); + ASSERT_EQ( stride3.dimension_1(), sub_N1 ); + ASSERT_EQ( stride3.dimension_2(), sub_N2 ); + ASSERT_EQ( stride3.size(), sub_N0 * sub_N1 * sub_N2 ); - ASSERT_EQ( dyn_off3.stride_0() , stride3.stride_0() ); - ASSERT_EQ( dyn_off3.stride_1() , stride3.stride_1() ); - ASSERT_EQ( dyn_off3.stride_2() , stride3.stride_2() ); - ASSERT_GE( dyn_off3.span() , stride3.span() ); + ASSERT_EQ( dyn_off3.stride_0(), stride3.stride_0() ); + ASSERT_EQ( dyn_off3.stride_1(), stride3.stride_1() ); + ASSERT_EQ( dyn_off3.stride_2(), stride3.stride_2() ); + ASSERT_GE( dyn_off3.span() , stride3.span() ); - for ( int i = 0 ; i < sub_N0 ; ++i ){ - for ( int j = 0 ; j < sub_N1 ; ++j ){ - for ( int k = 0 ; k < sub_N2 ; ++k ){ - ASSERT_EQ( stride3(i,j,k) , dyn_off3(i,j,k) ); - }}} + for ( int i = 0; i < sub_N0; ++i ) + for ( int j = 0; j < sub_N1; ++j ) + for ( int k = 0; k < sub_N2; ++k ) + { + ASSERT_EQ( stride3( i, j, k ), dyn_off3( i, j, k ) ); + } } //---------------------------------------- - // view data analysis + // View data analysis. 
{ - using namespace Kokkos::Experimental::Impl ; - static_assert( rank_dynamic<>::value == 0 , "" ); - static_assert( rank_dynamic<1>::value == 0 , "" ); - static_assert( rank_dynamic<0>::value == 1 , "" ); - static_assert( rank_dynamic<0,1>::value == 1 , "" ); - static_assert( rank_dynamic<0,0,1>::value == 2 , "" ); + using namespace Kokkos::Experimental::Impl; + + static_assert( rank_dynamic<>::value == 0, "" ); + static_assert( rank_dynamic< 1 >::value == 0, "" ); + static_assert( rank_dynamic< 0 >::value == 1, "" ); + static_assert( rank_dynamic< 0, 1 >::value == 1, "" ); + static_assert( rank_dynamic< 0, 0, 1 >::value == 2, "" ); } { - using namespace Kokkos::Experimental::Impl ; + using namespace Kokkos::Experimental::Impl; - typedef ViewArrayAnalysis< int[] > a_int_r1 ; - typedef ViewArrayAnalysis< int**[4][5][6] > a_int_r5 ; - typedef ViewArrayAnalysis< const int[] > a_const_int_r1 ; - typedef ViewArrayAnalysis< const int**[4][5][6] > a_const_int_r5 ; + typedef ViewArrayAnalysis< int[] > a_int_r1; + typedef ViewArrayAnalysis< int**[4][5][6] > a_int_r5; + typedef ViewArrayAnalysis< const int[] > a_const_int_r1; + typedef ViewArrayAnalysis< const int**[4][5][6] > a_const_int_r5; - static_assert( a_int_r1::dimension::rank == 1 , "" ); - static_assert( a_int_r1::dimension::rank_dynamic == 1 , "" ); - static_assert( a_int_r5::dimension::ArgN0 == 0 , "" ); - static_assert( a_int_r5::dimension::ArgN1 == 0 , "" ); - static_assert( a_int_r5::dimension::ArgN2 == 4 , "" ); - static_assert( a_int_r5::dimension::ArgN3 == 5 , "" ); - static_assert( a_int_r5::dimension::ArgN4 == 6 , "" ); - static_assert( a_int_r5::dimension::ArgN5 == 1 , "" ); + static_assert( a_int_r1::dimension::rank == 1, "" ); + static_assert( a_int_r1::dimension::rank_dynamic == 1, "" ); + static_assert( a_int_r5::dimension::ArgN0 == 0, "" ); + static_assert( a_int_r5::dimension::ArgN1 == 0, "" ); + static_assert( a_int_r5::dimension::ArgN2 == 4, "" ); + static_assert( a_int_r5::dimension::ArgN3 == 
5, "" ); + static_assert( a_int_r5::dimension::ArgN4 == 6, "" ); + static_assert( a_int_r5::dimension::ArgN5 == 1, "" ); - static_assert( std::is_same< typename a_int_r1::dimension , ViewDimension<0> >::value , "" ); - static_assert( std::is_same< typename a_int_r1::non_const_value_type , int >::value , "" ); + static_assert( std::is_same< typename a_int_r1::dimension, ViewDimension<0> >::value, "" ); + static_assert( std::is_same< typename a_int_r1::non_const_value_type, int >::value, "" ); - static_assert( a_const_int_r1::dimension::rank == 1 , "" ); - static_assert( a_const_int_r1::dimension::rank_dynamic == 1 , "" ); - static_assert( std::is_same< typename a_const_int_r1::dimension , ViewDimension<0> >::value , "" ); - static_assert( std::is_same< typename a_const_int_r1::non_const_value_type , int >::value , "" ); + static_assert( a_const_int_r1::dimension::rank == 1, "" ); + static_assert( a_const_int_r1::dimension::rank_dynamic == 1, "" ); + static_assert( std::is_same< typename a_const_int_r1::dimension, ViewDimension<0> >::value, "" ); + static_assert( std::is_same< typename a_const_int_r1::non_const_value_type, int >::value, "" ); - static_assert( a_const_int_r5::dimension::rank == 5 , "" ); - static_assert( a_const_int_r5::dimension::rank_dynamic == 2 , "" ); + static_assert( a_const_int_r5::dimension::rank == 5, "" ); + static_assert( a_const_int_r5::dimension::rank_dynamic == 2, "" ); - static_assert( a_const_int_r5::dimension::ArgN0 == 0 , "" ); - static_assert( a_const_int_r5::dimension::ArgN1 == 0 , "" ); - static_assert( a_const_int_r5::dimension::ArgN2 == 4 , "" ); - static_assert( a_const_int_r5::dimension::ArgN3 == 5 , "" ); - static_assert( a_const_int_r5::dimension::ArgN4 == 6 , "" ); - static_assert( a_const_int_r5::dimension::ArgN5 == 1 , "" ); + static_assert( a_const_int_r5::dimension::ArgN0 == 0, "" ); + static_assert( a_const_int_r5::dimension::ArgN1 == 0, "" ); + static_assert( a_const_int_r5::dimension::ArgN2 == 4, "" ); + 
static_assert( a_const_int_r5::dimension::ArgN3 == 5, "" ); + static_assert( a_const_int_r5::dimension::ArgN4 == 6, "" ); + static_assert( a_const_int_r5::dimension::ArgN5 == 1, "" ); - static_assert( std::is_same< typename a_const_int_r5::dimension , ViewDimension<0,0,4,5,6> >::value , "" ); - static_assert( std::is_same< typename a_const_int_r5::non_const_value_type , int >::value , "" ); + static_assert( std::is_same< typename a_const_int_r5::dimension, ViewDimension<0, 0, 4, 5, 6> >::value, "" ); + static_assert( std::is_same< typename a_const_int_r5::non_const_value_type, int >::value, "" ); - static_assert( a_int_r5::dimension::rank == 5 , "" ); - static_assert( a_int_r5::dimension::rank_dynamic == 2 , "" ); - static_assert( std::is_same< typename a_int_r5::dimension , ViewDimension<0,0,4,5,6> >::value , "" ); - static_assert( std::is_same< typename a_int_r5::non_const_value_type , int >::value , "" ); + static_assert( a_int_r5::dimension::rank == 5, "" ); + static_assert( a_int_r5::dimension::rank_dynamic == 2, "" ); + static_assert( std::is_same< typename a_int_r5::dimension, ViewDimension<0, 0, 4, 5, 6> >::value, "" ); + static_assert( std::is_same< typename a_int_r5::non_const_value_type, int >::value, "" ); } { - using namespace Kokkos::Experimental::Impl ; + using namespace Kokkos::Experimental::Impl; - typedef int t_i4[4] ; + typedef int t_i4[4]; // Dimensions of t_i4 are appended to the multdimensional array. 
- typedef ViewArrayAnalysis< t_i4 ***[3] > a_int_r5 ; + typedef ViewArrayAnalysis< t_i4 ***[3] > a_int_r5; - static_assert( a_int_r5::dimension::rank == 5 , "" ); - static_assert( a_int_r5::dimension::rank_dynamic == 3 , "" ); - static_assert( a_int_r5::dimension::ArgN0 == 0 , "" ); - static_assert( a_int_r5::dimension::ArgN1 == 0 , "" ); - static_assert( a_int_r5::dimension::ArgN2 == 0 , "" ); - static_assert( a_int_r5::dimension::ArgN3 == 3 , "" ); - static_assert( a_int_r5::dimension::ArgN4 == 4 , "" ); - static_assert( std::is_same< typename a_int_r5::non_const_value_type , int >::value , "" ); + static_assert( a_int_r5::dimension::rank == 5, "" ); + static_assert( a_int_r5::dimension::rank_dynamic == 3, "" ); + static_assert( a_int_r5::dimension::ArgN0 == 0, "" ); + static_assert( a_int_r5::dimension::ArgN1 == 0, "" ); + static_assert( a_int_r5::dimension::ArgN2 == 0, "" ); + static_assert( a_int_r5::dimension::ArgN3 == 3, "" ); + static_assert( a_int_r5::dimension::ArgN4 == 4, "" ); + static_assert( std::is_same< typename a_int_r5::non_const_value_type, int >::value, "" ); } { - using namespace Kokkos::Experimental::Impl ; + using namespace Kokkos::Experimental::Impl; - typedef ViewDataAnalysis< const int[] , void > a_const_int_r1 ; + typedef ViewDataAnalysis< const int[], void > a_const_int_r1; - static_assert( std::is_same< typename a_const_int_r1::specialize , void >::value , "" ); - static_assert( std::is_same< typename a_const_int_r1::dimension , Kokkos::Experimental::Impl::ViewDimension<0> >::value , "" ); + static_assert( std::is_same< typename a_const_int_r1::specialize, void >::value, "" ); + static_assert( std::is_same< typename a_const_int_r1::dimension, Kokkos::Experimental::Impl::ViewDimension<0> >::value, "" ); - static_assert( std::is_same< typename a_const_int_r1::type , const int * >::value , "" ); - static_assert( std::is_same< typename a_const_int_r1::value_type , const int >::value , "" ); + static_assert( std::is_same< typename 
a_const_int_r1::type, const int * >::value, "" ); + static_assert( std::is_same< typename a_const_int_r1::value_type, const int >::value, "" ); - static_assert( std::is_same< typename a_const_int_r1::scalar_array_type , const int * >::value , "" ); - static_assert( std::is_same< typename a_const_int_r1::const_type , const int * >::value , "" ); - static_assert( std::is_same< typename a_const_int_r1::const_value_type , const int >::value , "" ); - static_assert( std::is_same< typename a_const_int_r1::const_scalar_array_type , const int * >::value , "" ); - static_assert( std::is_same< typename a_const_int_r1::non_const_type , int * >::value , "" ); - static_assert( std::is_same< typename a_const_int_r1::non_const_value_type , int >::value , "" ); + static_assert( std::is_same< typename a_const_int_r1::scalar_array_type, const int * >::value, "" ); + static_assert( std::is_same< typename a_const_int_r1::const_type, const int * >::value, "" ); + static_assert( std::is_same< typename a_const_int_r1::const_value_type, const int >::value, "" ); + static_assert( std::is_same< typename a_const_int_r1::const_scalar_array_type, const int * >::value, "" ); + static_assert( std::is_same< typename a_const_int_r1::non_const_type, int * >::value, "" ); + static_assert( std::is_same< typename a_const_int_r1::non_const_value_type, int >::value, "" ); - typedef ViewDataAnalysis< const int**[4] , void > a_const_int_r3 ; + typedef ViewDataAnalysis< const int**[4], void > a_const_int_r3; - static_assert( std::is_same< typename a_const_int_r3::specialize , void >::value , "" ); + static_assert( std::is_same< typename a_const_int_r3::specialize, void >::value, "" ); - static_assert( std::is_same< typename a_const_int_r3::dimension , Kokkos::Experimental::Impl::ViewDimension<0,0,4> >::value , "" ); + static_assert( std::is_same< typename a_const_int_r3::dimension, Kokkos::Experimental::Impl::ViewDimension<0, 0, 4> >::value, "" ); - static_assert( std::is_same< typename 
a_const_int_r3::type , const int**[4] >::value , "" ); - static_assert( std::is_same< typename a_const_int_r3::value_type , const int >::value , "" ); - static_assert( std::is_same< typename a_const_int_r3::scalar_array_type , const int**[4] >::value , "" ); - static_assert( std::is_same< typename a_const_int_r3::const_type , const int**[4] >::value , "" ); - static_assert( std::is_same< typename a_const_int_r3::const_value_type , const int >::value , "" ); - static_assert( std::is_same< typename a_const_int_r3::const_scalar_array_type , const int**[4] >::value , "" ); - static_assert( std::is_same< typename a_const_int_r3::non_const_type , int**[4] >::value , "" ); - static_assert( std::is_same< typename a_const_int_r3::non_const_value_type , int >::value , "" ); - static_assert( std::is_same< typename a_const_int_r3::non_const_scalar_array_type , int**[4] >::value , "" ); + static_assert( std::is_same< typename a_const_int_r3::type, const int**[4] >::value, "" ); + static_assert( std::is_same< typename a_const_int_r3::value_type, const int >::value, "" ); + static_assert( std::is_same< typename a_const_int_r3::scalar_array_type, const int**[4] >::value, "" ); + static_assert( std::is_same< typename a_const_int_r3::const_type, const int**[4] >::value, "" ); + static_assert( std::is_same< typename a_const_int_r3::const_value_type, const int >::value, "" ); + static_assert( std::is_same< typename a_const_int_r3::const_scalar_array_type, const int**[4] >::value, "" ); + static_assert( std::is_same< typename a_const_int_r3::non_const_type, int**[4] >::value, "" ); + static_assert( std::is_same< typename a_const_int_r3::non_const_value_type, int >::value, "" ); + static_assert( std::is_same< typename a_const_int_r3::non_const_scalar_array_type, int**[4] >::value, "" ); - - // std::cout << "typeid(const int**[4]).name() = " << typeid(const int**[4]).name() << std::endl ; + // std::cout << "typeid( const int**[4] ).name() = " << typeid( const int**[4] ).name() << 
std::endl; } //---------------------------------------- { - constexpr int N = 10 ; + constexpr int N = 10; - typedef Kokkos::View T ; - typedef Kokkos::View C ; + typedef Kokkos::View< int*, Space > T; + typedef Kokkos::View< const int*, Space > C; - int data[N] ; + int data[N]; - T vr1(data,N); // view of non-const - C cr1(vr1); // view of const from view of non-const - C cr2( (const int *) data , N ); + T vr1( data, N ); // View of non-const. + C cr1( vr1 ); // View of const from view of non-const. + C cr2( (const int *) data, N ); // Generate static_assert error: // T tmp( cr1 ); - ASSERT_EQ( vr1.span() , N ); - ASSERT_EQ( cr1.span() , N ); - ASSERT_EQ( vr1.data() , & data[0] ); - ASSERT_EQ( cr1.data() , & data[0] ); + ASSERT_EQ( vr1.span(), N ); + ASSERT_EQ( cr1.span(), N ); + ASSERT_EQ( vr1.data(), & data[0] ); + ASSERT_EQ( cr1.data(), & data[0] ); - ASSERT_TRUE( ( std::is_same< typename T::data_type , int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::const_data_type , const int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::non_const_data_type , int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::data_type , int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::const_data_type , const int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::non_const_data_type, int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::scalar_array_type , int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::const_scalar_array_type , const int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::non_const_scalar_array_type , int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::scalar_array_type , int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::const_scalar_array_type , const int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::non_const_scalar_array_type, int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::value_type , int >::value ) ); - ASSERT_TRUE( ( std::is_same< 
typename T::const_value_type , const int >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::non_const_value_type , int >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::value_type , int >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::const_value_type , const int >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::non_const_value_type, int >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::memory_space , typename Space::memory_space >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::reference_type , int & >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::memory_space, typename Space::memory_space >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::reference_type, int & >::value ) ); - ASSERT_EQ( T::Rank , 1 ); + ASSERT_EQ( T::Rank, 1 ); - ASSERT_TRUE( ( std::is_same< typename C::data_type , const int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename C::const_data_type , const int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename C::non_const_data_type , int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename C::data_type , const int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename C::const_data_type , const int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename C::non_const_data_type, int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename C::scalar_array_type , const int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename C::const_scalar_array_type , const int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename C::non_const_scalar_array_type , int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename C::scalar_array_type , const int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename C::const_scalar_array_type , const int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename C::non_const_scalar_array_type, int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename C::value_type , const int >::value ) ); - ASSERT_TRUE( ( std::is_same< typename C::const_value_type , const int 
>::value ) ); - ASSERT_TRUE( ( std::is_same< typename C::non_const_value_type , int >::value ) ); + ASSERT_TRUE( ( std::is_same< typename C::value_type , const int >::value ) ); + ASSERT_TRUE( ( std::is_same< typename C::const_value_type , const int >::value ) ); + ASSERT_TRUE( ( std::is_same< typename C::non_const_value_type, int >::value ) ); - ASSERT_TRUE( ( std::is_same< typename C::memory_space , typename Space::memory_space >::value ) ); - ASSERT_TRUE( ( std::is_same< typename C::reference_type , const int & >::value ) ); + ASSERT_TRUE( ( std::is_same< typename C::memory_space, typename Space::memory_space >::value ) ); + ASSERT_TRUE( ( std::is_same< typename C::reference_type, const int & >::value ) ); - ASSERT_EQ( C::Rank , 1 ); + ASSERT_EQ( C::Rank, 1 ); - ASSERT_EQ( vr1.dimension_0() , N ); + ASSERT_EQ( vr1.dimension_0(), N ); - if ( Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace , typename Space::memory_space >::accessible ) { - for ( int i = 0 ; i < N ; ++i ) data[i] = i + 1 ; - for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( vr1[i] , i + 1 ); - for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( cr1[i] , i + 1 ); + if ( Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace, typename Space::memory_space >::accessible ) { + for ( int i = 0; i < N; ++i ) data[i] = i + 1; + for ( int i = 0; i < N; ++i ) ASSERT_EQ( vr1[i], i + 1 ); + for ( int i = 0; i < N; ++i ) ASSERT_EQ( cr1[i], i + 1 ); { T tmp( vr1 ); - for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( tmp[i] , i + 1 ); - for ( int i = 0 ; i < N ; ++i ) vr1(i) = i + 2 ; - for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( tmp[i] , i + 2 ); + + for ( int i = 0; i < N; ++i ) ASSERT_EQ( tmp[i], i + 1 ); + for ( int i = 0; i < N; ++i ) vr1( i ) = i + 2; + for ( int i = 0; i < N; ++i ) ASSERT_EQ( tmp[i], i + 2 ); } - for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( vr1[i] , i + 2 ); + for ( int i = 0; i < N; ++i ) ASSERT_EQ( vr1[i], i + 2 ); } } - { - constexpr int N = 10 ; - typedef Kokkos::View T ; - typedef Kokkos::View C ; + constexpr 
int N = 10; + typedef Kokkos::View< int*, Space > T; + typedef Kokkos::View< const int*, Space > C; - T vr1("vr1",N); - C cr1(vr1); + T vr1( "vr1", N ); + C cr1( vr1 ); - ASSERT_TRUE( ( std::is_same< typename T::data_type , int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::const_data_type , const int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::non_const_data_type , int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::data_type , int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::const_data_type , const int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::non_const_data_type, int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::scalar_array_type , int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::const_scalar_array_type , const int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::non_const_scalar_array_type , int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::scalar_array_type , int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::const_scalar_array_type , const int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::non_const_scalar_array_type, int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::value_type , int >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::const_value_type , const int >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::non_const_value_type , int >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::value_type , int >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::const_value_type , const int >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::non_const_value_type, int >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::memory_space , typename Space::memory_space >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::reference_type , int & >::value ) ); - ASSERT_EQ( T::Rank , 1 ); - - ASSERT_EQ( vr1.dimension_0() , N ); + ASSERT_TRUE( ( std::is_same< typename T::memory_space, typename 
Space::memory_space >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::reference_type, int & >::value ) ); + ASSERT_EQ( T::Rank, 1 ); - if ( Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace , typename Space::memory_space >::accessible ) { - for ( int i = 0 ; i < N ; ++i ) vr1(i) = i + 1 ; - for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( vr1[i] , i + 1 ); - for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( cr1[i] , i + 1 ); + ASSERT_EQ( vr1.dimension_0(), N ); + + if ( Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace, typename Space::memory_space >::accessible ) { + for ( int i = 0; i < N; ++i ) vr1( i ) = i + 1; + for ( int i = 0; i < N; ++i ) ASSERT_EQ( vr1[i], i + 1 ); + for ( int i = 0; i < N; ++i ) ASSERT_EQ( cr1[i], i + 1 ); { T tmp( vr1 ); - for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( tmp[i] , i + 1 ); - for ( int i = 0 ; i < N ; ++i ) vr1(i) = i + 2 ; - for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( tmp[i] , i + 2 ); + for ( int i = 0; i < N; ++i ) ASSERT_EQ( tmp[i], i + 1 ); + for ( int i = 0; i < N; ++i ) vr1( i ) = i + 2; + for ( int i = 0; i < N; ++i ) ASSERT_EQ( tmp[i], i + 2 ); } - for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( vr1[i] , i + 2 ); + for ( int i = 0; i < N; ++i ) ASSERT_EQ( vr1[i], i + 2 ); } } - // Testing proper handling of zero-length allocations + // Testing proper handling of zero-length allocations. { - constexpr int N = 0 ; - typedef Kokkos::View T ; - typedef Kokkos::View C ; + constexpr int N = 0; + typedef Kokkos::View< int*, Space > T; + typedef Kokkos::View< const int*, Space > C; - T vr1("vr1",N); - C cr1(vr1); + T vr1( "vr1", N ); + C cr1( vr1 ); - ASSERT_EQ( vr1.dimension_0() , 0 ); - ASSERT_EQ( cr1.dimension_0() , 0 ); + ASSERT_EQ( vr1.dimension_0(), 0 ); + ASSERT_EQ( cr1.dimension_0(), 0 ); } - // Testing using space instance for allocation. - // The execution space of the memory space must be available for view data initialization + // The execution space of the memory space must be available for view data initialization. 
+ if ( std::is_same< ExecSpace, typename ExecSpace::memory_space::execution_space >::value ) { - if ( std::is_same< ExecSpace , typename ExecSpace::memory_space::execution_space >::value ) { + using namespace Kokkos::Experimental; - using namespace Kokkos::Experimental ; + typedef typename ExecSpace::memory_space memory_space; + typedef View< int*, memory_space > V; - typedef typename ExecSpace::memory_space memory_space ; - typedef View V ; + constexpr int N = 10; - constexpr int N = 10 ; + memory_space mem_space; - memory_space mem_space ; - - V v( "v" , N ); - V va( view_alloc() , N ); - V vb( view_alloc( "vb" ) , N ); - V vc( view_alloc( "vc" , AllowPadding ) , N ); - V vd( view_alloc( "vd" , WithoutInitializing ) , N ); - V ve( view_alloc( "ve" , WithoutInitializing , AllowPadding ) , N ); - V vf( view_alloc( "vf" , mem_space , WithoutInitializing , AllowPadding ) , N ); - V vg( view_alloc( mem_space , "vg" , WithoutInitializing , AllowPadding ) , N ); - V vh( view_alloc( WithoutInitializing , AllowPadding ) , N ); - V vi( view_alloc( WithoutInitializing ) , N ); - V vj( view_alloc( std::string("vj") , AllowPadding ) , N ); - V vk( view_alloc( mem_space , std::string("vk") , AllowPadding ) , N ); + V v( "v", N ); + V va( view_alloc(), N ); + V vb( view_alloc( "vb" ), N ); + V vc( view_alloc( "vc", AllowPadding ), N ); + V vd( view_alloc( "vd", WithoutInitializing ), N ); + V ve( view_alloc( "ve", WithoutInitializing, AllowPadding ), N ); + V vf( view_alloc( "vf", mem_space, WithoutInitializing, AllowPadding ), N ); + V vg( view_alloc( mem_space, "vg", WithoutInitializing, AllowPadding ), N ); + V vh( view_alloc( WithoutInitializing, AllowPadding ), N ); + V vi( view_alloc( WithoutInitializing ), N ); + V vj( view_alloc( std::string( "vj" ), AllowPadding ), N ); + V vk( view_alloc( mem_space, std::string( "vk" ), AllowPadding ), N ); } { - typedef Kokkos::ViewTraits traits_t ; - typedef Kokkos::Experimental::Impl::ViewDimension<0,0,0> dims_t ; - typedef 
Kokkos::Experimental::Impl::ViewOffset< dims_t , Kokkos::LayoutStride > offset_t ; + typedef Kokkos::ViewTraits< int***, Kokkos::LayoutStride, ExecSpace > traits_t; + typedef Kokkos::Experimental::Impl::ViewDimension< 0, 0, 0 > dims_t; + typedef Kokkos::Experimental::Impl::ViewOffset< dims_t, Kokkos::LayoutStride > offset_t; - Kokkos::LayoutStride stride ; + Kokkos::LayoutStride stride; - stride.dimension[0] = 3 ; - stride.dimension[1] = 4 ; - stride.dimension[2] = 5 ; - stride.stride[0] = 4 ; - stride.stride[1] = 1 ; - stride.stride[2] = 12 ; + stride.dimension[0] = 3; + stride.dimension[1] = 4; + stride.dimension[2] = 5; + stride.stride[0] = 4; + stride.stride[1] = 1; + stride.stride[2] = 12; - const offset_t offset( std::integral_constant() , stride ); + const offset_t offset( std::integral_constant< unsigned, 0 >(), stride ); - ASSERT_EQ( offset.dimension_0() , 3 ); - ASSERT_EQ( offset.dimension_1() , 4 ); - ASSERT_EQ( offset.dimension_2() , 5 ); + ASSERT_EQ( offset.dimension_0(), 3 ); + ASSERT_EQ( offset.dimension_1(), 4 ); + ASSERT_EQ( offset.dimension_2(), 5 ); - ASSERT_EQ( offset.stride_0() , 4 ); - ASSERT_EQ( offset.stride_1() , 1 ); - ASSERT_EQ( offset.stride_2() , 12 ); + ASSERT_EQ( offset.stride_0(), 4 ); + ASSERT_EQ( offset.stride_1(), 1 ); + ASSERT_EQ( offset.stride_2(), 12 ); - ASSERT_EQ( offset.span() , 60 ); + ASSERT_EQ( offset.span(), 60 ); ASSERT_TRUE( offset.span_is_contiguous() ); - Kokkos::Experimental::Impl::ViewMapping< traits_t , void > - v( Kokkos::Experimental::Impl::ViewCtorProp((int*)0), stride ); + Kokkos::Experimental::Impl::ViewMapping< traits_t, void > + v( Kokkos::Experimental::Impl::ViewCtorProp< int* >( (int*) 0 ), stride ); } { - typedef Kokkos::View V ; - typedef typename V::HostMirror M ; - typedef typename Kokkos::View::array_layout layout_type; + typedef Kokkos::View< int**, Space > V; + typedef typename V::HostMirror M; + typedef typename Kokkos::View< int**, Space >::array_layout layout_type; - constexpr int N0 = 10 ; - 
constexpr int N1 = 11 ; + constexpr int N0 = 10; + constexpr int N1 = 11; - V a("a",N0,N1); - M b = Kokkos::Experimental::create_mirror(a); - M c = Kokkos::Experimental::create_mirror_view(a); - M d ; + V a( "a", N0, N1 ); + M b = Kokkos::Experimental::create_mirror( a ); + M c = Kokkos::Experimental::create_mirror_view( a ); + M d; - for ( int i0 = 0 ; i0 < N0 ; ++i0 ) - for ( int i1 = 0 ; i1 < N1 ; ++i1 ) - b(i0,i1) = 1 + i0 + i1 * N0 ; + for ( int i0 = 0; i0 < N0; ++i0 ) + for ( int i1 = 0; i1 < N1; ++i1 ) + { + b( i0, i1 ) = 1 + i0 + i1 * N0; + } - Kokkos::Experimental::deep_copy( a , b ); - Kokkos::Experimental::deep_copy( c , a ); + Kokkos::Experimental::deep_copy( a, b ); + Kokkos::Experimental::deep_copy( c, a ); - for ( int i0 = 0 ; i0 < N0 ; ++i0 ) - for ( int i1 = 0 ; i1 < N1 ; ++i1 ) - ASSERT_EQ( b(i0,i1) , c(i0,i1) ); + for ( int i0 = 0; i0 < N0; ++i0 ) + for ( int i1 = 0; i1 < N1; ++i1 ) + { + ASSERT_EQ( b( i0, i1 ), c( i0, i1 ) ); + } - Kokkos::Experimental::resize( b , 5 , 6 ); + Kokkos::Experimental::resize( b, 5, 6 ); - for ( int i0 = 0 ; i0 < 5 ; ++i0 ) - for ( int i1 = 0 ; i1 < 6 ; ++i1 ) { + for ( int i0 = 0; i0 < 5; ++i0 ) + for ( int i1 = 0; i1 < 6; ++i1 ) + { int val = 1 + i0 + i1 * N0; - ASSERT_EQ( b(i0,i1) , c(i0,i1) ); - ASSERT_EQ( b(i0,i1) , val ); + ASSERT_EQ( b( i0, i1 ), c( i0, i1 ) ); + ASSERT_EQ( b( i0, i1 ), val ); } - Kokkos::Experimental::realloc( c , 5 , 6 ); - Kokkos::Experimental::realloc( d , 5 , 6 ); + Kokkos::Experimental::realloc( c, 5, 6 ); + Kokkos::Experimental::realloc( d, 5, 6 ); - ASSERT_EQ( b.dimension_0() , 5 ); - ASSERT_EQ( b.dimension_1() , 6 ); - ASSERT_EQ( c.dimension_0() , 5 ); - ASSERT_EQ( c.dimension_1() , 6 ); - ASSERT_EQ( d.dimension_0() , 5 ); - ASSERT_EQ( d.dimension_1() , 6 ); + ASSERT_EQ( b.dimension_0(), 5 ); + ASSERT_EQ( b.dimension_1(), 6 ); + ASSERT_EQ( c.dimension_0(), 5 ); + ASSERT_EQ( c.dimension_1(), 6 ); + ASSERT_EQ( d.dimension_0(), 5 ); + ASSERT_EQ( d.dimension_1(), 6 ); - layout_type 
layout(7,8); - Kokkos::Experimental::resize( b , layout ); - for ( int i0 = 0 ; i0 < 7 ; ++i0 ) - for ( int i1 = 6 ; i1 < 8 ; ++i1 ) - b(i0,i1) = 1 + i0 + i1 * N0 ; + layout_type layout( 7, 8 ); + Kokkos::Experimental::resize( b, layout ); + for ( int i0 = 0; i0 < 7; ++i0 ) + for ( int i1 = 6; i1 < 8; ++i1 ) + { + b( i0, i1 ) = 1 + i0 + i1 * N0; + } - for ( int i0 = 5 ; i0 < 7 ; ++i0 ) - for ( int i1 = 0 ; i1 < 8 ; ++i1 ) - b(i0,i1) = 1 + i0 + i1 * N0 ; + for ( int i0 = 5; i0 < 7; ++i0 ) + for ( int i1 = 0; i1 < 8; ++i1 ) + { + b( i0, i1 ) = 1 + i0 + i1 * N0; + } - for ( int i0 = 0 ; i0 < 7 ; ++i0 ) - for ( int i1 = 0 ; i1 < 8 ; ++i1 ) { + for ( int i0 = 0; i0 < 7; ++i0 ) + for ( int i1 = 0; i1 < 8; ++i1 ) + { int val = 1 + i0 + i1 * N0; - ASSERT_EQ( b(i0,i1) , val ); + ASSERT_EQ( b( i0, i1 ), val ); } - Kokkos::Experimental::realloc( c , layout ); - Kokkos::Experimental::realloc( d , layout ); + Kokkos::Experimental::realloc( c, layout ); + Kokkos::Experimental::realloc( d, layout ); - ASSERT_EQ( b.dimension_0() , 7 ); - ASSERT_EQ( b.dimension_1() , 8 ); - ASSERT_EQ( c.dimension_0() , 7 ); - ASSERT_EQ( c.dimension_1() , 8 ); - ASSERT_EQ( d.dimension_0() , 7 ); - ASSERT_EQ( d.dimension_1() , 8 ); + ASSERT_EQ( b.dimension_0(), 7 ); + ASSERT_EQ( b.dimension_1(), 8 ); + ASSERT_EQ( c.dimension_0(), 7 ); + ASSERT_EQ( c.dimension_1(), 8 ); + ASSERT_EQ( d.dimension_0(), 7 ); + ASSERT_EQ( d.dimension_1(), 8 ); + } + + { + typedef Kokkos::View< int**, Kokkos::LayoutStride, Space > V; + typedef typename V::HostMirror M; + typedef typename Kokkos::View< int**, Kokkos::LayoutStride, Space >::array_layout layout_type; + + constexpr int N0 = 10; + constexpr int N1 = 11; + + const int dimensions[] = { N0, N1 }; + const int order[] = { 1, 0 }; + + V a( "a", Kokkos::LayoutStride::order_dimensions( 2, order, dimensions ) ); + M b = Kokkos::Experimental::create_mirror( a ); + M c = Kokkos::Experimental::create_mirror_view( a ); + M d; + + for ( int i0 = 0; i0 < N0; ++i0 ) + for ( int 
i1 = 0; i1 < N1; ++i1 ) + { + b( i0, i1 ) = 1 + i0 + i1 * N0; + } + + Kokkos::Experimental::deep_copy( a, b ); + Kokkos::Experimental::deep_copy( c, a ); + + for ( int i0 = 0; i0 < N0; ++i0 ) + for ( int i1 = 0; i1 < N1; ++i1 ) + { + ASSERT_EQ( b( i0, i1 ), c( i0, i1 ) ); + } + + const int dimensions2[] = { 7, 8 }; + const int order2[] = { 1, 0 }; + layout_type layout = layout_type::order_dimensions( 2, order2, dimensions2 ); + Kokkos::Experimental::resize( b, layout ); + + for ( int i0 = 0; i0 < 7; ++i0 ) + for ( int i1 = 0; i1 < 8; ++i1 ) + { + int val = 1 + i0 + i1 * N0; + ASSERT_EQ( b( i0, i1 ), c( i0, i1 ) ); + ASSERT_EQ( b( i0, i1 ), val ); + } + + Kokkos::Experimental::realloc( c, layout ); + Kokkos::Experimental::realloc( d, layout ); + + ASSERT_EQ( b.dimension_0(), 7 ); + ASSERT_EQ( b.dimension_1(), 8 ); + ASSERT_EQ( c.dimension_0(), 7 ); + ASSERT_EQ( c.dimension_1(), 8 ); + ASSERT_EQ( d.dimension_0(), 7 ); + ASSERT_EQ( d.dimension_1(), 8 ); } { - typedef Kokkos::View V ; - typedef typename V::HostMirror M ; - typedef typename Kokkos::View::array_layout layout_type; + typedef Kokkos::View< int*, Space > V; + typedef Kokkos::View< int*, Space, Kokkos::MemoryUnmanaged > U; - constexpr int N0 = 10 ; - constexpr int N1 = 11 ; + V a( "a", 10 ); - const int dimensions[] = {N0,N1}; - const int order[] = {1,0}; + ASSERT_EQ( a.use_count(), 1 ); - V a("a",Kokkos::LayoutStride::order_dimensions(2,order,dimensions)); - M b = Kokkos::Experimental::create_mirror(a); - M c = Kokkos::Experimental::create_mirror_view(a); - M d ; + V b = a; - for ( int i0 = 0 ; i0 < N0 ; ++i0 ) - for ( int i1 = 0 ; i1 < N1 ; ++i1 ) - b(i0,i1) = 1 + i0 + i1 * N0 ; - - Kokkos::Experimental::deep_copy( a , b ); - Kokkos::Experimental::deep_copy( c , a ); - - for ( int i0 = 0 ; i0 < N0 ; ++i0 ) - for ( int i1 = 0 ; i1 < N1 ; ++i1 ) - ASSERT_EQ( b(i0,i1) , c(i0,i1) ); - - const int dimensions2[] = {7,8}; - const int order2[] = {1,0}; - layout_type layout = 
layout_type::order_dimensions(2,order2,dimensions2); - Kokkos::Experimental::resize( b , layout ); - - for ( int i0 = 0 ; i0 < 7 ; ++i0 ) - for ( int i1 = 0 ; i1 < 8 ; ++i1 ) { - int val = 1 + i0 + i1 * N0; - ASSERT_EQ( b(i0,i1) , c(i0,i1) ); - ASSERT_EQ( b(i0,i1) , val ); - } - - Kokkos::Experimental::realloc( c , layout ); - Kokkos::Experimental::realloc( d , layout ); - - ASSERT_EQ( b.dimension_0() , 7 ); - ASSERT_EQ( b.dimension_1() , 8 ); - ASSERT_EQ( c.dimension_0() , 7 ); - ASSERT_EQ( c.dimension_1() , 8 ); - ASSERT_EQ( d.dimension_0() , 7 ); - ASSERT_EQ( d.dimension_1() , 8 ); - - } - - { - typedef Kokkos::View V ; - typedef Kokkos::View U ; - - - V a("a",10); - - ASSERT_EQ( a.use_count() , 1 ); - - V b = a ; - - ASSERT_EQ( a.use_count() , 2 ); - ASSERT_EQ( b.use_count() , 2 ); + ASSERT_EQ( a.use_count(), 2 ); + ASSERT_EQ( b.use_count(), 2 ); { - U c = b ; // 'c' is compile-time unmanaged + U c = b; // 'c' is compile-time unmanaged. - ASSERT_EQ( a.use_count() , 2 ); - ASSERT_EQ( b.use_count() , 2 ); - ASSERT_EQ( c.use_count() , 2 ); + ASSERT_EQ( a.use_count(), 2 ); + ASSERT_EQ( b.use_count(), 2 ); + ASSERT_EQ( c.use_count(), 2 ); - V d = c ; // 'd' is run-time unmanaged + V d = c; // 'd' is run-time unmanaged. - ASSERT_EQ( a.use_count() , 2 ); - ASSERT_EQ( b.use_count() , 2 ); - ASSERT_EQ( c.use_count() , 2 ); - ASSERT_EQ( d.use_count() , 2 ); + ASSERT_EQ( a.use_count(), 2 ); + ASSERT_EQ( b.use_count(), 2 ); + ASSERT_EQ( c.use_count(), 2 ); + ASSERT_EQ( d.use_count(), 2 ); } - ASSERT_EQ( a.use_count() , 2 ); - ASSERT_EQ( b.use_count() , 2 ); + ASSERT_EQ( a.use_count(), 2 ); + ASSERT_EQ( b.use_count(), 2 ); b = V(); - ASSERT_EQ( a.use_count() , 1 ); - ASSERT_EQ( b.use_count() , 0 ); + ASSERT_EQ( a.use_count(), 1 ); + ASSERT_EQ( b.use_count(), 0 ); -#if ! defined ( KOKKOS_ENABLE_CUDA_LAMBDA ) - /* Cannot launch host lambda when CUDA lambda is enabled */ +#if !defined( KOKKOS_ENABLE_CUDA_LAMBDA ) + // Cannot launch host lambda when CUDA lambda is enabled. 
- typedef typename Kokkos::Impl::HostMirror< Space >::Space::execution_space - host_exec_space ; + typedef typename Kokkos::Impl::HostMirror< Space >::Space::execution_space host_exec_space; - Kokkos::parallel_for( - Kokkos::RangePolicy< host_exec_space >(0,10) , - KOKKOS_LAMBDA( int i ){ - // 'a' is captured by copy and the capture mechanism - // converts 'a' to an unmanaged copy. - // When the parallel dispatch accepts a move for the lambda - // this count should become 1 - ASSERT_EQ( a.use_count() , 2 ); - V x = a ; - ASSERT_EQ( a.use_count() , 2 ); - ASSERT_EQ( x.use_count() , 2 ); - }); -#endif /* #if ! defined ( KOKKOS_ENABLE_CUDA_LAMBDA ) */ + Kokkos::parallel_for( Kokkos::RangePolicy< host_exec_space >( 0, 10 ), KOKKOS_LAMBDA ( int i ) { + // 'a' is captured by copy, and the capture mechanism converts 'a' to an + // unmanaged copy. When the parallel dispatch accepts a move for the + // lambda, this count should become 1. + ASSERT_EQ( a.use_count(), 2 ); + V x = a; + ASSERT_EQ( a.use_count(), 2 ); + ASSERT_EQ( x.use_count(), 2 ); + }); +#endif // #if !defined( KOKKOS_ENABLE_CUDA_LAMBDA ) } } template< class Space > struct TestViewMappingSubview { - typedef typename Space::execution_space ExecSpace ; - typedef typename Space::memory_space MemSpace ; + typedef typename Space::execution_space ExecSpace; + typedef typename Space::memory_space MemSpace; - typedef Kokkos::pair range ; + typedef Kokkos::pair< int, int > range; enum { AN = 10 }; - typedef Kokkos::View AT ; - typedef Kokkos::View ACT ; - typedef Kokkos::Subview< AT , range > AS ; + typedef Kokkos::View< int*, ExecSpace > AT; + typedef Kokkos::View< const int*, ExecSpace > ACT; + typedef Kokkos::Subview< AT, range > AS; - enum { BN0 = 10 , BN1 = 11 , BN2 = 12 }; - typedef Kokkos::View BT ; - typedef Kokkos::Subview< BT , range , range , range > BS ; + enum { BN0 = 10, BN1 = 11, BN2 = 12 }; + typedef Kokkos::View< int***, ExecSpace > BT; + typedef Kokkos::Subview< BT, range, range, range > BS; - enum { 
CN0 = 10 , CN1 = 11 , CN2 = 12 }; - typedef Kokkos::View CT ; - typedef Kokkos::Subview< CT , range , range , range , int , int > CS ; + enum { CN0 = 10, CN1 = 11, CN2 = 12 }; + typedef Kokkos::View< int***[13][14], ExecSpace > CT; + typedef Kokkos::Subview< CT, range, range, range, int, int > CS; - enum { DN0 = 10 , DN1 = 11 , DN2 = 12 , DN3 = 13 , DN4 = 14 }; - typedef Kokkos::View DT ; - typedef Kokkos::Subview< DT , int , range , range , range , int > DS ; + enum { DN0 = 10, DN1 = 11, DN2 = 12, DN3 = 13, DN4 = 14 }; + typedef Kokkos::View< int***[DN3][DN4], ExecSpace > DT; + typedef Kokkos::Subview< DT, int, range, range, range, int > DS; + typedef Kokkos::View< int***[13][14], Kokkos::LayoutLeft, ExecSpace > DLT; + typedef Kokkos::Subview< DLT, range, int, int, int, int > DLS1; - typedef Kokkos::View DLT ; - typedef Kokkos::Subview< DLT , range , int , int , int , int > DLS1 ; - - static_assert( DLS1::rank == 1 && std::is_same< typename DLS1::array_layout , Kokkos::LayoutLeft >::value + static_assert( DLS1::rank == 1 && std::is_same< typename DLS1::array_layout, Kokkos::LayoutLeft >::value , "Subview layout error for rank 1 subview of left-most range of LayoutLeft" ); - typedef Kokkos::View DRT ; - typedef Kokkos::Subview< DRT , int , int , int , int , range > DRS1 ; + typedef Kokkos::View< int***[13][14], Kokkos::LayoutRight, ExecSpace > DRT; + typedef Kokkos::Subview< DRT, int, int, int, int, range > DRS1; - static_assert( DRS1::rank == 1 && std::is_same< typename DRS1::array_layout , Kokkos::LayoutRight >::value + static_assert( DRS1::rank == 1 && std::is_same< typename DRS1::array_layout, Kokkos::LayoutRight >::value , "Subview layout error for rank 1 subview of right-most range of LayoutRight" ); - AT Aa ; - AS Ab ; - ACT Ac ; - BT Ba ; - BS Bb ; - CT Ca ; - CS Cb ; - DT Da ; - DS Db ; + AT Aa; + AS Ab; + ACT Ac; + BT Ba; + BS Bb; + CT Ca; + CS Cb; + DT Da; + DS Db; TestViewMappingSubview() - : Aa("Aa",AN) - , Ab( Kokkos::Experimental::subview( Aa , 
std::pair(1,AN-1) ) ) - , Ac( Aa , std::pair(1,AN-1) ) - , Ba("Ba",BN0,BN1,BN2) + : Aa( "Aa", AN ) + , Ab( Kokkos::Experimental::subview( Aa, std::pair< int, int >( 1, AN - 1 ) ) ) + , Ac( Aa, std::pair< int, int >( 1, AN - 1 ) ) + , Ba( "Ba", BN0, BN1, BN2 ) , Bb( Kokkos::Experimental::subview( Ba - , std::pair(1,BN0-1) - , std::pair(1,BN1-1) - , std::pair(1,BN2-1) + , std::pair< int, int >( 1, BN0 - 1 ) + , std::pair< int, int >( 1, BN1 - 1 ) + , std::pair< int, int >( 1, BN2 - 1 ) ) ) - , Ca("Ca",CN0,CN1,CN2) + , Ca( "Ca", CN0, CN1, CN2 ) , Cb( Kokkos::Experimental::subview( Ca - , std::pair(1,CN0-1) - , std::pair(1,CN1-1) - , std::pair(1,CN2-1) + , std::pair< int, int >( 1, CN0 - 1 ) + , std::pair< int, int >( 1, CN1 - 1 ) + , std::pair< int, int >( 1, CN2 - 1 ) , 1 , 2 ) ) - , Da("Da",DN0,DN1,DN2) + , Da( "Da", DN0, DN1, DN2 ) , Db( Kokkos::Experimental::subview( Da , 1 - , std::pair(1,DN1-1) - , std::pair(1,DN2-1) - , std::pair(1,DN3-1) + , std::pair< int, int >( 1, DN1 - 1 ) + , std::pair< int, int >( 1, DN2 - 1 ) + , std::pair< int, int >( 1, DN3 - 1 ) , 2 ) ) - { - } - + {} KOKKOS_INLINE_FUNCTION - void operator()( const int , long & error_count ) const + void operator()( const int, long & error_count ) const + { + auto Ad = Kokkos::Experimental::subview< Kokkos::MemoryUnmanaged >( Aa, Kokkos::pair< int, int >( 1, AN - 1 ) ); + + for ( int i = 1; i < AN - 1; ++i ) if( & Aa[i] != & Ab[i - 1] ) ++error_count; + for ( int i = 1; i < AN - 1; ++i ) if( & Aa[i] != & Ac[i - 1] ) ++error_count; + for ( int i = 1; i < AN - 1; ++i ) if( & Aa[i] != & Ad[i - 1] ) ++error_count; + + for ( int i2 = 1; i2 < BN2 - 1; ++i2 ) + for ( int i1 = 1; i1 < BN1 - 1; ++i1 ) + for ( int i0 = 1; i0 < BN0 - 1; ++i0 ) { - auto Ad = Kokkos::Experimental::subview< Kokkos::MemoryUnmanaged >( Aa , Kokkos::pair(1,AN-1) ); - - for ( int i = 1 ; i < AN-1 ; ++i ) if( & Aa[i] != & Ab[i-1] ) ++error_count ; - for ( int i = 1 ; i < AN-1 ; ++i ) if( & Aa[i] != & Ac[i-1] ) ++error_count ; - for ( 
int i = 1 ; i < AN-1 ; ++i ) if( & Aa[i] != & Ad[i-1] ) ++error_count ; - - for ( int i2 = 1 ; i2 < BN2-1 ; ++i2 ) { - for ( int i1 = 1 ; i1 < BN1-1 ; ++i1 ) { - for ( int i0 = 1 ; i0 < BN0-1 ; ++i0 ) { - if ( & Ba(i0,i1,i2) != & Bb(i0-1,i1-1,i2-1) ) ++error_count ; - }}} - - for ( int i2 = 1 ; i2 < CN2-1 ; ++i2 ) { - for ( int i1 = 1 ; i1 < CN1-1 ; ++i1 ) { - for ( int i0 = 1 ; i0 < CN0-1 ; ++i0 ) { - if ( & Ca(i0,i1,i2,1,2) != & Cb(i0-1,i1-1,i2-1) ) ++error_count ; - }}} - - for ( int i2 = 1 ; i2 < DN3-1 ; ++i2 ) { - for ( int i1 = 1 ; i1 < DN2-1 ; ++i1 ) { - for ( int i0 = 1 ; i0 < DN1-1 ; ++i0 ) { - if ( & Da(1,i0,i1,i2,2) != & Db(i0-1,i1-1,i2-1) ) ++error_count ; - }}} + if ( & Ba( i0, i1, i2 ) != & Bb( i0 - 1, i1 - 1, i2 - 1 ) ) ++error_count; } + for ( int i2 = 1; i2 < CN2 - 1; ++i2 ) + for ( int i1 = 1; i1 < CN1 - 1; ++i1 ) + for ( int i0 = 1; i0 < CN0 - 1; ++i0 ) + { + if ( & Ca( i0, i1, i2, 1, 2 ) != & Cb( i0 - 1, i1 - 1, i2 - 1 ) ) ++error_count; + } + + for ( int i2 = 1; i2 < DN3 - 1; ++i2 ) + for ( int i1 = 1; i1 < DN2 - 1; ++i1 ) + for ( int i0 = 1; i0 < DN1 - 1; ++i0 ) + { + if ( & Da( 1, i0, i1, i2, 2 ) != & Db( i0 - 1, i1 - 1, i2 - 1 ) ) ++error_count; + } + } + static void run() { - TestViewMappingSubview self ; + TestViewMappingSubview self; - ASSERT_EQ( self.Aa.dimension_0() , AN ); - ASSERT_EQ( self.Ab.dimension_0() , AN - 2 ); - ASSERT_EQ( self.Ac.dimension_0() , AN - 2 ); - ASSERT_EQ( self.Ba.dimension_0() , BN0 ); - ASSERT_EQ( self.Ba.dimension_1() , BN1 ); - ASSERT_EQ( self.Ba.dimension_2() , BN2 ); - ASSERT_EQ( self.Bb.dimension_0() , BN0 - 2 ); - ASSERT_EQ( self.Bb.dimension_1() , BN1 - 2 ); - ASSERT_EQ( self.Bb.dimension_2() , BN2 - 2 ); + ASSERT_EQ( self.Aa.dimension_0(), AN ); + ASSERT_EQ( self.Ab.dimension_0(), AN - 2 ); + ASSERT_EQ( self.Ac.dimension_0(), AN - 2 ); + ASSERT_EQ( self.Ba.dimension_0(), BN0 ); + ASSERT_EQ( self.Ba.dimension_1(), BN1 ); + ASSERT_EQ( self.Ba.dimension_2(), BN2 ); + ASSERT_EQ( self.Bb.dimension_0(), BN0 - 
2 ); + ASSERT_EQ( self.Bb.dimension_1(), BN1 - 2 ); + ASSERT_EQ( self.Bb.dimension_2(), BN2 - 2 ); - ASSERT_EQ( self.Ca.dimension_0() , CN0 ); - ASSERT_EQ( self.Ca.dimension_1() , CN1 ); - ASSERT_EQ( self.Ca.dimension_2() , CN2 ); - ASSERT_EQ( self.Ca.dimension_3() , 13 ); - ASSERT_EQ( self.Ca.dimension_4() , 14 ); - ASSERT_EQ( self.Cb.dimension_0() , CN0 - 2 ); - ASSERT_EQ( self.Cb.dimension_1() , CN1 - 2 ); - ASSERT_EQ( self.Cb.dimension_2() , CN2 - 2 ); + ASSERT_EQ( self.Ca.dimension_0(), CN0 ); + ASSERT_EQ( self.Ca.dimension_1(), CN1 ); + ASSERT_EQ( self.Ca.dimension_2(), CN2 ); + ASSERT_EQ( self.Ca.dimension_3(), 13 ); + ASSERT_EQ( self.Ca.dimension_4(), 14 ); + ASSERT_EQ( self.Cb.dimension_0(), CN0 - 2 ); + ASSERT_EQ( self.Cb.dimension_1(), CN1 - 2 ); + ASSERT_EQ( self.Cb.dimension_2(), CN2 - 2 ); - ASSERT_EQ( self.Da.dimension_0() , DN0 ); - ASSERT_EQ( self.Da.dimension_1() , DN1 ); - ASSERT_EQ( self.Da.dimension_2() , DN2 ); - ASSERT_EQ( self.Da.dimension_3() , DN3 ); - ASSERT_EQ( self.Da.dimension_4() , DN4 ); + ASSERT_EQ( self.Da.dimension_0(), DN0 ); + ASSERT_EQ( self.Da.dimension_1(), DN1 ); + ASSERT_EQ( self.Da.dimension_2(), DN2 ); + ASSERT_EQ( self.Da.dimension_3(), DN3 ); + ASSERT_EQ( self.Da.dimension_4(), DN4 ); - ASSERT_EQ( self.Db.dimension_0() , DN1 - 2 ); - ASSERT_EQ( self.Db.dimension_1() , DN2 - 2 ); - ASSERT_EQ( self.Db.dimension_2() , DN3 - 2 ); + ASSERT_EQ( self.Db.dimension_0(), DN1 - 2 ); + ASSERT_EQ( self.Db.dimension_1(), DN2 - 2 ); + ASSERT_EQ( self.Db.dimension_2(), DN3 - 2 ); - ASSERT_EQ( self.Da.stride_1() , self.Db.stride_0() ); - ASSERT_EQ( self.Da.stride_2() , self.Db.stride_1() ); - ASSERT_EQ( self.Da.stride_3() , self.Db.stride_2() ); + ASSERT_EQ( self.Da.stride_1(), self.Db.stride_0() ); + ASSERT_EQ( self.Da.stride_2(), self.Db.stride_1() ); + ASSERT_EQ( self.Da.stride_3(), self.Db.stride_2() ); - long error_count = -1 ; - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >(0,1) , self , error_count ); - ASSERT_EQ( 
error_count , 0 ); + long error_count = -1; + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, 1 ), self, error_count ); + ASSERT_EQ( error_count, 0 ); } - }; template< class Space > void test_view_mapping_subview() { - typedef typename Space::execution_space ExecSpace ; + typedef typename Space::execution_space ExecSpace; TestViewMappingSubview< ExecSpace >::run(); } @@ -1181,214 +1195,228 @@ struct TestViewMapOperator { static_assert( ViewType::reference_type_is_lvalue_reference , "Test only valid for lvalue reference type" ); - const ViewType v ; + const ViewType v; KOKKOS_INLINE_FUNCTION - void test_left( size_t i0 , long & error_count ) const + void test_left( size_t i0, long & error_count ) const + { + typename ViewType::value_type * const base_ptr = & v( 0, 0, 0, 0, 0, 0, 0, 0 ); + const size_t n1 = v.dimension_1(); + const size_t n2 = v.dimension_2(); + const size_t n3 = v.dimension_3(); + const size_t n4 = v.dimension_4(); + const size_t n5 = v.dimension_5(); + const size_t n6 = v.dimension_6(); + const size_t n7 = v.dimension_7(); + + long offset = 0; + + for ( size_t i7 = 0; i7 < n7; ++i7 ) + for ( size_t i6 = 0; i6 < n6; ++i6 ) + for ( size_t i5 = 0; i5 < n5; ++i5 ) + for ( size_t i4 = 0; i4 < n4; ++i4 ) + for ( size_t i3 = 0; i3 < n3; ++i3 ) + for ( size_t i2 = 0; i2 < n2; ++i2 ) + for ( size_t i1 = 0; i1 < n1; ++i1 ) { - typename ViewType::value_type * const base_ptr = & v(0,0,0,0,0,0,0,0); - const size_t n1 = v.dimension_1(); - const size_t n2 = v.dimension_2(); - const size_t n3 = v.dimension_3(); - const size_t n4 = v.dimension_4(); - const size_t n5 = v.dimension_5(); - const size_t n6 = v.dimension_6(); - const size_t n7 = v.dimension_7(); - - long offset = 0 ; - - for ( size_t i7 = 0 ; i7 < n7 ; ++i7 ) - for ( size_t i6 = 0 ; i6 < n6 ; ++i6 ) - for ( size_t i5 = 0 ; i5 < n5 ; ++i5 ) - for ( size_t i4 = 0 ; i4 < n4 ; ++i4 ) - for ( size_t i3 = 0 ; i3 < n3 ; ++i3 ) - for ( size_t i2 = 0 ; i2 < n2 ; ++i2 ) - for ( size_t i1 = 0 ; i1 < 
n1 ; ++i1 ) - { - const long d = & v(i0,i1,i2,i3,i4,i5,i6,i7) - base_ptr ; - if ( d < offset ) ++error_count ; - offset = d ; - } - - if ( v.span() <= size_t(offset) ) ++error_count ; + const long d = & v( i0, i1, i2, i3, i4, i5, i6, i7 ) - base_ptr; + if ( d < offset ) ++error_count; + offset = d; } + if ( v.span() <= size_t( offset ) ) ++error_count; + } + KOKKOS_INLINE_FUNCTION - void test_right( size_t i0 , long & error_count ) const + void test_right( size_t i0, long & error_count ) const + { + typename ViewType::value_type * const base_ptr = & v( 0, 0, 0, 0, 0, 0, 0, 0 ); + const size_t n1 = v.dimension_1(); + const size_t n2 = v.dimension_2(); + const size_t n3 = v.dimension_3(); + const size_t n4 = v.dimension_4(); + const size_t n5 = v.dimension_5(); + const size_t n6 = v.dimension_6(); + const size_t n7 = v.dimension_7(); + + long offset = 0; + + for ( size_t i1 = 0; i1 < n1; ++i1 ) + for ( size_t i2 = 0; i2 < n2; ++i2 ) + for ( size_t i3 = 0; i3 < n3; ++i3 ) + for ( size_t i4 = 0; i4 < n4; ++i4 ) + for ( size_t i5 = 0; i5 < n5; ++i5 ) + for ( size_t i6 = 0; i6 < n6; ++i6 ) + for ( size_t i7 = 0; i7 < n7; ++i7 ) { - typename ViewType::value_type * const base_ptr = & v(0,0,0,0,0,0,0,0); - const size_t n1 = v.dimension_1(); - const size_t n2 = v.dimension_2(); - const size_t n3 = v.dimension_3(); - const size_t n4 = v.dimension_4(); - const size_t n5 = v.dimension_5(); - const size_t n6 = v.dimension_6(); - const size_t n7 = v.dimension_7(); - - long offset = 0 ; - - for ( size_t i1 = 0 ; i1 < n1 ; ++i1 ) - for ( size_t i2 = 0 ; i2 < n2 ; ++i2 ) - for ( size_t i3 = 0 ; i3 < n3 ; ++i3 ) - for ( size_t i4 = 0 ; i4 < n4 ; ++i4 ) - for ( size_t i5 = 0 ; i5 < n5 ; ++i5 ) - for ( size_t i6 = 0 ; i6 < n6 ; ++i6 ) - for ( size_t i7 = 0 ; i7 < n7 ; ++i7 ) - { - const long d = & v(i0,i1,i2,i3,i4,i5,i6,i7) - base_ptr ; - if ( d < offset ) ++error_count ; - offset = d ; - } - - if ( v.span() <= size_t(offset) ) ++error_count ; + const long d = & v( i0, i1, i2, i3, i4, 
i5, i6, i7 ) - base_ptr; + if ( d < offset ) ++error_count; + offset = d; } + if ( v.span() <= size_t( offset ) ) ++error_count; + } + KOKKOS_INLINE_FUNCTION - void operator()( size_t i , long & error_count ) const - { - if ( std::is_same< typename ViewType::array_layout , Kokkos::LayoutLeft >::value ) - test_left(i,error_count); - else if ( std::is_same< typename ViewType::array_layout , Kokkos::LayoutRight >::value ) - test_right(i,error_count); + void operator()( size_t i, long & error_count ) const + { + if ( std::is_same< typename ViewType::array_layout, Kokkos::LayoutLeft >::value ) { + test_left( i, error_count ); } + else if ( std::is_same< typename ViewType::array_layout, Kokkos::LayoutRight >::value ) { + test_right( i, error_count ); + } + } - constexpr static size_t N0 = 10 ; - constexpr static size_t N1 = 9 ; - constexpr static size_t N2 = 8 ; - constexpr static size_t N3 = 7 ; - constexpr static size_t N4 = 6 ; - constexpr static size_t N5 = 5 ; - constexpr static size_t N6 = 4 ; - constexpr static size_t N7 = 3 ; + constexpr static size_t N0 = 10; + constexpr static size_t N1 = 9; + constexpr static size_t N2 = 8; + constexpr static size_t N3 = 7; + constexpr static size_t N4 = 6; + constexpr static size_t N5 = 5; + constexpr static size_t N6 = 4; + constexpr static size_t N7 = 3; - TestViewMapOperator() : v( "Test" , N0, N1, N2, N3, N4, N5, N6, N7 ) {} + TestViewMapOperator() : v( "Test", N0, N1, N2, N3, N4, N5, N6, N7 ) {} static void run() - { - TestViewMapOperator self ; + { + TestViewMapOperator self; - ASSERT_EQ( self.v.dimension_0() , ( 0 < ViewType::rank ? N0 : 1 ) ); - ASSERT_EQ( self.v.dimension_1() , ( 1 < ViewType::rank ? N1 : 1 ) ); - ASSERT_EQ( self.v.dimension_2() , ( 2 < ViewType::rank ? N2 : 1 ) ); - ASSERT_EQ( self.v.dimension_3() , ( 3 < ViewType::rank ? N3 : 1 ) ); - ASSERT_EQ( self.v.dimension_4() , ( 4 < ViewType::rank ? N4 : 1 ) ); - ASSERT_EQ( self.v.dimension_5() , ( 5 < ViewType::rank ? 
N5 : 1 ) ); - ASSERT_EQ( self.v.dimension_6() , ( 6 < ViewType::rank ? N6 : 1 ) ); - ASSERT_EQ( self.v.dimension_7() , ( 7 < ViewType::rank ? N7 : 1 ) ); + ASSERT_EQ( self.v.dimension_0(), ( 0 < ViewType::rank ? N0 : 1 ) ); + ASSERT_EQ( self.v.dimension_1(), ( 1 < ViewType::rank ? N1 : 1 ) ); + ASSERT_EQ( self.v.dimension_2(), ( 2 < ViewType::rank ? N2 : 1 ) ); + ASSERT_EQ( self.v.dimension_3(), ( 3 < ViewType::rank ? N3 : 1 ) ); + ASSERT_EQ( self.v.dimension_4(), ( 4 < ViewType::rank ? N4 : 1 ) ); + ASSERT_EQ( self.v.dimension_5(), ( 5 < ViewType::rank ? N5 : 1 ) ); + ASSERT_EQ( self.v.dimension_6(), ( 6 < ViewType::rank ? N6 : 1 ) ); + ASSERT_EQ( self.v.dimension_7(), ( 7 < ViewType::rank ? N7 : 1 ) ); - ASSERT_LE( self.v.dimension_0()* - self.v.dimension_1()* - self.v.dimension_2()* - self.v.dimension_3()* - self.v.dimension_4()* - self.v.dimension_5()* - self.v.dimension_6()* - self.v.dimension_7() - , self.v.span() ); + ASSERT_LE( self.v.dimension_0() * + self.v.dimension_1() * + self.v.dimension_2() * + self.v.dimension_3() * + self.v.dimension_4() * + self.v.dimension_5() * + self.v.dimension_6() * + self.v.dimension_7() + , self.v.span() ); - long error_count ; - Kokkos::RangePolicy< typename ViewType::execution_space > range(0,self.v.dimension_0()); - Kokkos::parallel_reduce( range , self , error_count ); - ASSERT_EQ( 0 , error_count ); - } + long error_count; + Kokkos::RangePolicy< typename ViewType::execution_space > range( 0, self.v.dimension_0() ); + Kokkos::parallel_reduce( range, self, error_count ); + ASSERT_EQ( 0, error_count ); + } }; - template< class Space > void test_view_mapping_operator() { - typedef typename Space::execution_space ExecSpace ; + typedef typename Space::execution_space ExecSpace; - TestViewMapOperator< Kokkos::View >::run(); - TestViewMapOperator< Kokkos::View >::run(); - TestViewMapOperator< Kokkos::View >::run(); - TestViewMapOperator< Kokkos::View >::run(); - TestViewMapOperator< Kokkos::View >::run(); - 
TestViewMapOperator< Kokkos::View >::run(); - TestViewMapOperator< Kokkos::View >::run(); - TestViewMapOperator< Kokkos::View >::run(); + TestViewMapOperator< Kokkos::View >::run(); + TestViewMapOperator< Kokkos::View >::run(); + TestViewMapOperator< Kokkos::View >::run(); + TestViewMapOperator< Kokkos::View >::run(); + TestViewMapOperator< Kokkos::View >::run(); + TestViewMapOperator< Kokkos::View >::run(); + TestViewMapOperator< Kokkos::View >::run(); + TestViewMapOperator< Kokkos::View >::run(); - TestViewMapOperator< Kokkos::View >::run(); - TestViewMapOperator< Kokkos::View >::run(); - TestViewMapOperator< Kokkos::View >::run(); - TestViewMapOperator< Kokkos::View >::run(); - TestViewMapOperator< Kokkos::View >::run(); - TestViewMapOperator< Kokkos::View >::run(); - TestViewMapOperator< Kokkos::View >::run(); - TestViewMapOperator< Kokkos::View >::run(); + TestViewMapOperator< Kokkos::View >::run(); + TestViewMapOperator< Kokkos::View >::run(); + TestViewMapOperator< Kokkos::View >::run(); + TestViewMapOperator< Kokkos::View >::run(); + TestViewMapOperator< Kokkos::View >::run(); + TestViewMapOperator< Kokkos::View >::run(); + TestViewMapOperator< Kokkos::View >::run(); + TestViewMapOperator< Kokkos::View >::run(); } /*--------------------------------------------------------------------------*/ template< class Space > struct TestViewMappingAtomic { - typedef typename Space::execution_space ExecSpace ; - typedef typename Space::memory_space MemSpace ; + typedef typename Space::execution_space ExecSpace; + typedef typename Space::memory_space MemSpace; - typedef Kokkos::MemoryTraits< Kokkos::Atomic > mem_trait ; + typedef Kokkos::MemoryTraits< Kokkos::Atomic > mem_trait; - typedef Kokkos::View< int * , ExecSpace > T ; - typedef Kokkos::View< int * , ExecSpace , mem_trait > T_atom ; + typedef Kokkos::View< int *, ExecSpace > T; + typedef Kokkos::View< int *, ExecSpace, mem_trait > T_atom; - T x ; - T_atom x_atom ; + T x; + T_atom x_atom; - constexpr static size_t 
N = 100000 ; + constexpr static size_t N = 100000; struct TagInit {}; struct TagUpdate {}; struct TagVerify {}; KOKKOS_INLINE_FUNCTION - void operator()( const TagInit & , const int i ) const - { x(i) = i ; } + void operator()( const TagInit &, const int i ) const + { x( i ) = i; } KOKKOS_INLINE_FUNCTION - void operator()( const TagUpdate & , const int i ) const - { x_atom(i%2) += 1 ; } + void operator()( const TagUpdate &, const int i ) const + { x_atom( i % 2 ) += 1; } KOKKOS_INLINE_FUNCTION - void operator()( const TagVerify & , const int i , long & error_count ) const - { - if ( i < 2 ) { if ( x(i) != int(i + N / 2) ) ++error_count ; } - else { if ( x(i) != int(i) ) ++error_count ; } - } + void operator()( const TagVerify &, const int i, long & error_count ) const + { + if ( i < 2 ) { if ( x( i ) != int( i + N / 2 ) ) ++error_count; } + else { if ( x( i ) != int( i ) ) ++error_count; } + } TestViewMappingAtomic() - : x("x",N) + : x( "x", N ) , x_atom( x ) {} static void run() - { - ASSERT_TRUE( T::reference_type_is_lvalue_reference ); - ASSERT_FALSE( T_atom::reference_type_is_lvalue_reference ); + { + ASSERT_TRUE( T::reference_type_is_lvalue_reference ); + ASSERT_FALSE( T_atom::reference_type_is_lvalue_reference ); - TestViewMappingAtomic self ; - Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace , TagInit >(0,N) , self ); - Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace , TagUpdate >(0,N) , self ); - long error_count = -1 ; - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , TagVerify >(0,N) , self , error_count ); - ASSERT_EQ( 0 , error_count ); - typename TestViewMappingAtomic::T_atom::HostMirror x_host = Kokkos::create_mirror_view(self.x); - Kokkos::deep_copy(x_host,self.x); - error_count = -1; - Kokkos::parallel_reduce( Kokkos::RangePolicy< Kokkos::DefaultHostExecutionSpace, TagVerify>(0,N), - [=] ( const TagVerify & , const int i , long & tmp_error_count ) { - if ( i < 2 ) { if ( x_host(i) != int(i + N / 2) ) ++tmp_error_count ; } - else 
{ if ( x_host(i) != int(i) ) ++tmp_error_count ; } - }, error_count); - ASSERT_EQ( 0 , error_count ); - Kokkos::deep_copy(self.x,x_host); - } + TestViewMappingAtomic self; + + Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, TagInit >( 0, N ), self ); + Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, TagUpdate >( 0, N ), self ); + + long error_count = -1; + + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace, TagVerify >( 0, N ), self, error_count ); + + ASSERT_EQ( 0, error_count ); + + typename TestViewMappingAtomic::T_atom::HostMirror x_host = Kokkos::create_mirror_view( self.x ); + Kokkos::deep_copy( x_host, self.x ); + + error_count = -1; + + Kokkos::parallel_reduce( Kokkos::RangePolicy< Kokkos::DefaultHostExecutionSpace, TagVerify >( 0, N ), + [=] ( const TagVerify &, const int i, long & tmp_error_count ) + { + if ( i < 2 ) { + if ( x_host( i ) != int( i + N / 2 ) ) ++tmp_error_count ; + } + else { + if ( x_host( i ) != int( i ) ) ++tmp_error_count ; + } + }, error_count); + + ASSERT_EQ( 0 , error_count ); + Kokkos::deep_copy( self.x, x_host ); + } }; /*--------------------------------------------------------------------------*/ template< class Space > struct TestViewMappingClassValue { - typedef typename Space::execution_space ExecSpace ; - typedef typename Space::memory_space MemSpace ; + typedef typename Space::execution_space ExecSpace; + typedef typename Space::memory_space MemSpace; struct ValueType { KOKKOS_INLINE_FUNCTION @@ -1396,11 +1424,11 @@ struct TestViewMappingClassValue { { #if 0 #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) - printf("TestViewMappingClassValue construct on Cuda\n"); + printf( "TestViewMappingClassValue construct on Cuda\n" ); #elif defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - printf("TestViewMappingClassValue construct on Host\n"); + printf( "TestViewMappingClassValue construct on Host\n" ); #else - printf("TestViewMappingClassValue construct unknown\n"); + printf( 
"TestViewMappingClassValue construct unknown\n" ); #endif #endif } @@ -1409,11 +1437,11 @@ struct TestViewMappingClassValue { { #if 0 #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) - printf("TestViewMappingClassValue destruct on Cuda\n"); + printf( "TestViewMappingClassValue destruct on Cuda\n" ); #elif defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - printf("TestViewMappingClassValue destruct on Host\n"); + printf( "TestViewMappingClassValue destruct on Host\n" ); #else - printf("TestViewMappingClassValue destruct unknown\n"); + printf( "TestViewMappingClassValue destruct unknown\n" ); #endif #endif } @@ -1421,17 +1449,15 @@ struct TestViewMappingClassValue { static void run() { - using namespace Kokkos::Experimental ; + using namespace Kokkos::Experimental; + ExecSpace::fence(); { - View< ValueType , ExecSpace > a("a"); + View< ValueType, ExecSpace > a( "a" ); ExecSpace::fence(); } ExecSpace::fence(); } }; -} /* namespace Test */ - -/*--------------------------------------------------------------------------*/ - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/TestViewOfClass.hpp b/lib/kokkos/core/unit_test/TestViewOfClass.hpp index 381b8786bc..d624c5dda2 100644 --- a/lib/kokkos/core/unit_test/TestViewOfClass.hpp +++ b/lib/kokkos/core/unit_test/TestViewOfClass.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -48,34 +48,29 @@ #include #include -/*--------------------------------------------------------------------------*/ - namespace Test { template< class Space > struct NestedView { - - Kokkos::View member ; + Kokkos::View< int*, Space > member; public: + KOKKOS_INLINE_FUNCTION + NestedView() : member() {} KOKKOS_INLINE_FUNCTION - NestedView() : member() - {} - - KOKKOS_INLINE_FUNCTION - NestedView & operator = ( const Kokkos::View & lhs ) - { - member = lhs ; - if ( member.dimension_0() ) Kokkos::atomic_add( & member(0) , 1 ); - return *this ; - } + NestedView & operator=( const Kokkos::View< int*, Space > & lhs ) + { + member = lhs; + if ( member.dimension_0() ) Kokkos::atomic_add( & member( 0 ), 1 ); + return *this; + } KOKKOS_INLINE_FUNCTION ~NestedView() - { + { if ( member.dimension_0() ) { - Kokkos::atomic_add( & member(0) , -1 ); + Kokkos::atomic_add( & member( 0 ), -1 ); } } }; @@ -83,49 +78,44 @@ public: template< class Space > struct NestedViewFunctor { - Kokkos::View< NestedView * , Space > nested ; - Kokkos::View array ; + Kokkos::View< NestedView *, Space > nested; + Kokkos::View< int*, Space > array; - NestedViewFunctor( - const Kokkos::View< NestedView * , Space > & arg_nested , - const Kokkos::View & arg_array ) + NestedViewFunctor( + const Kokkos::View< NestedView *, Space > & arg_nested, + const Kokkos::View< int*, Space > & arg_array ) : nested( arg_nested ) , array( arg_array ) {} KOKKOS_INLINE_FUNCTION - void operator()( int i ) const - { nested[i] = array ; } + void operator()( int i ) const { nested[i] = array; } }; - template< class Space > void view_nested_view() { - Kokkos::View tracking("tracking",1); + Kokkos::View< int*, Space > tracking( "tracking", 1 ); - typename Kokkos::View::HostMirror - host_tracking = Kokkos::create_mirror( tracking ); + typename Kokkos::View< int*, Space >::HostMirror host_tracking 
= Kokkos::create_mirror( tracking ); { - Kokkos::View< NestedView * , Space > a("a_nested_view",2); + Kokkos::View< NestedView *, Space > a( "a_nested_view", 2 ); - Kokkos::parallel_for( Kokkos::RangePolicy(0,2) , NestedViewFunctor( a , tracking ) ); - Kokkos::deep_copy( host_tracking , tracking ); - ASSERT_EQ( 2 , host_tracking(0) ); + Kokkos::parallel_for( Kokkos::RangePolicy< Space >( 0, 2 ), NestedViewFunctor< Space >( a, tracking ) ); + Kokkos::deep_copy( host_tracking, tracking ); + ASSERT_EQ( 2, host_tracking( 0 ) ); - Kokkos::View< NestedView * , Space > b("b_nested_view",2); - Kokkos::parallel_for( Kokkos::RangePolicy(0,2) , NestedViewFunctor( b , tracking ) ); - Kokkos::deep_copy( host_tracking , tracking ); - ASSERT_EQ( 4 , host_tracking(0) ); + Kokkos::View< NestedView *, Space > b( "b_nested_view", 2 ); + Kokkos::parallel_for( Kokkos::RangePolicy< Space >( 0, 2 ), NestedViewFunctor< Space >( b, tracking ) ); + Kokkos::deep_copy( host_tracking, tracking ); + ASSERT_EQ( 4, host_tracking( 0 ) ); } - Kokkos::deep_copy( host_tracking , tracking ); - ASSERT_EQ( 0 , host_tracking(0) ); + Kokkos::deep_copy( host_tracking, tracking ); + + ASSERT_EQ( 0, host_tracking( 0 ) ); } -} - -/*--------------------------------------------------------------------------*/ - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/TestViewSpaceAssign.hpp b/lib/kokkos/core/unit_test/TestViewSpaceAssign.hpp index 09141e582c..21ae92e93c 100644 --- a/lib/kokkos/core/unit_test/TestViewSpaceAssign.hpp +++ b/lib/kokkos/core/unit_test/TestViewSpaceAssign.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. 
-// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -48,35 +48,29 @@ #include #include -/*--------------------------------------------------------------------------*/ - namespace Test { -template< typename SpaceDst , typename SpaceSrc > +template< typename SpaceDst, typename SpaceSrc > void view_space_assign() { - Kokkos::View a = - Kokkos::View("a",1); + Kokkos::View< double*, SpaceDst > a = + Kokkos::View< double*, SpaceSrc >( "a", 1 ); - Kokkos::View b = - Kokkos::View("b",1); + Kokkos::View< double*, Kokkos::LayoutLeft, SpaceDst > b = + Kokkos::View< double*, Kokkos::LayoutLeft, SpaceSrc >( "b", 1 ); - Kokkos::View c = - Kokkos::View("c",1); + Kokkos::View< double*, Kokkos::LayoutRight, SpaceDst > c = + Kokkos::View< double*, Kokkos::LayoutRight, SpaceSrc >( "c", 1 ); - Kokkos::View d = - Kokkos::View("d",1); + Kokkos::View< double*, SpaceDst, Kokkos::MemoryRandomAccess > d = + Kokkos::View< double*, SpaceSrc >( "d", 1 ); - Kokkos::View e = - Kokkos::View("e",1); + Kokkos::View< double*, Kokkos::LayoutLeft, SpaceDst, Kokkos::MemoryRandomAccess > e = + Kokkos::View< double*, Kokkos::LayoutLeft, SpaceSrc >( "e", 1 ); // Rank-one layout can assign: - Kokkos::View f = - Kokkos::View("f",1); + Kokkos::View< double*, Kokkos::LayoutRight, SpaceDst > f = + Kokkos::View< double*, Kokkos::LayoutLeft, SpaceSrc >( "f", 1 ); } - } // namespace Test - -/*--------------------------------------------------------------------------*/ - diff --git a/lib/kokkos/core/unit_test/TestViewSubview.hpp b/lib/kokkos/core/unit_test/TestViewSubview.hpp index 1c2575b6f6..386301b45d 100644 --- a/lib/kokkos/core/unit_test/TestViewSubview.hpp +++ 
b/lib/kokkos/core/unit_test/TestViewSubview.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -48,64 +48,68 @@ #include #include -/*--------------------------------------------------------------------------*/ - namespace TestViewSubview { -template +template< class Layout, class Space > struct getView { static - Kokkos::View get(int n, int m) { - return Kokkos::View("G",n,m); + Kokkos::View< double**, Layout, Space > get( int n, int m ) { + return Kokkos::View< double**, Layout, Space >( "G", n, m ); } }; -template -struct getView { +template< class Space > +struct getView< Kokkos::LayoutStride, Space > { static - Kokkos::View get(int n, int m) { - const int rank = 2 ; + Kokkos::View< double**, Kokkos::LayoutStride, Space > get( int n, int m ) { + const int rank = 2; const int order[] = { 0, 1 }; - const unsigned dim[] = { unsigned(n), unsigned(m) }; - Kokkos::LayoutStride stride = Kokkos::LayoutStride::order_dimensions( rank , order , dim ); - return Kokkos::View("G",stride); + const unsigned dim[] = { unsigned( n ), unsigned( m ) }; + Kokkos::LayoutStride stride = Kokkos::LayoutStride::order_dimensions( rank, order, dim ); + + return Kokkos::View< double**, Kokkos::LayoutStride, Space >( "G", stride ); } }; -template +template< class ViewType, class Space > struct fill_1D { typedef typename 
Space::execution_space execution_space; typedef typename ViewType::size_type size_type; + ViewType a; double val; - fill_1D(ViewType a_, double val_):a(a_),val(val_) { - } + + fill_1D( ViewType a_, double val_ ) : a( a_ ), val( val_ ) {} + KOKKOS_INLINE_FUNCTION - void operator() (const int i) const { - a(i) = val; - } + void operator()( const int i ) const { a( i ) = val; } }; -template +template< class ViewType, class Space > struct fill_2D { typedef typename Space::execution_space execution_space; typedef typename ViewType::size_type size_type; + ViewType a; double val; - fill_2D(ViewType a_, double val_):a(a_),val(val_) { - } + + fill_2D( ViewType a_, double val_ ) : a( a_ ), val( val_ ) {} + KOKKOS_INLINE_FUNCTION - void operator() (const int i) const{ - for(int j = 0; j < static_cast(a.dimension_1()); j++) - a(i,j) = val; + void operator()( const int i ) const + { + for ( int j = 0; j < static_cast< int >( a.dimension_1() ); j++ ) { + a( i, j ) = val; + } } }; -template +template< class Layout, class Space > void test_auto_1d () { - typedef Kokkos::View mv_type; + typedef Kokkos::View< double**, Layout, Space > mv_type; typedef typename mv_type::size_type size_type; + const double ZERO = 0.0; const double ONE = 1.0; const double TWO = 2.0; @@ -113,359 +117,359 @@ void test_auto_1d () const size_type numRows = 10; const size_type numCols = 3; - mv_type X = getView::get(numRows, numCols); - typename mv_type::HostMirror X_h = Kokkos::create_mirror_view (X); + mv_type X = getView< Layout, Space >::get( numRows, numCols ); + typename mv_type::HostMirror X_h = Kokkos::create_mirror_view( X ); - fill_2D f1(X, ONE); - Kokkos::parallel_for(X.dimension_0(),f1); - Kokkos::deep_copy (X_h, X); - for (size_type j = 0; j < numCols; ++j) { - for (size_type i = 0; i < numRows; ++i) { - ASSERT_TRUE(X_h(i,j) == ONE); + fill_2D< mv_type, Space > f1( X, ONE ); + Kokkos::parallel_for( X.dimension_0(), f1 ); + Kokkos::deep_copy( X_h, X ); + for ( size_type j = 0; j < numCols; ++j ) 
{ + for ( size_type i = 0; i < numRows; ++i ) { + ASSERT_TRUE( X_h( i, j ) == ONE ); } } - fill_2D f2(X, 0.0); - Kokkos::parallel_for(X.dimension_0(),f2); - Kokkos::deep_copy (X_h, X); - for (size_type j = 0; j < numCols; ++j) { - for (size_type i = 0; i < numRows; ++i) { - ASSERT_TRUE(X_h(i,j) == ZERO); + fill_2D< mv_type, Space > f2( X, 0.0 ); + Kokkos::parallel_for( X.dimension_0(), f2 ); + Kokkos::deep_copy( X_h, X ); + for ( size_type j = 0; j < numCols; ++j ) { + for ( size_type i = 0; i < numRows; ++i ) { + ASSERT_TRUE( X_h( i, j ) == ZERO ); } } - fill_2D f3(X, TWO); - Kokkos::parallel_for(X.dimension_0(),f3); - Kokkos::deep_copy (X_h, X); - for (size_type j = 0; j < numCols; ++j) { - for (size_type i = 0; i < numRows; ++i) { - ASSERT_TRUE(X_h(i,j) == TWO); + fill_2D< mv_type, Space > f3( X, TWO ); + Kokkos::parallel_for( X.dimension_0(), f3 ); + Kokkos::deep_copy( X_h, X ); + for ( size_type j = 0; j < numCols; ++j ) { + for ( size_type i = 0; i < numRows; ++i ) { + ASSERT_TRUE( X_h( i, j ) == TWO ); } } - for (size_type j = 0; j < numCols; ++j) { - auto X_j = Kokkos::subview (X, Kokkos::ALL, j); + for ( size_type j = 0; j < numCols; ++j ) { + auto X_j = Kokkos::subview( X, Kokkos::ALL, j ); - fill_1D f4(X_j, ZERO); - Kokkos::parallel_for(X_j.dimension_0(),f4); - Kokkos::deep_copy (X_h, X); - for (size_type i = 0; i < numRows; ++i) { - ASSERT_TRUE(X_h(i,j) == ZERO); + fill_1D< decltype( X_j ), Space > f4( X_j, ZERO ); + Kokkos::parallel_for( X_j.dimension_0(), f4 ); + Kokkos::deep_copy( X_h, X ); + for ( size_type i = 0; i < numRows; ++i ) { + ASSERT_TRUE( X_h( i, j ) == ZERO ); } - for (size_type jj = 0; jj < numCols; ++jj) { - auto X_jj = Kokkos::subview (X, Kokkos::ALL, jj); - fill_1D f5(X_jj, ONE); - Kokkos::parallel_for(X_jj.dimension_0(),f5); - Kokkos::deep_copy (X_h, X); - for (size_type i = 0; i < numRows; ++i) { - ASSERT_TRUE(X_h(i,jj) == ONE); + for ( size_type jj = 0; jj < numCols; ++jj ) { + auto X_jj = Kokkos::subview ( X, Kokkos::ALL, jj ); + 
fill_1D< decltype( X_jj ), Space > f5( X_jj, ONE ); + Kokkos::parallel_for( X_jj.dimension_0(), f5 ); + Kokkos::deep_copy( X_h, X ); + for ( size_type i = 0; i < numRows; ++i ) { + ASSERT_TRUE( X_h( i, jj ) == ONE ); } } } } -template -void test_1d_strided_assignment_impl(bool a, bool b, bool c, bool d, int n, int m) { - Kokkos::View l2d("l2d",n,m); +template< class LD, class LS, class Space > +void test_1d_strided_assignment_impl( bool a, bool b, bool c, bool d, int n, int m ) { + Kokkos::View< double**, LS, Space > l2d( "l2d", n, m ); - int col = n>2?2:0; - int row = m>2?2:0; + int col = n > 2 ? 2 : 0; + int row = m > 2 ? 2 : 0; - if(Kokkos::Impl::SpaceAccessibility::accessible) { - if(a) { - Kokkos::View l1da = Kokkos::subview(l2d,Kokkos::ALL,row); - ASSERT_TRUE( & l1da(0) == & l2d(0,row) ); - if(n>1) - ASSERT_TRUE( & l1da(1) == & l2d(1,row) ); - } - if(b && n>13) { - Kokkos::View l1db = Kokkos::subview(l2d,std::pair(2,13),row); - ASSERT_TRUE( & l1db(0) == & l2d(2,row) ); - ASSERT_TRUE( & l1db(1) == & l2d(3,row) ); - } - if(c) { - Kokkos::View l1dc = Kokkos::subview(l2d,col,Kokkos::ALL); - ASSERT_TRUE( & l1dc(0) == & l2d(col,0) ); - if(m>1) - ASSERT_TRUE( & l1dc(1) == & l2d(col,1) ); - } - if(d && m>13) { - Kokkos::View l1dd = Kokkos::subview(l2d,col,std::pair(2,13)); - ASSERT_TRUE( & l1dd(0) == & l2d(col,2) ); - ASSERT_TRUE( & l1dd(1) == & l2d(col,3) ); - } + if ( Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace, typename Space::memory_space >::accessible ) { + if ( a ) { + Kokkos::View< double*, LD, Space > l1da = Kokkos::subview( l2d, Kokkos::ALL, row ); + ASSERT_TRUE( & l1da( 0 ) == & l2d( 0, row ) ); + if ( n > 1 ) { + ASSERT_TRUE( & l1da( 1 ) == & l2d( 1, row ) ); + } + } + + if ( b && n > 13 ) { + Kokkos::View< double*, LD, Space > l1db = Kokkos::subview( l2d, std::pair< unsigned, unsigned >( 2, 13 ), row ); + ASSERT_TRUE( & l1db( 0 ) == & l2d( 2, row ) ); + ASSERT_TRUE( & l1db( 1 ) == & l2d( 3, row ) ); + } + + if ( c ) { + Kokkos::View< double*, LD, 
Space > l1dc = Kokkos::subview( l2d, col, Kokkos::ALL ); + ASSERT_TRUE( & l1dc( 0 ) == & l2d( col, 0 ) ); + if( m > 1 ) { + ASSERT_TRUE( & l1dc( 1 ) == & l2d( col, 1 ) ); + } + } + + if ( d && m > 13 ) { + Kokkos::View< double*, LD, Space > l1dd = Kokkos::subview( l2d, col, std::pair< unsigned, unsigned >( 2, 13 ) ); + ASSERT_TRUE( & l1dd( 0 ) == & l2d( col, 2 ) ); + ASSERT_TRUE( & l1dd( 1 ) == & l2d( col, 3 ) ); + } } } -template +template< class Space > void test_1d_strided_assignment() { - test_1d_strided_assignment_impl(true,true,true,true,17,3); - test_1d_strided_assignment_impl(true,true,true,true,17,3); + test_1d_strided_assignment_impl< Kokkos::LayoutStride, Kokkos::LayoutLeft, Space >( true, true, true, true, 17, 3 ); + test_1d_strided_assignment_impl< Kokkos::LayoutStride, Kokkos::LayoutRight, Space >( true, true, true, true, 17, 3 ); - test_1d_strided_assignment_impl(true,true,false,false,17,3); - test_1d_strided_assignment_impl(true,true,false,false,17,3); - test_1d_strided_assignment_impl(false,false,true,true,17,3); - test_1d_strided_assignment_impl(false,false,true,true,17,3); + test_1d_strided_assignment_impl< Kokkos::LayoutLeft, Kokkos::LayoutLeft, Space >( true, true, false, false, 17, 3 ); + test_1d_strided_assignment_impl< Kokkos::LayoutRight, Kokkos::LayoutLeft, Space >( true, true, false, false, 17, 3 ); + test_1d_strided_assignment_impl< Kokkos::LayoutLeft, Kokkos::LayoutRight, Space >( false, false, true, true, 17, 3 ); + test_1d_strided_assignment_impl< Kokkos::LayoutRight, Kokkos::LayoutRight, Space >( false, false, true, true, 17, 3 ); - test_1d_strided_assignment_impl(true,true,false,false,17,1); - test_1d_strided_assignment_impl(true,true,true,true,1,17); - test_1d_strided_assignment_impl(true,true,true,true,1,17); - test_1d_strided_assignment_impl(true,true,false,false,17,1); + test_1d_strided_assignment_impl< Kokkos::LayoutLeft, Kokkos::LayoutLeft, Space >( true, true, false, false, 17, 1 ); + test_1d_strided_assignment_impl< 
Kokkos::LayoutLeft, Kokkos::LayoutLeft, Space >( true, true, true, true, 1, 17 ); + test_1d_strided_assignment_impl< Kokkos::LayoutRight, Kokkos::LayoutLeft, Space >( true, true, true, true, 1, 17 ); + test_1d_strided_assignment_impl< Kokkos::LayoutRight, Kokkos::LayoutLeft, Space >( true, true, false, false, 17, 1 ); - test_1d_strided_assignment_impl(true,true,true,true,17,1); - test_1d_strided_assignment_impl(false,false,true,true,1,17); - test_1d_strided_assignment_impl(false,false,true,true,1,17); - test_1d_strided_assignment_impl(true,true,true,true,17,1); + test_1d_strided_assignment_impl< Kokkos::LayoutLeft, Kokkos::LayoutRight, Space >( true, true, true, true, 17, 1 ); + test_1d_strided_assignment_impl< Kokkos::LayoutLeft, Kokkos::LayoutRight, Space >( false, false, true, true, 1, 17 ); + test_1d_strided_assignment_impl< Kokkos::LayoutRight, Kokkos::LayoutRight, Space >( false, false, true, true, 1, 17 ); + test_1d_strided_assignment_impl< Kokkos::LayoutRight, Kokkos::LayoutRight, Space >( true, true, true, true, 17, 1 ); } template< class Space > void test_left_0() { - typedef Kokkos::View< int [2][3][4][5][2][3][4][5] , Kokkos::LayoutLeft , Space > - view_static_8_type ; + typedef Kokkos::View< int [2][3][4][5][2][3][4][5], Kokkos::LayoutLeft, Space > view_static_8_type; - if(Kokkos::Impl::SpaceAccessibility::accessible) { + if ( Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace, typename Space::memory_space >::accessible ) { + view_static_8_type x_static_8( "x_static_left_8" ); - view_static_8_type x_static_8("x_static_left_8"); + ASSERT_TRUE( x_static_8.is_contiguous() ); - ASSERT_TRUE( x_static_8.is_contiguous() ); + Kokkos::View< int, Kokkos::LayoutLeft, Space > x0 = Kokkos::subview( x_static_8, 0, 0, 0, 0, 0, 0, 0, 0 ); - Kokkos::View x0 = Kokkos::subview( x_static_8 , 0, 0, 0, 0, 0, 0, 0, 0 ); + ASSERT_TRUE( x0.is_contiguous() ); + ASSERT_TRUE( & x0() == & x_static_8( 0, 0, 0, 0, 0, 0, 0, 0 ) ); - ASSERT_TRUE( x0.is_contiguous() ); - ASSERT_TRUE( 
& x0() == & x_static_8(0,0,0,0,0,0,0,0) ); + Kokkos::View< int*, Kokkos::LayoutLeft, Space > x1 = + Kokkos::subview( x_static_8, Kokkos::pair< int, int >( 0, 2 ), 1, 2, 3, 0, 1, 2, 3 ); - Kokkos::View x1 = - Kokkos::subview( x_static_8, Kokkos::pair(0,2), 1, 2, 3, 0, 1, 2, 3 ); + ASSERT_TRUE( x1.is_contiguous() ); + ASSERT_TRUE( & x1( 0 ) == & x_static_8( 0, 1, 2, 3, 0, 1, 2, 3 ) ); + ASSERT_TRUE( & x1( 1 ) == & x_static_8( 1, 1, 2, 3, 0, 1, 2, 3 ) ); - ASSERT_TRUE( x1.is_contiguous() ); - ASSERT_TRUE( & x1(0) == & x_static_8(0,1,2,3,0,1,2,3) ); - ASSERT_TRUE( & x1(1) == & x_static_8(1,1,2,3,0,1,2,3) ); + Kokkos::View< int**, Kokkos::LayoutLeft, Space > x2 = + Kokkos::subview( x_static_8, Kokkos::pair< int, int >( 0, 2 ), 1, 2, 3 + , Kokkos::pair< int, int >( 0, 2 ), 1, 2, 3 ); - Kokkos::View x2 = - Kokkos::subview( x_static_8, Kokkos::pair(0,2), 1, 2, 3 - , Kokkos::pair(0,2), 1, 2, 3 ); + ASSERT_TRUE( ! x2.is_contiguous() ); + ASSERT_TRUE( & x2( 0, 0 ) == & x_static_8( 0, 1, 2, 3, 0, 1, 2, 3 ) ); + ASSERT_TRUE( & x2( 1, 0 ) == & x_static_8( 1, 1, 2, 3, 0, 1, 2, 3 ) ); + ASSERT_TRUE( & x2( 0, 1 ) == & x_static_8( 0, 1, 2, 3, 1, 1, 2, 3 ) ); + ASSERT_TRUE( & x2( 1, 1 ) == & x_static_8( 1, 1, 2, 3, 1, 1, 2, 3 ) ); - ASSERT_TRUE( ! x2.is_contiguous() ); - ASSERT_TRUE( & x2(0,0) == & x_static_8(0,1,2,3,0,1,2,3) ); - ASSERT_TRUE( & x2(1,0) == & x_static_8(1,1,2,3,0,1,2,3) ); - ASSERT_TRUE( & x2(0,1) == & x_static_8(0,1,2,3,1,1,2,3) ); - ASSERT_TRUE( & x2(1,1) == & x_static_8(1,1,2,3,1,1,2,3) ); + // Kokkos::View< int**, Kokkos::LayoutLeft, Space > error_2 = + Kokkos::View< int**, Kokkos::LayoutStride, Space > sx2 = + Kokkos::subview( x_static_8, 1, Kokkos::pair< int, int >( 0, 2 ), 2, 3 + , Kokkos::pair< int, int >( 0, 2 ), 1, 2, 3 ); - // Kokkos::View error_2 = - Kokkos::View sx2 = - Kokkos::subview( x_static_8, 1, Kokkos::pair(0,2), 2, 3 - , Kokkos::pair(0,2), 1, 2, 3 ); + ASSERT_TRUE( ! 
sx2.is_contiguous() ); + ASSERT_TRUE( & sx2( 0, 0 ) == & x_static_8( 1, 0, 2, 3, 0, 1, 2, 3 ) ); + ASSERT_TRUE( & sx2( 1, 0 ) == & x_static_8( 1, 1, 2, 3, 0, 1, 2, 3 ) ); + ASSERT_TRUE( & sx2( 0, 1 ) == & x_static_8( 1, 0, 2, 3, 1, 1, 2, 3 ) ); + ASSERT_TRUE( & sx2( 1, 1 ) == & x_static_8( 1, 1, 2, 3, 1, 1, 2, 3 ) ); - ASSERT_TRUE( ! sx2.is_contiguous() ); - ASSERT_TRUE( & sx2(0,0) == & x_static_8(1,0,2,3,0,1,2,3) ); - ASSERT_TRUE( & sx2(1,0) == & x_static_8(1,1,2,3,0,1,2,3) ); - ASSERT_TRUE( & sx2(0,1) == & x_static_8(1,0,2,3,1,1,2,3) ); - ASSERT_TRUE( & sx2(1,1) == & x_static_8(1,1,2,3,1,1,2,3) ); + Kokkos::View< int****, Kokkos::LayoutStride, Space > sx4 = + Kokkos::subview( x_static_8, 0, Kokkos::pair< int, int >( 0, 2 ) /* of [3] */ + , 1, Kokkos::pair< int, int >( 1, 3 ) /* of [5] */ + , 1, Kokkos::pair< int, int >( 0, 2 ) /* of [3] */ + , 2, Kokkos::pair< int, int >( 2, 4 ) /* of [5] */ + ); - Kokkos::View sx4 = - Kokkos::subview( x_static_8, 0, Kokkos::pair(0,2) /* of [3] */ - , 1, Kokkos::pair(1,3) /* of [5] */ - , 1, Kokkos::pair(0,2) /* of [3] */ - , 2, Kokkos::pair(2,4) /* of [5] */ - ); - - ASSERT_TRUE( ! sx4.is_contiguous() ); - - for ( int i0 = 0 ; i0 < (int) sx4.dimension_0() ; ++i0 ) - for ( int i1 = 0 ; i1 < (int) sx4.dimension_1() ; ++i1 ) - for ( int i2 = 0 ; i2 < (int) sx4.dimension_2() ; ++i2 ) - for ( int i3 = 0 ; i3 < (int) sx4.dimension_3() ; ++i3 ) { - ASSERT_TRUE( & sx4(i0,i1,i2,i3) == & x_static_8(0,0+i0, 1,1+i1, 1,0+i2, 2,2+i3) ); - } + ASSERT_TRUE( ! 
sx4.is_contiguous() ); + for ( int i0 = 0; i0 < (int) sx4.dimension_0(); ++i0 ) + for ( int i1 = 0; i1 < (int) sx4.dimension_1(); ++i1 ) + for ( int i2 = 0; i2 < (int) sx4.dimension_2(); ++i2 ) + for ( int i3 = 0; i3 < (int) sx4.dimension_3(); ++i3 ) + { + ASSERT_TRUE( & sx4( i0, i1, i2, i3 ) == & x_static_8( 0, 0 + i0, 1, 1 + i1, 1, 0 + i2, 2, 2 + i3 ) ); + } } } template< class Space > void test_left_1() { - typedef Kokkos::View< int ****[2][3][4][5] , Kokkos::LayoutLeft , Space > - view_type ; + typedef Kokkos::View< int ****[2][3][4][5], Kokkos::LayoutLeft, Space > view_type; - if(Kokkos::Impl::SpaceAccessibility::accessible) { + if ( Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace, typename Space::memory_space >::accessible ) { + view_type x8( "x_left_8", 2, 3, 4, 5 ); - view_type x8("x_left_8",2,3,4,5); + ASSERT_TRUE( x8.is_contiguous() ); - ASSERT_TRUE( x8.is_contiguous() ); + Kokkos::View< int, Kokkos::LayoutLeft, Space > x0 = Kokkos::subview( x8, 0, 0, 0, 0, 0, 0, 0, 0 ); - Kokkos::View x0 = Kokkos::subview( x8 , 0, 0, 0, 0, 0, 0, 0, 0 ); + ASSERT_TRUE( x0.is_contiguous() ); + ASSERT_TRUE( & x0() == & x8( 0, 0, 0, 0, 0, 0, 0, 0 ) ); - ASSERT_TRUE( x0.is_contiguous() ); - ASSERT_TRUE( & x0() == & x8(0,0,0,0,0,0,0,0) ); + Kokkos::View< int*, Kokkos::LayoutLeft, Space > x1 = + Kokkos::subview( x8, Kokkos::pair< int, int >( 0, 2 ), 1, 2, 3, 0, 1, 2, 3 ); - Kokkos::View x1 = - Kokkos::subview( x8, Kokkos::pair(0,2), 1, 2, 3, 0, 1, 2, 3 ); + ASSERT_TRUE( x1.is_contiguous() ); + ASSERT_TRUE( & x1( 0 ) == & x8( 0, 1, 2, 3, 0, 1, 2, 3 ) ); + ASSERT_TRUE( & x1( 1 ) == & x8( 1, 1, 2, 3, 0, 1, 2, 3 ) ); - ASSERT_TRUE( x1.is_contiguous() ); - ASSERT_TRUE( & x1(0) == & x8(0,1,2,3,0,1,2,3) ); - ASSERT_TRUE( & x1(1) == & x8(1,1,2,3,0,1,2,3) ); + Kokkos::View< int**, Kokkos::LayoutLeft, Space > x2 = + Kokkos::subview( x8, Kokkos::pair< int, int >( 0, 2 ), 1, 2, 3 + , Kokkos::pair< int, int >( 0, 2 ), 1, 2, 3 ); - Kokkos::View x2 = - Kokkos::subview( x8, 
Kokkos::pair(0,2), 1, 2, 3 - , Kokkos::pair(0,2), 1, 2, 3 ); + ASSERT_TRUE( ! x2.is_contiguous() ); + ASSERT_TRUE( & x2( 0, 0 ) == & x8( 0, 1, 2, 3, 0, 1, 2, 3 ) ); + ASSERT_TRUE( & x2( 1, 0 ) == & x8( 1, 1, 2, 3, 0, 1, 2, 3 ) ); + ASSERT_TRUE( & x2( 0, 1 ) == & x8( 0, 1, 2, 3, 1, 1, 2, 3 ) ); + ASSERT_TRUE( & x2( 1, 1 ) == & x8( 1, 1, 2, 3, 1, 1, 2, 3 ) ); - ASSERT_TRUE( ! x2.is_contiguous() ); - ASSERT_TRUE( & x2(0,0) == & x8(0,1,2,3,0,1,2,3) ); - ASSERT_TRUE( & x2(1,0) == & x8(1,1,2,3,0,1,2,3) ); - ASSERT_TRUE( & x2(0,1) == & x8(0,1,2,3,1,1,2,3) ); - ASSERT_TRUE( & x2(1,1) == & x8(1,1,2,3,1,1,2,3) ); + // Kokkos::View< int**, Kokkos::LayoutLeft, Space > error_2 = + Kokkos::View< int**, Kokkos::LayoutStride, Space > sx2 = + Kokkos::subview( x8, 1, Kokkos::pair< int, int >( 0, 2 ), 2, 3 + , Kokkos::pair< int, int >( 0, 2 ), 1, 2, 3 ); - // Kokkos::View error_2 = - Kokkos::View sx2 = - Kokkos::subview( x8, 1, Kokkos::pair(0,2), 2, 3 - , Kokkos::pair(0,2), 1, 2, 3 ); + ASSERT_TRUE( ! sx2.is_contiguous() ); + ASSERT_TRUE( & sx2( 0, 0 ) == & x8( 1, 0, 2, 3, 0, 1, 2, 3 ) ); + ASSERT_TRUE( & sx2( 1, 0 ) == & x8( 1, 1, 2, 3, 0, 1, 2, 3 ) ); + ASSERT_TRUE( & sx2( 0, 1 ) == & x8( 1, 0, 2, 3, 1, 1, 2, 3 ) ); + ASSERT_TRUE( & sx2( 1, 1 ) == & x8( 1, 1, 2, 3, 1, 1, 2, 3 ) ); - ASSERT_TRUE( ! 
sx2.is_contiguous() ); - ASSERT_TRUE( & sx2(0,0) == & x8(1,0,2,3,0,1,2,3) ); - ASSERT_TRUE( & sx2(1,0) == & x8(1,1,2,3,0,1,2,3) ); - ASSERT_TRUE( & sx2(0,1) == & x8(1,0,2,3,1,1,2,3) ); - ASSERT_TRUE( & sx2(1,1) == & x8(1,1,2,3,1,1,2,3) ); + Kokkos::View< int****, Kokkos::LayoutStride, Space > sx4 = + Kokkos::subview( x8, 0, Kokkos::pair< int, int >( 0, 2 ) /* of [3] */ + , 1, Kokkos::pair< int, int >( 1, 3 ) /* of [5] */ + , 1, Kokkos::pair< int, int >( 0, 2 ) /* of [3] */ + , 2, Kokkos::pair< int, int >( 2, 4 ) /* of [5] */ + ); - Kokkos::View sx4 = - Kokkos::subview( x8, 0, Kokkos::pair(0,2) /* of [3] */ - , 1, Kokkos::pair(1,3) /* of [5] */ - , 1, Kokkos::pair(0,2) /* of [3] */ - , 2, Kokkos::pair(2,4) /* of [5] */ - ); - - ASSERT_TRUE( ! sx4.is_contiguous() ); - - for ( int i0 = 0 ; i0 < (int) sx4.dimension_0() ; ++i0 ) - for ( int i1 = 0 ; i1 < (int) sx4.dimension_1() ; ++i1 ) - for ( int i2 = 0 ; i2 < (int) sx4.dimension_2() ; ++i2 ) - for ( int i3 = 0 ; i3 < (int) sx4.dimension_3() ; ++i3 ) { - ASSERT_TRUE( & sx4(i0,i1,i2,i3) == & x8(0,0+i0, 1,1+i1, 1,0+i2, 2,2+i3) ); - } + ASSERT_TRUE( ! 
sx4.is_contiguous() ); + for ( int i0 = 0; i0 < (int) sx4.dimension_0(); ++i0 ) + for ( int i1 = 0; i1 < (int) sx4.dimension_1(); ++i1 ) + for ( int i2 = 0; i2 < (int) sx4.dimension_2(); ++i2 ) + for ( int i3 = 0; i3 < (int) sx4.dimension_3(); ++i3 ) + { + ASSERT_TRUE( & sx4( i0, i1, i2, i3 ) == & x8( 0, 0 + i0, 1, 1 + i1, 1, 0 + i2, 2, 2 + i3 ) ); + } } } template< class Space > void test_left_2() { - typedef Kokkos::View< int **** , Kokkos::LayoutLeft , Space > view_type ; + typedef Kokkos::View< int ****, Kokkos::LayoutLeft, Space > view_type; - if(Kokkos::Impl::SpaceAccessibility::accessible) { + if ( Kokkos::Impl::SpaceAccessibility::accessible ) { + view_type x4( "x4", 2, 3, 4, 5 ); - view_type x4("x4",2,3,4,5); + ASSERT_TRUE( x4.is_contiguous() ); - ASSERT_TRUE( x4.is_contiguous() ); + Kokkos::View< int, Kokkos::LayoutLeft, Space > x0 = Kokkos::subview( x4, 0, 0, 0, 0 ); - Kokkos::View x0 = Kokkos::subview( x4 , 0, 0, 0, 0 ); + ASSERT_TRUE( x0.is_contiguous() ); + ASSERT_TRUE( & x0() == & x4( 0, 0, 0, 0 ) ); - ASSERT_TRUE( x0.is_contiguous() ); - ASSERT_TRUE( & x0() == & x4(0,0,0,0) ); + Kokkos::View< int*, Kokkos::LayoutLeft, Space > x1 = + Kokkos::subview( x4, Kokkos::pair< int, int >( 0, 2 ), 1, 2, 3 ); - Kokkos::View x1 = - Kokkos::subview( x4, Kokkos::pair(0,2), 1, 2, 3 ); + ASSERT_TRUE( x1.is_contiguous() ); + ASSERT_TRUE( & x1( 0 ) == & x4( 0, 1, 2, 3 ) ); + ASSERT_TRUE( & x1( 1 ) == & x4( 1, 1, 2, 3 ) ); - ASSERT_TRUE( x1.is_contiguous() ); - ASSERT_TRUE( & x1(0) == & x4(0,1,2,3) ); - ASSERT_TRUE( & x1(1) == & x4(1,1,2,3) ); + Kokkos::View< int**, Kokkos::LayoutLeft, Space > x2 = + Kokkos::subview( x4, Kokkos::pair< int, int >( 0, 2 ), 1 + , Kokkos::pair< int, int >( 1, 3 ), 2 ); - Kokkos::View x2 = - Kokkos::subview( x4, Kokkos::pair(0,2), 1, Kokkos::pair(1,3), 2 ); + ASSERT_TRUE( ! 
x2.is_contiguous() ); + ASSERT_TRUE( & x2( 0, 0 ) == & x4( 0, 1, 1, 2 ) ); + ASSERT_TRUE( & x2( 1, 0 ) == & x4( 1, 1, 1, 2 ) ); + ASSERT_TRUE( & x2( 0, 1 ) == & x4( 0, 1, 2, 2 ) ); + ASSERT_TRUE( & x2( 1, 1 ) == & x4( 1, 1, 2, 2 ) ); - ASSERT_TRUE( ! x2.is_contiguous() ); - ASSERT_TRUE( & x2(0,0) == & x4(0,1,1,2) ); - ASSERT_TRUE( & x2(1,0) == & x4(1,1,1,2) ); - ASSERT_TRUE( & x2(0,1) == & x4(0,1,2,2) ); - ASSERT_TRUE( & x2(1,1) == & x4(1,1,2,2) ); + // Kokkos::View< int**, Kokkos::LayoutLeft, Space > error_2 = + Kokkos::View< int**, Kokkos::LayoutStride, Space > sx2 = + Kokkos::subview( x4, 1, Kokkos::pair< int, int >( 0, 2 ) + , 2, Kokkos::pair< int, int >( 1, 4 ) ); - // Kokkos::View error_2 = - Kokkos::View sx2 = - Kokkos::subview( x4, 1, Kokkos::pair(0,2) - , 2, Kokkos::pair(1,4) ); + ASSERT_TRUE( ! sx2.is_contiguous() ); + ASSERT_TRUE( & sx2( 0, 0 ) == & x4( 1, 0, 2, 1 ) ); + ASSERT_TRUE( & sx2( 1, 0 ) == & x4( 1, 1, 2, 1 ) ); + ASSERT_TRUE( & sx2( 0, 1 ) == & x4( 1, 0, 2, 2 ) ); + ASSERT_TRUE( & sx2( 1, 1 ) == & x4( 1, 1, 2, 2 ) ); + ASSERT_TRUE( & sx2( 0, 2 ) == & x4( 1, 0, 2, 3 ) ); + ASSERT_TRUE( & sx2( 1, 2 ) == & x4( 1, 1, 2, 3 ) ); - ASSERT_TRUE( ! sx2.is_contiguous() ); - ASSERT_TRUE( & sx2(0,0) == & x4(1,0,2,1) ); - ASSERT_TRUE( & sx2(1,0) == & x4(1,1,2,1) ); - ASSERT_TRUE( & sx2(0,1) == & x4(1,0,2,2) ); - ASSERT_TRUE( & sx2(1,1) == & x4(1,1,2,2) ); - ASSERT_TRUE( & sx2(0,2) == & x4(1,0,2,3) ); - ASSERT_TRUE( & sx2(1,2) == & x4(1,1,2,3) ); + Kokkos::View< int****, Kokkos::LayoutStride, Space > sx4 = + Kokkos::subview( x4, Kokkos::pair< int, int >( 1, 2 ) /* of [2] */ + , Kokkos::pair< int, int >( 1, 3 ) /* of [3] */ + , Kokkos::pair< int, int >( 0, 4 ) /* of [4] */ + , Kokkos::pair< int, int >( 2, 4 ) /* of [5] */ + ); - Kokkos::View sx4 = - Kokkos::subview( x4, Kokkos::pair(1,2) /* of [2] */ - , Kokkos::pair(1,3) /* of [3] */ - , Kokkos::pair(0,4) /* of [4] */ - , Kokkos::pair(2,4) /* of [5] */ - ); - - ASSERT_TRUE( ! 
sx4.is_contiguous() ); - - for ( int i0 = 0 ; i0 < (int) sx4.dimension_0() ; ++i0 ) - for ( int i1 = 0 ; i1 < (int) sx4.dimension_1() ; ++i1 ) - for ( int i2 = 0 ; i2 < (int) sx4.dimension_2() ; ++i2 ) - for ( int i3 = 0 ; i3 < (int) sx4.dimension_3() ; ++i3 ) { - ASSERT_TRUE( & sx4(i0,i1,i2,i3) == & x4( 1+i0, 1+i1, 0+i2, 2+i3 ) ); - } + ASSERT_TRUE( ! sx4.is_contiguous() ); + for ( int i0 = 0; i0 < (int) sx4.dimension_0(); ++i0 ) + for ( int i1 = 0; i1 < (int) sx4.dimension_1(); ++i1 ) + for ( int i2 = 0; i2 < (int) sx4.dimension_2(); ++i2 ) + for ( int i3 = 0; i3 < (int) sx4.dimension_3(); ++i3 ) + { + ASSERT_TRUE( & sx4( i0, i1, i2, i3 ) == & x4( 1 + i0, 1 + i1, 0 + i2, 2 + i3 ) ); + } } } template< class Space > void test_left_3() { - typedef Kokkos::View< int ** , Kokkos::LayoutLeft , Space > view_type ; + typedef Kokkos::View< int **, Kokkos::LayoutLeft, Space > view_type; - if(Kokkos::Impl::SpaceAccessibility::accessible) { + if ( Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace, typename Space::memory_space >::accessible ) { + view_type xm( "x4", 10, 5 ); - view_type xm("x4",10,5); + ASSERT_TRUE( xm.is_contiguous() ); - ASSERT_TRUE( xm.is_contiguous() ); + Kokkos::View< int, Kokkos::LayoutLeft, Space > x0 = Kokkos::subview( xm, 5, 3 ); - Kokkos::View x0 = Kokkos::subview( xm , 5, 3 ); + ASSERT_TRUE( x0.is_contiguous() ); + ASSERT_TRUE( & x0() == & xm( 5, 3 ) ); - ASSERT_TRUE( x0.is_contiguous() ); - ASSERT_TRUE( & x0() == & xm(5,3) ); + Kokkos::View< int*, Kokkos::LayoutLeft, Space > x1 = Kokkos::subview( xm, Kokkos::ALL, 3 ); - Kokkos::View x1 = - Kokkos::subview( xm, Kokkos::ALL, 3 ); + ASSERT_TRUE( x1.is_contiguous() ); + for ( int i = 0; i < int( xm.dimension_0() ); ++i ) { + ASSERT_TRUE( & x1( i ) == & xm( i, 3 ) ); + } - ASSERT_TRUE( x1.is_contiguous() ); - for ( int i = 0 ; i < int(xm.dimension_0()) ; ++i ) { - ASSERT_TRUE( & x1(i) == & xm(i,3) ); - } + Kokkos::View< int**, Kokkos::LayoutLeft, Space > x2 = + Kokkos::subview( xm, Kokkos::pair< int, 
int >( 1, 9 ), Kokkos::ALL ); - Kokkos::View x2 = - Kokkos::subview( xm, Kokkos::pair(1,9), Kokkos::ALL ); + ASSERT_TRUE( ! x2.is_contiguous() ); + for ( int j = 0; j < int( x2.dimension_1() ); ++j ) + for ( int i = 0; i < int( x2.dimension_0() ); ++i ) + { + ASSERT_TRUE( & x2( i, j ) == & xm( 1 + i, j ) ); + } - ASSERT_TRUE( ! x2.is_contiguous() ); - for ( int j = 0 ; j < int(x2.dimension_1()) ; ++j ) - for ( int i = 0 ; i < int(x2.dimension_0()) ; ++i ) { - ASSERT_TRUE( & x2(i,j) == & xm(1+i,j) ); - } + Kokkos::View< int**, Kokkos::LayoutLeft, Space > x2c = + Kokkos::subview( xm, Kokkos::ALL, std::pair< int, int >( 2, 4 ) ); - Kokkos::View x2c = - Kokkos::subview( xm, Kokkos::ALL, std::pair(2,4) ); + ASSERT_TRUE( x2c.is_contiguous() ); + for ( int j = 0; j < int( x2c.dimension_1() ); ++j ) + for ( int i = 0; i < int( x2c.dimension_0() ); ++i ) + { + ASSERT_TRUE( & x2c( i, j ) == & xm( i, 2 + j ) ); + } - ASSERT_TRUE( x2c.is_contiguous() ); - for ( int j = 0 ; j < int(x2c.dimension_1()) ; ++j ) - for ( int i = 0 ; i < int(x2c.dimension_0()) ; ++i ) { - ASSERT_TRUE( & x2c(i,j) == & xm(i,2+j) ); - } + Kokkos::View< int**, Kokkos::LayoutLeft, Space > x2_n1 = + Kokkos::subview( xm, std::pair< int, int >( 1, 1 ), Kokkos::ALL ); - Kokkos::View x2_n1 = - Kokkos::subview( xm , std::pair(1,1) , Kokkos::ALL ); + ASSERT_TRUE( x2_n1.dimension_0() == 0 ); + ASSERT_TRUE( x2_n1.dimension_1() == xm.dimension_1() ); - ASSERT_TRUE( x2_n1.dimension_0() == 0 ); - ASSERT_TRUE( x2_n1.dimension_1() == xm.dimension_1() ); - - Kokkos::View x2_n2 = - Kokkos::subview( xm , Kokkos::ALL , std::pair(1,1) ); - - ASSERT_TRUE( x2_n2.dimension_0() == xm.dimension_0() ); - ASSERT_TRUE( x2_n2.dimension_1() == 0 ); + Kokkos::View< int**, Kokkos::LayoutLeft, Space > x2_n2 = + Kokkos::subview( xm, Kokkos::ALL, std::pair< int, int >( 1, 1 ) ); + ASSERT_TRUE( x2_n2.dimension_0() == xm.dimension_0() ); + ASSERT_TRUE( x2_n2.dimension_1() == 0 ); } } @@ -474,766 +478,814 @@ void test_left_3() template< 
class Space > void test_right_0() { - typedef Kokkos::View< int [2][3][4][5][2][3][4][5] , Kokkos::LayoutRight , Space > - view_static_8_type ; + typedef Kokkos::View< int [2][3][4][5][2][3][4][5], Kokkos::LayoutRight, Space > view_static_8_type; - if(Kokkos::Impl::SpaceAccessibility::accessible) { + if ( Kokkos::Impl::SpaceAccessibility::accessible ) { + view_static_8_type x_static_8( "x_static_right_8" ); - view_static_8_type x_static_8("x_static_right_8"); + Kokkos::View< int, Kokkos::LayoutRight, Space > x0 = Kokkos::subview( x_static_8, 0, 0, 0, 0, 0, 0, 0, 0 ); - Kokkos::View x0 = Kokkos::subview( x_static_8 , 0, 0, 0, 0, 0, 0, 0, 0 ); + ASSERT_TRUE( & x0() == & x_static_8( 0, 0, 0, 0, 0, 0, 0, 0 ) ); - ASSERT_TRUE( & x0() == & x_static_8(0,0,0,0,0,0,0,0) ); + Kokkos::View< int*, Kokkos::LayoutRight, Space > x1 = + Kokkos::subview( x_static_8, 0, 1, 2, 3, 0, 1, 2, Kokkos::pair< int, int >( 1, 3 ) ); - Kokkos::View x1 = - Kokkos::subview( x_static_8, 0, 1, 2, 3, 0, 1, 2, Kokkos::pair(1,3) ); + ASSERT_TRUE( x1.dimension_0() == 2 ); + ASSERT_TRUE( & x1( 0 ) == & x_static_8( 0, 1, 2, 3, 0, 1, 2, 1 ) ); + ASSERT_TRUE( & x1( 1 ) == & x_static_8( 0, 1, 2, 3, 0, 1, 2, 2 ) ); - ASSERT_TRUE( x1.dimension_0() == 2 ); - ASSERT_TRUE( & x1(0) == & x_static_8(0,1,2,3,0,1,2,1) ); - ASSERT_TRUE( & x1(1) == & x_static_8(0,1,2,3,0,1,2,2) ); + Kokkos::View< int**, Kokkos::LayoutRight, Space > x2 = + Kokkos::subview( x_static_8, 0, 1, 2, Kokkos::pair< int, int >( 1, 3 ) + , 0, 1, 2, Kokkos::pair< int, int >( 1, 3 ) ); - Kokkos::View x2 = - Kokkos::subview( x_static_8, 0, 1, 2, Kokkos::pair(1,3) - , 0, 1, 2, Kokkos::pair(1,3) ); + ASSERT_TRUE( x2.dimension_0() == 2 ); + ASSERT_TRUE( x2.dimension_1() == 2 ); + ASSERT_TRUE( & x2( 0, 0 ) == & x_static_8( 0, 1, 2, 1, 0, 1, 2, 1 ) ); + ASSERT_TRUE( & x2( 1, 0 ) == & x_static_8( 0, 1, 2, 2, 0, 1, 2, 1 ) ); + ASSERT_TRUE( & x2( 0, 1 ) == & x_static_8( 0, 1, 2, 1, 0, 1, 2, 2 ) ); + ASSERT_TRUE( & x2( 1, 1 ) == & x_static_8( 0, 1, 2, 2, 0, 
1, 2, 2 ) ); - ASSERT_TRUE( x2.dimension_0() == 2 ); - ASSERT_TRUE( x2.dimension_1() == 2 ); - ASSERT_TRUE( & x2(0,0) == & x_static_8(0,1,2,1,0,1,2,1) ); - ASSERT_TRUE( & x2(1,0) == & x_static_8(0,1,2,2,0,1,2,1) ); - ASSERT_TRUE( & x2(0,1) == & x_static_8(0,1,2,1,0,1,2,2) ); - ASSERT_TRUE( & x2(1,1) == & x_static_8(0,1,2,2,0,1,2,2) ); + // Kokkos::View< int**, Kokkos::LayoutRight, Space > error_2 = + Kokkos::View< int**, Kokkos::LayoutStride, Space > sx2 = + Kokkos::subview( x_static_8, 1, Kokkos::pair< int, int >( 0, 2 ), 2, 3 + , Kokkos::pair< int, int >( 0, 2 ), 1, 2, 3 ); - // Kokkos::View error_2 = - Kokkos::View sx2 = - Kokkos::subview( x_static_8, 1, Kokkos::pair(0,2), 2, 3 - , Kokkos::pair(0,2), 1, 2, 3 ); + ASSERT_TRUE( sx2.dimension_0() == 2 ); + ASSERT_TRUE( sx2.dimension_1() == 2 ); + ASSERT_TRUE( & sx2( 0, 0 ) == & x_static_8( 1, 0, 2, 3, 0, 1, 2, 3 ) ); + ASSERT_TRUE( & sx2( 1, 0 ) == & x_static_8( 1, 1, 2, 3, 0, 1, 2, 3 ) ); + ASSERT_TRUE( & sx2( 0, 1 ) == & x_static_8( 1, 0, 2, 3, 1, 1, 2, 3 ) ); + ASSERT_TRUE( & sx2( 1, 1 ) == & x_static_8( 1, 1, 2, 3, 1, 1, 2, 3 ) ); - ASSERT_TRUE( sx2.dimension_0() == 2 ); - ASSERT_TRUE( sx2.dimension_1() == 2 ); - ASSERT_TRUE( & sx2(0,0) == & x_static_8(1,0,2,3,0,1,2,3) ); - ASSERT_TRUE( & sx2(1,0) == & x_static_8(1,1,2,3,0,1,2,3) ); - ASSERT_TRUE( & sx2(0,1) == & x_static_8(1,0,2,3,1,1,2,3) ); - ASSERT_TRUE( & sx2(1,1) == & x_static_8(1,1,2,3,1,1,2,3) ); - - Kokkos::View sx4 = - Kokkos::subview( x_static_8, 0, Kokkos::pair(0,2) /* of [3] */ - , 1, Kokkos::pair(1,3) /* of [5] */ - , 1, Kokkos::pair(0,2) /* of [3] */ - , 2, Kokkos::pair(2,4) /* of [5] */ - ); - - ASSERT_TRUE( sx4.dimension_0() == 2 ); - ASSERT_TRUE( sx4.dimension_1() == 2 ); - ASSERT_TRUE( sx4.dimension_2() == 2 ); - ASSERT_TRUE( sx4.dimension_3() == 2 ); - for ( int i0 = 0 ; i0 < (int) sx4.dimension_0() ; ++i0 ) - for ( int i1 = 0 ; i1 < (int) sx4.dimension_1() ; ++i1 ) - for ( int i2 = 0 ; i2 < (int) sx4.dimension_2() ; ++i2 ) - for ( int i3 = 
0 ; i3 < (int) sx4.dimension_3() ; ++i3 ) { - ASSERT_TRUE( & sx4(i0,i1,i2,i3) == & x_static_8(0, 0+i0, 1, 1+i1, 1, 0+i2, 2, 2+i3) ); - } + Kokkos::View< int****, Kokkos::LayoutStride, Space > sx4 = + Kokkos::subview( x_static_8, 0, Kokkos::pair< int, int >( 0, 2 ) /* of [3] */ + , 1, Kokkos::pair< int, int >( 1, 3 ) /* of [5] */ + , 1, Kokkos::pair< int, int >( 0, 2 ) /* of [3] */ + , 2, Kokkos::pair< int, int >( 2, 4 ) /* of [5] */ + ); + ASSERT_TRUE( sx4.dimension_0() == 2 ); + ASSERT_TRUE( sx4.dimension_1() == 2 ); + ASSERT_TRUE( sx4.dimension_2() == 2 ); + ASSERT_TRUE( sx4.dimension_3() == 2 ); + for ( int i0 = 0; i0 < (int) sx4.dimension_0(); ++i0 ) + for ( int i1 = 0; i1 < (int) sx4.dimension_1(); ++i1 ) + for ( int i2 = 0; i2 < (int) sx4.dimension_2(); ++i2 ) + for ( int i3 = 0; i3 < (int) sx4.dimension_3(); ++i3 ) + { + ASSERT_TRUE( & sx4( i0, i1, i2, i3 ) == & x_static_8( 0, 0 + i0, 1, 1 + i1, 1, 0 + i2, 2, 2 + i3 ) ); + } } } template< class Space > void test_right_1() { - typedef Kokkos::View< int ****[2][3][4][5] , Kokkos::LayoutRight , Space > - view_type ; + typedef Kokkos::View< int ****[2][3][4][5], Kokkos::LayoutRight, Space > view_type; - if(Kokkos::Impl::SpaceAccessibility::accessible) { + if ( Kokkos::Impl::SpaceAccessibility::accessible ) { + view_type x8( "x_right_8", 2, 3, 4, 5 ); - view_type x8("x_right_8",2,3,4,5); + Kokkos::View< int, Kokkos::LayoutRight, Space > x0 = Kokkos::subview( x8, 0, 0, 0, 0, 0, 0, 0, 0 ); - Kokkos::View x0 = Kokkos::subview( x8 , 0, 0, 0, 0, 0, 0, 0, 0 ); + ASSERT_TRUE( & x0() == & x8( 0, 0, 0, 0, 0, 0, 0, 0 ) ); - ASSERT_TRUE( & x0() == & x8(0,0,0,0,0,0,0,0) ); + Kokkos::View< int*, Kokkos::LayoutRight, Space > x1 = + Kokkos::subview( x8, 0, 1, 2, 3, 0, 1, 2, Kokkos::pair< int, int >( 1, 3 ) ); - Kokkos::View x1 = - Kokkos::subview( x8, 0, 1, 2, 3, 0, 1, 2, Kokkos::pair(1,3) ); + ASSERT_TRUE( & x1( 0 ) == & x8( 0, 1, 2, 3, 0, 1, 2, 1 ) ); + ASSERT_TRUE( & x1( 1 ) == & x8( 0, 1, 2, 3, 0, 1, 2, 2 ) ); - 
ASSERT_TRUE( & x1(0) == & x8(0,1,2,3,0,1,2,1) ); - ASSERT_TRUE( & x1(1) == & x8(0,1,2,3,0,1,2,2) ); + Kokkos::View< int**, Kokkos::LayoutRight, Space > x2 = + Kokkos::subview( x8, 0, 1, 2, Kokkos::pair< int, int >( 1, 3 ) + , 0, 1, 2, Kokkos::pair< int, int >( 1, 3 ) ); - Kokkos::View x2 = - Kokkos::subview( x8, 0, 1, 2, Kokkos::pair(1,3) - , 0, 1, 2, Kokkos::pair(1,3) ); + ASSERT_TRUE( & x2( 0, 0 ) == & x8( 0, 1, 2, 1, 0, 1, 2, 1 ) ); + ASSERT_TRUE( & x2( 1, 0 ) == & x8( 0, 1, 2, 2, 0, 1, 2, 1 ) ); + ASSERT_TRUE( & x2( 0, 1 ) == & x8( 0, 1, 2, 1, 0, 1, 2, 2 ) ); + ASSERT_TRUE( & x2( 1, 1 ) == & x8( 0, 1, 2, 2, 0, 1, 2, 2 ) ); - ASSERT_TRUE( & x2(0,0) == & x8(0,1,2,1,0,1,2,1) ); - ASSERT_TRUE( & x2(1,0) == & x8(0,1,2,2,0,1,2,1) ); - ASSERT_TRUE( & x2(0,1) == & x8(0,1,2,1,0,1,2,2) ); - ASSERT_TRUE( & x2(1,1) == & x8(0,1,2,2,0,1,2,2) ); + // Kokkos::View< int**, Kokkos::LayoutRight, Space > error_2 = + Kokkos::View< int**, Kokkos::LayoutStride, Space > sx2 = + Kokkos::subview( x8, 1, Kokkos::pair< int, int >( 0, 2 ), 2, 3 + , Kokkos::pair< int, int >( 0, 2 ), 1, 2, 3 ); - // Kokkos::View error_2 = - Kokkos::View sx2 = - Kokkos::subview( x8, 1, Kokkos::pair(0,2), 2, 3 - , Kokkos::pair(0,2), 1, 2, 3 ); + ASSERT_TRUE( & sx2( 0, 0 ) == & x8( 1, 0, 2, 3, 0, 1, 2, 3 ) ); + ASSERT_TRUE( & sx2( 1, 0 ) == & x8( 1, 1, 2, 3, 0, 1, 2, 3 ) ); + ASSERT_TRUE( & sx2( 0, 1 ) == & x8( 1, 0, 2, 3, 1, 1, 2, 3 ) ); + ASSERT_TRUE( & sx2( 1, 1 ) == & x8( 1, 1, 2, 3, 1, 1, 2, 3 ) ); - ASSERT_TRUE( & sx2(0,0) == & x8(1,0,2,3,0,1,2,3) ); - ASSERT_TRUE( & sx2(1,0) == & x8(1,1,2,3,0,1,2,3) ); - ASSERT_TRUE( & sx2(0,1) == & x8(1,0,2,3,1,1,2,3) ); - ASSERT_TRUE( & sx2(1,1) == & x8(1,1,2,3,1,1,2,3) ); - - Kokkos::View sx4 = - Kokkos::subview( x8, 0, Kokkos::pair(0,2) /* of [3] */ - , 1, Kokkos::pair(1,3) /* of [5] */ - , 1, Kokkos::pair(0,2) /* of [3] */ - , 2, Kokkos::pair(2,4) /* of [5] */ - ); - - for ( int i0 = 0 ; i0 < (int) sx4.dimension_0() ; ++i0 ) - for ( int i1 = 0 ; i1 < (int) 
sx4.dimension_1() ; ++i1 ) - for ( int i2 = 0 ; i2 < (int) sx4.dimension_2() ; ++i2 ) - for ( int i3 = 0 ; i3 < (int) sx4.dimension_3() ; ++i3 ) { - ASSERT_TRUE( & sx4(i0,i1,i2,i3) == & x8(0,0+i0, 1,1+i1, 1,0+i2, 2,2+i3) ); - } + Kokkos::View< int****, Kokkos::LayoutStride, Space > sx4 = + Kokkos::subview( x8, 0, Kokkos::pair< int, int >( 0, 2 ) /* of [3] */ + , 1, Kokkos::pair< int, int >( 1, 3 ) /* of [5] */ + , 1, Kokkos::pair< int, int >( 0, 2 ) /* of [3] */ + , 2, Kokkos::pair< int, int >( 2, 4 ) /* of [5] */ + ); + for ( int i0 = 0; i0 < (int) sx4.dimension_0(); ++i0 ) + for ( int i1 = 0; i1 < (int) sx4.dimension_1(); ++i1 ) + for ( int i2 = 0; i2 < (int) sx4.dimension_2(); ++i2 ) + for ( int i3 = 0; i3 < (int) sx4.dimension_3(); ++i3 ) + { + ASSERT_TRUE( & sx4( i0, i1, i2, i3 ) == & x8( 0, 0 + i0, 1, 1 + i1, 1, 0 + i2, 2, 2 + i3 ) ); + } } } template< class Space > void test_right_3() { - typedef Kokkos::View< int ** , Kokkos::LayoutRight , Space > view_type ; + typedef Kokkos::View< int **, Kokkos::LayoutRight, Space > view_type; - if(Kokkos::Impl::SpaceAccessibility::accessible) { + if ( Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace, typename Space::memory_space >::accessible ) { + view_type xm( "x4", 10, 5 ); - view_type xm("x4",10,5); + ASSERT_TRUE( xm.is_contiguous() ); - ASSERT_TRUE( xm.is_contiguous() ); + Kokkos::View< int, Kokkos::LayoutRight, Space > x0 = Kokkos::subview( xm, 5, 3 ); - Kokkos::View x0 = Kokkos::subview( xm , 5, 3 ); + ASSERT_TRUE( x0.is_contiguous() ); + ASSERT_TRUE( & x0() == & xm( 5, 3 ) ); - ASSERT_TRUE( x0.is_contiguous() ); - ASSERT_TRUE( & x0() == & xm(5,3) ); + Kokkos::View< int*, Kokkos::LayoutRight, Space > x1 = Kokkos::subview( xm, 3, Kokkos::ALL ); - Kokkos::View x1 = - Kokkos::subview( xm, 3, Kokkos::ALL ); + ASSERT_TRUE( x1.is_contiguous() ); + for ( int i = 0; i < int( xm.dimension_1() ); ++i ) { + ASSERT_TRUE( & x1( i ) == & xm( 3, i ) ); + } - ASSERT_TRUE( x1.is_contiguous() ); - for ( int i = 0 ; i < 
int(xm.dimension_1()) ; ++i ) { - ASSERT_TRUE( & x1(i) == & xm(3,i) ); - } + Kokkos::View< int**, Kokkos::LayoutRight, Space > x2c = + Kokkos::subview( xm, Kokkos::pair< int, int >( 1, 9 ), Kokkos::ALL ); - Kokkos::View x2c = - Kokkos::subview( xm, Kokkos::pair(1,9), Kokkos::ALL ); + ASSERT_TRUE( x2c.is_contiguous() ); + for ( int j = 0; j < int( x2c.dimension_1() ); ++j ) + for ( int i = 0; i < int( x2c.dimension_0() ); ++i ) { + ASSERT_TRUE( & x2c( i, j ) == & xm( 1 + i, j ) ); + } - ASSERT_TRUE( x2c.is_contiguous() ); - for ( int j = 0 ; j < int(x2c.dimension_1()) ; ++j ) - for ( int i = 0 ; i < int(x2c.dimension_0()) ; ++i ) { - ASSERT_TRUE( & x2c(i,j) == & xm(1+i,j) ); - } + Kokkos::View< int**, Kokkos::LayoutRight, Space > x2 = + Kokkos::subview( xm, Kokkos::ALL, std::pair< int, int >( 2, 4 ) ); - Kokkos::View x2 = - Kokkos::subview( xm, Kokkos::ALL, std::pair(2,4) ); + ASSERT_TRUE( ! x2.is_contiguous() ); + for ( int j = 0; j < int( x2.dimension_1() ); ++j ) + for ( int i = 0; i < int( x2.dimension_0() ); ++i ) + { + ASSERT_TRUE( & x2( i, j ) == & xm( i, 2 + j ) ); + } - ASSERT_TRUE( ! 
x2.is_contiguous() ); - for ( int j = 0 ; j < int(x2.dimension_1()) ; ++j ) - for ( int i = 0 ; i < int(x2.dimension_0()) ; ++i ) { - ASSERT_TRUE( & x2(i,j) == & xm(i,2+j) ); - } + Kokkos::View< int**, Kokkos::LayoutRight, Space > x2_n1 = + Kokkos::subview( xm, std::pair< int, int >( 1, 1 ), Kokkos::ALL ); - Kokkos::View x2_n1 = - Kokkos::subview( xm , std::pair(1,1) , Kokkos::ALL ); + ASSERT_TRUE( x2_n1.dimension_0() == 0 ); + ASSERT_TRUE( x2_n1.dimension_1() == xm.dimension_1() ); - ASSERT_TRUE( x2_n1.dimension_0() == 0 ); - ASSERT_TRUE( x2_n1.dimension_1() == xm.dimension_1() ); - - Kokkos::View x2_n2 = - Kokkos::subview( xm , Kokkos::ALL , std::pair(1,1) ); - - ASSERT_TRUE( x2_n2.dimension_0() == xm.dimension_0() ); - ASSERT_TRUE( x2_n2.dimension_1() == 0 ); + Kokkos::View< int**, Kokkos::LayoutRight, Space > x2_n2 = + Kokkos::subview( xm, Kokkos::ALL, std::pair< int, int >( 1, 1 ) ); + ASSERT_TRUE( x2_n2.dimension_0() == xm.dimension_0() ); + ASSERT_TRUE( x2_n2.dimension_1() == 0 ); } } namespace Impl { -constexpr int N0=113; -constexpr int N1=11; -constexpr int N2=17; -constexpr int N3=5; -constexpr int N4=7; +constexpr int N0 = 113; +constexpr int N1 = 11; +constexpr int N2 = 17; +constexpr int N3 = 5; +constexpr int N4 = 7; -template -void test_Check1D(SubView a, View b, std::pair range) { +template< class SubView, class View > +void test_Check1D( SubView a, View b, std::pair< int, int > range ) { int errors = 0; - for(int i=0;i0) - std::cout << "Error Suviews test_Check1D: " << errors < 0 ) { + std::cout << "Error Suviews test_Check1D: " << errors << std::endl; + } + ASSERT_TRUE( errors == 0 ); } -template -void test_Check1D2D(SubView a, View b, int i0, std::pair range) { +template< class SubView, class View > +void test_Check1D2D( SubView a, View b, int i0, std::pair< int, int > range ) { int errors = 0; - for(int i1=0;i10) - std::cout << "Error Suviews test_Check1D2D: " << errors < 0 ) { + std::cout << "Error Suviews test_Check1D2D: " << errors << 
std::endl; + } + ASSERT_TRUE( errors == 0 ); } -template -void test_Check2D3D(SubView a, View b, int i0, std::pair range1, std::pair range2) { +template< class SubView, class View > +void test_Check2D3D( SubView a, View b, int i0, std::pair< int, int > range1 + , std::pair< int, int > range2 ) +{ int errors = 0; - for(int i1=0;i10) - std::cout << "Error Suviews test_Check2D3D: " << errors < 0 ) { + std::cout << "Error Suviews test_Check2D3D: " << errors << std::endl; + } + ASSERT_TRUE( errors == 0 ); } -template -void test_Check3D5D(SubView a, View b, int i0, int i1, std::pair range2, std::pair range3, std::pair range4) { +template +void test_Check3D5D( SubView a, View b, int i0, int i1, std::pair< int, int > range2 + , std::pair< int, int > range3, std::pair< int, int > range4 ) +{ int errors = 0; - for(int i2=0;i20) - std::cout << "Error Suviews test_Check3D5D: " << errors < 0 ) { + std::cout << "Error Suviews test_Check3D5D: " << errors << std::endl; + } + ASSERT_TRUE( errors == 0 ); } -template +template< class Space, class LayoutSub, class Layout, class LayoutOrg, class MemTraits > void test_1d_assign_impl() { - - { //Breaks - Kokkos::View a_org("A",N0); - Kokkos::View a(a_org); + { // Breaks. + Kokkos::View< int*, LayoutOrg, Space > a_org( "A", N0 ); + Kokkos::View< int*, LayoutOrg, Space, MemTraits > a( a_org ); Kokkos::fence(); - for(int i=0; i a1(a); + Kokkos::View< int[N0], Layout, Space, MemTraits > a1( a ); Kokkos::fence(); - test_Check1D(a1,a,std::pair(0,N0)); + test_Check1D( a1, a, std::pair< int, int >( 0, N0 ) ); - Kokkos::View a2(a1); + Kokkos::View< int[N0], LayoutSub, Space, MemTraits > a2( a1 ); Kokkos::fence(); - test_Check1D(a2,a,std::pair(0,N0)); + test_Check1D( a2, a, std::pair< int, int >( 0, N0 ) ); a1 = a; - test_Check1D(a1,a,std::pair(0,N0)); + test_Check1D( a1, a, std::pair< int, int >( 0, N0 ) ); - //Runtime Fail expected - //Kokkos::View afail1(a); + // Runtime Fail expected. 
+ //Kokkos::View< int[N1] > afail1( a ); - //Compile Time Fail expected - //Kokkos::View afail2(a1); + // Compile Time Fail expected. + //Kokkos::View< int[N1] > afail2( a1 ); } - { // Works - Kokkos::View a("A"); - Kokkos::View a1(a); + { // Works. + Kokkos::View< int[N0], LayoutOrg, Space, MemTraits > a( "A" ); + Kokkos::View< int*, Layout, Space, MemTraits > a1( a ); Kokkos::fence(); - test_Check1D(a1,a,std::pair(0,N0)); + test_Check1D( a1, a, std::pair< int, int >( 0, N0 ) ); a1 = a; Kokkos::fence(); - test_Check1D(a1,a,std::pair(0,N0)); + test_Check1D( a1, a, std::pair< int, int >( 0, N0 ) ); } } -template +template< class Space, class Type, class TypeSub, class LayoutSub, class Layout, class LayoutOrg, class MemTraits > void test_2d_subview_3d_impl_type() { - Kokkos::View a_org("A",N0,N1,N2); - Kokkos::View a(a_org); - for(int i0=0; i0 a1; - a1 = Kokkos::subview(a,3,Kokkos::ALL,Kokkos::ALL); - Kokkos::fence(); - test_Check2D3D(a1,a,3,std::pair(0,N1),std::pair(0,N2)); + Kokkos::View< int***, LayoutOrg, Space > a_org( "A", N0, N1, N2 ); + Kokkos::View< Type, Layout, Space, MemTraits > a( a_org ); - Kokkos::View a2(a,3,Kokkos::ALL,Kokkos::ALL); + for ( int i0 = 0; i0 < N0; i0++ ) + for ( int i1 = 0; i1 < N1; i1++ ) + for ( int i2 = 0; i2 < N2; i2++ ) + { + a_org( i0, i1, i2 ) = i0 * 1000000 + i1 * 1000 + i2; + } + + Kokkos::View< TypeSub, LayoutSub, Space, MemTraits > a1; + a1 = Kokkos::subview( a, 3, Kokkos::ALL, Kokkos::ALL ); Kokkos::fence(); - test_Check2D3D(a2,a,3,std::pair(0,N1),std::pair(0,N2)); + test_Check2D3D( a1, a, 3, std::pair< int, int >( 0, N1 ), std::pair< int, int >( 0, N2 ) ); + + Kokkos::View< TypeSub, LayoutSub, Space, MemTraits > a2( a, 3, Kokkos::ALL, Kokkos::ALL ); + Kokkos::fence(); + test_Check2D3D( a2, a, 3, std::pair< int, int >( 0, N1 ), std::pair< int, int >( 0, N2 ) ); } -template +template< class Space, class LayoutSub, class Layout, class LayoutOrg, class MemTraits > void test_2d_subview_3d_impl_layout() { - 
test_2d_subview_3d_impl_type(); - test_2d_subview_3d_impl_type(); - test_2d_subview_3d_impl_type(); + test_2d_subview_3d_impl_type< Space, int[N0][N1][N2], int[N1][N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, int[N0][N1][N2], int* [N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, int[N0][N1][N2], int** , LayoutSub, Layout, LayoutOrg, MemTraits >(); - test_2d_subview_3d_impl_type(); - test_2d_subview_3d_impl_type(); - test_2d_subview_3d_impl_type(); + test_2d_subview_3d_impl_type< Space, int* [N1][N2], int[N1][N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, int* [N1][N2], int* [N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, int* [N1][N2], int** , LayoutSub, Layout, LayoutOrg, MemTraits >(); - test_2d_subview_3d_impl_type(); - test_2d_subview_3d_impl_type(); - test_2d_subview_3d_impl_type(); + test_2d_subview_3d_impl_type< Space, int** [N2], int[N1][N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, int** [N2], int* [N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, int** [N2], int** , LayoutSub, Layout, LayoutOrg, MemTraits >(); - test_2d_subview_3d_impl_type(); - test_2d_subview_3d_impl_type(); - test_2d_subview_3d_impl_type(); + test_2d_subview_3d_impl_type< Space, int*** , int[N1][N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, int*** , int* [N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, int*** , int** , LayoutSub, Layout, LayoutOrg, MemTraits >(); - test_2d_subview_3d_impl_type(); - test_2d_subview_3d_impl_type(); - test_2d_subview_3d_impl_type(); + test_2d_subview_3d_impl_type< Space, const int[N0][N1][N2], const int[N1][N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, const int[N0][N1][N2], const 
int* [N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, const int[N0][N1][N2], const int** , LayoutSub, Layout, LayoutOrg, MemTraits >(); - test_2d_subview_3d_impl_type(); - test_2d_subview_3d_impl_type(); - test_2d_subview_3d_impl_type(); + test_2d_subview_3d_impl_type< Space, const int* [N1][N2], const int[N1][N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, const int* [N1][N2], const int* [N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, const int* [N1][N2], const int** , LayoutSub, Layout, LayoutOrg, MemTraits >(); - test_2d_subview_3d_impl_type(); - test_2d_subview_3d_impl_type(); - test_2d_subview_3d_impl_type(); + test_2d_subview_3d_impl_type< Space, const int** [N2], const int[N1][N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, const int** [N2], const int* [N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, const int** [N2], const int** , LayoutSub, Layout, LayoutOrg, MemTraits >(); - test_2d_subview_3d_impl_type(); - test_2d_subview_3d_impl_type(); - test_2d_subview_3d_impl_type(); + test_2d_subview_3d_impl_type< Space, const int*** , const int[N1][N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, const int*** , const int* [N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, const int*** , const int** , LayoutSub, Layout, LayoutOrg, MemTraits >(); } -template +template< class Space, class Type, class TypeSub, class LayoutSub, class Layout, class LayoutOrg, class MemTraits > void test_3d_subview_5d_impl_type() { - Kokkos::View a_org("A",N0,N1,N2,N3,N4); - Kokkos::View a(a_org); - for(int i0=0; i0 a1; - a1 = Kokkos::subview(a,3,5,Kokkos::ALL,Kokkos::ALL,Kokkos::ALL); - Kokkos::fence(); - test_Check3D5D(a1,a,3,5,std::pair(0,N2),std::pair(0,N3),std::pair(0,N4)); + Kokkos::View< int*****, 
LayoutOrg, Space > a_org( "A", N0, N1, N2, N3, N4 ); + Kokkos::View< Type, Layout, Space, MemTraits > a( a_org ); - Kokkos::View a2(a,3,5,Kokkos::ALL,Kokkos::ALL,Kokkos::ALL); + for ( int i0 = 0; i0 < N0; i0++ ) + for ( int i1 = 0; i1 < N1; i1++ ) + for ( int i2 = 0; i2 < N2; i2++ ) + for ( int i3 = 0; i3 < N3; i3++ ) + for ( int i4 = 0; i4 < N4; i4++ ) + { + a_org( i0, i1, i2, i3, i4 ) = i0 * 1000000 + i1 * 10000 + i2 * 100 + i3 * 10 + i4; + } + + Kokkos::View< TypeSub, LayoutSub, Space, MemTraits > a1; + a1 = Kokkos::subview( a, 3, 5, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL ); Kokkos::fence(); - test_Check3D5D(a2,a,3,5,std::pair(0,N2),std::pair(0,N3),std::pair(0,N4)); + test_Check3D5D( a1, a, 3, 5, std::pair< int, int >( 0, N2 ), std::pair< int, int >( 0, N3 ), std::pair< int, int >( 0, N4 ) ); + + Kokkos::View< TypeSub, LayoutSub, Space, MemTraits > a2( a, 3, 5, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL ); + Kokkos::fence(); + test_Check3D5D( a2, a, 3, 5, std::pair< int, int >( 0, N2 ), std::pair< int, int >( 0, N3 ), std::pair< int, int >( 0, N4 ) ); } -template +template< class Space, class LayoutSub, class Layout, class LayoutOrg, class MemTraits > void test_3d_subview_5d_impl_layout() { - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); + test_3d_subview_5d_impl_type< Space, int[N0][N1][N2][N3][N4], int[N2][N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int[N0][N1][N2][N3][N4], int* [N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int[N0][N1][N2][N3][N4], int** [N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int[N0][N1][N2][N3][N4], int*** , LayoutSub, Layout, LayoutOrg, MemTraits >(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); + test_3d_subview_5d_impl_type< 
Space, int* [N1][N2][N3][N4], int[N2][N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int* [N1][N2][N3][N4], int* [N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int* [N1][N2][N3][N4], int** [N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int* [N1][N2][N3][N4], int*** , LayoutSub, Layout, LayoutOrg, MemTraits >(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); + test_3d_subview_5d_impl_type< Space, int** [N2][N3][N4], int[N2][N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int** [N2][N3][N4], int* [N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int** [N2][N3][N4], int** [N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int** [N2][N3][N4], int*** , LayoutSub, Layout, LayoutOrg, MemTraits >(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); + test_3d_subview_5d_impl_type< Space, int*** [N3][N4], int[N2][N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int*** [N3][N4], int* [N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int*** [N3][N4], int** [N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int*** [N3][N4], int*** , LayoutSub, Layout, LayoutOrg, MemTraits >(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); + test_3d_subview_5d_impl_type< Space, int**** [N4], int[N2][N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int**** [N4], int* [N3][N4], LayoutSub, Layout, LayoutOrg, 
MemTraits >(); + test_3d_subview_5d_impl_type< Space, int**** [N4], int** [N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int**** [N4], int*** , LayoutSub, Layout, LayoutOrg, MemTraits >(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); + test_3d_subview_5d_impl_type< Space, int***** , int[N2][N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int***** , int* [N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int***** , int** [N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int***** , int*** , LayoutSub, Layout, LayoutOrg, MemTraits >(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); + test_3d_subview_5d_impl_type< Space, const int[N0][N1][N2][N3][N4], const int[N2][N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int[N0][N1][N2][N3][N4], const int* [N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int[N0][N1][N2][N3][N4], const int** [N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int[N0][N1][N2][N3][N4], const int*** , LayoutSub, Layout, LayoutOrg, MemTraits >(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); + test_3d_subview_5d_impl_type< Space, const int* [N1][N2][N3][N4], const int[N2][N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int* [N1][N2][N3][N4], const int* [N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int* [N1][N2][N3][N4], const int** [N4], LayoutSub, Layout, LayoutOrg, 
MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int* [N1][N2][N3][N4], const int*** , LayoutSub, Layout, LayoutOrg, MemTraits >(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); + test_3d_subview_5d_impl_type< Space, const int** [N2][N3][N4], const int[N2][N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int** [N2][N3][N4], const int* [N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int** [N2][N3][N4], const int** [N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int** [N2][N3][N4], const int*** , LayoutSub, Layout, LayoutOrg, MemTraits >(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); + test_3d_subview_5d_impl_type< Space, const int*** [N3][N4], const int[N2][N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int*** [N3][N4], const int* [N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int*** [N3][N4], const int** [N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int*** [N3][N4], const int*** , LayoutSub, Layout, LayoutOrg, MemTraits >(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); + test_3d_subview_5d_impl_type< Space, const int**** [N4], const int[N2][N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int**** [N4], const int* [N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int**** [N4], const int** [N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int**** 
[N4], const int*** , LayoutSub, Layout, LayoutOrg, MemTraits >(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); - test_3d_subview_5d_impl_type(); + test_3d_subview_5d_impl_type< Space, const int***** , const int[N2][N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int***** , const int* [N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int***** , const int** [N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int***** , const int*** , LayoutSub, Layout, LayoutOrg, MemTraits >(); } inline void test_subview_legal_args_right() { - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,int,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair,int,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,int,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,Kokkos::pair,int,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair,Kokkos::pair,int,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair,Kokkos::Impl::ALL_t,int,int>::value)); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, 
Kokkos::Impl::ALL_t, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::pair, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::Impl::ALL_t, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::pair, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, Kokkos::pair, Kokkos::pair, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, Kokkos::pair, Kokkos::Impl::ALL_t, int, int >::value ) ); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,int,Kokkos::Impl::ALL_t,Kokkos::pair,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,int,Kokkos::pair,Kokkos::pair,int>::value)); - 
ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,int,Kokkos::Impl::ALL_t,Kokkos::pair,int>::value)); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::pair, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::pair, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, int, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, int, Kokkos::pair, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, int, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime::value)); - 
ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,int,Kokkos::pair,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,int,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,int,Kokkos::pair,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair,int,Kokkos::pair,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,int,Kokkos::pair,int>::value)); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair, int, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair, int, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, Kokkos::Impl::ALL_t, int, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( 
Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, Kokkos::pair, int, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, Kokkos::Impl::ALL_t, int, Kokkos::pair, int >::value ) ); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,Kokkos::pair,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair,Kokkos::pair,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair,Kokkos::Impl::ALL_t,int>::value)); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair, 
Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair, Kokkos::pair, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair, Kokkos::pair, Kokkos::Impl::ALL_t, int >::value ) ); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,int,Kokkos::pair>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,int,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,int,Kokkos::pair>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair,int,Kokkos::pair>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair,int,Kokkos::Impl::ALL_t>::value)); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 
5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair, int, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair, int, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair, Kokkos::Impl::ALL_t, int, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair, Kokkos::pair, int, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair, Kokkos::pair, int, Kokkos::Impl::ALL_t >::value ) ); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime>::value)); - ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,Kokkos::pair>::value)); - ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair,Kokkos::pair>::value)); - 
ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair,Kokkos::Impl::ALL_t>::value)); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair >::value ) ); + ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::pair >::value ) ); + ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::pair, Kokkos::pair, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::pair, Kokkos::pair, Kokkos::Impl::ALL_t >::value ) ); - ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime>::value)); - 
ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,Kokkos::pair>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair,Kokkos::pair>::value)); + ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair >::value ) ); + ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::pair, Kokkos::pair, Kokkos::Impl::ALL_t 
>::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::pair, Kokkos::pair, Kokkos::pair >::value ) ); } inline void test_subview_legal_args_left() { - ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,int,int>::value)); - ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair,int,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,int,int>::value)); - ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,Kokkos::pair,int,int>::value)); - ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair,Kokkos::pair,int,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair,Kokkos::Impl::ALL_t,int,int>::value)); + ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair, int, int >::value ) ); + ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::pair, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::Impl::ALL_t, int, int >::value ) ); + ASSERT_EQ( 1, ( 
Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::pair, int, int >::value ) ); + ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, Kokkos::pair, Kokkos::pair, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, Kokkos::pair, Kokkos::Impl::ALL_t, int, int >::value ) ); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,int,Kokkos::Impl::ALL_t,Kokkos::pair,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,int,Kokkos::pair,Kokkos::pair,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,int,Kokkos::Impl::ALL_t,Kokkos::pair,int>::value)); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, int, 
Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::pair, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::pair, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, int, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, int, Kokkos::pair, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, int, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,int,Kokkos::pair,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,int,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,int,Kokkos::pair,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair,int,Kokkos::pair,int>::value)); - 
ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,int,Kokkos::pair,int>::value)); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair, int, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair, int, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, Kokkos::Impl::ALL_t, int, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, Kokkos::pair, int, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, Kokkos::Impl::ALL_t, int, Kokkos::pair, int >::value ) ); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair,int>::value)); - 
ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,Kokkos::pair,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair,Kokkos::pair,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair,Kokkos::Impl::ALL_t,int>::value)); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair, Kokkos::pair, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( 
Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair, Kokkos::pair, Kokkos::Impl::ALL_t, int >::value ) ); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,int,Kokkos::pair>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,int,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,int,Kokkos::pair>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair,int,Kokkos::pair>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair,int,Kokkos::Impl::ALL_t>::value)); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair, int, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair, int, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair, Kokkos::Impl::ALL_t, 
int, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair, Kokkos::pair, int, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair, Kokkos::pair, int, Kokkos::Impl::ALL_t >::value ) ); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,Kokkos::pair>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair,Kokkos::pair>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair,Kokkos::Impl::ALL_t>::value)); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, 
Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::pair, Kokkos::pair, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::pair, Kokkos::pair, Kokkos::Impl::ALL_t >::value ) ); - ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime>::value)); - ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime::value)); - ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,Kokkos::pair>::value)); - ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime,Kokkos::pair,Kokkos::pair>::value)); + ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, 
Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair >::value ) ); + ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::pair >::value ) ); + ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::pair, Kokkos::pair, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::pair, Kokkos::pair, Kokkos::pair >::value ) ); } -} +} // namespace Impl -template< class Space, class MemTraits = void> +template< class Space, class MemTraits = void > void test_1d_assign() { - Impl::test_1d_assign_impl(); - //Impl::test_1d_assign_impl(); - Impl::test_1d_assign_impl(); - //Impl::test_1d_assign_impl(); - Impl::test_1d_assign_impl(); - Impl::test_1d_assign_impl(); - //Impl::test_1d_assign_impl(); - //Impl::test_1d_assign_impl(); - Impl::test_1d_assign_impl(); + Impl::test_1d_assign_impl< Space, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, MemTraits >(); + //Impl::test_1d_assign_impl< Space, Kokkos::LayoutRight, 
Kokkos::LayoutLeft, Kokkos::LayoutLeft >(); + Impl::test_1d_assign_impl< Space, Kokkos::LayoutStride, Kokkos::LayoutLeft, Kokkos::LayoutLeft, MemTraits >(); + //Impl::test_1d_assign_impl< Space, Kokkos::LayoutLeft, Kokkos::LayoutRight, Kokkos::LayoutLeft >(); + Impl::test_1d_assign_impl< Space, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, MemTraits >(); + Impl::test_1d_assign_impl< Space, Kokkos::LayoutStride, Kokkos::LayoutRight, Kokkos::LayoutRight, MemTraits >(); + //Impl::test_1d_assign_impl< Space, Kokkos::LayoutLeft, Kokkos::LayoutStride, Kokkos::LayoutLeft >(); + //Impl::test_1d_assign_impl< Space, Kokkos::LayoutRight, Kokkos::LayoutStride, Kokkos::LayoutLeft >(); + Impl::test_1d_assign_impl< Space, Kokkos::LayoutStride, Kokkos::LayoutStride, Kokkos::LayoutLeft, MemTraits >(); } -template +template< class Space, class MemTraits = void > void test_2d_subview_3d() { - Impl::test_2d_subview_3d_impl_layout(); - Impl::test_2d_subview_3d_impl_layout(); - Impl::test_2d_subview_3d_impl_layout(); - Impl::test_2d_subview_3d_impl_layout(); - Impl::test_2d_subview_3d_impl_layout(); + Impl::test_2d_subview_3d_impl_layout< Space, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, MemTraits >(); + Impl::test_2d_subview_3d_impl_layout< Space, Kokkos::LayoutStride, Kokkos::LayoutRight, Kokkos::LayoutRight, MemTraits >(); + Impl::test_2d_subview_3d_impl_layout< Space, Kokkos::LayoutStride, Kokkos::LayoutStride, Kokkos::LayoutRight, MemTraits >(); + Impl::test_2d_subview_3d_impl_layout< Space, Kokkos::LayoutStride, Kokkos::LayoutLeft, Kokkos::LayoutLeft, MemTraits >(); + Impl::test_2d_subview_3d_impl_layout< Space, Kokkos::LayoutStride, Kokkos::LayoutStride, Kokkos::LayoutLeft, MemTraits >(); } -template +template< class Space, class MemTraits = void > void test_3d_subview_5d_right() { - Impl::test_3d_subview_5d_impl_layout(); - Impl::test_3d_subview_5d_impl_layout(); + Impl::test_3d_subview_5d_impl_layout< Space, Kokkos::LayoutStride, 
Kokkos::LayoutRight, Kokkos::LayoutRight, MemTraits >(); + Impl::test_3d_subview_5d_impl_layout< Space, Kokkos::LayoutStride, Kokkos::LayoutStride, Kokkos::LayoutRight, MemTraits >(); } -template +template< class Space, class MemTraits = void > void test_3d_subview_5d_left() { - Impl::test_3d_subview_5d_impl_layout(); - Impl::test_3d_subview_5d_impl_layout(); + Impl::test_3d_subview_5d_impl_layout< Space, Kokkos::LayoutStride, Kokkos::LayoutLeft, Kokkos::LayoutLeft, MemTraits >(); + Impl::test_3d_subview_5d_impl_layout< Space, Kokkos::LayoutStride, Kokkos::LayoutStride, Kokkos::LayoutLeft, MemTraits >(); } - - namespace Impl { - template - struct FillView_3D { - Kokkos::View a; +template< class Layout, class Space > +struct FillView_3D { + Kokkos::View< int***, Layout, Space > a; - KOKKOS_INLINE_FUNCTION - void operator() (const int& ii) const { - const int i = std::is_same::value ? - ii % a.dimension_0(): ii / (a.dimension_1()*a.dimension_2()); - const int j = std::is_same::value ? - (ii / a.dimension_0()) % a.dimension_1() : (ii / a.dimension_2()) % a.dimension_1(); - const int k = std::is_same::value ? - ii / (a.dimension_0() * a.dimension_1()) : ii % a.dimension_2(); - a(i,j,k) = 1000000 * i + 1000 * j + k; + KOKKOS_INLINE_FUNCTION + void operator()( const int & ii ) const + { + const int i = std::is_same< Layout, Kokkos::LayoutLeft >::value + ? ii % a.dimension_0() + : ii / ( a.dimension_1() * a.dimension_2() ); + + const int j = std::is_same< Layout, Kokkos::LayoutLeft >::value + ? ( ii / a.dimension_0() ) % a.dimension_1() + : ( ii / a.dimension_2() ) % a.dimension_1(); + + const int k = std::is_same< Layout, Kokkos::LayoutRight >::value + ? 
ii / ( a.dimension_0() * a.dimension_1() ) + : ii % a.dimension_2(); + + a( i, j, k ) = 1000000 * i + 1000 * j + k; + } +}; + +template< class Layout, class Space > +struct FillView_4D { + Kokkos::View< int****, Layout, Space > a; + + KOKKOS_INLINE_FUNCTION + void operator()( const int & ii ) const { + const int i = std::is_same< Layout, Kokkos::LayoutLeft >::value + ? ii % a.dimension_0() + : ii / ( a.dimension_1() * a.dimension_2() * a.dimension_3() ); + + const int j = std::is_same< Layout, Kokkos::LayoutLeft >::value + ? ( ii / a.dimension_0() ) % a.dimension_1() + : ( ii / ( a.dimension_2() * a.dimension_3() ) % a.dimension_1() ); + + const int k = std::is_same< Layout, Kokkos::LayoutRight >::value + ? ( ii / ( a.dimension_0() * a.dimension_1() ) ) % a.dimension_2() + : ( ii / a.dimension_3() ) % a.dimension_2(); + + const int l = std::is_same< Layout, Kokkos::LayoutRight >::value + ? ii / ( a.dimension_0() * a.dimension_1() * a.dimension_2() ) + : ii % a.dimension_3(); + + a( i, j, k, l ) = 1000000 * i + 10000 * j + 100 * k + l; + } +}; + +template< class Layout, class Space, class MemTraits > +struct CheckSubviewCorrectness_3D_3D { + Kokkos::View< const int***, Layout, Space, MemTraits > a; + Kokkos::View< const int***, Layout, Space, MemTraits > b; + int offset_0, offset_2; + + KOKKOS_INLINE_FUNCTION + void operator()( const int & ii ) const + { + const int i = std::is_same< Layout, Kokkos::LayoutLeft >::value + ? ii % b.dimension_0() + : ii / ( b.dimension_1() * b.dimension_2() ); + + const int j = std::is_same< Layout, Kokkos::LayoutLeft >::value + ? ( ii / b.dimension_0() ) % b.dimension_1() + : ( ii / b.dimension_2() ) % b.dimension_1(); + + const int k = std::is_same< Layout, Kokkos::LayoutRight >::value + ? 
ii / ( b.dimension_0() * b.dimension_1() ) + : ii % b.dimension_2(); + + if ( a( i + offset_0, j, k + offset_2 ) != b( i, j, k ) ) { + Kokkos::abort( "Error: check_subview_correctness 3D-3D (LayoutLeft -> LayoutLeft or LayoutRight -> LayoutRight)" ); } - }; + } +}; - template - struct FillView_4D { - Kokkos::View a; +template< class Layout, class Space, class MemTraits > +struct CheckSubviewCorrectness_3D_4D { + Kokkos::View< const int****, Layout, Space, MemTraits > a; + Kokkos::View< const int***, Layout, Space, MemTraits > b; + int offset_0, offset_2, index; - KOKKOS_INLINE_FUNCTION - void operator() (const int& ii) const { - const int i = std::is_same::value ? - ii % a.dimension_0(): ii / (a.dimension_1()*a.dimension_2()*a.dimension_3()); - const int j = std::is_same::value ? - (ii / a.dimension_0()) % a.dimension_1() : (ii / (a.dimension_2()*a.dimension_3()) % a.dimension_1()); - const int k = std::is_same::value ? - (ii / (a.dimension_0() * a.dimension_1())) % a.dimension_2() : (ii / a.dimension_3()) % a.dimension_2(); - const int l = std::is_same::value ? - ii / (a.dimension_0() * a.dimension_1() * a.dimension_2()) : ii % a.dimension_3(); - a(i,j,k,l) = 1000000 * i + 10000 * j + 100 * k + l; + KOKKOS_INLINE_FUNCTION + void operator()( const int & ii ) const { + const int i = std::is_same< Layout, Kokkos::LayoutLeft >::value + ? ii % b.dimension_0() + : ii / ( b.dimension_1() * b.dimension_2() ); + + const int j = std::is_same< Layout, Kokkos::LayoutLeft >::value + ? ( ii / b.dimension_0() ) % b.dimension_1() + : ( ii / b.dimension_2() ) % b.dimension_1(); + + const int k = std::is_same< Layout, Kokkos::LayoutRight >::value + ? 
ii / ( b.dimension_0() * b.dimension_1() ) + : ii % b.dimension_2(); + + int i0, i1, i2, i3; + + if ( std::is_same< Layout, Kokkos::LayoutLeft >::value ) { + i0 = i + offset_0; + i1 = j; + i2 = k + offset_2; + i3 = index; } - }; - - template - struct CheckSubviewCorrectness_3D_3D { - Kokkos::View a; - Kokkos::View b; - int offset_0,offset_2; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& ii) const { - const int i = std::is_same::value ? - ii % b.dimension_0(): ii / (b.dimension_1()*b.dimension_2()); - const int j = std::is_same::value ? - (ii / b.dimension_0()) % b.dimension_1() : (ii / b.dimension_2()) % b.dimension_1(); - const int k = std::is_same::value ? - ii / (b.dimension_0() * b.dimension_1()) : ii % b.dimension_2(); - if( a(i+offset_0,j,k+offset_2) != b(i,j,k)) - Kokkos::abort("Error: check_subview_correctness 3D-3D (LayoutLeft -> LayoutLeft or LayoutRight -> LayoutRight)"); + else { + i0 = index; + i1 = i + offset_0; + i2 = j; + i3 = k + offset_2; } - }; - template - struct CheckSubviewCorrectness_3D_4D { - Kokkos::View a; - Kokkos::View b; - int offset_0,offset_2,index; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& ii) const { - const int i = std::is_same::value ? - ii % b.dimension_0(): ii / (b.dimension_1()*b.dimension_2()); - const int j = std::is_same::value ? - (ii / b.dimension_0()) % b.dimension_1() : (ii / b.dimension_2()) % b.dimension_1(); - const int k = std::is_same::value ? 
- ii / (b.dimension_0() * b.dimension_1()) : ii % b.dimension_2(); - - int i0,i1,i2,i3; - if(std::is_same::value) { - i0 = i + offset_0; - i1 = j; - i2 = k + offset_2; - i3 = index; - } else { - i0 = index; - i1 = i + offset_0; - i2 = j; - i3 = k + offset_2; - } - if( a(i0,i1,i2,i3) != b(i,j,k)) - Kokkos::abort("Error: check_subview_correctness 3D-4D (LayoutLeft -> LayoutLeft or LayoutRight -> LayoutRight)"); + if ( a( i0, i1, i2, i3 ) != b( i, j, k ) ) { + Kokkos::abort( "Error: check_subview_correctness 3D-4D (LayoutLeft -> LayoutLeft or LayoutRight -> LayoutRight)" ); } - }; -} + } +}; -template +} // namespace Impl + +template< class Space, class MemTraits = void > void test_layoutleft_to_layoutleft() { Impl::test_subview_legal_args_left(); { - Kokkos::View a("A",100,4,3); - Kokkos::View b(a,Kokkos::pair(16,32),Kokkos::ALL,Kokkos::ALL); + Kokkos::View< int***, Kokkos::LayoutLeft, Space > a( "A", 100, 4, 3 ); + Kokkos::View< int***, Kokkos::LayoutLeft, Space > b( a, Kokkos::pair< int, int >( 16, 32 ), Kokkos::ALL, Kokkos::ALL ); - Impl::FillView_3D fill; + Impl::FillView_3D< Kokkos::LayoutLeft, Space > fill; fill.a = a; - Kokkos::parallel_for(Kokkos::RangePolicy(0,a.extent(0)*a.extent(1)*a.extent(2)), fill); + Kokkos::parallel_for( Kokkos::RangePolicy< typename Space::execution_space >( 0, a.extent( 0 ) * a.extent( 1 ) * a.extent( 2 ) ), fill ); - Impl::CheckSubviewCorrectness_3D_3D check; + Impl::CheckSubviewCorrectness_3D_3D< Kokkos::LayoutLeft, Space, MemTraits > check; check.a = a; check.b = b; check.offset_0 = 16; check.offset_2 = 0; - Kokkos::parallel_for(Kokkos::RangePolicy(0,b.extent(0)*b.extent(1)*b.extent(2)), check); + Kokkos::parallel_for( Kokkos::RangePolicy< typename Space::execution_space >( 0, b.extent( 0 ) * b.extent( 1 ) * b.extent( 2 ) ), check ); } + { - Kokkos::View a("A",100,4,5); - Kokkos::View b(a,Kokkos::pair(16,32),Kokkos::ALL,Kokkos::pair(1,3)); + Kokkos::View< int***, Kokkos::LayoutLeft, Space > a( "A", 100, 4, 5 ); + Kokkos::View< 
int***, Kokkos::LayoutLeft, Space > b( a, Kokkos::pair< int, int >( 16, 32 ), Kokkos::ALL, Kokkos::pair< int, int >( 1, 3 ) ); - Impl::FillView_3D fill; + Impl::FillView_3D fill; fill.a = a; - Kokkos::parallel_for(Kokkos::RangePolicy(0,a.extent(0)*a.extent(1)*a.extent(2)), fill); + Kokkos::parallel_for( Kokkos::RangePolicy< typename Space::execution_space >( 0, a.extent( 0 ) * a.extent( 1 ) * a.extent( 2 ) ), fill ); - Impl::CheckSubviewCorrectness_3D_3D check; + Impl::CheckSubviewCorrectness_3D_3D< Kokkos::LayoutLeft, Space, MemTraits > check; check.a = a; check.b = b; check.offset_0 = 16; check.offset_2 = 1; - Kokkos::parallel_for(Kokkos::RangePolicy(0,b.extent(0)*b.extent(1)*b.extent(2)), check); + Kokkos::parallel_for( Kokkos::RangePolicy< typename Space::execution_space >( 0, b.extent( 0 ) * b.extent( 1 ) * b.extent( 2 ) ), check ); } + { - Kokkos::View a("A",100,4,5,3); - Kokkos::View b(a,Kokkos::pair(16,32),Kokkos::ALL,Kokkos::pair(1,3),1); + Kokkos::View< int****, Kokkos::LayoutLeft, Space > a( "A", 100, 4, 5, 3 ); + Kokkos::View< int***, Kokkos::LayoutLeft, Space > b( a, Kokkos::pair< int, int >( 16, 32 ), Kokkos::ALL, Kokkos::pair< int, int >( 1, 3 ), 1 ); - Impl::FillView_4D fill; + Impl::FillView_4D< Kokkos::LayoutLeft, Space > fill; fill.a = a; - Kokkos::parallel_for(Kokkos::RangePolicy(0,a.extent(0)*a.extent(1)*a.extent(2)*a.extent(3)), fill); + Kokkos::parallel_for( Kokkos::RangePolicy< typename Space::execution_space >( 0, a.extent( 0 ) * a.extent( 1 ) * a.extent( 2 ) * a.extent( 3 ) ), fill ); - Impl::CheckSubviewCorrectness_3D_4D check; + Impl::CheckSubviewCorrectness_3D_4D< Kokkos::LayoutLeft, Space, MemTraits > check; check.a = a; check.b = b; check.offset_0 = 16; check.offset_2 = 1; check.index = 1; - Kokkos::parallel_for(Kokkos::RangePolicy(0,b.extent(0)*b.extent(1)*b.extent(2)), check); + Kokkos::parallel_for( Kokkos::RangePolicy< typename Space::execution_space >( 0, b.extent( 0 ) * b.extent( 1 ) * b.extent( 2 ) ), check ); } } -template 
+template< class Space, class MemTraits = void > void test_layoutright_to_layoutright() { Impl::test_subview_legal_args_right(); { - Kokkos::View a("A",100,4,3); - Kokkos::View b(a,Kokkos::pair(16,32),Kokkos::ALL,Kokkos::ALL); + Kokkos::View< int***, Kokkos::LayoutRight, Space > a( "A", 100, 4, 3 ); + Kokkos::View< int***, Kokkos::LayoutRight, Space > b( a, Kokkos::pair< int, int >( 16, 32 ), Kokkos::ALL, Kokkos::ALL ); - Impl::FillView_3D fill; + Impl::FillView_3D fill; fill.a = a; - Kokkos::parallel_for(Kokkos::RangePolicy(0,a.extent(0)*a.extent(1)*a.extent(2)), fill); + Kokkos::parallel_for( Kokkos::RangePolicy< typename Space::execution_space >( 0, a.extent( 0 ) * a.extent( 1 ) * a.extent( 2 ) ), fill ); - Impl::CheckSubviewCorrectness_3D_3D check; + Impl::CheckSubviewCorrectness_3D_3D< Kokkos::LayoutRight, Space, MemTraits > check; check.a = a; check.b = b; check.offset_0 = 16; check.offset_2 = 0; - Kokkos::parallel_for(Kokkos::RangePolicy(0,b.extent(0)*b.extent(1)*b.extent(2)), check); + Kokkos::parallel_for( Kokkos::RangePolicy< typename Space::execution_space >( 0, b.extent( 0 ) * b.extent( 1 ) * b.extent( 2 ) ), check ); } + { - Kokkos::View a("A",3,4,5,100); - Kokkos::View b(a,1,Kokkos::pair(1,3),Kokkos::ALL,Kokkos::ALL); + Kokkos::View< int****, Kokkos::LayoutRight, Space > a( "A", 3, 4, 5, 100 ); + Kokkos::View< int***, Kokkos::LayoutRight, Space > b( a, 1, Kokkos::pair< int, int >( 1, 3 ), Kokkos::ALL, Kokkos::ALL ); - - Impl::FillView_4D fill; + Impl::FillView_4D< Kokkos::LayoutRight, Space > fill; fill.a = a; - Kokkos::parallel_for(Kokkos::RangePolicy(0,a.extent(0)*a.extent(1)*a.extent(2)*a.extent(3)), fill); + Kokkos::parallel_for( Kokkos::RangePolicy< typename Space::execution_space >( 0, a.extent( 0 ) * a.extent( 1 ) * a.extent( 2 ) * a.extent( 3 ) ), fill ); - Impl::CheckSubviewCorrectness_3D_4D check; + Impl::CheckSubviewCorrectness_3D_4D< Kokkos::LayoutRight, Space, MemTraits > check; check.a = a; check.b = b; check.offset_0 = 1; check.offset_2 
= 0; check.index = 1; - Kokkos::parallel_for(Kokkos::RangePolicy(0,b.extent(0)*b.extent(1)*b.extent(2)), check); + Kokkos::parallel_for( Kokkos::RangePolicy< typename Space::execution_space >( 0, b.extent( 0 ) * b.extent( 1 ) * b.extent( 2 ) ), check ); } } - -} -//---------------------------------------------------------------------------- - +} // namespace TestViewSubview diff --git a/lib/kokkos/core/unit_test/UnitTestMain.cpp b/lib/kokkos/core/unit_test/UnitTestMain.cpp index f952ab3db5..4f52fc9567 100644 --- a/lib/kokkos/core/unit_test/UnitTestMain.cpp +++ b/lib/kokkos/core/unit_test/UnitTestMain.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,15 +36,14 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ #include -int main(int argc, char *argv[]) { - ::testing::InitGoogleTest(&argc,argv); +int main( int argc, char *argv[] ) { + ::testing::InitGoogleTest( &argc, argv ); return RUN_ALL_TESTS(); } - diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda.hpp b/lib/kokkos/core/unit_test/cuda/TestCuda.hpp index 36b9b0688b..768b039204 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda.hpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda.hpp @@ -40,31 +40,25 @@ // ************************************************************************ //@HEADER */ + #ifndef KOKKOS_TEST_CUDA_HPP #define KOKKOS_TEST_CUDA_HPP + #include #include - #include #include - -//---------------------------------------------------------------------------- - #include #include - - #include #include #include #include #include #include - #include - #include #include #include @@ -73,20 +67,16 @@ #include #include #include - - #include #include #include #include - #include - #include namespace Test { -// For Some Reason I can only have the definition of SetUp and TearDown in one cpp file ... +// For some reason I can only have the definition of SetUp and TearDown in one cpp file ... 
class cuda : public ::testing::Test { protected: static void SetUpTestCase(); @@ -95,17 +85,19 @@ protected: #ifdef TEST_CUDA_INSTANTIATE_SETUP_TEARDOWN void cuda::SetUpTestCase() - { - Kokkos::Cuda::print_configuration( std::cout ); - Kokkos::HostSpace::execution_space::initialize(); - Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) ); - } +{ + Kokkos::print_configuration( std::cout ); + Kokkos::HostSpace::execution_space::initialize(); + Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( 0 ) ); +} void cuda::TearDownTestCase() - { - Kokkos::Cuda::finalize(); - Kokkos::HostSpace::execution_space::finalize(); - } -#endif +{ + Kokkos::Cuda::finalize(); + Kokkos::HostSpace::execution_space::finalize(); } #endif + +} // namespace Test + +#endif diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Atomics.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Atomics.cpp index ff379dc805..7cf19b26d1 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_Atomics.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Atomics.cpp @@ -40,164 +40,164 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( cuda , atomics ) +TEST_F( cuda, atomics ) { - const int loop_count = 1e3 ; + const int loop_count = 1e3; - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Cuda >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Cuda >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Cuda >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Cuda >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Cuda >( loop_count, 
2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Cuda >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Cuda >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Cuda >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Cuda >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Cuda >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Cuda >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Cuda >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Cuda >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Cuda >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Cuda >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Cuda >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Cuda >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Cuda >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(100,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Cuda >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< 
float, Kokkos::Cuda >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Cuda >( 100, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop ,Kokkos::Cuda>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop ,Kokkos::Cuda>(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop ,Kokkos::Cuda>(100,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex, Kokkos::Cuda >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex, Kokkos::Cuda >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex, Kokkos::Cuda >( 100, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop ,Kokkos::Cuda>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop ,Kokkos::Cuda>(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop ,Kokkos::Cuda>(100,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Cuda >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Cuda >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Cuda >( 100, 3 ) ) ); } -TEST_F( cuda , atomic_operations ) +TEST_F( cuda, atomic_operations ) { - const int start = 1; //Avoid zero for division + const int start = 1; // Avoid zero for division. 
const int end = 11; - for (int i = start; i < end; ++i) + + for ( int i = start; i < end; ++i ) { - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 12 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 12 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 12 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( 
start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 12 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 12 ) ) ); + ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 12 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, 
end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 12 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 12 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 12 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 8 ) ) ); + ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 12 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Cuda >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Cuda >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Cuda >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Cuda >( start, end - i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Cuda >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Cuda >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Cuda >( 
start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Cuda >( start, end - i, 4 ) ) ); } } -TEST_F( cuda , atomic_views_integral ) +TEST_F( cuda, atomic_views_integral ) { const long length = 1000000; + { - //Integral Types - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 8 ) ) ); + // Integral Types. + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Cuda >( length, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Cuda >( length, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Cuda >( length, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Cuda >( length, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Cuda >( length, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Cuda >( length, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Cuda >( length, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Cuda >( length, 8 ) ) ); } } -TEST_F( cuda , atomic_views_nonintegral ) +TEST_F( cuda, atomic_views_nonintegral ) { const long length = 1000000; - { - //Non-Integral Types - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType(length, 1 ) ) ); - ASSERT_TRUE( 
( TestAtomicViews::AtomicViewsTestNonIntegralType(length, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType(length, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType(length, 4 ) ) ); + { + // Non-Integral Types. + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Cuda >( length, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Cuda >( length, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Cuda >( length, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Cuda >( length, 4 ) ) ); } } - -TEST_F( cuda , atomic_view_api ) +TEST_F( cuda, atomic_view_api ) { - TestAtomicViews::TestAtomicViewAPI(); + TestAtomicViews::TestAtomicViewAPI< int, Kokkos::Cuda >(); } - -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Other.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Other.cpp index aeaa2a0e81..e655193a51 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_Other.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Other.cpp @@ -40,63 +40,68 @@ // ************************************************************************ //@HEADER */ + #define TEST_CUDA_INSTANTIATE_SETUP_TEARDOWN + #include namespace Test { -TEST_F( cuda , init ) { +TEST_F( cuda, init ) +{ ; } -TEST_F( cuda , md_range ) { - TestMDRange_2D< Kokkos::Cuda >::test_for2(100,100); - - TestMDRange_3D< Kokkos::Cuda >::test_for3(100,100,100); +TEST_F( cuda , mdrange_for ) { + TestMDRange_2D< Kokkos::Cuda >::test_for2( 100, 100 ); + TestMDRange_3D< Kokkos::Cuda >::test_for3( 100, 100, 100 ); + TestMDRange_4D< Kokkos::Cuda >::test_for4( 100, 10, 100, 10 ); + TestMDRange_5D< Kokkos::Cuda >::test_for5( 100, 10, 10, 10, 5 ); + TestMDRange_6D< Kokkos::Cuda >::test_for6( 100, 10, 5, 2, 10, 5 ); } -TEST_F( cuda, policy_construction) { +TEST_F( cuda, policy_construction ) +{ 
TestRangePolicyConstruction< Kokkos::Cuda >(); TestTeamPolicyConstruction< Kokkos::Cuda >(); } -TEST_F( cuda , range_tag ) +TEST_F( cuda, range_tag ) { - TestRange< Kokkos::Cuda , Kokkos::Schedule >::test_for(0); - TestRange< Kokkos::Cuda , Kokkos::Schedule >::test_reduce(0); - TestRange< Kokkos::Cuda , Kokkos::Schedule >::test_scan(0); - TestRange< Kokkos::Cuda , Kokkos::Schedule >::test_for(0); - TestRange< Kokkos::Cuda , Kokkos::Schedule >::test_reduce(0); - TestRange< Kokkos::Cuda , Kokkos::Schedule >::test_scan(0); + TestRange< Kokkos::Cuda, Kokkos::Schedule >::test_for( 0 ); + TestRange< Kokkos::Cuda, Kokkos::Schedule >::test_reduce( 0 ); + TestRange< Kokkos::Cuda, Kokkos::Schedule >::test_scan( 0 ); + TestRange< Kokkos::Cuda, Kokkos::Schedule >::test_for( 0 ); + TestRange< Kokkos::Cuda, Kokkos::Schedule >::test_reduce( 0 ); + TestRange< Kokkos::Cuda, Kokkos::Schedule >::test_scan( 0 ); - TestRange< Kokkos::Cuda , Kokkos::Schedule >::test_for(2); - TestRange< Kokkos::Cuda , Kokkos::Schedule >::test_reduce(2); - TestRange< Kokkos::Cuda , Kokkos::Schedule >::test_scan(2); + TestRange< Kokkos::Cuda, Kokkos::Schedule >::test_for( 2 ); + TestRange< Kokkos::Cuda, Kokkos::Schedule >::test_reduce( 2 ); + TestRange< Kokkos::Cuda, Kokkos::Schedule >::test_scan( 2 ); - TestRange< Kokkos::Cuda , Kokkos::Schedule >::test_for(3); - TestRange< Kokkos::Cuda , Kokkos::Schedule >::test_reduce(3); - TestRange< Kokkos::Cuda , Kokkos::Schedule >::test_scan(3); + TestRange< Kokkos::Cuda, Kokkos::Schedule >::test_for( 3 ); + TestRange< Kokkos::Cuda, Kokkos::Schedule >::test_reduce( 3 ); + TestRange< Kokkos::Cuda, Kokkos::Schedule >::test_scan( 3 ); - TestRange< Kokkos::Cuda , Kokkos::Schedule >::test_for(1000); - TestRange< Kokkos::Cuda , Kokkos::Schedule >::test_reduce(1000); - TestRange< Kokkos::Cuda , Kokkos::Schedule >::test_scan(1000); + TestRange< Kokkos::Cuda, Kokkos::Schedule >::test_for( 1000 ); + TestRange< Kokkos::Cuda, Kokkos::Schedule >::test_reduce( 1000 ); + 
TestRange< Kokkos::Cuda, Kokkos::Schedule >::test_scan( 1000 ); - TestRange< Kokkos::Cuda , Kokkos::Schedule >::test_for(1001); - TestRange< Kokkos::Cuda , Kokkos::Schedule >::test_reduce(1001); - TestRange< Kokkos::Cuda , Kokkos::Schedule >::test_scan(1001); + TestRange< Kokkos::Cuda, Kokkos::Schedule >::test_for( 1001 ); + TestRange< Kokkos::Cuda, Kokkos::Schedule >::test_reduce( 1001 ); + TestRange< Kokkos::Cuda, Kokkos::Schedule >::test_scan( 1001 ); } - //---------------------------------------------------------------------------- -TEST_F( cuda , compiler_macros ) +TEST_F( cuda, compiler_macros ) { ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Cuda >() ) ); } //---------------------------------------------------------------------------- -TEST_F( cuda , memory_pool ) +TEST_F( cuda, memory_pool ) { bool val = TestMemoryPool::test_mempool< Kokkos::Cuda >( 128, 128000000 ); ASSERT_TRUE( val ); @@ -110,24 +115,24 @@ TEST_F( cuda , memory_pool ) #if defined( KOKKOS_ENABLE_TASKDAG ) -TEST_F( cuda , task_fib ) +TEST_F( cuda, task_fib ) { - for ( int i = 0 ; i < 25 ; ++i ) { - TestTaskScheduler::TestFib< Kokkos::Cuda >::run(i, (i+1)*(i+1)*10000 ); + for ( int i = 0; i < 25; ++i ) { + TestTaskScheduler::TestFib< Kokkos::Cuda >::run( i, ( i + 1 ) * ( i + 1 ) * 10000 ); } } -TEST_F( cuda , task_depend ) +TEST_F( cuda, task_depend ) { - for ( int i = 0 ; i < 25 ; ++i ) { - TestTaskScheduler::TestTaskDependence< Kokkos::Cuda >::run(i); + for ( int i = 0; i < 25; ++i ) { + TestTaskScheduler::TestTaskDependence< Kokkos::Cuda >::run( i ); } } -TEST_F( cuda , task_team ) +TEST_F( cuda, task_team ) { - TestTaskScheduler::TestTaskTeam< Kokkos::Cuda >::run(1000); - //TestTaskScheduler::TestTaskTeamValue< Kokkos::Cuda >::run(1000); //put back after testing + TestTaskScheduler::TestTaskTeam< Kokkos::Cuda >::run( 1000 ); + //TestTaskScheduler::TestTaskTeamValue< Kokkos::Cuda >::run( 1000 ); // Put back after testing. 
} #endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ @@ -135,55 +140,55 @@ TEST_F( cuda , task_team ) //---------------------------------------------------------------------------- #if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA ) -TEST_F( cuda , cxx11 ) +TEST_F( cuda, cxx11 ) { - if ( std::is_same< Kokkos::DefaultExecutionSpace , Kokkos::Cuda >::value ) { - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Cuda >(1) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Cuda >(2) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Cuda >(3) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Cuda >(4) ) ); + if ( std::is_same< Kokkos::DefaultExecutionSpace, Kokkos::Cuda >::value ) { + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Cuda >( 1 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Cuda >( 2 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Cuda >( 3 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Cuda >( 4 ) ) ); } } #endif TEST_F( cuda, tile_layout ) { - TestTile::test< Kokkos::Cuda , 1 , 1 >( 1 , 1 ); - TestTile::test< Kokkos::Cuda , 1 , 1 >( 2 , 3 ); - TestTile::test< Kokkos::Cuda , 1 , 1 >( 9 , 10 ); + TestTile::test< Kokkos::Cuda, 1, 1 >( 1, 1 ); + TestTile::test< Kokkos::Cuda, 1, 1 >( 2, 3 ); + TestTile::test< Kokkos::Cuda, 1, 1 >( 9, 10 ); - TestTile::test< Kokkos::Cuda , 2 , 2 >( 1 , 1 ); - TestTile::test< Kokkos::Cuda , 2 , 2 >( 2 , 3 ); - TestTile::test< Kokkos::Cuda , 2 , 2 >( 4 , 4 ); - TestTile::test< Kokkos::Cuda , 2 , 2 >( 9 , 9 ); + TestTile::test< Kokkos::Cuda, 2, 2 >( 1, 1 ); + TestTile::test< Kokkos::Cuda, 2, 2 >( 2, 3 ); + TestTile::test< Kokkos::Cuda, 2, 2 >( 4, 4 ); + TestTile::test< Kokkos::Cuda, 2, 2 >( 9, 9 ); - TestTile::test< Kokkos::Cuda , 2 , 4 >( 9 , 9 ); - TestTile::test< Kokkos::Cuda , 4 , 2 >( 9 , 9 ); + TestTile::test< Kokkos::Cuda, 2, 4 >( 9, 9 ); + TestTile::test< Kokkos::Cuda, 4, 2 >( 9, 9 ); - TestTile::test< Kokkos::Cuda , 4 , 4 >( 1 , 1 ); - TestTile::test< Kokkos::Cuda , 4 , 4 >( 4 , 4 ); - TestTile::test< Kokkos::Cuda , 4 , 4 >( 9 , 9 ); - 
TestTile::test< Kokkos::Cuda , 4 , 4 >( 9 , 11 ); + TestTile::test< Kokkos::Cuda, 4, 4 >( 1, 1 ); + TestTile::test< Kokkos::Cuda, 4, 4 >( 4, 4 ); + TestTile::test< Kokkos::Cuda, 4, 4 >( 9, 9 ); + TestTile::test< Kokkos::Cuda, 4, 4 >( 9, 11 ); - TestTile::test< Kokkos::Cuda , 8 , 8 >( 1 , 1 ); - TestTile::test< Kokkos::Cuda , 8 , 8 >( 4 , 4 ); - TestTile::test< Kokkos::Cuda , 8 , 8 >( 9 , 9 ); - TestTile::test< Kokkos::Cuda , 8 , 8 >( 9 , 11 ); + TestTile::test< Kokkos::Cuda, 8, 8 >( 1, 1 ); + TestTile::test< Kokkos::Cuda, 8, 8 >( 4, 4 ); + TestTile::test< Kokkos::Cuda, 8, 8 >( 9, 9 ); + TestTile::test< Kokkos::Cuda, 8, 8 >( 9, 11 ); } -#if defined (KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if defined (KOKKOS_COMPILER_CLANG) -TEST_F( cuda , dispatch ) +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) +#if defined( KOKKOS_COMPILER_CLANG ) +TEST_F( cuda, dispatch ) { - const int repeat = 100 ; - for ( int i = 0 ; i < repeat ; ++i ) { - for ( int j = 0 ; j < repeat ; ++j ) { - Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Cuda >(0,j) - , KOKKOS_LAMBDA( int ) {} ); - }} + const int repeat = 100; + for ( int i = 0; i < repeat; ++i ) { + for ( int j = 0; j < repeat; ++j ) { + Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Cuda >( 0, j ) + , KOKKOS_LAMBDA( int ) {} ); + } + } } #endif #endif -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_a.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_a.cpp index b9ab9fe72d..01eed4e023 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_a.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_a.cpp @@ -40,17 +40,17 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( cuda , reducers ) +TEST_F( cuda, reducers ) { - TestReducers::execute_integer(); - TestReducers::execute_integer(); - TestReducers::execute_float(); - TestReducers, Kokkos::Cuda>::execute_basic(); + 
TestReducers< int, Kokkos::Cuda >::execute_integer(); + TestReducers< size_t, Kokkos::Cuda >::execute_integer(); + TestReducers< double, Kokkos::Cuda >::execute_float(); + TestReducers< Kokkos::complex, Kokkos::Cuda >::execute_basic(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_b.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_b.cpp index c588d752dd..7f4e0973e7 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_b.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_b.cpp @@ -40,38 +40,44 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( cuda, long_reduce) { - TestReduce< long , Kokkos::Cuda >( 0 ); - TestReduce< long , Kokkos::Cuda >( 1000000 ); -} - -TEST_F( cuda, double_reduce) { - TestReduce< double , Kokkos::Cuda >( 0 ); - TestReduce< double , Kokkos::Cuda >( 1000000 ); -} - -TEST_F( cuda, long_reduce_dynamic ) { - TestReduceDynamic< long , Kokkos::Cuda >( 0 ); - TestReduceDynamic< long , Kokkos::Cuda >( 1000000 ); -} - -TEST_F( cuda, double_reduce_dynamic ) { - TestReduceDynamic< double , Kokkos::Cuda >( 0 ); - TestReduceDynamic< double , Kokkos::Cuda >( 1000000 ); -} - -TEST_F( cuda, long_reduce_dynamic_view ) { - TestReduceDynamicView< long , Kokkos::Cuda >( 0 ); - TestReduceDynamicView< long , Kokkos::Cuda >( 1000000 ); -} - -TEST_F( cuda , scan ) +TEST_F( cuda, long_reduce ) { - TestScan< Kokkos::Cuda >::test_range( 1 , 1000 ); + TestReduce< long, Kokkos::Cuda >( 0 ); + TestReduce< long, Kokkos::Cuda >( 1000000 ); +} + +TEST_F( cuda, double_reduce ) +{ + TestReduce< double, Kokkos::Cuda >( 0 ); + TestReduce< double, Kokkos::Cuda >( 1000000 ); +} + +TEST_F( cuda, long_reduce_dynamic ) +{ + TestReduceDynamic< long, Kokkos::Cuda >( 0 ); + TestReduceDynamic< long, Kokkos::Cuda >( 1000000 ); +} + +TEST_F( cuda, double_reduce_dynamic ) +{ + TestReduceDynamic< double, Kokkos::Cuda >( 0 ); + 
TestReduceDynamic< double, Kokkos::Cuda >( 1000000 ); +} + +TEST_F( cuda, long_reduce_dynamic_view ) +{ + TestReduceDynamicView< long, Kokkos::Cuda >( 0 ); + TestReduceDynamicView< long, Kokkos::Cuda >( 1000000 ); +} + +TEST_F( cuda, scan ) +{ + TestScan< Kokkos::Cuda >::test_range( 1, 1000 ); TestScan< Kokkos::Cuda >( 0 ); TestScan< Kokkos::Cuda >( 100000 ); TestScan< Kokkos::Cuda >( 10000000 ); @@ -79,10 +85,11 @@ TEST_F( cuda , scan ) } #if 0 -TEST_F( cuda , scan_small ) +TEST_F( cuda, scan_small ) { - typedef TestScan< Kokkos::Cuda , Kokkos::Impl::CudaExecUseScanSmall > TestScanFunctor ; - for ( int i = 0 ; i < 1000 ; ++i ) { + typedef TestScan< Kokkos::Cuda, Kokkos::Impl::CudaExecUseScanSmall > TestScanFunctor; + + for ( int i = 0; i < 1000; ++i ) { TestScanFunctor( 10 ); TestScanFunctor( 10000 ); } @@ -93,38 +100,39 @@ TEST_F( cuda , scan_small ) } #endif -TEST_F( cuda , team_scan ) +TEST_F( cuda, team_scan ) { - TestScanTeam< Kokkos::Cuda , Kokkos::Schedule >( 0 ); - TestScanTeam< Kokkos::Cuda , Kokkos::Schedule >( 0 ); - TestScanTeam< Kokkos::Cuda , Kokkos::Schedule >( 10 ); - TestScanTeam< Kokkos::Cuda , Kokkos::Schedule >( 10 ); - TestScanTeam< Kokkos::Cuda , Kokkos::Schedule >( 10000 ); - TestScanTeam< Kokkos::Cuda , Kokkos::Schedule >( 10000 ); + TestScanTeam< Kokkos::Cuda, Kokkos::Schedule >( 0 ); + TestScanTeam< Kokkos::Cuda, Kokkos::Schedule >( 0 ); + TestScanTeam< Kokkos::Cuda, Kokkos::Schedule >( 10 ); + TestScanTeam< Kokkos::Cuda, Kokkos::Schedule >( 10 ); + TestScanTeam< Kokkos::Cuda, Kokkos::Schedule >( 10000 ); + TestScanTeam< Kokkos::Cuda, Kokkos::Schedule >( 10000 ); } -TEST_F( cuda , team_long_reduce) { - TestReduceTeam< long , Kokkos::Cuda , Kokkos::Schedule >( 0 ); - TestReduceTeam< long , Kokkos::Cuda , Kokkos::Schedule >( 0 ); - TestReduceTeam< long , Kokkos::Cuda , Kokkos::Schedule >( 3 ); - TestReduceTeam< long , Kokkos::Cuda , Kokkos::Schedule >( 3 ); - TestReduceTeam< long , Kokkos::Cuda , Kokkos::Schedule >( 100000 ); - 
TestReduceTeam< long , Kokkos::Cuda , Kokkos::Schedule >( 100000 ); +TEST_F( cuda, team_long_reduce ) +{ + TestReduceTeam< long, Kokkos::Cuda, Kokkos::Schedule >( 0 ); + TestReduceTeam< long, Kokkos::Cuda, Kokkos::Schedule >( 0 ); + TestReduceTeam< long, Kokkos::Cuda, Kokkos::Schedule >( 3 ); + TestReduceTeam< long, Kokkos::Cuda, Kokkos::Schedule >( 3 ); + TestReduceTeam< long, Kokkos::Cuda, Kokkos::Schedule >( 100000 ); + TestReduceTeam< long, Kokkos::Cuda, Kokkos::Schedule >( 100000 ); } -TEST_F( cuda , team_double_reduce) { - TestReduceTeam< double , Kokkos::Cuda , Kokkos::Schedule >( 0 ); - TestReduceTeam< double , Kokkos::Cuda , Kokkos::Schedule >( 0 ); - TestReduceTeam< double , Kokkos::Cuda , Kokkos::Schedule >( 3 ); - TestReduceTeam< double , Kokkos::Cuda , Kokkos::Schedule >( 3 ); - TestReduceTeam< double , Kokkos::Cuda , Kokkos::Schedule >( 100000 ); - TestReduceTeam< double , Kokkos::Cuda , Kokkos::Schedule >( 100000 ); +TEST_F( cuda, team_double_reduce ) +{ + TestReduceTeam< double, Kokkos::Cuda, Kokkos::Schedule >( 0 ); + TestReduceTeam< double, Kokkos::Cuda, Kokkos::Schedule >( 0 ); + TestReduceTeam< double, Kokkos::Cuda, Kokkos::Schedule >( 3 ); + TestReduceTeam< double, Kokkos::Cuda, Kokkos::Schedule >( 3 ); + TestReduceTeam< double, Kokkos::Cuda, Kokkos::Schedule >( 100000 ); + TestReduceTeam< double, Kokkos::Cuda, Kokkos::Schedule >( 100000 ); } -TEST_F( cuda , reduction_deduction ) +TEST_F( cuda, reduction_deduction ) { TestCXX11::test_reduction_deduction< Kokkos::Cuda >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp index f3cbc3b889..5bed7640da 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp @@ -40,6 +40,7 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { @@ -47,353 +48,338 @@ namespace 
Test { __global__ void test_abort() { - Kokkos::abort("test_abort"); + Kokkos::abort( "test_abort" ); } __global__ void test_cuda_spaces_int_value( int * ptr ) { - if ( *ptr == 42 ) { *ptr = 2 * 42 ; } + if ( *ptr == 42 ) { *ptr = 2 * 42; } } -TEST_F( cuda , space_access ) +TEST_F( cuda, space_access ) { - //-------------------------------------- + static_assert( + Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace, Kokkos::HostSpace >::assignable, "" ); static_assert( - Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace , Kokkos::HostSpace >::assignable , "" ); + Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace, Kokkos::CudaHostPinnedSpace >::assignable, "" ); static_assert( - Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace >::assignable , "" ); + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace, Kokkos::CudaSpace >::assignable, "" ); static_assert( - ! Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaSpace >::assignable , "" ); + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace, Kokkos::CudaSpace >::accessible, "" ); static_assert( - ! Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaSpace >::accessible , "" ); + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace, Kokkos::CudaUVMSpace >::assignable, "" ); static_assert( - ! 
Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaUVMSpace >::assignable , "" ); - - static_assert( - Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaUVMSpace >::accessible , "" ); + Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace, Kokkos::CudaUVMSpace >::accessible, "" ); //-------------------------------------- static_assert( - Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaSpace >::assignable , "" ); + Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace, Kokkos::CudaSpace >::assignable, "" ); static_assert( - Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaUVMSpace >::assignable , "" ); + Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace, Kokkos::CudaUVMSpace >::assignable, "" ); static_assert( - ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaHostPinnedSpace >::assignable , "" ); + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace, Kokkos::CudaHostPinnedSpace >::assignable, "" ); static_assert( - Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaHostPinnedSpace >::accessible , "" ); + Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace, Kokkos::CudaHostPinnedSpace >::accessible, "" ); static_assert( - ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::HostSpace >::assignable , "" ); + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace, Kokkos::HostSpace >::assignable, "" ); static_assert( - ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::HostSpace >::accessible , "" ); + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace, Kokkos::HostSpace >::accessible, "" ); //-------------------------------------- static_assert( - Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaUVMSpace >::assignable , "" ); + Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace, Kokkos::CudaUVMSpace >::assignable, "" ); static_assert( - ! 
Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaSpace >::assignable , "" ); + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace, Kokkos::CudaSpace >::assignable, "" ); static_assert( - Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaSpace >::accessible , "" ); + Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace, Kokkos::CudaSpace >::accessible, "" ); static_assert( - ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::HostSpace >::assignable , "" ); + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace, Kokkos::HostSpace >::assignable, "" ); static_assert( - ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::HostSpace >::accessible , "" ); + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace, Kokkos::HostSpace >::accessible, "" ); static_assert( - ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaHostPinnedSpace >::assignable , "" ); + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace, Kokkos::CudaHostPinnedSpace >::assignable, "" ); static_assert( - Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaHostPinnedSpace >::accessible , "" ); + Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace, Kokkos::CudaHostPinnedSpace >::accessible, "" ); //-------------------------------------- static_assert( - Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaHostPinnedSpace >::assignable , "" ); + Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace, Kokkos::CudaHostPinnedSpace >::assignable, "" ); static_assert( - ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace >::assignable , "" ); + ! 
Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace, Kokkos::HostSpace >::assignable, "" ); static_assert( - Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace >::accessible , "" ); + Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace, Kokkos::HostSpace >::accessible, "" ); static_assert( - ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaSpace >::assignable , "" ); + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace, Kokkos::CudaSpace >::assignable, "" ); static_assert( - ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaSpace >::accessible , "" ); + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace, Kokkos::CudaSpace >::accessible, "" ); static_assert( - ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaUVMSpace >::assignable , "" ); + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace, Kokkos::CudaUVMSpace >::assignable, "" ); static_assert( - Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaUVMSpace >::accessible , "" ); + Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace, Kokkos::CudaUVMSpace >::accessible, "" ); //-------------------------------------- static_assert( - ! Kokkos::Impl::SpaceAccessibility< Kokkos::Cuda , Kokkos::HostSpace >::accessible , "" ); + ! 
Kokkos::Impl::SpaceAccessibility< Kokkos::Cuda, Kokkos::HostSpace >::accessible, "" ); static_assert( - Kokkos::Impl::SpaceAccessibility< Kokkos::Cuda , Kokkos::CudaSpace >::accessible , "" ); + Kokkos::Impl::SpaceAccessibility< Kokkos::Cuda, Kokkos::CudaSpace >::accessible, "" ); static_assert( - Kokkos::Impl::SpaceAccessibility< Kokkos::Cuda , Kokkos::CudaUVMSpace >::accessible , "" ); + Kokkos::Impl::SpaceAccessibility< Kokkos::Cuda, Kokkos::CudaUVMSpace >::accessible, "" ); static_assert( - Kokkos::Impl::SpaceAccessibility< Kokkos::Cuda , Kokkos::CudaHostPinnedSpace >::accessible , "" ); + Kokkos::Impl::SpaceAccessibility< Kokkos::Cuda, Kokkos::CudaHostPinnedSpace >::accessible, "" ); static_assert( - ! Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace , Kokkos::CudaSpace >::accessible , "" ); + ! Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace, Kokkos::CudaSpace >::accessible, "" ); static_assert( - Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace , Kokkos::CudaUVMSpace >::accessible , "" ); + Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace, Kokkos::CudaUVMSpace >::accessible, "" ); static_assert( - Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace >::accessible , "" ); - + Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace, Kokkos::CudaHostPinnedSpace >::accessible, "" ); static_assert( std::is_same< Kokkos::Impl::HostMirror< Kokkos::CudaSpace >::Space - , Kokkos::HostSpace >::value , "" ); + , Kokkos::HostSpace >::value, "" ); static_assert( std::is_same< Kokkos::Impl::HostMirror< Kokkos::CudaUVMSpace >::Space , Kokkos::Device< Kokkos::HostSpace::execution_space - , Kokkos::CudaUVMSpace > >::value , "" ); + , Kokkos::CudaUVMSpace > >::value, "" ); static_assert( std::is_same< Kokkos::Impl::HostMirror< Kokkos::CudaHostPinnedSpace >::Space - , Kokkos::CudaHostPinnedSpace >::value , "" ); + , Kokkos::CudaHostPinnedSpace >::value, "" ); static_assert( std::is_same< Kokkos::Device< Kokkos::HostSpace::execution_space , 
Kokkos::CudaUVMSpace > , Kokkos::Device< Kokkos::HostSpace::execution_space - , Kokkos::CudaUVMSpace > >::value , "" ); + , Kokkos::CudaUVMSpace > >::value, "" ); static_assert( Kokkos::Impl::SpaceAccessibility < Kokkos::Impl::HostMirror< Kokkos::Cuda >::Space , Kokkos::HostSpace - >::accessible , "" ); + >::accessible, "" ); static_assert( Kokkos::Impl::SpaceAccessibility < Kokkos::Impl::HostMirror< Kokkos::CudaSpace >::Space , Kokkos::HostSpace - >::accessible , "" ); + >::accessible, "" ); static_assert( Kokkos::Impl::SpaceAccessibility < Kokkos::Impl::HostMirror< Kokkos::CudaUVMSpace >::Space , Kokkos::HostSpace - >::accessible , "" ); + >::accessible, "" ); static_assert( Kokkos::Impl::SpaceAccessibility < Kokkos::Impl::HostMirror< Kokkos::CudaHostPinnedSpace >::Space , Kokkos::HostSpace - >::accessible , "" ); + >::accessible, "" ); } TEST_F( cuda, uvm ) { if ( Kokkos::CudaUVMSpace::available() ) { + int * uvm_ptr = (int*) Kokkos::kokkos_malloc< Kokkos::CudaUVMSpace >( "uvm_ptr", sizeof( int ) ); - int * uvm_ptr = (int*) Kokkos::kokkos_malloc< Kokkos::CudaUVMSpace >("uvm_ptr",sizeof(int)); - - *uvm_ptr = 42 ; + *uvm_ptr = 42; Kokkos::Cuda::fence(); - test_cuda_spaces_int_value<<<1,1>>>(uvm_ptr); + test_cuda_spaces_int_value<<< 1, 1 >>>( uvm_ptr ); Kokkos::Cuda::fence(); - EXPECT_EQ( *uvm_ptr, int(2*42) ); - - Kokkos::kokkos_free< Kokkos::CudaUVMSpace >(uvm_ptr ); + EXPECT_EQ( *uvm_ptr, int( 2 * 42 ) ); + Kokkos::kokkos_free< Kokkos::CudaUVMSpace >( uvm_ptr ); } } TEST_F( cuda, uvm_num_allocs ) { - // The max number of uvm allocations allowed is 65536 + // The max number of UVM allocations allowed is 65536. 
#define MAX_NUM_ALLOCS 65536 if ( Kokkos::CudaUVMSpace::available() ) { - struct TestMaxUVMAllocs { - using view_type = Kokkos::View< double* , Kokkos::CudaUVMSpace >; - using view_of_view_type = Kokkos::View< view_type[ MAX_NUM_ALLOCS ] + using view_type = Kokkos::View< double*, Kokkos::CudaUVMSpace >; + using view_of_view_type = Kokkos::View< view_type[ MAX_NUM_ALLOCS ] , Kokkos::CudaUVMSpace >; - TestMaxUVMAllocs() - : view_allocs_test("view_allocs_test") + TestMaxUVMAllocs() : view_allocs_test( "view_allocs_test" ) { + for ( auto i = 0; i < MAX_NUM_ALLOCS; ++i ) { - for ( auto i = 0; i < MAX_NUM_ALLOCS ; ++i ) { - - // Kokkos will throw a runtime exception if an attempt is made to - // allocate more than the maximum number of uvm allocations + // Kokkos will throw a runtime exception if an attempt is made to + // allocate more than the maximum number of uvm allocations. // In this test, the max num of allocs occurs when i = MAX_NUM_ALLOCS - 1 // since the 'outer' view counts as one UVM allocation, leaving - // 65535 possible UVM allocations, that is 'i in [0 , 65535)' + // 65535 possible UVM allocations, that is 'i in [0, 65535)'. - // The test will catch the exception thrown in this case and continue + // The test will catch the exception thrown in this case and continue. 
- if ( i == ( MAX_NUM_ALLOCS - 1) ) { - EXPECT_ANY_THROW( { view_allocs_test(i) = view_type("inner_view",1); } ) ; + if ( i == ( MAX_NUM_ALLOCS - 1 ) ) { + EXPECT_ANY_THROW( { view_allocs_test( i ) = view_type( "inner_view", 1 ); } ); } else { - if(i +template< class MemSpace, class ExecSpace > struct TestViewCudaAccessible { - enum { N = 1000 }; - using V = Kokkos::View ; + using V = Kokkos::View< double*, MemSpace >; - V m_base ; + V m_base; struct TagInit {}; struct TagTest {}; KOKKOS_INLINE_FUNCTION - void operator()( const TagInit & , const int i ) const { m_base[i] = i + 1 ; } + void operator()( const TagInit &, const int i ) const { m_base[i] = i + 1; } KOKKOS_INLINE_FUNCTION - void operator()( const TagTest & , const int i , long & error_count ) const - { if ( m_base[i] != i + 1 ) ++error_count ; } + void operator()( const TagTest &, const int i, long & error_count ) const + { if ( m_base[i] != i + 1 ) ++error_count; } TestViewCudaAccessible() - : m_base("base",N) + : m_base( "base", N ) {} static void run() - { - TestViewCudaAccessible self ; - Kokkos::parallel_for( Kokkos::RangePolicy< typename MemSpace::execution_space , TagInit >(0,N) , self ); - MemSpace::execution_space::fence(); - // Next access is a different execution space, must complete prior kernel. - long error_count = -1 ; - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , TagTest >(0,N) , self , error_count ); - EXPECT_EQ( error_count , 0 ); - } + { + TestViewCudaAccessible self; + Kokkos::parallel_for( Kokkos::RangePolicy< typename MemSpace::execution_space, TagInit >( 0, N ), self ); + MemSpace::execution_space::fence(); + + // Next access is a different execution space, must complete prior kernel. 
+ long error_count = -1; + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace, TagTest >( 0, N ), self, error_count ); + EXPECT_EQ( error_count, 0 ); + } }; -TEST_F( cuda , impl_view_accessible ) +TEST_F( cuda, impl_view_accessible ) { - TestViewCudaAccessible< Kokkos::CudaSpace , Kokkos::Cuda >::run(); + TestViewCudaAccessible< Kokkos::CudaSpace, Kokkos::Cuda >::run(); - TestViewCudaAccessible< Kokkos::CudaUVMSpace , Kokkos::Cuda >::run(); - TestViewCudaAccessible< Kokkos::CudaUVMSpace , Kokkos::HostSpace::execution_space >::run(); + TestViewCudaAccessible< Kokkos::CudaUVMSpace, Kokkos::Cuda >::run(); + TestViewCudaAccessible< Kokkos::CudaUVMSpace, Kokkos::HostSpace::execution_space >::run(); - TestViewCudaAccessible< Kokkos::CudaHostPinnedSpace , Kokkos::Cuda >::run(); - TestViewCudaAccessible< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace::execution_space >::run(); + TestViewCudaAccessible< Kokkos::CudaHostPinnedSpace, Kokkos::Cuda >::run(); + TestViewCudaAccessible< Kokkos::CudaHostPinnedSpace, Kokkos::HostSpace::execution_space >::run(); } template< class MemSpace > struct TestViewCudaTexture { - enum { N = 1000 }; - using V = Kokkos::View ; - using T = Kokkos::View ; + using V = Kokkos::View< double*, MemSpace >; + using T = Kokkos::View< const double*, MemSpace, Kokkos::MemoryRandomAccess >; - V m_base ; - T m_tex ; + V m_base; + T m_tex; struct TagInit {}; struct TagTest {}; KOKKOS_INLINE_FUNCTION - void operator()( const TagInit & , const int i ) const { m_base[i] = i + 1 ; } + void operator()( const TagInit &, const int i ) const { m_base[i] = i + 1; } KOKKOS_INLINE_FUNCTION - void operator()( const TagTest & , const int i , long & error_count ) const - { if ( m_tex[i] != i + 1 ) ++error_count ; } + void operator()( const TagTest &, const int i, long & error_count ) const + { if ( m_tex[i] != i + 1 ) ++error_count; } TestViewCudaTexture() - : m_base("base",N) + : m_base( "base", N ) , m_tex( m_base ) {} static void run() - { - EXPECT_TRUE( ( 
std::is_same< typename V::reference_type - , double & - >::value ) ); + { + EXPECT_TRUE( ( std::is_same< typename V::reference_type, double & >::value ) ); + EXPECT_TRUE( ( std::is_same< typename T::reference_type, const double >::value ) ); - EXPECT_TRUE( ( std::is_same< typename T::reference_type - , const double - >::value ) ); + EXPECT_TRUE( V::reference_type_is_lvalue_reference ); // An ordinary view. + EXPECT_FALSE( T::reference_type_is_lvalue_reference ); // Texture fetch returns by value. - EXPECT_TRUE( V::reference_type_is_lvalue_reference ); // An ordinary view - EXPECT_FALSE( T::reference_type_is_lvalue_reference ); // Texture fetch returns by value + TestViewCudaTexture self; + Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Cuda, TagInit >( 0, N ), self ); - TestViewCudaTexture self ; - Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Cuda , TagInit >(0,N) , self ); - long error_count = -1 ; - Kokkos::parallel_reduce( Kokkos::RangePolicy< Kokkos::Cuda , TagTest >(0,N) , self , error_count ); - EXPECT_EQ( error_count , 0 ); - } + long error_count = -1; + Kokkos::parallel_reduce( Kokkos::RangePolicy< Kokkos::Cuda, TagTest >( 0, N ), self, error_count ); + EXPECT_EQ( error_count, 0 ); + } }; - -TEST_F( cuda , impl_view_texture ) +TEST_F( cuda, impl_view_texture ) { TestViewCudaTexture< Kokkos::CudaSpace >::run(); TestViewCudaTexture< Kokkos::CudaUVMSpace >::run(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_a.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_a.cpp index fd8a647ef3..0aea35db51 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_a.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_a.cpp @@ -40,53 +40,64 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( cuda, view_subview_auto_1d_left ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutLeft,Kokkos::Cuda >(); +TEST_F( cuda, 
view_subview_auto_1d_left ) +{ + TestViewSubview::test_auto_1d< Kokkos::LayoutLeft, Kokkos::Cuda >(); } -TEST_F( cuda, view_subview_auto_1d_right ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutRight,Kokkos::Cuda >(); +TEST_F( cuda, view_subview_auto_1d_right ) +{ + TestViewSubview::test_auto_1d< Kokkos::LayoutRight, Kokkos::Cuda >(); } -TEST_F( cuda, view_subview_auto_1d_stride ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutStride,Kokkos::Cuda >(); +TEST_F( cuda, view_subview_auto_1d_stride ) +{ + TestViewSubview::test_auto_1d< Kokkos::LayoutStride, Kokkos::Cuda >(); } -TEST_F( cuda, view_subview_assign_strided ) { +TEST_F( cuda, view_subview_assign_strided ) +{ TestViewSubview::test_1d_strided_assignment< Kokkos::Cuda >(); } -TEST_F( cuda, view_subview_left_0 ) { +TEST_F( cuda, view_subview_left_0 ) +{ TestViewSubview::test_left_0< Kokkos::CudaUVMSpace >(); } -TEST_F( cuda, view_subview_left_1 ) { +TEST_F( cuda, view_subview_left_1 ) +{ TestViewSubview::test_left_1< Kokkos::CudaUVMSpace >(); } -TEST_F( cuda, view_subview_left_2 ) { +TEST_F( cuda, view_subview_left_2 ) +{ TestViewSubview::test_left_2< Kokkos::CudaUVMSpace >(); } -TEST_F( cuda, view_subview_left_3 ) { +TEST_F( cuda, view_subview_left_3 ) +{ TestViewSubview::test_left_3< Kokkos::CudaUVMSpace >(); } -TEST_F( cuda, view_subview_right_0 ) { +TEST_F( cuda, view_subview_right_0 ) +{ TestViewSubview::test_right_0< Kokkos::CudaUVMSpace >(); } -TEST_F( cuda, view_subview_right_1 ) { +TEST_F( cuda, view_subview_right_1 ) +{ TestViewSubview::test_right_1< Kokkos::CudaUVMSpace >(); } -TEST_F( cuda, view_subview_right_3 ) { +TEST_F( cuda, view_subview_right_3 ) +{ TestViewSubview::test_right_3< Kokkos::CudaUVMSpace >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_b.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_b.cpp index 053fcfc209..f31f4cbe62 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_b.cpp +++ 
b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_b.cpp @@ -40,21 +40,23 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( cuda, view_subview_layoutleft_to_layoutleft) { +TEST_F( cuda, view_subview_layoutleft_to_layoutleft ) +{ TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Cuda >(); - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Cuda , Kokkos::MemoryTraits >(); - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Cuda , Kokkos::MemoryTraits >(); + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Cuda, Kokkos::MemoryTraits >(); + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Cuda, Kokkos::MemoryTraits >(); } -TEST_F( cuda, view_subview_layoutright_to_layoutright) { +TEST_F( cuda, view_subview_layoutright_to_layoutright ) +{ TestViewSubview::test_layoutright_to_layoutright< Kokkos::Cuda >(); - TestViewSubview::test_layoutright_to_layoutright< Kokkos::Cuda , Kokkos::MemoryTraits >(); - TestViewSubview::test_layoutright_to_layoutright< Kokkos::Cuda , Kokkos::MemoryTraits >(); + TestViewSubview::test_layoutright_to_layoutright< Kokkos::Cuda, Kokkos::MemoryTraits >(); + TestViewSubview::test_layoutright_to_layoutright< Kokkos::Cuda, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c01.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c01.cpp index 4c5f2ef72f..0213a196e8 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c01.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c01.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( cuda, view_subview_1d_assign ) { +TEST_F( cuda, view_subview_1d_assign ) +{ TestViewSubview::test_1d_assign< Kokkos::CudaUVMSpace >(); } -} // namespace test - +} // namespace Test diff --git 
a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c02.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c02.cpp index aee6f1730d..181e1bab2c 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c02.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c02.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( cuda, view_subview_1d_assign_atomic ) { - TestViewSubview::test_1d_assign< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits >(); +TEST_F( cuda, view_subview_1d_assign_atomic ) +{ + TestViewSubview::test_1d_assign< Kokkos::CudaUVMSpace, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c03.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c03.cpp index 2ef48c686e..708cc1f5ba 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c03.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c03.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( cuda, view_subview_1d_assign_randomaccess ) { - TestViewSubview::test_1d_assign< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits >(); +TEST_F( cuda, view_subview_1d_assign_randomaccess ) +{ + TestViewSubview::test_1d_assign< Kokkos::CudaUVMSpace, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c04.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c04.cpp index aec123ac23..a3db996f8d 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c04.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c04.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( cuda, view_subview_2d_from_3d ) { +TEST_F( cuda, view_subview_2d_from_3d ) +{ 
TestViewSubview::test_2d_subview_3d< Kokkos::CudaUVMSpace >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c05.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c05.cpp index e8ad231996..2f7cffa75d 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c05.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c05.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( cuda, view_subview_2d_from_3d_atomic ) { - TestViewSubview::test_2d_subview_3d< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits >(); +TEST_F( cuda, view_subview_2d_from_3d_atomic ) +{ + TestViewSubview::test_2d_subview_3d< Kokkos::CudaUVMSpace, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c06.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c06.cpp index e86b4513fd..949c6f3e0b 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c06.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c06.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( cuda, view_subview_2d_from_3d_randomaccess ) { - TestViewSubview::test_2d_subview_3d< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits >(); +TEST_F( cuda, view_subview_2d_from_3d_randomaccess ) +{ + TestViewSubview::test_2d_subview_3d< Kokkos::CudaUVMSpace, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c07.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c07.cpp index ad9dcc0fd1..3e68277a9e 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c07.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c07.cpp @@ -40,13 +40,14 @@ // ************************************************************************ 
//@HEADER */ + #include namespace Test { -TEST_F( cuda, view_subview_3d_from_5d_left ) { +TEST_F( cuda, view_subview_3d_from_5d_left ) +{ TestViewSubview::test_3d_subview_5d_left< Kokkos::CudaUVMSpace >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c08.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c08.cpp index f97d97e59c..0cd91b7795 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c08.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c08.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( cuda, view_subview_3d_from_5d_left_atomic ) { - TestViewSubview::test_3d_subview_5d_left< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits >(); +TEST_F( cuda, view_subview_3d_from_5d_left_atomic ) +{ + TestViewSubview::test_3d_subview_5d_left< Kokkos::CudaUVMSpace, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c09.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c09.cpp index 2a07f28f83..cd1c13f7d0 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c09.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c09.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( cuda, view_subview_3d_from_5d_left_randomaccess ) { - TestViewSubview::test_3d_subview_5d_left< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits >(); +TEST_F( cuda, view_subview_3d_from_5d_left_randomaccess ) +{ + TestViewSubview::test_3d_subview_5d_left< Kokkos::CudaUVMSpace, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c10.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c10.cpp index 3c51d94201..22d2753543 100644 --- 
a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c10.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c10.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( cuda, view_subview_3d_from_5d_right ) { +TEST_F( cuda, view_subview_3d_from_5d_right ) +{ TestViewSubview::test_3d_subview_5d_right< Kokkos::CudaUVMSpace >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c11.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c11.cpp index 835caa7b87..5dc5f87b4e 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c11.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c11.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( cuda, view_subview_3d_from_5d_right_atomic ) { - TestViewSubview::test_3d_subview_5d_right< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits >(); +TEST_F( cuda, view_subview_3d_from_5d_right_atomic ) +{ + TestViewSubview::test_3d_subview_5d_right< Kokkos::CudaUVMSpace, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c12.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c12.cpp index 53bd5eee20..318d8edbbb 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c12.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c12.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( cuda, view_subview_3d_from_5d_right_randomaccess ) { - TestViewSubview::test_3d_subview_5d_right< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits >(); +TEST_F( cuda, view_subview_3d_from_5d_right_randomaccess ) +{ + TestViewSubview::test_3d_subview_5d_right< Kokkos::CudaUVMSpace, Kokkos::MemoryTraits >(); } -} // namespace test 
- +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c_all.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c_all.cpp index e4348319f6..a2158f06c7 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c_all.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c_all.cpp @@ -1,12 +1,12 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Team.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Team.cpp index 13834d09ad..8d9b9328ba 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_Team.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Team.cpp @@ -40,81 +40,87 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( cuda , team_tag ) +TEST_F( cuda, team_tag ) { - TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule >::test_for(0); - TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule >::test_reduce(0); - TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule >::test_for(0); - TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule >::test_reduce(0); + TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule >::test_for( 0 ); + TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule >::test_reduce( 0 ); + TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule >::test_for( 0 ); + TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule >::test_reduce( 0 ); - TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule >::test_for(2); - TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule >::test_reduce(2); - TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule >::test_for(2); - TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule >::test_reduce(2); + TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule >::test_for( 2 ); + TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule >::test_reduce( 2 ); + TestTeamPolicy< 
Kokkos::Cuda, Kokkos::Schedule >::test_for( 2 ); + TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule >::test_reduce( 2 ); - TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule >::test_for(1000); - TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule >::test_reduce(1000); - TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule >::test_for(1000); - TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule >::test_reduce(1000); + TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule >::test_for( 1000 ); + TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule >::test_reduce( 1000 ); + TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule >::test_for( 1000 ); + TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule >::test_reduce( 1000 ); } -TEST_F( cuda , team_shared_request) { - TestSharedTeam< Kokkos::Cuda , Kokkos::Schedule >(); - TestSharedTeam< Kokkos::Cuda , Kokkos::Schedule >(); +TEST_F( cuda, team_shared_request ) +{ + TestSharedTeam< Kokkos::Cuda, Kokkos::Schedule >(); + TestSharedTeam< Kokkos::Cuda, Kokkos::Schedule >(); } -//THis Tests request to much L0 scratch -//TEST_F( cuda, team_scratch_request) { -// TestScratchTeam< Kokkos::Cuda , Kokkos::Schedule >(); -// TestScratchTeam< Kokkos::Cuda , Kokkos::Schedule >(); +// This tests request to much L0 scratch. 
+//TEST_F( cuda, team_scratch_request ) +//{ +// TestScratchTeam< Kokkos::Cuda, Kokkos::Schedule >(); +// TestScratchTeam< Kokkos::Cuda, Kokkos::Schedule >(); //} -#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -TEST_F( cuda , team_lambda_shared_request) { +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) +TEST_F( cuda, team_lambda_shared_request ) +{ TestLambdaSharedTeam< Kokkos::CudaSpace, Kokkos::Cuda, Kokkos::Schedule >(); TestLambdaSharedTeam< Kokkos::CudaUVMSpace, Kokkos::Cuda, Kokkos::Schedule >(); - TestLambdaSharedTeam< Kokkos::CudaHostPinnedSpace, Kokkos::Cuda , Kokkos::Schedule >(); + TestLambdaSharedTeam< Kokkos::CudaHostPinnedSpace, Kokkos::Cuda, Kokkos::Schedule >(); TestLambdaSharedTeam< Kokkos::CudaSpace, Kokkos::Cuda, Kokkos::Schedule >(); TestLambdaSharedTeam< Kokkos::CudaUVMSpace, Kokkos::Cuda, Kokkos::Schedule >(); - TestLambdaSharedTeam< Kokkos::CudaHostPinnedSpace, Kokkos::Cuda , Kokkos::Schedule >(); + TestLambdaSharedTeam< Kokkos::CudaHostPinnedSpace, Kokkos::Cuda, Kokkos::Schedule >(); } #endif -TEST_F( cuda, shmem_size) { +TEST_F( cuda, shmem_size ) +{ TestShmemSize< Kokkos::Cuda >(); } -TEST_F( cuda, multi_level_scratch) { - TestMultiLevelScratchTeam< Kokkos::Cuda , Kokkos::Schedule >(); - TestMultiLevelScratchTeam< Kokkos::Cuda , Kokkos::Schedule >(); +TEST_F( cuda, multi_level_scratch ) +{ + TestMultiLevelScratchTeam< Kokkos::Cuda, Kokkos::Schedule >(); + TestMultiLevelScratchTeam< Kokkos::Cuda, Kokkos::Schedule >(); } -TEST_F( cuda , team_vector ) +#if !defined(KOKKOS_CUDA_CLANG_WORKAROUND) && !defined(KOKKOS_ARCH_PASCAL) +TEST_F( cuda, team_vector ) { - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(0) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(1) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(2) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(3) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(4) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(5) ) ); - ASSERT_TRUE( 
( TestTeamVector::Test< Kokkos::Cuda >(6) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(7) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(8) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(9) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(10) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 0 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 1 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 2 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 3 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 4 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 5 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 6 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 7 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 8 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 9 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 10 ) ) ); } +#endif TEST_F( cuda, triple_nested_parallelism ) { - TestTripleNestedReduce< double, Kokkos::Cuda >( 8192, 2048 , 32 , 32 ); - TestTripleNestedReduce< double, Kokkos::Cuda >( 8192, 2048 , 32 , 16 ); - TestTripleNestedReduce< double, Kokkos::Cuda >( 8192, 2048 , 16 , 16 ); + TestTripleNestedReduce< double, Kokkos::Cuda >( 8192, 2048, 32, 32 ); + TestTripleNestedReduce< double, Kokkos::Cuda >( 8192, 2048, 32, 16 ); + TestTripleNestedReduce< double, Kokkos::Cuda >( 8192, 2048, 16, 16 ); } - -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_a.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_a.cpp index c01ca1c146..be0c4c5715 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_a.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_a.cpp @@ -40,20 +40,21 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( cuda , impl_view_mapping_a ) { 
+TEST_F( cuda, impl_view_mapping_a ) +{ test_view_mapping< Kokkos::CudaSpace >(); test_view_mapping_operator< Kokkos::CudaSpace >(); } -TEST_F( cuda , view_of_class ) +TEST_F( cuda, view_of_class ) { TestViewMappingClassValue< Kokkos::CudaSpace >::run(); TestViewMappingClassValue< Kokkos::CudaUVMSpace >::run(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_b.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_b.cpp index 8e821ada00..b4d8e5d953 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_b.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_b.cpp @@ -40,14 +40,15 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( cuda , impl_view_mapping_d ) { +TEST_F( cuda, impl_view_mapping_d ) +{ test_view_mapping< Kokkos::CudaHostPinnedSpace >(); test_view_mapping_operator< Kokkos::CudaHostPinnedSpace >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_c.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_c.cpp index cf29a68e96..e4e6894c53 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_c.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_c.cpp @@ -40,14 +40,15 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( cuda , impl_view_mapping_c ) { +TEST_F( cuda, impl_view_mapping_c ) +{ test_view_mapping< Kokkos::CudaUVMSpace >(); test_view_mapping_operator< Kokkos::CudaUVMSpace >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_d.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_d.cpp index db14b5158f..82a3dd83e8 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_d.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_d.cpp @@ -40,73 +40,77 @@ // 
************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( cuda , view_nested_view ) +TEST_F( cuda, view_nested_view ) { ::Test::view_nested_view< Kokkos::Cuda >(); } - - -TEST_F( cuda , view_remap ) +TEST_F( cuda, view_remap ) { - enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 }; + enum { N0 = 3, N1 = 2, N2 = 8, N3 = 9 }; - typedef Kokkos::View< double*[N1][N2][N3] , - Kokkos::LayoutRight , - Kokkos::CudaUVMSpace > output_type ; + typedef Kokkos::View< double*[N1][N2][N3], + Kokkos::LayoutRight, + Kokkos::CudaUVMSpace > output_type; - typedef Kokkos::View< int**[N2][N3] , - Kokkos::LayoutLeft , - Kokkos::CudaUVMSpace > input_type ; + typedef Kokkos::View< int**[N2][N3], + Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace > input_type; - typedef Kokkos::View< int*[N0][N2][N3] , - Kokkos::LayoutLeft , - Kokkos::CudaUVMSpace > diff_type ; + typedef Kokkos::View< int*[N0][N2][N3], + Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace > diff_type; - output_type output( "output" , N0 ); - input_type input ( "input" , N0 , N1 ); - diff_type diff ( "diff" , N0 ); + output_type output( "output", N0 ); + input_type input ( "input", N0, N1 ); + diff_type diff ( "diff", N0 ); Kokkos::fence(); - int value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - input(i0,i1,i2,i3) = ++value ; - }}}} + + int value = 0; + + for ( size_t i3 = 0; i3 < N3; ++i3 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i0 = 0; i0 < N0; ++i0 ) + { + input( i0, i1, i2, i3 ) = ++value; + } + Kokkos::fence(); - // Kokkos::deep_copy( diff , input ); // throw with incompatible shape - Kokkos::deep_copy( output , input ); - + // Kokkos::deep_copy( diff, input ); // Throw with incompatible shape. 
+ Kokkos::deep_copy( output, input ); + Kokkos::fence(); - value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - ++value ; - ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) ); - }}}} + + value = 0; + + for ( size_t i3 = 0; i3 < N3; ++i3 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i0 = 0; i0 < N0; ++i0 ) + { + ++value; + ASSERT_EQ( value, ( (int) output( i0, i1, i2, i3 ) ) ); + } + Kokkos::fence(); } -//---------------------------------------------------------------------------- - -TEST_F( cuda , view_aggregate ) +TEST_F( cuda, view_aggregate ) { TestViewAggregate< Kokkos::Cuda >(); } -TEST_F( cuda , template_meta_functions ) +TEST_F( cuda, template_meta_functions ) { - TestTemplateMetaFunctions(); + TestTemplateMetaFunctions< int, Kokkos::Cuda >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_e.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_e.cpp index 07d4256473..27450fa6ff 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_e.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_e.cpp @@ -40,17 +40,20 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( cuda , impl_shared_alloc ) { - test_shared_alloc< Kokkos::CudaSpace , Kokkos::HostSpace::execution_space >(); - test_shared_alloc< Kokkos::CudaUVMSpace , Kokkos::HostSpace::execution_space >(); - test_shared_alloc< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace::execution_space >(); +TEST_F( cuda, impl_shared_alloc ) +{ + test_shared_alloc< Kokkos::CudaSpace, Kokkos::HostSpace::execution_space >(); + test_shared_alloc< Kokkos::CudaUVMSpace, Kokkos::HostSpace::execution_space >(); + test_shared_alloc< Kokkos::CudaHostPinnedSpace, Kokkos::HostSpace::execution_space >(); } -TEST_F( 
cuda , impl_view_mapping_b ) { +TEST_F( cuda, impl_view_mapping_b ) +{ test_view_mapping_subview< Kokkos::CudaSpace >(); test_view_mapping_subview< Kokkos::CudaUVMSpace >(); test_view_mapping_subview< Kokkos::CudaHostPinnedSpace >(); @@ -59,5 +62,4 @@ TEST_F( cuda , impl_view_mapping_b ) { TestViewMappingAtomic< Kokkos::CudaHostPinnedSpace >::run(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_f.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_f.cpp index 34721f02dc..56524111ae 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_f.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_f.cpp @@ -40,16 +40,17 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( cuda, view_api_a) { - typedef Kokkos::View< const int * , Kokkos::Cuda , Kokkos::MemoryTraits< Kokkos::RandomAccess > > view_texture_managed ; - typedef Kokkos::View< const int * , Kokkos::Cuda , Kokkos::MemoryTraits< Kokkos::RandomAccess | Kokkos::Unmanaged > > view_texture_unmanaged ; +TEST_F( cuda, view_api_a ) +{ + typedef Kokkos::View< const int *, Kokkos::Cuda, Kokkos::MemoryTraits > view_texture_managed; + typedef Kokkos::View< const int *, Kokkos::Cuda, Kokkos::MemoryTraits > view_texture_unmanaged; - TestViewAPI< double , Kokkos::Cuda >(); + TestViewAPI< double, Kokkos::Cuda >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_g.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_g.cpp index abbcf3bf8b..d5fd24456d 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_g.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_g.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( cuda, view_api_b) { - TestViewAPI< double , Kokkos::CudaUVMSpace >(); +TEST_F( cuda, view_api_b ) +{ + 
TestViewAPI< double, Kokkos::CudaUVMSpace >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_h.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_h.cpp index 9899642035..649023e4af 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_h.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_h.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( cuda, view_api_c) { - TestViewAPI< double , Kokkos::CudaHostPinnedSpace >(); +TEST_F( cuda, view_api_c ) +{ + TestViewAPI< double, Kokkos::CudaHostPinnedSpace >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_s.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_s.cpp index 9bc09ba893..b46b1e5f81 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_s.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_s.cpp @@ -40,14 +40,15 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( cuda , view_space_assign ) { - view_space_assign< Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace >(); - view_space_assign< Kokkos::CudaSpace , Kokkos::CudaUVMSpace >(); +TEST_F( cuda, view_space_assign ) +{ + view_space_assign< Kokkos::HostSpace, Kokkos::CudaHostPinnedSpace >(); + view_space_assign< Kokkos::CudaSpace, Kokkos::CudaUVMSpace >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP.hpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP.hpp index 28ae5b41b0..ed9bb68cd6 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP.hpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP.hpp @@ -40,11 +40,14 @@ // ************************************************************************ //@HEADER */ + #ifndef KOKKOS_TEST_OPENMP_HPP #define KOKKOS_TEST_OPENMP_HPP + #include #include + 
#ifdef KOKKOS_LAMBDA #undef KOKKOS_LAMBDA #endif @@ -53,13 +56,8 @@ #include #include - -//---------------------------------------------------------------------------- - #include #include - - #include #include #include @@ -74,15 +72,11 @@ #include #include #include - - #include #include #include #include - #include - #include namespace Test { @@ -95,23 +89,24 @@ protected: const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); - const unsigned threads_count = std::max( 1u , numa_count ) * - std::max( 2u , ( cores_per_numa * threads_per_core ) / 2 ); + const unsigned threads_count = std::max( 1u, numa_count ) * + std::max( 2u, ( cores_per_numa * threads_per_core ) / 2 ); Kokkos::OpenMP::initialize( threads_count ); - Kokkos::OpenMP::print_configuration( std::cout , true ); - srand(10231); + Kokkos::print_configuration( std::cout, true ); + srand( 10231 ); } static void TearDownTestCase() { Kokkos::OpenMP::finalize(); - omp_set_num_threads(1); + omp_set_num_threads( 1 ); - ASSERT_EQ( 1 , omp_get_max_threads() ); + ASSERT_EQ( 1, omp_get_max_threads() ); } }; -} +} // namespace Test + #endif diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Atomics.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Atomics.cpp index ed6c9f8d16..2585c01973 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Atomics.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Atomics.cpp @@ -40,165 +40,162 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( openmp , atomics ) +TEST_F( openmp, atomics ) { - const int loop_count = 1e4 ; + const int loop_count = 1e4; - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::OpenMP >( loop_count, 1 ) ) ); + 
ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::OpenMP >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::OpenMP >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::OpenMP >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::OpenMP >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::OpenMP >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::OpenMP >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::OpenMP >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::OpenMP >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::OpenMP >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::OpenMP >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::OpenMP >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::OpenMP >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::OpenMP >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::OpenMP >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( 
TestAtomic::Loop(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::OpenMP >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::OpenMP >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::OpenMP >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(100,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::OpenMP >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::OpenMP >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::OpenMP >( 100, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop ,Kokkos::OpenMP>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop ,Kokkos::OpenMP>(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop ,Kokkos::OpenMP>(100,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex, Kokkos::OpenMP >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex, Kokkos::OpenMP >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex, Kokkos::OpenMP >( 100, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop ,Kokkos::OpenMP>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop ,Kokkos::OpenMP>(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop ,Kokkos::OpenMP>(100,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::OpenMP >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::OpenMP >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::OpenMP >( 100, 3 ) ) ); } -TEST_F( openmp , atomic_operations ) +TEST_F( openmp, atomic_operations ) { - const int start = 1; //Avoid zero for division + const int start = 1; // Avoid zero for division. 
const int end = 11; - for (int i = start; i < end; ++i) + + for ( int i = start; i < end; ++i ) { - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 12 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 7 ) ) ); + 
ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 12 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 12 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< 
unsigned int, Kokkos::OpenMP >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 12 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, 
end-i, 12 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 12 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 12 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 12 ) ) ); - ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 12 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 12 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::OpenMP >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::OpenMP >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::OpenMP >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::OpenMP >( start, end - i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::OpenMP >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::OpenMP >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::OpenMP >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::OpenMP >( start, end - i, 4 ) ) ); } - } - -TEST_F( openmp , atomic_views_integral ) +TEST_F( openmp, atomic_views_integral ) { const long length = 1000000; { - //Integral Types - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 8 ) ) ); - + // Integral Types. 
+ ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::OpenMP >( length, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::OpenMP >( length, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::OpenMP >( length, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::OpenMP >( length, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::OpenMP >( length, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::OpenMP >( length, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::OpenMP >( length, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::OpenMP >( length, 8 ) ) ); } } -TEST_F( openmp , atomic_views_nonintegral ) +TEST_F( openmp, atomic_views_nonintegral ) { const long length = 1000000; { - //Non-Integral Types - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType(length, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType(length, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType(length, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType(length, 4 ) ) ); - + // Non-Integral Types. 
+ ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::OpenMP >( length, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::OpenMP >( length, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::OpenMP >( length, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::OpenMP >( length, 4 ) ) ); } } -TEST_F( openmp , atomic_view_api ) +TEST_F( openmp, atomic_view_api ) { - TestAtomicViews::TestAtomicViewAPI(); + TestAtomicViews::TestAtomicViewAPI(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp index 126d730f0f..b4f32dac70 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp @@ -40,65 +40,90 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( openmp , init ) { +TEST_F( openmp, init ) +{ ; } -TEST_F( openmp , md_range ) { - TestMDRange_2D< Kokkos::OpenMP >::test_for2(100,100); +TEST_F( openmp, mdrange_for ) +{ + Kokkos::Timer timer; + TestMDRange_2D< Kokkos::OpenMP >::test_for2( 10000, 1000 ); + std::cout << " 2D: " << timer.seconds() << std::endl; - TestMDRange_3D< Kokkos::OpenMP >::test_for3(100,100,100); + timer.reset(); + TestMDRange_3D< Kokkos::OpenMP >::test_for3( 100, 100, 1000 ); + std::cout << " 3D: " << timer.seconds() << std::endl; + + timer.reset(); + TestMDRange_4D< Kokkos::OpenMP >::test_for4( 100, 10, 100, 100 ); + std::cout << " 4D: " << timer.seconds() << std::endl; + + timer.reset(); + TestMDRange_5D< Kokkos::OpenMP >::test_for5( 100, 10, 10, 100, 50 ); + std::cout << " 5D: " << timer.seconds() << std::endl; + + timer.reset(); + TestMDRange_6D< Kokkos::OpenMP >::test_for6( 10, 10, 10, 10, 50, 50 ); + std::cout << " 6D: " << timer.seconds() << 
std::endl; } -TEST_F( openmp, policy_construction) { +TEST_F( openmp, mdrange_reduce ) +{ + TestMDRange_2D< Kokkos::OpenMP >::test_reduce2( 100, 100 ); + TestMDRange_3D< Kokkos::OpenMP >::test_reduce3( 100, 10, 100 ); +} + +TEST_F( openmp, policy_construction ) +{ TestRangePolicyConstruction< Kokkos::OpenMP >(); TestTeamPolicyConstruction< Kokkos::OpenMP >(); } -TEST_F( openmp , range_tag ) +TEST_F( openmp, range_tag ) { - TestRange< Kokkos::OpenMP , Kokkos::Schedule >::test_for(0); - TestRange< Kokkos::OpenMP , Kokkos::Schedule >::test_reduce(0); - TestRange< Kokkos::OpenMP , Kokkos::Schedule >::test_scan(0); - TestRange< Kokkos::OpenMP , Kokkos::Schedule >::test_for(0); - TestRange< Kokkos::OpenMP , Kokkos::Schedule >::test_reduce(0); - TestRange< Kokkos::OpenMP , Kokkos::Schedule >::test_scan(0); - TestRange< Kokkos::OpenMP , Kokkos::Schedule >::test_dynamic_policy(0); + TestRange< Kokkos::OpenMP, Kokkos::Schedule >::test_for( 0 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule >::test_reduce( 0 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule >::test_scan( 0 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule >::test_for( 0 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule >::test_reduce( 0 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule >::test_scan( 0 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule >::test_dynamic_policy( 0 ); - TestRange< Kokkos::OpenMP , Kokkos::Schedule >::test_for(2); - TestRange< Kokkos::OpenMP , Kokkos::Schedule >::test_reduce(2); - TestRange< Kokkos::OpenMP , Kokkos::Schedule >::test_scan(2); + TestRange< Kokkos::OpenMP, Kokkos::Schedule >::test_for( 2 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule >::test_reduce( 2 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule >::test_scan( 2 ); - TestRange< Kokkos::OpenMP , Kokkos::Schedule >::test_for(3); - TestRange< Kokkos::OpenMP , Kokkos::Schedule >::test_reduce(3); - TestRange< Kokkos::OpenMP , Kokkos::Schedule >::test_scan(3); - TestRange< Kokkos::OpenMP , Kokkos::Schedule 
>::test_dynamic_policy(3); + TestRange< Kokkos::OpenMP, Kokkos::Schedule >::test_for( 3 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule >::test_reduce( 3 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule >::test_scan( 3 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule >::test_dynamic_policy( 3 ); - TestRange< Kokkos::OpenMP , Kokkos::Schedule >::test_for(1000); - TestRange< Kokkos::OpenMP , Kokkos::Schedule >::test_reduce(1000); - TestRange< Kokkos::OpenMP , Kokkos::Schedule >::test_scan(1000); + TestRange< Kokkos::OpenMP, Kokkos::Schedule >::test_for( 1000 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule >::test_reduce( 1000 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule >::test_scan( 1000 ); - TestRange< Kokkos::OpenMP , Kokkos::Schedule >::test_for(1001); - TestRange< Kokkos::OpenMP , Kokkos::Schedule >::test_reduce(1001); - TestRange< Kokkos::OpenMP , Kokkos::Schedule >::test_scan(1001); - TestRange< Kokkos::OpenMP , Kokkos::Schedule >::test_dynamic_policy(1000); + TestRange< Kokkos::OpenMP, Kokkos::Schedule >::test_for( 1001 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule >::test_reduce( 1001 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule >::test_scan( 1001 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule >::test_dynamic_policy( 1000 ); } - //---------------------------------------------------------------------------- -TEST_F( openmp , compiler_macros ) +TEST_F( openmp, compiler_macros ) { ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::OpenMP >() ) ); } //---------------------------------------------------------------------------- -TEST_F( openmp , memory_pool ) +TEST_F( openmp, memory_pool ) { bool val = TestMemoryPool::test_mempool< Kokkos::OpenMP >( 128, 128000000 ); ASSERT_TRUE( val ); @@ -112,24 +137,24 @@ TEST_F( openmp , memory_pool ) #if defined( KOKKOS_ENABLE_TASKDAG ) -TEST_F( openmp , task_fib ) +TEST_F( openmp, task_fib ) { - for ( int i = 0 ; i < 25 ; ++i ) { - TestTaskScheduler::TestFib< Kokkos::OpenMP >::run(i, (i+1)*(i+1)*10000 ); + for 
( int i = 0; i < 25; ++i ) { + TestTaskScheduler::TestFib< Kokkos::OpenMP >::run( i, ( i + 1 ) * ( i + 1 ) * 10000 ); } } -TEST_F( openmp , task_depend ) +TEST_F( openmp, task_depend ) { - for ( int i = 0 ; i < 25 ; ++i ) { - TestTaskScheduler::TestTaskDependence< Kokkos::OpenMP >::run(i); + for ( int i = 0; i < 25; ++i ) { + TestTaskScheduler::TestTaskDependence< Kokkos::OpenMP >::run( i ); } } -TEST_F( openmp , task_team ) +TEST_F( openmp, task_team ) { - TestTaskScheduler::TestTaskTeam< Kokkos::OpenMP >::run(1000); - //TestTaskScheduler::TestTaskTeamValue< Kokkos::OpenMP >::run(1000); //put back after testing + TestTaskScheduler::TestTaskTeam< Kokkos::OpenMP >::run( 1000 ); + //TestTaskScheduler::TestTaskTeamValue< Kokkos::OpenMP >::run( 1000 ); // Put back after testing. } #endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ @@ -137,53 +162,51 @@ TEST_F( openmp , task_team ) //---------------------------------------------------------------------------- #if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) -TEST_F( openmp , cxx11 ) +TEST_F( openmp, cxx11 ) { - if ( std::is_same< Kokkos::DefaultExecutionSpace , Kokkos::OpenMP >::value ) { - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(1) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(2) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(3) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(4) ) ); + if ( std::is_same< Kokkos::DefaultExecutionSpace, Kokkos::OpenMP >::value ) { + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >( 1 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >( 2 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >( 3 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >( 4 ) ) ); } } #endif TEST_F( openmp, tile_layout ) { - TestTile::test< Kokkos::OpenMP , 1 , 1 >( 1 , 1 ); - TestTile::test< Kokkos::OpenMP , 1 , 1 >( 2 , 3 ); - TestTile::test< Kokkos::OpenMP , 1 , 1 >( 9 , 10 ); + TestTile::test< Kokkos::OpenMP, 1, 1 >( 1, 1 ); + TestTile::test< 
Kokkos::OpenMP, 1, 1 >( 2, 3 ); + TestTile::test< Kokkos::OpenMP, 1, 1 >( 9, 10 ); - TestTile::test< Kokkos::OpenMP , 2 , 2 >( 1 , 1 ); - TestTile::test< Kokkos::OpenMP , 2 , 2 >( 2 , 3 ); - TestTile::test< Kokkos::OpenMP , 2 , 2 >( 4 , 4 ); - TestTile::test< Kokkos::OpenMP , 2 , 2 >( 9 , 9 ); + TestTile::test< Kokkos::OpenMP, 2, 2 >( 1, 1 ); + TestTile::test< Kokkos::OpenMP, 2, 2 >( 2, 3 ); + TestTile::test< Kokkos::OpenMP, 2, 2 >( 4, 4 ); + TestTile::test< Kokkos::OpenMP, 2, 2 >( 9, 9 ); - TestTile::test< Kokkos::OpenMP , 2 , 4 >( 9 , 9 ); - TestTile::test< Kokkos::OpenMP , 4 , 2 >( 9 , 9 ); + TestTile::test< Kokkos::OpenMP, 2, 4 >( 9, 9 ); + TestTile::test< Kokkos::OpenMP, 4, 2 >( 9, 9 ); - TestTile::test< Kokkos::OpenMP , 4 , 4 >( 1 , 1 ); - TestTile::test< Kokkos::OpenMP , 4 , 4 >( 4 , 4 ); - TestTile::test< Kokkos::OpenMP , 4 , 4 >( 9 , 9 ); - TestTile::test< Kokkos::OpenMP , 4 , 4 >( 9 , 11 ); + TestTile::test< Kokkos::OpenMP, 4, 4 >( 1, 1 ); + TestTile::test< Kokkos::OpenMP, 4, 4 >( 4, 4 ); + TestTile::test< Kokkos::OpenMP, 4, 4 >( 9, 9 ); + TestTile::test< Kokkos::OpenMP, 4, 4 >( 9, 11 ); - TestTile::test< Kokkos::OpenMP , 8 , 8 >( 1 , 1 ); - TestTile::test< Kokkos::OpenMP , 8 , 8 >( 4 , 4 ); - TestTile::test< Kokkos::OpenMP , 8 , 8 >( 9 , 9 ); - TestTile::test< Kokkos::OpenMP , 8 , 8 >( 9 , 11 ); + TestTile::test< Kokkos::OpenMP, 8, 8 >( 1, 1 ); + TestTile::test< Kokkos::OpenMP, 8, 8 >( 4, 4 ); + TestTile::test< Kokkos::OpenMP, 8, 8 >( 9, 9 ); + TestTile::test< Kokkos::OpenMP, 8, 8 >( 9, 11 ); } - -TEST_F( openmp , dispatch ) +TEST_F( openmp, dispatch ) { - const int repeat = 100 ; - for ( int i = 0 ; i < repeat ; ++i ) { - for ( int j = 0 ; j < repeat ; ++j ) { - Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::OpenMP >(0,j) - , KOKKOS_LAMBDA( int ) {} ); - }} + const int repeat = 100; + for ( int i = 0; i < repeat; ++i ) { + for ( int j = 0; j < repeat; ++j ) { + Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::OpenMP >( 0, j ) + , KOKKOS_LAMBDA( 
int ) {} ); + } + } } - -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Reductions.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Reductions.cpp index d41e1493ee..22c29308a6 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Reductions.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Reductions.cpp @@ -40,46 +40,52 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( openmp, long_reduce) { - TestReduce< long , Kokkos::OpenMP >( 0 ); - TestReduce< long , Kokkos::OpenMP >( 1000000 ); -} - -TEST_F( openmp, double_reduce) { - TestReduce< double , Kokkos::OpenMP >( 0 ); - TestReduce< double , Kokkos::OpenMP >( 1000000 ); -} - -TEST_F( openmp , reducers ) +TEST_F( openmp, long_reduce ) { - TestReducers::execute_integer(); - TestReducers::execute_integer(); - TestReducers::execute_float(); - TestReducers, Kokkos::OpenMP>::execute_basic(); + TestReduce< long, Kokkos::OpenMP >( 0 ); + TestReduce< long, Kokkos::OpenMP >( 1000000 ); } -TEST_F( openmp, long_reduce_dynamic ) { - TestReduceDynamic< long , Kokkos::OpenMP >( 0 ); - TestReduceDynamic< long , Kokkos::OpenMP >( 1000000 ); -} - -TEST_F( openmp, double_reduce_dynamic ) { - TestReduceDynamic< double , Kokkos::OpenMP >( 0 ); - TestReduceDynamic< double , Kokkos::OpenMP >( 1000000 ); -} - -TEST_F( openmp, long_reduce_dynamic_view ) { - TestReduceDynamicView< long , Kokkos::OpenMP >( 0 ); - TestReduceDynamicView< long , Kokkos::OpenMP >( 1000000 ); -} - -TEST_F( openmp , scan ) +TEST_F( openmp, double_reduce ) { - TestScan< Kokkos::OpenMP >::test_range( 1 , 1000 ); + TestReduce< double, Kokkos::OpenMP >( 0 ); + TestReduce< double, Kokkos::OpenMP >( 1000000 ); +} + +TEST_F( openmp, reducers ) +{ + TestReducers< int, Kokkos::OpenMP >::execute_integer(); + TestReducers< size_t, Kokkos::OpenMP >::execute_integer(); + TestReducers< double, Kokkos::OpenMP >::execute_float(); + 
TestReducers< Kokkos::complex, Kokkos::OpenMP >::execute_basic(); +} + +TEST_F( openmp, long_reduce_dynamic ) +{ + TestReduceDynamic< long, Kokkos::OpenMP >( 0 ); + TestReduceDynamic< long, Kokkos::OpenMP >( 1000000 ); +} + +TEST_F( openmp, double_reduce_dynamic ) +{ + TestReduceDynamic< double, Kokkos::OpenMP >( 0 ); + TestReduceDynamic< double, Kokkos::OpenMP >( 1000000 ); +} + +TEST_F( openmp, long_reduce_dynamic_view ) +{ + TestReduceDynamicView< long, Kokkos::OpenMP >( 0 ); + TestReduceDynamicView< long, Kokkos::OpenMP >( 1000000 ); +} + +TEST_F( openmp, scan ) +{ + TestScan< Kokkos::OpenMP >::test_range( 1, 1000 ); TestScan< Kokkos::OpenMP >( 0 ); TestScan< Kokkos::OpenMP >( 100000 ); TestScan< Kokkos::OpenMP >( 10000000 ); @@ -87,10 +93,11 @@ TEST_F( openmp , scan ) } #if 0 -TEST_F( openmp , scan_small ) +TEST_F( openmp, scan_small ) { - typedef TestScan< Kokkos::OpenMP , Kokkos::Impl::OpenMPExecUseScanSmall > TestScanFunctor ; - for ( int i = 0 ; i < 1000 ; ++i ) { + typedef TestScan< Kokkos::OpenMP, Kokkos::Impl::OpenMPExecUseScanSmall > TestScanFunctor; + + for ( int i = 0; i < 1000; ++i ) { TestScanFunctor( 10 ); TestScanFunctor( 10000 ); } @@ -101,38 +108,39 @@ TEST_F( openmp , scan_small ) } #endif -TEST_F( openmp , team_scan ) +TEST_F( openmp, team_scan ) { - TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule >( 0 ); - TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule >( 0 ); - TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule >( 10 ); - TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule >( 10 ); - TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule >( 10000 ); - TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule >( 10000 ); + TestScanTeam< Kokkos::OpenMP, Kokkos::Schedule >( 0 ); + TestScanTeam< Kokkos::OpenMP, Kokkos::Schedule >( 0 ); + TestScanTeam< Kokkos::OpenMP, Kokkos::Schedule >( 10 ); + TestScanTeam< Kokkos::OpenMP, Kokkos::Schedule >( 10 ); + TestScanTeam< Kokkos::OpenMP, Kokkos::Schedule >( 10000 ); + TestScanTeam< Kokkos::OpenMP, Kokkos::Schedule >( 10000 
); } -TEST_F( openmp , team_long_reduce) { - TestReduceTeam< long , Kokkos::OpenMP , Kokkos::Schedule >( 0 ); - TestReduceTeam< long , Kokkos::OpenMP , Kokkos::Schedule >( 0 ); - TestReduceTeam< long , Kokkos::OpenMP , Kokkos::Schedule >( 3 ); - TestReduceTeam< long , Kokkos::OpenMP , Kokkos::Schedule >( 3 ); - TestReduceTeam< long , Kokkos::OpenMP , Kokkos::Schedule >( 100000 ); - TestReduceTeam< long , Kokkos::OpenMP , Kokkos::Schedule >( 100000 ); +TEST_F( openmp, team_long_reduce ) +{ + TestReduceTeam< long, Kokkos::OpenMP, Kokkos::Schedule >( 0 ); + TestReduceTeam< long, Kokkos::OpenMP, Kokkos::Schedule >( 0 ); + TestReduceTeam< long, Kokkos::OpenMP, Kokkos::Schedule >( 3 ); + TestReduceTeam< long, Kokkos::OpenMP, Kokkos::Schedule >( 3 ); + TestReduceTeam< long, Kokkos::OpenMP, Kokkos::Schedule >( 100000 ); + TestReduceTeam< long, Kokkos::OpenMP, Kokkos::Schedule >( 100000 ); } -TEST_F( openmp , team_double_reduce) { - TestReduceTeam< double , Kokkos::OpenMP , Kokkos::Schedule >( 0 ); - TestReduceTeam< double , Kokkos::OpenMP , Kokkos::Schedule >( 0 ); - TestReduceTeam< double , Kokkos::OpenMP , Kokkos::Schedule >( 3 ); - TestReduceTeam< double , Kokkos::OpenMP , Kokkos::Schedule >( 3 ); - TestReduceTeam< double , Kokkos::OpenMP , Kokkos::Schedule >( 100000 ); - TestReduceTeam< double , Kokkos::OpenMP , Kokkos::Schedule >( 100000 ); +TEST_F( openmp, team_double_reduce ) +{ + TestReduceTeam< double, Kokkos::OpenMP, Kokkos::Schedule >( 0 ); + TestReduceTeam< double, Kokkos::OpenMP, Kokkos::Schedule >( 0 ); + TestReduceTeam< double, Kokkos::OpenMP, Kokkos::Schedule >( 3 ); + TestReduceTeam< double, Kokkos::OpenMP, Kokkos::Schedule >( 3 ); + TestReduceTeam< double, Kokkos::OpenMP, Kokkos::Schedule >( 100000 ); + TestReduceTeam< double, Kokkos::OpenMP, Kokkos::Schedule >( 100000 ); } -TEST_F( openmp , reduction_deduction ) +TEST_F( openmp, reduction_deduction ) { TestCXX11::test_reduction_deduction< Kokkos::OpenMP >(); } -} // namespace test - +} // namespace Test 
diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_a.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_a.cpp index 9854417e42..fefae07322 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_a.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_a.cpp @@ -40,53 +40,64 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( openmp, view_subview_auto_1d_left ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutLeft,Kokkos::OpenMP >(); +TEST_F( openmp, view_subview_auto_1d_left ) +{ + TestViewSubview::test_auto_1d< Kokkos::LayoutLeft, Kokkos::OpenMP >(); } -TEST_F( openmp, view_subview_auto_1d_right ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutRight,Kokkos::OpenMP >(); +TEST_F( openmp, view_subview_auto_1d_right ) +{ + TestViewSubview::test_auto_1d< Kokkos::LayoutRight, Kokkos::OpenMP >(); } -TEST_F( openmp, view_subview_auto_1d_stride ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutStride,Kokkos::OpenMP >(); +TEST_F( openmp, view_subview_auto_1d_stride ) +{ + TestViewSubview::test_auto_1d< Kokkos::LayoutStride, Kokkos::OpenMP >(); } -TEST_F( openmp, view_subview_assign_strided ) { +TEST_F( openmp, view_subview_assign_strided ) +{ TestViewSubview::test_1d_strided_assignment< Kokkos::OpenMP >(); } -TEST_F( openmp, view_subview_left_0 ) { +TEST_F( openmp, view_subview_left_0 ) +{ TestViewSubview::test_left_0< Kokkos::OpenMP >(); } -TEST_F( openmp, view_subview_left_1 ) { +TEST_F( openmp, view_subview_left_1 ) +{ TestViewSubview::test_left_1< Kokkos::OpenMP >(); } -TEST_F( openmp, view_subview_left_2 ) { +TEST_F( openmp, view_subview_left_2 ) +{ TestViewSubview::test_left_2< Kokkos::OpenMP >(); } -TEST_F( openmp, view_subview_left_3 ) { +TEST_F( openmp, view_subview_left_3 ) +{ TestViewSubview::test_left_3< Kokkos::OpenMP >(); } -TEST_F( openmp, view_subview_right_0 ) { +TEST_F( openmp, view_subview_right_0 ) +{ TestViewSubview::test_right_0< 
Kokkos::OpenMP >(); } -TEST_F( openmp, view_subview_right_1 ) { +TEST_F( openmp, view_subview_right_1 ) +{ TestViewSubview::test_right_1< Kokkos::OpenMP >(); } -TEST_F( openmp, view_subview_right_3 ) { +TEST_F( openmp, view_subview_right_3 ) +{ TestViewSubview::test_right_3< Kokkos::OpenMP >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_b.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_b.cpp index 2aa1fc5c63..7de7ca91bd 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_b.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_b.cpp @@ -40,21 +40,23 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( openmp, view_subview_layoutleft_to_layoutleft) { +TEST_F( openmp, view_subview_layoutleft_to_layoutleft ) +{ TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::OpenMP >(); - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::OpenMP , Kokkos::MemoryTraits >(); - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::OpenMP , Kokkos::MemoryTraits >(); + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::OpenMP, Kokkos::MemoryTraits >(); + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::OpenMP, Kokkos::MemoryTraits >(); } -TEST_F( openmp, view_subview_layoutright_to_layoutright) { +TEST_F( openmp, view_subview_layoutright_to_layoutright ) +{ TestViewSubview::test_layoutright_to_layoutright< Kokkos::OpenMP >(); - TestViewSubview::test_layoutright_to_layoutright< Kokkos::OpenMP , Kokkos::MemoryTraits >(); - TestViewSubview::test_layoutright_to_layoutright< Kokkos::OpenMP , Kokkos::MemoryTraits >(); + TestViewSubview::test_layoutright_to_layoutright< Kokkos::OpenMP, Kokkos::MemoryTraits >(); + TestViewSubview::test_layoutright_to_layoutright< Kokkos::OpenMP, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git 
a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c01.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c01.cpp index 1a6871cfca..d727ec0ee5 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c01.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c01.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( openmp, view_subview_1d_assign ) { +TEST_F( openmp, view_subview_1d_assign ) +{ TestViewSubview::test_1d_assign< Kokkos::OpenMP >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c02.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c02.cpp index b04edbb997..df43f555d3 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c02.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c02.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( openmp, view_subview_1d_assign_atomic ) { - TestViewSubview::test_1d_assign< Kokkos::OpenMP , Kokkos::MemoryTraits >(); +TEST_F( openmp, view_subview_1d_assign_atomic ) +{ + TestViewSubview::test_1d_assign< Kokkos::OpenMP, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c03.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c03.cpp index 765e235830..38f241ebf7 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c03.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c03.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( openmp, view_subview_1d_assign_randomaccess ) { - TestViewSubview::test_1d_assign< Kokkos::OpenMP , Kokkos::MemoryTraits >(); +TEST_F( openmp, view_subview_1d_assign_randomaccess ) +{ + 
TestViewSubview::test_1d_assign< Kokkos::OpenMP, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c04.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c04.cpp index 9d8b62708a..11a4ea8ac2 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c04.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c04.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( openmp, view_subview_2d_from_3d ) { +TEST_F( openmp, view_subview_2d_from_3d ) +{ TestViewSubview::test_2d_subview_3d< Kokkos::OpenMP >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c05.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c05.cpp index 9c19cf0e57..a91baa34df 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c05.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c05.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( openmp, view_subview_2d_from_3d_atomic ) { - TestViewSubview::test_2d_subview_3d< Kokkos::OpenMP , Kokkos::MemoryTraits >(); +TEST_F( openmp, view_subview_2d_from_3d_atomic ) +{ + TestViewSubview::test_2d_subview_3d< Kokkos::OpenMP, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c06.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c06.cpp index c1bdf72351..20d4d9bd64 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c06.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c06.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( openmp, 
view_subview_2d_from_3d_randomaccess ) { - TestViewSubview::test_2d_subview_3d< Kokkos::OpenMP , Kokkos::MemoryTraits >(); +TEST_F( openmp, view_subview_2d_from_3d_randomaccess ) +{ + TestViewSubview::test_2d_subview_3d< Kokkos::OpenMP, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c07.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c07.cpp index 08a3b5a54a..528df1c070 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c07.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c07.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( openmp, view_subview_3d_from_5d_left ) { +TEST_F( openmp, view_subview_3d_from_5d_left ) +{ TestViewSubview::test_3d_subview_5d_left< Kokkos::OpenMP >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c08.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c08.cpp index 0864ebbdaa..d9eea8dba9 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c08.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c08.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( openmp, view_subview_3d_from_5d_left_atomic ) { - TestViewSubview::test_3d_subview_5d_left< Kokkos::OpenMP , Kokkos::MemoryTraits >(); +TEST_F( openmp, view_subview_3d_from_5d_left_atomic ) +{ + TestViewSubview::test_3d_subview_5d_left< Kokkos::OpenMP, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c09.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c09.cpp index e38dfecbf6..f909dc33c0 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c09.cpp +++ 
b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c09.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( openmp, view_subview_3d_from_5d_left_randomaccess ) { - TestViewSubview::test_3d_subview_5d_left< Kokkos::OpenMP , Kokkos::MemoryTraits >(); +TEST_F( openmp, view_subview_3d_from_5d_left_randomaccess ) +{ + TestViewSubview::test_3d_subview_5d_left< Kokkos::OpenMP, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c10.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c10.cpp index b7e4683d23..59996d5e33 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c10.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c10.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( openmp, view_subview_3d_from_5d_right ) { +TEST_F( openmp, view_subview_3d_from_5d_right ) +{ TestViewSubview::test_3d_subview_5d_right< Kokkos::OpenMP >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c11.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c11.cpp index fc3e66fd48..3f9c215d9b 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c11.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c11.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( openmp, view_subview_3d_from_5d_right_atomic ) { - TestViewSubview::test_3d_subview_5d_right< Kokkos::OpenMP , Kokkos::MemoryTraits >(); +TEST_F( openmp, view_subview_3d_from_5d_right_atomic ) +{ + TestViewSubview::test_3d_subview_5d_right< Kokkos::OpenMP, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git 
a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c12.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c12.cpp index e21a13ee57..d3a73483a0 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c12.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c12.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( openmp, view_subview_3d_from_5d_right_randomaccess ) { - TestViewSubview::test_3d_subview_5d_right< Kokkos::OpenMP , Kokkos::MemoryTraits >(); +TEST_F( openmp, view_subview_3d_from_5d_right_randomaccess ) +{ + TestViewSubview::test_3d_subview_5d_right< Kokkos::OpenMP, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c_all.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c_all.cpp index 9da159ab57..399c6e92e4 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c_all.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c_all.cpp @@ -1,12 +1,12 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Team.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Team.cpp index 38cf0a0f40..216789e8bf 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Team.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Team.cpp @@ -40,67 +40,73 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( openmp , team_tag ) +TEST_F( openmp, team_tag ) { - TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule >::test_for(0); - TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule >::test_reduce(0); - TestTeamPolicy< Kokkos::OpenMP , 
Kokkos::Schedule >::test_for(0); - TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule >::test_reduce(0); + TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule >::test_for( 0 ); + TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule >::test_reduce( 0 ); + TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule >::test_for( 0 ); + TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule >::test_reduce( 0 ); - TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule >::test_for(2); - TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule >::test_reduce(2); - TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule >::test_for(2); - TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule >::test_reduce(2); + TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule >::test_for( 2 ); + TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule >::test_reduce( 2 ); + TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule >::test_for( 2 ); + TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule >::test_reduce( 2 ); - TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule >::test_for(1000); - TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule >::test_reduce(1000); - TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule >::test_for(1000); - TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule >::test_reduce(1000); + TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule >::test_for( 1000 ); + TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule >::test_reduce( 1000 ); + TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule >::test_for( 1000 ); + TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule >::test_reduce( 1000 ); } -TEST_F( openmp , team_shared_request) { - TestSharedTeam< Kokkos::OpenMP , Kokkos::Schedule >(); - TestSharedTeam< Kokkos::OpenMP , Kokkos::Schedule >(); +TEST_F( openmp, team_shared_request ) +{ + TestSharedTeam< Kokkos::OpenMP, Kokkos::Schedule >(); + TestSharedTeam< Kokkos::OpenMP, Kokkos::Schedule >(); } -TEST_F( openmp, team_scratch_request) { - TestScratchTeam< Kokkos::OpenMP , Kokkos::Schedule >(); - TestScratchTeam< Kokkos::OpenMP , Kokkos::Schedule >(); 
+TEST_F( openmp, team_scratch_request ) +{ + TestScratchTeam< Kokkos::OpenMP, Kokkos::Schedule >(); + TestScratchTeam< Kokkos::OpenMP, Kokkos::Schedule >(); } -#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -TEST_F( openmp , team_lambda_shared_request) { - TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::OpenMP , Kokkos::Schedule >(); - TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::OpenMP , Kokkos::Schedule >(); +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) +TEST_F( openmp, team_lambda_shared_request ) +{ + TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::OpenMP, Kokkos::Schedule >(); + TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::OpenMP, Kokkos::Schedule >(); } #endif -TEST_F( openmp, shmem_size) { +TEST_F( openmp, shmem_size ) +{ TestShmemSize< Kokkos::OpenMP >(); } -TEST_F( openmp, multi_level_scratch) { - TestMultiLevelScratchTeam< Kokkos::OpenMP , Kokkos::Schedule >(); - TestMultiLevelScratchTeam< Kokkos::OpenMP , Kokkos::Schedule >(); +TEST_F( openmp, multi_level_scratch ) +{ + TestMultiLevelScratchTeam< Kokkos::OpenMP, Kokkos::Schedule >(); + TestMultiLevelScratchTeam< Kokkos::OpenMP, Kokkos::Schedule >(); } -TEST_F( openmp , team_vector ) +TEST_F( openmp, team_vector ) { - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(0) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(1) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(2) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(3) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(4) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(5) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(6) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(7) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(8) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(9) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(10) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 0 ) ) ); + ASSERT_TRUE( 
( TestTeamVector::Test< Kokkos::OpenMP >( 1 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 2 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 3 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 4 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 5 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 6 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 7 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 8 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 9 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 10 ) ) ); } #ifdef KOKKOS_COMPILER_GNU @@ -112,11 +118,10 @@ TEST_F( openmp , team_vector ) #ifndef SKIP_TEST TEST_F( openmp, triple_nested_parallelism ) { - TestTripleNestedReduce< double, Kokkos::OpenMP >( 8192, 2048 , 32 , 32 ); - TestTripleNestedReduce< double, Kokkos::OpenMP >( 8192, 2048 , 32 , 16 ); - TestTripleNestedReduce< double, Kokkos::OpenMP >( 8192, 2048 , 16 , 16 ); + TestTripleNestedReduce< double, Kokkos::OpenMP >( 8192, 2048, 32, 32 ); + TestTripleNestedReduce< double, Kokkos::OpenMP >( 8192, 2048, 32, 16 ); + TestTripleNestedReduce< double, Kokkos::OpenMP >( 8192, 2048, 16, 16 ); } #endif -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_a.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_a.cpp index 82cbf3ea18..aead381a11 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_a.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_a.cpp @@ -40,14 +40,15 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( openmp , impl_view_mapping_a ) { +TEST_F( openmp, impl_view_mapping_a ) +{ test_view_mapping< Kokkos::OpenMP >(); test_view_mapping_operator< Kokkos::OpenMP >(); } -} // namespace test - +} // namespace Test diff --git 
a/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_b.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_b.cpp index b2d4f87fdd..c802fb79ca 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_b.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_b.cpp @@ -40,82 +40,85 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( openmp , impl_shared_alloc ) { - test_shared_alloc< Kokkos::HostSpace , Kokkos::OpenMP >(); +TEST_F( openmp, impl_shared_alloc ) +{ + test_shared_alloc< Kokkos::HostSpace, Kokkos::OpenMP >(); } -TEST_F( openmp , impl_view_mapping_b ) { +TEST_F( openmp, impl_view_mapping_b ) +{ test_view_mapping_subview< Kokkos::OpenMP >(); TestViewMappingAtomic< Kokkos::OpenMP >::run(); } -TEST_F( openmp, view_api) { - TestViewAPI< double , Kokkos::OpenMP >(); +TEST_F( openmp, view_api ) +{ + TestViewAPI< double, Kokkos::OpenMP >(); } -TEST_F( openmp , view_nested_view ) +TEST_F( openmp, view_nested_view ) { ::Test::view_nested_view< Kokkos::OpenMP >(); } - - -TEST_F( openmp , view_remap ) +TEST_F( openmp, view_remap ) { - enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 }; + enum { N0 = 3, N1 = 2, N2 = 8, N3 = 9 }; - typedef Kokkos::View< double*[N1][N2][N3] , - Kokkos::LayoutRight , - Kokkos::OpenMP > output_type ; + typedef Kokkos::View< double*[N1][N2][N3], + Kokkos::LayoutRight, + Kokkos::OpenMP > output_type; - typedef Kokkos::View< int**[N2][N3] , - Kokkos::LayoutLeft , - Kokkos::OpenMP > input_type ; + typedef Kokkos::View< int**[N2][N3], + Kokkos::LayoutLeft, + Kokkos::OpenMP > input_type; - typedef Kokkos::View< int*[N0][N2][N3] , - Kokkos::LayoutLeft , - Kokkos::OpenMP > diff_type ; + typedef Kokkos::View< int*[N0][N2][N3], + Kokkos::LayoutLeft, + Kokkos::OpenMP > diff_type; - output_type output( "output" , N0 ); - input_type input ( "input" , N0 , N1 ); - diff_type diff ( "diff" , N0 ); + output_type output( "output", N0 ); + input_type input ( "input", 
N0, N1 ); + diff_type diff ( "diff", N0 ); - int value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - input(i0,i1,i2,i3) = ++value ; - }}}} + int value = 0; - // Kokkos::deep_copy( diff , input ); // throw with incompatible shape - Kokkos::deep_copy( output , input ); + for ( size_t i3 = 0; i3 < N3; ++i3 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i0 = 0; i0 < N0; ++i0 ) + { + input( i0, i1, i2, i3 ) = ++value; + } - value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - ++value ; - ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) ); - }}}} + // Kokkos::deep_copy( diff, input ); // Throw with incompatible shape. + Kokkos::deep_copy( output, input ); + + value = 0; + + for ( size_t i3 = 0; i3 < N3; ++i3 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i0 = 0; i0 < N0; ++i0 ) + { + ++value; + ASSERT_EQ( value, ( (int) output( i0, i1, i2, i3 ) ) ); + } } -//---------------------------------------------------------------------------- - -TEST_F( openmp , view_aggregate ) +TEST_F( openmp, view_aggregate ) { TestViewAggregate< Kokkos::OpenMP >(); } -TEST_F( openmp , template_meta_functions ) +TEST_F( openmp, template_meta_functions ) { - TestTemplateMetaFunctions(); + TestTemplateMetaFunctions< int, Kokkos::OpenMP >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads.hpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads.hpp new file mode 100644 index 0000000000..907fe23ea5 --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads.hpp @@ -0,0 +1,109 @@ +/* +//@HEADER +// ************************************************************************ +// +// 
Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_TEST_QTHREADS_HPP +#define KOKKOS_TEST_QTHREADS_HPP + +#include + +#include + +#ifdef KOKKOS_LAMBDA +#undef KOKKOS_LAMBDA +#endif +#define KOKKOS_LAMBDA [=] + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Test { + +class qthreads : public ::testing::Test { +protected: + static void SetUpTestCase() + { + const unsigned numa_count = Kokkos::hwloc::get_available_numa_count(); + const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); + const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); + + const unsigned threads_count = std::max( 1u, numa_count ) * + std::max( 2u, ( cores_per_numa * threads_per_core ) / 2 ); + + Kokkos::Qthreads::initialize( threads_count ); + Kokkos::print_configuration( std::cout, true ); + + srand( 10231 ); + } + + static void TearDownTestCase() + { + Kokkos::Qthreads::finalize(); + } +}; + +} // namespace Test + +#endif diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_Atomics.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_Atomics.cpp new file mode 100644 index 0000000000..e64c3305db --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_Atomics.cpp @@ -0,0 +1,213 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +namespace Test { + +TEST_F( qthreads, atomics ) +{ +#if 0 + const int loop_count = 1e4; + + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Qthreads >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Qthreads >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Qthreads >( loop_count, 3 ) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Qthreads >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Qthreads >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Qthreads >( loop_count, 3 ) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Qthreads >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Qthreads >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Qthreads >( loop_count, 3 ) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Qthreads >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Qthreads >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Qthreads >( loop_count, 3 ) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Qthreads >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Qthreads >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Qthreads >( loop_count, 3 ) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Qthreads >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Qthreads >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Qthreads >( loop_count, 3 ) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Qthreads >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Qthreads >( 100, 2 ) ) ); + ASSERT_TRUE( ( 
TestAtomic::Loop< float, Kokkos::Qthreads >( 100, 3 ) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex, Kokkos::Qthreads >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex, Kokkos::Qthreads >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex, Kokkos::Qthreads >( 100, 3 ) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Qthreads >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Qthreads >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Qthreads >( 100, 3 ) ) ); +#endif +} + +TEST_F( qthreads, atomic_operations ) +{ +#if 0 + const int start = 1; // Avoid zero for division. + const int end = 11; + + for ( int i = start; i < end; ++i ) + { + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( 
start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 3 ) ) ); + 
ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 7 ) ) ); + ASSERT_TRUE( 
( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 12 ) ) 
); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Qthreads >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Qthreads >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Qthreads >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Qthreads >( start, end - i, 4 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Qthreads >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Qthreads >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Qthreads >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Qthreads >( start, end - i, 4 ) ) ); + } +#endif +} + +TEST_F( qthreads, atomic_views_integral ) +{ +#if 0 + const long length = 1000000; + + { + // Integral Types. 
+ ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Qthreads >( length, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Qthreads >( length, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Qthreads >( length, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Qthreads >( length, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Qthreads >( length, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Qthreads >( length, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Qthreads >( length, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Qthreads >( length, 8 ) ) ); + } +#endif +} + +TEST_F( qthreads, atomic_views_nonintegral ) +{ +#if 0 + const long length = 1000000; + + { + // Non-Integral Types. + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Qthreads >( length, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Qthreads >( length, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Qthreads >( length, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Qthreads >( length, 4 ) ) ); + } +#endif +} + +TEST_F( qthreads, atomic_view_api ) +{ +#if 0 + TestAtomicViews::TestAtomicViewAPI< int, Kokkos::Qthreads >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_Other.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_Other.cpp new file mode 100644 index 0000000000..0faec84056 --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_Other.cpp @@ -0,0 +1,213 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +namespace Test { + +TEST_F( qthreads, init ) +{ + ; +} + +TEST_F( qthreads, md_range ) +{ +#if 0 + TestMDRange_2D< Kokkos::Qthreads >::test_for2( 100, 100 ); + TestMDRange_3D< Kokkos::Qthreads >::test_for3( 100, 100, 100 ); +#endif +} + +TEST_F( qthreads, policy_construction ) +{ +#if 0 + TestRangePolicyConstruction< Kokkos::Qthreads >(); + TestTeamPolicyConstruction< Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, range_tag ) +{ +#if 0 + TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_for( 0 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_reduce( 0 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_scan( 0 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_for( 0 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_reduce( 0 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_scan( 0 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_dynamic_policy( 0 ); + + TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_for( 2 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_reduce( 2 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_scan( 2 ); + + TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_for( 3 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_reduce( 3 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_scan( 3 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_dynamic_policy( 3 ); + + TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_for( 1000 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_reduce( 1000 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_scan( 1000 ); + + TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_for( 1001 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_reduce( 1001 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_scan( 1001 ); + TestRange< 
Kokkos::Qthreads, Kokkos::Schedule >::test_dynamic_policy( 1000 ); +#endif +} + +//---------------------------------------------------------------------------- + +TEST_F( qthreads, compiler_macros ) +{ +#if 0 + ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Qthreads >() ) ); +#endif +} + +//---------------------------------------------------------------------------- + +TEST_F( qthreads, memory_pool ) +{ +#if 0 + bool val = TestMemoryPool::test_mempool< Kokkos::Qthreads >( 128, 128000000 ); + ASSERT_TRUE( val ); + + TestMemoryPool::test_mempool2< Kokkos::Qthreads >( 64, 4, 1000000, 2000000 ); + + TestMemoryPool::test_memory_exhaustion< Kokkos::Qthreads >(); +#endif +} + +//---------------------------------------------------------------------------- + +#if defined( KOKKOS_ENABLE_TASKDAG ) + +TEST_F( qthreads, task_fib ) +{ +#if 0 + for ( int i = 0; i < 25; ++i ) { + TestTaskScheduler::TestFib< Kokkos::Qthreads >::run( i, ( i + 1 ) * ( i + 1 ) * 10000 ); + } +#endif +} + +TEST_F( qthreads, task_depend ) +{ +#if 0 + for ( int i = 0; i < 25; ++i ) { + TestTaskScheduler::TestTaskDependence< Kokkos::Qthreads >::run( i ); + } +#endif +} + +TEST_F( qthreads, task_team ) +{ +#if 0 + TestTaskScheduler::TestTaskTeam< Kokkos::Qthreads >::run( 1000 ); + //TestTaskScheduler::TestTaskTeamValue< Kokkos::Qthreads >::run( 1000 ); // Put back after testing. 
+#endif +} + +#endif // #if defined( KOKKOS_ENABLE_TASKDAG ) + +//---------------------------------------------------------------------------- + +#if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS ) + +TEST_F( qthreads, cxx11 ) +{ +#if 0 + if ( std::is_same< Kokkos::DefaultExecutionSpace, Kokkos::Qthreads >::value ) { + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Qthreads >( 1 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Qthreads >( 2 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Qthreads >( 3 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Qthreads >( 4 ) ) ); + } +#endif +} + +#endif + +TEST_F( qthreads, tile_layout ) +{ +#if 0 + TestTile::test< Kokkos::Qthreads, 1, 1 >( 1, 1 ); + TestTile::test< Kokkos::Qthreads, 1, 1 >( 2, 3 ); + TestTile::test< Kokkos::Qthreads, 1, 1 >( 9, 10 ); + + TestTile::test< Kokkos::Qthreads, 2, 2 >( 1, 1 ); + TestTile::test< Kokkos::Qthreads, 2, 2 >( 2, 3 ); + TestTile::test< Kokkos::Qthreads, 2, 2 >( 4, 4 ); + TestTile::test< Kokkos::Qthreads, 2, 2 >( 9, 9 ); + + TestTile::test< Kokkos::Qthreads, 2, 4 >( 9, 9 ); + TestTile::test< Kokkos::Qthreads, 4, 2 >( 9, 9 ); + + TestTile::test< Kokkos::Qthreads, 4, 4 >( 1, 1 ); + TestTile::test< Kokkos::Qthreads, 4, 4 >( 4, 4 ); + TestTile::test< Kokkos::Qthreads, 4, 4 >( 9, 9 ); + TestTile::test< Kokkos::Qthreads, 4, 4 >( 9, 11 ); + + TestTile::test< Kokkos::Qthreads, 8, 8 >( 1, 1 ); + TestTile::test< Kokkos::Qthreads, 8, 8 >( 4, 4 ); + TestTile::test< Kokkos::Qthreads, 8, 8 >( 9, 9 ); + TestTile::test< Kokkos::Qthreads, 8, 8 >( 9, 11 ); +#endif +} + +TEST_F( qthreads, dispatch ) +{ +#if 0 + const int repeat = 100; + for ( int i = 0; i < repeat; ++i ) { + for ( int j = 0; j < repeat; ++j ) { + Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Qthreads >( 0, j ) + , KOKKOS_LAMBDA( int ) {} ); + } + } +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_Reductions.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_Reductions.cpp new file mode 
100644 index 0000000000..a2470ac15c --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_Reductions.cpp @@ -0,0 +1,168 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +namespace Test { + +TEST_F( qthreads, long_reduce ) +{ +#if 0 + TestReduce< long, Kokkos::Qthreads >( 0 ); + TestReduce< long, Kokkos::Qthreads >( 1000000 ); +#endif +} + +TEST_F( qthreads, double_reduce ) +{ +#if 0 + TestReduce< double, Kokkos::Qthreads >( 0 ); + TestReduce< double, Kokkos::Qthreads >( 1000000 ); +#endif +} + +TEST_F( qthreads, reducers ) +{ +#if 0 + TestReducers< int, Kokkos::Qthreads >::execute_integer(); + TestReducers< size_t, Kokkos::Qthreads >::execute_integer(); + TestReducers< double, Kokkos::Qthreads >::execute_float(); + TestReducers< Kokkos::complex, Kokkos::Qthreads>::execute_basic(); +#endif +} + +TEST_F( qthreads, long_reduce_dynamic ) +{ +#if 0 + TestReduceDynamic< long, Kokkos::Qthreads >( 0 ); + TestReduceDynamic< long, Kokkos::Qthreads >( 1000000 ); +#endif +} + +TEST_F( qthreads, double_reduce_dynamic ) +{ +#if 0 + TestReduceDynamic< double, Kokkos::Qthreads >( 0 ); + TestReduceDynamic< double, Kokkos::Qthreads >( 1000000 ); +#endif +} + +TEST_F( qthreads, long_reduce_dynamic_view ) +{ +#if 0 + TestReduceDynamicView< long, Kokkos::Qthreads >( 0 ); + TestReduceDynamicView< long, Kokkos::Qthreads >( 1000000 ); +#endif +} + +TEST_F( qthreads, scan ) +{ +#if 0 + TestScan< Kokkos::Qthreads >::test_range( 1, 1000 ); + TestScan< Kokkos::Qthreads >( 0 ); + TestScan< Kokkos::Qthreads >( 100000 ); + TestScan< Kokkos::Qthreads >( 10000000 ); + Kokkos::Qthreads::fence(); +#endif +} + +TEST_F( qthreads, scan_small ) +{ +#if 0 + typedef TestScan< Kokkos::Qthreads, Kokkos::Impl::QthreadsExecUseScanSmall > TestScanFunctor; + + for ( int i = 0; i < 1000; ++i ) { + TestScanFunctor( 10 ); + TestScanFunctor( 10000 ); + } + TestScanFunctor( 1000000 ); + TestScanFunctor( 10000000 ); + + Kokkos::Qthreads::fence(); +#endif +} + +TEST_F( qthreads, team_scan ) +{ +#if 0 + TestScanTeam< Kokkos::Qthreads, 
Kokkos::Schedule >( 0 ); + TestScanTeam< Kokkos::Qthreads, Kokkos::Schedule >( 0 ); + TestScanTeam< Kokkos::Qthreads, Kokkos::Schedule >( 10 ); + TestScanTeam< Kokkos::Qthreads, Kokkos::Schedule >( 10 ); + TestScanTeam< Kokkos::Qthreads, Kokkos::Schedule >( 10000 ); + TestScanTeam< Kokkos::Qthreads, Kokkos::Schedule >( 10000 ); +#endif +} + +TEST_F( qthreads, team_long_reduce ) +{ +#if 0 + TestReduceTeam< long, Kokkos::Qthreads, Kokkos::Schedule >( 0 ); + TestReduceTeam< long, Kokkos::Qthreads, Kokkos::Schedule >( 0 ); + TestReduceTeam< long, Kokkos::Qthreads, Kokkos::Schedule >( 3 ); + TestReduceTeam< long, Kokkos::Qthreads, Kokkos::Schedule >( 3 ); + TestReduceTeam< long, Kokkos::Qthreads, Kokkos::Schedule >( 100000 ); + TestReduceTeam< long, Kokkos::Qthreads, Kokkos::Schedule >( 100000 ); +#endif +} + +TEST_F( qthreads, team_double_reduce ) +{ +#if 0 + TestReduceTeam< double, Kokkos::Qthreads, Kokkos::Schedule >( 0 ); + TestReduceTeam< double, Kokkos::Qthreads, Kokkos::Schedule >( 0 ); + TestReduceTeam< double, Kokkos::Qthreads, Kokkos::Schedule >( 3 ); + TestReduceTeam< double, Kokkos::Qthreads, Kokkos::Schedule >( 3 ); + TestReduceTeam< double, Kokkos::Qthreads, Kokkos::Schedule >( 100000 ); + TestReduceTeam< double, Kokkos::Qthreads, Kokkos::Schedule >( 100000 ); +#endif +} + +TEST_F( qthreads, reduction_deduction ) +{ +#if 0 + TestCXX11::test_reduction_deduction< Kokkos::Qthreads >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_a.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_a.cpp new file mode 100644 index 0000000000..ab873359a7 --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_a.cpp @@ -0,0 +1,125 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +namespace Test { + +TEST_F( qthreads, view_subview_auto_1d_left ) +{ +#if 0 + TestViewSubview::test_auto_1d< Kokkos::LayoutLeft, Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, view_subview_auto_1d_right ) +{ +#if 0 + TestViewSubview::test_auto_1d< Kokkos::LayoutRight, Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, view_subview_auto_1d_stride ) +{ +#if 0 + TestViewSubview::test_auto_1d< Kokkos::LayoutStride, Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, view_subview_assign_strided ) +{ +#if 0 + TestViewSubview::test_1d_strided_assignment< Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, view_subview_left_0 ) +{ +#if 0 + TestViewSubview::test_left_0< Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, view_subview_left_1 ) +{ +#if 0 + TestViewSubview::test_left_1< Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, view_subview_left_2 ) +{ +#if 0 + TestViewSubview::test_left_2< Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, view_subview_left_3 ) +{ +#if 0 + TestViewSubview::test_left_3< Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, view_subview_right_0 ) +{ +#if 0 + TestViewSubview::test_right_0< Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, view_subview_right_1 ) +{ +#if 0 + TestViewSubview::test_right_1< Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, view_subview_right_3 ) +{ +#if 0 + TestViewSubview::test_right_3< Kokkos::Qthreads >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_b.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_b.cpp new file mode 100644 index 0000000000..199c5c7955 --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_b.cpp @@ -0,0 +1,66 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +namespace Test { + +TEST_F( qthreads, view_subview_layoutleft_to_layoutleft ) +{ +#if 0 + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Qthreads >(); + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Qthreads, Kokkos::MemoryTraits >(); + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Qthreads, Kokkos::MemoryTraits >(); +#endif +} + +TEST_F( qthreads, view_subview_layoutright_to_layoutright ) +{ +#if 0 + TestViewSubview::test_layoutright_to_layoutright< Kokkos::Qthreads >(); + TestViewSubview::test_layoutright_to_layoutright< Kokkos::Qthreads, Kokkos::MemoryTraits >(); + TestViewSubview::test_layoutright_to_layoutright< Kokkos::Qthreads, Kokkos::MemoryTraits >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c01.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c01.cpp new file mode 100644 index 0000000000..f44909f3da --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c01.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. 
+// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +namespace Test { + +TEST_F( qthreads, view_subview_1d_assign ) +{ +#if 0 + TestViewSubview::test_1d_assign< Kokkos::Qthreads >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c02.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c02.cpp new file mode 100644 index 0000000000..7bb936f8dd --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c02.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +namespace Test { + +TEST_F( qthreads, view_subview_1d_assign_atomic ) +{ +#if 0 + TestViewSubview::test_1d_assign< Kokkos::Qthreads, Kokkos::MemoryTraits >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c03.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c03.cpp new file mode 100644 index 0000000000..27073dfa81 --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c03.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +namespace Test { + +TEST_F( qthreads, view_subview_1d_assign_randomaccess ) +{ +#if 0 + TestViewSubview::test_1d_assign< Kokkos::Qthreads, Kokkos::MemoryTraits >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c04.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c04.cpp new file mode 100644 index 0000000000..1b3cf48852 --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c04.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +namespace Test { + +TEST_F( qthreads, view_subview_2d_from_3d ) +{ +#if 0 + TestViewSubview::test_2d_subview_3d< Kokkos::Qthreads >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c05.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c05.cpp new file mode 100644 index 0000000000..34dda63e64 --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c05.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +namespace Test { + +TEST_F( qthreads, view_subview_2d_from_3d_atomic ) +{ +#if 0 + TestViewSubview::test_2d_subview_3d< Kokkos::Qthreads, Kokkos::MemoryTraits >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c06.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c06.cpp new file mode 100644 index 0000000000..5a4ee50fb2 --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c06.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +namespace Test { + +TEST_F( qthreads, view_subview_2d_from_3d_randomaccess ) +{ +#if 0 + TestViewSubview::test_2d_subview_3d< Kokkos::Qthreads, Kokkos::MemoryTraits >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c07.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c07.cpp new file mode 100644 index 0000000000..fe386e34a8 --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c07.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +namespace Test { + +TEST_F( qthreads, view_subview_3d_from_5d_left ) +{ +#if 0 + TestViewSubview::test_3d_subview_5d_left< Kokkos::Qthreads >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c08.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c08.cpp new file mode 100644 index 0000000000..a3e0ab2529 --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c08.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +namespace Test { + +TEST_F( qthreads, view_subview_3d_from_5d_left_atomic ) +{ +#if 0 + TestViewSubview::test_3d_subview_5d_left< Kokkos::Qthreads, Kokkos::MemoryTraits >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c09.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c09.cpp new file mode 100644 index 0000000000..df1f570e9d --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c09.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +namespace Test { + +TEST_F( qthreads, view_subview_3d_from_5d_left_randomaccess ) +{ +#if 0 + TestViewSubview::test_3d_subview_5d_left< Kokkos::Qthreads, Kokkos::MemoryTraits >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c10.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c10.cpp new file mode 100644 index 0000000000..cc3c80d10d --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c10.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +namespace Test { + +TEST_F( qthreads, view_subview_3d_from_5d_right ) +{ +#if 0 + TestViewSubview::test_3d_subview_5d_right< Kokkos::Qthreads >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c11.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c11.cpp new file mode 100644 index 0000000000..14b331a458 --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c11.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +namespace Test { + +TEST_F( qthreads, view_subview_3d_from_5d_right_atomic ) +{ +#if 0 + TestViewSubview::test_3d_subview_5d_right< Kokkos::Qthreads, Kokkos::MemoryTraits >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c12.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c12.cpp new file mode 100644 index 0000000000..571382e66f --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c12.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +namespace Test { + +TEST_F( qthreads, view_subview_3d_from_5d_right_randomaccess ) +{ +#if 0 + TestViewSubview::test_3d_subview_5d_right< Kokkos::Qthreads, Kokkos::MemoryTraits >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c_all.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c_all.cpp new file mode 100644 index 0000000000..ab984c5f30 --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c_all.cpp @@ -0,0 +1,12 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_Team.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_Team.cpp new file mode 100644 index 0000000000..e7b81283fb --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_Team.cpp @@ -0,0 +1,143 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +namespace Test { + +TEST_F( qthreads, team_tag ) +{ +#if 0 + TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule >::test_for( 0 ); + TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule >::test_reduce( 0 ); + TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule >::test_for( 0 ); + TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule >::test_reduce( 0 ); + + TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule >::test_for( 2 ); + TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule >::test_reduce( 2 ); + TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule >::test_for( 2 ); + TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule >::test_reduce( 2 ); + + TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule >::test_for( 1000 ); + TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule >::test_reduce( 1000 ); + TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule >::test_for( 1000 ); + TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule >::test_reduce( 1000 ); +#endif +} + +TEST_F( qthreads, team_shared_request ) +{ +#if 0 + TestSharedTeam< Kokkos::Qthreads, Kokkos::Schedule >(); + TestSharedTeam< Kokkos::Qthreads, Kokkos::Schedule >(); +#endif +} + +TEST_F( qthreads, team_scratch_request ) +{ +#if 0 + TestScratchTeam< Kokkos::Qthreads, Kokkos::Schedule >(); + TestScratchTeam< Kokkos::Qthreads, Kokkos::Schedule >(); +#endif +} + +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) +TEST_F( qthreads, team_lambda_shared_request ) +{ +#if 0 + TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Qthreads, Kokkos::Schedule >(); + TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Qthreads, Kokkos::Schedule >(); +#endif +} +#endif + +TEST_F( qthreads, shmem_size ) +{ +#if 0 + TestShmemSize< Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, multi_level_scratch ) +{ +#if 0 + TestMultiLevelScratchTeam< Kokkos::Qthreads, Kokkos::Schedule >(); + 
TestMultiLevelScratchTeam< Kokkos::Qthreads, Kokkos::Schedule >(); +#endif +} + +TEST_F( qthreads, team_vector ) +{ +#if 0 + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 0 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 1 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 2 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 3 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 4 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 5 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 6 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 7 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 8 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 9 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 10 ) ) ); +#endif +} + +#ifdef KOKKOS_COMPILER_GNU +#if ( KOKKOS_COMPILER_GNU == 472 ) +#define SKIP_TEST +#endif +#endif + +#ifndef SKIP_TEST +TEST_F( qthreads, triple_nested_parallelism ) +{ +#if 0 + TestTripleNestedReduce< double, Kokkos::Qthreads >( 8192, 2048, 32, 32 ); + TestTripleNestedReduce< double, Kokkos::Qthreads >( 8192, 2048, 32, 16 ); + TestTripleNestedReduce< double, Kokkos::Qthreads >( 8192, 2048, 16, 16 ); +#endif +} +#endif + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_ViewAPI_a.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_ViewAPI_a.cpp new file mode 100644 index 0000000000..cd876a36bf --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_ViewAPI_a.cpp @@ -0,0 +1,56 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +namespace Test { + +TEST_F( qthreads, impl_view_mapping_a ) +{ +#if 0 + test_view_mapping< Kokkos::Qthreads >(); + test_view_mapping_operator< Kokkos::Qthreads >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_ViewAPI_b.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_ViewAPI_b.cpp new file mode 100644 index 0000000000..adf048b613 --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_ViewAPI_b.cpp @@ -0,0 +1,138 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +namespace Test { + +TEST_F( qthreads, impl_shared_alloc ) +{ +#if 0 + test_shared_alloc< Kokkos::HostSpace, Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, impl_view_mapping_b ) +{ +#if 0 + test_view_mapping_subview< Kokkos::Qthreads >(); + TestViewMappingAtomic< Kokkos::Qthreads >::run(); +#endif +} + +TEST_F( qthreads, view_api ) +{ +#if 0 + TestViewAPI< double, Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, view_nested_view ) +{ +#if 0 + ::Test::view_nested_view< Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, view_remap ) +{ +#if 0 + enum { N0 = 3, N1 = 2, N2 = 8, N3 = 9 }; + + typedef Kokkos::View< double*[N1][N2][N3], + Kokkos::LayoutRight, + Kokkos::Qthreads > output_type; + + typedef Kokkos::View< int**[N2][N3], + Kokkos::LayoutLeft, + Kokkos::Qthreads > input_type; + + typedef Kokkos::View< int*[N0][N2][N3], + Kokkos::LayoutLeft, + Kokkos::Qthreads > diff_type; + + output_type output( "output", N0 ); + input_type input ( "input", N0, N1 ); + diff_type diff ( "diff", N0 ); + + int value = 0; + + for ( size_t i3 = 0; i3 < N3; ++i3 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i0 = 0; i0 < N0; ++i0 ) + { + input( i0, i1, i2, i3 ) = ++value; + } + + // Kokkos::deep_copy( diff, input ); 
// Throw with incompatible shape. + Kokkos::deep_copy( output, input ); + + value = 0; + + for ( size_t i3 = 0; i3 < N3; ++i3 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i0 = 0; i0 < N0; ++i0 ) + { + ++value; + ASSERT_EQ( value, ( (int) output( i0, i1, i2, i3 ) ) ); + } +#endif +} + +TEST_F( qthreads, view_aggregate ) +{ +#if 0 + TestViewAggregate< Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, template_meta_functions ) +{ +#if 0 + TestTemplateMetaFunctions< int, Kokkos::Qthreads >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial.hpp b/lib/kokkos/core/unit_test/serial/TestSerial.hpp index c0ffa6afb1..03da07e065 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial.hpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial.hpp @@ -40,11 +40,14 @@ // ************************************************************************ //@HEADER */ + #ifndef KOKKOS_TEST_SERIAL_HPP #define KOKKOS_TEST_SERIAL_HPP + #include #include + #ifdef KOKKOS_LAMBDA #undef KOKKOS_LAMBDA #endif @@ -53,21 +56,14 @@ #include #include - -//---------------------------------------------------------------------------- - #include #include - - #include #include #include #include #include - #include - #include #include #include @@ -76,15 +72,11 @@ #include #include #include - - #include #include #include #include - #include - #include namespace Test { @@ -92,14 +84,16 @@ namespace Test { class serial : public ::testing::Test { protected: static void SetUpTestCase() - { - Kokkos::HostSpace::execution_space::initialize(); - } + { + Kokkos::HostSpace::execution_space::initialize(); + } + static void TearDownTestCase() - { - Kokkos::HostSpace::execution_space::finalize(); - } + { + Kokkos::HostSpace::execution_space::finalize(); + } }; -} +} // namespace Test + #endif diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_Atomics.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_Atomics.cpp index 
729a76556d..81ba532a3d 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_Atomics.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_Atomics.cpp @@ -40,165 +40,165 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( serial , atomics ) +TEST_F( serial, atomics ) { - const int loop_count = 1e6 ; + const int loop_count = 1e6; - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Serial >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Serial >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Serial >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Serial >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Serial >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Serial >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Serial >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Serial >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Serial >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Serial >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Serial >( loop_count, 2 ) ) ); + 
ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Serial >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Serial >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Serial >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Serial >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Serial >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Serial >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Serial >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(100,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Serial >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Serial >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Serial >( 100, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop ,Kokkos::Serial>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop ,Kokkos::Serial>(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop ,Kokkos::Serial>(100,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex, Kokkos::Serial >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex, Kokkos::Serial >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex, Kokkos::Serial >( 100, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop ,Kokkos::Serial>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop ,Kokkos::Serial>(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop ,Kokkos::Serial>(100,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Serial >( 100, 1 
) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Serial >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Serial >( 100, 3 ) ) ); } -TEST_F( serial , atomic_operations ) +TEST_F( serial, atomic_operations ) { - const int start = 1; //Avoid zero for division + const int start = 1; // Avoid zero for division. const int end = 11; - for (int i = start; i < end; ++i) + + for ( int i = start; i < end; ++i ) { - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 12 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 4 ) 
) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 12 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 12 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, 
Kokkos::Serial >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 12 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, 
end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 12 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 12 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 12) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< 
unsigned long int, Kokkos::Serial >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 12 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 12 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, 
Kokkos::Serial >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 12 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Serial >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Serial >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Serial >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Serial >( start, end - i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Serial >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Serial >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Serial >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Serial >( start, end - i, 4 ) ) ); } - } -TEST_F( serial , atomic_views_integral ) +TEST_F( serial, atomic_views_integral ) { const long length = 1000000; - { - //Integral Types - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 8 ) ) ); + { + // Integral Types. 
+ ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Serial >( length, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Serial >( length, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Serial >( length, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Serial >( length, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Serial >( length, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Serial >( length, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Serial >( length, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Serial >( length, 8 ) ) ); } } -TEST_F( serial , atomic_views_nonintegral ) +TEST_F( serial, atomic_views_nonintegral ) { const long length = 1000000; - { - //Non-Integral Types - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType(length, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType(length, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType(length, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType(length, 4 ) ) ); + { + // Non-Integral Types. 
+ ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Serial >( length, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Serial >( length, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Serial >( length, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Serial >( length, 4 ) ) ); } } -TEST_F( serial , atomic_view_api ) +TEST_F( serial, atomic_view_api ) { - TestAtomicViews::TestAtomicViewAPI(); + TestAtomicViews::TestAtomicViewAPI< int, Kokkos::Serial >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_Other.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_Other.cpp index 43fc4c3587..b40ed3f4af 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_Other.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_Other.cpp @@ -40,50 +40,61 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( serial , md_range ) { - TestMDRange_2D< Kokkos::Serial >::test_for2(100,100); - - TestMDRange_3D< Kokkos::Serial >::test_for3(100,100,100); +TEST_F( serial , mdrange_for ) +{ + TestMDRange_2D< Kokkos::Serial >::test_for2( 100, 100 ); + TestMDRange_3D< Kokkos::Serial >::test_for3( 100, 10, 100 ); + TestMDRange_4D< Kokkos::Serial >::test_for4( 100, 10, 10, 10 ); + TestMDRange_5D< Kokkos::Serial >::test_for5( 100, 10, 10, 10, 5 ); + TestMDRange_6D< Kokkos::Serial >::test_for6( 10, 10, 10, 10, 5, 5 ); } -TEST_F( serial, policy_construction) { +TEST_F( serial , mdrange_reduce ) +{ + TestMDRange_2D< Kokkos::Serial >::test_reduce2( 100, 100 ); + TestMDRange_3D< Kokkos::Serial >::test_reduce3( 100, 10, 100 ); +} + +TEST_F( serial, policy_construction ) +{ TestRangePolicyConstruction< Kokkos::Serial >(); TestTeamPolicyConstruction< Kokkos::Serial >(); } -TEST_F( serial , range_tag ) +TEST_F( serial, 
range_tag ) { - TestRange< Kokkos::Serial , Kokkos::Schedule >::test_for(0); - TestRange< Kokkos::Serial , Kokkos::Schedule >::test_reduce(0); - TestRange< Kokkos::Serial , Kokkos::Schedule >::test_scan(0); - TestRange< Kokkos::Serial , Kokkos::Schedule >::test_for(0); - TestRange< Kokkos::Serial , Kokkos::Schedule >::test_reduce(0); - TestRange< Kokkos::Serial , Kokkos::Schedule >::test_scan(0); + TestRange< Kokkos::Serial, Kokkos::Schedule >::test_for( 0 ); + TestRange< Kokkos::Serial, Kokkos::Schedule >::test_reduce( 0 ); + TestRange< Kokkos::Serial, Kokkos::Schedule >::test_scan( 0 ); + TestRange< Kokkos::Serial, Kokkos::Schedule >::test_for( 0 ); + TestRange< Kokkos::Serial, Kokkos::Schedule >::test_reduce( 0 ); + TestRange< Kokkos::Serial, Kokkos::Schedule >::test_scan( 0 ); - TestRange< Kokkos::Serial , Kokkos::Schedule >::test_for(1000); - TestRange< Kokkos::Serial , Kokkos::Schedule >::test_reduce(1000); - TestRange< Kokkos::Serial , Kokkos::Schedule >::test_scan(1000); - TestRange< Kokkos::Serial , Kokkos::Schedule >::test_for(1001); - TestRange< Kokkos::Serial , Kokkos::Schedule >::test_reduce(1001); - TestRange< Kokkos::Serial , Kokkos::Schedule >::test_scan(1001); - TestRange< Kokkos::Serial , Kokkos::Schedule >::test_dynamic_policy(1000); + TestRange< Kokkos::Serial, Kokkos::Schedule >::test_for( 1000 ); + TestRange< Kokkos::Serial, Kokkos::Schedule >::test_reduce( 1000 ); + TestRange< Kokkos::Serial, Kokkos::Schedule >::test_scan( 1000 ); + + TestRange< Kokkos::Serial, Kokkos::Schedule >::test_for( 1001 ); + TestRange< Kokkos::Serial, Kokkos::Schedule >::test_reduce( 1001 ); + TestRange< Kokkos::Serial, Kokkos::Schedule >::test_scan( 1001 ); + TestRange< Kokkos::Serial, Kokkos::Schedule >::test_dynamic_policy( 1000 ); } - //---------------------------------------------------------------------------- -TEST_F( serial , compiler_macros ) +TEST_F( serial, compiler_macros ) { ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Serial >() ) ); } 
//---------------------------------------------------------------------------- -TEST_F( serial , memory_pool ) +TEST_F( serial, memory_pool ) { bool val = TestMemoryPool::test_mempool< Kokkos::Serial >( 128, 128000000 ); ASSERT_TRUE( val ); @@ -97,24 +108,24 @@ TEST_F( serial , memory_pool ) #if defined( KOKKOS_ENABLE_TASKDAG ) -TEST_F( serial , task_fib ) +TEST_F( serial, task_fib ) { - for ( int i = 0 ; i < 25 ; ++i ) { - TestTaskScheduler::TestFib< Kokkos::Serial >::run(i); + for ( int i = 0; i < 25; ++i ) { + TestTaskScheduler::TestFib< Kokkos::Serial >::run( i ); } } -TEST_F( serial , task_depend ) +TEST_F( serial, task_depend ) { - for ( int i = 0 ; i < 25 ; ++i ) { - TestTaskScheduler::TestTaskDependence< Kokkos::Serial >::run(i); + for ( int i = 0; i < 25; ++i ) { + TestTaskScheduler::TestTaskDependence< Kokkos::Serial >::run( i ); } } -TEST_F( serial , task_team ) +TEST_F( serial, task_team ) { - TestTaskScheduler::TestTaskTeam< Kokkos::Serial >::run(1000); - //TestTaskScheduler::TestTaskTeamValue< Kokkos::Serial >::run(1000); //put back after testing + TestTaskScheduler::TestTaskTeam< Kokkos::Serial >::run( 1000 ); + //TestTaskScheduler::TestTaskTeamValue< Kokkos::Serial >::run( 1000 ); // Put back after testing. 
} #endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ @@ -122,44 +133,40 @@ TEST_F( serial , task_team ) //---------------------------------------------------------------------------- #if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) -TEST_F( serial , cxx11 ) +TEST_F( serial, cxx11 ) { - if ( std::is_same< Kokkos::DefaultExecutionSpace , Kokkos::Serial >::value ) { - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(1) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(2) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(3) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(4) ) ); + if ( std::is_same< Kokkos::DefaultExecutionSpace, Kokkos::Serial >::value ) { + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >( 1 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >( 2 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >( 3 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >( 4 ) ) ); } } #endif TEST_F( serial, tile_layout ) { - TestTile::test< Kokkos::Serial , 1 , 1 >( 1 , 1 ); - TestTile::test< Kokkos::Serial , 1 , 1 >( 2 , 3 ); - TestTile::test< Kokkos::Serial , 1 , 1 >( 9 , 10 ); + TestTile::test< Kokkos::Serial, 1, 1 >( 1, 1 ); + TestTile::test< Kokkos::Serial, 1, 1 >( 2, 3 ); + TestTile::test< Kokkos::Serial, 1, 1 >( 9, 10 ); - TestTile::test< Kokkos::Serial , 2 , 2 >( 1 , 1 ); - TestTile::test< Kokkos::Serial , 2 , 2 >( 2 , 3 ); - TestTile::test< Kokkos::Serial , 2 , 2 >( 4 , 4 ); - TestTile::test< Kokkos::Serial , 2 , 2 >( 9 , 9 ); + TestTile::test< Kokkos::Serial, 2, 2 >( 1, 1 ); + TestTile::test< Kokkos::Serial, 2, 2 >( 2, 3 ); + TestTile::test< Kokkos::Serial, 2, 2 >( 4, 4 ); + TestTile::test< Kokkos::Serial, 2, 2 >( 9, 9 ); - TestTile::test< Kokkos::Serial , 2 , 4 >( 9 , 9 ); - TestTile::test< Kokkos::Serial , 4 , 2 >( 9 , 9 ); + TestTile::test< Kokkos::Serial, 2, 4 >( 9, 9 ); + TestTile::test< Kokkos::Serial, 4, 2 >( 9, 9 ); - TestTile::test< Kokkos::Serial , 4 , 4 >( 1 , 1 ); - TestTile::test< Kokkos::Serial , 4 , 4 
>( 4 , 4 ); - TestTile::test< Kokkos::Serial , 4 , 4 >( 9 , 9 ); - TestTile::test< Kokkos::Serial , 4 , 4 >( 9 , 11 ); + TestTile::test< Kokkos::Serial, 4, 4 >( 1, 1 ); + TestTile::test< Kokkos::Serial, 4, 4 >( 4, 4 ); + TestTile::test< Kokkos::Serial, 4, 4 >( 9, 9 ); + TestTile::test< Kokkos::Serial, 4, 4 >( 9, 11 ); - TestTile::test< Kokkos::Serial , 8 , 8 >( 1 , 1 ); - TestTile::test< Kokkos::Serial , 8 , 8 >( 4 , 4 ); - TestTile::test< Kokkos::Serial , 8 , 8 >( 9 , 9 ); - TestTile::test< Kokkos::Serial , 8 , 8 >( 9 , 11 ); + TestTile::test< Kokkos::Serial, 8, 8 >( 1, 1 ); + TestTile::test< Kokkos::Serial, 8, 8 >( 4, 4 ); + TestTile::test< Kokkos::Serial, 8, 8 >( 9, 9 ); + TestTile::test< Kokkos::Serial, 8, 8 >( 9, 11 ); } - - - -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_Reductions.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_Reductions.cpp index 25b5ac6d16..8a3d518cfb 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_Reductions.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_Reductions.cpp @@ -40,83 +40,90 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( serial, long_reduce) { - TestReduce< long , Kokkos::Serial >( 0 ); - TestReduce< long , Kokkos::Serial >( 1000000 ); -} - -TEST_F( serial, double_reduce) { - TestReduce< double , Kokkos::Serial >( 0 ); - TestReduce< double , Kokkos::Serial >( 1000000 ); -} - -TEST_F( serial , reducers ) +TEST_F( serial, long_reduce ) { - TestReducers::execute_integer(); - TestReducers::execute_integer(); - TestReducers::execute_float(); - TestReducers, Kokkos::Serial>::execute_basic(); + TestReduce< long, Kokkos::Serial >( 0 ); + TestReduce< long, Kokkos::Serial >( 1000000 ); } -TEST_F( serial, long_reduce_dynamic ) { - TestReduceDynamic< long , Kokkos::Serial >( 0 ); - TestReduceDynamic< long , Kokkos::Serial >( 1000000 ); -} - -TEST_F( serial, double_reduce_dynamic ) { - 
TestReduceDynamic< double , Kokkos::Serial >( 0 ); - TestReduceDynamic< double , Kokkos::Serial >( 1000000 ); -} - -TEST_F( serial, long_reduce_dynamic_view ) { - TestReduceDynamicView< long , Kokkos::Serial >( 0 ); - TestReduceDynamicView< long , Kokkos::Serial >( 1000000 ); -} - -TEST_F( serial , scan ) +TEST_F( serial, double_reduce ) { - TestScan< Kokkos::Serial >::test_range( 1 , 1000 ); + TestReduce< double, Kokkos::Serial >( 0 ); + TestReduce< double, Kokkos::Serial >( 1000000 ); +} + +TEST_F( serial, reducers ) +{ + TestReducers< int, Kokkos::Serial >::execute_integer(); + TestReducers< size_t, Kokkos::Serial >::execute_integer(); + TestReducers< double, Kokkos::Serial >::execute_float(); + TestReducers< Kokkos::complex, Kokkos::Serial>::execute_basic(); +} + +TEST_F( serial, long_reduce_dynamic ) +{ + TestReduceDynamic< long, Kokkos::Serial >( 0 ); + TestReduceDynamic< long, Kokkos::Serial >( 1000000 ); +} + +TEST_F( serial, double_reduce_dynamic ) +{ + TestReduceDynamic< double, Kokkos::Serial >( 0 ); + TestReduceDynamic< double, Kokkos::Serial >( 1000000 ); +} + +TEST_F( serial, long_reduce_dynamic_view ) +{ + TestReduceDynamicView< long, Kokkos::Serial >( 0 ); + TestReduceDynamicView< long, Kokkos::Serial >( 1000000 ); +} + +TEST_F( serial, scan ) +{ + TestScan< Kokkos::Serial >::test_range( 1, 1000 ); TestScan< Kokkos::Serial >( 0 ); TestScan< Kokkos::Serial >( 10 ); TestScan< Kokkos::Serial >( 10000 ); } -TEST_F( serial , team_scan ) +TEST_F( serial, team_scan ) { - TestScanTeam< Kokkos::Serial , Kokkos::Schedule >( 0 ); - TestScanTeam< Kokkos::Serial , Kokkos::Schedule >( 0 ); - TestScanTeam< Kokkos::Serial , Kokkos::Schedule >( 10 ); - TestScanTeam< Kokkos::Serial , Kokkos::Schedule >( 10 ); - TestScanTeam< Kokkos::Serial , Kokkos::Schedule >( 10000 ); - TestScanTeam< Kokkos::Serial , Kokkos::Schedule >( 10000 ); + TestScanTeam< Kokkos::Serial, Kokkos::Schedule >( 0 ); + TestScanTeam< Kokkos::Serial, Kokkos::Schedule >( 0 ); + TestScanTeam< 
Kokkos::Serial, Kokkos::Schedule >( 10 ); + TestScanTeam< Kokkos::Serial, Kokkos::Schedule >( 10 ); + TestScanTeam< Kokkos::Serial, Kokkos::Schedule >( 10000 ); + TestScanTeam< Kokkos::Serial, Kokkos::Schedule >( 10000 ); } -TEST_F( serial , team_long_reduce) { - TestReduceTeam< long , Kokkos::Serial , Kokkos::Schedule >( 0 ); - TestReduceTeam< long , Kokkos::Serial , Kokkos::Schedule >( 0 ); - TestReduceTeam< long , Kokkos::Serial , Kokkos::Schedule >( 3 ); - TestReduceTeam< long , Kokkos::Serial , Kokkos::Schedule >( 3 ); - TestReduceTeam< long , Kokkos::Serial , Kokkos::Schedule >( 100000 ); - TestReduceTeam< long , Kokkos::Serial , Kokkos::Schedule >( 100000 ); +TEST_F( serial, team_long_reduce ) +{ + TestReduceTeam< long, Kokkos::Serial, Kokkos::Schedule >( 0 ); + TestReduceTeam< long, Kokkos::Serial, Kokkos::Schedule >( 0 ); + TestReduceTeam< long, Kokkos::Serial, Kokkos::Schedule >( 3 ); + TestReduceTeam< long, Kokkos::Serial, Kokkos::Schedule >( 3 ); + TestReduceTeam< long, Kokkos::Serial, Kokkos::Schedule >( 100000 ); + TestReduceTeam< long, Kokkos::Serial, Kokkos::Schedule >( 100000 ); } -TEST_F( serial , team_double_reduce) { - TestReduceTeam< double , Kokkos::Serial , Kokkos::Schedule >( 0 ); - TestReduceTeam< double , Kokkos::Serial , Kokkos::Schedule >( 0 ); - TestReduceTeam< double , Kokkos::Serial , Kokkos::Schedule >( 3 ); - TestReduceTeam< double , Kokkos::Serial , Kokkos::Schedule >( 3 ); - TestReduceTeam< double , Kokkos::Serial , Kokkos::Schedule >( 100000 ); - TestReduceTeam< double , Kokkos::Serial , Kokkos::Schedule >( 100000 ); +TEST_F( serial, team_double_reduce ) +{ + TestReduceTeam< double, Kokkos::Serial, Kokkos::Schedule >( 0 ); + TestReduceTeam< double, Kokkos::Serial, Kokkos::Schedule >( 0 ); + TestReduceTeam< double, Kokkos::Serial, Kokkos::Schedule >( 3 ); + TestReduceTeam< double, Kokkos::Serial, Kokkos::Schedule >( 3 ); + TestReduceTeam< double, Kokkos::Serial, Kokkos::Schedule >( 100000 ); + TestReduceTeam< double, 
Kokkos::Serial, Kokkos::Schedule >( 100000 ); } -TEST_F( serial , reduction_deduction ) +TEST_F( serial, reduction_deduction ) { TestCXX11::test_reduction_deduction< Kokkos::Serial >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_a.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_a.cpp index bc838ccde4..3dc3e2019d 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_a.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_a.cpp @@ -40,53 +40,64 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( serial, view_subview_auto_1d_left ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutLeft,Kokkos::Serial >(); +TEST_F( serial, view_subview_auto_1d_left ) +{ + TestViewSubview::test_auto_1d< Kokkos::LayoutLeft, Kokkos::Serial >(); } -TEST_F( serial, view_subview_auto_1d_right ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutRight,Kokkos::Serial >(); +TEST_F( serial, view_subview_auto_1d_right ) +{ + TestViewSubview::test_auto_1d< Kokkos::LayoutRight, Kokkos::Serial >(); } -TEST_F( serial, view_subview_auto_1d_stride ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutStride,Kokkos::Serial >(); +TEST_F( serial, view_subview_auto_1d_stride ) +{ + TestViewSubview::test_auto_1d< Kokkos::LayoutStride, Kokkos::Serial >(); } -TEST_F( serial, view_subview_assign_strided ) { +TEST_F( serial, view_subview_assign_strided ) +{ TestViewSubview::test_1d_strided_assignment< Kokkos::Serial >(); } -TEST_F( serial, view_subview_left_0 ) { +TEST_F( serial, view_subview_left_0 ) +{ TestViewSubview::test_left_0< Kokkos::Serial >(); } -TEST_F( serial, view_subview_left_1 ) { +TEST_F( serial, view_subview_left_1 ) +{ TestViewSubview::test_left_1< Kokkos::Serial >(); } -TEST_F( serial, view_subview_left_2 ) { +TEST_F( serial, view_subview_left_2 ) +{ TestViewSubview::test_left_2< Kokkos::Serial >(); } -TEST_F( serial, 
view_subview_left_3 ) { +TEST_F( serial, view_subview_left_3 ) +{ TestViewSubview::test_left_3< Kokkos::Serial >(); } -TEST_F( serial, view_subview_right_0 ) { +TEST_F( serial, view_subview_right_0 ) +{ TestViewSubview::test_right_0< Kokkos::Serial >(); } -TEST_F( serial, view_subview_right_1 ) { +TEST_F( serial, view_subview_right_1 ) +{ TestViewSubview::test_right_1< Kokkos::Serial >(); } -TEST_F( serial, view_subview_right_3 ) { +TEST_F( serial, view_subview_right_3 ) +{ TestViewSubview::test_right_3< Kokkos::Serial >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_b.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_b.cpp index e6a5b56d3e..536c3bf197 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_b.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_b.cpp @@ -40,21 +40,23 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( serial, view_subview_layoutleft_to_layoutleft) { +TEST_F( serial, view_subview_layoutleft_to_layoutleft ) +{ TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Serial >(); - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Serial , Kokkos::MemoryTraits >(); - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Serial , Kokkos::MemoryTraits >(); + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Serial, Kokkos::MemoryTraits >(); + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Serial, Kokkos::MemoryTraits >(); } -TEST_F( serial, view_subview_layoutright_to_layoutright) { +TEST_F( serial, view_subview_layoutright_to_layoutright ) +{ TestViewSubview::test_layoutright_to_layoutright< Kokkos::Serial >(); - TestViewSubview::test_layoutright_to_layoutright< Kokkos::Serial , Kokkos::MemoryTraits >(); - TestViewSubview::test_layoutright_to_layoutright< Kokkos::Serial , Kokkos::MemoryTraits >(); + TestViewSubview::test_layoutright_to_layoutright< 
Kokkos::Serial, Kokkos::MemoryTraits >(); + TestViewSubview::test_layoutright_to_layoutright< Kokkos::Serial, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c01.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c01.cpp index 0b7a0d3bfa..579a12bf78 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c01.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c01.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( serial, view_subview_1d_assign ) { +TEST_F( serial, view_subview_1d_assign ) +{ TestViewSubview::test_1d_assign< Kokkos::Serial >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c02.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c02.cpp index 8ca7285c1f..ff009fef27 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c02.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c02.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( serial, view_subview_1d_assign_atomic ) { - TestViewSubview::test_1d_assign< Kokkos::Serial , Kokkos::MemoryTraits >(); +TEST_F( serial, view_subview_1d_assign_atomic ) +{ + TestViewSubview::test_1d_assign< Kokkos::Serial, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c03.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c03.cpp index 1d156c7415..a20478433c 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c03.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c03.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { 
-TEST_F( serial, view_subview_1d_assign_randomaccess ) { - TestViewSubview::test_1d_assign< Kokkos::Serial , Kokkos::MemoryTraits >(); +TEST_F( serial, view_subview_1d_assign_randomaccess ) +{ + TestViewSubview::test_1d_assign< Kokkos::Serial, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c04.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c04.cpp index ebf0e5c991..a34b26d9f7 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c04.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c04.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( serial, view_subview_2d_from_3d ) { +TEST_F( serial, view_subview_2d_from_3d ) +{ TestViewSubview::test_2d_subview_3d< Kokkos::Serial >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c05.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c05.cpp index 74acb92f1b..6d1882cf04 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c05.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c05.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( serial, view_subview_2d_from_3d_atomic ) { - TestViewSubview::test_2d_subview_3d< Kokkos::Serial , Kokkos::MemoryTraits >(); +TEST_F( serial, view_subview_2d_from_3d_atomic ) +{ + TestViewSubview::test_2d_subview_3d< Kokkos::Serial, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c06.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c06.cpp index 8075d46e0f..12fb883b63 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c06.cpp +++ 
b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c06.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( serial, view_subview_2d_from_3d_randomaccess ) { - TestViewSubview::test_2d_subview_3d< Kokkos::Serial , Kokkos::MemoryTraits >(); +TEST_F( serial, view_subview_2d_from_3d_randomaccess ) +{ + TestViewSubview::test_2d_subview_3d< Kokkos::Serial, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c07.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c07.cpp index 9ce8222643..8aae20c023 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c07.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c07.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( serial, view_subview_3d_from_5d_left ) { +TEST_F( serial, view_subview_3d_from_5d_left ) +{ TestViewSubview::test_3d_subview_5d_left< Kokkos::Serial >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c08.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c08.cpp index c8a5c8f33f..e75db8d52d 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c08.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c08.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( serial, view_subview_3d_from_5d_left_atomic ) { - TestViewSubview::test_3d_subview_5d_left< Kokkos::Serial , Kokkos::MemoryTraits >(); +TEST_F( serial, view_subview_3d_from_5d_left_atomic ) +{ + TestViewSubview::test_3d_subview_5d_left< Kokkos::Serial, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git 
a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c09.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c09.cpp index b66f15f17d..b9cea2ce89 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c09.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c09.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( serial, view_subview_3d_from_5d_left_randomaccess ) { - TestViewSubview::test_3d_subview_5d_left< Kokkos::Serial , Kokkos::MemoryTraits >(); +TEST_F( serial, view_subview_3d_from_5d_left_randomaccess ) +{ + TestViewSubview::test_3d_subview_5d_left< Kokkos::Serial, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c10.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c10.cpp index 5e5e3cf3d1..e5dbcead37 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c10.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c10.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( serial, view_subview_3d_from_5d_right ) { +TEST_F( serial, view_subview_3d_from_5d_right ) +{ TestViewSubview::test_3d_subview_5d_right< Kokkos::Serial >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c11.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c11.cpp index 55a353bcaf..3005030f93 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c11.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c11.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( serial, view_subview_3d_from_5d_right_atomic ) { - TestViewSubview::test_3d_subview_5d_right< Kokkos::Serial , 
Kokkos::MemoryTraits >(); +TEST_F( serial, view_subview_3d_from_5d_right_atomic ) +{ + TestViewSubview::test_3d_subview_5d_right< Kokkos::Serial, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c12.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c12.cpp index a168e1e232..fee8cb7af2 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c12.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c12.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( serial, view_subview_3d_from_5d_right_randomaccess ) { - TestViewSubview::test_3d_subview_5d_right< Kokkos::Serial , Kokkos::MemoryTraits >(); +TEST_F( serial, view_subview_3d_from_5d_right_randomaccess ) +{ + TestViewSubview::test_3d_subview_5d_right< Kokkos::Serial, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c_all.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c_all.cpp index a489b0fcb5..24dc6b5061 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c_all.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c_all.cpp @@ -1,12 +1,12 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_Team.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_Team.cpp index df400b4cb5..f13b2ce1b4 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_Team.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_Team.cpp @@ -40,62 +40,68 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( 
serial , team_tag ) +TEST_F( serial, team_tag ) { - TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule >::test_for(0); - TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule >::test_reduce(0); - TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule >::test_for(0); - TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule >::test_reduce(0); + TestTeamPolicy< Kokkos::Serial, Kokkos::Schedule >::test_for( 0 ); + TestTeamPolicy< Kokkos::Serial, Kokkos::Schedule >::test_reduce( 0 ); + TestTeamPolicy< Kokkos::Serial, Kokkos::Schedule >::test_for( 0 ); + TestTeamPolicy< Kokkos::Serial, Kokkos::Schedule >::test_reduce( 0 ); - TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule >::test_for(1000); - TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule >::test_reduce(1000); - TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule >::test_for(1000); - TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule >::test_reduce(1000); + TestTeamPolicy< Kokkos::Serial, Kokkos::Schedule >::test_for( 1000 ); + TestTeamPolicy< Kokkos::Serial, Kokkos::Schedule >::test_reduce( 1000 ); + TestTeamPolicy< Kokkos::Serial, Kokkos::Schedule >::test_for( 1000 ); + TestTeamPolicy< Kokkos::Serial, Kokkos::Schedule >::test_reduce( 1000 ); } -TEST_F( serial , team_shared_request) { - TestSharedTeam< Kokkos::Serial , Kokkos::Schedule >(); - TestSharedTeam< Kokkos::Serial , Kokkos::Schedule >(); +TEST_F( serial, team_shared_request ) +{ + TestSharedTeam< Kokkos::Serial, Kokkos::Schedule >(); + TestSharedTeam< Kokkos::Serial, Kokkos::Schedule >(); } -TEST_F( serial, team_scratch_request) { - TestScratchTeam< Kokkos::Serial , Kokkos::Schedule >(); - TestScratchTeam< Kokkos::Serial , Kokkos::Schedule >(); +TEST_F( serial, team_scratch_request ) +{ + TestScratchTeam< Kokkos::Serial, Kokkos::Schedule >(); + TestScratchTeam< Kokkos::Serial, Kokkos::Schedule >(); } -#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -TEST_F( serial , team_lambda_shared_request) { - TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Serial , Kokkos::Schedule 
>(); - TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Serial , Kokkos::Schedule >(); +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) +TEST_F( serial, team_lambda_shared_request ) +{ + TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Serial, Kokkos::Schedule >(); + TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Serial, Kokkos::Schedule >(); } #endif -TEST_F( serial, shmem_size) { +TEST_F( serial, shmem_size ) +{ TestShmemSize< Kokkos::Serial >(); } -TEST_F( serial, multi_level_scratch) { - TestMultiLevelScratchTeam< Kokkos::Serial , Kokkos::Schedule >(); - TestMultiLevelScratchTeam< Kokkos::Serial , Kokkos::Schedule >(); +TEST_F( serial, multi_level_scratch ) +{ + TestMultiLevelScratchTeam< Kokkos::Serial, Kokkos::Schedule >(); + TestMultiLevelScratchTeam< Kokkos::Serial, Kokkos::Schedule >(); } -TEST_F( serial , team_vector ) +TEST_F( serial, team_vector ) { - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(0) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(1) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(2) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(3) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(4) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(5) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(6) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(7) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(8) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(9) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(10) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 0 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 1 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 2 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 3 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 4 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 5 ) ) ); + ASSERT_TRUE( ( 
TestTeamVector::Test< Kokkos::Serial >( 6 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 7 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 8 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 9 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 10 ) ) ); } #ifdef KOKKOS_COMPILER_GNU @@ -107,11 +113,10 @@ TEST_F( serial , team_vector ) #ifndef SKIP_TEST TEST_F( serial, triple_nested_parallelism ) { - TestTripleNestedReduce< double, Kokkos::Serial >( 8192, 2048 , 32 , 32 ); - TestTripleNestedReduce< double, Kokkos::Serial >( 8192, 2048 , 32 , 16 ); - TestTripleNestedReduce< double, Kokkos::Serial >( 8192, 2048 , 16 , 16 ); + TestTripleNestedReduce< double, Kokkos::Serial >( 8192, 2048, 32, 32 ); + TestTripleNestedReduce< double, Kokkos::Serial >( 8192, 2048, 32, 16 ); + TestTripleNestedReduce< double, Kokkos::Serial >( 8192, 2048, 16, 16 ); } #endif -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_a.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_a.cpp index 4c655fe770..2192159b84 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_a.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_a.cpp @@ -40,14 +40,15 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( serial , impl_view_mapping_a ) { +TEST_F( serial, impl_view_mapping_a ) +{ test_view_mapping< Kokkos::Serial >(); test_view_mapping_operator< Kokkos::Serial >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_b.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_b.cpp index 4947f2eaae..8c48ad2ced 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_b.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_b.cpp @@ -40,82 +40,85 @@ // 
************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( serial , impl_shared_alloc ) { - test_shared_alloc< Kokkos::HostSpace , Kokkos::Serial >(); +TEST_F( serial, impl_shared_alloc ) +{ + test_shared_alloc< Kokkos::HostSpace, Kokkos::Serial >(); } -TEST_F( serial , impl_view_mapping_b ) { +TEST_F( serial, impl_view_mapping_b ) +{ test_view_mapping_subview< Kokkos::Serial >(); TestViewMappingAtomic< Kokkos::Serial >::run(); } -TEST_F( serial, view_api) { - TestViewAPI< double , Kokkos::Serial >(); +TEST_F( serial, view_api ) +{ + TestViewAPI< double, Kokkos::Serial >(); } -TEST_F( serial , view_nested_view ) +TEST_F( serial, view_nested_view ) { ::Test::view_nested_view< Kokkos::Serial >(); } - - -TEST_F( serial , view_remap ) +TEST_F( serial, view_remap ) { - enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 }; + enum { N0 = 3, N1 = 2, N2 = 8, N3 = 9 }; - typedef Kokkos::View< double*[N1][N2][N3] , - Kokkos::LayoutRight , - Kokkos::Serial > output_type ; + typedef Kokkos::View< double*[N1][N2][N3], + Kokkos::LayoutRight, + Kokkos::Serial > output_type; - typedef Kokkos::View< int**[N2][N3] , - Kokkos::LayoutLeft , - Kokkos::Serial > input_type ; + typedef Kokkos::View< int**[N2][N3], + Kokkos::LayoutLeft, + Kokkos::Serial > input_type; - typedef Kokkos::View< int*[N0][N2][N3] , - Kokkos::LayoutLeft , - Kokkos::Serial > diff_type ; + typedef Kokkos::View< int*[N0][N2][N3], + Kokkos::LayoutLeft, + Kokkos::Serial > diff_type; - output_type output( "output" , N0 ); - input_type input ( "input" , N0 , N1 ); - diff_type diff ( "diff" , N0 ); + output_type output( "output", N0 ); + input_type input ( "input", N0, N1 ); + diff_type diff ( "diff", N0 ); - int value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - input(i0,i1,i2,i3) = ++value ; - }}}} + int value = 0; - // 
Kokkos::deep_copy( diff , input ); // throw with incompatible shape - Kokkos::deep_copy( output , input ); + for ( size_t i3 = 0; i3 < N3; ++i3 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i0 = 0; i0 < N0; ++i0 ) + { + input( i0, i1, i2, i3 ) = ++value; + } - value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - ++value ; - ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) ); - }}}} + // Kokkos::deep_copy( diff, input ); // Throw with incompatible shape. + Kokkos::deep_copy( output, input ); + + value = 0; + + for ( size_t i3 = 0; i3 < N3; ++i3 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i0 = 0; i0 < N0; ++i0 ) + { + ++value; + ASSERT_EQ( value, ( (int) output( i0, i1, i2, i3 ) ) ); + } } -//---------------------------------------------------------------------------- - -TEST_F( serial , view_aggregate ) +TEST_F( serial, view_aggregate ) { TestViewAggregate< Kokkos::Serial >(); } -TEST_F( serial , template_meta_functions ) +TEST_F( serial, template_meta_functions ) { - TestTemplateMetaFunctions(); + TestTemplateMetaFunctions< int, Kokkos::Serial >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads.hpp b/lib/kokkos/core/unit_test/threads/TestThreads.hpp index 4f611cf99c..0afd6772fe 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads.hpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads.hpp @@ -40,11 +40,14 @@ // ************************************************************************ //@HEADER */ + #ifndef KOKKOS_TEST_THREADS_HPP #define KOKKOS_TEST_THREADS_HPP + #include #include + #ifdef KOKKOS_LAMBDA #undef KOKKOS_LAMBDA #endif @@ -53,13 +56,8 @@ #include #include - -//---------------------------------------------------------------------------- - #include #include - - 
#include #include #include @@ -74,15 +72,11 @@ #include #include #include - - #include #include #include #include - #include - #include namespace Test { @@ -95,13 +89,13 @@ protected: const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); - unsigned threads_count = 0 ; + unsigned threads_count = 0; - threads_count = std::max( 1u , numa_count ) - * std::max( 2u , cores_per_numa * threads_per_core ); + threads_count = std::max( 1u, numa_count ) + * std::max( 2u, cores_per_numa * threads_per_core ); Kokkos::Threads::initialize( threads_count ); - Kokkos::Threads::print_configuration( std::cout , true /* detailed */ ); + Kokkos::print_configuration( std::cout, true /* detailed */ ); } static void TearDownTestCase() @@ -110,6 +104,6 @@ protected: } }; +} // namespace Test -} #endif diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_Atomics.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_Atomics.cpp index 6e24c4973e..d2a5ea5d63 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_Atomics.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_Atomics.cpp @@ -40,165 +40,161 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( threads , atomics ) +TEST_F( threads, atomics ) { - const int loop_count = 1e4 ; + const int loop_count = 1e4; - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Threads >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Threads >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Threads >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( 
TestAtomic::Loop(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Threads >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Threads >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Threads >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Threads >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Threads >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Threads >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Threads >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Threads >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Threads >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Threads >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Threads >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Threads >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Threads >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Threads >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Threads >( loop_count, 3 ) ) ); - 
ASSERT_TRUE( ( TestAtomic::Loop(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop(100,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Threads >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Threads >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Threads >( 100, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop ,Kokkos::Threads>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop ,Kokkos::Threads>(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop ,Kokkos::Threads>(100,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex, Kokkos::Threads >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex, Kokkos::Threads >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex, Kokkos::Threads >( 100, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop ,Kokkos::Threads>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop ,Kokkos::Threads>(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop ,Kokkos::Threads>(100,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Threads >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Threads >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Threads >( 100, 3 ) ) ); } -TEST_F( threads , atomic_operations ) +TEST_F( threads, atomic_operations ) { - const int start = 1; //Avoid zero for division + const int start = 1; // Avoid zero for division. 
const int end = 11; - for (int i = start; i < end; ++i) + for ( int i = start; i < end; ++i ) { - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 12 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 7 ) ) ); + 
ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 12 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 12 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 12 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 11 ) ) ); - ASSERT_TRUE( 
( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 12 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 12 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 12 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned 
long int, Kokkos::Threads >( start, end - i, 12 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType(start, end-i, 12 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads 
>( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 12 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Threads >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Threads >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Threads >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Threads >( start, end - i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Threads >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Threads >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Threads >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Threads >( start, end - i, 4 ) ) ); } - } - -TEST_F( threads , atomic_views_integral ) +TEST_F( threads, atomic_views_integral ) { const long length = 1000000; { - //Integral Types - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType(length, 8 ) ) ); - + // Integral Types. 
+ ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Threads >( length, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Threads >( length, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Threads >( length, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Threads >( length, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Threads >( length, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Threads >( length, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Threads >( length, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Threads >( length, 8 ) ) ); } } -TEST_F( threads , atomic_views_nonintegral ) +TEST_F( threads, atomic_views_nonintegral ) { const long length = 1000000; { - //Non-Integral Types - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType(length, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType(length, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType(length, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType(length, 4 ) ) ); - + // Non-Integral Types. 
+ ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Threads >( length, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Threads >( length, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Threads >( length, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Threads >( length, 4 ) ) ); } } -TEST_F( threads , atomic_view_api ) +TEST_F( threads, atomic_view_api ) { - TestAtomicViews::TestAtomicViewAPI(); + TestAtomicViews::TestAtomicViewAPI< int, Kokkos::Threads >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_Other.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_Other.cpp index ac0356eeb4..7d268c1454 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_Other.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_Other.cpp @@ -40,65 +40,74 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( threads , init ) { +TEST_F( threads, init ) +{ ; } -TEST_F( threads , md_range ) { - TestMDRange_2D< Kokkos::Threads >::test_for2(100,100); - - TestMDRange_3D< Kokkos::Threads >::test_for3(100,100,100); +TEST_F( threads , mdrange_for ) { + TestMDRange_2D< Kokkos::Threads >::test_for2( 100, 100 ); + TestMDRange_3D< Kokkos::Threads >::test_for3( 100, 10, 100 ); + TestMDRange_4D< Kokkos::Threads >::test_for4( 100, 10, 10, 10 ); + TestMDRange_5D< Kokkos::Threads >::test_for5( 100, 10, 10, 10, 5 ); + TestMDRange_6D< Kokkos::Threads >::test_for6( 10, 10, 10, 10, 5, 5 ); } -TEST_F( threads, policy_construction) { +TEST_F( threads , mdrange_reduce ) { + TestMDRange_2D< Kokkos::Threads >::test_reduce2( 100, 100 ); + TestMDRange_3D< Kokkos::Threads >::test_reduce3( 100, 10, 100 ); +} + +TEST_F( threads, policy_construction ) +{ TestRangePolicyConstruction< Kokkos::Threads >(); 
TestTeamPolicyConstruction< Kokkos::Threads >(); } -TEST_F( threads , range_tag ) +TEST_F( threads, range_tag ) { - TestRange< Kokkos::Threads , Kokkos::Schedule >::test_for(0); - TestRange< Kokkos::Threads , Kokkos::Schedule >::test_reduce(0); - TestRange< Kokkos::Threads , Kokkos::Schedule >::test_scan(0); - TestRange< Kokkos::Threads , Kokkos::Schedule >::test_for(0); - TestRange< Kokkos::Threads , Kokkos::Schedule >::test_reduce(0); - TestRange< Kokkos::Threads , Kokkos::Schedule >::test_scan(0); - TestRange< Kokkos::Threads , Kokkos::Schedule >::test_dynamic_policy(0); + TestRange< Kokkos::Threads, Kokkos::Schedule >::test_for( 0 ); + TestRange< Kokkos::Threads, Kokkos::Schedule >::test_reduce( 0 ); + TestRange< Kokkos::Threads, Kokkos::Schedule >::test_scan( 0 ); + TestRange< Kokkos::Threads, Kokkos::Schedule >::test_for( 0 ); + TestRange< Kokkos::Threads, Kokkos::Schedule >::test_reduce( 0 ); + TestRange< Kokkos::Threads, Kokkos::Schedule >::test_scan( 0 ); + TestRange< Kokkos::Threads, Kokkos::Schedule >::test_dynamic_policy( 0 ); - TestRange< Kokkos::Threads , Kokkos::Schedule >::test_for(2); - TestRange< Kokkos::Threads , Kokkos::Schedule >::test_reduce(2); - TestRange< Kokkos::Threads , Kokkos::Schedule >::test_scan(2); + TestRange< Kokkos::Threads, Kokkos::Schedule >::test_for( 2 ); + TestRange< Kokkos::Threads, Kokkos::Schedule >::test_reduce( 2 ); + TestRange< Kokkos::Threads, Kokkos::Schedule >::test_scan( 2 ); - TestRange< Kokkos::Threads , Kokkos::Schedule >::test_for(3); - TestRange< Kokkos::Threads , Kokkos::Schedule >::test_reduce(3); - TestRange< Kokkos::Threads , Kokkos::Schedule >::test_scan(3); - TestRange< Kokkos::Threads , Kokkos::Schedule >::test_dynamic_policy(3); + TestRange< Kokkos::Threads, Kokkos::Schedule >::test_for( 3 ); + TestRange< Kokkos::Threads, Kokkos::Schedule >::test_reduce( 3 ); + TestRange< Kokkos::Threads, Kokkos::Schedule >::test_scan( 3 ); + TestRange< Kokkos::Threads, Kokkos::Schedule >::test_dynamic_policy( 3 ); - 
TestRange< Kokkos::Threads , Kokkos::Schedule >::test_for(1000); - TestRange< Kokkos::Threads , Kokkos::Schedule >::test_reduce(1000); - TestRange< Kokkos::Threads , Kokkos::Schedule >::test_scan(1000); + TestRange< Kokkos::Threads, Kokkos::Schedule >::test_for( 1000 ); + TestRange< Kokkos::Threads, Kokkos::Schedule >::test_reduce( 1000 ); + TestRange< Kokkos::Threads, Kokkos::Schedule >::test_scan( 1000 ); - TestRange< Kokkos::Threads , Kokkos::Schedule >::test_for(1001); - TestRange< Kokkos::Threads , Kokkos::Schedule >::test_reduce(1001); - TestRange< Kokkos::Threads , Kokkos::Schedule >::test_scan(1001); - TestRange< Kokkos::Threads , Kokkos::Schedule >::test_dynamic_policy(1000); + TestRange< Kokkos::Threads, Kokkos::Schedule >::test_for( 1001 ); + TestRange< Kokkos::Threads, Kokkos::Schedule >::test_reduce( 1001 ); + TestRange< Kokkos::Threads, Kokkos::Schedule >::test_scan( 1001 ); + TestRange< Kokkos::Threads, Kokkos::Schedule >::test_dynamic_policy( 1000 ); } - //---------------------------------------------------------------------------- -TEST_F( threads , compiler_macros ) +TEST_F( threads, compiler_macros ) { ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Threads >() ) ); } //---------------------------------------------------------------------------- -TEST_F( threads , memory_pool ) +TEST_F( threads, memory_pool ) { bool val = TestMemoryPool::test_mempool< Kokkos::Threads >( 128, 128000000 ); ASSERT_TRUE( val ); @@ -112,24 +121,24 @@ TEST_F( threads , memory_pool ) #if defined( KOKKOS_ENABLE_TASKDAG ) /* -TEST_F( threads , task_fib ) +TEST_F( threads, task_fib ) { - for ( int i = 0 ; i < 25 ; ++i ) { - TestTaskScheduler::TestFib< Kokkos::Threads >::run(i); + for ( int i = 0; i < 25; ++i ) { + TestTaskScheduler::TestFib< Kokkos::Threads >::run( i ); } } -TEST_F( threads , task_depend ) +TEST_F( threads, task_depend ) { - for ( int i = 0 ; i < 25 ; ++i ) { - TestTaskScheduler::TestTaskDependence< Kokkos::Threads >::run(i); + for ( int i = 0; i < 25; 
++i ) { + TestTaskScheduler::TestTaskDependence< Kokkos::Threads >::run( i ); } } -TEST_F( threads , task_team ) +TEST_F( threads, task_team ) { - TestTaskScheduler::TestTaskTeam< Kokkos::Threads >::run(1000); - //TestTaskScheduler::TestTaskTeamValue< Kokkos::Threads >::run(1000); //put back after testing + TestTaskScheduler::TestTaskTeam< Kokkos::Threads >::run( 1000 ); + //TestTaskScheduler::TestTaskTeamValue< Kokkos::Threads >::run( 1000 ); // Put back after testing. } */ #endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ @@ -137,53 +146,51 @@ TEST_F( threads , task_team ) //---------------------------------------------------------------------------- #if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) -TEST_F( threads , cxx11 ) +TEST_F( threads, cxx11 ) { - if ( std::is_same< Kokkos::DefaultExecutionSpace , Kokkos::Threads >::value ) { - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(1) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(2) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(3) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(4) ) ); + if ( std::is_same< Kokkos::DefaultExecutionSpace, Kokkos::Threads >::value ) { + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >( 1 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >( 2 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >( 3 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >( 4 ) ) ); } } #endif TEST_F( threads, tile_layout ) { - TestTile::test< Kokkos::Threads , 1 , 1 >( 1 , 1 ); - TestTile::test< Kokkos::Threads , 1 , 1 >( 2 , 3 ); - TestTile::test< Kokkos::Threads , 1 , 1 >( 9 , 10 ); + TestTile::test< Kokkos::Threads, 1, 1 >( 1, 1 ); + TestTile::test< Kokkos::Threads, 1, 1 >( 2, 3 ); + TestTile::test< Kokkos::Threads, 1, 1 >( 9, 10 ); - TestTile::test< Kokkos::Threads , 2 , 2 >( 1 , 1 ); - TestTile::test< Kokkos::Threads , 2 , 2 >( 2 , 3 ); - TestTile::test< Kokkos::Threads , 2 , 2 >( 4 , 4 ); - TestTile::test< Kokkos::Threads , 2 , 2 >( 9 , 9 
); + TestTile::test< Kokkos::Threads, 2, 2 >( 1, 1 ); + TestTile::test< Kokkos::Threads, 2, 2 >( 2, 3 ); + TestTile::test< Kokkos::Threads, 2, 2 >( 4, 4 ); + TestTile::test< Kokkos::Threads, 2, 2 >( 9, 9 ); - TestTile::test< Kokkos::Threads , 2 , 4 >( 9 , 9 ); - TestTile::test< Kokkos::Threads , 4 , 2 >( 9 , 9 ); + TestTile::test< Kokkos::Threads, 2, 4 >( 9, 9 ); + TestTile::test< Kokkos::Threads, 4, 2 >( 9, 9 ); - TestTile::test< Kokkos::Threads , 4 , 4 >( 1 , 1 ); - TestTile::test< Kokkos::Threads , 4 , 4 >( 4 , 4 ); - TestTile::test< Kokkos::Threads , 4 , 4 >( 9 , 9 ); - TestTile::test< Kokkos::Threads , 4 , 4 >( 9 , 11 ); + TestTile::test< Kokkos::Threads, 4, 4 >( 1, 1 ); + TestTile::test< Kokkos::Threads, 4, 4 >( 4, 4 ); + TestTile::test< Kokkos::Threads, 4, 4 >( 9, 9 ); + TestTile::test< Kokkos::Threads, 4, 4 >( 9, 11 ); - TestTile::test< Kokkos::Threads , 8 , 8 >( 1 , 1 ); - TestTile::test< Kokkos::Threads , 8 , 8 >( 4 , 4 ); - TestTile::test< Kokkos::Threads , 8 , 8 >( 9 , 9 ); - TestTile::test< Kokkos::Threads , 8 , 8 >( 9 , 11 ); + TestTile::test< Kokkos::Threads, 8, 8 >( 1, 1 ); + TestTile::test< Kokkos::Threads, 8, 8 >( 4, 4 ); + TestTile::test< Kokkos::Threads, 8, 8 >( 9, 9 ); + TestTile::test< Kokkos::Threads, 8, 8 >( 9, 11 ); } - -TEST_F( threads , dispatch ) +TEST_F( threads, dispatch ) { - const int repeat = 100 ; - for ( int i = 0 ; i < repeat ; ++i ) { - for ( int j = 0 ; j < repeat ; ++j ) { - Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Threads >(0,j) - , KOKKOS_LAMBDA( int ) {} ); - }} + const int repeat = 100; + for ( int i = 0; i < repeat; ++i ) { + for ( int j = 0; j < repeat; ++j ) { + Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Threads >( 0, j ) + , KOKKOS_LAMBDA( int ) {} ); + } + } } - -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_Reductions.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_Reductions.cpp index a637d1e3ab..d2b75ca892 100644 --- 
a/lib/kokkos/core/unit_test/threads/TestThreads_Reductions.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_Reductions.cpp @@ -40,46 +40,52 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( threads, long_reduce) { - TestReduce< long , Kokkos::Threads >( 0 ); - TestReduce< long , Kokkos::Threads >( 1000000 ); -} - -TEST_F( threads, double_reduce) { - TestReduce< double , Kokkos::Threads >( 0 ); - TestReduce< double , Kokkos::Threads >( 1000000 ); -} - -TEST_F( threads , reducers ) +TEST_F( threads, long_reduce ) { - TestReducers::execute_integer(); - TestReducers::execute_integer(); - TestReducers::execute_float(); - TestReducers, Kokkos::Threads>::execute_basic(); + TestReduce< long, Kokkos::Threads >( 0 ); + TestReduce< long, Kokkos::Threads >( 1000000 ); } -TEST_F( threads, long_reduce_dynamic ) { - TestReduceDynamic< long , Kokkos::Threads >( 0 ); - TestReduceDynamic< long , Kokkos::Threads >( 1000000 ); -} - -TEST_F( threads, double_reduce_dynamic ) { - TestReduceDynamic< double , Kokkos::Threads >( 0 ); - TestReduceDynamic< double , Kokkos::Threads >( 1000000 ); -} - -TEST_F( threads, long_reduce_dynamic_view ) { - TestReduceDynamicView< long , Kokkos::Threads >( 0 ); - TestReduceDynamicView< long , Kokkos::Threads >( 1000000 ); -} - -TEST_F( threads , scan ) +TEST_F( threads, double_reduce ) { - TestScan< Kokkos::Threads >::test_range( 1 , 1000 ); + TestReduce< double, Kokkos::Threads >( 0 ); + TestReduce< double, Kokkos::Threads >( 1000000 ); +} + +TEST_F( threads, reducers ) +{ + TestReducers< int, Kokkos::Threads >::execute_integer(); + TestReducers< size_t, Kokkos::Threads >::execute_integer(); + TestReducers< double, Kokkos::Threads >::execute_float(); + TestReducers< Kokkos::complex, Kokkos::Threads >::execute_basic(); +} + +TEST_F( threads, long_reduce_dynamic ) +{ + TestReduceDynamic< long, Kokkos::Threads >( 0 ); + TestReduceDynamic< long, Kokkos::Threads >( 1000000 
); +} + +TEST_F( threads, double_reduce_dynamic ) +{ + TestReduceDynamic< double, Kokkos::Threads >( 0 ); + TestReduceDynamic< double, Kokkos::Threads >( 1000000 ); +} + +TEST_F( threads, long_reduce_dynamic_view ) +{ + TestReduceDynamicView< long, Kokkos::Threads >( 0 ); + TestReduceDynamicView< long, Kokkos::Threads >( 1000000 ); +} + +TEST_F( threads, scan ) +{ + TestScan< Kokkos::Threads >::test_range( 1, 1000 ); TestScan< Kokkos::Threads >( 0 ); TestScan< Kokkos::Threads >( 100000 ); TestScan< Kokkos::Threads >( 10000000 ); @@ -87,10 +93,11 @@ TEST_F( threads , scan ) } #if 0 -TEST_F( threads , scan_small ) +TEST_F( threads, scan_small ) { - typedef TestScan< Kokkos::Threads , Kokkos::Impl::ThreadsExecUseScanSmall > TestScanFunctor ; - for ( int i = 0 ; i < 1000 ; ++i ) { + typedef TestScan< Kokkos::Threads, Kokkos::Impl::ThreadsExecUseScanSmall > TestScanFunctor; + + for ( int i = 0; i < 1000; ++i ) { TestScanFunctor( 10 ); TestScanFunctor( 10000 ); } @@ -101,38 +108,39 @@ TEST_F( threads , scan_small ) } #endif -TEST_F( threads , team_scan ) +TEST_F( threads, team_scan ) { - TestScanTeam< Kokkos::Threads , Kokkos::Schedule >( 0 ); - TestScanTeam< Kokkos::Threads , Kokkos::Schedule >( 0 ); - TestScanTeam< Kokkos::Threads , Kokkos::Schedule >( 10 ); - TestScanTeam< Kokkos::Threads , Kokkos::Schedule >( 10 ); - TestScanTeam< Kokkos::Threads , Kokkos::Schedule >( 10000 ); - TestScanTeam< Kokkos::Threads , Kokkos::Schedule >( 10000 ); + TestScanTeam< Kokkos::Threads, Kokkos::Schedule >( 0 ); + TestScanTeam< Kokkos::Threads, Kokkos::Schedule >( 0 ); + TestScanTeam< Kokkos::Threads, Kokkos::Schedule >( 10 ); + TestScanTeam< Kokkos::Threads, Kokkos::Schedule >( 10 ); + TestScanTeam< Kokkos::Threads, Kokkos::Schedule >( 10000 ); + TestScanTeam< Kokkos::Threads, Kokkos::Schedule >( 10000 ); } -TEST_F( threads , team_long_reduce) { - TestReduceTeam< long , Kokkos::Threads , Kokkos::Schedule >( 0 ); - TestReduceTeam< long , Kokkos::Threads , Kokkos::Schedule >( 0 ); - 
TestReduceTeam< long , Kokkos::Threads , Kokkos::Schedule >( 3 ); - TestReduceTeam< long , Kokkos::Threads , Kokkos::Schedule >( 3 ); - TestReduceTeam< long , Kokkos::Threads , Kokkos::Schedule >( 100000 ); - TestReduceTeam< long , Kokkos::Threads , Kokkos::Schedule >( 100000 ); +TEST_F( threads, team_long_reduce ) +{ + TestReduceTeam< long, Kokkos::Threads, Kokkos::Schedule >( 0 ); + TestReduceTeam< long, Kokkos::Threads, Kokkos::Schedule >( 0 ); + TestReduceTeam< long, Kokkos::Threads, Kokkos::Schedule >( 3 ); + TestReduceTeam< long, Kokkos::Threads, Kokkos::Schedule >( 3 ); + TestReduceTeam< long, Kokkos::Threads, Kokkos::Schedule >( 100000 ); + TestReduceTeam< long, Kokkos::Threads, Kokkos::Schedule >( 100000 ); } -TEST_F( threads , team_double_reduce) { - TestReduceTeam< double , Kokkos::Threads , Kokkos::Schedule >( 0 ); - TestReduceTeam< double , Kokkos::Threads , Kokkos::Schedule >( 0 ); - TestReduceTeam< double , Kokkos::Threads , Kokkos::Schedule >( 3 ); - TestReduceTeam< double , Kokkos::Threads , Kokkos::Schedule >( 3 ); - TestReduceTeam< double , Kokkos::Threads , Kokkos::Schedule >( 100000 ); - TestReduceTeam< double , Kokkos::Threads , Kokkos::Schedule >( 100000 ); +TEST_F( threads, team_double_reduce ) +{ + TestReduceTeam< double, Kokkos::Threads, Kokkos::Schedule >( 0 ); + TestReduceTeam< double, Kokkos::Threads, Kokkos::Schedule >( 0 ); + TestReduceTeam< double, Kokkos::Threads, Kokkos::Schedule >( 3 ); + TestReduceTeam< double, Kokkos::Threads, Kokkos::Schedule >( 3 ); + TestReduceTeam< double, Kokkos::Threads, Kokkos::Schedule >( 100000 ); + TestReduceTeam< double, Kokkos::Threads, Kokkos::Schedule >( 100000 ); } -TEST_F( threads , reduction_deduction ) +TEST_F( threads, reduction_deduction ) { TestCXX11::test_reduction_deduction< Kokkos::Threads >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_a.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_a.cpp index 
2df9e19deb..68a9da6aed 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_a.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_a.cpp @@ -40,53 +40,64 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( threads, view_subview_auto_1d_left ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutLeft,Kokkos::Threads >(); +TEST_F( threads, view_subview_auto_1d_left ) +{ + TestViewSubview::test_auto_1d< Kokkos::LayoutLeft, Kokkos::Threads >(); } -TEST_F( threads, view_subview_auto_1d_right ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutRight,Kokkos::Threads >(); +TEST_F( threads, view_subview_auto_1d_right ) +{ + TestViewSubview::test_auto_1d< Kokkos::LayoutRight, Kokkos::Threads >(); } -TEST_F( threads, view_subview_auto_1d_stride ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutStride,Kokkos::Threads >(); +TEST_F( threads, view_subview_auto_1d_stride ) +{ + TestViewSubview::test_auto_1d< Kokkos::LayoutStride, Kokkos::Threads >(); } -TEST_F( threads, view_subview_assign_strided ) { +TEST_F( threads, view_subview_assign_strided ) +{ TestViewSubview::test_1d_strided_assignment< Kokkos::Threads >(); } -TEST_F( threads, view_subview_left_0 ) { +TEST_F( threads, view_subview_left_0 ) +{ TestViewSubview::test_left_0< Kokkos::Threads >(); } -TEST_F( threads, view_subview_left_1 ) { +TEST_F( threads, view_subview_left_1 ) +{ TestViewSubview::test_left_1< Kokkos::Threads >(); } -TEST_F( threads, view_subview_left_2 ) { +TEST_F( threads, view_subview_left_2 ) +{ TestViewSubview::test_left_2< Kokkos::Threads >(); } -TEST_F( threads, view_subview_left_3 ) { +TEST_F( threads, view_subview_left_3 ) +{ TestViewSubview::test_left_3< Kokkos::Threads >(); } -TEST_F( threads, view_subview_right_0 ) { +TEST_F( threads, view_subview_right_0 ) +{ TestViewSubview::test_right_0< Kokkos::Threads >(); } -TEST_F( threads, view_subview_right_1 ) { +TEST_F( threads, view_subview_right_1 ) +{ 
TestViewSubview::test_right_1< Kokkos::Threads >(); } -TEST_F( threads, view_subview_right_3 ) { +TEST_F( threads, view_subview_right_3 ) +{ TestViewSubview::test_right_3< Kokkos::Threads >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_b.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_b.cpp index d57dbe97c0..c5cf061e82 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_b.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_b.cpp @@ -40,21 +40,23 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( threads, view_subview_layoutleft_to_layoutleft) { +TEST_F( threads, view_subview_layoutleft_to_layoutleft ) +{ TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Threads >(); - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Threads , Kokkos::MemoryTraits >(); - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Threads , Kokkos::MemoryTraits >(); + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Threads, Kokkos::MemoryTraits >(); + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Threads, Kokkos::MemoryTraits >(); } -TEST_F( threads, view_subview_layoutright_to_layoutright) { +TEST_F( threads, view_subview_layoutright_to_layoutright ) +{ TestViewSubview::test_layoutright_to_layoutright< Kokkos::Threads >(); - TestViewSubview::test_layoutright_to_layoutright< Kokkos::Threads , Kokkos::MemoryTraits >(); - TestViewSubview::test_layoutright_to_layoutright< Kokkos::Threads , Kokkos::MemoryTraits >(); + TestViewSubview::test_layoutright_to_layoutright< Kokkos::Threads, Kokkos::MemoryTraits >(); + TestViewSubview::test_layoutright_to_layoutright< Kokkos::Threads, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c01.cpp 
b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c01.cpp index 67d998c0e8..9018c1f4f7 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c01.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c01.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( threads, view_subview_1d_assign ) { +TEST_F( threads, view_subview_1d_assign ) +{ TestViewSubview::test_1d_assign< Kokkos::Threads >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c02.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c02.cpp index e340240c48..9483abd9cc 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c02.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c02.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( threads, view_subview_1d_assign_atomic ) { - TestViewSubview::test_1d_assign< Kokkos::Threads , Kokkos::MemoryTraits >(); +TEST_F( threads, view_subview_1d_assign_atomic ) +{ + TestViewSubview::test_1d_assign< Kokkos::Threads, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c03.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c03.cpp index ad27fa0fa6..e252a26565 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c03.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c03.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( threads, view_subview_1d_assign_randomaccess ) { - TestViewSubview::test_1d_assign< Kokkos::Threads , Kokkos::MemoryTraits >(); +TEST_F( threads, view_subview_1d_assign_randomaccess ) +{ + TestViewSubview::test_1d_assign< 
Kokkos::Threads, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c04.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c04.cpp index 6fca47cc4c..3e211b1a58 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c04.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c04.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( threads, view_subview_2d_from_3d ) { +TEST_F( threads, view_subview_2d_from_3d ) +{ TestViewSubview::test_2d_subview_3d< Kokkos::Threads >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c05.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c05.cpp index c7dfca9415..865d50b1a1 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c05.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c05.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( threads, view_subview_2d_from_3d_atomic ) { - TestViewSubview::test_2d_subview_3d< Kokkos::Threads , Kokkos::MemoryTraits >(); +TEST_F( threads, view_subview_2d_from_3d_atomic ) +{ + TestViewSubview::test_2d_subview_3d< Kokkos::Threads, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c06.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c06.cpp index 38e8394918..c5840073b6 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c06.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c06.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( threads, 
view_subview_2d_from_3d_randomaccess ) { - TestViewSubview::test_2d_subview_3d< Kokkos::Threads , Kokkos::MemoryTraits >(); +TEST_F( threads, view_subview_2d_from_3d_randomaccess ) +{ + TestViewSubview::test_2d_subview_3d< Kokkos::Threads, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c07.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c07.cpp index 1f01fe6b5e..7b8825ef62 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c07.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c07.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( threads, view_subview_3d_from_5d_left ) { +TEST_F( threads, view_subview_3d_from_5d_left ) +{ TestViewSubview::test_3d_subview_5d_left< Kokkos::Threads >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c08.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c08.cpp index e9a1ccbe30..7bc16a5827 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c08.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c08.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( threads, view_subview_3d_from_5d_left_atomic ) { - TestViewSubview::test_3d_subview_5d_left< Kokkos::Threads , Kokkos::MemoryTraits >(); +TEST_F( threads, view_subview_3d_from_5d_left_atomic ) +{ + TestViewSubview::test_3d_subview_5d_left< Kokkos::Threads, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c09.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c09.cpp index c8b6c8743d..57b87b6098 100644 --- 
a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c09.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c09.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( threads, view_subview_3d_from_5d_left_randomaccess ) { - TestViewSubview::test_3d_subview_5d_left< Kokkos::Threads , Kokkos::MemoryTraits >(); +TEST_F( threads, view_subview_3d_from_5d_left_randomaccess ) +{ + TestViewSubview::test_3d_subview_5d_left< Kokkos::Threads, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c10.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c10.cpp index 7cef6fa07b..1875a883d4 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c10.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c10.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( threads, view_subview_3d_from_5d_right ) { +TEST_F( threads, view_subview_3d_from_5d_right ) +{ TestViewSubview::test_3d_subview_5d_right< Kokkos::Threads >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c11.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c11.cpp index d67bf3157e..cf6428b18e 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c11.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c11.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( threads, view_subview_3d_from_5d_right_atomic ) { - TestViewSubview::test_3d_subview_5d_right< Kokkos::Threads , Kokkos::MemoryTraits >(); +TEST_F( threads, view_subview_3d_from_5d_right_atomic ) +{ + TestViewSubview::test_3d_subview_5d_right< Kokkos::Threads, 
Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c12.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c12.cpp index e8a2c825cf..7060fdb273 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c12.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c12.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( threads, view_subview_3d_from_5d_right_randomaccess ) { - TestViewSubview::test_3d_subview_5d_right< Kokkos::Threads , Kokkos::MemoryTraits >(); +TEST_F( threads, view_subview_3d_from_5d_right_randomaccess ) +{ + TestViewSubview::test_3d_subview_5d_right< Kokkos::Threads, Kokkos::MemoryTraits >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_Team.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_Team.cpp index 4690be4d3a..d802d65830 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_Team.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_Team.cpp @@ -40,67 +40,73 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( threads , team_tag ) +TEST_F( threads, team_tag ) { - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule >::test_for(0); - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule >::test_reduce(0); - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule >::test_for(0); - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule >::test_reduce(0); + TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule >::test_for( 0 ); + TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule >::test_reduce( 0 ); + TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule >::test_for( 0 ); + TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule >::test_reduce( 0 ); - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule >::test_for(2); - 
TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule >::test_reduce(2); - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule >::test_for(2); - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule >::test_reduce(2); + TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule >::test_for( 2 ); + TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule >::test_reduce( 2 ); + TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule >::test_for( 2 ); + TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule >::test_reduce( 2 ); - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule >::test_for(1000); - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule >::test_reduce(1000); - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule >::test_for(1000); - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule >::test_reduce(1000); + TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule >::test_for( 1000 ); + TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule >::test_reduce( 1000 ); + TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule >::test_for( 1000 ); + TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule >::test_reduce( 1000 ); } -TEST_F( threads , team_shared_request) { - TestSharedTeam< Kokkos::Threads , Kokkos::Schedule >(); - TestSharedTeam< Kokkos::Threads , Kokkos::Schedule >(); +TEST_F( threads, team_shared_request ) +{ + TestSharedTeam< Kokkos::Threads, Kokkos::Schedule >(); + TestSharedTeam< Kokkos::Threads, Kokkos::Schedule >(); } -TEST_F( threads, team_scratch_request) { - TestScratchTeam< Kokkos::Threads , Kokkos::Schedule >(); - TestScratchTeam< Kokkos::Threads , Kokkos::Schedule >(); +TEST_F( threads, team_scratch_request ) +{ + TestScratchTeam< Kokkos::Threads, Kokkos::Schedule >(); + TestScratchTeam< Kokkos::Threads, Kokkos::Schedule >(); } -#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -TEST_F( threads , team_lambda_shared_request) { - TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Threads , Kokkos::Schedule >(); - TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Threads , Kokkos::Schedule >(); +#if 
defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) +TEST_F( threads, team_lambda_shared_request ) +{ + TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Threads, Kokkos::Schedule >(); + TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Threads, Kokkos::Schedule >(); } #endif -TEST_F( threads, shmem_size) { +TEST_F( threads, shmem_size ) +{ TestShmemSize< Kokkos::Threads >(); } -TEST_F( threads, multi_level_scratch) { - TestMultiLevelScratchTeam< Kokkos::Threads , Kokkos::Schedule >(); - TestMultiLevelScratchTeam< Kokkos::Threads , Kokkos::Schedule >(); +TEST_F( threads, multi_level_scratch ) +{ + TestMultiLevelScratchTeam< Kokkos::Threads, Kokkos::Schedule >(); + TestMultiLevelScratchTeam< Kokkos::Threads, Kokkos::Schedule >(); } -TEST_F( threads , team_vector ) +TEST_F( threads, team_vector ) { - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(0) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(1) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(2) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(3) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(4) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(5) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(6) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(7) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(8) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(9) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(10) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 0 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 1 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 2 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 3 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 4 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 5 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 6 ) ) ); + 
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 7 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 8 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 9 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 10 ) ) ); } #ifdef KOKKOS_COMPILER_GNU @@ -112,11 +118,10 @@ TEST_F( threads , team_vector ) #ifndef SKIP_TEST TEST_F( threads, triple_nested_parallelism ) { - TestTripleNestedReduce< double, Kokkos::Threads >( 8192, 2048 , 32 , 32 ); - TestTripleNestedReduce< double, Kokkos::Threads >( 8192, 2048 , 32 , 16 ); - TestTripleNestedReduce< double, Kokkos::Threads >( 8192, 2048 , 16 , 16 ); + TestTripleNestedReduce< double, Kokkos::Threads >( 8192, 2048, 32, 32 ); + TestTripleNestedReduce< double, Kokkos::Threads >( 8192, 2048, 32, 16 ); + TestTripleNestedReduce< double, Kokkos::Threads >( 8192, 2048, 16, 16 ); } #endif -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_a.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_a.cpp index 46a576b027..36eae28793 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_a.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_a.cpp @@ -40,14 +40,15 @@ // ************************************************************************ //@HEADER */ + #include namespace Test { -TEST_F( threads , impl_view_mapping_a ) { +TEST_F( threads, impl_view_mapping_a ) +{ test_view_mapping< Kokkos::Threads >(); test_view_mapping_operator< Kokkos::Threads >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_b.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_b.cpp index b5d6ac843d..8c78d09443 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_b.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_b.cpp @@ -40,82 +40,85 @@ // ************************************************************************ //@HEADER */ + 
#include namespace Test { -TEST_F( threads , impl_shared_alloc ) { - test_shared_alloc< Kokkos::HostSpace , Kokkos::Threads >(); +TEST_F( threads, impl_shared_alloc ) +{ + test_shared_alloc< Kokkos::HostSpace, Kokkos::Threads >(); } -TEST_F( threads , impl_view_mapping_b ) { +TEST_F( threads, impl_view_mapping_b ) +{ test_view_mapping_subview< Kokkos::Threads >(); TestViewMappingAtomic< Kokkos::Threads >::run(); } -TEST_F( threads, view_api) { - TestViewAPI< double , Kokkos::Threads >(); +TEST_F( threads, view_api ) +{ + TestViewAPI< double, Kokkos::Threads >(); } -TEST_F( threads , view_nested_view ) +TEST_F( threads, view_nested_view ) { ::Test::view_nested_view< Kokkos::Threads >(); } - - -TEST_F( threads , view_remap ) +TEST_F( threads, view_remap ) { - enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 }; + enum { N0 = 3, N1 = 2, N2 = 8, N3 = 9 }; - typedef Kokkos::View< double*[N1][N2][N3] , - Kokkos::LayoutRight , - Kokkos::Threads > output_type ; + typedef Kokkos::View< double*[N1][N2][N3], + Kokkos::LayoutRight, + Kokkos::Threads > output_type; - typedef Kokkos::View< int**[N2][N3] , - Kokkos::LayoutLeft , - Kokkos::Threads > input_type ; + typedef Kokkos::View< int**[N2][N3], + Kokkos::LayoutLeft, + Kokkos::Threads > input_type; - typedef Kokkos::View< int*[N0][N2][N3] , - Kokkos::LayoutLeft , - Kokkos::Threads > diff_type ; + typedef Kokkos::View< int*[N0][N2][N3], + Kokkos::LayoutLeft, + Kokkos::Threads > diff_type; - output_type output( "output" , N0 ); - input_type input ( "input" , N0 , N1 ); - diff_type diff ( "diff" , N0 ); + output_type output( "output", N0 ); + input_type input ( "input", N0, N1 ); + diff_type diff ( "diff", N0 ); - int value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - input(i0,i1,i2,i3) = ++value ; - }}}} + int value = 0; - // Kokkos::deep_copy( diff , input ); // throw with incompatible shape - 
Kokkos::deep_copy( output , input ); + for ( size_t i3 = 0; i3 < N3; ++i3 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i0 = 0; i0 < N0; ++i0 ) + { + input( i0, i1, i2, i3 ) = ++value; + } - value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - ++value ; - ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) ); - }}}} + // Kokkos::deep_copy( diff, input ); // Throw with incompatible shape. + Kokkos::deep_copy( output, input ); + + value = 0; + + for ( size_t i3 = 0; i3 < N3; ++i3 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i0 = 0; i0 < N0; ++i0 ) + { + ++value; + ASSERT_EQ( value, ( (int) output( i0, i1, i2, i3 ) ) ); + } } -//---------------------------------------------------------------------------- - -TEST_F( threads , view_aggregate ) +TEST_F( threads, view_aggregate ) { TestViewAggregate< Kokkos::Threads >(); } -TEST_F( threads , template_meta_functions ) +TEST_F( threads, template_meta_functions ) { - TestTemplateMetaFunctions(); + TestTemplateMetaFunctions< int, Kokkos::Threads >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/doc/design_notes_space_instances.md b/lib/kokkos/doc/design_notes_space_instances.md index 487fa25bcb..0124dfbc87 100644 --- a/lib/kokkos/doc/design_notes_space_instances.md +++ b/lib/kokkos/doc/design_notes_space_instances.md @@ -1,35 +1,41 @@ # Design Notes for Execution and Memory Space Instances +## Objective -## Execution Spaces + * Enable Kokkos interoperability with coarse-grain tasking models + +## Requirements - * Work is *dispatched* to an execution space instance + * Backwards compatable with existing Kokkos API + * Support existing Host execution spaces (Serial, Threads, OpenMP, maybe Qthreads) + * Support DARMA threading model (may require a new Host execution space) + * 
Support Uintah threading model, i.e. independent worker threadpools working off of shared task queues + + +## Execution Space + * Parallel work is *dispatched* on an execution space instance + + * Execution space instances are conceptually disjoint/independent from each other + - -## Host Associated Execution Space Instances - -Vocabulary and examples assuming C++11 Threads Support Library +## Host Execution Space Instances * A host-side *control* thread dispatches work to an instance - * `this_thread` is the control thread - * `main` is the initial control thread - * An execution space instance is a pool of threads + * A host execution space instance is an organized thread pool - * All instances are disjoint thread pools + * All instances are disjoint, i.e. hardware resources are not shared between instances * Exactly one control thread is associated with an instance and only that control thread may dispatch work to to that instance - * A control thread may be a member of an instance, - if so then it is also the control thread associated - with that instance + * The control thread is a member of the instance - * The pool of threads associated with an instances is not mutatable + * The pool of threads associated with an instance is not mutable during that instance's existence * The pool of threads associated with an instance may be masked @@ -37,130 +43,89 @@ Vocabulary and examples assuming C++11 Threads Support Library - Example: only one hyperthread per core of the instance - - When a mask is applied to an instance that mask - remains until cleared or another mask is applied - - - Masking is portable by defining it as using a fraction - of the available resources (threads) - - * Instances are shared (referenced counted) objects, - just like `Kokkos::View` + - A mask can be applied during the policy creation of a parallel algorithm + + - Masking is portable by defining it as ceiling of fraction between [0.0, 1.0] + of the available resources ``` -struct StdThread
{ - void mask( float fraction ); - void unmask() { mask( 1.0 ); } -}; -``` +class ExecutionSpace { +public: + using execution_space = ExecutionSpace; + using memory_space = ...; + using device_type = Kokkos::Device; + using array_layout = ...; + using size_type = ...; + using scratch_memory_space = ...; + + + class Instance + { + int thread_pool_size( int depth = 0 ); + ... + }; + + class InstanceRequest + { + public: + using Control = std::function< void( Instance * )>; + + InstanceRequest( Control control + , unsigned thread_count + , unsigned use_numa_count = 0 + , unsigned use_cores_per_numa = 0 + ); + + }; + + static bool in_parallel(); + + static bool sleep(); + static bool wake(); + + static void fence(); + + static void print_configuration( std::ostream &, const bool detailed = false ); + + static void initialize( unsigned thread_count = 0 + , unsigned use_numa_count = 0 + , unsigned use_cores_per_numa = 0 + ); + + // Partition the current instance into the requested instances + // and run the given functions on the corresponding instances + // will block until all the partitioned instances complete and + // the original instance will be restored + // + // Requires that the space has already been initialized + // Requires that the request can be satisfied by the current instance + // i.e. the sum of number of requested threads must be less than the + // max_hardware_threads + // + // Each control functor will accept a handle to its new default instance + // Each instance must be independent of all other instances + // i.e.
no assumption on scheduling between instances + // The user is responsible for checking the return code for errors + static int run_instances( std::vector< InstanceRequest> const& requests ); + + static void finalize(); - - -### Requesting an Execution Space Instance - - * `Space::request(` *who* `,` *what* `,` *control-opt* `)` - - * *who* is an identifier for subsquent queries regarding - who requested each instance - - * *what* is the number of threads and how they should be placed - - - Placement within locality-topology hierarchy; e.g., HWLOC - - - Compact within a level of hierarchy, or striped across that level; - e.g., socket or NUMA region - - - Granularity of request is core - - * *control-opt* optionally specifies whether the instance - has a new control thread - - - *control-opt* includes a control function / closure - - - The new control thread is a member of the instance - - - The control function is called by the new control thread - and is passed a `const` instance - - - The instance is **not** returned to the creating control thread - - * `std::thread` that is not a member of an instance is - *hard blocked* on a `std::mutex` - - - One global mutex or one mutex per thread?
- - * `std::thread` that is a member of an instance is - *spinning* waiting for work, or are working + static int is_initialized(); + + static int concurrency(); + + static int thread_pool_size( int depth = 0 ); + + static int thread_pool_rank(); + + static int max_hardware_threads(); + + static int hardware_thread_id(); + + }; ``` -struct StdThread { - - struct Resource ; - - static StdThread request(); // default - - static StdThread request( const std::string & , const Resource & ); - - // If the instance can be reserved then - // allocate a copy of ControlClosure and invoke - // ControlClosure::operator()( const StdThread intance ) const - template< class ControlClosure > - static bool request( const std::string & , const Resource & - , const ControlClosure & ); -}; -``` - -### Relinquishing an Execution Space Instance - - * De-referencing the last reference-counted instance - relinquishes the pool of threads - - * If a control thread was created for the instance then - it is relinquished when that control thread returns - from the control function - - - Requires the reference count to be zero, an error if not - - * No *forced* relinquish + -## CUDA Associated Execution Space Instances - - * Only a signle CUDA architecture - - * An instance is a device + stream - - * A stream is exclusive to an instance - - * Only a host-side control thread can dispatch work to an instance - - * Finite number of streams per device - - * ISSUE: How to use CUDA `const` memory with multiple streams? 
- - * Masking can be mapped to restricting the number of CUDA blocks - to the fraction of available resources; e.g., maximum resident blocks - - -### Requesting an Execution Space Instance - - * `Space::request(` *who* `,` *what* `)` - - * *who* is an identifier for subsquent queries regarding - who requested each instance - - * *what* is which device, the stream is a requested/relinquished resource - - -``` -struct Cuda { - - struct Resource ; - - static Cuda request(); - - static Cuda request( const std::string & , const Resource & ); -}; -``` - - diff --git a/lib/kokkos/example/md_skeleton/types.h b/lib/kokkos/example/md_skeleton/types.h index 7f92b7cd0f..c9689188a1 100644 --- a/lib/kokkos/example/md_skeleton/types.h +++ b/lib/kokkos/example/md_skeleton/types.h @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -50,7 +50,7 @@ typedef Kokkos::DefaultExecutionSpace execution_space ; -#if ! defined( KOKKOS_HAVE_CUDA ) +#if ! 
defined( KOKKOS_ENABLE_CUDA ) struct double2 { double x, y; KOKKOS_INLINE_FUNCTION diff --git a/lib/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp b/lib/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp index 326d064105..249d44ab55 100644 --- a/lib/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp +++ b/lib/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -100,7 +100,7 @@ int main (int argc, char* argv[]) { // order. Parallel for loops may execute in any order. // We also need to protect the usage of a lambda against compiling // with a backend which doesn't support it (i.e. Cuda 6.5/7.0). -#if (KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) +#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) Kokkos::parallel_for (15, KOKKOS_LAMBDA (const int i) { // printf works in a CUDA parallel kernel; std::ostream does not. 
printf ("Hello from i = %i\n", i); diff --git a/lib/kokkos/example/tutorial/02_simple_reduce_lambda/simple_reduce_lambda.cpp b/lib/kokkos/example/tutorial/02_simple_reduce_lambda/simple_reduce_lambda.cpp index 70eea43240..f7f467ad2d 100644 --- a/lib/kokkos/example/tutorial/02_simple_reduce_lambda/simple_reduce_lambda.cpp +++ b/lib/kokkos/example/tutorial/02_simple_reduce_lambda/simple_reduce_lambda.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -69,7 +69,7 @@ int main (int argc, char* argv[]) { // It also handles any other syntax needed for CUDA. // We also need to protect the usage of a lambda against compiling // with a backend which doesn't support it (i.e. Cuda 6.5/7.0). - #if (KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) + #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) Kokkos::parallel_reduce (n, KOKKOS_LAMBDA (const int i, int& lsum) { lsum += i*i; }, sum); @@ -85,7 +85,7 @@ int main (int argc, char* argv[]) { printf ("Sum of squares of integers from 0 to %i, " "computed sequentially, is %i\n", n - 1, seqSum); Kokkos::finalize (); -#if (KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) +#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) return (sum == seqSum) ? 
0 : -1; #else return 0; diff --git a/lib/kokkos/example/tutorial/03_simple_view_lambda/simple_view_lambda.cpp b/lib/kokkos/example/tutorial/03_simple_view_lambda/simple_view_lambda.cpp index dd0641be54..3450ad1bb4 100644 --- a/lib/kokkos/example/tutorial/03_simple_view_lambda/simple_view_lambda.cpp +++ b/lib/kokkos/example/tutorial/03_simple_view_lambda/simple_view_lambda.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -99,7 +99,7 @@ int main (int argc, char* argv[]) { // ask for one. // We also need to protect the usage of a lambda against compiling // with a backend which doesn't support it (i.e. Cuda 6.5/7.0). - #if (KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) + #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) Kokkos::parallel_for (10, KOKKOS_LAMBDA (const int i) { // Acesss the View just like a Fortran array. 
The layout depends // on the View's memory space, so don't rely on the View's diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/thread_teams_lambda.cpp b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/thread_teams_lambda.cpp index 216db7f125..9ea5e8b707 100644 --- a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/thread_teams_lambda.cpp +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/thread_teams_lambda.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -79,7 +79,7 @@ int main (int narg, char* args[]) { int sum = 0; // We also need to protect the usage of a lambda against compiling // with a backend which doesn't support it (i.e. Cuda 6.5/7.0). 
- #if (KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) + #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) parallel_reduce (policy, KOKKOS_LAMBDA (const team_member& thread, int& lsum) { lsum += 1; // TeamPolicy<>::member_type provides functions to query the diff --git a/lib/kokkos/generate_makefile.bash b/lib/kokkos/generate_makefile.bash index e7bd9da36b..e671293ff1 100755 --- a/lib/kokkos/generate_makefile.bash +++ b/lib/kokkos/generate_makefile.bash @@ -5,153 +5,166 @@ MAKE_J_OPTION="32" while [[ $# > 0 ]] do -key="$1" + key="$1" -case $key in + case $key in --kokkos-path*) - KOKKOS_PATH="${key#*=}" - ;; + KOKKOS_PATH="${key#*=}" + ;; + --qthreads-path*) + QTHREADS_PATH="${key#*=}" + ;; --prefix*) - PREFIX="${key#*=}" - ;; + PREFIX="${key#*=}" + ;; --with-cuda) - KOKKOS_DEVICES="${KOKKOS_DEVICES},Cuda" - CUDA_PATH_NVCC=`which nvcc` - CUDA_PATH=${CUDA_PATH_NVCC%/bin/nvcc} - ;; + KOKKOS_DEVICES="${KOKKOS_DEVICES},Cuda" + CUDA_PATH_NVCC=`which nvcc` + CUDA_PATH=${CUDA_PATH_NVCC%/bin/nvcc} + ;; # Catch this before '--with-cuda*' --with-cuda-options*) - KOKKOS_CUDA_OPT="${key#*=}" - ;; + KOKKOS_CUDA_OPT="${key#*=}" + ;; --with-cuda*) - KOKKOS_DEVICES="${KOKKOS_DEVICES},Cuda" - CUDA_PATH="${key#*=}" - ;; + KOKKOS_DEVICES="${KOKKOS_DEVICES},Cuda" + CUDA_PATH="${key#*=}" + ;; --with-openmp) - KOKKOS_DEVICES="${KOKKOS_DEVICES},OpenMP" - ;; + KOKKOS_DEVICES="${KOKKOS_DEVICES},OpenMP" + ;; --with-pthread) - KOKKOS_DEVICES="${KOKKOS_DEVICES},Pthread" - ;; + KOKKOS_DEVICES="${KOKKOS_DEVICES},Pthread" + ;; --with-serial) - KOKKOS_DEVICES="${KOKKOS_DEVICES},Serial" - ;; - --with-qthread*) - KOKKOS_DEVICES="${KOKKOS_DEVICES},Qthread" - QTHREAD_PATH="${key#*=}" - ;; + KOKKOS_DEVICES="${KOKKOS_DEVICES},Serial" + ;; + --with-qthreads*) + KOKKOS_DEVICES="${KOKKOS_DEVICES},Qthreads" + if [ -z "$QTHREADS_PATH" ]; then + QTHREADS_PATH="${key#*=}" + fi + ;; --with-devices*) - DEVICES="${key#*=}" - KOKKOS_DEVICES="${KOKKOS_DEVICES},${DEVICES}" - ;; + DEVICES="${key#*=}" + 
KOKKOS_DEVICES="${KOKKOS_DEVICES},${DEVICES}" + ;; --with-gtest*) - GTEST_PATH="${key#*=}" - ;; + GTEST_PATH="${key#*=}" + ;; --with-hwloc*) - HWLOC_PATH="${key#*=}" - ;; + HWLOC_PATH="${key#*=}" + ;; --arch*) - KOKKOS_ARCH="${key#*=}" - ;; + KOKKOS_ARCH="${key#*=}" + ;; --cxxflags*) - CXXFLAGS="${key#*=}" - ;; + CXXFLAGS="${key#*=}" + ;; --ldflags*) - LDFLAGS="${key#*=}" - ;; + LDFLAGS="${key#*=}" + ;; --debug|-dbg) - KOKKOS_DEBUG=yes - ;; + KOKKOS_DEBUG=yes + ;; --make-j*) - MAKE_J_OPTION="${key#*=}" - ;; + MAKE_J_OPTION="${key#*=}" + ;; --compiler*) - COMPILER="${key#*=}" - CNUM=`which ${COMPILER} 2>&1 >/dev/null | grep "no ${COMPILER}" | wc -l` - if [ ${CNUM} -gt 0 ]; then - echo "Invalid compiler by --compiler command: '${COMPILER}'" - exit - fi - if [[ ! -n ${COMPILER} ]]; then - echo "Empty compiler specified by --compiler command." - exit - fi - CNUM=`which ${COMPILER} | grep ${COMPILER} | wc -l` - if [ ${CNUM} -eq 0 ]; then - echo "Invalid compiler by --compiler command: '${COMPILER}'" - exit - fi - ;; - --with-options*) - KOKKOS_OPT="${key#*=}" - ;; + COMPILER="${key#*=}" + CNUM=`which ${COMPILER} 2>&1 >/dev/null | grep "no ${COMPILER}" | wc -l` + if [ ${CNUM} -gt 0 ]; then + echo "Invalid compiler by --compiler command: '${COMPILER}'" + exit + fi + if [[ ! -n ${COMPILER} ]]; then + echo "Empty compiler specified by --compiler command." 
+ exit + fi + CNUM=`which ${COMPILER} | grep ${COMPILER} | wc -l` + if [ ${CNUM} -eq 0 ]; then + echo "Invalid compiler by --compiler command: '${COMPILER}'" + exit + fi + ;; + --with-options*) + KOKKOS_OPT="${key#*=}" + ;; --help) - echo "Kokkos configure options:" - echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory" - echo "--prefix=/Install/Path: Path to where the Kokkos library should be installed" - echo "" - echo "--with-cuda[=/Path/To/Cuda]: enable Cuda and set path to Cuda Toolkit" - echo "--with-openmp: enable OpenMP backend" - echo "--with-pthread: enable Pthreads backend" - echo "--with-serial: enable Serial backend" - echo "--with-qthread=/Path/To/Qthread: enable Qthread backend" - echo "--with-devices: explicitly add a set of backends" - echo "" - echo "--arch=[OPTIONS]: set target architectures. Options are:" - echo " ARMv80 = ARMv8.0 Compatible CPU" - echo " ARMv81 = ARMv8.1 Compatible CPU" - echo " ARMv8-ThunderX = ARMv8 Cavium ThunderX CPU" - echo " SNB = Intel Sandy/Ivy Bridge CPUs" - echo " HSW = Intel Haswell CPUs" - echo " BDW = Intel Broadwell Xeon E-class CPUs" - echo " SKX = Intel Sky Lake Xeon E-class HPC CPUs (AVX512)" - echo " KNC = Intel Knights Corner Xeon Phi" - echo " KNL = Intel Knights Landing Xeon Phi" - echo " Kepler30 = NVIDIA Kepler generation CC 3.0" - echo " Kepler35 = NVIDIA Kepler generation CC 3.5" - echo " Kepler37 = NVIDIA Kepler generation CC 3.7" - echo " Pascal60 = NVIDIA Pascal generation CC 6.0" - echo " Pascal61 = NVIDIA Pascal generation CC 6.1" - echo " Maxwell50 = NVIDIA Maxwell generation CC 5.0" - echo " Power8 = IBM POWER8 CPUs" - echo " Power9 = IBM POWER9 CPUs" - echo "" - echo "--compiler=/Path/To/Compiler set the compiler" - echo "--debug,-dbg: enable Debugging" - echo "--cxxflags=[FLAGS] overwrite CXXFLAGS for library build and test build" - echo " This will still set certain required flags via" - echo " KOKKOS_CXXFLAGS (such as -fopenmp, --std=c++11, etc.)" - echo "--ldflags=[FLAGS] 
overwrite LDFLAGS for library build and test build" - echo " This will still set certain required flags via" - echo " KOKKOS_LDFLAGS (such as -fopenmp, -lpthread, etc.)" - echo "--with-gtest=/Path/To/Gtest: set path to gtest (used in unit and performance tests" - echo "--with-hwloc=/Path/To/Hwloc: set path to hwloc" - echo "--with-options=[OPTIONS]: additional options to Kokkos:" - echo " aggressive_vectorization = add ivdep on loops" - echo "--with-cuda-options=[OPT]: additional options to CUDA:" - echo " force_uvm, use_ldg, enable_lambda, rdc" - echo "--make-j=[NUM]: set -j flag used during build." - exit 0 - ;; + echo "Kokkos configure options:" + echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory." + echo "--qthreads-path=/Path/To/Qthreads: Path to Qthreads install directory." + echo " Overrides path given by --with-qthreads." + echo "--prefix=/Install/Path: Path to install the Kokkos library." + echo "" + echo "--with-cuda[=/Path/To/Cuda]: Enable Cuda and set path to Cuda Toolkit." + echo "--with-openmp: Enable OpenMP backend." + echo "--with-pthread: Enable Pthreads backend." + echo "--with-serial: Enable Serial backend." + echo "--with-qthreads[=/Path/To/Qthreads]: Enable Qthreads backend." + echo "--with-devices: Explicitly add a set of backends." + echo "" + echo "--arch=[OPT]: Set target architectures. 
Options are:" + echo " ARMv80 = ARMv8.0 Compatible CPU" + echo " ARMv81 = ARMv8.1 Compatible CPU" + echo " ARMv8-ThunderX = ARMv8 Cavium ThunderX CPU" + echo " SNB = Intel Sandy/Ivy Bridge CPUs" + echo " HSW = Intel Haswell CPUs" + echo " BDW = Intel Broadwell Xeon E-class CPUs" + echo " SKX = Intel Sky Lake Xeon E-class HPC CPUs (AVX512)" + echo " KNC = Intel Knights Corner Xeon Phi" + echo " KNL = Intel Knights Landing Xeon Phi" + echo " Kepler30 = NVIDIA Kepler generation CC 3.0" + echo " Kepler35 = NVIDIA Kepler generation CC 3.5" + echo " Kepler37 = NVIDIA Kepler generation CC 3.7" + echo " Pascal60 = NVIDIA Pascal generation CC 6.0" + echo " Pascal61 = NVIDIA Pascal generation CC 6.1" + echo " Maxwell50 = NVIDIA Maxwell generation CC 5.0" + echo " Power8 = IBM POWER8 CPUs" + echo " Power9 = IBM POWER9 CPUs" + echo "" + echo "--compiler=/Path/To/Compiler Set the compiler." + echo "--debug,-dbg: Enable Debugging." + echo "--cxxflags=[FLAGS] Overwrite CXXFLAGS for library build and test" + echo " build. This will still set certain required" + echo " flags via KOKKOS_CXXFLAGS (such as -fopenmp," + echo " --std=c++11, etc.)." + echo "--ldflags=[FLAGS] Overwrite LDFLAGS for library build and test" + echo " build. This will still set certain required" + echo " flags via KOKKOS_LDFLAGS (such as -fopenmp," + echo " -lpthread, etc.)." + echo "--with-gtest=/Path/To/Gtest: Set path to gtest. (Used in unit and performance" + echo " tests.)" + echo "--with-hwloc=/Path/To/Hwloc: Set path to hwloc." + echo "--with-options=[OPT]: Additional options to Kokkos:" + echo " aggressive_vectorization = add ivdep on loops" + echo "--with-cuda-options=[OPT]: Additional options to CUDA:" + echo " force_uvm, use_ldg, enable_lambda, rdc" + echo "--make-j=[NUM]: Set -j flag used during build." 
+ exit 0 + ;; *) - echo "warning: ignoring unknown option $key" - ;; -esac -shift + echo "warning: ignoring unknown option $key" + ;; + esac + + shift done -# If KOKKOS_PATH undefined, assume parent dir of this -# script is the KOKKOS_PATH +# Remove leading ',' from KOKKOS_DEVICES. +KOKKOS_DEVICES=$(echo $KOKKOS_DEVICES | sed 's/^,//') + +# If KOKKOS_PATH undefined, assume parent dir of this script is the KOKKOS_PATH. if [ -z "$KOKKOS_PATH" ]; then - KOKKOS_PATH=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) + KOKKOS_PATH=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) else - # Ensure KOKKOS_PATH is abs path - KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd ) + # Ensure KOKKOS_PATH is abs path + KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd ) fi if [ "${KOKKOS_PATH}" = "${PWD}" ] || [ "${KOKKOS_PATH}" = "${PWD}/" ]; then -echo "Running generate_makefile.sh in the Kokkos root directory is not allowed" -exit + echo "Running generate_makefile.sh in the Kokkos root directory is not allowed" + exit fi KOKKOS_SRC_PATH=${KOKKOS_PATH} @@ -160,52 +173,63 @@ KOKKOS_SETTINGS="KOKKOS_SRC_PATH=${KOKKOS_SRC_PATH}" #KOKKOS_SETTINGS="KOKKOS_PATH=${KOKKOS_PATH}" if [ ${#COMPILER} -gt 0 ]; then -KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CXX=${COMPILER}" + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CXX=${COMPILER}" fi + if [ ${#KOKKOS_DEVICES} -gt 0 ]; then -KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_DEVICES=${KOKKOS_DEVICES}" + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_DEVICES=${KOKKOS_DEVICES}" fi + if [ ${#KOKKOS_ARCH} -gt 0 ]; then -KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_ARCH=${KOKKOS_ARCH}" + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_ARCH=${KOKKOS_ARCH}" fi + if [ ${#KOKKOS_DEBUG} -gt 0 ]; then -KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_DEBUG=${KOKKOS_DEBUG}" + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_DEBUG=${KOKKOS_DEBUG}" fi + if [ ${#CUDA_PATH} -gt 0 ]; then -KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CUDA_PATH=${CUDA_PATH}" + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CUDA_PATH=${CUDA_PATH}" fi + if [ 
${#CXXFLAGS} -gt 0 ]; then -KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CXXFLAGS=\"${CXXFLAGS}\"" + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CXXFLAGS=\"${CXXFLAGS}\"" fi + if [ ${#LDFLAGS} -gt 0 ]; then -KOKKOS_SETTINGS="${KOKKOS_SETTINGS} LDFLAGS=\"${LDFLAGS}\"" + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} LDFLAGS=\"${LDFLAGS}\"" fi + if [ ${#GTEST_PATH} -gt 0 ]; then -KOKKOS_SETTINGS="${KOKKOS_SETTINGS} GTEST_PATH=${GTEST_PATH}" + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} GTEST_PATH=${GTEST_PATH}" else -GTEST_PATH=${KOKKOS_PATH}/tpls/gtest -KOKKOS_SETTINGS="${KOKKOS_SETTINGS} GTEST_PATH=${GTEST_PATH}" + GTEST_PATH=${KOKKOS_PATH}/tpls/gtest + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} GTEST_PATH=${GTEST_PATH}" fi + if [ ${#HWLOC_PATH} -gt 0 ]; then -KOKKOS_SETTINGS="${KOKKOS_SETTINGS} HWLOC_PATH=${HWLOC_PATH} KOKKOS_USE_TPLS=hwloc" + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} HWLOC_PATH=${HWLOC_PATH} KOKKOS_USE_TPLS=hwloc" fi -if [ ${#QTHREAD_PATH} -gt 0 ]; then -KOKKOS_SETTINGS="${KOKKOS_SETTINGS} QTHREAD_PATH=${QTHREAD_PATH}" + +if [ ${#QTHREADS_PATH} -gt 0 ]; then + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} QTHREADS_PATH=${QTHREADS_PATH}" fi + if [ ${#KOKKOS_OPT} -gt 0 ]; then -KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_OPTIONS=${KOKKOS_OPT}" + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_OPTIONS=${KOKKOS_OPT}" fi + if [ ${#KOKKOS_CUDA_OPT} -gt 0 ]; then -KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_CUDA_OPTIONS=${KOKKOS_CUDA_OPT}" + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_CUDA_OPTIONS=${KOKKOS_CUDA_OPT}" fi KOKKOS_SETTINGS_NO_KOKKOS_PATH="${KOKKOS_SETTINGS}" KOKKOS_TEST_INSTALL_PATH="${PWD}/install" if [ ${#PREFIX} -gt 0 ]; then -KOKKOS_INSTALL_PATH="${PREFIX}" + KOKKOS_INSTALL_PATH="${PREFIX}" else -KOKKOS_INSTALL_PATH=${KOKKOS_TEST_INSTALL_PATH} + KOKKOS_INSTALL_PATH=${KOKKOS_TEST_INSTALL_PATH} fi @@ -229,7 +253,7 @@ mkdir example/fenl mkdir example/tutorial if [ ${#KOKKOS_ENABLE_EXAMPLE_ICHOL} -gt 0 ]; then -mkdir example/ichol + mkdir example/ichol fi KOKKOS_SETTINGS="${KOKKOS_SETTINGS_NO_KOKKOS_PATH} 
KOKKOS_PATH=${KOKKOS_PATH}" diff --git a/lib/linalg/Install.py b/lib/linalg/Install.py new file mode 100644 index 0000000000..c7076ca52f --- /dev/null +++ b/lib/linalg/Install.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python + +# install.py tool to do build of the linear algebra library +# used to automate the steps described in the README file in this dir + +import sys,commands,os + +# help message + +help = """ +Syntax: python Install.py -m machine + -m = peform a clean followed by "make -f Makefile.machine" + machine = suffix of a lib/Makefile.* file +""" + +# print error message or help + +def error(str=None): + if not str: print help + else: print "ERROR",str + sys.exit() + +# parse args + +args = sys.argv[1:] +nargs = len(args) +if nargs == 0: error() + +machine = None + +iarg = 0 +while iarg < nargs: + if args[iarg] == "-m": + if iarg+2 > nargs: error() + machine = args[iarg+1] + iarg += 2 + else: error() + +# set lib from working dir + +cwd = os.getcwd() +lib = os.path.basename(cwd) + +# make the library + +print "Building lib%s.a ..." % lib +cmd = "make -f Makefile.%s clean; make -f Makefile.%s" % (machine,machine) +txt = commands.getoutput(cmd) +print txt + +if os.path.exists("lib%s.a" % lib): print "Build was successful" +else: error("Build of lib/%s/lib%s.a was NOT successful" % (lib,lib)) diff --git a/lib/linalg/README b/lib/linalg/README index 20f3ff094d..725df86c4c 100644 --- a/lib/linalg/README +++ b/lib/linalg/README @@ -3,11 +3,16 @@ USER-AWPMD packages, and possibly by other packages in the future. Note that this is an *incomplete* subset of full BLAS/LAPACK. -You should only need to build and use the resulting library in this -directory if you want to build LAMMPS with the USER-ATC and/or -USER-AWPMD packages AND you do not have any other suitable BLAS and -LAPACK libraries installed on your system. E.g. ATLAS, GOTO-BLAS, -OpenBLAS, ACML, or MKL. 
+You should only need to build and use the library in this directory if +you want to build LAMMPS with the USER-ATC and/or USER-AWPMD packages +AND you do not have any other suitable BLAS and LAPACK libraries +installed on your system. E.g. ATLAS, GOTO-BLAS, OpenBLAS, ACML, or +MKL. + +You can type "make lib-linalg" from the src directory to see help on +how to build this library via make commands, or you can do the same +thing by typing "python Install.py" from within this directory, or you +can do it manually by following the instructions below. Build the library using one of the provided Makefile.* files or create your own, specific to your compiler and system. For example: @@ -20,4 +25,5 @@ directory: liblinalg.a the library LAMMPS will link against You can then include this library and its path in the Makefile.lammps -file of any packages that need it, e.g. in lib/atc/Makefile.lammps. +file of any packages that need it. As an example, see the +lib/atc/Makefile.lammps.linalg file. diff --git a/lib/meam/Install.py b/lib/meam/Install.py new file mode 100644 index 0000000000..18b426f928 --- /dev/null +++ b/lib/meam/Install.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python + +# install.py tool to do a generic build of a library +# soft linked to by many of the lib/Install.py files +# used to automate the steps described in the corresponding lib/README + +import sys,commands,os + +# help message + +help = """ +Syntax: python Install.py -m machine -e suffix + specify -m and optionally -e, order does not matter + -m = peform a clean followed by "make -f Makefile.machine" + machine = suffix of a lib/Makefile.* file + -e = set EXTRAMAKE variable in Makefile.machine to Makefile.lammps.suffix + does not alter existing Makefile.machine +""" + +# print error message or help + +def error(str=None): + if not str: print help + else: print "ERROR",str + sys.exit() + +# parse args + +args = sys.argv[1:] +nargs = len(args) +if nargs == 0: error() + +machine = None +extraflag = 0 + +iarg 
= 0 +while iarg < nargs: + if args[iarg] == "-m": + if iarg+2 > nargs: error() + machine = args[iarg+1] + iarg += 2 + elif args[iarg] == "-e": + if iarg+2 > nargs: error() + extraflag = 1 + suffix = args[iarg+1] + iarg += 2 + else: error() + +# set lib from working dir + +cwd = os.getcwd() +lib = os.path.basename(cwd) + +# create Makefile.auto as copy of Makefile.machine +# reset EXTRAMAKE if requested + +if not os.path.exists("Makefile.%s" % machine): + error("lib/%s/Makefile.%s does not exist" % (lib,machine)) + +lines = open("Makefile.%s" % machine,'r').readlines() +fp = open("Makefile.auto",'w') + +for line in lines: + words = line.split() + if len(words) == 3 and extraflag and \ + words[0] == "EXTRAMAKE" and words[1] == '=': + line = line.replace(words[2],"Makefile.lammps.%s" % suffix) + print >>fp,line, + +fp.close() + +# make the library via Makefile.auto + +print "Building lib%s.a ..." % lib +cmd = "make -f Makefile.auto clean; make -f Makefile.auto" +txt = commands.getoutput(cmd) +print txt + +if os.path.exists("lib%s.a" % lib): print "Build was successful" +else: error("Build of lib/%s/lib%s.a was NOT successful" % (lib,lib)) +if not os.path.exists("Makefile.lammps"): + print "lib/%s/Makefile.lammps was NOT created" % lib diff --git a/lib/meam/README b/lib/meam/README index 436259ee81..b3111c1317 100644 --- a/lib/meam/README +++ b/lib/meam/README @@ -15,6 +15,11 @@ links against when using the MEAM package. This library must be built with a F90 compiler, before LAMMPS is built, so LAMMPS can link against it. +You can type "make lib-meam" from the src directory to see help on how +to build this library via make commands, or you can do the same thing +by typing "python Install.py" from within this directory, or you can +do it manually by following the instructions below. + Build the library using one of the provided Makefile.* files or create your own, specific to your compiler and system. 
For example: diff --git a/lib/molfile/Makefile.lammps b/lib/molfile/Makefile.lammps index 08118991a0..a181f48aec 100644 --- a/lib/molfile/Makefile.lammps +++ b/lib/molfile/Makefile.lammps @@ -6,6 +6,9 @@ # When you build LAMMPS with the USER-MOLFILE package installed, it will # use the 3 settings in this file. They should be set as follows. # +# The molfile_SYSINC setting is to point to the folder with the VMD +# plugin headers. By default it points to bundled headers in this folder +# # The molfile_SYSLIB setting is for a system dynamic loading library # that will be used to load the molfile plugins. It contains functions # like dlopen(), dlsym() and so on for dynamic linking of executable @@ -24,7 +27,10 @@ # Settings that the LAMMPS build will import when this package is installed -molfile_SYSINC = +# change this to -I/path/to/your/lib/vmd/plugins/include if the bundled +# header files are incompatible with your VMD plugins +molfile_SYSINC =-I../../lib/molfile +# ifneq ($(LIBOBJDIR),/Obj_mingw32) ifneq ($(LIBOBJDIR),/Obj_mingw64) ifneq ($(LIBOBJDIR),/Obj_mingw32-mpi) diff --git a/lib/molfile/README b/lib/molfile/README index 09ea3cc5c6..9e8260c202 100644 --- a/lib/molfile/README +++ b/lib/molfile/README @@ -6,17 +6,30 @@ and write_dump commands in a LAMMPS input script. More information about the VMD molfile plugins can be found at http://www.ks.uiuc.edu/Research/vmd/plugins/molfile. -More specifically, to be able to dynamically load and execute the -plugins from inside LAMMPS, you need to link with a system library -containing functions like dlopen(), dlsym() and so on for dynamic -linking of executable code into an executable. This library is -defined by setting the molfile_SYSLIB variable in the Makefile.lammps -file in this dir. +NOTE: while the programming interface (API) of the VMD molfile plugins +is backward compatible (i.e. you can expect to be able to compile this +package for plugins from newer VMD packages), the binary interface +(ABI) is not.
So it is necessary to compile this package with the +VMD molfile plugin header files (vmdplugin.h and molfile_plugin.h) +matching the VMD installation that the (binary) plugin files are taken from. +These header files can be found inside the VMD installation tree under +"plugins/include". For convenience, this package includes a set of +header files that is compatible with VMD 1.9.3 (the current version +in April 2017). You need to adjust the molfile_SYSINC variable in the +Makefile.lammps file in this directory, in case you want to use VMD +molfile plugins from a different version. The interface is compatible +with plugins starting from VMD version 1.8.4. + +In order to be able to dynamically load and execute the plugins from +inside LAMMPS, you need to link with a system library containing functions +like dlopen(), dlsym() and so on for dynamic linking of executable code +into an executable. This library is defined by setting the molfile_SYSLIB +variable in the Makefile.lammps file in this dir. For Linux and most current unix-like operating systems, this can be kept at the default setting of "-ldl" (on some platforms this library is called "-ldld"). For compilation on Windows, a slightly different mechanism is used that is part of the Windows programming environment -and this library is not needed. +and this kind of library is not needed. See the header of Makefile.lammps for more info.
diff --git a/src/USER-MOLFILE/molfile_plugin.h b/lib/molfile/molfile_plugin.h similarity index 92% rename from src/USER-MOLFILE/molfile_plugin.h rename to lib/molfile/molfile_plugin.h index 7a2d7ca42e..c79e7a5abf 100644 --- a/src/USER-MOLFILE/molfile_plugin.h +++ b/lib/molfile/molfile_plugin.h @@ -11,7 +11,7 @@ * * $RCSfile: molfile_plugin.h,v $ * $Author: johns $ $Locker: $ $State: Exp $ - * $Revision: 1.103 $ $Date: 2011/03/05 03:56:11 $ + * $Revision: 1.108 $ $Date: 2016/02/26 03:17:01 $ * ***************************************************************************/ @@ -60,6 +60,21 @@ typedef ssize_t molfile_ssize_t; /**< for frame counts */ #define MOLFILE_MAXWAVEPERTS 25 /**< maximum number of wavefunctions * per timestep */ +/** + * Hard-coded direct-I/O page size constants for use by both VMD + * and the plugins that want to use direct, unbuffered I/O for high + * performance with SSDs etc. We use two constants to define the + * range of hardware page sizes that we can support, so that we can + * add support for larger 8KB or 16KB page sizes in the future + * as they become more prevalent in high-end storage systems. + * + * At present, VMD uses a hard-coded 4KB page size to reduce memory + * fragmentation, but these constants will make it easier to enable the + * use of larger page sizes in the future if it becomes necessary. + */ +#define MOLFILE_DIRECTIO_MIN_BLOCK_SIZE 4096 +#define MOLFILE_DIRECTIO_MAX_BLOCK_SIZE 4096 + /** * File level comments, origin information, and annotations. @@ -96,8 +111,17 @@ typedef struct { char resname[8]; /**< required residue name string */ int resid; /**< required integer residue ID */ char segid[8]; /**< required segment name string, or "" */ +#if 0 && vmdplugin_ABIVERSION > 17 + /* The new PDB file formats allows for much larger structures, */ + /* which can therefore require longer chain ID strings. 
The */ + /* new PDBx/mmCIF file formats do not have length limits on */ + /* fields, so PDB chains could be arbitrarily long strings */ + /* in such files. At present, we know we need at least 3-char */ + /* chains for existing PDBx/mmCIF files. */ + char chain[4]; /**< required chain name, or "" */ +#else char chain[2]; /**< required chain name, or "" */ - +#endif /* rest are optional; use optflags to specify what's present */ char altloc[2]; /**< optional PDB alternate location code */ char insertion[2]; /**< optional PDB insertion code */ @@ -107,6 +131,23 @@ typedef struct { float charge; /**< optional charge value */ float radius; /**< optional radius value */ int atomicnumber; /**< optional element atomic number */ + +#if 0 + char complex[16]; + char assembly[16]; + int qmregion; + int qmregionlink; + int qmlayer; + int qmlayerlink; + int qmfrag; + int qmfraglink; + string qmecp; + int qmadapt; + int qmect; /**< boolean */ + int qmparam; + int autoparam; +#endif + #if defined(DESRES_CTNUMBER) int ctnumber; /**< mae ct block, 0-based, including meta */ #endif @@ -140,23 +181,19 @@ typedef struct { #define MOLFILE_QMTS_SCFITER 0x0002 /*@}*/ -#if vmdplugin_ABIVERSION > 10 typedef struct molfile_timestep_metadata { unsigned int count; /**< total # timesteps; -1 if unknown */ unsigned int avg_bytes_per_timestep; /** bytes per timestep */ int has_velocities; /**< if timesteps have velocities */ } molfile_timestep_metadata_t; -#endif /* * Per-timestep atom coordinates and periodic cell information */ typedef struct { float *coords; /**< coordinates of all atoms, arranged xyzxyzxyz */ -#if vmdplugin_ABIVERSION > 10 float *velocities; /**< space for velocities of all atoms; same layout */ /**< NULL unless has_velocities is set */ -#endif /*@{*/ /** @@ -169,9 +206,7 @@ typedef struct { float A, B, C, alpha, beta, gamma; /*@}*/ -#if vmdplugin_ABIVERSION > 10 double physical_time; /**< physical time point associated with this frame */ -#endif #if 
defined(DESRES_READ_TIMESTEP2) /* HACK to support generic trajectory information */ @@ -213,14 +248,33 @@ typedef struct { * physical size of the box, this is the number of voxels in each * direction, independent of the shape of the volume set. */ - int xsize; /**< number of grid cells along the X axis */ - int ysize; /**< number of grid cells along the Y axis */ - int zsize; /**< number of grid cells along the Z axis */ + int xsize; /**< number of grid cells along the X axis */ + int ysize; /**< number of grid cells along the Y axis */ + int zsize; /**< number of grid cells along the Z axis */ - int has_color; /**< flag indicating presence of voxel color data */ +#if vmdplugin_ABIVERSION > 16 + int has_scalar; /**< flag indicating presence of scalar volume */ + int has_gradient; /**< flag indicating presence of vector volume */ + int has_variance; /**< flag indicating presence of variance map */ +#endif + int has_color; /**< flag indicating presence of voxel color data */ } molfile_volumetric_t; +#if vmdplugin_ABIVERSION > 16 +/** + * Volumetric dataset read/write structure with both flag/parameter sets + * and VMD-allocated pointers for fields to be used by the plugin. + */ +typedef struct { + int setidx; /**< volumetric dataset index to load/save */ + float *scalar; /**< scalar density/potential field data */ + float *gradient; /**< gradient vector field */ + float *variance; /**< variance map indicating signal/noise */ + float *rgb3f; /**< RGB floating point color texture map */ + unsigned char *rgb3u; /**< RGB unsigned byte color texture map */ +} molfile_volumetric_readwrite_t; +#endif /************************************************************** @@ -231,9 +285,6 @@ typedef struct { ************************************************************** **************************************************************/ -#if vmdplugin_ABIVERSION > 9 - - /* macros for the convergence status of a QM calculation. 
*/ #define MOLFILE_QMSTATUS_UNKNOWN -1 /* don't know yet */ #define MOLFILE_QMSTATUS_OPT_CONV 0 /* optimization converged */ @@ -485,8 +536,6 @@ typedef struct { } molfile_qm_timestep_t; -#endif - /************************************************************** **************************************************************/ @@ -609,12 +658,8 @@ typedef struct { * This function can be called only after read_structure(). * Return MOLFILE_SUCCESS if no errors occur. */ -#if vmdplugin_ABIVERSION > 14 int (*read_bonds)(void *, int *nbonds, int **from, int **to, float **bondorder, int **bondtype, int *nbondtypes, char ***bondtypename); -#else - int (*read_bonds)(void *, int *nbonds, int **from, int **to, float **bondorder); -#endif /** * XXX this function will be augmented and possibly superceded by a @@ -684,6 +729,9 @@ typedef struct { */ int (* read_volumetric_data)(void *, int set, float *datablock, float *colorblock); +#if vmdplugin_ABIVERSION > 16 + int (* read_volumetric_data_ex)(void *, molfile_volumetric_readwrite_t *v); +#endif /** * Read raw graphics data stored in this file. Return the number of data @@ -723,14 +771,9 @@ typedef struct { * bondtypenames can only be used of bondtypes is also given. * Return MOLFILE_SUCCESS if no errors occur. */ -#if vmdplugin_ABIVERSION > 14 int (* write_bonds)(void *, int nbonds, int *from, int *to, float *bondorder, int *bondtype, int nbondtypes, char **bondtypename); -#else - int (* write_bonds)(void *, int nbonds, int *from, int *to, float *bondorder); -#endif -#if vmdplugin_ABIVERSION > 9 /** * Write the specified volumetric data set into the space pointed to by * datablock. 
The * allocated for the datablock must be equal to @@ -740,8 +783,11 @@ typedef struct { */ int (* write_volumetric_data)(void *, molfile_volumetric_t *metadata, float *datablock, float *colorblock); +#if vmdplugin_ABIVERSION > 16 + int (* write_volumetric_data_ex)(void *, molfile_volumetric_t *metadata, + molfile_volumetric_readwrite_t *v); +#endif -#if vmdplugin_ABIVERSION > 15 /** * Read in Angles, Dihedrals, Impropers, and Cross Terms and optionally types. * (Cross terms pertain to the CHARMM/NAMD CMAP feature) @@ -764,33 +810,6 @@ typedef struct { const int *impropers, const int *impropertypes, int numimpropertypes, const char **impropertypenames, int numcterms, const int *cterms, int ctermcols, int ctermrows); -#else - /** - * Read in Angles, Dihedrals, Impropers, and Cross Terms - * Forces are in Kcal/mol - * (Cross terms pertain to the CHARMM/NAMD CMAP feature, forces are given - * as a 2-D matrix) - */ - int (* read_angles)(void *, - int *numangles, int **angles, double **angleforces, - int *numdihedrals, int **dihedrals, double **dihedralforces, - int *numimpropers, int **impropers, double **improperforces, - int *numcterms, int **cterms, - int *ctermcols, int *ctermrows, double **ctermforces); - - /** - * Write out Angles, Dihedrals, Impropers, and Cross Terms - * Forces are in Kcal/mol - * (Cross terms pertain to the CHARMM/NAMD CMAP feature, forces are given - * as a 2-D matrix) - */ - int (* write_angles)(void *, - int numangles, const int *angles, const double *angleforces, - int numdihedrals, const int *dihedrals, const double *dihedralforces, - int numimpropers, const int *impropers, const double *improperforces, - int numcterms, const int *cterms, - int ctermcols, int ctermrows, const double *ctermforces); -#endif /** @@ -839,14 +858,9 @@ typedef struct { */ int (* read_timestep)(void *, int natoms, molfile_timestep_t *, molfile_qm_metadata_t *, molfile_qm_timestep_t *); -#endif -#if vmdplugin_ABIVERSION > 10 int (* read_timestep_metadata)(void *, 
molfile_timestep_metadata_t *); -#endif -#if vmdplugin_ABIVERSION > 11 int (* read_qm_timestep_metadata)(void *, molfile_qm_timestep_metadata_t *); -#endif #if defined(DESRES_READ_TIMESTEP2) /** @@ -864,7 +878,6 @@ typedef struct { double * times ); #endif -#if vmdplugin_ABIVERSION > 13 /** * Console output, READ-ONLY function pointer. * Function pointer that plugins can use for printing to the host @@ -883,8 +896,8 @@ typedef struct { * application-provided services */ int (* cons_fputs)(const int, const char*); -#endif } molfile_plugin_t; #endif + diff --git a/src/USER-MOLFILE/vmdplugin.h b/lib/molfile/vmdplugin.h similarity index 98% rename from src/USER-MOLFILE/vmdplugin.h rename to lib/molfile/vmdplugin.h index 37299408fe..842d1e431c 100644 --- a/src/USER-MOLFILE/vmdplugin.h +++ b/lib/molfile/vmdplugin.h @@ -11,7 +11,7 @@ * * $RCSfile: vmdplugin.h,v $ * $Author: johns $ $Locker: $ $State: Exp $ - * $Revision: 1.32 $ $Date: 2009/02/24 05:12:35 $ + * $Revision: 1.33 $ $Date: 2015/10/29 05:10:54 $ * ***************************************************************************/ @@ -144,7 +144,7 @@ typedef struct { /** * Use this macro to initialize the abiversion member of each plugin */ -#define vmdplugin_ABIVERSION 16 +#define vmdplugin_ABIVERSION 17 /*@{*/ /** Use this macro to indicate a plugin's thread-safety at registration time */ diff --git a/lib/mscg/Install.py b/lib/mscg/Install.py new file mode 100644 index 0000000000..e547232614 --- /dev/null +++ b/lib/mscg/Install.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python + +# Install.py tool to download, unpack, build, and link to the MS-CG library +# used to automate the steps described in the README file in this dir + +import sys,os,re,commands + +# help message + +help = """ +Syntax: python Install.py -h hpath hdir -g -b [suffix] -l + specify one or more options, order does not matter + -h = set home dir of MS-CG to be hpath/hdir + hpath can be full path, contain '~' or '.' chars + default hpath = . 
= lib/mscg + default hdir = MSCG-release-master = what GitHub zipfile unpacks to + -g = grab (download) zipfile from MS-CG GitHub website + unpack it to hpath/hdir + hpath must already exist + if hdir already exists, it will be deleted before unpack + -b = build MS-CG library in its src dir + optional suffix specifies which src/Make/Makefile.suffix to use + default suffix = g++_simple + -l = create 2 softlinks (includelink,liblink) in lib/mscg to MS-CG src dir +""" + +# settings + +url = "https://github.com/uchicago-voth/MSCG-release/archive/master.zip" +zipfile = "MS-CG-master.zip" +zipdir = "MSCG-release-master" + +# print error message or help + +def error(str=None): + if not str: print help + else: print "ERROR",str + sys.exit() + +# expand to full path name +# process leading '~' or relative path + +def fullpath(path): + return os.path.abspath(os.path.expanduser(path)) + +# parse args + +args = sys.argv[1:] +nargs = len(args) +if nargs == 0: error() + +homepath = "." +homedir = zipdir + +grabflag = 0 +buildflag = 0 +msuffix = "g++_simple" +linkflag = 0 + +iarg = 0 +while iarg < nargs: + if args[iarg] == "-h": + if iarg+3 > nargs: error() + homepath = args[iarg+1] + homedir = args[iarg+2] + iarg += 3 + elif args[iarg] == "-g": + grabflag = 1 + iarg += 1 + elif args[iarg] == "-b": + buildflag = 1 + if iarg+1 < nargs and args[iarg+1][0] != '-': + msuffix = args[iarg+1] + iarg += 1 + iarg += 1 + elif args[iarg] == "-l": + linkflag = 1 + iarg += 1 + else: error() + +homepath = fullpath(homepath) +if not os.path.isdir(homepath): error("MS-CG path does not exist") +homedir = "%s/%s" % (homepath,homedir) + +# download and unpack MS-CG zipfile + +if grabflag: + print "Downloading MS-CG ..." + cmd = "curl -L %s > %s/%s" % (url,homepath,zipfile) + print cmd + print commands.getoutput(cmd) + + print "Unpacking MS-CG zipfile ..." 
+ if os.path.exists("%s/%s" % (homepath,zipdir)): + commands.getoutput("rm -rf %s/%s" % (homepath,zipdir)) + cmd = "cd %s; unzip %s" % (homepath,zipfile) + commands.getoutput(cmd) + if os.path.basename(homedir) != zipdir: + if os.path.exists(homedir): commands.getoutput("rm -rf %s" % homedir) + os.rename("%s/%s" % (homepath,zipdir),homedir) + +# build MS-CG + +if buildflag: + print "Building MS-CG ..." + cmd = "cd %s/src; cp Make/Makefile.%s .; make -f Makefile.%s" % \ + (homedir,msuffix,msuffix) + txt = commands.getoutput(cmd) + print txt + +# create 2 links in lib/mscg to MS-CG src dir + +if linkflag: + print "Creating links to MS-CG include and lib files" + if os.path.isfile("includelink") or os.path.islink("includelink"): + os.remove("includelink") + if os.path.isfile("liblink") or os.path.islink("liblink"): + os.remove("liblink") + cmd = "ln -s %s/src includelink" % homedir + commands.getoutput(cmd) + cmd = "ln -s %s/src liblink" % homedir + commands.getoutput(cmd) diff --git a/lib/mscg/Makefile.lammps b/lib/mscg/Makefile.lammps index 0aa55b087d..f0d9a9b8a0 100644 --- a/lib/mscg/Makefile.lammps +++ b/lib/mscg/Makefile.lammps @@ -1,5 +1,5 @@ # Settings that the LAMMPS build will import when this package library is used -mscg_SYSINC = -mscg_SYSLIB = -lm -lgsl -llapack -lcblas +mscg_SYSINC = -std=c++11 +mscg_SYSLIB = -lm -lgsl -llapack -lgslcblas mscg_SYSPATH = diff --git a/lib/mscg/README b/lib/mscg/README index cc4fc9a667..b73c8563cd 100755 --- a/lib/mscg/README +++ b/lib/mscg/README @@ -6,6 +6,15 @@ The MS-CG library is available at https://github.com/uchicago-voth/MSCG-release and was developed by Jacob Wagner in Greg Voth's group at the University of Chicago. +This library requires a compiler with C++11 support (e.g., g++ v4.9+), +LAPACK, and the GNU scientific library (GSL v 2.1+). 
+ +You can type "make lib-mscg" from the src directory to see help on how +to download and build this library via make commands, or you can do +the same thing by typing "python Install.py" from within this +directory, or you can do it manually by following the instructions +below. + ----------------- You must perform the following steps yourself. @@ -14,16 +23,21 @@ You must perform the following steps yourself. either as a tarball or via SVN, and unpack the tarball either in this /lib/mscg directory or somewhere else on your system. -2. Compile MS-CG from within its home directory using your makefile choice: +2. Ensure that you have LAPACK and GSL (or Intel MKL) as well as a compiler + with support for C++11. + +3. Compile MS-CG from within its home directory using your makefile of choice: % make -f Makefile."name" libmscg.a + It is recommended that you start with Makefile.g++_simple + for most machines -3. There is no need to install MS-CG if you only wish +4. There is no need to install MS-CG if you only wish to use it from LAMMPS. -4. Create two soft links in this dir (lib/mscg) to the MS-CG src +5. Create two soft links in this dir (lib/mscg) to the MS-CG src directory. E.g if you built MS-CG in this dir: - % ln -s mscgfm-master/src includelink - % ln -s mscgfm-master/src liblink + % ln -s src includelink + % ln -s src liblink These links could instead be set to the include and lib directories created by a MS-CG install, e.g. % ln -s /usr/local/include includelink @@ -46,8 +60,8 @@ somewhere else, you will also need to repeat steps 1,2,3. The Makefile.lammps file in this directory is there for compatibility with the way other libraries under the lib dir are linked with by -LAMMPS. MS-CG requires the GSL, LAPACK, and BLAS libraries as listed -in Makefile.lammps. If they are not in default locations where your +LAMMPS. MS-CG requires the GSL and LAPACK libraries as listed in +Makefile.lammps. 
If they are not in default locations where your LD_LIBRARY_PATH environment settings can find them, then you should add the approrpriate -L paths to the mscg_SYSPATH variable in Makefile.lammps. diff --git a/lib/netcdf/README b/lib/netcdf/README index 00db8df001..b18ea1d276 100644 --- a/lib/netcdf/README +++ b/lib/netcdf/README @@ -1,6 +1,9 @@ The Makefile.lammps file in this directory is used when building LAMMPS with packages that make use of the NetCDF library or its -parallel version. The file has several settings needed to compile +parallel version. For example, the USER-NETCDF package which adds +dump netcdf and dump netcdf/mpiio commands. + +The file has several settings needed to compile and link LAMMPS with the NetCDF and parallel NetCDF support. For any regular NetCDF installation, all required flags should be autodetected. Please note that parallel NetCDF support is diff --git a/lib/poems/Install.py b/lib/poems/Install.py new file mode 100644 index 0000000000..18b426f928 --- /dev/null +++ b/lib/poems/Install.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python + +# install.py tool to do a generic build of a library +# soft linked to by many of the lib/Install.py files +# used to automate the steps described in the corresponding lib/README + +import sys,commands,os + +# help message + +help = """ +Syntax: python Install.py -m machine -e suffix + specify -m and optionally -e, order does not matter + -m = peform a clean followed by "make -f Makefile.machine" + machine = suffix of a lib/Makefile.* file + -e = set EXTRAMAKE variable in Makefile.machine to Makefile.lammps.suffix + does not alter existing Makefile.machine +""" + +# print error message or help + +def error(str=None): + if not str: print help + else: print "ERROR",str + sys.exit() + +# parse args + +args = sys.argv[1:] +nargs = len(args) +if nargs == 0: error() + +machine = None +extraflag = 0 + +iarg = 0 +while iarg < nargs: + if args[iarg] == "-m": + if iarg+2 > nargs: error() + machine = args[iarg+1] + 
iarg += 2 + elif args[iarg] == "-e": + if iarg+2 > nargs: error() + extraflag = 1 + suffix = args[iarg+1] + iarg += 2 + else: error() + +# set lib from working dir + +cwd = os.getcwd() +lib = os.path.basename(cwd) + +# create Makefile.auto as copy of Makefile.machine +# reset EXTRAMAKE if requested + +if not os.path.exists("Makefile.%s" % machine): + error("lib/%s/Makefile.%s does not exist" % (lib,machine)) + +lines = open("Makefile.%s" % machine,'r').readlines() +fp = open("Makefile.auto",'w') + +for line in lines: + words = line.split() + if len(words) == 3 and extraflag and \ + words[0] == "EXTRAMAKE" and words[1] == '=': + line = line.replace(words[2],"Makefile.lammps.%s" % suffix) + print >>fp,line, + +fp.close() + +# make the library via Makefile.auto + +print "Building lib%s.a ..." % lib +cmd = "make -f Makefile.auto clean; make -f Makefile.auto" +txt = commands.getoutput(cmd) +print txt + +if os.path.exists("lib%s.a" % lib): print "Build was successful" +else: error("Build of lib/%s/lib%s.a was NOT successful" % (lib,lib)) +if not os.path.exists("Makefile.lammps"): + print "lib/%s/Makefile.lammps was NOT created" % lib diff --git a/lib/poems/README b/lib/poems/README index 836595bdd1..e0ded85e46 100644 --- a/lib/poems/README +++ b/lib/poems/README @@ -40,6 +40,11 @@ links against when using the POEMA package. This library must be built with a C++ compiler, before LAMMPS is built, so LAMMPS can link against it. +You can type "make lib-poems" from the src directory to see help on +how to build this library via make commands, or you can do the same +thing by typing "python Install.py" from within this directory, or you +can do it manually by following the instructions below. + Build the library using one of the provided Makefile.* files or create your own, specific to your compiler and system. 
For example: diff --git a/lib/python/Makefile.lammps b/lib/python/Makefile.lammps index 8538994694..4289674e99 100644 --- a/lib/python/Makefile.lammps +++ b/lib/python/Makefile.lammps @@ -1,6 +1,7 @@ # Settings that the LAMMPS build will import when this package library is used # See the README file for more explanation -python_SYSINC = $(shell which python2-config > /dev/null 2>&1 && python2-config --includes || python-config --includes ) -python_SYSLIB = $(shell which python2-config > /dev/null 2>&1 && python2-config --ldflags || python-config --ldflags) +python_SYSINC = $(shell which python-config > /dev/null 2>&1 && python-config --includes || :) +python_SYSLIB = $(shell which python-config > /dev/null 2>&1 && python-config --ldflags || :) python_SYSPATH = +PYTHON=python diff --git a/lib/python/Makefile.lammps.python2 b/lib/python/Makefile.lammps.python2 index 8538994694..7b54b4c3df 100644 --- a/lib/python/Makefile.lammps.python2 +++ b/lib/python/Makefile.lammps.python2 @@ -1,6 +1,7 @@ # Settings that the LAMMPS build will import when this package library is used # See the README file for more explanation -python_SYSINC = $(shell which python2-config > /dev/null 2>&1 && python2-config --includes || python-config --includes ) -python_SYSLIB = $(shell which python2-config > /dev/null 2>&1 && python2-config --ldflags || python-config --ldflags) +python_SYSINC = $(shell which python2-config > /dev/null 2>&1 && python2-config --includes || (which python-config > /dev/null 2>&1 && python-config --includes || :)) +python_SYSLIB = $(shell which python2-config > /dev/null 2>&1 && python2-config --ldflags || (which python-config > /dev/null 2>&1 && python-config --ldflags || :)) python_SYSPATH = +PYTHON=$(shell which python2 > /dev/null 2>&1 && echo python2 || echo python) diff --git a/lib/python/Makefile.lammps.python2.7 b/lib/python/Makefile.lammps.python2.7 index b5807086f2..07c6a94b21 100644 --- a/lib/python/Makefile.lammps.python2.7 +++ 
b/lib/python/Makefile.lammps.python2.7 @@ -4,3 +4,4 @@ python_SYSINC = -I/usr/local/include/python2.7 python_SYSLIB = -lpython2.7 -lnsl -ldl -lreadline -ltermcap -lpthread -lutil -lm -Xlinker -export-dynamic python_SYSPATH = +PYTHON=python2.7 diff --git a/lib/python/Makefile.lammps.python3 b/lib/python/Makefile.lammps.python3 new file mode 100644 index 0000000000..5c43b45ff6 --- /dev/null +++ b/lib/python/Makefile.lammps.python3 @@ -0,0 +1,7 @@ +# Settings that the LAMMPS build will import when this package library is used +# See the README file for more explanation + +python_SYSINC = $(shell which python3-config > /dev/null 2>&1 && python3-config --includes || (which python-config > /dev/null 2>&1 && python-config --includes || :)) +python_SYSLIB = $(shell which python3-config > /dev/null 2>&1 && python3-config --ldflags || (which python-config > /dev/null 2>&1 && python-config --ldflags || :)) +python_SYSPATH = +PYTHON=$(shell which python3 > /dev/null 2>&1 && echo python3 || echo python) diff --git a/lib/python/README b/lib/python/README index ddccc1a21a..8de2bc4bd7 100644 --- a/lib/python/README +++ b/lib/python/README @@ -1,26 +1,26 @@ The Makefile.lammps file in this directory is used when building LAMMPS with its PYTHON package installed. The file has several settings needed to compile and link LAMMPS with the Python library. -You should choose a Makefile.lammps.* file compatible with your system -and your version of Python, and copy it to Makefile.lammps before -building LAMMPS itself. You may need to edit one of the provided -files to match your system. -Note that is not currently possible to use the PYTHON package with -Python 3, only with Python 2. The C API changed from Python 2 to 3 -and the LAMMPS code is not compatible with both. +The default Makefile.lammps will automatically choose the default +python interpreter of your system and will infer the flags from +the python-config utility, that is usually bundled with the python +installation. 
If needed, you can copy one of the other provided +Makefile.lammps.* files to Makefile.lammps before building +LAMMPS itself. -If you create a new Makefile.lammps file suitable for some version of -Python on some system, that is not a match to one of the provided -Makefile.lammps.* files, you can send it to the developers, and we can -include it in the distribution for others to use. - -To illustrate, these are example settings from the -Makefile.lammps.python2.7 file: +The files Makefile.lammps.python2 and Makefile.lammps.python3 are +similar to the default file, but meant for the case that both +python 2 and python 3 are installed simultaneously and you want +to prefer one over the other. If neither of these files works, you +may have to create a custom Makefile.lammps file suitable for +the version of Python on your system. To illustrate, these are +example settings from the Makefile.lammps.python2.7 file: python_SYSINC = -I/usr/local/include/python2.7 python_SYSLIB = -lpython2.7 -lnsl -ldl -lreadline -ltermcap -lpthread -lutil -lm -python_SYSPATH = +python_SYSPATH = +PYTHON=python2.7 python_SYSINC refers to the directory where Python's Python.h file is found. LAMMPS includes this file. @@ -30,10 +30,13 @@ application (LAMMPS in this case) to "embed" Python in the application. The Python library itself is listed (-lpython2.7) are are several system libraries needed by Python. -python_SYSPATH = refers to the path (e.g. -L/usr/local/lib) where the +python_SYSPATH refers to the path (e.g. -L/usr/local/lib) where the Python library can be found. You may not need this setting if the path is already included in your LD_LIBRARY_PATH environment variable. +PYTHON is the name of the python interpreter.
It is used for +installing the LAMMPS python module with "make install-python" + ------------------------- Note that the trickiest issue to figure out for inclusion in diff --git a/lib/qmmm/Install.py b/lib/qmmm/Install.py new file mode 100644 index 0000000000..18b426f928 --- /dev/null +++ b/lib/qmmm/Install.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python + +# install.py tool to do a generic build of a library +# soft linked to by many of the lib/Install.py files +# used to automate the steps described in the corresponding lib/README + +import sys,commands,os + +# help message + +help = """ +Syntax: python Install.py -m machine -e suffix + specify -m and optionally -e, order does not matter + -m = peform a clean followed by "make -f Makefile.machine" + machine = suffix of a lib/Makefile.* file + -e = set EXTRAMAKE variable in Makefile.machine to Makefile.lammps.suffix + does not alter existing Makefile.machine +""" + +# print error message or help + +def error(str=None): + if not str: print help + else: print "ERROR",str + sys.exit() + +# parse args + +args = sys.argv[1:] +nargs = len(args) +if nargs == 0: error() + +machine = None +extraflag = 0 + +iarg = 0 +while iarg < nargs: + if args[iarg] == "-m": + if iarg+2 > nargs: error() + machine = args[iarg+1] + iarg += 2 + elif args[iarg] == "-e": + if iarg+2 > nargs: error() + extraflag = 1 + suffix = args[iarg+1] + iarg += 2 + else: error() + +# set lib from working dir + +cwd = os.getcwd() +lib = os.path.basename(cwd) + +# create Makefile.auto as copy of Makefile.machine +# reset EXTRAMAKE if requested + +if not os.path.exists("Makefile.%s" % machine): + error("lib/%s/Makefile.%s does not exist" % (lib,machine)) + +lines = open("Makefile.%s" % machine,'r').readlines() +fp = open("Makefile.auto",'w') + +for line in lines: + words = line.split() + if len(words) == 3 and extraflag and \ + words[0] == "EXTRAMAKE" and words[1] == '=': + line = line.replace(words[2],"Makefile.lammps.%s" % suffix) + print >>fp,line, + 
+fp.close() + +# make the library via Makefile.auto + +print "Building lib%s.a ..." % lib +cmd = "make -f Makefile.auto clean; make -f Makefile.auto" +txt = commands.getoutput(cmd) +print txt + +if os.path.exists("lib%s.a" % lib): print "Build was successful" +else: error("Build of lib/%s/lib%s.a was NOT successful" % (lib,lib)) +if not os.path.exists("Makefile.lammps"): + print "lib/%s/Makefile.lammps was NOT created" % lib diff --git a/lib/qmmm/README b/lib/qmmm/README index b50f25ed69..2746c9e86e 100644 --- a/lib/qmmm/README +++ b/lib/qmmm/README @@ -18,6 +18,15 @@ the only option. Adding support for a different QM code will require to write a new version of the top-level wrapper code, pwqmmm.c, and also an interface layer into the QM code similar to the one in QE. +You can type "make lib-qmmm" from the src directory to see help on how +to build this library (steps 1 and 2 below) via make commands, or you +can do the same thing by typing "python Install.py" from within this +directory, or you can do it manually by following the instructions +below. + +However you perform steps 1 and 2, you will need to perform steps 3 +and 4 manually, as outlined below. 
+ ------------------------------------------------- WARNING: This is experimental code under developement and is provided diff --git a/lib/reax/Install.py b/lib/reax/Install.py new file mode 100644 index 0000000000..18b426f928 --- /dev/null +++ b/lib/reax/Install.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python + +# install.py tool to do a generic build of a library +# soft linked to by many of the lib/Install.py files +# used to automate the steps described in the corresponding lib/README + +import sys,commands,os + +# help message + +help = """ +Syntax: python Install.py -m machine -e suffix + specify -m and optionally -e, order does not matter + -m = peform a clean followed by "make -f Makefile.machine" + machine = suffix of a lib/Makefile.* file + -e = set EXTRAMAKE variable in Makefile.machine to Makefile.lammps.suffix + does not alter existing Makefile.machine +""" + +# print error message or help + +def error(str=None): + if not str: print help + else: print "ERROR",str + sys.exit() + +# parse args + +args = sys.argv[1:] +nargs = len(args) +if nargs == 0: error() + +machine = None +extraflag = 0 + +iarg = 0 +while iarg < nargs: + if args[iarg] == "-m": + if iarg+2 > nargs: error() + machine = args[iarg+1] + iarg += 2 + elif args[iarg] == "-e": + if iarg+2 > nargs: error() + extraflag = 1 + suffix = args[iarg+1] + iarg += 2 + else: error() + +# set lib from working dir + +cwd = os.getcwd() +lib = os.path.basename(cwd) + +# create Makefile.auto as copy of Makefile.machine +# reset EXTRAMAKE if requested + +if not os.path.exists("Makefile.%s" % machine): + error("lib/%s/Makefile.%s does not exist" % (lib,machine)) + +lines = open("Makefile.%s" % machine,'r').readlines() +fp = open("Makefile.auto",'w') + +for line in lines: + words = line.split() + if len(words) == 3 and extraflag and \ + words[0] == "EXTRAMAKE" and words[1] == '=': + line = line.replace(words[2],"Makefile.lammps.%s" % suffix) + print >>fp,line, + +fp.close() + +# make the library via Makefile.auto + 
+print "Building lib%s.a ..." % lib +cmd = "make -f Makefile.auto clean; make -f Makefile.auto" +txt = commands.getoutput(cmd) +print txt + +if os.path.exists("lib%s.a" % lib): print "Build was successful" +else: error("Build of lib/%s/lib%s.a was NOT successful" % (lib,lib)) +if not os.path.exists("Makefile.lammps"): + print "lib/%s/Makefile.lammps was NOT created" % lib diff --git a/lib/reax/README b/lib/reax/README index 2840a242a5..f21a470618 100644 --- a/lib/reax/README +++ b/lib/reax/README @@ -17,6 +17,11 @@ links against when using the REAX package. This library must be built with a F90 compiler, before LAMMPS is built, so LAMMPS can link against it. +You can type "make lib-reax" from the src directory to see help on how +to build this library via make commands, or you can do the same thing +by typing "python Install.py" from within this directory, or you can +do it manually by following the instructions below. + Build the library using one of the provided Makefile.* files or create your own, specific to your compiler and system. For example: diff --git a/lib/smd/Install.py b/lib/smd/Install.py new file mode 100644 index 0000000000..dc0a3187ce --- /dev/null +++ b/lib/smd/Install.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python + +# Install.py tool to download, unpack, and point to the Eigen library +# used to automate the steps described in the README file in this dir + +import sys,os,re,glob,commands + +# help message + +help = """ +Syntax: python Install.py -h hpath hdir -g -l + specify one or more options, order does not matter + -h = set home dir of Eigen to be hpath/hdir + hpath can be full path, contain '~' or '.' chars + default hpath = . 
= lib/smd + default hdir = "ee" = what tarball unpacks to (eigen-eigen-*) + -g = grab (download) tarball from http://eigen.tuxfamily.org website + unpack it to hpath/hdir + hpath must already exist + if hdir already exists, it will be deleted before unpack + -l = create softlink (includelink) in lib/smd to Eigen src dir +""" + +# settings + +url = "http://bitbucket.org/eigen/eigen/get/3.3.3.tar.gz" +tarball = "eigen.tar.gz" + +# print error message or help + +def error(str=None): + if not str: print help + else: print "ERROR",str + sys.exit() + +# expand to full path name +# process leading '~' or relative path + +def fullpath(path): + return os.path.abspath(os.path.expanduser(path)) + +# parse args + +args = sys.argv[1:] +nargs = len(args) +if nargs == 0: error() + +homepath = "." +homedir = "ee" + +grabflag = 0 +linkflag = 0 + +iarg = 0 +while iarg < nargs: + if args[iarg] == "-h": + if iarg+3 > nargs: error() + homepath = args[iarg+1] + homedir = args[iarg+2] + iarg += 3 + elif args[iarg] == "-g": + grabflag = 1 + iarg += 1 + elif args[iarg] == "-l": + linkflag = 1 + iarg += 1 + else: error() + +homepath = fullpath(homepath) +if not os.path.isdir(homepath): error("Eigen path does not exist") + +# download and unpack Eigen tarball +# glob to find name of dir it unpacks to + +if grabflag: + print "Downloading Eigen ..." + cmd = "curl -L %s > %s/%s" % (url,homepath,tarball) + print cmd + print commands.getoutput(cmd) + + print "Unpacking Eigen tarball ..." 
+ edir = glob.glob("%s/eigen-eigen-*" % homepath) + for one in edir: + if os.path.isdir(one): commands.getoutput("rm -rf %s" % one) + cmd = "cd %s; tar zxvf %s" % (homepath,tarball) + commands.getoutput(cmd) + if homedir != "ee": + if os.path.exists(homedir): commands.getoutput("rm -rf %s" % homedir) + edir = glob.glob("%s/eigen-eigen-*" % homepath) + os.rename(edir[0],"%s/%s" % (homepath,homedir)) + +# create link in lib/smd to Eigen src dir + +if linkflag: + print "Creating link to Eigen files" + if os.path.isfile("includelink") or os.path.islink("includelink"): + os.remove("includelink") + if homedir == "ee": + edir = glob.glob("%s/eigen-eigen-*" % homepath) + linkdir = edir[0] + else: linkdir = "%s/%s" % (homepath,homedir) + cmd = "ln -s %s includelink" % linkdir + commands.getoutput(cmd) diff --git a/lib/smd/README b/lib/smd/README index 846c440dae..1bd5902a1f 100644 --- a/lib/smd/README +++ b/lib/smd/README @@ -4,9 +4,12 @@ to use the USER-SMD package in a LAMMPS input script. The Eigen library is available at http://eigen.tuxfamily.org. It's a general C++ template library for linear algebra. -You must perform the following steps yourself, or you can use the -install.py Python script to automate any or all steps of the process. -Type "python install.py" for instructions. +You can type "make lib-smd" from the src directory to see help on how +to download and build this library via make commands, or you can do the +same thing by typing "python Install.py" from within this directory, +or you can do it manually by following the instructions below. + +Instructions: 1.
Download the Eigen tarball at http://eigen.tuxfamily.org and unpack the tarball either in this /lib/smd directory or somewhere diff --git a/lib/voronoi/Install.py b/lib/voronoi/Install.py new file mode 100644 index 0000000000..7d847183b3 --- /dev/null +++ b/lib/voronoi/Install.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python + +# Install.py tool to download, unpack, build, and link to the Voro++ library +# used to automate the steps described in the README file in this dir + +import sys,os,re,urllib,commands + +# help message + +help = """ +Syntax: python Install.py -v version -h hpath hdir -g -b -l + specify one or more options, order does not matter + -v = version of Voro++ to download and build + default version = voro++-0.4.6 (current as of Jan 2015) + -h = set home dir of Voro++ to be hpath/hdir + hpath can be full path, contain '~' or '.' chars + default hpath = . = lib/voronoi + default hdir = voro++-0.4.6 = what tarball unpacks to + -g = grab (download) tarball from math.lbl.gov/voro++ website + unpack it to hpath/hdir + hpath must already exist + if hdir already exists, it will be deleted before unpack + -b = build Voro++ library in its src dir + -l = create 2 softlinks (includelink,liblink) in lib/voronoi to Voro++ src dir +""" + +# settings + +version = "voro++-0.4.6" +url = "http://math.lbl.gov/voro++/download/dir/%s.tar.gz" % version + +# print error message or help + +def error(str=None): + if not str: print help + else: print "ERROR",str + sys.exit() + +# expand to full path name +# process leading '~' or relative path + +def fullpath(path): + return os.path.abspath(os.path.expanduser(path)) + +# parse args + +args = sys.argv[1:] +nargs = len(args) +if nargs == 0: error() + +homepath = "." 
+homedir = version + +grabflag = 0 +buildflag = 0 +linkflag = 0 + +iarg = 0 +while iarg < nargs: + if args[iarg] == "-v": + if iarg+2 > nargs: error() + version = args[iarg+1] + iarg += 2 + elif args[iarg] == "-h": + if iarg+3 > nargs: error() + homepath = args[iarg+1] + homedir = args[iarg+2] + iarg += 3 + elif args[iarg] == "-g": + grabflag = 1 + iarg += 1 + elif args[iarg] == "-b": + buildflag = 1 + iarg += 1 + elif args[iarg] == "-l": + linkflag = 1 + iarg += 1 + else: error() + +homepath = fullpath(homepath) +if not os.path.isdir(homepath): error("Voro++ path does not exist") +homedir = "%s/%s" % (homepath,homedir) + +# download and unpack Voro++ tarball + +if grabflag: + print "Downloading Voro++ ..." + urllib.urlretrieve(url,"%s/%s.tar.gz" % (homepath,version)) + + print "Unpacking Voro++ tarball ..." + if os.path.exists("%s/%s" % (homepath,version)): + commands.getoutput("rm -rf %s/%s" % (homepath,version)) + cmd = "cd %s; tar zxvf %s.tar.gz" % (homepath,version) + commands.getoutput(cmd) + if os.path.basename(homedir) != version: + if os.path.exists(homedir): commands.getoutput("rm -rf %s" % homedir) + os.rename("%s/%s" % (homepath,version),homedir) + +# build Voro++ + +if buildflag: + print "Building Voro++ ..." 
+ cmd = "cd %s; make" % homedir + txt = commands.getoutput(cmd) + print txt + +# create 2 links in lib/voronoi to Voro++ src dir + +if linkflag: + print "Creating links to Voro++ include and lib files" + if os.path.isfile("includelink") or os.path.islink("includelink"): + os.remove("includelink") + if os.path.isfile("liblink") or os.path.islink("liblink"): + os.remove("liblink") + cmd = "ln -s %s/src includelink" % homedir + commands.getoutput(cmd) + cmd = "ln -s %s/src liblink" % homedir + commands.getoutput(cmd) diff --git a/lib/voronoi/README b/lib/voronoi/README index 62acb30a5a..9863632be0 100644 --- a/lib/voronoi/README +++ b/lib/voronoi/README @@ -6,11 +6,15 @@ The Voro++ library is available at http://math.lbl.gov/voro++ and was developed by Chris H. Rycroft while at UC Berkeley / Lawrence Berkeley Laboratory. +You can type "make lib-voronoi" from the src directory to see help on +how to download and build this library via make commands, or you can +do the same thing by typing "python Install.py" from within this +directory, or you can do it manually by following the instructions +below. + ----------------- -You must perform the following steps yourself, or you can use the -install.py Python script to automate any or all steps of the process. -Type "python install.py" for instructions. +Instructions: 1. 
Download Voro++ at http://math.lbl.gov/voro++/download either as a tarball or via SVN, and unpack the diff --git a/lib/voronoi/install.py b/lib/voronoi/install.py deleted file mode 100644 index 645d167564..0000000000 --- a/lib/voronoi/install.py +++ /dev/null @@ -1,163 +0,0 @@ -#!usr/local/python - -# install.py tool to download, unpack, build, and link to the Voro++ library -# used to automate the steps described in the README file in this dir - -import sys,os,re,urllib,commands - -help = """ -Syntax: install.py -d dir -v version -g -b -i installdir -l incdir libdir - specify one or more options, order does not matter - -d = dir to download tarball to, unpack tarball in, perform build in - dir will be created if it doesn't exist (only last level) - default = this dir - -v = version of Voro++ to download and work with - default = voro++-0.4.6 (current as of Jan 2015) - -g = download (grab) tarball from - http://math.lbl.gov/voro++/download/dir/version - -b = build Voro++ by invoking "make" in its home dir - no default - -i = install Voro++ by invoking "make install" in its home dir - installdir arg is optional: - if not specified, installs at PREFIX defined in config.mk file - if specified, will overwrite PREFIX and install there - if PREFIX starts with /usr, will invoke "sudo make install" - -l = create two links to incdir and libdir - incdir and libdir are optional (specify neither or both): - if specified, includelink and liblink are to those two dirs - these are dirs where Voro++ include files and lib file are - if not specified and no install, links are to Voro++ src dir - if not specified and install performed, - links are to include and lib dirs under PREFIX -""" - -def error(): - print help - sys.exit() - -# parse args - -args = sys.argv - -if len(args) == 1: error() - -dir = "." 
-version = "voro++-0.4.6" -grabflag = 0 -buildflag = 0 -installflag = 0 -linkflag = 0 - -iarg = 1 -while iarg < len(args): - if args[iarg] == "-d": - if iarg+2 > len(args): error() - dir = args[iarg+1] - iarg += 2 - elif args[iarg] == "-v": - if iarg+2 > len(args): error() - version = args[iarg+1] - iarg += 2 - elif args[iarg] == "-g": - grabflag = 1 - iarg += 1 - elif args[iarg] == "-b": - buildflag = 1 - iarg += 1 - elif args[iarg] == "-i": - installflag = 1 - if iarg+1 == len(args) or args[iarg+1][0] == '-': - installdir = "" - iarg += 1 - else: - if iarg+2 > len(args): error() - installdir = args[iarg+1] - iarg += 2 - elif args[iarg] == "-l": - linkflag = 1 - if iarg+1 == len(args) or args[iarg+1][0] == '-' or \ - iarg+2 == len(args) or args[iarg+2][0] == '-': - includedir = libdir = "" - iarg += 1 - else: - if iarg+3 > len(args): error() - includedir = args[iarg+1] - libdir = args[iarg+2] - iarg += 3 - else: error() - -dir = os.path.abspath(dir) -url = "http://math.lbl.gov/voro++/download/dir/%s.tar.gz" % version - -# create dir if does not exist - -if not os.path.isdir(dir): - if os.path.isfile(dir): - print "ERROR: Dir already exists as file" - sys.exit() - os.mkdir(dir) - if not os.path.isdir(dir): - print "ERROR: Unable to create dir" - sys.exit() - -# download and unpack tarball - -if grabflag: - print "Downloading Voro++ tarball ..." - urllib.urlretrieve(url,"%s/%s.tar.gz" % (dir,version)) - print "Unpacking Voro++ tarball ..." - cmd = "cd %s; tar zxvf %s.tar.gz" % (dir,version) - txt = commands.getoutput(cmd) - -# build Voro++ in its dir - -if buildflag: - print "Building Voro++ ..." - cmd = "cd %s/%s; make" % (dir,version) - txt = commands.getoutput(cmd) - print txt - -# install Voro++ -# if installdir set, overwrite PREFIX var in its config.mk file -# if PREFIX var starts with /usr, invoke sudo make install, else make install - -if installflag: - print "Installing Voro++ ..." 
- if installdir: - txt = open("%s/%s/config.mk" % (dir,version),'r').read() - txt = re.sub("PREFIX=.*?\n","PREFIX=%s\n" % installdir,txt) - open("%s/%s/config.mk" % (dir,version),'w').write(txt) - print "TXT:",txt - txt = open("%s/%s/config.mk" % (dir,version),'r').read() - var = re.findall("PREFIX=.*?\n",txt) - prefix = var[0].split('=')[1].strip() - if prefix.startswith("/usr"): - cmd = "cd %s/%s; sudo make install" % (dir,version) - else: - cmd = "cd %s/%s; make install" % (dir,version) - txt = commands.getoutput(cmd) - print txt - -# create links in this dir to Voro++ include and lib files - -if linkflag: - print "Creating links to Voro++ include and lib files" - if os.path.isfile("includelink") or os.path.islink("includelink"): - os.remove("includelink") - if os.path.isfile("liblink") or os.path.islink("liblink"): - os.remove("liblink") - if includedir: - cmd = "ln -s %s includelink" % includedir - txt = commands.getoutput(cmd) - cmd = "ln -s %s liblink" % linkdir - txt = commands.getoutput(cmd) - elif not installflag: - cmd = "ln -s %s/%s/src includelink" % (dir,version) - txt = commands.getoutput(cmd) - cmd = "ln -s %s/%s/src liblink" % (dir,version) - txt = commands.getoutput(cmd) - else: - cmd = "ln -s %s/include includelink" % prefix - txt = commands.getoutput(cmd) - cmd = "ln -s %s/lib liblink" % prefix - txt = commands.getoutput(cmd) diff --git a/lib/vtk/Makefile.lammps b/lib/vtk/Makefile.lammps index e3b28ed928..b86856a9c6 100644 --- a/lib/vtk/Makefile.lammps +++ b/lib/vtk/Makefile.lammps @@ -1,13 +1,12 @@ # Settings that the LAMMPS build will import when this package library is used -# + # settings for VTK-5.8.0 on RHEL/CentOS 6.x vtk_SYSINC = -I/usr/include/vtk vtk_SYSLIB = -lvtkCommon -lvtkIO vtk_SYSPATH = -L/usr/lib64/vtk -# + # settings for VTK 6.2.0 on Fedora 23 #vtk_SYSINC = -I/usr/include/vtk #vtk_SYSLIB = -lvtkCommonCore -lvtkIOCore -lvtkCommonDataModel -lvtkIOXML -lvtkIOLegacy -lvtkIOParallelXML #vtk_SYSPATH = -L/usr/lib64/vtk -# diff --git 
a/lib/vtk/README b/lib/vtk/README index 11add94f52..61e2a40c23 100644 --- a/lib/vtk/README +++ b/lib/vtk/README @@ -1,14 +1,15 @@ -The Makefile.lammps file in this directory is used when building LAMMPS with -its USER-VTK package installed. The file has several settings needed to -compile and link LAMMPS with the VTK library. You should choose a -Makefile.lammps.* file compatible with your system and your version of VTK, and -copy it to Makefile.lammps before building LAMMPS itself. You may need to edit -one of the provided files to match your system. +The Makefile.lammps file in this directory is used when building +LAMMPS with its USER-VTK package installed. The file has several +settings needed to compile and link LAMMPS with the VTK library. You +should choose a Makefile.lammps.* file compatible with your system and +your version of VTK, and copy it to Makefile.lammps before building +LAMMPS itself. You may need to edit one of the provided files to +match your system. -If you create a new Makefile.lammps file suitable for some version of VTK on -some system, that is not a match to one of the provided Makefile.lammps.* -files, you can send it to the developers, and we can include it in the -distribution for others to use. +If you create a new Makefile.lammps file suitable for some version of +VTK on some system, that is not a match to one of the provided +Makefile.lammps.* files, you can send it to the developers, and we can +include it in the distribution for others to use. To illustrate, these are example settings from the Makefile.lammps.ubuntu14.04_vtk6 file: @@ -19,10 +20,11 @@ vtk_SYSPATH = vtk_SYSINC refers to the include directory of the installed VTK library -vtk_SYSLIB refers to the libraries needed to link to from an application -(LAMMPS in this case) to "embed" VTK in the application. VTK consists of -multiple shared libraries which are needed when using the USER-VTK package. 
+vtk_SYSLIB refers to the libraries needed to link to from an +application (LAMMPS in this case) to "embed" VTK in the +application. VTK consists of multiple shared libraries which are +needed when using the USER-VTK package. -vtk_SYSPATH = refers to the path (e.g. -L/usr/local/lib) where the VTK library -can be found. You may not need this setting if the path is already included in -your LD_LIBRARY_PATH environment variable. +vtk_SYSPATH = refers to the path (e.g. -L/usr/local/lib) where the VTK +library can be found. You may not need this setting if the path is +already included in your LD_LIBRARY_PATH environment variable. diff --git a/potentials/SiC.edip b/potentials/SiC.edip new file mode 100644 index 0000000000..a38f30d974 --- /dev/null +++ b/potentials/SiC.edip @@ -0,0 +1,38 @@ +# DATE: 2017-05-16 CONTRIBUTOR: Chao Jiang , Phys. Rev. B 86, 144118 (2012) +# element 1, element 2, element 3, +# A B cutoffA cutoffC alpha beta eta +# gamma lambda mu rho sigma Q0 +# u1 u2 u3 u4 +# +Si Si Si 5.488043 1.446435 2.941586 2.540193 3.066580 0.008593 0.589390 + 1.135256 2.417497 0.629131 1.343679 0.298443 208.924548 + -0.165799 32.557 0.286198 0.66 + +C C C 10.222599 0.959814 2.212263 1.741598 1.962090 0.025661 0.275605 + 1.084183 3.633621 0.594236 2.827634 0.536561 289.305617 + -0.165799 32.557 0.286198 0.66 + +C Si Si 7.535967 1.177019 2.534972 1.973974 2.507738 0.015347 0.432497 + 1.191567 3.025559 0.611684 2.061835 0.423863 249.115082 + -0.165799 32.557000 0.286198 0.660000 + +Si C C 7.535967 1.177019 2.534972 1.973974 2.507738 0.015347 0.432497 + 1.191567 3.025559 0.611684 2.061835 0.423863 249.115082 + -0.165799 32.557000 0.286198 0.660000 + +Si Si C 5.488043 1.446435 2.941586 2.540193 3.066580 0.008593 0.510944 + 1.135256 2.721528 0.620407 1.343679 0.298443 229.019815 + -0.165799 32.557000 0.286198 0.660000 + +Si C Si 7.535967 1.177019 2.534972 1.973974 2.507738 0.015347 0.510944 + 1.191567 2.721528 0.620407 2.061835 0.423863 229.019815 + -0.165799 32.557000 
0.286198 0.660000 + +C C Si 10.222599 0.959814 2.212263 1.741598 1.962090 0.025661 0.354051 + 1.084183 3.329590 0.602960 2.827634 0.536561 269.210350 + -0.165799 32.557000 0.286198 0.660000 + +C Si C 7.535967 1.177019 2.534972 1.973974 2.507738 0.015347 0.354051 + 1.191567 3.329590 0.602960 2.061835 0.423863 269.210350 + -0.165799 32.557000 0.286198 0.660000 + diff --git a/potentials/SiC.gw b/potentials/SiC.gw new file mode 100644 index 0000000000..1c14e3a53e --- /dev/null +++ b/potentials/SiC.gw @@ -0,0 +1,19 @@ +# DATE: 2016-05-06 CONTRIBUTOR: German Samolyuk, samolyuk@gmail.com CITATION: ??? +# Gao-Weber parameters for various elements and mixtures +# multiple entries can be added to this file, LAMMPS reads the ones it needs +# these entries are in LAMMPS "metal" units: + +# format of a single entry (one or more lines): +# element 1, element 2, element 3, +# m, gamma, lambda3, c, d, h, n, beta, lambda2, X_ij*B, R, D, lambda1, A + +#E1 E2 E3 m gamma lambda3 c d h n beta lambda2 B R D lambda1 A + +Si Si Si 1 0.013318 0 14 2.1 -1 0.78000 1 1.80821400248640 632.658058300867 2.35 0.15 2.38684248328205 1708.79738703139 +Si Si C 1 0.013318 0 14 2.1 -1 0.78000 1 1.80821400248640 632.658058300867 2.35 0.15 2.38684248328205 1708.79738703139 +Si C Si 1 0.013318 0 14 2.1 -1 0.78000 1 1.96859970919818 428.946015420752 2.35 0.15 3.03361215187440 1820.05673775234 +C Si Si 1 0.011304 0 19 2.5 -1 0.80468 1 1.96859970919818 428.946015420752 2.35 0.15 3.03361215187440 1820.05673775234 +C C Si 1 0.011304 0 19 2.5 -1 0.80469 1 1.76776695296637 203.208547714849 2.35 0.15 2.54558441227157 458.510465798439 +C Si C 1 0.011304 0 19 2.5 -1 0.80469 1 1.96859970919818 428.946015420752 2.35 0.15 3.03361215187440 1820.05673775234 +Si C C 1 0.013318 0 14 2.1 -1 0.78000 1 1.96859970919818 428.946015420752 2.35 0.15 3.03361215187440 1820.05673775234 +C C C 1 0.011304 0 19 2.5 -1 0.80469 1 1.76776695296637 203.208547714849 2.35 0.15 2.54558441227157 458.510465798439 diff --git 
a/potentials/SiC.gw.zbl b/potentials/SiC.gw.zbl new file mode 100644 index 0000000000..8129763b10 --- /dev/null +++ b/potentials/SiC.gw.zbl @@ -0,0 +1,19 @@ +# DATE: 2016-05-06 CONTRIBUTOR: German Samolyuk, samolyuk@gmail.com CITATION: ??? +# Gao-Weber parameters for various elements and mixtures +# multiple entries can be added to this file, LAMMPS reads the ones it needs +# these entries are in LAMMPS "metal" units: + +# format of a single entry (one or more lines): +# element 1, element 2, element 3, +# m, gamma, lambda3, c, d, h, n, beta, lambda2, X_ij*B, R, D, lambda1, A + +#E1 E2 E3 m gamma lambda3 c d h n beta lambda2 B R D lambda1 A Z_i, Z_j, ZBLcut, ZBLexpscale + +Si Si Si 1 0.013318 0 14 2.1 -1 0.78000 1 1.80821400248640 632.658058300867 2.35 0.15 2.38684248328205 1708.79738703139 14 14 .95 14 +Si Si C 1 0.013318 0 14 2.1 -1 0.78000 1 1.80821400248640 632.658058300867 2.35 0.15 2.38684248328205 1708.79738703139 14 14 .95 14 +Si C Si 1 0.013318 0 14 2.1 -1 0.78000 1 1.96859970919818 428.946015420752 2.35 0.15 3.03361215187440 1820.05673775234 14 6 .95 14 +C Si Si 1 0.011304 0 19 2.5 -1 0.80468 1 1.96859970919818 428.946015420752 2.35 0.15 3.03361215187440 1820.05673775234 6 14 .95 14 +C C Si 1 0.011304 0 19 2.5 -1 0.80469 1 1.76776695296637 203.208547714849 2.35 0.15 2.54558441227157 458.510465798439 6 6 .95 14 +C Si C 1 0.011304 0 19 2.5 -1 0.80469 1 1.96859970919818 428.946015420752 2.35 0.15 3.03361215187440 1820.05673775234 6 14 .95 14 +Si C C 1 0.013318 0 14 2.1 -1 0.78000 1 1.96859970919818 428.946015420752 2.35 0.15 3.03361215187440 1820.05673775234 14 6 .95 14 +C C C 1 0.011304 0 19 2.5 -1 0.80469 1 1.76776695296637 203.208547714849 2.35 0.15 2.54558441227157 458.510465798439 6 6 .95 14 diff --git a/potentials/TiO.meam.spline b/potentials/TiO.meam.spline new file mode 100644 index 0000000000..ed2a67a962 --- /dev/null +++ b/potentials/TiO.meam.spline @@ -0,0 +1,130 @@ +# Ti-O cubic spline potential where O is in the dilute limit. 
DATE: 2016-06-05 CONTRIBUTOR: Pinchao Zhang, Dallas R. Trinkle +meam/spline 2 Ti O +spline3eq +13 +-20 0 +1.742692837 3.744277175966 99.4865081627958 +2.05580176725 0.910839730906 10.8702523265355 +2.3689106975 0.388045896634 -1.55322418749562 +2.68201962775 -0.018840906533 2.43630041329215 +2.995128558 -0.248098929639 2.67912713976835 +3.30823748825 -0.264489550297 -0.125056384603077 +3.6213464185 -0.227196189283 1.10662555360438 +3.93445534875 -0.129293090176 -0.592053676745914 +4.247564279 -0.059685366933 -0.470123414607672 +4.56067320925 -0.031100025561 -0.0380739973059663 +4.8737821395 -0.013847363202 -0.0711547960695406 +5.18689106975 -0.003203412728 -0.081768292420175 +5.5 0 -0.0571422964883619 +spline3eq +5 +0.155001355787331 0 +1.9 0.533321679606674 0 +2.8 0.456402081843862 -1.60311717015859 +3.7 -0.324281383502201 1.19940299483249 +4.6 -0.474029826906675 1.47909794595154 +5.5 0 -2.49521499855605 +spline3eq +13 +0 0 +1.742692837 0 0 +2.05580176725 0 0 +2.3689106975 0 0 +2.68201962775 0 0 +2.995128558 0 0 +3.30823748825 0 0 +3.6213464185 0 0 +3.93445534875 0 0 +4.247564279 0 0 +4.56067320925 0 0 +4.8737821395 0 0 +5.18689106975 0 0 +5.5 0 0 +spline3eq +11 +-1 0 +2.055801767 1.7475279661 -525.869786904802 +2.2912215903 -5.8677963945 252.796316927755 +2.5266414136 -8.3376288737 71.7318388721015 +2.7620612369 -5.8398712842 -1.93587742753693 +2.9974810602 -3.1140648231 -39.2999192667503 +3.2329008835 -1.7257245065 14.3424136002004 +3.4683207068 -0.4428977017 -29.4925534559498 +3.7037405301 -0.1466643003 -3.18010534572236 +3.9391603534 -0.2095507945 3.33490838803603 +4.1745801767 -0.1442384563 3.71918691359508 +4.41 0 -9.66717019857564 +spline3eq +5 +-61.9827585211652 0 +1.9 11.2293641315584 0 +2.8 -27.9976343076148 122.648031332411 +3.7 -8.32979773113248 -54.3340881766381 +4.6 -1.00863195297399 3.23150064581724 +5.5 0 -5.3514242228123 +spline3eq +4 +0.00776934946045395 0.105197706160344 +-55.14233165 -0.29745568008 0.00152870603877451 +-44.7409899033333 
-0.15449458722 0.00038933722543571 +-34.3396481566667 0.05098657168 0.00038124926922248 +-23.93830641 0.57342694704 0.0156639264890892 +spline3eq +5 +-0.00676745157022662 -0.0159520381982146 +-23.9928 0.297607384684645 0 +-15.9241175 0.216691597077105 -0.0024248755353942 +-7.855435 0.0637598673719069 0.00306245895013358 +0.213247499999998 -0.00183450621970427 -0.00177588407633909 +8.28193 -0.111277018874367 0 +spline3eq +10 +2.77327511656661 0 +2.055801767 -0.1485215264 72.2010867146919 +2.31737934844444 1.6845304918 -47.2744689053404 +2.57895692988889 2.0113365977 -15.1859578405326 +2.84053451133333 1.1444092747 3.33978204841873 +3.10211209277778 0.2861606803 2.587867603808 +3.36368967422222 -0.3459281126 6.14070694084556 +3.62526725566667 -0.6257480601 3.7397696717154 +3.88684483711111 -0.6119510826 4.64749084871402 +4.14842241855556 -0.3112059651 2.83275746415936 +4.41 0 -15.0612086827734 +spline3eq +5 +12.3315547862781 0 +1.9 2.62105440156724 0 +2.8 10.2850803058354 -25.439802988016 +3.7 3.23933763743897 -7.20203673434025 +4.6 -5.79049355858613 39.5509978688682 +5.5 0 -41.221771373642 +spline3eq +8 +8.33642274810572 -60.4024574736564 +-1 0.07651409193 -110.652321293778 +-0.724509054371429 0.14155824541 44.8853405500508 +-0.449018108742857 0.75788697341 -25.3065115342002 +-0.173527163114286 0.63011570378 -2.48510144915082 +0.101963782514286 0.09049597305 2.68769386908235 +0.377454728142857 -0.35741586657 -1.01558570129633 +0.652945673771428 -0.65293217647 13.4224786001212 +0.9284366194 -6.00912190653 -452.752542694929 +spline3eq +5 +0.137191606537625 -1.55094230968985 +-1 0.0513843442016519 0 +-0.5 0.0179024412245673 -2.44986494990154 +0 -0.260650876879273 3.91774583656401 +0.5 -0.190163791764901 -4.84414871911743 +1 -0.763795416646599 0 +spline3eq +8 +0 0 +-1 0 0 +-0.724509054371429 0 0 +-0.449018108742857 0 0 +-0.173527163114286 0 0 +0.101963782514286 0 0 +0.377454728142857 0 0 +0.652945673771428 0 0 +0.9284366194 0 0 diff --git a/python/lammps.py 
b/python/lammps.py index a36abb87e8..d428a097a8 100644 --- a/python/lammps.py +++ b/python/lammps.py @@ -30,7 +30,7 @@ from collections import namedtuple import os import select import re - +import sys class MPIAbortException(Exception): def __init__(self, message): @@ -151,9 +151,16 @@ class lammps(object): else: # magic to convert ptr to ctypes ptr - pythonapi.PyCObject_AsVoidPtr.restype = c_void_p - pythonapi.PyCObject_AsVoidPtr.argtypes = [py_object] - self.lmp = c_void_p(pythonapi.PyCObject_AsVoidPtr(ptr)) + if sys.version_info >= (3, 0): + # Python 3 (uses PyCapsule API) + pythonapi.PyCapsule_GetPointer.restype = c_void_p + pythonapi.PyCapsule_GetPointer.argtypes = [py_object, c_char_p] + self.lmp = c_void_p(pythonapi.PyCapsule_GetPointer(ptr, None)) + else: + # Python 2 (uses PyCObject API) + pythonapi.PyCObject_AsVoidPtr.restype = c_void_p + pythonapi.PyCObject_AsVoidPtr.argtypes = [py_object] + self.lmp = c_void_p(pythonapi.PyCObject_AsVoidPtr(ptr)) def __del__(self): if self.lmp and self.opened: @@ -305,7 +312,7 @@ class lammps(object): def set_variable(self,name,value): if name: name = name.encode() if value: value = str(value).encode() - return self.lib.lammps_set_variable(self.lmp,name,str(value)) + return self.lib.lammps_set_variable(self.lmp,name,value) # return current value of thermo keyword diff --git a/src/.gitignore b/src/.gitignore index 97bc2276b0..0cddfa6951 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -47,8 +47,6 @@ /dump_molfile.h /molfile_interface.cpp /molfile_interface.h -/molfile_plugin.h -/vmdplugin.h /type_detector.h /intel_buffers.cpp @@ -76,8 +74,8 @@ /pair_awpmd_cut.cpp /pair_awpmd_cut.h -/dihedral_charmmfsh.cpp -/dihedral_charmmfsh.h +/dihedral_charmmfsw.cpp +/dihedral_charmmfsw.h /pair_lj_charmmfsw_coul_charmmfsh.cpp /pair_lj_charmmfsw_coul_charmmfsh.h /pair_lj_charmmfsw_coul_long.cpp @@ -163,6 +161,8 @@ /bond_nonlinear.h /bond_oxdna_fene.cpp /bond_oxdna_fene.h +/bond_oxdna2_fene.cpp +/bond_oxdna2_fene.h /bond_quartic.cpp 
/bond_quartic.h /bond_table.cpp @@ -281,14 +281,14 @@ /dump_custom_gz.h /dump_custom_mpiio.cpp /dump_custom_mpiio.h -/dump_custom_vtk.cpp -/dump_custom_vtk.h /dump_h5md.cpp /dump_h5md.h -/dump_nc.cpp -/dump_nc.h -/dump_nc_mpiio.cpp -/dump_nc_mpiio.h +/dump_netcdf.cpp +/dump_netcdf.h +/dump_netcdf_mpiio.cpp +/dump_netcdf_mpiio.h +/dump_vtk.cpp +/dump_vtk.h /dump_xtc.cpp /dump_xtc.h /dump_xyz_mpiio.cpp @@ -461,8 +461,8 @@ /fix_qmmm.h /fix_reax_bonds.cpp /fix_reax_bonds.h -/fix_reax_c.cpp -/fix_reax_c.h +/fix_reaxc.cpp +/fix_reaxc.h /fix_reaxc_bonds.cpp /fix_reaxc_bonds.h /fix_reaxc_species.cpp @@ -637,6 +637,8 @@ /pair_eam_fs_opt.h /pair_edip.cpp /pair_edip.h +/pair_edip_multi.cpp +/pair_edip_multi.h /pair_eff_cut.cpp /pair_eff_cut.h /pair_eff_inline.h @@ -770,6 +772,8 @@ /pair_nm_cut_coul_long.h /pair_oxdna_*.cpp /pair_oxdna_*.h +/pair_oxdna2_*.cpp +/pair_oxdna2_*.h /mf_oxdna.h /pair_peri_eps.cpp /pair_peri_eps.h @@ -782,8 +786,8 @@ /pair_reax.cpp /pair_reax.h /pair_reax_fortran.h -/pair_reax_c.cpp -/pair_reax_c.h +/pair_reaxc.cpp +/pair_reaxc.h /pair_rebo.cpp /pair_rebo.h /pair_resquared.cpp @@ -846,8 +850,13 @@ /pppm_tip4p_cg.h /prd.cpp /prd.h -/python.cpp -/python.h +/python_impl.cpp +/python_impl.h +/python_compat.h +/fix_python.cpp +/fix_python.h +/pair_python.cpp +/pair_python.h /reader_molfile.cpp /reader_molfile.h /reaxc_allocate.cpp diff --git a/src/ASPHERE/compute_temp_asphere.cpp b/src/ASPHERE/compute_temp_asphere.cpp index 029b76cb27..b6d37db6ce 100644 --- a/src/ASPHERE/compute_temp_asphere.cpp +++ b/src/ASPHERE/compute_temp_asphere.cpp @@ -73,6 +73,11 @@ ComputeTempAsphere::ComputeTempAsphere(LAMMPS *lmp, int narg, char **arg) : } else error->all(FLERR,"Illegal compute temp/asphere command"); } + // when computing only the rotational temperature, + // do not remove DOFs for translation as set by default + + if (mode == ROTATE) extra_dof = 0; + vector = new double[6]; } @@ -391,6 +396,15 @@ void ComputeTempAsphere::remove_bias(int i, double *v) if 
(tbias) tbias->remove_bias(i,v); } +/* ---------------------------------------------------------------------- + remove velocity bias from atom I to leave thermal velocity +------------------------------------------------------------------------- */ + +void ComputeTempAsphere::remove_bias_thr(int i, double *v, double *b) +{ + if (tbias) tbias->remove_bias_thr(i,v,b); +} + /* ---------------------------------------------------------------------- add back in velocity bias to atom I removed by remove_bias() assume remove_bias() was previously called @@ -400,3 +414,13 @@ void ComputeTempAsphere::restore_bias(int i, double *v) { if (tbias) tbias->restore_bias(i,v); } + +/* ---------------------------------------------------------------------- + add back in velocity bias to atom I removed by remove_bias_thr() + assume remove_bias_thr() was previously called with the same buffer b +------------------------------------------------------------------------- */ + +void ComputeTempAsphere::restore_bias_thr(int i, double *v, double *b) +{ + if (tbias) tbias->restore_bias_thr(i,v,b); +} diff --git a/src/ASPHERE/compute_temp_asphere.h b/src/ASPHERE/compute_temp_asphere.h index d1cce38025..5ecbf8057a 100644 --- a/src/ASPHERE/compute_temp_asphere.h +++ b/src/ASPHERE/compute_temp_asphere.h @@ -35,6 +35,8 @@ class ComputeTempAsphere : public Compute { void remove_bias(int, double *); void restore_bias(int, double *); + void remove_bias_thr(int, double *, double *); + void restore_bias_thr(int, double *, double *); private: int mode; diff --git a/src/ASPHERE/pair_gayberne.cpp b/src/ASPHERE/pair_gayberne.cpp index bdff7a5cd6..25bdae14f1 100644 --- a/src/ASPHERE/pair_gayberne.cpp +++ b/src/ASPHERE/pair_gayberne.cpp @@ -281,7 +281,7 @@ void PairGayBerne::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git 
a/src/ASPHERE/pair_line_lj.cpp b/src/ASPHERE/pair_line_lj.cpp index 4e3df473a3..fc92ed4dc1 100644 --- a/src/ASPHERE/pair_line_lj.cpp +++ b/src/ASPHERE/pair_line_lj.cpp @@ -355,7 +355,7 @@ void PairLineLJ::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/ASPHERE/pair_resquared.cpp b/src/ASPHERE/pair_resquared.cpp index 172516aa49..ed9d9b36c4 100644 --- a/src/ASPHERE/pair_resquared.cpp +++ b/src/ASPHERE/pair_resquared.cpp @@ -253,7 +253,7 @@ void PairRESquared::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/ASPHERE/pair_tri_lj.cpp b/src/ASPHERE/pair_tri_lj.cpp index 773ad2d6a3..4f30b40e9a 100644 --- a/src/ASPHERE/pair_tri_lj.cpp +++ b/src/ASPHERE/pair_tri_lj.cpp @@ -426,7 +426,7 @@ void PairTriLJ::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/BODY/pair_body.cpp b/src/BODY/pair_body.cpp index 2a9edb37cc..b1be997310 100644 --- a/src/BODY/pair_body.cpp +++ b/src/BODY/pair_body.cpp @@ -372,7 +372,7 @@ void PairBody::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/CLASS2/pair_lj_class2.cpp b/src/CLASS2/pair_lj_class2.cpp index ee61aaae1f..e79dc0c6de 100644 --- a/src/CLASS2/pair_lj_class2.cpp +++ b/src/CLASS2/pair_lj_class2.cpp @@ -174,7 +174,7 @@ void PairLJClass2::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i 
<= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/CLASS2/pair_lj_class2_coul_cut.cpp b/src/CLASS2/pair_lj_class2_coul_cut.cpp index 45f0ccfe27..bec7f1da15 100644 --- a/src/CLASS2/pair_lj_class2_coul_cut.cpp +++ b/src/CLASS2/pair_lj_class2_coul_cut.cpp @@ -202,7 +202,7 @@ void PairLJClass2CoulCut::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) { cut_lj[i][j] = cut_lj_global; cut_coul[i][j] = cut_coul_global; diff --git a/src/CLASS2/pair_lj_class2_coul_long.cpp b/src/CLASS2/pair_lj_class2_coul_long.cpp index b58094713f..5f7d738e92 100644 --- a/src/CLASS2/pair_lj_class2_coul_long.cpp +++ b/src/CLASS2/pair_lj_class2_coul_long.cpp @@ -240,7 +240,7 @@ void PairLJClass2CoulLong::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut_lj[i][j] = cut_lj_global; } } diff --git a/src/COLLOID/pair_brownian.cpp b/src/COLLOID/pair_brownian.cpp index 84fda485a0..2bf01303b4 100644 --- a/src/COLLOID/pair_brownian.cpp +++ b/src/COLLOID/pair_brownian.cpp @@ -403,7 +403,7 @@ void PairBrownian::settings(int narg, char **arg) if (allocated) { for (int i = 1; i <= atom->ntypes; i++) - for (int j = i+1; j <= atom->ntypes; j++) + for (int j = i; j <= atom->ntypes; j++) if (setflag[i][j]) { cut_inner[i][j] = cut_inner_global; cut[i][j] = cut_global; diff --git a/src/COLLOID/pair_colloid.cpp b/src/COLLOID/pair_colloid.cpp index 440d6f9d4f..68150f6eff 100644 --- a/src/COLLOID/pair_colloid.cpp +++ b/src/COLLOID/pair_colloid.cpp @@ -256,7 +256,7 @@ void PairColloid::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = 
i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/COLLOID/pair_lubricate.cpp b/src/COLLOID/pair_lubricate.cpp index 71e08f3f19..93cb48a15d 100644 --- a/src/COLLOID/pair_lubricate.cpp +++ b/src/COLLOID/pair_lubricate.cpp @@ -489,7 +489,7 @@ void PairLubricate::settings(int narg, char **arg) if (allocated) { for (int i = 1; i <= atom->ntypes; i++) - for (int j = i+1; j <= atom->ntypes; j++) + for (int j = i; j <= atom->ntypes; j++) if (setflag[i][j]) { cut_inner[i][j] = cut_inner_global; cut[i][j] = cut_global; diff --git a/src/COLLOID/pair_lubricateU.cpp b/src/COLLOID/pair_lubricateU.cpp index a50473a194..5d0a4243a7 100644 --- a/src/COLLOID/pair_lubricateU.cpp +++ b/src/COLLOID/pair_lubricateU.cpp @@ -1707,7 +1707,7 @@ void PairLubricateU::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) { cut_inner[i][j] = cut_inner_global; cut[i][j] = cut_global; diff --git a/src/COLLOID/pair_lubricateU_poly.cpp b/src/COLLOID/pair_lubricateU_poly.cpp index 29e192cd94..428aa41cb6 100644 --- a/src/COLLOID/pair_lubricateU_poly.cpp +++ b/src/COLLOID/pair_lubricateU_poly.cpp @@ -1104,7 +1104,7 @@ void PairLubricateUPoly::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) { cut_inner[i][j] = cut_inner_global; cut[i][j] = cut_global; diff --git a/src/COMPRESS/Install.sh b/src/COMPRESS/Install.sh index ef1c8920c8..ab9dac33dc 100644 --- a/src/COMPRESS/Install.sh +++ b/src/COMPRESS/Install.sh @@ -29,7 +29,7 @@ action () { # all package files with no dependencies for file in *.cpp *.h; do - action $file + test -f ${file} && action $file done # edit 2 Makefile.package files to include/exclude package info diff --git a/src/DIPOLE/pair_lj_cut_dipole_cut.cpp 
b/src/DIPOLE/pair_lj_cut_dipole_cut.cpp index c57eb09e52..addd02e505 100644 --- a/src/DIPOLE/pair_lj_cut_dipole_cut.cpp +++ b/src/DIPOLE/pair_lj_cut_dipole_cut.cpp @@ -307,7 +307,7 @@ void PairLJCutDipoleCut::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) { cut_lj[i][j] = cut_lj_global; cut_coul[i][j] = cut_coul_global; diff --git a/src/DIPOLE/pair_lj_cut_dipole_long.cpp b/src/DIPOLE/pair_lj_cut_dipole_long.cpp index ae85b55ff7..78922e356f 100644 --- a/src/DIPOLE/pair_lj_cut_dipole_long.cpp +++ b/src/DIPOLE/pair_lj_cut_dipole_long.cpp @@ -140,174 +140,174 @@ void PairLJCutDipoleLong::compute(int eflag, int vflag) jtype = type[j]; if (rsq < cutsq[itype][jtype]) { - r2inv = 1.0/rsq; - rinv = sqrt(r2inv); + r2inv = 1.0/rsq; + rinv = sqrt(r2inv); - if (rsq < cut_coulsq) { - r = sqrt(rsq); - grij = g_ewald * r; - expm2 = exp(-grij*grij); - t = 1.0 / (1.0 + EWALD_P*grij); - erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + if (rsq < cut_coulsq) { + r = sqrt(rsq); + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; - pdotp = mu[i][0]*mu[j][0] + mu[i][1]*mu[j][1] + mu[i][2]*mu[j][2]; - pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz; - pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz; + pdotp = mu[i][0]*mu[j][0] + mu[i][1]*mu[j][1] + mu[i][2]*mu[j][2]; + pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz; + pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz; - g0 = qtmp*q[j]; - g1 = qtmp*pjdotr - q[j]*pidotr + pdotp; - g2 = -pidotr*pjdotr; + g0 = qtmp*q[j]; + g1 = qtmp*pjdotr - q[j]*pidotr + pdotp; + g2 = -pidotr*pjdotr; - if (factor_coul > 0.0) { - b0 = erfc * rinv; - b1 = (b0 + pre1*expm2) * r2inv; - b2 = (3.0*b1 + pre2*expm2) * r2inv; - b3 = (5.0*b2 + pre3*expm2) * r2inv; + if (factor_coul > 0.0) { + b0 = erfc * rinv; + b1 
= (b0 + pre1*expm2) * r2inv; + b2 = (3.0*b1 + pre2*expm2) * r2inv; + b3 = (5.0*b2 + pre3*expm2) * r2inv; - g0b1_g1b2_g2b3 = g0*b1 + g1*b2 + g2*b3; - fdx = delx * g0b1_g1b2_g2b3 - - b1 * (qtmp*mu[j][0] - q[j]*mu[i][0]) + - b2 * (pjdotr*mu[i][0] + pidotr*mu[j][0]); - fdy = dely * g0b1_g1b2_g2b3 - - b1 * (qtmp*mu[j][1] - q[j]*mu[i][1]) + - b2 * (pjdotr*mu[i][1] + pidotr*mu[j][1]); - fdz = delz * g0b1_g1b2_g2b3 - - b1 * (qtmp*mu[j][2] - q[j]*mu[i][2]) + - b2 * (pjdotr*mu[i][2] + pidotr*mu[j][2]); + g0b1_g1b2_g2b3 = g0*b1 + g1*b2 + g2*b3; + fdx = delx * g0b1_g1b2_g2b3 - + b1 * (qtmp*mu[j][0] - q[j]*mu[i][0]) + + b2 * (pjdotr*mu[i][0] + pidotr*mu[j][0]); + fdy = dely * g0b1_g1b2_g2b3 - + b1 * (qtmp*mu[j][1] - q[j]*mu[i][1]) + + b2 * (pjdotr*mu[i][1] + pidotr*mu[j][1]); + fdz = delz * g0b1_g1b2_g2b3 - + b1 * (qtmp*mu[j][2] - q[j]*mu[i][2]) + + b2 * (pjdotr*mu[i][2] + pidotr*mu[j][2]); - zdix = delx * (q[j]*b1 + b2*pjdotr) - b1*mu[j][0]; - zdiy = dely * (q[j]*b1 + b2*pjdotr) - b1*mu[j][1]; - zdiz = delz * (q[j]*b1 + b2*pjdotr) - b1*mu[j][2]; - zdjx = delx * (-qtmp*b1 + b2*pidotr) - b1*mu[i][0]; - zdjy = dely * (-qtmp*b1 + b2*pidotr) - b1*mu[i][1]; - zdjz = delz * (-qtmp*b1 + b2*pidotr) - b1*mu[i][2]; + zdix = delx * (q[j]*b1 + b2*pjdotr) - b1*mu[j][0]; + zdiy = dely * (q[j]*b1 + b2*pjdotr) - b1*mu[j][1]; + zdiz = delz * (q[j]*b1 + b2*pjdotr) - b1*mu[j][2]; + zdjx = delx * (-qtmp*b1 + b2*pidotr) - b1*mu[i][0]; + zdjy = dely * (-qtmp*b1 + b2*pidotr) - b1*mu[i][1]; + zdjz = delz * (-qtmp*b1 + b2*pidotr) - b1*mu[i][2]; - if (factor_coul < 1.0) { - fdx *= factor_coul; - fdy *= factor_coul; - fdz *= factor_coul; - zdix *= factor_coul; - zdiy *= factor_coul; - zdiz *= factor_coul; - zdjx *= factor_coul; - zdjy *= factor_coul; - zdjz *= factor_coul; - } - } else { - fdx = fdy = fdz = 0.0; - zdix = zdiy = zdiz = 0.0; - zdjx = zdjy = zdjz = 0.0; - } + if (factor_coul < 1.0) { + fdx *= factor_coul; + fdy *= factor_coul; + fdz *= factor_coul; + zdix *= factor_coul; + zdiy *= 
factor_coul; + zdiz *= factor_coul; + zdjx *= factor_coul; + zdjy *= factor_coul; + zdjz *= factor_coul; + } + } else { + fdx = fdy = fdz = 0.0; + zdix = zdiy = zdiz = 0.0; + zdjx = zdjy = zdjz = 0.0; + } - if (factor_coul < 1.0) { - d0 = (erfc - 1.0) * rinv; - d1 = (d0 + pre1*expm2) * r2inv; - d2 = (3.0*d1 + pre2*expm2) * r2inv; - d3 = (5.0*d2 + pre3*expm2) * r2inv; + if (factor_coul < 1.0) { + d0 = (erfc - 1.0) * rinv; + d1 = (d0 + pre1*expm2) * r2inv; + d2 = (3.0*d1 + pre2*expm2) * r2inv; + d3 = (5.0*d2 + pre3*expm2) * r2inv; - g0d1_g1d2_g2d3 = g0*d1 + g1*d2 + g2*d3; - fax = delx * g0d1_g1d2_g2d3 - - d1 * (qtmp*mu[j][0] - q[j]*mu[i][0]) + - d2 * (pjdotr*mu[i][0] + pidotr*mu[j][0]); - fay = dely * g0d1_g1d2_g2d3 - - d1 * (qtmp*mu[j][1] - q[j]*mu[i][1]) + - d2 * (pjdotr*mu[i][1] + pidotr*mu[j][1]); - faz = delz * g0d1_g1d2_g2d3 - - d1 * (qtmp*mu[j][2] - q[j]*mu[i][2]) + - d2 * (pjdotr*mu[i][2] + pidotr*mu[j][2]); + g0d1_g1d2_g2d3 = g0*d1 + g1*d2 + g2*d3; + fax = delx * g0d1_g1d2_g2d3 - + d1 * (qtmp*mu[j][0] - q[j]*mu[i][0]) + + d2 * (pjdotr*mu[i][0] + pidotr*mu[j][0]); + fay = dely * g0d1_g1d2_g2d3 - + d1 * (qtmp*mu[j][1] - q[j]*mu[i][1]) + + d2 * (pjdotr*mu[i][1] + pidotr*mu[j][1]); + faz = delz * g0d1_g1d2_g2d3 - + d1 * (qtmp*mu[j][2] - q[j]*mu[i][2]) + + d2 * (pjdotr*mu[i][2] + pidotr*mu[j][2]); - zaix = delx * (q[j]*d1 + d2*pjdotr) - d1*mu[j][0]; - zaiy = dely * (q[j]*d1 + d2*pjdotr) - d1*mu[j][1]; - zaiz = delz * (q[j]*d1 + d2*pjdotr) - d1*mu[j][2]; - zajx = delx * (-qtmp*d1 + d2*pidotr) - d1*mu[i][0]; - zajy = dely * (-qtmp*d1 + d2*pidotr) - d1*mu[i][1]; - zajz = delz * (-qtmp*d1 + d2*pidotr) - d1*mu[i][2]; + zaix = delx * (q[j]*d1 + d2*pjdotr) - d1*mu[j][0]; + zaiy = dely * (q[j]*d1 + d2*pjdotr) - d1*mu[j][1]; + zaiz = delz * (q[j]*d1 + d2*pjdotr) - d1*mu[j][2]; + zajx = delx * (-qtmp*d1 + d2*pidotr) - d1*mu[i][0]; + zajy = dely * (-qtmp*d1 + d2*pidotr) - d1*mu[i][1]; + zajz = delz * (-qtmp*d1 + d2*pidotr) - d1*mu[i][2]; - if (factor_coul > 0.0) { - facm1 = 
1.0 - factor_coul; - fax *= facm1; - fay *= facm1; - faz *= facm1; - zaix *= facm1; - zaiy *= facm1; - zaiz *= facm1; - zajx *= facm1; - zajy *= facm1; - zajz *= facm1; - } - } else { - fax = fay = faz = 0.0; - zaix = zaiy = zaiz = 0.0; - zajx = zajy = zajz = 0.0; - } + if (factor_coul > 0.0) { + facm1 = 1.0 - factor_coul; + fax *= facm1; + fay *= facm1; + faz *= facm1; + zaix *= facm1; + zaiy *= facm1; + zaiz *= facm1; + zajx *= facm1; + zajy *= facm1; + zajz *= facm1; + } + } else { + fax = fay = faz = 0.0; + zaix = zaiy = zaiz = 0.0; + zajx = zajy = zajz = 0.0; + } - forcecoulx = fdx + fax; - forcecouly = fdy + fay; - forcecoulz = fdz + faz; + forcecoulx = fdx + fax; + forcecouly = fdy + fay; + forcecoulz = fdz + faz; - tixcoul = mu[i][1]*(zdiz + zaiz) - mu[i][2]*(zdiy + zaiy); - tiycoul = mu[i][2]*(zdix + zaix) - mu[i][0]*(zdiz + zaiz); - tizcoul = mu[i][0]*(zdiy + zaiy) - mu[i][1]*(zdix + zaix); - tjxcoul = mu[j][1]*(zdjz + zajz) - mu[j][2]*(zdjy + zajy); - tjycoul = mu[j][2]*(zdjx + zajx) - mu[j][0]*(zdjz + zajz); - tjzcoul = mu[j][0]*(zdjy + zajy) - mu[j][1]*(zdjx + zajx); + tixcoul = mu[i][1]*(zdiz + zaiz) - mu[i][2]*(zdiy + zaiy); + tiycoul = mu[i][2]*(zdix + zaix) - mu[i][0]*(zdiz + zaiz); + tizcoul = mu[i][0]*(zdiy + zaiy) - mu[i][1]*(zdix + zaix); + tjxcoul = mu[j][1]*(zdjz + zajz) - mu[j][2]*(zdjy + zajy); + tjycoul = mu[j][2]*(zdjx + zajx) - mu[j][0]*(zdjz + zajz); + tjzcoul = mu[j][0]*(zdjy + zajy) - mu[j][1]*(zdjx + zajx); - } else { - forcecoulx = forcecouly = forcecoulz = 0.0; - tixcoul = tiycoul = tizcoul = 0.0; - tjxcoul = tjycoul = tjzcoul = 0.0; - } - - // LJ interaction - - if (rsq < cut_ljsq[itype][jtype]) { - r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); - fforce = factor_lj * forcelj*r2inv; - } else fforce = 0.0; - - // total force - - fx = qqrd2e*forcecoulx + delx*fforce; - fy = qqrd2e*forcecouly + dely*fforce; - fz = qqrd2e*forcecoulz + delz*fforce; - - // force & torque accumulation - - 
f[i][0] += fx; - f[i][1] += fy; - f[i][2] += fz; - torque[i][0] += qqrd2e*tixcoul; - torque[i][1] += qqrd2e*tiycoul; - torque[i][2] += qqrd2e*tizcoul; - - if (newton_pair || j < nlocal) { - f[j][0] -= fx; - f[j][1] -= fy; - f[j][2] -= fz; - torque[j][0] += qqrd2e*tjxcoul; - torque[j][1] += qqrd2e*tjycoul; - torque[j][2] += qqrd2e*tjzcoul; - } - - if (eflag) { - if (rsq < cut_coulsq && factor_coul > 0.0) { - ecoul = qqrd2e*(b0*g0 + b1*g1 + b2*g2); - if (factor_coul < 1.0) { - ecoul *= factor_coul; - ecoul += (1-factor_coul) * qqrd2e * (d0*g0 + d1*g1 + d2*g2); + } else { + forcecoulx = forcecouly = forcecoulz = 0.0; + tixcoul = tiycoul = tizcoul = 0.0; + tjxcoul = tjycoul = tjzcoul = 0.0; } - } else ecoul = 0.0; - if (rsq < cut_ljsq[itype][jtype]) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - - offset[itype][jtype]; - evdwl *= factor_lj; - } else evdwl = 0.0; - } + // LJ interaction - if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair, - evdwl,ecoul,fx,fy,fz,delx,dely,delz); + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + fforce = factor_lj * forcelj*r2inv; + } else fforce = 0.0; + + // total force + + fx = qqrd2e*forcecoulx + delx*fforce; + fy = qqrd2e*forcecouly + dely*fforce; + fz = qqrd2e*forcecoulz + delz*fforce; + + // force & torque accumulation + + f[i][0] += fx; + f[i][1] += fy; + f[i][2] += fz; + torque[i][0] += qqrd2e*tixcoul; + torque[i][1] += qqrd2e*tiycoul; + torque[i][2] += qqrd2e*tizcoul; + + if (newton_pair || j < nlocal) { + f[j][0] -= fx; + f[j][1] -= fy; + f[j][2] -= fz; + torque[j][0] += qqrd2e*tjxcoul; + torque[j][1] += qqrd2e*tjycoul; + torque[j][2] += qqrd2e*tjzcoul; + } + + if (eflag) { + if (rsq < cut_coulsq && factor_coul > 0.0) { + ecoul = qqrd2e*(b0*g0 + b1*g1 + b2*g2); + if (factor_coul < 1.0) { + ecoul *= factor_coul; + ecoul += (1-factor_coul) * qqrd2e * (d0*g0 + d1*g1 + d2*g2); + } + } else ecoul = 0.0; + + if (rsq < 
cut_ljsq[itype][jtype]) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - + offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair, + evdwl,ecoul,fx,fy,fz,delx,dely,delz); } } } @@ -360,8 +360,8 @@ void PairLJCutDipoleLong::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) - if (setflag[i][j]) cut_lj[i][j] = cut_lj_global; + for (j = i; j <= atom->ntypes; j++) + if (setflag[i][j]) cut_lj[i][j] = cut_lj_global; } } @@ -407,7 +407,7 @@ double PairLJCutDipoleLong::init_one(int i, int j) { if (setflag[i][j] == 0) { epsilon[i][j] = mix_energy(epsilon[i][i],epsilon[j][j], - sigma[i][i],sigma[j][j]); + sigma[i][i],sigma[j][j]); sigma[i][j] = mix_distance(sigma[i][i],sigma[j][j]); cut_lj[i][j] = mix_distance(cut_lj[i][i],cut_lj[j][j]); } @@ -472,9 +472,9 @@ void PairLJCutDipoleLong::write_restart(FILE *fp) for (j = i; j <= atom->ntypes; j++) { fwrite(&setflag[i][j],sizeof(int),1,fp); if (setflag[i][j]) { - fwrite(&epsilon[i][j],sizeof(double),1,fp); - fwrite(&sigma[i][j],sizeof(double),1,fp); - fwrite(&cut_lj[i][j],sizeof(double),1,fp); + fwrite(&epsilon[i][j],sizeof(double),1,fp); + fwrite(&sigma[i][j],sizeof(double),1,fp); + fwrite(&cut_lj[i][j],sizeof(double),1,fp); } } } @@ -496,14 +496,14 @@ void PairLJCutDipoleLong::read_restart(FILE *fp) if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp); MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world); if (setflag[i][j]) { - if (me == 0) { - fread(&epsilon[i][j],sizeof(double),1,fp); - fread(&sigma[i][j],sizeof(double),1,fp); - fread(&cut_lj[i][j],sizeof(double),1,fp); - } - MPI_Bcast(&epsilon[i][j],1,MPI_DOUBLE,0,world); - MPI_Bcast(&sigma[i][j],1,MPI_DOUBLE,0,world); - MPI_Bcast(&cut_lj[i][j],1,MPI_DOUBLE,0,world); + if (me == 0) { + fread(&epsilon[i][j],sizeof(double),1,fp); + fread(&sigma[i][j],sizeof(double),1,fp); + fread(&cut_lj[i][j],sizeof(double),1,fp); + } + 
MPI_Bcast(&epsilon[i][j],1,MPI_DOUBLE,0,world); + MPI_Bcast(&sigma[i][j],1,MPI_DOUBLE,0,world); + MPI_Bcast(&cut_lj[i][j],1,MPI_DOUBLE,0,world); } } } diff --git a/src/DIPOLE/pair_lj_long_dipole_long.cpp b/src/DIPOLE/pair_lj_long_dipole_long.cpp index ef865b66cd..15ac2e788c 100644 --- a/src/DIPOLE/pair_lj_long_dipole_long.cpp +++ b/src/DIPOLE/pair_lj_long_dipole_long.cpp @@ -102,8 +102,8 @@ void PairLJLongDipoleLong::settings(int narg, char **arg) if (allocated) { // reset explicit cuts int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) - if (setflag[i][j]) cut_lj[i][j] = cut_lj_global; + for (j = i; j <= atom->ntypes; j++) + if (setflag[i][j]) cut_lj[i][j] = cut_lj_global; } } @@ -343,9 +343,9 @@ void PairLJLongDipoleLong::write_restart(FILE *fp) for (j = i; j <= atom->ntypes; j++) { fwrite(&setflag[i][j],sizeof(int),1,fp); if (setflag[i][j]) { - fwrite(&epsilon_read[i][j],sizeof(double),1,fp); - fwrite(&sigma_read[i][j],sizeof(double),1,fp); - fwrite(&cut_lj_read[i][j],sizeof(double),1,fp); + fwrite(&epsilon_read[i][j],sizeof(double),1,fp); + fwrite(&sigma_read[i][j],sizeof(double),1,fp); + fwrite(&cut_lj_read[i][j],sizeof(double),1,fp); } } } @@ -367,14 +367,14 @@ void PairLJLongDipoleLong::read_restart(FILE *fp) if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp); MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world); if (setflag[i][j]) { - if (me == 0) { - fread(&epsilon_read[i][j],sizeof(double),1,fp); - fread(&sigma_read[i][j],sizeof(double),1,fp); - fread(&cut_lj_read[i][j],sizeof(double),1,fp); - } - MPI_Bcast(&epsilon_read[i][j],1,MPI_DOUBLE,0,world); - MPI_Bcast(&sigma_read[i][j],1,MPI_DOUBLE,0,world); - MPI_Bcast(&cut_lj_read[i][j],1,MPI_DOUBLE,0,world); + if (me == 0) { + fread(&epsilon_read[i][j],sizeof(double),1,fp); + fread(&sigma_read[i][j],sizeof(double),1,fp); + fread(&cut_lj_read[i][j],sizeof(double),1,fp); + } + MPI_Bcast(&epsilon_read[i][j],1,MPI_DOUBLE,0,world); + MPI_Bcast(&sigma_read[i][j],1,MPI_DOUBLE,0,world); + 
MPI_Bcast(&cut_lj_read[i][j],1,MPI_DOUBLE,0,world); } } } diff --git a/src/Depend.sh b/src/Depend.sh index 951361d2d1..7eda5e1fd5 100644 --- a/src/Depend.sh +++ b/src/Depend.sh @@ -109,7 +109,7 @@ if (test $1 = "RIGID") then depend USER-OMP fi -if (test $1 = "USER-CG-CMM") then +if (test $1 = "USER-CGSDK") then depend GPU depend KOKKOS depend USER-OMP diff --git a/src/GPU/pair_lj_sdk_coul_long_gpu.cpp b/src/GPU/pair_lj_sdk_coul_long_gpu.cpp index 0b8d0f3b31..77c0dc0660 100644 --- a/src/GPU/pair_lj_sdk_coul_long_gpu.cpp +++ b/src/GPU/pair_lj_sdk_coul_long_gpu.cpp @@ -48,7 +48,7 @@ using namespace LAMMPS_NS; // External functions from cuda library for atom decomposition -int cmml_gpu_init(const int ntypes, double **cutsq, int **lj_type, +int sdkl_gpu_init(const int ntypes, double **cutsq, int **lj_type, double **host_lj1, double **host_lj2, double **host_lj3, double **host_lj4, double **offset, double *special_lj, const int nlocal, const int nall, const int max_nbors, @@ -56,8 +56,8 @@ int cmml_gpu_init(const int ntypes, double **cutsq, int **lj_type, FILE *screen, double **host_cut_ljsq, double host_cut_coulsq, double *host_special_coul, const double qqrd2e, const double g_ewald); -void cmml_gpu_clear(); -int ** cmml_gpu_compute_n(const int ago, const int inum, const int nall, +void sdkl_gpu_clear(); +int ** sdkl_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x, int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial, tagint **special, const bool eflag, const bool vflag, @@ -65,13 +65,13 @@ int ** cmml_gpu_compute_n(const int ago, const int inum, const int nall, int **ilist, int **jnum, const double cpu_time, bool &success, double *host_q, double *boxlo, double *prd); -void cmml_gpu_compute(const int ago, const int inum, const int nall, +void sdkl_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag, const 
bool eatom, const bool vatom, int &host_start, const double cpu_time, bool &success, double *host_q, const int nlocal, double *boxlo, double *prd); -double cmml_gpu_bytes(); +double sdkl_gpu_bytes(); #include "lj_sdk_common.h" @@ -95,7 +95,7 @@ PairLJSDKCoulLongGPU::PairLJSDKCoulLongGPU(LAMMPS *lmp) : PairLJSDKCoulLongGPU::~PairLJSDKCoulLongGPU() { - cmml_gpu_clear(); + sdkl_gpu_clear(); } /* ---------------------------------------------------------------------- */ @@ -112,7 +112,7 @@ void PairLJSDKCoulLongGPU::compute(int eflag, int vflag) int *ilist, *numneigh, **firstneigh; if (gpu_mode != GPU_FORCE) { inum = atom->nlocal; - firstneigh = cmml_gpu_compute_n(neighbor->ago, inum, nall, atom->x, + firstneigh = sdkl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, domain->sublo, domain->subhi, atom->tag, atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom, @@ -124,7 +124,7 @@ void PairLJSDKCoulLongGPU::compute(int eflag, int vflag) ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; - cmml_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, + sdkl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q, atom->nlocal, domain->boxlo, domain->prd); @@ -185,7 +185,7 @@ void PairLJSDKCoulLongGPU::init_style() int maxspecial=0; if (atom->molecular) maxspecial=atom->maxspecial; - int success = cmml_gpu_init(atom->ntypes+1, cutsq, lj_type, lj1, lj2, lj3, + int success = sdkl_gpu_init(atom->ntypes+1, cutsq, lj_type, lj1, lj2, lj3, lj4, offset, force->special_lj, atom->nlocal, atom->nlocal+atom->nghost, 300, maxspecial, cell_size, gpu_mode, screen, cut_ljsq, @@ -205,7 +205,7 @@ void PairLJSDKCoulLongGPU::init_style() double PairLJSDKCoulLongGPU::memory_usage() { double bytes = Pair::memory_usage(); - return bytes + cmml_gpu_bytes(); + return bytes + sdkl_gpu_bytes(); } /* 
---------------------------------------------------------------------- */ diff --git a/src/GPU/pair_lj_sdk_coul_long_gpu.h b/src/GPU/pair_lj_sdk_coul_long_gpu.h index 61de272979..3248e94977 100644 --- a/src/GPU/pair_lj_sdk_coul_long_gpu.h +++ b/src/GPU/pair_lj_sdk_coul_long_gpu.h @@ -14,7 +14,6 @@ #ifdef PAIR_CLASS PairStyle(lj/sdk/coul/long/gpu,PairLJSDKCoulLongGPU) -PairStyle(cg/cmm/coul/long/gpu,PairLJSDKCoulLongGPU) #else diff --git a/src/GPU/pair_lj_sdk_gpu.cpp b/src/GPU/pair_lj_sdk_gpu.cpp index e7e9b690f3..67103181d5 100644 --- a/src/GPU/pair_lj_sdk_gpu.cpp +++ b/src/GPU/pair_lj_sdk_gpu.cpp @@ -39,26 +39,26 @@ using namespace LAMMPS_NS; // External functions from cuda library for atom decomposition -int cmm_gpu_init(const int ntypes, double **cutsq, int **cg_types, +int sdk_gpu_init(const int ntypes, double **cutsq, int **cg_types, double **host_lj1, double **host_lj2, double **host_lj3, double **host_lj4, double **offset, double *special_lj, const int nlocal, const int nall, const int max_nbors, const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen); -void cmm_gpu_clear(); -int ** cmm_gpu_compute_n(const int ago, const int inum, const int nall, +void sdk_gpu_clear(); +int ** sdk_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x, int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial, tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success); -void cmm_gpu_compute(const int ago, const int inum, const int nall, +void sdk_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, const double cpu_time, bool &success); -double cmm_gpu_bytes(); +double sdk_gpu_bytes(); #include "lj_sdk_common.h" @@ -80,7 +80,7 @@ 
PairLJSDKGPU::PairLJSDKGPU(LAMMPS *lmp) : PairLJSDK(lmp), gpu_mode(GPU_FORCE) PairLJSDKGPU::~PairLJSDKGPU() { - cmm_gpu_clear(); + sdk_gpu_clear(); } /* ---------------------------------------------------------------------- */ @@ -97,7 +97,7 @@ void PairLJSDKGPU::compute(int eflag, int vflag) int *ilist, *numneigh, **firstneigh; if (gpu_mode != GPU_FORCE) { inum = atom->nlocal; - firstneigh = cmm_gpu_compute_n(neighbor->ago, inum, nall, atom->x, + firstneigh = sdk_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, domain->sublo, domain->subhi, atom->tag, atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom, @@ -108,7 +108,7 @@ void PairLJSDKGPU::compute(int eflag, int vflag) ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; - cmm_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, + sdk_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success); } @@ -154,7 +154,7 @@ void PairLJSDKGPU::init_style() int maxspecial=0; if (atom->molecular) maxspecial=atom->maxspecial; - int success = cmm_gpu_init(atom->ntypes+1,cutsq,lj_type,lj1,lj2,lj3,lj4, + int success = sdk_gpu_init(atom->ntypes+1,cutsq,lj_type,lj1,lj2,lj3,lj4, offset, force->special_lj, atom->nlocal, atom->nlocal+atom->nghost, 300, maxspecial, cell_size, gpu_mode, screen); @@ -172,7 +172,7 @@ void PairLJSDKGPU::init_style() double PairLJSDKGPU::memory_usage() { double bytes = Pair::memory_usage(); - return bytes + cmm_gpu_bytes(); + return bytes + sdk_gpu_bytes(); } /* ---------------------------------------------------------------------- */ diff --git a/src/GPU/pair_lj_sdk_gpu.h b/src/GPU/pair_lj_sdk_gpu.h index 610fb8b0e4..3865b34046 100644 --- a/src/GPU/pair_lj_sdk_gpu.h +++ b/src/GPU/pair_lj_sdk_gpu.h @@ -14,7 +14,6 @@ #ifdef PAIR_CLASS PairStyle(lj/sdk/gpu,PairLJSDKGPU) -PairStyle(cg/cmm/gpu,PairLJSDKGPU) #else diff --git a/src/Install.sh 
b/src/Install.sh index 307188a09f..e9c8b80595 100644 --- a/src/Install.sh +++ b/src/Install.sh @@ -33,5 +33,5 @@ action () { # all package files with no dependencies for file in *.cpp *.h; do - action $file + test -f ${file} && action $file done diff --git a/src/KIM/Install.sh b/src/KIM/Install.sh index bac9d97cc6..7ddb9c8227 100644 --- a/src/KIM/Install.sh +++ b/src/KIM/Install.sh @@ -29,7 +29,7 @@ action () { # all package files with no dependencies for file in *.cpp *.h; do - action $file + test -f ${file} && action $file done # edit 2 Makefile.package files to include/exclude package info diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 9c11e9321b..df5fc3e5f1 100644 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -213,8 +213,8 @@ action pair_morse_kokkos.cpp action pair_morse_kokkos.h action pair_multi_lucy_rx_kokkos.cpp pair_multi_lucy_rx.cpp action pair_multi_lucy_rx_kokkos.h pair_multi_lucy_rx.h -action pair_reax_c_kokkos.cpp pair_reax_c.cpp -action pair_reax_c_kokkos.h pair_reax_c.h +action pair_reaxc_kokkos.cpp pair_reaxc.cpp +action pair_reaxc_kokkos.h pair_reaxc.h action pair_sw_kokkos.cpp pair_sw.cpp action pair_sw_kokkos.h pair_sw.h action pair_vashishta_kokkos.cpp pair_vashishta.cpp diff --git a/src/KOKKOS/atom_vec_angle_kokkos.cpp b/src/KOKKOS/atom_vec_angle_kokkos.cpp index 48fc3a352c..34b868aadc 100644 --- a/src/KOKKOS/atom_vec_angle_kokkos.cpp +++ b/src/KOKKOS/atom_vec_angle_kokkos.cpp @@ -80,22 +80,22 @@ void AtomVecAngleKokkos::grow(int n) memory->grow_kokkos(atomKK->k_molecule,atomKK->molecule,nmax,"atom:molecule"); memory->grow_kokkos(atomKK->k_nspecial,atomKK->nspecial,nmax,3,"atom:nspecial"); memory->grow_kokkos(atomKK->k_special,atomKK->special,nmax,atomKK->maxspecial, - "atom:special"); + "atom:special"); memory->grow_kokkos(atomKK->k_num_bond,atomKK->num_bond,nmax,"atom:num_bond"); memory->grow_kokkos(atomKK->k_bond_type,atomKK->bond_type,nmax,atomKK->bond_per_atom, - "atom:bond_type"); + "atom:bond_type"); 
memory->grow_kokkos(atomKK->k_bond_atom,atomKK->bond_atom,nmax,atomKK->bond_per_atom, - "atom:bond_atom"); + "atom:bond_atom"); memory->grow_kokkos(atomKK->k_num_angle,atomKK->num_angle,nmax,"atom:num_angle"); memory->grow_kokkos(atomKK->k_angle_type,atomKK->angle_type,nmax,atomKK->angle_per_atom, - "atom:angle_type"); + "atom:angle_type"); memory->grow_kokkos(atomKK->k_angle_atom1,atomKK->angle_atom1,nmax,atomKK->angle_per_atom, - "atom:angle_atom1"); + "atom:angle_atom1"); memory->grow_kokkos(atomKK->k_angle_atom2,atomKK->angle_atom2,nmax,atomKK->angle_per_atom, - "atom:angle_atom2"); + "atom:angle_atom2"); memory->grow_kokkos(atomKK->k_angle_atom3,atomKK->angle_atom3,nmax,atomKK->angle_per_atom, - "atom:angle_atom3"); + "atom:angle_atom3"); grow_reset(); sync(Host,ALL_MASK); @@ -241,7 +241,7 @@ struct AtomVecAngleKokkos_PackComm { _xprd(xprd),_yprd(yprd),_zprd(zprd), _xy(xy),_xz(xz),_yz(yz) { const size_t maxsend = (buf.view().dimension_0() - *buf.view().dimension_1())/3; + *buf.view().dimension_1())/3; const size_t elements = 3; buffer_view(_buf,buf,maxsend,elements); _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; @@ -272,11 +272,11 @@ struct AtomVecAngleKokkos_PackComm { /* ---------------------------------------------------------------------- */ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n, - const DAT::tdual_int_2d &list, - const int & iswap, - const DAT::tdual_xfloat_2d &buf, - const int &pbc_flag, - const int* const pbc) + const DAT::tdual_int_2d &list, + const int & iswap, + const DAT::tdual_xfloat_2d &buf, + const int &pbc_flag, + const int* const pbc) { // Check whether to always run forward communication on the host // Choose correct forward PackComm kernel @@ -339,7 +339,7 @@ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n, LMPDeviceType::fence(); } - return n*size_forward; + return n*size_forward; } /* ---------------------------------------------------------------------- */ @@ -714,18 +714,18 @@ struct 
AtomVecAngleKokkos_PackBorder { _buf(i,0) = _x(j,0); _buf(i,1) = _x(j,1); _buf(i,2) = _x(j,2); - _buf(i,3) = _tag(j); - _buf(i,4) = _type(j); - _buf(i,5) = _mask(j); - _buf(i,6) = _molecule(j); + _buf(i,3) = d_ubuf(_tag(j)).d; + _buf(i,4) = d_ubuf(_type(j)).d; + _buf(i,5) = d_ubuf(_mask(j)).d; + _buf(i,6) = d_ubuf(_molecule(j)).d; } else { _buf(i,0) = _x(j,0) + _dx; _buf(i,1) = _x(j,1) + _dy; _buf(i,2) = _x(j,2) + _dz; - _buf(i,3) = _tag(j); - _buf(i,4) = _type(j); - _buf(i,5) = _mask(j); - _buf(i,6) = _molecule(j); + _buf(i,3) = d_ubuf(_tag(j)).d; + _buf(i,4) = d_ubuf(_type(j)).d; + _buf(i,5) = d_ubuf(_mask(j)).d; + _buf(i,6) = d_ubuf(_molecule(j)).d; } } }; @@ -957,10 +957,10 @@ struct AtomVecAngleKokkos_UnpackBorder { _x(i+_first,0) = _buf(i,0); _x(i+_first,1) = _buf(i,1); _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = static_cast (_buf(i,3)); - _type(i+_first) = static_cast (_buf(i,4)); - _mask(i+_first) = static_cast (_buf(i,5)); - _molecule(i+_first) = static_cast (_buf(i,6)); + _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; + _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; + _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; + _molecule(i+_first) = (tagint) d_ubuf(_buf(i,6)).i; } }; @@ -1165,28 +1165,28 @@ struct AtomVecAngleKokkos_PackExchangeFunctor { _buf(mysend,m++) = _v(i,0); _buf(mysend,m++) = _v(i,1); _buf(mysend,m++) = _v(i,2); - _buf(mysend,m++) = _tag(i); - _buf(mysend,m++) = _type(i); - _buf(mysend,m++) = _mask(i); - _buf(mysend,m++) = _image(i); - _buf(mysend,m++) = _molecule(i); - _buf(mysend,m++) = _num_bond(i); + _buf(mysend,m++) = d_ubuf(_tag(i)).d; + _buf(mysend,m++) = d_ubuf(_type(i)).d; + _buf(mysend,m++) = d_ubuf(_mask(i)).d; + _buf(mysend,m++) = d_ubuf(_image(i)).d; + _buf(mysend,m++) = d_ubuf(_molecule(i)).d; + _buf(mysend,m++) = d_ubuf(_num_bond(i)).d; for (k = 0; k < _num_bond(i); k++) { - _buf(mysend,m++) = _bond_type(i,k); - _buf(mysend,m++) = _bond_atom(i,k); + _buf(mysend,m++) = d_ubuf(_bond_type(i,k)).d; + _buf(mysend,m++) = 
d_ubuf(_bond_atom(i,k)).d; } - _buf(mysend,m++) = _num_angle(i); + _buf(mysend,m++) = d_ubuf(_num_angle(i)).d; for (k = 0; k < _num_angle(i); k++) { - _buf(mysend,m++) = _angle_type(i,k); - _buf(mysend,m++) = _angle_atom1(i,k); - _buf(mysend,m++) = _angle_atom2(i,k); - _buf(mysend,m++) = _angle_atom3(i,k); + _buf(mysend,m++) = d_ubuf(_angle_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_angle_atom1(i,k)).d; + _buf(mysend,m++) = d_ubuf(_angle_atom2(i,k)).d; + _buf(mysend,m++) = d_ubuf(_angle_atom3(i,k)).d; } - _buf(mysend,m++) = _nspecial(i,0); - _buf(mysend,m++) = _nspecial(i,1); - _buf(mysend,m++) = _nspecial(i,2); + _buf(mysend,m++) = d_ubuf(_nspecial(i,0)).d; + _buf(mysend,m++) = d_ubuf(_nspecial(i,1)).d; + _buf(mysend,m++) = d_ubuf(_nspecial(i,2)).d; for (k = 0; k < _nspecial(i,2); k++) - _buf(mysend,m++) = _special(i,k); + _buf(mysend,m++) = d_ubuf(_special(i,k)).d; const int j = _copylist(mysend); @@ -1350,7 +1350,7 @@ struct AtomVecAngleKokkos_UnpackExchangeFunctor { _lo(lo),_hi(hi){ elements =17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom; const int maxsendlist = (buf.template view().dimension_0()* - buf.template view().dimension_1())/elements; + buf.template view().dimension_1())/elements; buffer_view(_buf,buf,maxsendlist,elements); } @@ -1366,30 +1366,30 @@ struct AtomVecAngleKokkos_UnpackExchangeFunctor { _v(i,0) = _buf(myrecv,m++); _v(i,1) = _buf(myrecv,m++); _v(i,2) = _buf(myrecv,m++); - _tag(i) = _buf(myrecv,m++); - _type(i) = _buf(myrecv,m++); - _mask(i) = _buf(myrecv,m++); - _image(i) = _buf(myrecv,m++); + _tag(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _type(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + _mask(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + _image(i) = (imageint) d_ubuf(_buf(myrecv,m++)).i; - _molecule(i) = _buf(myrecv,m++); - _num_bond(i) = _buf(myrecv,m++); + _molecule(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _num_bond(i) = (int) d_ubuf(_buf(myrecv,m++)).i; int k; for (k = 0; k < _num_bond(i); k++) { - _bond_type(i,k) = 
_buf(myrecv,m++); - _bond_atom(i,k) = _buf(myrecv,m++); + _bond_type(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _bond_atom(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; } - _num_angle(i) = _buf(myrecv,m++); + _num_angle(i) = (int) d_ubuf(_buf(myrecv,m++)).i; for (k = 0; k < _num_angle(i); k++) { - _angle_type(i,k) = _buf(myrecv,m++); - _angle_atom1(i,k) = _buf(myrecv,m++); - _angle_atom2(i,k) = _buf(myrecv,m++); - _angle_atom3(i,k) = _buf(myrecv,m++); + _angle_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _angle_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _angle_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _angle_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; } - _nspecial(i,0) = _buf(myrecv,m++); - _nspecial(i,1) = _buf(myrecv,m++); - _nspecial(i,2) = _buf(myrecv,m++); + _nspecial(i,0) = (int) d_ubuf(_buf(myrecv,m++)).i; + _nspecial(i,1) = (int) d_ubuf(_buf(myrecv,m++)).i; + _nspecial(i,2) = (int) d_ubuf(_buf(myrecv,m++)).i; for (k = 0; k < _nspecial(i,2); k++) - _special(i,k) = _buf(myrecv,m++); + _special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; } } }; diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp index dc254e6a7e..d040bd3553 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp +++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp @@ -619,16 +619,16 @@ struct AtomVecAtomicKokkos_PackBorder { _buf(i,0) = _x(j,0); _buf(i,1) = _x(j,1); _buf(i,2) = _x(j,2); - _buf(i,3) = _tag(j); - _buf(i,4) = _type(j); - _buf(i,5) = _mask(j); + _buf(i,3) = d_ubuf(_tag(j)).d; + _buf(i,4) = d_ubuf(_type(j)).d; + _buf(i,5) = d_ubuf(_mask(j)).d; } else { _buf(i,0) = _x(j,0) + _dx; _buf(i,1) = _x(j,1) + _dy; _buf(i,2) = _x(j,2) + _dz; - _buf(i,3) = _tag(j); - _buf(i,4) = _type(j); - _buf(i,5) = _mask(j); + _buf(i,3) = d_ubuf(_tag(j)).d; + _buf(i,4) = d_ubuf(_type(j)).d; + _buf(i,5) = d_ubuf(_mask(j)).d; } } }; @@ -836,9 +836,9 @@ struct AtomVecAtomicKokkos_UnpackBorder { _x(i+_first,0) = _buf(i,0); _x(i+_first,1) = _buf(i,1); 
_x(i+_first,2) = _buf(i,2); - _tag(i+_first) = static_cast (_buf(i,3)); - _type(i+_first) = static_cast (_buf(i,4)); - _mask(i+_first) = static_cast (_buf(i,5)); + _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; + _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; + _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; // printf("%i %i %lf %lf %lf %i BORDER\n",_tag(i+_first),i+_first,_x(i+_first,0),_x(i+_first,1),_x(i+_first,2),_type(i+_first)); } }; @@ -977,10 +977,10 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor { _buf(mysend,4) = _v(i,0); _buf(mysend,5) = _v(i,1); _buf(mysend,6) = _v(i,2); - _buf(mysend,7) = _tag[i]; - _buf(mysend,8) = _type[i]; - _buf(mysend,9) = _mask[i]; - _buf(mysend,10) = _image[i]; + _buf(mysend,7) = d_ubuf(_tag[i]).d; + _buf(mysend,8) = d_ubuf(_type[i]).d; + _buf(mysend,9) = d_ubuf(_mask[i]).d; + _buf(mysend,10) = d_ubuf(_image[i]).d; const int j = _copylist(mysend); if(j>-1) { @@ -1091,10 +1091,10 @@ struct AtomVecAtomicKokkos_UnpackExchangeFunctor { _v(i,0) = _buf(myrecv,4); _v(i,1) = _buf(myrecv,5); _v(i,2) = _buf(myrecv,6); - _tag[i] = _buf(myrecv,7); - _type[i] = _buf(myrecv,8); - _mask[i] = _buf(myrecv,9); - _image[i] = _buf(myrecv,10); + _tag[i] = (tagint) d_ubuf(_buf(myrecv,7)).i; + _type[i] = (int) d_ubuf(_buf(myrecv,8)).i; + _mask[i] = (int) d_ubuf(_buf(myrecv,9)).i; + _image[i] = (imageint) d_ubuf(_buf(myrecv,10)).i; } } }; diff --git a/src/KOKKOS/atom_vec_bond_kokkos.cpp b/src/KOKKOS/atom_vec_bond_kokkos.cpp index f10decac28..c46c49cb29 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.cpp +++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp @@ -662,18 +662,18 @@ struct AtomVecBondKokkos_PackBorder { _buf(i,0) = _x(j,0); _buf(i,1) = _x(j,1); _buf(i,2) = _x(j,2); - _buf(i,3) = _tag(j); - _buf(i,4) = _type(j); - _buf(i,5) = _mask(j); - _buf(i,6) = _molecule(j); + _buf(i,3) = d_ubuf(_tag(j)).d; + _buf(i,4) = d_ubuf(_type(j)).d; + _buf(i,5) = d_ubuf(_mask(j)).d; + _buf(i,6) = d_ubuf(_molecule(j)).d; } else { _buf(i,0) = _x(j,0) + _dx; _buf(i,1) = _x(j,1) + 
_dy; _buf(i,2) = _x(j,2) + _dz; - _buf(i,3) = _tag(j); - _buf(i,4) = _type(j); - _buf(i,5) = _mask(j); - _buf(i,6) = _molecule(j); + _buf(i,3) = d_ubuf(_tag(j)).d; + _buf(i,4) = d_ubuf(_type(j)).d; + _buf(i,5) = d_ubuf(_mask(j)).d; + _buf(i,6) = d_ubuf(_molecule(j)).d; } } }; @@ -905,10 +905,10 @@ struct AtomVecBondKokkos_UnpackBorder { _x(i+_first,0) = _buf(i,0); _x(i+_first,1) = _buf(i,1); _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = static_cast (_buf(i,3)); - _type(i+_first) = static_cast (_buf(i,4)); - _mask(i+_first) = static_cast (_buf(i,5)); - _molecule(i+_first) = static_cast (_buf(i,6)); + _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; + _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; + _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; + _molecule(i+_first) = (tagint) d_ubuf(_buf(i,6)).i; } }; @@ -1095,21 +1095,21 @@ struct AtomVecBondKokkos_PackExchangeFunctor { _buf(mysend,m++) = _v(i,0); _buf(mysend,m++) = _v(i,1); _buf(mysend,m++) = _v(i,2); - _buf(mysend,m++) = _tag(i); - _buf(mysend,m++) = _type(i); - _buf(mysend,m++) = _mask(i); - _buf(mysend,m++) = _image(i); - _buf(mysend,m++) = _molecule(i); - _buf(mysend,m++) = _num_bond(i); + _buf(mysend,m++) = d_ubuf(_tag(i)).d; + _buf(mysend,m++) = d_ubuf(_type(i)).d; + _buf(mysend,m++) = d_ubuf(_mask(i)).d; + _buf(mysend,m++) = d_ubuf(_image(i)).d; + _buf(mysend,m++) = d_ubuf(_molecule(i)).d; + _buf(mysend,m++) = d_ubuf(_num_bond(i)).d; for (k = 0; k < _num_bond(i); k++) { - _buf(mysend,m++) = _bond_type(i,k); - _buf(mysend,m++) = _bond_atom(i,k); + _buf(mysend,m++) = d_ubuf(_bond_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_bond_atom(i,k)).d; } - _buf(mysend,m++) = _nspecial(i,0); - _buf(mysend,m++) = _nspecial(i,1); - _buf(mysend,m++) = _nspecial(i,2); + _buf(mysend,m++) = d_ubuf(_nspecial(i,0)).d; + _buf(mysend,m++) = d_ubuf(_nspecial(i,1)).d; + _buf(mysend,m++) = d_ubuf(_nspecial(i,2)).d; for (k = 0; k < _nspecial(i,2); k++) - _buf(mysend,m++) = _special(i,k); + _buf(mysend,m++) = d_ubuf(_special(i,k)).d; const 
int j = _copylist(mysend); @@ -1267,23 +1267,23 @@ struct AtomVecBondKokkos_UnpackExchangeFunctor { _v(i,0) = _buf(myrecv,m++); _v(i,1) = _buf(myrecv,m++); _v(i,2) = _buf(myrecv,m++); - _tag(i) = _buf(myrecv,m++); - _type(i) = _buf(myrecv,m++); - _mask(i) = _buf(myrecv,m++); - _image(i) = _buf(myrecv,m++); + _tag(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _type(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + _mask(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + _image(i) = (imageint) d_ubuf(_buf(myrecv,m++)).i; - _molecule(i) = _buf(myrecv,m++); - _num_bond(i) = _buf(myrecv,m++); + _molecule(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _num_bond(i) = (int) d_ubuf(_buf(myrecv,m++)).i; int k; for (k = 0; k < _num_bond(i); k++) { - _bond_type(i,k) = _buf(myrecv,m++); - _bond_atom(i,k) = _buf(myrecv,m++); + _bond_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _bond_atom(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; } - _nspecial(i,0) = _buf(myrecv,m++); - _nspecial(i,1) = _buf(myrecv,m++); - _nspecial(i,2) = _buf(myrecv,m++); + _nspecial(i,0) = (int) d_ubuf(_buf(myrecv,m++)).i; + _nspecial(i,1) = (int) d_ubuf(_buf(myrecv,m++)).i; + _nspecial(i,2) = (int) d_ubuf(_buf(myrecv,m++)).i; for (k = 0; k < _nspecial(i,2); k++) - _special(i,k) = _buf(myrecv,m++); + _special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; } } }; diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp index f6952f127c..856660d1e9 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.cpp +++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp @@ -323,7 +323,7 @@ struct AtomVecChargeKokkos_PackCommSelf { /* ---------------------------------------------------------------------- */ int AtomVecChargeKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap, - const int nfirst, const int &pbc_flag, const int* const pbc) { + const int nfirst, const int &pbc_flag, const int* const pbc) { if(commKK->forward_comm_on_host) { sync(Host,X_MASK); modified(Host,X_MASK); @@ -631,17 
+631,17 @@ struct AtomVecChargeKokkos_PackBorder { _buf(i,0) = _x(j,0); _buf(i,1) = _x(j,1); _buf(i,2) = _x(j,2); - _buf(i,3) = _tag(j); - _buf(i,4) = _type(j); - _buf(i,5) = _mask(j); + _buf(i,3) = d_ubuf(_tag(j)).d; + _buf(i,4) = d_ubuf(_type(j)).d; + _buf(i,5) = d_ubuf(_mask(j)).d; _buf(i,6) = _q(j); } else { _buf(i,0) = _x(j,0) + _dx; _buf(i,1) = _x(j,1) + _dy; _buf(i,2) = _x(j,2) + _dz; - _buf(i,3) = _tag(j); - _buf(i,4) = _type(j); - _buf(i,5) = _mask(j); + _buf(i,3) = d_ubuf(_tag(j)).d; + _buf(i,4) = d_ubuf(_type(j)).d; + _buf(i,5) = d_ubuf(_mask(j)).d; _buf(i,6) = _q(j); } } @@ -872,9 +872,9 @@ struct AtomVecChargeKokkos_UnpackBorder { _x(i+_first,0) = _buf(i,0); _x(i+_first,1) = _buf(i,1); _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = static_cast (_buf(i,3)); - _type(i+_first) = static_cast (_buf(i,4)); - _mask(i+_first) = static_cast (_buf(i,5)); + _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; + _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; + _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; _q(i+_first) = _buf(i,6); } }; @@ -1039,10 +1039,10 @@ struct AtomVecChargeKokkos_PackExchangeFunctor { _buf(mysend,4) = _v(i,0); _buf(mysend,5) = _v(i,1); _buf(mysend,6) = _v(i,2); - _buf(mysend,7) = _tag[i]; - _buf(mysend,8) = _type[i]; - _buf(mysend,9) = _mask[i]; - _buf(mysend,10) = _image[i]; + _buf(mysend,7) = d_ubuf(_tag[i]).d; + _buf(mysend,8) = d_ubuf(_type[i]).d; + _buf(mysend,9) = d_ubuf(_mask[i]).d; + _buf(mysend,10) = d_ubuf(_image[i]).d; _buf(mysend,11) = _q[i]; const int j = _copylist(mysend); @@ -1163,10 +1163,10 @@ struct AtomVecChargeKokkos_UnpackExchangeFunctor { _v(i,0) = _buf(myrecv,4); _v(i,1) = _buf(myrecv,5); _v(i,2) = _buf(myrecv,6); - _tag[i] = _buf(myrecv,7); - _type[i] = _buf(myrecv,8); - _mask[i] = _buf(myrecv,9); - _image[i] = _buf(myrecv,10); + _tag[i] = (tagint) d_ubuf(_buf(myrecv,7)).i; + _type[i] = (int) d_ubuf(_buf(myrecv,8)).i; + _mask[i] = (int) d_ubuf(_buf(myrecv,9)).i; + _image[i] = (imageint) d_ubuf(_buf(myrecv,10)).i; _q[i] = 
_buf(myrecv,11); } } diff --git a/src/KOKKOS/atom_vec_full_kokkos.cpp b/src/KOKKOS/atom_vec_full_kokkos.cpp index 731168b6ea..fa4cf18ae3 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.cpp +++ b/src/KOKKOS/atom_vec_full_kokkos.cpp @@ -761,17 +761,6 @@ void AtomVecFullKokkos::unpack_reverse(int n, int *list, double *buf) template struct AtomVecFullKokkos_PackBorder { - union ubuf { - double d; - int64_t i; - KOKKOS_INLINE_FUNCTION - ubuf(double arg) : d(arg) {} - KOKKOS_INLINE_FUNCTION - ubuf(int64_t arg) : i(arg) {} - KOKKOS_INLINE_FUNCTION - ubuf(int arg) : i(arg) {} - }; - typedef DeviceType device_type; typedef ArrayTypes AT; @@ -808,20 +797,20 @@ struct AtomVecFullKokkos_PackBorder { _buf(i,0) = _x(j,0); _buf(i,1) = _x(j,1); _buf(i,2) = _x(j,2); - _buf(i,3) = ubuf(_tag(j)).d; - _buf(i,4) = ubuf(_type(j)).d; - _buf(i,5) = ubuf(_mask(j)).d; + _buf(i,3) = d_ubuf(_tag(j)).d; + _buf(i,4) = d_ubuf(_type(j)).d; + _buf(i,5) = d_ubuf(_mask(j)).d; _buf(i,6) = _q(j); - _buf(i,7) = ubuf(_molecule(j)).d; + _buf(i,7) = d_ubuf(_molecule(j)).d; } else { _buf(i,0) = _x(j,0) + _dx; _buf(i,1) = _x(j,1) + _dy; _buf(i,2) = _x(j,2) + _dz; - _buf(i,3) = ubuf(_tag(j)).d; - _buf(i,4) = ubuf(_type(j)).d; - _buf(i,5) = ubuf(_mask(j)).d; + _buf(i,3) = d_ubuf(_tag(j)).d; + _buf(i,4) = d_ubuf(_type(j)).d; + _buf(i,5) = d_ubuf(_mask(j)).d; _buf(i,6) = _q(j); - _buf(i,7) = ubuf(_molecule(j)).d; + _buf(i,7) = d_ubuf(_molecule(j)).d; } } }; @@ -1030,17 +1019,6 @@ int AtomVecFullKokkos::pack_border_hybrid(int n, int *list, double *buf) template struct AtomVecFullKokkos_UnpackBorder { - union ubuf { - double d; - int64_t i; - KOKKOS_INLINE_FUNCTION - ubuf(double arg) : d(arg) {} - KOKKOS_INLINE_FUNCTION - ubuf(int64_t arg) : i(arg) {} - KOKKOS_INLINE_FUNCTION - ubuf(int arg) : i(arg) {} - }; - typedef DeviceType device_type; typedef ArrayTypes AT; @@ -1072,11 +1050,11 @@ struct AtomVecFullKokkos_UnpackBorder { _x(i+_first,0) = _buf(i,0); _x(i+_first,1) = _buf(i,1); _x(i+_first,2) = _buf(i,2); - 
_tag(i+_first) = (tagint) ubuf(_buf(i,3)).i; - _type(i+_first) = (int) ubuf(_buf(i,4)).i; - _mask(i+_first) = (int) ubuf(_buf(i,5)).i; + _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; + _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; + _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; _q(i+_first) = _buf(i,6); - _molecule(i+_first) = (tagint) ubuf(_buf(i,7)).i; + _molecule(i+_first) = (tagint) d_ubuf(_buf(i,7)).i; } }; @@ -1178,18 +1156,6 @@ int AtomVecFullKokkos::unpack_border_hybrid(int n, int first, double *buf) template struct AtomVecFullKokkos_PackExchangeFunctor { - - union ubuf { - double d; - int64_t i; - KOKKOS_INLINE_FUNCTION - ubuf(double arg) : d(arg) {} - KOKKOS_INLINE_FUNCTION - ubuf(int64_t arg) : i(arg) {} - KOKKOS_INLINE_FUNCTION - ubuf(int arg) : i(arg) {} - }; - typedef DeviceType device_type; typedef ArrayTypes AT; typename AT::t_x_array_randomread _x; @@ -1328,7 +1294,7 @@ struct AtomVecFullKokkos_PackExchangeFunctor { elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; const int maxsendlist = (buf.template view().dimension_0()* - buf.template view().dimension_1())/elements; + buf.template view().dimension_1())/elements; buffer_view(_buf,buf,maxsendlist,elements); } @@ -1344,46 +1310,46 @@ struct AtomVecFullKokkos_PackExchangeFunctor { _buf(mysend,m++) = _v(i,0); _buf(mysend,m++) = _v(i,1); _buf(mysend,m++) = _v(i,2); - _buf(mysend,m++) = ubuf(_tag(i)).d; - _buf(mysend,m++) = ubuf(_type(i)).d; - _buf(mysend,m++) = ubuf(_mask(i)).d; - _buf(mysend,m++) = ubuf(_image(i)).d; + _buf(mysend,m++) = d_ubuf(_tag(i)).d; + _buf(mysend,m++) = d_ubuf(_type(i)).d; + _buf(mysend,m++) = d_ubuf(_mask(i)).d; + _buf(mysend,m++) = d_ubuf(_image(i)).d; _buf(mysend,m++) = _q(i); - _buf(mysend,m++) = ubuf(_molecule(i)).d; - _buf(mysend,m++) = ubuf(_num_bond(i)).d; + _buf(mysend,m++) = d_ubuf(_molecule(i)).d; + _buf(mysend,m++) = d_ubuf(_num_bond(i)).d; for (k = 0; k < _num_bond(i); k++) { - 
_buf(mysend,m++) = ubuf(_bond_type(i,k)).d; - _buf(mysend,m++) = ubuf(_bond_atom(i,k)).d; + _buf(mysend,m++) = d_ubuf(_bond_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_bond_atom(i,k)).d; } - _buf(mysend,m++) = ubuf(_num_angle(i)).d; + _buf(mysend,m++) = d_ubuf(_num_angle(i)).d; for (k = 0; k < _num_angle(i); k++) { - _buf(mysend,m++) = ubuf(_angle_type(i,k)).d; - _buf(mysend,m++) = ubuf(_angle_atom1(i,k)).d; - _buf(mysend,m++) = ubuf(_angle_atom2(i,k)).d; - _buf(mysend,m++) = ubuf(_angle_atom3(i,k)).d; + _buf(mysend,m++) = d_ubuf(_angle_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_angle_atom1(i,k)).d; + _buf(mysend,m++) = d_ubuf(_angle_atom2(i,k)).d; + _buf(mysend,m++) = d_ubuf(_angle_atom3(i,k)).d; } - _buf(mysend,m++) = ubuf(_num_dihedral(i)).d; + _buf(mysend,m++) = d_ubuf(_num_dihedral(i)).d; for (k = 0; k < _num_dihedral(i); k++) { - _buf(mysend,m++) = ubuf(_dihedral_type(i,k)).d; - _buf(mysend,m++) = ubuf(_dihedral_atom1(i,k)).d; - _buf(mysend,m++) = ubuf(_dihedral_atom2(i,k)).d; - _buf(mysend,m++) = ubuf(_dihedral_atom3(i,k)).d; - _buf(mysend,m++) = ubuf(_dihedral_atom4(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom1(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom2(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom3(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom4(i,k)).d; } - _buf(mysend,m++) = ubuf(_num_improper(i)).d; + _buf(mysend,m++) = d_ubuf(_num_improper(i)).d; for (k = 0; k < _num_improper(i); k++) { - _buf(mysend,m++) = ubuf(_improper_type(i,k)).d; - _buf(mysend,m++) = ubuf(_improper_atom1(i,k)).d; - _buf(mysend,m++) = ubuf(_improper_atom2(i,k)).d; - _buf(mysend,m++) = ubuf(_improper_atom3(i,k)).d; - _buf(mysend,m++) = ubuf(_improper_atom4(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom1(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom2(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom3(i,k)).d; + _buf(mysend,m++) = 
d_ubuf(_improper_atom4(i,k)).d; } - _buf(mysend,m++) = ubuf(_nspecial(i,0)).d; - _buf(mysend,m++) = ubuf(_nspecial(i,1)).d; - _buf(mysend,m++) = ubuf(_nspecial(i,2)).d; + _buf(mysend,m++) = d_ubuf(_nspecial(i,0)).d; + _buf(mysend,m++) = d_ubuf(_nspecial(i,1)).d; + _buf(mysend,m++) = d_ubuf(_nspecial(i,2)).d; for (k = 0; k < _nspecial(i,2); k++) - _buf(mysend,m++) = ubuf(_special(i,k)).d; + _buf(mysend,m++) = d_ubuf(_special(i,k)).d; const int j = _copylist(mysend); @@ -1531,18 +1497,6 @@ int AtomVecFullKokkos::pack_exchange(int i, double *buf) template struct AtomVecFullKokkos_UnpackExchangeFunctor { - - union ubuf { - double d; - int64_t i; - KOKKOS_INLINE_FUNCTION - ubuf(double arg) : d(arg) {} - KOKKOS_INLINE_FUNCTION - ubuf(int64_t arg) : i(arg) {} - KOKKOS_INLINE_FUNCTION - ubuf(int arg) : i(arg) {} - }; - typedef DeviceType device_type; typedef ArrayTypes AT; typename AT::t_x_array _x; @@ -1617,7 +1571,7 @@ struct AtomVecFullKokkos_UnpackExchangeFunctor { elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; const int maxsendlist = (buf.template view().dimension_0()* - buf.template view().dimension_1())/elements; + buf.template view().dimension_1())/elements; buffer_view(_buf,buf,maxsendlist,elements); } @@ -1633,46 +1587,46 @@ struct AtomVecFullKokkos_UnpackExchangeFunctor { _v(i,0) = _buf(myrecv,m++); _v(i,1) = _buf(myrecv,m++); _v(i,2) = _buf(myrecv,m++); - _tag(i) = (tagint) ubuf(_buf(myrecv,m++)).i; - _type(i) = (int) ubuf(_buf(myrecv,m++)).i; - _mask(i) = (int) ubuf(_buf(myrecv,m++)).i; - _image(i) = (imageint) ubuf(_buf(myrecv,m++)).i; + _tag(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _type(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + _mask(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + _image(i) = (imageint) d_ubuf(_buf(myrecv,m++)).i; _q(i) = _buf(myrecv,m++); - _molecule(i) = (tagint) ubuf(_buf(myrecv,m++)).i; - _num_bond(i) = (int) ubuf(_buf(myrecv,m++)).i; + _molecule(i) = 
(tagint) d_ubuf(_buf(myrecv,m++)).i; + _num_bond(i) = (int) d_ubuf(_buf(myrecv,m++)).i; int k; for (k = 0; k < _num_bond(i); k++) { - _bond_type(i,k) = (int) ubuf(_buf(myrecv,m++)).i; - _bond_atom(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i; + _bond_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _bond_atom(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; } - _num_angle(i) = (int) ubuf(_buf(myrecv,m++)).i; + _num_angle(i) = (int) d_ubuf(_buf(myrecv,m++)).i; for (k = 0; k < _num_angle(i); k++) { - _angle_type(i,k) = (int) ubuf(_buf(myrecv,m++)).i; - _angle_atom1(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i; - _angle_atom2(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i; - _angle_atom3(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i; + _angle_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _angle_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _angle_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _angle_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; } - _num_dihedral(i) = (int) ubuf(_buf(myrecv,m++)).i; + _num_dihedral(i) = (int) d_ubuf(_buf(myrecv,m++)).i; for (k = 0; k < _num_dihedral(i); k++) { - _dihedral_type(i,k) = (int) ubuf(_buf(myrecv,m++)).i; - _dihedral_atom1(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i; - _dihedral_atom2(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i; - _dihedral_atom3(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i; - _dihedral_atom4(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i; + _dihedral_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; } - _num_improper(i) = (int) ubuf(_buf(myrecv,m++)).i; + _num_improper(i) = (int) d_ubuf(_buf(myrecv,m++)).i; for (k = 0; k < _num_improper(i); k++) { - _improper_type(i,k) = (int) ubuf(_buf(myrecv,m++)).i; - _improper_atom1(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i; - _improper_atom2(i,k) = 
(tagint) ubuf(_buf(myrecv,m++)).i; - _improper_atom3(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i; - _improper_atom4(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i; + _improper_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; } - _nspecial(i,0) = (int) ubuf(_buf(myrecv,m++)).i; - _nspecial(i,1) = (int) ubuf(_buf(myrecv,m++)).i; - _nspecial(i,2) = (int) ubuf(_buf(myrecv,m++)).i; + _nspecial(i,0) = (int) d_ubuf(_buf(myrecv,m++)).i; + _nspecial(i,1) = (int) d_ubuf(_buf(myrecv,m++)).i; + _nspecial(i,2) = (int) d_ubuf(_buf(myrecv,m++)).i; for (k = 0; k < _nspecial(i,2); k++) - _special(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i; + _special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; } } }; diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index 7ac66f1626..7f593f235f 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -20,6 +20,17 @@ namespace LAMMPS_NS { +union d_ubuf { + double d; + int64_t i; + KOKKOS_INLINE_FUNCTION + d_ubuf(double arg) : d(arg) {} + KOKKOS_INLINE_FUNCTION + d_ubuf(int64_t arg) : i(arg) {} + KOKKOS_INLINE_FUNCTION + d_ubuf(int arg) : i(arg) {} +}; + class AtomVecKokkos : public AtomVec { public: AtomVecKokkos(class LAMMPS *); diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.cpp b/src/KOKKOS/atom_vec_molecular_kokkos.cpp index 4fd8114376..5c16ac1513 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.cpp +++ b/src/KOKKOS/atom_vec_molecular_kokkos.cpp @@ -786,18 +786,18 @@ struct AtomVecMolecularKokkos_PackBorder { _buf(i,0) = _x(j,0); _buf(i,1) = _x(j,1); _buf(i,2) = _x(j,2); - _buf(i,3) = _tag(j); - _buf(i,4) = _type(j); - _buf(i,5) = _mask(j); - _buf(i,6) = _molecule(j); + _buf(i,3) = d_ubuf(_tag(j)).d; + _buf(i,4) = d_ubuf(_type(j)).d; + _buf(i,5) = d_ubuf(_mask(j)).d; 
+ _buf(i,6) = d_ubuf(_molecule(j)).d; } else { _buf(i,0) = _x(j,0) + _dx; _buf(i,1) = _x(j,1) + _dy; _buf(i,2) = _x(j,2) + _dz; - _buf(i,3) = _tag(j); - _buf(i,4) = _type(j); - _buf(i,5) = _mask(j); - _buf(i,6) = _molecule(j); + _buf(i,3) = d_ubuf(_tag(j)).d; + _buf(i,4) = d_ubuf(_type(j)).d; + _buf(i,5) = d_ubuf(_mask(j)).d; + _buf(i,6) = d_ubuf(_molecule(j)).d; } } }; @@ -1029,10 +1029,10 @@ struct AtomVecMolecularKokkos_UnpackBorder { _x(i+_first,0) = _buf(i,0); _x(i+_first,1) = _buf(i,1); _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = static_cast (_buf(i,3)); - _type(i+_first) = static_cast (_buf(i,4)); - _mask(i+_first) = static_cast (_buf(i,5)); - _molecule(i+_first) = static_cast (_buf(i,6)); + _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; + _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; + _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; + _molecule(i+_first) = (tagint) d_ubuf(_buf(i,6)).i; } }; @@ -1263,7 +1263,7 @@ struct AtomVecMolecularKokkos_PackExchangeFunctor { elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; const int maxsendlist = (buf.template view().dimension_0()* - buf.template view().dimension_1())/elements; + buf.template view().dimension_1())/elements; buffer_view(_buf,buf,maxsendlist,elements); } @@ -1279,45 +1279,45 @@ struct AtomVecMolecularKokkos_PackExchangeFunctor { _buf(mysend,m++) = _v(i,0); _buf(mysend,m++) = _v(i,1); _buf(mysend,m++) = _v(i,2); - _buf(mysend,m++) = _tag(i); - _buf(mysend,m++) = _type(i); - _buf(mysend,m++) = _mask(i); - _buf(mysend,m++) = _image(i); - _buf(mysend,m++) = _molecule(i); - _buf(mysend,m++) = _num_bond(i); + _buf(mysend,m++) = d_ubuf(_tag(i)).d; + _buf(mysend,m++) = d_ubuf(_type(i)).d; + _buf(mysend,m++) = d_ubuf(_mask(i)).d; + _buf(mysend,m++) = d_ubuf(_image(i)).d; + _buf(mysend,m++) = d_ubuf(_molecule(i)).d; + _buf(mysend,m++) = d_ubuf(_num_bond(i)).d; for (k = 0; k < _num_bond(i); k++) { - _buf(mysend,m++) = _bond_type(i,k); 
- _buf(mysend,m++) = _bond_atom(i,k); + _buf(mysend,m++) = d_ubuf(_bond_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_bond_atom(i,k)).d; } - _buf(mysend,m++) = _num_angle(i); + _buf(mysend,m++) = d_ubuf(_num_angle(i)).d; for (k = 0; k < _num_angle(i); k++) { - _buf(mysend,m++) = _angle_type(i,k); - _buf(mysend,m++) = _angle_atom1(i,k); - _buf(mysend,m++) = _angle_atom2(i,k); - _buf(mysend,m++) = _angle_atom3(i,k); + _buf(mysend,m++) = d_ubuf(_angle_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_angle_atom1(i,k)).d; + _buf(mysend,m++) = d_ubuf(_angle_atom2(i,k)).d; + _buf(mysend,m++) = d_ubuf(_angle_atom3(i,k)).d; } - _buf(mysend,m++) = _num_dihedral(i); + _buf(mysend,m++) = d_ubuf(_num_dihedral(i)).d; for (k = 0; k < _num_dihedral(i); k++) { - _buf(mysend,m++) = _dihedral_type(i,k); - _buf(mysend,m++) = _dihedral_atom1(i,k); - _buf(mysend,m++) = _dihedral_atom2(i,k); - _buf(mysend,m++) = _dihedral_atom3(i,k); - _buf(mysend,m++) = _dihedral_atom4(i,k); + _buf(mysend,m++) = d_ubuf(_dihedral_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom1(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom2(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom3(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom4(i,k)).d; } - _buf(mysend,m++) = _num_improper(i); + _buf(mysend,m++) = d_ubuf(_num_improper(i)).d; for (k = 0; k < _num_improper(i); k++) { - _buf(mysend,m++) = _improper_type(i,k); - _buf(mysend,m++) = _improper_atom1(i,k); - _buf(mysend,m++) = _improper_atom2(i,k); - _buf(mysend,m++) = _improper_atom3(i,k); - _buf(mysend,m++) = _improper_atom4(i,k); + _buf(mysend,m++) = d_ubuf(_improper_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom1(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom2(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom3(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom4(i,k)).d; } - _buf(mysend,m++) = _nspecial(i,0); - _buf(mysend,m++) = _nspecial(i,1); - _buf(mysend,m++) = _nspecial(i,2); + _buf(mysend,m++) = d_ubuf(_nspecial(i,0)).d; + 
_buf(mysend,m++) = d_ubuf(_nspecial(i,1)).d; + _buf(mysend,m++) = d_ubuf(_nspecial(i,2)).d; for (k = 0; k < _nspecial(i,2); k++) - _buf(mysend,m++) = _special(i,k); + _buf(mysend,m++) = d_ubuf(_special(i,k)).d; const int j = _copylist(mysend); @@ -1536,7 +1536,7 @@ struct AtomVecMolecularKokkos_UnpackExchangeFunctor { elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; const int maxsendlist = (buf.template view().dimension_0()* - buf.template view().dimension_1())/elements; + buf.template view().dimension_1())/elements; buffer_view(_buf,buf,maxsendlist,elements); } @@ -1552,46 +1552,46 @@ struct AtomVecMolecularKokkos_UnpackExchangeFunctor { _v(i,0) = _buf(myrecv,m++); _v(i,1) = _buf(myrecv,m++); _v(i,2) = _buf(myrecv,m++); - _tag(i) = _buf(myrecv,m++); - _type(i) = _buf(myrecv,m++); - _mask(i) = _buf(myrecv,m++); - _image(i) = _buf(myrecv,m++); + _tag(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _type(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + _mask(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + _image(i) = (imageint) d_ubuf(_buf(myrecv,m++)).i; - _molecule(i) = _buf(myrecv,m++); - _num_bond(i) = _buf(myrecv,m++); + _molecule(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _num_bond(i) = (int) d_ubuf(_buf(myrecv,m++)).i; int k; for (k = 0; k < _num_bond(i); k++) { - _bond_type(i,k) = _buf(myrecv,m++); - _bond_atom(i,k) = _buf(myrecv,m++); + _bond_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _bond_atom(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; } - _num_angle(i) = _buf(myrecv,m++); + _num_angle(i) = (int) d_ubuf(_buf(myrecv,m++)).i; for (k = 0; k < _num_angle(i); k++) { - _angle_type(i,k) = _buf(myrecv,m++); - _angle_atom1(i,k) = _buf(myrecv,m++); - _angle_atom2(i,k) = _buf(myrecv,m++); - _angle_atom3(i,k) = _buf(myrecv,m++); + _angle_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _angle_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _angle_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; 
+ _angle_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; } - _num_dihedral(i) = _buf(myrecv,m++); + _num_dihedral(i) = d_ubuf(_buf(myrecv,m++)).i; for (k = 0; k < _num_dihedral(i); k++) { - _dihedral_type(i,k) = _buf(myrecv,m++); - _dihedral_atom1(i,k) = _buf(myrecv,m++); - _dihedral_atom2(i,k) = _buf(myrecv,m++); - _dihedral_atom3(i,k) = _buf(myrecv,m++); - _dihedral_atom4(i,k) = _buf(myrecv,m++); + _dihedral_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; } - _num_improper(i) = _buf(myrecv,m++); - for (k = 0; k < _num_improper(i); k++) { - _improper_type(i,k) = _buf(myrecv,m++); - _improper_atom1(i,k) = _buf(myrecv,m++); - _improper_atom2(i,k) = _buf(myrecv,m++); - _improper_atom3(i,k) = _buf(myrecv,m++); - _improper_atom4(i,k) = _buf(myrecv,m++); + _num_improper(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + for (k = 0; k < (int) _num_improper(i); k++) { + _improper_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; } - _nspecial(i,0) = _buf(myrecv,m++); - _nspecial(i,1) = _buf(myrecv,m++); - _nspecial(i,2) = _buf(myrecv,m++); + _nspecial(i,0) = (int) d_ubuf(_buf(myrecv,m++)).i; + _nspecial(i,1) = (int) d_ubuf(_buf(myrecv,m++)).i; + _nspecial(i,2) = (int) d_ubuf(_buf(myrecv,m++)).i; for (k = 0; k < _nspecial(i,2); k++) - _special(i,k) = _buf(myrecv,m++); + _special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; } } }; diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.cpp b/src/KOKKOS/fix_qeq_reax_kokkos.cpp index fbc6e0a298..dc6b084f0d 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.cpp +++ 
b/src/KOKKOS/fix_qeq_reax_kokkos.cpp @@ -37,7 +37,7 @@ #include "math_const.h" #include "memory.h" #include "error.h" -#include "pair_reax_c_kokkos.h" +#include "pair_reaxc_kokkos.h" using namespace LAMMPS_NS; using namespace FixConst; @@ -50,7 +50,8 @@ using namespace FixConst; /* ---------------------------------------------------------------------- */ template -FixQEqReaxKokkos::FixQEqReaxKokkos(LAMMPS *lmp, int narg, char **arg) : +FixQEqReaxKokkos:: +FixQEqReaxKokkos(LAMMPS *lmp, int narg, char **arg) : FixQEqReax(lmp, narg, arg) { kokkosable = 1; diff --git a/src/KOKKOS/fix_reaxc_bonds_kokkos.cpp b/src/KOKKOS/fix_reaxc_bonds_kokkos.cpp index 7688d6745a..e4fb9385a5 100644 --- a/src/KOKKOS/fix_reaxc_bonds_kokkos.cpp +++ b/src/KOKKOS/fix_reaxc_bonds_kokkos.cpp @@ -21,7 +21,7 @@ #include "fix_reaxc_bonds_kokkos.h" #include "atom.h" #include "update.h" -#include "pair_reax_c_kokkos.h" +#include "pair_reaxc_kokkos.h" #include "modify.h" #include "neighbor.h" #include "neigh_list.h" diff --git a/src/KOKKOS/fix_reaxc_species_kokkos.cpp b/src/KOKKOS/fix_reaxc_species_kokkos.cpp index 17b42174c6..ce84de30cb 100644 --- a/src/KOKKOS/fix_reaxc_species_kokkos.cpp +++ b/src/KOKKOS/fix_reaxc_species_kokkos.cpp @@ -23,7 +23,7 @@ #include "fix_reaxc_species_kokkos.h" #include "domain.h" #include "update.h" -#include "pair_reax_c_kokkos.h" +#include "pair_reaxc_kokkos.h" #include "modify.h" #include "neighbor.h" #include "neigh_list.h" @@ -156,4 +156,4 @@ void FixReaxCSpeciesKokkos::FindMolecule() if (looptot >= 400*nprocs) break; } -} \ No newline at end of file +} diff --git a/src/KOKKOS/modify_kokkos.cpp b/src/KOKKOS/modify_kokkos.cpp index b4a89c8e39..c9242f2116 100644 --- a/src/KOKKOS/modify_kokkos.cpp +++ b/src/KOKKOS/modify_kokkos.cpp @@ -44,17 +44,19 @@ void ModifyKokkos::setup(int vflag) if (update->whichflag == 1) for (int i = 0; i < nfix; i++) { atomKK->sync(fix[i]->execution_space,fix[i]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if 
(!fix[i]->kokkosable) lmp->kokkos->auto_sync = 1; fix[i]->setup(vflag); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[i]->execution_space,fix[i]->datamask_modify); } else if (update->whichflag == 2) for (int i = 0; i < nfix; i++) { atomKK->sync(fix[i]->execution_space,fix[i]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[i]->kokkosable) lmp->kokkos->auto_sync = 1; fix[i]->min_setup(vflag); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[i]->execution_space,fix[i]->datamask_modify); } } @@ -70,9 +72,10 @@ void ModifyKokkos::setup_pre_exchange() for (int i = 0; i < n_pre_exchange; i++) { atomKK->sync(fix[list_pre_exchange[i]]->execution_space, fix[list_pre_exchange[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_pre_exchange[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_pre_exchange[i]]->setup_pre_exchange(); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_pre_exchange[i]]->execution_space, fix[list_pre_exchange[i]]->datamask_modify); } @@ -80,9 +83,10 @@ void ModifyKokkos::setup_pre_exchange() for (int i = 0; i < n_min_pre_exchange; i++) { atomKK->sync(fix[list_min_pre_exchange[i]]->execution_space, fix[list_min_pre_exchange[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_min_pre_exchange[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_min_pre_exchange[i]]->setup_pre_exchange(); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_min_pre_exchange[i]]->execution_space, fix[list_min_pre_exchange[i]]->datamask_modify); } @@ -99,9 +103,10 @@ void ModifyKokkos::setup_pre_neighbor() for (int i = 0; i < n_pre_neighbor; i++) { atomKK->sync(fix[list_pre_neighbor[i]]->execution_space, fix[list_pre_neighbor[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if 
(!fix[list_pre_neighbor[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_pre_neighbor[i]]->setup_pre_neighbor(); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_pre_neighbor[i]]->execution_space, fix[list_pre_neighbor[i]]->datamask_modify); } @@ -109,9 +114,10 @@ void ModifyKokkos::setup_pre_neighbor() for (int i = 0; i < n_min_pre_neighbor; i++) { atomKK->sync(fix[list_min_pre_neighbor[i]]->execution_space, fix[list_min_pre_neighbor[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_min_pre_neighbor[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_min_pre_neighbor[i]]->setup_pre_neighbor(); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_min_pre_neighbor[i]]->execution_space, fix[list_min_pre_neighbor[i]]->datamask_modify); } @@ -128,9 +134,10 @@ void ModifyKokkos::setup_pre_force(int vflag) for (int i = 0; i < n_pre_force; i++) { atomKK->sync(fix[list_pre_force[i]]->execution_space, fix[list_pre_force[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_pre_force[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_pre_force[i]]->setup_pre_force(vflag); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_pre_force[i]]->execution_space, fix[list_pre_force[i]]->datamask_modify); } @@ -138,9 +145,10 @@ void ModifyKokkos::setup_pre_force(int vflag) for (int i = 0; i < n_min_pre_force; i++) { atomKK->sync(fix[list_min_pre_force[i]]->execution_space, fix[list_min_pre_force[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_min_pre_force[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_min_pre_force[i]]->setup_pre_force(vflag); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_min_pre_force[i]]->execution_space, fix[list_min_pre_force[i]]->datamask_modify); } @@ -157,9 
+165,10 @@ void ModifyKokkos::setup_pre_reverse(int eflag, int vflag) for (int i = 0; i < n_pre_reverse; i++) { atomKK->sync(fix[list_pre_reverse[i]]->execution_space, fix[list_pre_reverse[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_pre_reverse[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_pre_reverse[i]]->setup_pre_reverse(eflag,vflag); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_pre_reverse[i]]->execution_space, fix[list_pre_reverse[i]]->datamask_modify); } @@ -167,9 +176,10 @@ void ModifyKokkos::setup_pre_reverse(int eflag, int vflag) for (int i = 0; i < n_min_pre_reverse; i++) { atomKK->sync(fix[list_min_pre_reverse[i]]->execution_space, fix[list_min_pre_reverse[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_min_pre_reverse[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_min_pre_reverse[i]]->setup_pre_reverse(eflag,vflag); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_min_pre_reverse[i]]->execution_space, fix[list_min_pre_reverse[i]]->datamask_modify); } @@ -184,9 +194,10 @@ void ModifyKokkos::initial_integrate(int vflag) for (int i = 0; i < n_initial_integrate; i++) { atomKK->sync(fix[list_initial_integrate[i]]->execution_space, fix[list_initial_integrate[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_initial_integrate[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_initial_integrate[i]]->initial_integrate(vflag); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_initial_integrate[i]]->execution_space, fix[list_initial_integrate[i]]->datamask_modify); } @@ -201,9 +212,10 @@ void ModifyKokkos::post_integrate() for (int i = 0; i < n_post_integrate; i++) { atomKK->sync(fix[list_post_integrate[i]]->execution_space, fix[list_post_integrate[i]]->datamask_read); + int prev_auto_sync = 
lmp->kokkos->auto_sync; if (!fix[list_post_integrate[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_post_integrate[i]]->post_integrate(); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_post_integrate[i]]->execution_space, fix[list_post_integrate[i]]->datamask_modify); } @@ -218,9 +230,10 @@ void ModifyKokkos::pre_exchange() for (int i = 0; i < n_pre_exchange; i++) { atomKK->sync(fix[list_pre_exchange[i]]->execution_space, fix[list_pre_exchange[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_pre_exchange[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_pre_exchange[i]]->pre_exchange(); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_pre_exchange[i]]->execution_space, fix[list_pre_exchange[i]]->datamask_modify); } @@ -235,9 +248,10 @@ void ModifyKokkos::pre_neighbor() for (int i = 0; i < n_pre_neighbor; i++) { atomKK->sync(fix[list_pre_neighbor[i]]->execution_space, fix[list_pre_neighbor[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_pre_neighbor[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_pre_neighbor[i]]->pre_neighbor(); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_pre_neighbor[i]]->execution_space, fix[list_pre_neighbor[i]]->datamask_modify); } @@ -252,9 +266,10 @@ void ModifyKokkos::pre_force(int vflag) for (int i = 0; i < n_pre_force; i++) { atomKK->sync(fix[list_pre_force[i]]->execution_space, fix[list_pre_force[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_pre_force[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_pre_force[i]]->pre_force(vflag); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_pre_force[i]]->execution_space, fix[list_pre_force[i]]->datamask_modify); } @@ -269,9 +284,10 @@ void ModifyKokkos::pre_reverse(int 
eflag, int vflag) for (int i = 0; i < n_pre_reverse; i++) { atomKK->sync(fix[list_pre_reverse[i]]->execution_space, fix[list_pre_reverse[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_pre_reverse[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_pre_reverse[i]]->pre_reverse(eflag,vflag); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_pre_reverse[i]]->execution_space, fix[list_pre_reverse[i]]->datamask_modify); } @@ -286,9 +302,10 @@ void ModifyKokkos::post_force(int vflag) for (int i = 0; i < n_post_force; i++) { atomKK->sync(fix[list_post_force[i]]->execution_space, fix[list_post_force[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_post_force[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_post_force[i]]->post_force(vflag); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_post_force[i]]->execution_space, fix[list_post_force[i]]->datamask_modify); } @@ -303,9 +320,10 @@ void ModifyKokkos::final_integrate() for (int i = 0; i < n_final_integrate; i++) { atomKK->sync(fix[list_final_integrate[i]]->execution_space, fix[list_final_integrate[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_final_integrate[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_final_integrate[i]]->final_integrate(); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_final_integrate[i]]->execution_space, fix[list_final_integrate[i]]->datamask_modify); } @@ -322,9 +340,10 @@ void ModifyKokkos::end_of_step() if (update->ntimestep % end_of_step_every[i] == 0) { atomKK->sync(fix[list_end_of_step[i]]->execution_space, fix[list_end_of_step[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_end_of_step[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_end_of_step[i]]->end_of_step(); - 
lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_end_of_step[i]]->execution_space, fix[list_end_of_step[i]]->datamask_modify); } @@ -342,9 +361,10 @@ double ModifyKokkos::thermo_energy() for (int i = 0; i < n_thermo_energy; i++) { atomKK->sync(fix[list_thermo_energy[i]]->execution_space, fix[list_thermo_energy[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_thermo_energy[i]]->kokkosable) lmp->kokkos->auto_sync = 1; energy += fix[list_thermo_energy[i]]->compute_scalar(); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_thermo_energy[i]]->execution_space, fix[list_thermo_energy[i]]->datamask_modify); } @@ -375,9 +395,10 @@ void ModifyKokkos::setup_pre_force_respa(int vflag, int ilevel) for (int i = 0; i < n_pre_force; i++) { atomKK->sync(fix[list_pre_force[i]]->execution_space, fix[list_pre_force[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_pre_force[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_pre_force[i]]->setup_pre_force_respa(vflag,ilevel); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_pre_force[i]]->execution_space, fix[list_pre_force[i]]->datamask_modify); } @@ -392,10 +413,11 @@ void ModifyKokkos::initial_integrate_respa(int vflag, int ilevel, int iloop) for (int i = 0; i < n_initial_integrate_respa; i++) { atomKK->sync(fix[list_initial_integrate_respa[i]]->execution_space, fix[list_initial_integrate_respa[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_initial_integrate_respa[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_initial_integrate_respa[i]]-> initial_integrate_respa(vflag,ilevel,iloop); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_initial_integrate_respa[i]]->execution_space, 
fix[list_initial_integrate_respa[i]]->datamask_modify); } @@ -410,9 +432,10 @@ void ModifyKokkos::post_integrate_respa(int ilevel, int iloop) for (int i = 0; i < n_post_integrate_respa; i++) { atomKK->sync(fix[list_post_integrate_respa[i]]->execution_space, fix[list_post_integrate_respa[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_post_integrate_respa[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_post_integrate_respa[i]]->post_integrate_respa(ilevel,iloop); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_post_integrate_respa[i]]->execution_space, fix[list_post_integrate_respa[i]]->datamask_modify); } @@ -427,9 +450,10 @@ void ModifyKokkos::pre_force_respa(int vflag, int ilevel, int iloop) for (int i = 0; i < n_pre_force_respa; i++) { atomKK->sync(fix[list_pre_force_respa[i]]->execution_space, fix[list_pre_force_respa[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_pre_force_respa[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_pre_force_respa[i]]->pre_force_respa(vflag,ilevel,iloop); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_pre_force_respa[i]]->execution_space, fix[list_pre_force_respa[i]]->datamask_modify); } @@ -444,9 +468,10 @@ void ModifyKokkos::post_force_respa(int vflag, int ilevel, int iloop) for (int i = 0; i < n_post_force_respa; i++) { atomKK->sync(fix[list_post_force_respa[i]]->execution_space, fix[list_post_force_respa[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_post_force_respa[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_post_force_respa[i]]->post_force_respa(vflag,ilevel,iloop); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_post_force_respa[i]]->execution_space, fix[list_post_force_respa[i]]->datamask_modify); } @@ -461,9 +486,10 @@ void 
ModifyKokkos::final_integrate_respa(int ilevel, int iloop) for (int i = 0; i < n_final_integrate_respa; i++) { atomKK->sync(fix[list_final_integrate_respa[i]]->execution_space, fix[list_final_integrate_respa[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_final_integrate_respa[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_final_integrate_respa[i]]->final_integrate_respa(ilevel,iloop); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_final_integrate_respa[i]]->execution_space, fix[list_final_integrate_respa[i]]->datamask_modify); } @@ -478,9 +504,10 @@ void ModifyKokkos::min_pre_exchange() for (int i = 0; i < n_min_pre_exchange; i++) { atomKK->sync(fix[list_min_pre_exchange[i]]->execution_space, fix[list_min_pre_exchange[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_min_pre_exchange[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_min_pre_exchange[i]]->min_pre_exchange(); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_min_pre_exchange[i]]->execution_space, fix[list_min_pre_exchange[i]]->datamask_modify); } @@ -495,9 +522,10 @@ void ModifyKokkos::min_pre_neighbor() for (int i = 0; i < n_min_pre_neighbor; i++) { atomKK->sync(fix[list_min_pre_neighbor[i]]->execution_space, fix[list_min_pre_neighbor[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_min_pre_neighbor[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_min_pre_neighbor[i]]->min_pre_neighbor(); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_min_pre_neighbor[i]]->execution_space, fix[list_min_pre_neighbor[i]]->datamask_modify); } @@ -512,9 +540,10 @@ void ModifyKokkos::min_pre_force(int vflag) for (int i = 0; i < n_min_pre_force; i++) { atomKK->sync(fix[list_min_pre_force[i]]->execution_space, fix[list_min_pre_force[i]]->datamask_read); + int 
prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_min_pre_force[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_min_pre_force[i]]->min_pre_force(vflag); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_min_pre_force[i]]->execution_space, fix[list_min_pre_force[i]]->datamask_modify); } @@ -529,9 +558,10 @@ void ModifyKokkos::min_pre_reverse(int eflag, int vflag) for (int i = 0; i < n_min_pre_reverse; i++) { atomKK->sync(fix[list_min_pre_reverse[i]]->execution_space, fix[list_min_pre_reverse[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_min_pre_reverse[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_min_pre_reverse[i]]->min_pre_reverse(eflag,vflag); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_min_pre_reverse[i]]->execution_space, fix[list_min_pre_reverse[i]]->datamask_modify); } @@ -546,9 +576,10 @@ void ModifyKokkos::min_post_force(int vflag) for (int i = 0; i < n_min_post_force; i++) { atomKK->sync(fix[list_min_post_force[i]]->execution_space, fix[list_min_post_force[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_min_post_force[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_min_post_force[i]]->min_post_force(vflag); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_min_post_force[i]]->execution_space, fix[list_min_post_force[i]]->datamask_modify); } @@ -568,10 +599,11 @@ double ModifyKokkos::min_energy(double *fextra) for (int i = 0; i < n_min_energy; i++) { ifix = list_min_energy[i]; atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[ifix]->kokkosable) lmp->kokkos->auto_sync = 1; eng += fix[ifix]->min_energy(&fextra[index]); index += fix[ifix]->min_dof(); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; 
atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify); } return eng; @@ -586,9 +618,10 @@ void ModifyKokkos::min_store() for (int i = 0; i < n_min_energy; i++) { atomKK->sync(fix[list_min_energy[i]]->execution_space, fix[list_min_energy[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_min_energy[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_min_energy[i]]->min_store(); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_min_energy[i]]->execution_space, fix[list_min_energy[i]]->datamask_modify); } @@ -603,9 +636,10 @@ void ModifyKokkos::min_clearstore() for (int i = 0; i < n_min_energy; i++) { atomKK->sync(fix[list_min_energy[i]]->execution_space, fix[list_min_energy[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_min_energy[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_min_energy[i]]->min_clearstore(); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_min_energy[i]]->execution_space, fix[list_min_energy[i]]->datamask_modify); } @@ -616,9 +650,10 @@ void ModifyKokkos::min_pushstore() for (int i = 0; i < n_min_energy; i++) { atomKK->sync(fix[list_min_energy[i]]->execution_space, fix[list_min_energy[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_min_energy[i]]->kokkosable) lmp->kokkos->auto_sync = 1; fix[list_min_energy[i]]->min_pushstore(); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_min_energy[i]]->execution_space, fix[list_min_energy[i]]->datamask_modify); } @@ -629,9 +664,10 @@ void ModifyKokkos::min_popstore() for (int i = 0; i < n_min_energy; i++) { atomKK->sync(fix[list_min_energy[i]]->execution_space, fix[list_min_energy[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_min_energy[i]]->kokkosable) lmp->kokkos->auto_sync = 1; 
fix[list_min_energy[i]]->min_popstore(); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_min_energy[i]]->execution_space, fix[list_min_energy[i]]->datamask_modify); } @@ -649,10 +685,11 @@ void ModifyKokkos::min_step(double alpha, double *hextra) for (int i = 0; i < n_min_energy; i++) { ifix = list_min_energy[i]; atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[ifix]->kokkosable) lmp->kokkos->auto_sync = 1; fix[ifix]->min_step(alpha,&hextra[index]); index += fix[ifix]->min_dof(); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify); } } @@ -670,11 +707,12 @@ double ModifyKokkos::max_alpha(double *hextra) for (int i = 0; i < n_min_energy; i++) { ifix = list_min_energy[i]; atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[ifix]->kokkosable) lmp->kokkos->auto_sync = 1; double alpha_one = fix[ifix]->max_alpha(&hextra[index]); alpha = MIN(alpha,alpha_one); index += fix[ifix]->min_dof(); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify); } return alpha; @@ -690,9 +728,10 @@ int ModifyKokkos::min_dof() for (int i = 0; i < n_min_energy; i++) { atomKK->sync(fix[list_min_energy[i]]->execution_space, fix[list_min_energy[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_min_energy[i]]->kokkosable) lmp->kokkos->auto_sync = 1; ndof += fix[list_min_energy[i]]->min_dof(); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; atomKK->modified(fix[list_min_energy[i]]->execution_space, fix[list_min_energy[i]]->datamask_modify); } @@ -710,9 +749,10 @@ int ModifyKokkos::min_reset_ref() for (int i = 0; i < n_min_energy; i++) { 
atomKK->sync(fix[list_min_energy[i]]->execution_space, fix[list_min_energy[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; if (!fix[list_min_energy[i]]->kokkosable) lmp->kokkos->auto_sync = 1; itmp = fix[list_min_energy[i]]->min_reset_ref(); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = prev_auto_sync; if (itmp) itmpall = 1; atomKK->modified(fix[list_min_energy[i]]->execution_space, fix[list_min_energy[i]]->datamask_modify); diff --git a/src/KOKKOS/pair_coul_debye_kokkos.cpp b/src/KOKKOS/pair_coul_debye_kokkos.cpp index dc85c39832..0771572e46 100644 --- a/src/KOKKOS/pair_coul_debye_kokkos.cpp +++ b/src/KOKKOS/pair_coul_debye_kokkos.cpp @@ -241,7 +241,7 @@ void PairCoulDebyeKokkos::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } diff --git a/src/KOKKOS/pair_reax_c_kokkos.cpp b/src/KOKKOS/pair_reaxc_kokkos.cpp similarity index 99% rename from src/KOKKOS/pair_reax_c_kokkos.cpp rename to src/KOKKOS/pair_reaxc_kokkos.cpp index 87915dce3e..5ef0dae142 100644 --- a/src/KOKKOS/pair_reax_c_kokkos.cpp +++ b/src/KOKKOS/pair_reaxc_kokkos.cpp @@ -19,7 +19,7 @@ #include #include #include -#include "pair_reax_c_kokkos.h" +#include "pair_reaxc_kokkos.h" #include "kokkos.h" #include "atom_kokkos.h" #include "comm.h" @@ -2292,12 +2292,12 @@ void PairReaxCKokkos::operator()(PairReaxComputeMulti2 0) + if (numbonds > 0 || control->enobondsflag) e_lp = p_lp2 * d_Delta_lp[i] * inv_expvd2; const F_FLOAT dElp = p_lp2 * inv_expvd2 + 75.0 * p_lp2 * d_Delta_lp[i] * expvd2 * inv_expvd2*inv_expvd2; const F_FLOAT CElp = dElp * d_dDelta_lp[i]; - if (numbonds > 0) + if (numbonds > 0 || control->enobondsflag) a_CdDelta[i] += CElp; if (eflag) ev.ereax[0] += e_lp; @@ -2334,7 +2334,7 @@ void PairReaxCKokkos::operator()(PairReaxComputeMulti2 0) + if (numbonds > 0 || control->enobondsflag) e_un = -p_ovun5 * 
(1.0 - exp_ovun6) * inv_exp_ovun2n * inv_exp_ovun8; if (eflag) ev.ereax[2] += e_un; @@ -2354,7 +2354,7 @@ void PairReaxCKokkos::operator()(PairReaxComputeMulti2 0) + if (numbonds > 0 || control->enobondsflag) a_CdDelta[i] += CEunder3; const int j_start = d_bo_first[i]; diff --git a/src/KOKKOS/pair_reax_c_kokkos.h b/src/KOKKOS/pair_reaxc_kokkos.h similarity index 99% rename from src/KOKKOS/pair_reax_c_kokkos.h rename to src/KOKKOS/pair_reaxc_kokkos.h index 8a0c08b660..59c4d196d5 100644 --- a/src/KOKKOS/pair_reax_c_kokkos.h +++ b/src/KOKKOS/pair_reaxc_kokkos.h @@ -25,7 +25,7 @@ PairStyle(reax/c/kk/host,PairReaxCKokkos) #include #include "pair_kokkos.h" -#include "pair_reax_c.h" +#include "pair_reaxc.h" #include "neigh_list_kokkos.h" #include "reaxc_types.h" diff --git a/src/KOKKOS/verlet_kokkos.cpp b/src/KOKKOS/verlet_kokkos.cpp index 53b4042376..e4a3f857d3 100644 --- a/src/KOKKOS/verlet_kokkos.cpp +++ b/src/KOKKOS/verlet_kokkos.cpp @@ -64,14 +64,17 @@ VerletKokkos::VerletKokkos(LAMMPS *lmp, int narg, char **arg) : setup before run ------------------------------------------------------------------------- */ -void VerletKokkos::setup() +void VerletKokkos::setup(int flag) { if (comm->me == 0 && screen) { fprintf(screen,"Setting up Verlet run ...\n"); - fprintf(screen," Unit style : %s\n", update->unit_style); - fprintf(screen," Current step : " BIGINT_FORMAT "\n", update->ntimestep); - fprintf(screen," Time step : %g\n", update->dt); - timer->print_timeout(screen); + if (flag) { + fprintf(screen," Unit style : %s\n", update->unit_style); + fprintf(screen," Current step : " BIGINT_FORMAT "\n", + update->ntimestep); + fprintf(screen," Time step : %g\n", update->dt); + timer->print_timeout(screen); + } } update->setupflag = 1; @@ -169,7 +172,7 @@ void VerletKokkos::setup() if (force->newton) comm->reverse_comm(); modify->setup(vflag); - output->setup(); + output->setup(flag); lmp->kokkos->auto_sync = 1; update->setupflag = 1; } diff --git a/src/KOKKOS/verlet_kokkos.h 
b/src/KOKKOS/verlet_kokkos.h index 03a9383324..6455239204 100644 --- a/src/KOKKOS/verlet_kokkos.h +++ b/src/KOKKOS/verlet_kokkos.h @@ -29,7 +29,7 @@ class VerletKokkos : public Verlet { public: VerletKokkos(class LAMMPS *, int, char **); ~VerletKokkos() {} - void setup(); + void setup(int flag=1); void setup_minimal(int); void run(int); diff --git a/src/KSPACE/ewald_disp.cpp b/src/KSPACE/ewald_disp.cpp index 467a748d08..85e3da921b 100644 --- a/src/KSPACE/ewald_disp.cpp +++ b/src/KSPACE/ewald_disp.cpp @@ -138,13 +138,14 @@ void EwaldDisp::init() nsums += n[k]; } - if (!gewaldflag) g_ewald = 0.0; + if (!gewaldflag) g_ewald = g_ewald_6 = 1.0; pair->init(); // so B is defined init_coeffs(); init_coeff_sums(); if (function[0]) qsum_qsq(); else qsqsum = qsum = 0.0; natoms_original = atom->natoms; + if (!gewaldflag) g_ewald = g_ewald_6 = 0.0; // turn off coulombic if no charge @@ -218,8 +219,8 @@ void EwaldDisp::init() } if (!comm->me) { - if (screen) fprintf(screen, " G vector = %g\n", g_ewald); - if (logfile) fprintf(logfile, " G vector = %g\n", g_ewald); + if (screen) fprintf(screen, " G vector = %g, accuracy = %g\n", g_ewald,accuracy); + if (logfile) fprintf(logfile, " G vector = %g accuracy = %g\n", g_ewald,accuracy); } g_ewald_6 = g_ewald; diff --git a/src/KSPACE/pair_born_coul_long.cpp b/src/KSPACE/pair_born_coul_long.cpp index 14d43f4c63..e588a30b55 100644 --- a/src/KSPACE/pair_born_coul_long.cpp +++ b/src/KSPACE/pair_born_coul_long.cpp @@ -250,7 +250,7 @@ void PairBornCoulLong::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut_lj[i][j] = cut_lj_global; } } diff --git a/src/KSPACE/pair_buck_coul_long.cpp b/src/KSPACE/pair_buck_coul_long.cpp index 9cd8485e5c..476e3c716a 100644 --- a/src/KSPACE/pair_buck_coul_long.cpp +++ b/src/KSPACE/pair_buck_coul_long.cpp @@ -240,7 +240,7 @@ void PairBuckCoulLong::settings(int narg, char 
**arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut_lj[i][j] = cut_lj_global; } } diff --git a/src/KSPACE/pair_buck_long_coul_long.cpp b/src/KSPACE/pair_buck_long_coul_long.cpp index 26bcb136b3..8aa4d72083 100644 --- a/src/KSPACE/pair_buck_long_coul_long.cpp +++ b/src/KSPACE/pair_buck_long_coul_long.cpp @@ -104,7 +104,7 @@ void PairBuckLongCoulLong::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut_buck[i][j] = cut_buck_global; } } diff --git a/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp b/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp index 11c7a147e7..6e17a9bbd7 100644 --- a/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp +++ b/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp @@ -57,6 +57,10 @@ PairLJCharmmfswCoulLong::PairLJCharmmfswCoulLong(LAMMPS *lmp) : Pair(lmp) implicit = 0; mix_flag = ARITHMETIC; writedata = 1; + + // short-range/long-range flag accessed by DihedralCharmmfsw + + dihedflag = 1; } /* ---------------------------------------------------------------------- */ @@ -669,10 +673,6 @@ void PairLJCharmmfswCoulLong::settings(int narg, char **arg) cut_lj = force->numeric(FLERR,arg[1]); if (narg == 2) cut_coul = cut_lj; else cut_coul = force->numeric(FLERR,arg[2]); - - // indicates pair_style being used for dihedral_charmm - - dihedflag = 1; } /* ---------------------------------------------------------------------- diff --git a/src/KSPACE/pair_lj_cut_coul_long.cpp b/src/KSPACE/pair_lj_cut_coul_long.cpp index 764aebc522..e9799843fc 100644 --- a/src/KSPACE/pair_lj_cut_coul_long.cpp +++ b/src/KSPACE/pair_lj_cut_coul_long.cpp @@ -608,7 +608,7 @@ void PairLJCutCoulLong::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j 
<= atom->ntypes; j++) if (setflag[i][j]) cut_lj[i][j] = cut_lj_global; } } diff --git a/src/KSPACE/pair_lj_cut_tip4p_long.cpp b/src/KSPACE/pair_lj_cut_tip4p_long.cpp index 146d4e6f37..588d21ac66 100644 --- a/src/KSPACE/pair_lj_cut_tip4p_long.cpp +++ b/src/KSPACE/pair_lj_cut_tip4p_long.cpp @@ -450,7 +450,7 @@ void PairLJCutTIP4PLong::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut_lj[i][j] = cut_lj_global; } } diff --git a/src/KSPACE/pair_lj_long_coul_long.cpp b/src/KSPACE/pair_lj_long_coul_long.cpp index e474347935..44256a9fbb 100644 --- a/src/KSPACE/pair_lj_long_coul_long.cpp +++ b/src/KSPACE/pair_lj_long_coul_long.cpp @@ -103,7 +103,7 @@ void PairLJLongCoulLong::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut_lj[i][j] = cut_lj_global; } } diff --git a/src/KSPACE/pair_lj_long_tip4p_long.cpp b/src/KSPACE/pair_lj_long_tip4p_long.cpp index c3d95c37a6..fd318fd75b 100644 --- a/src/KSPACE/pair_lj_long_tip4p_long.cpp +++ b/src/KSPACE/pair_lj_long_tip4p_long.cpp @@ -1439,8 +1439,8 @@ void PairLJLongTIP4PLong::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) - if (setflag[i][j]) cut_lj[i][j] = cut_lj_global; + for (j = i; j <= atom->ntypes; j++) + if (setflag[i][j]) cut_lj[i][j] = cut_lj_global; } } diff --git a/src/KSPACE/pppm_disp.cpp b/src/KSPACE/pppm_disp.cpp index 5d6c2042be..b31d42a815 100644 --- a/src/KSPACE/pppm_disp.cpp +++ b/src/KSPACE/pppm_disp.cpp @@ -380,6 +380,12 @@ void PPPMDisp::init() alpha = qdist / (cos(0.5*theta) * blen); } + //if g_ewald and g_ewald_6 have not been specified, set some initial value + // to avoid problems when calculating the energies! 
+ + if (!gewaldflag) g_ewald = 1; + if (!gewaldflag_6) g_ewald_6 = 1; + // initialize the pair style to get the coefficients neighrequest_flag = 0; @@ -387,12 +393,6 @@ void PPPMDisp::init() neighrequest_flag = 1; init_coeffs(); - //if g_ewald and g_ewald_6 have not been specified, set some initial value - // to avoid problems when calculating the energies! - - if (!gewaldflag) g_ewald = 1; - if (!gewaldflag_6) g_ewald_6 = 1; - // set accuracy (force units) from accuracy_relative or accuracy_absolute if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; diff --git a/src/MANYBODY/pair_airebo.cpp b/src/MANYBODY/pair_airebo.cpp index abf75c85c5..cc7efbcaa6 100644 --- a/src/MANYBODY/pair_airebo.cpp +++ b/src/MANYBODY/pair_airebo.cpp @@ -158,7 +158,7 @@ void PairAIREBO::settings(int narg, char **arg) // this one parameter for C-C interactions is different in AIREBO vs REBO // see Favata, Micheletti, Ryu, Pugno, Comp Phys Comm (2016) - + PCCf_2_0 = -0.0276030; } @@ -1335,7 +1335,7 @@ double PairAIREBO::bondorder(int i, int j, double rij[3], dN2[0] = 0.0; dN2[1] = 0.0; PijS = PijSpline(NijC,NijH,itype,jtype,dN2); - pij = pow(1.0+Etmp+PijS,-0.5); + pij = 1.0/sqrt(1.0+Etmp+PijS); tmp = -0.5*cube(pij); // pij forces @@ -1480,7 +1480,7 @@ double PairAIREBO::bondorder(int i, int j, double rij[3], dN2[0] = 0.0; dN2[1] = 0.0; PjiS = PijSpline(NjiC,NjiH,jtype,itype,dN2); - pji = pow(1.0+Etmp+PjiS,-0.5); + pji = 1.0/sqrt(1.0+Etmp+PjiS); tmp = -0.5*cube(pji); REBO_neighs = REBO_firstneigh[j]; @@ -1850,7 +1850,7 @@ double PairAIREBO::bondorder(int i, int j, double rij[3], (1.0-tspjik)*(1.0-tspijl); aaa1 = -prefactor*(1.0-square(om1234)) * (1.0-tspjik)*(1.0-tspijl); - aaa2 = aaa1*w21*w34; + aaa2 = -prefactor*(1.0-square(om1234)) * w21*w34; at2 = aa*cwnum; fcijpc = (-dt1dij*at2)+(aaa2*dtsjik*dctij*(1.0-tspijl)) + @@ -2080,9 +2080,7 @@ double PairAIREBO::bondorderLJ(int i, int j, double rij[3], double rijmag, double rikmag,rjlmag,cosjik,cosijl,g,tmp2,tmp3; double 
Etmp,pij,tmp,wij,dwij,NconjtmpI,NconjtmpJ; double Nki,Nlj,dS,lamdajik,lamdaijl,dgdc,dgdN,pji,Nijconj,piRC; - double dcosjikdri[3],dcosijldri[3],dcosjikdrk[3]; - double dN2[2],dN3[3]; - double dcosijldrj[3],dcosijldrl[3],dcosjikdrj[3],dwjl; + double dN2[2],dN3[3],dwjl; double Tij,crosskij[3],crosskijmag; double crossijl[3],crossijlmag,omkijl; double tmppij,tmppji,dN2PIJ[2],dN2PJI[2],dN3piRC[3],dN3Tij[3]; @@ -2092,16 +2090,16 @@ double PairAIREBO::bondorderLJ(int i, int j, double rij[3], double rijmag, double rlnmag,dwln,r23[3],r23mag,r21[3],r21mag; double w21,dw21,r34[3],r34mag,cos234,w34,dw34; double cross321[3],cross234[3],prefactor,SpN; - double fcijpc,fcikpc,fcjlpc,fcjkpc,fcilpc; - double dt2dik[3],dt2djl[3],dt2dij[3],aa,aaa1,aaa2,at2,cw,cwnum,cwnom; + double fcikpc,fcjlpc,fcjkpc,fcilpc; + double dt2dik[3],dt2djl[3],aa,aaa1,aaa2,at2,cw,cwnum,cwnom; double sin321,sin234,rr,rijrik,rijrjl,rjk2,rik2,ril2,rjl2; - double dctik,dctjk,dctjl,dctij,dctji,dctil,rik2i,rjl2i,sink2i,sinl2i; - double rjk[3],ril[3],dt1dik,dt1djk,dt1djl,dt1dil,dt1dij; + double dctik,dctjk,dctjl,dctil,rik2i,rjl2i,sink2i,sinl2i; + double rjk[3],ril[3],dt1dik,dt1djk,dt1djl,dt1dil; double dNlj; double PijS,PjiS; double rij2,tspjik,dtsjik,tspijl,dtsijl,costmp; int *REBO_neighs,*REBO_neighs_i,*REBO_neighs_j,*REBO_neighs_k,*REBO_neighs_l; - double F12[3],F23[3],F34[3],F31[3],F24[3]; + double F12[3],F34[3],F31[3],F24[3]; double fi[3],fj[3],fk[3],fl[3],f1[3],f2[3],f3[3],f4[4]; double rji[3],rki[3],rlj[3],r13[3],r43[3]; double realrij[3], realrijmag; @@ -2136,7 +2134,7 @@ double PairAIREBO::bondorderLJ(int i, int j, double rij[3], double rijmag, realrij[0] = x[atomi][0] - x[atomj][0]; realrij[1] = x[atomi][1] - x[atomj][1]; realrij[2] = x[atomi][2] - x[atomj][2]; - realrijmag = sqrt(realrij[0] * realrij[0] + realrij[1] * realrij[1] + realrijmag = sqrt(realrij[0] * realrij[0] + realrij[1] * realrij[1] + realrij[2] * realrij[2]); REBO_neighs = REBO_firstneigh[i]; @@ -2171,7 +2169,7 @@ double 
PairAIREBO::bondorderLJ(int i, int j, double rij[3], double rijmag, dN2PIJ[0] = 0.0; dN2PIJ[1] = 0.0; PijS = PijSpline(NijC,NijH,itype,jtype,dN2PIJ); - pij = pow(1.0+Etmp+PijS,-0.5); + pij = 1.0/sqrt(1.0+Etmp+PijS); tmppij = -.5*cube(pij); tmp3pij = tmp3; tmp = 0.0; @@ -2211,7 +2209,7 @@ double PairAIREBO::bondorderLJ(int i, int j, double rij[3], double rijmag, dN2PJI[0] = 0.0; dN2PJI[1] = 0.0; PjiS = PijSpline(NjiC,NjiH,jtype,itype,dN2PJI); - pji = pow(1.0+Etmp+PjiS,-0.5); + pji = 1.0/sqrt(1.0+Etmp+PjiS); tmppji = -.5*cube(pji); tmp3pji = tmp3; @@ -2481,7 +2479,7 @@ double PairAIREBO::bondorderLJ(int i, int j, double rij[3], double rijmag, fj[1] -= rijmbr * (fil[1] - (realrij[1] * realrij[0] * fil[0] + realrij[1] * realrij[1] * fil[1] + realrij[1] * realrij[2] * fil[2]) / (realrijmag * realrijmag)); fj[2] -= rijmbr * (fil[2] - (realrij[2] * realrij[0] * fil[0] + realrij[2] * realrij[1] * fil[1] + realrij[2] * realrij[2] * fil[2]) / (realrijmag * realrijmag)); - + tmp2 = VA*.5*(tmp*wjl*g*exp(lamdaijl)*4.0*kronecker(jtype,1)); fj[0] += tmp2*(rjl[0]/rjlmag); fj[1] += tmp2*(rjl[1]/rjlmag); @@ -2780,7 +2778,7 @@ double PairAIREBO::bondorderLJ(int i, int j, double rij[3], double rijmag, (1.0-tspjik)*(1.0-tspijl); aaa1 = -prefactor*(1.0-square(om1234)) * (1.0-tspjik)*(1.0-tspijl); - aaa2 = aaa1*w21*w34; + aaa2 = -prefactor*(1.0-square(om1234)) * w21*w34; at2 = aa*cwnum; fcikpc = (-dt1dik*at2)+(aaa2*dtsjik*dctik*(1.0-tspijl)); @@ -3094,72 +3092,58 @@ double PairAIREBO::gSpline(double costh, double Nij, int typei, double PairAIREBO::PijSpline(double NijC, double NijH, int typei, int typej, double dN2[2]) { - int x,y,i,done; - double Pij,coeffs[16]; - - for (i = 0; i < 16; i++) coeffs[i]=0.0; + int x,y; + double Pij; x = 0; y = 0; dN2[0] = 0.0; dN2[1] = 0.0; - done = 0; + Pij = 0.0; - // if inputs are out of bounds set them back to a point in bounds + if (typei == 1) return Pij; + + if (typej == 0) { + + // if inputs are out of bounds set them back to a point in bounds - if 
(typei == 0 && typej == 0) { if (NijC < pCCdom[0][0]) NijC=pCCdom[0][0]; if (NijC > pCCdom[0][1]) NijC=pCCdom[0][1]; if (NijH < pCCdom[1][0]) NijH=pCCdom[1][0]; if (NijH > pCCdom[1][1]) NijH=pCCdom[1][1]; + x = (int) floor(NijC); + y = (int) floor(NijH); if (fabs(NijC-floor(NijC)) < TOL && fabs(NijH-floor(NijH)) < TOL) { - Pij = PCCf[(int) NijC][(int) NijH]; - dN2[0] = PCCdfdx[(int) NijC][(int) NijH]; - dN2[1] = PCCdfdy[(int) NijC][(int) NijH]; - done = 1; + Pij = PCCf[x][y]; + dN2[0] = PCCdfdx[x][y]; + dN2[1] = PCCdfdy[x][y]; + } else { + if (NijC == pCCdom[0][1]) --x; + if (NijH == pCCdom[1][1]) --y; + Pij = Spbicubic(NijC,NijH,pCC[x][y],dN2); } - if (done == 0) { - x = (int) (floor(NijC)); - y = (int) (floor(NijH)); - for (i = 0; i<16; i++) coeffs[i] = pCC[x][y][i]; - Pij = Spbicubic(NijC,NijH,coeffs,dN2); - } - } - // if inputs are out of bounds set them back to a point in bounds + } else if (typej == 1) { - if (typei == 0 && typej == 1){ - if (NijC < pCHdom[0][0]) NijC=pCHdom[0][0]; - if (NijC > pCHdom[0][1]) NijC=pCHdom[0][1]; - if (NijH < pCHdom[1][0]) NijH=pCHdom[1][0]; - if (NijH > pCHdom[1][1]) NijH=pCHdom[1][1]; + // if inputs are out of bounds set them back to a point in bounds + + if (NijC < pCHdom[0][0]) NijC=pCHdom[0][0]; + if (NijC > pCHdom[0][1]) NijC=pCHdom[0][1]; + if (NijH < pCHdom[1][0]) NijH=pCHdom[1][0]; + if (NijH > pCHdom[1][1]) NijH=pCHdom[1][1]; + x = (int) floor(NijC); + y = (int) floor(NijH); if (fabs(NijC-floor(NijC)) < TOL && fabs(NijH-floor(NijH)) < TOL) { - Pij = PCHf[(int) NijC][(int) NijH]; - dN2[0] = PCHdfdx[(int) NijC][(int) NijH]; - dN2[1] = PCHdfdy[(int) NijC][(int) NijH]; - done = 1; + Pij = PCHf[x][y]; + dN2[0] = PCHdfdx[x][y]; + dN2[1] = PCHdfdy[x][y]; + } else { + if (NijC == pCHdom[0][1]) --x; + if (NijH == pCHdom[1][1]) --y; + Pij = Spbicubic(NijC,NijH,pCH[x][y],dN2); } - if (done == 0) { - x = (int) (floor(NijC)); - y = (int) (floor(NijH)); - for (i = 0; i<16; i++) coeffs[i] = pCH[x][y][i]; - Pij = 
Spbicubic(NijC,NijH,coeffs,dN2); - } - } - - if (typei == 1 && typej == 0) { - Pij = 0.0; - dN2[0] = 0.0; - dN2[1] = 0.0; - } - - - if (typei == 1 && typej == 1) { - Pij = 0.0; - dN2[0] = 0.0; - dN2[1] = 0.0; } return Pij; } @@ -3171,115 +3155,93 @@ double PairAIREBO::PijSpline(double NijC, double NijH, int typei, int typej, double PairAIREBO::piRCSpline(double Nij, double Nji, double Nijconj, int typei, int typej, double dN3[3]) { - int x,y,z,i,done; - double piRC,coeffs[64]; + int x,y,z; + double piRC; x=0; y=0; z=0; - i=0; - - done=0; - - for (i=0; i<64; i++) coeffs[i]=0.0; + dN3[0]=0.0; + dN3[1]=0.0; + dN3[2]=0.0; if (typei==0 && typej==0) { - // if the inputs are out of bounds set them back to a point in bounds - - if (NijpiCCdom[0][1]) Nij=piCCdom[0][1]; - if (NjipiCCdom[1][1]) Nji=piCCdom[1][1]; - if (NijconjpiCCdom[2][1]) Nijconj=piCCdom[2][1]; - - if (fabs(Nij-floor(Nij))=(double) i && Nij<=(double) i+1) x=i; - for (i=0; i=(double) i && Nji<=(double) i+1) y=i; - for (i=0; i=(double) i && Nijconj<=(double) i+1) z=i; - - for (i=0; i<64; i++) coeffs[i]=piCC[x][y][z][i]; - piRC=Sptricubic(Nij,Nji,Nijconj,coeffs,dN3); - } - } - - // CH interaction - - if ((typei==0 && typej==1) || (typei==1 && typej==0)) { + // CC interaction // if the inputs are out of bounds set them back to a point in bounds - if (NijpiCHdom[0][1] || - NjipiCHdom[1][1] || - NijconjpiCHdom[2][1]) { - if (NijpiCHdom[0][1]) Nij=piCHdom[0][1]; - if (NjipiCHdom[1][1]) Nji=piCHdom[1][1]; - if (NijconjpiCHdom[2][1]) Nijconj=piCHdom[2][1]; - } + if (Nij < piCCdom[0][0]) Nij=piCCdom[0][0]; + if (Nij > piCCdom[0][1]) Nij=piCCdom[0][1]; + if (Nji < piCCdom[1][0]) Nji=piCCdom[1][0]; + if (Nji > piCCdom[1][1]) Nji=piCCdom[1][1]; + if (Nijconj < piCCdom[2][0]) Nijconj=piCCdom[2][0]; + if (Nijconj > piCCdom[2][1]) Nijconj=piCCdom[2][1]; + x = (int) floor(Nij); + y = (int) floor(Nji); + z = (int) floor(Nijconj); - if (fabs(Nij-floor(Nij))=i && Nij<=i+1) x=i; - for (i=0; i=i && Nji<=i+1) y=i; - for (i=0; i=i 
&& Nijconj<=i+1) z=i; + // CH interaction - for (i=0; i<64; i++) coeffs[i]=piCH[x][y][z][i]; - piRC=Sptricubic(Nij,Nji,Nijconj,coeffs,dN3); + // if the inputs are out of bounds set them back to a point in bounds + + if (Nij < piCHdom[0][0]) Nij=piCHdom[0][0]; + if (Nij > piCHdom[0][1]) Nij=piCHdom[0][1]; + if (Nji < piCHdom[1][0]) Nji=piCHdom[1][0]; + if (Nji > piCHdom[1][1]) Nji=piCHdom[1][1]; + if (Nijconj < piCHdom[2][0]) Nijconj=piCHdom[2][0]; + if (Nijconj > piCHdom[2][1]) Nijconj=piCHdom[2][1]; + x = (int) floor(Nij); + y = (int) floor(Nji); + z = (int) floor(Nijconj); + + if (fabs(Nij-floor(Nij)) < TOL && fabs(Nji-floor(Nji)) < TOL + && fabs(Nijconj-floor(Nijconj)) < TOL) { + piRC=piCHf[x][y][z]; + dN3[0]=piCHdfdx[x][y][z]; + dN3[1]=piCHdfdy[x][y][z]; + dN3[2]=piCHdfdz[x][y][z]; + } else { + if (Nij == piCHdom[0][1]) --x; + if (Nji == piCHdom[1][1]) --y; + if (Nijconj == piCHdom[2][1]) --z; + piRC=Sptricubic(Nij,Nji,Nijconj,piCH[x][y][z],dN3); } - } + } else if (typei==1 && typej==1) { + if (Nij < piHHdom[0][0]) Nij=piHHdom[0][0]; + if (Nij > piHHdom[0][1]) Nij=piHHdom[0][1]; + if (Nji < piHHdom[1][0]) Nji=piHHdom[1][0]; + if (Nji > piHHdom[1][1]) Nji=piHHdom[1][1]; + if (Nijconj < piHHdom[2][0]) Nijconj=piHHdom[2][0]; + if (Nijconj > piHHdom[2][1]) Nijconj=piHHdom[2][1]; + x = (int) floor(Nij); + y = (int) floor(Nji); + z = (int) floor(Nijconj); - if (typei==1 && typej==1) { - if (NijpiHHdom[0][1] || - NjipiHHdom[1][1] || - NijconjpiHHdom[2][1]) { - Nij=0.0; - Nji=0.0; - Nijconj=0.0; - } - if (fabs(Nij-floor(Nij))=i && Nij<=i+1) x=i; - for (i=0; i=i && Nji<=i+1) y=i; - for (i=0; i=i && Nijconj<=i+1) z=i; - - for (i=0; i<64; i++) coeffs[i]=piHH[x][y][z][i]; - piRC=Sptricubic(Nij,Nji,Nijconj,coeffs,dN3); + if (fabs(Nij-floor(Nij)) < TOL && fabs(Nji-floor(Nji)) < TOL + && fabs(Nijconj-floor(Nijconj)) < TOL) { + piRC=piHHf[x][y][z]; + dN3[0]=piHHdfdx[x][y][z]; + dN3[1]=piHHdfdy[x][y][z]; + dN3[2]=piHHdfdz[x][y][z]; + } else { + if (Nij == piHHdom[0][1]) --x; + 
if (Nji == piHHdom[1][1]) --y; + if (Nijconj == piHHdom[2][1]) --z; + piRC=Sptricubic(Nij,Nji,Nijconj,piHH[x][y][z],dN3); } } @@ -3293,45 +3255,40 @@ double PairAIREBO::piRCSpline(double Nij, double Nji, double Nijconj, double PairAIREBO::TijSpline(double Nij, double Nji, double Nijconj, double dN3[3]) { - int x,y,z,i,done; - double Tijf,coeffs[64]; + int x,y,z; + double Tijf; x=0; y=0; z=0; - i=0; Tijf=0.0; - done=0; - for (i=0; i<64; i++) coeffs[i]=0.0; + dN3[0]=0.0; + dN3[1]=0.0; + dN3[2]=0.0; //if the inputs are out of bounds set them back to a point in bounds - if (NijTijdom[0][1]) Nij=Tijdom[0][1]; - if (NjiTijdom[1][1]) Nji=Tijdom[1][1]; - if (NijconjTijdom[2][1]) Nijconj=Tijdom[2][1]; + if (Nij < Tijdom[0][0]) Nij=Tijdom[0][0]; + if (Nij > Tijdom[0][1]) Nij=Tijdom[0][1]; + if (Nji < Tijdom[1][0]) Nji=Tijdom[1][0]; + if (Nji > Tijdom[1][1]) Nji=Tijdom[1][1]; + if (Nijconj < Tijdom[2][0]) Nijconj=Tijdom[2][0]; + if (Nijconj > Tijdom[2][1]) Nijconj=Tijdom[2][1]; + x = (int) floor(Nij); + y = (int) floor(Nji); + z = (int) floor(Nijconj); - if (fabs(Nij-floor(Nij))=i && Nij<=i+1) x=i; - for (i=0; i=i && Nji<=i+1) y=i; - for (i=0; i=i && Nijconj<=i+1) z=i; - - for (i=0; i<64; i++) coeffs[i]=Tijc[x][y][z][i]; - Tijf=Sptricubic(Nij,Nji,Nijconj,coeffs,dN3); + if (fabs(Nij-floor(Nij)) < TOL && fabs(Nji-floor(Nji)) < TOL + && fabs(Nijconj-floor(Nijconj)) < TOL) { + Tijf=Tf[x][y][z]; + dN3[0]=Tdfdx[x][y][z]; + dN3[1]=Tdfdy[x][y][z]; + dN3[2]=Tdfdz[x][y][z]; + } else { + if (Nij == Tijdom[0][1]) --x; + if (Nji == Tijdom[1][1]) --y; + if (Nijconj == Tijdom[2][1]) --z; + Tijf=Sptricubic(Nij,Nji,Nijconj,Tijc[x][y][z],dN3); } return Tijf; diff --git a/src/MANYBODY/pair_gw.cpp b/src/MANYBODY/pair_gw.cpp new file mode 100644 index 0000000000..e4090dbed2 --- /dev/null +++ b/src/MANYBODY/pair_gw.cpp @@ -0,0 +1,763 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + 
http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: German Samolyuk (ORNL) + based on PairTersoff by Aidan Thompson (SNL) +------------------------------------------------------------------------- */ + +#include +#include +#include +#include +#include "pair_gw.h" +#include "atom.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "force.h" +#include "comm.h" +#include "memory.h" +#include "error.h" + +#include "math_const.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define MAXLINE 1024 +#define DELTA 4 + +/* ---------------------------------------------------------------------- */ + +PairGW::PairGW(LAMMPS *lmp) : Pair(lmp) +{ + single_enable = 0; + restartinfo = 0; + one_coeff = 1; + manybody_flag = 1; + + nelements = 0; + elements = NULL; + nparams = maxparam = 0; + params = NULL; + elem2param = NULL; + map = NULL; +} + +/* ---------------------------------------------------------------------- + check if allocated, since class can be destructed when incomplete +------------------------------------------------------------------------- */ + +PairGW::~PairGW() +{ + if (elements) + for (int i = 0; i < nelements; i++) delete [] elements[i]; + delete [] elements; + memory->destroy(params); + memory->destroy(elem2param); + + if (allocated) { + memory->destroy(setflag); + memory->destroy(cutsq); + delete [] map; + } +} + +/* ---------------------------------------------------------------------- */ + 
+void PairGW::compute(int eflag, int vflag) +{ + int i,j,k,ii,jj,kk,inum,jnum; + int itag,jtag,itype,jtype,ktype,iparam_ij,iparam_ijk; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,rsq1,rsq2; + double delr1[3],delr2[3],fi[3],fj[3],fk[3]; + double zeta_ij, prefactor; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = vflag_atom = 0; + + double **x = atom->x; + double **f = atom->f; + tagint *tag = atom->tag; + int *type = atom->type; + int nlocal = atom->nlocal; + int newton_pair = force->newton_pair; + + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over full neighbor list of my atoms + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + itag = tag[i]; + itype = map[type[i]]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + // two-body interactions, skip half of them + + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtag = tag[j]; + + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (x[j][2] < x[i][2]) continue; + if (x[j][2] == ztmp && x[j][1] < ytmp) continue; + if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue; + } + + jtype = map[type[j]]; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + iparam_ij = elem2param[itype][jtype][jtype]; + if (rsq > params[iparam_ij].cutsq) continue; + + repulsive(¶ms[iparam_ij],rsq,fpair,eflag,evdwl); + + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + + if (evflag) ev_tally(i,j,nlocal,newton_pair, + evdwl,0.0,fpair,delx,dely,delz); + } + + // three-body interactions + // skip immediately if I-J is not 
within cutoff + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtype = map[type[j]]; + iparam_ij = elem2param[itype][jtype][jtype]; + + delr1[0] = x[j][0] - xtmp; + delr1[1] = x[j][1] - ytmp; + delr1[2] = x[j][2] - ztmp; + rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2]; + if (rsq1 > params[iparam_ij].cutsq) continue; + + // accumulate bondorder zeta for each i-j interaction via loop over k + + zeta_ij = 1.0; + + for (kk = 0; kk < jnum; kk++) { + if (jj == kk) continue; + k = jlist[kk]; + k &= NEIGHMASK; + ktype = map[type[k]]; + iparam_ijk = elem2param[itype][jtype][ktype]; + + delr2[0] = x[k][0] - xtmp; + delr2[1] = x[k][1] - ytmp; + delr2[2] = x[k][2] - ztmp; + rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2]; + if (rsq2 > params[iparam_ijk].cutsq) continue; + + zeta_ij += zeta(¶ms[iparam_ijk],rsq1,rsq2,delr1,delr2); + } + + // pairwise force due to zeta + + force_zeta(¶ms[iparam_ij],rsq1,zeta_ij,fpair,prefactor,eflag,evdwl); + + f[i][0] += delr1[0]*fpair; + f[i][1] += delr1[1]*fpair; + f[i][2] += delr1[2]*fpair; + f[j][0] -= delr1[0]*fpair; + f[j][1] -= delr1[1]*fpair; + f[j][2] -= delr1[2]*fpair; + + if (evflag) ev_tally(i,j,nlocal,newton_pair, + evdwl,0.0,-fpair,-delr1[0],-delr1[1],-delr1[2]); + + // attractive term via loop over k + + for (kk = 0; kk < jnum; kk++) { + if (jj == kk) continue; + k = jlist[kk]; + k &= NEIGHMASK; + ktype = map[type[k]]; + iparam_ijk = elem2param[itype][jtype][ktype]; + + delr2[0] = x[k][0] - xtmp; + delr2[1] = x[k][1] - ytmp; + delr2[2] = x[k][2] - ztmp; + rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2]; + if (rsq2 > params[iparam_ijk].cutsq) continue; + + attractive(¶ms[iparam_ijk],prefactor, + rsq1,rsq2,delr1,delr2,fi,fj,fk); + + f[i][0] += fi[0]; + f[i][1] += fi[1]; + f[i][2] += fi[2]; + f[j][0] += fj[0]; + f[j][1] += fj[1]; + f[j][2] += fj[2]; + f[k][0] += fk[0]; + f[k][1] += fk[1]; + f[k][2] += fk[2]; + + if (vflag_atom) 
v_tally3(i,j,k,fj,fk,delr1,delr2); + } // kk + } // jj + } // ii + + if (vflag_fdotr) virial_fdotr_compute(); +} + +/* ---------------------------------------------------------------------- */ + +void PairGW::allocate() +{ + allocated = 1; + int n = atom->ntypes; + + memory->create(setflag,n+1,n+1,"pair:setflag"); + memory->create(cutsq,n+1,n+1,"pair:cutsq"); + + map = new int[n+1]; +} + +/* ---------------------------------------------------------------------- + global settings +------------------------------------------------------------------------- */ + +void PairGW::settings(int narg, char **arg) +{ + if (narg != 0) error->all(FLERR,"Illegal pair_style command"); +} + +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs +------------------------------------------------------------------------- */ + +void PairGW::coeff(int narg, char **arg) +{ + int i,j,n; + + if (!allocated) allocate(); + + if (narg != 3 + atom->ntypes) + error->all(FLERR,"Incorrect args for pair coefficients"); + + // insure I,J args are * * + + if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0) + error->all(FLERR,"Incorrect args for pair coefficients"); + + // read args that map atom types to elements in potential file + // map[i] = which element the Ith atom type is, -1 if NULL + // nelements = # of unique elements + // elements = list of element names + + if (elements) { + for (i = 0; i < nelements; i++) delete [] elements[i]; + delete [] elements; + } + elements = new char*[atom->ntypes]; + for (i = 0; i < atom->ntypes; i++) elements[i] = NULL; + + nelements = 0; + for (i = 3; i < narg; i++) { + if (strcmp(arg[i],"NULL") == 0) { + map[i-2] = -1; + continue; + } + for (j = 0; j < nelements; j++) + if (strcmp(arg[i],elements[j]) == 0) break; + map[i-2] = j; + if (j == nelements) { + n = strlen(arg[i]) + 1; + elements[j] = new char[n]; + strcpy(elements[j],arg[i]); + nelements++; + } + } + + // read potential file and 
initialize potential parameters + + read_file(arg[2]); + setup_params(); + + // clear setflag since coeff() called once with I,J = * * + + n = atom->ntypes; + for (i = 1; i <= n; i++) + for (j = i; j <= n; j++) + setflag[i][j] = 0; + + // set setflag i,j for type pairs where both are mapped to elements + + int count = 0; + for (i = 1; i <= n; i++) + for (j = i; j <= n; j++) + if (map[i] >= 0 && map[j] >= 0) { + setflag[i][j] = 1; + count++; + } + + if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +void PairGW::init_style() +{ + if (atom->tag_enable == 0) + error->all(FLERR,"Pair style GW requires atom IDs"); + if (force->newton_pair == 0) + error->all(FLERR,"Pair style GW requires newton pair on"); + + // need a full neighbor list + + int irequest = neighbor->request(this,instance_me); + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i +------------------------------------------------------------------------- */ + +double PairGW::init_one(int i, int j) +{ + if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set"); + + return cutmax; +} + +/* ---------------------------------------------------------------------- */ + +void PairGW::read_file(char *file) +{ + int params_per_line = 17; + char **words = new char*[params_per_line+1]; + + memory->sfree(params); + params = NULL; + nparams = maxparam = 0; + + // open file on proc 0 + + FILE *fp; + if (comm->me == 0) { + fp = force->open_potential(file); + if (fp == NULL) { + char str[128]; + sprintf(str,"Cannot open GW potential file %s",file); + error->one(FLERR,str); + } + } + + // read each line out of file, skipping blank lines 
or leading '#' + // store line of params if all 3 element tags are in element list + + int n,nwords,ielement,jelement,kelement; + char line[MAXLINE],*ptr; + int eof = 0; + + while (1) { + if (comm->me == 0) { + ptr = fgets(line,MAXLINE,fp); + if (ptr == NULL) { + eof = 1; + fclose(fp); + } else n = strlen(line) + 1; + } + MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) break; + MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + + // strip comment, skip line if blank + + if ((ptr = strchr(line,'#'))) *ptr = '\0'; + nwords = atom->count_words(line); + if (nwords == 0) continue; + + // concatenate additional lines until have params_per_line words + + while (nwords < params_per_line) { + n = strlen(line); + if (comm->me == 0) { + ptr = fgets(&line[n],MAXLINE-n,fp); + if (ptr == NULL) { + eof = 1; + fclose(fp); + } else n = strlen(line) + 1; + } + MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) break; + MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + if ((ptr = strchr(line,'#'))) *ptr = '\0'; + nwords = atom->count_words(line); + } + + if (nwords != params_per_line) + error->all(FLERR,"Incorrect format in GW potential file"); + + // words = ptrs to all words in line + + nwords = 0; + words[nwords++] = strtok(line," \t\n\r\f"); + while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue; + + // ielement,jelement,kelement = 1st args + // if all 3 args are in element list, then parse this line + // else skip to next line + + for (ielement = 0; ielement < nelements; ielement++) + if (strcmp(words[0],elements[ielement]) == 0) break; + if (ielement == nelements) continue; + for (jelement = 0; jelement < nelements; jelement++) + if (strcmp(words[1],elements[jelement]) == 0) break; + if (jelement == nelements) continue; + for (kelement = 0; kelement < nelements; kelement++) + if (strcmp(words[2],elements[kelement]) == 0) break; + if (kelement == nelements) continue; + + // load up parameter settings and error check their values + + 
if (nparams == maxparam) { + maxparam += DELTA; + params = (Param *) memory->srealloc(params,maxparam*sizeof(Param), + "pair:params"); + } + + params[nparams].ielement = ielement; + params[nparams].jelement = jelement; + params[nparams].kelement = kelement; + params[nparams].powerm = atof(words[3]); + params[nparams].gamma = atof(words[4]); + params[nparams].lam3 = atof(words[5]); + params[nparams].c = atof(words[6]); + params[nparams].d = atof(words[7]); + params[nparams].h = atof(words[8]); + params[nparams].powern = atof(words[9]); + params[nparams].beta = atof(words[10]); + params[nparams].lam2 = atof(words[11]); + params[nparams].bigb = atof(words[12]); + params[nparams].bigr = atof(words[13]); + params[nparams].bigd = atof(words[14]); + params[nparams].lam1 = atof(words[15]); + params[nparams].biga = atof(words[16]); + + // currently only allow m exponent of 1 or 3 + + params[nparams].powermint = int(params[nparams].powerm); + + if (params[nparams].c < 0.0 || params[nparams].d < 0.0 || + params[nparams].powern < 0.0 || params[nparams].beta < 0.0 || + params[nparams].lam2 < 0.0 || params[nparams].bigb < 0.0 || + params[nparams].bigr < 0.0 ||params[nparams].bigd < 0.0 || + params[nparams].bigd > params[nparams].bigr || + params[nparams].lam1 < 0.0 || params[nparams].biga < 0.0 || + params[nparams].powerm - params[nparams].powermint != 0.0 || + (params[nparams].powermint != 3 && params[nparams].powermint != 1) || + params[nparams].gamma < 0.0) + error->all(FLERR,"Illegal GW parameter"); + + nparams++; + } + + delete [] words; +} + +/* ---------------------------------------------------------------------- */ + +void PairGW::setup_params() +{ + int i,j,k,m,n; + + // set elem2param for all element triplet combinations + // must be a single exact match to lines read from file + // do not allow for ACB in place of ABC + + memory->destroy(elem2param); + memory->create(elem2param,nelements,nelements,nelements,"pair:elem2param"); + + for (i = 0; i < nelements; i++) + 
for (j = 0; j < nelements; j++) + for (k = 0; k < nelements; k++) { + n = -1; + for (m = 0; m < nparams; m++) { + if (i == params[m].ielement && j == params[m].jelement && + k == params[m].kelement) { + if (n >= 0) error->all(FLERR,"Potential file has duplicate entry"); + n = m; + } + } + if (n < 0) error->all(FLERR,"Potential file is missing an entry"); + elem2param[i][j][k] = n; + } + + + // compute parameter values derived from inputs + + for (m = 0; m < nparams; m++) { + params[m].cut = params[m].bigr + params[m].bigd; + params[m].cutsq = params[m].cut*params[m].cut; + + params[m].c1 = pow(2.0*params[m].powern*1.0e-16,-1.0/params[m].powern); + params[m].c2 = pow(2.0*params[m].powern*1.0e-8,-1.0/params[m].powern); + params[m].c3 = 1.0/params[m].c2; + params[m].c4 = 1.0/params[m].c1; + } + + // set cutmax to max of all params + + cutmax = 0.0; + for (m = 0; m < nparams; m++) + if (params[m].cut > cutmax) cutmax = params[m].cut; +} + +/* ---------------------------------------------------------------------- */ + +void PairGW::repulsive(Param *param, double rsq, double &fforce, + int eflag, double &eng) +{ + double r,tmp_fc,tmp_fc_d,tmp_exp; + + r = sqrt(rsq); + tmp_fc = gw_fc(r,param); + tmp_fc_d = gw_fc_d(r,param); + tmp_exp = exp(-param->lam1 * r); + fforce = -param->biga * tmp_exp * (tmp_fc_d - tmp_fc*param->lam1) / r; + if (eflag) eng = tmp_fc * param->biga * tmp_exp; +} + +/* ---------------------------------------------------------------------- */ + +double PairGW::zeta(Param *param, double rsqij, double rsqik, + double *delrij, double *delrik) +{ + double rij,rik,costheta,arg,ex_delr; + + rij = sqrt(rsqij); + rik = sqrt(rsqik); + costheta = (delrij[0]*delrik[0] + delrij[1]*delrik[1] + + delrij[2]*delrik[2]) / (rij*rik); + + if (param->powermint == 3) arg = pow(param->lam3 * (rij-rik),3.0); + else arg = param->lam3 * (rij-rik); + + if (arg > 69.0776) ex_delr = 1.e30; + else if (arg < -69.0776) ex_delr = 0.0; + else ex_delr = exp(arg); + + return 
gw_fc(rik,param) * gw_gijk(costheta,param) * ex_delr; +} + +/* ---------------------------------------------------------------------- */ + +void PairGW::force_zeta(Param *param_i, double rsq, double zeta_ij, + double &fforce, double &prefactor, + int eflag, double &eng) +{ + double r,fa,fa_d,bij; + + r = sqrt(rsq); + fa = gw_fa(r,param_i); + fa_d = gw_fa_d(r,param_i); + bij = gw_bij(zeta_ij,param_i); + fforce = 0.5*bij*fa_d / r; + prefactor = -0.5*fa * gw_bij_d(zeta_ij,param_i); + if (eflag) eng = 0.5*bij*fa; +} + +/* ---------------------------------------------------------------------- + attractive term + use param_ij cutoff for rij test + use param_ijk cutoff for rik test +------------------------------------------------------------------------- */ + +void PairGW::attractive(Param *param, double prefactor, + double rsqij, double rsqik, + double *delrij, double *delrik, + double *fi, double *fj, double *fk) +{ + double rij_hat[3],rik_hat[3]; + double rij,rijinv,rik,rikinv; + + rij = sqrt(rsqij); + rijinv = 1.0/rij; + vec3_scale(rijinv,delrij,rij_hat); + + rik = sqrt(rsqik); + rikinv = 1.0/rik; + vec3_scale(rikinv,delrik,rik_hat); + + gw_zetaterm_d(prefactor,rij_hat,rij,rik_hat,rik,fi,fj,fk,param); +} + +/* ---------------------------------------------------------------------- */ + +double PairGW::gw_fc(double r, Param *param) +{ + double gw_R = param->bigr; + double gw_D = param->bigd; + + if (r < gw_R-gw_D) return 1.0; + if (r > gw_R+gw_D) return 0.0; + return 0.5*(1.0 - sin(MY_PI2*(r - gw_R)/gw_D)); +} + +/* ---------------------------------------------------------------------- */ + +double PairGW::gw_fc_d(double r, Param *param) +{ + double gw_R = param->bigr; + double gw_D = param->bigd; + + if (r < gw_R-gw_D) return 0.0; + if (r > gw_R+gw_D) return 0.0; + return -(MY_PI4/gw_D) * cos(MY_PI2*(r - gw_R)/gw_D); +} + +/* ---------------------------------------------------------------------- */ + +double PairGW::gw_fa(double r, Param *param) +{ + if (r > 
param->bigr + param->bigd) return 0.0; + return -param->bigb * exp(-param->lam2 * r) * gw_fc(r,param); +} + +/* ---------------------------------------------------------------------- */ + +double PairGW::gw_fa_d(double r, Param *param) +{ + if (r > param->bigr + param->bigd) return 0.0; + return param->bigb * exp(-param->lam2 * r) * + (param->lam2 * gw_fc(r,param) - gw_fc_d(r,param)); +} + +/* ---------------------------------------------------------------------- */ + +double PairGW::gw_bij(double zeta_ij, Param *param_i) +{ + double tmp = param_i->beta * zeta_ij; + return pow(tmp,-param_i->powern); +} + +/* ---------------------------------------------------------------------- */ + +double PairGW::gw_bij_d(double zeta_ij, Param *param_i) +{ + double tmp = param_i->beta * zeta_ij; + return - param_i->powern * pow(tmp,-param_i->powern-1)*tmp / zeta_ij; +} + +/* ---------------------------------------------------------------------- */ + +void PairGW::gw_zetaterm_d(double prefactor, + double *rij_hat, double rij, + double *rik_hat, double rik, + double *dri, double *drj, double *drk, + Param *param) +{ + double gijk,gijk_d,ex_delr,ex_delr_d,fc,dfc,cos_theta,tmp; + double dcosdri[3],dcosdrj[3],dcosdrk[3]; + + fc = gw_fc(rik,param); + dfc = gw_fc_d(rik,param); + if (param->powermint == 3) tmp = pow(param->lam3 * (rij-rik),3.0); + else tmp = param->lam3 * (rij-rik); + + if (tmp > 69.0776) ex_delr = 1.e30; + else if (tmp < -69.0776) ex_delr = 0.0; + else ex_delr = exp(tmp); + + if (param->powermint == 3) + ex_delr_d = 3.0*pow(param->lam3,3.0) * pow(rij-rik,2.0)*ex_delr; + else ex_delr_d = param->lam3 * ex_delr; + + cos_theta = vec3_dot(rij_hat,rik_hat); + gijk = gw_gijk(cos_theta,param); + gijk_d = gw_gijk_d(cos_theta,param); + costheta_d(rij_hat,rij,rik_hat,rik,dcosdri,dcosdrj,dcosdrk); + + // compute the derivative wrt Ri + // dri = -dfc*gijk*ex_delr*rik_hat; + // dri += fc*gijk_d*ex_delr*dcosdri; + // dri += fc*gijk*ex_delr_d*(rik_hat - rij_hat); + + 
vec3_scale(-dfc*gijk*ex_delr,rik_hat,dri); + vec3_scaleadd(fc*gijk_d*ex_delr,dcosdri,dri,dri); + vec3_scaleadd(fc*gijk*ex_delr_d,rik_hat,dri,dri); + vec3_scaleadd(-fc*gijk*ex_delr_d,rij_hat,dri,dri); + vec3_scale(prefactor,dri,dri); + + // compute the derivative wrt Rj + // drj = fc*gijk_d*ex_delr*dcosdrj; + // drj += fc*gijk*ex_delr_d*rij_hat; + + vec3_scale(fc*gijk_d*ex_delr,dcosdrj,drj); + vec3_scaleadd(fc*gijk*ex_delr_d,rij_hat,drj,drj); + vec3_scale(prefactor,drj,drj); + + // compute the derivative wrt Rk + // drk = dfc*gijk*ex_delr*rik_hat; + // drk += fc*gijk_d*ex_delr*dcosdrk; + // drk += -fc*gijk*ex_delr_d*rik_hat; + + vec3_scale(dfc*gijk*ex_delr,rik_hat,drk); + vec3_scaleadd(fc*gijk_d*ex_delr,dcosdrk,drk,drk); + vec3_scaleadd(-fc*gijk*ex_delr_d,rik_hat,drk,drk); + vec3_scale(prefactor,drk,drk); +} + +/* ---------------------------------------------------------------------- */ + +void PairGW::costheta_d(double *rij_hat, double rij, + double *rik_hat, double rik, + double *dri, double *drj, double *drk) +{ + // first element is devative wrt Ri, second wrt Rj, third wrt Rk + + double cos_theta = vec3_dot(rij_hat,rik_hat); + + vec3_scaleadd(-cos_theta,rij_hat,rik_hat,drj); + vec3_scale(1.0/rij,drj,drj); + vec3_scaleadd(-cos_theta,rik_hat,rij_hat,drk); + vec3_scale(1.0/rik,drk,drk); + vec3_add(drj,drk,dri); + vec3_scale(-1.0,dri,dri); +} diff --git a/src/MANYBODY/pair_gw.h b/src/MANYBODY/pair_gw.h new file mode 100644 index 0000000000..eedc2a1f9f --- /dev/null +++ b/src/MANYBODY/pair_gw.h @@ -0,0 +1,196 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. 
This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(gw,PairGW)
+
+#else
+
+#ifndef LMP_PAIR_GW_H
+#define LMP_PAIR_GW_H
+
+#include "pair.h"
+
+namespace LAMMPS_NS {
+
+// Pair style "gw": a three-body potential whose parameters are read,
+// one element triplet per entry, from a potential file.
+
+class PairGW : public Pair {
+ public:
+  PairGW(class LAMMPS *);
+  virtual ~PairGW();
+  virtual void compute(int, int);
+  void settings(int, char **);
+  void coeff(int, char **);
+  void init_style();
+  double init_one(int, int);
+
+ protected:
+  // one parameter set per I-J-K element triplet
+  struct Param {
+    double lam1,lam2,lam3;
+    double c,d,h;
+    double gamma,powerm;
+    double powern,beta;
+    double biga,bigb,bigd,bigr;
+    double cut,cutsq;                // derived: cut = bigr + bigd
+    double c1,c2,c3,c4;              // derived from powern in setup_params()
+    int ielement,jelement,kelement;
+    int powermint;                   // integer value of powerm (1 or 3)
+    double Z_i,Z_j;                  // only read by the gw/zbl variant
+    double ZBLcut,ZBLexpscale;       // only read by the gw/zbl variant
+  };
+
+  Param *params;                // parameter set for an I-J-K interaction
+  char **elements;              // names of unique elements
+  int ***elem2param;            // mapping from element triplets to parameters
+  int *map;                     // mapping from atom types to elements
+  double cutmax;                // max cutoff for all elements
+  int nelements;                // # of unique elements
+  int nparams;                  // # of stored parameter sets
+  int maxparam;                 // max # of parameter sets
+
+  // NOTE(review): the members from pages through add_pages() below are
+  // declared here, but pair_gw.cpp neither defines nor uses them --
+  // confirm whether they are still needed
+
+  int **pages;                     // neighbor list pages
+  int maxlocal;                    // size of numneigh, firstneigh arrays
+  int maxpage;                     // # of pages currently allocated
+  int pgsize;                      // size of neighbor page
+  int oneatom;                     // max # of neighbors for one atom
+
+
+  int *GW_numneigh;        // # of pair neighbors for each atom
+  int **GW_firstneigh;     // ptr to 1st neighbor of each atom
+
+  void GW_neigh();
+  void add_pages(int howmany = 1);
+
+  void allocate();
+  virtual void read_file(char *);
+  void setup_params();
+  virtual void repulsive(Param *, double, double &, int, double &);
+  double zeta(Param *, double, double, double *, double *);
+  virtual void force_zeta(Param *, double, double, double &,
+                          double &, int, double &);
+  void attractive(Param *, double, double, double, double *, double *,
+                  double *, double *, double *);
+
+  double gw_fc(double, Param *);
+  double gw_fc_d(double, Param *);
+  virtual double gw_fa(double, Param *);
+  virtual double gw_fa_d(double, Param *);
+  double gw_bij(double, Param *);
+  double gw_bij_d(double, Param *);
+
+  void gw_zetaterm_d(double, double *, double, double *, double,
+                     double *, double *, double *, Param *);
+  void costheta_d(double *, double, double *, double,
+                  double *, double *, double *);
+
+  // inlined functions for efficiency
+
+  // angular function g(cos theta) = gamma*(1 + c^2/d^2 - c^2/(d^2 + (h - cos theta)^2))
+  inline double gw_gijk(const double costheta,
+                        const Param * const param) const {
+    const double gw_c = param->c * param->c;
+    const double gw_d = param->d * param->d;
+    const double hcth = param->h - costheta;
+
+    //printf("gw_gijk: gw_c=%f gw_d=%f hcth=%f=%f-%f\n", gw_c, gw_d, hcth, param->h, costheta);
+
+    return param->gamma*(1.0 + gw_c/gw_d - gw_c / (gw_d + hcth*hcth));
+  }
+
+  // derivative of gw_gijk with respect to costheta
+  inline double gw_gijk_d(const double costheta,
+                          const Param * const param) const {
+    const double gw_c = param->c * param->c;
+    const double gw_d = param->d * param->d;
+    const double hcth = param->h - costheta;
+    const double numerator = -2.0 * gw_c * hcth;
+    const double denominator = 1.0/(gw_d + hcth*hcth);
+    return param->gamma*numerator*denominator*denominator;
+  }
+
+  // dot product of two 3-vectors
+  inline double vec3_dot(const double x[3], const double y[3]) const {
+    return x[0]*y[0] + x[1]*y[1] + x[2]*y[2];
+  }
+
+  // z = x + y
+  inline void vec3_add(const double x[3], const double y[3],
+                       double * const z) const {
+    z[0] = x[0]+y[0];  z[1] = x[1]+y[1];  z[2] = x[2]+y[2];
+  }
+
+  // y = k*x
+  inline void vec3_scale(const double k, const double x[3],
+                         double y[3]) const {
+    y[0] = k*x[0];  y[1] = k*x[1];  y[2] = k*x[2];
+  }
+
+  // z = k*x + y
+  inline void vec3_scaleadd(const double k, const double x[3],
+                            const double y[3], double * const z) const {
+    z[0] = k*x[0]+y[0];
+    z[1] = k*x[1]+y[1];
+    z[2] = k*x[2]+y[2];
+  }
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Illegal ... command
+
+Self-explanatory.  Check the input script syntax and compare to the
+documentation for the command.  You can use -echo screen as a
+command-line option when running LAMMPS to see the offending line.
+
+E: Incorrect args for pair coefficients
+
+Self-explanatory.  Check the input script or data file.
+
+E: Pair style GW requires atom IDs
+
+This is a requirement to use the GW potential.
+
+E: Pair style GW requires newton pair on
+
+See the newton command.  This is a restriction to use the GW
+potential.
+
+E: All pair coeffs are not set
+
+All pair coefficients must be set in the data file or by the
+pair_coeff command before running a simulation.
+
+E: Cannot open GW potential file %s
+
+The specified GW potential file cannot be opened.  Check that the
+path and name are correct.
+
+E: Incorrect format in GW potential file
+
+Incorrect number of words per line in the potential file.
+
+E: Illegal GW parameter
+
+One or more of the coefficients defined in the potential file is
+invalid.
+
+E: Potential file has duplicate entry
+
+The potential file for a SW or GW potential has more than
+one entry for the same 3 ordered elements.
+
+E: Potential file is missing an entry
+
+The potential file for a SW or GW potential does not have a
+needed entry.
+
+*/
diff --git a/src/MANYBODY/pair_gw_zbl.cpp b/src/MANYBODY/pair_gw_zbl.cpp
new file mode 100644
index 0000000000..a4e1ccb1f9
--- /dev/null
+++ b/src/MANYBODY/pair_gw_zbl.cpp
@@ -0,0 +1,287 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+ + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: German Samolyuk (ORNL) + Based on PairTersoffZBL by Aidan Thompson (SNL) and David Farrell (NWU) +------------------------------------------------------------------------- */ + +#include +#include +#include +#include +#include "pair_gw_zbl.h" +#include "atom.h" +#include "update.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "force.h" +#include "comm.h" +#include "memory.h" +#include "error.h" + +#include "math_const.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +#define MAXLINE 1024 +#define DELTA 4 + +/* ---------------------------------------------------------------------- */ + +PairGWZBL::PairGWZBL(LAMMPS *lmp) : PairGW(lmp) +{ + // hard-wired constants in metal or real units + // a0 = Bohr radius + // epsilon0 = permittivity of vacuum = q / energy-distance units + // e = unit charge + // 1 Kcal/mole = 0.043365121 eV + + if (strcmp(update->unit_style,"metal") == 0) { + global_a_0 = 0.529; + global_epsilon_0 = 0.00552635; + global_e = 1.0; + } else if (strcmp(update->unit_style,"real") == 0) { + global_a_0 = 0.529; + global_epsilon_0 = 0.00552635 * 0.043365121; + global_e = 1.0; + } else error->all(FLERR,"Pair gw/zbl requires metal or real units"); +} + +/* ---------------------------------------------------------------------- */ + +void PairGWZBL::read_file(char *file) +{ + int params_per_line = 21; + char **words = new char*[params_per_line+1]; + + memory->sfree(params); + params = NULL; + nparams = maxparam = 0; + + // open file on proc 0 + + FILE *fp; + if (comm->me == 0) { + fp = force->open_potential(file); + if (fp == NULL) { + char str[128]; + sprintf(str,"Cannot open GW potential file %s",file); + error->one(FLERR,str); + } + } + + // read each line out of file, 
skipping blank lines or leading '#' + // store line of params if all 3 element tags are in element list + + int n,nwords,ielement,jelement,kelement; + char line[MAXLINE],*ptr; + int eof = 0; + + while (1) { + if (comm->me == 0) { + ptr = fgets(line,MAXLINE,fp); + if (ptr == NULL) { + eof = 1; + fclose(fp); + } else n = strlen(line) + 1; + } + MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) break; + MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + + // strip comment, skip line if blank + + if ((ptr = strchr(line,'#'))) *ptr = '\0'; + nwords = atom->count_words(line); + if (nwords == 0) continue; + + // concatenate additional lines until have params_per_line words + + while (nwords < params_per_line) { + n = strlen(line); + if (comm->me == 0) { + ptr = fgets(&line[n],MAXLINE-n,fp); + if (ptr == NULL) { + eof = 1; + fclose(fp); + } else n = strlen(line) + 1; + } + MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) break; + MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + if ((ptr = strchr(line,'#'))) *ptr = '\0'; + nwords = atom->count_words(line); + } + + if (nwords != params_per_line) + error->all(FLERR,"Incorrect format in GW potential file"); + + // words = ptrs to all words in line + + nwords = 0; + words[nwords++] = strtok(line," \t\n\r\f"); + while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue; + + // ielement,jelement,kelement = 1st args + // if all 3 args are in element list, then parse this line + // else skip to next line + + for (ielement = 0; ielement < nelements; ielement++) + if (strcmp(words[0],elements[ielement]) == 0) break; + if (ielement == nelements) continue; + for (jelement = 0; jelement < nelements; jelement++) + if (strcmp(words[1],elements[jelement]) == 0) break; + if (jelement == nelements) continue; + for (kelement = 0; kelement < nelements; kelement++) + if (strcmp(words[2],elements[kelement]) == 0) break; + if (kelement == nelements) continue; + + // load up parameter settings and error 
check their values + + if (nparams == maxparam) { + maxparam += DELTA; + params = (Param *) memory->srealloc(params,maxparam*sizeof(Param), + "pair:params"); + } + + params[nparams].ielement = ielement; + params[nparams].jelement = jelement; + params[nparams].kelement = kelement; + params[nparams].powerm = atof(words[3]); + params[nparams].gamma = atof(words[4]); + params[nparams].lam3 = atof(words[5]); + params[nparams].c = atof(words[6]); + params[nparams].d = atof(words[7]); + params[nparams].h = atof(words[8]); + params[nparams].powern = atof(words[9]); + params[nparams].beta = atof(words[10]); + params[nparams].lam2 = atof(words[11]); + params[nparams].bigb = atof(words[12]); + params[nparams].bigr = atof(words[13]); + params[nparams].bigd = atof(words[14]); + params[nparams].lam1 = atof(words[15]); + params[nparams].biga = atof(words[16]); + params[nparams].Z_i = atof(words[17]); + params[nparams].Z_j = atof(words[18]); + params[nparams].ZBLcut = atof(words[19]); + params[nparams].ZBLexpscale = atof(words[20]); + + // currently only allow m exponent of 1 or 3 + + params[nparams].powermint = int(params[nparams].powerm); + + if ( + params[nparams].lam3 < 0.0 || params[nparams].c < 0.0 || + params[nparams].d < 0.0 || params[nparams].powern < 0.0 || + params[nparams].beta < 0.0 || params[nparams].lam2 < 0.0 || + params[nparams].bigb < 0.0 || params[nparams].bigr < 0.0 || + params[nparams].bigd < 0.0 || + params[nparams].bigd > params[nparams].bigr || + params[nparams].lam3 < 0.0 || params[nparams].biga < 0.0 || + params[nparams].powerm - params[nparams].powermint != 0.0 || + (params[nparams].powermint != 3 && params[nparams].powermint != 1) || + params[nparams].gamma < 0.0 || + params[nparams].Z_i < 1.0 || params[nparams].Z_j < 1.0 || + params[nparams].ZBLcut < 0.0 || params[nparams].ZBLexpscale < 0.0) + error->all(FLERR,"Illegal GW parameter"); + + nparams++; + } + + delete [] words; +} + +/* ---------------------------------------------------------------------- 
*/ + +void PairGWZBL::repulsive(Param *param, double rsq, double &fforce, + int eflag, double &eng) +{ + double r,tmp_fc,tmp_fc_d,tmp_exp; + + // GW repulsive portion + + r = sqrt(rsq); + tmp_fc = gw_fc(r,param); + tmp_fc_d = gw_fc_d(r,param); + tmp_exp = exp(-param->lam1 * r); + double fforce_gw = param->biga * tmp_exp * (tmp_fc_d - tmp_fc*param->lam1); + double eng_gw = tmp_fc * param->biga * tmp_exp; + + // ZBL repulsive portion + + double esq = pow(global_e,2.0); + double a_ij = (0.8854*global_a_0) / + (pow(param->Z_i,0.23) + pow(param->Z_j,0.23)); + double premult = (param->Z_i * param->Z_j * esq)/(4.0*MY_PI*global_epsilon_0); + double r_ov_a = r/a_ij; + double phi = 0.1818*exp(-3.2*r_ov_a) + 0.5099*exp(-0.9423*r_ov_a) + + 0.2802*exp(-0.4029*r_ov_a) + 0.02817*exp(-0.2016*r_ov_a); + double dphi = (1.0/a_ij) * (-3.2*0.1818*exp(-3.2*r_ov_a) - + 0.9423*0.5099*exp(-0.9423*r_ov_a) - + 0.4029*0.2802*exp(-0.4029*r_ov_a) - + 0.2016*0.02817*exp(-0.2016*r_ov_a)); + double fforce_ZBL = premult*-phi/rsq + premult*dphi/r; + double eng_ZBL = premult*(1.0/r)*phi; + + // combine two parts with smoothing by Fermi-like function + + fforce = -(-F_fermi_d(r,param) * eng_ZBL + + (1.0 - F_fermi(r,param))*fforce_ZBL + + F_fermi_d(r,param)*eng_gw + F_fermi(r,param)*fforce_gw) / r; + + if (eflag) + eng = (1.0 - F_fermi(r,param))*eng_ZBL + F_fermi(r,param)*eng_gw; +} + +/* ---------------------------------------------------------------------- */ + +double PairGWZBL::gw_fa(double r, Param *param) +{ + if (r > param->bigr + param->bigd) return 0.0; + return -param->bigb * exp(-param->lam2 * r) * gw_fc(r,param) * + F_fermi(r,param); +} + +/* ---------------------------------------------------------------------- */ + +double PairGWZBL::gw_fa_d(double r, Param *param) +{ + if (r > param->bigr + param->bigd) return 0.0; + return param->bigb * exp(-param->lam2 * r) * + (param->lam2 * gw_fc(r,param) * F_fermi(r,param) - + gw_fc_d(r,param) * F_fermi(r,param) - gw_fc(r,param) * + 
F_fermi_d(r,param)); +} + +/* ---------------------------------------------------------------------- + Fermi-like smoothing function +------------------------------------------------------------------------- */ + +double PairGWZBL::F_fermi(double r, Param *param) +{ + return 1.0 / (1.0 + exp(-param->ZBLexpscale*(r-param->ZBLcut))); +} + +/* ---------------------------------------------------------------------- + Fermi-like smoothing function derivative with respect to r +------------------------------------------------------------------------- */ + +double PairGWZBL::F_fermi_d(double r, Param *param) +{ + return param->ZBLexpscale*exp(-param->ZBLexpscale*(r-param->ZBLcut)) / + pow(1.0 + exp(-param->ZBLexpscale*(r-param->ZBLcut)),2.0); +} diff --git a/src/MANYBODY/pair_gw_zbl.h b/src/MANYBODY/pair_gw_zbl.h new file mode 100644 index 0000000000..0ed7f1de56 --- /dev/null +++ b/src/MANYBODY/pair_gw_zbl.h @@ -0,0 +1,72 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+// associates the pair style name gw/zbl with class PairGWZBL
+
+PairStyle(gw/zbl,PairGWZBL)
+
+#else
+
+#ifndef LMP_PAIR_GW_ZBL_H
+#define LMP_PAIR_GW_ZBL_H
+
+#include "pair_gw.h"
+
+namespace LAMMPS_NS {
+
+// GW pair style whose repulsive term is blended with a ZBL repulsion
+// through the Fermi-like switching function F_fermi()
+// NOTE(review): the private methods below presumably replace virtual
+// methods of PairGW -- confirm against pair_gw.h
+
+class PairGWZBL : public PairGW {
+ public:
+  PairGWZBL(class LAMMPS *);
+  ~PairGWZBL() {}
+
+ private:
+  // constants set in the constructor, values depend on the unit style
+  double global_a_0;                // Bohr radius for Coulomb repulsion
+  double global_epsilon_0;        // permittivity of vacuum for Coulomb repulsion
+  double global_e;                // proton charge (negative of electron charge)
+
+  // read potential file (21 words per entry) into the params array
+  void read_file(char *);
+  // blended GW/ZBL repulsive force and (optionally) energy for one pair
+  void repulsive(Param *, double, double &, int, double &);
+
+  // fa term scaled by F_fermi, and its derivative with respect to r
+  double gw_fa(double, Param *);
+  double gw_fa_d(double, Param *);
+
+  // Fermi-like smoothing function and its derivative with respect to r
+  double F_fermi(double, Param *);
+  double F_fermi_d(double, Param *);
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Pair gw/zbl requires metal or real units
+
+This is a current restriction of this pair potential.
+
+E: Cannot open GW potential file %s
+
+The specified GW potential file cannot be opened.  Check that the
+path and name are correct.
+
+E: Incorrect format in GW potential file
+
+Incorrect number of words per line in the potential file.
+
+E: Illegal GW parameter
+
+One or more of the coefficients defined in the potential file is
+invalid.
+ +*/ diff --git a/src/MC/fix_gcmc.cpp b/src/MC/fix_gcmc.cpp index 407c980729..73758e3628 100644 --- a/src/MC/fix_gcmc.cpp +++ b/src/MC/fix_gcmc.cpp @@ -60,7 +60,7 @@ using namespace MathConst; // this must be lower than MAXENERGYSIGNAL // by a large amount, so that it is still // less than total energy when negative -// energy changes are added to MAXENERGYSIGNAL +// energy contributions are added to MAXENERGYSIGNAL #define MAXENERGYTEST 1.0e50 @@ -260,7 +260,7 @@ void FixGCMC::options(int narg, char **arg) grouptypebits = NULL; energy_intra = 0.0; tfac_insert = 1.0; - overlap_cutoff = 0.0; + overlap_cutoffsq = 0.0; overlap_flag = 0; int iarg = 0; @@ -366,7 +366,8 @@ void FixGCMC::options(int narg, char **arg) iarg += 2; } else if (strcmp(arg[iarg],"overlap_cutoff") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command"); - overlap_cutoff = force->numeric(FLERR,arg[iarg+1]); + double rtmp = force->numeric(FLERR,arg[iarg+1]); + overlap_cutoffsq = rtmp*rtmp; overlap_flag = 1; iarg += 2; } else error->all(FLERR,"Illegal fix gcmc command"); @@ -701,6 +702,9 @@ void FixGCMC::pre_exchange() if (full_flag) { energy_stored = energy_full(); + if (overlap_flag && energy_stored > MAXENERGYTEST) + error->warning(FLERR,"Energy of old configuration in " + "fix gcmc is > MAXENERGYTEST."); if (mode == MOLECULE) { for (int i = 0; i < ncycles; i++) { @@ -778,6 +782,9 @@ void FixGCMC::attempt_atomic_translation() if (i >= 0) { double **x = atom->x; double energy_before = energy(i,ngcmc_type,-1,x[i]); + if (overlap_flag && energy_before > MAXENERGYTEST) + error->warning(FLERR,"Energy of old configuration in " + "fix gcmc is > MAXENERGYTEST."); double rsq = 1.1; double rx,ry,rz; rx = ry = rz = 0.0; @@ -998,6 +1005,9 @@ void FixGCMC::attempt_molecule_translation() if (translation_molecule == -1) return; double energy_before_sum = molecule_energy(translation_molecule); + if (overlap_flag && energy_before_sum > MAXENERGYTEST) + error->warning(FLERR,"Energy of old 
configuration in " + "fix gcmc is > MAXENERGYTEST."); double **x = atom->x; double rx,ry,rz; @@ -1095,6 +1105,9 @@ void FixGCMC::attempt_molecule_rotation() if (rotation_molecule == -1) return; double energy_before_sum = molecule_energy(rotation_molecule); + if (overlap_flag && energy_before_sum > MAXENERGYTEST) + error->warning(FLERR,"Energy of old configuration in " + "fix gcmc is > MAXENERGYTEST."); int nlocal = atom->nlocal; int *mask = atom->mask; @@ -2134,7 +2147,7 @@ double FixGCMC::energy(int i, int itype, tagint imolecule, double *coord) // if overlap check requested, if overlap, // return signal value for energy - if (overlap_flag && rsq < overlap_cutoff) + if (overlap_flag && rsq < overlap_cutoffsq) return MAXENERGYSIGNAL; if (rsq < cutsq[itype][jtype]) @@ -2170,6 +2183,8 @@ double FixGCMC::molecule_energy(tagint gas_molecule_id) double FixGCMC::energy_full() { + int imolecule; + if (triclinic) domain->x2lamda(atom->nlocal); domain->pbc(); comm->exchange(); @@ -2185,14 +2200,15 @@ double FixGCMC::energy_full() // return signal value for energy if (overlap_flag) { + int overlaptestall; + int overlaptest = 0; double delx,dely,delz,rsq; double **x = atom->x; tagint *molecule = atom->molecule; int nall = atom->nlocal + atom->nghost; for (int i = 0; i < atom->nlocal; i++) { - int imolecule = molecule[i]; + if (mode == MOLECULE) imolecule = molecule[i]; for (int j = i+1; j < nall; j++) { - if (mode == MOLECULE) if (imolecule == molecule[j]) continue; @@ -2201,11 +2217,18 @@ double FixGCMC::energy_full() delz = x[i][2] - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; - if (rsq < overlap_cutoff) return MAXENERGYSIGNAL; + if (rsq < overlap_cutoffsq) { + overlaptest = 1; + break; + } } + if (overlaptest) break; } + MPI_Allreduce(&overlaptest, &overlaptestall, 1, + MPI_INT, MPI_MAX, world); + if (overlaptestall) return MAXENERGYSIGNAL; } - + // clear forces so they don't accumulate over multiple // calls within fix gcmc timestep, e.g. 
for fix shake diff --git a/src/MC/fix_gcmc.h b/src/MC/fix_gcmc.h index 9b2184dda2..8a5375eed7 100644 --- a/src/MC/fix_gcmc.h +++ b/src/MC/fix_gcmc.h @@ -106,13 +106,13 @@ class FixGCMC : public Fix { double xlo,xhi,ylo,yhi,zlo,zhi; double region_xlo,region_xhi,region_ylo,region_yhi,region_zlo,region_zhi; double region_volume; - double energy_stored; + double energy_stored; // full energy of old/current configuration double *sublo,*subhi; int *local_gas_list; double **cutsq; double **atom_coord; imageint imagezero; - double overlap_cutoff; + double overlap_cutoffsq; // square distance cutoff for overlap int overlap_flag; double energy_intra; @@ -214,9 +214,14 @@ W: Fix gcmc using full_energy option Fix gcmc has automatically turned on the full_energy option since it is required for systems like the one specified by the user. User input -included one or more of the following: kspace, triclinic, a hybrid -pair style, an eam pair style, or no "single" function for the pair -style. +included one or more of the following: kspace, a hybrid +pair style, an eam pair style, tail correction, +or no "single" function for the pair style. + +W: Energy of old configuration in fix gcmc is > MAXENERGYTEST. + +This probably means that a pair of atoms are closer than the +overlap cutoff distance for keyword overlap_cutoff. 
E: Invalid atom type in fix gcmc command diff --git a/src/MC/pair_dsmc.cpp b/src/MC/pair_dsmc.cpp index 344faf87f6..29ecde2023 100644 --- a/src/MC/pair_dsmc.cpp +++ b/src/MC/pair_dsmc.cpp @@ -230,7 +230,7 @@ void PairDSMC::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/MEAM/Install.sh b/src/MEAM/Install.sh index 1825d4327f..7bfc76c0e2 100644 --- a/src/MEAM/Install.sh +++ b/src/MEAM/Install.sh @@ -29,7 +29,7 @@ action () { # all package files with no dependencies for file in *.cpp *.h; do - action $file + test -f ${file} && action $file done # edit 2 Makefile.package files to include/exclude package info diff --git a/src/MISC/pair_nm_cut.cpp b/src/MISC/pair_nm_cut.cpp index 467be1b7be..0163cdcf58 100644 --- a/src/MISC/pair_nm_cut.cpp +++ b/src/MISC/pair_nm_cut.cpp @@ -187,7 +187,7 @@ void PairNMCut::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/MISC/pair_nm_cut_coul_cut.cpp b/src/MISC/pair_nm_cut_coul_cut.cpp index 86fa09f176..5cb2452906 100644 --- a/src/MISC/pair_nm_cut_coul_cut.cpp +++ b/src/MISC/pair_nm_cut_coul_cut.cpp @@ -213,7 +213,7 @@ void PairNMCutCoulCut::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) { cut_lj[i][j] = cut_lj_global; cut_coul[i][j] = cut_coul_global; diff --git a/src/MISC/pair_nm_cut_coul_long.cpp b/src/MISC/pair_nm_cut_coul_long.cpp index c186d19539..15d5d03757 100644 --- a/src/MISC/pair_nm_cut_coul_long.cpp +++ b/src/MISC/pair_nm_cut_coul_long.cpp @@ -255,7 +255,7 @@ void PairNMCutCoulLong::settings(int narg, char **arg) if 
(allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut_lj[i][j] = cut_lj_global; } } diff --git a/src/MOLECULE/bond_harmonic.cpp b/src/MOLECULE/bond_harmonic.cpp index f164a51de4..0763d7d3e2 100644 --- a/src/MOLECULE/bond_harmonic.cpp +++ b/src/MOLECULE/bond_harmonic.cpp @@ -13,6 +13,7 @@ #include #include +#include #include "bond_harmonic.h" #include "atom.h" #include "neighbor.h" @@ -26,7 +27,10 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -BondHarmonic::BondHarmonic(LAMMPS *lmp) : Bond(lmp) {} +BondHarmonic::BondHarmonic(LAMMPS *lmp) : Bond(lmp) +{ + reinitflag = 1; +} /* ---------------------------------------------------------------------- */ @@ -196,3 +200,16 @@ double BondHarmonic::single(int type, double rsq, int i, int j, if (r > 0.0) fforce = -2.0*rk/r; return rk*dr; } + +/* ---------------------------------------------------------------------- + Return ptr to internal members upon request. 
+------------------------------------------------------------------------ */ +void *BondHarmonic::extract( char *str, int &dim ) +{ + dim = 1; + if( strcmp(str,"kappa")==0) return (void*) k; + if( strcmp(str,"r0")==0) return (void*) r0; + return NULL; +} + + diff --git a/src/MOLECULE/bond_harmonic.h b/src/MOLECULE/bond_harmonic.h index 7c7125b04c..a0fd24577a 100644 --- a/src/MOLECULE/bond_harmonic.h +++ b/src/MOLECULE/bond_harmonic.h @@ -36,6 +36,7 @@ class BondHarmonic : public Bond { void read_restart(FILE *); void write_data(FILE *); double single(int, double, int, int, double &); + virtual void *extract(char *, int &); protected: double *k,*r0; diff --git a/src/MOLECULE/dihedral_charmmfsh.cpp b/src/MOLECULE/dihedral_charmmfsw.cpp similarity index 95% rename from src/MOLECULE/dihedral_charmmfsh.cpp rename to src/MOLECULE/dihedral_charmmfsw.cpp index 93c1853fe5..613170bbfa 100644 --- a/src/MOLECULE/dihedral_charmmfsh.cpp +++ b/src/MOLECULE/dihedral_charmmfsw.cpp @@ -21,7 +21,7 @@ #include #include #include -#include "dihedral_charmmfsh.h" +#include "dihedral_charmmfsw.h" #include "atom.h" #include "comm.h" #include "neighbor.h" @@ -40,7 +40,7 @@ using namespace MathConst; /* ---------------------------------------------------------------------- */ -DihedralCharmmfsh::DihedralCharmmfsh(LAMMPS *lmp) : Dihedral(lmp) +DihedralCharmmfsw::DihedralCharmmfsw(LAMMPS *lmp) : Dihedral(lmp) { weightflag = 0; writedata = 1; @@ -48,7 +48,7 @@ DihedralCharmmfsh::DihedralCharmmfsh(LAMMPS *lmp) : Dihedral(lmp) /* ---------------------------------------------------------------------- */ -DihedralCharmmfsh::~DihedralCharmmfsh() +DihedralCharmmfsw::~DihedralCharmmfsw() { if (allocated && !copymode) { memory->destroy(setflag); @@ -63,7 +63,7 @@ DihedralCharmmfsh::~DihedralCharmmfsh() /* ---------------------------------------------------------------------- */ -void DihedralCharmmfsh::compute(int eflag, int vflag) +void DihedralCharmmfsw::compute(int eflag, int vflag) { int 
i1,i2,i3,i4,i,m,n,type; double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm; @@ -322,7 +322,7 @@ void DihedralCharmmfsh::compute(int eflag, int vflag) /* ---------------------------------------------------------------------- */ -void DihedralCharmmfsh::allocate() +void DihedralCharmmfsw::allocate() { allocated = 1; int n = atom->ndihedraltypes; @@ -342,7 +342,7 @@ void DihedralCharmmfsh::allocate() set coeffs for one type ------------------------------------------------------------------------- */ -void DihedralCharmmfsh::coeff(int narg, char **arg) +void DihedralCharmmfsw::coeff(int narg, char **arg) { if (narg != 5) error->all(FLERR,"Incorrect args for dihedral coefficients"); if (!allocated) allocate(); @@ -384,7 +384,7 @@ void DihedralCharmmfsh::coeff(int narg, char **arg) error check and initialize all values needed for force computation ------------------------------------------------------------------------- */ -void DihedralCharmmfsh::init_style() +void DihedralCharmmfsw::init_style() { // insure use of CHARMM pair_style if any weight factors are non-zero // set local ptrs to LJ 14 arrays setup by Pair @@ -392,14 +392,14 @@ void DihedralCharmmfsh::init_style() if (weightflag) { int itmp; if (force->pair == NULL) - error->all(FLERR,"Dihedral charmmfsh is incompatible with Pair style"); + error->all(FLERR,"Dihedral charmmfsw is incompatible with Pair style"); lj14_1 = (double **) force->pair->extract("lj14_1",itmp); lj14_2 = (double **) force->pair->extract("lj14_2",itmp); lj14_3 = (double **) force->pair->extract("lj14_3",itmp); lj14_4 = (double **) force->pair->extract("lj14_4",itmp); int *ptr = (int *) force->pair->extract("implicit",itmp); if (!lj14_1 || !lj14_2 || !lj14_3 || !lj14_4 || !ptr) - error->all(FLERR,"Dihedral charmmfsh is incompatible with Pair style"); + error->all(FLERR,"Dihedral charmmfsw is incompatible with Pair style"); implicit = *ptr; } @@ -414,7 +414,7 @@ void DihedralCharmmfsh::init_style() if (p_cutcoul == NULL || 
p_cutljinner == NULL || p_cutlj == NULL || p_dihedflag == NULL) - error->all(FLERR,"Dihedral charmmfsh is incompatible with Pair style"); + error->all(FLERR,"Dihedral charmmfsw is incompatible with Pair style"); dihedflag = *p_dihedflag; cut_coul14 = *p_cutcoul; @@ -433,7 +433,7 @@ void DihedralCharmmfsh::init_style() proc 0 writes out coeffs to restart file ------------------------------------------------------------------------- */ -void DihedralCharmmfsh::write_restart(FILE *fp) +void DihedralCharmmfsw::write_restart(FILE *fp) { fwrite(&k[1],sizeof(double),atom->ndihedraltypes,fp); fwrite(&multiplicity[1],sizeof(int),atom->ndihedraltypes,fp); @@ -446,7 +446,7 @@ void DihedralCharmmfsh::write_restart(FILE *fp) proc 0 reads coeffs from restart file, bcasts them ------------------------------------------------------------------------- */ -void DihedralCharmmfsh::read_restart(FILE *fp) +void DihedralCharmmfsw::read_restart(FILE *fp) { allocate(); @@ -474,7 +474,7 @@ void DihedralCharmmfsh::read_restart(FILE *fp) proc 0 writes to data file ------------------------------------------------------------------------- */ -void DihedralCharmmfsh::write_data(FILE *fp) +void DihedralCharmmfsw::write_data(FILE *fp) { for (int i = 1; i <= atom->ndihedraltypes; i++) fprintf(fp,"%d %g %d %d %g\n",i,k[i],multiplicity[i],shift[i],weight[i]); diff --git a/src/MOLECULE/dihedral_charmmfsh.h b/src/MOLECULE/dihedral_charmmfsw.h similarity index 84% rename from src/MOLECULE/dihedral_charmmfsh.h rename to src/MOLECULE/dihedral_charmmfsw.h index 44ea9b2658..ab0ccf675d 100644 --- a/src/MOLECULE/dihedral_charmmfsh.h +++ b/src/MOLECULE/dihedral_charmmfsw.h @@ -13,22 +13,22 @@ #ifdef DIHEDRAL_CLASS -DihedralStyle(charmmfsh,DihedralCharmmfsh) +DihedralStyle(charmmfsw,DihedralCharmmfsw) #else -#ifndef LMP_DIHEDRAL_CHARMMFSH_H -#define LMP_DIHEDRAL_CHARMMFSH_H +#ifndef LMP_DIHEDRAL_CHARMMFSW_H +#define LMP_DIHEDRAL_CHARMMFSW_H #include #include "dihedral.h" namespace LAMMPS_NS { -class 
DihedralCharmmfsh : public Dihedral { +class DihedralCharmmfsw : public Dihedral { public: - DihedralCharmmfsh(class LAMMPS *); - virtual ~DihedralCharmmfsh(); + DihedralCharmmfsw(class LAMMPS *); + virtual ~DihedralCharmmfsw(); virtual void compute(int, int); virtual void coeff(int, char **); virtual void init_style(); @@ -73,9 +73,9 @@ E: Incorrect weight arg for dihedral coefficients Self-explanatory. Check the input script or data file. -E: Dihedral charmmfsh is incompatible with Pair style +E: Dihedral charmmfsw is incompatible with Pair style -Dihedral style charmmfsh must be used with a pair style charmm +Dihedral style charmmfsw must be used with a pair style charmm in order for the 1-4 epsilon/sigma parameters to be defined. */ diff --git a/src/MOLECULE/pair_lj_charmmfsw_coul_charmmfsh.cpp b/src/MOLECULE/pair_lj_charmmfsw_coul_charmmfsh.cpp index c75da63cae..1e34b06478 100644 --- a/src/MOLECULE/pair_lj_charmmfsw_coul_charmmfsh.cpp +++ b/src/MOLECULE/pair_lj_charmmfsw_coul_charmmfsh.cpp @@ -42,6 +42,10 @@ PairLJCharmmfswCoulCharmmfsh::PairLJCharmmfswCoulCharmmfsh(LAMMPS *lmp) : implicit = 0; mix_flag = ARITHMETIC; writedata = 1; + + // short-range/long-range flag accessed by DihedralCharmmfsw + + dihedflag = 0; } /* ---------------------------------------------------------------------- */ @@ -235,10 +239,6 @@ void PairLJCharmmfswCoulCharmmfsh::settings(int narg, char **arg) } else { cut_coul = force->numeric(FLERR,arg[2]); } - - // indicates pair_style being used for dihedral_charmm - - dihedflag = 0; } /* ---------------------------------------------------------------------- @@ -393,10 +393,10 @@ void PairLJCharmmfswCoulCharmmfsh::write_restart(FILE *fp) for (j = i; j <= atom->ntypes; j++) { fwrite(&setflag[i][j],sizeof(int),1,fp); if (setflag[i][j]) { - fwrite(&epsilon[i][j],sizeof(double),1,fp); - fwrite(&sigma[i][j],sizeof(double),1,fp); - fwrite(&eps14[i][j],sizeof(double),1,fp); - fwrite(&sigma14[i][j],sizeof(double),1,fp); + 
fwrite(&epsilon[i][j],sizeof(double),1,fp); + fwrite(&sigma[i][j],sizeof(double),1,fp); + fwrite(&eps14[i][j],sizeof(double),1,fp); + fwrite(&sigma14[i][j],sizeof(double),1,fp); } } } @@ -535,7 +535,7 @@ void *PairLJCharmmfswCoulCharmmfsh::extract(const char *str, int &dim) dim = 0; if (strcmp(str,"implicit") == 0) return (void *) &implicit; - // info extracted by dihedral_charmmf + // info extracted by dihedral_charmmfsw if (strcmp(str,"cut_coul") == 0) return (void *) &cut_coul; if (strcmp(str,"cut_lj_inner") == 0) return (void *) &cut_lj_inner; diff --git a/src/MOLECULE/pair_lj_cut_tip4p_cut.cpp b/src/MOLECULE/pair_lj_cut_tip4p_cut.cpp index 15f5d52961..e3093e4d10 100644 --- a/src/MOLECULE/pair_lj_cut_tip4p_cut.cpp +++ b/src/MOLECULE/pair_lj_cut_tip4p_cut.cpp @@ -441,7 +441,7 @@ void PairLJCutTIP4PCut::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut_lj[i][j] = cut_lj_global; } } @@ -596,7 +596,7 @@ void PairLJCutTIP4PCut::write_restart(FILE *fp) for (i = 1; i <= atom->ntypes; i++) { for (j = i; j <= atom->ntypes; j++) { fwrite(&setflag[i][j],sizeof(int),1,fp); - if (setflag[i][j]){ + if (setflag[i][j]) { fwrite(&epsilon[i][j],sizeof(double),1,fp); fwrite(&sigma[i][j],sizeof(double),1,fp); fwrite(&cut_lj[i][j],sizeof(double),1,fp); diff --git a/src/MPIIO/Install.sh b/src/MPIIO/Install.sh index 3834aea5c5..902bff2fc8 100644 --- a/src/MPIIO/Install.sh +++ b/src/MPIIO/Install.sh @@ -36,7 +36,7 @@ touch ../write_restart.cpp # all package files with no dependencies for file in *.cpp *.h; do - action $file + test -f ${file} && action $file done # edit 2 Makefile.package to include/exclude LMP_MPIIO setting diff --git a/src/MSCG/Install.sh b/src/MSCG/Install.sh index f7c7452101..353403c7da 100755 --- a/src/MSCG/Install.sh +++ b/src/MSCG/Install.sh @@ -25,7 +25,7 @@ action () { # all package files with no dependencies for file 
in *.cpp *.h; do - action $file + test -f ${file} && action $file done # edit 2 Makefile.package files to include/exclude package info diff --git a/src/Makefile b/src/Makefile index d7e990461f..92a430a747 100644 --- a/src/Makefile +++ b/src/Makefile @@ -43,21 +43,39 @@ endif # Package variables +# PACKAGE = standard packages +# PACKUSER = user packages +# PACKLIB = all packages that require an additional lib +# should be PACKSYS + PACKINT + PACKEXT +# PACKSYS = subset that require a common system library +# include MPIIO and LB b/c require full MPI, not just STUBS +# PACKINT = subset that require an internal (provided) library +# PACKEXT = subset that require an external (downloaded) library + PACKAGE = asphere body class2 colloid compress coreshell dipole gpu \ granular kim kokkos kspace manybody mc meam misc molecule \ mpiio mscg opt peri poems \ python qeq reax replica rigid shock snap srd voronoi -PACKUSER = user-atc user-awpmd user-cg-cmm user-cgdna user-colvars \ +PACKUSER = user-atc user-awpmd user-cgdna user-cgsdk user-colvars \ user-diffraction user-dpd user-drude user-eff user-fep user-h5md \ user-intel user-lb user-manifold user-mgpt user-misc user-molfile \ - user-nc-dump user-omp user-phonon user-qmmm user-qtb \ + user-netcdf user-omp user-phonon user-qmmm user-qtb \ user-quip user-reaxc user-smd user-smtbq user-sph user-tally \ user-vtk -PACKLIB = compress gpu kim kokkos meam mpiio mscg poems python reax voronoi \ +PACKLIB = compress gpu kim kokkos meam mpiio mscg poems \ + python reax voronoi \ user-atc user-awpmd user-colvars user-h5md user-lb user-molfile \ - user-nc-dump user-qmmm user-quip user-smd user-vtk + user-netcdf user-qmmm user-quip user-smd user-vtk + +PACKSYS = compress mpiio python user-lb + +PACKINT = gpu kokkos meam poems reax user-atc user-awpmd user-colvars + +PACKEXT = kim mscg voronoi \ + user-h5md user-molfile user-netcdf user-qmmm user-quip \ + user-smd user-vtk PACKALL = $(PACKAGE) $(PACKUSER) @@ -66,6 +84,8 @@ PACKUSERUC = 
$(shell echo $(PACKUSER) | tr a-z A-Z) YESDIR = $(shell echo $(@:yes-%=%) | tr a-z A-Z) NODIR = $(shell echo $(@:no-%=%) | tr a-z A-Z) +LIBDIR = $(shell echo $(@:lib-%=%)) +LIBUSERDIR = $(shell echo $(@:lib-user-%=%)) # List of all targets @@ -75,9 +95,9 @@ help: @echo 'make clean-machine delete object files for one machine' @echo 'make mpi-stubs build dummy MPI library in STUBS' @echo 'make install-python install LAMMPS wrapper in Python' - @echo 'make tar create lmp_src.tar.gz of src dir and packages' + @echo 'make tar create lmp_src.tar.gz for src dir and packages' @echo '' - @echo 'make package list available packages' + @echo 'make package list available packages and their dependencies' @echo 'make package-status (ps) status of all packages' @echo 'make yes-package install a single pgk in src dir' @echo 'make no-package remove a single pkg from src dir' @@ -87,11 +107,16 @@ help: @echo 'make no-standard (no-std) remove all standard pkgs' @echo 'make yes-user install all user pkgs' @echo 'make no-user remove all user pkgs' - @echo 'make no-lib remove all pkgs with external libs' + @echo 'make yes-lib install all pkgs with libs (included or ext)' + @echo 'make no-lib remove all pkgs with libs (included or ext)' + @echo 'make yes-ext install all pkgs with external libs' + @echo 'make no-ext remove all pkgs with external libs' @echo '' @echo 'make package-update (pu) replace src files with updated package files' @echo 'make package-overwrite replace package files with src files' @echo 'make package-diff (pd) diff src files against package files' + @echo '' + @echo 'make lib-package download/build/install a package library' @echo 'make purge purge obsolete copies of source files' @echo '' @echo 'make machine build LAMMPS for machine' @@ -198,7 +223,10 @@ mpi-stubs: @cd STUBS; $(MAKE) clean; $(MAKE) # install LAMMPS shared lib and Python wrapper for Python usage +# include python package settings to +# automatically adapt name of python interpreter +sinclude 
../lib/python/Makefile.lammps install-python: @$(PYTHON) ../python/install.py @@ -221,6 +249,13 @@ package: @echo '' @echo 'User-contributed packages:' $(PACKUSER) @echo '' + @echo 'Packages that need system libraries:' $(PACKSYS) + @echo '' + @echo 'Packages that need provided libraries:' $(PACKINT) + @echo '' + @echo 'Packages that need external libraries:' $(PACKEXT) + @echo '' + @echo 'make package list available packages' @echo 'make package list available packages' @echo 'make package-status (ps) status of all packages' @echo 'make yes-package install a single pgk in src dir' @@ -229,13 +264,18 @@ package: @echo 'make no-all remove all pkgs from src dir' @echo 'make yes-standard (yes-std) install all standard pkgs' @echo 'make no-standard (no-srd) remove all standard pkgs' - @echo '' @echo 'make yes-user install all user pkgs' @echo 'make no-user remove all user pkgs' - @echo 'make no-lib remove all pkgs with external libs' + @echo 'make yes-lib install all pkgs with libs (included or ext)' + @echo 'make no-lib remove all pkgs with libs (included or ext)' + @echo 'make yes-ext install all pkgs with external libs' + @echo 'make no-ext remove all pkgs with external libs' + @echo '' @echo 'make package-update (pu) replace src files with package files' @echo 'make package-overwrite replace package files with src files' @echo 'make package-diff (pd) diff src files against package file' + @echo '' + @echo 'make lib-package build and/or download a package library' yes-all: @for p in $(PACKALL); do $(MAKE) yes-$$p; done @@ -255,9 +295,18 @@ yes-user: no-user: @for p in $(PACKUSER); do $(MAKE) no-$$p; done +yes-lib: + @for p in $(PACKLIB); do $(MAKE) yes-$$p; done + no-lib: @for p in $(PACKLIB); do $(MAKE) no-$$p; done +yes-ext: + @for p in $(PACKEXT); do $(MAKE) yes-$$p; done + +no-ext: + @for p in $(PACKEXT); do $(MAKE) no-$$p; done + yes-%: @if [ ! 
-e Makefile.package ]; \ then cp Makefile.package.empty Makefile.package; fi @@ -288,6 +337,19 @@ no-%: $(SHELL) Depend.sh $(NODIR) 0; \ fi; +# download/build/install a package library + +lib-%: + @if [ -e ../lib/$(LIBDIR)/Install.py ]; then \ + echo "Installing lib $(@:lib-%=%)"; \ + cd ../lib/$(LIBDIR); python Install.py $(args); \ + elif [ -e ../lib/$(LIBUSERDIR)/Install.py ]; then \ + echo "Installing lib $(@:lib-user-%=%)"; \ + cd ../lib/$(LIBUSERDIR); python Install.py $(args); \ + else \ + echo "Install script for lib $(@:lib-%=%) does not exist"; \ + fi; + # status = list src files that differ from package files # update = replace src files with newer package files # overwrite = overwrite package files with newer src files diff --git a/src/POEMS/Install.sh b/src/POEMS/Install.sh index 7996f542be..be407d76f0 100644 --- a/src/POEMS/Install.sh +++ b/src/POEMS/Install.sh @@ -29,7 +29,7 @@ action () { # all package files with no dependencies for file in *.cpp *.h; do - action $file + test -f ${file} && action $file done # edit 2 Makefile.package files to include/exclude package info diff --git a/src/PYTHON/Install.sh b/src/PYTHON/Install.sh index 3d6f71958a..9d2783ba0c 100755 --- a/src/PYTHON/Install.sh +++ b/src/PYTHON/Install.sh @@ -26,16 +26,14 @@ action () { fi } -# force rebuild of files with LMP_KOKKOS switch -# also variable so its *.d dependence on changed python_wrapper.h is rebuilt +# force rebuild of files using python header -touch ../python_wrapper.h -touch ../variable.cpp +touch ../python.h # all package files with no dependencies for file in *.cpp *.h; do - action $file + test -f ${file} && action $file done # edit 2 Makefile.package files to include/exclude package info diff --git a/src/PYTHON/fix_python.cpp b/src/PYTHON/fix_python.cpp new file mode 100644 index 0000000000..88a1a5088d --- /dev/null +++ b/src/PYTHON/fix_python.cpp @@ -0,0 +1,110 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale 
Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Richard Berger (Temple U) +------------------------------------------------------------------------- */ + +#include +#include +#include +#include "fix_python.h" +#include "atom.h" +#include "force.h" +#include "update.h" +#include "respa.h" +#include "error.h" +#include "python.h" +#include "python_compat.h" + +using namespace LAMMPS_NS; +using namespace FixConst; + +/* ---------------------------------------------------------------------- */ + +FixPython::FixPython(LAMMPS *lmp, int narg, char **arg) : + Fix(lmp, narg, arg) +{ + if (narg != 6) error->all(FLERR,"Illegal fix python command"); + + nevery = force->inumeric(FLERR,arg[3]); + if (nevery <= 0) error->all(FLERR,"Illegal fix python command"); + + // ensure Python interpreter is initialized + python->init(); + + if (strcmp(arg[4],"post_force") == 0) { + selected_callback = POST_FORCE; + } else if (strcmp(arg[4],"end_of_step") == 0) { + selected_callback = END_OF_STEP; + } + + // get Python function + PyGILState_STATE gstate = PyGILState_Ensure(); + + PyObject * pyMain = PyImport_AddModule("__main__"); + + if (!pyMain) { + PyGILState_Release(gstate); + error->all(FLERR,"Could not initialize embedded Python"); + } + + char * fname = arg[5]; + pFunc = PyObject_GetAttrString(pyMain, fname); + + if (!pFunc) { + PyGILState_Release(gstate); + error->all(FLERR,"Could not find Python function"); + } + + 
PyGILState_Release(gstate); +} + +/* ---------------------------------------------------------------------- */ + +int FixPython::setmask() +{ + return selected_callback; +} + +/* ---------------------------------------------------------------------- */ + +void FixPython::end_of_step() +{ + PyGILState_STATE gstate = PyGILState_Ensure(); + + PyObject * ptr = PY_VOID_POINTER(lmp); + PyObject * arglist = Py_BuildValue("(O)", ptr); + + PyObject * result = PyEval_CallObject((PyObject*)pFunc, arglist); + Py_DECREF(arglist); + + PyGILState_Release(gstate); +} + +/* ---------------------------------------------------------------------- */ + +void FixPython::post_force(int vflag) +{ + if (update->ntimestep % nevery != 0) return; + + PyGILState_STATE gstate = PyGILState_Ensure(); + + PyObject * ptr = PY_VOID_POINTER(lmp); + PyObject * arglist = Py_BuildValue("(Oi)", ptr, vflag); + + PyObject * result = PyEval_CallObject((PyObject*)pFunc, arglist); + Py_DECREF(arglist); + + PyGILState_Release(gstate); +} diff --git a/src/python_wrapper.h b/src/PYTHON/fix_python.h similarity index 53% rename from src/python_wrapper.h rename to src/PYTHON/fix_python.h index 97d7de31ef..2e740dedcd 100644 --- a/src/python_wrapper.h +++ b/src/PYTHON/fix_python.h @@ -11,37 +11,43 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ -#ifndef LMP_PYTHON_WRAPPER_H -#define LMP_PYTHON_WRAPPER_H +#ifdef FIX_CLASS -// true interface to embedded Python -// used when PYTHON package is installed - -#ifdef LMP_PYTHON - -#include "python.h" +FixStyle(python,FixPython) #else -// dummy interface to PYTHON -// needed for compiling when PYTHON is not installed +#ifndef LMP_FIX_PYTHON_H +#define LMP_FIX_PYTHON_H + +#include "fix.h" namespace LAMMPS_NS { -class Python { +class FixPython : public Fix { public: - int python_exists; + FixPython(class LAMMPS *, int, char **); + virtual ~FixPython() {} + int setmask(); + virtual void end_of_step(); + virtual void post_force(int); - Python(class LAMMPS *) {python_exists = 0;} - ~Python() {} - void command(int, char **) {} - void invoke_function(int, char *) {} - int find(char *) {return -1;} - int variable_match(char *, char *, int) {return -1;} - char *long_string(int) {return NULL;} + private: + void * pFunc; + int selected_callback; }; } #endif #endif + +/* ERROR/WARNING messages: + +E: Illegal ... command + +Self-explanatory. Check the input script syntax and compare to the +documentation for the command. You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. + +*/ diff --git a/src/PYTHON/pair_python.cpp b/src/PYTHON/pair_python.cpp new file mode 100644 index 0000000000..384aa5a94b --- /dev/null +++ b/src/PYTHON/pair_python.cpp @@ -0,0 +1,483 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. 
+ + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Axel Kohlmeyer and Richard Berger (Temple U) +------------------------------------------------------------------------- */ + +#include +#include +#include +#include +#include "pair_python.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "memory.h" +#include "update.h" +#include "neigh_list.h" +#include "python.h" +#include "error.h" +#include "python_compat.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairPython::PairPython(LAMMPS *lmp) : Pair(lmp) { + respa_enable = 0; + single_enable = 1; + writedata = 0; + restartinfo = 0; + one_coeff = 1; + reinitflag = 0; + + py_potential = NULL; + skip_types = NULL; + + python->init(); + + // add current directory to PYTHONPATH + PyObject * py_path = PySys_GetObject((char *)"path"); + PyList_Append(py_path, PY_STRING_FROM_STRING(".")); + + // if LAMMPS_POTENTIALS environment variable is set, add it to PYTHONPATH as well + const char * potentials_path = getenv("LAMMPS_POTENTIALS"); + if (potentials_path != NULL) { + PyList_Append(py_path, PY_STRING_FROM_STRING(potentials_path)); + } +} + +/* ---------------------------------------------------------------------- */ + +PairPython::~PairPython() +{ + if (py_potential) Py_DECREF((PyObject*) py_potential); + delete[] skip_types; + + if (allocated) { + memory->destroy(setflag); + memory->destroy(cutsq); + } +} + +/* ---------------------------------------------------------------------- */ + +void PairPython::compute(int eflag, int vflag) +{ + int i,j,ii,jj,inum,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,factor_lj; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + if (eflag || vflag) ev_setup(eflag,vflag); 
+ else evflag = vflag_fdotr = 0; + + double **x = atom->x; + double **f = atom->f; + int *type = atom->type; + int nlocal = atom->nlocal; + double *special_lj = force->special_lj; + int newton_pair = force->newton_pair; + + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // prepare access to compute_force and compute_energy functions + + PyGILState_STATE gstate = PyGILState_Ensure(); + PyObject *py_pair_instance = (PyObject *) py_potential; + PyObject *py_compute_force = PyObject_GetAttrString(py_pair_instance,"compute_force"); + if (!py_compute_force) { + PyErr_Print(); + PyErr_Clear(); + PyGILState_Release(gstate); + error->all(FLERR,"Could not find 'compute_force' method'"); + } + if (!PyCallable_Check(py_compute_force)) { + PyErr_Print(); + PyErr_Clear(); + PyGILState_Release(gstate); + error->all(FLERR,"Python 'compute_force' is not callable"); + } + + PyObject *py_compute_energy = PyObject_GetAttrString(py_pair_instance,"compute_energy"); + if (!py_compute_energy) { + PyErr_Print(); + PyErr_Clear(); + PyGILState_Release(gstate); + error->all(FLERR,"Could not find 'compute_energy' method'"); + } + if (!PyCallable_Check(py_compute_energy)) { + PyErr_Print(); + PyErr_Clear(); + PyGILState_Release(gstate); + error->all(FLERR,"Python 'compute_energy' is not callable"); + } + + PyObject *py_compute_args = PyTuple_New(3); + if (!py_compute_args) { + PyErr_Print(); + PyErr_Clear(); + PyGILState_Release(gstate); + error->all(FLERR,"Could not create tuple for 'compute' function arguments"); + } + + PyObject *py_rsq, *py_itype, *py_jtype, *py_value; + + // loop over neighbors of my atoms + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + py_itype = PY_INT_FROM_LONG(itype); + PyTuple_SetItem(py_compute_args,1,py_itype); + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = 
special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + // with hybrid/overlay we might get called for skipped types + if (skip_types[itype] || skip_types[jtype]) continue; + + py_jtype = PY_INT_FROM_LONG(jtype); + PyTuple_SetItem(py_compute_args,2,py_jtype); + + if (rsq < cutsq[itype][jtype]) { + py_rsq = PyFloat_FromDouble(rsq); + PyTuple_SetItem(py_compute_args,0,py_rsq); + py_value = PyObject_CallObject(py_compute_force,py_compute_args); + if (!py_value) { + PyErr_Print(); + PyErr_Clear(); + PyGILState_Release(gstate); + error->all(FLERR,"Calling 'compute_force' function failed"); + } + fpair = factor_lj*PyFloat_AsDouble(py_value); + + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + if (newton_pair || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (eflag) { + py_value = PyObject_CallObject(py_compute_energy,py_compute_args); + evdwl = factor_lj*PyFloat_AsDouble(py_value); + } else evdwl = 0.0; + + if (evflag) ev_tally(i,j,nlocal,newton_pair, + evdwl,0.0,fpair,delx,dely,delz); + } + } + } + Py_DECREF(py_compute_args); + PyGILState_Release(gstate); + + if (vflag_fdotr) virial_fdotr_compute(); +} + +/* ---------------------------------------------------------------------- + allocate all arrays +------------------------------------------------------------------------- */ + +void PairPython::allocate() +{ + allocated = 1; + int n = atom->ntypes; + + memory->create(setflag,n+1,n+1,"pair:setflag"); + for (int i = 1; i <= n; i++) + for (int j = i; j <= n; j++) + setflag[i][j] = 0; + + memory->create(cutsq,n+1,n+1,"pair:cutsq"); +} + +/* ---------------------------------------------------------------------- + global settings +------------------------------------------------------------------------- */ + +void PairPython::settings(int narg, char **arg) +{ + if (narg 
!= 1) + error->all(FLERR,"Illegal pair_style command"); + + cut_global = force->numeric(FLERR,arg[0]); +} + +/* ---------------------------------------------------------------------- + set coeffs for all type pairs +------------------------------------------------------------------------- */ + +void PairPython::coeff(int narg, char **arg) +{ + const int ntypes = atom->ntypes; + + if (narg != 3+ntypes) + error->all(FLERR,"Incorrect args for pair coefficients"); + + if (!allocated) allocate(); + + // make sure I,J args are * * + + if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0) + error->all(FLERR,"Incorrect args for pair coefficients"); + + // check if python potential file exists and source it + char * full_cls_name = arg[2]; + char * lastpos = strrchr(full_cls_name, '.'); + + if (lastpos == NULL) { + error->all(FLERR,"Python pair style requires fully qualified class name"); + } + + size_t module_name_length = strlen(full_cls_name) - strlen(lastpos); + size_t cls_name_length = strlen(lastpos)-1; + + char * module_name = new char[module_name_length+1]; + char * cls_name = new char[cls_name_length+1]; + strncpy(module_name, full_cls_name, module_name_length); + module_name[module_name_length] = 0; + + strcpy(cls_name, lastpos+1); + + PyGILState_STATE gstate = PyGILState_Ensure(); + + PyObject * pModule = PyImport_ImportModule(module_name); + if (!pModule) { + PyErr_Print(); + PyErr_Clear(); + PyGILState_Release(gstate); + error->all(FLERR,"Loading python pair style module failure"); + } + + // create LAMMPS atom type to potential file type mapping in python class + // by calling 'lammps_pair_style.map_coeff(name,type)' + + PyObject *py_pair_type = PyObject_GetAttrString(pModule, cls_name); + if (!py_pair_type) { + PyErr_Print(); + PyErr_Clear(); + PyGILState_Release(gstate); + error->all(FLERR,"Could not find pair style class in module'"); + } + + delete [] module_name; + delete [] cls_name; + + PyObject * py_pair_instance = PyObject_CallObject(py_pair_type, 
NULL); + if (!py_pair_instance) { + PyErr_Print(); + PyErr_Clear(); + PyGILState_Release(gstate); + error->all(FLERR,"Could not instantiate instance of pair style class'"); + } + + py_potential = (void *) py_pair_instance; + + PyObject *py_check_units = PyObject_GetAttrString(py_pair_instance,"check_units"); + if (!py_check_units) { + PyErr_Print(); + PyErr_Clear(); + PyGILState_Release(gstate); + error->all(FLERR,"Could not find 'check_units' method'"); + } + if (!PyCallable_Check(py_check_units)) { + PyErr_Print(); + PyErr_Clear(); + PyGILState_Release(gstate); + error->all(FLERR,"Python 'check_units' is not callable"); + } + PyObject *py_units_args = PyTuple_New(1); + if (!py_units_args) { + PyErr_Print(); + PyErr_Clear(); + PyGILState_Release(gstate); + error->all(FLERR,"Could not create tuple for 'check_units' function arguments"); + } + + PyObject *py_name = PY_STRING_FROM_STRING(update->unit_style); + PyTuple_SetItem(py_units_args,0,py_name); + PyObject *py_value = PyObject_CallObject(py_check_units,py_units_args); + if (!py_value) { + PyErr_Print(); + PyErr_Clear(); + PyGILState_Release(gstate); + error->all(FLERR,"Calling 'check_units' function failed"); + } + Py_DECREF(py_units_args); + + + PyObject *py_map_coeff = PyObject_GetAttrString(py_pair_instance,"map_coeff"); + if (!py_map_coeff) { + PyErr_Print(); + PyErr_Clear(); + PyGILState_Release(gstate); + error->all(FLERR,"Could not find 'map_coeff' method'"); + } + if (!PyCallable_Check(py_map_coeff)) { + PyErr_Print(); + PyErr_Clear(); + PyGILState_Release(gstate); + error->all(FLERR,"Python 'map_coeff' is not callable"); + } + + PyObject *py_map_args = PyTuple_New(2); + if (!py_map_args) { + PyErr_Print(); + PyErr_Clear(); + PyGILState_Release(gstate); + error->all(FLERR,"Could not create tuple for 'map_coeff' function arguments"); + } + + delete[] skip_types; + skip_types = new int[ntypes+1]; + skip_types[0] = 1; + for (int i = 1; i <= ntypes ; i++) { + if (strcmp(arg[2+i],"NULL") == 0) { + 
skip_types[i] = 1; + continue; + } else skip_types[i] = 0; + PyObject *py_type = PY_INT_FROM_LONG(i); + py_name = PY_STRING_FROM_STRING(arg[2+i]); + PyTuple_SetItem(py_map_args,0,py_name); + PyTuple_SetItem(py_map_args,1,py_type); + py_value = PyObject_CallObject(py_map_coeff,py_map_args); + if (!py_value) { + PyErr_Print(); + PyErr_Clear(); + PyGILState_Release(gstate); + error->all(FLERR,"Calling 'map_coeff' function failed"); + } + + for (int j = i; j <= ntypes ; j++) { + setflag[i][j] = 1; + cutsq[i][j] = cut_global*cut_global; + } + } + Py_DECREF(py_map_args); + PyGILState_Release(gstate); +} + +/* ---------------------------------------------------------------------- */ + +double PairPython::init_one(int, int) +{ + return cut_global; +} + +/* ---------------------------------------------------------------------- */ + +double PairPython::single(int i, int j, int itype, int jtype, double rsq, + double factor_coul, double factor_lj, + double &fforce) +{ + // with hybrid/overlay we might get called for skipped types + if (skip_types[itype] || skip_types[jtype]) { + fforce = 0.0; + return 0.0; + } + + // prepare access to compute_force and compute_energy functions + + PyGILState_STATE gstate = PyGILState_Ensure(); + PyObject *py_pair_instance = (PyObject *) py_potential; + PyObject *py_compute_force + = PyObject_GetAttrString(py_pair_instance,"compute_force"); + if (!py_compute_force) { + PyErr_Print(); + PyErr_Clear(); + PyGILState_Release(gstate); + error->all(FLERR,"Could not find 'compute_force' method'"); + } + if (!PyCallable_Check(py_compute_force)) { + PyErr_Print(); + PyErr_Clear(); + PyGILState_Release(gstate); + error->all(FLERR,"Python 'compute_force' is not callable"); + } + + PyObject *py_compute_energy + = PyObject_GetAttrString(py_pair_instance,"compute_energy"); + if (!py_compute_energy) { + PyErr_Print(); + PyErr_Clear(); + PyGILState_Release(gstate); + error->all(FLERR,"Could not find 'compute_energy' method'"); + } + if 
(!PyCallable_Check(py_compute_energy)) { + PyErr_Print(); + PyErr_Clear(); + PyGILState_Release(gstate); + error->all(FLERR,"Python 'compute_energy' is not callable"); + } + + PyObject *py_rsq, *py_itype, *py_jtype, *py_value; + PyObject *py_compute_args = PyTuple_New(3); + if (!py_compute_args) { + PyErr_Print(); + PyErr_Clear(); + PyGILState_Release(gstate); + error->all(FLERR,"Could not create tuple for 'compute' function arguments"); + } + + py_itype = PY_INT_FROM_LONG(itype); + PyTuple_SetItem(py_compute_args,1,py_itype); + py_jtype = PY_INT_FROM_LONG(jtype); + PyTuple_SetItem(py_compute_args,2,py_jtype); + py_rsq = PyFloat_FromDouble(rsq); + PyTuple_SetItem(py_compute_args,0,py_rsq); + + py_value = PyObject_CallObject(py_compute_force,py_compute_args); + if (!py_value) { + PyErr_Print(); + PyErr_Clear(); + PyGILState_Release(gstate); + error->all(FLERR,"Calling 'compute_force' function failed"); + } + fforce = factor_lj*PyFloat_AsDouble(py_value); + + py_value = PyObject_CallObject(py_compute_energy,py_compute_args); + if (!py_value) { + PyErr_Print(); + PyErr_Clear(); + PyGILState_Release(gstate); + error->all(FLERR,"Calling 'compute_energy' function failed"); + } + double evdwl = factor_lj*PyFloat_AsDouble(py_value); + + Py_DECREF(py_compute_args); + PyGILState_Release(gstate); + + return evdwl; +} diff --git a/src/PYTHON/pair_python.h b/src/PYTHON/pair_python.h new file mode 100644 index 0000000000..440b39e482 --- /dev/null +++ b/src/PYTHON/pair_python.h @@ -0,0 +1,77 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. 
+ + See the README file in the top-level LAMMPS directory. + + Pair zero is a dummy pair interaction useful for requiring a + force cutoff distance in the absense of pair-interactions or + with hybrid/overlay if a larger force cutoff distance is required. + + This can be used in conjunction with bond/create to create bonds + that are longer than the cutoff of a given force field, or to + calculate radial distribution functions for models without + pair interactions. + +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(python,PairPython) + +#else + +#ifndef LMP_PAIR_PYTHON_H +#define LMP_PAIR_PYTHON_H + +#include "pair.h" + +namespace LAMMPS_NS { + +class PairPython : public Pair { + public: + PairPython(class LAMMPS *); + virtual ~PairPython(); + virtual void compute(int, int); + void settings(int, char **); + void coeff(int, char **); + double init_one(int, int); + double single(int, int, int, int, double, double, double, double &); + + protected: + double cut_global; + void * py_potential; + int * skip_types; + + virtual void allocate(); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Illegal ... command + +Self-explanatory. Check the input script syntax and compare to the +documentation for the command. You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. + +E: Incorrect args for pair coefficients + +Self-explanatory. Check the input script or data file. + +E: Pair cutoff < Respa interior cutoff + +One or more pairwise cutoffs are too short to use with the specified +rRESPA cutoffs. 
+ +*/ diff --git a/src/PYTHON/python_compat.h b/src/PYTHON/python_compat.h new file mode 100644 index 0000000000..175d797ffa --- /dev/null +++ b/src/PYTHON/python_compat.h @@ -0,0 +1,33 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifndef LMP_PYTHON_COMPAT_H +#define LMP_PYTHON_COMPAT_H + +// Wrap API changes between Python 2 and 3 using macros +#if PY_MAJOR_VERSION == 2 +#define PY_INT_FROM_LONG(X) PyInt_FromLong(X) +#define PY_INT_AS_LONG(X) PyInt_AsLong(X) +#define PY_STRING_FROM_STRING(X) PyString_FromString(X) +#define PY_VOID_POINTER(X) PyCObject_FromVoidPtr((void *) X, NULL) +#define PY_STRING_AS_STRING(X) PyString_AsString(X) + +#elif PY_MAJOR_VERSION == 3 +#define PY_INT_FROM_LONG(X) PyLong_FromLong(X) +#define PY_INT_AS_LONG(X) PyLong_AsLong(X) +#define PY_STRING_FROM_STRING(X) PyUnicode_FromString(X) +#define PY_VOID_POINTER(X) PyCapsule_New((void *) X, NULL, NULL) +#define PY_STRING_AS_STRING(X) PyUnicode_AsUTF8(X) +#endif + +#endif diff --git a/src/PYTHON/python.cpp b/src/PYTHON/python_impl.cpp similarity index 73% rename from src/PYTHON/python.cpp rename to src/PYTHON/python_impl.cpp index 11bb848b33..55108eb8c7 100644 --- a/src/PYTHON/python.cpp +++ b/src/PYTHON/python_impl.cpp @@ -11,6 +11,10 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- + Contributing author: Richard Berger and Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + #include #include "python.h" #include "force.h" @@ -18,6 +22,7 @@ #include "variable.h" #include "memory.h" #include "error.h" +#include "python_compat.h" using namespace LAMMPS_NS; @@ -25,43 +30,63 @@ enum{NONE,INT,DOUBLE,STRING,PTR}; #define VALUELENGTH 64 // also in variable.cpp + /* ---------------------------------------------------------------------- */ -Python::Python(LAMMPS *lmp) : Pointers(lmp) +PythonImpl::PythonImpl(LAMMPS *lmp) : Pointers(lmp) { - python_exists = 1; - - pyMain = NULL; - // pfuncs stores interface info for each Python function nfunc = 0; pfuncs = NULL; + + // one-time initialization of Python interpreter + // pyMain stores pointer to main module + external_interpreter = Py_IsInitialized(); + + Py_Initialize(); + PyEval_InitThreads(); + + PyGILState_STATE gstate = PyGILState_Ensure(); + + PyObject *pModule = PyImport_AddModule("__main__"); + if (!pModule) error->all(FLERR,"Could not initialize embedded Python"); + + pyMain = (void *) pModule; + PyGILState_Release(gstate); } /* ---------------------------------------------------------------------- */ -Python::~Python() +PythonImpl::~PythonImpl() { - // clean up + if(pyMain) { + // clean up + PyGILState_STATE gstate = PyGILState_Ensure(); - for (int i = 0; i < nfunc; i++) { - delete [] pfuncs[i].name; - deallocate(i); - PyObject *pFunc = (PyObject *) pfuncs[i].pFunc; - Py_XDECREF(pFunc); + for (int i = 0; i < nfunc; i++) { + delete [] pfuncs[i].name; + deallocate(i); + PyObject *pFunc = (PyObject *) pfuncs[i].pFunc; + Py_XDECREF(pFunc); + } + + // shutdown Python interpreter + + if (!external_interpreter) { + Py_Finalize(); + } + else { + PyGILState_Release(gstate); + } } - // shutdown Python 
interpreter - - if (pyMain) Py_Finalize(); - memory->sfree(pfuncs); } /* ---------------------------------------------------------------------- */ -void Python::command(int narg, char **arg) +void PythonImpl::command(int narg, char **arg) { if (narg < 2) error->all(FLERR,"Invalid python command"); @@ -83,6 +108,23 @@ void Python::command(int narg, char **arg) return; } + // if source is only keyword, execute the python code + + if (narg == 3 && strcmp(arg[1],"source") == 0) { + int err; + + FILE *fp = fopen(arg[2],"r"); + if (fp == NULL) + err = execute_string(arg[2]); + else + err = execute_file(arg[2]); + + if (fp) fclose(fp); + if (err) error->all(FLERR,"Could not process Python source command"); + + return; + } + // parse optional args, invoke is not allowed in this mode ninput = noutput = 0; @@ -146,29 +188,7 @@ void Python::command(int narg, char **arg) int ifunc = create_entry(arg[0]); - // one-time initialization of Python interpreter - // Py_SetArgv() enables finding of *.py module files in current dir - // only needed for module load, not for direct file read into __main__ - // pymain stores pointer to main module - - if (pyMain == NULL) { - if (Py_IsInitialized()) - error->all(FLERR,"Cannot embed Python when also " - "extending Python with LAMMPS"); - Py_Initialize(); - - //char *arg = (char *) "./lmp"; - //PySys_SetArgv(1,&arg); - - //PyObject *pName = PyString_FromString("__main__"); - //if (!pName) errorX->all(FLERR,"Bad pName"); - //PyObject *pModule = PyImport_Import(pName); - //Py_DECREF(pName); - - PyObject *pModule = PyImport_AddModule("__main__"); - if (!pModule) error->all(FLERR,"Could not initialize embedded Python"); - pyMain = (void *) pModule; - } + PyGILState_STATE gstate = PyGILState_Ensure(); // send Python code to Python interpreter // file: read the file via PyRun_SimpleFile() @@ -177,22 +197,44 @@ void Python::command(int narg, char **arg) if (pyfile) { FILE *fp = fopen(pyfile,"r"); - if (fp == NULL) error->all(FLERR,"Could not open 
Python file"); + + if (fp == NULL) { + PyGILState_Release(gstate); + error->all(FLERR,"Could not open Python file"); + } + int err = PyRun_SimpleFile(fp,pyfile); - if (err) error->all(FLERR,"Could not process Python file"); + + if (err) { + PyGILState_Release(gstate); + error->all(FLERR,"Could not process Python file"); + } + fclose(fp); } else if (herestr) { int err = PyRun_SimpleString(herestr); - if (err) error->all(FLERR,"Could not process Python string"); + + if (err) { + PyGILState_Release(gstate); + error->all(FLERR,"Could not process Python string"); + } } // pFunc = function object for requested function PyObject *pModule = (PyObject *) pyMain; PyObject *pFunc = PyObject_GetAttrString(pModule,pfuncs[ifunc].name); - if (!pFunc) error->all(FLERR,"Could not find Python function"); - if (!PyCallable_Check(pFunc)) + + if (!pFunc) { + PyGILState_Release(gstate); + error->all(FLERR,"Could not find Python function"); + } + + if (!PyCallable_Check(pFunc)) { + PyGILState_Release(gstate); error->all(FLERR,"Python function is not callable"); + } + pfuncs[ifunc].pFunc = (void *) pFunc; // clean-up input storage @@ -200,12 +242,14 @@ void Python::command(int narg, char **arg) delete [] istr; delete [] format; delete [] pyfile; + PyGILState_Release(gstate); } /* ------------------------------------------------------------------ */ -void Python::invoke_function(int ifunc, char *result) +void PythonImpl::invoke_function(int ifunc, char *result) { + PyGILState_STATE gstate = PyGILState_Ensure(); PyObject *pValue; char *str; @@ -215,33 +259,54 @@ void Python::invoke_function(int ifunc, char *result) int ninput = pfuncs[ifunc].ninput; PyObject *pArgs = PyTuple_New(ninput); - if (!pArgs) error->all(FLERR,"Could not create Python function arguments"); + + if (!pArgs) { + PyGILState_Release(gstate); + error->all(FLERR,"Could not create Python function arguments"); + } for (int i = 0; i < ninput; i++) { int itype = pfuncs[ifunc].itype[i]; if (itype == INT) { if 
(pfuncs[ifunc].ivarflag[i]) { str = input->variable->retrieve(pfuncs[ifunc].svalue[i]); - if (!str) + + if (!str) { + PyGILState_Release(gstate); error->all(FLERR,"Could not evaluate Python function input variable"); - pValue = PyInt_FromLong(atoi(str)); - } else pValue = PyInt_FromLong(pfuncs[ifunc].ivalue[i]); + } + + pValue = PY_INT_FROM_LONG(atoi(str)); + } else { + pValue = PY_INT_FROM_LONG(pfuncs[ifunc].ivalue[i]); + } } else if (itype == DOUBLE) { if (pfuncs[ifunc].ivarflag[i]) { str = input->variable->retrieve(pfuncs[ifunc].svalue[i]); - if (!str) + + if (!str) { + PyGILState_Release(gstate); error->all(FLERR,"Could not evaluate Python function input variable"); + } + pValue = PyFloat_FromDouble(atof(str)); - } else pValue = PyFloat_FromDouble(pfuncs[ifunc].dvalue[i]); + } else { + pValue = PyFloat_FromDouble(pfuncs[ifunc].dvalue[i]); + } } else if (itype == STRING) { if (pfuncs[ifunc].ivarflag[i]) { str = input->variable->retrieve(pfuncs[ifunc].svalue[i]); - if (!str) + if (!str) { + PyGILState_Release(gstate); error->all(FLERR,"Could not evaluate Python function input variable"); - pValue = PyString_FromString(str); - } else pValue = PyString_FromString(pfuncs[ifunc].svalue[i]); + } + + pValue = PY_STRING_FROM_STRING(str); + } else { + pValue = PY_STRING_FROM_STRING(pfuncs[ifunc].svalue[i]); + } } else if (itype == PTR) { - pValue = PyCObject_FromVoidPtr((void *) lmp,NULL); + pValue = PY_VOID_POINTER(lmp); } PyTuple_SetItem(pArgs,i,pValue); } @@ -250,7 +315,12 @@ void Python::invoke_function(int ifunc, char *result) // error check with one() since only some procs may fail pValue = PyObject_CallObject(pFunc,pArgs); - if (!pValue) error->one(FLERR,"Python function evaluation failed"); + + if (!pValue) { + PyGILState_Release(gstate); + error->one(FLERR,"Python function evaluation failed"); + } + Py_DECREF(pArgs); // function returned a value @@ -260,22 +330,24 @@ void Python::invoke_function(int ifunc, char *result) if (pfuncs[ifunc].noutput) { int otype = 
pfuncs[ifunc].otype; if (otype == INT) { - sprintf(result,"%ld",PyInt_AsLong(pValue)); + sprintf(result,"%ld",PY_INT_AS_LONG(pValue)); } else if (otype == DOUBLE) { sprintf(result,"%.15g",PyFloat_AsDouble(pValue)); } else if (otype == STRING) { - char *pystr = PyString_AsString(pValue); + char *pystr = PY_STRING_AS_STRING(pValue); if (pfuncs[ifunc].longstr) strncpy(pfuncs[ifunc].longstr,pystr,pfuncs[ifunc].length_longstr); else strncpy(result,pystr,VALUELENGTH-1); } Py_DECREF(pValue); } + + PyGILState_Release(gstate); } /* ------------------------------------------------------------------ */ -int Python::find(char *name) +int PythonImpl::find(char *name) { for (int i = 0; i < nfunc; i++) if (strcmp(name,pfuncs[i].name) == 0) return i; @@ -284,7 +356,7 @@ int Python::find(char *name) /* ------------------------------------------------------------------ */ -int Python::variable_match(char *name, char *varname, int numeric) +int PythonImpl::variable_match(char *name, char *varname, int numeric) { int ifunc = find(name); if (ifunc < 0) return -1; @@ -296,14 +368,14 @@ int Python::variable_match(char *name, char *varname, int numeric) /* ------------------------------------------------------------------ */ -char *Python::long_string(int ifunc) +char *PythonImpl::long_string(int ifunc) { return pfuncs[ifunc].longstr; } /* ------------------------------------------------------------------ */ -int Python::create_entry(char *name) +int PythonImpl::create_entry(char *name) { // ifunc = index to entry by name in pfuncs vector, can be old or new // free old vectors if overwriting old pfunc @@ -411,9 +483,35 @@ int Python::create_entry(char *name) return ifunc; } +/* ---------------------------------------------------------------------- */ + +int PythonImpl::execute_string(char *cmd) +{ + PyGILState_STATE gstate = PyGILState_Ensure(); + int err = PyRun_SimpleString(cmd); + PyGILState_Release(gstate); + + return err; +} + +/* 
---------------------------------------------------------------------- */ + +int PythonImpl::execute_file(char *fname) +{ + FILE *fp = fopen(fname,"r"); + if (fp == NULL) return -1; + + PyGILState_STATE gstate = PyGILState_Ensure(); + int err = PyRun_SimpleFile(fp,fname); + PyGILState_Release(gstate); + + if (fp) fclose(fp); + return err; +} + /* ------------------------------------------------------------------ */ -void Python::deallocate(int i) +void PythonImpl::deallocate(int i) { delete [] pfuncs[i].itype; delete [] pfuncs[i].ivarflag; diff --git a/src/PYTHON/python_impl.h b/src/PYTHON/python_impl.h new file mode 100644 index 0000000000..efe43edbd8 --- /dev/null +++ b/src/PYTHON/python_impl.h @@ -0,0 +1,132 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifndef LMP_PYTHON_IMPL_H +#define LMP_PYTHON_IMPL_H + +#include "pointers.h" + +namespace LAMMPS_NS { + +class PythonImpl : protected Pointers, public PythonInterface { + public: + bool external_interpreter; + + PythonImpl(class LAMMPS *); + ~PythonImpl(); + void command(int, char **); + void invoke_function(int, char *); + int find(char *); + int variable_match(char *, char *, int); + char *long_string(int); + int execute_string(char *); + int execute_file(char *); + + private: + int ninput,noutput,length_longstr; + char **istr; + char *ostr,*format; + void *pyMain; + + struct PyFunc { + char *name; + int ninput,noutput; + int *itype,*ivarflag; + int *ivalue; + double *dvalue; + char **svalue; + int otype; + char *ovarname; + char *longstr; + int length_longstr; + void *pFunc; + }; + + PyFunc *pfuncs; + int nfunc; + + int create_entry(char *); + void deallocate(int); +}; + +} + +#endif + +/* ERROR/WARNING messages: + +E: Invalid python command + +Self-explanatory. Check the input script syntax and compare to the +documentation for the command. You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. + +E: Python invoke of undefined function + +Cannot invoke a function that has not been previously defined. + +E: Python variable does not match Python function + +This matching is defined by the python-style variable and the python +command. + +E: Cannot embed Python when also extending Python with LAMMPS + +When running LAMMPS via Python through the LAMMPS library interface +you cannot also user the input script python command. + +E: Could not initialize embedded Python + +The main module in Python was not accessible. + +E: Could not open Python file + +The specified file of Python code cannot be opened. Check that the +path and name are correct. 
+ +E: Could not process Python file + +The Python code in the specified file was not run successfully by +Python, probably due to errors in the Python code. + +E: Could not process Python string + +The Python code in the here string was not run successfully by Python, +probably due to errors in the Python code. + +E: Could not find Python function + +The provided Python code was run successfully, but it did not +define a callable function with the required name. + +E: Python function is not callable + +The provided Python code was run successfully, but it did not +define a callable function with the required name. + +E: Could not create Python function arguments + +This is an internal Python error, possibly because the number +of inputs to the function is too large. + +E: Could not evaluate Python function input variable + +Self-explanatory. + +E: Python function evaluation failed + +The Python function did not run successfully and/or did not return a +value (if it is supposed to return a value). This is probably due to +some error condition in the function. 
+ +*/ diff --git a/src/Purge.list b/src/Purge.list index 772961bbdf..fbc4de9fcd 100644 --- a/src/Purge.list +++ b/src/Purge.list @@ -19,6 +19,33 @@ style_neigh_stencil.h # deleted on 01 Mar 2017 npair_halffull_newton_ssa.cpp npair_halffull_newton_ssa.h +# deleted on 4 May 2017 +dump_custom_vtk.cpp +dump_custom_vtk.h +dump_nc.cpp +dump_nc.h +dump_nc_mpiio.cpp +dump_nc_mpiio.h +pair_reax_c.cpp +pair_reax_c.h +fix_reax_c.cpp +fix_reax_c.h +fix_reax_c_bonds.cpp +fix_reax_c_bonds.h +fix_reax_c_species.cpp +fix_reax_c_species.h +pair_reax_c_kokkos.cpp +pair_reax_c_kokkos.h +fix_reax_c_bonds_kokkos.cpp +fix_reax_c_bonds_kokkos.h +fix_reax_c_species_kokkos.cpp +fix_reax_c_species_kokkos.h +# deleted on 19 April 2017 +vmdplugin.h +molfile_plugin.h +# deleted on 13 April 2017 +dihedral_charmmfsh.cpp +dihedral_charmmfsh.h # deleted on ## XXX 2016 accelerator_intel.h neigh_bond.cpp diff --git a/src/QEQ/fix_qeq.cpp b/src/QEQ/fix_qeq.cpp index 7e8db7632c..c5b566eef7 100644 --- a/src/QEQ/fix_qeq.cpp +++ b/src/QEQ/fix_qeq.cpp @@ -286,9 +286,6 @@ void FixQEq::setup_pre_force(int vflag) if (force->newton_pair == 0) error->all(FLERR,"QEQ with 'newton pair off' not supported"); - // should not be needed - // neighbor->build_one(list); - deallocate_storage(); allocate_storage(); diff --git a/src/QEQ/fix_qeq_point.cpp b/src/QEQ/fix_qeq_point.cpp index 9af70a445a..63d20ad911 100644 --- a/src/QEQ/fix_qeq_point.cpp +++ b/src/QEQ/fix_qeq_point.cpp @@ -58,7 +58,7 @@ void FixQEqPoint::init() neighbor->requests[irequest]->full = 1; int ntypes = atom->ntypes; - memory->create(shld,ntypes+1,ntypes+1,"qeq:shileding"); + memory->create(shld,ntypes+1,ntypes+1,"qeq:shielding"); if (strstr(update->integrate_style,"respa")) nlevels_respa = ((Respa *) update->integrate)->nlevels; diff --git a/src/REAX/Install.sh b/src/REAX/Install.sh index f0083810e9..bf8c8dbca2 100644 --- a/src/REAX/Install.sh +++ b/src/REAX/Install.sh @@ -29,7 +29,7 @@ action () { # all package files with no dependencies for file in 
*.cpp *.h; do - action $file + test -f ${file} && action $file done # edit 2 Makefile.package files to include/exclude package info diff --git a/src/REAX/fix_reax_bonds.cpp b/src/REAX/fix_reax_bonds.cpp index 2574d991ae..00cfb0937b 100644 --- a/src/REAX/fix_reax_bonds.cpp +++ b/src/REAX/fix_reax_bonds.cpp @@ -49,7 +49,21 @@ FixReaxBonds::FixReaxBonds(LAMMPS *lmp, int narg, char **arg) : if (nevery < 1) error->all(FLERR,"Illegal fix reax/bonds command"); if (me == 0) { - fp = fopen(arg[4],"w"); + char *suffix = strrchr(arg[4],'.'); + if (suffix && strcmp(suffix,".gz") == 0) { +#ifdef LAMMPS_GZIP + char gzip[128]; + sprintf(gzip,"gzip -6 > %s",arg[4]); +#ifdef _WIN32 + fp = _popen(gzip,"wb"); +#else + fp = popen(gzip,"w"); +#endif +#else + error->one(FLERR,"Cannot open gzipped file"); +#endif + } else fp = fopen(arg[4],"w"); + if (fp == NULL) { char str[128]; sprintf(str,"Cannot open fix reax/bonds file %s",arg[4]); diff --git a/src/REPLICA/compute_event_displace.cpp b/src/REPLICA/compute_event_displace.cpp index 1431fc202e..330e2ebca3 100644 --- a/src/REPLICA/compute_event_displace.cpp +++ b/src/REPLICA/compute_event_displace.cpp @@ -84,7 +84,7 @@ void ComputeEventDisplace::init() } /* ---------------------------------------------------------------------- - return non-zero if an atom has moved > displace_dist since last event + return non-zero if any atom has moved > displace_dist since last event ------------------------------------------------------------------------- */ double ComputeEventDisplace::compute_scalar() @@ -145,6 +145,62 @@ double ComputeEventDisplace::compute_scalar() return scalar; } +/* ---------------------------------------------------------------------- + return count of atoms that have moved > displace_dist since last event +------------------------------------------------------------------------- */ + +int ComputeEventDisplace::all_events() +{ + invoked_scalar = update->ntimestep; + + if (id_event == NULL) return 0.0; + + int event = 0; + 
double **xevent = fix_event->array_atom; + + double **x = atom->x; + int *mask = atom->mask; + imageint *image = atom->image; + int nlocal = atom->nlocal; + + double *h = domain->h; + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + int xbox,ybox,zbox; + double dx,dy,dz,rsq; + + if (triclinic == 0) { + for (int i = 0; i < nlocal; i++) + if (mask[i] & groupbit) { + xbox = (image[i] & IMGMASK) - IMGMAX; + ybox = (image[i] >> IMGBITS & IMGMASK) - IMGMAX; + zbox = (image[i] >> IMG2BITS) - IMGMAX; + dx = x[i][0] + xbox*xprd - xevent[i][0]; + dy = x[i][1] + ybox*yprd - xevent[i][1]; + dz = x[i][2] + zbox*zprd - xevent[i][2]; + rsq = dx*dx + dy*dy + dz*dz; + if (rsq >= displace_distsq) event++; + } + } else { + for (int i = 0; i < nlocal; i++) + if (mask[i] & groupbit) { + xbox = (image[i] & IMGMASK) - IMGMAX; + ybox = (image[i] >> IMGBITS & IMGMASK) - IMGMAX; + zbox = (image[i] >> IMG2BITS) - IMGMAX; + dx = x[i][0] + h[0]*xbox + h[5]*ybox + h[4]*zbox - xevent[i][0]; + dy = x[i][1] + h[1]*ybox + h[3]*zbox - xevent[i][1]; + dz = x[i][2] + h[2]*zbox - xevent[i][2]; + rsq = dx*dx + dy*dy + dz*dz; + if (rsq >= displace_distsq) event++; + } + } + + int allevents; + MPI_Allreduce(&event,&allevents,1,MPI_INT,MPI_SUM,world); + + return allevents; +} /* ---------------------------------------------------------------------- */ diff --git a/src/REPLICA/compute_event_displace.h b/src/REPLICA/compute_event_displace.h index c545c696a4..602f3c4b76 100644 --- a/src/REPLICA/compute_event_displace.h +++ b/src/REPLICA/compute_event_displace.h @@ -30,8 +30,11 @@ class ComputeEventDisplace : public Compute { ~ComputeEventDisplace(); void init(); double compute_scalar(); + + int all_events(); void reset_extra_compute_fix(const char *); + private: int triclinic; double displace_distsq; diff --git a/src/RIGID/fix_shake.cpp b/src/RIGID/fix_shake.cpp index 1fe704efb0..5c993ee859 100644 --- a/src/RIGID/fix_shake.cpp +++ b/src/RIGID/fix_shake.cpp @@ -1419,12 
+1419,14 @@ void FixShake::shake(int m) domain->minimum_image(r01); // s01 = distance vec after unconstrained update, with PBC + // use Domain::minimum_image_once(), not minimum_image() + // b/c xshake values might be huge, due to e.g. fix gcmc double s01[3]; s01[0] = xshake[i0][0] - xshake[i1][0]; s01[1] = xshake[i0][1] - xshake[i1][1]; s01[2] = xshake[i0][2] - xshake[i1][2]; - domain->minimum_image(s01); + domain->minimum_image_once(s01); // scalar distances between atoms @@ -1526,18 +1528,20 @@ void FixShake::shake3(int m) domain->minimum_image(r02); // s01,s02 = distance vec after unconstrained update, with PBC + // use Domain::minimum_image_once(), not minimum_image() + // b/c xshake values might be huge, due to e.g. fix gcmc double s01[3]; s01[0] = xshake[i0][0] - xshake[i1][0]; s01[1] = xshake[i0][1] - xshake[i1][1]; s01[2] = xshake[i0][2] - xshake[i1][2]; - domain->minimum_image(s01); + domain->minimum_image_once(s01); double s02[3]; s02[0] = xshake[i0][0] - xshake[i2][0]; s02[1] = xshake[i0][1] - xshake[i2][1]; s02[2] = xshake[i0][2] - xshake[i2][2]; - domain->minimum_image(s02); + domain->minimum_image_once(s02); // scalar distances between atoms @@ -1699,24 +1703,26 @@ void FixShake::shake4(int m) domain->minimum_image(r03); // s01,s02,s03 = distance vec after unconstrained update, with PBC + // use Domain::minimum_image_once(), not minimum_image() + // b/c xshake values might be huge, due to e.g. 
fix gcmc double s01[3]; s01[0] = xshake[i0][0] - xshake[i1][0]; s01[1] = xshake[i0][1] - xshake[i1][1]; s01[2] = xshake[i0][2] - xshake[i1][2]; - domain->minimum_image(s01); + domain->minimum_image_once(s01); double s02[3]; s02[0] = xshake[i0][0] - xshake[i2][0]; s02[1] = xshake[i0][1] - xshake[i2][1]; s02[2] = xshake[i0][2] - xshake[i2][2]; - domain->minimum_image(s02); + domain->minimum_image_once(s02); double s03[3]; s03[0] = xshake[i0][0] - xshake[i3][0]; s03[1] = xshake[i0][1] - xshake[i3][1]; s03[2] = xshake[i0][2] - xshake[i3][2]; - domain->minimum_image(s03); + domain->minimum_image_once(s03); // scalar distances between atoms @@ -1941,24 +1947,26 @@ void FixShake::shake3angle(int m) domain->minimum_image(r12); // s01,s02,s12 = distance vec after unconstrained update, with PBC + // use Domain::minimum_image_once(), not minimum_image() + // b/c xshake values might be huge, due to e.g. fix gcmc double s01[3]; s01[0] = xshake[i0][0] - xshake[i1][0]; s01[1] = xshake[i0][1] - xshake[i1][1]; s01[2] = xshake[i0][2] - xshake[i1][2]; - domain->minimum_image(s01); + domain->minimum_image_once(s01); double s02[3]; s02[0] = xshake[i0][0] - xshake[i2][0]; s02[1] = xshake[i0][1] - xshake[i2][1]; s02[2] = xshake[i0][2] - xshake[i2][2]; - domain->minimum_image(s02); + domain->minimum_image_once(s02); double s12[3]; s12[0] = xshake[i1][0] - xshake[i2][0]; s12[1] = xshake[i1][1] - xshake[i2][1]; s12[2] = xshake[i1][2] - xshake[i2][2]; - domain->minimum_image(s12); + domain->minimum_image_once(s12); // scalar distances between atoms @@ -2055,6 +2063,7 @@ void FixShake::shake3angle(int m) double quad1,quad2,quad3,b1,b2,b3,lamda01_new,lamda02_new,lamda12_new; while (!done && niter < max_iter) { + quad1 = quad1_0101 * lamda01*lamda01 + quad1_0202 * lamda02*lamda02 + quad1_1212 * lamda12*lamda12 + diff --git a/src/SNAP/compute_sna_atom.cpp b/src/SNAP/compute_sna_atom.cpp index ad934535ab..cba6fae9b7 100644 --- a/src/SNAP/compute_sna_atom.cpp +++ b/src/SNAP/compute_sna_atom.cpp @@ 
-48,7 +48,8 @@ ComputeSNAAtom::ComputeSNAAtom(LAMMPS *lmp, int narg, char **arg) : diagonalstyle = 0; rmin0 = 0.0; switchflag = 1; - bzeroflag = 0; + bzeroflag = 1; + quadraticflag = 0; // offset by 1 to match up with types @@ -106,6 +107,11 @@ ComputeSNAAtom::ComputeSNAAtom(LAMMPS *lmp, int narg, char **arg) : error->all(FLERR,"Illegal compute sna/atom command"); bzeroflag = atoi(arg[iarg+1]); iarg += 2; + } else if (strcmp(arg[iarg],"quadraticflag") == 0) { + if (iarg+2 > narg) + error->all(FLERR,"Illegal compute sna/atom command"); + quadraticflag = atoi(arg[iarg+1]); + iarg += 2; } else error->all(FLERR,"Illegal compute sna/atom command"); } @@ -122,8 +128,9 @@ ComputeSNAAtom::ComputeSNAAtom(LAMMPS *lmp, int narg, char **arg) : } ncoeff = snaptr[0]->ncoeff; - peratom_flag = 1; size_peratom_cols = ncoeff; + if (quadraticflag) size_peratom_cols += ncoeff*ncoeff; + peratom_flag = 1; nmax = 0; njmax = 0; @@ -264,8 +271,16 @@ void ComputeSNAAtom::compute_peratom() snaptr[tid]->copy_bi2bvec(); for (int icoeff = 0; icoeff < ncoeff; icoeff++) sna[i][icoeff] = snaptr[tid]->bvec[icoeff]; + if (quadraticflag) { + int ncount = ncoeff; + for (int icoeff = 0; icoeff < ncoeff; icoeff++) { + double bi = snaptr[tid]->bvec[icoeff]; + for (int jcoeff = 0; jcoeff < ncoeff; jcoeff++) + sna[i][ncount++] = bi*snaptr[tid]->bvec[jcoeff]; + } + } } else { - for (int icoeff = 0; icoeff < ncoeff; icoeff++) + for (int icoeff = 0; icoeff < size_peratom_cols; icoeff++) sna[i][icoeff] = 0.0; } } diff --git a/src/SNAP/compute_sna_atom.h b/src/SNAP/compute_sna_atom.h index af62d7cf3b..b22eea71b5 100644 --- a/src/SNAP/compute_sna_atom.h +++ b/src/SNAP/compute_sna_atom.h @@ -44,7 +44,7 @@ class ComputeSNAAtom : public Compute { double *wjelem; class SNA** snaptr; double cutmax; - + int quadraticflag; }; } diff --git a/src/SNAP/compute_snad_atom.cpp b/src/SNAP/compute_snad_atom.cpp index 73452427bd..39f34dd8cd 100644 --- a/src/SNAP/compute_snad_atom.cpp +++ b/src/SNAP/compute_snad_atom.cpp @@ 
-48,9 +48,11 @@ ComputeSNADAtom::ComputeSNADAtom(LAMMPS *lmp, int narg, char **arg) : diagonalstyle = 0; rmin0 = 0.0; switchflag = 1; - bzeroflag = 0; + bzeroflag = 1; + quadraticflag = 0; // process required arguments + memory->create(radelem,ntypes+1,"sna/atom:radelem"); // offset by 1 to match up with types memory->create(wjelem,ntypes+1,"sna/atom:wjelem"); rcutfac = atof(arg[3]); @@ -60,11 +62,15 @@ ComputeSNADAtom::ComputeSNADAtom(LAMMPS *lmp, int narg, char **arg) : radelem[i+1] = atof(arg[6+i]); for(int i = 0; i < ntypes; i++) wjelem[i+1] = atof(arg[6+ntypes+i]); + // construct cutsq + double cut; + cutmax = 0.0; memory->create(cutsq,ntypes+1,ntypes+1,"sna/atom:cutsq"); for(int i = 1; i <= ntypes; i++) { cut = 2.0*radelem[i]*rcutfac; + if (cut > cutmax) cutmax = cut; cutsq[i][i] = cut*cut; for(int j = i+1; j <= ntypes; j++) { cut = (radelem[i]+radelem[j])*rcutfac; @@ -94,6 +100,11 @@ ComputeSNADAtom::ComputeSNADAtom(LAMMPS *lmp, int narg, char **arg) : error->all(FLERR,"Illegal compute snad/atom command"); switchflag = atoi(arg[iarg+1]); iarg += 2; + } else if (strcmp(arg[iarg],"quadraticflag") == 0) { + if (iarg+2 > narg) + error->all(FLERR,"Illegal compute snad/atom command"); + quadraticflag = atoi(arg[iarg+1]); + iarg += 2; } else error->all(FLERR,"Illegal compute snad/atom command"); } @@ -110,9 +121,19 @@ ComputeSNADAtom::ComputeSNADAtom(LAMMPS *lmp, int narg, char **arg) : } ncoeff = snaptr[0]->ncoeff; - peratom_flag = 1; - size_peratom_cols = 3*ncoeff*atom->ntypes; + twoncoeff = 2*ncoeff; + threencoeff = 3*ncoeff; + size_peratom_cols = threencoeff*atom->ntypes; + if (quadraticflag) { + ncoeffsq = ncoeff*ncoeff; + twoncoeffsq = 2*ncoeffsq; + threencoeffsq = 3*ncoeffsq; + size_peratom_cols += + threencoeffsq*atom->ntypes; + } comm_reverse = size_peratom_cols; + peratom_flag = 1; + nmax = 0; njmax = 0; snad = NULL; @@ -136,10 +157,9 @@ void ComputeSNADAtom::init() { if (force->pair == NULL) error->all(FLERR,"Compute snad/atom requires a pair style be 
defined"); - // TODO: Not sure what to do with this error check since cutoff radius is not - // a single number - //if (sqrt(cutsq) > force->pair->cutforce) - //error->all(FLERR,"Compute snad/atom cutoff is longer than pairwise cutoff"); + + if (cutmax > force->pair->cutforce) + error->all(FLERR,"Compute sna/atom cutoff is longer than pairwise cutoff"); // need an occasional full neighbor list @@ -228,7 +248,9 @@ void ComputeSNADAtom::compute_peratom() const int* const jlist = firstneigh[i]; const int jnum = numneigh[i]; - const int typeoffset = 3*ncoeff*(atom->type[i]-1); + const int typeoffset = threencoeff*(atom->type[i]-1); + const int quadraticoffset = threencoeff*atom->ntypes + + threencoeffsq*(atom->type[i]-1); // insure rij, inside, and typej are of size jnum @@ -262,7 +284,11 @@ void ComputeSNADAtom::compute_peratom() snaptr[tid]->compute_ui(ninside); snaptr[tid]->compute_zi(); - + if (quadraticflag) { + snaptr[tid]->compute_bi(); + snaptr[tid]->copy_bi2bvec(); + } + for (int jj = 0; jj < ninside; jj++) { const int j = snaptr[tid]->inside[jj]; snaptr[tid]->compute_duidrj(snaptr[tid]->rij[jj], @@ -279,11 +305,38 @@ void ComputeSNADAtom::compute_peratom() for (int icoeff = 0; icoeff < ncoeff; icoeff++) { snadi[icoeff] += snaptr[tid]->dbvec[icoeff][0]; snadi[icoeff+ncoeff] += snaptr[tid]->dbvec[icoeff][1]; - snadi[icoeff+2*ncoeff] += snaptr[tid]->dbvec[icoeff][2]; + snadi[icoeff+twoncoeff] += snaptr[tid]->dbvec[icoeff][2]; snadj[icoeff] -= snaptr[tid]->dbvec[icoeff][0]; snadj[icoeff+ncoeff] -= snaptr[tid]->dbvec[icoeff][1]; - snadj[icoeff+2*ncoeff] -= snaptr[tid]->dbvec[icoeff][2]; + snadj[icoeff+twoncoeff] -= snaptr[tid]->dbvec[icoeff][2]; } + + if (quadraticflag) { + double *snadi = snad[i]+quadraticoffset; + double *snadj = snad[j]+quadraticoffset; + int ncount = 0; + for (int icoeff = 0; icoeff < ncoeff; icoeff++) { + double bi = snaptr[tid]->bvec[icoeff]; + double bix = snaptr[tid]->dbvec[icoeff][0]; + double biy = snaptr[tid]->dbvec[icoeff][1]; + double 
biz = snaptr[tid]->dbvec[icoeff][2]; + for (int jcoeff = 0; jcoeff < ncoeff; jcoeff++) { + double dbxtmp = bi*snaptr[tid]->dbvec[jcoeff][0] + + bix*snaptr[tid]->bvec[jcoeff]; + double dbytmp = bi*snaptr[tid]->dbvec[jcoeff][1] + + biy*snaptr[tid]->bvec[jcoeff]; + double dbztmp = bi*snaptr[tid]->dbvec[jcoeff][2] + + biz*snaptr[tid]->bvec[jcoeff]; + snadi[ncount] += dbxtmp; + snadi[ncount+ncoeffsq] += dbytmp; + snadi[ncount+twoncoeffsq] += dbztmp; + snadj[ncount] -= dbxtmp; + snadj[ncount+ncoeffsq] -= dbytmp; + snadj[ncount+twoncoeffsq] -= dbztmp; + ncount++; + } + } + } } } } @@ -331,7 +384,8 @@ double ComputeSNADAtom::memory_usage() double bytes = nmax*size_peratom_cols * sizeof(double); bytes += 3*njmax*sizeof(double); bytes += njmax*sizeof(int); - bytes += ncoeff*3; + bytes += threencoeff*atom->ntypes; + if (quadraticflag) bytes += threencoeffsq*atom->ntypes; bytes += snaptr[0]->memory_usage()*comm->nthreads; return bytes; } diff --git a/src/SNAP/compute_snad_atom.h b/src/SNAP/compute_snad_atom.h index 31f5bf252d..0d5a369ab6 100644 --- a/src/SNAP/compute_snad_atom.h +++ b/src/SNAP/compute_snad_atom.h @@ -37,7 +37,7 @@ class ComputeSNADAtom : public Compute { private: int nmax, njmax, diagonalstyle; - int ncoeff; + int ncoeff, twoncoeff, threencoeff, ncoeffsq, twoncoeffsq, threencoeffsq; double **cutsq; class NeighList *list; double **snad; @@ -45,7 +45,8 @@ class ComputeSNADAtom : public Compute { double *radelem; double *wjelem; class SNA** snaptr; - + double cutmax; + int quadraticflag; }; } diff --git a/src/SNAP/compute_snav_atom.cpp b/src/SNAP/compute_snav_atom.cpp index f75b02fba7..0d21d16561 100644 --- a/src/SNAP/compute_snav_atom.cpp +++ b/src/SNAP/compute_snav_atom.cpp @@ -38,8 +38,6 @@ ComputeSNAVAtom::ComputeSNAVAtom(LAMMPS *lmp, int narg, char **arg) : radelem = NULL; wjelem = NULL; - nvirial = 6; - int ntypes = atom->ntypes; int nargmin = 6+2*ntypes; @@ -50,9 +48,11 @@ ComputeSNAVAtom::ComputeSNAVAtom(LAMMPS *lmp, int narg, char **arg) : diagonalstyle 
= 0; rmin0 = 0.0; switchflag = 1; - bzeroflag = 0; + bzeroflag = 1; + quadraticflag = 0; // process required arguments + memory->create(radelem,ntypes+1,"sna/atom:radelem"); // offset by 1 to match up with types memory->create(wjelem,ntypes+1,"sna/atom:wjelem"); rcutfac = atof(arg[3]); @@ -96,6 +96,11 @@ ComputeSNAVAtom::ComputeSNAVAtom(LAMMPS *lmp, int narg, char **arg) : error->all(FLERR,"Illegal compute snav/atom command"); switchflag = atoi(arg[iarg+1]); iarg += 2; + } else if (strcmp(arg[iarg],"quadraticflag") == 0) { + if (iarg+2 > narg) + error->all(FLERR,"Illegal compute snav/atom command"); + quadraticflag = atoi(arg[iarg+1]); + iarg += 2; } else error->all(FLERR,"Illegal compute snav/atom command"); } @@ -112,9 +117,24 @@ ComputeSNAVAtom::ComputeSNAVAtom(LAMMPS *lmp, int narg, char **arg) : } ncoeff = snaptr[0]->ncoeff; - peratom_flag = 1; - size_peratom_cols = nvirial*ncoeff*atom->ntypes; + twoncoeff = 2*ncoeff; + threencoeff = 3*ncoeff; + fourncoeff = 4*ncoeff; + fivencoeff = 5*ncoeff; + sixncoeff = 6*ncoeff; + size_peratom_cols = sixncoeff*atom->ntypes; + if (quadraticflag) { + ncoeffsq = ncoeff*ncoeff; + twoncoeffsq = 2*ncoeffsq; + threencoeffsq = 3*ncoeffsq; + fourncoeffsq = 4*ncoeffsq; + fivencoeffsq = 5*ncoeffsq; + sixncoeffsq = 6*ncoeffsq; + size_peratom_cols += + sixncoeffsq*atom->ntypes; + } comm_reverse = size_peratom_cols; + peratom_flag = 1; nmax = 0; njmax = 0; @@ -231,7 +251,9 @@ void ComputeSNAVAtom::compute_peratom() const int* const jlist = firstneigh[i]; const int jnum = numneigh[i]; - const int typeoffset = nvirial*ncoeff*(atom->type[i]-1); + const int typeoffset = sixncoeff*(atom->type[i]-1); + const int quadraticoffset = sixncoeff*atom->ntypes + + sixncoeffsq*(atom->type[i]-1); // insure rij, inside, and typej are of size jnum @@ -265,6 +287,10 @@ void ComputeSNAVAtom::compute_peratom() snaptr[tid]->compute_ui(ninside); snaptr[tid]->compute_zi(); + if (quadraticflag) { + snaptr[tid]->compute_bi(); + snaptr[tid]->copy_bi2bvec(); + } 
for (int jj = 0; jj < ninside; jj++) { const int j = snaptr[tid]->inside[jj]; @@ -281,19 +307,52 @@ void ComputeSNAVAtom::compute_peratom() double *snavj = snav[j]+typeoffset; for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - snavi[icoeff] += snaptr[tid]->dbvec[icoeff][0]*xtmp; - snavi[icoeff+ncoeff] += snaptr[tid]->dbvec[icoeff][1]*ytmp; - snavi[icoeff+2*ncoeff] += snaptr[tid]->dbvec[icoeff][2]*ztmp; - snavi[icoeff+3*ncoeff] += snaptr[tid]->dbvec[icoeff][1]*ztmp; - snavi[icoeff+4*ncoeff] += snaptr[tid]->dbvec[icoeff][0]*ztmp; - snavi[icoeff+5*ncoeff] += snaptr[tid]->dbvec[icoeff][0]*ytmp; - snavj[icoeff] -= snaptr[tid]->dbvec[icoeff][0]*x[j][0]; - snavj[icoeff+ncoeff] -= snaptr[tid]->dbvec[icoeff][1]*x[j][1]; - snavj[icoeff+2*ncoeff] -= snaptr[tid]->dbvec[icoeff][2]*x[j][2]; - snavj[icoeff+3*ncoeff] -= snaptr[tid]->dbvec[icoeff][1]*x[j][2]; - snavj[icoeff+4*ncoeff] -= snaptr[tid]->dbvec[icoeff][0]*x[j][2]; - snavj[icoeff+5*ncoeff] -= snaptr[tid]->dbvec[icoeff][0]*x[j][1]; + snavi[icoeff] += snaptr[tid]->dbvec[icoeff][0]*xtmp; + snavi[icoeff+ncoeff] += snaptr[tid]->dbvec[icoeff][1]*ytmp; + snavi[icoeff+twoncoeff] += snaptr[tid]->dbvec[icoeff][2]*ztmp; + snavi[icoeff+threencoeff] += snaptr[tid]->dbvec[icoeff][1]*ztmp; + snavi[icoeff+fourncoeff] += snaptr[tid]->dbvec[icoeff][0]*ztmp; + snavi[icoeff+fivencoeff] += snaptr[tid]->dbvec[icoeff][0]*ytmp; + snavj[icoeff] -= snaptr[tid]->dbvec[icoeff][0]*x[j][0]; + snavj[icoeff+ncoeff] -= snaptr[tid]->dbvec[icoeff][1]*x[j][1]; + snavj[icoeff+twoncoeff] -= snaptr[tid]->dbvec[icoeff][2]*x[j][2]; + snavj[icoeff+threencoeff] -= snaptr[tid]->dbvec[icoeff][1]*x[j][2]; + snavj[icoeff+fourncoeff] -= snaptr[tid]->dbvec[icoeff][0]*x[j][2]; + snavj[icoeff+fivencoeff] -= snaptr[tid]->dbvec[icoeff][0]*x[j][1]; } + + if (quadraticflag) { + double *snavi = snav[i]+quadraticoffset; + double *snavj = snav[j]+quadraticoffset; + int ncount = 0; + for (int icoeff = 0; icoeff < ncoeff; icoeff++) { + double bi = snaptr[tid]->bvec[icoeff]; + 
double bix = snaptr[tid]->dbvec[icoeff][0]; + double biy = snaptr[tid]->dbvec[icoeff][1]; + double biz = snaptr[tid]->dbvec[icoeff][2]; + for (int jcoeff = 0; jcoeff < ncoeff; jcoeff++) { + double dbxtmp = bi*snaptr[tid]->dbvec[jcoeff][0] + + bix*snaptr[tid]->bvec[jcoeff]; + double dbytmp = bi*snaptr[tid]->dbvec[jcoeff][1] + + biy*snaptr[tid]->bvec[jcoeff]; + double dbztmp = bi*snaptr[tid]->dbvec[jcoeff][2] + + biz*snaptr[tid]->bvec[jcoeff]; + snavi[ncount] += dbxtmp*xtmp; + snavi[ncount+ncoeffsq] += dbytmp*ytmp; + snavi[ncount+twoncoeffsq] += dbztmp*ztmp; + snavi[ncount+threencoeffsq] += dbytmp*ztmp; + snavi[ncount+fourncoeffsq] += dbxtmp*ztmp; + snavi[ncount+fivencoeffsq] += dbxtmp*ytmp; + snavj[ncount] -= dbxtmp*x[j][0]; + snavj[ncount+ncoeffsq] -= dbytmp*x[j][1]; + snavj[ncount+twoncoeffsq] -= dbztmp*x[j][2]; + snavj[ncount+threencoeffsq] -= dbytmp*x[j][2]; + snavj[ncount+fourncoeffsq] -= dbxtmp*x[j][2]; + snavj[ncount+fivencoeffsq] -= dbxtmp*x[j][1]; + ncount++; + } + } + } } } } @@ -341,7 +400,8 @@ double ComputeSNAVAtom::memory_usage() double bytes = nmax*size_peratom_cols * sizeof(double); bytes += 3*njmax*sizeof(double); bytes += njmax*sizeof(int); - bytes += ncoeff*nvirial; + bytes += sixncoeff*atom->ntypes; + if (quadraticflag) bytes += sixncoeffsq*atom->ntypes; bytes += snaptr[0]->memory_usage()*comm->nthreads; return bytes; } diff --git a/src/SNAP/compute_snav_atom.h b/src/SNAP/compute_snav_atom.h index 0252be7059..33ae4f9217 100644 --- a/src/SNAP/compute_snav_atom.h +++ b/src/SNAP/compute_snav_atom.h @@ -37,16 +37,17 @@ class ComputeSNAVAtom : public Compute { private: int nmax, njmax, diagonalstyle; - int ncoeff,nvirial; + int ncoeff, twoncoeff, threencoeff, fourncoeff, fivencoeff, sixncoeff; + int ncoeffsq, twoncoeffsq, threencoeffsq, fourncoeffsq, fivencoeffsq, sixncoeffsq; double **cutsq; class NeighList *list; double **snav; double rcutfac; double *radelem; double *wjelem; - class SNA** snaptr; - + double cutmax; + int quadraticflag; }; } diff 
--git a/src/SNAP/pair_snap.cpp b/src/SNAP/pair_snap.cpp index 06c2e48488..e4ed57b933 100644 --- a/src/SNAP/pair_snap.cpp +++ b/src/SNAP/pair_snap.cpp @@ -1635,7 +1635,8 @@ void PairSNAP::read_files(char *coefffilename, char *paramfilename) rmin0 = 0.0; diagonalstyle = 3; switchflag = 1; - bzeroflag = 0; + bzeroflag = 1; + // open SNAP parameter file on proc 0 FILE *fpparam; diff --git a/src/USER-ATC/Install.sh b/src/USER-ATC/Install.sh index 1137389018..f719fe220f 100755 --- a/src/USER-ATC/Install.sh +++ b/src/USER-ATC/Install.sh @@ -29,7 +29,7 @@ action () { # all package files with no dependencies for file in *.cpp *.h; do - action $file + test -f ${file} && action $file done # edit 2 Makefile.package files to include/exclude package info diff --git a/src/USER-AWPMD/Install.sh b/src/USER-AWPMD/Install.sh index 7922c53395..094e10c157 100644 --- a/src/USER-AWPMD/Install.sh +++ b/src/USER-AWPMD/Install.sh @@ -29,7 +29,7 @@ action () { # all package files with no dependencies for file in *.cpp *.h; do - action $file + test -f ${file} && action $file done # edit 2 Makefile.package files to include/exclude package info diff --git a/src/USER-AWPMD/pair_awpmd_cut.cpp b/src/USER-AWPMD/pair_awpmd_cut.cpp index cd89c3984d..2ce1a92684 100644 --- a/src/USER-AWPMD/pair_awpmd_cut.cpp +++ b/src/USER-AWPMD/pair_awpmd_cut.cpp @@ -454,16 +454,6 @@ void PairAWPMDCut::settings(int narg, char **arg){ else if(!strcmp(arg[i],"flex_press")) flexible_pressure_flag = 1; } - - - // reset cutoffs that have been explicitly set - /* - if (allocated) { - int i,j; - for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) - if (setflag[i][j]) cut[i][j] = cut_global; - }*/ } /* ---------------------------------------------------------------------- @@ -489,7 +479,7 @@ void PairAWPMDCut::coeff(int narg, char **arg) else{ int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = 
cut_global; } diff --git a/src/USER-CG-CMM/Install.sh b/src/USER-CGSDK/Install.sh similarity index 100% rename from src/USER-CG-CMM/Install.sh rename to src/USER-CGSDK/Install.sh diff --git a/src/USER-CG-CMM/README b/src/USER-CGSDK/README similarity index 58% rename from src/USER-CG-CMM/README rename to src/USER-CGSDK/README index b37fbd3760..535bd43ac1 100644 --- a/src/USER-CG-CMM/README +++ b/src/USER-CGSDK/README @@ -13,23 +13,15 @@ lipids and charged amino acids. See the doc pages for these commands for details. There are example scripts for using this package in -examples/USER/cg-cmm. +examples/USER/cgsdk This is the second generation implementation reducing the the clutter of the previous version. For many systems with long range electrostatics, it will be faster to use pair_style hybrid/overlay with lj/sdk and coul/long instead of the combined lj/sdk/coul/long -style, since the number of charged atom types is usually small. To -exploit this property, the use of the kspace_style pppm/cg is -recommended over regular pppm. For all new styles, input file backward -compatibility is provided. The old implementation is still available -through appending the /old suffix. These will be discontinued and -removed after the new implementation has been fully validated. - -The current version of this package should be considered beta -quality. The CG potentials work correctly for "normal" situations, but -have not been testing with all kinds of potential parameters and -simuation systems. +style, since the number of charged atom types is usually small. +To exploit this property, the use of the kspace_style pppm/cg is +recommended over regular pppm. The person who created this package is Axel Kohlmeyer at Temple U (akohlmey at gmail.com). Contact him directly if you have questions. 
@@ -38,9 +30,9 @@ The person who created this package is Axel Kohlmeyer at Temple U Thanks for contributions, support and testing goes to -Wataru Shinoda (AIST, Tsukuba) +Wataru Shinoda (Nagoya University) Russell DeVane (Procter & Gamble) -Michael L. Klein (CMM / U Penn, Philadelphia) +Michael L. Klein (Temple University, Philadelphia) Balasubramanian Sundaram (JNCASR, Bangalore) -version: 0.99 / 2011-11-29 +version: 1.0 / 2017-04-26 diff --git a/src/USER-CG-CMM/angle_sdk.cpp b/src/USER-CGSDK/angle_sdk.cpp similarity index 100% rename from src/USER-CG-CMM/angle_sdk.cpp rename to src/USER-CGSDK/angle_sdk.cpp diff --git a/src/USER-CG-CMM/angle_sdk.h b/src/USER-CGSDK/angle_sdk.h similarity index 98% rename from src/USER-CG-CMM/angle_sdk.h rename to src/USER-CGSDK/angle_sdk.h index fbd5461187..a5d917e57c 100644 --- a/src/USER-CG-CMM/angle_sdk.h +++ b/src/USER-CGSDK/angle_sdk.h @@ -14,7 +14,6 @@ #ifdef ANGLE_CLASS AngleStyle(sdk,AngleSDK) -AngleStyle(cg/cmm,AngleSDK) #else diff --git a/src/USER-CG-CMM/lj_sdk_common.h b/src/USER-CGSDK/lj_sdk_common.h similarity index 100% rename from src/USER-CG-CMM/lj_sdk_common.h rename to src/USER-CGSDK/lj_sdk_common.h diff --git a/src/USER-CG-CMM/pair_lj_sdk.cpp b/src/USER-CGSDK/pair_lj_sdk.cpp similarity index 99% rename from src/USER-CG-CMM/pair_lj_sdk.cpp rename to src/USER-CGSDK/pair_lj_sdk.cpp index 665f188ce9..23b0f47a6d 100644 --- a/src/USER-CG-CMM/pair_lj_sdk.cpp +++ b/src/USER-CGSDK/pair_lj_sdk.cpp @@ -248,7 +248,7 @@ void PairLJSDK::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/USER-CG-CMM/pair_lj_sdk.h b/src/USER-CGSDK/pair_lj_sdk.h similarity index 98% rename from src/USER-CG-CMM/pair_lj_sdk.h rename to src/USER-CGSDK/pair_lj_sdk.h index de27485c14..ef0263c06b 100644 --- a/src/USER-CG-CMM/pair_lj_sdk.h +++ 
b/src/USER-CGSDK/pair_lj_sdk.h @@ -18,7 +18,6 @@ #ifdef PAIR_CLASS PairStyle(lj/sdk,PairLJSDK) -PairStyle(cg/cmm,PairLJSDK) #else diff --git a/src/USER-CG-CMM/pair_lj_sdk_coul_long.cpp b/src/USER-CGSDK/pair_lj_sdk_coul_long.cpp similarity index 99% rename from src/USER-CG-CMM/pair_lj_sdk_coul_long.cpp rename to src/USER-CGSDK/pair_lj_sdk_coul_long.cpp index 5e4a0db31c..845c5822a7 100644 --- a/src/USER-CG-CMM/pair_lj_sdk_coul_long.cpp +++ b/src/USER-CGSDK/pair_lj_sdk_coul_long.cpp @@ -308,7 +308,7 @@ void PairLJSDKCoulLong::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut_lj[i][j] = cut_lj_global; } } diff --git a/src/USER-CG-CMM/pair_lj_sdk_coul_long.h b/src/USER-CGSDK/pair_lj_sdk_coul_long.h similarity index 97% rename from src/USER-CG-CMM/pair_lj_sdk_coul_long.h rename to src/USER-CGSDK/pair_lj_sdk_coul_long.h index 508ffe5e6d..57779cc0b9 100644 --- a/src/USER-CG-CMM/pair_lj_sdk_coul_long.h +++ b/src/USER-CGSDK/pair_lj_sdk_coul_long.h @@ -18,7 +18,6 @@ #ifdef PAIR_CLASS PairStyle(lj/sdk/coul/long,PairLJSDKCoulLong) -PairStyle(cg/cmm/coul/long,PairLJSDKCoulLong) #else diff --git a/src/USER-CG-CMM/pair_lj_sdk_coul_msm.cpp b/src/USER-CGSDK/pair_lj_sdk_coul_msm.cpp similarity index 100% rename from src/USER-CG-CMM/pair_lj_sdk_coul_msm.cpp rename to src/USER-CGSDK/pair_lj_sdk_coul_msm.cpp diff --git a/src/USER-CG-CMM/pair_lj_sdk_coul_msm.h b/src/USER-CGSDK/pair_lj_sdk_coul_msm.h similarity index 97% rename from src/USER-CG-CMM/pair_lj_sdk_coul_msm.h rename to src/USER-CGSDK/pair_lj_sdk_coul_msm.h index be56c0cec3..8438ced66b 100644 --- a/src/USER-CG-CMM/pair_lj_sdk_coul_msm.h +++ b/src/USER-CGSDK/pair_lj_sdk_coul_msm.h @@ -18,7 +18,6 @@ #ifdef PAIR_CLASS PairStyle(lj/sdk/coul/msm,PairLJSDKCoulMSM) -PairStyle(cg/cmm/coul/msm,PairLJSDKCoulMSM) #else diff --git a/src/USER-COLVARS/Install.sh b/src/USER-COLVARS/Install.sh index 
c01719e766..d67883a416 100755 --- a/src/USER-COLVARS/Install.sh +++ b/src/USER-COLVARS/Install.sh @@ -29,7 +29,7 @@ action () { # all package files with no dependencies for file in *.cpp *.h; do - action $file + test -f ${file} && action $file done # edit 2 Makefile.package files to include/exclude package info diff --git a/src/USER-DPD/nbin_ssa.cpp b/src/USER-DPD/nbin_ssa.cpp index 4c57a8e70f..5dacf52ee1 100644 --- a/src/USER-DPD/nbin_ssa.cpp +++ b/src/USER-DPD/nbin_ssa.cpp @@ -76,6 +76,7 @@ void NBinSSA::bin_atoms() int nowned = atom->nlocal; // NOTE: nlocal was set to atom->nfirst above for (i = nall-1; i >= nowned; i--) { ibin = coord2ssaAIR(x[i]); + atom2bin[i] = ibin; if (ibin < 1) continue; // skip ghost atoms not in AIR if (mask[i] & bitmask) { bins[i] = gairhead_ssa[ibin]; @@ -85,6 +86,7 @@ void NBinSSA::bin_atoms() } else { for (i = nall-1; i >= nlocal; i--) { ibin = coord2ssaAIR(x[i]); + atom2bin[i] = ibin; if (ibin < 1) continue; // skip ghost atoms not in AIR bins[i] = gairhead_ssa[ibin]; gairhead_ssa[ibin] = i; @@ -92,6 +94,7 @@ void NBinSSA::bin_atoms() } for (i = nlocal-1; i >= 0; i--) { ibin = coord2bin(x[i][0], x[i][1], x[i][2], xbin, ybin, zbin); + atom2bin[i] = ibin; // Find the bounding box of the local atoms in the bins if (xbin < lbinxlo) lbinxlo = xbin; if (xbin >= lbinxhi) lbinxhi = xbin + 1; diff --git a/src/USER-DPD/npair_half_bin_newton_ssa.cpp b/src/USER-DPD/npair_half_bin_newton_ssa.cpp index a6479d4c4f..221aa5b454 100644 --- a/src/USER-DPD/npair_half_bin_newton_ssa.cpp +++ b/src/USER-DPD/npair_half_bin_newton_ssa.cpp @@ -251,7 +251,7 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) ytmp = x[i][1]; ztmp = x[i][2]; - ibin = coord2bin(x[i],xbin,ybin,zbin); + ibin = atom2bin[i]; // loop over AIR ghost atoms in all bins in "full" stencil // Note: the non-AIR ghost atoms have already been filtered out diff --git a/src/USER-DPD/pair_dpd_fdt.cpp b/src/USER-DPD/pair_dpd_fdt.cpp index 987755db8a..95908c556d 100644 --- 
a/src/USER-DPD/pair_dpd_fdt.cpp +++ b/src/USER-DPD/pair_dpd_fdt.cpp @@ -267,7 +267,7 @@ void PairDPDfdt::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/USER-DPD/pair_dpd_fdt_energy.cpp b/src/USER-DPD/pair_dpd_fdt_energy.cpp index bf86f95b5f..32ac456b0f 100644 --- a/src/USER-DPD/pair_dpd_fdt_energy.cpp +++ b/src/USER-DPD/pair_dpd_fdt_energy.cpp @@ -353,7 +353,7 @@ void PairDPDfdtEnergy::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/USER-DPD/pair_exp6_rx.cpp b/src/USER-DPD/pair_exp6_rx.cpp index 8f406e3661..8b6fc9beed 100644 --- a/src/USER-DPD/pair_exp6_rx.cpp +++ b/src/USER-DPD/pair_exp6_rx.cpp @@ -566,7 +566,7 @@ void PairExp6rx::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } diff --git a/src/USER-DRUDE/pair_lj_cut_thole_long.cpp b/src/USER-DRUDE/pair_lj_cut_thole_long.cpp index 671de7090b..a74f51477c 100644 --- a/src/USER-DRUDE/pair_lj_cut_thole_long.cpp +++ b/src/USER-DRUDE/pair_lj_cut_thole_long.cpp @@ -295,11 +295,11 @@ void PairLJCutTholeLong::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) - if (setflag[i][j]) { - thole[i][j] = thole_global; - cut_lj[i][j] = cut_lj_global; - } + for (j = i; j <= atom->ntypes; j++) + if (setflag[i][j]) { + thole[i][j] = thole_global; + cut_lj[i][j] = cut_lj_global; + } } } diff --git a/src/USER-DRUDE/pair_thole.cpp b/src/USER-DRUDE/pair_thole.cpp index 0ed94ebbc9..abb37b82b7 100644 --- 
a/src/USER-DRUDE/pair_thole.cpp +++ b/src/USER-DRUDE/pair_thole.cpp @@ -199,11 +199,11 @@ void PairThole::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) - if (setflag[i][j]) { - thole[i][j] = thole_global; - cut[i][j] = cut_global; - } + for (j = i; j <= atom->ntypes; j++) + if (setflag[i][j]) { + thole[i][j] = thole_global; + cut[i][j] = cut_global; + } } } diff --git a/src/USER-EFF/pair_eff_cut.cpp b/src/USER-EFF/pair_eff_cut.cpp index 66f59c86c3..850c523629 100644 --- a/src/USER-EFF/pair_eff_cut.cpp +++ b/src/USER-EFF/pair_eff_cut.cpp @@ -846,7 +846,7 @@ void PairEffCut::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/USER-FEP/pair_coul_cut_soft.cpp b/src/USER-FEP/pair_coul_cut_soft.cpp index 2c675c607f..a7ac8004fe 100644 --- a/src/USER-FEP/pair_coul_cut_soft.cpp +++ b/src/USER-FEP/pair_coul_cut_soft.cpp @@ -168,7 +168,7 @@ void PairCoulCutSoft::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/USER-FEP/pair_lj_cut_coul_cut_soft.cpp b/src/USER-FEP/pair_lj_cut_coul_cut_soft.cpp index 16da07a657..b2e781c57b 100644 --- a/src/USER-FEP/pair_lj_cut_coul_cut_soft.cpp +++ b/src/USER-FEP/pair_lj_cut_coul_cut_soft.cpp @@ -211,7 +211,7 @@ void PairLJCutCoulCutSoft::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) { cut_lj[i][j] = cut_lj_global; cut_coul[i][j] = cut_coul_global; diff --git a/src/USER-FEP/pair_lj_cut_coul_long_soft.cpp b/src/USER-FEP/pair_lj_cut_coul_long_soft.cpp index 
6636e72715..3b80729b0b 100644 --- a/src/USER-FEP/pair_lj_cut_coul_long_soft.cpp +++ b/src/USER-FEP/pair_lj_cut_coul_long_soft.cpp @@ -582,7 +582,7 @@ void PairLJCutCoulLongSoft::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut_lj[i][j] = cut_lj_global; } } diff --git a/src/USER-FEP/pair_lj_cut_soft.cpp b/src/USER-FEP/pair_lj_cut_soft.cpp index 3798b27936..800fdfcde8 100644 --- a/src/USER-FEP/pair_lj_cut_soft.cpp +++ b/src/USER-FEP/pair_lj_cut_soft.cpp @@ -462,7 +462,7 @@ void PairLJCutSoft::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/USER-FEP/pair_lj_cut_tip4p_long_soft.cpp b/src/USER-FEP/pair_lj_cut_tip4p_long_soft.cpp index 5beed08b72..8d9162e564 100644 --- a/src/USER-FEP/pair_lj_cut_tip4p_long_soft.cpp +++ b/src/USER-FEP/pair_lj_cut_tip4p_long_soft.cpp @@ -436,7 +436,7 @@ void PairLJCutTIP4PLongSoft::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut_lj[i][j] = cut_lj_global; } } diff --git a/src/USER-FEP/pair_morse_soft.cpp b/src/USER-FEP/pair_morse_soft.cpp index 6c86d8916f..1333bc28ca 100644 --- a/src/USER-FEP/pair_morse_soft.cpp +++ b/src/USER-FEP/pair_morse_soft.cpp @@ -222,7 +222,7 @@ void PairMorseSoft::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/USER-H5MD/Install.sh b/src/USER-H5MD/Install.sh index bdda732807..1070afaa96 100644 --- a/src/USER-H5MD/Install.sh +++ b/src/USER-H5MD/Install.sh @@ -27,7 
+27,7 @@ action () { } for file in *.cpp *.h; do - action $file + test -f ${file} && action $file done # edit 2 Makefile.package files to include/exclude package info diff --git a/src/USER-MANIFOLD/fix_nve_manifold_rattle.cpp b/src/USER-MANIFOLD/fix_nve_manifold_rattle.cpp index 0d48e145e0..4f6b62590d 100644 --- a/src/USER-MANIFOLD/fix_nve_manifold_rattle.cpp +++ b/src/USER-MANIFOLD/fix_nve_manifold_rattle.cpp @@ -94,6 +94,7 @@ FixNVEManifoldRattle::FixNVEManifoldRattle( LAMMPS *lmp, int &narg, char **arg, dof_flag = 1; nevery = 0; + next_output = 0; dtv = dtf = 0; tolerance = force->numeric( FLERR, arg[3] ); @@ -145,6 +146,11 @@ FixNVEManifoldRattle::FixNVEManifoldRattle( LAMMPS *lmp, int &narg, char **arg, while( argi < narg ){ if( strcmp(arg[argi], "every") == 0 ){ nevery = force->inumeric(FLERR,arg[argi+1]); + next_output = update->ntimestep + nevery; + if( comm->me == 0 ){ + fprintf(screen,"Outputing every %d steps, next is %d\n", + nevery, next_output); + } argi += 2; }else if( error_on_unknown_keyword ){ char msg[2048]; @@ -220,6 +226,11 @@ void FixNVEManifoldRattle::print_stats( const char *header ) x_iters * inv_tdiff, v_iters * inv_tdiff, stats.dofs_removed); fprintf(screen,"\n"); } + + stats.x_iters_per_atom = 0; + stats.v_iters_per_atom = 0; + stats.x_iters = 0; + stats.v_iters = 0; } @@ -263,14 +274,6 @@ void FixNVEManifoldRattle::init() void FixNVEManifoldRattle::update_var_params() { - if( nevery > 0 ){ - stats.x_iters = 0; - stats.v_iters = 0; - stats.natoms = 0; - stats.x_iters_per_atom = 0.0; - stats.v_iters_per_atom = 0.0; - } - double *ptr_params = ptr_m->params; for( int i = 0; i < nvars; ++i ){ @@ -358,7 +361,12 @@ void FixNVEManifoldRattle::final_integrate() ---------------------------------------------------------------------------*/ void FixNVEManifoldRattle::end_of_step() { - print_stats( "nve/manifold/rattle" ); + if (nevery && (update->ntimestep == next_output)){ + if( comm->me == 0 ){ + print_stats( "nve/manifold/rattle" ); + 
next_output += nevery; + } + } } /* ----------------------------------------------------------------------------- diff --git a/src/USER-MANIFOLD/fix_nve_manifold_rattle.h b/src/USER-MANIFOLD/fix_nve_manifold_rattle.h index 4bd17ab899..71aa1aed9a 100644 --- a/src/USER-MANIFOLD/fix_nve_manifold_rattle.h +++ b/src/USER-MANIFOLD/fix_nve_manifold_rattle.h @@ -81,7 +81,7 @@ namespace LAMMPS_NS { protected: - int nevery; + int nevery, next_output; double dtv, dtf; double tolerance; diff --git a/src/USER-MISC/Install.sh b/src/USER-MISC/Install.sh index 203d923164..2d42125ec3 100644 --- a/src/USER-MISC/Install.sh +++ b/src/USER-MISC/Install.sh @@ -35,6 +35,6 @@ for file in *.cpp *.h; do elif (test $file = "pair_cdeam.h") then action pair_cdeam.h pair_eam_alloy.cpp else - action $file + test -f ${file} && action $file fi done diff --git a/src/USER-MISC/compute_temp_rotate.cpp b/src/USER-MISC/compute_temp_rotate.cpp index 2210555a7c..b948738e00 100644 --- a/src/USER-MISC/compute_temp_rotate.cpp +++ b/src/USER-MISC/compute_temp_rotate.cpp @@ -221,6 +221,17 @@ void ComputeTempRotate::remove_bias(int i, double *v) v[2] -= vbiasall[i][2]; } +/* ---------------------------------------------------------------------- + remove velocity bias from atom I to leave thermal velocity +------------------------------------------------------------------------- */ + +void ComputeTempRotate::remove_bias_thr(int i, double *v, double *) +{ + v[0] -= vbiasall[i][0]; + v[1] -= vbiasall[i][1]; + v[2] -= vbiasall[i][2]; +} + /* ---------------------------------------------------------------------- remove velocity bias from all atoms to leave thermal velocity ------------------------------------------------------------------------- */ @@ -251,6 +262,18 @@ void ComputeTempRotate::restore_bias(int i, double *v) v[2] += vbiasall[i][2]; } +/* ---------------------------------------------------------------------- + add back in velocity bias to atom I removed by remove_bias_thr() + assume remove_bias_thr() 
was previously called +------------------------------------------------------------------------- */ + +void ComputeTempRotate::restore_bias_thr(int i, double *v, double *) +{ + v[0] += vbiasall[i][0]; + v[1] += vbiasall[i][1]; + v[2] += vbiasall[i][2]; +} + /* ---------------------------------------------------------------------- add back in velocity bias to all atoms removed by remove_bias_all() assume remove_bias_all() was previously called diff --git a/src/USER-MISC/compute_temp_rotate.h b/src/USER-MISC/compute_temp_rotate.h index 69643b68a5..9590366b15 100644 --- a/src/USER-MISC/compute_temp_rotate.h +++ b/src/USER-MISC/compute_temp_rotate.h @@ -34,9 +34,12 @@ class ComputeTempRotate : public Compute { void compute_vector(); void remove_bias(int, double *); + void remove_bias_thr(int, double *, double *); void remove_bias_all(); void restore_bias(int, double *); void restore_bias_all(); + void restore_bias_thr(int, double *, double *); + double memory_usage(); private: diff --git a/src/USER-MISC/fix_srp.cpp b/src/USER-MISC/fix_srp.cpp index fbd8473cb0..f3dec42a83 100644 --- a/src/USER-MISC/fix_srp.cpp +++ b/src/USER-MISC/fix_srp.cpp @@ -101,6 +101,13 @@ void FixSRP::init() if (force->pair_match("hybrid",1) == NULL) error->all(FLERR,"Cannot use pair srp without pair_style hybrid"); + int has_rigid = 0; + for (int i = 0; i < modify->nfix; i++) + if (strncmp(modify->fix[i]->style,"rigid",5) == 0) ++has_rigid; + + if (has_rigid > 0) + error->all(FLERR,"Pair srp is not compatible with rigid fixes."); + if ((bptype < 1) || (bptype > atom->ntypes)) error->all(FLERR,"Illegal bond particle type"); diff --git a/src/USER-MISC/improper_ring.cpp b/src/USER-MISC/improper_ring.cpp index 5a7937e4ee..adf17ed1d5 100644 --- a/src/USER-MISC/improper_ring.cpp +++ b/src/USER-MISC/improper_ring.cpp @@ -204,7 +204,7 @@ void ImproperRing::compute(int eflag, int vflag) cfact2 = ckjji / ckjkj; cfact3 = ckjji / cjiji; - /* Calculate the force acted on the thrid atom of the angle. 
*/ + /* Calculate the force acted on the third atom of the angle. */ fkx = cfact2 * bvec2x[icomb] - bvec1x[icomb]; fky = cfact2 * bvec2y[icomb] - bvec1y[icomb]; fkz = cfact2 * bvec2z[icomb] - bvec1z[icomb]; diff --git a/src/USER-MISC/pair_buck_mdf.cpp b/src/USER-MISC/pair_buck_mdf.cpp index 3a433b16bd..6c3dcbd7ee 100644 --- a/src/USER-MISC/pair_buck_mdf.cpp +++ b/src/USER-MISC/pair_buck_mdf.cpp @@ -197,7 +197,7 @@ void PairBuckMDF::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/USER-MISC/pair_coul_diel.cpp b/src/USER-MISC/pair_coul_diel.cpp index a732ace1a0..a62362aa6f 100644 --- a/src/USER-MISC/pair_coul_diel.cpp +++ b/src/USER-MISC/pair_coul_diel.cpp @@ -168,7 +168,7 @@ void PairCoulDiel::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/USER-MISC/pair_edip.cpp b/src/USER-MISC/pair_edip.cpp index 6ce84ab767..bd58b746b8 100644 --- a/src/USER-MISC/pair_edip.cpp +++ b/src/USER-MISC/pair_edip.cpp @@ -798,6 +798,9 @@ void PairEDIP::coeff(int narg, char **arg) } } + if (nelements != 1) + error->all(FLERR,"Pair style edip only supports single element potentials"); + // read potential file and initialize potential parameters read_file(arg[2]); @@ -836,7 +839,7 @@ void PairEDIP::coeff(int narg, char **arg) void PairEDIP::init_style() { if (force->newton_pair == 0) - error->all(FLERR,"Pair style EDIP requires newton pair on"); + error->all(FLERR,"Pair style edip requires newton pair on"); // need a full neighbor list diff --git a/src/USER-MISC/pair_edip_multi.cpp b/src/USER-MISC/pair_edip_multi.cpp new file mode 100644 index 0000000000..d52b2e4a47 --- /dev/null +++ b/src/USER-MISC/pair_edip_multi.cpp @@ 
-0,0 +1,807 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Environment Dependent Interatomic Potential + + Contributing author: Chao Jiang +------------------------------------------------------------------------- */ + +#include +#include +#include +#include +#include +#include "pair_edip_multi.h" +#include "atom.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "force.h" +#include "comm.h" +#include "memory.h" +#include "error.h" +#include "citeme.h" + +using namespace LAMMPS_NS; + +#define MAXLINE 1024 +#define DELTA 4 + + +static const char cite_pair_edip[] = + "@article{cjiang2012\n" + " author = {Jian, Chao and Morgan, Dane, and Szlufarska, Izabella},\n" + " title = {Carbon tri-interstitial defect: A model for DII center},\n" + " journal = {Physical Review B},\n" + " volume = {86},\n" + " pages = {144118},\n" + " year = {2012},\n" + "}\n\n" + "@article{lpizzagalli2010,\n" + " author = {G. Lucas, M. Bertolus, and L. Pizzagalli},\n" + " journal = {J. Phys. : Condens. 
Matter 22},\n" + " volume = {22},\n" + " pages = {035802},\n" + " year = {2010},\n" + "}\n\n"; + + + +/* ---------------------------------------------------------------------- */ + +PairEDIPMulti::PairEDIPMulti(LAMMPS *lmp) : Pair(lmp) +{ + if (lmp->citeme) lmp->citeme->add(cite_pair_edip); + + single_enable = 0; + restartinfo = 0; + one_coeff = 1; + manybody_flag = 1; + + nelements = 0; + elements = NULL; + nparams = maxparam = 0; + params = NULL; + elem2param = NULL; +} + +/* ---------------------------------------------------------------------- + check if allocated, since class can be destructed when incomplete +------------------------------------------------------------------------- */ + +PairEDIPMulti::~PairEDIPMulti() +{ + if (elements) + for (int i = 0; i < nelements; i++) delete [] elements[i]; + delete [] elements; + memory->destroy(params); + memory->destroy(elem2param); + + if (allocated) { + memory->destroy(setflag); + memory->destroy(cutsq); + delete [] map; + +//XXX deallocateGrids(); + deallocatePreLoops(); + } +} + +/* ---------------------------------------------------------------------- */ + +void PairEDIPMulti::compute(int eflag, int vflag) +{ + int i,j,k,ii,jj,kk,inum,jnum; + int itype,jtype,ktype,ijparam,ikparam,ijkparam; + double xtmp,ytmp,ztmp,evdwl; + int *ilist,*jlist,*numneigh,**firstneigh; + register int preForceCoord_counter; + + double zeta_i; + double dzetair; + double fpair; + double costheta; + double dpairZ,dtripleZ; + + // eflag != 0 means compute energy contributions in this step + // vflag != 0 means compute virial contributions in this step + + evdwl = 0.0; + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + double **x = atom->x; + double **f = atom->f; + int *type = atom->type; + int nlocal = atom->nlocal; + int newton_pair = force->newton_pair; + + inum = list->inum;//total number of atoms in the cell + ilist = list->ilist;//list of atoms + numneigh = list->numneigh;//number of near neighbors + 
firstneigh = list->firstneigh;//list of neighbors + + // loop over full neighbor list of my atoms + + for (ii = 0; ii < inum; ii++) { + zeta_i = 0.0; + int numForceCoordPairs = 0; + + i = ilist[ii]; + itype = map[type[i]]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + // all the neighbors of atom i + + jlist = firstneigh[i]; + jnum = numneigh[i]; + + // pre-loop to compute environment coordination f(Z) + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + double delx, dely, delz, r_ij; + + delx = x[j][0] - xtmp; + dely = x[j][1] - ytmp; + delz = x[j][2] - ztmp; + r_ij = delx * delx + dely * dely + delz * delz; + + jtype = map[type[j]]; + ijparam = elem2param[itype][jtype][jtype]; + if (r_ij > params[ijparam].cutsq) continue; + + r_ij = sqrt(r_ij); + + // zeta and its derivative dZ/dr + + if (r_ij < params[ijparam].cutoffC) zeta_i += 1.0; + else { + double f, fdr; + edip_fc(r_ij, ¶ms[ijparam], f, fdr); + zeta_i += f; + dzetair = -fdr / r_ij; + + preForceCoord_counter=numForceCoordPairs*5; + preForceCoord[preForceCoord_counter+0]=dzetair; + preForceCoord[preForceCoord_counter+1]=delx; + preForceCoord[preForceCoord_counter+2]=dely; + preForceCoord[preForceCoord_counter+3]=delz; + preForceCoord[preForceCoord_counter+4]=j; + numForceCoordPairs++; + } + } + + // two-body interactions + + dpairZ=0; + dtripleZ=0; + + for (jj = 0; jj < jnum; jj++) { + double dr_ij[3], r_ij, f_ij[3]; + + j = jlist[jj]; + j &= NEIGHMASK; + + dr_ij[0] = x[j][0] - xtmp; + dr_ij[1] = x[j][1] - ytmp; + dr_ij[2] = x[j][2] - ztmp; + r_ij = dr_ij[0]*dr_ij[0] + dr_ij[1]*dr_ij[1] + dr_ij[2]*dr_ij[2]; + + jtype = map[type[j]]; + ijparam = elem2param[itype][jtype][jtype]; + if (r_ij > params[ijparam].cutsq) continue; + + r_ij = sqrt(r_ij); + + // potential energy and force + // since pair i-j is different from pair j-i, double counting is + // already considered in constructing the potential + + double fdr, fdZ; + edip_pair(r_ij, zeta_i, ¶ms[ijparam], evdwl, fdr, fdZ); + 
fpair = -fdr / r_ij; + dpairZ += fdZ; + + f[i][0] -= fpair * dr_ij[0]; + f[i][1] -= fpair * dr_ij[1]; + f[i][2] -= fpair * dr_ij[2]; + + f[j][0] += fpair * dr_ij[0]; + f[j][1] += fpair * dr_ij[1]; + f[j][2] += fpair * dr_ij[2]; + + if (evflag) ev_tally(i, j, nlocal, newton_pair, evdwl, 0.0, fpair, -dr_ij[0], -dr_ij[1], -dr_ij[2]); + + // three-body Forces + + for (kk = jj + 1; kk < jnum; kk++) { + double dr_ik[3], r_ik, f_ik[3]; + + k = jlist[kk]; + k &= NEIGHMASK; + ktype = map[type[k]]; + ikparam = elem2param[itype][ktype][ktype]; + ijkparam = elem2param[itype][jtype][ktype]; + + dr_ik[0] = x[k][0] - xtmp; + dr_ik[1] = x[k][1] - ytmp; + dr_ik[2] = x[k][2] - ztmp; + r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2]; + + if (r_ik > params[ikparam].cutsq) continue; + + r_ik = sqrt(r_ik); + + costheta=vec3_dot(dr_ij, dr_ik) / r_ij / r_ik; + + double v1, v2, v3, v4, v5, v6, v7; + + edip_fcut3(r_ij, ¶ms[ijparam], v1, v2); + edip_fcut3(r_ik, ¶ms[ikparam], v3, v4); + edip_h(costheta, zeta_i, ¶ms[ijkparam], v5, v6, v7); + + // potential energy and forces + evdwl = v1 * v3 * v5; + dtripleZ += v1 * v3 * v7; + + double dri[3], drj[3], drk[3]; + double dhl, dfr; + + dhl = v1 * v3 * v6; + + costheta_d(dr_ij, r_ij, dr_ik, r_ik, dri, drj, drk); + + f_ij[0] = -dhl * drj[0]; + f_ij[1] = -dhl * drj[1]; + f_ij[2] = -dhl * drj[2]; + f_ik[0] = -dhl * drk[0]; + f_ik[1] = -dhl * drk[1]; + f_ik[2] = -dhl * drk[2]; + + dfr = v2 * v3 * v5; + fpair = -dfr / r_ij; + + f_ij[0] += fpair * dr_ij[0]; + f_ij[1] += fpair * dr_ij[1]; + f_ij[2] += fpair * dr_ij[2]; + + dfr = v1 * v4 * v5; + fpair = -dfr / r_ik; + + f_ik[0] += fpair * dr_ik[0]; + f_ik[1] += fpair * dr_ik[1]; + f_ik[2] += fpair * dr_ik[2]; + + f[j][0] += f_ij[0]; + f[j][1] += f_ij[1]; + f[j][2] += f_ij[2]; + + f[k][0] += f_ik[0]; + f[k][1] += f_ik[1]; + f[k][2] += f_ik[2]; + + f[i][0] -= f_ij[0] + f_ik[0]; + f[i][1] -= f_ij[1] + f_ik[1]; + f[i][2] -= f_ij[2] + f_ik[2]; + + if (evflag) 
ev_tally3(i,j,k,evdwl,0.0,f_ij,f_ik,dr_ij,dr_ik); + } + } + + // forces due to environment coordination f(Z) + for (int idx = 0; idx < numForceCoordPairs; idx++) { + double delx, dely, delz; + + preForceCoord_counter = idx * 5; + dzetair = preForceCoord[preForceCoord_counter+0]; + delx = preForceCoord[preForceCoord_counter+1]; + dely = preForceCoord[preForceCoord_counter+2]; + delz = preForceCoord[preForceCoord_counter+3]; + j = static_cast (preForceCoord[preForceCoord_counter+4]); + + dzetair *= (dpairZ + dtripleZ); + + f[j][0] += dzetair * delx; + f[j][1] += dzetair * dely; + f[j][2] += dzetair * delz; + + f[i][0] -= dzetair * delx; + f[i][1] -= dzetair * dely; + f[i][2] -= dzetair * delz; + + evdwl = 0.0; + if (evflag) ev_tally(i, j, nlocal, newton_pair, evdwl, 0.0, dzetair, -delx, -dely, -delz); + } + } + + if (vflag_fdotr) virial_fdotr_compute(); +} + +double sqr(double x) +{ + return x * x; +} + +//pair Vij, partial derivatives dVij(r,Z)/dr and dVij(r,Z)/dZ +void PairEDIPMulti::edip_pair(double r, double z, Param *param, double &eng, + double &fdr, double &fZ) +{ + double A = param->A; + double B = param->B; + double rho = param->rho; + double beta = param->beta; + double v1,v2,v3,v4; + + v1 = pow(B / r, rho); + v2 = exp(-beta * z * z); + edip_fcut2(r, param, v3, v4); + + eng = A * (v1 - v2) * v3; + fdr = A * (v1 - v2) * v4 + A * (-rho * v1 / r) * v3; + fZ = A * (2 * beta * z * v2) * v3; +} + +//function fc(r) in calculating coordination Z and derivative fc'(r) +void PairEDIPMulti::edip_fc(double r, Param *param, double &f, double &fdr) +{ + double a = param->cutoffA; + double c = param->cutoffC; + double alpha = param->alpha; + double x; + double v1, v2, v3; + + if(r < c + 1E-6) + { + f=1.0; + fdr=0.0; + return; + } + + if(r > a - 1E-6) + { + f=0.0; + fdr=0.0; + return; + } + + x = (a - c) / (r - c); + v1 = x * x * x; + v2 = 1.0 / (1.0 - v1); + + f = exp(alpha * v2); + fdr = (3.0 * x * v1 / (a - c)) * (-alpha * v2 * v2) * f; +} + +//cut-off function for Vij 
and its derivative fcut2'(r) +void PairEDIPMulti::edip_fcut2(double r, Param *param, double &f, double &fdr) +{ + double sigma = param->sigma; + double a = param->cutoffA; + double v1; + + if(r > a - 1E-6) + { + f=0.0; + fdr=0.0; + return; + } + + v1 = 1.0 / (r - a); + f = exp(sigma * v1); + fdr = (-sigma * v1 * v1) * f; +} + +//function tau(Z) and its derivative tau'(Z) +void PairEDIPMulti::edip_tau(double z, Param *param, double &f, double &fdZ) +{ + double u1 = param->u1; + double u2 = param->u2; + double u3 = param->u3; + double u4 = param->u4; + double v1, v2; + + v1 = exp(-u4 * z); + v2 = exp(-2.0 * u4 * z); + + f = u1 + u2 * u3 * v1 - u2 * v2; + fdZ = -u2 * u3 * u4 * v1 + 2.0 * u2 * u4 * v2; +} + +//function h(l,Z) and its partial derivatives dh(l,Z)/dl and dh(l,Z)/dZ +void PairEDIPMulti::edip_h(double l, double z, Param *param, double &f, + double &fdl, double &fdZ) +{ + double lambda = param->lambda; + double eta = param->eta; + double Q0 = param->Q0; + double mu = param->mu; + double Q, QdZ, Tau, TaudZ; + double u2, du2l, du2Z; + double v1, v2, v3; + + //function Q(Z) + Q = Q0 * exp(-mu * z); + //derivative Q'(Z) + QdZ= -mu * Q; + + edip_tau(z, param, Tau, TaudZ); + + v1 = sqr(l + Tau); + u2 = Q * v1; + v2 = exp(-u2); + + f = lambda * (1 - v2 + eta * u2); + + //df/du2 + v3 = lambda * (v2 + eta); + + //du2/dl + du2l = Q * 2 * (l + Tau); + fdl = v3 * du2l; + + //du2/dZ + du2Z = QdZ * v1 + Q * 2 * (l + Tau) * TaudZ; + fdZ = v3 * du2Z; +} + +//cut-off function for Vijk and its derivative fcut3'(r) +void PairEDIPMulti::edip_fcut3(double r, Param *param, double &f, double &fdr) +{ + double gamma = param->gamma; + double a = param->cutoffA; + double v1; + + if(r > a - 1E-6) + { + f=0.0; + fdr=0.0; + return; + } + + v1 = 1.0 / (r - a); + f = exp(gamma * v1); + fdr = (-gamma * v1 * v1) * f; +} + +/* ---------------------------------------------------------------------- + pre-calculated structures 
+------------------------------------------------------------------------- */ + +void PairEDIPMulti::allocatePreLoops(void) +{ + int nthreads = comm->nthreads; + + memory->create(preForceCoord,5*nthreads*leadDimInteractionList,"edip:preForceCoord"); +} + +/* ---------------------------------------------------------------------- + deallocate preLoops +------------------------------------------------------------------------- */ + +void PairEDIPMulti::deallocatePreLoops(void) +{ + memory->destroy(preForceCoord); +} + +/* ---------------------------------------------------------------------- */ + +void PairEDIPMulti::allocate() +{ + allocated = 1; + int n = atom->ntypes; + + memory->create(setflag,n+1,n+1,"pair:setflag"); + memory->create(cutsq,n+1,n+1,"pair:cutsq"); + + map = new int[n+1]; +} + +/* ---------------------------------------------------------------------- + global settings +------------------------------------------------------------------------- */ + +void PairEDIPMulti::settings(int narg, char **arg) +{ + if (narg != 0) error->all(FLERR,"Illegal pair_style command"); +} + +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs +------------------------------------------------------------------------- */ + +void PairEDIPMulti::coeff(int narg, char **arg) +{ + int i,j,n; + + if (!allocated) allocate(); + + if (narg != 3 + atom->ntypes) + error->all(FLERR,"Incorrect args for pair coefficients"); + + // insure I,J args are * * + + if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0) + error->all(FLERR,"Incorrect args for pair coefficients"); + + // read args that map atom types to elements in potential file + // map[i] = which element the Ith atom type is, -1 if NULL + // nelements = # of unique elements + // elements = list of element names + + if (elements) { + for (i = 0; i < nelements; i++) delete [] elements[i]; + delete [] elements; + } + elements = new char*[atom->ntypes]; + for (i = 0; i 
< atom->ntypes; i++) elements[i] = NULL; + + nelements = 0; + for (i = 3; i < narg; i++) { + if (strcmp(arg[i],"NULL") == 0) { + map[i-2] = -1; + continue; + } + for (j = 0; j < nelements; j++) + if (strcmp(arg[i],elements[j]) == 0) break; + map[i-2] = j; + if (j == nelements) { + n = strlen(arg[i]) + 1; + elements[j] = new char[n]; + strcpy(elements[j],arg[i]); + nelements++; + } + } + + // read potential file and initialize potential parameters + + read_file(arg[2]); + setup(); + + // clear setflag since coeff() called once with I,J = * * + + n = atom->ntypes; + for (int i = 1; i <= n; i++) + for (int j = i; j <= n; j++) + setflag[i][j] = 0; + + // set setflag i,j for type pairs where both are mapped to elements + + int count = 0; + for (int i = 1; i <= n; i++) + for (int j = i; j <= n; j++) + if (map[i] >= 0 && map[j] >= 0) { + setflag[i][j] = 1; + count++; + } + + if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); + + // allocate tables and internal structures + + allocatePreLoops(); +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +void PairEDIPMulti::init_style() +{ + if (atom->tag_enable == 0) + error->all(FLERR,"Pair style edip/multi requires atom IDs"); + if (force->newton_pair == 0) + error->all(FLERR,"Pair style edip/multi requires newton pair on"); + + // need a full neighbor list + + int irequest = neighbor->request(this,instance_me); + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i +------------------------------------------------------------------------- */ + +double PairEDIPMulti::init_one(int i, int j) +{ + if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set"); + + return cutmax; +} + +/* 
---------------------------------------------------------------------- */ + +void PairEDIPMulti::read_file(char *file) +{ + int params_per_line = 20; + char **words = new char*[params_per_line+1]; + + memory->sfree(params); + params = NULL; + nparams = maxparam = 0; + + // open file on proc 0 + + FILE *fp; + if (comm->me == 0) { + fp = force->open_potential(file); + if (fp == NULL) { + char str[128]; + sprintf(str,"Cannot open EDIP potential file %s",file); + error->one(FLERR,str); + } + } + + // read each set of params from potential file + // one set of params can span multiple lines + // store params if all 3 element tags are in element list + + int n,nwords,ielement,jelement,kelement; + char line[MAXLINE],*ptr; + int eof = 0; + + while (1) { + if (comm->me == 0) { + ptr = fgets(line,MAXLINE,fp); + if (ptr == NULL) { + eof = 1; + fclose(fp); + } else n = strlen(line) + 1; + } + MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) break; + MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + + // strip comment, skip line if blank + + if ((ptr = strchr(line,'#'))) *ptr = '\0'; + nwords = atom->count_words(line); + if (nwords == 0) continue; + + // concatenate additional lines until have params_per_line words + + while (nwords < params_per_line) { + n = strlen(line); + if (comm->me == 0) { + ptr = fgets(&line[n],MAXLINE-n,fp); + if (ptr == NULL) { + eof = 1; + fclose(fp); + } else n = strlen(line) + 1; + } + MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) break; + MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + if ((ptr = strchr(line,'#'))) *ptr = '\0'; + nwords = atom->count_words(line); + } + + if (nwords != params_per_line) + error->all(FLERR,"Incorrect format in EDIP potential file"); + + // words = ptrs to all words in line + + nwords = 0; + words[nwords++] = strtok(line," \t\n\r\f"); + while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue; + + // ielement,jelement,kelement = 1st args + // if all 3 args are in 
element list, then parse this line + // else skip to next entry in file + + for (ielement = 0; ielement < nelements; ielement++) + if (strcmp(words[0],elements[ielement]) == 0) break; + if (ielement == nelements) continue; + for (jelement = 0; jelement < nelements; jelement++) + if (strcmp(words[1],elements[jelement]) == 0) break; + if (jelement == nelements) continue; + for (kelement = 0; kelement < nelements; kelement++) + if (strcmp(words[2],elements[kelement]) == 0) break; + if (kelement == nelements) continue; + + // load up parameter settings and error check their values + + if (nparams == maxparam) { + maxparam += DELTA; + params = (Param *) memory->srealloc(params,maxparam*sizeof(Param), + "pair:params"); + } + + params[nparams].ielement = ielement; + params[nparams].jelement = jelement; + params[nparams].kelement = kelement; + params[nparams].A = atof(words[3]); + params[nparams].B = atof(words[4]); + params[nparams].cutoffA = atof(words[5]); + params[nparams].cutoffC = atof(words[6]); + params[nparams].alpha = atof(words[7]); + params[nparams].beta = atof(words[8]); + params[nparams].eta = atof(words[9]); + params[nparams].gamma = atof(words[10]); + params[nparams].lambda = atof(words[11]); + params[nparams].mu = atof(words[12]); + params[nparams].rho = atof(words[13]); + params[nparams].sigma = atof(words[14]); + params[nparams].Q0 = atof(words[15]); + params[nparams].u1 = atof(words[16]); + params[nparams].u2 = atof(words[17]); + params[nparams].u3 = atof(words[18]); + params[nparams].u4 = atof(words[19]); + + if (params[nparams].A < 0.0 || params[nparams].B < 0.0 || + params[nparams].cutoffA < 0.0 || params[nparams].cutoffC < 0.0 || + params[nparams].alpha < 0.0 || params[nparams].beta < 0.0 || + params[nparams].eta < 0.0 || params[nparams].gamma < 0.0 || + params[nparams].lambda < 0.0 || params[nparams].mu < 0.0 || + params[nparams].rho < 0.0 || params[nparams].sigma < 0.0) + error->all(FLERR,"Illegal EDIP parameter"); + + nparams++; + } + + delete [] 
words; +} + +/* ---------------------------------------------------------------------- */ + +void PairEDIPMulti::setup() +{ + int i,j,k,m,n; + double rtmp; + + // set elem2param for all triplet combinations + // must be a single exact match to lines read from file + // do not allow for ACB in place of ABC + + memory->destroy(elem2param); + memory->create(elem2param,nelements,nelements,nelements,"pair:elem2param"); + + for (i = 0; i < nelements; i++) + for (j = 0; j < nelements; j++) + for (k = 0; k < nelements; k++) { + n = -1; + for (m = 0; m < nparams; m++) { + if (i == params[m].ielement && j == params[m].jelement && + k == params[m].kelement) { + if (n >= 0) error->all(FLERR,"Potential file has duplicate entry"); + n = m; + } + } + if (n < 0) error->all(FLERR,"Potential file is missing an entry"); + elem2param[i][j][k] = n; + } + + // set cutoff square + + for (m = 0; m < nparams; m++) { + params[m].cutsq = params[m].cutoffA*params[m].cutoffA; + } + + // set cutmax to max of all params + + cutmax = 0.0; + for (m = 0; m < nparams; m++) { + rtmp = sqrt(params[m].cutsq); + if (rtmp > cutmax) cutmax = rtmp; + } + +} diff --git a/src/USER-MISC/pair_edip_multi.h b/src/USER-MISC/pair_edip_multi.h new file mode 100644 index 0000000000..e55916f79b --- /dev/null +++ b/src/USER-MISC/pair_edip_multi.h @@ -0,0 +1,113 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(edip/multi,PairEDIPMulti) + +#else + +#ifndef LMP_PAIR_EDIP_MULTI_H +#define LMP_PAIR_EDIP_MULTI_H + +#include "pair.h" + +namespace LAMMPS_NS { + +class PairEDIPMulti : public Pair { + public: + PairEDIPMulti(class LAMMPS *); + virtual ~PairEDIPMulti(); + virtual void compute(int, int); + void settings(int, char **); + void coeff(int, char **); + double init_one(int, int); + void init_style(); + + protected: + struct Param { + double A, B;//coefficients for pair interaction I-J + double cutoffA;//cut-off distance for pair interaction I-J + double cutoffC;//lower cut-off distance for calculating Z_I + double alpha;//coefficient for calculating Z_I + double beta;//attractive term for pair I-J + double sigma;//cut-off coefficient for pair I-J + double rho;//pair I-J + double gamma;//coefficient for three-body interaction I-J-K + double eta, lambda;//coefficients for function h(l,Z) + double mu, Q0;//coefficients for function Q(Z) + double u1, u2, u3, u4;//coefficients for function tau(Z) + double cutsq; + int ielement,jelement,kelement; + }; + + double *preForceCoord; + + double cutmax; // max cutoff for all elements + int nelements; // # of unique elements + char **elements; // names of unique elements + int ***elem2param; // mapping from element triplets to parameters + int *map; // mapping from atom types to elements + int nparams; // # of stored parameter sets + int maxparam; // max # of parameter sets + Param *params; // parameter set for an I-J-K interaction + + // max number of interaction per atom for f(Z) environment potential + + static const int leadDimInteractionList = 64; + + void allocate(); + void allocatePreLoops(void); + void deallocatePreLoops(void); + + void read_file(char *); + void setup(); + + void edip_pair(double, double, Param *, double &, double &, double &); + void edip_fc(double, Param *, double &, double &); + void 
edip_fcut2(double, Param *, double &, double &); + void edip_tau(double, Param *, double &, double &); + void edip_h(double, double, Param *, double &, double &, double &); + void edip_fcut3(double, Param *, double &, double &); + + double vec3_dot(double x[3], double y[3]) + { + return x[0]*y[0] + x[1]*y[1] + x[2]*y[2]; + } + + void vec3_add(double k1, double x[3], double k2, double y[3], double *z) + { + z[0] = k1 * x[0] + k2 * y[0]; + z[1] = k1 * x[1] + k2 * y[1]; + z[2] = k1 * x[2] + k2 * y[2]; + } + + //dr_ij=r_j - r_i + //dr_ik=r_k - r_i + void costheta_d(double *dr_ij, double r_ij, double *dr_ik, double r_ik, + double *dri, double *drj, double *drk) + { + double costheta; + + costheta = vec3_dot(dr_ij, dr_ik) / r_ij / r_ik; + vec3_add(1 / r_ij / r_ik, dr_ik, -costheta / r_ij / r_ij, dr_ij, drj); + vec3_add(1 / r_ij / r_ik, dr_ij, -costheta / r_ik / r_ik, dr_ik, drk); + vec3_add(-1, drj, -1, drk, dri); + } + +}; + +} + +#endif +#endif diff --git a/src/USER-MISC/pair_gauss_cut.cpp b/src/USER-MISC/pair_gauss_cut.cpp index f44b1bbd2d..3836187a64 100644 --- a/src/USER-MISC/pair_gauss_cut.cpp +++ b/src/USER-MISC/pair_gauss_cut.cpp @@ -175,7 +175,7 @@ void PairGaussCut::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/USER-MISC/pair_kolmogorov_crespi_z.cpp b/src/USER-MISC/pair_kolmogorov_crespi_z.cpp index ddb39f6870..15a325e106 100644 --- a/src/USER-MISC/pair_kolmogorov_crespi_z.cpp +++ b/src/USER-MISC/pair_kolmogorov_crespi_z.cpp @@ -209,7 +209,7 @@ void PairKolmogorovCrespiZ::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/USER-MISC/pair_lennard_mdf.cpp b/src/USER-MISC/pair_lennard_mdf.cpp index 
3a81955199..b959f513c0 100644 --- a/src/USER-MISC/pair_lennard_mdf.cpp +++ b/src/USER-MISC/pair_lennard_mdf.cpp @@ -197,7 +197,7 @@ void PairLJ_AB_MDF::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) { cut_inner[i][j] = cut_inner_global; cut[i][j] = cut_global; diff --git a/src/USER-MISC/pair_lj_mdf.cpp b/src/USER-MISC/pair_lj_mdf.cpp index 3b52cf0b86..ebec1f80e1 100644 --- a/src/USER-MISC/pair_lj_mdf.cpp +++ b/src/USER-MISC/pair_lj_mdf.cpp @@ -197,7 +197,7 @@ void PairLJMDF::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) { cut_inner[i][j] = cut_inner_global; cut[i][j] = cut_global; diff --git a/src/USER-MISC/pair_lj_sf.cpp b/src/USER-MISC/pair_lj_sf.cpp index 32f45ff48d..a34119f880 100644 --- a/src/USER-MISC/pair_lj_sf.cpp +++ b/src/USER-MISC/pair_lj_sf.cpp @@ -181,7 +181,7 @@ void PairLJShiftedForce::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/USER-MISC/pair_lj_sf_dipole_sf.cpp b/src/USER-MISC/pair_lj_sf_dipole_sf.cpp index 33f10f2f12..fb63638b52 100644 --- a/src/USER-MISC/pair_lj_sf_dipole_sf.cpp +++ b/src/USER-MISC/pair_lj_sf_dipole_sf.cpp @@ -342,7 +342,7 @@ void PairLJSFDipoleSF::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) { cut_lj[i][j] = cut_lj_global; cut_coul[i][j] = cut_coul_global; diff --git a/src/USER-MISC/pair_meam_spline.cpp b/src/USER-MISC/pair_meam_spline.cpp index 614661ad55..0148ed51cb 100644 --- a/src/USER-MISC/pair_meam_spline.cpp 
+++ b/src/USER-MISC/pair_meam_spline.cpp @@ -13,6 +13,9 @@ /* ---------------------------------------------------------------------- Contributing author: Alexander Stukowski (LLNL), alex@stukowski.com + Will Tipton (Cornell), wwt26@cornell.edu + Dallas R. Trinkle (UIUC), dtrinkle@illinois.edu + Pinchao Zhang (UIUC) see LLNL copyright notice at bottom of file ------------------------------------------------------------------------- */ @@ -23,6 +26,9 @@ * 25-Mar-11 - AS: Fixed calculation of per-atom virial stress. * 11-Apr-11 - AS: Adapted code to new memory management of LAMMPS. * 24-Sep-11 - AS: Adapted code to new interface of Error::one() function. + * 20-Jun-13 - WT: Added support for multiple species types + * 25-Apr-17 - DRT/PZ: Modified format of multiple species type to + conform with pairing, updated to LAMMPS style ------------------------------------------------------------------------- */ #include @@ -49,7 +55,6 @@ PairMEAMSpline::PairMEAMSpline(LAMMPS *lmp) : Pair(lmp) single_enable = 0; restartinfo = 0; one_coeff = 1; - manybody_flag = 1; nelements = 0; elements = NULL; @@ -77,6 +82,15 @@ PairMEAMSpline::~PairMEAMSpline() if(allocated) { memory->destroy(setflag); memory->destroy(cutsq); + + delete[] phis; + delete[] Us; + delete[] rhos; + delete[] fs; + delete[] gs; + + delete[] zero_atom_energies; + delete [] map; } } @@ -85,11 +99,16 @@ PairMEAMSpline::~PairMEAMSpline() void PairMEAMSpline::compute(int eflag, int vflag) { - if (eflag || vflag) ev_setup(eflag, vflag); - else evflag = vflag_fdotr = - eflag_global = vflag_global = eflag_atom = vflag_atom = 0; + const double* const * const x = atom->x; + double* const * const forces = atom->f; + const int ntypes = atom->ntypes; - double cutforcesq = cutoff*cutoff; + if (eflag || vflag) { + ev_setup(eflag, vflag); + } else { + evflag = vflag_fdotr = eflag_global = 0; + vflag_global = eflag_atom = vflag_atom = 0; + } // Grow per-atom array if necessary @@ -99,22 +118,13 @@ void PairMEAMSpline::compute(int 
eflag, int vflag) memory->create(Uprime_values,nmax,"pair:Uprime"); } - double** const x = atom->x; - double** forces = atom->f; - int nlocal = atom->nlocal; - bool newton_pair = force->newton_pair; - - int inum_full = listfull->inum; - int* ilist_full = listfull->ilist; - int* numneigh_full = listfull->numneigh; - int** firstneigh_full = listfull->firstneigh; - // Determine the maximum number of neighbors a single atom has int newMaxNeighbors = 0; - for(int ii = 0; ii < inum_full; ii++) { - int jnum = numneigh_full[ilist_full[ii]]; - if(jnum > newMaxNeighbors) newMaxNeighbors = jnum; + for(int ii = 0; ii < listfull->inum; ii++) { + int jnum = listfull->numneigh[listfull->ilist[ii]]; + if(jnum > newMaxNeighbors) + newMaxNeighbors = jnum; } // Allocate array for temporary bond info @@ -126,35 +136,35 @@ void PairMEAMSpline::compute(int eflag, int vflag) } // Sum three-body contributions to charge density and - // compute embedding energies + // the embedding energy - for(int ii = 0; ii < inum_full; ii++) { - int i = ilist_full[ii]; - double xtmp = x[i][0]; - double ytmp = x[i][1]; - double ztmp = x[i][2]; - int* jlist = firstneigh_full[i]; - int jnum = numneigh_full[i]; - double rho_value = 0; + for(int ii = 0; ii < listfull->inum; ii++) { + int i = listfull->ilist[ii]; int numBonds = 0; - MEAM2Body* nextTwoBodyInfo = twoBodyInfo; - for(int jj = 0; jj < jnum; jj++) { - int j = jlist[jj]; + // compute charge density and numBonds + MEAM2Body* nextTwoBodyInfo = twoBodyInfo; + double rho_value = 0; + const int ntypes = atom->ntypes; + const int itype = atom->type[i]; + + for(int jj = 0; jj < listfull->numneigh[i]; jj++) { + int j = listfull->firstneigh[i][jj]; j &= NEIGHMASK; - double jdelx = x[j][0] - xtmp; - double jdely = x[j][1] - ytmp; - double jdelz = x[j][2] - ztmp; + double jdelx = x[j][0] - x[i][0]; + double jdely = x[j][1] - x[i][1]; + double jdelz = x[j][2] - x[i][2]; double rij_sq = jdelx*jdelx + jdely*jdely + jdelz*jdelz; - if(rij_sq < cutforcesq) { + 
if(rij_sq < cutoff*cutoff) { double rij = sqrt(rij_sq); double partial_sum = 0; + const int jtype = atom->type[j]; nextTwoBodyInfo->tag = j; nextTwoBodyInfo->r = rij; - nextTwoBodyInfo->f = f.eval(rij, nextTwoBodyInfo->fprime); + nextTwoBodyInfo->f = fs[i_to_potl(jtype)].eval(rij, nextTwoBodyInfo->fprime); nextTwoBodyInfo->del[0] = jdelx / rij; nextTwoBodyInfo->del[1] = jdely / rij; nextTwoBodyInfo->del[2] = jdelz / rij; @@ -164,11 +174,11 @@ void PairMEAMSpline::compute(int eflag, int vflag) double cos_theta = (nextTwoBodyInfo->del[0]*bondk.del[0] + nextTwoBodyInfo->del[1]*bondk.del[1] + nextTwoBodyInfo->del[2]*bondk.del[2]); - partial_sum += bondk.f * g.eval(cos_theta); + partial_sum += bondk.f * gs[ij_to_potl(jtype,atom->type[bondk.tag],ntypes)].eval(cos_theta); } rho_value += nextTwoBodyInfo->f * partial_sum; - rho_value += rho.eval(rij); + rho_value += rhos[i_to_potl(jtype)].eval(rij); numBonds++; nextTwoBodyInfo++; @@ -176,19 +186,20 @@ void PairMEAMSpline::compute(int eflag, int vflag) } // Compute embedding energy and its derivative - double Uprime_i; - double embeddingEnergy = U.eval(rho_value, Uprime_i) - zero_atom_energy; + double embeddingEnergy = Us[i_to_potl(itype)].eval(rho_value, Uprime_i) + - zero_atom_energies[i_to_potl(itype)]; + Uprime_values[i] = Uprime_i; if(eflag) { - if(eflag_global) eng_vdwl += embeddingEnergy; - if(eflag_atom) eatom[i] += embeddingEnergy; + if(eflag_global) + eng_vdwl += embeddingEnergy; + if(eflag_atom) + eatom[i] += embeddingEnergy; } - double forces_i[3] = {0, 0, 0}; - // Compute three-body contributions to force - + double forces_i[3] = {0, 0, 0}; for(int jj = 0; jj < numBonds; jj++) { const MEAM2Body bondj = twoBodyInfo[jj]; double rij = bondj.r; @@ -198,6 +209,7 @@ void PairMEAMSpline::compute(int eflag, int vflag) double f_rij = bondj.f; double forces_j[3] = {0, 0, 0}; + const int jtype = atom->type[j]; MEAM2Body const* bondk = twoBodyInfo; for(int kk = 0; kk < jj; kk++, ++bondk) { @@ -207,7 +219,7 @@ void 
PairMEAMSpline::compute(int eflag, int vflag) bondj.del[1]*bondk->del[1] + bondj.del[2]*bondk->del[2]); double g_prime; - double g_value = g.eval(cos_theta, g_prime); + double g_value = gs[ij_to_potl(jtype,atom->type[bondk->tag],ntypes)].eval(cos_theta, g_prime); double f_rik_prime = bondk->fprime; double f_rik = bondk->f; @@ -271,40 +283,32 @@ void PairMEAMSpline::compute(int eflag, int vflag) comm->forward_comm_pair(this); - int inum_half = listhalf->inum; - int* ilist_half = listhalf->ilist; - int* numneigh_half = listhalf->numneigh; - int** firstneigh_half = listhalf->firstneigh; - // Compute two-body pair interactions + for(int ii = 0; ii < listhalf->inum; ii++) { + int i = listhalf->ilist[ii]; + const int itype = atom->type[i]; - for(int ii = 0; ii < inum_half; ii++) { - int i = ilist_half[ii]; - double xtmp = x[i][0]; - double ytmp = x[i][1]; - double ztmp = x[i][2]; - int* jlist = firstneigh_half[i]; - int jnum = numneigh_half[i]; - - for(int jj = 0; jj < jnum; jj++) { - int j = jlist[jj]; + for(int jj = 0; jj < listhalf->numneigh[i]; jj++) { + int j = listhalf->firstneigh[i][jj]; j &= NEIGHMASK; double jdel[3]; - jdel[0] = x[j][0] - xtmp; - jdel[1] = x[j][1] - ytmp; - jdel[2] = x[j][2] - ztmp; + jdel[0] = x[j][0] - x[i][0]; + jdel[1] = x[j][1] - x[i][1]; + jdel[2] = x[j][2] - x[i][2]; double rij_sq = jdel[0]*jdel[0] + jdel[1]*jdel[1] + jdel[2]*jdel[2]; - if(rij_sq < cutforcesq) { + if(rij_sq < cutoff*cutoff) { double rij = sqrt(rij_sq); + const int jtype = atom->type[j]; - double rho_prime; - rho.eval(rij, rho_prime); - double fpair = rho_prime * (Uprime_values[i] + Uprime_values[j]); - + double rho_prime_i,rho_prime_j; + rhos[i_to_potl(itype)].eval(rij,rho_prime_i); + rhos[i_to_potl(jtype)].eval(rij,rho_prime_j); + double fpair = rho_prime_j * Uprime_values[i] + rho_prime_i*Uprime_values[j]; double pair_pot_deriv; - double pair_pot = phi.eval(rij, pair_pot_deriv); + double pair_pot = phis[ij_to_potl(itype,jtype,ntypes)].eval(rij, pair_pot_deriv); + fpair 
+= pair_pot_deriv; // Divide by r_ij to get forces from gradient @@ -317,13 +321,14 @@ void PairMEAMSpline::compute(int eflag, int vflag) forces[j][0] -= jdel[0]*fpair; forces[j][1] -= jdel[1]*fpair; forces[j][2] -= jdel[2]*fpair; - if (evflag) ev_tally(i, j, nlocal, newton_pair, + if (evflag) ev_tally(i, j, atom->nlocal, force->newton_pair, pair_pot, 0.0, -fpair, jdel[0], jdel[1], jdel[2]); } } } - if(vflag_fdotr) virial_fdotr_compute(); + if(vflag_fdotr) + virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ @@ -331,11 +336,23 @@ void PairMEAMSpline::compute(int eflag, int vflag) void PairMEAMSpline::allocate() { allocated = 1; - int n = atom->ntypes; + int n = nelements; memory->create(setflag,n+1,n+1,"pair:setflag"); memory->create(cutsq,n+1,n+1,"pair:cutsq"); + int nmultichoose2 = n*(n+1)/2; + //Change the functional form + //f_ij->f_i + //g_i(cos\theta_ijk)->g_jk(cos\theta_ijk) + phis = new SplineFunction[nmultichoose2]; + Us = new SplineFunction[n]; + rhos = new SplineFunction[n]; + fs = new SplineFunction[n]; + gs = new SplineFunction[nmultichoose2]; + + zero_atom_energies = new double[n]; + map = new int[n+1]; } @@ -356,8 +373,6 @@ void PairMEAMSpline::coeff(int narg, char **arg) { int i,j,n; - if (!allocated) allocate(); - if (narg != 3 + atom->ntypes) error->all(FLERR,"Incorrect args for pair coefficients"); @@ -366,45 +381,34 @@ void PairMEAMSpline::coeff(int narg, char **arg) if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0) error->all(FLERR,"Incorrect args for pair coefficients"); + // read potential file: also sets the number of elements. 
+ read_file(arg[2]); + // read args that map atom types to elements in potential file // map[i] = which element the Ith atom type is, -1 if NULL // nelements = # of unique elements // elements = list of element names - if (elements) { - for (i = 0; i < nelements; i++) delete [] elements[i]; - delete [] elements; - } - elements = new char*[atom->ntypes]; - for (i = 0; i < atom->ntypes; i++) elements[i] = NULL; - - nelements = 0; - for (i = 3; i < narg; i++) { - if (strcmp(arg[i],"NULL") == 0) { - map[i-2] = -1; - continue; - } - for (j = 0; j < nelements; j++) - if (strcmp(arg[i],elements[j]) == 0) break; - map[i-2] = j; - if (j == nelements) { - n = strlen(arg[i]) + 1; - elements[j] = new char[n]; - strcpy(elements[j],arg[i]); - nelements++; + if ((nelements == 1) && (strlen(elements[0]) == 0)) { + // old style: we only have one species, so we're either "NULL" or we match. + for (i = 3; i < narg; i++) + if (strcmp(arg[i],"NULL") == 0) + map[i-2] = -1; + else + map[i-2] = 0; + } else { + for (i = 3; i < narg; i++) { + if (strcmp(arg[i],"NULL") == 0) { + map[i-2] = -1; + continue; + } + for (j = 0; j < nelements; j++) + if (strcmp(arg[i],elements[j]) == 0) + break; + if (j < nelements) map[i-2] = j; + else error->all(FLERR,"No matching element in EAM potential file"); } } - - // for now, only allow single element - - if (nelements > 1) - error->all(FLERR, - "Pair meam/spline only supports single element potentials"); - - // read potential file - - read_file(arg[2]); - // clear setflag since coeff() called once with I,J = * * n = atom->ntypes; @@ -425,65 +429,134 @@ void PairMEAMSpline::coeff(int narg, char **arg) if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); } -/* ---------------------------------------------------------------------- - set coeffs for one or more type pairs -------------------------------------------------------------------------- */ - #define MAXLINE 1024 void PairMEAMSpline::read_file(const char* filename) { - if(comm->me 
== 0) { - FILE *fp = force->open_potential(filename); - if(fp == NULL) { - char str[1024]; - sprintf(str,"Cannot open spline MEAM potential file %s", filename); - error->one(FLERR,str); - } + int nmultichoose2; // = (n+1)*n/2; - // Skip first line of file. - char line[MAXLINE]; - fgets(line, MAXLINE, fp); + if(comm->me == 0) { + FILE *fp = force->open_potential(filename); + if(fp == NULL) { + char str[1024]; + sprintf(str,"Cannot open spline MEAM potential file %s", filename); + error->one(FLERR,str); + } - // Parse spline functions. - phi.parse(fp, error); - rho.parse(fp, error); - U.parse(fp, error); - f.parse(fp, error); - g.parse(fp, error); + // Skip first line of file. It's a comment. + char line[MAXLINE]; + char *ptr; + fgets(line, MAXLINE, fp); - fclose(fp); - } + // Second line holds potential type ("meam/spline") + // in new potential format. - // Transfer spline functions from master processor to all other processors. - phi.communicate(world, comm->me); - rho.communicate(world, comm->me); - f.communicate(world, comm->me); - U.communicate(world, comm->me); - g.communicate(world, comm->me); + bool isNewFormat = false; + fgets(line, MAXLINE, fp); + ptr = strtok(line, " \t\n\r\f"); - // Calculate 'zero-point energy' of single atom in vacuum. - zero_atom_energy = U.eval(0.0); + if (strcmp(ptr, "meam/spline") == 0) { + isNewFormat = true; + // parse the rest of the line! 
+ ptr = strtok(NULL," \t\n\r\f"); + if (ptr == NULL) + error->one(FLERR,"Need to include number of atomic species on" + " meam/spline line in multi-element potential file"); + nelements = atoi(ptr); + if (nelements < 1) + error->one(FLERR, "Invalid number of atomic species on" + " meam/spline line in potential file"); + elements = new char*[nelements]; + for (int i=0; ione(FLERR, "Not enough atomic species in meam/spline" + " line of multi-element potential file"); + elements[i] = new char[strlen(ptr)+1]; + strcpy(elements[i], ptr); + } + } else { + isNewFormat = false; + nelements = 1; // old format only handles one species; (backwards compatibility) + elements = new char*[1]; + elements[0] = new char[1]; + strcpy(elements[0], ""); + rewind(fp); + fgets(line, MAXLINE, fp); + } - // Determine maximum cutoff radius of all relevant spline functions. - cutoff = 0.0; - if(phi.cutoff() > cutoff) cutoff = phi.cutoff(); - if(rho.cutoff() > cutoff) cutoff = rho.cutoff(); - if(f.cutoff() > cutoff) cutoff = f.cutoff(); + nmultichoose2 = ((nelements+1)*nelements)/2; + // allocate!! + allocate(); - // Set LAMMPS pair interaction flags. - for(int i = 1; i <= atom->ntypes; i++) { - for(int j = 1; j <= atom->ntypes; j++) { - setflag[i][j] = 1; - cutsq[i][j] = cutoff; - } - } + // Parse spline functions. + + for (int i = 0; i < nmultichoose2; i++) + phis[i].parse(fp, error, isNewFormat); + for (int i = 0; i < nelements; i++) + rhos[i].parse(fp, error, isNewFormat); + for (int i = 0; i < nelements; i++) + Us[i].parse(fp, error, isNewFormat); + for (int i = 0; i < nelements; i++) + fs[i].parse(fp, error, isNewFormat); + for (int i = 0; i < nmultichoose2; i++) + gs[i].parse(fp, error, isNewFormat); + + fclose(fp); + } + + // Transfer spline functions from master processor to all other processors. + MPI_Bcast(&nelements, 1, MPI_INT, 0, world); + MPI_Bcast(&nmultichoose2, 1, MPI_INT, 0, world); + // allocate!! 
+ if (comm->me != 0) { + allocate(); + elements = new char*[nelements]; + } + for (int i = 0; i < nelements; ++i) { + int n; + if (comm->me == 0) + n = strlen(elements[i]); + MPI_Bcast(&n, 1, MPI_INT, 0, world); + if (comm->me != 0) + elements[i] = new char[n+1]; + MPI_Bcast(elements[i], n+1, MPI_CHAR, 0, world); + } + for (int i = 0; i < nmultichoose2; i++) + phis[i].communicate(world, comm->me); + for (int i = 0; i < nelements; i++) + rhos[i].communicate(world, comm->me); + for (int i = 0; i < nelements; i++) + fs[i].communicate(world, comm->me); + for (int i = 0; i < nelements; i++) + Us[i].communicate(world, comm->me); + for (int i = 0; i < nmultichoose2; i++) + gs[i].communicate(world, comm->me); + + // Calculate 'zero-point energy' of single atom in vacuum. + for (int i = 0; i < nelements; i++) + zero_atom_energies[i] = Us[i].eval(0.0); + + // Determine maximum cutoff radius of all relevant spline functions. + cutoff = 0.0; + for (int i = 0; i < nmultichoose2; i++) + if(phis[i].cutoff() > cutoff) + cutoff = phis[i].cutoff(); + for (int i = 0; i < nelements; i++) + if(rhos[i].cutoff() > cutoff) + cutoff = rhos[i].cutoff(); + for (int i = 0; i < nelements; i++) + if(fs[i].cutoff() > cutoff) + cutoff = fs[i].cutoff(); + + // Set LAMMPS pair interaction flags. 
+ for(int i = 1; i <= atom->ntypes; i++) { + for(int j = 1; j <= atom->ntypes; j++) { + // setflag[i][j] = 1; + cutsq[i][j] = cutoff; + } + } - //phi.writeGnuplot("phi.gp", "Phi(r)"); - //rho.writeGnuplot("rho.gp", "Rho(r)"); - //f.writeGnuplot("f.gp", "f(r)"); - //U.writeGnuplot("U.gp", "U(rho)"); - //g.writeGnuplot("g.gp", "g(x)"); } /* ---------------------------------------------------------------------- @@ -491,16 +564,19 @@ void PairMEAMSpline::read_file(const char* filename) ------------------------------------------------------------------------- */ void PairMEAMSpline::init_style() { - if(force->newton_pair == 0) - error->all(FLERR,"Pair style meam/spline requires newton pair on"); + if(force->newton_pair == 0) + error->all(FLERR,"Pair style meam/spline requires newton pair on"); - // Need both full and half neighbor list. - int irequest_full = neighbor->request(this,instance_me); - neighbor->requests[irequest_full]->id = 1; - neighbor->requests[irequest_full]->half = 0; - neighbor->requests[irequest_full]->full = 1; - int irequest_half = neighbor->request(this,instance_me); - neighbor->requests[irequest_half]->id = 2; + // Need both full and half neighbor list. 
+ int irequest_full = neighbor->request(this,instance_me); + neighbor->requests[irequest_full]->id = 1; + neighbor->requests[irequest_full]->half = 0; + neighbor->requests[irequest_full]->full = 1; + int irequest_half = neighbor->request(this,instance_me); + neighbor->requests[irequest_half]->id = 2; + // neighbor->requests[irequest_half]->half = 1; + // neighbor->requests[irequest_half]->halffull = 1; + // neighbor->requests[irequest_half]->halffulllist = irequest_full; } /* ---------------------------------------------------------------------- @@ -509,8 +585,8 @@ void PairMEAMSpline::init_style() ------------------------------------------------------------------------- */ void PairMEAMSpline::init_list(int id, NeighList *ptr) { - if(id == 1) listfull = ptr; - else if(id == 2) listhalf = ptr; + if(id == 1) listfull = ptr; + else if(id == 2) listhalf = ptr; } /* ---------------------------------------------------------------------- @@ -518,33 +594,33 @@ void PairMEAMSpline::init_list(int id, NeighList *ptr) ------------------------------------------------------------------------- */ double PairMEAMSpline::init_one(int i, int j) { - return cutoff; + return cutoff; } /* ---------------------------------------------------------------------- */ int PairMEAMSpline::pack_forward_comm(int n, int *list, double *buf, - int pbc_flag, int *pbc) + int pbc_flag, int *pbc) { - int* list_iter = list; - int* list_iter_end = list + n; - while(list_iter != list_iter_end) - *buf++ = Uprime_values[*list_iter++]; - return n; + int* list_iter = list; + int* list_iter_end = list + n; + while(list_iter != list_iter_end) + *buf++ = Uprime_values[*list_iter++]; + return n; } /* ---------------------------------------------------------------------- */ void PairMEAMSpline::unpack_forward_comm(int n, int first, double *buf) { - memcpy(&Uprime_values[first], buf, n * sizeof(buf[0])); + memcpy(&Uprime_values[first], buf, n * sizeof(buf[0])); } /* 
---------------------------------------------------------------------- */ int PairMEAMSpline::pack_reverse_comm(int n, int first, double *buf) { - return 0; + return 0; } /* ---------------------------------------------------------------------- */ @@ -558,141 +634,148 @@ void PairMEAMSpline::unpack_reverse_comm(int n, int *list, double *buf) ------------------------------------------------------------------------- */ double PairMEAMSpline::memory_usage() { - return nmax * sizeof(double); // The Uprime_values array. + return nmax * sizeof(double); // The Uprime_values array. } /// Parses the spline knots from a text file. -void PairMEAMSpline::SplineFunction::parse(FILE* fp, Error* error) +void PairMEAMSpline::SplineFunction::parse(FILE* fp, Error* error, + bool isNewFormat) { - char line[MAXLINE]; + char line[MAXLINE]; - // Parse number of spline knots. - fgets(line, MAXLINE, fp); - int n = atoi(line); - if(n < 2) - error->one(FLERR,"Invalid number of spline knots in MEAM potential file"); + // If new format, read the spline format. Should always be "spline3eq" for now. + if (isNewFormat) + fgets(line, MAXLINE, fp); - // Parse first derivatives at beginning and end of spline. - fgets(line, MAXLINE, fp); - double d0 = atof(strtok(line, " \t\n\r\f")); - double dN = atof(strtok(NULL, " \t\n\r\f")); - init(n, d0, dN); + // Parse number of spline knots. + fgets(line, MAXLINE, fp); + int n = atoi(line); + if(n < 2) + error->one(FLERR,"Invalid number of spline knots in MEAM potential file"); - // Skip line. - fgets(line, MAXLINE, fp); + // Parse first derivatives at beginning and end of spline. + fgets(line, MAXLINE, fp); + double d0 = atof(strtok(line, " \t\n\r\f")); + double dN = atof(strtok(NULL, " \t\n\r\f")); + init(n, d0, dN); - // Parse knot coordinates. 
- for(int i=0; ione(FLERR,"Invalid knot line in MEAM potential file"); - } - setKnot(i, x, y); - } + // Skip line in old format + if (!isNewFormat) + fgets(line, MAXLINE, fp); - prepareSpline(error); + // Parse knot coordinates. + for(int i=0; ione(FLERR,"Invalid knot line in MEAM potential file"); + } + setKnot(i, x, y); + } + + prepareSpline(error); } /// Calculates the second derivatives at the knots of the cubic spline. void PairMEAMSpline::SplineFunction::prepareSpline(Error* error) { - xmin = X[0]; - xmax = X[N-1]; + xmin = X[0]; + xmax = X[N-1]; - isGridSpline = true; - h = (xmax-xmin)/(N-1); - hsq = h*h; + isGridSpline = true; + h = (xmax-xmin)/(N-1); + hsq = h*h; - double* u = new double[N]; - Y2[0] = -0.5; - u[0] = (3.0/(X[1]-X[0])) * ((Y[1]-Y[0])/(X[1]-X[0]) - deriv0); - for(int i = 1; i <= N-2; i++) { - double sig = (X[i]-X[i-1]) / (X[i+1]-X[i-1]); - double p = sig * Y2[i-1] + 2.0; - Y2[i] = (sig - 1.0) / p; - u[i] = (Y[i+1]-Y[i]) / (X[i+1]-X[i]) - (Y[i]-Y[i-1])/(X[i]-X[i-1]); - u[i] = (6.0 * u[i]/(X[i+1]-X[i-1]) - sig*u[i-1])/p; + double* u = new double[N]; + Y2[0] = -0.5; + u[0] = (3.0/(X[1]-X[0])) * ((Y[1]-Y[0])/(X[1]-X[0]) - deriv0); + for(int i = 1; i <= N-2; i++) { + double sig = (X[i]-X[i-1]) / (X[i+1]-X[i-1]); + double p = sig * Y2[i-1] + 2.0; + Y2[i] = (sig - 1.0) / p; + u[i] = (Y[i+1]-Y[i]) / (X[i+1]-X[i]) - (Y[i]-Y[i-1])/(X[i]-X[i-1]); + u[i] = (6.0 * u[i]/(X[i+1]-X[i-1]) - sig*u[i-1])/p; - if(fabs(h*i+xmin - X[i]) > 1e-8) - isGridSpline = false; - } + if(fabs(h*i+xmin - X[i]) > 1e-8) + isGridSpline = false; + } - double qn = 0.5; - double un = (3.0/(X[N-1]-X[N-2])) * (derivN - (Y[N-1]-Y[N-2])/(X[N-1]-X[N-2])); - Y2[N-1] = (un - qn*u[N-2]) / (qn * Y2[N-2] + 1.0); - for(int k = N-2; k >= 0; k--) { - Y2[k] = Y2[k] * Y2[k+1] + u[k]; - } + double qn = 0.5; + double un = (3.0/(X[N-1]-X[N-2])) * (derivN - (Y[N-1]-Y[N-2])/(X[N-1]-X[N-2])); + Y2[N-1] = (un - qn*u[N-2]) / (qn * Y2[N-2] + 1.0); + for(int k = N-2; k >= 0; k--) { + Y2[k] = Y2[k] * 
Y2[k+1] + u[k]; + } - delete[] u; + delete[] u; #if !SPLINE_MEAM_SUPPORT_NON_GRID_SPLINES - if(!isGridSpline) - error->one(FLERR,"Support for MEAM potentials with non-uniform cubic splines has not been enabled in the MEAM potential code. Set SPLINE_MEAM_SUPPORT_NON_GRID_SPLINES in pair_spline_meam.h to 1 to enable it"); + if(!isGridSpline) + error->one(FLERR,"Support for MEAM potentials with non-uniform cubic splines has not been enabled in the MEAM potential code. Set SPLINE_MEAM_SUPPORT_NON_GRID_SPLINES in pair_spline_meam.h to 1 to enable it"); #endif - // Shift the spline to X=0 to speed up interpolation. - for(int i = 0; i < N; i++) { - Xs[i] = X[i] - xmin; + // Shift the spline to X=0 to speed up interpolation. + for(int i = 0; i < N; i++) { + Xs[i] = X[i] - xmin; #if !SPLINE_MEAM_SUPPORT_NON_GRID_SPLINES - if(i < N-1) Ydelta[i] = (Y[i+1]-Y[i])/h; - Y2[i] /= h*6.0; + if(i < N-1) Ydelta[i] = (Y[i+1]-Y[i])/h; + Y2[i] /= h*6.0; #endif - } - xmax_shifted = xmax - xmin; + } + xmax_shifted = xmax - xmin; } /// Broadcasts the spline function parameters to all processors. 
void PairMEAMSpline::SplineFunction::communicate(MPI_Comm& world, int me) { - MPI_Bcast(&N, 1, MPI_INT, 0, world); - MPI_Bcast(&deriv0, 1, MPI_DOUBLE, 0, world); - MPI_Bcast(&derivN, 1, MPI_DOUBLE, 0, world); - MPI_Bcast(&xmin, 1, MPI_DOUBLE, 0, world); - MPI_Bcast(&xmax, 1, MPI_DOUBLE, 0, world); - MPI_Bcast(&xmax_shifted, 1, MPI_DOUBLE, 0, world); - MPI_Bcast(&isGridSpline, 1, MPI_INT, 0, world); - MPI_Bcast(&h, 1, MPI_DOUBLE, 0, world); - MPI_Bcast(&hsq, 1, MPI_DOUBLE, 0, world); - if(me != 0) { - X = new double[N]; - Xs = new double[N]; - Y = new double[N]; - Y2 = new double[N]; - Ydelta = new double[N]; - } - MPI_Bcast(X, N, MPI_DOUBLE, 0, world); - MPI_Bcast(Xs, N, MPI_DOUBLE, 0, world); - MPI_Bcast(Y, N, MPI_DOUBLE, 0, world); - MPI_Bcast(Y2, N, MPI_DOUBLE, 0, world); - MPI_Bcast(Ydelta, N, MPI_DOUBLE, 0, world); + MPI_Bcast(&N, 1, MPI_INT, 0, world); + MPI_Bcast(&deriv0, 1, MPI_DOUBLE, 0, world); + MPI_Bcast(&derivN, 1, MPI_DOUBLE, 0, world); + MPI_Bcast(&xmin, 1, MPI_DOUBLE, 0, world); + MPI_Bcast(&xmax, 1, MPI_DOUBLE, 0, world); + MPI_Bcast(&xmax_shifted, 1, MPI_DOUBLE, 0, world); + MPI_Bcast(&isGridSpline, 1, MPI_INT, 0, world); + MPI_Bcast(&h, 1, MPI_DOUBLE, 0, world); + MPI_Bcast(&hsq, 1, MPI_DOUBLE, 0, world); + if(me != 0) { + X = new double[N]; + Xs = new double[N]; + Y = new double[N]; + Y2 = new double[N]; + Ydelta = new double[N]; + } + MPI_Bcast(X, N, MPI_DOUBLE, 0, world); + MPI_Bcast(Xs, N, MPI_DOUBLE, 0, world); + MPI_Bcast(Y, N, MPI_DOUBLE, 0, world); + MPI_Bcast(Y2, N, MPI_DOUBLE, 0, world); + MPI_Bcast(Ydelta, N, MPI_DOUBLE, 0, world); } /// Writes a Gnuplot script that plots the spline function. /// /// This function is for debugging only! 
-void PairMEAMSpline::SplineFunction::writeGnuplot(const char* filename, const char* title) const +void PairMEAMSpline::SplineFunction::writeGnuplot(const char* filename, + const char* title) const { - FILE* fp = fopen(filename, "w"); - fprintf(fp, "#!/usr/bin/env gnuplot\n"); - if(title) fprintf(fp, "set title \"%s\"\n", title); - double tmin = X[0] - (X[N-1] - X[0]) * 0.05; - double tmax = X[N-1] + (X[N-1] - X[0]) * 0.05; - double delta = (tmax - tmin) / (N*200); - fprintf(fp, "set xrange [%f:%f]\n", tmin, tmax); - fprintf(fp, "plot '-' with lines notitle, '-' with points notitle pt 3 lc 3\n"); - for(double x = tmin; x <= tmax+1e-8; x += delta) { - double y = eval(x); - fprintf(fp, "%f %f\n", x, y); - } - fprintf(fp, "e\n"); - for(int i = 0; i < N; i++) { - fprintf(fp, "%f %f\n", X[i], Y[i]); - } - fprintf(fp, "e\n"); - fclose(fp); + FILE* fp = fopen(filename, "w"); + fprintf(fp, "#!/usr/bin/env gnuplot\n"); + if(title) fprintf(fp, "set title \"%s\"\n", title); + double tmin = X[0] - (X[N-1] - X[0]) * 0.05; + double tmax = X[N-1] + (X[N-1] - X[0]) * 0.05; + double delta = (tmax - tmin) / (N*200); + fprintf(fp, "set xrange [%f:%f]\n", tmin, tmax); + fprintf(fp, "plot '-' with lines notitle, '-' with points notitle pt 3 lc 3\n"); + for(double x = tmin; x <= tmax+1e-8; x += delta) { + double y = eval(x); + fprintf(fp, "%f %f\n", x, y); + } + fprintf(fp, "e\n"); + for(int i = 0; i < N; i++) { + fprintf(fp, "%f %f\n", X[i], Y[i]); + } + fprintf(fp, "e\n"); + fclose(fp); } /* ---------------------------------------------------------------------- @@ -734,3 +817,5 @@ void PairMEAMSpline::SplineFunction::writeGnuplot(const char* filename, const ch * Lawrence Livermore National Security, LLC, and shall not be used for * advertising or product endorsement purposes. 
------------------------------------------------------------------------- */ + + diff --git a/src/USER-MISC/pair_meam_spline.h b/src/USER-MISC/pair_meam_spline.h index d16a321cb6..6200254674 100644 --- a/src/USER-MISC/pair_meam_spline.h +++ b/src/USER-MISC/pair_meam_spline.h @@ -1,4 +1,4 @@ -/* -*- c++ -*- ---------------------------------------------------------- +/* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov @@ -28,209 +28,230 @@ PairStyle(meam/spline,PairMEAMSpline) namespace LAMMPS_NS { -/// Set this to 1 if you intend to use MEAM potentials with non-uniform spline knots. -/// Set this to 0 if you intend to use only MEAM potentials with spline knots on a uniform grid. -/// -/// With SUPPORT_NON_GRID_SPLINES == 0, the code runs about 50% faster. +// Set this to 1 if you intend to use MEAM potentials with +// non-uniform spline knots. +// Set this to 0 if you intend to use only MEAM potentials with +// spline knots on a uniform grid. +// +// With SUPPORT_NON_GRID_SPLINES == 0, the code runs about 50% faster. 
#define SPLINE_MEAM_SUPPORT_NON_GRID_SPLINES 0 class PairMEAMSpline : public Pair { public: - PairMEAMSpline(class LAMMPS *); - virtual ~PairMEAMSpline(); - virtual void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_style(); - void init_list(int, class NeighList *); - double init_one(int, int); + PairMEAMSpline(class LAMMPS *); + virtual ~PairMEAMSpline(); + virtual void compute(int, int); + void settings(int, char **); + void coeff(int, char **); + void get_coeff(double *, double *); + double pair_density(int ); + double three_body_density(int ); + void init_style(); + void init_list(int, class NeighList *); + double init_one(int, int); - int pack_forward_comm(int, int *, double *, int, int *); - void unpack_forward_comm(int, int, double *); - int pack_reverse_comm(int, int, double *); - void unpack_reverse_comm(int, int *, double *); - double memory_usage(); + // helper functions for compute() + + int ij_to_potl(const int itype, const int jtype, const int ntypes) const { + return jtype - 1 + (itype-1)*ntypes - (itype-1)*itype/2; + } + int i_to_potl(const int itype) const { return itype-1; } + + + int pack_forward_comm(int, int *, double *, int, int *); + void unpack_forward_comm(int, int, double *); + int pack_reverse_comm(int, int, double *); + void unpack_reverse_comm(int, int *, double *); + double memory_usage(); protected: char **elements; // names of unique elements int *map; // mapping from atom types to elements int nelements; // # of unique elements - class SplineFunction { - public: + class SplineFunction { + public: + /// Default constructor. + SplineFunction() : X(NULL), Xs(NULL), Y(NULL), Y2(NULL), Ydelta(NULL), N(0) {} - /// Default constructor. - SplineFunction() : X(NULL), Xs(NULL), Y(NULL), Y2(NULL), Ydelta(NULL), N(0) {} + /// Destructor. + ~SplineFunction() { + delete[] X; + delete[] Xs; + delete[] Y; + delete[] Y2; + delete[] Ydelta; + } - /// Destructor. 
- ~SplineFunction() { - delete[] X; - delete[] Xs; - delete[] Y; - delete[] Y2; - delete[] Ydelta; - } + /// Initialization of spline function. + void init(int _N, double _deriv0, double _derivN) { + N = _N; + deriv0 = _deriv0; + derivN = _derivN; + // if (X) delete[] X; + // if (Xs) delete[] Xs; + // if (Y) delete[] Y; + // if (Y2) delete[] Y2; + // if (Ydelta) delete[] Ydelta; + X = new double[N]; + Xs = new double[N]; + Y = new double[N]; + Y2 = new double[N]; + Ydelta = new double[N]; + } - /// Initialization of spline function. - void init(int _n, double _deriv0, double _derivN) { - N = _n; - deriv0 = _deriv0; - derivN = _derivN; - delete[] X; - delete[] Xs; - delete[] Y; - delete[] Y2; - delete[] Ydelta; - X = new double[N]; - Xs = new double[N]; - Y = new double[N]; - Y2 = new double[N]; - Ydelta = new double[N]; - } + /// Adds a knot to the spline. + void setKnot(int n, double x, double y) { X[n] = x; Y[n] = y; } - /// Adds a knot to the spline. - void setKnot(int n, double x, double y) { X[n] = x; Y[n] = y; } + /// Returns the number of knots. + int numKnots() const { return N; } - /// Returns the number of knots. - int numKnots() const { return N; } + /// Parses the spline knots from a text file. + void parse(FILE* fp, Error* error, bool isNewFormat); - /// Parses the spline knots from a text file. - void parse(FILE* fp, Error* error); + /// Calculates the second derivatives of the cubic spline. + void prepareSpline(Error* error); - /// Calculates the second derivatives of the cubic spline. - void prepareSpline(Error* error); - - /// Evaluates the spline function at position x. - inline double eval(double x) const - { - x -= xmin; - if(x <= 0.0) { // Left extrapolation. - return Y[0] + deriv0 * x; - } - else if(x >= xmax_shifted) { // Right extrapolation. - return Y[N-1] + derivN * (x - xmax_shifted); - } - else { + /// Evaluates the spline function at position x. 
+ inline double eval(double x) const + { + x -= xmin; + if(x <= 0.0) { // Left extrapolation. + return Y[0] + deriv0 * x; + } + else if(x >= xmax_shifted) { // Right extrapolation. + return Y[N-1] + derivN * (x - xmax_shifted); + } + else { #if SPLINE_MEAM_SUPPORT_NON_GRID_SPLINES - // Do interval search. - int klo = 0; - int khi = N-1; - while(khi - klo > 1) { - int k = (khi + klo) / 2; - if(Xs[k] > x) khi = k; - else klo = k; - } - double h = Xs[khi] - Xs[klo]; - // Do spline interpolation. - double a = (Xs[khi] - x)/h; - double b = 1.0 - a; // = (x - X[klo])/h - return a * Y[klo] + b * Y[khi] + ((a*a*a - a) * Y2[klo] + (b*b*b - b) * Y2[khi])*(h*h)/6.0; + // Do interval search. + int klo = 0; + int khi = N-1; + while(khi - klo > 1) { + int k = (khi + klo) / 2; + if(Xs[k] > x) khi = k; + else klo = k; + } + double h = Xs[khi] - Xs[klo]; + // Do spline interpolation. + double a = (Xs[khi] - x)/h; + double b = 1.0 - a; // = (x - X[klo])/h + return a * Y[klo] + b * Y[khi] + + ((a*a*a - a) * Y2[klo] + (b*b*b - b) * Y2[khi])*(h*h)/6.0; #else - // For a spline with grid points, we can directly calculate the interval X is in. - int klo = (int)(x / h); - int khi = klo + 1; - double a = Xs[khi] - x; - double b = h - a; - return Y[khi] - a * Ydelta[klo] + ((a*a - hsq) * a * Y2[klo] + (b*b - hsq) * b * Y2[khi]); + // For a spline with regular grid, we directly calculate the interval X is in. + int klo = (int)(x / h); + int khi = klo + 1; + double a = Xs[khi] - x; + double b = h - a; + return Y[khi] - a * Ydelta[klo] + + ((a*a - hsq) * a * Y2[klo] + (b*b - hsq) * b * Y2[khi]); #endif - } - } + } + } - /// Evaluates the spline function and its first derivative at position x. - inline double eval(double x, double& deriv) const - { - x -= xmin; - if(x <= 0.0) { // Left extrapolation. - deriv = deriv0; - return Y[0] + deriv0 * x; - } - else if(x >= xmax_shifted) { // Right extrapolation. 
- deriv = derivN; - return Y[N-1] + derivN * (x - xmax_shifted); - } - else { + /// Evaluates the spline function and its first derivative at position x. + inline double eval(double x, double& deriv) const + { + x -= xmin; + if(x <= 0.0) { // Left extrapolation. + deriv = deriv0; + return Y[0] + deriv0 * x; + } + else if(x >= xmax_shifted) { // Right extrapolation. + deriv = derivN; + return Y[N-1] + derivN * (x - xmax_shifted); + } + else { #if SPLINE_MEAM_SUPPORT_NON_GRID_SPLINES - // Do interval search. - int klo = 0; - int khi = N-1; - while(khi - klo > 1) { - int k = (khi + klo) / 2; - if(Xs[k] > x) khi = k; - else klo = k; - } - double h = Xs[khi] - Xs[klo]; - // Do spline interpolation. - double a = (Xs[khi] - x)/h; - double b = 1.0 - a; // = (x - X[klo])/h - deriv = (Y[khi] - Y[klo]) / h + ((3.0*b*b - 1.0) * Y2[khi] - (3.0*a*a - 1.0) * Y2[klo]) * h / 6.0; - return a * Y[klo] + b * Y[khi] + ((a*a*a - a) * Y2[klo] + (b*b*b - b) * Y2[khi]) * (h*h) / 6.0; + // Do interval search. + int klo = 0; + int khi = N-1; + while(khi - klo > 1) { + int k = (khi + klo) / 2; + if(Xs[k] > x) khi = k; + else klo = k; + } + double h = Xs[khi] - Xs[klo]; + // Do spline interpolation. + double a = (Xs[khi] - x)/h; + double b = 1.0 - a; // = (x - X[klo])/h + deriv = (Y[khi] - Y[klo]) / h + + ((3.0*b*b - 1.0) * Y2[khi] - + (3.0*a*a - 1.0) * Y2[klo]) * h / 6.0; + return a * Y[klo] + b * Y[khi] + + ((a*a*a - a) * Y2[klo] + + (b*b*b - b) * Y2[khi]) * (h*h) / 6.0; #else - // For a spline with grid points, we can directly calculate the interval X is in. - int klo = (int)(x / h); - int khi = klo + 1; - double a = Xs[khi] - x; - double b = h - a; - deriv = Ydelta[klo] + ((3.0*b*b - hsq) * Y2[khi] - (3.0*a*a - hsq) * Y2[klo]); - return Y[khi] - a * Ydelta[klo] + ((a*a - hsq) * a * Y2[klo] + (b*b - hsq) * b * Y2[khi]); + // For a spline with regular grid, we directly calculate the interval X is in. 
+ int klo = (int)(x / h); + int khi = klo + 1; + double a = Xs[khi] - x; + double b = h - a; + deriv = Ydelta[klo] + ((3.0*b*b - hsq) * Y2[khi] + - (3.0*a*a - hsq) * Y2[klo]); + return Y[khi] - a * Ydelta[klo] + + ((a*a - hsq) * a * Y2[klo] + (b*b - hsq) * b * Y2[khi]); #endif - } - } + } + } - /// Returns the number of bytes used by this function object. - double memory_usage() const { return sizeof(*this) + sizeof(X[0]) * N * 3; } + /// Returns the number of bytes used by this function object. + double memory_usage() const { return sizeof(*this) + sizeof(X[0]) * N * 3; } - /// Returns the cutoff radius of this function. - double cutoff() const { return X[N-1]; } + /// Returns the cutoff radius of this function. + double cutoff() const { return X[N-1]; } - /// Writes a Gnuplot script that plots the spline function. - void writeGnuplot(const char* filename, const char* title = NULL) const; + /// Writes a Gnuplot script that plots the spline function. + void writeGnuplot(const char* filename, const char* title = NULL) const; - /// Broadcasts the spline function parameters to all processors. - void communicate(MPI_Comm& world, int me); + /// Broadcasts the spline function parameters to all processors. + void communicate(MPI_Comm& world, int me); - private: - double* X; // Positions of spline knots - double* Xs; // Shifted positions of spline knots - double* Y; // Function values at spline knots - double* Y2; // Second derivatives at spline knots - double* Ydelta; // If this is a grid spline, Ydelta[i] = (Y[i+1]-Y[i])/h - int N; // Number of spline knots - double deriv0; // First derivative at knot 0 - double derivN; // First derivative at knot (N-1) - double xmin; // The beginning of the interval on which the spline function is defined. - double xmax; // The end of the interval on which the spline function is defined. - int isGridSpline; // Indicates that all spline knots are on a regular grid. 
- double h; // The distance between knots if this is a grid spline with equidistant knots. - double hsq; // The squared distance between knots if this is a grid spline with equidistant knots. - double xmax_shifted; // The end of the spline interval after it has been shifted to begin at X=0. - }; + private: + double* X; // Positions of spline knots + double* Xs; // Shifted positions of spline knots + double* Y; // Function values at spline knots + double* Y2; // Second derivatives at spline knots + double* Ydelta; // If this is a grid spline, Ydelta[i] = (Y[i+1]-Y[i])/h + int N; // Number of spline knots + double deriv0; // First derivative at knot 0 + double derivN; // First derivative at knot (N-1) + double xmin; // The beginning of the interval on which the spline function is defined. + double xmax; // The end of the interval on which the spline function is defined. + int isGridSpline;// Indicates that all spline knots are on a regular grid. + double h; // The distance between knots if this is a grid spline with equidistant knots. + double hsq; // The squared distance between knots if this is a grid spline with equidistant knots. + double xmax_shifted; // The end of the spline interval after it has been shifted to begin at X=0. + }; - /// Helper data structure for potential routine. - struct MEAM2Body { - int tag; - double r; - double f, fprime; - double del[3]; - }; + /// Helper data structure for potential routine. + struct MEAM2Body { + int tag; // holds the index of the second atom (j) + double r; + double f, fprime; + double del[3]; + }; - SplineFunction phi; // Phi(r_ij) - SplineFunction rho; // Rho(r_ij) - SplineFunction f; // f(r_ij) - SplineFunction U; // U(rho) - SplineFunction g; // g(cos_theta) - double zero_atom_energy; // Shift embedding energy by this value to make it zero for a single atom in vacuum. 
+ SplineFunction* phis; // Phi_i(r_ij) + SplineFunction* rhos; // Rho_ij(r_ij) + SplineFunction* fs; // f_i(r_ij) + SplineFunction* Us; // U_i(rho) + SplineFunction* gs; // g_ij(cos_theta) + double* zero_atom_energies; // Shift embedding energy by this value to make it zero for a single atom in vacuum. - double cutoff; // The cutoff radius + double cutoff; // The cutoff radius - double* Uprime_values; // Used for temporary storage of U'(rho) values - int nmax; // Size of temporary array. - int maxNeighbors; // The last maximum number of neighbors a single atoms has. - MEAM2Body* twoBodyInfo; // Temporary array. + double* Uprime_values; // Used for temporary storage of U'(rho) values + int nmax; // Size of temporary array. + int maxNeighbors; // The last maximum number of neighbors a single atoms has. + MEAM2Body* twoBodyInfo; // Temporary array. + + void read_file(const char* filename); + void allocate(); - void read_file(const char* filename); - void allocate(); }; } @@ -279,3 +300,5 @@ protected: * * See file 'pair_spline_meam.cpp' for history of changes. 
------------------------------------------------------------------------- */ + + diff --git a/src/USER-MISC/pair_momb.cpp b/src/USER-MISC/pair_momb.cpp index b7337c17a8..0d8d2e060e 100644 --- a/src/USER-MISC/pair_momb.cpp +++ b/src/USER-MISC/pair_momb.cpp @@ -199,7 +199,7 @@ void PairMomb::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/USER-MISC/pair_morse_smooth_linear.cpp b/src/USER-MISC/pair_morse_smooth_linear.cpp index ea33510b58..3e776e7e1c 100644 --- a/src/USER-MISC/pair_morse_smooth_linear.cpp +++ b/src/USER-MISC/pair_morse_smooth_linear.cpp @@ -104,7 +104,7 @@ void PairMorseSmoothLinear::compute(int eflag, int vflag) dexp = exp(-alpha[itype][jtype] * dr); fpartial = morse1[itype][jtype] * (dexp*dexp - dexp) / r; - fpair = factor_lj * ( fpartial - der_at_cutoff[itype][jtype] / r); + fpair = factor_lj * ( fpartial + der_at_cutoff[itype][jtype] / r); f[i][0] += delx*fpair; f[i][1] += dely*fpair; @@ -118,7 +118,7 @@ void PairMorseSmoothLinear::compute(int eflag, int vflag) if (eflag) { evdwl = d0[itype][jtype] * (dexp*dexp - 2.0*dexp) - offset[itype][jtype]; - evdwl += ( r - cut[itype][jtype] ) * der_at_cutoff[itype][jtype]; + evdwl -= ( r - cut[itype][jtype] ) * der_at_cutoff[itype][jtype]; evdwl *= factor_lj; } @@ -171,7 +171,7 @@ void PairMorseSmoothLinear::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } @@ -349,10 +349,11 @@ double PairMorseSmoothLinear::single(int i, int j, int itype, int jtype, double r = sqrt(rsq); dr = r - r0[itype][jtype]; dexp = exp(-alpha[itype][jtype] * dr); - fforce = factor_lj * morse1[itype][jtype] * (dexp*dexp - dexp) / r; + fforce = factor_lj * (morse1[itype][jtype] * 
(dexp*dexp - dexp) + + der_at_cutoff[itype][jtype]) / r; phi = d0[itype][jtype] * (dexp*dexp - 2.0*dexp) - offset[itype][jtype]; - dr = cut[itype][jtype] - r0[itype][jtype]; + dr = cut[itype][jtype] - r; phi += dr * der_at_cutoff[itype][jtype]; return factor_lj*phi; diff --git a/src/USER-MISC/pair_srp.cpp b/src/USER-MISC/pair_srp.cpp index 18ea4dc332..46c53349fa 100644 --- a/src/USER-MISC/pair_srp.cpp +++ b/src/USER-MISC/pair_srp.cpp @@ -408,7 +408,7 @@ void PairSRP::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= bptype; i++) - for (j = i+1; j <= bptype; j++) + for (j = i; j <= bptype; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/USER-MOLFILE/Install.sh b/src/USER-MOLFILE/Install.sh index 19fd3bd361..85885f66b9 100644 --- a/src/USER-MOLFILE/Install.sh +++ b/src/USER-MOLFILE/Install.sh @@ -29,7 +29,7 @@ action () { # all package files with no dependencies for file in *.cpp *.h; do - action $file + test -f ${file} && action $file done # edit 2 Makefile.package files to include/exclude package info diff --git a/src/USER-MOLFILE/README b/src/USER-MOLFILE/README index f6defed6ae..4437b587e4 100644 --- a/src/USER-MOLFILE/README +++ b/src/USER-MOLFILE/README @@ -2,8 +2,8 @@ This package provides a C++ interface class to the VMD molfile plugins, http://www.ks.uiuc.edu/Research/vmd/plugins/molfile, and a set of LAMMPS classes that use this interface. -Molfile plugins provide a consistent programming interface to read and -write file formats commonly used in molecular simulations. This +Molfile plugins provide a consistent programming interface to read +and write file formats commonly used in molecular simulations. This package only provides the interface code, not the plugins; these can be taken as precompiled binaries directly from a VMD installation that matches the platform of your LAMMPS executable. 
Using the plugin @@ -18,18 +18,5 @@ LAMMPS, you need to link with an appropriate system library, which is done using the settings in lib/molfile/Makefile.lammps. See that file and the lib/molfile/README file for more details. -NOTE: while the programming interface (API) to the molfile plugins is -backward compatible (i.e. you can expect to be able to compile this -package for plugins from newer VMD packages), the binary interface -(ABI) is not. So it is necessary to compile this package with the -molfile plugin header files (vmdplugin.h and molfile_plugin.h) taken -from the _same_ VMD installation that the (binary) plugin files are -taken from. These header files can be found inside the VMD -installation tree under: "plugins/include". - -For convenience, this package includes a set of header files that is -compatible with VMD 1.9 and 1.9.1 (the current version in June 2012) -and should be compilable with VMD versions back to about version 1.8.4 - The person who created this package is Axel Kohlmeyer at Temple U (akohlmey at gmail.com). Contact him directly if you have questions. diff --git a/src/USER-NC-DUMP/Install.sh b/src/USER-NETCDF/Install.sh similarity index 97% rename from src/USER-NC-DUMP/Install.sh rename to src/USER-NETCDF/Install.sh index 37ebd0a0a5..4d21f0f894 100644 --- a/src/USER-NC-DUMP/Install.sh +++ b/src/USER-NETCDF/Install.sh @@ -27,7 +27,7 @@ action () { } for file in *.cpp *.h; do - action $file + test -f ${file} && action $file done # edit 2 Makefile.package files to include/exclude package info diff --git a/src/USER-NC-DUMP/README b/src/USER-NETCDF/README similarity index 95% rename from src/USER-NC-DUMP/README rename to src/USER-NETCDF/README index c02e879c61..57dec5e4c8 100644 --- a/src/USER-NC-DUMP/README +++ b/src/USER-NETCDF/README @@ -1,7 +1,7 @@ -USER-NC-DUMP +USER-NETCDF ============ -This package provides the nc and (optionally) the nc/mpiio dump styles. +This package provides the netcdf and netcdf/mpiio dump styles. 
See the doc page for dump nc or dump nc/mpiio command for how to use them. Compiling these dump styles requires having the netCDF library installed on your system. See lib/netcdf/README for additional details. diff --git a/src/USER-NC-DUMP/dump_nc.cpp b/src/USER-NETCDF/dump_netcdf.cpp similarity index 97% rename from src/USER-NC-DUMP/dump_nc.cpp rename to src/USER-NETCDF/dump_netcdf.cpp index 7a66eb0224..bad90bdef3 100644 --- a/src/USER-NC-DUMP/dump_nc.cpp +++ b/src/USER-NETCDF/dump_netcdf.cpp @@ -32,14 +32,14 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ + #if defined(LMP_HAS_NETCDF) #include #include #include - #include - +#include "dump_netcdf.h" #include "atom.h" #include "comm.h" #include "compute.h" @@ -56,8 +56,6 @@ #include "variable.h" #include "force.h" -#include "dump_nc.h" - using namespace LAMMPS_NS; using namespace MathConst; @@ -91,7 +89,7 @@ const int THIS_IS_A_BIGINT = -4; /* ---------------------------------------------------------------------- */ -DumpNC::DumpNC(LAMMPS *lmp, int narg, char **arg) : +DumpNetCDF::DumpNetCDF(LAMMPS *lmp, int narg, char **arg) : DumpCustom(lmp, narg, arg) { // arrays for data rearrangement @@ -224,7 +222,7 @@ DumpNC::DumpNC(LAMMPS *lmp, int narg, char **arg) : /* ---------------------------------------------------------------------- */ -DumpNC::~DumpNC() +DumpNetCDF::~DumpNetCDF() { closefile(); @@ -238,7 +236,7 @@ DumpNC::~DumpNC() /* ---------------------------------------------------------------------- */ -void DumpNC::openfile() +void DumpNetCDF::openfile() { // now the computes and fixes have been initialized, so we can query // for the size of vector quantities @@ -594,12 +592,12 @@ void DumpNC::openfile() /* ---------------------------------------------------------------------- */ -void DumpNC::closefile() +void DumpNetCDF::closefile() { if (filewriter && singlefile_opened) { NCERR( nc_close(ncid) ); singlefile_opened = 0; 
- // append next time DumpNC::openfile is called + // append next time DumpNetCDF::openfile is called append_flag = 1; // write to next frame upon next open framei++; @@ -608,7 +606,7 @@ void DumpNC::closefile() /* ---------------------------------------------------------------------- */ -void DumpNC::write() +void DumpNetCDF::write() { // open file @@ -678,7 +676,7 @@ void DumpNC::write() /* ---------------------------------------------------------------------- */ -void DumpNC::write_header(bigint n) +void DumpNetCDF::write_header(bigint n) { size_t start[2]; @@ -753,7 +751,7 @@ void DumpNC::write_header(bigint n) write head of block (mass & element name) only if has atoms of the type ------------------------------------------------------------------------- */ -void DumpNC::write_data(int n, double *mybuf) +void DumpNetCDF::write_data(int n, double *mybuf) { size_t start[NC_MAX_VAR_DIMS], count[NC_MAX_VAR_DIMS]; ptrdiff_t stride[NC_MAX_VAR_DIMS]; @@ -761,18 +759,17 @@ void DumpNC::write_data(int n, double *mybuf) if (!int_buffer) { n_buffer = n; int_buffer = (int *) - memory->smalloc(n*sizeof(int), "DumpNC::int_buffer"); + memory->smalloc(n*sizeof(int),"dump::int_buffer"); double_buffer = (double *) - memory->smalloc(n*sizeof(double), "DumpNC::double_buffer"); + memory->smalloc(n*sizeof(double),"dump::double_buffer"); } if (n > n_buffer) { n_buffer = n; int_buffer = (int *) - memory->srealloc(int_buffer, n*sizeof(int), "DumpNC::int_buffer"); + memory->srealloc(int_buffer, n*sizeof(int),"dump::int_buffer"); double_buffer = (double *) - memory->srealloc(double_buffer, n*sizeof(double), - "DumpNC::double_buffer"); + memory->srealloc(double_buffer, n*sizeof(double),"dump::double_buffer"); } start[0] = framei-1; @@ -887,7 +884,7 @@ void DumpNC::write_data(int n, double *mybuf) /* ---------------------------------------------------------------------- */ -int DumpNC::modify_param(int narg, char **arg) +int DumpNetCDF::modify_param(int narg, char **arg) { int iarg = 0; if 
(strcmp(arg[iarg],"double") == 0) { @@ -925,17 +922,17 @@ int DumpNC::modify_param(int narg, char **arg) if (!strcmp(arg[iarg],"step")) { perframe[i].type = THIS_IS_A_BIGINT; - perframe[i].compute = &DumpNC::compute_step; + perframe[i].compute = &DumpNetCDF::compute_step; strcpy(perframe[i].name, arg[iarg]); } else if (!strcmp(arg[iarg],"elapsed")) { perframe[i].type = THIS_IS_A_BIGINT; - perframe[i].compute = &DumpNC::compute_elapsed; + perframe[i].compute = &DumpNetCDF::compute_elapsed; strcpy(perframe[i].name, arg[iarg]); } else if (!strcmp(arg[iarg],"elaplong")) { perframe[i].type = THIS_IS_A_BIGINT; - perframe[i].compute = &DumpNC::compute_elapsed_long; + perframe[i].compute = &DumpNetCDF::compute_elapsed_long; strcpy(perframe[i].name, arg[iarg]); } else { @@ -1036,7 +1033,7 @@ int DumpNC::modify_param(int narg, char **arg) /* ---------------------------------------------------------------------- */ -void DumpNC::write_prmtop() +void DumpNetCDF::write_prmtop() { char fn[1024]; char tmp[81]; @@ -1098,7 +1095,7 @@ void DumpNC::write_prmtop() /* ---------------------------------------------------------------------- */ -void DumpNC::ncerr(int err, const char *descr, int line) +void DumpNetCDF::ncerr(int err, const char *descr, int line) { if (err != NC_NOERR) { char errstr[1024]; @@ -1122,21 +1119,21 @@ void DumpNC::ncerr(int err, const char *descr, int line) customize a new keyword by adding a method ------------------------------------------------------------------------- */ -void DumpNC::compute_step(void *r) +void DumpNetCDF::compute_step(void *r) { *((bigint *) r) = update->ntimestep; } /* ---------------------------------------------------------------------- */ -void DumpNC::compute_elapsed(void *r) +void DumpNetCDF::compute_elapsed(void *r) { *((bigint *) r) = update->ntimestep - update->firststep; } /* ---------------------------------------------------------------------- */ -void DumpNC::compute_elapsed_long(void *r) +void 
DumpNetCDF::compute_elapsed_long(void *r) { *((bigint *) r) = update->ntimestep - update->beginstep; } diff --git a/src/USER-NC-DUMP/dump_nc.h b/src/USER-NETCDF/dump_netcdf.h similarity index 94% rename from src/USER-NC-DUMP/dump_nc.h rename to src/USER-NETCDF/dump_netcdf.h index 788a9368f9..daf4e9d0de 100644 --- a/src/USER-NC-DUMP/dump_nc.h +++ b/src/USER-NETCDF/dump_netcdf.h @@ -32,16 +32,17 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ + #if defined(LMP_HAS_NETCDF) #ifdef DUMP_CLASS -DumpStyle(nc,DumpNC) +DumpStyle(netcdf,DumpNetCDF) #else -#ifndef LMP_DUMP_NC_H -#define LMP_DUMP_NC_H +#ifndef LMP_DUMP_NETCDF_H +#define LMP_DUMP_NETCDF_H #include "dump_custom.h" @@ -50,10 +51,10 @@ namespace LAMMPS_NS { const int NC_FIELD_NAME_MAX = 100; const int DUMP_NC_MAX_DIMS = 100; -class DumpNC : public DumpCustom { +class DumpNetCDF : public DumpCustom { public: - DumpNC(class LAMMPS *, int, char **); - virtual ~DumpNC(); + DumpNetCDF(class LAMMPS *, int, char **); + virtual ~DumpNetCDF(); virtual void write(); private: @@ -68,7 +69,7 @@ class DumpNC : public DumpCustom { int ndumped; // number of enties written for this prop. }; - typedef void (DumpNC::*funcptr_t)(void *); + typedef void (DumpNetCDF::*funcptr_t)(void *); // per-frame quantities (variables, fixes or computes) struct nc_perframe_t { diff --git a/src/USER-NC-DUMP/dump_nc_mpiio.cpp b/src/USER-NETCDF/dump_netcdf_mpiio.cpp similarity index 96% rename from src/USER-NC-DUMP/dump_nc_mpiio.cpp rename to src/USER-NETCDF/dump_netcdf_mpiio.cpp index 6b26014030..2e9ec274a5 100644 --- a/src/USER-NC-DUMP/dump_nc_mpiio.cpp +++ b/src/USER-NETCDF/dump_netcdf_mpiio.cpp @@ -32,14 +32,14 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ + #if defined(LMP_HAS_PNETCDF) #include #include #include - #include - +#include "dump_netcdf_mpiio.h" #include "atom.h" #include "comm.h" #include "compute.h" @@ -56,8 +56,6 @@ #include "variable.h" #include "force.h" -#include "dump_nc_mpiio.h" - using namespace LAMMPS_NS; using namespace MathConst; @@ -91,7 +89,7 @@ const int THIS_IS_A_BIGINT = -4; /* ---------------------------------------------------------------------- */ -DumpNCMPIIO::DumpNCMPIIO(LAMMPS *lmp, int narg, char **arg) : +DumpNetCDFMPIIO::DumpNetCDFMPIIO(LAMMPS *lmp, int narg, char **arg) : DumpCustom(lmp, narg, arg) { // arrays for data rearrangement @@ -217,7 +215,7 @@ DumpNCMPIIO::DumpNCMPIIO(LAMMPS *lmp, int narg, char **arg) : /* ---------------------------------------------------------------------- */ -DumpNCMPIIO::~DumpNCMPIIO() +DumpNetCDFMPIIO::~DumpNetCDFMPIIO() { closefile(); @@ -231,7 +229,7 @@ DumpNCMPIIO::~DumpNCMPIIO() /* ---------------------------------------------------------------------- */ -void DumpNCMPIIO::openfile() +void DumpNetCDFMPIIO::openfile() { // now the computes and fixes have been initialized, so we can query // for the size of vector quantities @@ -570,12 +568,12 @@ void DumpNCMPIIO::openfile() /* ---------------------------------------------------------------------- */ -void DumpNCMPIIO::closefile() +void DumpNetCDFMPIIO::closefile() { if (singlefile_opened) { NCERR( ncmpi_close(ncid) ); singlefile_opened = 0; - // append next time DumpNCMPIIO::openfile is called + // append next time DumpNetCDFMPIIO::openfile is called append_flag = 1; // write to next frame upon next open framei++; @@ -584,7 +582,7 @@ void DumpNCMPIIO::closefile() /* ---------------------------------------------------------------------- */ -void DumpNCMPIIO::write() +void DumpNetCDFMPIIO::write() { // open file @@ -687,7 +685,7 @@ void DumpNCMPIIO::write() /* 
---------------------------------------------------------------------- */ -void DumpNCMPIIO::write_time_and_cell() +void DumpNetCDFMPIIO::write_time_and_cell() { MPI_Offset start[2]; @@ -759,7 +757,7 @@ void DumpNCMPIIO::write_time_and_cell() write head of block (mass & element name) only if has atoms of the type ------------------------------------------------------------------------- */ -void DumpNCMPIIO::write_data(int n, double *mybuf) +void DumpNetCDFMPIIO::write_data(int n, double *mybuf) { MPI_Offset start[NC_MAX_VAR_DIMS], count[NC_MAX_VAR_DIMS]; MPI_Offset stride[NC_MAX_VAR_DIMS]; @@ -767,19 +765,18 @@ void DumpNCMPIIO::write_data(int n, double *mybuf) if (!int_buffer) { n_buffer = std::max(1, n); int_buffer = (int *) - memory->smalloc(n_buffer*sizeof(int), "DumpNCMPIIO::int_buffer"); + memory->smalloc(n_buffer*sizeof(int),"dump::int_buffer"); double_buffer = (double *) - memory->smalloc(n_buffer*sizeof(double), "DumpNCMPIIO::double_buffer"); + memory->smalloc(n_buffer*sizeof(double),"dump::double_buffer"); } if (n > n_buffer) { n_buffer = std::max(1, n); int_buffer = (int *) - memory->srealloc(int_buffer, n_buffer*sizeof(int), - "DumpNCMPIIO::int_buffer"); + memory->srealloc(int_buffer, n_buffer*sizeof(int),"dump::int_buffer"); double_buffer = (double *) memory->srealloc(double_buffer, n_buffer*sizeof(double), - "DumpNCMPIIO::double_buffer"); + "dump::double_buffer"); } start[0] = framei-1; @@ -882,7 +879,7 @@ void DumpNCMPIIO::write_data(int n, double *mybuf) /* ---------------------------------------------------------------------- */ -int DumpNCMPIIO::modify_param(int narg, char **arg) +int DumpNetCDFMPIIO::modify_param(int narg, char **arg) { int iarg = 0; if (strcmp(arg[iarg],"double") == 0) { @@ -920,17 +917,17 @@ int DumpNCMPIIO::modify_param(int narg, char **arg) if (!strcmp(arg[iarg],"step")) { perframe[i].type = THIS_IS_A_BIGINT; - perframe[i].compute = &DumpNCMPIIO::compute_step; + perframe[i].compute = &DumpNetCDFMPIIO::compute_step; 
strcpy(perframe[i].name, arg[iarg]); } else if (!strcmp(arg[iarg],"elapsed")) { perframe[i].type = THIS_IS_A_BIGINT; - perframe[i].compute = &DumpNCMPIIO::compute_elapsed; + perframe[i].compute = &DumpNetCDFMPIIO::compute_elapsed; strcpy(perframe[i].name, arg[iarg]); } else if (!strcmp(arg[iarg],"elaplong")) { perframe[i].type = THIS_IS_A_BIGINT; - perframe[i].compute = &DumpNCMPIIO::compute_elapsed_long; + perframe[i].compute = &DumpNetCDFMPIIO::compute_elapsed_long; strcpy(perframe[i].name, arg[iarg]); } else { @@ -1031,7 +1028,7 @@ int DumpNCMPIIO::modify_param(int narg, char **arg) /* ---------------------------------------------------------------------- */ -void DumpNCMPIIO::ncerr(int err, const char *descr, int line) +void DumpNetCDFMPIIO::ncerr(int err, const char *descr, int line) { if (err != NC_NOERR) { char errstr[1024]; @@ -1055,21 +1052,21 @@ void DumpNCMPIIO::ncerr(int err, const char *descr, int line) customize a new keyword by adding a method ------------------------------------------------------------------------- */ -void DumpNCMPIIO::compute_step(void *r) +void DumpNetCDFMPIIO::compute_step(void *r) { *((bigint *) r) = update->ntimestep; } /* ---------------------------------------------------------------------- */ -void DumpNCMPIIO::compute_elapsed(void *r) +void DumpNetCDFMPIIO::compute_elapsed(void *r) { *((bigint *) r) = update->ntimestep - update->firststep; } /* ---------------------------------------------------------------------- */ -void DumpNCMPIIO::compute_elapsed_long(void *r) +void DumpNetCDFMPIIO::compute_elapsed_long(void *r) { *((bigint *) r) = update->ntimestep - update->beginstep; } diff --git a/src/USER-NC-DUMP/dump_nc_mpiio.h b/src/USER-NETCDF/dump_netcdf_mpiio.h similarity index 95% rename from src/USER-NC-DUMP/dump_nc_mpiio.h rename to src/USER-NETCDF/dump_netcdf_mpiio.h index 5e36335e64..6f5b00b033 100644 --- a/src/USER-NC-DUMP/dump_nc_mpiio.h +++ b/src/USER-NETCDF/dump_netcdf_mpiio.h @@ -32,16 +32,17 @@ See the README file 
in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ + #if defined(LMP_HAS_PNETCDF) #ifdef DUMP_CLASS -DumpStyle(nc/mpiio,DumpNCMPIIO) +DumpStyle(netcdf/mpiio,DumpNetCDFMPIIO) #else -#ifndef LMP_DUMP_NC_MPIIO_H -#define LMP_DUMP_NC_MPIIO_H +#ifndef LMP_DUMP_NETCDF_MPIIO_H +#define LMP_DUMP_NETCDF_MPIIO_H #include "dump_custom.h" @@ -50,10 +51,10 @@ namespace LAMMPS_NS { const int NC_MPIIO_FIELD_NAME_MAX = 100; const int DUMP_NC_MPIIO_MAX_DIMS = 100; -class DumpNCMPIIO : public DumpCustom { +class DumpNetCDFMPIIO : public DumpCustom { public: - DumpNCMPIIO(class LAMMPS *, int, char **); - virtual ~DumpNCMPIIO(); + DumpNetCDFMPIIO(class LAMMPS *, int, char **); + virtual ~DumpNetCDFMPIIO(); virtual void write(); private: diff --git a/src/USER-OMP/angle_sdk_omp.h b/src/USER-OMP/angle_sdk_omp.h index 9ab75904ce..c041c2ecc2 100644 --- a/src/USER-OMP/angle_sdk_omp.h +++ b/src/USER-OMP/angle_sdk_omp.h @@ -18,7 +18,6 @@ #ifdef ANGLE_CLASS AngleStyle(sdk/omp,AngleSDKOMP) -AngleStyle(cg/cmm/omp,AngleSDKOMP) #else diff --git a/src/USER-OMP/fix_nh_asphere_omp.cpp b/src/USER-OMP/fix_nh_asphere_omp.cpp index f11b1f5e94..e710e6f12e 100644 --- a/src/USER-OMP/fix_nh_asphere_omp.cpp +++ b/src/USER-OMP/fix_nh_asphere_omp.cpp @@ -183,12 +183,13 @@ void FixNHAsphereOMP::nh_v_temp() #pragma omp parallel for default(none) private(i) schedule(static) #endif for (i = 0; i < nlocal; i++) { + double buf[3]; if (mask[i] & groupbit) { - temperature->remove_bias(i,&v[i].x); + temperature->remove_bias_thr(i,&v[i].x,buf); v[i].x *= factor_eta; v[i].y *= factor_eta; v[i].z *= factor_eta; - temperature->restore_bias(i,&v[i].x); + temperature->restore_bias_thr(i,&v[i].x,buf); angmom[i].x *= factor_eta; angmom[i].y *= factor_eta; angmom[i].z *= factor_eta; diff --git a/src/USER-OMP/fix_nh_omp.cpp b/src/USER-OMP/fix_nh_omp.cpp index e77f18304d..ccb6090378 100644 --- a/src/USER-OMP/fix_nh_omp.cpp +++ b/src/USER-OMP/fix_nh_omp.cpp @@ -261,8 
+261,9 @@ void FixNHOMP::nh_v_press() #pragma omp parallel for default(none) private(i) schedule(static) #endif for (i = 0; i < nlocal; i++) { + double buf[3]; if (mask[i] & groupbit) { - temperature->remove_bias(i,&v[i].x); + temperature->remove_bias_thr(i,&v[i].x,buf); v[i].x *= factor0; v[i].y *= factor1; v[i].z *= factor2; @@ -273,7 +274,7 @@ void FixNHOMP::nh_v_press() v[i].x *= factor0; v[i].y *= factor1; v[i].z *= factor2; - temperature->restore_bias(i,&v[i].x); + temperature->restore_bias_thr(i,&v[i].x,buf); } } } @@ -373,12 +374,13 @@ void FixNHOMP::nh_v_temp() #pragma omp parallel for default(none) private(i) schedule(static) #endif for (i = 0; i < nlocal; i++) { + double buf[3]; if (mask[i] & groupbit) { - temperature->remove_bias(i,&v[i].x); + temperature->remove_bias_thr(i,&v[i].x,buf); v[i].x *= factor_eta; v[i].y *= factor_eta; v[i].z *= factor_eta; - temperature->restore_bias(i,&v[i].x); + temperature->restore_bias_thr(i,&v[i].x,buf); } } } diff --git a/src/USER-OMP/fix_nh_sphere_omp.cpp b/src/USER-OMP/fix_nh_sphere_omp.cpp index cd06c581ca..dd0530dcfa 100644 --- a/src/USER-OMP/fix_nh_sphere_omp.cpp +++ b/src/USER-OMP/fix_nh_sphere_omp.cpp @@ -137,12 +137,13 @@ void FixNHSphereOMP::nh_v_temp() #pragma omp parallel for default(none) private(i) schedule(static) #endif for (i = 0; i < nlocal; i++) { + double buf[3]; if (mask[i] & groupbit) { - temperature->remove_bias(i,&v[i].x); + temperature->remove_bias_thr(i,&v[i].x,buf); v[i].x *= factor_eta; v[i].y *= factor_eta; v[i].z *= factor_eta; - temperature->restore_bias(i,&v[i].x); + temperature->restore_bias_thr(i,&v[i].x,buf); omega[i].x *= factor_eta; omega[i].y *= factor_eta; omega[i].z *= factor_eta; diff --git a/src/USER-OMP/fix_nvt_sllod_omp.cpp b/src/USER-OMP/fix_nvt_sllod_omp.cpp index f233dc459b..a829d49c0f 100644 --- a/src/USER-OMP/fix_nvt_sllod_omp.cpp +++ b/src/USER-OMP/fix_nvt_sllod_omp.cpp @@ -121,16 +121,16 @@ void FixNVTSllodOMP::nh_v_temp() #pragma omp parallel for default(none) 
private(i) shared(h_two) schedule(static) #endif for (i = 0; i < nlocal; i++) { - double vdelu0,vdelu1,vdelu2; + double vdelu0,vdelu1,vdelu2,buf[3]; if (mask[i] & groupbit) { vdelu0 = h_two[0]*v[i].x + h_two[5]*v[i].y + h_two[4]*v[i].z; vdelu1 = h_two[1]*v[i].y + h_two[3]*v[i].z; vdelu2 = h_two[2]*v[i].z; - temperature->remove_bias(i,&v[i].x); + temperature->remove_bias_thr(i,&v[i].x,buf); v[i].x = v[i].x*factor_eta - dthalf*vdelu0; v[i].y = v[i].y*factor_eta - dthalf*vdelu1; v[i].z = v[i].z*factor_eta - dthalf*vdelu2; - temperature->restore_bias(i,&v[i].x); + temperature->restore_bias_thr(i,&v[i].x,buf); } } } diff --git a/src/USER-OMP/improper_ring_omp.cpp b/src/USER-OMP/improper_ring_omp.cpp index bd7593c51a..4eadc83183 100644 --- a/src/USER-OMP/improper_ring_omp.cpp +++ b/src/USER-OMP/improper_ring_omp.cpp @@ -206,7 +206,7 @@ void ImproperRingOMP::eval(int nfrom, int nto, ThrData * const thr) cfact2 = ckjji / ckjkj; cfact3 = ckjji / cjiji; - /* Calculate the force acted on the thrid atom of the angle. */ + /* Calculate the force acted on the third atom of the angle. 
*/ fkx = cfact2 * bvec2x[icomb] - bvec1x[icomb]; fky = cfact2 * bvec2y[icomb] - bvec1y[icomb]; fkz = cfact2 * bvec2z[icomb] - bvec1z[icomb]; diff --git a/src/USER-OMP/npair_full_bin_ghost_omp.cpp b/src/USER-OMP/npair_full_bin_ghost_omp.cpp index 7f7239fe63..b915aca002 100644 --- a/src/USER-OMP/npair_full_bin_ghost_omp.cpp +++ b/src/USER-OMP/npair_full_bin_ghost_omp.cpp @@ -97,7 +97,7 @@ void NPairFullBinGhostOmp::build(NeighList *list) // no molecular test when i = ghost atom if (i < nlocal) { - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { if (i == j) continue; diff --git a/src/USER-OMP/npair_full_bin_omp.cpp b/src/USER-OMP/npair_full_bin_omp.cpp index ad9e48784e..e1f75c06e2 100644 --- a/src/USER-OMP/npair_full_bin_omp.cpp +++ b/src/USER-OMP/npair_full_bin_omp.cpp @@ -90,7 +90,7 @@ void NPairFullBinOmp::build(NeighList *list) // loop over all atoms in surrounding bins in stencil including self // skip i = j - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { diff --git a/src/USER-OMP/npair_full_multi_omp.cpp b/src/USER-OMP/npair_full_multi_omp.cpp index eb0153d63f..9bc196e17a 100644 --- a/src/USER-OMP/npair_full_multi_omp.cpp +++ b/src/USER-OMP/npair_full_multi_omp.cpp @@ -94,7 +94,7 @@ void NPairFullMultiOmp::build(NeighList *list) // skip if i,j neighbor cutoff is less than bin distance // skip i = j - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; s = stencil_multi[itype]; distsq = distsq_multi[itype]; cutsq = cutneighsq[itype]; diff --git a/src/USER-OMP/npair_half_bin_newtoff_ghost_omp.cpp b/src/USER-OMP/npair_half_bin_newtoff_ghost_omp.cpp index e46afebb0d..05763c3d68 100644 --- a/src/USER-OMP/npair_half_bin_newtoff_ghost_omp.cpp +++ b/src/USER-OMP/npair_half_bin_newtoff_ghost_omp.cpp @@ -103,7 +103,7 @@ void NPairHalfBinNewtoffGhostOmp::build(NeighList *list) // no molecular test when 
i = ghost atom if (i < nlocal) { - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { diff --git a/src/USER-OMP/npair_half_bin_newtoff_omp.cpp b/src/USER-OMP/npair_half_bin_newtoff_omp.cpp index 99698b1d30..ff74b54d7d 100644 --- a/src/USER-OMP/npair_half_bin_newtoff_omp.cpp +++ b/src/USER-OMP/npair_half_bin_newtoff_omp.cpp @@ -94,7 +94,7 @@ void NPairHalfBinNewtoffOmp::build(NeighList *list) // stores own/own pairs only once // stores own/ghost pairs on both procs - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { diff --git a/src/USER-OMP/npair_half_bin_newton_omp.cpp b/src/USER-OMP/npair_half_bin_newton_omp.cpp index 33d78fe55a..f7d969ba27 100644 --- a/src/USER-OMP/npair_half_bin_newton_omp.cpp +++ b/src/USER-OMP/npair_half_bin_newton_omp.cpp @@ -130,7 +130,7 @@ void NPairHalfBinNewtonOmp::build(NeighList *list) // loop over all atoms in other bins in stencil, store every pair - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { jtype = type[j]; diff --git a/src/USER-OMP/npair_half_bin_newton_tri_omp.cpp b/src/USER-OMP/npair_half_bin_newton_tri_omp.cpp index 9eb9612235..c843d623cd 100644 --- a/src/USER-OMP/npair_half_bin_newton_tri_omp.cpp +++ b/src/USER-OMP/npair_half_bin_newton_tri_omp.cpp @@ -94,7 +94,7 @@ void NPairHalfBinNewtonTriOmp::build(NeighList *list) // (equal zyx and j <= i) // latter excludes self-self interaction but allows superposed atoms - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { if (x[j][2] < ztmp) continue; diff --git a/src/USER-OMP/npair_half_multi_newtoff_omp.cpp b/src/USER-OMP/npair_half_multi_newtoff_omp.cpp index 37dc805857..705d1b8d9f 100644 --- 
a/src/USER-OMP/npair_half_multi_newtoff_omp.cpp +++ b/src/USER-OMP/npair_half_multi_newtoff_omp.cpp @@ -97,7 +97,7 @@ void NPairHalfMultiNewtoffOmp::build(NeighList *list) // stores own/own pairs only once // stores own/ghost pairs on both procs - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; s = stencil_multi[itype]; distsq = distsq_multi[itype]; cutsq = cutneighsq[itype]; diff --git a/src/USER-OMP/npair_half_multi_newton_omp.cpp b/src/USER-OMP/npair_half_multi_newton_omp.cpp index 9719911afa..f16dd027a0 100644 --- a/src/USER-OMP/npair_half_multi_newton_omp.cpp +++ b/src/USER-OMP/npair_half_multi_newton_omp.cpp @@ -131,7 +131,7 @@ void NPairHalfMultiNewtonOmp::build(NeighList *list) // loop over all atoms in other bins in stencil, store every pair // skip if i,j neighbor cutoff is less than bin distance - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; s = stencil_multi[itype]; distsq = distsq_multi[itype]; cutsq = cutneighsq[itype]; diff --git a/src/USER-OMP/npair_half_multi_newton_tri_omp.cpp b/src/USER-OMP/npair_half_multi_newton_tri_omp.cpp index 717a709386..f66cf194e7 100644 --- a/src/USER-OMP/npair_half_multi_newton_tri_omp.cpp +++ b/src/USER-OMP/npair_half_multi_newton_tri_omp.cpp @@ -99,7 +99,7 @@ void NPairHalfMultiNewtonTriOmp::build(NeighList *list) // (equal zyx and j <= i) // latter excludes self-self interaction but allows superposed atoms - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; s = stencil_multi[itype]; distsq = distsq_multi[itype]; cutsq = cutneighsq[itype]; diff --git a/src/USER-OMP/npair_half_respa_bin_newtoff_omp.cpp b/src/USER-OMP/npair_half_respa_bin_newtoff_omp.cpp index 287f11efa7..12780fa4a3 100644 --- a/src/USER-OMP/npair_half_respa_bin_newtoff_omp.cpp +++ b/src/USER-OMP/npair_half_respa_bin_newtoff_omp.cpp @@ -117,7 +117,7 @@ void NPairHalfRespaBinNewtoffOmp::build(NeighList *list) xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; if (moltemplate) { imol = molindex[i]; iatom = 
molatom[i]; diff --git a/src/USER-OMP/npair_half_respa_bin_newton_omp.cpp b/src/USER-OMP/npair_half_respa_bin_newton_omp.cpp index 30256bd20d..b9a6364242 100644 --- a/src/USER-OMP/npair_half_respa_bin_newton_omp.cpp +++ b/src/USER-OMP/npair_half_respa_bin_newton_omp.cpp @@ -176,7 +176,7 @@ void NPairHalfRespaBinNewtonOmp::build(NeighList *list) // loop over all atoms in other bins in stencil, store every pair - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { jtype = type[j]; diff --git a/src/USER-OMP/npair_half_respa_bin_newton_tri_omp.cpp b/src/USER-OMP/npair_half_respa_bin_newton_tri_omp.cpp index 27d02000c5..bc03972d85 100644 --- a/src/USER-OMP/npair_half_respa_bin_newton_tri_omp.cpp +++ b/src/USER-OMP/npair_half_respa_bin_newton_tri_omp.cpp @@ -128,7 +128,7 @@ void NPairHalfRespaBinNewtonTriOmp::build(NeighList *list) // (equal zyx and j <= i) // latter excludes self-self interaction but allows superposed atoms - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { if (x[j][2] < ztmp) continue; diff --git a/src/USER-OMP/npair_half_size_bin_newtoff_omp.cpp b/src/USER-OMP/npair_half_size_bin_newtoff_omp.cpp index 699d347db5..dbb62e96ef 100644 --- a/src/USER-OMP/npair_half_size_bin_newtoff_omp.cpp +++ b/src/USER-OMP/npair_half_size_bin_newtoff_omp.cpp @@ -113,7 +113,7 @@ void NPairHalfSizeBinNewtoffOmp::build(NeighList *list) ytmp = x[i][1]; ztmp = x[i][2]; radi = radius[i]; - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; // loop over all atoms in surrounding bins in stencil including self // only store pair if i < j diff --git a/src/USER-OMP/npair_half_size_bin_newton_omp.cpp b/src/USER-OMP/npair_half_size_bin_newton_omp.cpp index 0d7e4e68da..2c26c7952c 100644 --- a/src/USER-OMP/npair_half_size_bin_newton_omp.cpp +++ b/src/USER-OMP/npair_half_size_bin_newton_omp.cpp @@ -168,7 +168,7 @@ void 
NPairHalfSizeBinNewtonOmp::build(NeighList *list) // loop over all atoms in other bins in stencil, store every pair - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue; diff --git a/src/USER-OMP/npair_half_size_bin_newton_tri_omp.cpp b/src/USER-OMP/npair_half_size_bin_newton_tri_omp.cpp index 7463a6aba6..bf273f545f 100644 --- a/src/USER-OMP/npair_half_size_bin_newton_tri_omp.cpp +++ b/src/USER-OMP/npair_half_size_bin_newton_tri_omp.cpp @@ -84,7 +84,7 @@ void NPairHalfSizeBinNewtonTriOmp::build(NeighList *list) // (equal zyx and j <= i) // latter excludes self-self interaction but allows superposed atoms - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { if (x[j][2] < ztmp) continue; diff --git a/src/USER-OMP/pair_airebo_omp.cpp b/src/USER-OMP/pair_airebo_omp.cpp index 84821f1c8c..f3aa9986fe 100644 --- a/src/USER-OMP/pair_airebo_omp.cpp +++ b/src/USER-OMP/pair_airebo_omp.cpp @@ -1622,7 +1622,7 @@ double PairAIREBOOMP::bondorder_thr(int i, int j, double rij[3], double rijmag, (1.0-tspjik)*(1.0-tspijl); aaa1 = -prefactor*(1.0-square(om1234)) * (1.0-tspjik)*(1.0-tspijl); - aaa2 = aaa1*w21*w34; + aaa2 = -prefactor*(1.0-square(om1234)) * w21*w34; at2 = aa*cwnum; fcijpc = (-dt1dij*at2)+(aaa2*dtsjik*dctij*(1.0-tspijl)) + @@ -2550,7 +2550,7 @@ double PairAIREBOOMP::bondorderLJ_thr(int i, int j, double rij[3], double rijmag (1.0-tspjik)*(1.0-tspijl); aaa1 = -prefactor*(1.0-square(om1234)) * (1.0-tspjik)*(1.0-tspijl); - aaa2 = aaa1*w21*w34; + aaa2 = -prefactor*(1.0-square(om1234)) * w21*w34; at2 = aa*cwnum; fcikpc = (-dt1dik*at2)+(aaa2*dtsjik*dctik*(1.0-tspijl)); diff --git a/src/USER-OMP/pair_lj_sdk_coul_long_omp.h b/src/USER-OMP/pair_lj_sdk_coul_long_omp.h index a615efb507..1886d2c7b5 100644 --- 
a/src/USER-OMP/pair_lj_sdk_coul_long_omp.h +++ b/src/USER-OMP/pair_lj_sdk_coul_long_omp.h @@ -18,7 +18,6 @@ #ifdef PAIR_CLASS PairStyle(lj/sdk/coul/long/omp,PairLJSDKCoulLongOMP) -PairStyle(cg/cmm/coul/long/omp,PairLJSDKCoulLongOMP) #else diff --git a/src/USER-OMP/pair_lj_sdk_coul_msm_omp.h b/src/USER-OMP/pair_lj_sdk_coul_msm_omp.h index 9e4a922c39..9841408b8a 100644 --- a/src/USER-OMP/pair_lj_sdk_coul_msm_omp.h +++ b/src/USER-OMP/pair_lj_sdk_coul_msm_omp.h @@ -18,7 +18,6 @@ #ifdef PAIR_CLASS PairStyle(lj/sdk/coul/msm/omp,PairLJSDKCoulMSMOMP) -PairStyle(cg/cmm/coul/msm/omp,PairLJSDKCoulMSMOMP) #else @@ -54,4 +53,4 @@ E: Must use 'kspace_modify pressure/scalar no' with OMP MSM Pair styles The kspace scalar pressure option is not (yet) compatible with OMP MSM Pair styles. -*/ \ No newline at end of file +*/ diff --git a/src/USER-OMP/pair_lj_sdk_omp.h b/src/USER-OMP/pair_lj_sdk_omp.h index c3837fb683..36c913252a 100644 --- a/src/USER-OMP/pair_lj_sdk_omp.h +++ b/src/USER-OMP/pair_lj_sdk_omp.h @@ -18,7 +18,6 @@ #ifdef PAIR_CLASS PairStyle(lj/sdk/omp,PairLJSDKOMP) -PairStyle(cg/cmm/omp,PairLJSDKOMP) #else diff --git a/src/USER-OMP/pair_meam_spline_omp.cpp b/src/USER-OMP/pair_meam_spline_omp.cpp index 98e1541319..4333d3b2a9 100644 --- a/src/USER-OMP/pair_meam_spline_omp.cpp +++ b/src/USER-OMP/pair_meam_spline_omp.cpp @@ -110,6 +110,7 @@ void PairMEAMSplineOMP::eval(int iifrom, int iito, ThrData * const thr) const int nthreads = comm->nthreads; const int nlocal = atom->nlocal; const int nall = nlocal + atom->nghost; + const int ntypes = atom->ntypes; const double cutforcesq = cutoff*cutoff; @@ -135,33 +136,38 @@ void PairMEAMSplineOMP::eval(int iifrom, int iito, ThrData * const thr) const double rij_sq = jdelx*jdelx + jdely*jdely + jdelz*jdelz; if (rij_sq < cutforcesq) { + const int jtype = atom->type[j]; const double rij = sqrt(rij_sq); double partial_sum = 0; nextTwoBodyInfo->tag = j; nextTwoBodyInfo->r = rij; - nextTwoBodyInfo->f = f.eval(rij, nextTwoBodyInfo->fprime); 
+ nextTwoBodyInfo->f = fs[i_to_potl(jtype)].eval(rij, nextTwoBodyInfo->fprime); nextTwoBodyInfo->del[0] = jdelx / rij; nextTwoBodyInfo->del[1] = jdely / rij; nextTwoBodyInfo->del[2] = jdelz / rij; for(int kk = 0; kk < numBonds; kk++) { const MEAM2Body& bondk = myTwoBodyInfo[kk]; - double cos_theta = (nextTwoBodyInfo->del[0]*bondk.del[0] + nextTwoBodyInfo->del[1]*bondk.del[1] + nextTwoBodyInfo->del[2]*bondk.del[2]); - partial_sum += bondk.f * g.eval(cos_theta); + double cos_theta = (nextTwoBodyInfo->del[0]*bondk.del[0] + + nextTwoBodyInfo->del[1]*bondk.del[1] + + nextTwoBodyInfo->del[2]*bondk.del[2]); + partial_sum += bondk.f * gs[ij_to_potl(jtype,atom->type[bondk.tag],ntypes)].eval(cos_theta); } rho_value += nextTwoBodyInfo->f * partial_sum; - rho_value += rho.eval(rij); + rho_value += rhos[i_to_potl(jtype)].eval(rij); numBonds++; nextTwoBodyInfo++; } } + const int itype = atom->type[i]; // Compute embedding energy and its derivative. double Uprime_i; - double embeddingEnergy = U.eval(rho_value, Uprime_i) - zero_atom_energy; + double embeddingEnergy = Us[i_to_potl(itype)].eval(rho_value, Uprime_i) + - zero_atom_energies[i_to_potl(itype)]; Uprime_thr[i] = Uprime_i; if (EFLAG) e_tally_thr(this,i,i,nlocal,1/*newton_pair*/,embeddingEnergy,0.0,thr); @@ -173,6 +179,7 @@ void PairMEAMSplineOMP::eval(int iifrom, int iito, ThrData * const thr) const MEAM2Body bondj = myTwoBodyInfo[jj]; const double rij = bondj.r; const int j = bondj.tag; + const int jtype = atom->type[j]; const double f_rij_prime = bondj.fprime; const double f_rij = bondj.f; @@ -187,7 +194,7 @@ void PairMEAMSplineOMP::eval(int iifrom, int iito, ThrData * const thr) + bondj.del[1]*bondk->del[1] + bondj.del[2]*bondk->del[2]); double g_prime; - double g_value = g.eval(cos_theta, g_prime); + double g_value = gs[ij_to_potl(jtype,atom->type[bondk->tag],ntypes)].eval(cos_theta, g_prime); const double f_rik_prime = bondk->fprime; const double f_rik = bondk->f; @@ -279,6 +286,7 @@ void PairMEAMSplineOMP::eval(int 
iifrom, int iito, ThrData * const thr) const double ztmp = x[i][2]; const int* const jlist = firstneigh_half[i]; const int jnum = numneigh_half[i]; + const int itype = atom->type[i]; for(int jj = 0; jj < jnum; jj++) { const int j = jlist[jj] & NEIGHMASK; @@ -291,13 +299,16 @@ void PairMEAMSplineOMP::eval(int iifrom, int iito, ThrData * const thr) if(rij_sq < cutforcesq) { double rij = sqrt(rij_sq); + const int jtype = atom->type[j]; - double rho_prime; - rho.eval(rij, rho_prime); - double fpair = rho_prime * (Uprime_values[i] + Uprime_values[j]); + double rho_prime_i,rho_prime_j; + rhos[i_to_potl(itype)].eval(rij,rho_prime_i); + rhos[i_to_potl(jtype)].eval(rij,rho_prime_j); + double fpair = rho_prime_j * Uprime_values[i] + rho_prime_i*Uprime_values[j]; double pair_pot_deriv; - double pair_pot = phi.eval(rij, pair_pot_deriv); + double pair_pot = phis[ij_to_potl(itype,jtype,ntypes)].eval(rij, pair_pot_deriv); + fpair += pair_pot_deriv; // Divide by r_ij to get forces from gradient. diff --git a/src/USER-QMMM/Install.sh b/src/USER-QMMM/Install.sh index 089b880a77..4bede66d80 100755 --- a/src/USER-QMMM/Install.sh +++ b/src/USER-QMMM/Install.sh @@ -29,7 +29,7 @@ action () { # all package files with no dependencies for file in *.cpp *.h; do - action $file + test -f ${file} && action $file done # edit 2 Makefile.package files to include/exclude package info diff --git a/src/USER-QUIP/Install.sh b/src/USER-QUIP/Install.sh index ee7faaf62a..20174e664a 100644 --- a/src/USER-QUIP/Install.sh +++ b/src/USER-QUIP/Install.sh @@ -29,7 +29,7 @@ action () { # all package files with no dependencies for file in *.cpp *.h; do - action $file + test -f ${file} && action $file done # edit 2 Makefile.package files to include/exclude package info diff --git a/src/USER-REAXC/compute_spec_atom.cpp b/src/USER-REAXC/compute_spec_atom.cpp index 4af8efcae7..164ce87205 100644 --- a/src/USER-REAXC/compute_spec_atom.cpp +++ b/src/USER-REAXC/compute_spec_atom.cpp @@ -24,7 +24,7 @@ #include 
"reaxc_defs.h" #include "reaxc_types.h" -#include "pair_reax_c.h" +#include "pair_reaxc.h" using namespace LAMMPS_NS; @@ -71,7 +71,7 @@ ComputeSpecAtom::ComputeSpecAtom(LAMMPS *lmp, int narg, char **arg) : } else if (strcmp(arg[iarg],"vz") == 0) { pack_choice[i] = &ComputeSpecAtom::pack_vz; - // from pair_reax_c + // from pair_reaxc } else if (strcmp(arg[iarg],"abo01") == 0) { pack_choice[i] = &ComputeSpecAtom::pack_abo01; } else if (strcmp(arg[iarg],"abo02") == 0) { diff --git a/src/USER-REAXC/fix_qeq_reax.cpp b/src/USER-REAXC/fix_qeq_reax.cpp index 26cf03f60a..96df03c668 100644 --- a/src/USER-REAXC/fix_qeq_reax.cpp +++ b/src/USER-REAXC/fix_qeq_reax.cpp @@ -23,7 +23,7 @@ #include #include #include "fix_qeq_reax.h" -#include "pair_reax_c.h" +#include "pair_reaxc.h" #include "atom.h" #include "comm.h" #include "domain.h" @@ -375,7 +375,7 @@ void FixQEqReax::init_shielding() ntypes = atom->ntypes; if (shld == NULL) - memory->create(shld,ntypes+1,ntypes+1,"qeq:shileding"); + memory->create(shld,ntypes+1,ntypes+1,"qeq:shielding"); for( i = 1; i <= ntypes; ++i ) for( j = 1; j <= ntypes; ++j ) @@ -416,9 +416,6 @@ void FixQEqReax::init_taper() void FixQEqReax::setup_pre_force(int vflag) { - // should not be needed - // neighbor->build_one(list); - deallocate_storage(); allocate_storage(); diff --git a/src/USER-REAXC/fix_reax_c.cpp b/src/USER-REAXC/fix_reaxc.cpp similarity index 99% rename from src/USER-REAXC/fix_reax_c.cpp rename to src/USER-REAXC/fix_reaxc.cpp index e1cc4e340e..df06217993 100644 --- a/src/USER-REAXC/fix_reax_c.cpp +++ b/src/USER-REAXC/fix_reaxc.cpp @@ -21,7 +21,7 @@ Algorithmic Techniques", Parallel Computing, in press. 
------------------------------------------------------------------------- */ -#include "fix_reax_c.h" +#include "fix_reaxc.h" #include "atom.h" #include "pair.h" #include "comm.h" diff --git a/src/USER-REAXC/fix_reax_c.h b/src/USER-REAXC/fix_reaxc.h similarity index 100% rename from src/USER-REAXC/fix_reax_c.h rename to src/USER-REAXC/fix_reaxc.h diff --git a/src/USER-REAXC/fix_reaxc_bonds.cpp b/src/USER-REAXC/fix_reaxc_bonds.cpp index 543669de76..fe830b508e 100644 --- a/src/USER-REAXC/fix_reaxc_bonds.cpp +++ b/src/USER-REAXC/fix_reaxc_bonds.cpp @@ -21,7 +21,7 @@ #include "fix_reaxc_bonds.h" #include "atom.h" #include "update.h" -#include "pair_reax_c.h" +#include "pair_reaxc.h" #include "modify.h" #include "neighbor.h" #include "neigh_list.h" @@ -58,7 +58,21 @@ FixReaxCBonds::FixReaxCBonds(LAMMPS *lmp, int narg, char **arg) : error->all(FLERR,"Illegal fix reax/c/bonds command"); if (me == 0) { - fp = fopen(arg[4],"w"); + char *suffix = strrchr(arg[4],'.'); + if (suffix && strcmp(suffix,".gz") == 0) { +#ifdef LAMMPS_GZIP + char gzip[128]; + sprintf(gzip,"gzip -6 > %s",arg[4]); +#ifdef _WIN32 + fp = _popen(gzip,"wb"); +#else + fp = popen(gzip,"w"); +#endif +#else + error->one(FLERR,"Cannot open gzipped file"); +#endif + } else fp = fopen(arg[4],"w"); + if (fp == NULL) { char str[128]; sprintf(str,"Cannot open fix reax/c/bonds file %s",arg[4]); diff --git a/src/USER-REAXC/fix_reaxc_species.cpp b/src/USER-REAXC/fix_reaxc_species.cpp index ead73f02a1..fe74337128 100644 --- a/src/USER-REAXC/fix_reaxc_species.cpp +++ b/src/USER-REAXC/fix_reaxc_species.cpp @@ -24,7 +24,7 @@ #include "fix_reaxc_species.h" #include "domain.h" #include "update.h" -#include "pair_reax_c.h" +#include "pair_reaxc.h" #include "modify.h" #include "neighbor.h" #include "neigh_list.h" @@ -110,7 +110,21 @@ FixReaxCSpecies::FixReaxCSpecies(LAMMPS *lmp, int narg, char **arg) : strcpy(tmparg[2],arg[5]); if (me == 0) { - fp = fopen(arg[6],"w"); + char *suffix = strrchr(arg[6],'.'); + if (suffix && 
strcmp(suffix,".gz") == 0) { +#ifdef LAMMPS_GZIP + char gzip[128]; + sprintf(gzip,"gzip -6 > %s",arg[6]); +#ifdef _WIN32 + fp = _popen(gzip,"wb"); +#else + fp = popen(gzip,"w"); +#endif +#else + error->one(FLERR,"Cannot open gzipped file"); +#endif + } else fp = fopen(arg[6],"w"); + if (fp == NULL) { char str[128]; sprintf(str,"Cannot open fix reax/c/species file %s",arg[6]); @@ -486,7 +500,7 @@ void FixReaxCSpecies::Output_ReaxC_Bonds(bigint ntimestep, FILE *fp) AtomCoord FixReaxCSpecies::chAnchor(AtomCoord in1, AtomCoord in2) { - if (in1.x < in2.x) + if (in1.x <= in2.x) return in1; return in2; } diff --git a/src/USER-REAXC/fix_reaxc_species.h b/src/USER-REAXC/fix_reaxc_species.h index 872ea2528f..563a10f39d 100644 --- a/src/USER-REAXC/fix_reaxc_species.h +++ b/src/USER-REAXC/fix_reaxc_species.h @@ -23,7 +23,7 @@ FixStyle(reax/c/species,FixReaxCSpecies) #include "fix.h" #include "pointers.h" -#include "pair_reax_c.h" +#include "pair_reaxc.h" #include "reaxc_types.h" #include "reaxc_defs.h" diff --git a/src/USER-REAXC/pair_reax_c.cpp b/src/USER-REAXC/pair_reaxc.cpp similarity index 97% rename from src/USER-REAXC/pair_reax_c.cpp rename to src/USER-REAXC/pair_reaxc.cpp index 4933c90f01..d51b0fc2f8 100644 --- a/src/USER-REAXC/pair_reax_c.cpp +++ b/src/USER-REAXC/pair_reaxc.cpp @@ -20,7 +20,7 @@ Hybrid and hybrid/overlay compatibility added by Ray Shan (Sandia) ------------------------------------------------------------------------- */ -#include "pair_reax_c.h" +#include "pair_reaxc.h" #include "atom.h" #include "update.h" #include "force.h" @@ -30,7 +30,7 @@ #include "neigh_request.h" #include "modify.h" #include "fix.h" -#include "fix_reax_c.h" +#include "fix_reaxc.h" #include "citeme.h" #include "memory.h" #include "error.h" @@ -223,10 +223,11 @@ void PairReaxC::settings(int narg, char **arg) qeqflag = 1; control->lgflag = 0; + control->enobondsflag = 1; system->mincap = MIN_CAP; system->safezone = SAFE_ZONE; system->saferzone = SAFER_ZONE; - + // process optional 
keywords int iarg = 1; @@ -238,7 +239,13 @@ void PairReaxC::settings(int narg, char **arg) else if (strcmp(arg[iarg+1],"no") == 0) qeqflag = 0; else error->all(FLERR,"Illegal pair_style reax/c command"); iarg += 2; - } else if (strcmp(arg[iarg],"lgvdw") == 0) { + } else if (strcmp(arg[iarg],"enobonds") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal pair_style reax/c command"); + if (strcmp(arg[iarg+1],"yes") == 0) control->enobondsflag = 1; + else if (strcmp(arg[iarg+1],"no") == 0) control->enobondsflag = 0; + else error->all(FLERR,"Illegal pair_style reax/c command"); + iarg += 2; + } else if (strcmp(arg[iarg],"lgvdw") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal pair_style reax/c command"); if (strcmp(arg[iarg+1],"yes") == 0) control->lgflag = 1; else if (strcmp(arg[iarg+1],"no") == 0) control->lgflag = 0; diff --git a/src/USER-REAXC/pair_reax_c.h b/src/USER-REAXC/pair_reaxc.h similarity index 100% rename from src/USER-REAXC/pair_reax_c.h rename to src/USER-REAXC/pair_reaxc.h diff --git a/src/USER-REAXC/reaxc_allocate.cpp b/src/USER-REAXC/reaxc_allocate.cpp index dc8545e006..969912e082 100644 --- a/src/USER-REAXC/reaxc_allocate.cpp +++ b/src/USER-REAXC/reaxc_allocate.cpp @@ -24,7 +24,7 @@ . ----------------------------------------------------------------------*/ -#include "pair_reax_c.h" +#include "pair_reaxc.h" #include "reaxc_allocate.h" #include "reaxc_list.h" #include "reaxc_reset_tools.h" diff --git a/src/USER-REAXC/reaxc_bond_orders.cpp b/src/USER-REAXC/reaxc_bond_orders.cpp index 0b4ca21adf..04cedf18a8 100644 --- a/src/USER-REAXC/reaxc_bond_orders.cpp +++ b/src/USER-REAXC/reaxc_bond_orders.cpp @@ -24,7 +24,7 @@ . 
----------------------------------------------------------------------*/ -#include "pair_reax_c.h" +#include "pair_reaxc.h" #include "reaxc_types.h" #include "reaxc_bond_orders.h" #include "reaxc_list.h" diff --git a/src/USER-REAXC/reaxc_bonds.cpp b/src/USER-REAXC/reaxc_bonds.cpp index e0ef38ba0f..a8a1298166 100644 --- a/src/USER-REAXC/reaxc_bonds.cpp +++ b/src/USER-REAXC/reaxc_bonds.cpp @@ -24,7 +24,7 @@ . ----------------------------------------------------------------------*/ -#include "pair_reax_c.h" +#include "pair_reaxc.h" #include "reaxc_bonds.h" #include "reaxc_bond_orders.h" #include "reaxc_list.h" diff --git a/src/USER-REAXC/reaxc_control.cpp b/src/USER-REAXC/reaxc_control.cpp index 3753360c68..4def41bc8c 100644 --- a/src/USER-REAXC/reaxc_control.cpp +++ b/src/USER-REAXC/reaxc_control.cpp @@ -24,7 +24,7 @@ . ----------------------------------------------------------------------*/ -#include "pair_reax_c.h" +#include "pair_reaxc.h" #include "reaxc_control.h" #include "reaxc_tool_box.h" diff --git a/src/USER-REAXC/reaxc_defs.h b/src/USER-REAXC/reaxc_defs.h index d0a75d431b..101b554fb2 100644 --- a/src/USER-REAXC/reaxc_defs.h +++ b/src/USER-REAXC/reaxc_defs.h @@ -116,8 +116,8 @@ #define MAX_BOND 20 -#define MAXREAXBOND 24 /* used in fix_reaxc_bonds.cpp and pair_reax_c.cpp */ -#define MAXSPECBOND 24 /* used in fix_reaxc_species.cpp and pair_reax_c.cpp */ +#define MAXREAXBOND 24 /* used in fix_reaxc_bonds.cpp and pair_reaxc.cpp */ +#define MAXSPECBOND 24 /* used in fix_reaxc_species.cpp and pair_reaxc.cpp */ /******************* ENUMERATIONS *************************/ enum geo_formats { CUSTOM, PDB, ASCII_RESTART, BINARY_RESTART, GF_N }; diff --git a/src/USER-REAXC/reaxc_ffield.cpp b/src/USER-REAXC/reaxc_ffield.cpp index fda2841403..58a347ebf7 100644 --- a/src/USER-REAXC/reaxc_ffield.cpp +++ b/src/USER-REAXC/reaxc_ffield.cpp @@ -24,7 +24,7 @@ . 
----------------------------------------------------------------------*/ -#include "pair_reax_c.h" +#include "pair_reaxc.h" #include "error.h" #include "reaxc_ffield.h" #include "reaxc_tool_box.h" diff --git a/src/USER-REAXC/reaxc_forces.cpp b/src/USER-REAXC/reaxc_forces.cpp index 7f11f5565f..215ded6e5d 100644 --- a/src/USER-REAXC/reaxc_forces.cpp +++ b/src/USER-REAXC/reaxc_forces.cpp @@ -24,7 +24,7 @@ . ----------------------------------------------------------------------*/ -#include "pair_reax_c.h" +#include "pair_reaxc.h" #include "reaxc_forces.h" #include "reaxc_bond_orders.h" #include "reaxc_bonds.h" diff --git a/src/USER-REAXC/reaxc_hydrogen_bonds.cpp b/src/USER-REAXC/reaxc_hydrogen_bonds.cpp index 8d7b3b3819..ff771ad65b 100644 --- a/src/USER-REAXC/reaxc_hydrogen_bonds.cpp +++ b/src/USER-REAXC/reaxc_hydrogen_bonds.cpp @@ -24,7 +24,7 @@ . ----------------------------------------------------------------------*/ -#include "pair_reax_c.h" +#include "pair_reaxc.h" #include "reaxc_hydrogen_bonds.h" #include "reaxc_bond_orders.h" #include "reaxc_list.h" diff --git a/src/USER-REAXC/reaxc_init_md.cpp b/src/USER-REAXC/reaxc_init_md.cpp index f912c95ea5..b11cdd2fbc 100644 --- a/src/USER-REAXC/reaxc_init_md.cpp +++ b/src/USER-REAXC/reaxc_init_md.cpp @@ -24,7 +24,7 @@ . ----------------------------------------------------------------------*/ -#include "pair_reax_c.h" +#include "pair_reaxc.h" #include "reaxc_init_md.h" #include "reaxc_allocate.h" #include "reaxc_forces.h" diff --git a/src/USER-REAXC/reaxc_io_tools.cpp b/src/USER-REAXC/reaxc_io_tools.cpp index 0c14dad5d4..4d58f7514d 100644 --- a/src/USER-REAXC/reaxc_io_tools.cpp +++ b/src/USER-REAXC/reaxc_io_tools.cpp @@ -24,7 +24,7 @@ . 
----------------------------------------------------------------------*/ -#include "pair_reax_c.h" +#include "pair_reaxc.h" #include "update.h" #include "reaxc_io_tools.h" #include "reaxc_list.h" diff --git a/src/USER-REAXC/reaxc_list.cpp b/src/USER-REAXC/reaxc_list.cpp index d22ac4ca7f..2755d5506e 100644 --- a/src/USER-REAXC/reaxc_list.cpp +++ b/src/USER-REAXC/reaxc_list.cpp @@ -24,7 +24,7 @@ . ----------------------------------------------------------------------*/ -#include "pair_reax_c.h" +#include "pair_reaxc.h" #include "reaxc_list.h" #include "reaxc_tool_box.h" diff --git a/src/USER-REAXC/reaxc_lookup.cpp b/src/USER-REAXC/reaxc_lookup.cpp index 903e54962d..9db8b7b9f6 100644 --- a/src/USER-REAXC/reaxc_lookup.cpp +++ b/src/USER-REAXC/reaxc_lookup.cpp @@ -24,7 +24,7 @@ . ----------------------------------------------------------------------*/ -#include "pair_reax_c.h" +#include "pair_reaxc.h" #include "reaxc_lookup.h" #include "reaxc_nonbonded.h" #include "reaxc_tool_box.h" diff --git a/src/USER-REAXC/reaxc_multi_body.cpp b/src/USER-REAXC/reaxc_multi_body.cpp index 1923668e89..ecfd3ad04d 100644 --- a/src/USER-REAXC/reaxc_multi_body.cpp +++ b/src/USER-REAXC/reaxc_multi_body.cpp @@ -24,7 +24,7 @@ . 
----------------------------------------------------------------------*/ -#include "pair_reax_c.h" +#include "pair_reaxc.h" #include "reaxc_multi_body.h" #include "reaxc_bond_orders.h" #include "reaxc_list.h" @@ -79,7 +79,7 @@ void Atom_Energy( reax_system *system, control_params *control, numbonds ++; /* calculate the energy */ - if (numbonds > 0) + if (numbonds > 0 || control->enobondsflag) data->my_en.e_lp += e_lp = p_lp2 * workspace->Delta_lp[i] * inv_expvd2; @@ -87,7 +87,8 @@ void Atom_Energy( reax_system *system, control_params *control, 75 * p_lp2 * workspace->Delta_lp[i] * expvd2 * SQR(inv_expvd2); CElp = dElp * workspace->dDelta_lp[i]; - if (numbonds > 0) workspace->CdDelta[i] += CElp; // lp - 1st term + if (numbonds > 0 || control->enobondsflag) + workspace->CdDelta[i] += CElp; // lp - 1st term /* tally into per-atom energy */ if( system->pair_ptr->evflag) @@ -187,7 +188,7 @@ void Atom_Energy( reax_system *system, control_params *control, for( pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj ) numbonds ++; - if (numbonds > 0) + if (numbonds > 0 || control->enobondsflag) data->my_en.e_un += e_un = -p_ovun5 * (1.0 - exp_ovun6) * inv_exp_ovun2n * inv_exp_ovun8; @@ -202,13 +203,15 @@ void Atom_Energy( reax_system *system, control_params *control, /* tally into per-atom energy */ if( system->pair_ptr->evflag) { eng_tmp = e_ov; - if (numbonds > 0) eng_tmp += e_un; + if (numbonds > 0 || control->enobondsflag) + eng_tmp += e_un; system->pair_ptr->ev_tally(i,i,system->n,1,eng_tmp,0.0,0.0,0.0,0.0,0.0); } /* forces */ workspace->CdDelta[i] += CEover3; // OvCoor - 2nd term - if (numbonds > 0) workspace->CdDelta[i] += CEunder3; // UnCoor - 1st term + if (numbonds > 0 || control->enobondsflag) + workspace->CdDelta[i] += CEunder3; // UnCoor - 1st term for( pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj ) { pbond = &(bonds->select.bond_list[pj]); diff --git a/src/USER-REAXC/reaxc_nonbonded.cpp b/src/USER-REAXC/reaxc_nonbonded.cpp index 
cb24e2dc37..9c223428a6 100644 --- a/src/USER-REAXC/reaxc_nonbonded.cpp +++ b/src/USER-REAXC/reaxc_nonbonded.cpp @@ -24,7 +24,7 @@ . ----------------------------------------------------------------------*/ -#include "pair_reax_c.h" +#include "pair_reaxc.h" #include "reaxc_types.h" #include "reaxc_nonbonded.h" #include "reaxc_bond_orders.h" diff --git a/src/USER-REAXC/reaxc_reset_tools.cpp b/src/USER-REAXC/reaxc_reset_tools.cpp index 1e6aeab475..4ec744e7b1 100644 --- a/src/USER-REAXC/reaxc_reset_tools.cpp +++ b/src/USER-REAXC/reaxc_reset_tools.cpp @@ -24,7 +24,7 @@ . ----------------------------------------------------------------------*/ -#include "pair_reax_c.h" +#include "pair_reaxc.h" #include "reaxc_reset_tools.h" #include "reaxc_list.h" #include "reaxc_tool_box.h" diff --git a/src/USER-REAXC/reaxc_system_props.cpp b/src/USER-REAXC/reaxc_system_props.cpp index 6b4551a03f..54eeb6da1e 100644 --- a/src/USER-REAXC/reaxc_system_props.cpp +++ b/src/USER-REAXC/reaxc_system_props.cpp @@ -24,7 +24,7 @@ . ----------------------------------------------------------------------*/ -#include "pair_reax_c.h" +#include "pair_reaxc.h" #include "reaxc_system_props.h" #include "reaxc_tool_box.h" #include "reaxc_vector.h" diff --git a/src/USER-REAXC/reaxc_tool_box.cpp b/src/USER-REAXC/reaxc_tool_box.cpp index 22576e9f3b..4fc6796efe 100644 --- a/src/USER-REAXC/reaxc_tool_box.cpp +++ b/src/USER-REAXC/reaxc_tool_box.cpp @@ -24,7 +24,7 @@ . ----------------------------------------------------------------------*/ -#include "pair_reax_c.h" +#include "pair_reaxc.h" #include "reaxc_tool_box.h" struct timeval tim; diff --git a/src/USER-REAXC/reaxc_torsion_angles.cpp b/src/USER-REAXC/reaxc_torsion_angles.cpp index 2cfe329765..74d5b04f20 100644 --- a/src/USER-REAXC/reaxc_torsion_angles.cpp +++ b/src/USER-REAXC/reaxc_torsion_angles.cpp @@ -24,7 +24,7 @@ . 
----------------------------------------------------------------------*/ -#include "pair_reax_c.h" +#include "pair_reaxc.h" #include "reaxc_torsion_angles.h" #include "reaxc_bond_orders.h" #include "reaxc_list.h" diff --git a/src/USER-REAXC/reaxc_traj.cpp b/src/USER-REAXC/reaxc_traj.cpp index 9d4fa73524..ae2bba2150 100644 --- a/src/USER-REAXC/reaxc_traj.cpp +++ b/src/USER-REAXC/reaxc_traj.cpp @@ -24,7 +24,7 @@ . ----------------------------------------------------------------------*/ -#include "pair_reax_c.h" +#include "pair_reaxc.h" #include "reaxc_traj.h" #include "reaxc_list.h" #include "reaxc_tool_box.h" diff --git a/src/USER-REAXC/reaxc_types.h b/src/USER-REAXC/reaxc_types.h index db4cf04178..b3e2f40f02 100644 --- a/src/USER-REAXC/reaxc_types.h +++ b/src/USER-REAXC/reaxc_types.h @@ -61,13 +61,12 @@ #define MAX_BOND 20 // same as reaxc_defs.h /********************** TYPE DEFINITIONS ********************/ -typedef int ivec[3]; +typedef int ivec[3]; typedef double rvec[3]; typedef double rtensor[3][3]; typedef double rvec2[2]; typedef double rvec4[4]; - // import LAMMPS' definition of tagint and bigint typedef LAMMPS_NS::tagint rc_tagint; typedef LAMMPS_NS::bigint rc_bigint; @@ -79,7 +78,6 @@ typedef struct void *out_atoms; } mpi_out_data; - typedef struct { MPI_Comm world; @@ -107,7 +105,6 @@ typedef struct void *in2_buffer; } mpi_datatypes; - typedef struct { int n_global; @@ -115,8 +112,6 @@ typedef struct int vdw_type; } global_parameters; - - typedef struct { /* Line one in field file */ @@ -163,8 +158,6 @@ typedef struct } single_body_parameters; - - /* Two Body Parameters */ typedef struct { /* Bond Order parameters */ @@ -193,8 +186,6 @@ typedef struct { double v13cor, ovc; } two_body_parameters; - - /* 3-body parameters */ typedef struct { /* valence angle */ @@ -214,15 +205,11 @@ typedef struct{ three_body_parameters prm[REAX_MAX_3BODY_PARAM]; } three_body_header; - - /* hydrogen-bond parameters */ typedef struct{ double r0_hb, p_hb1, p_hb2, p_hb3; } 
hbond_parameters; - - /* 4-body parameters */ typedef struct { double V1, V2, V3; @@ -234,14 +221,12 @@ typedef struct { double p_cot1; } four_body_parameters; - typedef struct { int cnt; four_body_parameters prm[REAX_MAX_4BODY_PARAM]; } four_body_header; - typedef struct { int num_atom_types; @@ -253,8 +238,6 @@ typedef struct four_body_header ****fbp; } reax_interaction; - - struct _reax_atom { rc_tagint orig_id; @@ -283,8 +266,6 @@ struct _reax_atom }; typedef _reax_atom reax_atom; - - typedef struct { double V; @@ -295,8 +276,6 @@ typedef struct rtensor g; } simulation_box; - - struct grid_cell { double cutoff; @@ -471,7 +450,8 @@ typedef struct int restrict_type; int lgflag; - + int enobondsflag; + } control_params; diff --git a/src/USER-REAXC/reaxc_valence_angles.cpp b/src/USER-REAXC/reaxc_valence_angles.cpp index c2b3287be5..c92996e56b 100644 --- a/src/USER-REAXC/reaxc_valence_angles.cpp +++ b/src/USER-REAXC/reaxc_valence_angles.cpp @@ -24,7 +24,7 @@ . ----------------------------------------------------------------------*/ -#include "pair_reax_c.h" +#include "pair_reaxc.h" #include "reaxc_valence_angles.h" #include "reaxc_bond_orders.h" #include "reaxc_list.h" diff --git a/src/USER-REAXC/reaxc_vector.cpp b/src/USER-REAXC/reaxc_vector.cpp index ee63e94280..977b17a6dc 100644 --- a/src/USER-REAXC/reaxc_vector.cpp +++ b/src/USER-REAXC/reaxc_vector.cpp @@ -24,7 +24,7 @@ . 
----------------------------------------------------------------------*/ -#include "pair_reax_c.h" +#include "pair_reaxc.h" #include "reaxc_vector.h" diff --git a/src/USER-SMD/Install.sh b/src/USER-SMD/Install.sh index c0f48c5460..cb9aa5452b 100644 --- a/src/USER-SMD/Install.sh +++ b/src/USER-SMD/Install.sh @@ -29,7 +29,7 @@ action () { # all package files with no dependencies for file in *.cpp *.h; do - action $file + test -f ${file} && action $file done # edit 2 Makefile.package files to include/exclude package info diff --git a/src/USER-TALLY/compute_force_tally.cpp b/src/USER-TALLY/compute_force_tally.cpp index e9ecedd5ab..e97a1c751c 100644 --- a/src/USER-TALLY/compute_force_tally.cpp +++ b/src/USER-TALLY/compute_force_tally.cpp @@ -65,12 +65,12 @@ ComputeForceTally::~ComputeForceTally() void ComputeForceTally::init() { if (force->pair == NULL) - error->all(FLERR,"Trying to use compute force/tally with no pair style"); + error->all(FLERR,"Trying to use compute force/tally without pair style"); else force->pair->add_tally_callback(this); if (force->pair->single_enable == 0 || force->pair->manybody_flag) - error->all(FLERR,"Compute force/tally used with incompatible pair style."); + error->warning(FLERR,"Compute force/tally used with incompatible pair style"); if ((comm->me == 0) && (force->bond || force->angle || force->dihedral || force->improper || force->kspace)) diff --git a/src/USER-TALLY/compute_heat_flux_tally.cpp b/src/USER-TALLY/compute_heat_flux_tally.cpp index 214311cb3d..48cad538d5 100644 --- a/src/USER-TALLY/compute_heat_flux_tally.cpp +++ b/src/USER-TALLY/compute_heat_flux_tally.cpp @@ -64,12 +64,12 @@ ComputeHeatFluxTally::~ComputeHeatFluxTally() void ComputeHeatFluxTally::init() { if (force->pair == NULL) - error->all(FLERR,"Trying to use compute heat/flux/tally with no pair style"); + error->all(FLERR,"Trying to use compute heat/flux/tally without pair style"); else force->pair->add_tally_callback(this); if (force->pair->single_enable == 0 || 
force->pair->manybody_flag) - error->all(FLERR,"Compute heat/flux/tally used with incompatible pair style."); + error->warning(FLERR,"Compute heat/flux/tally used with incompatible pair style"); if ((comm->me == 0) && (force->bond || force->angle || force->dihedral || force->improper || force->kspace)) diff --git a/src/USER-TALLY/compute_pe_mol_tally.cpp b/src/USER-TALLY/compute_pe_mol_tally.cpp index 09ee04d57a..a30f2d6b9a 100644 --- a/src/USER-TALLY/compute_pe_mol_tally.cpp +++ b/src/USER-TALLY/compute_pe_mol_tally.cpp @@ -59,15 +59,15 @@ ComputePEMolTally::~ComputePEMolTally() void ComputePEMolTally::init() { if (force->pair == NULL) - error->all(FLERR,"Trying to use compute pe/mol/tally with no pair style"); + error->all(FLERR,"Trying to use compute pe/mol/tally without pair style"); else force->pair->add_tally_callback(this); if (atom->molecule_flag == 0) - error->all(FLERR,"Compute pe/mol/tally requires molecule IDs."); + error->all(FLERR,"Compute pe/mol/tally requires molecule IDs"); if (force->pair->single_enable == 0 || force->pair->manybody_flag) - error->all(FLERR,"Compute pe/mol/tally used with incompatible pair style."); + error->warning(FLERR,"Compute pe/mol/tally used with incompatible pair style"); if ((comm->me == 0) && (force->bond || force->angle || force->dihedral || force->improper || force->kspace)) diff --git a/src/USER-TALLY/compute_pe_tally.cpp b/src/USER-TALLY/compute_pe_tally.cpp index 68c00b6d2e..2117f2cb15 100644 --- a/src/USER-TALLY/compute_pe_tally.cpp +++ b/src/USER-TALLY/compute_pe_tally.cpp @@ -64,12 +64,12 @@ ComputePETally::~ComputePETally() void ComputePETally::init() { if (force->pair == NULL) - error->all(FLERR,"Trying to use compute pe/tally with no pair style"); + error->all(FLERR,"Trying to use compute pe/tally without a pair style"); else force->pair->add_tally_callback(this); if (force->pair->single_enable == 0 || force->pair->manybody_flag) - error->all(FLERR,"Compute pe/tally used with incompatible pair style."); + 
error->warning(FLERR,"Compute pe/tally used with incompatible pair style"); if ((comm->me == 0) && (force->bond || force->angle || force->dihedral || force->improper || force->kspace)) diff --git a/src/USER-TALLY/compute_stress_tally.cpp b/src/USER-TALLY/compute_stress_tally.cpp index 2575bd372a..66df9f6e4f 100644 --- a/src/USER-TALLY/compute_stress_tally.cpp +++ b/src/USER-TALLY/compute_stress_tally.cpp @@ -65,12 +65,12 @@ ComputeStressTally::~ComputeStressTally() void ComputeStressTally::init() { if (force->pair == NULL) - error->all(FLERR,"Trying to use compute stress/tally with no pair style"); + error->all(FLERR,"Trying to use compute stress/tally without pair style"); else force->pair->add_tally_callback(this); if (force->pair->single_enable == 0 || force->pair->manybody_flag) - error->all(FLERR,"Compute stress/tally used with incompatible pair style."); + error->warning(FLERR,"Compute stress/tally used with incompatible pair style"); if ((comm->me == 0) && (force->bond || force->angle || force->dihedral || force->improper || force->kspace)) diff --git a/src/USER-VTK/Install.sh b/src/USER-VTK/Install.sh index d02dc87772..3749242fb2 100644 --- a/src/USER-VTK/Install.sh +++ b/src/USER-VTK/Install.sh @@ -27,7 +27,7 @@ action () { } for file in *.cpp *.h; do - action $file + test -f ${file} && action $file done # edit 2 Makefile.package files to include/exclude package info diff --git a/src/USER-VTK/README b/src/USER-VTK/README index 86ef56a740..3429c96b72 100644 --- a/src/USER-VTK/README +++ b/src/USER-VTK/README @@ -1,17 +1,17 @@ -This package implements the "dump custom/vtk" command which can be used in a +This package implements the "dump vtk" command which can be used in a LAMMPS input script. -This dump allows to output atom data similar to dump custom, but directly into -VTK files. +This dump allows output of atom data similar to the dump custom +command, but in VTK format. 
-This package uses the VTK library (www.vtk.org) which must be installed on your -system. See the lib/vtk/README file and the LAMMPS manual for information on -building LAMMPS with external libraries. The settings in the Makefile.lammps -file in that directory must be correct for LAMMPS to build correctly with this -package installed. +This package uses the VTK library (www.vtk.org) which must be +installed on your system. See the lib/vtk/README file and the LAMMPS +manual for information on building LAMMPS with external libraries. +The settings in the Makefile.lammps file in that directory must be +correct for LAMMPS to build correctly with this package installed. -This code was initially developed for LIGGGHTS by Daniel Queteschiner at DCS -Computing. This is an effort to integrate it back to LAMMPS. +This code was initially developed for LIGGGHTS by Daniel Queteschiner +at DCS Computing. This is an effort to integrate it back to LAMMPS. The person who created this package is Richard Berger at JKU (richard.berger@jku.at). Contact him directly if you have questions. 
diff --git a/src/USER-VTK/dump_custom_vtk.cpp b/src/USER-VTK/dump_vtk.cpp similarity index 91% rename from src/USER-VTK/dump_custom_vtk.cpp rename to src/USER-VTK/dump_vtk.cpp index 0e4bc45976..78be5668cf 100644 --- a/src/USER-VTK/dump_custom_vtk.cpp +++ b/src/USER-VTK/dump_vtk.cpp @@ -25,7 +25,7 @@ #include #include #include -#include "dump_custom_vtk.h" +#include "dump_vtk.h" #include "atom.h" #include "force.h" #include "domain.h" @@ -39,12 +39,15 @@ #include "fix.h" #include "memory.h" #include "error.h" + #include #include #include + #ifndef VTK_MAJOR_VERSION #include #endif + #include #include #include @@ -91,12 +94,18 @@ enum{VTK,VTP,VTU,PVTP,PVTU}; // file formats #define ONEFIELD 32 #define DELTA 1048576 +#if VTK_MAJOR_VERSION == 7 +#define InsertNextTupleValue InsertNextTypedTuple +#elif VTK_MAJOR_VERSION > 7 +#error This code has only been tested with VTK 5, 6, and 7 +#endif + /* ---------------------------------------------------------------------- */ -DumpCustomVTK::DumpCustomVTK(LAMMPS *lmp, int narg, char **arg) : +DumpVTK::DumpVTK(LAMMPS *lmp, int narg, char **arg) : DumpCustom(lmp, narg, arg) { - if (narg == 5) error->all(FLERR,"No dump custom/vtk arguments specified"); + if (narg == 5) error->all(FLERR,"No dump vtk arguments specified"); pack_choice.clear(); vtype.clear(); @@ -113,7 +122,7 @@ DumpCustomVTK::DumpCustomVTK(LAMMPS *lmp, int narg, char **arg) : if (ioptional < narg && strcmp(style,"image") != 0 && strcmp(style,"movie") != 0) - error->all(FLERR,"Invalid attribute in dump custom command"); + error->all(FLERR,"Invalid attribute in dump vtk command"); size_one = pack_choice.size(); current_pack_choice_key = -1; @@ -158,11 +167,18 @@ DumpCustomVTK::DumpCustomVTK(LAMMPS *lmp, int narg, char **arg) : header_choice = NULL; write_choice = NULL; boxcorners = NULL; + + // unsupported feature by dump vtk + delete [] vformat; + vformat = NULL; + + delete [] format_column_user; + format_column_user = NULL; } /* 
---------------------------------------------------------------------- */ -DumpCustomVTK::~DumpCustomVTK() +DumpVTK::~DumpVTK() { delete [] filecurrent; delete [] domainfilecurrent; @@ -173,7 +189,7 @@ DumpCustomVTK::~DumpCustomVTK() /* ---------------------------------------------------------------------- */ -void DumpCustomVTK::init_style() +void DumpVTK::init_style() { // default for element names = C @@ -191,14 +207,14 @@ void DumpCustomVTK::init_style() // setup function ptrs - header_choice = &DumpCustomVTK::header_vtk; + header_choice = &DumpVTK::header_vtk; if (vtk_file_format == VTP || vtk_file_format == PVTP) - write_choice = &DumpCustomVTK::write_vtp; + write_choice = &DumpVTK::write_vtp; else if (vtk_file_format == VTU || vtk_file_format == PVTU) - write_choice = &DumpCustomVTK::write_vtu; + write_choice = &DumpVTK::write_vtu; else - write_choice = &DumpCustomVTK::write_vtk; + write_choice = &DumpVTK::write_vtk; // find current ptr for each compute,fix,variable // check that fix frequency is acceptable @@ -206,24 +222,24 @@ void DumpCustomVTK::init_style() int icompute; for (int i = 0; i < ncompute; i++) { icompute = modify->find_compute(id_compute[i]); - if (icompute < 0) error->all(FLERR,"Could not find dump custom/vtk compute ID"); + if (icompute < 0) error->all(FLERR,"Could not find dump vtk compute ID"); compute[i] = modify->compute[icompute]; } int ifix; for (int i = 0; i < nfix; i++) { ifix = modify->find_fix(id_fix[i]); - if (ifix < 0) error->all(FLERR,"Could not find dump custom/vtk fix ID"); + if (ifix < 0) error->all(FLERR,"Could not find dump vtk fix ID"); fix[i] = modify->fix[ifix]; if (nevery % modify->fix[ifix]->peratom_freq) - error->all(FLERR,"Dump custom/vtk and fix not computed at compatible times"); + error->all(FLERR,"Dump vtk and fix not computed at compatible times"); } int ivariable; for (int i = 0; i < nvariable; i++) { ivariable = input->variable->find(id_variable[i]); if (ivariable < 0) - error->all(FLERR,"Could not find dump 
custom/vtk variable name"); + error->all(FLERR,"Could not find dump vtk variable name"); variable[i] = ivariable; } @@ -239,25 +255,25 @@ void DumpCustomVTK::init_style() if (iregion >= 0) { iregion = domain->find_region(idregion); if (iregion == -1) - error->all(FLERR,"Region ID for dump custom/vtk does not exist"); + error->all(FLERR,"Region ID for dump vtk does not exist"); } } /* ---------------------------------------------------------------------- */ -void DumpCustomVTK::write_header(bigint) +void DumpVTK::write_header(bigint) { } /* ---------------------------------------------------------------------- */ -void DumpCustomVTK::header_vtk(bigint) +void DumpVTK::header_vtk(bigint) { } /* ---------------------------------------------------------------------- */ -int DumpCustomVTK::count() +int DumpVTK::count() { n_calls_ = 0; @@ -807,7 +823,7 @@ int DumpCustomVTK::count() /* ---------------------------------------------------------------------- */ -void DumpCustomVTK::write() +void DumpVTK::write() { // simulation box bounds @@ -905,7 +921,7 @@ void DumpCustomVTK::write() /* ---------------------------------------------------------------------- */ -void DumpCustomVTK::pack(tagint *ids) +void DumpVTK::pack(tagint *ids) { int n = 0; for (std::map::iterator it=pack_choice.begin(); it!=pack_choice.end(); ++it, ++n) { @@ -922,14 +938,14 @@ void DumpCustomVTK::pack(tagint *ids) /* ---------------------------------------------------------------------- */ -void DumpCustomVTK::write_data(int n, double *mybuf) +void DumpVTK::write_data(int n, double *mybuf) { (this->*write_choice)(n,mybuf); } /* ---------------------------------------------------------------------- */ -void DumpCustomVTK::setFileCurrent() { +void DumpVTK::setFileCurrent() { delete [] filecurrent; filecurrent = NULL; @@ -1064,7 +1080,7 @@ void DumpCustomVTK::setFileCurrent() { /* ---------------------------------------------------------------------- */ -void DumpCustomVTK::buf2arrays(int n, double *mybuf) 
+void DumpVTK::buf2arrays(int n, double *mybuf) { for (int iatom=0; iatom < n; ++iatom) { vtkIdType pid[1]; @@ -1123,7 +1139,7 @@ void DumpCustomVTK::buf2arrays(int n, double *mybuf) /* ---------------------------------------------------------------------- */ -void DumpCustomVTK::prepare_domain_data(vtkRectilinearGrid *rgrid) +void DumpVTK::prepare_domain_data(vtkRectilinearGrid *rgrid) { vtkSmartPointer xCoords = vtkSmartPointer::New(); xCoords->InsertNextValue(boxxlo); @@ -1143,7 +1159,7 @@ void DumpCustomVTK::prepare_domain_data(vtkRectilinearGrid *rgrid) /* ---------------------------------------------------------------------- */ -void DumpCustomVTK::prepare_domain_data_triclinic(vtkUnstructuredGrid *hexahedronGrid) +void DumpVTK::prepare_domain_data_triclinic(vtkUnstructuredGrid *hexahedronGrid) { vtkSmartPointer hexahedronPoints = vtkSmartPointer::New(); hexahedronPoints->SetNumberOfPoints(8); @@ -1173,7 +1189,7 @@ void DumpCustomVTK::prepare_domain_data_triclinic(vtkUnstructuredGrid *hexahedro /* ---------------------------------------------------------------------- */ -void DumpCustomVTK::write_domain_vtk() +void DumpVTK::write_domain_vtk() { vtkSmartPointer rgrid = vtkSmartPointer::New(); prepare_domain_data(rgrid.GetPointer()); @@ -1197,7 +1213,7 @@ void DumpCustomVTK::write_domain_vtk() /* ---------------------------------------------------------------------- */ -void DumpCustomVTK::write_domain_vtk_triclinic() +void DumpVTK::write_domain_vtk_triclinic() { vtkSmartPointer hexahedronGrid = vtkSmartPointer::New(); prepare_domain_data_triclinic(hexahedronGrid.GetPointer()); @@ -1221,7 +1237,7 @@ void DumpCustomVTK::write_domain_vtk_triclinic() /* ---------------------------------------------------------------------- */ -void DumpCustomVTK::write_domain_vtr() +void DumpVTK::write_domain_vtr() { vtkSmartPointer rgrid = vtkSmartPointer::New(); prepare_domain_data(rgrid.GetPointer()); @@ -1242,7 +1258,7 @@ void DumpCustomVTK::write_domain_vtr() /* 
---------------------------------------------------------------------- */ -void DumpCustomVTK::write_domain_vtu_triclinic() +void DumpVTK::write_domain_vtu_triclinic() { vtkSmartPointer hexahedronGrid = vtkSmartPointer::New(); prepare_domain_data_triclinic(hexahedronGrid.GetPointer()); @@ -1263,7 +1279,7 @@ void DumpCustomVTK::write_domain_vtu_triclinic() /* ---------------------------------------------------------------------- */ -void DumpCustomVTK::write_vtk(int n, double *mybuf) +void DumpVTK::write_vtk(int n, double *mybuf) { ++n_calls_; @@ -1330,7 +1346,7 @@ void DumpCustomVTK::write_vtk(int n, double *mybuf) /* ---------------------------------------------------------------------- */ -void DumpCustomVTK::write_vtp(int n, double *mybuf) +void DumpVTK::write_vtp(int n, double *mybuf) { ++n_calls_; @@ -1394,7 +1410,7 @@ void DumpCustomVTK::write_vtp(int n, double *mybuf) /* ---------------------------------------------------------------------- */ -void DumpCustomVTK::write_vtu(int n, double *mybuf) +void DumpVTK::write_vtu(int n, double *mybuf) { ++n_calls_; @@ -1457,7 +1473,7 @@ void DumpCustomVTK::write_vtu(int n, double *mybuf) /* ---------------------------------------------------------------------- */ -void DumpCustomVTK::reset_vtk_data_containers() +void DumpVTK::reset_vtk_data_containers() { points = vtkSmartPointer::New(); pointsCells = vtkSmartPointer::New(); @@ -1489,16 +1505,16 @@ void DumpCustomVTK::reset_vtk_data_containers() /* ---------------------------------------------------------------------- */ -int DumpCustomVTK::parse_fields(int narg, char **arg) +int DumpVTK::parse_fields(int narg, char **arg) { - pack_choice[X] = &DumpCustomVTK::pack_x; + pack_choice[X] = &DumpVTK::pack_x; vtype[X] = DOUBLE; name[X] = "x"; - pack_choice[Y] = &DumpCustomVTK::pack_y; + pack_choice[Y] = &DumpVTK::pack_y; vtype[Y] = DOUBLE; name[Y] = "y"; - pack_choice[Z] = &DumpCustomVTK::pack_z; + pack_choice[Z] = &DumpVTK::pack_z; vtype[Z] = DOUBLE; name[Z] = "z"; @@ 
-1508,33 +1524,33 @@ int DumpCustomVTK::parse_fields(int narg, char **arg) i = iarg-5; if (strcmp(arg[iarg],"id") == 0) { - pack_choice[ID] = &DumpCustomVTK::pack_id; + pack_choice[ID] = &DumpVTK::pack_id; vtype[ID] = INT; name[ID] = arg[iarg]; } else if (strcmp(arg[iarg],"mol") == 0) { if (!atom->molecule_flag) error->all(FLERR,"Dumping an atom property that isn't allocated"); - pack_choice[MOL] = &DumpCustomVTK::pack_molecule; + pack_choice[MOL] = &DumpVTK::pack_molecule; vtype[MOL] = INT; name[MOL] = arg[iarg]; } else if (strcmp(arg[iarg],"proc") == 0) { - pack_choice[PROC] = &DumpCustomVTK::pack_proc; + pack_choice[PROC] = &DumpVTK::pack_proc; vtype[PROC] = INT; name[PROC] = arg[iarg]; } else if (strcmp(arg[iarg],"procp1") == 0) { - pack_choice[PROCP1] = &DumpCustomVTK::pack_procp1; + pack_choice[PROCP1] = &DumpVTK::pack_procp1; vtype[PROCP1] = INT; name[PROCP1] = arg[iarg]; } else if (strcmp(arg[iarg],"type") == 0) { - pack_choice[TYPE] = &DumpCustomVTK::pack_type; + pack_choice[TYPE] = &DumpVTK::pack_type; vtype[TYPE] = INT; name[TYPE] =arg[iarg]; } else if (strcmp(arg[iarg],"element") == 0) { - pack_choice[ELEMENT] = &DumpCustomVTK::pack_type; + pack_choice[ELEMENT] = &DumpVTK::pack_type; vtype[ELEMENT] = STRING; name[ELEMENT] = arg[iarg]; } else if (strcmp(arg[iarg],"mass") == 0) { - pack_choice[MASS] = &DumpCustomVTK::pack_mass; + pack_choice[MASS] = &DumpVTK::pack_mass; vtype[MASS] = DOUBLE; name[MASS] = arg[iarg]; @@ -1545,182 +1561,182 @@ int DumpCustomVTK::parse_fields(int narg, char **arg) } else if (strcmp(arg[iarg],"z") == 0) { // required property } else if (strcmp(arg[iarg],"xs") == 0) { - if (domain->triclinic) pack_choice[XS] = &DumpCustomVTK::pack_xs_triclinic; - else pack_choice[XS] = &DumpCustomVTK::pack_xs; + if (domain->triclinic) pack_choice[XS] = &DumpVTK::pack_xs_triclinic; + else pack_choice[XS] = &DumpVTK::pack_xs; vtype[XS] = DOUBLE; name[XS] = arg[iarg]; } else if (strcmp(arg[iarg],"ys") == 0) { - if (domain->triclinic) 
pack_choice[YS] = &DumpCustomVTK::pack_ys_triclinic; - else pack_choice[YS] = &DumpCustomVTK::pack_ys; + if (domain->triclinic) pack_choice[YS] = &DumpVTK::pack_ys_triclinic; + else pack_choice[YS] = &DumpVTK::pack_ys; vtype[YS] = DOUBLE; name[YS] = arg[iarg]; } else if (strcmp(arg[iarg],"zs") == 0) { - if (domain->triclinic) pack_choice[ZS] = &DumpCustomVTK::pack_zs_triclinic; - else pack_choice[ZS] = &DumpCustomVTK::pack_zs; + if (domain->triclinic) pack_choice[ZS] = &DumpVTK::pack_zs_triclinic; + else pack_choice[ZS] = &DumpVTK::pack_zs; vtype[ZS] = DOUBLE; name[ZS] = arg[iarg]; } else if (strcmp(arg[iarg],"xu") == 0) { - if (domain->triclinic) pack_choice[XU] = &DumpCustomVTK::pack_xu_triclinic; - else pack_choice[XU] = &DumpCustomVTK::pack_xu; + if (domain->triclinic) pack_choice[XU] = &DumpVTK::pack_xu_triclinic; + else pack_choice[XU] = &DumpVTK::pack_xu; vtype[XU] = DOUBLE; name[XU] = arg[iarg]; } else if (strcmp(arg[iarg],"yu") == 0) { - if (domain->triclinic) pack_choice[YU] = &DumpCustomVTK::pack_yu_triclinic; - else pack_choice[YU] = &DumpCustomVTK::pack_yu; + if (domain->triclinic) pack_choice[YU] = &DumpVTK::pack_yu_triclinic; + else pack_choice[YU] = &DumpVTK::pack_yu; vtype[YU] = DOUBLE; name[YU] = arg[iarg]; } else if (strcmp(arg[iarg],"zu") == 0) { - if (domain->triclinic) pack_choice[ZU] = &DumpCustomVTK::pack_zu_triclinic; - else pack_choice[ZU] = &DumpCustomVTK::pack_zu; + if (domain->triclinic) pack_choice[ZU] = &DumpVTK::pack_zu_triclinic; + else pack_choice[ZU] = &DumpVTK::pack_zu; vtype[ZU] = DOUBLE; name[ZU] = arg[iarg]; } else if (strcmp(arg[iarg],"xsu") == 0) { - if (domain->triclinic) pack_choice[XSU] = &DumpCustomVTK::pack_xsu_triclinic; - else pack_choice[XSU] = &DumpCustomVTK::pack_xsu; + if (domain->triclinic) pack_choice[XSU] = &DumpVTK::pack_xsu_triclinic; + else pack_choice[XSU] = &DumpVTK::pack_xsu; vtype[XSU] = DOUBLE; name[XSU] = arg[iarg]; } else if (strcmp(arg[iarg],"ysu") == 0) { - if (domain->triclinic) pack_choice[YSU] = 
&DumpCustomVTK::pack_ysu_triclinic; - else pack_choice[YSU] = &DumpCustomVTK::pack_ysu; + if (domain->triclinic) pack_choice[YSU] = &DumpVTK::pack_ysu_triclinic; + else pack_choice[YSU] = &DumpVTK::pack_ysu; vtype[YSU] = DOUBLE; name[YSU] = arg[iarg]; } else if (strcmp(arg[iarg],"zsu") == 0) { - if (domain->triclinic) pack_choice[ZSU] = &DumpCustomVTK::pack_zsu_triclinic; - else pack_choice[ZSU] = &DumpCustomVTK::pack_zsu; + if (domain->triclinic) pack_choice[ZSU] = &DumpVTK::pack_zsu_triclinic; + else pack_choice[ZSU] = &DumpVTK::pack_zsu; vtype[ZSU] = DOUBLE; name[ZSU] = arg[iarg]; } else if (strcmp(arg[iarg],"ix") == 0) { - pack_choice[IX] = &DumpCustomVTK::pack_ix; + pack_choice[IX] = &DumpVTK::pack_ix; vtype[IX] = INT; name[IX] = arg[iarg]; } else if (strcmp(arg[iarg],"iy") == 0) { - pack_choice[IY] = &DumpCustomVTK::pack_iy; + pack_choice[IY] = &DumpVTK::pack_iy; vtype[IY] = INT; name[IY] = arg[iarg]; } else if (strcmp(arg[iarg],"iz") == 0) { - pack_choice[IZ] = &DumpCustomVTK::pack_iz; + pack_choice[IZ] = &DumpVTK::pack_iz; vtype[IZ] = INT; name[IZ] = arg[iarg]; } else if (strcmp(arg[iarg],"vx") == 0) { - pack_choice[VX] = &DumpCustomVTK::pack_vx; + pack_choice[VX] = &DumpVTK::pack_vx; vtype[VX] = DOUBLE; name[VX] = arg[iarg]; } else if (strcmp(arg[iarg],"vy") == 0) { - pack_choice[VY] = &DumpCustomVTK::pack_vy; + pack_choice[VY] = &DumpVTK::pack_vy; vtype[VY] = DOUBLE; name[VY] = arg[iarg]; } else if (strcmp(arg[iarg],"vz") == 0) { - pack_choice[VZ] = &DumpCustomVTK::pack_vz; + pack_choice[VZ] = &DumpVTK::pack_vz; vtype[VZ] = DOUBLE; name[VZ] = arg[iarg]; } else if (strcmp(arg[iarg],"fx") == 0) { - pack_choice[FX] = &DumpCustomVTK::pack_fx; + pack_choice[FX] = &DumpVTK::pack_fx; vtype[FX] = DOUBLE; name[FX] = arg[iarg]; } else if (strcmp(arg[iarg],"fy") == 0) { - pack_choice[FY] = &DumpCustomVTK::pack_fy; + pack_choice[FY] = &DumpVTK::pack_fy; vtype[FY] = DOUBLE; name[FY] = arg[iarg]; } else if (strcmp(arg[iarg],"fz") == 0) { - pack_choice[FZ] = 
&DumpCustomVTK::pack_fz; + pack_choice[FZ] = &DumpVTK::pack_fz; vtype[FZ] = DOUBLE; name[FZ] = arg[iarg]; } else if (strcmp(arg[iarg],"q") == 0) { if (!atom->q_flag) error->all(FLERR,"Dumping an atom property that isn't allocated"); - pack_choice[Q] = &DumpCustomVTK::pack_q; + pack_choice[Q] = &DumpVTK::pack_q; vtype[Q] = DOUBLE; name[Q] = arg[iarg]; } else if (strcmp(arg[iarg],"mux") == 0) { if (!atom->mu_flag) error->all(FLERR,"Dumping an atom property that isn't allocated"); - pack_choice[MUX] = &DumpCustomVTK::pack_mux; + pack_choice[MUX] = &DumpVTK::pack_mux; vtype[MUX] = DOUBLE; name[MUX] = arg[iarg]; } else if (strcmp(arg[iarg],"muy") == 0) { if (!atom->mu_flag) error->all(FLERR,"Dumping an atom property that isn't allocated"); - pack_choice[MUY] = &DumpCustomVTK::pack_muy; + pack_choice[MUY] = &DumpVTK::pack_muy; vtype[MUY] = DOUBLE; name[MUY] = arg[iarg]; } else if (strcmp(arg[iarg],"muz") == 0) { if (!atom->mu_flag) error->all(FLERR,"Dumping an atom property that isn't allocated"); - pack_choice[MUZ] = &DumpCustomVTK::pack_muz; + pack_choice[MUZ] = &DumpVTK::pack_muz; vtype[MUZ] = DOUBLE; name[MUZ] = arg[iarg]; } else if (strcmp(arg[iarg],"mu") == 0) { if (!atom->mu_flag) error->all(FLERR,"Dumping an atom property that isn't allocated"); - pack_choice[MU] = &DumpCustomVTK::pack_mu; + pack_choice[MU] = &DumpVTK::pack_mu; vtype[MU] = DOUBLE; name[MU] = arg[iarg]; } else if (strcmp(arg[iarg],"radius") == 0) { if (!atom->radius_flag) error->all(FLERR,"Dumping an atom property that isn't allocated"); - pack_choice[RADIUS] = &DumpCustomVTK::pack_radius; + pack_choice[RADIUS] = &DumpVTK::pack_radius; vtype[RADIUS] = DOUBLE; name[RADIUS] = arg[iarg]; } else if (strcmp(arg[iarg],"diameter") == 0) { if (!atom->radius_flag) error->all(FLERR,"Dumping an atom property that isn't allocated"); - pack_choice[DIAMETER] = &DumpCustomVTK::pack_diameter; + pack_choice[DIAMETER] = &DumpVTK::pack_diameter; vtype[DIAMETER] = DOUBLE; name[DIAMETER] = arg[iarg]; } else if 
(strcmp(arg[iarg],"omegax") == 0) { if (!atom->omega_flag) error->all(FLERR,"Dumping an atom property that isn't allocated"); - pack_choice[OMEGAX] = &DumpCustomVTK::pack_omegax; + pack_choice[OMEGAX] = &DumpVTK::pack_omegax; vtype[OMEGAX] = DOUBLE; name[OMEGAX] = arg[iarg]; } else if (strcmp(arg[iarg],"omegay") == 0) { if (!atom->omega_flag) error->all(FLERR,"Dumping an atom property that isn't allocated"); - pack_choice[OMEGAY] = &DumpCustomVTK::pack_omegay; + pack_choice[OMEGAY] = &DumpVTK::pack_omegay; vtype[OMEGAY] = DOUBLE; name[OMEGAY] = arg[iarg]; } else if (strcmp(arg[iarg],"omegaz") == 0) { if (!atom->omega_flag) error->all(FLERR,"Dumping an atom property that isn't allocated"); - pack_choice[OMEGAZ] = &DumpCustomVTK::pack_omegaz; + pack_choice[OMEGAZ] = &DumpVTK::pack_omegaz; vtype[OMEGAZ] = DOUBLE; name[OMEGAZ] = arg[iarg]; } else if (strcmp(arg[iarg],"angmomx") == 0) { if (!atom->angmom_flag) error->all(FLERR,"Dumping an atom property that isn't allocated"); - pack_choice[ANGMOMX] = &DumpCustomVTK::pack_angmomx; + pack_choice[ANGMOMX] = &DumpVTK::pack_angmomx; vtype[ANGMOMX] = DOUBLE; name[ANGMOMX] = arg[iarg]; } else if (strcmp(arg[iarg],"angmomy") == 0) { if (!atom->angmom_flag) error->all(FLERR,"Dumping an atom property that isn't allocated"); - pack_choice[ANGMOMY] = &DumpCustomVTK::pack_angmomy; + pack_choice[ANGMOMY] = &DumpVTK::pack_angmomy; vtype[ANGMOMY] = DOUBLE; name[ANGMOMY] = arg[iarg]; } else if (strcmp(arg[iarg],"angmomz") == 0) { if (!atom->angmom_flag) error->all(FLERR,"Dumping an atom property that isn't allocated"); - pack_choice[ANGMOMZ] = &DumpCustomVTK::pack_angmomz; + pack_choice[ANGMOMZ] = &DumpVTK::pack_angmomz; vtype[ANGMOMZ] = DOUBLE; name[ANGMOMZ] = arg[iarg]; } else if (strcmp(arg[iarg],"tqx") == 0) { if (!atom->torque_flag) error->all(FLERR,"Dumping an atom property that isn't allocated"); - pack_choice[TQX] = &DumpCustomVTK::pack_tqx; + pack_choice[TQX] = &DumpVTK::pack_tqx; vtype[TQX] = DOUBLE; name[TQX] = arg[iarg]; } 
else if (strcmp(arg[iarg],"tqy") == 0) { if (!atom->torque_flag) error->all(FLERR,"Dumping an atom property that isn't allocated"); - pack_choice[TQY] = &DumpCustomVTK::pack_tqy; + pack_choice[TQY] = &DumpVTK::pack_tqy; vtype[TQY] = DOUBLE; name[TQY] = arg[iarg]; } else if (strcmp(arg[iarg],"tqz") == 0) { if (!atom->torque_flag) error->all(FLERR,"Dumping an atom property that isn't allocated"); - pack_choice[TQZ] = &DumpCustomVTK::pack_tqz; + pack_choice[TQZ] = &DumpVTK::pack_tqz; vtype[TQZ] = DOUBLE; name[TQZ] = arg[iarg]; @@ -1728,7 +1744,7 @@ int DumpCustomVTK::parse_fields(int narg, char **arg) // if no trailing [], then arg is set to 0, else arg is int between [] } else if (strncmp(arg[iarg],"c_",2) == 0) { - pack_choice[ATTRIBUTES+i] = &DumpCustomVTK::pack_compute; + pack_choice[ATTRIBUTES+i] = &DumpVTK::pack_compute; vtype[ATTRIBUTES+i] = DOUBLE; int n = strlen(arg[iarg]); @@ -1738,24 +1754,24 @@ int DumpCustomVTK::parse_fields(int narg, char **arg) char *ptr = strchr(suffix,'['); if (ptr) { if (suffix[strlen(suffix)-1] != ']') - error->all(FLERR,"Invalid attribute in dump custom/vtk command"); + error->all(FLERR,"Invalid attribute in dump vtk command"); argindex[ATTRIBUTES+i] = atoi(ptr+1); *ptr = '\0'; } else argindex[ATTRIBUTES+i] = 0; n = modify->find_compute(suffix); - if (n < 0) error->all(FLERR,"Could not find dump custom/vtk compute ID"); + if (n < 0) error->all(FLERR,"Could not find dump vtk compute ID"); if (modify->compute[n]->peratom_flag == 0) - error->all(FLERR,"Dump custom/vtk compute does not compute per-atom info"); + error->all(FLERR,"Dump vtk compute does not compute per-atom info"); if (argindex[ATTRIBUTES+i] == 0 && modify->compute[n]->size_peratom_cols > 0) error->all(FLERR, - "Dump custom/vtk compute does not calculate per-atom vector"); + "Dump vtk compute does not calculate per-atom vector"); if (argindex[ATTRIBUTES+i] > 0 && modify->compute[n]->size_peratom_cols == 0) error->all(FLERR,\ - "Dump custom/vtk compute does not calculate 
per-atom array"); + "Dump vtk compute does not calculate per-atom array"); if (argindex[ATTRIBUTES+i] > 0 && argindex[ATTRIBUTES+i] > modify->compute[n]->size_peratom_cols) - error->all(FLERR,"Dump custom/vtk compute vector is accessed out-of-range"); + error->all(FLERR,"Dump vtk compute vector is accessed out-of-range"); field2index[ATTRIBUTES+i] = add_compute(suffix); name[ATTRIBUTES+i] = arg[iarg]; @@ -1765,7 +1781,7 @@ int DumpCustomVTK::parse_fields(int narg, char **arg) // if no trailing [], then arg is set to 0, else arg is between [] } else if (strncmp(arg[iarg],"f_",2) == 0) { - pack_choice[ATTRIBUTES+i] = &DumpCustomVTK::pack_fix; + pack_choice[ATTRIBUTES+i] = &DumpVTK::pack_fix; vtype[ATTRIBUTES+i] = DOUBLE; int n = strlen(arg[iarg]); @@ -1775,22 +1791,22 @@ int DumpCustomVTK::parse_fields(int narg, char **arg) char *ptr = strchr(suffix,'['); if (ptr) { if (suffix[strlen(suffix)-1] != ']') - error->all(FLERR,"Invalid attribute in dump custom/vtk command"); + error->all(FLERR,"Invalid attribute in dump vtk command"); argindex[ATTRIBUTES+i] = atoi(ptr+1); *ptr = '\0'; } else argindex[ATTRIBUTES+i] = 0; n = modify->find_fix(suffix); - if (n < 0) error->all(FLERR,"Could not find dump custom/vtk fix ID"); + if (n < 0) error->all(FLERR,"Could not find dump vtk fix ID"); if (modify->fix[n]->peratom_flag == 0) - error->all(FLERR,"Dump custom/vtk fix does not compute per-atom info"); + error->all(FLERR,"Dump vtk fix does not compute per-atom info"); if (argindex[ATTRIBUTES+i] == 0 && modify->fix[n]->size_peratom_cols > 0) - error->all(FLERR,"Dump custom/vtk fix does not compute per-atom vector"); + error->all(FLERR,"Dump vtk fix does not compute per-atom vector"); if (argindex[ATTRIBUTES+i] > 0 && modify->fix[n]->size_peratom_cols == 0) - error->all(FLERR,"Dump custom/vtk fix does not compute per-atom array"); + error->all(FLERR,"Dump vtk fix does not compute per-atom array"); if (argindex[ATTRIBUTES+i] > 0 && argindex[ATTRIBUTES+i] > 
modify->fix[n]->size_peratom_cols) - error->all(FLERR,"Dump custom/vtk fix vector is accessed out-of-range"); + error->all(FLERR,"Dump vtk fix vector is accessed out-of-range"); field2index[ATTRIBUTES+i] = add_fix(suffix); name[ATTRIBUTES+i] = arg[iarg]; @@ -1799,7 +1815,7 @@ int DumpCustomVTK::parse_fields(int narg, char **arg) // variable value = v_name } else if (strncmp(arg[iarg],"v_",2) == 0) { - pack_choice[ATTRIBUTES+i] = &DumpCustomVTK::pack_variable; + pack_choice[ATTRIBUTES+i] = &DumpVTK::pack_variable; vtype[ATTRIBUTES+i] = DOUBLE; int n = strlen(arg[iarg]); @@ -1809,9 +1825,9 @@ int DumpCustomVTK::parse_fields(int narg, char **arg) argindex[ATTRIBUTES+i] = 0; n = input->variable->find(suffix); - if (n < 0) error->all(FLERR,"Could not find dump custom/vtk variable name"); + if (n < 0) error->all(FLERR,"Could not find dump vtk variable name"); if (input->variable->atomstyle(n) == 0) - error->all(FLERR,"Dump custom/vtk variable is not atom-style variable"); + error->all(FLERR,"Dump vtk variable is not atom-style variable"); field2index[ATTRIBUTES+i] = add_variable(suffix); name[ATTRIBUTES+i] = suffix; @@ -1820,7 +1836,7 @@ int DumpCustomVTK::parse_fields(int narg, char **arg) // custom per-atom floating point value = d_ID } else if (strncmp(arg[iarg],"d_",2) == 0) { - pack_choice[ATTRIBUTES+i] = &DumpCustomVTK::pack_custom; + pack_choice[ATTRIBUTES+i] = &DumpVTK::pack_custom; vtype[ATTRIBUTES+i] = DOUBLE; int n = strlen(arg[iarg]); @@ -1843,7 +1859,7 @@ int DumpCustomVTK::parse_fields(int narg, char **arg) // custom per-atom integer value = i_ID } else if (strncmp(arg[iarg],"i_",2) == 0) { - pack_choice[ATTRIBUTES+i] = &DumpCustomVTK::pack_custom; + pack_choice[ATTRIBUTES+i] = &DumpVTK::pack_custom; vtype[ATTRIBUTES+i] = INT; int n = strlen(arg[iarg]); @@ -1873,7 +1889,7 @@ int DumpCustomVTK::parse_fields(int narg, char **arg) /* ---------------------------------------------------------------------- */ -void DumpCustomVTK::identify_vectors() +void 
DumpVTK::identify_vectors() { // detect vectors vector_set.insert(X); // required @@ -1923,7 +1939,7 @@ void DumpCustomVTK::identify_vectors() if already in list, do not add, just return index, else add to list ------------------------------------------------------------------------- */ -int DumpCustomVTK::add_compute(char *id) +int DumpVTK::add_compute(char *id) { int icompute; for (icompute = 0; icompute < ncompute; icompute++) @@ -1948,7 +1964,7 @@ int DumpCustomVTK::add_compute(char *id) if already in list, do not add, just return index, else add to list ------------------------------------------------------------------------- */ -int DumpCustomVTK::add_fix(char *id) +int DumpVTK::add_fix(char *id) { int ifix; for (ifix = 0; ifix < nfix; ifix++) @@ -1973,7 +1989,7 @@ int DumpCustomVTK::add_fix(char *id) if already in list, do not add, just return index, else add to list ------------------------------------------------------------------------- */ -int DumpCustomVTK::add_variable(char *id) +int DumpVTK::add_variable(char *id) { int ivariable; for (ivariable = 0; ivariable < nvariable; ivariable++) @@ -2002,7 +2018,7 @@ int DumpCustomVTK::add_variable(char *id) if already in list, do not add, just return index, else add to list ------------------------------------------------------------------------- */ -int DumpCustomVTK::add_custom(char *id, int flag) +int DumpVTK::add_custom(char *id, int flag) { int icustom; for (icustom = 0; icustom < ncustom; icustom++) @@ -2026,7 +2042,7 @@ int DumpCustomVTK::add_custom(char *id, int flag) /* ---------------------------------------------------------------------- */ -int DumpCustomVTK::modify_param(int narg, char **arg) +int DumpVTK::modify_param(int narg, char **arg) { if (strcmp(arg[0],"region") == 0) { if (narg < 2) error->all(FLERR,"Illegal dump_modify command"); @@ -2301,7 +2317,7 @@ int DumpCustomVTK::modify_param(int narg, char **arg) return # of bytes of allocated memory in buf, choose, variable arrays 
------------------------------------------------------------------------- */ -bigint DumpCustomVTK::memory_usage() +bigint DumpVTK::memory_usage() { bigint bytes = Dump::memory_usage(); bytes += memory->usage(choose,maxlocal); @@ -2315,7 +2331,7 @@ bigint DumpCustomVTK::memory_usage() extraction of Compute, Fix, Variable results ------------------------------------------------------------------------- */ -void DumpCustomVTK::pack_compute(int n) +void DumpVTK::pack_compute(int n) { double *vector = compute[field2index[current_pack_choice_key]]->vector_atom; double **array = compute[field2index[current_pack_choice_key]]->array_atom; @@ -2337,7 +2353,7 @@ void DumpCustomVTK::pack_compute(int n) /* ---------------------------------------------------------------------- */ -void DumpCustomVTK::pack_fix(int n) +void DumpVTK::pack_fix(int n) { double *vector = fix[field2index[current_pack_choice_key]]->vector_atom; double **array = fix[field2index[current_pack_choice_key]]->array_atom; @@ -2359,7 +2375,7 @@ void DumpCustomVTK::pack_fix(int n) /* ---------------------------------------------------------------------- */ -void DumpCustomVTK::pack_variable(int n) +void DumpVTK::pack_variable(int n) { double *vector = vbuf[field2index[current_pack_choice_key]]; @@ -2371,9 +2387,8 @@ void DumpCustomVTK::pack_variable(int n) /* ---------------------------------------------------------------------- */ -void DumpCustomVTK::pack_custom(int n) +void DumpVTK::pack_custom(int n) { - int index = field2index[n]; if (flag_custom[index] == 0) { // integer diff --git a/src/USER-VTK/dump_custom_vtk.h b/src/USER-VTK/dump_vtk.h similarity index 95% rename from src/USER-VTK/dump_custom_vtk.h rename to src/USER-VTK/dump_vtk.h index f3b4a8b63e..603ca114ba 100644 --- a/src/USER-VTK/dump_custom_vtk.h +++ b/src/USER-VTK/dump_vtk.h @@ -17,12 +17,12 @@ #ifdef DUMP_CLASS -DumpStyle(custom/vtk,DumpCustomVTK) +DumpStyle(vtk,DumpVTK) #else -#ifndef LMP_DUMP_CUSTOM_VTK_H -#define LMP_DUMP_CUSTOM_VTK_H 
+#ifndef LMP_DUMP_VTK_H +#define LMP_DUMP_VTK_H #include "dump_custom.h" #include @@ -40,7 +40,7 @@ class vtkUnstructuredGrid; namespace LAMMPS_NS { /** - * @brief DumpCustomVTK class + * @brief DumpVTK class * write atom data to vtk files. * * Similar to the DumpCustom class but uses the vtk library to write data to vtk simple @@ -54,10 +54,11 @@ namespace LAMMPS_NS { * This dump command does not support compressed files, buffering or custom format strings, * multiproc is only supported by the xml formats, multifile option has to be used. */ -class DumpCustomVTK : public DumpCustom { + +class DumpVTK : public DumpCustom { public: - DumpCustomVTK(class LAMMPS *, int, char **); - virtual ~DumpCustomVTK(); + DumpVTK(class LAMMPS *, int, char **); + virtual ~DumpVTK(); virtual void write(); protected: @@ -86,11 +87,11 @@ class DumpCustomVTK : public DumpCustom { int add_custom(char *, int); virtual int modify_param(int, char **); - typedef void (DumpCustomVTK::*FnPtrHeader)(bigint); + typedef void (DumpVTK::*FnPtrHeader)(bigint); FnPtrHeader header_choice; // ptr to write header functions void header_vtk(bigint); - typedef void (DumpCustomVTK::*FnPtrWrite)(int, double *); + typedef void (DumpVTK::*FnPtrWrite)(int, double *); FnPtrWrite write_choice; // ptr to write data functions void write_vtk(int, double *); void write_vtp(int, double *); @@ -103,7 +104,7 @@ class DumpCustomVTK : public DumpCustom { void write_domain_vtr(); void write_domain_vtu_triclinic(); - typedef void (DumpCustomVTK::*FnPtrPack)(int); + typedef void (DumpVTK::*FnPtrPack)(int); std::map pack_choice; // ptrs to pack functions std::map vtype; // data type std::map name; // attribute labels diff --git a/src/VORONOI/Install.sh b/src/VORONOI/Install.sh index f21e9404eb..6373506b19 100755 --- a/src/VORONOI/Install.sh +++ b/src/VORONOI/Install.sh @@ -29,7 +29,7 @@ action () { # all package files with no dependencies for file in *.cpp *.h; do - action $file + test -f ${file} && action $file done # edit 
2 Makefile.package files to include/exclude package info diff --git a/src/bond.cpp b/src/bond.cpp index 5a33f107cf..825ff1b199 100644 --- a/src/bond.cpp +++ b/src/bond.cpp @@ -292,3 +292,14 @@ double Bond::memory_usage() bytes += comm->nthreads*maxvatom*6 * sizeof(double); return bytes; } + +/* ----------------------------------------------------------------------- + Reset all type-based bond params via init. +-------------------------------------------------------------------------- */ +void Bond::reinit() +{ + if (!reinitflag) + error->all(FLERR,"Fix adapt interface to this bond style not supported"); + + init(); +} diff --git a/src/bond.h b/src/bond.h index 41604387a3..29de7ad7d2 100644 --- a/src/bond.h +++ b/src/bond.h @@ -30,6 +30,8 @@ class Bond : protected Pointers { double virial[6]; // accumulated virial double *eatom,**vatom; // accumulated per-atom energy/virial + int reinitflag; // 1 if compatible with fix adapt and alike + // KOKKOS host/device flag and data masks ExecutionSpace execution_space; @@ -49,6 +51,8 @@ class Bond : protected Pointers { virtual void write_data(FILE *) {} virtual double single(int, double, int, int, double &) = 0; virtual double memory_usage(); + virtual void *extract(char *, int &) {return NULL;} + virtual void reinit(); void write_file(int, char**); diff --git a/src/compute.h b/src/compute.h index 18da971f82..7f12cd97e2 100644 --- a/src/compute.h +++ b/src/compute.h @@ -114,9 +114,11 @@ class Compute : protected Pointers { virtual void dof_remove_pre() {} virtual int dof_remove(int) {return 0;} virtual void remove_bias(int, double *) {} + virtual void remove_bias_thr(int, double *, double *) {} virtual void remove_bias_all() {} virtual void reapply_bias_all() {} virtual void restore_bias(int, double *) {} + virtual void restore_bias_thr(int, double *, double *) {} virtual void restore_bias_all() {} virtual void reset_extra_compute_fix(const char *); diff --git a/src/compute_dipole_chunk.cpp b/src/compute_dipole_chunk.cpp 
index 74d66e7c1b..45389ee614 100644 --- a/src/compute_dipole_chunk.cpp +++ b/src/compute_dipole_chunk.cpp @@ -31,10 +31,12 @@ enum { MASSCENTER, GEOMCENTER }; ComputeDipoleChunk::ComputeDipoleChunk(LAMMPS *lmp, int narg, char **arg) : Compute(lmp, narg, arg), - idchunk(NULL), massproc(NULL), masstotal(NULL), chrgproc(NULL), chrgtotal(NULL), com(NULL), + idchunk(NULL), massproc(NULL), masstotal(NULL), chrgproc(NULL), + chrgtotal(NULL), com(NULL), comall(NULL), dipole(NULL), dipoleall(NULL) { - if ((narg != 4) && (narg != 5)) error->all(FLERR,"Illegal compute dipole/chunk command"); + if ((narg != 4) && (narg != 5)) + error->all(FLERR,"Illegal compute dipole/chunk command"); array_flag = 1; size_array_cols = 4; diff --git a/src/compute_temp_com.cpp b/src/compute_temp_com.cpp index 7e05dcb776..6d64ff5aee 100644 --- a/src/compute_temp_com.cpp +++ b/src/compute_temp_com.cpp @@ -170,6 +170,17 @@ void ComputeTempCOM::remove_bias(int i, double *v) v[2] -= vbias[2]; } +/* ---------------------------------------------------------------------- + remove velocity bias from atom I to leave thermal velocity +------------------------------------------------------------------------- */ + +void ComputeTempCOM::remove_bias_thr(int, double *v, double *) +{ + v[0] -= vbias[0]; + v[1] -= vbias[1]; + v[2] -= vbias[2]; +} + /* ---------------------------------------------------------------------- remove velocity bias from all atoms to leave thermal velocity ------------------------------------------------------------------------- */ @@ -200,6 +211,18 @@ void ComputeTempCOM::restore_bias(int i, double *v) v[2] += vbias[2]; } +/* ---------------------------------------------------------------------- + add back in velocity bias to atom I removed by remove_bias_thr() + assume remove_bias_thr() was previously called +------------------------------------------------------------------------- */ + +void ComputeTempCOM::restore_bias_thr(int, double *v, double *) +{ + v[0] += vbias[0]; + v[1] += 
vbias[1]; + v[2] += vbias[2]; +} + /* ---------------------------------------------------------------------- add back in velocity bias to all atoms removed by remove_bias_all() assume remove_bias_all() was previously called diff --git a/src/compute_temp_com.h b/src/compute_temp_com.h index 47587ceccc..67bbdc39a9 100644 --- a/src/compute_temp_com.h +++ b/src/compute_temp_com.h @@ -34,9 +34,11 @@ class ComputeTempCOM : public Compute { void compute_vector(); void remove_bias(int, double *); + void remove_bias_thr(int, double *, double *); void remove_bias_all(); void restore_bias(int, double *); void restore_bias_all(); + void restore_bias_thr(int, double *, double *); private: double tfactor,masstotal; diff --git a/src/compute_temp_deform.cpp b/src/compute_temp_deform.cpp index 5af995252c..c8b8200770 100644 --- a/src/compute_temp_deform.cpp +++ b/src/compute_temp_deform.cpp @@ -221,6 +221,26 @@ void ComputeTempDeform::remove_bias(int i, double *v) v[2] -= vbias[2]; } +/* ---------------------------------------------------------------------- + remove velocity bias from atom I to leave thermal velocity +------------------------------------------------------------------------- */ + +void ComputeTempDeform::remove_bias_thr(int i, double *v, double *b) +{ + double lamda[3]; + double *h_rate = domain->h_rate; + double *h_ratelo = domain->h_ratelo; + + domain->x2lamda(atom->x[i],lamda); + b[0] = h_rate[0]*lamda[0] + h_rate[5]*lamda[1] + + h_rate[4]*lamda[2] + h_ratelo[0]; + b[1] = h_rate[1]*lamda[1] + h_rate[3]*lamda[2] + h_ratelo[1]; + b[2] = h_rate[2]*lamda[2] + h_ratelo[2]; + v[0] -= b[0]; + v[1] -= b[1]; + v[2] -= b[2]; +} + /* ---------------------------------------------------------------------- remove velocity bias from all atoms to leave thermal velocity ------------------------------------------------------------------------- */ @@ -266,6 +286,18 @@ void ComputeTempDeform::restore_bias(int i, double *v) v[2] += vbias[2]; } +/* 
---------------------------------------------------------------------- + add back in velocity bias to atom I removed by remove_bias_thr() + assume remove_bias_thr() was previously called with the same buffer b +------------------------------------------------------------------------- */ + +void ComputeTempDeform::restore_bias_thr(int i, double *v, double *b) +{ + v[0] += b[0]; + v[1] += b[1]; + v[2] += b[2]; +} + /* ---------------------------------------------------------------------- add back in velocity bias to all atoms removed by remove_bias_all() assume remove_bias_all() was previously called diff --git a/src/compute_temp_deform.h b/src/compute_temp_deform.h index 33ca83c73d..030294e1c8 100644 --- a/src/compute_temp_deform.h +++ b/src/compute_temp_deform.h @@ -34,8 +34,10 @@ class ComputeTempDeform : public Compute { virtual void compute_vector(); void remove_bias(int, double *); + void remove_bias_thr(int, double *, double *); void remove_bias_all(); void restore_bias(int, double *); + void restore_bias_thr(int, double *, double *); void restore_bias_all(); double memory_usage(); diff --git a/src/compute_temp_partial.cpp b/src/compute_temp_partial.cpp index 7678403d88..9df8e8b580 100644 --- a/src/compute_temp_partial.cpp +++ b/src/compute_temp_partial.cpp @@ -185,6 +185,26 @@ void ComputeTempPartial::remove_bias(int i, double *v) } } +/* ---------------------------------------------------------------------- + remove velocity bias from atom I to leave thermal velocity +------------------------------------------------------------------------- */ + +void ComputeTempPartial::remove_bias_thr(int i, double *v, double *b) +{ + if (!xflag) { + b[0] = v[0]; + v[0] = 0.0; + } + if (!yflag) { + b[1] = v[1]; + v[1] = 0.0; + } + if (!zflag) { + b[2] = v[2]; + v[2] = 0.0; + } +} + /* ---------------------------------------------------------------------- remove velocity bias from all atoms to leave thermal velocity 
------------------------------------------------------------------------- */ @@ -262,6 +282,18 @@ void ComputeTempPartial::restore_bias(int i, double *v) if (!zflag) v[2] += vbias[2]; } +/* ---------------------------------------------------------------------- + add back in velocity bias to atom I removed by remove_bias_thr() + assume remove_bias_thr() was previously called with the same buffer b +------------------------------------------------------------------------- */ + +void ComputeTempPartial::restore_bias_thr(int i, double *v, double *b) +{ + if (!xflag) v[0] += b[0]; + if (!yflag) v[1] += b[1]; + if (!zflag) v[2] += b[2]; +} + /* ---------------------------------------------------------------------- add back in velocity bias to all atoms removed by remove_bias_all() assume remove_bias_all() was previously called diff --git a/src/compute_temp_partial.h b/src/compute_temp_partial.h index 8a5c260db1..62641d4799 100644 --- a/src/compute_temp_partial.h +++ b/src/compute_temp_partial.h @@ -35,9 +35,11 @@ class ComputeTempPartial : public Compute { int dof_remove(int); void remove_bias(int, double *); + void remove_bias_thr(int, double *, double *); void remove_bias_all(); void reapply_bias_all(); void restore_bias(int, double *); + void restore_bias_thr(int, double *, double *); void restore_bias_all(); double memory_usage(); diff --git a/src/compute_temp_profile.cpp b/src/compute_temp_profile.cpp index 1c6e56359d..236f5bc096 100644 --- a/src/compute_temp_profile.cpp +++ b/src/compute_temp_profile.cpp @@ -359,6 +359,15 @@ void ComputeTempProfile::remove_bias(int i, double *v) if (zflag) v[2] -= binave[ibin][ivz]; } +/* ---------------------------------------------------------------------- + remove velocity bias from atom I to leave thermal velocity +------------------------------------------------------------------------- */ + +void ComputeTempProfile::remove_bias_thr(int i, double *v, double *) +{ + remove_bias(i,v); +} + /* 
---------------------------------------------------------------------- remove velocity bias from all atoms to leave thermal velocity ------------------------------------------------------------------------- */ @@ -392,6 +401,16 @@ void ComputeTempProfile::restore_bias(int i, double *v) if (zflag) v[2] += binave[ibin][ivz]; } +/* ---------------------------------------------------------------------- + add back in velocity bias to atom I removed by remove_bias_thr() + assume remove_bias_thr() was previously called +------------------------------------------------------------------------- */ + +void ComputeTempProfile::restore_bias_thr(int i, double *v, double *) +{ + restore_bias(i,v); +} + /* ---------------------------------------------------------------------- add back in velocity bias to all atoms removed by remove_bias_all() assume remove_bias_all() was previously called diff --git a/src/compute_temp_profile.h b/src/compute_temp_profile.h index d78e5396d6..f0c07bbd48 100644 --- a/src/compute_temp_profile.h +++ b/src/compute_temp_profile.h @@ -35,8 +35,10 @@ class ComputeTempProfile : public Compute { void compute_array(); void remove_bias(int, double *); + void remove_bias_thr(int, double *, double *); void remove_bias_all(); void restore_bias(int, double *); + void restore_bias_thr(int, double *, double *); void restore_bias_all(); double memory_usage(); diff --git a/src/compute_temp_ramp.cpp b/src/compute_temp_ramp.cpp index 810d6dd08b..af6a730c9d 100644 --- a/src/compute_temp_ramp.cpp +++ b/src/compute_temp_ramp.cpp @@ -234,6 +234,19 @@ void ComputeTempRamp::remove_bias(int i, double *v) v[v_dim] -= vbias[v_dim]; } +/* ---------------------------------------------------------------------- + remove velocity bias from atom I to leave thermal velocity +------------------------------------------------------------------------- */ + +void ComputeTempRamp::remove_bias_thr(int i, double *v, double *b) +{ + double fraction = (atom->x[i][coord_dim] - coord_lo) / 
(coord_hi - coord_lo); + fraction = MAX(fraction,0.0); + fraction = MIN(fraction,1.0); + b[v_dim] = v_lo + fraction*(v_hi - v_lo); + v[v_dim] -= b[v_dim]; +} + /* ---------------------------------------------------------------------- remove velocity bias from all atoms to leave thermal velocity ------------------------------------------------------------------------- */ @@ -271,6 +284,16 @@ void ComputeTempRamp::restore_bias(int i, double *v) v[v_dim] += vbias[v_dim]; } +/* ---------------------------------------------------------------------- + add back in velocity bias to atom I removed by remove_bias_thr() + assume remove_bias_thr() was previously called with the same buffer b +------------------------------------------------------------------------- */ + +void ComputeTempRamp::restore_bias_thr(int i, double *v, double *b) +{ + v[v_dim] += b[v_dim]; +} + /* ---------------------------------------------------------------------- add back in velocity bias to all atoms removed by remove_bias_all() assume remove_bias_all() was previously called diff --git a/src/compute_temp_ramp.h b/src/compute_temp_ramp.h index 0e10e519cb..ab888ec31d 100644 --- a/src/compute_temp_ramp.h +++ b/src/compute_temp_ramp.h @@ -35,7 +35,9 @@ class ComputeTempRamp : public Compute { void remove_bias(int, double *); void remove_bias_all(); + void remove_bias_thr(int, double *, double *); void restore_bias(int, double *); + void restore_bias_thr(int, double *, double *); void restore_bias_all(); double memory_usage(); diff --git a/src/compute_temp_region.cpp b/src/compute_temp_region.cpp index e636669344..ed710bc730 100644 --- a/src/compute_temp_region.cpp +++ b/src/compute_temp_region.cpp @@ -199,6 +199,23 @@ void ComputeTempRegion::remove_bias(int i, double *v) } } +/* ---------------------------------------------------------------------- + remove velocity bias from atom I to leave thermal velocity +------------------------------------------------------------------------- */ + +void 
ComputeTempRegion::remove_bias_thr(int i, double *v, double *b) +{ + double *x = atom->x[i]; + if (domain->regions[iregion]->match(x[0],x[1],x[2])) + b[0] = b[1] = b[2] = 0.0; + else { + b[0] = v[0]; + b[1] = v[1]; + b[2] = v[2]; + v[0] = v[1] = v[2] = 0.0; + } +} + /* ---------------------------------------------------------------------- remove velocity bias from all atoms to leave thermal velocity ------------------------------------------------------------------------- */ @@ -243,6 +260,18 @@ void ComputeTempRegion::restore_bias(int i, double *v) v[2] += vbias[2]; } +/* ---------------------------------------------------------------------- + add back in velocity bias to atom I removed by remove_bias_thr() + assume remove_bias_thr() was previously called with the same buffer b +------------------------------------------------------------------------- */ + +void ComputeTempRegion::restore_bias_thr(int i, double *v, double *b) +{ + v[0] += b[0]; + v[1] += b[1]; + v[2] += b[2]; +} + /* ---------------------------------------------------------------------- add back in velocity bias to all atoms removed by remove_bias_all() assume remove_bias_all() was previously called diff --git a/src/compute_temp_region.h b/src/compute_temp_region.h index 95194d7fdf..fd494ab8d1 100644 --- a/src/compute_temp_region.h +++ b/src/compute_temp_region.h @@ -35,10 +35,13 @@ class ComputeTempRegion : public Compute { void dof_remove_pre(); int dof_remove(int); + void remove_bias(int, double *); + void remove_bias_thr(int, double *, double *); void remove_bias_all(); void restore_bias(int, double *); void restore_bias_all(); + void restore_bias_thr(int, double *, double *); double memory_usage(); protected: diff --git a/src/compute_temp_sphere.cpp b/src/compute_temp_sphere.cpp index 50995dfa84..ad5cf565e9 100644 --- a/src/compute_temp_sphere.cpp +++ b/src/compute_temp_sphere.cpp @@ -67,6 +67,11 @@ ComputeTempSphere::ComputeTempSphere(LAMMPS *lmp, int narg, char **arg) : } else 
error->all(FLERR,"Illegal compute temp/sphere command"); } + // when computing only the rotational temperature, + // do not remove DOFs for translation as set by default + + if (mode == ROTATE) extra_dof = 0; + vector = new double[6]; // error checks @@ -326,6 +331,15 @@ void ComputeTempSphere::remove_bias(int i, double *v) tbias->remove_bias(i,v); } +/* ---------------------------------------------------------------------- + remove velocity bias from atom I to leave thermal velocity +------------------------------------------------------------------------- */ + +void ComputeTempSphere::remove_bias_thr(int i, double *v, double *b) +{ + tbias->remove_bias_thr(i,v,b); +} + /* ---------------------------------------------------------------------- add back in velocity bias to atom I removed by remove_bias() assume remove_bias() was previously called @@ -335,3 +349,13 @@ void ComputeTempSphere::restore_bias(int i, double *v) { tbias->restore_bias(i,v); } + +/* ---------------------------------------------------------------------- + add back in velocity bias to atom I removed by remove_bias_thr() + assume remove_bias_thr() was previously called with the same buffer b +------------------------------------------------------------------------- */ + +void ComputeTempSphere::restore_bias_thr(int i, double *v, double *b) +{ + tbias->restore_bias_thr(i,v,b); +} diff --git a/src/compute_temp_sphere.h b/src/compute_temp_sphere.h index ae72fe2684..c15e02ffbb 100644 --- a/src/compute_temp_sphere.h +++ b/src/compute_temp_sphere.h @@ -34,12 +34,13 @@ class ComputeTempSphere : public Compute { void compute_vector(); void remove_bias(int, double *); + void remove_bias_thr(int, double *, double *); void restore_bias(int, double *); + void restore_bias_thr(int, double *, double *); private: int mode; double tfactor; - double *inertia; char *id_bias; Compute *tbias; // ptr to additional bias compute diff --git a/src/domain.cpp b/src/domain.cpp index 31fb3b8559..8ead12cd4e 100644 --- 
a/src/domain.cpp +++ b/src/domain.cpp @@ -944,6 +944,10 @@ void Domain::subbox_too_small_check(double thresh) changed "if" to "while" to enable distance to far-away ghost atom returned by atom->map() to be wrapped back into box could be problem for looking up atom IDs when cutoff > boxsize + this should not be used if atom has moved infinitely far outside box + b/c while could iterate forever + e.g. fix shake prediction of new position with highly overlapped atoms + use minimum_image_once() instead ------------------------------------------------------------------------- */ void Domain::minimum_image(double &dx, double &dy, double &dz) @@ -1009,6 +1013,10 @@ void Domain::minimum_image(double &dx, double &dy, double &dz) changed "if" to "while" to enable distance to far-away ghost atom returned by atom->map() to be wrapped back into box could be problem for looking up atom IDs when cutoff > boxsize + this should not be used if atom has moved infinitely far outside box + b/c while could iterate forever + e.g. 
fix shake prediction of new position with highly overlapped atoms + use minimum_image_once() instead ------------------------------------------------------------------------- */ void Domain::minimum_image(double *delta) @@ -1067,6 +1075,70 @@ void Domain::minimum_image(double *delta) } } +/* ---------------------------------------------------------------------- + minimum image convention in periodic dimensions + use 1/2 of box size as test + for triclinic, also add/subtract tilt factors in other dims as needed + only shift by one box length in each direction + this should not be used if multiple box shifts are required +------------------------------------------------------------------------- */ + +void Domain::minimum_image_once(double *delta) +{ + if (triclinic == 0) { + if (xperiodic) { + if (fabs(delta[0]) > xprd_half) { + if (delta[0] < 0.0) delta[0] += xprd; + else delta[0] -= xprd; + } + } + if (yperiodic) { + if (fabs(delta[1]) > yprd_half) { + if (delta[1] < 0.0) delta[1] += yprd; + else delta[1] -= yprd; + } + } + if (zperiodic) { + if (fabs(delta[2]) > zprd_half) { + if (delta[2] < 0.0) delta[2] += zprd; + else delta[2] -= zprd; + } + } + + } else { + if (zperiodic) { + if (fabs(delta[2]) > zprd_half) { + if (delta[2] < 0.0) { + delta[2] += zprd; + delta[1] += yz; + delta[0] += xz; + } else { + delta[2] -= zprd; + delta[1] -= yz; + delta[0] -= xz; + } + } + } + if (yperiodic) { + if (fabs(delta[1]) > yprd_half) { + if (delta[1] < 0.0) { + delta[1] += yprd; + delta[0] += xy; + } else { + delta[1] -= yprd; + delta[0] -= xy; + } + } + } + if (xperiodic) { + if (fabs(delta[0]) > xprd_half) { + if (delta[0] < 0.0) delta[0] += xprd; + else delta[0] -= xprd; + } + } + } +} + /* ---------------------------------------------------------------------- return local index of atom J or any of its images that is closest to atom I if J is not a valid index like -1, just return it diff --git a/src/domain.h b/src/domain.h index 22e3191231..0f47a3c2ca 100644 --- 
a/src/domain.h +++ b/src/domain.h @@ -112,6 +112,7 @@ class Domain : protected Pointers { void subbox_too_small_check(double); void minimum_image(double &, double &, double &); void minimum_image(double *); + void minimum_image_once(double *); int closest_image(int, int); int closest_image(double *, int); void closest_image(const double * const, const double * const, diff --git a/src/dump_custom.cpp b/src/dump_custom.cpp index 44842619fe..4821171cc9 100644 --- a/src/dump_custom.cpp +++ b/src/dump_custom.cpp @@ -82,8 +82,8 @@ DumpCustom::DumpCustom(LAMMPS *lmp, int narg, char **arg) : pack_choice = new FnPtrPack[nfield]; vtype = new int[nfield]; - field2index = new int[nfield]; - argindex = new int[nfield]; + memory->create(field2index,nfield,"dump:field2index"); + memory->create(argindex,nfield,"dump:argindex"); buffer_allow = 1; buffer_flag = 1; @@ -200,8 +200,8 @@ DumpCustom::~DumpCustom() delete [] pack_choice; delete [] vtype; - delete [] field2index; - delete [] argindex; + memory->destroy(field2index); + memory->destroy(argindex); delete [] idregion; memory->destroy(thresh_array); @@ -244,11 +244,15 @@ DumpCustom::~DumpCustom() for (int i = 1; i <= ntypes; i++) delete [] typenames[i]; delete [] typenames; - for (int i = 0; i < size_one; i++) delete [] vformat[i]; - delete [] vformat; + if(vformat) { + for (int i = 0; i < size_one; i++) delete [] vformat[i]; + delete [] vformat; + } - for (int i = 0; i < size_one; i++) delete [] format_column_user[i]; - delete [] format_column_user; + if(format_column_user) { + for (int i = 0; i < size_one; i++) delete [] format_column_user[i]; + delete [] format_column_user; + } delete [] columns; } diff --git a/src/finish.cpp b/src/finish.cpp index b81b5e6785..45e9226388 100644 --- a/src/finish.cpp +++ b/src/finish.cpp @@ -130,7 +130,7 @@ void Finish::end(int flag) atom->natoms); if (logfile) fprintf(logfile,fmt1,time_loop,ntasks,update->nsteps, atom->natoms); - + // Gromacs/NAMD-style performance metric for suitable unit 
settings if ( timeflag && !minflag && !prdflag && !tadflag && @@ -144,7 +144,7 @@ void Finish::end(int flag) double one_fs = force->femtosecond; double t_step = ((double) time_loop) / ((double) update->nsteps); double step_t = 1.0/t_step; - + if (strcmp(update->unit_style,"lj") == 0) { double tau_day = 24.0*3600.0 / t_step * update->dt / one_fs; const char perf[] = "Performance: %.3f tau/day, %.3f timesteps/s\n"; @@ -161,26 +161,28 @@ void Finish::end(int flag) } // CPU use on MPI tasks and OpenMP threads - - if (lmp->kokkos) { - const char fmt2[] = - "%.1f%% CPU use with %d MPI tasks x %d OpenMP threads\n"; - if (screen) fprintf(screen,fmt2,cpu_loop,nprocs, - lmp->kokkos->num_threads); - if (logfile) fprintf(logfile,fmt2,cpu_loop,nprocs, - lmp->kokkos->num_threads); - } else { + + if (timeflag) { + if (lmp->kokkos) { + const char fmt2[] = + "%.1f%% CPU use with %d MPI tasks x %d OpenMP threads\n"; + if (screen) fprintf(screen,fmt2,cpu_loop,nprocs, + lmp->kokkos->num_threads); + if (logfile) fprintf(logfile,fmt2,cpu_loop,nprocs, + lmp->kokkos->num_threads); + } else { #if defined(_OPENMP) - const char fmt2[] = - "%.1f%% CPU use with %d MPI tasks x %d OpenMP threads\n"; - if (screen) fprintf(screen,fmt2,cpu_loop,nprocs,nthreads); - if (logfile) fprintf(logfile,fmt2,cpu_loop,nprocs,nthreads); + const char fmt2[] = + "%.1f%% CPU use with %d MPI tasks x %d OpenMP threads\n"; + if (screen) fprintf(screen,fmt2,cpu_loop,nprocs,nthreads); + if (logfile) fprintf(logfile,fmt2,cpu_loop,nprocs,nthreads); #else - const char fmt2[] = - "%.1f%% CPU use with %d MPI tasks x no OpenMP threads\n"; - if (screen) fprintf(screen,fmt2,cpu_loop,nprocs); - if (logfile) fprintf(logfile,fmt2,cpu_loop,nprocs); + const char fmt2[] = + "%.1f%% CPU use with %d MPI tasks x no OpenMP threads\n"; + if (screen) fprintf(screen,fmt2,cpu_loop,nprocs); + if (logfile) fprintf(logfile,fmt2,cpu_loop,nprocs); #endif + } } } } diff --git a/src/fix_adapt.cpp b/src/fix_adapt.cpp index 4c7eb5f218..bc7aa843ef 
100644 --- a/src/fix_adapt.cpp +++ b/src/fix_adapt.cpp @@ -16,6 +16,7 @@ #include #include "fix_adapt.h" #include "atom.h" +#include "bond.h" #include "update.h" #include "group.h" #include "modify.h" @@ -35,7 +36,7 @@ using namespace LAMMPS_NS; using namespace FixConst; using namespace MathConst; -enum{PAIR,KSPACE,ATOM}; +enum{PAIR,KSPACE,ATOM,BOND}; enum{DIAMETER,CHARGE}; /* ---------------------------------------------------------------------- */ @@ -68,6 +69,10 @@ nadapt(0), id_fix_diam(NULL), id_fix_chg(NULL), adapt(NULL) if (iarg+3 > narg) error->all(FLERR,"Illegal fix adapt command"); nadapt++; iarg += 3; + } else if (strcmp(arg[iarg],"bond") == 0 ){ + if (iarg+5 > narg) error->all(FLERR,"Illegal fix adapt command"); + nadapt++; + iarg += 5; } else break; } @@ -103,6 +108,25 @@ nadapt(0), id_fix_diam(NULL), id_fix_chg(NULL), adapt(NULL) } else error->all(FLERR,"Illegal fix adapt command"); nadapt++; iarg += 6; + } else if (strcmp(arg[iarg],"bond") == 0 ){ + if (iarg+5 > narg) error->all(FLERR, "Illegal fix adapt command"); + adapt[nadapt].which = BOND; + int n = strlen(arg[iarg+1]) + 1; + adapt[nadapt].bstyle = new char[n]; + strcpy(adapt[nadapt].bstyle,arg[iarg+1]); + n = strlen(arg[iarg+2]) + 1; + adapt[nadapt].bparam = new char[n]; + adapt[nadapt].bond = NULL; + strcpy(adapt[nadapt].bparam,arg[iarg+2]); + force->bounds(FLERR,arg[iarg+3],atom->ntypes, + adapt[nadapt].ilo,adapt[nadapt].ihi); + if (strstr(arg[iarg+4],"v_") == arg[iarg+4]) { + n = strlen(&arg[iarg+4][2]) + 1; + adapt[nadapt].var = new char[n]; + strcpy(adapt[nadapt].var,&arg[iarg+4][2]); + } else error->all(FLERR,"Illegal fix adapt command"); + nadapt++; + iarg += 5; } else if (strcmp(arg[iarg],"kspace") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal fix adapt command"); adapt[nadapt].which = KSPACE; @@ -160,6 +184,13 @@ nadapt(0), id_fix_diam(NULL), id_fix_chg(NULL), adapt(NULL) for (int m = 0; m < nadapt; m++) if (adapt[m].which == PAIR) 
memory->create(adapt[m].array_orig,n+1,n+1,"adapt:array_orig"); + + // allocate bond style arrays: + + n = atom->nbondtypes; + for (int m = 0; m < nadapt; ++m) + if (adapt[m].which == BOND) + memory->create(adapt[m].vector_orig,n+1,"adapt:vector_orig"); } /* ---------------------------------------------------------------------- */ @@ -172,6 +203,10 @@ FixAdapt::~FixAdapt() delete [] adapt[m].pstyle; delete [] adapt[m].pparam; memory->destroy(adapt[m].array_orig); + } else if (adapt[m].which == BOND) { + delete [] adapt[m].bstyle; + delete [] adapt[m].bparam; + memory->destroy(adapt[m].vector_orig); } } delete [] adapt; @@ -282,6 +317,7 @@ void FixAdapt::init() // setup and error checks anypair = 0; + anybond = 0; for (int m = 0; m < nadapt; m++) { Adapt *ad = &adapt[m]; @@ -350,7 +386,42 @@ void FixAdapt::init() } delete [] pstyle; + } else if (ad->which == BOND){ + ad->bond = NULL; + anybond = 1; + + int n = strlen(ad->bstyle) + 1; + char *bstyle = new char[n]; + strcpy(bstyle,ad->bstyle); + if (lmp->suffix_enable) { + int len = 2 + strlen(bstyle) + strlen(lmp->suffix); + char *bsuffix = new char[len]; + strcpy(bsuffix,bstyle); + strcat(bsuffix,"/"); + strcat(bsuffix,lmp->suffix); + ad->bond = force->bond_match(bsuffix); + delete [] bsuffix; + } + if (ad->bond == NULL) ad->bond = force->bond_match(bstyle); + if (ad->bond == NULL ) + error->all(FLERR,"Fix adapt bond style does not exist"); + + void *ptr = ad->bond->extract(ad->bparam,ad->bdim); + + if (ptr == NULL) + error->all(FLERR,"Fix adapt bond style param not supported"); + + // for bond styles, use a vector + + if (ad->bdim == 1) ad->vector = (double *) ptr; + + if (strcmp(force->bond_style,"hybrid") == 0 || + strcmp(force->bond_style,"hybrid_overlay") == 0) + error->all(FLERR,"Fix adapt does not support bond_style hybrid"); + + delete [] bstyle; + } else if (ad->which == KSPACE) { if (force->kspace == NULL) error->all(FLERR,"Fix adapt kspace style does not exist"); @@ -368,7 +439,7 @@ void FixAdapt::init() 
} } - // make copy of original pair array values + // make copy of original pair/bond array values for (int m = 0; m < nadapt; m++) { Adapt *ad = &adapt[m]; @@ -376,9 +447,14 @@ void FixAdapt::init() for (i = ad->ilo; i <= ad->ihi; i++) for (j = MAX(ad->jlo,i); j <= ad->jhi; j++) ad->array_orig[i][j] = ad->array[i][j]; - }else if (ad->which == PAIR && ad->pdim == 0){ + } else if (ad->which == PAIR && ad->pdim == 0){ ad->scalar_orig = *ad->scalar; + + } else if (ad->which == BOND && ad->bdim == 1){ + for (i = ad->ilo; i <= ad->ihi; ++i ) + ad->vector_orig[i] = ad->vector[i]; } + } // fixes that store initial per-atom values @@ -470,6 +546,18 @@ void FixAdapt::change_settings() ad->array[i][j] = value; } + // set bond type array values: + + } else if (ad->which == BOND) { + if (ad->bdim == 1){ + if (scaleflag) + for (i = ad->ilo; i <= ad->ihi; ++i ) + ad->vector[i] = value*ad->vector_orig[i]; + else + for (i = ad->ilo; i <= ad->ihi; ++i ) + ad->vector[i] = value; + } + // set kspace scale factor } else if (ad->which == KSPACE) { @@ -522,8 +610,10 @@ void FixAdapt::change_settings() modify->addstep_compute(update->ntimestep + nevery); // re-initialize pair styles if any PAIR settings were changed + // ditto for bond styles if any BOND settings were changed // this resets other coeffs that may depend on changed values, - // and also offset and tail corrections + // and also offset and tail corrections + if (anypair) { for (int m = 0; m < nadapt; m++) { Adapt *ad = &adapt[m]; @@ -532,6 +622,14 @@ void FixAdapt::change_settings() } } } + if (anybond) { + for (int m = 0; m < nadapt; ++m ) { + Adapt *ad = &adapt[m]; + if (ad->which == BOND) { + ad->bond->reinit(); + } + } + } // reset KSpace charges if charges have changed @@ -554,6 +652,12 @@ void FixAdapt::restore_settings() ad->array[i][j] = ad->array_orig[i][j]; } + } else if (ad->which == BOND) { + if (ad->pdim == 1) { + for (int i = ad->ilo; i <= ad->ihi; i++) + ad->vector[i] = ad->vector_orig[i]; + } + } else if 
(ad->which == KSPACE) { *kspace_scale = 1.0; @@ -588,6 +692,7 @@ void FixAdapt::restore_settings() } if (anypair) force->pair->reinit(); + if (anybond) force->bond->reinit(); if (chgflag && force->kspace) force->kspace->qsum_qsq(); } diff --git a/src/fix_adapt.h b/src/fix_adapt.h index a6d45c78cc..6e49f4a284 100644 --- a/src/fix_adapt.h +++ b/src/fix_adapt.h @@ -43,7 +43,7 @@ class FixAdapt : public Fix { private: int nadapt,resetflag,scaleflag; - int anypair; + int anypair, anybond; int nlevels_respa; char *id_fix_diam,*id_fix_chg; class FixStore *fix_diam,*fix_chg; @@ -52,12 +52,15 @@ class FixAdapt : public Fix { int which,ivar; char *var; char *pstyle,*pparam; + char *bstyle,*bparam; int ilo,ihi,jlo,jhi; - int pdim; + int pdim,bdim; double *scalar,scalar_orig; + double *vector,*vector_orig; double **array,**array_orig; int aparam; class Pair *pair; + class Bond *bond; }; Adapt *adapt; diff --git a/src/fix_ave_histo.cpp b/src/fix_ave_histo.cpp index e0d010aacb..b4516a0fd2 100644 --- a/src/fix_ave_histo.cpp +++ b/src/fix_ave_histo.cpp @@ -205,14 +205,18 @@ FixAveHisto::FixAveHisto(LAMMPS *lmp, int narg, char **arg) : for (int i = 0; i < nvalues; i++) { if (which[i] == X || which[i] == V || which[i] == F) kindflag = PERATOM; else if (which[i] == COMPUTE) { - Compute *compute = modify->compute[modify->find_compute(ids[i])]; + int c_id = modify->find_compute(ids[i]); + if (c_id < 0) error->all(FLERR,"Fix ave/histo input is invalid compute"); + Compute *compute = modify->compute[c_id]; if (compute->scalar_flag || compute->vector_flag || compute->array_flag) kindflag = GLOBAL; else if (compute->peratom_flag) kindflag = PERATOM; else if (compute->local_flag) kindflag = LOCAL; else error->all(FLERR,"Fix ave/histo input is invalid compute"); } else if (which[i] == FIX) { - Fix *fix = modify->fix[modify->find_fix(ids[i])]; + int f_id = modify->find_fix(ids[i]); + if (f_id < 0) error->all(FLERR,"Fix ave/histo input is invalid fix"); + Fix *fix = modify->fix[f_id]; if 
(fix->scalar_flag || fix->vector_flag || fix->array_flag) kindflag = GLOBAL; else if (fix->peratom_flag) kindflag = PERATOM; @@ -220,6 +224,7 @@ FixAveHisto::FixAveHisto(LAMMPS *lmp, int narg, char **arg) : else error->all(FLERR,"Fix ave/histo input is invalid fix"); } else if (which[i] == VARIABLE) { int ivariable = input->variable->find(ids[i]); + if (ivariable < 0) error->all(FLERR,"Fix ave/histo input is invalid variable"); if (input->variable->equalstyle(ivariable)) kindflag = GLOBAL; else if (input->variable->atomstyle(ivariable)) kindflag = PERATOM; else error->all(FLERR,"Fix ave/histo input is invalid variable"); diff --git a/src/integrate.h b/src/integrate.h index 19ed546a9b..4ca3a788fa 100644 --- a/src/integrate.h +++ b/src/integrate.h @@ -23,7 +23,7 @@ class Integrate : protected Pointers { Integrate(class LAMMPS *, int, char **); virtual ~Integrate(); virtual void init(); - virtual void setup() = 0; + virtual void setup(int flag=1) = 0; virtual void setup_minimal(int) = 0; virtual void run(int) = 0; virtual void cleanup() {} diff --git a/src/lammps.cpp b/src/lammps.cpp index cc3133f2d9..bde7ca035d 100644 --- a/src/lammps.cpp +++ b/src/lammps.cpp @@ -45,6 +45,7 @@ #include "accelerator_kokkos.h" #include "accelerator_omp.h" #include "timer.h" +#include "python.h" #include "memory.h" #include "version.h" #include "error.h" @@ -67,6 +68,7 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator) error = new Error(this); universe = new Universe(this,communicator); output = NULL; + python = NULL; screen = NULL; logfile = NULL; @@ -585,6 +587,7 @@ LAMMPS::~LAMMPS() if (world != universe->uworld) MPI_Comm_free(&world); + delete python; delete kokkos; delete [] suffix; delete [] suffix2; @@ -639,6 +642,8 @@ void LAMMPS::create() // must be after modify so can create Computes update = new Update(this); // must be after output, force, neighbor timer = new Timer(this); + + python = new Python(this); } /* 
---------------------------------------------------------------------- @@ -759,6 +764,9 @@ void LAMMPS::destroy() delete timer; timer = NULL; + + delete python; + python = NULL; } /* ---------------------------------------------------------------------- diff --git a/src/lammps.h b/src/lammps.h index 02490a1836..c432784a0b 100644 --- a/src/lammps.h +++ b/src/lammps.h @@ -54,6 +54,8 @@ class LAMMPS { class KokkosLMP *kokkos; // KOKKOS accelerator class class AtomKokkos *atomKK; // KOKKOS version of Atom class + class Python * python; // Python interface + class CiteMe *citeme; // citation info LAMMPS(int, char **, MPI_Comm); diff --git a/src/min.cpp b/src/min.cpp index 9207c6bdc2..d308efb848 100644 --- a/src/min.cpp +++ b/src/min.cpp @@ -180,13 +180,17 @@ void Min::init() setup before run ------------------------------------------------------------------------- */ -void Min::setup() +void Min::setup(int flag) { if (comm->me == 0 && screen) { fprintf(screen,"Setting up %s style minimization ...\n", update->minimize_style); - fprintf(screen," Unit style : %s\n", update->unit_style); - timer->print_timeout(screen); + if (flag) { + fprintf(screen," Unit style : %s\n", update->unit_style); + fprintf(screen," Current step : " BIGINT_FORMAT "\n", + update->ntimestep); + timer->print_timeout(screen); + } } update->setupflag = 1; @@ -194,7 +198,12 @@ void Min::setup() // cannot be done in init() b/c update init() is before modify init() nextra_global = modify->min_dof(); - if (nextra_global) fextra = new double[nextra_global]; + if (nextra_global) { + fextra = new double[nextra_global]; + if (comm->me == 0 && screen) + fprintf(screen,"WARNING: Energy due to %d extra global DOFs will" + " be included in minimizer energies\n",nextra_global); + } // compute for potential energy @@ -294,7 +303,7 @@ void Min::setup() requestor[m]->min_xf_get(m); modify->setup(vflag); - output->setup(); + output->setup(flag); update->setupflag = 0; // stats for initial thermo output diff --git 
a/src/min.h b/src/min.h index 639f87ed66..021198bc09 100644 --- a/src/min.h +++ b/src/min.h @@ -31,7 +31,7 @@ class Min : protected Pointers { Min(class LAMMPS *); virtual ~Min(); virtual void init(); - void setup(); + void setup(int flag=1); void setup_minimal(int); void run(int); void cleanup(); @@ -123,6 +123,12 @@ Minimization requires that neigh_modify settings be delay = 0, every = changed them and will restore them to their original values after the minimization. +W: Energy due to X extra global DOFs will be included in minimizer energies + +When using fixes like box/relax, the potential energy used by the minimizer +is augmented by an additional energy provided by the fix. Thus the printed +converged energy may be different from the total potential energy. + E: Minimization could not find thermo_pe compute This compute is created by the thermo command. It must have been diff --git a/src/nbin.cpp b/src/nbin.cpp index 6aa37a4c43..6bd1ce322f 100644 --- a/src/nbin.cpp +++ b/src/nbin.cpp @@ -29,6 +29,7 @@ NBin::NBin(LAMMPS *lmp) : Pointers(lmp) maxbin = maxatom = 0; binhead = NULL; bins = NULL; + atom2bin = NULL; // geometry settings @@ -42,6 +43,7 @@ NBin::~NBin() { memory->destroy(binhead); memory->destroy(bins); + memory->destroy(atom2bin); } /* ---------------------------------------------------------------------- */ @@ -87,12 +89,15 @@ void NBin::bin_atoms_setup(int nall) memory->create(binhead,maxbin,"neigh:binhead"); } - // bins = per-atom vector + // bins and atom2bin = per-atom vectors + // for both local and ghost atoms if (nall > maxatom) { maxatom = nall; memory->destroy(bins); memory->create(bins,maxatom,"neigh:bins"); + memory->destroy(atom2bin); + memory->create(atom2bin,maxatom,"neigh:atom2bin"); } } @@ -148,6 +153,6 @@ bigint NBin::memory_usage() { bigint bytes = 0; bytes += maxbin*sizeof(int); - bytes += maxatom*sizeof(int); + bytes += 2*maxatom*sizeof(int); return bytes; } diff --git a/src/nbin.h b/src/nbin.h index 9871a229d8..30c74ff295 
100644 --- a/src/nbin.h +++ b/src/nbin.h @@ -31,10 +31,11 @@ class NBin : protected Pointers { double binsizex,binsizey,binsizez; // bin sizes and inverse sizes double bininvx,bininvy,bininvz; - int *binhead; // index of first atom in each bin - int *bins; // index of next atom in same bin + int *binhead; // index of first atom in each bin + int *bins; // index of next atom in same bin + int *atom2bin; // bin assignment for each atom (local+ghost) - double cutoff_custom; // cutoff set by requestor + double cutoff_custom; // cutoff set by requestor NBin(class LAMMPS *); ~NBin(); diff --git a/src/nbin_standard.cpp b/src/nbin_standard.cpp index 2a72d996a5..e6941014f9 100644 --- a/src/nbin_standard.cpp +++ b/src/nbin_standard.cpp @@ -211,12 +211,14 @@ void NBinStandard::bin_atoms() for (i = nall-1; i >= nlocal; i--) { if (mask[i] & bitmask) { ibin = coord2bin(x[i]); + atom2bin[i] = ibin; bins[i] = binhead[ibin]; binhead[ibin] = i; } } for (i = atom->nfirst-1; i >= 0; i--) { ibin = coord2bin(x[i]); + atom2bin[i] = ibin; bins[i] = binhead[ibin]; binhead[ibin] = i; } @@ -224,6 +226,7 @@ void NBinStandard::bin_atoms() } else { for (i = nall-1; i >= 0; i--) { ibin = coord2bin(x[i]); + atom2bin[i] = ibin; bins[i] = binhead[ibin]; binhead[ibin] = i; } diff --git a/src/neigh_request.cpp b/src/neigh_request.cpp index 5e77c23077..bb691d00ba 100644 --- a/src/neigh_request.cpp +++ b/src/neigh_request.cpp @@ -50,6 +50,7 @@ NeighRequest::NeighRequest(LAMMPS *lmp) : Pointers(lmp) // default is no Intel-specific neighbor list build // default is no Kokkos neighbor list build // default is no Shardlow Splitting Algorithm (SSA) neighbor list build + // default is no list-specific cutoff // default is no storage of auxiliary floating point values occasional = 0; diff --git a/src/neighbor.cpp b/src/neighbor.cpp index a407c1cf6e..a5ff157a1f 100644 --- a/src/neighbor.cpp +++ b/src/neighbor.cpp @@ -640,6 +640,24 @@ int Neighbor::init_pair() delete [] neigh_stencil; delete [] neigh_pair; + // 
error check on requests + // do not allow occasional, ghost, bin list + // b/c it still uses variant of coord2bin() in NPair() method + // instead of atom2bin, this could cause error b/c atoms have + // moved out of proc domain by time occasional list is built + // solution would be to use a different NBin variant + // that used Npair::coord2bin(x,ix,iy,iz) (then delete it from NPair) + // and stored the ix,iy,iz values for all atoms (including ghosts) + // at time of binning when neighbor lists are rebuilt, + // similar to what vanilla Nbin::coord2bin() does now in atom2bin + + if (style == BIN) { + for (i = 0; i < nrequest; i++) + if (requests[i]->occasional && requests[i]->ghost) + error->all(FLERR,"Cannot request an occasional binned neighbor list " + "with ghost info"); + } + // morph requests in various ways // purpose is to avoid duplicate or inefficient builds // may add new requests if a needed request to derive from does not exist @@ -667,7 +685,7 @@ int Neighbor::init_pair() // create new lists, one per request including added requests // wait to allocate initial pages until copy lists are detected - // NOTE: can I allocation now, instead of down below? + // NOTE: can I allocate now, instead of down below? 
nlist = nrequest; @@ -1216,7 +1234,7 @@ void Neighbor::morph_copy() // check all other lists - for (j = 0; j < i; j++) { + for (j = 0; j < nrequest; j++) { if (i == j) continue; jrq = requests[j]; @@ -1224,10 +1242,13 @@ void Neighbor::morph_copy() if (jrq->copy && jrq->copylist == i) continue; - // parent list must be perpetual - // copied list can be perpetual or occasional + // other list (jrq) to copy from must be perpetual + // list that becomes a copy list (irq) can be perpetual or occasional + // if both lists are perpetual, require j < i + // to prevent circular dependence with 3 or more copies of a list if (jrq->occasional) continue; + if (!irq->occasional && j > i) continue; // both lists must be half, or both full @@ -1279,7 +1300,7 @@ void Neighbor::morph_copy() // turn list I into a copy of list J // do not copy a list from another copy list, but from its parent list - if (j < i) { + if (j < nrequest) { irq->copy = 1; if (jrq->copy) irq->copylist = jrq->copylist; else irq->copylist = j; @@ -1666,7 +1687,6 @@ int Neighbor::choose_stencil(NeighRequest *rq) else if (rq->newton == 1) newtflag = 1; else if (rq->newton == 2) newtflag = 0; - //printf("STENCIL RQ FLAGS: hff %d %d n %d g %d s %d newtflag %d\n", // rq->half,rq->full,rq->newton,rq->ghost,rq->ssa, // newtflag); @@ -2084,7 +2104,7 @@ void Neighbor::build(int topoflag) } // bin atoms for all NBin instances - // not just NBin associated with perpetual lists + // not just NBin associated with perpetual lists, also occasional lists // b/c cannot wait to bin occasional lists in build_one() call // if bin then, atoms may have moved outside of proc domain & bin extent, // leading to errors or even a crash @@ -2190,6 +2210,7 @@ void Neighbor::build_one(class NeighList *mylist, int preflag) // build the list + if (!mylist->copy) mylist->grow(atom->nlocal,atom->nlocal+atom->nghost); np->build_setup(); np->build(mylist); } diff --git a/src/npair.cpp b/src/npair.cpp index 6c3f8ac05e..3451cd6eae 100644 --- 
a/src/npair.cpp +++ b/src/npair.cpp @@ -128,6 +128,7 @@ void NPair::copy_bin_info() bininvy = nb->bininvy; bininvz = nb->bininvz; + atom2bin = nb->atom2bin; bins = nb->bins; binhead = nb->binhead; } @@ -198,53 +199,8 @@ int NPair::exclusion(int i, int j, int itype, int jtype, } /* ---------------------------------------------------------------------- - convert atom coords into local bin # - for orthogonal, only ghost atoms will have coord >= bboxhi or coord < bboxlo - take special care to insure ghosts are in correct bins even w/ roundoff - hi ghost atoms = nbin,nbin+1,etc - owned atoms = 0 to nbin-1 - lo ghost atoms = -1,-2,etc - this is necessary so that both procs on either side of PBC - treat a pair of atoms straddling the PBC in a consistent way - for triclinic, doesn't matter since stencil & neigh list built differently -------------------------------------------------------------------------- */ - -int NPair::coord2bin(double *x) -{ - int ix,iy,iz; - - if (!ISFINITE(x[0]) || !ISFINITE(x[1]) || !ISFINITE(x[2])) - error->one(FLERR,"Non-numeric positions - simulation unstable"); - - if (x[0] >= bboxhi[0]) - ix = static_cast ((x[0]-bboxhi[0])*bininvx) + nbinx; - else if (x[0] >= bboxlo[0]) { - ix = static_cast ((x[0]-bboxlo[0])*bininvx); - ix = MIN(ix,nbinx-1); - } else - ix = static_cast ((x[0]-bboxlo[0])*bininvx) - 1; - - if (x[1] >= bboxhi[1]) - iy = static_cast ((x[1]-bboxhi[1])*bininvy) + nbiny; - else if (x[1] >= bboxlo[1]) { - iy = static_cast ((x[1]-bboxlo[1])*bininvy); - iy = MIN(iy,nbiny-1); - } else - iy = static_cast ((x[1]-bboxlo[1])*bininvy) - 1; - - if (x[2] >= bboxhi[2]) - iz = static_cast ((x[2]-bboxhi[2])*bininvz) + nbinz; - else if (x[2] >= bboxlo[2]) { - iz = static_cast ((x[2]-bboxlo[2])*bininvz); - iz = MIN(iz,nbinz-1); - } else - iz = static_cast ((x[2]-bboxlo[2])*bininvz) - 1; - - return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo); -} - -/* ---------------------------------------------------------------------- - same as 
coord2bin, but also return ix,iy,iz offsets in each dim + same as coord2bin in Nbin, but also return ix,iy,iz offsets in each dim + used by some of the ghost neighbor lists ------------------------------------------------------------------------- */ int NPair::coord2bin(double *x, int &ix, int &iy, int &iz) diff --git a/src/npair.h b/src/npair.h index 8f2bcb13bc..4e5e3f5dfd 100644 --- a/src/npair.h +++ b/src/npair.h @@ -77,7 +77,7 @@ class NPair : protected Pointers { int mbinx,mbiny,mbinz; int mbinxlo,mbinylo,mbinzlo; double bininvx,bininvy,bininvz; - int *bins; + int *atom2bin,*bins; int *binhead; // data from NStencil class diff --git a/src/npair_full_bin.cpp b/src/npair_full_bin.cpp index a29acb67ab..94a6af129c 100644 --- a/src/npair_full_bin.cpp +++ b/src/npair_full_bin.cpp @@ -80,7 +80,7 @@ void NPairFullBin::build(NeighList *list) // loop over all atoms in surrounding bins in stencil including self // skip i = j - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { diff --git a/src/npair_full_bin_atomonly.cpp b/src/npair_full_bin_atomonly.cpp index 8d4fc254b5..db84733f1c 100644 --- a/src/npair_full_bin_atomonly.cpp +++ b/src/npair_full_bin_atomonly.cpp @@ -64,7 +64,7 @@ void NPairFullBinAtomonly::build(NeighList *list) // loop over all atoms in surrounding bins in stencil including self // skip i = j - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { diff --git a/src/npair_full_bin_ghost.cpp b/src/npair_full_bin_ghost.cpp index 1e258cf518..2edd03cc9e 100644 --- a/src/npair_full_bin_ghost.cpp +++ b/src/npair_full_bin_ghost.cpp @@ -87,7 +87,7 @@ void NPairFullBinGhost::build(NeighList *list) // no molecular test when i = ghost atom if (i < nlocal) { - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { if 
(i == j) continue; diff --git a/src/npair_full_multi.cpp b/src/npair_full_multi.cpp index 628a706e7a..9a2490ac5d 100644 --- a/src/npair_full_multi.cpp +++ b/src/npair_full_multi.cpp @@ -83,7 +83,7 @@ void NPairFullMulti::build(NeighList *list) // skip if i,j neighbor cutoff is less than bin distance // skip i = j - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; s = stencil_multi[itype]; distsq = distsq_multi[itype]; cutsq = cutneighsq[itype]; diff --git a/src/npair_half_bin_atomonly_newton.cpp b/src/npair_half_bin_atomonly_newton.cpp index bc425cd22e..6bbef0700a 100644 --- a/src/npair_half_bin_atomonly_newton.cpp +++ b/src/npair_half_bin_atomonly_newton.cpp @@ -90,7 +90,8 @@ void NPairHalfBinAtomonlyNewton::build(NeighList *list) // loop over all atoms in other bins in stencil, store every pair - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; + for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { jtype = type[j]; diff --git a/src/npair_half_bin_newtoff.cpp b/src/npair_half_bin_newtoff.cpp index dd072508a9..4c44741ffe 100644 --- a/src/npair_half_bin_newtoff.cpp +++ b/src/npair_half_bin_newtoff.cpp @@ -84,7 +84,7 @@ void NPairHalfBinNewtoff::build(NeighList *list) // stores own/own pairs only once // stores own/ghost pairs on both procs - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { diff --git a/src/npair_half_bin_newtoff_ghost.cpp b/src/npair_half_bin_newtoff_ghost.cpp index f486df105a..72ec15e66a 100644 --- a/src/npair_half_bin_newtoff_ghost.cpp +++ b/src/npair_half_bin_newtoff_ghost.cpp @@ -92,7 +92,7 @@ void NPairHalfBinNewtoffGhost::build(NeighList *list) // no molecular test when i = ghost atom if (i < nlocal) { - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { diff --git a/src/npair_half_bin_newton.cpp b/src/npair_half_bin_newton.cpp index 
f1fc203403..3a387870e3 100644 --- a/src/npair_half_bin_newton.cpp +++ b/src/npair_half_bin_newton.cpp @@ -119,7 +119,7 @@ void NPairHalfBinNewton::build(NeighList *list) // loop over all atoms in other bins in stencil, store every pair - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { jtype = type[j]; diff --git a/src/npair_half_bin_newton_tri.cpp b/src/npair_half_bin_newton_tri.cpp index 3ef8c3260e..169e710e0e 100644 --- a/src/npair_half_bin_newton_tri.cpp +++ b/src/npair_half_bin_newton_tri.cpp @@ -84,7 +84,7 @@ void NPairHalfBinNewtonTri::build(NeighList *list) // (equal zyx and j <= i) // latter excludes self-self interaction but allows superposed atoms - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { if (x[j][2] < ztmp) continue; diff --git a/src/npair_half_multi_newtoff.cpp b/src/npair_half_multi_newtoff.cpp index 11e45d91ff..07b5c87a6c 100644 --- a/src/npair_half_multi_newtoff.cpp +++ b/src/npair_half_multi_newtoff.cpp @@ -87,7 +87,7 @@ void NPairHalfMultiNewtoff::build(NeighList *list) // stores own/own pairs only once // stores own/ghost pairs on both procs - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; s = stencil_multi[itype]; distsq = distsq_multi[itype]; cutsq = cutneighsq[itype]; diff --git a/src/npair_half_multi_newton.cpp b/src/npair_half_multi_newton.cpp index cd3a37821f..3d90979329 100644 --- a/src/npair_half_multi_newton.cpp +++ b/src/npair_half_multi_newton.cpp @@ -121,7 +121,7 @@ void NPairHalfMultiNewton::build(NeighList *list) // loop over all atoms in other bins in stencil, store every pair // skip if i,j neighbor cutoff is less than bin distance - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; s = stencil_multi[itype]; distsq = distsq_multi[itype]; cutsq = cutneighsq[itype]; diff --git a/src/npair_half_multi_newton_tri.cpp b/src/npair_half_multi_newton_tri.cpp 
index f9aaeb0414..909c69246b 100644 --- a/src/npair_half_multi_newton_tri.cpp +++ b/src/npair_half_multi_newton_tri.cpp @@ -88,7 +88,7 @@ void NPairHalfMultiNewtonTri::build(NeighList *list) // (equal zyx and j <= i) // latter excludes self-self interaction but allows superposed atoms - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; s = stencil_multi[itype]; distsq = distsq_multi[itype]; cutsq = cutneighsq[itype]; diff --git a/src/npair_half_respa_bin_newtoff.cpp b/src/npair_half_respa_bin_newtoff.cpp index 39f68a289d..11246b4af8 100644 --- a/src/npair_half_respa_bin_newtoff.cpp +++ b/src/npair_half_respa_bin_newtoff.cpp @@ -101,7 +101,7 @@ void NPairHalfRespaBinNewtoff::build(NeighList *list) xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; if (moltemplate) { imol = molindex[i]; iatom = molatom[i]; diff --git a/src/npair_half_respa_bin_newton.cpp b/src/npair_half_respa_bin_newton.cpp index 537a72d0c1..db76678036 100644 --- a/src/npair_half_respa_bin_newton.cpp +++ b/src/npair_half_respa_bin_newton.cpp @@ -160,7 +160,7 @@ void NPairHalfRespaBinNewton::build(NeighList *list) // loop over all atoms in other bins in stencil, store every pair - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { jtype = type[j]; diff --git a/src/npair_half_respa_bin_newton_tri.cpp b/src/npair_half_respa_bin_newton_tri.cpp index 9c5fd39fbe..38621224c4 100644 --- a/src/npair_half_respa_bin_newton_tri.cpp +++ b/src/npair_half_respa_bin_newton_tri.cpp @@ -113,7 +113,7 @@ void NPairHalfRespaBinNewtonTri::build(NeighList *list) // (equal zyx and j <= i) // latter excludes self-self interaction but allows superposed atoms - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { if (x[j][2] < ztmp) continue; diff --git a/src/npair_half_size_bin_newtoff.cpp 
b/src/npair_half_size_bin_newtoff.cpp index e98923cd11..571b2484ea 100644 --- a/src/npair_half_size_bin_newtoff.cpp +++ b/src/npair_half_size_bin_newtoff.cpp @@ -105,7 +105,7 @@ void NPairHalfSizeBinNewtoff::build(NeighList *list) ytmp = x[i][1]; ztmp = x[i][2]; radi = radius[i]; - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; // loop over all atoms in surrounding bins in stencil including self // only store pair if i < j diff --git a/src/npair_half_size_bin_newton.cpp b/src/npair_half_size_bin_newton.cpp index 2cd0943ac2..a8be3ce691 100644 --- a/src/npair_half_size_bin_newton.cpp +++ b/src/npair_half_size_bin_newton.cpp @@ -156,7 +156,7 @@ void NPairHalfSizeBinNewton::build(NeighList *list) // loop over all atoms in other bins in stencil, store every pair - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue; diff --git a/src/npair_half_size_bin_newton_tri.cpp b/src/npair_half_size_bin_newton_tri.cpp index 054487d31f..1107f73026 100644 --- a/src/npair_half_size_bin_newton_tri.cpp +++ b/src/npair_half_size_bin_newton_tri.cpp @@ -112,7 +112,7 @@ void NPairHalfSizeBinNewtonTri::build(NeighList *list) // (equal zyx and j <= i) // latter excludes self-self interaction but allows superposed atoms - ibin = coord2bin(x[i]); + ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { if (x[j][2] < ztmp) continue; diff --git a/src/output.cpp b/src/output.cpp index a2275b74be..5e56ccfebc 100644 --- a/src/output.cpp +++ b/src/output.cpp @@ -652,6 +652,21 @@ void Output::delete_dump(char *id) ndump--; } +/* ---------------------------------------------------------------------- + find a dump by ID + return index of dump or -1 if not found +------------------------------------------------------------------------- */ + +int Output::find_dump(const char *id) +{ + if (id == 
NULL) return -1; + int idump; + for (idump = 0; idump < ndump; idump++) + if (strcmp(id,dump[idump]->id) == 0) break; + if (idump == ndump) return -1; + return idump; +} + /* ---------------------------------------------------------------------- set thermo output frequency from input script ------------------------------------------------------------------------- */ diff --git a/src/output.h b/src/output.h index de5eaaa70b..5354759343 100644 --- a/src/output.h +++ b/src/output.h @@ -76,6 +76,7 @@ class Output : protected Pointers { void add_dump(int, char **); // add a Dump to Dump list void modify_dump(int, char **); // modify a Dump void delete_dump(char *); // delete a Dump from Dump list + int find_dump(const char *); // find a Dump ID void set_thermo(int, char **); // set thermo output freqquency void create_thermo(int, char **); // create a thermo style diff --git a/src/pair.h b/src/pair.h index f21a53536e..140156069e 100644 --- a/src/pair.h +++ b/src/pair.h @@ -194,8 +194,8 @@ class Pair : protected Pointers { int num_tally_compute; class Compute **list_tally_compute; public: - void add_tally_callback(class Compute *); - void del_tally_callback(class Compute *); + virtual void add_tally_callback(class Compute *); + virtual void del_tally_callback(class Compute *); protected: int instance_me; // which Pair class instantiation I am diff --git a/src/pair_beck.cpp b/src/pair_beck.cpp index e3e8b0c5cc..36e44e5c2e 100644 --- a/src/pair_beck.cpp +++ b/src/pair_beck.cpp @@ -181,10 +181,8 @@ void PairBeck::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) - if (setflag[i][j]) { - cut[i][j] = cut_global; - } + for (j = i; j <= atom->ntypes; j++) + if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/pair_born.cpp b/src/pair_born.cpp index 5fc26e2529..6d420fb36b 100644 --- a/src/pair_born.cpp +++ b/src/pair_born.cpp @@ -185,7 +185,7 @@ void PairBorn::settings(int narg, char 
**arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/pair_born_coul_dsf.cpp b/src/pair_born_coul_dsf.cpp index 87c2a14baa..caec95759a 100644 --- a/src/pair_born_coul_dsf.cpp +++ b/src/pair_born_coul_dsf.cpp @@ -226,9 +226,8 @@ void PairBornCoulDSF::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) - if (setflag[i][j]) - cut_lj[i][j] = cut_lj_global; + for (j = i; j <= atom->ntypes; j++) + if (setflag[i][j]) cut_lj[i][j] = cut_lj_global; } } diff --git a/src/pair_born_coul_wolf.cpp b/src/pair_born_coul_wolf.cpp index 31c0cc715c..bad0c5ed3e 100644 --- a/src/pair_born_coul_wolf.cpp +++ b/src/pair_born_coul_wolf.cpp @@ -229,7 +229,7 @@ void PairBornCoulWolf::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut_lj[i][j] = cut_lj_global; } } diff --git a/src/pair_buck.cpp b/src/pair_buck.cpp index ac15e82020..e4da772e0a 100644 --- a/src/pair_buck.cpp +++ b/src/pair_buck.cpp @@ -176,7 +176,7 @@ void PairBuck::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/pair_buck_coul_cut.cpp b/src/pair_buck_coul_cut.cpp index 7c948f58a8..c052c3100a 100644 --- a/src/pair_buck_coul_cut.cpp +++ b/src/pair_buck_coul_cut.cpp @@ -205,7 +205,7 @@ void PairBuckCoulCut::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) { cut_lj[i][j] = cut_lj_global; cut_coul[i][j] = cut_coul_global; diff --git 
a/src/pair_coul_cut.cpp b/src/pair_coul_cut.cpp index fec592bb19..b505dcb02c 100644 --- a/src/pair_coul_cut.cpp +++ b/src/pair_coul_cut.cpp @@ -155,7 +155,7 @@ void PairCoulCut::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/pair_coul_debye.cpp b/src/pair_coul_debye.cpp index dcb84d7e2d..df4555753f 100644 --- a/src/pair_coul_debye.cpp +++ b/src/pair_coul_debye.cpp @@ -126,7 +126,7 @@ void PairCoulDebye::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/pair_dpd.cpp b/src/pair_dpd.cpp index b5b959f85b..61f700a33e 100644 --- a/src/pair_dpd.cpp +++ b/src/pair_dpd.cpp @@ -207,7 +207,7 @@ void PairDPD::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/pair_dpd_tstat.cpp b/src/pair_dpd_tstat.cpp index 6d8f75d95d..0a5ebd33f8 100644 --- a/src/pair_dpd_tstat.cpp +++ b/src/pair_dpd_tstat.cpp @@ -159,7 +159,7 @@ void PairDPDTstat::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/pair_gauss.cpp b/src/pair_gauss.cpp index c8f6afdacc..c66cfc2c80 100644 --- a/src/pair_gauss.cpp +++ b/src/pair_gauss.cpp @@ -173,7 +173,7 @@ void PairGauss::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } 
} diff --git a/src/pair_hybrid.cpp b/src/pair_hybrid.cpp index 5ff48a2d08..48364a86c4 100644 --- a/src/pair_hybrid.cpp +++ b/src/pair_hybrid.cpp @@ -33,7 +33,7 @@ using namespace LAMMPS_NS; PairHybrid::PairHybrid(LAMMPS *lmp) : Pair(lmp), styles(NULL), keywords(NULL), multiple(NULL), nmap(NULL), - map(NULL), special_lj(NULL), special_coul(NULL) + map(NULL), special_lj(NULL), special_coul(NULL), compute_tally(NULL) { nstyles = 0; @@ -59,6 +59,7 @@ PairHybrid::~PairHybrid() delete [] special_lj; delete [] special_coul; + delete [] compute_tally; delete [] svector; @@ -166,6 +167,23 @@ void PairHybrid::compute(int eflag, int vflag) if (vflag_fdotr) virial_fdotr_compute(); } + +/* ---------------------------------------------------------------------- */ + +void PairHybrid::add_tally_callback(Compute *ptr) +{ + for (int m = 0; m < nstyles; m++) + if (compute_tally[m]) styles[m]->add_tally_callback(ptr); +} + +/* ---------------------------------------------------------------------- */ + +void PairHybrid::del_tally_callback(Compute *ptr) +{ + for (int m = 0; m < nstyles; m++) + if (compute_tally[m]) styles[m]->del_tally_callback(ptr); +} + /* ---------------------------------------------------------------------- */ void PairHybrid::compute_inner() @@ -250,6 +268,8 @@ void PairHybrid::settings(int narg, char **arg) special_lj = new double*[narg]; special_coul = new double*[narg]; + compute_tally = new int[narg]; + // allocate each sub-style // allocate uses suffix, but don't store suffix version in keywords, // else syntax in coeff() will not match @@ -269,6 +289,7 @@ void PairHybrid::settings(int narg, char **arg) styles[nstyles] = force->new_pair(arg[iarg],1,dummy); force->store_style(keywords[nstyles],arg[iarg],0); special_lj[nstyles] = special_coul[nstyles] = NULL; + compute_tally[nstyles] = 1; jarg = iarg + 1; while (jarg < narg && !force->pair_map->count(arg[jarg])) jarg++; @@ -779,6 +800,20 @@ void PairHybrid::modify_params(int narg, char **arg) iarg += 5; } + // 
if 2nd keyword (after pair) is compute/tally: + // set flag to register USER-TALLY computes accordingly + + if (iarg < narg && strcmp(arg[iarg],"compute/tally") == 0) { + if (narg < iarg+2) + error->all(FLERR,"Illegal pair_modify compute/tally command"); + if (strcmp(arg[iarg+1],"yes") == 0) { + compute_tally[m] = 1; + } else if (strcmp(arg[iarg+1],"no") == 0) { + compute_tally[m] = 0; + } else error->all(FLERR,"Illegal pair_modify compute/tally command"); + iarg += 2; + } + // apply the remaining keywords to the base pair style itself and the // sub-style except for "pair" and "special". // the former is important for some keywords like "tail" or "compute" diff --git a/src/pair_hybrid.h b/src/pair_hybrid.h index 7b92505dc4..2364b16f46 100644 --- a/src/pair_hybrid.h +++ b/src/pair_hybrid.h @@ -55,6 +55,9 @@ class PairHybrid : public Pair { int check_ijtype(int, int, char *); + virtual void add_tally_callback(class Compute *); + virtual void del_tally_callback(class Compute *); + protected: int nstyles; // # of sub-styles Pair **styles; // list of Pair style classes @@ -69,6 +72,7 @@ class PairHybrid : public Pair { int ***map; // list of sub-styles itype,jtype points to double **special_lj; // list of per style LJ exclusion factors double **special_coul; // list of per style Coulomb exclusion factors + int *compute_tally; // list of on/off flags for tally computes void allocate(); void flags(); diff --git a/src/pair_lj96_cut.cpp b/src/pair_lj96_cut.cpp index 1f79226e64..f4b2747d40 100644 --- a/src/pair_lj96_cut.cpp +++ b/src/pair_lj96_cut.cpp @@ -442,7 +442,7 @@ void PairLJ96Cut::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/pair_lj_cubic.cpp b/src/pair_lj_cubic.cpp index 633c12019e..c96d4490cb 100644 --- a/src/pair_lj_cubic.cpp +++ b/src/pair_lj_cubic.cpp @@ -179,14 +179,8 @@ 
void PairLJCubic::settings(int narg, char **arg) { if (narg != 0) error->all(FLERR,"Illegal pair_style command"); - // reset cutoffs that have been explicitly set - - if (allocated) { - int i,j; - for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) - if (setflag[i][j]) cut[i][j] = 0.0; - } + // NOTE: lj/cubic has no global cutoff. instead the cutoff is + // inferred from the lj parameters. so we must not reset cutoffs here. } /* ---------------------------------------------------------------------- diff --git a/src/pair_lj_cut.cpp b/src/pair_lj_cut.cpp index bffdd7fff4..a3ebf414c9 100644 --- a/src/pair_lj_cut.cpp +++ b/src/pair_lj_cut.cpp @@ -436,7 +436,7 @@ void PairLJCut::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/pair_lj_cut_coul_cut.cpp b/src/pair_lj_cut_coul_cut.cpp index 0d2bff3c9f..0d62c43dc3 100644 --- a/src/pair_lj_cut_coul_cut.cpp +++ b/src/pair_lj_cut_coul_cut.cpp @@ -198,7 +198,7 @@ void PairLJCutCoulCut::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) { cut_lj[i][j] = cut_lj_global; cut_coul[i][j] = cut_coul_global; diff --git a/src/pair_lj_cut_coul_dsf.cpp b/src/pair_lj_cut_coul_dsf.cpp index 538336d8e6..09293a6f4c 100644 --- a/src/pair_lj_cut_coul_dsf.cpp +++ b/src/pair_lj_cut_coul_dsf.cpp @@ -224,7 +224,7 @@ void PairLJCutCoulDSF::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut_lj[i][j] = cut_lj_global; } diff --git a/src/pair_lj_expand.cpp b/src/pair_lj_expand.cpp index 90f1ae0df2..2fd780472a 100644 --- a/src/pair_lj_expand.cpp +++ 
b/src/pair_lj_expand.cpp @@ -179,7 +179,7 @@ void PairLJExpand::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/pair_lj_gromacs.cpp b/src/pair_lj_gromacs.cpp index bb0a6e647e..3375c6c4e2 100644 --- a/src/pair_lj_gromacs.cpp +++ b/src/pair_lj_gromacs.cpp @@ -204,7 +204,7 @@ void PairLJGromacs::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) { cut_inner[i][j] = cut_inner_global; cut[i][j] = cut_global; diff --git a/src/pair_lj_smooth.cpp b/src/pair_lj_smooth.cpp index c59b35aebf..1afaef9235 100644 --- a/src/pair_lj_smooth.cpp +++ b/src/pair_lj_smooth.cpp @@ -206,7 +206,7 @@ void PairLJSmooth::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) { cut_inner[i][j] = cut_inner_global; cut[i][j] = cut_global; diff --git a/src/pair_lj_smooth_linear.cpp b/src/pair_lj_smooth_linear.cpp index 189475aa71..415ca7b6d3 100644 --- a/src/pair_lj_smooth_linear.cpp +++ b/src/pair_lj_smooth_linear.cpp @@ -175,7 +175,7 @@ void PairLJSmoothLinear::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } diff --git a/src/pair_mie_cut.cpp b/src/pair_mie_cut.cpp index 3c13c19a3b..312fb7bc70 100644 --- a/src/pair_mie_cut.cpp +++ b/src/pair_mie_cut.cpp @@ -447,7 +447,7 @@ void PairMIECut::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) 
cut[i][j] = cut_global; } } diff --git a/src/pair_morse.cpp b/src/pair_morse.cpp index 2144ad5008..5f90642065 100644 --- a/src/pair_morse.cpp +++ b/src/pair_morse.cpp @@ -165,7 +165,7 @@ void PairMorse::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/pair_soft.cpp b/src/pair_soft.cpp index 8ffd139307..b05058b4dc 100644 --- a/src/pair_soft.cpp +++ b/src/pair_soft.cpp @@ -158,7 +158,7 @@ void PairSoft::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/pair_yukawa.cpp b/src/pair_yukawa.cpp index a38e2aa880..0e5fd36cd6 100644 --- a/src/pair_yukawa.cpp +++ b/src/pair_yukawa.cpp @@ -162,7 +162,7 @@ void PairYukawa::settings(int narg, char **arg) if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) + for (j = i; j <= atom->ntypes; j++) if (setflag[i][j]) cut[i][j] = cut_global; } } diff --git a/src/pointers.h b/src/pointers.h index dd528c3a74..82b49c1dad 100644 --- a/src/pointers.h +++ b/src/pointers.h @@ -56,7 +56,8 @@ class Pointers { infile(ptr->infile), screen(ptr->screen), logfile(ptr->logfile), - atomKK(ptr->atomKK) {} + atomKK(ptr->atomKK), + python(ptr->python) {} virtual ~Pointers() {} protected: @@ -83,6 +84,7 @@ class Pointers { FILE *&logfile; class AtomKokkos *&atomKK; + class Python *&python; }; } diff --git a/src/python.cpp b/src/python.cpp new file mode 100644 index 0000000000..e32e2a161c --- /dev/null +++ b/src/python.cpp @@ -0,0 +1,115 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + 
Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "python.h" +#include "error.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +Python::Python(LAMMPS *lmp) : Pointers(lmp) +{ + // implementation of Python interface is only loaded on demand + // and only if PYTHON package has been installed and compiled into binary + impl = NULL; +} + +/* ---------------------------------------------------------------------- */ + +Python::~Python() +{ + delete impl; +} + +/* ---------------------------------------------------------------------- */ + +PythonInterface::~PythonInterface() +{ +} + +/* ---------------------------------------------------------------------- */ + +void Python::init() +{ +#if LMP_PYTHON + if (!impl) impl = new PythonImpl(lmp); +#else + error->all(FLERR,"Python support missing! 
Compile with PYTHON package installed!"); +#endif +} + +/* ---------------------------------------------------------------------- */ +bool Python::is_enabled() const { +#if LMP_PYTHON + return true; +#else + return false; +#endif +} + +/* ---------------------------------------------------------------------- */ + +void Python::command(int narg, char **arg) +{ + init(); + impl->command(narg, arg); +} + +/* ------------------------------------------------------------------ */ + +void Python::invoke_function(int ifunc, char *result) +{ + init(); + impl->invoke_function(ifunc, result); +} + +/* ------------------------------------------------------------------ */ + +int Python::find(char *name) +{ + init(); + return impl->find(name); +} + +/* ------------------------------------------------------------------ */ + +int Python::variable_match(char *name, char *varname, int numeric) +{ + init(); + return impl->variable_match(name, varname, numeric); +} + +/* ------------------------------------------------------------------ */ + +char * Python::long_string(int ifunc) +{ + init(); + return impl->long_string(ifunc); +} + +/* ------------------------------------------------------------------ */ + +int Python::execute_string(char *cmd) +{ + init(); + return impl->execute_string(cmd); +} + +/* ------------------------------------------------------------------ */ + +int Python::execute_file(char *fname) +{ + init(); + return impl->execute_file(fname); +} diff --git a/src/PYTHON/python.h b/src/python.h similarity index 82% rename from src/PYTHON/python.h rename to src/python.h index 5f65e3970b..190fd6ddb6 100644 --- a/src/PYTHON/python.h +++ b/src/python.h @@ -18,49 +18,46 @@ namespace LAMMPS_NS { -class Python : protected Pointers { - public: - int python_exists; +class PythonInterface { +public: + virtual ~PythonInterface(); + virtual void command(int, char **) = 0; + virtual void invoke_function(int, char *) = 0; + virtual int find(char *) = 0; + virtual int 
variable_match(char *, char *, int) = 0; + virtual char * long_string(int ifunc) = 0; + virtual int execute_string(char *) = 0; + virtual int execute_file(char *) = 0; +}; +class Python : protected Pointers { +public: Python(class LAMMPS *); ~Python(); + void command(int, char **); void invoke_function(int, char *); int find(char *); int variable_match(char *, char *, int); - char *long_string(int); + char * long_string(int ifunc); + int execute_string(char *); + int execute_file(char *); - private: - int ninput,noutput,length_longstr; - char **istr; - char *ostr,*format; - void *pyMain; + bool is_enabled() const; + void init(); - struct PyFunc { - char *name; - int ninput,noutput; - int *itype,*ivarflag; - int *ivalue; - double *dvalue; - char **svalue; - int otype; - char *ovarname; - char *longstr; - int length_longstr; - void *pFunc; - }; - - PyFunc *pfuncs; - int nfunc; - - int create_entry(char *); - void deallocate(int); +private: + PythonInterface * impl; }; } #endif +#if LMP_PYTHON +#include "python_impl.h" +#endif + /* ERROR/WARNING messages: E: Invalid python command diff --git a/src/respa.cpp b/src/respa.cpp index 7646115fa9..5d51ff64ee 100644 --- a/src/respa.cpp +++ b/src/respa.cpp @@ -398,24 +398,27 @@ void Respa::init() setup before run ------------------------------------------------------------------------- */ -void Respa::setup() +void Respa::setup(int flag) { if (comm->me == 0 && screen) { fprintf(screen,"Setting up r-RESPA run ...\n"); - fprintf(screen," Unit style : %s\n", update->unit_style); - fprintf(screen," Current step : " BIGINT_FORMAT "\n", update->ntimestep); - fprintf(screen," Time steps :"); - for (int ilevel=0; ilevel < nlevels; ++ilevel) - fprintf(screen," %d:%g",ilevel+1, step[ilevel]); - fprintf(screen,"\n r-RESPA fixes :"); - for (int l=0; l < modify->n_post_force_respa; ++l) { - Fix *f = modify->fix[modify->list_post_force_respa[l]]; - if (f->respa_level >= 0) - fprintf(screen," %d:%s[%s]", - 
MIN(f->respa_level+1,nlevels),f->style,f->id); + if (flag) { + fprintf(screen," Unit style : %s\n", update->unit_style); + fprintf(screen," Current step : " BIGINT_FORMAT "\n", + update->ntimestep); + fprintf(screen," Time steps :"); + for (int ilevel=0; ilevel < nlevels; ++ilevel) + fprintf(screen," %d:%g",ilevel+1, step[ilevel]); + fprintf(screen,"\n r-RESPA fixes :"); + for (int l=0; l < modify->n_post_force_respa; ++l) { + Fix *f = modify->fix[modify->list_post_force_respa[l]]; + if (f->respa_level >= 0) + fprintf(screen," %d:%s[%s]", + MIN(f->respa_level+1,nlevels),f->style,f->id); + } + fprintf(screen,"\n"); + timer->print_timeout(screen); } - fprintf(screen,"\n"); - timer->print_timeout(screen); } update->setupflag = 1; @@ -482,7 +485,7 @@ void Respa::setup() sum_flevel_f(); modify->setup(vflag); - output->setup(); + output->setup(flag); update->setupflag = 0; } diff --git a/src/respa.h b/src/respa.h index 3355cd2eb7..0b08b12bd7 100644 --- a/src/respa.h +++ b/src/respa.h @@ -48,7 +48,7 @@ class Respa : public Integrate { Respa(class LAMMPS *, int, char **); virtual ~Respa(); virtual void init(); - virtual void setup(); + virtual void setup(int flag=1); virtual void setup_minimal(int); virtual void run(int); virtual void cleanup(); diff --git a/src/thermo.cpp b/src/thermo.cpp index dbbeff4998..18deecb1a8 100644 --- a/src/thermo.cpp +++ b/src/thermo.cpp @@ -1145,6 +1145,22 @@ int Thermo::evaluate_keyword(char *word, double *answer) compute_atoms(); dvalue = bivalue; + } else if (strcmp(word,"bonds") == 0) { + compute_bonds(); + dvalue = bivalue; + + } else if (strcmp(word,"angles") == 0) { + compute_angles(); + dvalue = bivalue; + + } else if (strcmp(word,"dihedrals") == 0) { + compute_dihedrals(); + dvalue = bivalue; + + } else if (strcmp(word,"impropers") == 0) { + compute_impropers(); + dvalue = bivalue; + } else if (strcmp(word,"temp") == 0) { if (!temperature) error->all(FLERR,"Thermo keyword in variable requires " @@ -1369,11 +1385,6 @@ int 
Thermo::evaluate_keyword(char *word, double *answer) else if (strcmp(word,"ylat") == 0) compute_ylat(); else if (strcmp(word,"zlat") == 0) compute_zlat(); - else if (strcmp(word,"bonds") == 0) compute_bonds(); - else if (strcmp(word,"angles") == 0) compute_angles(); - else if (strcmp(word,"dihedrals") == 0) compute_dihedrals(); - else if (strcmp(word,"impropers") == 0) compute_impropers(); - else if (strcmp(word,"pxx") == 0) { if (!pressure) error->all(FLERR,"Thermo keyword in variable requires " diff --git a/src/variable.cpp b/src/variable.cpp index 3eea50a463..6e16597c63 100644 --- a/src/variable.cpp +++ b/src/variable.cpp @@ -34,7 +34,7 @@ #include "random_mars.h" #include "math_const.h" #include "atom_masks.h" -#include "python_wrapper.h" +#include "python.h" #include "memory.h" #include "info.h" #include "error.h" @@ -111,10 +111,6 @@ Variable::Variable(LAMMPS *lmp) : Pointers(lmp) precedence[MULTIPLY] = precedence[DIVIDE] = precedence[MODULO] = 6; precedence[CARAT] = 7; precedence[UNARY] = precedence[NOT] = 8; - - // Python wrapper, real or dummy - - python = new Python(lmp); } /* ---------------------------------------------------------------------- */ @@ -144,7 +140,6 @@ Variable::~Variable() delete randomequal; delete randomatom; - delete python; } /* ---------------------------------------------------------------------- @@ -464,7 +459,7 @@ void Variable::set(int narg, char **arg) } else if (strcmp(arg[1],"python") == 0) { if (narg != 3) error->all(FLERR,"Illegal variable command"); - if (!python->python_exists) + if (!python->is_enabled()) error->all(FLERR,"LAMMPS is not built with Python embedded"); int ivar = find(arg[0]); if (ivar >= 0) { @@ -735,7 +730,7 @@ void Variable::set_arrays(int i) void Variable::python_command(int narg, char **arg) { - if (!python->python_exists) + if (!python->is_enabled()) error->all(FLERR,"LAMMPS is not built with Python embedded"); python->command(narg,arg); } diff --git a/src/variable.h b/src/variable.h index 
76607e96b4..886dd7b422 100644 --- a/src/variable.h +++ b/src/variable.h @@ -78,8 +78,6 @@ class Variable : protected Pointers { int precedence[18]; // precedence level of math operators // set length to include up to XOR in enum - class Python *python; // ptr to embedded Python interpreter - struct Tree { // parse tree for atom-style or vector-style vars double value; // single scalar double *array; // per-atom or per-type list of doubles diff --git a/src/verlet.cpp b/src/verlet.cpp index 915648040e..b242b00722 100644 --- a/src/verlet.cpp +++ b/src/verlet.cpp @@ -85,14 +85,17 @@ void Verlet::init() setup before run ------------------------------------------------------------------------- */ -void Verlet::setup() +void Verlet::setup(int flag) { if (comm->me == 0 && screen) { fprintf(screen,"Setting up Verlet run ...\n"); - fprintf(screen," Unit style : %s\n", update->unit_style); - fprintf(screen," Current step : " BIGINT_FORMAT "\n", update->ntimestep); - fprintf(screen," Time step : %g\n", update->dt); - timer->print_timeout(screen); + if (flag) { + fprintf(screen," Unit style : %s\n", update->unit_style); + fprintf(screen," Current step : " BIGINT_FORMAT "\n", + update->ntimestep); + fprintf(screen," Time step : %g\n", update->dt); + timer->print_timeout(screen); + } } if (lmp->kokkos) @@ -148,7 +151,7 @@ void Verlet::setup() if (force->newton) comm->reverse_comm(); modify->setup(vflag); - output->setup(); + output->setup(flag); update->setupflag = 0; } diff --git a/src/verlet.h b/src/verlet.h index 0e2a333fab..29bd3f16b3 100644 --- a/src/verlet.h +++ b/src/verlet.h @@ -29,7 +29,7 @@ class Verlet : public Integrate { Verlet(class LAMMPS *, int, char **); virtual ~Verlet() {} virtual void init(); - virtual void setup(); + virtual void setup(int flag=1); virtual void setup_minimal(int); virtual void run(int); void cleanup(); diff --git a/src/version.h b/src/version.h index 0d5dc11b4a..dc0ebe76b8 100644 --- a/src/version.h +++ b/src/version.h @@ -1 +1 @@ -#define 
LAMMPS_VERSION "31 Mar 2017" +#define LAMMPS_VERSION "19 May 2017" diff --git a/src/write_restart.cpp b/src/write_restart.cpp index f1ee4a4472..77e2cb05d9 100644 --- a/src/write_restart.cpp +++ b/src/write_restart.cpp @@ -297,6 +297,9 @@ void WriteRestart::write(char *file) // communication buffer for my atom info // max_size = largest buffer needed by any proc + // NOTE: we are assuming size_restart() returns 32-bit int + // for a huge one-proc problem, nlocal could be 32-bit + // but nlocal * doubles-peratom could overflow int max_size; int send_size = atom->avec->size_restart(); diff --git a/tools/msi2lmp/README b/tools/msi2lmp/README index a20f6e893f..db9b1aca5e 100644 --- a/tools/msi2lmp/README +++ b/tools/msi2lmp/README @@ -1,98 +1,50 @@ -Axel Kohlmeyer is the current maintainer of the msi2lmp tool. -Please send any inquiries about msi2lmp to the lammps-users mailing list. -06 Oct 2016 Axel Kohlmeyer + msi2lmp.exe -Improved whitespace handling in parsing topology and force field -files to avoid bogus warnings about type name truncation. +This code has several known limitations listed below under "LIMITATIONS" +(and possibly some unknown ones, too) and is no longer under active +development. Only the occasional bugfix is applied. -24 Oct 2015 Axel Kohlmeyer +Please send any inquiries about msi2lmp to the lammps-users +mailing list and not to individual people. -Added check to make certain that force field files -are consistent with the notation of non-bonded parameters -that the msi2lmp code expects. For Class 1 and OPLS-AA -the A-B notation with geometric mixing is expected and for -Class 2 the r-eps notation with sixthpower mixing. +------------------------------------------------------------------------ -11 Sep 2014 Axel Kohlmeyer +OVERVIEW -Refactored ReadMdfFile.c so it more consistently honors -the MAX_NAME and MAX_STRING string length defines and -potentially handles inputs with long names better. 
+This is the third version of a program that generates a LAMMPS data file +based on the information in MSI .car (atom coordinates), .mdf (molecular +topology) and .frc (forcefield) files. The .car and .mdf files are +specific to a molecular system while the .frc file is specific to a +forcefield version. The only coherency needed between .frc and +.car/.mdf files are the atom types. -27 May 2014 Axel Kohlmeyer +The first version was written by Steve Lustig at Dupont, but required +using Discover to derive internal coordinates and forcefield parameters. -Added TopoTools style type hints as comments to all Mass, PairCoeff, -BondCoeff, AngleCoeff, DihedralCoeff, ImproperCoeff entries. -This should make it easier to identify force field entries with -the structure and force field map in the data file later. +The second version was written by Michael Peachey while an intern in the +Cray Chemistry Applications Group managed by John Carpenter. This +version derived internal coordinates from the mdf file and looked up +parameters in the frc file thus eliminating the need for Discover. -06 Mar 2014 Axel Kohlmeyer +The third version was written by John Carpenter to optimize the +performance of the program for large molecular systems (the original +code for deriving atom numbers was quadratic in time) and to make the +program fully dynamic. The second version used fixed dimension arrays +for the internal coordinates. -Fixed a bug in handling of triclinic cells, where the matrices to -convert to and from fractional coordinates were incorrectly built. +The third version was revised in Fall 2011 by Stephanie Teich-McGoldrick +to add support for non-orthogonal cells. -26 Oct 2013 Axel Kohlmeyer +The next revision was started in Summer/Fall 2013 by Axel Kohlmeyer to +improve portability to Windows compilers, clean up command line parsing +and improve compatibility with the then current LAMMPS versions. 
This +revision removes compatibility with the obsolete LAMMPS version written +in Fortran 90. -Implemented writing out force field style hints in generated data -files for improved consistency checking when reading those files. -Also added writing out CGCMM style comments to identify atom types. +INSTALLATION & USAGE -08 Oct 2013 Axel Kohlmeyer - -Fixed a memory access violation with Class 2 force fields. -Free all allocated memory to better detection of memory errors. -Print out version number and data with all print levels > 0. -Added valgrind checks to the regression tests - -08 Oct 2013 Axel Kohlmeyer - -Fixed a memory access violation with Class 2 force fields. -Free all allocated memory to better detection of memory errors. -Print out version number and data with all print levels > 0. -Added valgrind checks to the regression tests - -02 Aug 2013 Axel Kohlmeyer - -Added rudimentary support for OPLS-AA based on -input provided by jeff greathouse. - -18 Jul 2013 Axel Kohlmeyer - -Added support for writing out image flags -Improved accuracy of atom masses -Added flag for shifting the entire system -Fixed some minor logic bugs and prepared -for supporting other force fields and morse style bonds. - -12 Jul 2013 Axel Kohlmeyer - -Fixed the bug that caused improper coefficients to be wrong -Cleaned up the handling of box parameters and center the box -by default around the system/molecule. Added a flag to make -this step optional and center the box around the origin instead. -Added a regression test script with examples. - -1 Jul 2013 Axel Kohlmeyer - -Cleanup and improved port to windows. -Removed some more static string limits. -Added print level 3 for additional output. -Make code stop at missing force field parameters -and added -i flag to override this. -Safer argument checking. -Provide short versions for all flags. 
- -23 Sep 2011 - -added support for triclinic boxes -see msi2lmp/TriclinicModification.pdf doc for details - ------------------------------ - - msi2lmp V3.6 4/10/2005 - - This program uses the .car and .mdf files from MSI/Biosyms's INSIGHT +This program uses the .car and .mdf files from MSI/Biosyms's INSIGHT program to produce a LAMMPS data file. 1. Building msi2lmp @@ -178,50 +130,111 @@ see msi2lmp/TriclinicModification.pdf doc for details -- the LAMMPS data file is written to .data protocol and error information is written to the screen. -**************************************************************** -* -* msi2lmp -* -* This is the third version of a program that generates a LAMMPS -* data file based on the information in MSI .car (atom -* coordinates), .mdf (molecular topology) and .frc (forcefield) -* files. The .car and .mdf files are specific to a molecular -* system while the .frc file is specific to a forcefield version. -* The only coherency needed between .frc and .car/.mdf files are -* the atom types. -* -* The first version was written by Steve Lustig at Dupont, but -* required using Discover to derive internal coordinates and -* forcefield parameters -* -* The second version was written by Michael Peachey while an -* intern in the Cray Chemistry Applications Group managed -* by John Carpenter. This version derived internal coordinates -* from the mdf file and looked up parameters in the frc file -* thus eliminating the need for Discover. -* -* The third version was written by John Carpenter to optimize -* the performance of the program for large molecular systems -* (the original code for deriving atom numbers was quadratic in time) -* and to make the program fully dynamic. The second version used -* fixed dimension arrays for the internal coordinates. -* -* The current maintainer is only reluctantly doing so because John Mayo no longer -* needs this code. 
-* -* V3.2 corresponds to adding code to MakeLists.c to gracefully deal with -* systems that may only be molecules of 1 to 3 atoms. In V3.1, the values -* for number_of_dihedrals, etc. could be unpredictable in these systems. -* -* V3.3 was generated in response to a strange error reading a MDF file generated by -* Accelys' Materials Studio GUI. Simply rewriting the input part of ReadMdfFile.c -* seems to have fixed the problem. -* -* V3.4 and V3.5 are minor upgrades to fix bugs associated mostly with .car and .mdf files -* written by Accelys' Materials Studio GUI. -* -* V3.6 outputs to LAMMPS 2005 (C++ version). -* -* Contact: Kelly L. Anderson, kelly.anderson@cantab.net -* -* April 2005 +------------------------------------------------------------------------ + +LIMITATIONS + +msi2lmp has the following known limitations: + +- there is no support to select morse bonds over harmonic bonds +- there is no support for auto-equivalences to supplement fully + parameterized interactions with heuristic ones +- there is no support for bond increments + +------------------------------------------------------------------------ + +CHANGELOG + +06 Oct 2016 Axel Kohlmeyer + +Improved whitespace handling in parsing topology and force field +files to avoid bogus warnings about type name truncation. + +24 Oct 2015 Axel Kohlmeyer + +Added check to make certain that force field files are consistent with +the notation of non-bonded parameters that the msi2lmp code expects. +For Class 1 and OPLS-AA the A-B notation with geometric mixing is +expected and for Class 2 the r-eps notation with sixthpower mixing. + +11 Sep 2014 Axel Kohlmeyer + +Refactored ReadMdfFile.c so it more consistently honors the MAX_NAME +and MAX_STRING string length defines and potentially handles inputs +with long names better. + +27 May 2014 Axel Kohlmeyer + +Added TopoTools style type hints as comments to all Mass, PairCoeff, +BondCoeff, AngleCoeff, DihedralCoeff, ImproperCoeff entries. 
+This should make it easier to identify force field entries with +the structure and force field map in the data file later. + +06 Mar 2014 Axel Kohlmeyer + +Fixed a bug in handling of triclinic cells, where the matrices to +convert to and from fractional coordinates were incorrectly built. + +26 Oct 2013 Axel Kohlmeyer + +Implemented writing out force field style hints in generated data +files for improved consistency checking when reading those files. +Also added writing out CGCMM style comments to identify atom types. + +08 Oct 2013 Axel Kohlmeyer + +Fixed a memory access violation with Class 2 force fields. Free all +allocated memory for better detection of memory errors. Print out +version number and data with all print levels > 0. Added valgrind +checks to the regression tests. + +02 Aug 2013 Axel Kohlmeyer + +Added rudimentary support for OPLS-AA based on input provided +by Jeff Greathouse. + +18 Jul 2013 Axel Kohlmeyer + +Added support for writing out image flags. Improved accuracy of atom +masses. Added flag for shifting the entire system. Fixed some minor +logic bugs and prepared for supporting other force fields and morse +style bonds. + +12 Jul 2013 Axel Kohlmeyer + +Fixed the bug that caused improper coefficients to be wrong. Cleaned up +the handling of box parameters and center the box by default around the +system/molecule. Added a flag to make this step optional and center the +box around the origin instead. Added a regression test script with +examples. + +1 Jul 2013 Axel Kohlmeyer + +Cleanup and improved port to Windows. Removed some more static string +limits. Added print level 3 for additional output. Make code stop at +missing force field parameters and added -i flag to override this. +Safer argument checking. Provide short versions for all flags. + +23 Sep 2011 + +added support for triclinic boxes + +V3.6 outputs to LAMMPS 2005 (C++ version). + +Contact: Kelly L. 
Anderson, kelly.anderson@cantab.net + +V3.4 and V3.5 are minor upgrades to fix bugs associated mostly with .car + and .mdf files written by Accelrys' Materials Studio GUI. April 2005 + +V3.3 was generated in response to a strange error reading an MDF file +generated by Accelrys' Materials Studio GUI. Simply rewriting the input +part of ReadMdfFile.c seems to have fixed the problem. + +V3.2 corresponds to adding code to MakeLists.c to gracefully deal with +systems that may only be molecules of 1 to 3 atoms. In V3.1, the values +for number_of_dihedrals, etc. could be unpredictable in these systems. + +----------------------------- + + msi2lmp v3.9.8 6/10/2016 + diff --git a/tools/msi2lmp/src/GetParameters.c b/tools/msi2lmp/src/GetParameters.c index e183c529e0..192b4d296c 100644 --- a/tools/msi2lmp/src/GetParameters.c +++ b/tools/msi2lmp/src/GetParameters.c @@ -136,7 +136,7 @@ void GetParameters() if (forcefield & (FF_TYPE_CLASS1|FF_TYPE_OPLSAA)) { bondtypes[i].params[0] = ff_bond.data[k].ff_param[1]; bondtypes[i].params[1] = ff_bond.data[k].ff_param[0]; - } + } if (forcefield & FF_TYPE_CLASS2) { for (j=0; j < 4; j++) diff --git a/tools/msi2lmp/src/InitializeItems.c b/tools/msi2lmp/src/InitializeItems.c index 4df9fd0f10..1e33636913 100644 --- a/tools/msi2lmp/src/InitializeItems.c +++ b/tools/msi2lmp/src/InitializeItems.c @@ -68,7 +68,7 @@ void InitializeItems(void) if (forcefield & (FF_TYPE_CLASS1|FF_TYPE_OPLSAA)) { strcpy(ff_tor.keyword,"#torsion_1"); ff_tor.number_of_parameters = 3; - } + } if (forcefield & FF_TYPE_CLASS2) { strcpy(ff_tor.keyword,"#torsion_3"); diff --git a/tools/msi2lmp/src/WriteDataFile.c b/tools/msi2lmp/src/WriteDataFile.c index 498978406f..c03eba71c5 100644 --- a/tools/msi2lmp/src/WriteDataFile.c +++ b/tools/msi2lmp/src/WriteDataFile.c @@ -144,7 +144,7 @@ void WriteDataFile(char *nameroot) else if (forcefield & FF_TYPE_CLASS2) fputs(" # class2\n\n",DatF); } else fputs("\n\n",DatF); - + for (i=0; i < no_angle_types; i++) { fprintf(DatF, " %3i", 
i+1); for ( j = 0; j < m; j++) diff --git a/tools/msi2lmp/src/msi2lmp.c b/tools/msi2lmp/src/msi2lmp.c index c94d4b4d73..15cfddd258 100644 --- a/tools/msi2lmp/src/msi2lmp.c +++ b/tools/msi2lmp/src/msi2lmp.c @@ -142,9 +142,6 @@ * and to make the program fully dynamic. The second version used * fixed dimension arrays for the internal coordinates. * -* John Carpenter can be contacted by sending email to -* jec374@earthlink.net -* * November 2000 */ @@ -356,7 +353,7 @@ int main (int argc, char *argv[]) if (centerflag) puts(" Output is recentered around geometrical center"); if (hintflag) puts(" Output contains style flag hints"); else puts(" Style flag hints disabled"); - printf(" System translated by: %g %g %g\n",shift[0],shift[1],shift[2]); + printf(" System translated by: %g %g %g\n",shift[0],shift[1],shift[2]); } n = 0; @@ -374,7 +371,7 @@ int main (int argc, char *argv[]) if (n == 0) { if (iflag > 0) fputs(" WARNING",stderr); else fputs(" Error ",stderr); - + fputs("- forcefield name and class appear to be inconsistent\n\n",stderr); if (iflag == 0) return 7; } diff --git a/tools/msi2lmp/src/msi2lmp.h b/tools/msi2lmp/src/msi2lmp.h index 377ab1a6c3..4716f719d6 100644 --- a/tools/msi2lmp/src/msi2lmp.h +++ b/tools/msi2lmp/src/msi2lmp.h @@ -24,13 +24,13 @@ * and to make the program fully dynamic. The second version used * fixed dimension arrays for the internal coordinates. * -* The thrid version was revised in Fall 2011 by +* The third version was revised in Fall 2011 by * Stephanie Teich-McGoldrick to add support non-orthogonal cells. * * The next revision was started in Summer/Fall 2013 by * Axel Kohlmeyer to improve portability to Windows compilers, * clean up command line parsing and improve compatibility with -* the then current LAMMPS versions. This revision removes +* the then current LAMMPS versions. This revision removes * compatibility with the obsolete LAMMPS version written in Fortran 90. */