diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index 0e5bf70a41..cc1e051629 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -17,6 +17,8 @@ if(PKG_KOKKOS) ${KOKKOS_PKG_SOURCES_DIR}/atom_vec_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/comm_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/comm_tiled_kokkos.cpp + ${KOKKOS_PKG_SOURCES_DIR}/min_kokkos.cpp + ${KOKKOS_PKG_SOURCES_DIR}/min_linesearch_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/neighbor_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/neigh_list_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/neigh_bond_kokkos.cpp diff --git a/doc/src/Commands_bond.txt b/doc/src/Commands_bond.txt index 40c2d0283a..aecbf4cca0 100644 --- a/doc/src/Commands_bond.txt +++ b/doc/src/Commands_bond.txt @@ -108,7 +108,7 @@ OPT. "class2 (ko)"_dihedral_class2.html, "cosine/shift/exp (o)"_dihedral_cosine_shift_exp.html, "fourier (io)"_dihedral_fourier.html, -"harmonic (io)"_dihedral_harmonic.html, +"harmonic (iko)"_dihedral_harmonic.html, "helix (o)"_dihedral_helix.html, "multi/harmonic (o)"_dihedral_multi_harmonic.html, "nharmonic (o)"_dihedral_nharmonic.html, diff --git a/doc/src/Run_options.txt b/doc/src/Run_options.txt index 87b272a866..df1a57421a 100644 --- a/doc/src/Run_options.txt +++ b/doc/src/Run_options.txt @@ -126,9 +126,10 @@ are intended for computational work like running LAMMPS. By default Ng = 1 and Ns is not set. Depending on which flavor of MPI you are running, LAMMPS will look for -one of these 3 environment variables +one of these 4 environment variables SLURM_LOCALID (various MPI variants compiled with SLURM support) +MPT_LRANK (HPE MPI) MV2_COMM_WORLD_LOCAL_RANK (Mvapich) OMPI_COMM_WORLD_LOCAL_RANK (OpenMPI) :pre diff --git a/doc/src/comm_modify.txt b/doc/src/comm_modify.txt index 5f03636c1d..9a792f6cc6 100644 --- a/doc/src/comm_modify.txt +++ b/doc/src/comm_modify.txt @@ -40,11 +40,12 @@ coordinates and other properties are exchanged between neighboring processors and stored as properties of ghost atoms. NOTE: These options apply to the currently defined comm style. When -you specify a "comm_style"_comm_style.html command, all communication -settings are restored to their default values, including those +you specify a "comm_style"_comm_style.html or +"read_restart"_read_restart.html command, all communication settings +are restored to their default or stored values, including those previously reset by a comm_modify command. Thus if your input script -specifies a comm_style command, you should use the comm_modify command -after it. +specifies a comm_style or read_restart command, you should use the +comm_modify command after it. The {mode} keyword determines whether a single or multiple cutoff distances are used to determine which atoms to communicate. diff --git a/doc/src/compute_bond_local.txt b/doc/src/compute_bond_local.txt index 6055d28770..889741753a 100644 --- a/doc/src/compute_bond_local.txt +++ b/doc/src/compute_bond_local.txt @@ -15,10 +15,11 @@ compute ID group-ID bond/local value1 value2 ... keyword args ... :pre ID, group-ID are documented in "compute"_compute.html command :ulb,l bond/local = style name of this compute command :l one or more values may be appended :l -value = {dist} or {engpot} or {force} or {engvib} or {engrot} or {engtrans} or {omega} or {velvib} or {v_name} :l +value = {dist} or {engpot} or {force} or {fx} or {fy} or {fz} or {engvib} or {engrot} or {engtrans} or {omega} or {velvib} or {v_name} :l {dist} = bond distance {engpot} = bond potential energy {force} = bond force :pre + {fx},{fy},{fz} = components of bond force {engvib} = bond kinetic energy of vibration {engrot} = bond kinetic energy of rotation {engtrans} = bond kinetic energy of translation @@ -38,6 +39,7 @@ keyword = {set} :l compute 1 all bond/local engpot compute 1 all bond/local dist engpot force :pre +compute 1 all bond/local dist fx fy fz :pre compute 1 all angle/local dist v_distsq set dist d :pre [Description:] @@ -59,6 +61,9 @@ based on the current separation of the pair of atoms in the bond. The value {force} is the magnitude of the force acting between the pair of atoms in the bond. +The values {fx}, {fy}, and {fz} are the xyz components of +{force} between the pair of atoms in the bond. + The remaining properties are all computed for motion of the two atoms relative to the center of mass (COM) velocity of the 2 atoms in the bond. diff --git a/doc/src/compute_orientorder_atom.txt b/doc/src/compute_orientorder_atom.txt index 7327a7b1d3..da14c866bd 100644 --- a/doc/src/compute_orientorder_atom.txt +++ b/doc/src/compute_orientorder_atom.txt @@ -19,6 +19,8 @@ keyword = {cutoff} or {nnn} or {degrees} or {components} {cutoff} value = distance cutoff {nnn} value = number of nearest neighbors {degrees} values = nlvalues, l1, l2,... + {wl} value = yes or no + {wl/hat} value = yes or no {components} value = ldegree :pre :ule @@ -27,7 +29,8 @@ keyword = {cutoff} or {nnn} or {degrees} or {components} compute 1 all orientorder/atom compute 1 all orientorder/atom degrees 5 4 6 8 10 12 nnn NULL cutoff 1.5 -compute 1 all orientorder/atom degrees 4 6 components 6 nnn NULL cutoff 3.0 :pre +compute 1 all orientorder/atom wl/hat yes +compute 1 all orientorder/atom components 6 :pre [Description:] @@ -48,7 +51,7 @@ neighbors of the central atom. The angles theta and phi are the standard spherical polar angles defining the direction of the bond vector {rij}. The second equation defines {Ql}, which is a -rotationally invariant scalar quantity obtained by summing +rotationally invariant non-negative amplitude obtained by summing over all the components of degree {l}. The optional keyword {cutoff} defines the distance cutoff @@ -63,7 +66,7 @@ specified distance cutoff are used. The optional keyword {degrees} defines the list of order parameters to be computed. The first argument {nlvalues} is the number of order -parameters. This is followed by that number of integers giving the +parameters. This is followed by that number of non-negative integers giving the degree of each order parameter. Because {Q}2 and all odd-degree order parameters are zero for atoms in cubic crystals (see "Steinhardt"_#Steinhardt), the default order parameters are {Q}4, @@ -71,7 +74,20 @@ parameters are zero for atoms in cubic crystals (see = sqrt(7/3)/8 = 0.19094.... The numerical values of all order parameters up to {Q}12 for a range of commonly encountered high-symmetry structures are given in Table I of "Mickel et -al."_#Mickel. +al."_#Mickel, and these can be reproduced with this compute + +The optional keyword {wl} will output the third-order invariants {Wl} +(see Eq. 1.4 in "Steinhardt"_#Steinhardt) for the same degrees as +for the {Ql} parameters. For the FCC crystal with {nnn} =12, +{W}4 = -sqrt(14/143).(49/4096)/Pi^1.5 = -0.0006722136... + +The optional keyword {wl/hat} will output the normalized third-order +invariants {Wlhat} (see Eq. 2.2 in "Steinhardt"_#Steinhardt) +for the same degrees as for the {Ql} parameters. For the FCC crystal +with {nnn} =12, {W}4hat = -7/3*sqrt(2/429) = -0.159317...The numerical +values of {Wlhat} for a range of commonly encountered high-symmetry +structures are given in Table I of "Steinhardt"_#Steinhardt, and these +can be reproduced with this keyword. The optional keyword {components} will output the components of the normalized complex vector {Ybar_lm} of degree {ldegree}, which must be @@ -82,7 +98,7 @@ particles, as discussed in "ten Wolde"_#tenWolde2. The value of {Ql} is set to zero for atoms not in the specified compute group, as well as for atoms that have less than -{nnn} neighbors within the distance cutoff. +{nnn} neighbors within the distance cutoff, unless {nnn} is NULL. The neighbor list needed to compute this quantity is constructed each time the calculation is performed (i.e. each time a snapshot of atoms @@ -108,6 +124,12 @@ This compute calculates a per-atom array with {nlvalues} columns, giving the {Ql} values for each atom, which are real numbers on the range 0 <= {Ql} <= 1. +If the keyword {wl} is set to yes, then the {Wl} values for each +atom will be added to the output array, which are real numbers. + +If the keyword {wl/hat} is set to yes, then the {Wl_hat} +values for each atom will be added to the output array, which are real numbers. + If the keyword {components} is set, then the real and imaginary parts of each component of (normalized) {Ybar_lm} will be added to the output array in the following order: Re({Ybar_-m}) Im({Ybar_-m}) @@ -130,7 +152,8 @@ hexorder/atom"_compute_hexorder_atom.html [Default:] The option defaults are {cutoff} = pair style cutoff, {nnn} = 12, -{degrees} = 5 4 6 8 10 12 i.e. {Q}4, {Q}6, {Q}8, {Q}10, and {Q}12. +{degrees} = 5 4 6 8 10 12 i.e. {Q}4, {Q}6, {Q}8, {Q}10, and {Q}12, +{wl} = no, {wl/hat} = no, and {components} off :line diff --git a/doc/src/dihedral_harmonic.txt b/doc/src/dihedral_harmonic.txt index 27bc04f9df..b4071c863c 100644 --- a/doc/src/dihedral_harmonic.txt +++ b/doc/src/dihedral_harmonic.txt @@ -8,6 +8,7 @@ dihedral_style harmonic command :h3 dihedral_style harmonic/intel command :h3 +dihedral_style harmonic/kk command :h3 dihedral_style harmonic/omp command :h3 [Syntax:] diff --git a/doc/src/min_style.txt b/doc/src/min_style.txt index c46c1492b4..e27682cf97 100644 --- a/doc/src/min_style.txt +++ b/doc/src/min_style.txt @@ -11,7 +11,7 @@ min_style command :h3 min_style style :pre -style = {cg} or {hftn} or {sd} or {quickmin} or {fire} or {spin} :ul +style = {cg} or {cg/kk} or {hftn} or {sd} or {quickmin} or {fire} or {spin} :ul [Examples:] @@ -74,9 +74,34 @@ defined via the "timestep"_timestep.html command. Often they will converge more quickly if you use a timestep about 10x larger than you would normally use for dynamics simulations. -NOTE: The {quickmin}, {fire}, and {hftn} styles do not yet support the -use of the "fix box/relax"_fix_box_relax.html command or minimizations -involving the electron radius in "eFF"_pair_eff.html models. +NOTE: The {quickmin}, {fire}, {hftn}, and {cg/kk} styles do not yet +support the use of the "fix box/relax"_fix_box_relax.html command or +minimizations involving the electron radius in "eFF"_pair_eff.html +models. + +:line + +Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are +functionally the same as the corresponding style without the suffix. +They have been optimized to run faster, depending on your available +hardware, as discussed on the "Speed packages"_Speed_packages.html doc +page. The accelerated styles take the same arguments and should +produce the same results, except for round-off and precision issues. + +These accelerated styles are part of the GPU, USER-INTEL, KOKKOS, +USER-OMP and OPT packages, respectively. They are only enabled if +LAMMPS was built with those packages. See the "Build +package"_Build_package.html doc page for more info. + +You can specify the accelerated styles explicitly in your input script +by including their suffix, or you can use the "-suffix command-line +switch"_Run_options.html when you invoke LAMMPS, or you can use the +"suffix"_suffix.html command in your input script. + +See the "Speed packages"_Speed_packages.html doc page for more +instructions on how to use the accelerated styles effectively. + +:line [Restrictions:] none diff --git a/doc/src/minimize.txt b/doc/src/minimize.txt index ecf1ad0fcf..42315705e5 100644 --- a/doc/src/minimize.txt +++ b/doc/src/minimize.txt @@ -7,6 +7,7 @@ :line minimize command :h3 +minimize/kk command :h3 [Syntax:] @@ -256,6 +257,28 @@ info in the Restrictions section below. :line +Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are +functionally the same as the corresponding style without the suffix. +They have been optimized to run faster, depending on your available +hardware, as discussed on the "Speed packages"_Speed_packages.html doc +page. The accelerated styles take the same arguments and should +produce the same results, except for round-off and precision issues. + +These accelerated styles are part of the GPU, USER-INTEL, KOKKOS, +USER-OMP and OPT packages, respectively. They are only enabled if +LAMMPS was built with those packages. See the "Build +package"_Build_package.html doc page for more info. + +You can specify the accelerated styles explicitly in your input script +by including their suffix, or you can use the "-suffix command-line +switch"_Run_options.html when you invoke LAMMPS, or you can use the +"suffix"_suffix.html command in your input script. + +See the "Speed packages"_Speed_packages.html doc page for more +instructions on how to use the accelerated styles effectively. + +:line + [Restrictions:] Features that are not yet implemented are listed here, in case someone diff --git a/examples/steinhardt/in.bcc b/examples/steinhardt/in.bcc new file mode 100644 index 0000000000..a0348fdcd9 --- /dev/null +++ b/examples/steinhardt/in.bcc @@ -0,0 +1,91 @@ +# Steinhardt-Nelson bond orientational order parameters for BCC + +variable rcut equal 3.0 + +boundary p p p + +atom_style atomic +neighbor 0.3 bin +neigh_modify delay 5 + +# create geometry + +lattice bcc 1.0 +region box block 0 3 0 3 0 3 +create_box 1 box +create_atoms 1 box + +mass 1 1.0 + +# LJ potentials + +pair_style lj/cut ${rcut} +pair_coeff * * 1.0 1.0 ${rcut} + +# 14 neighbors, perfect crystal + +compute qlwlhat all orientorder/atom degrees 6 2 4 6 8 10 12 nnn 14 wl/hat yes +compute avql all reduce ave c_qlwlhat[1] c_qlwlhat[2] c_qlwlhat[3] c_qlwlhat[4] c_qlwlhat[5] c_qlwlhat[6] +compute avwlhat all reduce ave c_qlwlhat[7] c_qlwlhat[8] c_qlwlhat[9] c_qlwlhat[10] c_qlwlhat[11] c_qlwlhat[12] + +thermo_style custom step temp epair etotal c_avql[*] c_avwlhat[*] + +run 0 + +# check Q_l values + +print " " +print "*******************************************************************" +print " " +print "Comparison with reference values of Q_l " +print " [Table I in W. Mickel, S. C. Kapfer," +print " G. E. Schroeder-Turkand, K. Mecke, " +print " J. Chem. Phys. 138, 044501 (2013).]" +print " " + +variable q2ref equal 0.0 +variable q4ref equal 0.036 +variable q6ref equal 0.511 +variable q8ref equal 0.429 +variable q10ref equal 0.195 +variable q12ref equal 0.405 + +variable q2 equal c_avql[1] +variable q4 equal c_avql[2] +variable q6 equal c_avql[3] +variable q8 equal c_avql[4] +variable q10 equal c_avql[5] +variable q12 equal c_avql[6] + +print "q2 = $(v_q2:%10.6f) delta = $(v_q2-v_q2ref:%10.4f)" +print "q4 = $(v_q4:%10.6f) delta = $(v_q4-v_q4ref:%10.4f)" +print "q6 = $(v_q6:%10.6f) delta = $(v_q6-v_q6ref:%10.4f)" +print "q8 = $(v_q8:%10.6f) delta = $(v_q8-v_q8ref:%10.4f)" +print "q10 = $(v_q10:%10.6f) delta = $(v_q10-v_q10ref:%10.4f)" +print "q12 = $(v_q12:%10.6f) delta = $(v_q12-v_q12ref:%10.4f)" + +# check W_l_hat values + +print " " +print "Comparison with reference values of W_l_hat" +print " [Table I in P. Steinhardt, D. Nelson, and M. Ronchetti, " +print " Phys. Rev. B 28, 784 (1983).]" +print " " + +variable w4hatref equal 0.159317 +variable w6hatref equal 0.013161 +variable w8hatref equal -0.058455 +variable w10hatref equal -0.090130 + +variable w4hat equal c_avwlhat[2] +variable w6hat equal c_avwlhat[3] +variable w8hat equal c_avwlhat[4] +variable w10hat equal c_avwlhat[5] + +print "w4hat = $(v_w4hat:%10.6f) delta = $(v_w4hat-v_w4hatref:%10.6f)" +print "w6hat = $(v_w6hat:%10.6f) delta = $(v_w6hat-v_w6hatref:%10.6f)" +print "w8hat = $(v_w8hat:%10.6f) delta = $(v_w8hat-v_w8hatref:%10.6f)" +print "w10hat = $(v_w10hat:%10.6f) delta = $(v_w10hat-v_w10hatref:%10.6f)" +print " " +print "*******************************************************************" +print " " diff --git a/examples/steinhardt/in.fcc b/examples/steinhardt/in.fcc new file mode 100644 index 0000000000..6d2775d0bb --- /dev/null +++ b/examples/steinhardt/in.fcc @@ -0,0 +1,88 @@ +# Steinhardt-Nelson bond orientational order parameters for FCC + +variable rcut equal 3.0 + +boundary p p p + +atom_style atomic +neighbor 0.3 bin +neigh_modify delay 5 + +# create geometry + +lattice fcc 1.0 +region box block 0 3 0 3 0 3 +create_box 1 box +create_atoms 1 box + +mass 1 1.0 + +# LJ potentials + +pair_style lj/cut ${rcut} +pair_coeff * * 1.0 1.0 ${rcut} + +# 12 neighbors, perfect crystal + +compute qlwlhat all orientorder/atom wl/hat yes +compute avql all reduce ave c_qlwlhat[1] c_qlwlhat[2] c_qlwlhat[3] c_qlwlhat[4] c_qlwlhat[5] +compute avwlhat all reduce ave c_qlwlhat[6] c_qlwlhat[7] c_qlwlhat[8] c_qlwlhat[9] c_qlwlhat[10] + +thermo_style custom step temp epair etotal c_avql[*] c_avwlhat[*] + +run 0 + +# check Q_l values + +print " " +print "*******************************************************************" +print " " +print "Comparison with reference values of Q_l " +print " [Table I in W. Mickel, S. C. Kapfer," +print " G. E. Schroeder-Turkand, K. Mecke, " +print " J. Chem. Phys. 138, 044501 (2013).]" +print " " + +variable q4ref equal 0.190 +variable q6ref equal 0.575 +variable q8ref equal 0.404 +variable q10ref equal 0.013 +variable q12ref equal 0.600 + +variable q4 equal c_avql[1] +variable q6 equal c_avql[2] +variable q8 equal c_avql[3] +variable q10 equal c_avql[4] +variable q12 equal c_avql[5] + +print "q4 = $(v_q4:%10.6f) delta = $(v_q4-v_q4ref:%10.4f)" +print "q6 = $(v_q6:%10.6f) delta = $(v_q6-v_q6ref:%10.4f)" +print "q8 = $(v_q8:%10.6f) delta = $(v_q8-v_q8ref:%10.4f)" +print "q10 = $(v_q10:%10.6f) delta = $(v_q10-v_q10ref:%10.4f)" +print "q12 = $(v_q12:%10.6f) delta = $(v_q12-v_q12ref:%10.4f)" + +# check W_l_hat values + +print " " +print "Comparison with reference values of W_l_hat" +print " [Table I in P. Steinhardt, D. Nelson, and M. Ronchetti, " +print " Phys. Rev. B 28, 784 (1983).]" +print " " + +variable w4hatref equal -0.159316 +variable w6hatref equal -0.013161 +variable w8hatref equal 0.058454 +variable w10hatref equal -0.090128 + +variable w4hat equal c_avwlhat[1] +variable w6hat equal c_avwlhat[2] +variable w8hat equal c_avwlhat[3] +variable w10hat equal c_avwlhat[4] + +print "w4hat = $(v_w4hat:%10.6f) delta = $(v_w4hat-v_w4hatref:%10.6f)" +print "w6hat = $(v_w6hat:%10.6f) delta = $(v_w6hat-v_w6hatref:%10.6f)" +print "w8hat = $(v_w8hat:%10.6f) delta = $(v_w8hat-v_w8hatref:%10.6f)" +print "w10hat = $(v_w10hat:%10.6f) delta = $(v_w10hat-v_w10hatref:%10.6f)" +print " " +print "*******************************************************************" +print " " diff --git a/examples/steinhardt/in.icos b/examples/steinhardt/in.icos new file mode 100644 index 0000000000..d0d61a902d --- /dev/null +++ b/examples/steinhardt/in.icos @@ -0,0 +1,106 @@ +# Steinhardt-Nelson bond orientational order parameters for icosahedral cluster +# W_6_hat is sensitive to icosohedral order + +variable rcut equal 1.2 # a bit bigger than LJ Rmin +variable rcutred equal 0.75 # a bit bigger than 1/sqrt(2) + +# create a perfect fcc crystallite + +atom_style atomic +boundary s s s +lattice fcc 1.0 # neighbors at LJ Rmin +region box block 0 2 0 2 0 2 +create_box 1 box +create_atoms 1 box +mass 1 1.0 + +region centralatom sphere 1 1 1 0.0 side in +group centralatom region centralatom + +region mysphere sphere 1 1 1 ${rcutred} side out +delete_atoms region mysphere + +# LJ potential + +pair_style lj/cut 100.0 +pair_coeff * * 1.0 1.0 100.0 + +# define output for central atom + +compute qlwlhat all orientorder/atom wl/hat yes cutoff ${rcut} nnn NULL +compute avql centralatom reduce ave c_qlwlhat[1] c_qlwlhat[2] c_qlwlhat[3] c_qlwlhat[4] c_qlwlhat[5] +compute avwlhat centralatom reduce ave c_qlwlhat[6] c_qlwlhat[7] c_qlwlhat[8] c_qlwlhat[9] c_qlwlhat[10] +variable q6 equal c_avql[2] +variable w6hat equal c_avwlhat[2] + +compute mype all pe/atom +compute centralatompe centralatom reduce ave c_mype + +# gently equilibrate the crystallite + +velocity all create 0.001 482748 +fix 1 all nve +neighbor 0.3 bin +neigh_modify every 1 check no delay 0 +timestep 0.003 +thermo_style custom step temp epair etotal c_centralatompe v_q6 v_w6hat +thermo 10 + +run 10 + +# quench to icosehedral cluster + +minimize 1.0e-10 1.0e-6 100 1000 + +# check Q_l values + +print " " +print "*******************************************************************" +print " " +print "Comparison with reference values of Q_l " +print " [Table I in W. Mickel, S. C. Kapfer," +print " G. E. Schroeder-Turkand, K. Mecke, " +print " J. Chem. Phys. 138, 044501 (2013).]" +print " " + +variable q4ref equal 0.0 +variable q6ref equal 0.663 +variable q8ref equal 0.0 +variable q10ref equal 0.363 +variable q12ref equal 0.585 + +variable q4 equal c_avql[1] +variable q6 equal c_avql[2] +variable q8 equal c_avql[3] +variable q10 equal c_avql[4] +variable q12 equal c_avql[5] + +print "q4 = $(v_q4:%10.6f) delta = $(v_q4-v_q4ref:%10.4f)" +print "q6 = $(v_q6:%10.6f) delta = $(v_q6-v_q6ref:%10.4f)" +print "q8 = $(v_q8:%10.6f) delta = $(v_q8-v_q8ref:%10.4f)" +print "q10 = $(v_q10:%10.6f) delta = $(v_q10-v_q10ref:%10.4f)" +print "q12 = $(v_q12:%10.6f) delta = $(v_q12-v_q12ref:%10.4f)" + +# check W_l_hat values + +print " " +print "Comparison with reference values of W_l_hat" +print " [Table I in P. Steinhardt, D. Nelson, and M. Ronchetti, " +print " Phys. Rev. B 28, 784 (1983).]" +print " " + +variable w6hatref equal -0.169754 +variable w10hatref equal -0.093967 + +variable w4hat equal c_avwlhat[1] +variable w6hat equal c_avwlhat[2] +variable w8hat equal c_avwlhat[3] +variable w10hat equal c_avwlhat[4] +variable w12hat equal c_avwlhat[5] + +print "w6hat = $(v_w6hat:%10.6f) delta = $(v_w6hat-v_w6hatref:%10.6f)" +print "w10hat = $(v_w10hat:%10.6f) delta = $(v_w10hat-v_w10hatref:%10.6f)" +print " " +print "*******************************************************************" +print " " + diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 7c465128d8..8f3744bbf4 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -91,6 +91,8 @@ action dihedral_charmm_kokkos.cpp dihedral_charmm.cpp action dihedral_charmm_kokkos.h dihedral_charmm.h action dihedral_class2_kokkos.cpp dihedral_class2.cpp action dihedral_class2_kokkos.h dihedral_class2.h +action dihedral_harmonic_kokkos.cpp dihedral_harmonic.cpp +action dihedral_harmonic_kokkos.h dihedral_harmonic.h action dihedral_opls_kokkos.cpp dihedral_opls.cpp action dihedral_opls_kokkos.h dihedral_opls.h action domain_kokkos.cpp @@ -107,6 +109,8 @@ action fix_gravity_kokkos.cpp action fix_gravity_kokkos.h action fix_langevin_kokkos.cpp action fix_langevin_kokkos.h +action fix_minimize_kokkos.cpp +action fix_minimize_kokkos.h action fix_neigh_history_kokkos.cpp action fix_neigh_history_kokkos.h action fix_nh_kokkos.cpp @@ -179,6 +183,12 @@ action nbin_ssa_kokkos.cpp nbin_ssa.cpp action nbin_ssa_kokkos.h nbin_ssa.h action math_special_kokkos.cpp action math_special_kokkos.h +action min_cg_kokkos.cpp +action min_cg_kokkos.h +action min_kokkos.cpp +action min_kokkos.h +action min_linesearch_kokkos.cpp +action min_linesearch_kokkos.h action pair_buck_coul_cut_kokkos.cpp action pair_buck_coul_cut_kokkos.h action pair_buck_coul_long_kokkos.cpp pair_buck_coul_long.cpp diff --git a/src/KOKKOS/dihedral_harmonic_kokkos.cpp b/src/KOKKOS/dihedral_harmonic_kokkos.cpp new file mode 100644 index 0000000000..dd77bc605b --- /dev/null +++ b/src/KOKKOS/dihedral_harmonic_kokkos.cpp @@ -0,0 +1,537 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Stan Moore (SNL) +------------------------------------------------------------------------- */ + +#include "dihedral_harmonic_kokkos.h" +#include +#include +#include "atom_kokkos.h" +#include "comm.h" +#include "neighbor_kokkos.h" +#include "domain.h" +#include "force.h" +#include "update.h" +#include "memory_kokkos.h" +#include "error.h" +#include "atom_masks.h" + +using namespace LAMMPS_NS; + +#define TOLERANCE 0.05 +#define SMALL 0.001 +#define SMALLER 0.00001 + +/* ---------------------------------------------------------------------- */ + +template +DihedralHarmonicKokkos::DihedralHarmonicKokkos(LAMMPS *lmp) : DihedralHarmonic(lmp) +{ + atomKK = (AtomKokkos *) atom; + neighborKK = (NeighborKokkos *) neighbor; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = X_MASK | F_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; + + k_warning_flag = DAT::tdual_int_scalar("Dihedral:warning_flag"); + d_warning_flag = k_warning_flag.view(); + h_warning_flag = k_warning_flag.h_view; +} + +/* ---------------------------------------------------------------------- */ + +template +DihedralHarmonicKokkos::~DihedralHarmonicKokkos() +{ + if (!copymode) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); + } +} + +/* ---------------------------------------------------------------------- */ + +template +void DihedralHarmonicKokkos::compute(int eflag_in, int vflag_in) +{ + eflag = eflag_in; + vflag = vflag_in; + + ev_init(eflag,vflag,0); + + // reallocate per-atom arrays if necessary + + if (eflag_atom) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"dihedral:eatom"); + d_eatom = k_eatom.view(); + } + if (vflag_atom) { + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"dihedral:vatom"); + d_vatom = k_vatom.view(); + } + + k_k.template sync(); + k_cos_shift.template sync(); + k_sin_shift.template sync(); + k_sign.template sync(); + k_multiplicity.template sync(); + + x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + neighborKK->k_dihedrallist.template sync(); + dihedrallist = neighborKK->k_dihedrallist.view(); + int ndihedrallist = neighborKK->ndihedrallist; + nlocal = atom->nlocal; + newton_bond = force->newton_bond; + + h_warning_flag() = 0; + k_warning_flag.template modify(); + k_warning_flag.template sync(); + + copymode = 1; + + // loop over neighbors of my atoms + + EV_FLOAT ev; + + if (evflag) { + if (newton_bond) { + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,ndihedrallist),*this,ev); + } else { + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,ndihedrallist),*this,ev); + } + } else { + if (newton_bond) { + Kokkos::parallel_for(Kokkos::RangePolicy >(0,ndihedrallist),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy >(0,ndihedrallist),*this); + } + } + + // error check + + k_warning_flag.template modify(); + k_warning_flag.template sync(); + if (h_warning_flag()) + error->warning(FLERR,"Dihedral problem",0); + + if (eflag_global) energy += ev.evdwl; + if (vflag_global) { + virial[0] += ev.v[0]; + virial[1] += ev.v[1]; + virial[2] += ev.v[2]; + virial[3] += ev.v[3]; + virial[4] += ev.v[4]; + virial[5] += ev.v[5]; + } + + if (eflag_atom) { + k_eatom.template modify(); + k_eatom.template sync(); + } + + if (vflag_atom) { + k_vatom.template modify(); + k_vatom.template sync(); + } + + copymode = 0; +} + +template +template +KOKKOS_INLINE_FUNCTION +void DihedralHarmonicKokkos::operator()(TagDihedralHarmonicCompute, const int &n, EV_FLOAT& ev) const { + + // The f array is atomic + Kokkos::View > a_f = f; + + const int i1 = dihedrallist(n,0); + const int i2 = dihedrallist(n,1); + const int i3 = dihedrallist(n,2); + const int i4 = dihedrallist(n,3); + const int type = dihedrallist(n,4); + + // 1st bond + + const F_FLOAT vb1x = x(i1,0) - x(i2,0); + const F_FLOAT vb1y = x(i1,1) - x(i2,1); + const F_FLOAT vb1z = x(i1,2) - x(i2,2); + + // 2nd bond + + const F_FLOAT vb2x = x(i3,0) - x(i2,0); + const F_FLOAT vb2y = x(i3,1) - x(i2,1); + const F_FLOAT vb2z = x(i3,2) - x(i2,2); + + const F_FLOAT vb2xm = -vb2x; + const F_FLOAT vb2ym = -vb2y; + const F_FLOAT vb2zm = -vb2z; + + // 3rd bond + + const F_FLOAT vb3x = x(i4,0) - x(i3,0); + const F_FLOAT vb3y = x(i4,1) - x(i3,1); + const F_FLOAT vb3z = x(i4,2) - x(i3,2); + + // c,s calculation + + const F_FLOAT ax = vb1y*vb2zm - vb1z*vb2ym; + const F_FLOAT ay = vb1z*vb2xm - vb1x*vb2zm; + const F_FLOAT az = vb1x*vb2ym - vb1y*vb2xm; + const F_FLOAT bx = vb3y*vb2zm - vb3z*vb2ym; + const F_FLOAT by = vb3z*vb2xm - vb3x*vb2zm; + const F_FLOAT bz = vb3x*vb2ym - vb3y*vb2xm; + + const F_FLOAT rasq = ax*ax + ay*ay + az*az; + const F_FLOAT rbsq = bx*bx + by*by + bz*bz; + const F_FLOAT rgsq = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm; + const F_FLOAT rg = sqrt(rgsq); + + F_FLOAT rginv,ra2inv,rb2inv; + rginv = ra2inv = rb2inv = 0.0; + if (rg > 0) rginv = 1.0/rg; + if (rasq > 0) ra2inv = 1.0/rasq; + if (rbsq > 0) rb2inv = 1.0/rbsq; + const F_FLOAT rabinv = sqrt(ra2inv*rb2inv); + + F_FLOAT c = (ax*bx + ay*by + az*bz)*rabinv; + const F_FLOAT s = rg*rabinv*(ax*vb3x + ay*vb3y + az*vb3z); + + // error check + + if ((c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) && !d_warning_flag()) + Kokkos::atomic_fetch_add(&d_warning_flag(),1); + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + const int m = d_multiplicity[type]; + F_FLOAT p = 1.0; + F_FLOAT ddf1,df1; + ddf1 = df1 = 0.0; + + for (int i = 0; i < m; i++) { + ddf1 = p*c - df1*s; + df1 = p*s + df1*c; + p = ddf1; + } + + p = p*d_cos_shift[type] + df1*d_sin_shift[type]; + df1 = df1*d_cos_shift[type] - ddf1*d_sin_shift[type]; + df1 *= -m; + p += 1.0; + + if (m == 0) { + p = 1.0 + d_cos_shift[type]; + df1 = 0.0; + } + + E_FLOAT edihedral = 0.0; + if (eflag) edihedral = d_k[type] * p; + + const F_FLOAT fg = vb1x*vb2xm + vb1y*vb2ym + vb1z*vb2zm; + const F_FLOAT hg = vb3x*vb2xm + vb3y*vb2ym + vb3z*vb2zm; + const F_FLOAT fga = fg*ra2inv*rginv; + const F_FLOAT hgb = hg*rb2inv*rginv; + const F_FLOAT gaa = -ra2inv*rg; + const F_FLOAT gbb = rb2inv*rg; + + const F_FLOAT dtfx = gaa*ax; + const F_FLOAT dtfy = gaa*ay; + const F_FLOAT dtfz = gaa*az; + const F_FLOAT dtgx = fga*ax - hgb*bx; + const F_FLOAT dtgy = fga*ay - hgb*by; + const F_FLOAT dtgz = fga*az - hgb*bz; + const F_FLOAT dthx = gbb*bx; + const F_FLOAT dthy = gbb*by; + const F_FLOAT dthz = gbb*bz; + + const F_FLOAT df = -d_k[type] * df1; + + const F_FLOAT sx2 = df*dtgx;; + const F_FLOAT sy2 = df*dtgy;; + const F_FLOAT sz2 = df*dtgz;; + + F_FLOAT f1[3],f2[3],f3[3],f4[3]; + f1[0] = df*dtfx; + f1[1] = df*dtfy; + f1[2] = df*dtfz; + + f2[0] = sx2 - f1[0]; + f2[1] = sy2 - f1[1]; + f2[2] = sz2 - f1[2]; + + f4[0] = df*dthx; + f4[1] = df*dthy; + f4[2] = df*dthz; + + f3[0] = -sx2 - f4[0]; + f3[1] = -sy2 - f4[1]; + f3[2] = -sz2 - f4[2]; + + // apply force to each of 4 atoms + + if (NEWTON_BOND || i1 < nlocal) { + a_f(i1,0) += f1[0]; + a_f(i1,1) += f1[1]; + a_f(i1,2) += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + a_f(i2,0) += f2[0]; + a_f(i2,1) += f2[1]; + a_f(i2,2) += f2[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + a_f(i3,0) += f3[0]; + a_f(i3,1) += f3[1]; + a_f(i3,2) += f3[2]; + } + + if (NEWTON_BOND || i4 < nlocal) { + a_f(i4,0) += f4[0]; + a_f(i4,1) += f4[1]; + a_f(i4,2) += f4[2]; + } + + if (EVFLAG) + ev_tally(ev,i1,i2,i3,i4,edihedral,f1,f3,f4, + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z); +} + +template +template +KOKKOS_INLINE_FUNCTION +void DihedralHarmonicKokkos::operator()(TagDihedralHarmonicCompute, const int &n) const { + EV_FLOAT ev; + this->template operator()(TagDihedralHarmonicCompute(), n, ev); +} + +/* ---------------------------------------------------------------------- */ + +template +void DihedralHarmonicKokkos::allocate() +{ + DihedralHarmonic::allocate(); + + int n = atom->ndihedraltypes; + k_k = DAT::tdual_ffloat_1d("DihedralHarmonic::k",n+1); + k_cos_shift = DAT::tdual_ffloat_1d("DihedralHarmonic::cos_shift",n+1); + k_sin_shift = DAT::tdual_ffloat_1d("DihedralHarmonic::sin_shift",n+1); + k_sign = DAT::tdual_int_1d("DihedralHarmonic::sign",n+1); + k_multiplicity = DAT::tdual_int_1d("DihedralHarmonic::multiplicity",n+1); + + d_k = k_k.template view(); + d_cos_shift = k_cos_shift.template view(); + d_sin_shift = k_sin_shift.template view(); + d_sign = k_sign.template view(); + d_multiplicity = k_multiplicity.template view(); +} + +/* ---------------------------------------------------------------------- + set coeffs for one type +------------------------------------------------------------------------- */ + +template +void DihedralHarmonicKokkos::coeff(int narg, char **arg) +{ + DihedralHarmonic::coeff(narg, arg); + + int n = atom->ndihedraltypes; + for (int i = 1; i <= n; i++) { + k_k.h_view[i] = k[i]; + k_cos_shift.h_view[i] = cos_shift[i]; + k_sin_shift.h_view[i] = sin_shift[i]; + k_sign.h_view[i] = sign[i]; + k_multiplicity.h_view[i] = multiplicity[i]; + } + + k_k.template modify(); + k_cos_shift.template modify(); + k_sin_shift.template modify(); + k_sign.template modify(); + k_multiplicity.template modify(); +} + +/* ---------------------------------------------------------------------- + proc 0 reads coeffs from restart file, bcasts them +------------------------------------------------------------------------- */ + +template +void DihedralHarmonicKokkos::read_restart(FILE *fp) +{ + DihedralHarmonic::read_restart(fp); + + int n = atom->ndihedraltypes; + for (int i = 1; i <= n; i++) { + k_k.h_view[i] = k[i]; + k_cos_shift.h_view[i] = cos_shift[i]; + k_sin_shift.h_view[i] = sin_shift[i]; + k_sign.h_view[i] = sign[i]; + k_multiplicity.h_view[i] = multiplicity[i]; + } + + k_k.template modify(); + k_cos_shift.template modify(); + k_sin_shift.template modify(); + k_sign.template modify(); + k_multiplicity.template modify(); +} + +/* ---------------------------------------------------------------------- + tally energy and virial into global and per-atom accumulators + virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4 + = (r1-r2) F1 + (r3-r2) F3 + (r4-r3 + r3-r2) F4 + = vb1*f1 + vb2*f3 + (vb3+vb2)*f4 +------------------------------------------------------------------------- */ + +template +//template +KOKKOS_INLINE_FUNCTION +void DihedralHarmonicKokkos::ev_tally(EV_FLOAT &ev, const int i1, const int i2, const int i3, const int i4, + F_FLOAT &edihedral, F_FLOAT *f1, F_FLOAT *f3, F_FLOAT *f4, + const F_FLOAT &vb1x, const F_FLOAT &vb1y, const F_FLOAT &vb1z, + const F_FLOAT &vb2x, const F_FLOAT &vb2y, const F_FLOAT &vb2z, + const F_FLOAT &vb3x, const F_FLOAT &vb3y, const F_FLOAT &vb3z) const +{ + E_FLOAT edihedralquarter; + F_FLOAT v[6]; + + // The eatom and vatom arrays are atomic + Kokkos::View > v_eatom = k_eatom.view(); + Kokkos::View > v_vatom = k_vatom.view(); + + if (eflag_either) { + if (eflag_global) { + if (newton_bond) ev.evdwl += edihedral; + else { + edihedralquarter = 0.25*edihedral; + if (i1 < nlocal) ev.evdwl += edihedralquarter; + if (i2 < nlocal) ev.evdwl += edihedralquarter; + if (i3 < nlocal) ev.evdwl += edihedralquarter; + if (i4 < nlocal) ev.evdwl += edihedralquarter; + } + } + if (eflag_atom) { + edihedralquarter = 0.25*edihedral; + if (newton_bond || i1 < nlocal) v_eatom[i1] += edihedralquarter; + if (newton_bond || i2 < nlocal) v_eatom[i2] += edihedralquarter; + if (newton_bond || i3 < nlocal) v_eatom[i3] += edihedralquarter; + if (newton_bond || i4 < nlocal) v_eatom[i4] += edihedralquarter; + } + } + + if (vflag_either) { + v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0]; + v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1]; + v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2]; + v[3] = vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1]; + v[4] = vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2]; + v[5] = vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2]; + + if (vflag_global) { + if (newton_bond) { + ev.v[0] += v[0]; + ev.v[1] += v[1]; + ev.v[2] += v[2]; + ev.v[3] += v[3]; + ev.v[4] += v[4]; + ev.v[5] += v[5]; + } else { + if (i1 < nlocal) { + ev.v[0] += 0.25*v[0]; + ev.v[1] += 0.25*v[1]; + ev.v[2] += 0.25*v[2]; + ev.v[3] += 0.25*v[3]; + ev.v[4] += 0.25*v[4]; + ev.v[5] += 0.25*v[5]; + } + if (i2 < nlocal) { + ev.v[0] += 0.25*v[0]; + ev.v[1] += 0.25*v[1]; + ev.v[2] += 0.25*v[2]; + ev.v[3] += 0.25*v[3]; + ev.v[4] += 0.25*v[4]; + ev.v[5] += 0.25*v[5]; + } + if (i3 < nlocal) { + ev.v[0] += 0.25*v[0]; + ev.v[1] += 0.25*v[1]; + ev.v[2] += 0.25*v[2]; + ev.v[3] += 0.25*v[3]; + ev.v[4] += 0.25*v[4]; + ev.v[5] += 0.25*v[5]; + } + if (i4 < nlocal) { + ev.v[0] += 0.25*v[0]; + ev.v[1] += 0.25*v[1]; + ev.v[2] += 0.25*v[2]; + ev.v[3] += 0.25*v[3]; + ev.v[4] += 0.25*v[4]; + ev.v[5] += 0.25*v[5]; + } + } + } + + if (vflag_atom) { + if (newton_bond || i1 < nlocal) { + v_vatom(i1,0) += 0.25*v[0]; + v_vatom(i1,1) += 0.25*v[1]; + v_vatom(i1,2) += 0.25*v[2]; + v_vatom(i1,3) += 0.25*v[3]; + v_vatom(i1,4) += 0.25*v[4]; + v_vatom(i1,5) += 0.25*v[5]; + } + if (newton_bond || i2 < nlocal) { + v_vatom(i2,0) += 0.25*v[0]; + v_vatom(i2,1) += 0.25*v[1]; + v_vatom(i2,2) += 0.25*v[2]; + v_vatom(i2,3) += 0.25*v[3]; + v_vatom(i2,4) += 0.25*v[4]; + v_vatom(i2,5) += 0.25*v[5]; + } + if (newton_bond || i3 < nlocal) { + v_vatom(i3,0) += 0.25*v[0]; + v_vatom(i3,1) += 0.25*v[1]; + v_vatom(i3,2) += 0.25*v[2]; + v_vatom(i3,3) += 0.25*v[3]; + v_vatom(i3,4) += 0.25*v[4]; + v_vatom(i3,5) += 0.25*v[5]; + } + if (newton_bond || i4 < nlocal) { + v_vatom(i4,0) += 0.25*v[0]; + v_vatom(i4,1) += 0.25*v[1]; + v_vatom(i4,2) += 0.25*v[2]; + v_vatom(i4,3) += 0.25*v[3]; + v_vatom(i4,4) += 0.25*v[4]; + v_vatom(i4,5) += 0.25*v[5]; + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +namespace LAMMPS_NS { +template class DihedralHarmonicKokkos; +#ifdef KOKKOS_ENABLE_CUDA +template class DihedralHarmonicKokkos; +#endif +} + diff --git a/src/KOKKOS/dihedral_harmonic_kokkos.h b/src/KOKKOS/dihedral_harmonic_kokkos.h new file mode 100644 index 0000000000..f90e8944a4 --- /dev/null +++ b/src/KOKKOS/dihedral_harmonic_kokkos.h @@ -0,0 +1,104 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef DIHEDRAL_CLASS + +DihedralStyle(harmonic/kk,DihedralHarmonicKokkos) +DihedralStyle(harmonic/kk/device,DihedralHarmonicKokkos) +DihedralStyle(harmonic/kk/host,DihedralHarmonicKokkos) + +#else + +#ifndef LMP_DIHEDRAL_HARMONIC_KOKKOS_H +#define LMP_DIHEDRAL_HARMONIC_KOKKOS_H + +#include "dihedral_harmonic.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +template +struct TagDihedralHarmonicCompute{}; + +template +class DihedralHarmonicKokkos : public DihedralHarmonic { + public: + typedef DeviceType device_type; + typedef EV_FLOAT value_type; + typedef ArrayTypes AT; + + DihedralHarmonicKokkos(class LAMMPS *); + virtual ~DihedralHarmonicKokkos(); + void compute(int, int); + void coeff(int, char **); + void read_restart(FILE *); + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagDihedralHarmonicCompute, const int&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagDihedralHarmonicCompute, const int&) const; + + //template + KOKKOS_INLINE_FUNCTION + void ev_tally(EV_FLOAT &ev, const int i1, const int i2, const int i3, const int i4, + F_FLOAT &edihedral, F_FLOAT *f1, F_FLOAT *f3, F_FLOAT *f4, + const F_FLOAT &vb1x, const F_FLOAT &vb1y, const F_FLOAT &vb1z, + const F_FLOAT &vb2x, const F_FLOAT &vb2y, const F_FLOAT &vb2z, + const F_FLOAT &vb3x, const F_FLOAT &vb3y, const F_FLOAT &vb3z) const; + + protected: + + class NeighborKokkos *neighborKK; + + typename AT::t_x_array_randomread x; + typename AT::t_f_array f; + typename AT::t_int_2d dihedrallist; + + DAT::tdual_efloat_1d k_eatom; + DAT::tdual_virial_array k_vatom; + typename ArrayTypes::t_efloat_1d d_eatom; + typename ArrayTypes::t_virial_array d_vatom; + + int nlocal,newton_bond; + int eflag,vflag; + + DAT::tdual_int_scalar k_warning_flag; + typename AT::t_int_scalar d_warning_flag; + HAT::t_int_scalar h_warning_flag; + + DAT::tdual_ffloat_1d k_k; + DAT::tdual_ffloat_1d k_cos_shift; + DAT::tdual_ffloat_1d k_sin_shift; + DAT::tdual_int_1d k_sign; + DAT::tdual_int_1d k_multiplicity; + + typename AT::t_ffloat_1d d_k; + typename AT::t_ffloat_1d d_cos_shift; + typename AT::t_ffloat_1d d_sin_shift; + typename AT::t_int_1d d_sign; + typename AT::t_int_1d d_multiplicity; + + void allocate(); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +*/ diff --git a/src/KOKKOS/fix_minimize_kokkos.cpp b/src/KOKKOS/fix_minimize_kokkos.cpp new file mode 100644 index 0000000000..e7b10dbb8e --- /dev/null +++ b/src/KOKKOS/fix_minimize_kokkos.cpp @@ -0,0 +1,257 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "fix_minimize_kokkos.h" +#include "atom_kokkos.h" +#include "domain.h" +#include "memory_kokkos.h" +#include "atom_masks.h" + +using namespace LAMMPS_NS; +using namespace FixConst; + +/* ---------------------------------------------------------------------- */ + +FixMinimizeKokkos::FixMinimizeKokkos(LAMMPS *lmp, int narg, char **arg) : + FixMinimize(lmp, narg, arg) +{ + atomKK = (AtomKokkos *) atom; +} + +/* ---------------------------------------------------------------------- */ + +FixMinimizeKokkos::~FixMinimizeKokkos() +{ + memoryKK->destroy_kokkos(k_vectors,vectors); + vectors = NULL; +} + +/* ---------------------------------------------------------------------- + allocate/initialize memory for a new vector with 3 elements per atom +------------------------------------------------------------------------- */ + +void FixMinimizeKokkos::add_vector_kokkos() +{ + int n = 3; + + memory->grow(peratom,nvector+1,"minimize:peratom"); + peratom[nvector] = n; + + // d_vectors needs to be LayoutRight for subviews + + k_vectors.sync(); + + memoryKK->grow_kokkos(k_vectors,vectors,nvector+1,atom->nmax*n, + "minimize:vectors"); + d_vectors = k_vectors.d_view; + h_vectors = k_vectors.h_view; + + k_vectors.modify(); + + nvector++; +} + +/* ---------------------------------------------------------------------- + return a pointer to the Mth vector +------------------------------------------------------------------------- */ + +DAT::t_ffloat_1d FixMinimizeKokkos::request_vector_kokkos(int m) +{ + k_vectors.sync(); + + return Kokkos::subview(d_vectors,m,Kokkos::ALL); +} + +/* ---------------------------------------------------------------------- + reset x0 for atoms that moved across PBC via reneighboring in line search + x0 = 1st vector + must do minimum_image using original box stored at beginning of line search + swap & set_global_box() change to original box, then restore current box +------------------------------------------------------------------------- */ + +void FixMinimizeKokkos::reset_coords() +{ + box_swap(); + domain->set_global_box(); + + int nlocal = atom->nlocal; + + atomKK->sync(Device,X_MASK); + k_vectors.sync(); + + { + // local variables for lambda capture + + auto triclinic = domain->triclinic; + auto xperiodic = domain->xperiodic; + auto xprd_half = domain->xprd_half; + auto xprd = domain->xprd; + auto yperiodic = domain->yperiodic; + auto yprd_half = domain->yprd_half; + auto yprd = domain->yprd; + auto zperiodic = domain->zperiodic; + auto zprd_half = domain->zprd_half; + auto zprd = domain->zprd; + auto xy = domain->xy; + auto xz = domain->xz; + auto yz = domain->yz; + auto l_x = atomKK->k_x.d_view; + auto l_x0 = Kokkos::subview(d_vectors,0,Kokkos::ALL); + + Kokkos::parallel_for(nlocal, LAMMPS_LAMBDA(const int& i) { + const int n = i*3; + double dx0 = l_x(i,0) - l_x0[n]; + double dy0 = l_x(i,1) - l_x0[n+1]; + double dz0 = l_x(i,2) - l_x0[n+2]; + double dx = dx0; + double dy = dy0; + double dz = dz0; + // domain->minimum_image(dx,dy,dz); + { + if (triclinic == 0) { + if (xperiodic) { + if (fabs(dx) > xprd_half) { + if (dx < 0.0) dx += xprd; + else dx -= xprd; + } + } + if (yperiodic) { + if (fabs(dy) > yprd_half) { + if (dy < 0.0) dy += yprd; + else dy -= yprd; + } + } + if (zperiodic) { + if (fabs(dz) > zprd_half) { + if (dz < 0.0) dz += zprd; + else dz -= zprd; + } + } + + } else { + if (zperiodic) { + if (fabs(dz) > zprd_half) { + if (dz < 0.0) { + dz += zprd; + dy += yz; + dx += xz; + } else { + dz -= zprd; + dy -= yz; + dx -= xz; + } + } + } + if (yperiodic) { + if (fabs(dy) > yprd_half) { + if (dy < 0.0) { + dy += yprd; + dx += xy; + } else { + dy -= yprd; + dx -= xy; + } + } + } + if (xperiodic) { + if (fabs(dx) > xprd_half) { + if (dx < 0.0) dx += xprd; + else dx -= xprd; + } + } + } + } // end domain->minimum_image(dx,dy,dz); + if (dx != dx0) l_x0[n] = l_x(i,0) - dx; + if (dy != dy0) l_x0[n+1] = l_x(i,1) - dy; + if (dz != dz0) l_x0[n+2] = l_x(i,2) - dz; + }); + } + k_vectors.modify(); + + box_swap(); + domain->set_global_box(); +} + +/* ---------------------------------------------------------------------- + allocate local atom-based arrays +------------------------------------------------------------------------- */ + +void FixMinimizeKokkos::grow_arrays(int nmax) +{ + k_vectors.sync(); + memoryKK->grow_kokkos(k_vectors,vectors,nvector,3*nmax,"minimize:vector"); + d_vectors = k_vectors.d_view; + h_vectors = k_vectors.h_view; + k_vectors.modify(); +} + +/* ---------------------------------------------------------------------- + copy values within local atom-based arrays +------------------------------------------------------------------------- */ + +void FixMinimizeKokkos::copy_arrays(int i, int j, int /*delflag*/) +{ + int m,iper,nper,ni,nj; + + k_vectors.sync(); + + for (m = 0; m < nvector; m++) { + nper = 3; + ni = nper*i; + nj = nper*j; + for (iper = 0; iper < nper; iper++) h_vectors(m,nj++) = h_vectors(m,ni++); + } + + k_vectors.modify(); +} + +/* ---------------------------------------------------------------------- + pack values in local atom-based arrays for exchange with another proc +------------------------------------------------------------------------- */ + +int FixMinimizeKokkos::pack_exchange(int i, double *buf) +{ + int m,iper,nper,ni; + + k_vectors.sync(); + + int n = 0; + for (m = 0; m < nvector; m++) { + nper = peratom[m]; + ni = nper*i; + for (iper = 0; iper < nper; iper++) buf[n++] = h_vectors(m,ni++); + } + return n; +} + +/* ---------------------------------------------------------------------- + unpack values in local atom-based arrays from exchange with another proc +------------------------------------------------------------------------- */ + +int FixMinimizeKokkos::unpack_exchange(int nlocal, double *buf) +{ + int m,iper,nper,ni; + + k_vectors.sync(); + + int n = 0; + for (m = 0; m < nvector; m++) { + nper = peratom[m]; + ni = nper*nlocal; + for (iper = 0; iper < nper; iper++) h_vectors(m,ni++) = buf[n++]; + } + + k_vectors.modify(); + + return n; +} diff --git a/src/KOKKOS/fix_minimize_kokkos.h b/src/KOKKOS/fix_minimize_kokkos.h new file mode 100644 index 0000000000..921cb2fc5d --- /dev/null +++ b/src/KOKKOS/fix_minimize_kokkos.h @@ -0,0 +1,58 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(MINIMIZE/kk,FixMinimizeKokkos) +FixStyle(MINIMIZE/kk/device,FixMinimizeKokkos) +FixStyle(MINIMIZE/kk/host,FixMinimizeKokkos) + +#else + +#ifndef LMP_FIX_MINIMIZE_KOKKOS_H +#define LMP_FIX_MINIMIZE_KOKKOS_H + +#include "fix_minimize.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +class FixMinimizeKokkos : public FixMinimize { + friend class MinLineSearchKokkos; + + public: + FixMinimizeKokkos(class LAMMPS *, int, char **); + virtual ~FixMinimizeKokkos(); + void init() {} + + void grow_arrays(int); + void copy_arrays(int, int, int); + int pack_exchange(int, double *); + int unpack_exchange(int, double *); + + void add_vector_kokkos(); + DAT::t_float_1d request_vector_kokkos(int); + void reset_coords(); + + DAT::tdual_float_2d k_vectors; + DAT::t_float_2d d_vectors; + HAT::t_float_2d h_vectors; +}; + +} + +#endif +#endif +/* ERROR/WARNING messages: + +*/ diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index 32550e8285..18dff991b2 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -113,23 +113,37 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) } iarg += 2; + int set_flag = 0; char *str; if ((str = getenv("SLURM_LOCALID"))) { int local_rank = atoi(str); device = local_rank % ngpus; if (device >= skip_gpu) device++; + set_flag = 1; + } + if ((str = getenv("MPT_LRANK"))) { + int local_rank = atoi(str); + device = local_rank % ngpus; + if (device >= skip_gpu) device++; + set_flag = 1; } if ((str = getenv("MV2_COMM_WORLD_LOCAL_RANK"))) { int local_rank = atoi(str); device = local_rank % ngpus; if (device >= skip_gpu) device++; + set_flag = 1; } if ((str = getenv("OMPI_COMM_WORLD_LOCAL_RANK"))) { int local_rank = atoi(str); device = local_rank % ngpus; if (device >= skip_gpu) device++; + set_flag = 1; } + if (ngpus > 1 && !set_flag) + error->all(FLERR,"Could not determine local MPI rank for multiple " + "GPUs with Kokkos CUDA because MPI library not recognized"); + } else if (strcmp(arg[iarg],"t") == 0 || strcmp(arg[iarg],"threads") == 0) { nthreads = atoi(arg[iarg+1]); diff --git a/src/KOKKOS/kokkos.h b/src/KOKKOS/kokkos.h index b9f1e66c68..7b605bee1e 100644 --- a/src/KOKKOS/kokkos.h +++ b/src/KOKKOS/kokkos.h @@ -74,6 +74,11 @@ E: Invalid Kokkos command-line args Self-explanatory. See Section 2.7 of the manual for details. +E: Could not determine local MPI rank for multiple GPUs with Kokkos CUDA +because MPI library not recognized + +The local MPI rank was not found in one of four supported environment variables. + E: GPUs are requested but Kokkos has not been compiled for CUDA Recompile Kokkos with CUDA support to use GPUs. diff --git a/src/KOKKOS/min_cg_kokkos.cpp b/src/KOKKOS/min_cg_kokkos.cpp new file mode 100644 index 0000000000..47a4513bc0 --- /dev/null +++ b/src/KOKKOS/min_cg_kokkos.cpp @@ -0,0 +1,178 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "min_cg_kokkos.h" +#include +#include +#include "update.h" +#include "output.h" +#include "timer.h" +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "fix_minimize_kokkos.h" + +using namespace LAMMPS_NS; + +// EPS_ENERGY = minimum normalization for energy tolerance + +#define EPS_ENERGY 1.0e-8 + +/* ---------------------------------------------------------------------- */ + +MinCGKokkos::MinCGKokkos(LAMMPS *lmp) : MinLineSearchKokkos(lmp) +{ + atomKK = (AtomKokkos *) atom; + kokkosable = 1; +} + +/* ---------------------------------------------------------------------- + minimization via conjugate gradient iterations +------------------------------------------------------------------------- */ + +int MinCGKokkos::iterate(int maxiter) +{ + int fail,ntimestep; + double beta,gg,dot[2],dotall[2]; + + fix_minimize_kk->k_vectors.sync(); + fix_minimize_kk->k_vectors.modify(); + + // nlimit = max # of CG iterations before restarting + // set to ndoftotal unless too big + + int nlimit = static_cast (MIN(MAXSMALLINT,ndoftotal)); + + // initialize working vectors + + { + // local variables for lambda capture + + auto l_h = h; + auto l_g = g; + auto l_fvec = fvec; + + Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) { + l_h[i] = l_fvec[i]; + l_g[i] = l_fvec[i]; + }); + } + + gg = fnorm_sqr(); + + for (int iter = 0; iter < maxiter; iter++) { + + if (timer->check_timeout(niter)) + return TIMEOUT; + + ntimestep = ++update->ntimestep; + niter++; + + // line minimization along direction h from current atom->x + + eprevious = ecurrent; + fail = (this->*linemin)(ecurrent,alpha_final); + if (fail) return fail; + + // function evaluation criterion + + if (neval >= update->max_eval) return MAXEVAL; + + // energy tolerance criterion + + if (fabs(ecurrent-eprevious) < + update->etol * 0.5*(fabs(ecurrent) + fabs(eprevious) + EPS_ENERGY)) + return ETOL; + + // force tolerance criterion + + s_double2 sdot; + { + // local variables for lambda capture + + auto l_g = g; + auto l_fvec = fvec; + + Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, s_double2& sdot) { + sdot.d0 += l_fvec[i]*l_fvec[i]; + sdot.d1 += l_fvec[i]*l_g[i]; + },sdot); + } + dot[0] = sdot.d0; + dot[1] = sdot.d1; + MPI_Allreduce(dot,dotall,2,MPI_DOUBLE,MPI_SUM,world); + + if (dotall[0] < update->ftol*update->ftol) return FTOL; + + // update new search direction h from new f = -Grad(x) and old g + // this is Polak-Ribieri formulation + // beta = dotall[0]/gg would be Fletcher-Reeves + // reinitialize CG every ndof iterations by setting beta = 0.0 + + beta = MAX(0.0,(dotall[0] - dotall[1])/gg); + if ((niter+1) % nlimit == 0) beta = 0.0; + gg = dotall[0]; + + { + // local variables for lambda capture + + auto l_h = h; + auto l_g = g; + auto l_fvec = fvec; + + Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) { + l_g[i] = l_fvec[i]; + l_h[i] = l_g[i] + beta*l_h[i]; + }); + } + + // reinitialize CG if new search direction h is not downhill + + double dot_0 = 0.0; + + { + // local variables for lambda capture + + auto l_h = h; + auto l_g = g; + + Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, double& dot_0) { + dot_0 += l_g[i]*l_h[i]; + },dot_0); + } + dot[0] = dot_0; + MPI_Allreduce(dot,dotall,1,MPI_DOUBLE,MPI_SUM,world); + + if (dotall[0] <= 0.0) { + // local variables for lambda capture + + auto l_h = h; + auto l_g = g; + + Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) { + l_h[i] = l_g[i]; + }); + } + + // output for thermo, dump, restart files + + if (output->next == ntimestep) { + atomKK->sync(Host,ALL_MASK); + + timer->stamp(); + output->write(ntimestep); + timer->stamp(Timer::OUTPUT); + } + } + + return MAXITER; +} diff --git a/src/KOKKOS/min_cg_kokkos.h b/src/KOKKOS/min_cg_kokkos.h new file mode 100644 index 0000000000..745bf702c7 --- /dev/null +++ b/src/KOKKOS/min_cg_kokkos.h @@ -0,0 +1,38 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef MINIMIZE_CLASS + +MinimizeStyle(cg/kk,MinCGKokkos) +MinimizeStyle(cg/kk/device,MinCGKokkos) +MinimizeStyle(cg/kk/host,MinCGKokkos) + +#else + +#ifndef LMP_MIN_CG_KOKKOS_H +#define LMP_MIN_CG_KOKKOS_H + +#include "min_linesearch_kokkos.h" + +namespace LAMMPS_NS { + +class MinCGKokkos : public MinLineSearchKokkos { + public: + MinCGKokkos(class LAMMPS *); + int iterate(int); +}; + +} + +#endif +#endif diff --git a/src/KOKKOS/min_kokkos.cpp b/src/KOKKOS/min_kokkos.cpp new file mode 100644 index 0000000000..79ddbbab84 --- /dev/null +++ b/src/KOKKOS/min_kokkos.cpp @@ -0,0 +1,631 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Stan Moore (SNL) +------------------------------------------------------------------------- */ + +#include "min_kokkos.h" +#include +#include +#include +#include "atom_kokkos.h" +#include "atom_vec.h" +#include "domain.h" +#include "comm.h" +#include "update.h" +#include "modify.h" +#include "fix_minimize_kokkos.h" +#include "compute.h" +#include "neighbor.h" +#include "force.h" +#include "pair.h" +#include "bond.h" +#include "angle.h" +#include "dihedral.h" +#include "improper.h" +#include "kspace.h" +#include "output.h" +#include "thermo.h" +#include "timer.h" +#include "memory.h" +#include "error.h" +#include "kokkos.h" +#include "atom_masks.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +MinKokkos::MinKokkos(LAMMPS *lmp) : Min(lmp) +{ + atomKK = (AtomKokkos *) atom; + fix_minimize_kk = NULL; +} + +/* ---------------------------------------------------------------------- */ + +MinKokkos::~MinKokkos() +{ + +} + +/* ---------------------------------------------------------------------- */ + +void MinKokkos::init() +{ + Min::init(); + + fix_minimize_kk = (FixMinimizeKokkos*) fix_minimize; +} + +/* ---------------------------------------------------------------------- + setup before run +------------------------------------------------------------------------- */ + +void MinKokkos::setup(int flag) +{ + if (comm->me == 0 && screen) { + fprintf(screen,"Setting up %s style minimization ...\n", + update->minimize_style); + if (flag) { + fprintf(screen," Unit style : %s\n", update->unit_style); + fprintf(screen," Current step : " BIGINT_FORMAT "\n", + update->ntimestep); + timer->print_timeout(screen); + } + } + update->setupflag = 1; + + // setup extra global dof due to fixes + // cannot be done in init() b/c update init() is before modify init() + + nextra_global = modify->min_dof(); + if (nextra_global) { + fextra = new double[nextra_global]; + if (comm->me == 0 && screen) + fprintf(screen,"WARNING: Energy due to %d extra global DOFs will" + " be included in minimizer energies\n",nextra_global); + } + + // compute for potential energy + + int id = modify->find_compute("thermo_pe"); + if (id < 0) error->all(FLERR,"Minimization could not find thermo_pe compute"); + pe_compute = modify->compute[id]; + + // style-specific setup does two tasks + // setup extra global dof vectors + // setup extra per-atom dof vectors due to requests from Pair classes + // cannot be done in init() b/c update init() is before modify/pair init() + + setup_style(); + + // ndoftotal = total dof for entire minimization problem + // dof for atoms, extra per-atom, extra global + + bigint ndofme = 3 * static_cast(atom->nlocal); + for (int m = 0; m < nextra_atom; m++) + ndofme += extra_peratom[m]*atom->nlocal; + MPI_Allreduce(&ndofme,&ndoftotal,1,MPI_LMP_BIGINT,MPI_SUM,world); + ndoftotal += nextra_global; + + // setup domain, communication and neighboring + // acquire ghosts + // build neighbor lists + + atom->setup(); + modify->setup_pre_exchange(); + if (triclinic) domain->x2lamda(atom->nlocal); + domain->pbc(); + domain->reset_box(); + comm->setup(); + if (neighbor->style) neighbor->setup_bins(); + comm->exchange(); + if (atom->sortfreq > 0) atom->sort(); + comm->borders(); + if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost); + domain->image_check(); + domain->box_too_small_check(); + modify->setup_pre_neighbor(); + neighbor->build(1); + modify->setup_post_neighbor(); + neighbor->ncalls = 0; + + // remove these restriction eventually + + if (searchflag == 0) { + if (nextra_global) + error->all(FLERR, + "Cannot use a damped dynamics min style with fix box/relax"); + if (nextra_atom) + error->all(FLERR, + "Cannot use a damped dynamics min style with per-atom DOF"); + } + + if (strcmp(update->minimize_style,"hftn") == 0) { + if (nextra_global) + error->all(FLERR, "Cannot use hftn min style with fix box/relax"); + if (nextra_atom) + error->all(FLERR, "Cannot use hftn min style with per-atom DOF"); + } + + // atoms may have migrated in comm->exchange() + + reset_vectors(); + + // compute all forces + + force->setup(); + ev_set(update->ntimestep); + force_clear(); + modify->setup_pre_force(vflag); + + if (pair_compute_flag) { + atomKK->sync(force->pair->execution_space,force->pair->datamask_read); + force->pair->compute(eflag,vflag); + atomKK->modified(force->pair->execution_space,force->pair->datamask_modify); + timer->stamp(Timer::PAIR); + } + else if (force->pair) force->pair->compute_dummy(eflag,vflag); + + if (atomKK->molecular) { + if (force->bond) { + atomKK->sync(force->bond->execution_space,force->bond->datamask_read); + force->bond->compute(eflag,vflag); + atomKK->modified(force->bond->execution_space,force->bond->datamask_modify); + } + if (force->angle) { + atomKK->sync(force->angle->execution_space,force->angle->datamask_read); + force->angle->compute(eflag,vflag); + atomKK->modified(force->angle->execution_space,force->angle->datamask_modify); + } + if (force->dihedral) { + atomKK->sync(force->dihedral->execution_space,force->dihedral->datamask_read); + force->dihedral->compute(eflag,vflag); + atomKK->modified(force->dihedral->execution_space,force->dihedral->datamask_modify); + } + if (force->improper) { + atomKK->sync(force->improper->execution_space,force->improper->datamask_read); + force->improper->compute(eflag,vflag); + atomKK->modified(force->improper->execution_space,force->improper->datamask_modify); + } + timer->stamp(Timer::BOND); + } + + if(force->kspace) { + force->kspace->setup(); + if (kspace_compute_flag) { + atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read); + force->kspace->compute(eflag,vflag); + atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify); + timer->stamp(Timer::KSPACE); + } else force->kspace->compute_dummy(eflag,vflag); + } + + modify->setup_pre_reverse(eflag,vflag); + if (force->newton) comm->reverse_comm(); + + // update per-atom minimization variables stored by pair styles + + if (nextra_atom) + for (int m = 0; m < nextra_atom; m++) + requestor[m]->min_xf_get(m); + + lmp->kokkos->auto_sync = 0; + modify->setup(vflag); + output->setup(flag); + lmp->kokkos->auto_sync = 1; + update->setupflag = 0; + + // stats for initial thermo output + + ecurrent = pe_compute->compute_scalar(); + if (nextra_global) ecurrent += modify->min_energy(fextra); + if (output->thermo->normflag) ecurrent /= atom->natoms; + + einitial = ecurrent; + fnorm2_init = sqrt(fnorm_sqr()); + fnorminf_init = fnorm_inf(); +} + +/* ---------------------------------------------------------------------- + setup without output or one-time post-init setup + flag = 0 = just force calculation + flag = 1 = reneighbor and force calculation +------------------------------------------------------------------------- */ + +void MinKokkos::setup_minimal(int flag) +{ + update->setupflag = 1; + + // setup domain, communication and neighboring + // acquire ghosts + // build neighbor lists + + if (flag) { + modify->setup_pre_exchange(); + if (triclinic) domain->x2lamda(atom->nlocal); + domain->pbc(); + domain->reset_box(); + comm->setup(); + if (neighbor->style) neighbor->setup_bins(); + comm->exchange(); + comm->borders(); + if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost); + domain->image_check(); + domain->box_too_small_check(); + modify->setup_pre_neighbor(); + neighbor->build(1); + modify->setup_post_neighbor(); + neighbor->ncalls = 0; + } + + // atoms may have migrated in comm->exchange() + + reset_vectors(); + + // compute all forces + + ev_set(update->ntimestep); + force_clear(); + modify->setup_pre_force(vflag); + + if (pair_compute_flag) { + atomKK->sync(force->pair->execution_space,force->pair->datamask_read); + force->pair->compute(eflag,vflag); + atomKK->modified(force->pair->execution_space,force->pair->datamask_modify); + timer->stamp(Timer::PAIR); + } + else if (force->pair) force->pair->compute_dummy(eflag,vflag); + + if (atomKK->molecular) { + if (force->bond) { + atomKK->sync(force->bond->execution_space,force->bond->datamask_read); + force->bond->compute(eflag,vflag); + atomKK->modified(force->bond->execution_space,force->bond->datamask_modify); + } + if (force->angle) { + atomKK->sync(force->angle->execution_space,force->angle->datamask_read); + force->angle->compute(eflag,vflag); + atomKK->modified(force->angle->execution_space,force->angle->datamask_modify); + } + if (force->dihedral) { + atomKK->sync(force->dihedral->execution_space,force->dihedral->datamask_read); + force->dihedral->compute(eflag,vflag); + atomKK->modified(force->dihedral->execution_space,force->dihedral->datamask_modify); + } + if (force->improper) { + atomKK->sync(force->improper->execution_space,force->improper->datamask_read); + force->improper->compute(eflag,vflag); + atomKK->modified(force->improper->execution_space,force->improper->datamask_modify); + } + timer->stamp(Timer::BOND); + } + + if(force->kspace) { + force->kspace->setup(); + if (kspace_compute_flag) { + atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read); + force->kspace->compute(eflag,vflag); + atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify); + timer->stamp(Timer::KSPACE); + } else force->kspace->compute_dummy(eflag,vflag); + } + + modify->setup_pre_reverse(eflag,vflag); + if (force->newton) comm->reverse_comm(); + + // update per-atom minimization variables stored by pair styles + + if (nextra_atom) + for (int m = 0; m < nextra_atom; m++) + requestor[m]->min_xf_get(m); + + lmp->kokkos->auto_sync = 0; + modify->setup(vflag); + lmp->kokkos->auto_sync = 1; + update->setupflag = 0; + + // stats for Finish to print + + ecurrent = pe_compute->compute_scalar(); + if (nextra_global) ecurrent += modify->min_energy(fextra); + if (output->thermo->normflag) ecurrent /= atom->natoms; + + einitial = ecurrent; + fnorm2_init = sqrt(fnorm_sqr()); + fnorminf_init = fnorm_inf(); +} + +/* ---------------------------------------------------------------------- + perform minimization, calling iterate() for N steps +------------------------------------------------------------------------- */ + +void MinKokkos::run(int n) +{ + if (nextra_global) + error->all(FLERR,"Cannot yet use extra global DOFs (e.g. fix box/relax) " + "with Kokkos minimize"); + + if (nextra_global || nextra_atom) + error->all(FLERR,"Cannot yet use extra atom DOFs (e.g. USER-AWPMD and USER-EFF packages) " + "with Kokkos minimize"); + + // minimizer iterations + + lmp->kokkos->auto_sync = 0; + atomKK->sync(Device,ALL_MASK); + + stop_condition = iterate(n); + stopstr = stopstrings(stop_condition); + + // if early exit from iterate loop: + // set update->nsteps to niter for Finish stats to print + // set output->next values to this timestep + // call energy_force() to insure vflag is set when forces computed + // output->write does final output for thermo, dump, restart files + // add ntimestep to all computes that store invocation times + // since are hardwiring call to thermo/dumps and computes may not be ready + + if (stop_condition != MAXITER) { + update->nsteps = niter; + + if (update->restrict_output == 0) { + for (int idump = 0; idump < output->ndump; idump++) + output->next_dump[idump] = update->ntimestep; + output->next_dump_any = update->ntimestep; + if (output->restart_flag) { + output->next_restart = update->ntimestep; + if (output->restart_every_single) + output->next_restart_single = update->ntimestep; + if (output->restart_every_double) + output->next_restart_double = update->ntimestep; + } + } + output->next_thermo = update->ntimestep; + + modify->addstep_compute_all(update->ntimestep); + ecurrent = energy_force(0); + + atomKK->sync(Host,ALL_MASK); + output->write(update->ntimestep); + } + + atomKK->sync(Host,ALL_MASK); + lmp->kokkos->auto_sync = 1; +} + +/* ---------------------------------------------------------------------- + evaluate potential energy and forces + may migrate atoms due to reneighboring + return new energy, which should include nextra_global dof + return negative gradient stored in atom->f + return negative gradient for nextra_global dof in fextra +------------------------------------------------------------------------- */ + +double MinKokkos::energy_force(int resetflag) +{ + // check for reneighboring + // always communicate since minimizer moved atoms + + int nflag = neighbor->decide(); + + if (nflag == 0) { + timer->stamp(); + comm->forward_comm(); + timer->stamp(Timer::COMM); + } else { + if (modify->n_min_pre_exchange) { + timer->stamp(); + modify->min_pre_exchange(); + timer->stamp(Timer::MODIFY); + } + if (triclinic) domain->x2lamda(atom->nlocal); + domain->pbc(); + if (domain->box_change) { + domain->reset_box(); + comm->setup(); + if (neighbor->style) neighbor->setup_bins(); + } + timer->stamp(); + comm->exchange(); + if (atom->sortfreq > 0 && + update->ntimestep >= atom->nextsort) atom->sort(); + comm->borders(); + if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost); + timer->stamp(Timer::COMM); + if (modify->n_min_pre_neighbor) { + modify->min_pre_neighbor(); + timer->stamp(Timer::MODIFY); + } + neighbor->build(1); + timer->stamp(Timer::NEIGH); + if (modify->n_min_post_neighbor) { + modify->min_post_neighbor(); + timer->stamp(Timer::MODIFY); + } + } + + ev_set(update->ntimestep); + force_clear(); + + timer->stamp(); + + if (modify->n_min_pre_force) { + modify->min_pre_force(vflag); + timer->stamp(Timer::MODIFY); + } + + if (pair_compute_flag) { + atomKK->sync(force->pair->execution_space,force->pair->datamask_read); + force->pair->compute(eflag,vflag); + atomKK->modified(force->pair->execution_space,force->pair->datamask_modify); + timer->stamp(Timer::PAIR); + } + + if (atom->molecular) { + if (force->bond) { + atomKK->sync(force->bond->execution_space,force->bond->datamask_read); + force->bond->compute(eflag,vflag); + atomKK->modified(force->bond->execution_space,force->bond->datamask_modify); + } + if (force->angle) { + atomKK->sync(force->angle->execution_space,force->angle->datamask_read); + force->angle->compute(eflag,vflag); + atomKK->modified(force->angle->execution_space,force->angle->datamask_modify); + } + if (force->dihedral) { + atomKK->sync(force->dihedral->execution_space,force->dihedral->datamask_read); + force->dihedral->compute(eflag,vflag); + atomKK->modified(force->dihedral->execution_space,force->dihedral->datamask_modify); + } + if (force->improper) { + atomKK->sync(force->improper->execution_space,force->improper->datamask_read); + force->improper->compute(eflag,vflag); + atomKK->modified(force->improper->execution_space,force->improper->datamask_modify); + } + timer->stamp(Timer::BOND); + } + + if (kspace_compute_flag) { + atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read); + force->kspace->compute(eflag,vflag); + atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify); + timer->stamp(Timer::KSPACE); + } + + if (modify->n_min_pre_reverse) { + modify->min_pre_reverse(eflag,vflag); + timer->stamp(Timer::MODIFY); + } + + if (force->newton) { + comm->reverse_comm(); + timer->stamp(Timer::COMM); + } + + // fixes that affect minimization + + if (modify->n_min_post_force) { + timer->stamp(); + modify->min_post_force(vflag); + timer->stamp(Timer::MODIFY); + } + + // compute potential energy of system + // normalize if thermo PE does + + atomKK->sync(pe_compute->execution_space,pe_compute->datamask_read); + double energy = pe_compute->compute_scalar(); + atomKK->modified(pe_compute->execution_space,pe_compute->datamask_modify); + if (nextra_global) energy += modify->min_energy(fextra); + if (output->thermo->normflag) energy /= atom->natoms; + + // if reneighbored, atoms migrated + // if resetflag = 1, update x0 of atoms crossing PBC + // reset vectors used by lo-level minimizer + + if (nflag) { + if (resetflag) fix_minimize_kk->reset_coords(); + reset_vectors(); + } + return energy; +} + +/* ---------------------------------------------------------------------- + clear force on own & ghost atoms + clear other arrays as needed +------------------------------------------------------------------------- */ + +void MinKokkos::force_clear() +{ + if (external_force_clear) return; + + // clear global force array + // if either newton flag is set, also include ghosts + + atomKK->k_f.clear_sync_state(); // ignore host forces/torques since device views + atomKK->k_torque.clear_sync_state(); // will be cleared below + + int nzero = atom->nlocal; + if (force->newton) nzero += atom->nghost; + + if (nzero) { + // local variables for lambda capture + + auto l_f = atomKK->k_f.d_view; + auto l_torque = atomKK->k_torque.d_view; + auto l_torqueflag = torqueflag; + + Kokkos::parallel_for(nzero, LAMMPS_LAMBDA(int i) { + l_f(i,0) = 0.0; + l_f(i,1) = 0.0; + l_f(i,2) = 0.0; + if (l_torqueflag) { + l_torque(i,0) = 0.0; + l_torque(i,1) = 0.0; + l_torque(i,2) = 0.0; + } + }); + } + atomKK->modified(Device,F_MASK); +} + +/* ---------------------------------------------------------------------- + compute and return ||force||_2^2 +------------------------------------------------------------------------- */ + +double MinKokkos::fnorm_sqr() +{ + + double local_norm2_sqr = 0.0; + { + // local variables for lambda capture + + auto l_fvec = fvec; + + Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(int i, double& local_norm2_sqr) { + local_norm2_sqr += l_fvec[i]*l_fvec[i]; + },local_norm2_sqr); + } + + double norm2_sqr = 0.0; + MPI_Allreduce(&local_norm2_sqr,&norm2_sqr,1,MPI_DOUBLE,MPI_SUM,world); + + return norm2_sqr; +} + +/* ---------------------------------------------------------------------- + compute and return ||force||_inf +------------------------------------------------------------------------- */ + +double MinKokkos::fnorm_inf() +{ + + double local_norm_inf = 0.0; + { + // local variables for lambda capture + + auto l_fvec = fvec; + + Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(int i, double& local_norm_inf) { + local_norm_inf = MAX(fabs(l_fvec[i]),local_norm_inf); + },Kokkos::Max(local_norm_inf)); + } + + double norm_inf = 0.0; + MPI_Allreduce(&local_norm_inf,&norm_inf,1,MPI_DOUBLE,MPI_MAX,world); + + return norm_inf; +} diff --git a/src/KOKKOS/min_kokkos.h b/src/KOKKOS/min_kokkos.h new file mode 100644 index 0000000000..49cd1d1849 --- /dev/null +++ b/src/KOKKOS/min_kokkos.h @@ -0,0 +1,58 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifndef LMP_MIN_KOKKOS_H +#define LMP_MIN_KOKKOS_H + +#include "min.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +class MinKokkos : public Min { + public: + MinKokkos(class LAMMPS *); + virtual ~MinKokkos(); + void init(); + void setup(int flag=1); + void setup_minimal(int); + void run(int); + double fnorm_sqr(); + double fnorm_inf(); + + virtual void init_style() {} + virtual void setup_style() = 0; + virtual void reset_vectors() = 0; + virtual int iterate(int) = 0; + + // possible return values of iterate() method + enum{MAXITER,MAXEVAL,ETOL,FTOL,DOWNHILL,ZEROALPHA,ZEROFORCE, + ZEROQUAD,TRSMALL,INTERROR,TIMEOUT}; + + //protected: // won't compile with CUDA + class FixMinimizeKokkos *fix_minimize_kk; // fix that stores auxiliary data + + DAT::t_ffloat_1d xvec; // variables for atomic dof, as 1d vector + DAT::t_ffloat_1d fvec; // force vector for atomic dof, as 1d vector + + double energy_force(int); + void force_clear(); +}; + +} + +#endif + +/* ERROR/WARNING messages: + +*/ diff --git a/src/KOKKOS/min_linesearch_kokkos.cpp b/src/KOKKOS/min_linesearch_kokkos.cpp new file mode 100644 index 0000000000..cb07c7db86 --- /dev/null +++ b/src/KOKKOS/min_linesearch_kokkos.cpp @@ -0,0 +1,401 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Stan Moore (SNL) +------------------------------------------------------------------------- */ + +#include "min_linesearch_kokkos.h" +#include +#include +#include "atom_kokkos.h" +#include "modify.h" +#include "fix_minimize_kokkos.h" +#include "pair.h" +#include "output.h" +#include "thermo.h" +#include "error.h" +#include "atom_masks.h" + +using namespace LAMMPS_NS; + +// ALPHA_MAX = max alpha allowed to avoid long backtracks +// ALPHA_REDUCE = reduction ratio, should be in range [0.5,1) +// BACKTRACK_SLOPE, should be in range (0,0.5] +// QUADRATIC_TOL = tolerance on alpha0, should be in range [0.1,1) +// EMACH = machine accuracy limit of energy changes (1.0e-8) +// EPS_QUAD = tolerance for quadratic projection + +#define ALPHA_MAX 1.0 +#define ALPHA_REDUCE 0.5 +#define BACKTRACK_SLOPE 0.4 +#define QUADRATIC_TOL 0.1 +//#define EMACH 1.0e-8 +#define EMACH 1.0e-8 +#define EPS_QUAD 1.0e-28 + +/* ---------------------------------------------------------------------- */ + +MinLineSearchKokkos::MinLineSearchKokkos(LAMMPS *lmp) : MinKokkos(lmp) +{ + searchflag = 1; + atomKK = (AtomKokkos *) atom; +} + +/* ---------------------------------------------------------------------- */ + +MinLineSearchKokkos::~MinLineSearchKokkos() +{ + +} + +/* ---------------------------------------------------------------------- */ + +void MinLineSearchKokkos::init() +{ + MinKokkos::init(); + + if (linestyle == 1) linemin = &MinLineSearchKokkos::linemin_quadratic; + else error->all(FLERR,"Kokkos minimize only supports the 'min_modify line " + "quadratic' option"); +} + +/* ---------------------------------------------------------------------- */ + +void MinLineSearchKokkos::setup_style() +{ + // memory for x0,g,h for atomic dof + + fix_minimize_kk->add_vector_kokkos(); + fix_minimize_kk->add_vector_kokkos(); + fix_minimize_kk->add_vector_kokkos(); +} + +/* ---------------------------------------------------------------------- + set current vector lengths and pointers + called after atoms have migrated +------------------------------------------------------------------------- */ + +void MinLineSearchKokkos::reset_vectors() +{ + // atomic dof + + nvec = 3 * atom->nlocal; + atomKK->sync(Device,F_MASK|X_MASK); + auto d_x = atomKK->k_x.d_view; + auto d_f = atomKK->k_f.d_view; + + if (nvec) xvec = DAT::t_ffloat_1d(d_x.data(),d_x.size()); + if (nvec) fvec = DAT::t_ffloat_1d(d_f.data(),d_f.size()); + x0 = fix_minimize_kk->request_vector_kokkos(0); + g = fix_minimize_kk->request_vector_kokkos(1); + h = fix_minimize_kk->request_vector_kokkos(2); + + auto h_fvec = Kokkos::create_mirror_view(fvec); + Kokkos::deep_copy(h_fvec,fvec); +} + +/* ---------------------------------------------------------------------- + line minimization methods + find minimum-energy starting at x along h direction + input args: eoriginal = energy at initial x + input extra: n,x,x0,f,h for atomic, extra global, extra per-atom dof + output args: return 0 if successful move, non-zero alpha + return non-zero if failed + alpha = distance moved along h for x at min eng config + update neval counter of eng/force function evaluations + output extra: if fail, energy_force() of original x + if succeed, energy_force() at x + alpha*h + atom->x = coords at new configuration + atom->f = force at new configuration + ecurrent = energy of new configuration +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + // linemin: quadratic line search (adapted from Dennis and Schnabel) + // The objective function is approximated by a quadratic + // function in alpha, for sufficiently small alpha. + // This idea is the same as that used in the well-known secant + // method. However, since the change in the objective function + // (difference of two finite numbers) is not known as accurately + // as the gradient (which is close to zero), all the expressions + // are written in terms of gradients. In this way, we can converge + // the LAMMPS forces much closer to zero. + // + // We know E,Eprev,fh,fhprev. The Taylor series about alpha_prev + // truncated at the quadratic term is: + // + // E = Eprev - del_alpha*fhprev + (1/2)del_alpha^2*Hprev + // + // and + // + // fh = fhprev - del_alpha*Hprev + // + // where del_alpha = alpha-alpha_prev + // + // We solve these two equations for Hprev and E=Esolve, giving: + // + // Esolve = Eprev - del_alpha*(f+fprev)/2 + // + // We define relerr to be: + // + // relerr = |(Esolve-E)/Eprev| + // = |1.0 - (0.5*del_alpha*(f+fprev)+E)/Eprev| + // + // If this is accurate to within a reasonable tolerance, then + // we go ahead and use a secant step to fh = 0: + // + // alpha0 = alpha - (alpha-alphaprev)*fh/delfh; + // +------------------------------------------------------------------------- */ + +int MinLineSearchKokkos::linemin_quadratic(double eoriginal, double &alpha) +{ + double fdothall,fdothme,hme,hmaxall; + double de_ideal,de; + double delfh,engprev,relerr,alphaprev,fhprev,ff,fh,alpha0; + double dot[2],dotall[2]; + double alphamax; + + fix_minimize_kk->k_vectors.sync(); + fix_minimize_kk->k_vectors.modify(); + + // fdothall = projection of search dir along downhill gradient + // if search direction is not downhill, exit with error + + fdothme = 0.0; + { + // local variables for lambda capture + + auto l_fvec = fvec; + auto l_h = h; + + Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, double& fdothme) { + fdothme += l_fvec[i]*l_h[i]; + },fdothme); + } + MPI_Allreduce(&fdothme,&fdothall,1,MPI_DOUBLE,MPI_SUM,world); + if (output->thermo->normflag) fdothall /= atom->natoms; + + if (fdothall <= 0.0) return DOWNHILL; + + // set alphamax so no dof is changed by more than max allowed amount + // for atom coords, max amount = dmax + // for extra per-atom dof, max amount = extra_max[] + // for extra global dof, max amount is set by fix + // also insure alphamax <= ALPHA_MAX + // else will have to backtrack from huge value when forces are tiny + // if all search dir components are already 0.0, exit with error + + + hme = 0.0; + { + // local variables for lambda capture + + auto l_h = h; + + Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, double& hme) { + hme = MAX(hme,fabs(l_h[i])); + },Kokkos::Max(hme)); + } + MPI_Allreduce(&hme,&hmaxall,1,MPI_DOUBLE,MPI_MAX,world); + alphamax = MIN(ALPHA_MAX,dmax/hmaxall); + + if (hmaxall == 0.0) return ZEROFORCE; + + // store box and values of all dof at start of linesearch + + { + // local variables for lambda capture + + auto l_xvec = xvec; + auto l_x0 = x0; + + fix_minimize_kk->store_box(); + Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) { + l_x0[i] = l_xvec[i]; + }); + } + + // backtrack with alpha until energy decrease is sufficient + // or until get to small energy change, then perform quadratic projection + + alpha = alphamax; + fhprev = fdothall; + engprev = eoriginal; + alphaprev = 0.0; + + // // important diagnostic: test the gradient against energy + // double etmp; + // double alphatmp = alphamax*1.0e-4; + // etmp = alpha_step(alphatmp,1); + // printf("alpha = %g dele = %g dele_force = %g err = %g\n", + // alphatmp,etmp-eoriginal,-alphatmp*fdothall, + // etmp-eoriginal+alphatmp*fdothall); + // alpha_step(0.0,1); + + while (1) { + ecurrent = alpha_step(alpha,1); + + // compute new fh, alpha, delfh + + s_double2 sdot; + { + // local variables for lambda capture + + auto l_fvec = fvec; + auto l_h = h; + + Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, s_double2& sdot) { + sdot.d0 += l_fvec[i]*l_fvec[i]; + sdot.d1 += l_fvec[i]*l_h[i]; + },sdot); + } + dot[0] = sdot.d0; + dot[1] = sdot.d1; + + MPI_Allreduce(dot,dotall,2,MPI_DOUBLE,MPI_SUM,world); + ff = dotall[0]; + fh = dotall[1]; + if (output->thermo->normflag) { + ff /= atom->natoms; + fh /= atom->natoms; + } + + delfh = fh - fhprev; + + // if fh or delfh is epsilon, reset to starting point, exit with error + + if (fabs(fh) < EPS_QUAD || fabs(delfh) < EPS_QUAD) { + ecurrent = alpha_step(0.0,0); + return ZEROQUAD; + } + + // Check if ready for quadratic projection, equivalent to secant method + // alpha0 = projected alpha + + relerr = fabs(1.0-(0.5*(alpha-alphaprev)*(fh+fhprev)+ecurrent)/engprev); + alpha0 = alpha - (alpha-alphaprev)*fh/delfh; + + if (relerr <= QUADRATIC_TOL && alpha0 > 0.0 && alpha0 < alphamax) { + ecurrent = alpha_step(alpha0,1); + if (ecurrent - eoriginal < EMACH) { + return 0; + } + } + + // if backtracking energy change is better than ideal, exit with success + + de_ideal = -BACKTRACK_SLOPE*alpha*fdothall; + de = ecurrent - eoriginal; + + if (de <= de_ideal) + return 0; + + // save previous state + + fhprev = fh; + engprev = ecurrent; + alphaprev = alpha; + + // reduce alpha + + alpha *= ALPHA_REDUCE; + + // backtracked all the way to 0.0 + // reset to starting point, exit with error + + if (alpha <= 0.0 || de_ideal >= -EMACH) { + ecurrent = alpha_step(0.0,0); + return ZEROALPHA; + } + } +} + +/* ---------------------------------------------------------------------- */ + +double MinLineSearchKokkos::alpha_step(double alpha, int resetflag) +{ + // reset to starting point + + atomKK->k_x.clear_sync_state(); // ignore if host positions since device + // positions will be reset below + { + // local variables for lambda capture + + auto l_xvec = xvec; + auto l_x0 = x0; + + Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) { + l_xvec[i] = l_x0[i]; + }); + } + + // step forward along h + + if (alpha > 0.0) { + // local variables for lambda capture + + auto l_xvec = xvec; + auto l_h = h; + + Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) { + l_xvec[i] += alpha*l_h[i]; + }); + } + + atomKK->modified(Device,X_MASK); + + // compute and return new energy + + neval++; + return energy_force(resetflag); +} + +/* ---------------------------------------------------------------------- */ + +// compute projection of force on: itself and the search direction + +double MinLineSearchKokkos::compute_dir_deriv(double &ff) +{ + double dot[2],dotall[2]; + double fh; + + // compute new fh, alpha, delfh + + s_double2 sdot; + { + // local variables for lambda capture + + auto l_fvec = fvec; + auto l_h = h; + + Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, s_double2& sdot) { + sdot.d0 += l_fvec[i]*l_fvec[i]; + sdot.d1 += l_fvec[i]*l_h[i]; + },sdot); + } + dot[0] = sdot.d0; + dot[1] = sdot.d1; + + MPI_Allreduce(dot,dotall,2,MPI_DOUBLE,MPI_SUM,world); + + ff = dotall[0]; + fh = dotall[1]; + if (output->thermo->normflag) { + ff /= atom->natoms; + fh /= atom->natoms; + } + + return fh; +} diff --git a/src/KOKKOS/min_linesearch_kokkos.h b/src/KOKKOS/min_linesearch_kokkos.h new file mode 100644 index 0000000000..70df13d1e7 --- /dev/null +++ b/src/KOKKOS/min_linesearch_kokkos.h @@ -0,0 +1,69 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifndef LMP_MIN_LSRCH_KOKKOS_H +#define LMP_MIN_LSRCH_KOKKOS_H + +#include "min_kokkos.h" + +namespace LAMMPS_NS { + + struct s_double2 { + double d0, d1; + KOKKOS_INLINE_FUNCTION + s_double2() { + d0 = d1 = 0.0; + } + KOKKOS_INLINE_FUNCTION + s_double2& operator+=(const s_double2 &rhs){ + d0 += rhs.d0; + d1 += rhs.d1; + return *this; + } + + KOKKOS_INLINE_FUNCTION + void operator+=(const volatile s_double2 &rhs) volatile { + d0 += rhs.d0; + d1 += rhs.d1; + } + }; + //typedef s_double2 double2; + +class MinLineSearchKokkos : public MinKokkos { + public: + MinLineSearchKokkos(class LAMMPS *); + ~MinLineSearchKokkos(); + void init(); + void setup_style(); + void reset_vectors(); + + //protected: // won't compile with CUDA + // vectors needed by linesearch minimizers + // allocated and stored by fix_minimize + // x,f are stored by parent or Atom class or Pair class + + DAT::t_ffloat_1d x0; // coords at start of linesearch + DAT::t_ffloat_1d g; // old gradient vector + DAT::t_ffloat_1d h; // search direction vector + + typedef int (MinLineSearchKokkos::*FnPtr)(double, double &); + FnPtr linemin; + int linemin_quadratic(double, double &); + + double alpha_step(double, int); + double compute_dir_deriv(double &); +}; + +} + +#endif diff --git a/src/KOKKOS/verlet_kokkos.cpp b/src/KOKKOS/verlet_kokkos.cpp index 2d2f0a9815..23952b71d8 100644 --- a/src/KOKKOS/verlet_kokkos.cpp +++ b/src/KOKKOS/verlet_kokkos.cpp @@ -143,7 +143,6 @@ void VerletKokkos::setup(int flag) } else if (force->pair) force->pair->compute_dummy(eflag,vflag); - if (atomKK->molecular) { if (force->bond) { atomKK->sync(force->bond->execution_space,force->bond->datamask_read); @@ -248,7 +247,6 @@ void VerletKokkos::setup_minimal(int flag) } else if (force->pair) force->pair->compute_dummy(eflag,vflag); - if (atomKK->molecular) { if (force->bond) { atomKK->sync(force->bond->execution_space,force->bond->datamask_read); @@ -285,7 +283,9 @@ void VerletKokkos::setup_minimal(int flag) if (force->newton) comm->reverse_comm(); + lmp->kokkos->auto_sync = 0; modify->setup(vflag); + lmp->kokkos->auto_sync = 1; update->setupflag = 0; } diff --git a/src/MOLECULE/dihedral_harmonic.cpp b/src/MOLECULE/dihedral_harmonic.cpp index f1e5811a84..4f6a88eca5 100644 --- a/src/MOLECULE/dihedral_harmonic.cpp +++ b/src/MOLECULE/dihedral_harmonic.cpp @@ -42,7 +42,7 @@ DihedralHarmonic::DihedralHarmonic(LAMMPS *lmp) : Dihedral(lmp) DihedralHarmonic::~DihedralHarmonic() { - if (allocated) { + if (allocated && !copymode) { memory->destroy(setflag); memory->destroy(k); memory->destroy(sign); diff --git a/src/MOLECULE/dihedral_harmonic.h b/src/MOLECULE/dihedral_harmonic.h index 938e59918b..6792d16a20 100644 --- a/src/MOLECULE/dihedral_harmonic.h +++ b/src/MOLECULE/dihedral_harmonic.h @@ -29,16 +29,16 @@ class DihedralHarmonic : public Dihedral { DihedralHarmonic(class LAMMPS *); virtual ~DihedralHarmonic(); virtual void compute(int, int); - void coeff(int, char **); + virtual void coeff(int, char **); void write_restart(FILE *); - void read_restart(FILE *); + virtual void read_restart(FILE *); void write_data(FILE *); protected: double *k,*cos_shift,*sin_shift; int *sign,*multiplicity; - void allocate(); + virtual void allocate(); }; } diff --git a/src/REPLICA/tad.cpp b/src/REPLICA/tad.cpp index 8a51f6d00e..b2fbb74071 100644 --- a/src/REPLICA/tad.cpp +++ b/src/REPLICA/tad.cpp @@ -197,7 +197,7 @@ void TAD::command(int narg, char **arg) args = new char*[narg2]; args[0] = min_style; - update->create_minimize(narg2,args); + update->create_minimize(narg2,args,1); delete [] args; @@ -711,7 +711,7 @@ void TAD::perform_neb(int ievent) args = new char*[narg2]; args[0] = min_style_neb; - update->create_minimize(narg2,args); + update->create_minimize(narg2,args,1); delete [] args; @@ -777,7 +777,7 @@ void TAD::perform_neb(int ievent) args = new char*[narg2]; args[0] = min_style; - update->create_minimize(narg2,args); + update->create_minimize(narg2,args,1); update->etol = etol; update->ftol = ftol; diff --git a/src/RIGID/fix_rigid.cpp b/src/RIGID/fix_rigid.cpp index 912f529403..3e6130ac7c 100644 --- a/src/RIGID/fix_rigid.cpp +++ b/src/RIGID/fix_rigid.cpp @@ -34,25 +34,12 @@ #include "math_const.h" #include "memory.h" #include "error.h" +#include "rigid_const.h" using namespace LAMMPS_NS; using namespace FixConst; using namespace MathConst; - -enum{SINGLE,MOLECULE,GROUP}; -enum{NONE,XYZ,XY,YZ,XZ}; -enum{ISO,ANISO,TRICLINIC}; - -#define MAXLINE 1024 -#define CHUNK 1024 -#define ATTRIBUTE_PERBODY 20 - -#define TOLERANCE 1.0e-6 -#define EPSILON 1.0e-7 - -#define SINERTIA 0.4 // moment of inertia prefactor for sphere -#define EINERTIA 0.2 // moment of inertia prefactor for ellipsoid -#define LINERTIA (1.0/12.0) // moment of inertia prefactor for line segment +using namespace RigidConst; /* ---------------------------------------------------------------------- */ @@ -605,21 +592,6 @@ FixRigid::FixRigid(LAMMPS *lmp, int narg, char **arg) : for (ibody = 0; ibody < nbody; ibody++) if (nrigid[ibody] <= 1) error->all(FLERR,"One or zero atoms in rigid body"); - // bitmasks for properties of extended particles - - POINT = 1; - SPHERE = 2; - ELLIPSOID = 4; - LINE = 8; - TRIANGLE = 16; - DIPOLE = 32; - OMEGA = 64; - ANGMOM = 128; - TORQUE = 256; - - MINUSPI = -MY_PI; - TWOPI = 2.0*MY_PI; - // wait to setup bodies until first init() using current atom properties setupflag = 0; @@ -1472,8 +1444,8 @@ void FixRigid::set_xv() if (quat[ibody][3] >= 0.0) theta_body = 2.0*acos(quat[ibody][0]); else theta_body = -2.0*acos(quat[ibody][0]); theta = orient[i][0] + theta_body; - while (theta <= MINUSPI) theta += TWOPI; - while (theta > MY_PI) theta -= TWOPI; + while (theta <= -MY_PI) theta += MY_2PI; + while (theta > MY_PI) theta -= MY_2PI; lbonus[line[i]].theta = theta; omega_one[i][0] = omega[ibody][0]; omega_one[i][1] = omega[ibody][1]; @@ -2018,8 +1990,8 @@ void FixRigid::setup_bodies_static() if (quat[ibody][3] >= 0.0) theta_body = 2.0*acos(quat[ibody][0]); else theta_body = -2.0*acos(quat[ibody][0]); orient[i][0] = lbonus[line[i]].theta - theta_body; - while (orient[i][0] <= MINUSPI) orient[i][0] += TWOPI; - while (orient[i][0] > MY_PI) orient[i][0] -= TWOPI; + while (orient[i][0] <= -MY_PI) orient[i][0] += MY_2PI; + while (orient[i][0] > MY_PI) orient[i][0] -= MY_2PI; if (orientflag == 4) orient[i][1] = orient[i][2] = orient[i][3] = 0.0; } else if (eflags[i] & TRIANGLE) { quatatom = tbonus[tri[i]].quat; diff --git a/src/RIGID/fix_rigid.h b/src/RIGID/fix_rigid.h index d9d7b07ce8..8c4ab9ed49 100644 --- a/src/RIGID/fix_rigid.h +++ b/src/RIGID/fix_rigid.h @@ -65,7 +65,6 @@ class FixRigid : public Fix { double dtv,dtf,dtq; double *step_respa; int triclinic; - double MINUSPI,TWOPI; char *inpfile; // file to read rigid body attributes from int rstyle; // SINGLE,MOLECULE,GROUP @@ -137,9 +136,6 @@ class FixRigid : public Fix { class AtomVecLine *avec_line; class AtomVecTri *avec_tri; - int POINT,SPHERE,ELLIPSOID,LINE,TRIANGLE,DIPOLE; // bitmasks for eflags - int OMEGA,ANGMOM,TORQUE; - void image_shift(); void set_xv(); void set_v(); diff --git a/src/RIGID/fix_rigid_nh.cpp b/src/RIGID/fix_rigid_nh.cpp index 4738e253f6..6ffb997ffa 100644 --- a/src/RIGID/fix_rigid_nh.cpp +++ b/src/RIGID/fix_rigid_nh.cpp @@ -33,14 +33,11 @@ #include "kspace.h" #include "memory.h" #include "error.h" +#include "rigid_const.h" using namespace LAMMPS_NS; using namespace FixConst; - -enum{NONE,XYZ,XY,YZ,XZ}; // same as in FixRigid -enum{ISO,ANISO,TRICLINIC}; // same as in FixRigid - -#define EPSILON 1.0e-7 +using namespace RigidConst; /* ---------------------------------------------------------------------- */ diff --git a/src/RIGID/fix_rigid_nh_small.cpp b/src/RIGID/fix_rigid_nh_small.cpp index 1545e913c0..136796ce18 100644 --- a/src/RIGID/fix_rigid_nh_small.cpp +++ b/src/RIGID/fix_rigid_nh_small.cpp @@ -34,17 +34,12 @@ #include "kspace.h" #include "memory.h" #include "error.h" +#include "rigid_const.h" using namespace LAMMPS_NS; using namespace FixConst; using namespace MathExtra; - -enum{NONE,XYZ,XY,YZ,XZ}; // same as in FixRigid -enum{ISO,ANISO,TRICLINIC}; // same as in FixRigid - -#define EPSILON 1.0e-7 - -enum{FULL_BODY,INITIAL,FINAL,FORCE_TORQUE,VCM_ANGMOM,XCM_MASS,ITENSOR,DOF}; +using namespace RigidConst; /* ---------------------------------------------------------------------- */ diff --git a/src/RIGID/fix_rigid_small.cpp b/src/RIGID/fix_rigid_small.cpp index 14e230ab9c..ba570d5840 100644 --- a/src/RIGID/fix_rigid_small.cpp +++ b/src/RIGID/fix_rigid_small.cpp @@ -38,34 +38,17 @@ #include "hashlittle.h" #include "memory.h" #include "error.h" +#include "rigid_const.h" #include using namespace LAMMPS_NS; using namespace FixConst; using namespace MathConst; +using namespace RigidConst; #define RVOUS 1 // 0 for irregular, 1 for all2all -#define MAXLINE 1024 -#define CHUNK 1024 -#define ATTRIBUTE_PERBODY 20 - -#define TOLERANCE 1.0e-6 -#define EPSILON 1.0e-7 -#define BIG 1.0e20 - -#define SINERTIA 0.4 // moment of inertia prefactor for sphere -#define EINERTIA 0.2 // moment of inertia prefactor for ellipsoid -#define LINERTIA (1.0/12.0) // moment of inertia prefactor for line segment - -#define DELTA_BODY 10000 - -enum{NONE,XYZ,XY,YZ,XZ}; // same as in FixRigid -enum{ISO,ANISO,TRICLINIC}; // same as in FixRigid - -enum{FULL_BODY,INITIAL,FINAL,FORCE_TORQUE,VCM_ANGMOM,XCM_MASS,ITENSOR,DOF}; - /* ---------------------------------------------------------------------- */ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : @@ -455,21 +438,6 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : comm_forward = 1 + bodysize; comm_reverse = 6; - // bitmasks for properties of extended particles - - POINT = 1; - SPHERE = 2; - ELLIPSOID = 4; - LINE = 8; - TRIANGLE = 16; - DIPOLE = 32; - OMEGA = 64; - ANGMOM = 128; - TORQUE = 256; - - MINUSPI = -MY_PI; - TWOPI = 2.0*MY_PI; - // atom style pointers to particles that store extra info avec_ellipsoid = (AtomVecEllipsoid *) atom->style_match("ellipsoid"); @@ -1384,8 +1352,8 @@ void FixRigidSmall::set_xv() if (b->quat[3] >= 0.0) theta_body = 2.0*acos(b->quat[0]); else theta_body = -2.0*acos(b->quat[0]); theta = orient[i][0] + theta_body; - while (theta <= MINUSPI) theta += TWOPI; - while (theta > MY_PI) theta -= TWOPI; + while (theta <= -MY_PI) theta += MY_2PI; + while (theta > MY_PI) theta -= MY_2PI; lbonus[line[i]].theta = theta; omega[i][0] = b->omega[0]; omega[i][1] = b->omega[1]; @@ -2155,8 +2123,8 @@ void FixRigidSmall::setup_bodies_static() if (b->quat[3] >= 0.0) theta_body = 2.0*acos(b->quat[0]); else theta_body = -2.0*acos(b->quat[0]); orient[i][0] = lbonus[line[i]].theta - theta_body; - while (orient[i][0] <= MINUSPI) orient[i][0] += TWOPI; - while (orient[i][0] > MY_PI) orient[i][0] -= TWOPI; + while (orient[i][0] <= -MY_PI) orient[i][0] += MY_2PI; + while (orient[i][0] > MY_PI) orient[i][0] -= MY_2PI; if (orientflag == 4) orient[i][1] = orient[i][2] = orient[i][3] = 0.0; } else if (eflags[i] & TRIANGLE) { quatatom = tbonus[tri[i]].quat; diff --git a/src/RIGID/fix_rigid_small.h b/src/RIGID/fix_rigid_small.h index 6dae443d1c..0d1cdb5963 100644 --- a/src/RIGID/fix_rigid_small.h +++ b/src/RIGID/fix_rigid_small.h @@ -72,7 +72,6 @@ class FixRigidSmall : public Fix { double dtv,dtf,dtq; double *step_respa; int triclinic; - double MINUSPI,TWOPI; char *inpfile; // file to read rigid body attributes from int setupflag; // 1 if body properties are setup, else 0 @@ -129,9 +128,6 @@ class FixRigidSmall : public Fix { int dorientflag; // 1 if particles store dipole orientation int reinitflag; // 1 if re-initialize rigid bodies between runs - int POINT,SPHERE,ELLIPSOID,LINE,TRIANGLE,DIPOLE; // bitmasks for eflags - int OMEGA,ANGMOM,TORQUE; - class AtomVecEllipsoid *avec_ellipsoid; class AtomVecLine *avec_line; class AtomVecTri *avec_tri; diff --git a/src/RIGID/rigid_const.h b/src/RIGID/rigid_const.h new file mode 100644 index 0000000000..14db517fcd --- /dev/null +++ b/src/RIGID/rigid_const.h @@ -0,0 +1,54 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifndef LMP_RIGID_CONST_H +#define LMP_RIGID_CONST_H + +namespace LAMMPS_NS { + namespace RigidConst { + + enum{SINGLE,MOLECULE,GROUP}; + enum{NONE,XYZ,XY,YZ,XZ}; + enum{ISO,ANISO,TRICLINIC}; + enum{FULL_BODY,INITIAL,FINAL,FORCE_TORQUE,VCM_ANGMOM,XCM_MASS,ITENSOR,DOF}; + + enum {POINT = 1<<0, + SPHERE = 1<<1, + ELLIPSOID = 1<<2, + LINE = 1<<3, + TRIANGLE = 1<<4, + DIPOLE = 1<<5, + OMEGA = 1<<6, + ANGMOM = 1<<7, + TORQUE = 1<<8 + }; + + static const double TOLERANCE = 1.0e-6; + static const double EPSILON = 1.0e-7; + static const double BIG = 1.0e20; + + // moment of inertia prefactor for sphere + static const double SINERTIA = 0.4; + // moment of inertia prefactor for ellipsoid + static const double EINERTIA = 0.2; + // moment of inertia prefactor for line segment + static const double LINERTIA = 1.0/12.0; + + static const int MAXLINE = 1024; + static const int CHUNK = 1024; + static const int DELTA_BODY = 10000; + static const int ATTRIBUTE_PERBODY = 20; + } +} + +#endif diff --git a/src/USER-OMP/fix_rigid_nh_omp.cpp b/src/USER-OMP/fix_rigid_nh_omp.cpp index 74b2a92775..866021b3f4 100644 --- a/src/USER-OMP/fix_rigid_nh_omp.cpp +++ b/src/USER-OMP/fix_rigid_nh_omp.cpp @@ -38,15 +38,12 @@ #include "math_extra.h" #include "math_const.h" +#include "rigid_const.h" using namespace LAMMPS_NS; using namespace FixConst; using namespace MathConst; - -enum{SINGLE,MOLECULE,GROUP}; // same as in FixRigid -enum{ISO,ANISO,TRICLINIC}; // same as in FixRigid - -#define EINERTIA 0.4 // moment of inertia prefactor for ellipsoid +using namespace RigidConst; typedef struct { double x,y,z; } dbl3_t; @@ -770,8 +767,8 @@ void FixRigidNHOMP::set_xv_thr() if (quat[ibody][3] >= 0.0) theta_body = 2.0*acos(quat[ibody][0]); else theta_body = -2.0*acos(quat[ibody][0]); theta = orient[i][0] + theta_body; - while (theta <= MINUSPI) theta += TWOPI; - while (theta > MY_PI) theta -= TWOPI; + while (theta <= -MY_PI) theta += MY_2PI; + while (theta > MY_PI) theta -= MY_2PI; lbonus[line[i]].theta = theta; omega_one[i][0] = omega[ibody][0]; omega_one[i][1] = omega[ibody][1]; diff --git a/src/USER-OMP/fix_rigid_omp.cpp b/src/USER-OMP/fix_rigid_omp.cpp index b807ddba7c..3baa9de20a 100644 --- a/src/USER-OMP/fix_rigid_omp.cpp +++ b/src/USER-OMP/fix_rigid_omp.cpp @@ -33,14 +33,12 @@ #include "math_extra.h" #include "math_const.h" +#include "rigid_const.h" using namespace LAMMPS_NS; using namespace FixConst; using namespace MathConst; - -enum{SINGLE,MOLECULE,GROUP}; // same as in FixRigid - -#define EINERTIA 0.2 // moment of inertia prefactor for ellipsoid +using namespace RigidConst; typedef struct { double x,y,z; } dbl3_t; @@ -488,8 +486,8 @@ void FixRigidOMP::set_xv_thr() if (quat[ibody][3] >= 0.0) theta_body = 2.0*acos(quat[ibody][0]); else theta_body = -2.0*acos(quat[ibody][0]); theta = orient[i][0] + theta_body; - while (theta <= MINUSPI) theta += TWOPI; - while (theta > MY_PI) theta -= TWOPI; + while (theta <= -MY_PI) theta += MY_2PI; + while (theta > MY_PI) theta -= MY_2PI; lbonus[line[i]].theta = theta; omega_one[i][0] = omega[ibody][0]; omega_one[i][1] = omega[ibody][1]; diff --git a/src/USER-OMP/fix_rigid_small_omp.cpp b/src/USER-OMP/fix_rigid_small_omp.cpp index 41f6c53e40..523bd4df07 100644 --- a/src/USER-OMP/fix_rigid_small_omp.cpp +++ b/src/USER-OMP/fix_rigid_small_omp.cpp @@ -31,14 +31,12 @@ #include "math_extra.h" #include "math_const.h" +#include "rigid_const.h" using namespace LAMMPS_NS; using namespace FixConst; using namespace MathConst; - -#define EINERTIA 0.2 // moment of inertia prefactor for ellipsoid - -enum{FULL_BODY,INITIAL,FINAL,FORCE_TORQUE,VCM_ANGMOM,XCM_MASS,ITENSOR,DOF}; +using namespace RigidConst; typedef struct { double x,y,z; } dbl3_t; @@ -427,8 +425,8 @@ void FixRigidSmallOMP::set_xv_thr() if (b.quat[3] >= 0.0) theta_body = 2.0*acos(b.quat[0]); else theta_body = -2.0*acos(b.quat[0]); theta = orient[i][0] + theta_body; - while (theta <= MINUSPI) theta += TWOPI; - while (theta > MY_PI) theta -= TWOPI; + while (theta <= -MY_PI) theta += MY_2PI; + while (theta > MY_PI) theta -= MY_2PI; lbonus[line[i]].theta = theta; omega[i][0] = b.omega[0]; omega[i][1] = b.omega[1]; diff --git a/src/compute_bond_local.cpp b/src/compute_bond_local.cpp index d579772384..e14f188e62 100644 --- a/src/compute_bond_local.cpp +++ b/src/compute_bond_local.cpp @@ -33,7 +33,7 @@ using namespace LAMMPS_NS; #define DELTA 10000 #define EPSILON 1.0e-12 -enum{DIST,VELVIB,OMEGA,ENGTRANS,ENGVIB,ENGROT,ENGPOT,FORCE,VARIABLE}; +enum{DIST,VELVIB,OMEGA,ENGTRANS,ENGVIB,ENGROT,ENGPOT,FORCE,FX,FY,FZ,VARIABLE}; /* ---------------------------------------------------------------------- */ @@ -64,6 +64,9 @@ ComputeBondLocal::ComputeBondLocal(LAMMPS *lmp, int narg, char **arg) : if (strcmp(arg[iarg],"dist") == 0) bstyle[nvalues++] = DIST; else if (strcmp(arg[iarg],"engpot") == 0) bstyle[nvalues++] = ENGPOT; else if (strcmp(arg[iarg],"force") == 0) bstyle[nvalues++] = FORCE; + else if (strcmp(arg[iarg],"fx") == 0) bstyle[nvalues++] = FX; + else if (strcmp(arg[iarg],"fy") == 0) bstyle[nvalues++] = FY; + else if (strcmp(arg[iarg],"fz") == 0) bstyle[nvalues++] = FZ; else if (strcmp(arg[iarg],"engvib") == 0) bstyle[nvalues++] = ENGVIB; else if (strcmp(arg[iarg],"engrot") == 0) bstyle[nvalues++] = ENGROT; else if (strcmp(arg[iarg],"engtrans") == 0) bstyle[nvalues++] = ENGTRANS; @@ -127,7 +130,8 @@ ComputeBondLocal::ComputeBondLocal(LAMMPS *lmp, int narg, char **arg) : singleflag = 0; velflag = 0; for (int i = 0; i < nvalues; i++) { - if (bstyle[i] == ENGPOT || bstyle[i] == FORCE) singleflag = 1; + if (bstyle[i] == ENGPOT || bstyle[i] == FORCE || bstyle[i] == FX || + bstyle[i] == FY || bstyle[i] == FZ) singleflag = 1; if (bstyle[i] == VELVIB || bstyle[i] == OMEGA || bstyle[i] == ENGTRANS || bstyle[i] == ENGVIB || bstyle[i] == ENGROT) velflag = 1; } @@ -393,6 +397,15 @@ int ComputeBondLocal::compute_bonds(int flag) case FORCE: ptr[n] = sqrt(rsq)*fbond; break; + case FX: + ptr[n] = dx*fbond; + break; + case FY: + ptr[n] = dy*fbond; + break; + case FZ: + ptr[n] = dz*fbond; + break; case ENGVIB: ptr[n] = engvib; break; diff --git a/src/compute_orientorder_atom.cpp b/src/compute_orientorder_atom.cpp index 266df575f9..0a78356127 100644 --- a/src/compute_orientorder_atom.cpp +++ b/src/compute_orientorder_atom.cpp @@ -43,12 +43,14 @@ using namespace std; #define MY_EPSILON (10.0*2.220446049250313e-16) #endif +#define QEPSILON 1.0e-6 + /* ---------------------------------------------------------------------- */ ComputeOrientOrderAtom::ComputeOrientOrderAtom(LAMMPS *lmp, int narg, char **arg) : Compute(lmp, narg, arg), qlist(NULL), distsq(NULL), nearest(NULL), rlist(NULL), - qnarray(NULL), qnm_r(NULL), qnm_i(NULL) + qnarray(NULL), qnm_r(NULL), qnm_i(NULL), cglist(NULL) { if (narg < 3 ) error->all(FLERR,"Illegal compute orientorder/atom command"); @@ -56,6 +58,8 @@ ComputeOrientOrderAtom::ComputeOrientOrderAtom(LAMMPS *lmp, int narg, char **arg nnn = 12; cutsq = 0.0; + wlflag = 0; + wlhatflag = 0; qlcompflag = 0; // specify which orders to request @@ -96,27 +100,39 @@ ComputeOrientOrderAtom::ComputeOrientOrderAtom(LAMMPS *lmp, int narg, char **arg if (iarg+nqlist > narg) error->all(FLERR,"Illegal compute orientorder/atom command"); qmax = 0; - for (int iw = 0; iw < nqlist; iw++) { - qlist[iw] = force->numeric(FLERR,arg[iarg+iw]); - if (qlist[iw] < 0) + for (int il = 0; il < nqlist; il++) { + qlist[il] = force->numeric(FLERR,arg[iarg+il]); + if (qlist[il] < 0) error->all(FLERR,"Illegal compute orientorder/atom command"); - if (qlist[iw] > qmax) qmax = qlist[iw]; + if (qlist[il] > qmax) qmax = qlist[il]; } iarg += nqlist; + } else if (strcmp(arg[iarg],"wl") == 0) { + if (iarg+2 > narg) + error->all(FLERR,"Illegal compute orientorder/atom command"); + if (strcmp(arg[iarg+1],"yes") == 0) wlflag = 1; + else if (strcmp(arg[iarg+1],"no") == 0) wlflag = 0; + else error->all(FLERR,"Illegal compute orientorder/atom command"); + iarg += 2; + } else if (strcmp(arg[iarg],"wl/hat") == 0) { + if (iarg+2 > narg) + error->all(FLERR,"Illegal compute orientorder/atom command"); + if (strcmp(arg[iarg+1],"yes") == 0) wlhatflag = 1; + else if (strcmp(arg[iarg+1],"no") == 0) wlhatflag = 0; + else error->all(FLERR,"Illegal compute orientorder/atom command"); + iarg += 2; } else if (strcmp(arg[iarg],"components") == 0) { qlcompflag = 1; if (iarg+2 > narg) error->all(FLERR,"Illegal compute orientorder/atom command"); qlcomp = force->numeric(FLERR,arg[iarg+1]); - if (qlcomp <= 0) - error->all(FLERR,"Illegal compute orientorder/atom command"); iqlcomp = -1; - for (int iw = 0; iw < nqlist; iw++) - if (qlcomp == qlist[iw]) { - iqlcomp = iw; + for (int il = 0; il < nqlist; il++) + if (qlcomp == qlist[il]) { + iqlcomp = il; break; } - if (iqlcomp < 0) + if (iqlcomp == -1) error->all(FLERR,"Illegal compute orientorder/atom command"); iarg += 2; } else if (strcmp(arg[iarg],"cutoff") == 0) { @@ -130,8 +146,10 @@ ComputeOrientOrderAtom::ComputeOrientOrderAtom(LAMMPS *lmp, int narg, char **arg } else error->all(FLERR,"Illegal compute orientorder/atom command"); } - if (qlcompflag) ncol = nqlist + 2*(2*qlcomp+1); - else ncol = nqlist; + ncol = nqlist; + if (wlflag) ncol += nqlist; + if (wlhatflag) ncol += nqlist; + if (qlcompflag) ncol += 2*(2*qlcomp+1); peratom_flag = 1; size_peratom_cols = ncol; @@ -151,7 +169,7 @@ ComputeOrientOrderAtom::~ComputeOrientOrderAtom() memory->destroy(qlist); memory->destroy(qnm_r); memory->destroy(qnm_i); - + memory->destroy(cglist); } /* ---------------------------------------------------------------------- */ @@ -166,8 +184,8 @@ void ComputeOrientOrderAtom::init() error->all(FLERR,"Compute orientorder/atom cutoff is " "longer than pairwise cutoff"); - memory->create(qnm_r,qmax,2*qmax+1,"orientorder/atom:qnm_r"); - memory->create(qnm_i,qmax,2*qmax+1,"orientorder/atom:qnm_i"); + memory->create(qnm_r,nqlist,2*qmax+1,"orientorder/atom:qnm_r"); + memory->create(qnm_i,nqlist,2*qmax+1,"orientorder/atom:qnm_i"); // need an occasional full neighbor list @@ -183,6 +201,8 @@ void ComputeOrientOrderAtom::init() if (strcmp(modify->compute[i]->style,"orientorder/atom") == 0) count++; if (count > 1 && comm->me == 0) error->warning(FLERR,"More than one compute orientorder/atom"); + + if (wlflag || wlhatflag) init_clebsch_gordan(); } /* ---------------------------------------------------------------------- */ @@ -274,8 +294,8 @@ void ComputeOrientOrderAtom::compute_peratom() // if not nnn neighbors, order parameter = 0; if ((ncount == 0) || (ncount < nnn)) { - for (int iw = 0; iw < nqlist; iw++) - qn[iw] = 0.0; + for (int jj = 0; jj < ncol; jj++) + qn[jj] = 0.0; continue; } @@ -287,6 +307,7 @@ void ComputeOrientOrderAtom::compute_peratom() } calc_boop(rlist, ncount, qn, qlist, nqlist); + } } } @@ -403,13 +424,12 @@ void ComputeOrientOrderAtom::select3(int k, int n, double *arr, int *iarr, doubl void ComputeOrientOrderAtom::calc_boop(double **rlist, int ncount, double qn[], int qlist[], int nqlist) { - for (int iw = 0; iw < nqlist; iw++) { - int n = qlist[iw]; - qn[iw] = 0.0; - for(int m = 0; m < 2*n+1; m++) { - qnm_r[iw][m] = 0.0; - qnm_i[iw][m] = 0.0; + for (int il = 0; il < nqlist; il++) { + int l = qlist[il]; + for(int m = 0; m < 2*l+1; m++) { + qnm_r[il][m] = 0.0; + qnm_i[il][m] = 0.0; } } @@ -433,24 +453,24 @@ void ComputeOrientOrderAtom::calc_boop(double **rlist, expphi_i *= rxymaginv; } - for (int iw = 0; iw < nqlist; iw++) { - int n = qlist[iw]; + for (int il = 0; il < nqlist; il++) { + int l = qlist[il]; - qnm_r[iw][n] += polar_prefactor(n, 0, costheta); + qnm_r[il][l] += polar_prefactor(l, 0, costheta); double expphim_r = expphi_r; double expphim_i = expphi_i; - for(int m = 1; m <= +n; m++) { - double prefactor = polar_prefactor(n, m, costheta); + for(int m = 1; m <= +l; m++) { + double prefactor = polar_prefactor(l, m, costheta); double c_r = prefactor * expphim_r; double c_i = prefactor * expphim_i; - qnm_r[iw][m+n] += c_r; - qnm_i[iw][m+n] += c_i; + qnm_r[il][m+l] += c_r; + qnm_i[il][m+l] += c_i; if(m & 1) { - qnm_r[iw][-m+n] -= c_r; - qnm_i[iw][-m+n] += c_i; + qnm_r[il][-m+l] -= c_r; + qnm_i[il][-m+l] += c_i; } else { - qnm_r[iw][-m+n] += c_r; - qnm_i[iw][-m+n] -= c_i; + qnm_r[il][-m+l] += c_r; + qnm_i[il][-m+l] -= c_i; } double tmp_r = expphim_r*expphi_r - expphim_i*expphi_i; double tmp_i = expphim_r*expphi_i + expphim_i*expphi_r; @@ -461,30 +481,110 @@ void ComputeOrientOrderAtom::calc_boop(double **rlist, } } - double fac = sqrt(MY_4PI) / ncount; - double normfac = 0.0; - for (int iw = 0; iw < nqlist; iw++) { - int n = qlist[iw]; - double qm_sum = 0.0; - for(int m = 0; m < 2*n+1; m++) { - qm_sum += qnm_r[iw][m]*qnm_r[iw][m] + qnm_i[iw][m]*qnm_i[iw][m]; - // printf("Ylm^2 = %d %d %g\n",n,m, - // qnm_r[iw][m]*qnm_r[iw][m] + qnm_i[iw][m]*qnm_i[iw][m]); - } - qn[iw] = fac * sqrt(qm_sum / (2*n+1)); - if (qlcompflag && iqlcomp == iw) normfac = 1.0/sqrt(qm_sum); + // convert sums to averages + double facn = 1.0 / ncount; + for (int il = 0; il < nqlist; il++) { + int l = qlist[il]; + for(int m = 0; m < 2*l+1; m++) { + qnm_r[il][m] *= facn; + qnm_i[il][m] *= facn; + } } - // output of the complex vector + // calculate Q_l + // NOTE: optional W_l_hat and components of Q_qlcomp use these stored Q_l values + + int jj = 0; + for (int il = 0; il < nqlist; il++) { + int l = qlist[il]; + double qnormfac = sqrt(MY_4PI/(2*l+1)); + double qm_sum = 0.0; + for(int m = 0; m < 2*l+1; m++) + qm_sum += qnm_r[il][m]*qnm_r[il][m] + qnm_i[il][m]*qnm_i[il][m]; + qn[jj++] = qnormfac * sqrt(qm_sum); + } + + // TODO: + // 1. [done]Need to allocate extra memory in qnarray[] for this option + // 2. [done]Need to add keyword option + // 3. [done]Need to caclulate Clebsch-Gordan/Wigner 3j coefficients + // (Can try getting them from boop.py first) + // 5. [done]Compare to bcc values in /Users/athomps/netapp/codes/MatMiner/matminer/matminer/featurizers/boop.py + // 6. [done]I get the right answer for W_l, but need to make sure that factor of 1/sqrt(l+1) is right for cglist + // 7. Add documentation + // 8. [done] run valgrind + // 9. [done] Add Wlhat + // 10. Update memory_usage() + // 11. Add exact FCC values for W_4, W_4_hat + + // calculate W_l + + if (wlflag) { + int idxcg_count = 0; + for (int il = 0; il < nqlist; il++) { + int l = qlist[il]; + double wlsum = 0.0; + for(int m1 = 0; m1 < 2*l+1; m1++) { + for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) { + int m = m1 + m2 - l; + double qm1qm2_r = qnm_r[il][m1]*qnm_r[il][m2] - qnm_i[il][m1]*qnm_i[il][m2]; + double qm1qm2_i = qnm_r[il][m1]*qnm_i[il][m2] + qnm_i[il][m1]*qnm_r[il][m2]; + wlsum += (qm1qm2_r*qnm_r[il][m] + qm1qm2_i*qnm_i[il][m])*cglist[idxcg_count]; + idxcg_count++; + } + } + qn[jj++] = wlsum/sqrt(2*l+1); + } + } + + // calculate W_l_hat + + if (wlhatflag) { + int idxcg_count = 0; + for (int il = 0; il < nqlist; il++) { + int l = qlist[il]; + double wlsum = 0.0; + for(int m1 = 0; m1 < 2*l+1; m1++) { + for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) { + int m = m1 + m2 - l; + double qm1qm2_r = qnm_r[il][m1]*qnm_r[il][m2] - qnm_i[il][m1]*qnm_i[il][m2]; + double qm1qm2_i = qnm_r[il][m1]*qnm_i[il][m2] + qnm_i[il][m1]*qnm_r[il][m2]; + wlsum += (qm1qm2_r*qnm_r[il][m] + qm1qm2_i*qnm_i[il][m])*cglist[idxcg_count]; + idxcg_count++; + } + } + // Whats = [w/(q/np.sqrt(np.pi * 4 / (2 * l + 1)))**3 if abs(q) > 1.0e-6 else 0.0 for l,q,w in zip(range(1,max_l+1),Qs,Ws)] + if (qn[il] < QEPSILON) + qn[jj++] = 0.0; + else { + double qnormfac = sqrt(MY_4PI/(2*l+1)); + double qnfac = qnormfac/qn[il]; + qn[jj++] = wlsum/sqrt(2*l+1)*(qnfac*qnfac*qnfac); + } + } + } + + // Calculate components of Q_l, for l=qlcomp if (qlcompflag) { - int j = nqlist; - for(int m = 0; m < 2*qlcomp+1; m++) { - qn[j++] = qnm_r[iqlcomp][m] * normfac; - qn[j++] = qnm_i[iqlcomp][m] * normfac; + int il = iqlcomp; + int l = qlcomp; + if (qn[il] < QEPSILON) + for(int m = 0; m < 2*l+1; m++) { + qn[jj++] = 0.0; + qn[jj++] = 0.0; + } + else { + double qnormfac = sqrt(MY_4PI/(2*l+1)); + double qnfac = qnormfac/qn[il]; + for(int m = 0; m < 2*l+1; m++) { + qn[jj++] = qnm_r[il][m] * qnfac; + qn[jj++] = qnm_i[il][m] * qnfac; + } } } + } /* ---------------------------------------------------------------------- @@ -542,3 +642,258 @@ double ComputeOrientOrderAtom::associated_legendre(int l, int m, double x) return p; } + +/* ---------------------------------------------------------------------- + assign Clebsch-Gordan coefficients + using the quasi-binomial formula VMK 8.2.1(3) + specialized for case j1=j2=j=l +------------------------------------------------------------------------- */ + +void ComputeOrientOrderAtom::init_clebsch_gordan() +{ + double sum,dcg,sfaccg, sfac1, sfac2; + int m, aa2, bb2, cc2; + int ifac, idxcg_count; + + idxcg_count = 0; + for (int il = 0; il < nqlist; il++) { + int l = qlist[il]; + for(int m1 = 0; m1 < 2*l+1; m1++) + for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) + idxcg_count++; + } + idxcg_max = idxcg_count; + memory->create(cglist, idxcg_max, "computeorientorderatom:cglist"); + + idxcg_count = 0; + for (int il = 0; il < nqlist; il++) { + int l = qlist[il]; + for(int m1 = 0; m1 < 2*l+1; m1++) { + aa2 = m1 - l; + for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) { + bb2 = m2 - l; + m = aa2 + bb2 + l; + + sum = 0.0; + for (int z = MAX(0, MAX(-aa2, bb2)); + z <= MIN(l, MIN(l - aa2, l + bb2)); z++) { + ifac = z % 2 ? -1 : 1; + sum += ifac / + (factorial(z) * + factorial(l - z) * + factorial(l - aa2 - z) * + factorial(l + bb2 - z) * + factorial(aa2 + z) * + factorial(-bb2 + z)); + } + + cc2 = m - l; + sfaccg = sqrt(factorial(l + aa2) * + factorial(l - aa2) * + factorial(l + bb2) * + factorial(l - bb2) * + factorial(l + cc2) * + factorial(l - cc2) * + (2*l + 1)); + + sfac1 = factorial(3*l + 1); + sfac2 = factorial(l); + dcg = sqrt(sfac2*sfac2*sfac2 / sfac1); + + cglist[idxcg_count] = sum * dcg * sfaccg; + idxcg_count++; + } + } + } +} + +/* ---------------------------------------------------------------------- + factorial n, wrapper for precomputed table +------------------------------------------------------------------------- */ + +double ComputeOrientOrderAtom::factorial(int n) +{ + if (n < 0 || n > nmaxfactorial) { + char str[128]; + sprintf(str, "Invalid argument to factorial %d", n); + error->all(FLERR, str); + } + + return nfac_table[n]; +} + +/* ---------------------------------------------------------------------- + factorial n table, size SNA::nmaxfactorial+1 +------------------------------------------------------------------------- */ + +const double ComputeOrientOrderAtom::nfac_table[] = { + 1, + 1, + 2, + 6, + 24, + 120, + 720, + 5040, + 40320, + 362880, + 3628800, + 39916800, + 479001600, + 6227020800, + 87178291200, + 1307674368000, + 20922789888000, + 355687428096000, + 6.402373705728e+15, + 1.21645100408832e+17, + 2.43290200817664e+18, + 5.10909421717094e+19, + 1.12400072777761e+21, + 2.5852016738885e+22, + 6.20448401733239e+23, + 1.5511210043331e+25, + 4.03291461126606e+26, + 1.08888694504184e+28, + 3.04888344611714e+29, + 8.8417619937397e+30, + 2.65252859812191e+32, + 8.22283865417792e+33, + 2.63130836933694e+35, + 8.68331761881189e+36, + 2.95232799039604e+38, + 1.03331479663861e+40, + 3.71993326789901e+41, + 1.37637530912263e+43, + 5.23022617466601e+44, + 2.03978820811974e+46, + 8.15915283247898e+47, + 3.34525266131638e+49, + 1.40500611775288e+51, + 6.04152630633738e+52, + 2.65827157478845e+54, + 1.1962222086548e+56, + 5.50262215981209e+57, + 2.58623241511168e+59, + 1.24139155925361e+61, + 6.08281864034268e+62, + 3.04140932017134e+64, + 1.55111875328738e+66, + 8.06581751709439e+67, + 4.27488328406003e+69, + 2.30843697339241e+71, + 1.26964033536583e+73, + 7.10998587804863e+74, + 4.05269195048772e+76, + 2.35056133128288e+78, + 1.3868311854569e+80, + 8.32098711274139e+81, + 5.07580213877225e+83, + 3.14699732603879e+85, + 1.98260831540444e+87, + 1.26886932185884e+89, + 8.24765059208247e+90, + 5.44344939077443e+92, + 3.64711109181887e+94, + 2.48003554243683e+96, + 1.71122452428141e+98, + 1.19785716699699e+100, + 8.50478588567862e+101, + 6.12344583768861e+103, + 4.47011546151268e+105, + 3.30788544151939e+107, + 2.48091408113954e+109, + 1.88549470166605e+111, + 1.45183092028286e+113, + 1.13242811782063e+115, + 8.94618213078297e+116, + 7.15694570462638e+118, + 5.79712602074737e+120, + 4.75364333701284e+122, + 3.94552396972066e+124, + 3.31424013456535e+126, + 2.81710411438055e+128, + 2.42270953836727e+130, + 2.10775729837953e+132, + 1.85482642257398e+134, + 1.65079551609085e+136, + 1.48571596448176e+138, + 1.3520015276784e+140, + 1.24384140546413e+142, + 1.15677250708164e+144, + 1.08736615665674e+146, + 1.03299784882391e+148, + 9.91677934870949e+149, + 9.61927596824821e+151, + 9.42689044888324e+153, + 9.33262154439441e+155, + 9.33262154439441e+157, + 9.42594775983835e+159, + 9.61446671503512e+161, + 9.90290071648618e+163, + 1.02990167451456e+166, + 1.08139675824029e+168, + 1.14628056373471e+170, + 1.22652020319614e+172, + 1.32464181945183e+174, + 1.44385958320249e+176, + 1.58824554152274e+178, + 1.76295255109024e+180, + 1.97450685722107e+182, + 2.23119274865981e+184, + 2.54355973347219e+186, + 2.92509369349301e+188, + 3.3931086844519e+190, + 3.96993716080872e+192, + 4.68452584975429e+194, + 5.5745857612076e+196, + 6.68950291344912e+198, + 8.09429852527344e+200, + 9.8750442008336e+202, + 1.21463043670253e+205, + 1.50614174151114e+207, + 1.88267717688893e+209, + 2.37217324288005e+211, + 3.01266001845766e+213, + 3.8562048236258e+215, + 4.97450422247729e+217, + 6.46685548922047e+219, + 8.47158069087882e+221, + 1.118248651196e+224, + 1.48727070609069e+226, + 1.99294274616152e+228, + 2.69047270731805e+230, + 3.65904288195255e+232, + 5.01288874827499e+234, + 6.91778647261949e+236, + 9.61572319694109e+238, + 1.34620124757175e+241, + 1.89814375907617e+243, + 2.69536413788816e+245, + 3.85437071718007e+247, + 5.5502938327393e+249, + 8.04792605747199e+251, + 1.17499720439091e+254, + 1.72724589045464e+256, + 2.55632391787286e+258, + 3.80892263763057e+260, + 5.71338395644585e+262, + 8.62720977423323e+264, + 1.31133588568345e+267, + 2.00634390509568e+269, + 3.08976961384735e+271, + 4.78914290146339e+273, + 7.47106292628289e+275, + 1.17295687942641e+278, + 1.85327186949373e+280, + 2.94670227249504e+282, + 4.71472363599206e+284, + 7.59070505394721e+286, + 1.22969421873945e+289, + 2.0044015765453e+291, + 3.28721858553429e+293, + 5.42391066613159e+295, + 9.00369170577843e+297, + 1.503616514865e+300, // nmaxfactorial = 167 +}; + diff --git a/src/compute_orientorder_atom.h b/src/compute_orientorder_atom.h index d5905df63b..643875ccd0 100644 --- a/src/compute_orientorder_atom.h +++ b/src/compute_orientorder_atom.h @@ -33,7 +33,7 @@ class ComputeOrientOrderAtom : public Compute { void compute_peratom(); double memory_usage(); double cutsq; - int iqlcomp, qlcomp, qlcompflag; + int iqlcomp, qlcomp, qlcompflag, wlflag, wlhatflag; int *qlist; int nqlist; @@ -55,6 +55,13 @@ class ComputeOrientOrderAtom : public Compute { double polar_prefactor(int, int, double); double associated_legendre(int, int, double); + + static const int nmaxfactorial = 167; + static const double nfac_table[]; + double factorial(int); + void init_clebsch_gordan(); + double *cglist; // Clebsch-Gordan coeffs + int idxcg_max; }; } diff --git a/src/compute_pe.cpp b/src/compute_pe.cpp index fd7b74b43a..379de0f9de 100644 --- a/src/compute_pe.cpp +++ b/src/compute_pe.cpp @@ -26,6 +26,7 @@ #include "modify.h" #include "domain.h" #include "error.h" +#include "atom_masks.h" using namespace LAMMPS_NS; @@ -65,6 +66,9 @@ ComputePE::ComputePE(LAMMPS *lmp, int narg, char **arg) : iarg++; } } + + datamask_read = EMPTY_MASK; + datamask_modify = EMPTY_MASK; } /* ---------------------------------------------------------------------- */ diff --git a/src/fix_minimize.cpp b/src/fix_minimize.cpp index df2dfb02a7..93b13fac49 100644 --- a/src/fix_minimize.cpp +++ b/src/fix_minimize.cpp @@ -42,8 +42,10 @@ FixMinimize::~FixMinimize() // delete locally stored data memory->destroy(peratom); - for (int m = 0; m < nvector; m++) memory->destroy(vectors[m]); - memory->sfree(vectors); + if (vectors) { + for (int m = 0; m < nvector; m++) memory->destroy(vectors[m]); + memory->sfree(vectors); + } } /* ---------------------------------------------------------------------- */ diff --git a/src/fix_minimize.h b/src/fix_minimize.h index 7b9f571628..615eb6a095 100644 --- a/src/fix_minimize.h +++ b/src/fix_minimize.h @@ -29,22 +29,22 @@ class FixMinimize : public Fix { public: FixMinimize(class LAMMPS *, int, char **); - ~FixMinimize(); + virtual ~FixMinimize(); int setmask(); - void init() {} + virtual void init() {} double memory_usage(); - void grow_arrays(int); - void copy_arrays(int, int, int); - int pack_exchange(int, double *); - int unpack_exchange(int, double *); + virtual void grow_arrays(int); + virtual void copy_arrays(int, int, int); + virtual int pack_exchange(int, double *); + virtual int unpack_exchange(int, double *); - void add_vector(int); + virtual void add_vector(int); double *request_vector(int); void store_box(); void reset_coords(); - private: + protected: int nvector; int *peratom; double **vectors; diff --git a/src/input.cpp b/src/input.cpp index 6871ee23f5..6adf37e847 100644 --- a/src/input.cpp +++ b/src/input.cpp @@ -1672,7 +1672,7 @@ void Input::min_style() { if (domain->box_exist == 0) error->all(FLERR,"Min_style command before simulation box is defined"); - update->create_minimize(narg,arg); + update->create_minimize(narg,arg,1); } /* ---------------------------------------------------------------------- */ diff --git a/src/min.cpp b/src/min.cpp index 408954edb8..b6bed01c75 100644 --- a/src/min.cpp +++ b/src/min.cpp @@ -68,6 +68,8 @@ Min::Min(LAMMPS *lmp) : Pointers(lmp) requestor = NULL; external_force_clear = 0; + + kokkosable = 0; } /* ---------------------------------------------------------------------- */ @@ -93,6 +95,10 @@ Min::~Min() void Min::init() { + if (lmp->kokkos && !kokkosable) + error->all(FLERR,"Must use a Kokkos-enabled min style (e.g. min_style cg/kk) " + "with Kokkos minimize"); + // create fix needed for storing atom-based quantities // will delete it at end of run diff --git a/src/min.h b/src/min.h index a63254231c..835e6b3ed5 100644 --- a/src/min.h +++ b/src/min.h @@ -31,16 +31,16 @@ class Min : protected Pointers { Min(class LAMMPS *); virtual ~Min(); virtual void init(); - void setup(int flag=1); - void setup_minimal(int); - void run(int); + virtual void setup(int flag=1); + virtual void setup_minimal(int); + virtual void run(int); void cleanup(); int request(class Pair *, int, double); virtual bigint memory_usage() {return 0;} void modify_params(int, char **); virtual int modify_param(int, char **) {return 0;} - double fnorm_sqr(); - double fnorm_inf(); + virtual double fnorm_sqr(); + virtual double fnorm_inf(); virtual void init_style() {} virtual void setup_style() = 0; @@ -97,10 +97,12 @@ class Min : protected Pointers { double *extra_max; // max allowed change per iter for atom's var class Pair **requestor; // Pair that stores/manipulates the variable + int kokkosable; // 1 if this min style supports Kokkos + int neigh_every,neigh_delay,neigh_dist_check; // neighboring params - double energy_force(int); - void force_clear(); + virtual double energy_force(int); + virtual void force_clear(); double compute_force_norm_sqr(); double compute_force_norm_inf(); diff --git a/src/minimize.cpp b/src/minimize.cpp index e1af924019..94fdcd8681 100644 --- a/src/minimize.cpp +++ b/src/minimize.cpp @@ -52,9 +52,6 @@ void Minimize::command(int narg, char **arg) if (update->laststep < 0) error->all(FLERR,"Too many iterations"); - if (lmp->kokkos) - error->all(FLERR,"Cannot yet use minimize with Kokkos"); - lmp->init(); timer->init_timeout(); update->minimize->setup(); diff --git a/src/update.cpp b/src/update.cpp index 8a3e6f0d1d..68a6a1a057 100644 --- a/src/update.cpp +++ b/src/update.cpp @@ -78,7 +78,7 @@ Update::Update(LAMMPS *lmp) : Pointers(lmp) create_integrate(1,&str,1); str = (char *) "cg"; - create_minimize(1,&str); + create_minimize(1,&str,1); } /* ---------------------------------------------------------------------- */ @@ -376,22 +376,69 @@ Integrate *Update::integrate_creator(LAMMPS *lmp, int narg, char ** arg) /* ---------------------------------------------------------------------- */ -void Update::create_minimize(int narg, char **arg) +void Update::create_minimize(int narg, char **arg, int trysuffix) { - if (narg != 1) error->all(FLERR,"Illegal min_style command"); + if (narg < 1) error->all(FLERR,"Illegal run_style command"); delete [] minimize_style; delete minimize; - if (minimize_map->find(arg[0]) != minimize_map->end()) { - MinimizeCreator minimize_creator = (*minimize_map)[arg[0]]; - minimize = minimize_creator(lmp); - } - else error->all(FLERR,"Illegal min_style command"); + int sflag; + new_minimize(arg[0],narg-1,&arg[1],trysuffix,sflag); - int n = strlen(arg[0]) + 1; - minimize_style = new char[n]; - strcpy(minimize_style,arg[0]); + if (sflag) { + char estyle[256]; + if (sflag == 1) snprintf(estyle,256,"%s/%s",arg[0],lmp->suffix); + else snprintf(estyle,256,"%s/%s",arg[0],lmp->suffix2); + int n = strlen(estyle) + 1; + minimize_style = new char[n]; + strcpy(minimize_style,estyle); + } else { + int n = strlen(arg[0]) + 1; + minimize_style = new char[n]; + strcpy(minimize_style,arg[0]); + } +} + +/* ---------------------------------------------------------------------- + create the Minimize style, first with suffix appended +------------------------------------------------------------------------- */ + +void Update::new_minimize(char *style, int narg, char **arg, + int trysuffix, int &sflag) +{ + if (trysuffix && lmp->suffix_enable) { + if (lmp->suffix) { + sflag = 1; + char estyle[256]; + snprintf(estyle,256,"%s/%s",style,lmp->suffix); + if (minimize_map->find(estyle) != minimize_map->end()) { + MinimizeCreator minimize_creator = (*minimize_map)[estyle]; + minimize = minimize_creator(lmp); + return; + } + } + + if (lmp->suffix2) { + sflag = 2; + char estyle[256]; + snprintf(estyle,256,"%s/%s",style,lmp->suffix2); + if (minimize_map->find(estyle) != minimize_map->end()) { + MinimizeCreator minimize_creator = (*minimize_map)[estyle]; + minimize = minimize_creator(lmp); + return; + } + } + } + + sflag = 0; + if (minimize_map->find(style) != minimize_map->end()) { + MinimizeCreator minimize_creator = (*minimize_map)[style]; + minimize = minimize_creator(lmp); + return; + } + + error->all(FLERR,"Illegal minimize style"); } /* ---------------------------------------------------------------------- diff --git a/src/update.h b/src/update.h index d3602ef21e..e70325a498 100644 --- a/src/update.h +++ b/src/update.h @@ -63,7 +63,7 @@ class Update : protected Pointers { void init(); void set_units(const char *); void create_integrate(int, char **, int); - void create_minimize(int, char **); + void create_minimize(int, char **, int); void reset_timestep(int, char **); void reset_timestep(bigint); void update_time(); @@ -71,6 +71,7 @@ class Update : protected Pointers { private: void new_integrate(char *, int, char **, int, int &); + void new_minimize(char *, int, char **, int, int &); template static Integrate *integrate_creator(LAMMPS *, int, char **); template static Min *minimize_creator(LAMMPS *);