diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 1a7a35e61f..86418574ce 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -101,7 +101,8 @@ src/group.* @sjplimp src/improper.* @sjplimp src/info.* @akohlmey src/kspace.* @sjplimp -src/lmptyp.h @sjplimp +src/lmptype.h @sjplimp +src/label_map.* @jrgissing @akohlmey src/library.* @sjplimp @akohlmey src/main.cpp @sjplimp src/min_*.* @sjplimp diff --git a/.github/release_steps.md b/.github/release_steps.md new file mode 100644 index 0000000000..bcd29fd818 --- /dev/null +++ b/.github/release_steps.md @@ -0,0 +1,58 @@ +# LAMMPS Release Steps + +The following notes chronicle the current steps for preparing and publishing LAMMPS releases. +For definition of what LAMMPS versions and the different kinds of releases mean, please +refer to [the corresponding section in the LAMMPS manual](https://docs.lammps.org/Manual_version.html). + +## LAMMPS Feature Release + +A LAMMPS feature release is currently prepared after about 500 to 750 commits to the +'develop' branch or after a period of four weeks up to two months. + +### Preparing a 'next\_release' branch + +Create a 'next\_release' branch off 'develop' and make the following changes: +- set the LAMMPS\_VERSION define to the planned release date in src/version.h in the format "D Mmm YYYY" or "DD Mmm YYYY" +- remove the LAMMPS\_UPDATE define in src/version.h +- update the release date in doc/lammps.1 +- update all TBD arguments for ..versionadded::, ..versionchanged:: ..deprecated:: to the + planned release date in the format "DMmmYYYY" or "DDMmmYYYY" + +Submit this pull request, rebase if needed. This is the last pull request merged for the release +and should not contain any other changes. (Exceptions: this document, last minute trivial(!) changes). +This PR shall not be merged before **all** pending tests have completed and cleared. If needed, a +bugfix pull request should be created and merged to clear all tests. + +## LAMMPS Stable Release + +A LAMMPS stable release is prepared about once per year in the months July, August, or September. +One (or two, if needed) feature releases before the stable release shall contain only bug fixes +or minor feature updates in optional packages. Also substantial changes to the core of the code +shall be applied rather toward the beginning of a development cycle between two stable releases +than toward the end. The intention is to stablilize significant change to the core and have +outside users and developers try them out during the development cycle; the sooner the changes +are included, the better chances for spotting peripheral bugs and issues. + +### Prerequesites + +Before making a stable release all remaining backported bugfixes shall be released as a (final) +stable update release (see below). + +A LAMMPS stable release process starts like a feature release (see above), only that this feature +release is called a "Stable Release Candidate" and no assets are uploaded to GitHub. + +### Synchronize 'maintenance' branch with 'release' + +The state of the 'release' branch is then transferred to the 'maintenance' branch (which will +have diverged significantly from 'release' due to the selectively backported bug fixes). + +### Fast-forward merge of 'maintenance' into 'stable' and apply tag + +At this point it should be possible to do a fast-forward merge of 'maintenance' to 'stable' +and then apply the stable\_DMmmYYYY tag. + +### Push branches and tags + + + +## LAMMPS Stable Update Release diff --git a/.github/workflows/kokkos-regression.yaml b/.github/workflows/kokkos-regression.yaml index 6238f15c93..b3e685806b 100644 --- a/.github/workflows/kokkos-regression.yaml +++ b/.github/workflows/kokkos-regression.yaml @@ -2,7 +2,7 @@ name: "Kokkos OpenMP Regression Test" on: - pull_request: + push: branches: - develop @@ -16,9 +16,9 @@ jobs: env: CCACHE_DIR: ${{ github.workspace }}/.ccache strategy: - max-parallel: 4 + max-parallel: 6 matrix: - idx: [ 'pair', 'fix', 'compute', 'misc' ] + idx: [ 'pair-0', 'pair-1', 'fix-0', 'fix-1', 'compute', 'misc' ] steps: - name: Checkout repository @@ -82,6 +82,7 @@ jobs: -D PKG_REAXFF=on \ -D PKG_REPLICA=on \ -D PKG_SRD=on \ + -D PKG_SPH=on \ -D PKG_VORONOI=on \ -G Ninja cmake --build build @@ -92,9 +93,10 @@ jobs: run: | source linuxenv/bin/activate python3 tools/regression-tests/get_kokkos_input.py \ - --examples-top-level=examples \ - --filter-out="balance;fire;gcmc;granregion;mdi;mliap;neb;pace;prd;pour;python;snap" + --examples-top-level=examples --batch-size=50 \ + --filter-out="balance;fire;gcmc;granregion;hyper;mc;mdi;mliap;neb;pace;prd;pour;python;rigid;snap;streitz;shear;ttm" + export OMP_PROC_BIND=false python3 tools/regression-tests/run_tests.py \ --lmp-bin=build/lmp \ --config-file=tools/regression-tests/config_kokkos_openmp.yaml \ @@ -102,7 +104,7 @@ jobs: --output-file=output-${{ matrix.idx }}.xml \ --progress-file=progress-${{ matrix.idx }}.yaml \ --log-file=run-${{ matrix.idx }}.log \ - --quick-max=100 --verbose + --quick-max=100 tar -cvf kokkos-regression-test-${{ matrix.idx }}.tar run-${{ matrix.idx }}.log progress-${{ matrix.idx }}.yaml output-${{ matrix.idx }}.xml diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 8d57e237b3..cf10e8b544 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -141,7 +141,7 @@ endif() # silence nvcc warnings if((PKG_KOKKOS) AND (Kokkos_ENABLE_CUDA) AND NOT (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")) - set(CMAKE_TUNE_DEFAULT "${CMAKE_TUNE_DEFAULT} -Xcudafe --diag_suppress=unrecognized_pragma -Xcudafe --diag_suppress=128") + set(CMAKE_TUNE_DEFAULT "${CMAKE_TUNE_DEFAULT}" "-Xcudafe --diag_suppress=unrecognized_pragma,--diag_suppress=128") endif() # we require C++11 without extensions. Kokkos requires at least C++17 (currently) @@ -588,13 +588,8 @@ endif() set(CMAKE_TUNE_FLAGS "${CMAKE_TUNE_DEFAULT}" CACHE STRING "Compiler and machine specific optimization flags (compilation only)") separate_arguments(CMAKE_TUNE_FLAGS) -foreach(_FLAG ${CMAKE_TUNE_FLAGS}) - target_compile_options(lammps PRIVATE ${_FLAG}) - # skip these flags when linking the main executable - if(NOT (("${_FLAG}" STREQUAL "-Xcudafe") OR (("${_FLAG}" STREQUAL "--diag_suppress=unrecognized_pragma")))) - target_compile_options(lmp PRIVATE ${_FLAG}) - endif() -endforeach() +target_compile_options(lammps PRIVATE ${CMAKE_TUNE_FLAGS}) +target_compile_options(lmp PRIVATE ${CMAKE_TUNE_FLAGS}) ######################################################################## # Basic system tests (standard libraries, headers, functions, types) # ######################################################################## diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index adb3abab6b..ddd2daefcd 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -130,6 +130,7 @@ set(KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/atom_vec_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/comm_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/comm_tiled_kokkos.cpp + ${KOKKOS_PKG_SOURCES_DIR}/group_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/min_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/min_linesearch_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/neighbor_kokkos.cpp diff --git a/doc/documentation_conventions.md b/doc/documentation_conventions.md index c4757b85e9..e02b0c77a3 100644 --- a/doc/documentation_conventions.md +++ b/doc/documentation_conventions.md @@ -10,7 +10,7 @@ Last change: 2022-12-30 In fall 2019, the LAMMPS documentation file format has changed from a home grown markup designed to generate HTML format files only, to -[reStructuredText](https://docutils.sourceforge.io/rst.html>. For a +[reStructuredText](https://docutils.sourceforge.io/rst.html>). For a transition period all files in the old .txt format were transparently converted to .rst and then processed. The `txt2rst tool` is still included in the distribution to obtain an initial .rst file for legacy @@ -45,8 +45,7 @@ what kind of information and sections are needed. ## Formatting conventions -For headlines we try to follow the conventions posted here: -https://documentation-style-guide-sphinx.readthedocs.io/en/latest/style-guide.html#headings +For headlines we try to follow the conventions posted [here](https://documentation-style-guide-sphinx.readthedocs.io/en/latest/style-guide.html#headings). It seems to be sufficient to have this consistent only within any single file and it is not (yet) enforced strictly, but making this globally consistent makes it easier to move sections around. @@ -64,7 +63,7 @@ Groups of shell commands or LAMMPS input script or C/C++/Python source code should be typeset into a `.. code-block::` section. A syntax highlighting extension for LAMMPS input scripts is provided, so `LAMMPS` can be used to indicate the language in the code block in addition to -`bash`, `c`, `c++`, `console`, `csh`, `diff', `fortran`, `json`, `make`, +`bash`, `c`, `c++`, `console`, `csh`, `diff`, `fortran`, `json`, `make`, `perl`, `powershell`, `python`, `sh`, or `tcl`, `text`, or `yaml`. When no syntax style is indicated, no syntax highlighting is performed. When typesetting commands executed on the shell, please do not prefix @@ -84,7 +83,7 @@ block can be used, followed by multiple `.. tab::` blocks, one for each alternative. This is only used for HTML output. For other outputs, the `.. tabs::` directive is transparently removed and the individual `.. tab::` blocks will be replaced with an -`.. admonition::`` block. Thus in PDF and ePUB output those will +`.. admonition::` block. Thus in PDF and ePUB output those will be realized as sequential and plain notes. Special remarks can be highlighted with a `.. note::` block and diff --git a/doc/github-development-workflow.md b/doc/github-development-workflow.md index e16ae82764..69bf00b707 100644 --- a/doc/github-development-workflow.md +++ b/doc/github-development-workflow.md @@ -6,7 +6,9 @@ choices the LAMMPS developers have agreed on. Git and GitHub provide the tools, but do not set policies, so it is up to the developers to come to an agreement as to how to define and interpret policies. This document is likely to change as our experiences and needs change, and we try to -adapt it accordingly. Last change 2023-02-10. +adapt it accordingly. + +Last change: 2023-02-10 ## Table of Contents @@ -72,7 +74,7 @@ be assigned to signal urgency to merge this pull request quickly. People can be assigned to review a pull request in two ways: * They can be assigned manually to review a pull request - by the submitter or a LAMMPS developer + by the submitter or a LAMMPS developer. * They can be automatically assigned, because a developer's GitHub handle matches a file pattern in the `.github/CODEOWNERS` file, which associates developers with the code they contributed and @@ -86,9 +88,9 @@ required before merging, in addition to passing all automated compilation and unit tests. Merging counts as implicit approval, so does submission of a pull request (by a LAMMPS developer). So the person doing the merge may not also submit an approving review. The GitHub -feature, that reviews from code owners are "hard" reviews (i.e. they -must all approve before merging is allowed), is currently disabled. -It is in the discretion of the merge maintainer to assess when a +feature that reviews from code owners are "hard" reviews (i.e. they +must all approve before merging is allowed) is currently disabled. +It is at the discretion of the merge maintainer to assess when a sufficient degree of approval has been reached, especially from external collaborators. Reviews may be (automatically) dismissed, when the reviewed code has been changed. Review may be requested a second time. @@ -147,7 +149,8 @@ only contain bug fixes, feature additions to peripheral functionality, and documentation updates. In between stable releases, bug fixes and infrastructure updates are back-ported from the "develop" branch to the "maintenance" branch and occasionally merged into "stable" and published -as update releases. +as update releases. Further explanation of LAMMPS versions can be found +[in the documentation](https://docs.lammps.org/Manual_version.html). ## Project Management diff --git a/doc/lammps.1 b/doc/lammps.1 index 75581bd008..cb52813a4d 100644 --- a/doc/lammps.1 +++ b/doc/lammps.1 @@ -1,7 +1,7 @@ -.TH LAMMPS "1" "29 August 2024" "2024-08-29" +.TH LAMMPS "1" "19 November 2024" "2024-11-19" .SH NAME .B LAMMPS -\- Molecular Dynamics Simulator. Version 29 August 2024 +\- Molecular Dynamics Simulator. Version 19 November 2024 .SH SYNOPSIS .B lmp diff --git a/doc/src/Commands_fix.rst b/doc/src/Commands_fix.rst index 232a209613..04d1a9969a 100644 --- a/doc/src/Commands_fix.rst +++ b/doc/src/Commands_fix.rst @@ -43,7 +43,7 @@ OPT. * :doc:`brownian/asphere ` * :doc:`brownian/sphere ` * :doc:`charge/regulation ` - * :doc:`cmap ` + * :doc:`cmap (k) ` * :doc:`colvars ` * :doc:`controller ` * :doc:`damping/cundall ` @@ -134,7 +134,7 @@ OPT. * :doc:`nve/dot ` * :doc:`nve/dotc/langevin ` * :doc:`nve/eff ` - * :doc:`nve/limit ` + * :doc:`nve/limit (k) ` * :doc:`nve/line ` * :doc:`nve/manifold/rattle ` * :doc:`nve/noforce ` @@ -187,10 +187,11 @@ OPT. * :doc:`qeq/slater ` * :doc:`qmmm ` * :doc:`qtb ` + * :doc:`qtpie/reaxff ` * :doc:`rattle ` * :doc:`reaxff/bonds (k) ` * :doc:`reaxff/species (k) ` - * :doc:`recenter ` + * :doc:`recenter (k) ` * :doc:`restrain ` * :doc:`rheo ` * :doc:`rheo/oxidation ` @@ -268,7 +269,7 @@ OPT. * :doc:`wall/piston ` * :doc:`wall/reflect (k) ` * :doc:`wall/reflect/stochastic ` - * :doc:`wall/region ` + * :doc:`wall/region (k) ` * :doc:`wall/region/ees ` * :doc:`wall/srd ` * :doc:`wall/table ` diff --git a/doc/src/fix.rst b/doc/src/fix.rst index ee52be224e..9af607601b 100644 --- a/doc/src/fix.rst +++ b/doc/src/fix.rst @@ -366,6 +366,7 @@ accelerated styles exist. * :doc:`qeq/slater ` - charge equilibration via Slater method * :doc:`qmmm ` - functionality to enable a quantum mechanics/molecular mechanics coupling * :doc:`qtb ` - implement quantum thermal bath scheme +* :doc:`qtpie/reaxff ` - apply QTPIE charge equilibration * :doc:`rattle ` - RATTLE constraints on bonds and/or angles * :doc:`reaxff/bonds ` - write out ReaxFF bond information * :doc:`reaxff/species ` - write out ReaxFF molecule information diff --git a/doc/src/fix_acks2_reaxff.rst b/doc/src/fix_acks2_reaxff.rst index ebb1b48051..79a9cf8ea6 100644 --- a/doc/src/fix_acks2_reaxff.rst +++ b/doc/src/fix_acks2_reaxff.rst @@ -111,7 +111,8 @@ LAMMPS was built with that package. See the :doc:`Build package This fix does not correctly handle interactions involving multiple periodic images of the same atom. Hence, it should not be used for -periodic cell dimensions less than :math:`10~\AA`. +periodic cell dimensions smaller than the non-bonded cutoff radius, +which is typically :math:`10~\AA` for ReaxFF simulations. This fix may be used in combination with :doc:`fix efield ` and will apply the external electric field during charge equilibration, @@ -122,7 +123,8 @@ components in non-periodic directions. Related commands """""""""""""""" -:doc:`pair_style reaxff `, :doc:`fix qeq/reaxff ` +:doc:`pair_style reaxff `, :doc:`fix qeq/reaxff `, +:doc:`fix qtpi/reaxff ` Default """"""" diff --git a/doc/src/fix_adapt.rst b/doc/src/fix_adapt.rst index 1b5282f741..1ddf80cbdb 100644 --- a/doc/src/fix_adapt.rst +++ b/doc/src/fix_adapt.rst @@ -119,6 +119,14 @@ style supports it. Note that the :doc:`pair_style ` and to specify these parameters initially; the fix adapt command simply overrides the parameters. +.. note:: + + Pair_coeff settings must be made **explicitly** in order for fix + adapt to be able to change them. Settings inferred from mixing + are not suitable. If necessary all mixed settings can be output + to a file using the :doc:`write_coeff command ` and + then the desired mixed pair_coeff settings copied from that file. + The *pstyle* argument is the name of the pair style. If :doc:`pair_style hybrid or hybrid/overlay ` is used, *pstyle* should be a sub-style name. If there are multiple diff --git a/doc/src/fix_adapt_fep.rst b/doc/src/fix_adapt_fep.rst index 3bcdfc5035..981a1c5298 100644 --- a/doc/src/fix_adapt_fep.rst +++ b/doc/src/fix_adapt_fep.rst @@ -116,12 +116,22 @@ style supports it. Note that the :doc:`pair_style ` and to specify these parameters initially; the fix adapt command simply overrides the parameters. -The *pstyle* argument is the name of the pair style. If :doc:`pair_style hybrid or hybrid/overlay ` is used, *pstyle* should be -a sub-style name. For example, *pstyle* could be specified as "soft" -or "lubricate". The *pparam* argument is the name of the parameter to -change. This is the current list of pair styles and parameters that -can be varied by this fix. See the doc pages for individual pair -styles and their energy formulas for the meaning of these parameters: +.. note:: + + Pair_coeff settings must be made **explicitly** in order for fix + adapt/fep to be able to change them. Settings inferred from mixing + are not suitable. If necessary all mixed settings can be output + to a file using the :doc:`write_coeff command ` and + then the desired mixed pair_coeff settings copied from that file. + +The *pstyle* argument is the name of the pair style. If +:doc:`pair_style hybrid or hybrid/overlay ` is used, +*pstyle* should be a sub-style name. For example, *pstyle* could be +specified as "soft" or "lubricate". The *pparam* argument is the name +of the parameter to change. This is the current list of pair styles and +parameters that can be varied by this fix. See the doc pages for +individual pair styles and their energy formulas for the meaning of +these parameters: +------------------------------------------------------------------------------+-------------------------+------------+ | :doc:`born ` | a,b,c | type pairs | diff --git a/doc/src/fix_cmap.rst b/doc/src/fix_cmap.rst index 316ad5d038..ccca88232b 100644 --- a/doc/src/fix_cmap.rst +++ b/doc/src/fix_cmap.rst @@ -1,8 +1,11 @@ .. index:: fix cmap +.. index:: fix cmap/kk fix cmap command ================ +Accelerator Variants: *cmap/kk* + Syntax """""" @@ -141,6 +144,12 @@ outermost level. MUST not disable the :doc:`fix_modify ` *energy* option for this fix. +---------- + +.. include:: accel_styles.rst + +---------- + Restrictions """""""""""" diff --git a/doc/src/fix_colvars.rst b/doc/src/fix_colvars.rst index 4730e29c67..21a9d06714 100644 --- a/doc/src/fix_colvars.rst +++ b/doc/src/fix_colvars.rst @@ -1,8 +1,11 @@ .. index:: fix colvars +.. index:: fix colvars/kk fix colvars command =================== +Accelerator Variants: *colvars/kk* + Syntax """""" @@ -118,6 +121,16 @@ thermostat target temperature. The *seed* keyword contains the seed for the random number generator that will be used in the colvars module. +---------- + +.. note:: + + Fix colvars/kk is not really ported to KOKKOS, since the colvars + library has not been ported to KOKKOS. It merely has some + optimizations to reduce the data transfers between host and device + for KOKKOS with GPUs. + +---------- Restarting """""""""" diff --git a/doc/src/fix_nve_limit.rst b/doc/src/fix_nve_limit.rst index 23517aea40..3533a63ebc 100644 --- a/doc/src/fix_nve_limit.rst +++ b/doc/src/fix_nve_limit.rst @@ -1,8 +1,11 @@ .. index:: fix nve/limit +.. index:: fix nve/limit/kk fix nve/limit command ===================== +Accelerator Variants: *nve/limit/kk* + Syntax """""" @@ -79,6 +82,12 @@ is "extensive". No parameter of this fix can be used with the *start/stop* keywords of the :doc:`run ` command. This fix is not invoked during :doc:`energy minimization `. +---------- + +.. include:: accel_styles.rst + +---------- + Restrictions """""""""""" none diff --git a/doc/src/fix_qeq.rst b/doc/src/fix_qeq.rst index fd317666d0..06a1f98375 100644 --- a/doc/src/fix_qeq.rst +++ b/doc/src/fix_qeq.rst @@ -190,7 +190,7 @@ on atoms via the matrix inversion method. A tolerance of 1.0e-6 is usually a good number. Keyword *alpha* can be used to change the Slater type orbital exponent. -.. versionadded:: TBD +.. versionadded:: 19Nov2024 The *qeq/ctip* style describes partial charges on atoms in the same way as style *qeq/shielded* but also enables the definition of charge diff --git a/doc/src/fix_qeq_reaxff.rst b/doc/src/fix_qeq_reaxff.rst index e90842ea6a..e1a09c4fc3 100644 --- a/doc/src/fix_qeq_reaxff.rst +++ b/doc/src/fix_qeq_reaxff.rst @@ -124,7 +124,8 @@ LAMMPS was built with that package. See the :doc:`Build package This fix does not correctly handle interactions involving multiple periodic images of the same atom. Hence, it should not be used for -periodic cell dimensions less than 10 Angstroms. +periodic cell dimensions smaller than the non-bonded cutoff radius, +which is typically :math:`10~\AA` for ReaxFF simulations. This fix may be used in combination with :doc:`fix efield ` and will apply the external electric field during charge equilibration, @@ -138,7 +139,8 @@ as an atom-style variable using the *potential* keyword for `fix efield`. Related commands """""""""""""""" -:doc:`pair_style reaxff `, :doc:`fix qeq/shielded ` +:doc:`pair_style reaxff `, :doc:`fix qeq/shielded `, +:doc:`fix acks2/reaxff `, :doc:`fix qtpie/reaxff ` Default """"""" diff --git a/doc/src/fix_qtpie_reaxff.rst b/doc/src/fix_qtpie_reaxff.rst new file mode 100644 index 0000000000..e96cbec459 --- /dev/null +++ b/doc/src/fix_qtpie_reaxff.rst @@ -0,0 +1,200 @@ +.. index:: fix qtpie/reaxff + +fix qtpie/reaxff command +======================== + +Syntax +"""""" + +.. code-block:: LAMMPS + + fix ID group-ID qtpie/reaxff Nevery cutlo cuthi tolerance params gfile args + +* ID, group-ID are documented in :doc:`fix ` command +* qtpie/reaxff = style name of this fix command +* Nevery = perform QTPIE every this many steps +* cutlo,cuthi = lo and hi cutoff for Taper radius +* tolerance = precision to which charges will be equilibrated +* params = reaxff or a filename +* gfile = the name of a file containing Gaussian orbital exponents +* one or more keywords or keyword/value pairs may be appended + + .. parsed-literal:: + + keyword = *maxiter* + *maxiter* N = limit the number of iterations to *N* + +Examples +"""""""" + +.. code-block:: LAMMPS + + fix 1 all qtpie/reaxff 1 0.0 10.0 1.0e-6 reaxff exp.qtpie + fix 1 all qtpie/reaxff 1 0.0 10.0 1.0e-6 params.qtpie exp.qtpie maxiter 500 + +Description +""""""""""" + +.. versionadded:: 19Nov2024 + +The QTPIE charge equilibration method is an extension of the QEq charge +equilibration method. With QTPIE, the partial charges on individual atoms +are computed by minimizing the electrostatic energy of the system in the +same way as the QEq method but where the absolute electronegativity, +:math:`\chi_i`, of each atom in the QEq charge equilibration scheme +:ref:`(Rappe and Goddard) ` is replaced with an effective +electronegativity given by :ref:`(Chen) ` + +.. math:: + \chi_{\mathrm{eff},i} = \frac{\sum_{j=1}^{N} (\chi_i - \chi_j) S_{ij}} + {\sum_{m=1}^{N}S_{im}}, + +which acts to penalize long-range charge transfer seen with the QEq charge +equilibration scheme. In this equation, :math:`N` is the number of atoms in +the system and :math:`S_{ij}` is the overlap integral between atom :math:`i` +and atom :math:`j`. + +The effect of an external electric field can be incorporated into the QTPIE +method by modifying the absolute or effective electronegativities of each +atom :ref:`(Chen) `. This fix models the effect of an external +electric field by using the effective electronegativity given in +:ref:`(Gergs) `: + +.. math:: + \chi_{\mathrm{eff},i} = \frac{\sum_{j=1}^{N} (\chi_i - \chi_j + \phi_i - \phi_j) S_{ij}} + {\sum_{m=1}^{N}S_{im}}, + +where :math:`\phi_i` and :math:`\phi_j` are the electric +potentials at the positions of atom :math:`i` and :math:`j` +due to the external electric field. + +This fix is typically used in conjunction with the ReaxFF force +field model as implemented in the :doc:`pair_style reaxff ` +command, but it can be used with any potential in LAMMPS, so long as it +defines and uses charges on each atom. For more technical details about the +charge equilibration performed by `fix qtpie/reaxff`, which is the same as in +:doc:`fix qeq/reaxff ` except for the use of +:math:`\chi_{\mathrm{eff},i}`, please refer to :ref:`(Aktulga) `. +To be explicit, this fix replaces :math:`\chi_k` of eq. 3 in +:ref:`(Aktulga) ` with :math:`\chi_{\mathrm{eff},k}`. + +This fix requires the absolute electronegativity, :math:`\chi`, in eV, the +self-Coulomb potential, :math:`\eta`, in eV, and the shielded Coulomb +constant, :math:`\gamma`, in :math:`\AA^{-1}`. If the *params* setting above +is the word "reaxff", then these are extracted from the +:doc:`pair_style reaxff ` command and the ReaxFF force field +file it reads in. If a file name is specified for *params*, then the +parameters are taken from the specified file and the file must contain +one line for each atom type. The latter form must be used when performing +QTPIE with a non-ReaxFF potential. Each line should be formatted as follows, +ensuring that the parameters are given in units of eV, eV, and :math:`\AA^{-1}`, +respectively: + +.. parsed-literal:: + + itype chi eta gamma + +where *itype* is the atom type from 1 to Ntypes. Note that eta is +defined here as twice the eta value in the ReaxFF file. + +The overlap integrals in the equation for :math:`\chi_{\mathrm{eff},i}` +are computed by using normalized 1s Gaussian type orbitals. The Gaussian +orbital exponents, :math:`\alpha`, that are needed to compute the overlap +integrals are taken from the file given by *gfile*. +This file must contain one line for each atom type and provide the Gaussian +orbital exponent for each atom type in units of inverse square Bohr radius. +Each line should be formatted as follows: + +.. parsed-literal:: + + itype alpha + +Empty lines or any text following the pound sign (#) are ignored. An example +*gfile* for a system with two atom types is + +.. parsed-literal:: + + # An example gfile. Exponents are taken from Table 2.2 of Chen, J. (2009). + # Theory and applications of fluctuating-charge models. + # The units of the exponents are 1 / (Bohr radius)^2 . + 1 0.2240 # O + 2 0.5434 # H + +The optional *maxiter* keyword allows changing the max number +of iterations in the linear solver. The default value is 200. + +.. note:: + + In order to solve the self-consistent equations for electronegativity + equalization, LAMMPS imposes the additional constraint that all the + charges in the fix group must add up to zero. The initial charge + assignments should also satisfy this constraint. LAMMPS will print a + warning if that is not the case. + +Restart, fix_modify, output, run start/stop, minimize info +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +No information about this fix is written to :doc:`binary restart files +`. This fix computes a global scalar (the number of +iterations) and a per-atom vector (the effective electronegativity), which +can be accessed by various :doc:`output commands `. +No parameter of this fix can be used with the *start/stop* keywords of +the :doc:`run ` command. + +This fix is invoked during :doc:`energy minimization `. + +Restrictions +"""""""""""" + +This fix is part of the REAXFF package. It is only enabled if +LAMMPS was built with that package. See the :doc:`Build package +` page for more info. + +This fix does not correctly handle interactions involving multiple +periodic images of the same atom. Hence, it should not be used for +periodic cell dimensions smaller than the non-bonded cutoff radius, +which is typically :math:`10~\AA` for ReaxFF simulations. + +This fix may be used in combination with :doc:`fix efield ` +and will apply the external electric field during charge equilibration, +but there may be only one fix efield instance used and the electric field +must be applied to all atoms in the system. Consequently, `fix efield` must +be used with *group-ID* all and must not be used with the keyword *region*. +Equal-style variables can be used for electric field vector +components without any further settings. Atom-style variables can be used +for spatially-varying electric field vector components, but the resulting +electric potential must be specified as an atom-style variable using +the *potential* keyword for `fix efield`. + +Related commands +"""""""""""""""" + +:doc:`pair_style reaxff `, :doc:`fix qeq/reaxff `, +:doc:`fix acks2/reaxff ` + +Default +""""""" + +maxiter 200 + +---------- + +.. _Rappe3: + +**(Rappe)** Rappe and Goddard III, Journal of Physical Chemistry, 95, +3358-3363 (1991). + +.. _qtpie-Chen: + +**(Chen)** Chen, Jiahao. Theory and applications of fluctuating-charge models. +University of Illinois at Urbana-Champaign, 2009. + +.. _Gergs: + +**(Gergs)** Gergs, Dirkmann and Mussenbrock. +Journal of Applied Physics 123.24 (2018). + +.. _qeq-Aktulga2: + +**(Aktulga)** Aktulga, Fogarty, Pandit, Grama, Parallel Computing, 38, +245-259 (2012). diff --git a/doc/src/fix_recenter.rst b/doc/src/fix_recenter.rst index 9991904b37..60a8a674d0 100644 --- a/doc/src/fix_recenter.rst +++ b/doc/src/fix_recenter.rst @@ -1,8 +1,11 @@ .. index:: fix recenter +.. index:: fix recenter/kk fix recenter command ==================== +Accelerator Variants: *recenter/kk* + Syntax """""" @@ -113,6 +116,12 @@ The scalar and vector values calculated by this fix are "extensive". No parameter of this fix can be used with the *start/stop* keywords of the :doc:`run ` command. This fix is not invoked during :doc:`energy minimization `. +---------- + +.. include:: accel_styles.rst + +---------- + Restrictions """""""""""" diff --git a/doc/src/fix_spring_self.rst b/doc/src/fix_spring_self.rst index 4453fd61c5..a78e7575f8 100644 --- a/doc/src/fix_spring_self.rst +++ b/doc/src/fix_spring_self.rst @@ -13,7 +13,7 @@ Syntax * ID, group-ID are documented in :doc:`fix ` command * spring/self = style name of this fix command -* K = spring constant (force/distance units) +* K = spring constant (force/distance units), can be a variable (see below) * dir = xyz, xy, xz, yz, x, y, or z (optional, default: xyz) Examples @@ -22,6 +22,7 @@ Examples .. code-block:: LAMMPS fix tether boundary-atoms spring/self 10.0 + fix var all spring/self v_kvar fix zrest move spring/self 10.0 z Description @@ -42,6 +43,22 @@ directions, but it can be limited to the xy-, xz-, yz-plane and the x-, y-, or z-direction, thus restraining the atoms to a line or a plane, respectively. +The force constant *k* can be specified as an equal-style or atom-style +:doc:`variable `. If the value is a variable, it should be specified +as v_name, where name is the variable name. In this case, the variable +will be evaluated each time step, and its value(s) will be used as +force constant for the spring force. + +Equal-style variables can specify formulas with various mathematical +functions and include :doc:`thermo_style ` command +keywords for the simulation box parameters, time step, and elapsed time. +Thus, it is easy to specify a time-dependent force field. + +Atom-style variables can specify the same formulas as equal-style +variables but can also include per-atom values, such as atom +coordinates. Thus, it is easy to specify a spatially-dependent force +field with optional time-dependence as well. + Restart, fix_modify, output, run start/stop, minimize info """"""""""""""""""""""""""""""""""""""""""""""""""""""""""" @@ -89,7 +106,9 @@ invoked by the :doc:`minimize ` command. Restrictions """""""""""" - none + +The KOKKOS version, *fix spring/self/kk* may only be used with a constant +value of K, not a variable. Related commands """""""""""""""" diff --git a/doc/src/fix_wall_region.rst b/doc/src/fix_wall_region.rst index 466319c12e..67b9b9cdb2 100644 --- a/doc/src/fix_wall_region.rst +++ b/doc/src/fix_wall_region.rst @@ -1,8 +1,11 @@ .. index:: fix wall/region +.. index:: fix wall/region/kk fix wall/region command ======================= +Accelerator Variants: *wall/region/kk* + Syntax """""" @@ -234,6 +237,12 @@ invoked by the :doc:`minimize ` command. minimized), you MUST enable the :doc:`fix_modify ` *energy* option for this fix. +---------- + +.. include:: accel_styles.rst + +---------- + Restrictions """""""""""" none diff --git a/doc/src/pair_coul.rst b/doc/src/pair_coul.rst index 17e9358754..77c0e0b18b 100644 --- a/doc/src/pair_coul.rst +++ b/doc/src/pair_coul.rst @@ -180,7 +180,7 @@ coulomb styles in :doc:`hybrid pair styles `. ---------- -.. versionadded:: TBD +.. versionadded:: 19Nov2024 Style *coul/ctip* computes the Coulomb interactions as described in :ref:`Plummer `. It uses the the damped shifted model as in diff --git a/doc/src/pair_reaxff.rst b/doc/src/pair_reaxff.rst index 03d53d1ff4..495572dc0e 100644 --- a/doc/src/pair_reaxff.rst +++ b/doc/src/pair_reaxff.rst @@ -20,7 +20,7 @@ Syntax .. parsed-literal:: keyword = *checkqeq* or *lgvdw* or *safezone* or *mincap* or *minhbonds* or *tabulate* or *list/blocking* - *checkqeq* value = *yes* or *no* = whether or not to require qeq/reaxff or acks2/reaxff fix + *checkqeq* value = *yes* or *no* = whether or not to require one of fix qeq/reaxff, fix acks2/reaxff or fix qtpie/reaxff *enobonds* value = *yes* or *no* = whether or not to tally energy of atoms with no bonds *lgvdw* value = *yes* or *no* = whether or not to use a low gradient vdW correction *safezone* = factor used for array allocation @@ -120,20 +120,22 @@ up that process. The ReaxFF parameter files provided were created using a charge equilibration (QEq) model for handling the electrostatic interactions. -Therefore, by default, LAMMPS requires that either the -:doc:`fix qeq/reaxff ` or the -:doc:`fix qeq/shielded ` or :doc:`fix acks2/reaxff ` -command be used with -*pair_style reaxff* when simulating a ReaxFF model, to equilibrate -the charges each timestep. +Therefore, by default, LAMMPS requires that +:doc:`fix qeq/reaxff ` or :doc:`fix qeq/shielded ` +or :doc:`fix acks2/reaxff ` +or :doc:`fix qtpie/reaxff ` +is used with *pair_style reaxff* when simulating a ReaxFF model, +to equilibrate the charges at each timestep. +See the :doc:`fix qeq/reaxff ` or :doc:`fix qeq/shielded ` +or :doc:`fix acks2/reaxff ` +or :doc:`fix qtpie/reaxff ` +command documentation for more details. Using the keyword *checkqeq* with the value *no* turns off the check for the QEq fixes, allowing a simulation to be run without charge equilibration. In this case, the static charges you assign to each atom will be used for computing the electrostatic interactions in -the system. See the :doc:`fix qeq/reaxff ` or -:doc:`fix qeq/shielded ` or :doc:`fix acks2/reaxff ` -command documentation for more details. +the system. Using the optional keyword *lgvdw* with the value *yes* turns on the low-gradient correction of ReaxFF for long-range London Dispersion, @@ -372,8 +374,8 @@ Related commands """""""""""""""" :doc:`pair_coeff `, :doc:`fix qeq/reaxff `, -:doc:`fix acks2/reaxff `, :doc:`fix reaxff/bonds `, -:doc:`fix reaxff/species `, +:doc:`fix acks2/reaxff `, :doc:`fix qtpie/reaxff `, +:doc:`fix reaxff/bonds `, :doc:`fix reaxff/species `, :doc:`compute reaxff/atom ` Default diff --git a/doc/src/region.rst b/doc/src/region.rst index 3a27c4b5ff..94feee6ad4 100644 --- a/doc/src/region.rst +++ b/doc/src/region.rst @@ -3,6 +3,8 @@ region command ============== +Accelerator Variants: *block/kk*, *sphere/kk* + Syntax """""" @@ -74,7 +76,7 @@ Syntax Rx,Ry,Rz = axis of rotation vector *open* value = integer from 1-6 corresponding to face index (see below) -* accelerated styles (with same args) = *block/kk* +* accelerated styles (with same args) = *block/kk, sphere/kk* Examples """""""" @@ -401,9 +403,9 @@ sub-regions can be defined with the *open* keyword. .. note:: - Currently, only *block* style regions are supported by Kokkos. The + Currently, only *block* and *sphere* style regions are supported by KOKKOS. The code using the region (such as a fix or compute) must also be - supported by Kokkos or no acceleration will occur. + supported by KOKKOS or no acceleration will occur. ---------- diff --git a/doc/utils/check-styles.py b/doc/utils/check-styles.py index 89c8920760..6e4d133cd4 100644 --- a/doc/utils/check-styles.py +++ b/doc/utils/check-styles.py @@ -73,8 +73,6 @@ omp = re.compile("(.+)/omp\\s*$") opt = re.compile("(.+)/opt\\s*$") removed = re.compile("(.*)Deprecated$") -accel_pattern = re.compile(r"^.. include::\s+accel_styles.rst$") - def require_accel_include(path): found = False needs = False @@ -94,6 +92,7 @@ def require_accel_include(path): if kokkos.match(line): needs = True if intel.match(line): needs = True if opt.match(line): needs = True + if path == "src/fix_colvars.rst": needs = False m = cmd_pattern.match(line) if m: if gpu.match(line): needs = True @@ -167,7 +166,9 @@ def check_style(filename, dirname, pattern, styles, name, suffix=False, skip=set # known undocumented aliases we need to skip if c in skip: continue s = c - if suffix: s = add_suffix(styles, c) + if suffix: + s = add_suffix(styles, c) + if s == 'colvars (k)' : continue if not s in matches: if not styles[c]['removed']: print(f"{name} style entry {s} is missing or incomplete in {filename}") @@ -300,7 +301,7 @@ for command_type, entries in index.items(): print("Total number of style index entries:", total_index) skip_angle = ('sdk') -skip_fix = ('python', 'NEIGH_HISTORY/omp','acks2/reax','qeq/reax','reax/c/bonds','reax/c/species', 'pimd') +skip_fix = ('python', 'NEIGH_HISTORY/omp','acks2/reax','qeq/reax','reax/c/bonds','reax/c/species', 'pimd', 'colvars/kk') skip_pair = ('meam/c','lj/sf','reax/c','lj/sdk','lj/sdk/coul/long','lj/sdk/coul/msm') skip_compute = ('pressure/cylinder') diff --git a/doc/utils/sphinx-config/false_positives.txt b/doc/utils/sphinx-config/false_positives.txt index 0708a14882..8e601d6c16 100644 --- a/doc/utils/sphinx-config/false_positives.txt +++ b/doc/utils/sphinx-config/false_positives.txt @@ -815,6 +815,7 @@ dipoleflag dir Direc dirname +Dirkmann discoverable discretization discretized @@ -976,6 +977,7 @@ elaplong elastance Electroneg electronegative +electronegativities electronegativity electroneutral electroneutrality @@ -1293,6 +1295,7 @@ Geocomputing georg Georg Geotechnica +Gergs germain Germann Germano @@ -1305,6 +1308,7 @@ gettimeofday geturl gewald Gezelter +gfile Gflop gfortran ghostneigh @@ -1712,6 +1716,7 @@ Jewett jgissing ji Jiang +Jiahao Jiao jik JIK @@ -2366,6 +2371,7 @@ mui Mukherjee Mulders Müller +Mulliken mult multi multibody @@ -2390,6 +2396,7 @@ Murdick Murtola Murty Muser +Mussenbrock mutexes Muto muVT @@ -3073,6 +3080,7 @@ qqr qqrd Qsb qtb +qtpie quadratically quadrupolar quadrupole diff --git a/examples/reaxff/water/gauss_exp.txt b/examples/reaxff/water/gauss_exp.txt new file mode 100644 index 0000000000..4210471e9f --- /dev/null +++ b/examples/reaxff/water/gauss_exp.txt @@ -0,0 +1,5 @@ +# Gaussian orbital exponents (required for fix qtpie/reaxff) taken from Table 2.2 +# of Chen, J. (2009). Theory and applications of fluctuating-charge models. +# The units of the exponents are 1 / (Bohr radius)^2 . +1 0.2240 # O +2 0.5434 # H diff --git a/examples/reaxff/water/in.water.qtpie b/examples/reaxff/water/in.water.qtpie new file mode 100644 index 0000000000..a8f8759444 --- /dev/null +++ b/examples/reaxff/water/in.water.qtpie @@ -0,0 +1,29 @@ +# QTPIE Water + +boundary p p p +units real +atom_style charge + +read_data data.water + +variable x index 1 +variable y index 1 +variable z index 1 + +replicate $x $y $z + +pair_style reaxff NULL safezone 3.0 mincap 150 +pair_coeff * * qeq_ff.water O H +neighbor 0.5 bin +neigh_modify every 1 delay 0 check yes + +velocity all create 300.0 4928459 rot yes dist gaussian + +fix 1 all qtpie/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt +fix 2 all nvt temp 300 300 50.0 + +timestep 0.5 +thermo 10 +thermo_style custom step temp press density vol + +run 20 diff --git a/examples/reaxff/water/in.water.qtpie.field b/examples/reaxff/water/in.water.qtpie.field new file mode 100644 index 0000000000..e5ac77484f --- /dev/null +++ b/examples/reaxff/water/in.water.qtpie.field @@ -0,0 +1,30 @@ +# QTPIE Water + +boundary p p p +units real +atom_style charge + +read_data data.water + +variable x index 1 +variable y index 1 +variable z index 1 + +replicate $x $y $z + +pair_style reaxff NULL safezone 3.0 mincap 150 +pair_coeff * * qeq_ff.water O H +neighbor 0.5 bin +neigh_modify every 1 delay 0 check yes + +velocity all create 300.0 4928459 rot yes dist gaussian + +fix 1 all qtpie/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt +fix 2 all nvt temp 300 300 50.0 +fix 3 all efield 0.0 0.0 0.05 + +timestep 0.5 +thermo 10 +thermo_style custom step temp press density vol + +run 20 diff --git a/examples/reaxff/water/log.29Aug24.reaxff.water-qtpie-field.g++.1 b/examples/reaxff/water/log.29Aug24.reaxff.water-qtpie-field.g++.1 new file mode 100644 index 0000000000..33221ff080 --- /dev/null +++ b/examples/reaxff/water/log.29Aug24.reaxff.water-qtpie-field.g++.1 @@ -0,0 +1,127 @@ +LAMMPS (29 Aug 2024 - Development - patch_29Aug2024-76-g3f232caf9b) + using 1 OpenMP thread(s) per MPI task +# QTPIE Water + +boundary p p p +units real +atom_style charge + +read_data data.water +Reading data file ... + orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 3000 atoms + read_data CPU = 0.056 seconds + +variable x index 1 +variable y index 1 +variable z index 1 + +replicate $x $y $z +replicate 1 $y $z +replicate 1 1 $z +replicate 1 1 1 +Replication is creating a 1x1x1 = 1 times larger system... + orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046) + 1 by 1 by 1 MPI processor grid + 3000 atoms + replicate CPU = 0.001 seconds + +pair_style reaxff NULL safezone 3.0 mincap 150 +pair_coeff * * qeq_ff.water O H +WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:294) +neighbor 0.5 bin +neigh_modify every 1 delay 0 check yes + +velocity all create 300.0 4928459 rot yes dist gaussian + +fix 1 all qtpie/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt +fix 2 all nvt temp 300 300 50.0 +fix 3 all efield 0.0 0.0 0.05 + +timestep 0.5 +thermo 10 +thermo_style custom step temp press density vol + +run 20 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- Type Label Framework: https://doi.org/10.1021/acs.jpcb.3c08419 + +@Article{Gissinger24, + author = {Jacob R. Gissinger, Ilia Nikiforov, Yaser Afshar, Brendon Waters, Moon-ki Choi, Daniel S. Karls, Alexander Stukowski, Wonpil Im, Hendrik Heinz, Axel Kohlmeyer, and Ellad B. Tadmor}, + title = {Type Label Framework for Bonded Force Fields in LAMMPS}, + journal = {J. Phys. Chem. B}, + year = 2024, + volume = 128, + number = 13, + pages = {3282–-3297} +} + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... + update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 10.5 + ghost atom cutoff = 10.5 + binsize = 5.25, bins = 6 6 6 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/ghost/newtoff + stencil: full/ghost/bin/3d + bin: standard + (2) fix qtpie/reaxff, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 539.2 | 539.2 | 539.2 Mbytes + Step Temp Press Density Volume + 0 300 10137.041 1 29915.273 + 10 296.09128 3564.7969 1 29915.273 + 20 293.04308 10299.201 1 29915.273 +Loop time of 10.7863 on 1 procs for 20 steps with 3000 atoms + +Performance: 0.080 ns/day, 299.620 hours/ns, 1.854 timesteps/s, 5.563 katom-step/s +100.0% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 4.7275 | 4.7275 | 4.7275 | 0.0 | 43.83 +Neigh | 0.17533 | 0.17533 | 0.17533 | 0.0 | 1.63 +Comm | 0.0017376 | 0.0017376 | 0.0017376 | 0.0 | 0.02 +Output | 8.2065e-05 | 8.2065e-05 | 8.2065e-05 | 0.0 | 0.00 +Modify | 5.8812 | 5.8812 | 5.8812 | 0.0 | 54.52 +Other | | 0.0005226 | | | 0.00 + +Nlocal: 3000 ave 3000 max 3000 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 11077 ave 11077 max 11077 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 971775 ave 971775 max 971775 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 971775 +Ave neighs/atom = 323.925 +Neighbor list builds = 2 +Dangerous builds = 0 +Total wall time: 0:00:12 diff --git a/examples/reaxff/water/log.29Aug24.reaxff.water-qtpie-field.g++.4 b/examples/reaxff/water/log.29Aug24.reaxff.water-qtpie-field.g++.4 new file mode 100644 index 0000000000..07a348604e --- /dev/null +++ b/examples/reaxff/water/log.29Aug24.reaxff.water-qtpie-field.g++.4 @@ -0,0 +1,127 @@ +LAMMPS (29 Aug 2024 - Development - patch_29Aug2024-76-g3f232caf9b) + using 1 OpenMP thread(s) per MPI task +# QTPIE Water + +boundary p p p +units real +atom_style charge + +read_data data.water +Reading data file ... + orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046) + 1 by 2 by 2 MPI processor grid + reading atoms ... + 3000 atoms + read_data CPU = 0.053 seconds + +variable x index 1 +variable y index 1 +variable z index 1 + +replicate $x $y $z +replicate 1 $y $z +replicate 1 1 $z +replicate 1 1 1 +Replication is creating a 1x1x1 = 1 times larger system... + orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046) + 1 by 2 by 2 MPI processor grid + 3000 atoms + replicate CPU = 0.002 seconds + +pair_style reaxff NULL safezone 3.0 mincap 150 +pair_coeff * * qeq_ff.water O H +WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:294) +neighbor 0.5 bin +neigh_modify every 1 delay 0 check yes + +velocity all create 300.0 4928459 rot yes dist gaussian + +fix 1 all qtpie/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt +fix 2 all nvt temp 300 300 50.0 +fix 3 all efield 0.0 0.0 0.05 + +timestep 0.5 +thermo 10 +thermo_style custom step temp press density vol + +run 20 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- Type Label Framework: https://doi.org/10.1021/acs.jpcb.3c08419 + +@Article{Gissinger24, + author = {Jacob R. Gissinger, Ilia Nikiforov, Yaser Afshar, Brendon Waters, Moon-ki Choi, Daniel S. Karls, Alexander Stukowski, Wonpil Im, Hendrik Heinz, Axel Kohlmeyer, and Ellad B. Tadmor}, + title = {Type Label Framework for Bonded Force Fields in LAMMPS}, + journal = {J. Phys. Chem. B}, + year = 2024, + volume = 128, + number = 13, + pages = {3282–-3297} +} + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... + update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 10.5 + ghost atom cutoff = 10.5 + binsize = 5.25, bins = 6 6 6 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/ghost/newtoff + stencil: full/ghost/bin/3d + bin: standard + (2) fix qtpie/reaxff, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 260.5 | 262.2 | 263.6 Mbytes + Step Temp Press Density Volume + 0 300 10137.041 1 29915.273 + 10 296.09128 3564.7969 1 29915.273 + 20 293.04308 10299.201 1 29915.273 +Loop time of 3.14492 on 4 procs for 20 steps with 3000 atoms + +Performance: 0.275 ns/day, 87.359 hours/ns, 6.359 timesteps/s, 19.078 katom-step/s +99.6% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 1.6557 | 1.6847 | 1.7281 | 2.1 | 53.57 +Neigh | 0.086503 | 0.086968 | 0.087627 | 0.2 | 2.77 +Comm | 0.003309 | 0.046699 | 0.075729 | 12.4 | 1.48 +Output | 5.0156e-05 | 5.483e-05 | 6.8111e-05 | 0.0 | 0.00 +Modify | 1.3254 | 1.3261 | 1.3266 | 0.0 | 42.16 +Other | | 0.0004552 | | | 0.01 + +Nlocal: 750 ave 760 max 735 min +Histogram: 1 0 0 0 1 0 0 0 0 2 +Nghost: 6230.5 ave 6253 max 6193 min +Histogram: 1 0 0 0 0 0 1 0 1 1 +Neighs: 276995 ave 280886 max 271360 min +Histogram: 1 0 0 0 1 0 0 0 1 1 + +Total # of neighbors = 1107981 +Ave neighs/atom = 369.327 +Neighbor list builds = 2 +Dangerous builds = 0 +Total wall time: 0:00:03 diff --git a/examples/reaxff/water/log.29Aug24.reaxff.water-qtpie.g++.1 b/examples/reaxff/water/log.29Aug24.reaxff.water-qtpie.g++.1 new file mode 100644 index 0000000000..1187a755ee --- /dev/null +++ b/examples/reaxff/water/log.29Aug24.reaxff.water-qtpie.g++.1 @@ -0,0 +1,126 @@ +LAMMPS (29 Aug 2024 - Development - patch_29Aug2024-76-g3f232caf9b) + using 1 OpenMP thread(s) per MPI task +# QTPIE Water + +boundary p p p +units real +atom_style charge + +read_data data.water +Reading data file ... + orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 3000 atoms + read_data CPU = 0.055 seconds + +variable x index 1 +variable y index 1 +variable z index 1 + +replicate $x $y $z +replicate 1 $y $z +replicate 1 1 $z +replicate 1 1 1 +Replication is creating a 1x1x1 = 1 times larger system... + orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046) + 1 by 1 by 1 MPI processor grid + 3000 atoms + replicate CPU = 0.001 seconds + +pair_style reaxff NULL safezone 3.0 mincap 150 +pair_coeff * * qeq_ff.water O H +WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:294) +neighbor 0.5 bin +neigh_modify every 1 delay 0 check yes + +velocity all create 300.0 4928459 rot yes dist gaussian + +fix 1 all qtpie/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt +fix 2 all nvt temp 300 300 50.0 + +timestep 0.5 +thermo 10 +thermo_style custom step temp press density vol + +run 20 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- Type Label Framework: https://doi.org/10.1021/acs.jpcb.3c08419 + +@Article{Gissinger24, + author = {Jacob R. Gissinger, Ilia Nikiforov, Yaser Afshar, Brendon Waters, Moon-ki Choi, Daniel S. Karls, Alexander Stukowski, Wonpil Im, Hendrik Heinz, Axel Kohlmeyer, and Ellad B. Tadmor}, + title = {Type Label Framework for Bonded Force Fields in LAMMPS}, + journal = {J. Phys. Chem. B}, + year = 2024, + volume = 128, + number = 13, + pages = {3282–-3297} +} + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... + update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 10.5 + ghost atom cutoff = 10.5 + binsize = 5.25, bins = 6 6 6 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/ghost/newtoff + stencil: full/ghost/bin/3d + bin: standard + (2) fix qtpie/reaxff, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 539.2 | 539.2 | 539.2 Mbytes + Step Temp Press Density Volume + 0 300 10138.375 1 29915.273 + 10 295.97879 3575.2769 1 29915.273 + 20 292.76583 10309.128 1 29915.273 +Loop time of 10.8138 on 1 procs for 20 steps with 3000 atoms + +Performance: 0.080 ns/day, 300.383 hours/ns, 1.849 timesteps/s, 5.548 katom-step/s +99.9% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 4.7177 | 4.7177 | 4.7177 | 0.0 | 43.63 +Neigh | 0.17607 | 0.17607 | 0.17607 | 0.0 | 1.63 +Comm | 0.0017295 | 0.0017295 | 0.0017295 | 0.0 | 0.02 +Output | 8.5431e-05 | 8.5431e-05 | 8.5431e-05 | 0.0 | 0.00 +Modify | 5.9177 | 5.9177 | 5.9177 | 0.0 | 54.72 +Other | | 0.0004911 | | | 0.00 + +Nlocal: 3000 ave 3000 max 3000 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 11077 ave 11077 max 11077 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 971830 ave 971830 max 971830 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 971830 +Ave neighs/atom = 323.94333 +Neighbor list builds = 2 +Dangerous builds = 0 +Total wall time: 0:00:12 diff --git a/examples/reaxff/water/log.29Aug24.reaxff.water-qtpie.g++.4 b/examples/reaxff/water/log.29Aug24.reaxff.water-qtpie.g++.4 new file mode 100644 index 0000000000..372156b6a2 --- /dev/null +++ b/examples/reaxff/water/log.29Aug24.reaxff.water-qtpie.g++.4 @@ -0,0 +1,126 @@ +LAMMPS (29 Aug 2024 - Development - patch_29Aug2024-76-g3f232caf9b) + using 1 OpenMP thread(s) per MPI task +# QTPIE Water + +boundary p p p +units real +atom_style charge + +read_data data.water +Reading data file ... + orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046) + 1 by 2 by 2 MPI processor grid + reading atoms ... + 3000 atoms + read_data CPU = 0.053 seconds + +variable x index 1 +variable y index 1 +variable z index 1 + +replicate $x $y $z +replicate 1 $y $z +replicate 1 1 $z +replicate 1 1 1 +Replication is creating a 1x1x1 = 1 times larger system... + orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046) + 1 by 2 by 2 MPI processor grid + 3000 atoms + replicate CPU = 0.002 seconds + +pair_style reaxff NULL safezone 3.0 mincap 150 +pair_coeff * * qeq_ff.water O H +WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:294) +neighbor 0.5 bin +neigh_modify every 1 delay 0 check yes + +velocity all create 300.0 4928459 rot yes dist gaussian + +fix 1 all qtpie/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt +fix 2 all nvt temp 300 300 50.0 + +timestep 0.5 +thermo 10 +thermo_style custom step temp press density vol + +run 20 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- Type Label Framework: https://doi.org/10.1021/acs.jpcb.3c08419 + +@Article{Gissinger24, + author = {Jacob R. Gissinger, Ilia Nikiforov, Yaser Afshar, Brendon Waters, Moon-ki Choi, Daniel S. Karls, Alexander Stukowski, Wonpil Im, Hendrik Heinz, Axel Kohlmeyer, and Ellad B. Tadmor}, + title = {Type Label Framework for Bonded Force Fields in LAMMPS}, + journal = {J. Phys. Chem. B}, + year = 2024, + volume = 128, + number = 13, + pages = {3282–-3297} +} + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... + update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 10.5 + ghost atom cutoff = 10.5 + binsize = 5.25, bins = 6 6 6 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/ghost/newtoff + stencil: full/ghost/bin/3d + bin: standard + (2) fix qtpie/reaxff, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 260.5 | 262.2 | 263.6 Mbytes + Step Temp Press Density Volume + 0 300 10138.375 1 29915.273 + 10 295.97879 3575.2769 1 29915.273 + 20 292.76583 10309.128 1 29915.273 +Loop time of 3.13598 on 4 procs for 20 steps with 3000 atoms + +Performance: 0.276 ns/day, 87.111 hours/ns, 6.378 timesteps/s, 19.133 katom-step/s +99.6% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 1.6622 | 1.695 | 1.7252 | 2.2 | 54.05 +Neigh | 0.086543 | 0.087117 | 0.087848 | 0.2 | 2.78 +Comm | 0.0048192 | 0.035002 | 0.067754 | 15.4 | 1.12 +Output | 4.8033e-05 | 5.3375e-05 | 6.6893e-05 | 0.0 | 0.00 +Modify | 1.3176 | 1.3183 | 1.3189 | 0.0 | 42.04 +Other | | 0.0004753 | | | 0.02 + +Nlocal: 750 ave 760 max 735 min +Histogram: 1 0 0 0 1 0 0 0 0 2 +Nghost: 6229.5 ave 6253 max 6191 min +Histogram: 1 0 0 0 0 0 1 0 1 1 +Neighs: 277011 ave 280900 max 271380 min +Histogram: 1 0 0 0 1 0 0 0 1 1 + +Total # of neighbors = 1108044 +Ave neighs/atom = 369.348 +Neighbor list builds = 2 +Dangerous builds = 0 +Total wall time: 0:00:03 diff --git a/examples/wall/in.wall.block b/examples/wall/in.wall.block new file mode 100644 index 0000000000..7e18d9ab4c --- /dev/null +++ b/examples/wall/in.wall.block @@ -0,0 +1,40 @@ + +units real + +molecule water tip3p.mol +atom_style full + +variable radius equal 100.0 +region box block $(-v_radius) $(v_radius) $(-v_radius) $(v_radius) $(-v_radius) $(v_radius) +create_box 2 box bond/types 1 angle/types 1 & + extra/bond/per/atom 2 extra/angle/per/atom 1 extra/special/per/atom 2 + +mass 1 15.9994 +mass 2 1.008 + +bond_style zero +bond_coeff 1 0.9574 +angle_style zero +angle_coeff 1 104.52 + +region block1 block -80 80 -80 80 -80 80 +region block2 block -70 70 -70 70 -70 70 +create_atoms 0 random 5000 12345 block2 mol water 12345 overlap 2 + +thermo 1 +thermo_style custom step time spcpu temp press etotal pe + +fix wall all wall/region block1 harmonic 1000.0 0.0 2.5 +fix_modify wall energy yes + +pair_style lj/cut/coul/cut 8.0 +pair_coeff 1 1 0.1521 3.1507 +pair_coeff 2 2 0.0 1.0 +velocity all create 300.0 12345 +fix 1 all nvt temp 300 300 100.0 +fix 2 all shake 0.001 10 10000 b 1 a 1 + +dump 2 all movie 10 wall.block.mpg type type size 1500 1500 fsaa yes +dump_modify 2 pad 4 acolor * white/red/green/blue/aqua/magenta + +run 100 diff --git a/examples/wall/in.wall.sphere b/examples/wall/in.wall.sphere new file mode 100644 index 0000000000..995a1b0f75 --- /dev/null +++ b/examples/wall/in.wall.sphere @@ -0,0 +1,40 @@ + +units real + +molecule water tip3p.mol +atom_style full + +variable radius equal 100.0 +region box block $(-v_radius) $(v_radius) $(-v_radius) $(v_radius) $(-v_radius) $(v_radius) +create_box 2 box bond/types 1 angle/types 1 & + extra/bond/per/atom 2 extra/angle/per/atom 1 extra/special/per/atom 2 + +mass 1 15.9994 +mass 2 1.008 + +bond_style zero +bond_coeff 1 0.9574 +angle_style zero +angle_coeff 1 104.52 + +region sphere1 sphere 0 0 0 $(v_radius-10) side in +region sphere2 sphere 0 0 0 $(v_radius-20) side in +create_atoms 0 random 5000 12345 sphere2 mol water 12345 overlap 2 + +thermo 1 +thermo_style custom step time spcpu temp press etotal pe + +fix wall all wall/region sphere1 harmonic 1000.0 0.0 2.5 +fix_modify wall energy yes + +pair_style lj/cut/coul/cut 8.0 +pair_coeff 1 1 0.1521 3.1507 +pair_coeff 2 2 0.0 1.0 +velocity all create 300.0 12345 +fix 1 all nvt temp 300 300 100.0 +fix 2 all shake 0.001 10 10000 b 1 a 1 + +dump 2 all movie 10 wall.sphere.mpg type type size 1500 1500 fsaa yes +dump_modify 2 pad 4 acolor * white/red/green/blue/aqua/magenta + +run 100 diff --git a/examples/wall/tip3p.mol b/examples/wall/tip3p.mol new file mode 100644 index 0000000000..fe8410632e --- /dev/null +++ b/examples/wall/tip3p.mol @@ -0,0 +1,62 @@ +# Water molecule. TIP3P geometry + +3 atoms +2 bonds +1 angles + +Coords + +1 0.00000 -0.06556 0.00000 +2 0.75695 0.52032 0.00000 +3 -0.75695 0.52032 0.00000 + +Types + +1 1 +2 2 +3 2 + +Charges + +1 -0.834 +2 0.417 +3 0.417 + +Bonds + +1 1 1 2 +2 1 1 3 + +Angles + +1 1 2 1 3 + +Shake Flags + +1 1 +2 1 +3 1 + +Shake Atoms + +1 1 2 3 +2 1 2 3 +3 1 2 3 + +Shake Bond Types + +1 1 1 1 +2 1 1 1 +3 1 1 1 + +Special Bond Counts + +1 2 0 0 +2 1 1 0 +3 1 1 0 + +Special Bonds + +1 2 3 +2 1 3 +3 1 2 diff --git a/lib/gpu/lal_neighbor.cpp b/lib/gpu/lal_neighbor.cpp index 288415e0e7..aca9b1d141 100644 --- a/lib/gpu/lal_neighbor.cpp +++ b/lib/gpu/lal_neighbor.cpp @@ -365,7 +365,9 @@ void Neighbor::get_host(const int inum, int *ilist, int *numj, int i=ilist[ii]; three_ilist[i] = ii; } - three_ilist.update_device(inum,true); + // needs to transfer _max_atoms because three_ilist indexes all the atoms (local and ghost) + // not just inum (number of neighbor list items) + three_ilist.update_device(_max_atoms,true); } time_nbor.stop(); diff --git a/lib/linalg/dbdsdc.cpp b/lib/linalg/dbdsdc.cpp new file mode 100644 index 0000000000..7f362f3be9 --- /dev/null +++ b/lib/linalg/dbdsdc.cpp @@ -0,0 +1,282 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__9 = 9; +static integer c__0 = 0; +static doublereal c_b15 = 1.; +static integer c__1 = 1; +static doublereal c_b29 = 0.; +int dbdsdc_(char *uplo, char *compq, integer *n, doublereal *d__, doublereal *e, doublereal *u, + integer *ldu, doublereal *vt, integer *ldvt, doublereal *q, integer *iq, + doublereal *work, integer *iwork, integer *info, ftnlen uplo_len, ftnlen compq_len) +{ + integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; + doublereal d__1; + double d_lmp_sign(doublereal *, doublereal *), log(doublereal); + integer i__, j, k; + doublereal p, r__; + integer z__, ic, ii, kk; + doublereal cs; + integer is, iu; + doublereal sn; + integer nm1; + doublereal eps; + integer ivt, difl, difr, ierr, perm, mlvl, sqre; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int dlasr_(char *, char *, char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, ftnlen, ftnlen, ftnlen), + dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), + dswap_(integer *, doublereal *, integer *, doublereal *, integer *); + integer poles, iuplo, nsize, start; + extern int dlasd0_(integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *, integer *, doublereal *, integer *); + extern doublereal dlamch_(char *, ftnlen); + extern int dlasda_(integer *, integer *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, + doublereal *, integer *, integer *, ftnlen), + dlasdq_(char *, integer *, integer *, integer *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, ftnlen), + dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, + ftnlen), + dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + extern int xerbla_(char *, integer *, ftnlen); + integer givcol; + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *, ftnlen); + integer icompq; + doublereal orgnrm; + integer givnum, givptr, qstart, smlsiz, wstart, smlszp; + --d__; + --e; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --q; + --iq; + --work; + --iwork; + *info = 0; + iuplo = 0; + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + iuplo = 1; + } + if (lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + iuplo = 2; + } + if (lsame_(compq, (char *)"N", (ftnlen)1, (ftnlen)1)) { + icompq = 0; + } else if (lsame_(compq, (char *)"P", (ftnlen)1, (ftnlen)1)) { + icompq = 1; + } else if (lsame_(compq, (char *)"I", (ftnlen)1, (ftnlen)1)) { + icompq = 2; + } else { + icompq = -1; + } + if (iuplo == 0) { + *info = -1; + } else if (icompq < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ldu < 1 || icompq == 2 && *ldu < *n) { + *info = -7; + } else if (*ldvt < 1 || icompq == 2 && *ldvt < *n) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DBDSDC", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + smlsiz = ilaenv_(&c__9, (char *)"DBDSDC", (char *)" ", &c__0, &c__0, &c__0, &c__0, (ftnlen)6, (ftnlen)1); + if (*n == 1) { + if (icompq == 1) { + q[1] = d_lmp_sign(&c_b15, &d__[1]); + q[smlsiz * *n + 1] = 1.; + } else if (icompq == 2) { + u[u_dim1 + 1] = d_lmp_sign(&c_b15, &d__[1]); + vt[vt_dim1 + 1] = 1.; + } + d__[1] = abs(d__[1]); + return 0; + } + nm1 = *n - 1; + wstart = 1; + qstart = 3; + if (icompq == 1) { + dcopy_(n, &d__[1], &c__1, &q[1], &c__1); + i__1 = *n - 1; + dcopy_(&i__1, &e[1], &c__1, &q[*n + 1], &c__1); + } + if (iuplo == 2) { + qstart = 5; + if (icompq == 2) { + wstart = (*n << 1) - 1; + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (icompq == 1) { + q[i__ + (*n << 1)] = cs; + q[i__ + *n * 3] = sn; + } else if (icompq == 2) { + work[i__] = cs; + work[nm1 + i__] = -sn; + } + } + } + if (icompq == 0) { + dlasdq_((char *)"U", &c__0, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[vt_offset], ldvt, + &u[u_offset], ldu, &u[u_offset], ldu, &work[1], info, (ftnlen)1); + goto L40; + } + if (*n <= smlsiz) { + if (icompq == 2) { + dlaset_((char *)"A", n, n, &c_b29, &c_b15, &u[u_offset], ldu, (ftnlen)1); + dlaset_((char *)"A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt, (ftnlen)1); + dlasdq_((char *)"U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[u_offset], + ldu, &u[u_offset], ldu, &work[wstart], info, (ftnlen)1); + } else if (icompq == 1) { + iu = 1; + ivt = iu + *n; + dlaset_((char *)"A", n, n, &c_b29, &c_b15, &q[iu + (qstart - 1) * *n], n, (ftnlen)1); + dlaset_((char *)"A", n, n, &c_b29, &c_b15, &q[ivt + (qstart - 1) * *n], n, (ftnlen)1); + dlasdq_((char *)"U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &q[ivt + (qstart - 1) * *n], n, + &q[iu + (qstart - 1) * *n], n, &q[iu + (qstart - 1) * *n], n, &work[wstart], + info, (ftnlen)1); + } + goto L40; + } + if (icompq == 2) { + dlaset_((char *)"A", n, n, &c_b29, &c_b15, &u[u_offset], ldu, (ftnlen)1); + dlaset_((char *)"A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt, (ftnlen)1); + } + orgnrm = dlanst_((char *)"M", n, &d__[1], &e[1], (ftnlen)1); + if (orgnrm == 0.) { + return 0; + } + dlascl_((char *)"G", &c__0, &c__0, &orgnrm, &c_b15, n, &c__1, &d__[1], n, &ierr, (ftnlen)1); + dlascl_((char *)"G", &c__0, &c__0, &orgnrm, &c_b15, &nm1, &c__1, &e[1], &nm1, &ierr, (ftnlen)1); + eps = dlamch_((char *)"Epsilon", (ftnlen)7) * .9; + mlvl = (integer)(log((doublereal)(*n) / (doublereal)(smlsiz + 1)) / log(2.)) + 1; + smlszp = smlsiz + 1; + if (icompq == 1) { + iu = 1; + ivt = smlsiz + 1; + difl = ivt + smlszp; + difr = difl + mlvl; + z__ = difr + (mlvl << 1); + ic = z__ + mlvl; + is = ic + 1; + poles = is + 1; + givnum = poles + (mlvl << 1); + k = 1; + givptr = 2; + perm = 3; + givcol = perm + mlvl; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) < eps) { + d__[i__] = d_lmp_sign(&eps, &d__[i__]); + } + } + start = 1; + sqre = 0; + i__1 = nm1; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) { + if (i__ < nm1) { + nsize = i__ - start + 1; + } else if ((d__1 = e[i__], abs(d__1)) >= eps) { + nsize = *n - start + 1; + } else { + nsize = i__ - start + 1; + if (icompq == 2) { + u[*n + *n * u_dim1] = d_lmp_sign(&c_b15, &d__[*n]); + vt[*n + *n * vt_dim1] = 1.; + } else if (icompq == 1) { + q[*n + (qstart - 1) * *n] = d_lmp_sign(&c_b15, &d__[*n]); + q[*n + (smlsiz + qstart - 1) * *n] = 1.; + } + d__[*n] = (d__1 = d__[*n], abs(d__1)); + } + if (icompq == 2) { + dlasd0_(&nsize, &sqre, &d__[start], &e[start], &u[start + start * u_dim1], ldu, + &vt[start + start * vt_dim1], ldvt, &smlsiz, &iwork[1], &work[wstart], + info); + } else { + dlasda_(&icompq, &smlsiz, &nsize, &sqre, &d__[start], &e[start], + &q[start + (iu + qstart - 2) * *n], n, &q[start + (ivt + qstart - 2) * *n], + &iq[start + k * *n], &q[start + (difl + qstart - 2) * *n], + &q[start + (difr + qstart - 2) * *n], &q[start + (z__ + qstart - 2) * *n], + &q[start + (poles + qstart - 2) * *n], &iq[start + givptr * *n], + &iq[start + givcol * *n], n, &iq[start + perm * *n], + &q[start + (givnum + qstart - 2) * *n], &q[start + (ic + qstart - 2) * *n], + &q[start + (is + qstart - 2) * *n], &work[wstart], &iwork[1], info); + } + if (*info != 0) { + return 0; + } + start = i__ + 1; + } + } + dlascl_((char *)"G", &c__0, &c__0, &c_b15, &orgnrm, n, &c__1, &d__[1], n, &ierr, (ftnlen)1); +L40: + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + kk = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] > p) { + kk = j; + p = d__[j]; + } + } + if (kk != i__) { + d__[kk] = d__[i__]; + d__[i__] = p; + if (icompq == 1) { + iq[i__] = kk; + } else if (icompq == 2) { + dswap_(n, &u[i__ * u_dim1 + 1], &c__1, &u[kk * u_dim1 + 1], &c__1); + dswap_(n, &vt[i__ + vt_dim1], ldvt, &vt[kk + vt_dim1], ldvt); + } + } else if (icompq == 1) { + iq[i__] = i__; + } + } + if (icompq == 1) { + if (iuplo == 1) { + iq[*n] = 1; + } else { + iq[*n] = 0; + } + } + if (iuplo == 2 && icompq == 2) { + dlasr_((char *)"L", (char *)"V", (char *)"B", n, n, &work[1], &work[*n], &u[u_offset], ldu, (ftnlen)1, (ftnlen)1, + (ftnlen)1); + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dcombssq.cpp b/lib/linalg/dcombssq.cpp new file mode 100644 index 0000000000..179be8ad9e --- /dev/null +++ b/lib/linalg/dcombssq.cpp @@ -0,0 +1,26 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int dcombssq_(doublereal *v1, doublereal *v2) +{ + doublereal d__1; + --v2; + --v1; + if (v1[1] >= v2[1]) { + if (v1[1] != 0.) { + d__1 = v2[1] / v1[1]; + v1[2] += d__1 * d__1 * v2[2]; + } else { + v1[2] += v2[2]; + } + } else { + d__1 = v1[1] / v2[1]; + v1[2] = v2[2] + d__1 * d__1 * v1[2]; + v1[1] = v2[1]; + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dgebak.cpp b/lib/linalg/dgebak.cpp new file mode 100644 index 0000000000..ba0db07641 --- /dev/null +++ b/lib/linalg/dgebak.cpp @@ -0,0 +1,117 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int dgebak_(char *job, char *side, integer *n, integer *ilo, integer *ihi, doublereal *scale, + integer *m, doublereal *v, integer *ldv, integer *info, ftnlen job_len, ftnlen side_len) +{ + integer v_dim1, v_offset, i__1; + integer i__, k; + doublereal s; + integer ii; + extern int dscal_(integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int dswap_(integer *, doublereal *, integer *, doublereal *, integer *); + logical leftv; + extern int xerbla_(char *, integer *, ftnlen); + logical rightv; + --scale; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + rightv = lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1); + leftv = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1); + *info = 0; + if (!lsame_(job, (char *)"N", (ftnlen)1, (ftnlen)1) && !lsame_(job, (char *)"P", (ftnlen)1, (ftnlen)1) && + !lsame_(job, (char *)"S", (ftnlen)1, (ftnlen)1) && !lsame_(job, (char *)"B", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (!rightv && !leftv) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > max(1, *n)) { + *info = -4; + } else if (*ihi < min(*ilo, *n) || *ihi > *n) { + *info = -5; + } else if (*m < 0) { + *info = -7; + } else if (*ldv < max(1, *n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGEBAK", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + if (*m == 0) { + return 0; + } + if (lsame_(job, (char *)"N", (ftnlen)1, (ftnlen)1)) { + return 0; + } + if (*ilo == *ihi) { + goto L30; + } + if (lsame_(job, (char *)"S", (ftnlen)1, (ftnlen)1) || lsame_(job, (char *)"B", (ftnlen)1, (ftnlen)1)) { + if (rightv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = scale[i__]; + dscal_(m, &s, &v[i__ + v_dim1], ldv); + } + } + if (leftv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = 1. / scale[i__]; + dscal_(m, &s, &v[i__ + v_dim1], ldv); + } + } + } +L30: + if (lsame_(job, (char *)"P", (ftnlen)1, (ftnlen)1) || lsame_(job, (char *)"B", (ftnlen)1, (ftnlen)1)) { + if (rightv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L40; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = (integer)scale[i__]; + if (k == i__) { + goto L40; + } + dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); + L40:; + } + } + if (leftv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L50; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = (integer)scale[i__]; + if (k == i__) { + goto L50; + } + dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); + L50:; + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dgebal.cpp b/lib/linalg/dgebal.cpp new file mode 100644 index 0000000000..c5301edcdd --- /dev/null +++ b/lib/linalg/dgebal.cpp @@ -0,0 +1,513 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c__0 = 0; +static integer c_n1 = -1; +int dgebal_(char *job, integer *n, doublereal *a, integer *lda, integer *ilo, integer *ihi, + doublereal *scale, integer *info, ftnlen job_len) +{ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1, d__2; + doublereal c__, f, g; + integer i__, j, k, l, m; + doublereal r__, s, ca, ra; + integer ica, ira, iexc; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern int dscal_(integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int dswap_(integer *, doublereal *, integer *, doublereal *, integer *); + doublereal sfmin1, sfmin2, sfmax1, sfmax2; + extern doublereal dlamch_(char *, ftnlen); + extern integer idamax_(integer *, doublereal *, integer *); + extern logical disnan_(doublereal *); + extern int xerbla_(char *, integer *, ftnlen); + logical noconv; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --scale; + *info = 0; + if (!lsame_(job, (char *)"N", (ftnlen)1, (ftnlen)1) && !lsame_(job, (char *)"P", (ftnlen)1, (ftnlen)1) && + !lsame_(job, (char *)"S", (ftnlen)1, (ftnlen)1) && !lsame_(job, (char *)"B", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGEBAL", &i__1, (ftnlen)6); + return 0; + } + k = 1; + l = *n; + if (*n == 0) { + goto L210; + } + if (lsame_(job, (char *)"N", (ftnlen)1, (ftnlen)1)) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + scale[i__] = 1.; + } + goto L210; + } + if (lsame_(job, (char *)"S", (ftnlen)1, (ftnlen)1)) { + goto L120; + } + goto L50; +L20: + scale[m] = (doublereal)j; + if (j == m) { + goto L30; + } + dswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); + i__1 = *n - k + 1; + dswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda); +L30: + switch (iexc) { + case 1: + goto L40; + case 2: + goto L80; + } +L40: + if (l == 1) { + goto L210; + } + --l; +L50: + for (j = l; j >= 1; --j) { + i__1 = l; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ == j) { + goto L60; + } + if (a[j + i__ * a_dim1] != 0.) { + goto L70; + } + L60:; + } + m = l; + iexc = 1; + goto L20; + L70:; + } + goto L90; +L80: + ++k; +L90: + i__1 = l; + for (j = k; j <= i__1; ++j) { + i__2 = l; + for (i__ = k; i__ <= i__2; ++i__) { + if (i__ == j) { + goto L100; + } + if (a[i__ + j * a_dim1] != 0.) { + goto L110; + } + L100:; + } + m = k; + iexc = 2; + goto L20; + L110:; + } +L120: + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + scale[i__] = 1.; + } + if (lsame_(job, (char *)"P", (ftnlen)1, (ftnlen)1)) { + goto L210; + } + sfmin1 = dlamch_((char *)"S", (ftnlen)1) / dlamch_((char *)"P", (ftnlen)1); + sfmax1 = 1. / sfmin1; + sfmin2 = sfmin1 * 2.; + sfmax2 = 1. / sfmin2; +L140: + noconv = FALSE_; + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + i__2 = l - k + 1; + c__ = dnrm2_(&i__2, &a[k + i__ * a_dim1], &c__1); + i__2 = l - k + 1; + r__ = dnrm2_(&i__2, &a[i__ + k * a_dim1], lda); + ica = idamax_(&l, &a[i__ * a_dim1 + 1], &c__1); + ca = (d__1 = a[ica + i__ * a_dim1], abs(d__1)); + i__2 = *n - k + 1; + ira = idamax_(&i__2, &a[i__ + k * a_dim1], lda); + ra = (d__1 = a[i__ + (ira + k - 1) * a_dim1], abs(d__1)); + if (c__ == 0. || r__ == 0.) { + goto L200; + } + g = r__ / 2.; + f = 1.; + s = c__ + r__; + L160: + d__1 = max(f, c__); + d__2 = min(r__, g); + if (c__ >= g || max(d__1, ca) >= sfmax2 || min(d__2, ra) <= sfmin2) { + goto L170; + } + d__1 = c__ + f + ca + r__ + g + ra; + if (disnan_(&d__1)) { + *info = -3; + i__2 = -(*info); + xerbla_((char *)"DGEBAL", &i__2, (ftnlen)6); + return 0; + } + f *= 2.; + c__ *= 2.; + ca *= 2.; + r__ /= 2.; + g /= 2.; + ra /= 2.; + goto L160; + L170: + g = c__ / 2.; + L180: + d__1 = min(f, c__), d__1 = min(d__1, g); + if (g < r__ || max(r__, ra) >= sfmax2 || min(d__1, ca) <= sfmin2) { + goto L190; + } + f /= 2.; + c__ /= 2.; + g /= 2.; + ca /= 2.; + r__ *= 2.; + ra *= 2.; + goto L180; + L190: + if (c__ + r__ >= s * .95) { + goto L200; + } + if (f < 1. && scale[i__] < 1.) { + if (f * scale[i__] <= sfmin1) { + goto L200; + } + } + if (f > 1. && scale[i__] > 1.) { + if (scale[i__] >= sfmax1 / f) { + goto L200; + } + } + g = 1. / f; + scale[i__] *= f; + noconv = TRUE_; + i__2 = *n - k + 1; + dscal_(&i__2, &g, &a[i__ + k * a_dim1], lda); + dscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1); + L200:; + } + if (noconv) { + goto L140; + } +L210: + *ilo = k; + *ihi = l; + return 0; +} +int dgeev_(char *jobvl, char *jobvr, integer *n, doublereal *a, integer *lda, doublereal *wr, + doublereal *wi, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, + doublereal *work, integer *lwork, integer *info, ftnlen jobvl_len, ftnlen jobvr_len) +{ + integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, i__2, i__3; + doublereal d__1, d__2; + double sqrt(doublereal); + integer i__, k; + doublereal r__, cs, sn; + integer ihi; + doublereal scl; + integer ilo; + doublereal dum[1], eps; + integer lwork_trevc__, ibal; + char side[1]; + doublereal anrm; + integer ierr, itau; + extern int drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *); + integer iwrk, nout; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern int dscal_(integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern doublereal dlapy2_(doublereal *, doublereal *); + extern int dlabad_(doublereal *, doublereal *), + dgebak_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, integer *, ftnlen, ftnlen), + dgebal_(char *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, + integer *, ftnlen); + logical scalea; + extern doublereal dlamch_(char *, ftnlen); + doublereal cscale; + extern doublereal dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *, + ftnlen); + extern int dgehrd_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, + doublereal *, integer *, integer *, ftnlen); + extern integer idamax_(integer *, doublereal *, integer *); + extern int dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, + integer *, ftnlen), + dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), + xerbla_(char *, integer *, ftnlen); + logical select[1]; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + doublereal bignum; + extern int dorghr_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *), + dhseqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, + integer *, ftnlen, ftnlen); + integer minwrk, maxwrk; + logical wantvl; + doublereal smlnum; + integer hswork; + logical lquery, wantvr; + extern int dtrevc3_(char *, char *, logical *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *, integer *, doublereal *, + integer *, integer *, ftnlen, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --wr; + --wi; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + *info = 0; + lquery = *lwork == -1; + wantvl = lsame_(jobvl, (char *)"V", (ftnlen)1, (ftnlen)1); + wantvr = lsame_(jobvr, (char *)"V", (ftnlen)1, (ftnlen)1); + if (!wantvl && !lsame_(jobvl, (char *)"N", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (!wantvr && !lsame_(jobvr, (char *)"N", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } else if (*ldvl < 1 || wantvl && *ldvl < *n) { + *info = -9; + } else if (*ldvr < 1 || wantvr && *ldvr < *n) { + *info = -11; + } + if (*info == 0) { + if (*n == 0) { + minwrk = 1; + maxwrk = 1; + } else { + maxwrk = (*n << 1) + + *n * ilaenv_(&c__1, (char *)"DGEHRD", (char *)" ", n, &c__1, n, &c__0, (ftnlen)6, (ftnlen)1); + if (wantvl) { + minwrk = *n << 2; + i__1 = maxwrk, + i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, (char *)"DORGHR", (char *)" ", n, &c__1, n, &c_n1, + (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1, i__2); + dhseqr_((char *)"S", (char *)"V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[1], &vl[vl_offset], + ldvl, &work[1], &c_n1, info, (ftnlen)1, (ftnlen)1); + hswork = (integer)work[1]; + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1, i__2), i__2 = *n + hswork; + maxwrk = max(i__1, i__2); + dtrevc3_((char *)"L", (char *)"B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, + &vr[vr_offset], ldvr, n, &nout, &work[1], &c_n1, &ierr, (ftnlen)1, + (ftnlen)1); + lwork_trevc__ = (integer)work[1]; + i__1 = maxwrk, i__2 = *n + lwork_trevc__; + maxwrk = max(i__1, i__2); + i__1 = maxwrk, i__2 = *n << 2; + maxwrk = max(i__1, i__2); + } else if (wantvr) { + minwrk = *n << 2; + i__1 = maxwrk, + i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, (char *)"DORGHR", (char *)" ", n, &c__1, n, &c_n1, + (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1, i__2); + dhseqr_((char *)"S", (char *)"V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[1], &vr[vr_offset], + ldvr, &work[1], &c_n1, info, (ftnlen)1, (ftnlen)1); + hswork = (integer)work[1]; + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1, i__2), i__2 = *n + hswork; + maxwrk = max(i__1, i__2); + dtrevc3_((char *)"R", (char *)"B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, + &vr[vr_offset], ldvr, n, &nout, &work[1], &c_n1, &ierr, (ftnlen)1, + (ftnlen)1); + lwork_trevc__ = (integer)work[1]; + i__1 = maxwrk, i__2 = *n + lwork_trevc__; + maxwrk = max(i__1, i__2); + i__1 = maxwrk, i__2 = *n << 2; + maxwrk = max(i__1, i__2); + } else { + minwrk = *n * 3; + dhseqr_((char *)"E", (char *)"N", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[1], &vr[vr_offset], + ldvr, &work[1], &c_n1, info, (ftnlen)1, (ftnlen)1); + hswork = (integer)work[1]; + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1, i__2), i__2 = *n + hswork; + maxwrk = max(i__1, i__2); + } + maxwrk = max(maxwrk, minwrk); + } + work[1] = (doublereal)maxwrk; + if (*lwork < minwrk && !lquery) { + *info = -13; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGEEV ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + if (*n == 0) { + return 0; + } + eps = dlamch_((char *)"P", (ftnlen)1); + smlnum = dlamch_((char *)"S", (ftnlen)1); + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1. / smlnum; + anrm = dlange_((char *)"M", n, n, &a[a_offset], lda, dum, (ftnlen)1); + scalea = FALSE_; + if (anrm > 0. && anrm < smlnum) { + scalea = TRUE_; + cscale = smlnum; + } else if (anrm > bignum) { + scalea = TRUE_; + cscale = bignum; + } + if (scalea) { + dlascl_((char *)"G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, &ierr, (ftnlen)1); + } + ibal = 1; + dgebal_((char *)"B", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr, (ftnlen)1); + itau = ibal + *n; + iwrk = itau + *n; + i__1 = *lwork - iwrk + 1; + dgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, &ierr); + if (wantvl) { + *(unsigned char *)side = 'L'; + dlacpy_((char *)"L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl, (ftnlen)1); + i__1 = *lwork - iwrk + 1; + dorghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], &i__1, &ierr); + iwrk = itau; + i__1 = *lwork - iwrk + 1; + dhseqr_((char *)"S", (char *)"V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &vl[vl_offset], ldvl, + &work[iwrk], &i__1, info, (ftnlen)1, (ftnlen)1); + if (wantvr) { + *(unsigned char *)side = 'B'; + dlacpy_((char *)"F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, (ftnlen)1); + } + } else if (wantvr) { + *(unsigned char *)side = 'R'; + dlacpy_((char *)"L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr, (ftnlen)1); + i__1 = *lwork - iwrk + 1; + dorghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], &i__1, &ierr); + iwrk = itau; + i__1 = *lwork - iwrk + 1; + dhseqr_((char *)"S", (char *)"V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &vr[vr_offset], ldvr, + &work[iwrk], &i__1, info, (ftnlen)1, (ftnlen)1); + } else { + iwrk = itau; + i__1 = *lwork - iwrk + 1; + dhseqr_((char *)"E", (char *)"N", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &vr[vr_offset], ldvr, + &work[iwrk], &i__1, info, (ftnlen)1, (ftnlen)1); + } + if (*info != 0) { + goto L50; + } + if (wantvl || wantvr) { + i__1 = *lwork - iwrk + 1; + dtrevc3_(side, (char *)"B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, &vr[vr_offset], + ldvr, n, &nout, &work[iwrk], &i__1, &ierr, (ftnlen)1, (ftnlen)1); + } + if (wantvl) { + dgebak_((char *)"B", (char *)"L", n, &ilo, &ihi, &work[ibal], n, &vl[vl_offset], ldvl, &ierr, (ftnlen)1, + (ftnlen)1); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.) { + scl = 1. / dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + } else if (wi[i__] > 0.) { + d__1 = dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + d__2 = dnrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + scl = 1. / dlapy2_(&d__1, &d__2); + dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + dscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + d__1 = vl[k + i__ * vl_dim1]; + d__2 = vl[k + (i__ + 1) * vl_dim1]; + work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2; + } + k = idamax_(n, &work[iwrk], &c__1); + dlartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], &cs, &sn, &r__); + drot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) * vl_dim1 + 1], &c__1, &cs, + &sn); + vl[k + (i__ + 1) * vl_dim1] = 0.; + } + } + } + if (wantvr) { + dgebak_((char *)"B", (char *)"R", n, &ilo, &ihi, &work[ibal], n, &vr[vr_offset], ldvr, &ierr, (ftnlen)1, + (ftnlen)1); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.) { + scl = 1. / dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + } else if (wi[i__] > 0.) { + d__1 = dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + d__2 = dnrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + scl = 1. / dlapy2_(&d__1, &d__2); + dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + dscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + d__1 = vr[k + i__ * vr_dim1]; + d__2 = vr[k + (i__ + 1) * vr_dim1]; + work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2; + } + k = idamax_(n, &work[iwrk], &c__1); + dlartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], &cs, &sn, &r__); + drot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) * vr_dim1 + 1], &c__1, &cs, + &sn); + vr[k + (i__ + 1) * vr_dim1] = 0.; + } + } + } +L50: + if (scalea) { + i__1 = *n - *info; + i__3 = *n - *info; + i__2 = max(i__3, 1); + dlascl_((char *)"G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info + 1], &i__2, &ierr, + (ftnlen)1); + i__1 = *n - *info; + i__3 = *n - *info; + i__2 = max(i__3, 1); + dlascl_((char *)"G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info + 1], &i__2, &ierr, + (ftnlen)1); + if (*info > 0) { + i__1 = ilo - 1; + dlascl_((char *)"G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1], n, &ierr, (ftnlen)1); + i__1 = ilo - 1; + dlascl_((char *)"G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1], n, &ierr, (ftnlen)1); + } + } + work[1] = (doublereal)maxwrk; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dgehd2.cpp b/lib/linalg/dgehd2.cpp new file mode 100644 index 0000000000..9eaa873bd3 --- /dev/null +++ b/lib/linalg/dgehd2.cpp @@ -0,0 +1,57 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +int dgehd2_(integer *n, integer *ilo, integer *ihi, doublereal *a, integer *lda, doublereal *tau, + doublereal *work, integer *info) +{ + integer a_dim1, a_offset, i__1, i__2, i__3; + integer i__; + doublereal aii; + extern int dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, ftnlen), + dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *), + xerbla_(char *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1, *n)) { + *info = -2; + } else if (*ihi < min(*ilo, *n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGEHD2", &i__1, (ftnlen)6); + return 0; + } + i__1 = *ihi - 1; + for (i__ = *ilo; i__ <= i__1; ++i__) { + i__2 = *ihi - i__; + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3, *n) + i__ * a_dim1], &c__1, + &tau[i__]); + aii = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.; + i__2 = *ihi - i__; + dlarf_((char *)"Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[i__], + &a[(i__ + 1) * a_dim1 + 1], lda, &work[1], (ftnlen)5); + i__2 = *ihi - i__; + i__3 = *n - i__; + dlarf_((char *)"Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[i__], + &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1], (ftnlen)4); + a[i__ + 1 + i__ * a_dim1] = aii; + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dgehrd.cpp b/lib/linalg/dgehrd.cpp new file mode 100644 index 0000000000..eb152b90ed --- /dev/null +++ b/lib/linalg/dgehrd.cpp @@ -0,0 +1,144 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; +static integer c__65 = 65; +static doublereal c_b25 = -1.; +static doublereal c_b26 = 1.; +int dgehrd_(integer *n, integer *ilo, integer *ihi, doublereal *a, integer *lda, doublereal *tau, + doublereal *work, integer *lwork, integer *info) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + integer i__, j, ib; + doublereal ei; + integer nb, nh, nx, iwt; + extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, + ftnlen, ftnlen); + integer nbmin, iinfo; + extern int dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen), + daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), + dgehd2_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *), + dlahr2_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *), + dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, ftnlen, ftnlen, ftnlen, ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + integer ldwork, lwkopt; + logical lquery; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + *info = 0; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1, *n)) { + *info = -2; + } else if (*ihi < min(*ilo, *n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } else if (*lwork < max(1, *n) && !lquery) { + *info = -8; + } + if (*info == 0) { + i__1 = 64, i__2 = ilaenv_(&c__1, (char *)"DGEHRD", (char *)" ", n, ilo, ihi, &c_n1, (ftnlen)6, (ftnlen)1); + nb = min(i__1, i__2); + lwkopt = *n * nb + 4160; + work[1] = (doublereal)lwkopt; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGEHRD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + tau[i__] = 0.; + } + i__1 = *n - 1; + for (i__ = max(1, *ihi); i__ <= i__1; ++i__) { + tau[i__] = 0.; + } + nh = *ihi - *ilo + 1; + if (nh <= 1) { + work[1] = 1.; + return 0; + } + i__1 = 64, i__2 = ilaenv_(&c__1, (char *)"DGEHRD", (char *)" ", n, ilo, ihi, &c_n1, (ftnlen)6, (ftnlen)1); + nb = min(i__1, i__2); + nbmin = 2; + if (nb > 1 && nb < nh) { + i__1 = nb, i__2 = ilaenv_(&c__3, (char *)"DGEHRD", (char *)" ", n, ilo, ihi, &c_n1, (ftnlen)6, (ftnlen)1); + nx = max(i__1, i__2); + if (nx < nh) { + if (*lwork < *n * nb + 4160) { + i__1 = 2, + i__2 = ilaenv_(&c__2, (char *)"DGEHRD", (char *)" ", n, ilo, ihi, &c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1, i__2); + if (*lwork >= *n * nbmin + 4160) { + nb = (*lwork - 4160) / *n; + } else { + nb = 1; + } + } + } + } + ldwork = *n; + if (nb < nbmin || nb >= nh) { + i__ = *ilo; + } else { + iwt = *n * nb + 1; + i__1 = *ihi - 1 - nx; + i__2 = nb; + for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + i__3 = nb, i__4 = *ihi - i__; + ib = min(i__3, i__4); + dlahr2_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], &work[iwt], &c__65, + &work[1], &ldwork); + ei = a[i__ + ib + (i__ + ib - 1) * a_dim1]; + a[i__ + ib + (i__ + ib - 1) * a_dim1] = 1.; + i__3 = *ihi - i__ - ib + 1; + dgemm_((char *)"No transpose", (char *)"Transpose", ihi, &i__3, &ib, &c_b25, &work[1], &ldwork, + &a[i__ + ib + i__ * a_dim1], lda, &c_b26, &a[(i__ + ib) * a_dim1 + 1], lda, + (ftnlen)12, (ftnlen)9); + a[i__ + ib + (i__ + ib - 1) * a_dim1] = ei; + i__3 = ib - 1; + dtrmm_((char *)"Right", (char *)"Lower", (char *)"Transpose", (char *)"Unit", &i__, &i__3, &c_b26, + &a[i__ + 1 + i__ * a_dim1], lda, &work[1], &ldwork, (ftnlen)5, (ftnlen)5, + (ftnlen)9, (ftnlen)4); + i__3 = ib - 2; + for (j = 0; j <= i__3; ++j) { + daxpy_(&i__, &c_b25, &work[ldwork * j + 1], &c__1, &a[(i__ + j + 1) * a_dim1 + 1], + &c__1); + } + i__3 = *ihi - i__; + i__4 = *n - i__ - ib + 1; + dlarfb_((char *)"Left", (char *)"Transpose", (char *)"Forward", (char *)"Columnwise", &i__3, &i__4, &ib, + &a[i__ + 1 + i__ * a_dim1], lda, &work[iwt], &c__65, + &a[i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], &ldwork, (ftnlen)4, (ftnlen)9, + (ftnlen)7, (ftnlen)10); + } + } + dgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo); + work[1] = (doublereal)lwkopt; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dgesdd.cpp b/lib/linalg/dgesdd.cpp new file mode 100644 index 0000000000..59dbee7210 --- /dev/null +++ b/lib/linalg/dgesdd.cpp @@ -0,0 +1,788 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c_n1 = -1; +static integer c__0 = 0; +static doublereal c_b63 = 0.; +static integer c__1 = 1; +static doublereal c_b84 = 1.; +int dgesdd_(char *jobz, integer *m, integer *n, doublereal *a, integer *lda, doublereal *s, + doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, doublereal *work, + integer *lwork, integer *iwork, integer *info, ftnlen jobz_len) +{ + integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2, i__3; + double sqrt(doublereal); + integer lwork_dorglq_mn__, lwork_dorglq_nn__, lwork_dorgqr_mm__, lwork_dorgqr_mn__, i__, ie, + lwork_dorgbr_p_mm__, il, lwork_dorgbr_q_nn__, ir, iu, blk; + doublereal dum[1], eps; + integer ivt, iscl; + doublereal anrm; + integer idum[1], ierr, itau, lwork_dormbr_qln_mm__, lwork_dormbr_qln_mn__, + lwork_dormbr_qln_nn__, lwork_dormbr_prt_mm__, lwork_dormbr_prt_mn__, lwork_dormbr_prt_nn__; + extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, + ftnlen, ftnlen); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer chunk, minmn, wrkbl, itaup, itauq, mnthr; + logical wntqa; + integer nwork; + logical wntqn, wntqo, wntqs; + extern int dbdsdc_(char *, char *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, ftnlen, ftnlen), + dgebrd_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, integer *); + extern doublereal dlamch_(char *, ftnlen), + dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *, ftnlen); + integer bdspac; + extern int dgelqf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, + doublereal *, integer *, integer *, ftnlen), + dgeqrf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, integer *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen), + dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, + ftnlen), + xerbla_(char *, integer *, ftnlen), + dorgbr_(char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *, ftnlen); + doublereal bignum; + extern int dormbr_(char *, char *, char *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, + integer *, ftnlen, ftnlen, ftnlen), + dorglq_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *), + dorgqr_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *); + integer ldwrkl, ldwrkr, minwrk, ldwrku, maxwrk, ldwkvt; + doublereal smlnum; + logical wntqas, lquery; + integer lwork_dgebrd_mm__, lwork_dgebrd_mn__, lwork_dgebrd_nn__, lwork_dgelqf_mn__, + lwork_dgeqrf_mn__; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --s; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --work; + --iwork; + *info = 0; + minmn = min(*m, *n); + wntqa = lsame_(jobz, (char *)"A", (ftnlen)1, (ftnlen)1); + wntqs = lsame_(jobz, (char *)"S", (ftnlen)1, (ftnlen)1); + wntqas = wntqa || wntqs; + wntqo = lsame_(jobz, (char *)"O", (ftnlen)1, (ftnlen)1); + wntqn = lsame_(jobz, (char *)"N", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + if (!(wntqa || wntqs || wntqo || wntqn)) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1, *m)) { + *info = -5; + } else if (*ldu < 1 || wntqas && *ldu < *m || wntqo && *m < *n && *ldu < *m) { + *info = -8; + } else if (*ldvt < 1 || wntqa && *ldvt < *n || wntqs && *ldvt < minmn || + wntqo && *m >= *n && *ldvt < *n) { + *info = -10; + } + if (*info == 0) { + minwrk = 1; + maxwrk = 1; + bdspac = 0; + mnthr = (integer)(minmn * 11. / 6.); + if (*m >= *n && minmn > 0) { + if (wntqn) { + bdspac = *n * 7; + } else { + bdspac = *n * 3 * *n + (*n << 2); + } + dgebrd_(m, n, dum, m, dum, dum, dum, dum, dum, &c_n1, &ierr); + lwork_dgebrd_mn__ = (integer)dum[0]; + dgebrd_(n, n, dum, n, dum, dum, dum, dum, dum, &c_n1, &ierr); + lwork_dgebrd_nn__ = (integer)dum[0]; + dgeqrf_(m, n, dum, m, dum, dum, &c_n1, &ierr); + lwork_dgeqrf_mn__ = (integer)dum[0]; + dorgbr_((char *)"Q", n, n, n, dum, n, dum, dum, &c_n1, &ierr, (ftnlen)1); + lwork_dorgbr_q_nn__ = (integer)dum[0]; + dorgqr_(m, m, n, dum, m, dum, dum, &c_n1, &ierr); + lwork_dorgqr_mm__ = (integer)dum[0]; + dorgqr_(m, n, n, dum, m, dum, dum, &c_n1, &ierr); + lwork_dorgqr_mn__ = (integer)dum[0]; + dormbr_((char *)"P", (char *)"R", (char *)"T", n, n, n, dum, n, dum, dum, n, dum, &c_n1, &ierr, (ftnlen)1, + (ftnlen)1, (ftnlen)1); + lwork_dormbr_prt_nn__ = (integer)dum[0]; + dormbr_((char *)"Q", (char *)"L", (char *)"N", n, n, n, dum, n, dum, dum, n, dum, &c_n1, &ierr, (ftnlen)1, + (ftnlen)1, (ftnlen)1); + lwork_dormbr_qln_nn__ = (integer)dum[0]; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, n, n, dum, m, dum, dum, m, dum, &c_n1, &ierr, (ftnlen)1, + (ftnlen)1, (ftnlen)1); + lwork_dormbr_qln_mn__ = (integer)dum[0]; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, m, n, dum, m, dum, dum, m, dum, &c_n1, &ierr, (ftnlen)1, + (ftnlen)1, (ftnlen)1); + lwork_dormbr_qln_mm__ = (integer)dum[0]; + if (*m >= mnthr) { + if (wntqn) { + wrkbl = *n + lwork_dgeqrf_mn__; + i__1 = wrkbl, i__2 = *n * 3 + lwork_dgebrd_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = bdspac + *n; + maxwrk = max(i__1, i__2); + minwrk = bdspac + *n; + } else if (wntqo) { + wrkbl = *n + lwork_dgeqrf_mn__; + i__1 = wrkbl, i__2 = *n + lwork_dorgqr_mn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dgebrd_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_qln_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + wrkbl = max(i__1, i__2); + maxwrk = wrkbl + (*n << 1) * *n; + minwrk = bdspac + (*n << 1) * *n + *n * 3; + } else if (wntqs) { + wrkbl = *n + lwork_dgeqrf_mn__; + i__1 = wrkbl, i__2 = *n + lwork_dorgqr_mn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dgebrd_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_qln_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + wrkbl = max(i__1, i__2); + maxwrk = wrkbl + *n * *n; + minwrk = bdspac + *n * *n + *n * 3; + } else if (wntqa) { + wrkbl = *n + lwork_dgeqrf_mn__; + i__1 = wrkbl, i__2 = *n + lwork_dorgqr_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dgebrd_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_qln_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + wrkbl = max(i__1, i__2); + maxwrk = wrkbl + *n * *n; + i__1 = *n * 3 + bdspac, i__2 = *n + *m; + minwrk = *n * *n + max(i__1, i__2); + } + } else { + wrkbl = *n * 3 + lwork_dgebrd_mn__; + if (wntqn) { + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + maxwrk = max(i__1, i__2); + minwrk = *n * 3 + max(*m, bdspac); + } else if (wntqo) { + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_qln_mn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + wrkbl = max(i__1, i__2); + maxwrk = wrkbl + *m * *n; + i__1 = *m, i__2 = *n * *n + bdspac; + minwrk = *n * 3 + max(i__1, i__2); + } else if (wntqs) { + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_qln_mn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + maxwrk = max(i__1, i__2); + minwrk = *n * 3 + max(*m, bdspac); + } else if (wntqa) { + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_qln_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + maxwrk = max(i__1, i__2); + minwrk = *n * 3 + max(*m, bdspac); + } + } + } else if (minmn > 0) { + if (wntqn) { + bdspac = *m * 7; + } else { + bdspac = *m * 3 * *m + (*m << 2); + } + dgebrd_(m, n, dum, m, dum, dum, dum, dum, dum, &c_n1, &ierr); + lwork_dgebrd_mn__ = (integer)dum[0]; + dgebrd_(m, m, &a[a_offset], m, &s[1], dum, dum, dum, dum, &c_n1, &ierr); + lwork_dgebrd_mm__ = (integer)dum[0]; + dgelqf_(m, n, &a[a_offset], m, dum, dum, &c_n1, &ierr); + lwork_dgelqf_mn__ = (integer)dum[0]; + dorglq_(n, n, m, dum, n, dum, dum, &c_n1, &ierr); + lwork_dorglq_nn__ = (integer)dum[0]; + dorglq_(m, n, m, &a[a_offset], m, dum, dum, &c_n1, &ierr); + lwork_dorglq_mn__ = (integer)dum[0]; + dorgbr_((char *)"P", m, m, m, &a[a_offset], n, dum, dum, &c_n1, &ierr, (ftnlen)1); + lwork_dorgbr_p_mm__ = (integer)dum[0]; + dormbr_((char *)"P", (char *)"R", (char *)"T", m, m, m, dum, m, dum, dum, m, dum, &c_n1, &ierr, (ftnlen)1, + (ftnlen)1, (ftnlen)1); + lwork_dormbr_prt_mm__ = (integer)dum[0]; + dormbr_((char *)"P", (char *)"R", (char *)"T", m, n, m, dum, m, dum, dum, m, dum, &c_n1, &ierr, (ftnlen)1, + (ftnlen)1, (ftnlen)1); + lwork_dormbr_prt_mn__ = (integer)dum[0]; + dormbr_((char *)"P", (char *)"R", (char *)"T", n, n, m, dum, n, dum, dum, n, dum, &c_n1, &ierr, (ftnlen)1, + (ftnlen)1, (ftnlen)1); + lwork_dormbr_prt_nn__ = (integer)dum[0]; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, m, m, dum, m, dum, dum, m, dum, &c_n1, &ierr, (ftnlen)1, + (ftnlen)1, (ftnlen)1); + lwork_dormbr_qln_mm__ = (integer)dum[0]; + if (*n >= mnthr) { + if (wntqn) { + wrkbl = *m + lwork_dgelqf_mn__; + i__1 = wrkbl, i__2 = *m * 3 + lwork_dgebrd_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = bdspac + *m; + maxwrk = max(i__1, i__2); + minwrk = bdspac + *m; + } else if (wntqo) { + wrkbl = *m + lwork_dgelqf_mn__; + i__1 = wrkbl, i__2 = *m + lwork_dorglq_mn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dgebrd_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + wrkbl = max(i__1, i__2); + maxwrk = wrkbl + (*m << 1) * *m; + minwrk = bdspac + (*m << 1) * *m + *m * 3; + } else if (wntqs) { + wrkbl = *m + lwork_dgelqf_mn__; + i__1 = wrkbl, i__2 = *m + lwork_dorglq_mn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dgebrd_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + wrkbl = max(i__1, i__2); + maxwrk = wrkbl + *m * *m; + minwrk = bdspac + *m * *m + *m * 3; + } else if (wntqa) { + wrkbl = *m + lwork_dgelqf_mn__; + i__1 = wrkbl, i__2 = *m + lwork_dorglq_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dgebrd_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + wrkbl = max(i__1, i__2); + maxwrk = wrkbl + *m * *m; + i__1 = *m * 3 + bdspac, i__2 = *m + *n; + minwrk = *m * *m + max(i__1, i__2); + } + } else { + wrkbl = *m * 3 + lwork_dgebrd_mn__; + if (wntqn) { + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + maxwrk = max(i__1, i__2); + minwrk = *m * 3 + max(*n, bdspac); + } else if (wntqo) { + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_mn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + wrkbl = max(i__1, i__2); + maxwrk = wrkbl + *m * *n; + i__1 = *n, i__2 = *m * *m + bdspac; + minwrk = *m * 3 + max(i__1, i__2); + } else if (wntqs) { + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_mn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + maxwrk = max(i__1, i__2); + minwrk = *m * 3 + max(*n, bdspac); + } else if (wntqa) { + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + maxwrk = max(i__1, i__2); + minwrk = *m * 3 + max(*n, bdspac); + } + } + } + maxwrk = max(maxwrk, minwrk); + work[1] = (doublereal)maxwrk; + if (*lwork < minwrk && !lquery) { + *info = -12; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGESDD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + if (*m == 0 || *n == 0) { + return 0; + } + eps = dlamch_((char *)"P", (ftnlen)1); + smlnum = sqrt(dlamch_((char *)"S", (ftnlen)1)) / eps; + bignum = 1. / smlnum; + anrm = dlange_((char *)"M", m, n, &a[a_offset], lda, dum, (ftnlen)1); + iscl = 0; + if (anrm > 0. && anrm < smlnum) { + iscl = 1; + dlascl_((char *)"G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, &ierr, (ftnlen)1); + } else if (anrm > bignum) { + iscl = 1; + dlascl_((char *)"G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, &ierr, (ftnlen)1); + } + if (*m >= *n) { + if (*m >= mnthr) { + if (wntqn) { + itau = 1; + nwork = itau + *n; + i__1 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, &ierr); + i__1 = *n - 1; + i__2 = *n - 1; + dlaset_((char *)"L", &i__1, &i__2, &c_b63, &c_b63, &a[a_dim1 + 2], lda, (ftnlen)1); + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + i__1 = *lwork - nwork + 1; + dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup], + &work[nwork], &i__1, &ierr); + nwork = ie + *n; + dbdsdc_((char *)"U", (char *)"N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, dum, idum, + &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + } else if (wntqo) { + ir = 1; + if (*lwork >= *lda * *n + *n * *n + *n * 3 + bdspac) { + ldwrkr = *lda; + } else { + ldwrkr = (*lwork - *n * *n - *n * 3 - bdspac) / *n; + } + itau = ir + ldwrkr * *n; + nwork = itau + *n; + i__1 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, &ierr); + dlacpy_((char *)"U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr, (ftnlen)1); + i__1 = *n - 1; + i__2 = *n - 1; + dlaset_((char *)"L", &i__1, &i__2, &c_b63, &c_b63, &work[ir + 1], &ldwrkr, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + i__1 = *lwork - nwork + 1; + dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[itauq], &work[itaup], + &work[nwork], &i__1, &ierr); + iu = nwork; + nwork = iu + *n * *n; + dbdsdc_((char *)"U", (char *)"I", n, &s[1], &work[ie], &work[iu], n, &vt[vt_offset], ldvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", n, n, n, &work[ir], &ldwrkr, &work[itauq], &work[iu], n, + &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", n, n, n, &work[ir], &ldwrkr, &work[itaup], &vt[vt_offset], + ldvt, &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__1 = *m; + i__2 = ldwrkr; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + i__3 = *m - i__ + 1; + chunk = min(i__3, ldwrkr); + dgemm_((char *)"N", (char *)"N", &chunk, n, n, &c_b84, &a[i__ + a_dim1], lda, &work[iu], n, + &c_b63, &work[ir], &ldwrkr, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + a_dim1], lda, (ftnlen)1); + } + } else if (wntqs) { + ir = 1; + ldwrkr = *n; + itau = ir + ldwrkr * *n; + nwork = itau + *n; + i__2 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__2, &ierr); + dlacpy_((char *)"U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr, (ftnlen)1); + i__2 = *n - 1; + i__1 = *n - 1; + dlaset_((char *)"L", &i__2, &i__1, &c_b63, &c_b63, &work[ir + 1], &ldwrkr, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + i__2 = *lwork - nwork + 1; + dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[itauq], &work[itaup], + &work[nwork], &i__2, &ierr); + dbdsdc_((char *)"U", (char *)"I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[vt_offset], ldvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", n, n, n, &work[ir], &ldwrkr, &work[itauq], &u[u_offset], ldu, + &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", n, n, n, &work[ir], &ldwrkr, &work[itaup], &vt[vt_offset], + ldvt, &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr, (ftnlen)1); + dgemm_((char *)"N", (char *)"N", m, n, n, &c_b84, &a[a_offset], lda, &work[ir], &ldwrkr, &c_b63, + &u[u_offset], ldu, (ftnlen)1, (ftnlen)1); + } else if (wntqa) { + iu = 1; + ldwrku = *n; + itau = iu + ldwrku * *n; + nwork = itau + *n; + i__2 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__2, &ierr); + dlacpy_((char *)"L", m, n, &a[a_offset], lda, &u[u_offset], ldu, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork], &i__2, &ierr); + i__2 = *n - 1; + i__1 = *n - 1; + dlaset_((char *)"L", &i__2, &i__1, &c_b63, &c_b63, &a[a_dim1 + 2], lda, (ftnlen)1); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + i__2 = *lwork - nwork + 1; + dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup], + &work[nwork], &i__2, &ierr); + dbdsdc_((char *)"U", (char *)"I", n, &s[1], &work[ie], &work[iu], n, &vt[vt_offset], ldvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", n, n, n, &a[a_offset], lda, &work[itauq], &work[iu], &ldwrku, + &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", n, n, n, &a[a_offset], lda, &work[itaup], &vt[vt_offset], + ldvt, &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + dgemm_((char *)"N", (char *)"N", m, n, n, &c_b84, &u[u_offset], ldu, &work[iu], &ldwrku, &c_b63, + &a[a_offset], lda, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", m, n, &a[a_offset], lda, &u[u_offset], ldu, (ftnlen)1); + } + } else { + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + i__2 = *lwork - nwork + 1; + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup], + &work[nwork], &i__2, &ierr); + if (wntqn) { + dbdsdc_((char *)"U", (char *)"N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, dum, idum, + &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + } else if (wntqo) { + iu = nwork; + if (*lwork >= *m * *n + *n * 3 + bdspac) { + ldwrku = *m; + nwork = iu + ldwrku * *n; + dlaset_((char *)"F", m, n, &c_b63, &c_b63, &work[iu], &ldwrku, (ftnlen)1); + ir = -1; + } else { + ldwrku = *n; + nwork = iu + ldwrku * *n; + ir = nwork; + ldwrkr = (*lwork - *n * *n - *n * 3) / *n; + } + nwork = iu + ldwrku * *n; + dbdsdc_((char *)"U", (char *)"I", n, &s[1], &work[ie], &work[iu], &ldwrku, &vt[vt_offset], ldvt, + dum, idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", n, n, n, &a[a_offset], lda, &work[itaup], &vt[vt_offset], + ldvt, &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + if (*lwork >= *m * *n + *n * 3 + bdspac) { + i__2 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, n, n, &a[a_offset], lda, &work[itauq], &work[iu], + &ldwrku, &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", m, n, &work[iu], &ldwrku, &a[a_offset], lda, (ftnlen)1); + } else { + i__2 = *lwork - nwork + 1; + dorgbr_((char *)"Q", m, n, n, &a[a_offset], lda, &work[itauq], &work[nwork], &i__2, + &ierr, (ftnlen)1); + i__2 = *m; + i__1 = ldwrkr; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { + i__3 = *m - i__ + 1; + chunk = min(i__3, ldwrkr); + dgemm_((char *)"N", (char *)"N", &chunk, n, n, &c_b84, &a[i__ + a_dim1], lda, &work[iu], + &ldwrku, &c_b63, &work[ir], &ldwrkr, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + a_dim1], lda, + (ftnlen)1); + } + } + } else if (wntqs) { + dlaset_((char *)"F", m, n, &c_b63, &c_b63, &u[u_offset], ldu, (ftnlen)1); + dbdsdc_((char *)"U", (char *)"I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[vt_offset], ldvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, n, n, &a[a_offset], lda, &work[itauq], &u[u_offset], ldu, + &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", n, n, n, &a[a_offset], lda, &work[itaup], &vt[vt_offset], + ldvt, &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } else if (wntqa) { + dlaset_((char *)"F", m, m, &c_b63, &c_b63, &u[u_offset], ldu, (ftnlen)1); + dbdsdc_((char *)"U", (char *)"I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[vt_offset], ldvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + if (*m > *n) { + i__1 = *m - *n; + i__2 = *m - *n; + dlaset_((char *)"F", &i__1, &i__2, &c_b63, &c_b84, &u[*n + 1 + (*n + 1) * u_dim1], ldu, + (ftnlen)1); + } + i__1 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, m, n, &a[a_offset], lda, &work[itauq], &u[u_offset], ldu, + &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", n, n, m, &a[a_offset], lda, &work[itaup], &vt[vt_offset], + ldvt, &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } + } + } else { + if (*n >= mnthr) { + if (wntqn) { + itau = 1; + nwork = itau + *m; + i__1 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, &ierr); + i__1 = *m - 1; + i__2 = *m - 1; + dlaset_((char *)"U", &i__1, &i__2, &c_b63, &c_b63, &a[(a_dim1 << 1) + 1], lda, (ftnlen)1); + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + i__1 = *lwork - nwork + 1; + dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup], + &work[nwork], &i__1, &ierr); + nwork = ie + *m; + dbdsdc_((char *)"U", (char *)"N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, dum, idum, + &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + } else if (wntqo) { + ivt = 1; + il = ivt + *m * *m; + if (*lwork >= *m * *n + *m * *m + *m * 3 + bdspac) { + ldwrkl = *m; + chunk = *n; + } else { + ldwrkl = *m; + chunk = (*lwork - *m * *m) / *m; + } + itau = il + ldwrkl * *m; + nwork = itau + *m; + i__1 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, &ierr); + dlacpy_((char *)"L", m, m, &a[a_offset], lda, &work[il], &ldwrkl, (ftnlen)1); + i__1 = *m - 1; + i__2 = *m - 1; + dlaset_((char *)"U", &i__1, &i__2, &c_b63, &c_b63, &work[il + ldwrkl], &ldwrkl, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + i__1 = *lwork - nwork + 1; + dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[itauq], &work[itaup], + &work[nwork], &i__1, &ierr); + dbdsdc_((char *)"U", (char *)"I", m, &s[1], &work[ie], &u[u_offset], ldu, &work[ivt], m, dum, idum, + &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, m, m, &work[il], &ldwrkl, &work[itauq], &u[u_offset], ldu, + &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", m, m, m, &work[il], &ldwrkl, &work[itaup], &work[ivt], m, + &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__1 = *n; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + i__3 = *n - i__ + 1; + blk = min(i__3, chunk); + dgemm_((char *)"N", (char *)"N", m, &blk, m, &c_b84, &work[ivt], m, &a[i__ * a_dim1 + 1], lda, + &c_b63, &work[il], &ldwrkl, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 + 1], lda, (ftnlen)1); + } + } else if (wntqs) { + il = 1; + ldwrkl = *m; + itau = il + ldwrkl * *m; + nwork = itau + *m; + i__2 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__2, &ierr); + dlacpy_((char *)"L", m, m, &a[a_offset], lda, &work[il], &ldwrkl, (ftnlen)1); + i__2 = *m - 1; + i__1 = *m - 1; + dlaset_((char *)"U", &i__2, &i__1, &c_b63, &c_b63, &work[il + ldwrkl], &ldwrkl, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + i__2 = *lwork - nwork + 1; + dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[itauq], &work[itaup], + &work[nwork], &i__2, &ierr); + dbdsdc_((char *)"U", (char *)"I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[vt_offset], ldvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, m, m, &work[il], &ldwrkl, &work[itauq], &u[u_offset], ldu, + &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", m, m, m, &work[il], &ldwrkl, &work[itaup], &vt[vt_offset], + ldvt, &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl, (ftnlen)1); + dgemm_((char *)"N", (char *)"N", m, n, m, &c_b84, &work[il], &ldwrkl, &a[a_offset], lda, &c_b63, + &vt[vt_offset], ldvt, (ftnlen)1, (ftnlen)1); + } else if (wntqa) { + ivt = 1; + ldwkvt = *m; + itau = ivt + ldwkvt * *m; + nwork = itau + *m; + i__2 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__2, &ierr); + dlacpy_((char *)"U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[nwork], &i__2, &ierr); + i__2 = *m - 1; + i__1 = *m - 1; + dlaset_((char *)"U", &i__2, &i__1, &c_b63, &c_b63, &a[(a_dim1 << 1) + 1], lda, (ftnlen)1); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + i__2 = *lwork - nwork + 1; + dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup], + &work[nwork], &i__2, &ierr); + dbdsdc_((char *)"U", (char *)"I", m, &s[1], &work[ie], &u[u_offset], ldu, &work[ivt], &ldwkvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, m, m, &a[a_offset], lda, &work[itauq], &u[u_offset], ldu, + &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", m, m, m, &a[a_offset], lda, &work[itaup], &work[ivt], + &ldwkvt, &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + dgemm_((char *)"N", (char *)"N", m, n, m, &c_b84, &work[ivt], &ldwkvt, &vt[vt_offset], ldvt, &c_b63, + &a[a_offset], lda, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt, (ftnlen)1); + } + } else { + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + i__2 = *lwork - nwork + 1; + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup], + &work[nwork], &i__2, &ierr); + if (wntqn) { + dbdsdc_((char *)"L", (char *)"N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, dum, idum, + &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + } else if (wntqo) { + ldwkvt = *m; + ivt = nwork; + if (*lwork >= *m * *n + *m * 3 + bdspac) { + dlaset_((char *)"F", m, n, &c_b63, &c_b63, &work[ivt], &ldwkvt, (ftnlen)1); + nwork = ivt + ldwkvt * *n; + il = -1; + } else { + nwork = ivt + ldwkvt * *m; + il = nwork; + chunk = (*lwork - *m * *m - *m * 3) / *m; + } + dbdsdc_((char *)"L", (char *)"I", m, &s[1], &work[ie], &u[u_offset], ldu, &work[ivt], &ldwkvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, m, n, &a[a_offset], lda, &work[itauq], &u[u_offset], ldu, + &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + if (*lwork >= *m * *n + *m * 3 + bdspac) { + i__2 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", m, n, m, &a[a_offset], lda, &work[itaup], &work[ivt], + &ldwkvt, &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda, (ftnlen)1); + } else { + i__2 = *lwork - nwork + 1; + dorgbr_((char *)"P", m, n, m, &a[a_offset], lda, &work[itaup], &work[nwork], &i__2, + &ierr, (ftnlen)1); + i__2 = *n; + i__1 = chunk; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { + i__3 = *n - i__ + 1; + blk = min(i__3, chunk); + dgemm_((char *)"N", (char *)"N", m, &blk, m, &c_b84, &work[ivt], &ldwkvt, + &a[i__ * a_dim1 + 1], lda, &c_b63, &work[il], m, (ftnlen)1, + (ftnlen)1); + dlacpy_((char *)"F", m, &blk, &work[il], m, &a[i__ * a_dim1 + 1], lda, (ftnlen)1); + } + } + } else if (wntqs) { + dlaset_((char *)"F", m, n, &c_b63, &c_b63, &vt[vt_offset], ldvt, (ftnlen)1); + dbdsdc_((char *)"L", (char *)"I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[vt_offset], ldvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, m, n, &a[a_offset], lda, &work[itauq], &u[u_offset], ldu, + &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", m, n, m, &a[a_offset], lda, &work[itaup], &vt[vt_offset], + ldvt, &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } else if (wntqa) { + dlaset_((char *)"F", n, n, &c_b63, &c_b63, &vt[vt_offset], ldvt, (ftnlen)1); + dbdsdc_((char *)"L", (char *)"I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[vt_offset], ldvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + if (*n > *m) { + i__1 = *n - *m; + i__2 = *n - *m; + dlaset_((char *)"F", &i__1, &i__2, &c_b63, &c_b84, &vt[*m + 1 + (*m + 1) * vt_dim1], + ldvt, (ftnlen)1); + } + i__1 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, m, n, &a[a_offset], lda, &work[itauq], &u[u_offset], ldu, + &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", n, n, m, &a[a_offset], lda, &work[itaup], &vt[vt_offset], + ldvt, &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } + } + } + if (iscl == 1) { + if (anrm > bignum) { + dlascl_((char *)"G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &minmn, &ierr, + (ftnlen)1); + } + if (anrm < smlnum) { + dlascl_((char *)"G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &minmn, &ierr, + (ftnlen)1); + } + } + work[1] = (doublereal)maxwrk; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dhseqr.cpp b/lib/linalg/dhseqr.cpp new file mode 100644 index 0000000000..2ac0219858 --- /dev/null +++ b/lib/linalg/dhseqr.cpp @@ -0,0 +1,145 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublereal c_b11 = 0.; +static doublereal c_b12 = 1.; +static integer c__12 = 12; +static integer c__2 = 2; +static integer c__49 = 49; +int dhseqr_(char *job, char *compz, integer *n, integer *ilo, integer *ihi, doublereal *h__, + integer *ldh, doublereal *wr, doublereal *wi, doublereal *z__, integer *ldz, + doublereal *work, integer *lwork, integer *info, ftnlen job_len, ftnlen compz_len) +{ + address a__1[2]; + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2[2], i__3; + doublereal d__1; + char ch__1[2]; + int s_lmp_cat(char *, char **, integer *, integer *, ftnlen); + integer i__; + doublereal hl[2401]; + integer kbot, nmin; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + logical initz; + doublereal workl[49]; + logical wantt, wantz; + extern int dlaqr0_(logical *, logical *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *), + dlahqr_(logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, + integer *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen), + dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, + ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + extern int xerbla_(char *, integer *, ftnlen); + logical lquery; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + wantt = lsame_(job, (char *)"S", (ftnlen)1, (ftnlen)1); + initz = lsame_(compz, (char *)"I", (ftnlen)1, (ftnlen)1); + wantz = initz || lsame_(compz, (char *)"V", (ftnlen)1, (ftnlen)1); + work[1] = (doublereal)max(1, *n); + lquery = *lwork == -1; + *info = 0; + if (!lsame_(job, (char *)"E", (ftnlen)1, (ftnlen)1) && !wantt) { + *info = -1; + } else if (!lsame_(compz, (char *)"N", (ftnlen)1, (ftnlen)1) && !wantz) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > max(1, *n)) { + *info = -4; + } else if (*ihi < min(*ilo, *n) || *ihi > *n) { + *info = -5; + } else if (*ldh < max(1, *n)) { + *info = -7; + } else if (*ldz < 1 || wantz && *ldz < max(1, *n)) { + *info = -11; + } else if (*lwork < max(1, *n) && !lquery) { + *info = -13; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DHSEQR", &i__1, (ftnlen)6); + return 0; + } else if (*n == 0) { + return 0; + } else if (lquery) { + dlaqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[1], ilo, ihi, + &z__[z_offset], ldz, &work[1], lwork, info); + d__1 = (doublereal)max(1, *n); + work[1] = max(d__1, work[1]); + return 0; + } else { + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.; + } + i__1 = *n; + for (i__ = *ihi + 1; i__ <= i__1; ++i__) { + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.; + } + if (initz) { + dlaset_((char *)"A", n, n, &c_b11, &c_b12, &z__[z_offset], ldz, (ftnlen)1); + } + if (*ilo == *ihi) { + wr[*ilo] = h__[*ilo + *ilo * h_dim1]; + wi[*ilo] = 0.; + return 0; + } + i__2[0] = 1, a__1[0] = job; + i__2[1] = 1, a__1[1] = compz; + s_lmp_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2); + nmin = ilaenv_(&c__12, (char *)"DHSEQR", ch__1, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + nmin = max(11, nmin); + if (*n > nmin) { + dlaqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[1], ilo, ihi, + &z__[z_offset], ldz, &work[1], lwork, info); + } else { + dlahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[1], ilo, ihi, + &z__[z_offset], ldz, info); + if (*info > 0) { + kbot = *info; + if (*n >= 49) { + dlaqr0_(&wantt, &wantz, n, ilo, &kbot, &h__[h_offset], ldh, &wr[1], &wi[1], ilo, + ihi, &z__[z_offset], ldz, &work[1], lwork, info); + } else { + dlacpy_((char *)"A", n, n, &h__[h_offset], ldh, hl, &c__49, (ftnlen)1); + hl[*n + 1 + *n * 49 - 50] = 0.; + i__1 = 49 - *n; + dlaset_((char *)"A", &c__49, &i__1, &c_b11, &c_b11, &hl[(*n + 1) * 49 - 49], &c__49, + (ftnlen)1); + dlaqr0_(&wantt, &wantz, &c__49, ilo, &kbot, hl, &c__49, &wr[1], &wi[1], ilo, + ihi, &z__[z_offset], ldz, workl, &c__49, info); + if (wantt || *info != 0) { + dlacpy_((char *)"A", n, n, hl, &c__49, &h__[h_offset], ldh, (ftnlen)1); + } + } + } + } + if ((wantt || *info != 0) && *n > 2) { + i__1 = *n - 2; + i__3 = *n - 2; + dlaset_((char *)"L", &i__1, &i__3, &c_b11, &c_b11, &h__[h_dim1 + 3], ldh, (ftnlen)1); + } + d__1 = (doublereal)max(1, *n); + work[1] = max(d__1, work[1]); + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlaexc.cpp b/lib/linalg/dlaexc.cpp new file mode 100644 index 0000000000..9d528080cf --- /dev/null +++ b/lib/linalg/dlaexc.cpp @@ -0,0 +1,214 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c__4 = 4; +static logical c_false = FALSE_; +static integer c_n1 = -1; +static integer c__2 = 2; +static integer c__3 = 3; +int dlaexc_(logical *wantq, integer *n, doublereal *t, integer *ldt, doublereal *q, integer *ldq, + integer *j1, integer *n1, integer *n2, doublereal *work, integer *info) +{ + integer q_dim1, q_offset, t_dim1, t_offset, i__1; + doublereal d__1, d__2, d__3; + doublereal d__[16]; + integer k; + doublereal u[3], x[4]; + integer j2, j3, j4; + doublereal u1[3], u2[3]; + integer nd; + doublereal cs, t11, t22, t33, sn, wi1, wi2, wr1, wr2, eps, tau, tau1, tau2; + integer ierr; + doublereal temp; + extern int drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *); + doublereal scale, dnorm, xnorm; + extern int dlanv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), + dlasy2_(logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + extern doublereal dlamch_(char *, ftnlen), + dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *, ftnlen); + extern int dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen), + dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), + dlarfx_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, ftnlen); + doublereal thresh, smlnum; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --work; + *info = 0; + if (*n == 0 || *n1 == 0 || *n2 == 0) { + return 0; + } + if (*j1 + *n1 > *n) { + return 0; + } + j2 = *j1 + 1; + j3 = *j1 + 2; + j4 = *j1 + 3; + if (*n1 == 1 && *n2 == 1) { + t11 = t[*j1 + *j1 * t_dim1]; + t22 = t[j2 + j2 * t_dim1]; + d__1 = t22 - t11; + dlartg_(&t[*j1 + j2 * t_dim1], &d__1, &cs, &sn, &temp); + if (j3 <= *n) { + i__1 = *n - *j1 - 1; + drot_(&i__1, &t[*j1 + j3 * t_dim1], ldt, &t[j2 + j3 * t_dim1], ldt, &cs, &sn); + } + i__1 = *j1 - 1; + drot_(&i__1, &t[*j1 * t_dim1 + 1], &c__1, &t[j2 * t_dim1 + 1], &c__1, &cs, &sn); + t[*j1 + *j1 * t_dim1] = t22; + t[j2 + j2 * t_dim1] = t11; + if (*wantq) { + drot_(n, &q[*j1 * q_dim1 + 1], &c__1, &q[j2 * q_dim1 + 1], &c__1, &cs, &sn); + } + } else { + nd = *n1 + *n2; + dlacpy_((char *)"Full", &nd, &nd, &t[*j1 + *j1 * t_dim1], ldt, d__, &c__4, (ftnlen)4); + dnorm = dlange_((char *)"Max", &nd, &nd, d__, &c__4, &work[1], (ftnlen)3); + eps = dlamch_((char *)"P", (ftnlen)1); + smlnum = dlamch_((char *)"S", (ftnlen)1) / eps; + d__1 = eps * 10. * dnorm; + thresh = max(d__1, smlnum); + dlasy2_(&c_false, &c_false, &c_n1, n1, n2, d__, &c__4, &d__[*n1 + 1 + (*n1 + 1 << 2) - 5], + &c__4, &d__[(*n1 + 1 << 2) - 4], &c__4, &scale, x, &c__2, &xnorm, &ierr); + k = *n1 + *n1 + *n2 - 3; + switch (k) { + case 1: + goto L10; + case 2: + goto L20; + case 3: + goto L30; + } + L10: + u[0] = scale; + u[1] = x[0]; + u[2] = x[2]; + dlarfg_(&c__3, &u[2], u, &c__1, &tau); + u[2] = 1.; + t11 = t[*j1 + *j1 * t_dim1]; + dlarfx_((char *)"L", &c__3, &c__3, u, &tau, d__, &c__4, &work[1], (ftnlen)1); + dlarfx_((char *)"R", &c__3, &c__3, u, &tau, d__, &c__4, &work[1], (ftnlen)1); + d__2 = abs(d__[2]), d__3 = abs(d__[6]), d__2 = max(d__2, d__3), + d__3 = (d__1 = d__[10] - t11, abs(d__1)); + if (max(d__2, d__3) > thresh) { + goto L50; + } + i__1 = *n - *j1 + 1; + dlarfx_((char *)"L", &c__3, &i__1, u, &tau, &t[*j1 + *j1 * t_dim1], ldt, &work[1], (ftnlen)1); + dlarfx_((char *)"R", &j2, &c__3, u, &tau, &t[*j1 * t_dim1 + 1], ldt, &work[1], (ftnlen)1); + t[j3 + *j1 * t_dim1] = 0.; + t[j3 + j2 * t_dim1] = 0.; + t[j3 + j3 * t_dim1] = t11; + if (*wantq) { + dlarfx_((char *)"R", n, &c__3, u, &tau, &q[*j1 * q_dim1 + 1], ldq, &work[1], (ftnlen)1); + } + goto L40; + L20: + u[0] = -x[0]; + u[1] = -x[1]; + u[2] = scale; + dlarfg_(&c__3, u, &u[1], &c__1, &tau); + u[0] = 1.; + t33 = t[j3 + j3 * t_dim1]; + dlarfx_((char *)"L", &c__3, &c__3, u, &tau, d__, &c__4, &work[1], (ftnlen)1); + dlarfx_((char *)"R", &c__3, &c__3, u, &tau, d__, &c__4, &work[1], (ftnlen)1); + d__2 = abs(d__[1]), d__3 = abs(d__[2]), d__2 = max(d__2, d__3), + d__3 = (d__1 = d__[0] - t33, abs(d__1)); + if (max(d__2, d__3) > thresh) { + goto L50; + } + dlarfx_((char *)"R", &j3, &c__3, u, &tau, &t[*j1 * t_dim1 + 1], ldt, &work[1], (ftnlen)1); + i__1 = *n - *j1; + dlarfx_((char *)"L", &c__3, &i__1, u, &tau, &t[*j1 + j2 * t_dim1], ldt, &work[1], (ftnlen)1); + t[*j1 + *j1 * t_dim1] = t33; + t[j2 + *j1 * t_dim1] = 0.; + t[j3 + *j1 * t_dim1] = 0.; + if (*wantq) { + dlarfx_((char *)"R", n, &c__3, u, &tau, &q[*j1 * q_dim1 + 1], ldq, &work[1], (ftnlen)1); + } + goto L40; + L30: + u1[0] = -x[0]; + u1[1] = -x[1]; + u1[2] = scale; + dlarfg_(&c__3, u1, &u1[1], &c__1, &tau1); + u1[0] = 1.; + temp = -tau1 * (x[2] + u1[1] * x[3]); + u2[0] = -temp * u1[1] - x[3]; + u2[1] = -temp * u1[2]; + u2[2] = scale; + dlarfg_(&c__3, u2, &u2[1], &c__1, &tau2); + u2[0] = 1.; + dlarfx_((char *)"L", &c__3, &c__4, u1, &tau1, d__, &c__4, &work[1], (ftnlen)1); + dlarfx_((char *)"R", &c__4, &c__3, u1, &tau1, d__, &c__4, &work[1], (ftnlen)1); + dlarfx_((char *)"L", &c__3, &c__4, u2, &tau2, &d__[1], &c__4, &work[1], (ftnlen)1); + dlarfx_((char *)"R", &c__4, &c__3, u2, &tau2, &d__[4], &c__4, &work[1], (ftnlen)1); + d__1 = abs(d__[2]), d__2 = abs(d__[6]), d__1 = max(d__1, d__2), d__2 = abs(d__[3]), + d__1 = max(d__1, d__2), d__2 = abs(d__[7]); + if (max(d__1, d__2) > thresh) { + goto L50; + } + i__1 = *n - *j1 + 1; + dlarfx_((char *)"L", &c__3, &i__1, u1, &tau1, &t[*j1 + *j1 * t_dim1], ldt, &work[1], (ftnlen)1); + dlarfx_((char *)"R", &j4, &c__3, u1, &tau1, &t[*j1 * t_dim1 + 1], ldt, &work[1], (ftnlen)1); + i__1 = *n - *j1 + 1; + dlarfx_((char *)"L", &c__3, &i__1, u2, &tau2, &t[j2 + *j1 * t_dim1], ldt, &work[1], (ftnlen)1); + dlarfx_((char *)"R", &j4, &c__3, u2, &tau2, &t[j2 * t_dim1 + 1], ldt, &work[1], (ftnlen)1); + t[j3 + *j1 * t_dim1] = 0.; + t[j3 + j2 * t_dim1] = 0.; + t[j4 + *j1 * t_dim1] = 0.; + t[j4 + j2 * t_dim1] = 0.; + if (*wantq) { + dlarfx_((char *)"R", n, &c__3, u1, &tau1, &q[*j1 * q_dim1 + 1], ldq, &work[1], (ftnlen)1); + dlarfx_((char *)"R", n, &c__3, u2, &tau2, &q[j2 * q_dim1 + 1], ldq, &work[1], (ftnlen)1); + } + L40: + if (*n2 == 2) { + dlanv2_(&t[*j1 + *j1 * t_dim1], &t[*j1 + j2 * t_dim1], &t[j2 + *j1 * t_dim1], + &t[j2 + j2 * t_dim1], &wr1, &wi1, &wr2, &wi2, &cs, &sn); + i__1 = *n - *j1 - 1; + drot_(&i__1, &t[*j1 + (*j1 + 2) * t_dim1], ldt, &t[j2 + (*j1 + 2) * t_dim1], ldt, &cs, + &sn); + i__1 = *j1 - 1; + drot_(&i__1, &t[*j1 * t_dim1 + 1], &c__1, &t[j2 * t_dim1 + 1], &c__1, &cs, &sn); + if (*wantq) { + drot_(n, &q[*j1 * q_dim1 + 1], &c__1, &q[j2 * q_dim1 + 1], &c__1, &cs, &sn); + } + } + if (*n1 == 2) { + j3 = *j1 + *n2; + j4 = j3 + 1; + dlanv2_(&t[j3 + j3 * t_dim1], &t[j3 + j4 * t_dim1], &t[j4 + j3 * t_dim1], + &t[j4 + j4 * t_dim1], &wr1, &wi1, &wr2, &wi2, &cs, &sn); + if (j3 + 2 <= *n) { + i__1 = *n - j3 - 1; + drot_(&i__1, &t[j3 + (j3 + 2) * t_dim1], ldt, &t[j4 + (j3 + 2) * t_dim1], ldt, &cs, + &sn); + } + i__1 = j3 - 1; + drot_(&i__1, &t[j3 * t_dim1 + 1], &c__1, &t[j4 * t_dim1 + 1], &c__1, &cs, &sn); + if (*wantq) { + drot_(n, &q[j3 * q_dim1 + 1], &c__1, &q[j4 * q_dim1 + 1], &c__1, &cs, &sn); + } + } + } + return 0; +L50: + *info = 1; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlahqr.cpp b/lib/linalg/dlahqr.cpp new file mode 100644 index 0000000000..c2f2775b9b --- /dev/null +++ b/lib/linalg/dlahqr.cpp @@ -0,0 +1,311 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +int dlahqr_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, doublereal *h__, + integer *ldh, doublereal *wr, doublereal *wi, integer *iloz, integer *ihiz, + doublereal *z__, integer *ldz, integer *info) +{ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4; + double sqrt(doublereal); + integer i__, j, k, l, m; + doublereal s, v[3]; + integer i1, i2; + doublereal t1, t2, t3, v2, v3, aa, ab, ba, bb, h11, h12, h21, h22, cs; + integer nh; + doublereal sn; + integer nr; + doublereal tr; + integer nz; + doublereal det, h21s; + integer its; + doublereal ulp, sum, tst, rt1i, rt2i, rt1r, rt2r; + extern int drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *), + dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); + integer itmax; + extern int dlanv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), + dlabad_(doublereal *, doublereal *); + extern doublereal dlamch_(char *, ftnlen); + extern int dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *); + doublereal safmin, safmax, rtdisc, smlnum; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + *info = 0; + if (*n == 0) { + return 0; + } + if (*ilo == *ihi) { + wr[*ilo] = h__[*ilo + *ilo * h_dim1]; + wi[*ilo] = 0.; + return 0; + } + i__1 = *ihi - 3; + for (j = *ilo; j <= i__1; ++j) { + h__[j + 2 + j * h_dim1] = 0.; + h__[j + 3 + j * h_dim1] = 0.; + } + if (*ilo <= *ihi - 2) { + h__[*ihi + (*ihi - 2) * h_dim1] = 0.; + } + nh = *ihi - *ilo + 1; + nz = *ihiz - *iloz + 1; + safmin = dlamch_((char *)"SAFE MINIMUM", (ftnlen)12); + safmax = 1. / safmin; + dlabad_(&safmin, &safmax); + ulp = dlamch_((char *)"PRECISION", (ftnlen)9); + smlnum = safmin * ((doublereal)nh / ulp); + if (*wantt) { + i1 = 1; + i2 = *n; + } + itmax = max(10, nh) * 30; + i__ = *ihi; +L20: + l = *ilo; + if (i__ < *ilo) { + goto L160; + } + i__1 = itmax; + for (its = 0; its <= i__1; ++its) { + i__2 = l + 1; + for (k = i__; k >= i__2; --k) { + if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= smlnum) { + goto L40; + } + tst = (d__1 = h__[k - 1 + (k - 1) * h_dim1], abs(d__1)) + + (d__2 = h__[k + k * h_dim1], abs(d__2)); + if (tst == 0.) { + if (k - 2 >= *ilo) { + tst += (d__1 = h__[k - 1 + (k - 2) * h_dim1], abs(d__1)); + } + if (k + 1 <= *ihi) { + tst += (d__1 = h__[k + 1 + k * h_dim1], abs(d__1)); + } + } + if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= ulp * tst) { + d__3 = (d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)), + d__4 = (d__2 = h__[k - 1 + k * h_dim1], abs(d__2)); + ab = max(d__3, d__4); + d__3 = (d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)), + d__4 = (d__2 = h__[k - 1 + k * h_dim1], abs(d__2)); + ba = min(d__3, d__4); + d__3 = (d__1 = h__[k + k * h_dim1], abs(d__1)), + d__4 = (d__2 = h__[k - 1 + (k - 1) * h_dim1] - h__[k + k * h_dim1], abs(d__2)); + aa = max(d__3, d__4); + d__3 = (d__1 = h__[k + k * h_dim1], abs(d__1)), + d__4 = (d__2 = h__[k - 1 + (k - 1) * h_dim1] - h__[k + k * h_dim1], abs(d__2)); + bb = min(d__3, d__4); + s = aa + ab; + d__1 = smlnum, d__2 = ulp * (bb * (aa / s)); + if (ba * (ab / s) <= max(d__1, d__2)) { + goto L40; + } + } + } + L40: + l = k; + if (l > *ilo) { + h__[l + (l - 1) * h_dim1] = 0.; + } + if (l >= i__ - 1) { + goto L150; + } + if (!(*wantt)) { + i1 = l; + i2 = i__; + } + if (its == 10) { + s = (d__1 = h__[l + 1 + l * h_dim1], abs(d__1)) + + (d__2 = h__[l + 2 + (l + 1) * h_dim1], abs(d__2)); + h11 = s * .75 + h__[l + l * h_dim1]; + h12 = s * -.4375; + h21 = s; + h22 = h11; + } else if (its == 20) { + s = (d__1 = h__[i__ + (i__ - 1) * h_dim1], abs(d__1)) + + (d__2 = h__[i__ - 1 + (i__ - 2) * h_dim1], abs(d__2)); + h11 = s * .75 + h__[i__ + i__ * h_dim1]; + h12 = s * -.4375; + h21 = s; + h22 = h11; + } else { + h11 = h__[i__ - 1 + (i__ - 1) * h_dim1]; + h21 = h__[i__ + (i__ - 1) * h_dim1]; + h12 = h__[i__ - 1 + i__ * h_dim1]; + h22 = h__[i__ + i__ * h_dim1]; + } + s = abs(h11) + abs(h12) + abs(h21) + abs(h22); + if (s == 0.) { + rt1r = 0.; + rt1i = 0.; + rt2r = 0.; + rt2i = 0.; + } else { + h11 /= s; + h21 /= s; + h12 /= s; + h22 /= s; + tr = (h11 + h22) / 2.; + det = (h11 - tr) * (h22 - tr) - h12 * h21; + rtdisc = sqrt((abs(det))); + if (det >= 0.) { + rt1r = tr * s; + rt2r = rt1r; + rt1i = rtdisc * s; + rt2i = -rt1i; + } else { + rt1r = tr + rtdisc; + rt2r = tr - rtdisc; + if ((d__1 = rt1r - h22, abs(d__1)) <= (d__2 = rt2r - h22, abs(d__2))) { + rt1r *= s; + rt2r = rt1r; + } else { + rt2r *= s; + rt1r = rt2r; + } + rt1i = 0.; + rt2i = 0.; + } + } + i__2 = l; + for (m = i__ - 2; m >= i__2; --m) { + h21s = h__[m + 1 + m * h_dim1]; + s = (d__1 = h__[m + m * h_dim1] - rt2r, abs(d__1)) + abs(rt2i) + abs(h21s); + h21s = h__[m + 1 + m * h_dim1] / s; + v[0] = h21s * h__[m + (m + 1) * h_dim1] + + (h__[m + m * h_dim1] - rt1r) * ((h__[m + m * h_dim1] - rt2r) / s) - + rt1i * (rt2i / s); + v[1] = h21s * (h__[m + m * h_dim1] + h__[m + 1 + (m + 1) * h_dim1] - rt1r - rt2r); + v[2] = h21s * h__[m + 2 + (m + 1) * h_dim1]; + s = abs(v[0]) + abs(v[1]) + abs(v[2]); + v[0] /= s; + v[1] /= s; + v[2] /= s; + if (m == l) { + goto L60; + } + if ((d__1 = h__[m + (m - 1) * h_dim1], abs(d__1)) * (abs(v[1]) + abs(v[2])) <= + ulp * abs(v[0]) * + ((d__2 = h__[m - 1 + (m - 1) * h_dim1], abs(d__2)) + + (d__3 = h__[m + m * h_dim1], abs(d__3)) + + (d__4 = h__[m + 1 + (m + 1) * h_dim1], abs(d__4)))) { + goto L60; + } + } + L60: + i__2 = i__ - 1; + for (k = m; k <= i__2; ++k) { + i__3 = 3, i__4 = i__ - k + 1; + nr = min(i__3, i__4); + if (k > m) { + dcopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); + } + dlarfg_(&nr, v, &v[1], &c__1, &t1); + if (k > m) { + h__[k + (k - 1) * h_dim1] = v[0]; + h__[k + 1 + (k - 1) * h_dim1] = 0.; + if (k < i__ - 1) { + h__[k + 2 + (k - 1) * h_dim1] = 0.; + } + } else if (m > l) { + h__[k + (k - 1) * h_dim1] *= 1. - t1; + } + v2 = v[1]; + t2 = t1 * v2; + if (nr == 3) { + v3 = v[2]; + t3 = t1 * v3; + i__3 = i2; + for (j = k; j <= i__3; ++j) { + sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1] + + v3 * h__[k + 2 + j * h_dim1]; + h__[k + j * h_dim1] -= sum * t1; + h__[k + 1 + j * h_dim1] -= sum * t2; + h__[k + 2 + j * h_dim1] -= sum * t3; + } + i__4 = k + 3; + i__3 = min(i__4, i__); + for (j = i1; j <= i__3; ++j) { + sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] + + v3 * h__[j + (k + 2) * h_dim1]; + h__[j + k * h_dim1] -= sum * t1; + h__[j + (k + 1) * h_dim1] -= sum * t2; + h__[j + (k + 2) * h_dim1] -= sum * t3; + } + if (*wantz) { + i__3 = *ihiz; + for (j = *iloz; j <= i__3; ++j) { + sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * z_dim1] + + v3 * z__[j + (k + 2) * z_dim1]; + z__[j + k * z_dim1] -= sum * t1; + z__[j + (k + 1) * z_dim1] -= sum * t2; + z__[j + (k + 2) * z_dim1] -= sum * t3; + } + } + } else if (nr == 2) { + i__3 = i2; + for (j = k; j <= i__3; ++j) { + sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1]; + h__[k + j * h_dim1] -= sum * t1; + h__[k + 1 + j * h_dim1] -= sum * t2; + } + i__3 = i__; + for (j = i1; j <= i__3; ++j) { + sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1]; + h__[j + k * h_dim1] -= sum * t1; + h__[j + (k + 1) * h_dim1] -= sum * t2; + } + if (*wantz) { + i__3 = *ihiz; + for (j = *iloz; j <= i__3; ++j) { + sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * z_dim1]; + z__[j + k * z_dim1] -= sum * t1; + z__[j + (k + 1) * z_dim1] -= sum * t2; + } + } + } + } + } + *info = i__; + return 0; +L150: + if (l == i__) { + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.; + } else if (l == i__ - 1) { + dlanv2_(&h__[i__ - 1 + (i__ - 1) * h_dim1], &h__[i__ - 1 + i__ * h_dim1], + &h__[i__ + (i__ - 1) * h_dim1], &h__[i__ + i__ * h_dim1], &wr[i__ - 1], + &wi[i__ - 1], &wr[i__], &wi[i__], &cs, &sn); + if (*wantt) { + if (i2 > i__) { + i__1 = i2 - i__; + drot_(&i__1, &h__[i__ - 1 + (i__ + 1) * h_dim1], ldh, + &h__[i__ + (i__ + 1) * h_dim1], ldh, &cs, &sn); + } + i__1 = i__ - i1 - 1; + drot_(&i__1, &h__[i1 + (i__ - 1) * h_dim1], &c__1, &h__[i1 + i__ * h_dim1], &c__1, &cs, + &sn); + } + if (*wantz) { + drot_(&nz, &z__[*iloz + (i__ - 1) * z_dim1], &c__1, &z__[*iloz + i__ * z_dim1], &c__1, + &cs, &sn); + } + } + i__ = l - 1; + goto L20; +L160: + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlahr2.cpp b/lib/linalg/dlahr2.cpp new file mode 100644 index 0000000000..36264e950f --- /dev/null +++ b/lib/linalg/dlahr2.cpp @@ -0,0 +1,121 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublereal c_b4 = -1.; +static doublereal c_b5 = 1.; +static integer c__1 = 1; +static doublereal c_b38 = 0.; +int dlahr2_(integer *n, integer *k, integer *nb, doublereal *a, integer *lda, doublereal *tau, + doublereal *t, integer *ldt, doublereal *y, integer *ldy) +{ + integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, i__3; + doublereal d__1; + integer i__; + doublereal ei; + extern int dscal_(integer *, doublereal *, doublereal *, integer *), + dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, + ftnlen), + dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, ftnlen), + dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), + dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), + daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), + dtrmv_(char *, char *, char *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen, ftnlen, ftnlen), + dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen); + --tau; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + if (*n <= 1) { + return 0; + } + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ > 1) { + i__2 = *n - *k; + i__3 = i__ - 1; + dgemv_((char *)"NO TRANSPOSE", &i__2, &i__3, &c_b4, &y[*k + 1 + y_dim1], ldy, + &a[*k + i__ - 1 + a_dim1], lda, &c_b5, &a[*k + 1 + i__ * a_dim1], &c__1, + (ftnlen)12); + i__2 = i__ - 1; + dcopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + dtrmv_((char *)"Lower", (char *)"Transpose", (char *)"UNIT", &i__2, &a[*k + 1 + a_dim1], lda, + &t[*nb * t_dim1 + 1], &c__1, (ftnlen)5, (ftnlen)9, (ftnlen)4); + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &a[*k + i__ + a_dim1], lda, + &a[*k + i__ + i__ * a_dim1], &c__1, &c_b5, &t[*nb * t_dim1 + 1], &c__1, + (ftnlen)9); + i__2 = i__ - 1; + dtrmv_((char *)"Upper", (char *)"Transpose", (char *)"NON-UNIT", &i__2, &t[t_offset], ldt, &t[*nb * t_dim1 + 1], + &c__1, (ftnlen)5, (ftnlen)9, (ftnlen)8); + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + dgemv_((char *)"NO TRANSPOSE", &i__2, &i__3, &c_b4, &a[*k + i__ + a_dim1], lda, + &t[*nb * t_dim1 + 1], &c__1, &c_b5, &a[*k + i__ + i__ * a_dim1], &c__1, + (ftnlen)12); + i__2 = i__ - 1; + dtrmv_((char *)"Lower", (char *)"NO TRANSPOSE", (char *)"UNIT", &i__2, &a[*k + 1 + a_dim1], lda, + &t[*nb * t_dim1 + 1], &c__1, (ftnlen)5, (ftnlen)12, (ftnlen)4); + i__2 = i__ - 1; + daxpy_(&i__2, &c_b4, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + i__ * a_dim1], &c__1); + a[*k + i__ - 1 + (i__ - 1) * a_dim1] = ei; + } + i__2 = *n - *k - i__ + 1; + i__3 = *k + i__ + 1; + dlarfg_(&i__2, &a[*k + i__ + i__ * a_dim1], &a[min(i__3, *n) + i__ * a_dim1], &c__1, + &tau[i__]); + ei = a[*k + i__ + i__ * a_dim1]; + a[*k + i__ + i__ * a_dim1] = 1.; + i__2 = *n - *k; + i__3 = *n - *k - i__ + 1; + dgemv_((char *)"NO TRANSPOSE", &i__2, &i__3, &c_b5, &a[*k + 1 + (i__ + 1) * a_dim1], lda, + &a[*k + i__ + i__ * a_dim1], &c__1, &c_b38, &y[*k + 1 + i__ * y_dim1], &c__1, + (ftnlen)12); + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &a[*k + i__ + a_dim1], lda, + &a[*k + i__ + i__ * a_dim1], &c__1, &c_b38, &t[i__ * t_dim1 + 1], &c__1, (ftnlen)9); + i__2 = *n - *k; + i__3 = i__ - 1; + dgemv_((char *)"NO TRANSPOSE", &i__2, &i__3, &c_b4, &y[*k + 1 + y_dim1], ldy, &t[i__ * t_dim1 + 1], + &c__1, &c_b5, &y[*k + 1 + i__ * y_dim1], &c__1, (ftnlen)12); + i__2 = *n - *k; + dscal_(&i__2, &tau[i__], &y[*k + 1 + i__ * y_dim1], &c__1); + i__2 = i__ - 1; + d__1 = -tau[i__]; + dscal_(&i__2, &d__1, &t[i__ * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + dtrmv_((char *)"Upper", (char *)"No Transpose", (char *)"NON-UNIT", &i__2, &t[t_offset], ldt, &t[i__ * t_dim1 + 1], + &c__1, (ftnlen)5, (ftnlen)12, (ftnlen)8); + t[i__ + i__ * t_dim1] = tau[i__]; + } + a[*k + *nb + *nb * a_dim1] = ei; + dlacpy_((char *)"ALL", k, nb, &a[(a_dim1 << 1) + 1], lda, &y[y_offset], ldy, (ftnlen)3); + dtrmm_((char *)"RIGHT", (char *)"Lower", (char *)"NO TRANSPOSE", (char *)"UNIT", k, nb, &c_b5, &a[*k + 1 + a_dim1], lda, + &y[y_offset], ldy, (ftnlen)5, (ftnlen)5, (ftnlen)12, (ftnlen)4); + if (*n > *k + *nb) { + i__1 = *n - *k - *nb; + dgemm_((char *)"NO TRANSPOSE", (char *)"NO TRANSPOSE", k, nb, &i__1, &c_b5, &a[(*nb + 2) * a_dim1 + 1], lda, + &a[*k + 1 + *nb + a_dim1], lda, &c_b5, &y[y_offset], ldy, (ftnlen)12, (ftnlen)12); + } + dtrmm_((char *)"RIGHT", (char *)"Upper", (char *)"NO TRANSPOSE", (char *)"NON-UNIT", k, nb, &c_b5, &t[t_offset], ldt, + &y[y_offset], ldy, (ftnlen)5, (ftnlen)5, (ftnlen)12, (ftnlen)8); + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlaln2.cpp b/lib/linalg/dlaln2.cpp new file mode 100644 index 0000000000..220eaae63a --- /dev/null +++ b/lib/linalg/dlaln2.cpp @@ -0,0 +1,298 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int dlaln2_(logical *ltrans, integer *na, integer *nw, doublereal *smin, doublereal *ca, + doublereal *a, integer *lda, doublereal *d1, doublereal *d2, doublereal *b, + integer *ldb, doublereal *wr, doublereal *wi, doublereal *x, integer *ldx, + doublereal *scale, doublereal *xnorm, integer *info) +{ + static logical zswap[4] = {FALSE_, FALSE_, TRUE_, TRUE_}; + static logical rswap[4] = {FALSE_, TRUE_, FALSE_, TRUE_}; + static integer ipivot[16] = {1, 2, 3, 4, 2, 1, 4, 3, 3, 4, 1, 2, 4, 3, 2, 1}; + integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + static doublereal equiv_0[4], equiv_1[4]; + integer j; +#define ci (equiv_0) +#define cr (equiv_1) + doublereal bi1, bi2, br1, br2, xi1, xi2, xr1, xr2, ci21, ci22, cr21, cr22, li21, csi, ui11, + lr21, ui12, ui22; +#define civ (equiv_0) + doublereal csr, ur11, ur12, ur22; +#define crv (equiv_1) + doublereal bbnd, cmax, ui11r, ui12s, temp, ur11r, ur12s, u22abs; + integer icmax; + doublereal bnorm, cnorm, smini; + extern doublereal dlamch_(char *, ftnlen); + extern int dladiv_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *); + doublereal bignum, smlnum; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + smlnum = 2. * dlamch_((char *)"Safe minimum", (ftnlen)12); + bignum = 1. / smlnum; + smini = max(*smin, smlnum); + *info = 0; + *scale = 1.; + if (*na == 1) { + if (*nw == 1) { + csr = *ca * a[a_dim1 + 1] - *wr * *d1; + cnorm = abs(csr); + if (cnorm < smini) { + csr = smini; + cnorm = smini; + *info = 1; + } + bnorm = (d__1 = b[b_dim1 + 1], abs(d__1)); + if (cnorm < 1. && bnorm > 1.) { + if (bnorm > bignum * cnorm) { + *scale = 1. / bnorm; + } + } + x[x_dim1 + 1] = b[b_dim1 + 1] * *scale / csr; + *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)); + } else { + csr = *ca * a[a_dim1 + 1] - *wr * *d1; + csi = -(*wi) * *d1; + cnorm = abs(csr) + abs(csi); + if (cnorm < smini) { + csr = smini; + csi = 0.; + cnorm = smini; + *info = 1; + } + bnorm = (d__1 = b[b_dim1 + 1], abs(d__1)) + (d__2 = b[(b_dim1 << 1) + 1], abs(d__2)); + if (cnorm < 1. && bnorm > 1.) { + if (bnorm > bignum * cnorm) { + *scale = 1. / bnorm; + } + } + d__1 = *scale * b[b_dim1 + 1]; + d__2 = *scale * b[(b_dim1 << 1) + 1]; + dladiv_(&d__1, &d__2, &csr, &csi, &x[x_dim1 + 1], &x[(x_dim1 << 1) + 1]); + *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)) + (d__2 = x[(x_dim1 << 1) + 1], abs(d__2)); + } + } else { + cr[0] = *ca * a[a_dim1 + 1] - *wr * *d1; + cr[3] = *ca * a[(a_dim1 << 1) + 2] - *wr * *d2; + if (*ltrans) { + cr[2] = *ca * a[a_dim1 + 2]; + cr[1] = *ca * a[(a_dim1 << 1) + 1]; + } else { + cr[1] = *ca * a[a_dim1 + 2]; + cr[2] = *ca * a[(a_dim1 << 1) + 1]; + } + if (*nw == 1) { + cmax = 0.; + icmax = 0; + for (j = 1; j <= 4; ++j) { + if ((d__1 = crv[j - 1], abs(d__1)) > cmax) { + cmax = (d__1 = crv[j - 1], abs(d__1)); + icmax = j; + } + } + if (cmax < smini) { + d__3 = (d__1 = b[b_dim1 + 1], abs(d__1)), d__4 = (d__2 = b[b_dim1 + 2], abs(d__2)); + bnorm = max(d__3, d__4); + if (smini < 1. && bnorm > 1.) { + if (bnorm > bignum * smini) { + *scale = 1. / bnorm; + } + } + temp = *scale / smini; + x[x_dim1 + 1] = temp * b[b_dim1 + 1]; + x[x_dim1 + 2] = temp * b[b_dim1 + 2]; + *xnorm = temp * bnorm; + *info = 1; + return 0; + } + ur11 = crv[icmax - 1]; + cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; + ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; + cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; + ur11r = 1. / ur11; + lr21 = ur11r * cr21; + ur22 = cr22 - ur12 * lr21; + if (abs(ur22) < smini) { + ur22 = smini; + *info = 1; + } + if (rswap[icmax - 1]) { + br1 = b[b_dim1 + 2]; + br2 = b[b_dim1 + 1]; + } else { + br1 = b[b_dim1 + 1]; + br2 = b[b_dim1 + 2]; + } + br2 -= lr21 * br1; + d__2 = (d__1 = br1 * (ur22 * ur11r), abs(d__1)), d__3 = abs(br2); + bbnd = max(d__2, d__3); + if (bbnd > 1. && abs(ur22) < 1.) { + if (bbnd >= bignum * abs(ur22)) { + *scale = 1. / bbnd; + } + } + xr2 = br2 * *scale / ur22; + xr1 = *scale * br1 * ur11r - xr2 * (ur11r * ur12); + if (zswap[icmax - 1]) { + x[x_dim1 + 1] = xr2; + x[x_dim1 + 2] = xr1; + } else { + x[x_dim1 + 1] = xr1; + x[x_dim1 + 2] = xr2; + } + d__1 = abs(xr1), d__2 = abs(xr2); + *xnorm = max(d__1, d__2); + if (*xnorm > 1. && cmax > 1.) { + if (*xnorm > bignum / cmax) { + temp = cmax / bignum; + x[x_dim1 + 1] = temp * x[x_dim1 + 1]; + x[x_dim1 + 2] = temp * x[x_dim1 + 2]; + *xnorm = temp * *xnorm; + *scale = temp * *scale; + } + } + } else { + ci[0] = -(*wi) * *d1; + ci[1] = 0.; + ci[2] = 0.; + ci[3] = -(*wi) * *d2; + cmax = 0.; + icmax = 0; + for (j = 1; j <= 4; ++j) { + if ((d__1 = crv[j - 1], abs(d__1)) + (d__2 = civ[j - 1], abs(d__2)) > cmax) { + cmax = (d__1 = crv[j - 1], abs(d__1)) + (d__2 = civ[j - 1], abs(d__2)); + icmax = j; + } + } + if (cmax < smini) { + d__5 = (d__1 = b[b_dim1 + 1], abs(d__1)) + (d__2 = b[(b_dim1 << 1) + 1], abs(d__2)), + d__6 = (d__3 = b[b_dim1 + 2], abs(d__3)) + (d__4 = b[(b_dim1 << 1) + 2], abs(d__4)); + bnorm = max(d__5, d__6); + if (smini < 1. && bnorm > 1.) { + if (bnorm > bignum * smini) { + *scale = 1. / bnorm; + } + } + temp = *scale / smini; + x[x_dim1 + 1] = temp * b[b_dim1 + 1]; + x[x_dim1 + 2] = temp * b[b_dim1 + 2]; + x[(x_dim1 << 1) + 1] = temp * b[(b_dim1 << 1) + 1]; + x[(x_dim1 << 1) + 2] = temp * b[(b_dim1 << 1) + 2]; + *xnorm = temp * bnorm; + *info = 1; + return 0; + } + ur11 = crv[icmax - 1]; + ui11 = civ[icmax - 1]; + cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; + ci21 = civ[ipivot[(icmax << 2) - 3] - 1]; + ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; + ui12 = civ[ipivot[(icmax << 2) - 2] - 1]; + cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; + ci22 = civ[ipivot[(icmax << 2) - 1] - 1]; + if (icmax == 1 || icmax == 4) { + if (abs(ur11) > abs(ui11)) { + temp = ui11 / ur11; + d__1 = temp; + ur11r = 1. / (ur11 * (d__1 * d__1 + 1.)); + ui11r = -temp * ur11r; + } else { + temp = ur11 / ui11; + d__1 = temp; + ui11r = -1. / (ui11 * (d__1 * d__1 + 1.)); + ur11r = -temp * ui11r; + } + lr21 = cr21 * ur11r; + li21 = cr21 * ui11r; + ur12s = ur12 * ur11r; + ui12s = ur12 * ui11r; + ur22 = cr22 - ur12 * lr21; + ui22 = ci22 - ur12 * li21; + } else { + ur11r = 1. / ur11; + ui11r = 0.; + lr21 = cr21 * ur11r; + li21 = ci21 * ur11r; + ur12s = ur12 * ur11r; + ui12s = ui12 * ur11r; + ur22 = cr22 - ur12 * lr21 + ui12 * li21; + ui22 = -ur12 * li21 - ui12 * lr21; + } + u22abs = abs(ur22) + abs(ui22); + if (u22abs < smini) { + ur22 = smini; + ui22 = 0.; + *info = 1; + } + if (rswap[icmax - 1]) { + br2 = b[b_dim1 + 1]; + br1 = b[b_dim1 + 2]; + bi2 = b[(b_dim1 << 1) + 1]; + bi1 = b[(b_dim1 << 1) + 2]; + } else { + br1 = b[b_dim1 + 1]; + br2 = b[b_dim1 + 2]; + bi1 = b[(b_dim1 << 1) + 1]; + bi2 = b[(b_dim1 << 1) + 2]; + } + br2 = br2 - lr21 * br1 + li21 * bi1; + bi2 = bi2 - li21 * br1 - lr21 * bi1; + d__1 = (abs(br1) + abs(bi1)) * (u22abs * (abs(ur11r) + abs(ui11r))), + d__2 = abs(br2) + abs(bi2); + bbnd = max(d__1, d__2); + if (bbnd > 1. && u22abs < 1.) { + if (bbnd >= bignum * u22abs) { + *scale = 1. / bbnd; + br1 = *scale * br1; + bi1 = *scale * bi1; + br2 = *scale * br2; + bi2 = *scale * bi2; + } + } + dladiv_(&br2, &bi2, &ur22, &ui22, &xr2, &xi2); + xr1 = ur11r * br1 - ui11r * bi1 - ur12s * xr2 + ui12s * xi2; + xi1 = ui11r * br1 + ur11r * bi1 - ui12s * xr2 - ur12s * xi2; + if (zswap[icmax - 1]) { + x[x_dim1 + 1] = xr2; + x[x_dim1 + 2] = xr1; + x[(x_dim1 << 1) + 1] = xi2; + x[(x_dim1 << 1) + 2] = xi1; + } else { + x[x_dim1 + 1] = xr1; + x[x_dim1 + 2] = xr2; + x[(x_dim1 << 1) + 1] = xi1; + x[(x_dim1 << 1) + 2] = xi2; + } + d__1 = abs(xr1) + abs(xi1), d__2 = abs(xr2) + abs(xi2); + *xnorm = max(d__1, d__2); + if (*xnorm > 1. && cmax > 1.) { + if (*xnorm > bignum / cmax) { + temp = cmax / bignum; + x[x_dim1 + 1] = temp * x[x_dim1 + 1]; + x[x_dim1 + 2] = temp * x[x_dim1 + 2]; + x[(x_dim1 << 1) + 1] = temp * x[(x_dim1 << 1) + 1]; + x[(x_dim1 << 1) + 2] = temp * x[(x_dim1 << 1) + 2]; + *xnorm = temp * *xnorm; + *scale = temp * *scale; + } + } + } + } + return 0; +} +#undef crv +#undef civ +#undef cr +#undef ci +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlanv2.cpp b/lib/linalg/dlanv2.cpp new file mode 100644 index 0000000000..29a511bf31 --- /dev/null +++ b/lib/linalg/dlanv2.cpp @@ -0,0 +1,106 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublereal c_b3 = 1.; +int dlanv2_(doublereal *a, doublereal *b, doublereal *c__, doublereal *d__, doublereal *rt1r, + doublereal *rt1i, doublereal *rt2r, doublereal *rt2i, doublereal *cs, doublereal *sn) +{ + doublereal d__1, d__2; + double d_lmp_sign(doublereal *, doublereal *), sqrt(doublereal); + doublereal p, z__, aa, bb, cc, dd, cs1, sn1, sab, sac, eps, tau, temp, scale, bcmax, bcmis, + sigma; + extern doublereal dlapy2_(doublereal *, doublereal *), dlamch_(char *, ftnlen); + eps = dlamch_((char *)"P", (ftnlen)1); + if (*c__ == 0.) { + *cs = 1.; + *sn = 0.; + } else if (*b == 0.) { + *cs = 0.; + *sn = 1.; + temp = *d__; + *d__ = *a; + *a = temp; + *b = -(*c__); + *c__ = 0.; + } else if (*a - *d__ == 0. && d_lmp_sign(&c_b3, b) != d_lmp_sign(&c_b3, c__)) { + *cs = 1.; + *sn = 0.; + } else { + temp = *a - *d__; + p = temp * .5; + d__1 = abs(*b), d__2 = abs(*c__); + bcmax = max(d__1, d__2); + d__1 = abs(*b), d__2 = abs(*c__); + bcmis = min(d__1, d__2) * d_lmp_sign(&c_b3, b) * d_lmp_sign(&c_b3, c__); + d__1 = abs(p); + scale = max(d__1, bcmax); + z__ = p / scale * p + bcmax / scale * bcmis; + if (z__ >= eps * 4.) { + d__1 = sqrt(scale) * sqrt(z__); + z__ = p + d_lmp_sign(&d__1, &p); + *a = *d__ + z__; + *d__ -= bcmax / z__ * bcmis; + tau = dlapy2_(c__, &z__); + *cs = z__ / tau; + *sn = *c__ / tau; + *b -= *c__; + *c__ = 0.; + } else { + sigma = *b + *c__; + tau = dlapy2_(&sigma, &temp); + *cs = sqrt((abs(sigma) / tau + 1.) * .5); + *sn = -(p / (tau * *cs)) * d_lmp_sign(&c_b3, &sigma); + aa = *a * *cs + *b * *sn; + bb = -(*a) * *sn + *b * *cs; + cc = *c__ * *cs + *d__ * *sn; + dd = -(*c__) * *sn + *d__ * *cs; + *a = aa * *cs + cc * *sn; + *b = bb * *cs + dd * *sn; + *c__ = -aa * *sn + cc * *cs; + *d__ = -bb * *sn + dd * *cs; + temp = (*a + *d__) * .5; + *a = temp; + *d__ = temp; + if (*c__ != 0.) { + if (*b != 0.) { + if (d_lmp_sign(&c_b3, b) == d_lmp_sign(&c_b3, c__)) { + sab = sqrt((abs(*b))); + sac = sqrt((abs(*c__))); + d__1 = sab * sac; + p = d_lmp_sign(&d__1, c__); + tau = 1. / sqrt((d__1 = *b + *c__, abs(d__1))); + *a = temp + p; + *d__ = temp - p; + *b -= *c__; + *c__ = 0.; + cs1 = sab * tau; + sn1 = sac * tau; + temp = *cs * cs1 - *sn * sn1; + *sn = *cs * sn1 + *sn * cs1; + *cs = temp; + } + } else { + *b = -(*c__); + *c__ = 0.; + temp = *cs; + *cs = -(*sn); + *sn = temp; + } + } + } + } + *rt1r = *a; + *rt2r = *d__; + if (*c__ == 0.) { + *rt1i = 0.; + *rt2i = 0.; + } else { + *rt1i = sqrt((abs(*b))) * sqrt((abs(*c__))); + *rt2i = -(*rt1i); + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlaqr0.cpp b/lib/linalg/dlaqr0.cpp new file mode 100644 index 0000000000..31a265c3e9 --- /dev/null +++ b/lib/linalg/dlaqr0.cpp @@ -0,0 +1,306 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__13 = 13; +static integer c__15 = 15; +static integer c_n1 = -1; +static integer c__12 = 12; +static integer c__14 = 14; +static integer c__16 = 16; +static logical c_false = FALSE_; +static integer c__1 = 1; +static integer c__3 = 3; +int dlaqr0_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, doublereal *h__, + integer *ldh, doublereal *wr, doublereal *wi, integer *iloz, integer *ihiz, + doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *info) +{ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4; + integer i__, k; + doublereal aa, bb, cc, dd; + integer ld; + doublereal cs; + integer nh, it, ks, kt; + doublereal sn; + integer ku, kv, ls, ns; + doublereal ss; + integer nw, inf, kdu, nho, nve, kwh, nsr, nwr, kwv, ndec, ndfl, kbot, nmin; + doublereal swap; + integer ktop; + doublereal zdum[1]; + integer kacc22, itmax, nsmax, nwmax, kwtop; + extern int dlanv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), + dlaqr3_(logical *, logical *, integer *, integer *, integer *, integer *, doublereal *, + integer *, integer *, integer *, doublereal *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *, doublereal *, integer *, doublereal *, integer *), + dlaqr4_(logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, integer *), + dlaqr5_(logical *, logical *, integer *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *, doublereal *, integer *); + integer nibble; + extern int dlahqr_(logical *, logical *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + char jbcmpz[2]; + integer nwupbd; + logical sorted; + integer lwkopt; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + *info = 0; + if (*n == 0) { + work[1] = 1.; + return 0; + } + if (*n <= 11) { + lwkopt = 1; + if (*lwork != -1) { + dlahqr_(wantt, wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[1], iloz, ihiz, + &z__[z_offset], ldz, info); + } + } else { + *info = 0; + if (*wantt) { + *(unsigned char *)jbcmpz = 'S'; + } else { + *(unsigned char *)jbcmpz = 'E'; + } + if (*wantz) { + *(unsigned char *)&jbcmpz[1] = 'V'; + } else { + *(unsigned char *)&jbcmpz[1] = 'N'; + } + nwr = ilaenv_(&c__13, (char *)"DLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + nwr = max(2, nwr); + i__1 = *ihi - *ilo + 1, i__2 = (*n - 1) / 3, i__1 = min(i__1, i__2); + nwr = min(i__1, nwr); + nsr = ilaenv_(&c__15, (char *)"DLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + i__1 = nsr, i__2 = (*n + 6) / 9, i__1 = min(i__1, i__2), i__2 = *ihi - *ilo; + nsr = min(i__1, i__2); + i__1 = 2, i__2 = nsr - nsr % 2; + nsr = max(i__1, i__2); + i__1 = nwr + 1; + dlaqr3_(wantt, wantz, n, ilo, ihi, &i__1, &h__[h_offset], ldh, iloz, ihiz, &z__[z_offset], + ldz, &ls, &ld, &wr[1], &wi[1], &h__[h_offset], ldh, n, &h__[h_offset], ldh, n, + &h__[h_offset], ldh, &work[1], &c_n1); + i__1 = nsr * 3 / 2, i__2 = (integer)work[1]; + lwkopt = max(i__1, i__2); + if (*lwork == -1) { + work[1] = (doublereal)lwkopt; + return 0; + } + nmin = ilaenv_(&c__12, (char *)"DLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + nmin = max(11, nmin); + nibble = ilaenv_(&c__14, (char *)"DLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + nibble = max(0, nibble); + kacc22 = ilaenv_(&c__16, (char *)"DLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + kacc22 = max(0, kacc22); + kacc22 = min(2, kacc22); + i__1 = (*n - 1) / 3, i__2 = *lwork / 2; + nwmax = min(i__1, i__2); + nw = nwmax; + i__1 = (*n + 6) / 9, i__2 = (*lwork << 1) / 3; + nsmax = min(i__1, i__2); + nsmax -= nsmax % 2; + ndfl = 1; + i__1 = 10, i__2 = *ihi - *ilo + 1; + itmax = max(i__1, i__2) * 30; + kbot = *ihi; + i__1 = itmax; + for (it = 1; it <= i__1; ++it) { + if (kbot < *ilo) { + goto L90; + } + i__2 = *ilo + 1; + for (k = kbot; k >= i__2; --k) { + if (h__[k + (k - 1) * h_dim1] == 0.) { + goto L20; + } + } + k = *ilo; + L20: + ktop = k; + nh = kbot - ktop + 1; + nwupbd = min(nh, nwmax); + if (ndfl < 5) { + nw = min(nwupbd, nwr); + } else { + i__2 = nwupbd, i__3 = nw << 1; + nw = min(i__2, i__3); + } + if (nw < nwmax) { + if (nw >= nh - 1) { + nw = nh; + } else { + kwtop = kbot - nw + 1; + if ((d__1 = h__[kwtop + (kwtop - 1) * h_dim1], abs(d__1)) > + (d__2 = h__[kwtop - 1 + (kwtop - 2) * h_dim1], abs(d__2))) { + ++nw; + } + } + } + if (ndfl < 5) { + ndec = -1; + } else if (ndec >= 0 || nw >= nwupbd) { + ++ndec; + if (nw - ndec < 2) { + ndec = 0; + } + nw -= ndec; + } + kv = *n - nw + 1; + kt = nw + 1; + nho = *n - nw - 1 - kt + 1; + kwv = nw + 2; + nve = *n - nw - kwv + 1; + dlaqr3_(wantt, wantz, n, &ktop, &kbot, &nw, &h__[h_offset], ldh, iloz, ihiz, + &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], &h__[kv + h_dim1], ldh, &nho, + &h__[kv + kt * h_dim1], ldh, &nve, &h__[kwv + h_dim1], ldh, &work[1], lwork); + kbot -= ld; + ks = kbot - ls + 1; + if (ld == 0 || ld * 100 <= nw * nibble && kbot - ktop + 1 > min(nmin, nwmax)) { + i__4 = 2, i__5 = kbot - ktop; + i__2 = min(nsmax, nsr), i__3 = max(i__4, i__5); + ns = min(i__2, i__3); + ns -= ns % 2; + if (ndfl % 6 == 0) { + ks = kbot - ns + 1; + i__3 = ks + 1, i__4 = ktop + 2; + i__2 = max(i__3, i__4); + for (i__ = kbot; i__ >= i__2; i__ += -2) { + ss = (d__1 = h__[i__ + (i__ - 1) * h_dim1], abs(d__1)) + + (d__2 = h__[i__ - 1 + (i__ - 2) * h_dim1], abs(d__2)); + aa = ss * .75 + h__[i__ + i__ * h_dim1]; + bb = ss; + cc = ss * -.4375; + dd = aa; + dlanv2_(&aa, &bb, &cc, &dd, &wr[i__ - 1], &wi[i__ - 1], &wr[i__], &wi[i__], + &cs, &sn); + } + if (ks == ktop) { + wr[ks + 1] = h__[ks + 1 + (ks + 1) * h_dim1]; + wi[ks + 1] = 0.; + wr[ks] = wr[ks + 1]; + wi[ks] = wi[ks + 1]; + } + } else { + if (kbot - ks + 1 <= ns / 2) { + ks = kbot - ns + 1; + kt = *n - ns + 1; + dlacpy_((char *)"A", &ns, &ns, &h__[ks + ks * h_dim1], ldh, &h__[kt + h_dim1], ldh, + (ftnlen)1); + if (ns > nmin) { + dlaqr4_(&c_false, &c_false, &ns, &c__1, &ns, &h__[kt + h_dim1], ldh, + &wr[ks], &wi[ks], &c__1, &c__1, zdum, &c__1, &work[1], lwork, + &inf); + } else { + dlahqr_(&c_false, &c_false, &ns, &c__1, &ns, &h__[kt + h_dim1], ldh, + &wr[ks], &wi[ks], &c__1, &c__1, zdum, &c__1, &inf); + } + ks += inf; + if (ks >= kbot) { + aa = h__[kbot - 1 + (kbot - 1) * h_dim1]; + cc = h__[kbot + (kbot - 1) * h_dim1]; + bb = h__[kbot - 1 + kbot * h_dim1]; + dd = h__[kbot + kbot * h_dim1]; + dlanv2_(&aa, &bb, &cc, &dd, &wr[kbot - 1], &wi[kbot - 1], &wr[kbot], + &wi[kbot], &cs, &sn); + ks = kbot - 1; + } + } + if (kbot - ks + 1 > ns) { + sorted = FALSE_; + i__2 = ks + 1; + for (k = kbot; k >= i__2; --k) { + if (sorted) { + goto L60; + } + sorted = TRUE_; + i__3 = k - 1; + for (i__ = ks; i__ <= i__3; ++i__) { + if ((d__1 = wr[i__], abs(d__1)) + (d__2 = wi[i__], abs(d__2)) < + (d__3 = wr[i__ + 1], abs(d__3)) + + (d__4 = wi[i__ + 1], abs(d__4))) { + sorted = FALSE_; + swap = wr[i__]; + wr[i__] = wr[i__ + 1]; + wr[i__ + 1] = swap; + swap = wi[i__]; + wi[i__] = wi[i__ + 1]; + wi[i__ + 1] = swap; + } + } + } + L60:; + } + i__2 = ks + 2; + for (i__ = kbot; i__ >= i__2; i__ += -2) { + if (wi[i__] != -wi[i__ - 1]) { + swap = wr[i__]; + wr[i__] = wr[i__ - 1]; + wr[i__ - 1] = wr[i__ - 2]; + wr[i__ - 2] = swap; + swap = wi[i__]; + wi[i__] = wi[i__ - 1]; + wi[i__ - 1] = wi[i__ - 2]; + wi[i__ - 2] = swap; + } + } + } + if (kbot - ks + 1 == 2) { + if (wi[kbot] == 0.) { + if ((d__1 = wr[kbot] - h__[kbot + kbot * h_dim1], abs(d__1)) < + (d__2 = wr[kbot - 1] - h__[kbot + kbot * h_dim1], abs(d__2))) { + wr[kbot - 1] = wr[kbot]; + } else { + wr[kbot] = wr[kbot - 1]; + } + } + } + i__2 = ns, i__3 = kbot - ks + 1; + ns = min(i__2, i__3); + ns -= ns % 2; + ks = kbot - ns + 1; + kdu = ns * 3 - 3; + ku = *n - kdu + 1; + kwh = kdu + 1; + nho = *n - kdu - 3 - (kdu + 1) + 1; + kwv = kdu + 4; + nve = *n - kdu - kwv + 1; + dlaqr5_(wantt, wantz, &kacc22, n, &ktop, &kbot, &ns, &wr[ks], &wi[ks], + &h__[h_offset], ldh, iloz, ihiz, &z__[z_offset], ldz, &work[1], &c__3, + &h__[ku + h_dim1], ldh, &nve, &h__[kwv + h_dim1], ldh, &nho, + &h__[ku + kwh * h_dim1], ldh); + } + if (ld > 0) { + ndfl = 1; + } else { + ++ndfl; + } + } + *info = kbot; + L90:; + } + work[1] = (doublereal)lwkopt; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlaqr1.cpp b/lib/linalg/dlaqr1.cpp new file mode 100644 index 0000000000..292dce0f45 --- /dev/null +++ b/lib/linalg/dlaqr1.cpp @@ -0,0 +1,52 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int dlaqr1_(integer *n, doublereal *h__, integer *ldh, doublereal *sr1, doublereal *si1, + doublereal *sr2, doublereal *si2, doublereal *v) +{ + integer h_dim1, h_offset; + doublereal d__1, d__2, d__3; + doublereal s, h21s, h31s; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --v; + if (*n != 2 && *n != 3) { + return 0; + } + if (*n == 2) { + s = (d__1 = h__[h_dim1 + 1] - *sr2, abs(d__1)) + abs(*si2) + + (d__2 = h__[h_dim1 + 2], abs(d__2)); + if (s == 0.) { + v[1] = 0.; + v[2] = 0.; + } else { + h21s = h__[h_dim1 + 2] / s; + v[1] = h21s * h__[(h_dim1 << 1) + 1] + + (h__[h_dim1 + 1] - *sr1) * ((h__[h_dim1 + 1] - *sr2) / s) - *si1 * (*si2 / s); + v[2] = h21s * (h__[h_dim1 + 1] + h__[(h_dim1 << 1) + 2] - *sr1 - *sr2); + } + } else { + s = (d__1 = h__[h_dim1 + 1] - *sr2, abs(d__1)) + abs(*si2) + + (d__2 = h__[h_dim1 + 2], abs(d__2)) + (d__3 = h__[h_dim1 + 3], abs(d__3)); + if (s == 0.) { + v[1] = 0.; + v[2] = 0.; + v[3] = 0.; + } else { + h21s = h__[h_dim1 + 2] / s; + h31s = h__[h_dim1 + 3] / s; + v[1] = (h__[h_dim1 + 1] - *sr1) * ((h__[h_dim1 + 1] - *sr2) / s) - *si1 * (*si2 / s) + + h__[(h_dim1 << 1) + 1] * h21s + h__[h_dim1 * 3 + 1] * h31s; + v[2] = h21s * (h__[h_dim1 + 1] + h__[(h_dim1 << 1) + 2] - *sr1 - *sr2) + + h__[h_dim1 * 3 + 2] * h31s; + v[3] = h31s * (h__[h_dim1 + 1] + h__[h_dim1 * 3 + 3] - *sr1 - *sr2) + + h21s * h__[(h_dim1 << 1) + 3]; + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlaqr2.cpp b/lib/linalg/dlaqr2.cpp new file mode 100644 index 0000000000..102433a90d --- /dev/null +++ b/lib/linalg/dlaqr2.cpp @@ -0,0 +1,359 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b12 = 0.; +static doublereal c_b13 = 1.; +static logical c_true = TRUE_; +int dlaqr2_(logical *wantt, logical *wantz, integer *n, integer *ktop, integer *kbot, integer *nw, + doublereal *h__, integer *ldh, integer *iloz, integer *ihiz, doublereal *z__, + integer *ldz, integer *ns, integer *nd, doublereal *sr, doublereal *si, doublereal *v, + integer *ldv, integer *nh, doublereal *t, integer *ldt, integer *nv, doublereal *wv, + integer *ldwv, doublereal *work, integer *lwork) +{ + integer h_dim1, h_offset, t_dim1, t_offset, v_dim1, v_offset, wv_dim1, wv_offset, z_dim1, + z_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + double sqrt(doublereal); + integer i__, j, k; + doublereal s, aa, bb, cc, dd, cs, sn; + integer jw; + doublereal evi, evk, foo; + integer kln; + doublereal tau, ulp; + integer lwk1, lwk2; + doublereal beta; + integer kend, kcol, info, ifst, ilst, ltop, krow; + extern int dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, ftnlen), + dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, + ftnlen); + logical bulge; + extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); + integer infqr, kwtop; + extern int dlanv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), + dlabad_(doublereal *, doublereal *); + extern doublereal dlamch_(char *, ftnlen); + extern int dgehrd_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *), + dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *), + dlahqr_(logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, + integer *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen); + doublereal safmin; + extern int dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, ftnlen); + doublereal safmax; + extern int dtrexc_(char *, integer *, doublereal *, integer *, doublereal *, integer *, + integer *, integer *, doublereal *, integer *, ftnlen), + dormhr_(char *, char *, integer *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, ftnlen, + ftnlen); + logical sorted; + doublereal smlnum; + integer lwkopt; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --sr; + --si; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + --work; + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1, i__2); + if (jw <= 2) { + lwkopt = 1; + } else { + i__1 = jw - 1; + dgehrd_(&jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &work[1], &c_n1, &info); + lwk1 = (integer)work[1]; + i__1 = jw - 1; + dormhr_((char *)"R", (char *)"N", &jw, &jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &v[v_offset], ldv, + &work[1], &c_n1, &info, (ftnlen)1, (ftnlen)1); + lwk2 = (integer)work[1]; + lwkopt = jw + max(lwk1, lwk2); + } + if (*lwork == -1) { + work[1] = (doublereal)lwkopt; + return 0; + } + *ns = 0; + *nd = 0; + work[1] = 1.; + if (*ktop > *kbot) { + return 0; + } + if (*nw < 1) { + return 0; + } + safmin = dlamch_((char *)"SAFE MINIMUM", (ftnlen)12); + safmax = 1. / safmin; + dlabad_(&safmin, &safmax); + ulp = dlamch_((char *)"PRECISION", (ftnlen)9); + smlnum = safmin * ((doublereal)(*n) / ulp); + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1, i__2); + kwtop = *kbot - jw + 1; + if (kwtop == *ktop) { + s = 0.; + } else { + s = h__[kwtop + (kwtop - 1) * h_dim1]; + } + if (*kbot == kwtop) { + sr[kwtop] = h__[kwtop + kwtop * h_dim1]; + si[kwtop] = 0.; + *ns = 1; + *nd = 0; + d__2 = smlnum, d__3 = ulp * (d__1 = h__[kwtop + kwtop * h_dim1], abs(d__1)); + if (abs(s) <= max(d__2, d__3)) { + *ns = 0; + *nd = 1; + if (kwtop > *ktop) { + h__[kwtop + (kwtop - 1) * h_dim1] = 0.; + } + } + work[1] = 1.; + return 0; + } + dlacpy_((char *)"U", &jw, &jw, &h__[kwtop + kwtop * h_dim1], ldh, &t[t_offset], ldt, (ftnlen)1); + i__1 = jw - 1; + i__2 = *ldh + 1; + i__3 = *ldt + 1; + dcopy_(&i__1, &h__[kwtop + 1 + kwtop * h_dim1], &i__2, &t[t_dim1 + 2], &i__3); + dlaset_((char *)"A", &jw, &jw, &c_b12, &c_b13, &v[v_offset], ldv, (ftnlen)1); + dlahqr_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[kwtop], &si[kwtop], &c__1, + &jw, &v[v_offset], ldv, &infqr); + i__1 = jw - 3; + for (j = 1; j <= i__1; ++j) { + t[j + 2 + j * t_dim1] = 0.; + t[j + 3 + j * t_dim1] = 0.; + } + if (jw > 2) { + t[jw + (jw - 2) * t_dim1] = 0.; + } + *ns = jw; + ilst = infqr + 1; +L20: + if (ilst <= *ns) { + if (*ns == 1) { + bulge = FALSE_; + } else { + bulge = t[*ns + (*ns - 1) * t_dim1] != 0.; + } + if (!bulge) { + foo = (d__1 = t[*ns + *ns * t_dim1], abs(d__1)); + if (foo == 0.) { + foo = abs(s); + } + d__2 = smlnum, d__3 = ulp * foo; + if ((d__1 = s * v[*ns * v_dim1 + 1], abs(d__1)) <= max(d__2, d__3)) { + --(*ns); + } else { + ifst = *ns; + dtrexc_((char *)"V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, &ilst, &work[1], + &info, (ftnlen)1); + ++ilst; + } + } else { + foo = (d__3 = t[*ns + *ns * t_dim1], abs(d__3)) + + sqrt((d__1 = t[*ns + (*ns - 1) * t_dim1], abs(d__1))) * + sqrt((d__2 = t[*ns - 1 + *ns * t_dim1], abs(d__2))); + if (foo == 0.) { + foo = abs(s); + } + d__3 = (d__1 = s * v[*ns * v_dim1 + 1], abs(d__1)), + d__4 = (d__2 = s * v[(*ns - 1) * v_dim1 + 1], abs(d__2)); + d__5 = smlnum, d__6 = ulp * foo; + if (max(d__3, d__4) <= max(d__5, d__6)) { + *ns += -2; + } else { + ifst = *ns; + dtrexc_((char *)"V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, &ilst, &work[1], + &info, (ftnlen)1); + ilst += 2; + } + } + goto L20; + } + if (*ns == 0) { + s = 0.; + } + if (*ns < jw) { + sorted = FALSE_; + i__ = *ns + 1; + L30: + if (sorted) { + goto L50; + } + sorted = TRUE_; + kend = i__ - 1; + i__ = infqr + 1; + if (i__ == *ns) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.) { + k = i__ + 1; + } else { + k = i__ + 2; + } + L40: + if (k <= kend) { + if (k == i__ + 1) { + evi = (d__1 = t[i__ + i__ * t_dim1], abs(d__1)); + } else { + evi = (d__3 = t[i__ + i__ * t_dim1], abs(d__3)) + + sqrt((d__1 = t[i__ + 1 + i__ * t_dim1], abs(d__1))) * + sqrt((d__2 = t[i__ + (i__ + 1) * t_dim1], abs(d__2))); + } + if (k == kend) { + evk = (d__1 = t[k + k * t_dim1], abs(d__1)); + } else if (t[k + 1 + k * t_dim1] == 0.) { + evk = (d__1 = t[k + k * t_dim1], abs(d__1)); + } else { + evk = (d__3 = t[k + k * t_dim1], abs(d__3)) + + sqrt((d__1 = t[k + 1 + k * t_dim1], abs(d__1))) * + sqrt((d__2 = t[k + (k + 1) * t_dim1], abs(d__2))); + } + if (evi >= evk) { + i__ = k; + } else { + sorted = FALSE_; + ifst = i__; + ilst = k; + dtrexc_((char *)"V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, &ilst, &work[1], + &info, (ftnlen)1); + if (info == 0) { + i__ = ilst; + } else { + i__ = k; + } + } + if (i__ == kend) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.) { + k = i__ + 1; + } else { + k = i__ + 2; + } + goto L40; + } + goto L30; + L50:; + } + i__ = jw; +L60: + if (i__ >= infqr + 1) { + if (i__ == infqr + 1) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.; + --i__; + } else if (t[i__ + (i__ - 1) * t_dim1] == 0.) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.; + --i__; + } else { + aa = t[i__ - 1 + (i__ - 1) * t_dim1]; + cc = t[i__ + (i__ - 1) * t_dim1]; + bb = t[i__ - 1 + i__ * t_dim1]; + dd = t[i__ + i__ * t_dim1]; + dlanv2_(&aa, &bb, &cc, &dd, &sr[kwtop + i__ - 2], &si[kwtop + i__ - 2], + &sr[kwtop + i__ - 1], &si[kwtop + i__ - 1], &cs, &sn); + i__ += -2; + } + goto L60; + } + if (*ns < jw || s == 0.) { + if (*ns > 1 && s != 0.) { + dcopy_(ns, &v[v_offset], ldv, &work[1], &c__1); + beta = work[1]; + dlarfg_(ns, &beta, &work[2], &c__1, &tau); + work[1] = 1.; + i__1 = jw - 2; + i__2 = jw - 2; + dlaset_((char *)"L", &i__1, &i__2, &c_b12, &c_b12, &t[t_dim1 + 3], ldt, (ftnlen)1); + dlarf_((char *)"L", ns, &jw, &work[1], &c__1, &tau, &t[t_offset], ldt, &work[jw + 1], + (ftnlen)1); + dlarf_((char *)"R", ns, ns, &work[1], &c__1, &tau, &t[t_offset], ldt, &work[jw + 1], (ftnlen)1); + dlarf_((char *)"R", &jw, ns, &work[1], &c__1, &tau, &v[v_offset], ldv, &work[jw + 1], + (ftnlen)1); + i__1 = *lwork - jw; + dgehrd_(&jw, &c__1, ns, &t[t_offset], ldt, &work[1], &work[jw + 1], &i__1, &info); + } + if (kwtop > 1) { + h__[kwtop + (kwtop - 1) * h_dim1] = s * v[v_dim1 + 1]; + } + dlacpy_((char *)"U", &jw, &jw, &t[t_offset], ldt, &h__[kwtop + kwtop * h_dim1], ldh, (ftnlen)1); + i__1 = jw - 1; + i__2 = *ldt + 1; + i__3 = *ldh + 1; + dcopy_(&i__1, &t[t_dim1 + 2], &i__2, &h__[kwtop + 1 + kwtop * h_dim1], &i__3); + if (*ns > 1 && s != 0.) { + i__1 = *lwork - jw; + dormhr_((char *)"R", (char *)"N", &jw, ns, &c__1, ns, &t[t_offset], ldt, &work[1], &v[v_offset], ldv, + &work[jw + 1], &i__1, &info, (ftnlen)1, (ftnlen)1); + } + if (*wantt) { + ltop = 1; + } else { + ltop = *ktop; + } + i__1 = kwtop - 1; + i__2 = *nv; + for (krow = ltop; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += i__2) { + i__3 = *nv, i__4 = kwtop - krow; + kln = min(i__3, i__4); + dgemm_((char *)"N", (char *)"N", &kln, &jw, &jw, &c_b13, &h__[krow + kwtop * h_dim1], ldh, &v[v_offset], + ldv, &c_b12, &wv[wv_offset], ldwv, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"A", &kln, &jw, &wv[wv_offset], ldwv, &h__[krow + kwtop * h_dim1], ldh, + (ftnlen)1); + } + if (*wantt) { + i__2 = *n; + i__1 = *nh; + for (kcol = *kbot + 1; i__1 < 0 ? kcol >= i__2 : kcol <= i__2; kcol += i__1) { + i__3 = *nh, i__4 = *n - kcol + 1; + kln = min(i__3, i__4); + dgemm_((char *)"C", (char *)"N", &jw, &kln, &jw, &c_b13, &v[v_offset], ldv, + &h__[kwtop + kcol * h_dim1], ldh, &c_b12, &t[t_offset], ldt, (ftnlen)1, + (ftnlen)1); + dlacpy_((char *)"A", &jw, &kln, &t[t_offset], ldt, &h__[kwtop + kcol * h_dim1], ldh, + (ftnlen)1); + } + } + if (*wantz) { + i__1 = *ihiz; + i__2 = *nv; + for (krow = *iloz; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += i__2) { + i__3 = *nv, i__4 = *ihiz - krow + 1; + kln = min(i__3, i__4); + dgemm_((char *)"N", (char *)"N", &kln, &jw, &jw, &c_b13, &z__[krow + kwtop * z_dim1], ldz, + &v[v_offset], ldv, &c_b12, &wv[wv_offset], ldwv, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"A", &kln, &jw, &wv[wv_offset], ldwv, &z__[krow + kwtop * z_dim1], ldz, + (ftnlen)1); + } + } + } + *nd = jw - *ns; + *ns -= infqr; + work[1] = (doublereal)lwkopt; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlaqr3.cpp b/lib/linalg/dlaqr3.cpp new file mode 100644 index 0000000000..5711a3e349 --- /dev/null +++ b/lib/linalg/dlaqr3.cpp @@ -0,0 +1,375 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c_n1 = -1; +static logical c_true = TRUE_; +static doublereal c_b17 = 0.; +static doublereal c_b18 = 1.; +static integer c__12 = 12; +int dlaqr3_(logical *wantt, logical *wantz, integer *n, integer *ktop, integer *kbot, integer *nw, + doublereal *h__, integer *ldh, integer *iloz, integer *ihiz, doublereal *z__, + integer *ldz, integer *ns, integer *nd, doublereal *sr, doublereal *si, doublereal *v, + integer *ldv, integer *nh, doublereal *t, integer *ldt, integer *nv, doublereal *wv, + integer *ldwv, doublereal *work, integer *lwork) +{ + integer h_dim1, h_offset, t_dim1, t_offset, v_dim1, v_offset, wv_dim1, wv_offset, z_dim1, + z_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + double sqrt(doublereal); + integer i__, j, k; + doublereal s, aa, bb, cc, dd, cs, sn; + integer jw; + doublereal evi, evk, foo; + integer kln; + doublereal tau, ulp; + integer lwk1, lwk2, lwk3; + doublereal beta; + integer kend, kcol, info, nmin, ifst, ilst, ltop, krow; + extern int dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, ftnlen), + dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, + ftnlen); + logical bulge; + extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); + integer infqr, kwtop; + extern int dlanv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), + dlaqr4_(logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, integer *), + dlabad_(doublereal *, doublereal *); + extern doublereal dlamch_(char *, ftnlen); + extern int dgehrd_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *), + dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *), + dlahqr_(logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, + integer *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen); + doublereal safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + doublereal safmax; + extern int dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, ftnlen), + dtrexc_(char *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, + integer *, doublereal *, integer *, ftnlen), + dormhr_(char *, char *, integer *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, ftnlen, + ftnlen); + logical sorted; + doublereal smlnum; + integer lwkopt; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --sr; + --si; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + --work; + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1, i__2); + if (jw <= 2) { + lwkopt = 1; + } else { + i__1 = jw - 1; + dgehrd_(&jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &work[1], &c_n1, &info); + lwk1 = (integer)work[1]; + i__1 = jw - 1; + dormhr_((char *)"R", (char *)"N", &jw, &jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &v[v_offset], ldv, + &work[1], &c_n1, &info, (ftnlen)1, (ftnlen)1); + lwk2 = (integer)work[1]; + dlaqr4_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[1], &si[1], &c__1, &jw, + &v[v_offset], ldv, &work[1], &c_n1, &infqr); + lwk3 = (integer)work[1]; + i__1 = jw + max(lwk1, lwk2); + lwkopt = max(i__1, lwk3); + } + if (*lwork == -1) { + work[1] = (doublereal)lwkopt; + return 0; + } + *ns = 0; + *nd = 0; + work[1] = 1.; + if (*ktop > *kbot) { + return 0; + } + if (*nw < 1) { + return 0; + } + safmin = dlamch_((char *)"SAFE MINIMUM", (ftnlen)12); + safmax = 1. / safmin; + dlabad_(&safmin, &safmax); + ulp = dlamch_((char *)"PRECISION", (ftnlen)9); + smlnum = safmin * ((doublereal)(*n) / ulp); + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1, i__2); + kwtop = *kbot - jw + 1; + if (kwtop == *ktop) { + s = 0.; + } else { + s = h__[kwtop + (kwtop - 1) * h_dim1]; + } + if (*kbot == kwtop) { + sr[kwtop] = h__[kwtop + kwtop * h_dim1]; + si[kwtop] = 0.; + *ns = 1; + *nd = 0; + d__2 = smlnum, d__3 = ulp * (d__1 = h__[kwtop + kwtop * h_dim1], abs(d__1)); + if (abs(s) <= max(d__2, d__3)) { + *ns = 0; + *nd = 1; + if (kwtop > *ktop) { + h__[kwtop + (kwtop - 1) * h_dim1] = 0.; + } + } + work[1] = 1.; + return 0; + } + dlacpy_((char *)"U", &jw, &jw, &h__[kwtop + kwtop * h_dim1], ldh, &t[t_offset], ldt, (ftnlen)1); + i__1 = jw - 1; + i__2 = *ldh + 1; + i__3 = *ldt + 1; + dcopy_(&i__1, &h__[kwtop + 1 + kwtop * h_dim1], &i__2, &t[t_dim1 + 2], &i__3); + dlaset_((char *)"A", &jw, &jw, &c_b17, &c_b18, &v[v_offset], ldv, (ftnlen)1); + nmin = ilaenv_(&c__12, (char *)"DLAQR3", (char *)"SV", &jw, &c__1, &jw, lwork, (ftnlen)6, (ftnlen)2); + if (jw > nmin) { + dlaqr4_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[kwtop], &si[kwtop], &c__1, + &jw, &v[v_offset], ldv, &work[1], lwork, &infqr); + } else { + dlahqr_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[kwtop], &si[kwtop], &c__1, + &jw, &v[v_offset], ldv, &infqr); + } + i__1 = jw - 3; + for (j = 1; j <= i__1; ++j) { + t[j + 2 + j * t_dim1] = 0.; + t[j + 3 + j * t_dim1] = 0.; + } + if (jw > 2) { + t[jw + (jw - 2) * t_dim1] = 0.; + } + *ns = jw; + ilst = infqr + 1; +L20: + if (ilst <= *ns) { + if (*ns == 1) { + bulge = FALSE_; + } else { + bulge = t[*ns + (*ns - 1) * t_dim1] != 0.; + } + if (!bulge) { + foo = (d__1 = t[*ns + *ns * t_dim1], abs(d__1)); + if (foo == 0.) { + foo = abs(s); + } + d__2 = smlnum, d__3 = ulp * foo; + if ((d__1 = s * v[*ns * v_dim1 + 1], abs(d__1)) <= max(d__2, d__3)) { + --(*ns); + } else { + ifst = *ns; + dtrexc_((char *)"V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, &ilst, &work[1], + &info, (ftnlen)1); + ++ilst; + } + } else { + foo = (d__3 = t[*ns + *ns * t_dim1], abs(d__3)) + + sqrt((d__1 = t[*ns + (*ns - 1) * t_dim1], abs(d__1))) * + sqrt((d__2 = t[*ns - 1 + *ns * t_dim1], abs(d__2))); + if (foo == 0.) { + foo = abs(s); + } + d__3 = (d__1 = s * v[*ns * v_dim1 + 1], abs(d__1)), + d__4 = (d__2 = s * v[(*ns - 1) * v_dim1 + 1], abs(d__2)); + d__5 = smlnum, d__6 = ulp * foo; + if (max(d__3, d__4) <= max(d__5, d__6)) { + *ns += -2; + } else { + ifst = *ns; + dtrexc_((char *)"V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, &ilst, &work[1], + &info, (ftnlen)1); + ilst += 2; + } + } + goto L20; + } + if (*ns == 0) { + s = 0.; + } + if (*ns < jw) { + sorted = FALSE_; + i__ = *ns + 1; + L30: + if (sorted) { + goto L50; + } + sorted = TRUE_; + kend = i__ - 1; + i__ = infqr + 1; + if (i__ == *ns) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.) { + k = i__ + 1; + } else { + k = i__ + 2; + } + L40: + if (k <= kend) { + if (k == i__ + 1) { + evi = (d__1 = t[i__ + i__ * t_dim1], abs(d__1)); + } else { + evi = (d__3 = t[i__ + i__ * t_dim1], abs(d__3)) + + sqrt((d__1 = t[i__ + 1 + i__ * t_dim1], abs(d__1))) * + sqrt((d__2 = t[i__ + (i__ + 1) * t_dim1], abs(d__2))); + } + if (k == kend) { + evk = (d__1 = t[k + k * t_dim1], abs(d__1)); + } else if (t[k + 1 + k * t_dim1] == 0.) { + evk = (d__1 = t[k + k * t_dim1], abs(d__1)); + } else { + evk = (d__3 = t[k + k * t_dim1], abs(d__3)) + + sqrt((d__1 = t[k + 1 + k * t_dim1], abs(d__1))) * + sqrt((d__2 = t[k + (k + 1) * t_dim1], abs(d__2))); + } + if (evi >= evk) { + i__ = k; + } else { + sorted = FALSE_; + ifst = i__; + ilst = k; + dtrexc_((char *)"V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, &ilst, &work[1], + &info, (ftnlen)1); + if (info == 0) { + i__ = ilst; + } else { + i__ = k; + } + } + if (i__ == kend) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.) { + k = i__ + 1; + } else { + k = i__ + 2; + } + goto L40; + } + goto L30; + L50:; + } + i__ = jw; +L60: + if (i__ >= infqr + 1) { + if (i__ == infqr + 1) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.; + --i__; + } else if (t[i__ + (i__ - 1) * t_dim1] == 0.) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.; + --i__; + } else { + aa = t[i__ - 1 + (i__ - 1) * t_dim1]; + cc = t[i__ + (i__ - 1) * t_dim1]; + bb = t[i__ - 1 + i__ * t_dim1]; + dd = t[i__ + i__ * t_dim1]; + dlanv2_(&aa, &bb, &cc, &dd, &sr[kwtop + i__ - 2], &si[kwtop + i__ - 2], + &sr[kwtop + i__ - 1], &si[kwtop + i__ - 1], &cs, &sn); + i__ += -2; + } + goto L60; + } + if (*ns < jw || s == 0.) { + if (*ns > 1 && s != 0.) { + dcopy_(ns, &v[v_offset], ldv, &work[1], &c__1); + beta = work[1]; + dlarfg_(ns, &beta, &work[2], &c__1, &tau); + work[1] = 1.; + i__1 = jw - 2; + i__2 = jw - 2; + dlaset_((char *)"L", &i__1, &i__2, &c_b17, &c_b17, &t[t_dim1 + 3], ldt, (ftnlen)1); + dlarf_((char *)"L", ns, &jw, &work[1], &c__1, &tau, &t[t_offset], ldt, &work[jw + 1], + (ftnlen)1); + dlarf_((char *)"R", ns, ns, &work[1], &c__1, &tau, &t[t_offset], ldt, &work[jw + 1], (ftnlen)1); + dlarf_((char *)"R", &jw, ns, &work[1], &c__1, &tau, &v[v_offset], ldv, &work[jw + 1], + (ftnlen)1); + i__1 = *lwork - jw; + dgehrd_(&jw, &c__1, ns, &t[t_offset], ldt, &work[1], &work[jw + 1], &i__1, &info); + } + if (kwtop > 1) { + h__[kwtop + (kwtop - 1) * h_dim1] = s * v[v_dim1 + 1]; + } + dlacpy_((char *)"U", &jw, &jw, &t[t_offset], ldt, &h__[kwtop + kwtop * h_dim1], ldh, (ftnlen)1); + i__1 = jw - 1; + i__2 = *ldt + 1; + i__3 = *ldh + 1; + dcopy_(&i__1, &t[t_dim1 + 2], &i__2, &h__[kwtop + 1 + kwtop * h_dim1], &i__3); + if (*ns > 1 && s != 0.) { + i__1 = *lwork - jw; + dormhr_((char *)"R", (char *)"N", &jw, ns, &c__1, ns, &t[t_offset], ldt, &work[1], &v[v_offset], ldv, + &work[jw + 1], &i__1, &info, (ftnlen)1, (ftnlen)1); + } + if (*wantt) { + ltop = 1; + } else { + ltop = *ktop; + } + i__1 = kwtop - 1; + i__2 = *nv; + for (krow = ltop; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += i__2) { + i__3 = *nv, i__4 = kwtop - krow; + kln = min(i__3, i__4); + dgemm_((char *)"N", (char *)"N", &kln, &jw, &jw, &c_b18, &h__[krow + kwtop * h_dim1], ldh, &v[v_offset], + ldv, &c_b17, &wv[wv_offset], ldwv, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"A", &kln, &jw, &wv[wv_offset], ldwv, &h__[krow + kwtop * h_dim1], ldh, + (ftnlen)1); + } + if (*wantt) { + i__2 = *n; + i__1 = *nh; + for (kcol = *kbot + 1; i__1 < 0 ? kcol >= i__2 : kcol <= i__2; kcol += i__1) { + i__3 = *nh, i__4 = *n - kcol + 1; + kln = min(i__3, i__4); + dgemm_((char *)"C", (char *)"N", &jw, &kln, &jw, &c_b18, &v[v_offset], ldv, + &h__[kwtop + kcol * h_dim1], ldh, &c_b17, &t[t_offset], ldt, (ftnlen)1, + (ftnlen)1); + dlacpy_((char *)"A", &jw, &kln, &t[t_offset], ldt, &h__[kwtop + kcol * h_dim1], ldh, + (ftnlen)1); + } + } + if (*wantz) { + i__1 = *ihiz; + i__2 = *nv; + for (krow = *iloz; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += i__2) { + i__3 = *nv, i__4 = *ihiz - krow + 1; + kln = min(i__3, i__4); + dgemm_((char *)"N", (char *)"N", &kln, &jw, &jw, &c_b18, &z__[krow + kwtop * z_dim1], ldz, + &v[v_offset], ldv, &c_b17, &wv[wv_offset], ldwv, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"A", &kln, &jw, &wv[wv_offset], ldwv, &z__[krow + kwtop * z_dim1], ldz, + (ftnlen)1); + } + } + } + *nd = jw - *ns; + *ns -= infqr; + work[1] = (doublereal)lwkopt; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlaqr4.cpp b/lib/linalg/dlaqr4.cpp new file mode 100644 index 0000000000..e32193ee2d --- /dev/null +++ b/lib/linalg/dlaqr4.cpp @@ -0,0 +1,298 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__13 = 13; +static integer c__15 = 15; +static integer c_n1 = -1; +static integer c__12 = 12; +static integer c__14 = 14; +static integer c__16 = 16; +static logical c_false = FALSE_; +static integer c__1 = 1; +static integer c__3 = 3; +int dlaqr4_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, doublereal *h__, + integer *ldh, doublereal *wr, doublereal *wi, integer *iloz, integer *ihiz, + doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *info) +{ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4; + integer i__, k; + doublereal aa, bb, cc, dd; + integer ld; + doublereal cs; + integer nh, it, ks, kt; + doublereal sn; + integer ku, kv, ls, ns; + doublereal ss; + integer nw, inf, kdu, nho, nve, kwh, nsr, nwr, kwv, ndec, ndfl, kbot, nmin; + doublereal swap; + integer ktop; + doublereal zdum[1]; + integer kacc22, itmax, nsmax, nwmax, kwtop; + extern int dlaqr2_(logical *, logical *, integer *, integer *, integer *, integer *, + doublereal *, integer *, integer *, integer *, doublereal *, integer *, + integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *), + dlanv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *), + dlaqr5_(logical *, logical *, integer *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *, doublereal *, integer *); + integer nibble; + extern int dlahqr_(logical *, logical *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + char jbcmpz[2]; + integer nwupbd; + logical sorted; + integer lwkopt; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + *info = 0; + if (*n == 0) { + work[1] = 1.; + return 0; + } + if (*n <= 11) { + lwkopt = 1; + if (*lwork != -1) { + dlahqr_(wantt, wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[1], iloz, ihiz, + &z__[z_offset], ldz, info); + } + } else { + *info = 0; + if (*wantt) { + *(unsigned char *)jbcmpz = 'S'; + } else { + *(unsigned char *)jbcmpz = 'E'; + } + if (*wantz) { + *(unsigned char *)&jbcmpz[1] = 'V'; + } else { + *(unsigned char *)&jbcmpz[1] = 'N'; + } + nwr = ilaenv_(&c__13, (char *)"DLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + nwr = max(2, nwr); + i__1 = *ihi - *ilo + 1, i__2 = (*n - 1) / 3, i__1 = min(i__1, i__2); + nwr = min(i__1, nwr); + nsr = ilaenv_(&c__15, (char *)"DLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + i__1 = nsr, i__2 = (*n + 6) / 9, i__1 = min(i__1, i__2), i__2 = *ihi - *ilo; + nsr = min(i__1, i__2); + i__1 = 2, i__2 = nsr - nsr % 2; + nsr = max(i__1, i__2); + i__1 = nwr + 1; + dlaqr2_(wantt, wantz, n, ilo, ihi, &i__1, &h__[h_offset], ldh, iloz, ihiz, &z__[z_offset], + ldz, &ls, &ld, &wr[1], &wi[1], &h__[h_offset], ldh, n, &h__[h_offset], ldh, n, + &h__[h_offset], ldh, &work[1], &c_n1); + i__1 = nsr * 3 / 2, i__2 = (integer)work[1]; + lwkopt = max(i__1, i__2); + if (*lwork == -1) { + work[1] = (doublereal)lwkopt; + return 0; + } + nmin = ilaenv_(&c__12, (char *)"DLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + nmin = max(11, nmin); + nibble = ilaenv_(&c__14, (char *)"DLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + nibble = max(0, nibble); + kacc22 = ilaenv_(&c__16, (char *)"DLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + kacc22 = max(0, kacc22); + kacc22 = min(2, kacc22); + i__1 = (*n - 1) / 3, i__2 = *lwork / 2; + nwmax = min(i__1, i__2); + nw = nwmax; + i__1 = (*n + 6) / 9, i__2 = (*lwork << 1) / 3; + nsmax = min(i__1, i__2); + nsmax -= nsmax % 2; + ndfl = 1; + i__1 = 10, i__2 = *ihi - *ilo + 1; + itmax = max(i__1, i__2) * 30; + kbot = *ihi; + i__1 = itmax; + for (it = 1; it <= i__1; ++it) { + if (kbot < *ilo) { + goto L90; + } + i__2 = *ilo + 1; + for (k = kbot; k >= i__2; --k) { + if (h__[k + (k - 1) * h_dim1] == 0.) { + goto L20; + } + } + k = *ilo; + L20: + ktop = k; + nh = kbot - ktop + 1; + nwupbd = min(nh, nwmax); + if (ndfl < 5) { + nw = min(nwupbd, nwr); + } else { + i__2 = nwupbd, i__3 = nw << 1; + nw = min(i__2, i__3); + } + if (nw < nwmax) { + if (nw >= nh - 1) { + nw = nh; + } else { + kwtop = kbot - nw + 1; + if ((d__1 = h__[kwtop + (kwtop - 1) * h_dim1], abs(d__1)) > + (d__2 = h__[kwtop - 1 + (kwtop - 2) * h_dim1], abs(d__2))) { + ++nw; + } + } + } + if (ndfl < 5) { + ndec = -1; + } else if (ndec >= 0 || nw >= nwupbd) { + ++ndec; + if (nw - ndec < 2) { + ndec = 0; + } + nw -= ndec; + } + kv = *n - nw + 1; + kt = nw + 1; + nho = *n - nw - 1 - kt + 1; + kwv = nw + 2; + nve = *n - nw - kwv + 1; + dlaqr2_(wantt, wantz, n, &ktop, &kbot, &nw, &h__[h_offset], ldh, iloz, ihiz, + &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], &h__[kv + h_dim1], ldh, &nho, + &h__[kv + kt * h_dim1], ldh, &nve, &h__[kwv + h_dim1], ldh, &work[1], lwork); + kbot -= ld; + ks = kbot - ls + 1; + if (ld == 0 || ld * 100 <= nw * nibble && kbot - ktop + 1 > min(nmin, nwmax)) { + i__4 = 2, i__5 = kbot - ktop; + i__2 = min(nsmax, nsr), i__3 = max(i__4, i__5); + ns = min(i__2, i__3); + ns -= ns % 2; + if (ndfl % 6 == 0) { + ks = kbot - ns + 1; + i__3 = ks + 1, i__4 = ktop + 2; + i__2 = max(i__3, i__4); + for (i__ = kbot; i__ >= i__2; i__ += -2) { + ss = (d__1 = h__[i__ + (i__ - 1) * h_dim1], abs(d__1)) + + (d__2 = h__[i__ - 1 + (i__ - 2) * h_dim1], abs(d__2)); + aa = ss * .75 + h__[i__ + i__ * h_dim1]; + bb = ss; + cc = ss * -.4375; + dd = aa; + dlanv2_(&aa, &bb, &cc, &dd, &wr[i__ - 1], &wi[i__ - 1], &wr[i__], &wi[i__], + &cs, &sn); + } + if (ks == ktop) { + wr[ks + 1] = h__[ks + 1 + (ks + 1) * h_dim1]; + wi[ks + 1] = 0.; + wr[ks] = wr[ks + 1]; + wi[ks] = wi[ks + 1]; + } + } else { + if (kbot - ks + 1 <= ns / 2) { + ks = kbot - ns + 1; + kt = *n - ns + 1; + dlacpy_((char *)"A", &ns, &ns, &h__[ks + ks * h_dim1], ldh, &h__[kt + h_dim1], ldh, + (ftnlen)1); + dlahqr_(&c_false, &c_false, &ns, &c__1, &ns, &h__[kt + h_dim1], ldh, + &wr[ks], &wi[ks], &c__1, &c__1, zdum, &c__1, &inf); + ks += inf; + if (ks >= kbot) { + aa = h__[kbot - 1 + (kbot - 1) * h_dim1]; + cc = h__[kbot + (kbot - 1) * h_dim1]; + bb = h__[kbot - 1 + kbot * h_dim1]; + dd = h__[kbot + kbot * h_dim1]; + dlanv2_(&aa, &bb, &cc, &dd, &wr[kbot - 1], &wi[kbot - 1], &wr[kbot], + &wi[kbot], &cs, &sn); + ks = kbot - 1; + } + } + if (kbot - ks + 1 > ns) { + sorted = FALSE_; + i__2 = ks + 1; + for (k = kbot; k >= i__2; --k) { + if (sorted) { + goto L60; + } + sorted = TRUE_; + i__3 = k - 1; + for (i__ = ks; i__ <= i__3; ++i__) { + if ((d__1 = wr[i__], abs(d__1)) + (d__2 = wi[i__], abs(d__2)) < + (d__3 = wr[i__ + 1], abs(d__3)) + + (d__4 = wi[i__ + 1], abs(d__4))) { + sorted = FALSE_; + swap = wr[i__]; + wr[i__] = wr[i__ + 1]; + wr[i__ + 1] = swap; + swap = wi[i__]; + wi[i__] = wi[i__ + 1]; + wi[i__ + 1] = swap; + } + } + } + L60:; + } + i__2 = ks + 2; + for (i__ = kbot; i__ >= i__2; i__ += -2) { + if (wi[i__] != -wi[i__ - 1]) { + swap = wr[i__]; + wr[i__] = wr[i__ - 1]; + wr[i__ - 1] = wr[i__ - 2]; + wr[i__ - 2] = swap; + swap = wi[i__]; + wi[i__] = wi[i__ - 1]; + wi[i__ - 1] = wi[i__ - 2]; + wi[i__ - 2] = swap; + } + } + } + if (kbot - ks + 1 == 2) { + if (wi[kbot] == 0.) { + if ((d__1 = wr[kbot] - h__[kbot + kbot * h_dim1], abs(d__1)) < + (d__2 = wr[kbot - 1] - h__[kbot + kbot * h_dim1], abs(d__2))) { + wr[kbot - 1] = wr[kbot]; + } else { + wr[kbot] = wr[kbot - 1]; + } + } + } + i__2 = ns, i__3 = kbot - ks + 1; + ns = min(i__2, i__3); + ns -= ns % 2; + ks = kbot - ns + 1; + kdu = ns * 3 - 3; + ku = *n - kdu + 1; + kwh = kdu + 1; + nho = *n - kdu - 3 - (kdu + 1) + 1; + kwv = kdu + 4; + nve = *n - kdu - kwv + 1; + dlaqr5_(wantt, wantz, &kacc22, n, &ktop, &kbot, &ns, &wr[ks], &wi[ks], + &h__[h_offset], ldh, iloz, ihiz, &z__[z_offset], ldz, &work[1], &c__3, + &h__[ku + h_dim1], ldh, &nve, &h__[kwv + h_dim1], ldh, &nho, + &h__[ku + kwh * h_dim1], ldh); + } + if (ld > 0) { + ndfl = 1; + } else { + ++ndfl; + } + } + *info = kbot; + L90:; + } + work[1] = (doublereal)lwkopt; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlaqr5.cpp b/lib/linalg/dlaqr5.cpp new file mode 100644 index 0000000000..1cd0ac9d88 --- /dev/null +++ b/lib/linalg/dlaqr5.cpp @@ -0,0 +1,521 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublereal c_b7 = 0.; +static doublereal c_b8 = 1.; +static integer c__3 = 3; +static integer c__1 = 1; +static integer c__2 = 2; +int dlaqr5_(logical *wantt, logical *wantz, integer *kacc22, integer *n, integer *ktop, + integer *kbot, integer *nshfts, doublereal *sr, doublereal *si, doublereal *h__, + integer *ldh, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, + doublereal *v, integer *ldv, doublereal *u, integer *ldu, integer *nv, doublereal *wv, + integer *ldwv, integer *nh, doublereal *wh, integer *ldwh) +{ + integer h_dim1, h_offset, u_dim1, u_offset, v_dim1, v_offset, wh_dim1, wh_offset, wv_dim1, + wv_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; + doublereal d__1, d__2, d__3, d__4, d__5; + integer i__, j, k, m, i2, j2, i4, j4, k1; + doublereal h11, h12, h21, h22; + integer m22, ns, nu; + doublereal vt[3], scl; + integer kdu, kms; + doublereal ulp; + integer knz, kzs; + doublereal tst1, tst2, beta; + logical blk22, bmp22; + integer mend, jcol, jlen, jbot, mbot; + doublereal swap; + integer jtop, jrow, mtop; + doublereal alpha; + logical accum; + extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, + ftnlen, ftnlen); + integer ndcol, incol, krcol, nbmps; + extern int dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen), + dlaqr1_(integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *), + dlabad_(doublereal *, doublereal *); + extern doublereal dlamch_(char *, ftnlen); + extern int dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen); + doublereal safmin; + extern int dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, ftnlen); + doublereal safmax, refsum; + integer mstart; + doublereal smlnum; + --sr; + --si; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + wh_dim1 = *ldwh; + wh_offset = 1 + wh_dim1; + wh -= wh_offset; + if (*nshfts < 2) { + return 0; + } + if (*ktop >= *kbot) { + return 0; + } + i__1 = *nshfts - 2; + for (i__ = 1; i__ <= i__1; i__ += 2) { + if (si[i__] != -si[i__ + 1]) { + swap = sr[i__]; + sr[i__] = sr[i__ + 1]; + sr[i__ + 1] = sr[i__ + 2]; + sr[i__ + 2] = swap; + swap = si[i__]; + si[i__] = si[i__ + 1]; + si[i__ + 1] = si[i__ + 2]; + si[i__ + 2] = swap; + } + } + ns = *nshfts - *nshfts % 2; + safmin = dlamch_((char *)"SAFE MINIMUM", (ftnlen)12); + safmax = 1. / safmin; + dlabad_(&safmin, &safmax); + ulp = dlamch_((char *)"PRECISION", (ftnlen)9); + smlnum = safmin * ((doublereal)(*n) / ulp); + accum = *kacc22 == 1 || *kacc22 == 2; + blk22 = ns > 2 && *kacc22 == 2; + if (*ktop + 2 <= *kbot) { + h__[*ktop + 2 + *ktop * h_dim1] = 0.; + } + nbmps = ns / 2; + kdu = nbmps * 6 - 3; + i__1 = *kbot - 2; + i__2 = nbmps * 3 - 2; + for (incol = (1 - nbmps) * 3 + *ktop - 1; i__2 < 0 ? incol >= i__1 : incol <= i__1; + incol += i__2) { + ndcol = incol + kdu; + if (accum) { + dlaset_((char *)"ALL", &kdu, &kdu, &c_b7, &c_b8, &u[u_offset], ldu, (ftnlen)3); + } + i__4 = incol + nbmps * 3 - 3, i__5 = *kbot - 2; + i__3 = min(i__4, i__5); + for (krcol = incol; krcol <= i__3; ++krcol) { + i__4 = 1, i__5 = (*ktop - 1 - krcol + 2) / 3 + 1; + mtop = max(i__4, i__5); + i__4 = nbmps, i__5 = (*kbot - krcol) / 3; + mbot = min(i__4, i__5); + m22 = mbot + 1; + bmp22 = mbot < nbmps && krcol + (m22 - 1) * 3 == *kbot - 2; + i__4 = mbot; + for (m = mtop; m <= i__4; ++m) { + k = krcol + (m - 1) * 3; + if (k == *ktop - 1) { + dlaqr1_(&c__3, &h__[*ktop + *ktop * h_dim1], ldh, &sr[(m << 1) - 1], + &si[(m << 1) - 1], &sr[m * 2], &si[m * 2], &v[m * v_dim1 + 1]); + alpha = v[m * v_dim1 + 1]; + dlarfg_(&c__3, &alpha, &v[m * v_dim1 + 2], &c__1, &v[m * v_dim1 + 1]); + } else { + beta = h__[k + 1 + k * h_dim1]; + v[m * v_dim1 + 2] = h__[k + 2 + k * h_dim1]; + v[m * v_dim1 + 3] = h__[k + 3 + k * h_dim1]; + dlarfg_(&c__3, &beta, &v[m * v_dim1 + 2], &c__1, &v[m * v_dim1 + 1]); + if (h__[k + 3 + k * h_dim1] != 0. || h__[k + 3 + (k + 1) * h_dim1] != 0. || + h__[k + 3 + (k + 2) * h_dim1] == 0.) { + h__[k + 1 + k * h_dim1] = beta; + h__[k + 2 + k * h_dim1] = 0.; + h__[k + 3 + k * h_dim1] = 0.; + } else { + dlaqr1_(&c__3, &h__[k + 1 + (k + 1) * h_dim1], ldh, &sr[(m << 1) - 1], + &si[(m << 1) - 1], &sr[m * 2], &si[m * 2], vt); + alpha = vt[0]; + dlarfg_(&c__3, &alpha, &vt[1], &c__1, vt); + refsum = + vt[0] * (h__[k + 1 + k * h_dim1] + vt[1] * h__[k + 2 + k * h_dim1]); + if ((d__1 = h__[k + 2 + k * h_dim1] - refsum * vt[1], abs(d__1)) + + (d__2 = refsum * vt[2], abs(d__2)) > + ulp * ((d__3 = h__[k + k * h_dim1], abs(d__3)) + + (d__4 = h__[k + 1 + (k + 1) * h_dim1], abs(d__4)) + + (d__5 = h__[k + 2 + (k + 2) * h_dim1], abs(d__5)))) { + h__[k + 1 + k * h_dim1] = beta; + h__[k + 2 + k * h_dim1] = 0.; + h__[k + 3 + k * h_dim1] = 0.; + } else { + h__[k + 1 + k * h_dim1] -= refsum; + h__[k + 2 + k * h_dim1] = 0.; + h__[k + 3 + k * h_dim1] = 0.; + v[m * v_dim1 + 1] = vt[0]; + v[m * v_dim1 + 2] = vt[1]; + v[m * v_dim1 + 3] = vt[2]; + } + } + } + } + k = krcol + (m22 - 1) * 3; + if (bmp22) { + if (k == *ktop - 1) { + dlaqr1_(&c__2, &h__[k + 1 + (k + 1) * h_dim1], ldh, &sr[(m22 << 1) - 1], + &si[(m22 << 1) - 1], &sr[m22 * 2], &si[m22 * 2], &v[m22 * v_dim1 + 1]); + beta = v[m22 * v_dim1 + 1]; + dlarfg_(&c__2, &beta, &v[m22 * v_dim1 + 2], &c__1, &v[m22 * v_dim1 + 1]); + } else { + beta = h__[k + 1 + k * h_dim1]; + v[m22 * v_dim1 + 2] = h__[k + 2 + k * h_dim1]; + dlarfg_(&c__2, &beta, &v[m22 * v_dim1 + 2], &c__1, &v[m22 * v_dim1 + 1]); + h__[k + 1 + k * h_dim1] = beta; + h__[k + 2 + k * h_dim1] = 0.; + } + } + if (accum) { + jbot = min(ndcol, *kbot); + } else if (*wantt) { + jbot = *n; + } else { + jbot = *kbot; + } + i__4 = jbot; + for (j = max(*ktop, krcol); j <= i__4; ++j) { + i__5 = mbot, i__6 = (j - krcol + 2) / 3; + mend = min(i__5, i__6); + i__5 = mend; + for (m = mtop; m <= i__5; ++m) { + k = krcol + (m - 1) * 3; + refsum = v[m * v_dim1 + 1] * (h__[k + 1 + j * h_dim1] + + v[m * v_dim1 + 2] * h__[k + 2 + j * h_dim1] + + v[m * v_dim1 + 3] * h__[k + 3 + j * h_dim1]); + h__[k + 1 + j * h_dim1] -= refsum; + h__[k + 2 + j * h_dim1] -= refsum * v[m * v_dim1 + 2]; + h__[k + 3 + j * h_dim1] -= refsum * v[m * v_dim1 + 3]; + } + } + if (bmp22) { + k = krcol + (m22 - 1) * 3; + i__4 = k + 1; + i__5 = jbot; + for (j = max(i__4, *ktop); j <= i__5; ++j) { + refsum = v[m22 * v_dim1 + 1] * (h__[k + 1 + j * h_dim1] + + v[m22 * v_dim1 + 2] * h__[k + 2 + j * h_dim1]); + h__[k + 1 + j * h_dim1] -= refsum; + h__[k + 2 + j * h_dim1] -= refsum * v[m22 * v_dim1 + 2]; + } + } + if (accum) { + jtop = max(*ktop, incol); + } else if (*wantt) { + jtop = 1; + } else { + jtop = *ktop; + } + i__5 = mbot; + for (m = mtop; m <= i__5; ++m) { + if (v[m * v_dim1 + 1] != 0.) { + k = krcol + (m - 1) * 3; + i__6 = *kbot, i__7 = k + 3; + i__4 = min(i__6, i__7); + for (j = jtop; j <= i__4; ++j) { + refsum = + v[m * v_dim1 + 1] * (h__[j + (k + 1) * h_dim1] + + v[m * v_dim1 + 2] * h__[j + (k + 2) * h_dim1] + + v[m * v_dim1 + 3] * h__[j + (k + 3) * h_dim1]); + h__[j + (k + 1) * h_dim1] -= refsum; + h__[j + (k + 2) * h_dim1] -= refsum * v[m * v_dim1 + 2]; + h__[j + (k + 3) * h_dim1] -= refsum * v[m * v_dim1 + 3]; + } + if (accum) { + kms = k - incol; + i__4 = 1, i__6 = *ktop - incol; + i__7 = kdu; + for (j = max(i__4, i__6); j <= i__7; ++j) { + refsum = + v[m * v_dim1 + 1] * (u[j + (kms + 1) * u_dim1] + + v[m * v_dim1 + 2] * u[j + (kms + 2) * u_dim1] + + v[m * v_dim1 + 3] * u[j + (kms + 3) * u_dim1]); + u[j + (kms + 1) * u_dim1] -= refsum; + u[j + (kms + 2) * u_dim1] -= refsum * v[m * v_dim1 + 2]; + u[j + (kms + 3) * u_dim1] -= refsum * v[m * v_dim1 + 3]; + } + } else if (*wantz) { + i__7 = *ihiz; + for (j = *iloz; j <= i__7; ++j) { + refsum = + v[m * v_dim1 + 1] * (z__[j + (k + 1) * z_dim1] + + v[m * v_dim1 + 2] * z__[j + (k + 2) * z_dim1] + + v[m * v_dim1 + 3] * z__[j + (k + 3) * z_dim1]); + z__[j + (k + 1) * z_dim1] -= refsum; + z__[j + (k + 2) * z_dim1] -= refsum * v[m * v_dim1 + 2]; + z__[j + (k + 3) * z_dim1] -= refsum * v[m * v_dim1 + 3]; + } + } + } + } + k = krcol + (m22 - 1) * 3; + if (bmp22) { + if (v[m22 * v_dim1 + 1] != 0.) { + i__7 = *kbot, i__4 = k + 3; + i__5 = min(i__7, i__4); + for (j = jtop; j <= i__5; ++j) { + refsum = + v[m22 * v_dim1 + 1] * (h__[j + (k + 1) * h_dim1] + + v[m22 * v_dim1 + 2] * h__[j + (k + 2) * h_dim1]); + h__[j + (k + 1) * h_dim1] -= refsum; + h__[j + (k + 2) * h_dim1] -= refsum * v[m22 * v_dim1 + 2]; + } + if (accum) { + kms = k - incol; + i__5 = 1, i__7 = *ktop - incol; + i__4 = kdu; + for (j = max(i__5, i__7); j <= i__4; ++j) { + refsum = v[m22 * v_dim1 + 1] * + (u[j + (kms + 1) * u_dim1] + + v[m22 * v_dim1 + 2] * u[j + (kms + 2) * u_dim1]); + u[j + (kms + 1) * u_dim1] -= refsum; + u[j + (kms + 2) * u_dim1] -= refsum * v[m22 * v_dim1 + 2]; + } + } else if (*wantz) { + i__4 = *ihiz; + for (j = *iloz; j <= i__4; ++j) { + refsum = v[m22 * v_dim1 + 1] * + (z__[j + (k + 1) * z_dim1] + + v[m22 * v_dim1 + 2] * z__[j + (k + 2) * z_dim1]); + z__[j + (k + 1) * z_dim1] -= refsum; + z__[j + (k + 2) * z_dim1] -= refsum * v[m22 * v_dim1 + 2]; + } + } + } + } + mstart = mtop; + if (krcol + (mstart - 1) * 3 < *ktop) { + ++mstart; + } + mend = mbot; + if (bmp22) { + ++mend; + } + if (krcol == *kbot - 2) { + ++mend; + } + i__4 = mend; + for (m = mstart; m <= i__4; ++m) { + i__5 = *kbot - 1, i__7 = krcol + (m - 1) * 3; + k = min(i__5, i__7); + if (h__[k + 1 + k * h_dim1] != 0.) { + tst1 = (d__1 = h__[k + k * h_dim1], abs(d__1)) + + (d__2 = h__[k + 1 + (k + 1) * h_dim1], abs(d__2)); + if (tst1 == 0.) { + if (k >= *ktop + 1) { + tst1 += (d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)); + } + if (k >= *ktop + 2) { + tst1 += (d__1 = h__[k + (k - 2) * h_dim1], abs(d__1)); + } + if (k >= *ktop + 3) { + tst1 += (d__1 = h__[k + (k - 3) * h_dim1], abs(d__1)); + } + if (k <= *kbot - 2) { + tst1 += (d__1 = h__[k + 2 + (k + 1) * h_dim1], abs(d__1)); + } + if (k <= *kbot - 3) { + tst1 += (d__1 = h__[k + 3 + (k + 1) * h_dim1], abs(d__1)); + } + if (k <= *kbot - 4) { + tst1 += (d__1 = h__[k + 4 + (k + 1) * h_dim1], abs(d__1)); + } + } + d__2 = smlnum, d__3 = ulp * tst1; + if ((d__1 = h__[k + 1 + k * h_dim1], abs(d__1)) <= max(d__2, d__3)) { + d__3 = (d__1 = h__[k + 1 + k * h_dim1], abs(d__1)), + d__4 = (d__2 = h__[k + (k + 1) * h_dim1], abs(d__2)); + h12 = max(d__3, d__4); + d__3 = (d__1 = h__[k + 1 + k * h_dim1], abs(d__1)), + d__4 = (d__2 = h__[k + (k + 1) * h_dim1], abs(d__2)); + h21 = min(d__3, d__4); + d__3 = (d__1 = h__[k + 1 + (k + 1) * h_dim1], abs(d__1)), + d__4 = + (d__2 = h__[k + k * h_dim1] - h__[k + 1 + (k + 1) * h_dim1], abs(d__2)); + h11 = max(d__3, d__4); + d__3 = (d__1 = h__[k + 1 + (k + 1) * h_dim1], abs(d__1)), + d__4 = + (d__2 = h__[k + k * h_dim1] - h__[k + 1 + (k + 1) * h_dim1], abs(d__2)); + h22 = min(d__3, d__4); + scl = h11 + h12; + tst2 = h22 * (h11 / scl); + d__1 = smlnum, d__2 = ulp * tst2; + if (tst2 == 0. || h21 * (h12 / scl) <= max(d__1, d__2)) { + h__[k + 1 + k * h_dim1] = 0.; + } + } + } + } + i__4 = nbmps, i__5 = (*kbot - krcol - 1) / 3; + mend = min(i__4, i__5); + i__4 = mend; + for (m = mtop; m <= i__4; ++m) { + k = krcol + (m - 1) * 3; + refsum = v[m * v_dim1 + 1] * v[m * v_dim1 + 3] * h__[k + 4 + (k + 3) * h_dim1]; + h__[k + 4 + (k + 1) * h_dim1] = -refsum; + h__[k + 4 + (k + 2) * h_dim1] = -refsum * v[m * v_dim1 + 2]; + h__[k + 4 + (k + 3) * h_dim1] -= refsum * v[m * v_dim1 + 3]; + } + } + if (accum) { + if (*wantt) { + jtop = 1; + jbot = *n; + } else { + jtop = *ktop; + jbot = *kbot; + } + if (!blk22 || incol < *ktop || ndcol > *kbot || ns <= 2) { + i__3 = 1, i__4 = *ktop - incol; + k1 = max(i__3, i__4); + i__3 = 0, i__4 = ndcol - *kbot; + nu = kdu - max(i__3, i__4) - k1 + 1; + i__3 = jbot; + i__4 = *nh; + for (jcol = min(ndcol, *kbot) + 1; i__4 < 0 ? jcol >= i__3 : jcol <= i__3; + jcol += i__4) { + i__5 = *nh, i__7 = jbot - jcol + 1; + jlen = min(i__5, i__7); + dgemm_((char *)"C", (char *)"N", &nu, &jlen, &nu, &c_b8, &u[k1 + k1 * u_dim1], ldu, + &h__[incol + k1 + jcol * h_dim1], ldh, &c_b7, &wh[wh_offset], ldwh, + (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"ALL", &nu, &jlen, &wh[wh_offset], ldwh, + &h__[incol + k1 + jcol * h_dim1], ldh, (ftnlen)3); + } + i__4 = max(*ktop, incol) - 1; + i__3 = *nv; + for (jrow = jtop; i__3 < 0 ? jrow >= i__4 : jrow <= i__4; jrow += i__3) { + i__5 = *nv, i__7 = max(*ktop, incol) - jrow; + jlen = min(i__5, i__7); + dgemm_((char *)"N", (char *)"N", &jlen, &nu, &nu, &c_b8, &h__[jrow + (incol + k1) * h_dim1], + ldh, &u[k1 + k1 * u_dim1], ldu, &c_b7, &wv[wv_offset], ldwv, (ftnlen)1, + (ftnlen)1); + dlacpy_((char *)"ALL", &jlen, &nu, &wv[wv_offset], ldwv, + &h__[jrow + (incol + k1) * h_dim1], ldh, (ftnlen)3); + } + if (*wantz) { + i__3 = *ihiz; + i__4 = *nv; + for (jrow = *iloz; i__4 < 0 ? jrow >= i__3 : jrow <= i__3; jrow += i__4) { + i__5 = *nv, i__7 = *ihiz - jrow + 1; + jlen = min(i__5, i__7); + dgemm_((char *)"N", (char *)"N", &jlen, &nu, &nu, &c_b8, &z__[jrow + (incol + k1) * z_dim1], + ldz, &u[k1 + k1 * u_dim1], ldu, &c_b7, &wv[wv_offset], ldwv, + (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"ALL", &jlen, &nu, &wv[wv_offset], ldwv, + &z__[jrow + (incol + k1) * z_dim1], ldz, (ftnlen)3); + } + } + } else { + i2 = (kdu + 1) / 2; + i4 = kdu; + j2 = i4 - i2; + j4 = kdu; + kzs = j4 - j2 - (ns + 1); + knz = ns + 1; + i__4 = jbot; + i__3 = *nh; + for (jcol = min(ndcol, *kbot) + 1; i__3 < 0 ? jcol >= i__4 : jcol <= i__4; + jcol += i__3) { + i__5 = *nh, i__7 = jbot - jcol + 1; + jlen = min(i__5, i__7); + dlacpy_((char *)"ALL", &knz, &jlen, &h__[incol + 1 + j2 + jcol * h_dim1], ldh, + &wh[kzs + 1 + wh_dim1], ldwh, (ftnlen)3); + dlaset_((char *)"ALL", &kzs, &jlen, &c_b7, &c_b7, &wh[wh_offset], ldwh, (ftnlen)3); + dtrmm_((char *)"L", (char *)"U", (char *)"C", (char *)"N", &knz, &jlen, &c_b8, &u[j2 + 1 + (kzs + 1) * u_dim1], + ldu, &wh[kzs + 1 + wh_dim1], ldwh, (ftnlen)1, (ftnlen)1, (ftnlen)1, + (ftnlen)1); + dgemm_((char *)"C", (char *)"N", &i2, &jlen, &j2, &c_b8, &u[u_offset], ldu, + &h__[incol + 1 + jcol * h_dim1], ldh, &c_b8, &wh[wh_offset], ldwh, + (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"ALL", &j2, &jlen, &h__[incol + 1 + jcol * h_dim1], ldh, + &wh[i2 + 1 + wh_dim1], ldwh, (ftnlen)3); + dtrmm_((char *)"L", (char *)"L", (char *)"C", (char *)"N", &j2, &jlen, &c_b8, &u[(i2 + 1) * u_dim1 + 1], ldu, + &wh[i2 + 1 + wh_dim1], ldwh, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__5 = i4 - i2; + i__7 = j4 - j2; + dgemm_((char *)"C", (char *)"N", &i__5, &jlen, &i__7, &c_b8, &u[j2 + 1 + (i2 + 1) * u_dim1], + ldu, &h__[incol + 1 + j2 + jcol * h_dim1], ldh, &c_b8, + &wh[i2 + 1 + wh_dim1], ldwh, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"ALL", &kdu, &jlen, &wh[wh_offset], ldwh, + &h__[incol + 1 + jcol * h_dim1], ldh, (ftnlen)3); + } + i__3 = max(incol, *ktop) - 1; + i__4 = *nv; + for (jrow = jtop; i__4 < 0 ? jrow >= i__3 : jrow <= i__3; jrow += i__4) { + i__5 = *nv, i__7 = max(incol, *ktop) - jrow; + jlen = min(i__5, i__7); + dlacpy_((char *)"ALL", &jlen, &knz, &h__[jrow + (incol + 1 + j2) * h_dim1], ldh, + &wv[(kzs + 1) * wv_dim1 + 1], ldwv, (ftnlen)3); + dlaset_((char *)"ALL", &jlen, &kzs, &c_b7, &c_b7, &wv[wv_offset], ldwv, (ftnlen)3); + dtrmm_((char *)"R", (char *)"U", (char *)"N", (char *)"N", &jlen, &knz, &c_b8, &u[j2 + 1 + (kzs + 1) * u_dim1], + ldu, &wv[(kzs + 1) * wv_dim1 + 1], ldwv, (ftnlen)1, (ftnlen)1, (ftnlen)1, + (ftnlen)1); + dgemm_((char *)"N", (char *)"N", &jlen, &i2, &j2, &c_b8, &h__[jrow + (incol + 1) * h_dim1], ldh, + &u[u_offset], ldu, &c_b8, &wv[wv_offset], ldwv, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"ALL", &jlen, &j2, &h__[jrow + (incol + 1) * h_dim1], ldh, + &wv[(i2 + 1) * wv_dim1 + 1], ldwv, (ftnlen)3); + i__5 = i4 - i2; + dtrmm_((char *)"R", (char *)"L", (char *)"N", (char *)"N", &jlen, &i__5, &c_b8, &u[(i2 + 1) * u_dim1 + 1], ldu, + &wv[(i2 + 1) * wv_dim1 + 1], ldwv, (ftnlen)1, (ftnlen)1, (ftnlen)1, + (ftnlen)1); + i__5 = i4 - i2; + i__7 = j4 - j2; + dgemm_((char *)"N", (char *)"N", &jlen, &i__5, &i__7, &c_b8, + &h__[jrow + (incol + 1 + j2) * h_dim1], ldh, + &u[j2 + 1 + (i2 + 1) * u_dim1], ldu, &c_b8, &wv[(i2 + 1) * wv_dim1 + 1], + ldwv, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"ALL", &jlen, &kdu, &wv[wv_offset], ldwv, + &h__[jrow + (incol + 1) * h_dim1], ldh, (ftnlen)3); + } + if (*wantz) { + i__4 = *ihiz; + i__3 = *nv; + for (jrow = *iloz; i__3 < 0 ? jrow >= i__4 : jrow <= i__4; jrow += i__3) { + i__5 = *nv, i__7 = *ihiz - jrow + 1; + jlen = min(i__5, i__7); + dlacpy_((char *)"ALL", &jlen, &knz, &z__[jrow + (incol + 1 + j2) * z_dim1], ldz, + &wv[(kzs + 1) * wv_dim1 + 1], ldwv, (ftnlen)3); + dlaset_((char *)"ALL", &jlen, &kzs, &c_b7, &c_b7, &wv[wv_offset], ldwv, (ftnlen)3); + dtrmm_((char *)"R", (char *)"U", (char *)"N", (char *)"N", &jlen, &knz, &c_b8, + &u[j2 + 1 + (kzs + 1) * u_dim1], ldu, &wv[(kzs + 1) * wv_dim1 + 1], + ldwv, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + dgemm_((char *)"N", (char *)"N", &jlen, &i2, &j2, &c_b8, &z__[jrow + (incol + 1) * z_dim1], + ldz, &u[u_offset], ldu, &c_b8, &wv[wv_offset], ldwv, (ftnlen)1, + (ftnlen)1); + dlacpy_((char *)"ALL", &jlen, &j2, &z__[jrow + (incol + 1) * z_dim1], ldz, + &wv[(i2 + 1) * wv_dim1 + 1], ldwv, (ftnlen)3); + i__5 = i4 - i2; + dtrmm_((char *)"R", (char *)"L", (char *)"N", (char *)"N", &jlen, &i__5, &c_b8, &u[(i2 + 1) * u_dim1 + 1], + ldu, &wv[(i2 + 1) * wv_dim1 + 1], ldwv, (ftnlen)1, (ftnlen)1, + (ftnlen)1, (ftnlen)1); + i__5 = i4 - i2; + i__7 = j4 - j2; + dgemm_((char *)"N", (char *)"N", &jlen, &i__5, &i__7, &c_b8, + &z__[jrow + (incol + 1 + j2) * z_dim1], ldz, + &u[j2 + 1 + (i2 + 1) * u_dim1], ldu, &c_b8, + &wv[(i2 + 1) * wv_dim1 + 1], ldwv, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"ALL", &jlen, &kdu, &wv[wv_offset], ldwv, + &z__[jrow + (incol + 1) * z_dim1], ldz, (ftnlen)3); + } + } + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlarfx.cpp b/lib/linalg/dlarfx.cpp new file mode 100644 index 0000000000..44d73f27a9 --- /dev/null +++ b/lib/linalg/dlarfx.cpp @@ -0,0 +1,552 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +int dlarfx_(char *side, integer *m, integer *n, doublereal *v, doublereal *tau, doublereal *c__, + integer *ldc, doublereal *work, ftnlen side_len) +{ + integer c_dim1, c_offset, i__1; + integer j; + doublereal t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4, v5, v6, v7, v8, v9, t10, v10, + sum; + extern int dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, ftnlen); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + if (*tau == 0.) { + return 0; + } + if (lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1)) { + switch (*m) { + case 1: + goto L10; + case 2: + goto L30; + case 3: + goto L50; + case 4: + goto L70; + case 5: + goto L90; + case 6: + goto L110; + case 7: + goto L130; + case 8: + goto L150; + case 9: + goto L170; + case 10: + goto L190; + } + dlarf_(side, m, n, &v[1], &c__1, tau, &c__[c_offset], ldc, &work[1], (ftnlen)1); + goto L410; + L10: + t1 = 1. - *tau * v[1] * v[1]; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + c__[j * c_dim1 + 1] = t1 * c__[j * c_dim1 + 1]; + } + goto L410; + L30: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + } + goto L410; + L50: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + } + goto L410; + L70: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3] + + v4 * c__[j * c_dim1 + 4]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + } + goto L410; + L90: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3] + + v4 * c__[j * c_dim1 + 4] + v5 * c__[j * c_dim1 + 5]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + } + goto L410; + L110: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3] + + v4 * c__[j * c_dim1 + 4] + v5 * c__[j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + } + goto L410; + L130: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3] + + v4 * c__[j * c_dim1 + 4] + v5 * c__[j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + + v7 * c__[j * c_dim1 + 7]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + } + goto L410; + L150: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3] + + v4 * c__[j * c_dim1 + 4] + v5 * c__[j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + + v7 * c__[j * c_dim1 + 7] + v8 * c__[j * c_dim1 + 8]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + c__[j * c_dim1 + 8] -= sum * t8; + } + goto L410; + L170: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3] + + v4 * c__[j * c_dim1 + 4] + v5 * c__[j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + + v7 * c__[j * c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j * c_dim1 + 9]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + c__[j * c_dim1 + 8] -= sum * t8; + c__[j * c_dim1 + 9] -= sum * t9; + } + goto L410; + L190: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + v10 = v[10]; + t10 = *tau * v10; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3] + + v4 * c__[j * c_dim1 + 4] + v5 * c__[j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + + v7 * c__[j * c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j * c_dim1 + 9] + + v10 * c__[j * c_dim1 + 10]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + c__[j * c_dim1 + 8] -= sum * t8; + c__[j * c_dim1 + 9] -= sum * t9; + c__[j * c_dim1 + 10] -= sum * t10; + } + goto L410; + } else { + switch (*n) { + case 1: + goto L210; + case 2: + goto L230; + case 3: + goto L250; + case 4: + goto L270; + case 5: + goto L290; + case 6: + goto L310; + case 7: + goto L330; + case 8: + goto L350; + case 9: + goto L370; + case 10: + goto L390; + } + dlarf_(side, m, n, &v[1], &c__1, tau, &c__[c_offset], ldc, &work[1], (ftnlen)1); + goto L410; + L210: + t1 = 1. - *tau * v[1] * v[1]; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + c__[j + c_dim1] = t1 * c__[j + c_dim1]; + } + goto L410; + L230: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + } + goto L410; + L250: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + } + goto L410; + L270: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3] + + v4 * c__[j + (c_dim1 << 2)]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + } + goto L410; + L290: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3] + + v4 * c__[j + (c_dim1 << 2)] + v5 * c__[j + c_dim1 * 5]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + } + goto L410; + L310: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3] + + v4 * c__[j + (c_dim1 << 2)] + v5 * c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + } + goto L410; + L330: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3] + + v4 * c__[j + (c_dim1 << 2)] + v5 * c__[j + c_dim1 * 5] + + v6 * c__[j + c_dim1 * 6] + v7 * c__[j + c_dim1 * 7]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + } + goto L410; + L350: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3] + + v4 * c__[j + (c_dim1 << 2)] + v5 * c__[j + c_dim1 * 5] + + v6 * c__[j + c_dim1 * 6] + v7 * c__[j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + c__[j + (c_dim1 << 3)] -= sum * t8; + } + goto L410; + L370: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3] + + v4 * c__[j + (c_dim1 << 2)] + v5 * c__[j + c_dim1 * 5] + + v6 * c__[j + c_dim1 * 6] + v7 * c__[j + c_dim1 * 7] + + v8 * c__[j + (c_dim1 << 3)] + v9 * c__[j + c_dim1 * 9]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + c__[j + (c_dim1 << 3)] -= sum * t8; + c__[j + c_dim1 * 9] -= sum * t9; + } + goto L410; + L390: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + v10 = v[10]; + t10 = *tau * v10; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3] + + v4 * c__[j + (c_dim1 << 2)] + v5 * c__[j + c_dim1 * 5] + + v6 * c__[j + c_dim1 * 6] + v7 * c__[j + c_dim1 * 7] + + v8 * c__[j + (c_dim1 << 3)] + v9 * c__[j + c_dim1 * 9] + + v10 * c__[j + c_dim1 * 10]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + c__[j + (c_dim1 << 3)] -= sum * t8; + c__[j + c_dim1 * 9] -= sum * t9; + c__[j + c_dim1 * 10] -= sum * t10; + } + goto L410; + } +L410: + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlasd0.cpp b/lib/linalg/dlasd0.cpp new file mode 100644 index 0000000000..006c379fa9 --- /dev/null +++ b/lib/linalg/dlasd0.cpp @@ -0,0 +1,143 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__0 = 0; +static integer c__2 = 2; +int dlasd0_(integer *n, integer *sqre, doublereal *d__, doublereal *e, doublereal *u, integer *ldu, + doublereal *vt, integer *ldvt, integer *smlsiz, integer *iwork, doublereal *work, + integer *info) +{ + integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; + integer pow_lmp_ii(integer *, integer *); + integer i__, j, m, i1, ic, lf, nd, ll, nl, nr, im1, ncc, nlf, nrf, iwk, lvl, ndb1, nlp1, nrp1; + doublereal beta; + integer idxq, nlvl; + doublereal alpha; + integer inode, ndiml, idxqc, ndimr, itemp, sqrei; + extern int dlasd1_(integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *, integer *, + doublereal *, integer *), + dlasdq_(char *, integer *, integer *, integer *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, ftnlen), + dlasdt_(integer *, integer *, integer *, integer *, integer *, integer *, integer *), + xerbla_(char *, integer *, ftnlen); + --d__; + --e; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --iwork; + --work; + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*sqre < 0 || *sqre > 1) { + *info = -2; + } + m = *n + *sqre; + if (*ldu < *n) { + *info = -6; + } else if (*ldvt < m) { + *info = -8; + } else if (*smlsiz < 3) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLASD0", &i__1, (ftnlen)6); + return 0; + } + if (*n <= *smlsiz) { + dlasdq_((char *)"U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[u_offset], ldu, + &u[u_offset], ldu, &work[1], info, (ftnlen)1); + return 0; + } + inode = 1; + ndiml = inode + *n; + ndimr = ndiml + *n; + idxq = ndimr + *n; + iwk = idxq + *n; + dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], smlsiz); + ndb1 = (nd + 1) / 2; + ncc = 0; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nlp1 = nl + 1; + nr = iwork[ndimr + i1]; + nrp1 = nr + 1; + nlf = ic - nl; + nrf = ic + 1; + sqrei = 1; + dlasdq_((char *)"U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], &vt[nlf + nlf * vt_dim1], + ldvt, &u[nlf + nlf * u_dim1], ldu, &u[nlf + nlf * u_dim1], ldu, &work[1], info, + (ftnlen)1); + if (*info != 0) { + return 0; + } + itemp = idxq + nlf - 2; + i__2 = nl; + for (j = 1; j <= i__2; ++j) { + iwork[itemp + j] = j; + } + if (i__ == nd) { + sqrei = *sqre; + } else { + sqrei = 1; + } + nrp1 = nr + sqrei; + dlasdq_((char *)"U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], &vt[nrf + nrf * vt_dim1], + ldvt, &u[nrf + nrf * u_dim1], ldu, &u[nrf + nrf * u_dim1], ldu, &work[1], info, + (ftnlen)1); + if (*info != 0) { + return 0; + } + itemp = idxq + ic; + i__2 = nr; + for (j = 1; j <= i__2; ++j) { + iwork[itemp + j - 1] = j; + } + } + for (lvl = nlvl; lvl >= 1; --lvl) { + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__1 = lvl - 1; + lf = pow_lmp_ii(&c__2, &i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; + for (i__ = lf; i__ <= i__1; ++i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + if (*sqre == 0 && i__ == ll) { + sqrei = *sqre; + } else { + sqrei = 1; + } + idxqc = idxq + nlf - 1; + alpha = d__[ic]; + beta = e[ic]; + dlasd1_(&nl, &nr, &sqrei, &d__[nlf], &alpha, &beta, &u[nlf + nlf * u_dim1], ldu, + &vt[nlf + nlf * vt_dim1], ldvt, &iwork[idxqc], &iwork[iwk], &work[1], info); + if (*info != 0) { + return 0; + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlasd1.cpp b/lib/linalg/dlasd1.cpp new file mode 100644 index 0000000000..e7b7fba747 --- /dev/null +++ b/lib/linalg/dlasd1.cpp @@ -0,0 +1,96 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__0 = 0; +static doublereal c_b7 = 1.; +static integer c__1 = 1; +static integer c_n1 = -1; +int dlasd1_(integer *nl, integer *nr, integer *sqre, doublereal *d__, doublereal *alpha, + doublereal *beta, doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, + integer *idxq, integer *iwork, doublereal *work, integer *info) +{ + integer u_dim1, u_offset, vt_dim1, vt_offset, i__1; + doublereal d__1, d__2; + integer i__, k, m, n, n1, n2, iq, iz, iu2, ldq, idx, ldu2, ivt2, idxc, idxp, ldvt2; + extern int dlasd2_(integer *, integer *, integer *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, + integer *, integer *, integer *, integer *, integer *), + dlasd3_(integer *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, + doublereal *, integer *, integer *, ftnlen), + dlamrg_(integer *, integer *, doublereal *, integer *, integer *, integer *); + integer isigma; + extern int xerbla_(char *, integer *, ftnlen); + doublereal orgnrm; + integer coltyp; + --d__; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --idxq; + --iwork; + --work; + *info = 0; + if (*nl < 1) { + *info = -1; + } else if (*nr < 1) { + *info = -2; + } else if (*sqre < 0 || *sqre > 1) { + *info = -3; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLASD1", &i__1, (ftnlen)6); + return 0; + } + n = *nl + *nr + 1; + m = n + *sqre; + ldu2 = n; + ldvt2 = m; + iz = 1; + isigma = iz + m; + iu2 = isigma + n; + ivt2 = iu2 + ldu2 * n; + iq = ivt2 + ldvt2 * m; + idx = 1; + idxc = idx + n; + coltyp = idxc + n; + idxp = coltyp + n; + d__1 = abs(*alpha), d__2 = abs(*beta); + orgnrm = max(d__1, d__2); + d__[*nl + 1] = 0.; + i__1 = n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) > orgnrm) { + orgnrm = (d__1 = d__[i__], abs(d__1)); + } + } + dlascl_((char *)"G", &c__0, &c__0, &orgnrm, &c_b7, &n, &c__1, &d__[1], &n, info, (ftnlen)1); + *alpha /= orgnrm; + *beta /= orgnrm; + dlasd2_(nl, nr, sqre, &k, &d__[1], &work[iz], alpha, beta, &u[u_offset], ldu, &vt[vt_offset], + ldvt, &work[isigma], &work[iu2], &ldu2, &work[ivt2], &ldvt2, &iwork[idxp], &iwork[idx], + &iwork[idxc], &idxq[1], &iwork[coltyp], info); + ldq = k; + dlasd3_(nl, nr, sqre, &k, &d__[1], &work[iq], &ldq, &work[isigma], &u[u_offset], ldu, + &work[iu2], &ldu2, &vt[vt_offset], ldvt, &work[ivt2], &ldvt2, &iwork[idxc], + &iwork[coltyp], &work[iz], info); + if (*info != 0) { + return 0; + } + dlascl_((char *)"G", &c__0, &c__0, &c_b7, &orgnrm, &n, &c__1, &d__[1], &n, info, (ftnlen)1); + n1 = k; + n2 = n - k; + dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlasd2.cpp b/lib/linalg/dlasd2.cpp new file mode 100644 index 0000000000..36562850e0 --- /dev/null +++ b/lib/linalg/dlasd2.cpp @@ -0,0 +1,282 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static doublereal c_b30 = 0.; +int dlasd2_(integer *nl, integer *nr, integer *sqre, integer *k, doublereal *d__, doublereal *z__, + doublereal *alpha, doublereal *beta, doublereal *u, integer *ldu, doublereal *vt, + integer *ldvt, doublereal *dsigma, doublereal *u2, integer *ldu2, doublereal *vt2, + integer *ldvt2, integer *idxp, integer *idx, integer *idxc, integer *idxq, + integer *coltyp, integer *info) +{ + integer u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset, vt2_dim1, vt2_offset, i__1; + doublereal d__1, d__2; + doublereal c__; + integer i__, j, m, n; + doublereal s; + integer k2; + doublereal z1; + integer ct, jp; + doublereal eps, tau, tol; + integer psm[4], nlp1, nlp2, idxi, idxj; + extern int drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *); + integer ctot[4], idxjp; + extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); + integer jprev; + extern doublereal dlapy2_(doublereal *, doublereal *), dlamch_(char *, ftnlen); + extern int dlamrg_(integer *, integer *, doublereal *, integer *, integer *, integer *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen), + dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, + ftnlen), + xerbla_(char *, integer *, ftnlen); + doublereal hlftol; + --d__; + --z__; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --dsigma; + u2_dim1 = *ldu2; + u2_offset = 1 + u2_dim1; + u2 -= u2_offset; + vt2_dim1 = *ldvt2; + vt2_offset = 1 + vt2_dim1; + vt2 -= vt2_offset; + --idxp; + --idx; + --idxc; + --idxq; + --coltyp; + *info = 0; + if (*nl < 1) { + *info = -1; + } else if (*nr < 1) { + *info = -2; + } else if (*sqre != 1 && *sqre != 0) { + *info = -3; + } + n = *nl + *nr + 1; + m = n + *sqre; + if (*ldu < n) { + *info = -10; + } else if (*ldvt < m) { + *info = -12; + } else if (*ldu2 < n) { + *info = -15; + } else if (*ldvt2 < m) { + *info = -17; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLASD2", &i__1, (ftnlen)6); + return 0; + } + nlp1 = *nl + 1; + nlp2 = *nl + 2; + z1 = *alpha * vt[nlp1 + nlp1 * vt_dim1]; + z__[1] = z1; + for (i__ = *nl; i__ >= 1; --i__) { + z__[i__ + 1] = *alpha * vt[i__ + nlp1 * vt_dim1]; + d__[i__ + 1] = d__[i__]; + idxq[i__ + 1] = idxq[i__] + 1; + } + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + z__[i__] = *beta * vt[i__ + nlp2 * vt_dim1]; + } + i__1 = nlp1; + for (i__ = 2; i__ <= i__1; ++i__) { + coltyp[i__] = 1; + } + i__1 = n; + for (i__ = nlp2; i__ <= i__1; ++i__) { + coltyp[i__] = 2; + } + i__1 = n; + for (i__ = nlp2; i__ <= i__1; ++i__) { + idxq[i__] += nlp1; + } + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + dsigma[i__] = d__[idxq[i__]]; + u2[i__ + u2_dim1] = z__[idxq[i__]]; + idxc[i__] = coltyp[idxq[i__]]; + } + dlamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + idxi = idx[i__] + 1; + d__[i__] = dsigma[idxi]; + z__[i__] = u2[idxi + u2_dim1]; + coltyp[i__] = idxc[idxi]; + } + eps = dlamch_((char *)"Epsilon", (ftnlen)7); + d__1 = abs(*alpha), d__2 = abs(*beta); + tol = max(d__1, d__2); + d__2 = (d__1 = d__[n], abs(d__1)); + tol = eps * 8. * max(d__2, tol); + *k = 1; + k2 = n + 1; + i__1 = n; + for (j = 2; j <= i__1; ++j) { + if ((d__1 = z__[j], abs(d__1)) <= tol) { + --k2; + idxp[k2] = j; + coltyp[j] = 4; + if (j == n) { + goto L120; + } + } else { + jprev = j; + goto L90; + } + } +L90: + j = jprev; +L100: + ++j; + if (j > n) { + goto L110; + } + if ((d__1 = z__[j], abs(d__1)) <= tol) { + --k2; + idxp[k2] = j; + coltyp[j] = 4; + } else { + if ((d__1 = d__[j] - d__[jprev], abs(d__1)) <= tol) { + s = z__[jprev]; + c__ = z__[j]; + tau = dlapy2_(&c__, &s); + c__ /= tau; + s = -s / tau; + z__[j] = tau; + z__[jprev] = 0.; + idxjp = idxq[idx[jprev] + 1]; + idxj = idxq[idx[j] + 1]; + if (idxjp <= nlp1) { + --idxjp; + } + if (idxj <= nlp1) { + --idxj; + } + drot_(&n, &u[idxjp * u_dim1 + 1], &c__1, &u[idxj * u_dim1 + 1], &c__1, &c__, &s); + drot_(&m, &vt[idxjp + vt_dim1], ldvt, &vt[idxj + vt_dim1], ldvt, &c__, &s); + if (coltyp[j] != coltyp[jprev]) { + coltyp[j] = 3; + } + coltyp[jprev] = 4; + --k2; + idxp[k2] = jprev; + jprev = j; + } else { + ++(*k); + u2[*k + u2_dim1] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; + jprev = j; + } + } + goto L100; +L110: + ++(*k); + u2[*k + u2_dim1] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; +L120: + for (j = 1; j <= 4; ++j) { + ctot[j - 1] = 0; + } + i__1 = n; + for (j = 2; j <= i__1; ++j) { + ct = coltyp[j]; + ++ctot[ct - 1]; + } + psm[0] = 2; + psm[1] = ctot[0] + 2; + psm[2] = psm[1] + ctot[1]; + psm[3] = psm[2] + ctot[2]; + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + ct = coltyp[jp]; + idxc[psm[ct - 1]] = j; + ++psm[ct - 1]; + } + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + dsigma[j] = d__[jp]; + idxj = idxq[idx[idxp[idxc[j]]] + 1]; + if (idxj <= nlp1) { + --idxj; + } + dcopy_(&n, &u[idxj * u_dim1 + 1], &c__1, &u2[j * u2_dim1 + 1], &c__1); + dcopy_(&m, &vt[idxj + vt_dim1], ldvt, &vt2[j + vt2_dim1], ldvt2); + } + dsigma[1] = 0.; + hlftol = tol / 2.; + if (abs(dsigma[2]) <= hlftol) { + dsigma[2] = hlftol; + } + if (m > n) { + z__[1] = dlapy2_(&z1, &z__[m]); + if (z__[1] <= tol) { + c__ = 1.; + s = 0.; + z__[1] = tol; + } else { + c__ = z1 / z__[1]; + s = z__[m] / z__[1]; + } + } else { + if (abs(z1) <= tol) { + z__[1] = tol; + } else { + z__[1] = z1; + } + } + i__1 = *k - 1; + dcopy_(&i__1, &u2[u2_dim1 + 2], &c__1, &z__[2], &c__1); + dlaset_((char *)"A", &n, &c__1, &c_b30, &c_b30, &u2[u2_offset], ldu2, (ftnlen)1); + u2[nlp1 + u2_dim1] = 1.; + if (m > n) { + i__1 = nlp1; + for (i__ = 1; i__ <= i__1; ++i__) { + vt[m + i__ * vt_dim1] = -s * vt[nlp1 + i__ * vt_dim1]; + vt2[i__ * vt2_dim1 + 1] = c__ * vt[nlp1 + i__ * vt_dim1]; + } + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + vt2[i__ * vt2_dim1 + 1] = s * vt[m + i__ * vt_dim1]; + vt[m + i__ * vt_dim1] = c__ * vt[m + i__ * vt_dim1]; + } + } else { + dcopy_(&m, &vt[nlp1 + vt_dim1], ldvt, &vt2[vt2_dim1 + 1], ldvt2); + } + if (m > n) { + dcopy_(&m, &vt[m + vt_dim1], ldvt, &vt2[m + vt2_dim1], ldvt2); + } + if (n > *k) { + i__1 = n - *k; + dcopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); + i__1 = n - *k; + dlacpy_((char *)"A", &n, &i__1, &u2[(*k + 1) * u2_dim1 + 1], ldu2, &u[(*k + 1) * u_dim1 + 1], ldu, + (ftnlen)1); + i__1 = n - *k; + dlacpy_((char *)"A", &i__1, &m, &vt2[*k + 1 + vt2_dim1], ldvt2, &vt[*k + 1 + vt_dim1], ldvt, + (ftnlen)1); + } + for (j = 1; j <= 4; ++j) { + coltyp[j] = ctot[j - 1]; + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlasd3.cpp b/lib/linalg/dlasd3.cpp new file mode 100644 index 0000000000..745c613e08 --- /dev/null +++ b/lib/linalg/dlasd3.cpp @@ -0,0 +1,218 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c__0 = 0; +static doublereal c_b13 = 1.; +static doublereal c_b26 = 0.; +int dlasd3_(integer *nl, integer *nr, integer *sqre, integer *k, doublereal *d__, doublereal *q, + integer *ldq, doublereal *dsigma, doublereal *u, integer *ldu, doublereal *u2, + integer *ldu2, doublereal *vt, integer *ldvt, doublereal *vt2, integer *ldvt2, + integer *idxc, integer *ctot, doublereal *z__, integer *info) +{ + integer q_dim1, q_offset, u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset, vt2_dim1, + vt2_offset, i__1, i__2; + doublereal d__1, d__2; + double sqrt(doublereal), d_lmp_sign(doublereal *, doublereal *); + integer i__, j, m, n, jc; + doublereal rho; + integer nlp1, nlp2, nrp1; + doublereal temp; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, + ftnlen, ftnlen); + integer ctemp; + extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); + integer ktemp; + extern doublereal dlamc3_(doublereal *, doublereal *); + extern int dlasd4_(integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, + doublereal *, integer *, integer *, ftnlen), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen), + xerbla_(char *, integer *, ftnlen); + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --dsigma; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + u2_dim1 = *ldu2; + u2_offset = 1 + u2_dim1; + u2 -= u2_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + vt2_dim1 = *ldvt2; + vt2_offset = 1 + vt2_dim1; + vt2 -= vt2_offset; + --idxc; + --ctot; + --z__; + *info = 0; + if (*nl < 1) { + *info = -1; + } else if (*nr < 1) { + *info = -2; + } else if (*sqre != 1 && *sqre != 0) { + *info = -3; + } + n = *nl + *nr + 1; + m = n + *sqre; + nlp1 = *nl + 1; + nlp2 = *nl + 2; + if (*k < 1 || *k > n) { + *info = -4; + } else if (*ldq < *k) { + *info = -7; + } else if (*ldu < n) { + *info = -10; + } else if (*ldu2 < n) { + *info = -12; + } else if (*ldvt < m) { + *info = -14; + } else if (*ldvt2 < m) { + *info = -16; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLASD3", &i__1, (ftnlen)6); + return 0; + } + if (*k == 1) { + d__[1] = abs(z__[1]); + dcopy_(&m, &vt2[vt2_dim1 + 1], ldvt2, &vt[vt_dim1 + 1], ldvt); + if (z__[1] > 0.) { + dcopy_(&n, &u2[u2_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1); + } else { + i__1 = n; + for (i__ = 1; i__ <= i__1; ++i__) { + u[i__ + u_dim1] = -u2[i__ + u2_dim1]; + } + } + return 0; + } + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + dsigma[i__] = dlamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; + } + dcopy_(k, &z__[1], &c__1, &q[q_offset], &c__1); + rho = dnrm2_(k, &z__[1], &c__1); + dlascl_((char *)"G", &c__0, &c__0, &rho, &c_b13, k, &c__1, &z__[1], k, info, (ftnlen)1); + rho *= rho; + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dlasd4_(k, &j, &dsigma[1], &z__[1], &u[j * u_dim1 + 1], &rho, &d__[j], &vt[j * vt_dim1 + 1], + info); + if (*info != 0) { + return 0; + } + } + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + z__[i__] = u[i__ + *k * u_dim1] * vt[i__ + *k * vt_dim1]; + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[i__] - dsigma[j]) / + (dsigma[i__] + dsigma[j]); + } + i__2 = *k - 1; + for (j = i__; j <= i__2; ++j) { + z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / + (dsigma[i__] - dsigma[j + 1]) / (dsigma[i__] + dsigma[j + 1]); + } + d__2 = sqrt((d__1 = z__[i__], abs(d__1))); + z__[i__] = d_lmp_sign(&d__2, &q[i__ + q_dim1]); + } + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + vt[i__ * vt_dim1 + 1] = z__[1] / u[i__ * u_dim1 + 1] / vt[i__ * vt_dim1 + 1]; + u[i__ * u_dim1 + 1] = -1.; + i__2 = *k; + for (j = 2; j <= i__2; ++j) { + vt[j + i__ * vt_dim1] = z__[j] / u[j + i__ * u_dim1] / vt[j + i__ * vt_dim1]; + u[j + i__ * u_dim1] = dsigma[j] * vt[j + i__ * vt_dim1]; + } + temp = dnrm2_(k, &u[i__ * u_dim1 + 1], &c__1); + q[i__ * q_dim1 + 1] = u[i__ * u_dim1 + 1] / temp; + i__2 = *k; + for (j = 2; j <= i__2; ++j) { + jc = idxc[j]; + q[j + i__ * q_dim1] = u[jc + i__ * u_dim1] / temp; + } + } + if (*k == 2) { + dgemm_((char *)"N", (char *)"N", &n, k, k, &c_b13, &u2[u2_offset], ldu2, &q[q_offset], ldq, &c_b26, + &u[u_offset], ldu, (ftnlen)1, (ftnlen)1); + goto L100; + } + if (ctot[1] > 0) { + dgemm_((char *)"N", (char *)"N", nl, k, &ctot[1], &c_b13, &u2[(u2_dim1 << 1) + 1], ldu2, &q[q_dim1 + 2], + ldq, &c_b26, &u[u_dim1 + 1], ldu, (ftnlen)1, (ftnlen)1); + if (ctot[3] > 0) { + ktemp = ctot[1] + 2 + ctot[2]; + dgemm_((char *)"N", (char *)"N", nl, k, &ctot[3], &c_b13, &u2[ktemp * u2_dim1 + 1], ldu2, + &q[ktemp + q_dim1], ldq, &c_b13, &u[u_dim1 + 1], ldu, (ftnlen)1, (ftnlen)1); + } + } else if (ctot[3] > 0) { + ktemp = ctot[1] + 2 + ctot[2]; + dgemm_((char *)"N", (char *)"N", nl, k, &ctot[3], &c_b13, &u2[ktemp * u2_dim1 + 1], ldu2, + &q[ktemp + q_dim1], ldq, &c_b26, &u[u_dim1 + 1], ldu, (ftnlen)1, (ftnlen)1); + } else { + dlacpy_((char *)"F", nl, k, &u2[u2_offset], ldu2, &u[u_offset], ldu, (ftnlen)1); + } + dcopy_(k, &q[q_dim1 + 1], ldq, &u[nlp1 + u_dim1], ldu); + ktemp = ctot[1] + 2; + ctemp = ctot[2] + ctot[3]; + dgemm_((char *)"N", (char *)"N", nr, k, &ctemp, &c_b13, &u2[nlp2 + ktemp * u2_dim1], ldu2, &q[ktemp + q_dim1], + ldq, &c_b26, &u[nlp2 + u_dim1], ldu, (ftnlen)1, (ftnlen)1); +L100: + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = dnrm2_(k, &vt[i__ * vt_dim1 + 1], &c__1); + q[i__ + q_dim1] = vt[i__ * vt_dim1 + 1] / temp; + i__2 = *k; + for (j = 2; j <= i__2; ++j) { + jc = idxc[j]; + q[i__ + j * q_dim1] = vt[jc + i__ * vt_dim1] / temp; + } + } + if (*k == 2) { + dgemm_((char *)"N", (char *)"N", k, &m, k, &c_b13, &q[q_offset], ldq, &vt2[vt2_offset], ldvt2, &c_b26, + &vt[vt_offset], ldvt, (ftnlen)1, (ftnlen)1); + return 0; + } + ktemp = ctot[1] + 1; + dgemm_((char *)"N", (char *)"N", k, &nlp1, &ktemp, &c_b13, &q[q_dim1 + 1], ldq, &vt2[vt2_dim1 + 1], ldvt2, + &c_b26, &vt[vt_dim1 + 1], ldvt, (ftnlen)1, (ftnlen)1); + ktemp = ctot[1] + 2 + ctot[2]; + if (ktemp <= *ldvt2) { + dgemm_((char *)"N", (char *)"N", k, &nlp1, &ctot[3], &c_b13, &q[ktemp * q_dim1 + 1], ldq, + &vt2[ktemp + vt2_dim1], ldvt2, &c_b13, &vt[vt_dim1 + 1], ldvt, (ftnlen)1, (ftnlen)1); + } + ktemp = ctot[1] + 1; + nrp1 = *nr + *sqre; + if (ktemp > 1) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + q[i__ + ktemp * q_dim1] = q[i__ + q_dim1]; + } + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + vt2[ktemp + i__ * vt2_dim1] = vt2[i__ * vt2_dim1 + 1]; + } + } + ctemp = ctot[2] + 1 + ctot[3]; + dgemm_((char *)"N", (char *)"N", k, &nrp1, &ctemp, &c_b13, &q[ktemp * q_dim1 + 1], ldq, + &vt2[ktemp + nlp2 * vt2_dim1], ldvt2, &c_b26, &vt[nlp2 * vt_dim1 + 1], ldvt, (ftnlen)1, + (ftnlen)1); + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlasy2.cpp b/lib/linalg/dlasy2.cpp new file mode 100644 index 0000000000..94e9ed0e7c --- /dev/null +++ b/lib/linalg/dlasy2.cpp @@ -0,0 +1,284 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__4 = 4; +static integer c__1 = 1; +static integer c__16 = 16; +static integer c__0 = 0; +int dlasy2_(logical *ltranl, logical *ltranr, integer *isgn, integer *n1, integer *n2, + doublereal *tl, integer *ldtl, doublereal *tr, integer *ldtr, doublereal *b, + integer *ldb, doublereal *scale, doublereal *x, integer *ldx, doublereal *xnorm, + integer *info) +{ + static integer locu12[4] = {3, 4, 1, 2}; + static integer locl21[4] = {2, 1, 4, 3}; + static integer locu22[4] = {4, 3, 2, 1}; + static logical xswpiv[4] = {FALSE_, FALSE_, TRUE_, TRUE_}; + static logical bswpiv[4] = {FALSE_, TRUE_, FALSE_, TRUE_}; + integer b_dim1, b_offset, tl_dim1, tl_offset, tr_dim1, tr_offset, x_dim1, x_offset; + doublereal d__1, d__2, d__3, d__4, d__5, d__6, d__7, d__8; + integer i__, j, k; + doublereal x2[2], l21, u11, u12; + integer ip, jp; + doublereal u22, t16[16], gam, bet, eps, sgn, tmp[4], tau1, btmp[4], smin; + integer ipiv; + doublereal temp; + integer jpiv[4]; + doublereal xmax; + integer ipsv, jpsv; + logical bswap; + extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), + dswap_(integer *, doublereal *, integer *, doublereal *, integer *); + logical xswap; + extern doublereal dlamch_(char *, ftnlen); + extern integer idamax_(integer *, doublereal *, integer *); + doublereal smlnum; + tl_dim1 = *ldtl; + tl_offset = 1 + tl_dim1; + tl -= tl_offset; + tr_dim1 = *ldtr; + tr_offset = 1 + tr_dim1; + tr -= tr_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + *info = 0; + if (*n1 == 0 || *n2 == 0) { + return 0; + } + eps = dlamch_((char *)"P", (ftnlen)1); + smlnum = dlamch_((char *)"S", (ftnlen)1) / eps; + sgn = (doublereal)(*isgn); + k = *n1 + *n1 + *n2 - 2; + switch (k) { + case 1: + goto L10; + case 2: + goto L20; + case 3: + goto L30; + case 4: + goto L50; + } +L10: + tau1 = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + bet = abs(tau1); + if (bet <= smlnum) { + tau1 = smlnum; + bet = smlnum; + *info = 1; + } + *scale = 1.; + gam = (d__1 = b[b_dim1 + 1], abs(d__1)); + if (smlnum * gam > bet) { + *scale = 1. / gam; + } + x[x_dim1 + 1] = b[b_dim1 + 1] * *scale / tau1; + *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)); + return 0; +L20: + d__7 = (d__1 = tl[tl_dim1 + 1], abs(d__1)), d__8 = (d__2 = tr[tr_dim1 + 1], abs(d__2)), + d__7 = max(d__7, d__8), d__8 = (d__3 = tr[(tr_dim1 << 1) + 1], abs(d__3)), + d__7 = max(d__7, d__8), d__8 = (d__4 = tr[tr_dim1 + 2], abs(d__4)), d__7 = max(d__7, d__8), + d__8 = (d__5 = tr[(tr_dim1 << 1) + 2], abs(d__5)); + d__6 = eps * max(d__7, d__8); + smin = max(d__6, smlnum); + tmp[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + tmp[3] = tl[tl_dim1 + 1] + sgn * tr[(tr_dim1 << 1) + 2]; + if (*ltranr) { + tmp[1] = sgn * tr[tr_dim1 + 2]; + tmp[2] = sgn * tr[(tr_dim1 << 1) + 1]; + } else { + tmp[1] = sgn * tr[(tr_dim1 << 1) + 1]; + tmp[2] = sgn * tr[tr_dim1 + 2]; + } + btmp[0] = b[b_dim1 + 1]; + btmp[1] = b[(b_dim1 << 1) + 1]; + goto L40; +L30: + d__7 = (d__1 = tr[tr_dim1 + 1], abs(d__1)), d__8 = (d__2 = tl[tl_dim1 + 1], abs(d__2)), + d__7 = max(d__7, d__8), d__8 = (d__3 = tl[(tl_dim1 << 1) + 1], abs(d__3)), + d__7 = max(d__7, d__8), d__8 = (d__4 = tl[tl_dim1 + 2], abs(d__4)), d__7 = max(d__7, d__8), + d__8 = (d__5 = tl[(tl_dim1 << 1) + 2], abs(d__5)); + d__6 = eps * max(d__7, d__8); + smin = max(d__6, smlnum); + tmp[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + tmp[3] = tl[(tl_dim1 << 1) + 2] + sgn * tr[tr_dim1 + 1]; + if (*ltranl) { + tmp[1] = tl[(tl_dim1 << 1) + 1]; + tmp[2] = tl[tl_dim1 + 2]; + } else { + tmp[1] = tl[tl_dim1 + 2]; + tmp[2] = tl[(tl_dim1 << 1) + 1]; + } + btmp[0] = b[b_dim1 + 1]; + btmp[1] = b[b_dim1 + 2]; +L40: + ipiv = idamax_(&c__4, tmp, &c__1); + u11 = tmp[ipiv - 1]; + if (abs(u11) <= smin) { + *info = 1; + u11 = smin; + } + u12 = tmp[locu12[ipiv - 1] - 1]; + l21 = tmp[locl21[ipiv - 1] - 1] / u11; + u22 = tmp[locu22[ipiv - 1] - 1] - u12 * l21; + xswap = xswpiv[ipiv - 1]; + bswap = bswpiv[ipiv - 1]; + if (abs(u22) <= smin) { + *info = 1; + u22 = smin; + } + if (bswap) { + temp = btmp[1]; + btmp[1] = btmp[0] - l21 * temp; + btmp[0] = temp; + } else { + btmp[1] -= l21 * btmp[0]; + } + *scale = 1.; + if (smlnum * 2. * abs(btmp[1]) > abs(u22) || smlnum * 2. * abs(btmp[0]) > abs(u11)) { + d__1 = abs(btmp[0]), d__2 = abs(btmp[1]); + *scale = .5 / max(d__1, d__2); + btmp[0] *= *scale; + btmp[1] *= *scale; + } + x2[1] = btmp[1] / u22; + x2[0] = btmp[0] / u11 - u12 / u11 * x2[1]; + if (xswap) { + temp = x2[1]; + x2[1] = x2[0]; + x2[0] = temp; + } + x[x_dim1 + 1] = x2[0]; + if (*n1 == 1) { + x[(x_dim1 << 1) + 1] = x2[1]; + *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)) + (d__2 = x[(x_dim1 << 1) + 1], abs(d__2)); + } else { + x[x_dim1 + 2] = x2[1]; + d__3 = (d__1 = x[x_dim1 + 1], abs(d__1)), d__4 = (d__2 = x[x_dim1 + 2], abs(d__2)); + *xnorm = max(d__3, d__4); + } + return 0; +L50: + d__5 = (d__1 = tr[tr_dim1 + 1], abs(d__1)), d__6 = (d__2 = tr[(tr_dim1 << 1) + 1], abs(d__2)), + d__5 = max(d__5, d__6), d__6 = (d__3 = tr[tr_dim1 + 2], abs(d__3)), d__5 = max(d__5, d__6), + d__6 = (d__4 = tr[(tr_dim1 << 1) + 2], abs(d__4)); + smin = max(d__5, d__6); + d__5 = smin, d__6 = (d__1 = tl[tl_dim1 + 1], abs(d__1)), d__5 = max(d__5, d__6), + d__6 = (d__2 = tl[(tl_dim1 << 1) + 1], abs(d__2)), d__5 = max(d__5, d__6), + d__6 = (d__3 = tl[tl_dim1 + 2], abs(d__3)), d__5 = max(d__5, d__6), + d__6 = (d__4 = tl[(tl_dim1 << 1) + 2], abs(d__4)); + smin = max(d__5, d__6); + d__1 = eps * smin; + smin = max(d__1, smlnum); + btmp[0] = 0.; + dcopy_(&c__16, btmp, &c__0, t16, &c__1); + t16[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + t16[5] = tl[(tl_dim1 << 1) + 2] + sgn * tr[tr_dim1 + 1]; + t16[10] = tl[tl_dim1 + 1] + sgn * tr[(tr_dim1 << 1) + 2]; + t16[15] = tl[(tl_dim1 << 1) + 2] + sgn * tr[(tr_dim1 << 1) + 2]; + if (*ltranl) { + t16[4] = tl[tl_dim1 + 2]; + t16[1] = tl[(tl_dim1 << 1) + 1]; + t16[14] = tl[tl_dim1 + 2]; + t16[11] = tl[(tl_dim1 << 1) + 1]; + } else { + t16[4] = tl[(tl_dim1 << 1) + 1]; + t16[1] = tl[tl_dim1 + 2]; + t16[14] = tl[(tl_dim1 << 1) + 1]; + t16[11] = tl[tl_dim1 + 2]; + } + if (*ltranr) { + t16[8] = sgn * tr[(tr_dim1 << 1) + 1]; + t16[13] = sgn * tr[(tr_dim1 << 1) + 1]; + t16[2] = sgn * tr[tr_dim1 + 2]; + t16[7] = sgn * tr[tr_dim1 + 2]; + } else { + t16[8] = sgn * tr[tr_dim1 + 2]; + t16[13] = sgn * tr[tr_dim1 + 2]; + t16[2] = sgn * tr[(tr_dim1 << 1) + 1]; + t16[7] = sgn * tr[(tr_dim1 << 1) + 1]; + } + btmp[0] = b[b_dim1 + 1]; + btmp[1] = b[b_dim1 + 2]; + btmp[2] = b[(b_dim1 << 1) + 1]; + btmp[3] = b[(b_dim1 << 1) + 2]; + for (i__ = 1; i__ <= 3; ++i__) { + xmax = 0.; + for (ip = i__; ip <= 4; ++ip) { + for (jp = i__; jp <= 4; ++jp) { + if ((d__1 = t16[ip + (jp << 2) - 5], abs(d__1)) >= xmax) { + xmax = (d__1 = t16[ip + (jp << 2) - 5], abs(d__1)); + ipsv = ip; + jpsv = jp; + } + } + } + if (ipsv != i__) { + dswap_(&c__4, &t16[ipsv - 1], &c__4, &t16[i__ - 1], &c__4); + temp = btmp[i__ - 1]; + btmp[i__ - 1] = btmp[ipsv - 1]; + btmp[ipsv - 1] = temp; + } + if (jpsv != i__) { + dswap_(&c__4, &t16[(jpsv << 2) - 4], &c__1, &t16[(i__ << 2) - 4], &c__1); + } + jpiv[i__ - 1] = jpsv; + if ((d__1 = t16[i__ + (i__ << 2) - 5], abs(d__1)) < smin) { + *info = 1; + t16[i__ + (i__ << 2) - 5] = smin; + } + for (j = i__ + 1; j <= 4; ++j) { + t16[j + (i__ << 2) - 5] /= t16[i__ + (i__ << 2) - 5]; + btmp[j - 1] -= t16[j + (i__ << 2) - 5] * btmp[i__ - 1]; + for (k = i__ + 1; k <= 4; ++k) { + t16[j + (k << 2) - 5] -= t16[j + (i__ << 2) - 5] * t16[i__ + (k << 2) - 5]; + } + } + } + if (abs(t16[15]) < smin) { + *info = 1; + t16[15] = smin; + } + *scale = 1.; + if (smlnum * 8. * abs(btmp[0]) > abs(t16[0]) || smlnum * 8. * abs(btmp[1]) > abs(t16[5]) || + smlnum * 8. * abs(btmp[2]) > abs(t16[10]) || smlnum * 8. * abs(btmp[3]) > abs(t16[15])) { + d__1 = abs(btmp[0]), d__2 = abs(btmp[1]), d__1 = max(d__1, d__2), d__2 = abs(btmp[2]), + d__1 = max(d__1, d__2), d__2 = abs(btmp[3]); + *scale = .125 / max(d__1, d__2); + btmp[0] *= *scale; + btmp[1] *= *scale; + btmp[2] *= *scale; + btmp[3] *= *scale; + } + for (i__ = 1; i__ <= 4; ++i__) { + k = 5 - i__; + temp = 1. / t16[k + (k << 2) - 5]; + tmp[k - 1] = btmp[k - 1] * temp; + for (j = k + 1; j <= 4; ++j) { + tmp[k - 1] -= temp * t16[k + (j << 2) - 5] * tmp[j - 1]; + } + } + for (i__ = 1; i__ <= 3; ++i__) { + if (jpiv[4 - i__ - 1] != 4 - i__) { + temp = tmp[4 - i__ - 1]; + tmp[4 - i__ - 1] = tmp[jpiv[4 - i__ - 1] - 1]; + tmp[jpiv[4 - i__ - 1] - 1] = temp; + } + } + x[x_dim1 + 1] = tmp[0]; + x[x_dim1 + 2] = tmp[1]; + x[(x_dim1 << 1) + 1] = tmp[2]; + x[(x_dim1 << 1) + 2] = tmp[3]; + d__1 = abs(tmp[0]) + abs(tmp[2]), d__2 = abs(tmp[1]) + abs(tmp[3]); + *xnorm = max(d__1, d__2); + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlasyf.cpp b/lib/linalg/dlasyf.cpp new file mode 100644 index 0000000000..aaafd1a88f --- /dev/null +++ b/lib/linalg/dlasyf.cpp @@ -0,0 +1,337 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static doublereal c_b8 = -1.; +static doublereal c_b9 = 1.; +int dlasyf_(char *uplo, integer *n, integer *nb, integer *kb, doublereal *a, integer *lda, + integer *ipiv, doublereal *w, integer *ldw, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3; + double sqrt(doublereal); + integer j, k; + doublereal t, r1, d11, d21, d22; + integer jb, jj, kk, jp, kp, kw, kkw, imax, jmax; + doublereal alpha; + extern int dscal_(integer *, doublereal *, doublereal *, integer *), + dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, + ftnlen); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen), + dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), + dswap_(integer *, doublereal *, integer *, doublereal *, integer *); + integer kstep; + doublereal absakk; + extern integer idamax_(integer *, doublereal *, integer *); + doublereal colmax, rowmax; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + w_dim1 = *ldw; + w_offset = 1 + w_dim1; + w -= w_offset; + *info = 0; + alpha = (sqrt(17.) + 1.) / 8.; + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + k = *n; + L10: + kw = *nb + k - *n; + if (k <= *n - *nb + 1 && *nb < *n || k < 1) { + goto L30; + } + dcopy_(&k, &a[k * a_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1); + if (k < *n) { + i__1 = *n - k; + dgemv_((char *)"No transpose", &k, &i__1, &c_b8, &a[(k + 1) * a_dim1 + 1], lda, + &w[k + (kw + 1) * w_dim1], ldw, &c_b9, &w[kw * w_dim1 + 1], &c__1, (ftnlen)12); + } + kstep = 1; + absakk = (d__1 = w[k + kw * w_dim1], abs(d__1)); + if (k > 1) { + i__1 = k - 1; + imax = idamax_(&i__1, &w[kw * w_dim1 + 1], &c__1); + colmax = (d__1 = w[imax + kw * w_dim1], abs(d__1)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0.) { + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + dcopy_(&imax, &a[imax * a_dim1 + 1], &c__1, &w[(kw - 1) * w_dim1 + 1], &c__1); + i__1 = k - imax; + dcopy_(&i__1, &a[imax + (imax + 1) * a_dim1], lda, &w[imax + 1 + (kw - 1) * w_dim1], + &c__1); + if (k < *n) { + i__1 = *n - k; + dgemv_((char *)"No transpose", &k, &i__1, &c_b8, &a[(k + 1) * a_dim1 + 1], lda, + &w[imax + (kw + 1) * w_dim1], ldw, &c_b9, &w[(kw - 1) * w_dim1 + 1], + &c__1, (ftnlen)12); + } + i__1 = k - imax; + jmax = imax + idamax_(&i__1, &w[imax + 1 + (kw - 1) * w_dim1], &c__1); + rowmax = (d__1 = w[jmax + (kw - 1) * w_dim1], abs(d__1)); + if (imax > 1) { + i__1 = imax - 1; + jmax = idamax_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1); + d__2 = rowmax, d__3 = (d__1 = w[jmax + (kw - 1) * w_dim1], abs(d__1)); + rowmax = max(d__2, d__3); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else if ((d__1 = w[imax + (kw - 1) * w_dim1], abs(d__1)) >= alpha * rowmax) { + kp = imax; + dcopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1); + } else { + kp = imax; + kstep = 2; + } + } + kk = k - kstep + 1; + kkw = *nb + kk - *n; + if (kp != kk) { + a[kp + kp * a_dim1] = a[kk + kk * a_dim1]; + i__1 = kk - 1 - kp; + dcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp + 1) * a_dim1], lda); + if (kp > 1) { + i__1 = kp - 1; + dcopy_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &c__1); + } + if (k < *n) { + i__1 = *n - k; + dswap_(&i__1, &a[kk + (k + 1) * a_dim1], lda, &a[kp + (k + 1) * a_dim1], lda); + } + i__1 = *n - kk + 1; + dswap_(&i__1, &w[kk + kkw * w_dim1], ldw, &w[kp + kkw * w_dim1], ldw); + } + if (kstep == 1) { + dcopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1); + r1 = 1. / a[k + k * a_dim1]; + i__1 = k - 1; + dscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1); + } else { + if (k > 2) { + d21 = w[k - 1 + kw * w_dim1]; + d11 = w[k + kw * w_dim1] / d21; + d22 = w[k - 1 + (kw - 1) * w_dim1] / d21; + t = 1. / (d11 * d22 - 1.); + d21 = t / d21; + i__1 = k - 2; + for (j = 1; j <= i__1; ++j) { + a[j + (k - 1) * a_dim1] = + d21 * (d11 * w[j + (kw - 1) * w_dim1] - w[j + kw * w_dim1]); + a[j + k * a_dim1] = + d21 * (d22 * w[j + kw * w_dim1] - w[j + (kw - 1) * w_dim1]); + } + } + a[k - 1 + (k - 1) * a_dim1] = w[k - 1 + (kw - 1) * w_dim1]; + a[k - 1 + k * a_dim1] = w[k - 1 + kw * w_dim1]; + a[k + k * a_dim1] = w[k + kw * w_dim1]; + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k - 1] = -kp; + } + k -= kstep; + goto L10; + L30: + i__1 = -(*nb); + for (j = (k - 1) / *nb * *nb + 1; i__1 < 0 ? j >= 1 : j <= 1; j += i__1) { + i__2 = *nb, i__3 = k - j + 1; + jb = min(i__2, i__3); + i__2 = j + jb - 1; + for (jj = j; jj <= i__2; ++jj) { + i__3 = jj - j + 1; + i__4 = *n - k; + dgemv_((char *)"No transpose", &i__3, &i__4, &c_b8, &a[j + (k + 1) * a_dim1], lda, + &w[jj + (kw + 1) * w_dim1], ldw, &c_b9, &a[j + jj * a_dim1], &c__1, + (ftnlen)12); + } + i__2 = j - 1; + i__3 = *n - k; + dgemm_((char *)"No transpose", (char *)"Transpose", &i__2, &jb, &i__3, &c_b8, &a[(k + 1) * a_dim1 + 1], + lda, &w[j + (kw + 1) * w_dim1], ldw, &c_b9, &a[j * a_dim1 + 1], lda, (ftnlen)12, + (ftnlen)9); + } + j = k + 1; + L60: + jj = j; + jp = ipiv[j]; + if (jp < 0) { + jp = -jp; + ++j; + } + ++j; + if (jp != jj && j <= *n) { + i__1 = *n - j + 1; + dswap_(&i__1, &a[jp + j * a_dim1], lda, &a[jj + j * a_dim1], lda); + } + if (j < *n) { + goto L60; + } + *kb = *n - k; + } else { + k = 1; + L70: + if (k >= *nb && *nb < *n || k > *n) { + goto L90; + } + i__1 = *n - k + 1; + dcopy_(&i__1, &a[k + k * a_dim1], &c__1, &w[k + k * w_dim1], &c__1); + i__1 = *n - k + 1; + i__2 = k - 1; + dgemv_((char *)"No transpose", &i__1, &i__2, &c_b8, &a[k + a_dim1], lda, &w[k + w_dim1], ldw, &c_b9, + &w[k + k * w_dim1], &c__1, (ftnlen)12); + kstep = 1; + absakk = (d__1 = w[k + k * w_dim1], abs(d__1)); + if (k < *n) { + i__1 = *n - k; + imax = k + idamax_(&i__1, &w[k + 1 + k * w_dim1], &c__1); + colmax = (d__1 = w[imax + k * w_dim1], abs(d__1)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0.) { + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = imax - k; + dcopy_(&i__1, &a[imax + k * a_dim1], lda, &w[k + (k + 1) * w_dim1], &c__1); + i__1 = *n - imax + 1; + dcopy_(&i__1, &a[imax + imax * a_dim1], &c__1, &w[imax + (k + 1) * w_dim1], &c__1); + i__1 = *n - k + 1; + i__2 = k - 1; + dgemv_((char *)"No transpose", &i__1, &i__2, &c_b8, &a[k + a_dim1], lda, &w[imax + w_dim1], + ldw, &c_b9, &w[k + (k + 1) * w_dim1], &c__1, (ftnlen)12); + i__1 = imax - k; + jmax = k - 1 + idamax_(&i__1, &w[k + (k + 1) * w_dim1], &c__1); + rowmax = (d__1 = w[jmax + (k + 1) * w_dim1], abs(d__1)); + if (imax < *n) { + i__1 = *n - imax; + jmax = imax + idamax_(&i__1, &w[imax + 1 + (k + 1) * w_dim1], &c__1); + d__2 = rowmax, d__3 = (d__1 = w[jmax + (k + 1) * w_dim1], abs(d__1)); + rowmax = max(d__2, d__3); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else if ((d__1 = w[imax + (k + 1) * w_dim1], abs(d__1)) >= alpha * rowmax) { + kp = imax; + i__1 = *n - k + 1; + dcopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k * w_dim1], &c__1); + } else { + kp = imax; + kstep = 2; + } + } + kk = k + kstep - 1; + if (kp != kk) { + a[kp + kp * a_dim1] = a[kk + kk * a_dim1]; + i__1 = kp - kk - 1; + dcopy_(&i__1, &a[kk + 1 + kk * a_dim1], &c__1, &a[kp + (kk + 1) * a_dim1], lda); + if (kp < *n) { + i__1 = *n - kp; + dcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1 + kp * a_dim1], &c__1); + } + if (k > 1) { + i__1 = k - 1; + dswap_(&i__1, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda); + } + dswap_(&kk, &w[kk + w_dim1], ldw, &w[kp + w_dim1], ldw); + } + if (kstep == 1) { + i__1 = *n - k + 1; + dcopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], &c__1); + if (k < *n) { + r1 = 1. / a[k + k * a_dim1]; + i__1 = *n - k; + dscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1); + } + } else { + if (k < *n - 1) { + d21 = w[k + 1 + k * w_dim1]; + d11 = w[k + 1 + (k + 1) * w_dim1] / d21; + d22 = w[k + k * w_dim1] / d21; + t = 1. / (d11 * d22 - 1.); + d21 = t / d21; + i__1 = *n; + for (j = k + 2; j <= i__1; ++j) { + a[j + k * a_dim1] = + d21 * (d11 * w[j + k * w_dim1] - w[j + (k + 1) * w_dim1]); + a[j + (k + 1) * a_dim1] = + d21 * (d22 * w[j + (k + 1) * w_dim1] - w[j + k * w_dim1]); + } + } + a[k + k * a_dim1] = w[k + k * w_dim1]; + a[k + 1 + k * a_dim1] = w[k + 1 + k * w_dim1]; + a[k + 1 + (k + 1) * a_dim1] = w[k + 1 + (k + 1) * w_dim1]; + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k + 1] = -kp; + } + k += kstep; + goto L70; + L90: + i__1 = *n; + i__2 = *nb; + for (j = k; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + i__3 = *nb, i__4 = *n - j + 1; + jb = min(i__3, i__4); + i__3 = j + jb - 1; + for (jj = j; jj <= i__3; ++jj) { + i__4 = j + jb - jj; + i__5 = k - 1; + dgemv_((char *)"No transpose", &i__4, &i__5, &c_b8, &a[jj + a_dim1], lda, &w[jj + w_dim1], + ldw, &c_b9, &a[jj + jj * a_dim1], &c__1, (ftnlen)12); + } + if (j + jb <= *n) { + i__3 = *n - j - jb + 1; + i__4 = k - 1; + dgemm_((char *)"No transpose", (char *)"Transpose", &i__3, &jb, &i__4, &c_b8, &a[j + jb + a_dim1], + lda, &w[j + w_dim1], ldw, &c_b9, &a[j + jb + j * a_dim1], lda, (ftnlen)12, + (ftnlen)9); + } + } + j = k - 1; + L120: + jj = j; + jp = ipiv[j]; + if (jp < 0) { + jp = -jp; + --j; + } + --j; + if (jp != jj && j >= 1) { + dswap_(&j, &a[jp + a_dim1], lda, &a[jj + a_dim1], lda); + } + if (j > 1) { + goto L120; + } + *kb = k - 1; + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dorghr.cpp b/lib/linalg/dorghr.cpp new file mode 100644 index 0000000000..80ffa7dbc5 --- /dev/null +++ b/lib/linalg/dorghr.cpp @@ -0,0 +1,94 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c_n1 = -1; +int dorghr_(integer *n, integer *ilo, integer *ihi, doublereal *a, integer *lda, doublereal *tau, + doublereal *work, integer *lwork, integer *info) +{ + integer a_dim1, a_offset, i__1, i__2; + integer i__, j, nb, nh, iinfo; + extern int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + extern int dorgqr_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *); + integer lwkopt; + logical lquery; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + *info = 0; + nh = *ihi - *ilo; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1, *n)) { + *info = -2; + } else if (*ihi < min(*ilo, *n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } else if (*lwork < max(1, nh) && !lquery) { + *info = -8; + } + if (*info == 0) { + nb = ilaenv_(&c__1, (char *)"DORGQR", (char *)" ", &nh, &nh, &nh, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = max(1, nh) * nb; + work[1] = (doublereal)lwkopt; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DORGHR", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + if (*n == 0) { + work[1] = 1.; + return 0; + } + i__1 = *ilo + 1; + for (j = *ihi; j >= i__1; --j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; + } + i__2 = *ihi; + for (i__ = j + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1]; + } + i__2 = *n; + for (i__ = *ihi + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; + } + } + i__1 = *ilo; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; + } + a[j + j * a_dim1] = 1.; + } + i__1 = *n; + for (j = *ihi + 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; + } + a[j + j * a_dim1] = 1.; + } + if (nh > 0) { + dorgqr_(&nh, &nh, &nh, &a[*ilo + 1 + (*ilo + 1) * a_dim1], lda, &tau[*ilo], &work[1], lwork, + &iinfo); + } + work[1] = (doublereal)lwkopt; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dormhr.cpp b/lib/linalg/dormhr.cpp new file mode 100644 index 0000000000..9cb0cd6690 --- /dev/null +++ b/lib/linalg/dormhr.cpp @@ -0,0 +1,111 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +int dormhr_(char *side, char *trans, integer *m, integer *n, integer *ilo, integer *ihi, + doublereal *a, integer *lda, doublereal *tau, doublereal *c__, integer *ldc, + doublereal *work, integer *lwork, integer *info, ftnlen side_len, ftnlen trans_len) +{ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2; + char ch__1[2]; + int s_lmp_cat(char *, char **, integer *, integer *, ftnlen); + integer i1, i2, nb, mi, nh, ni, nq, nw; + logical left; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer iinfo; + extern int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + extern int dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, + ftnlen, ftnlen); + integer lwkopt; + logical lquery; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + *info = 0; + nh = *ihi - *ilo; + left = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (!left && !lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (!lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1) && + !lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*ilo < 1 || *ilo > max(1, nq)) { + *info = -5; + } else if (*ihi < min(*ilo, nq) || *ihi > nq) { + *info = -6; + } else if (*lda < max(1, nq)) { + *info = -8; + } else if (*ldc < max(1, *m)) { + *info = -11; + } else if (*lwork < max(1, nw) && !lquery) { + *info = -13; + } + if (*info == 0) { + if (left) { + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_lmp_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, (char *)"DORMQR", ch__1, &nh, n, &nh, &c_n1, (ftnlen)6, (ftnlen)2); + } else { + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_lmp_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, (char *)"DORMQR", ch__1, m, &nh, &nh, &c_n1, (ftnlen)6, (ftnlen)2); + } + lwkopt = max(1, nw) * nb; + work[1] = (doublereal)lwkopt; + } + if (*info != 0) { + i__2 = -(*info); + xerbla_((char *)"DORMHR", &i__2, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + if (*m == 0 || *n == 0 || nh == 0) { + work[1] = 1.; + return 0; + } + if (left) { + mi = nh; + ni = *n; + i1 = *ilo + 1; + i2 = 1; + } else { + mi = *m; + ni = nh; + i1 = 1; + i2 = *ilo + 1; + } + dormqr_(side, trans, &mi, &ni, &nh, &a[*ilo + 1 + *ilo * a_dim1], lda, &tau[*ilo], + &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo, (ftnlen)1, (ftnlen)1); + work[1] = (doublereal)lwkopt; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dsyconv.cpp b/lib/linalg/dsyconv.cpp new file mode 100644 index 0000000000..9d4a2908ae --- /dev/null +++ b/lib/linalg/dsyconv.cpp @@ -0,0 +1,199 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int dsyconv_(char *uplo, char *way, integer *n, doublereal *a, integer *lda, integer *ipiv, + doublereal *e, integer *info, ftnlen uplo_len, ftnlen way_len) +{ + integer a_dim1, a_offset, i__1; + integer i__, j, ip; + doublereal temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + logical upper; + extern int xerbla_(char *, integer *, ftnlen); + logical convert; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + --e; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + convert = lsame_(way, (char *)"C", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (!convert && !lsame_(way, (char *)"R", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DSYCONV", &i__1, (ftnlen)7); + return 0; + } + if (*n == 0) { + return 0; + } + if (upper) { + if (convert) { + i__ = *n; + e[1] = 0.; + while (i__ > 1) { + if (ipiv[i__] < 0) { + e[i__] = a[i__ - 1 + i__ * a_dim1]; + e[i__ - 1] = 0.; + a[i__ - 1 + i__ * a_dim1] = 0.; + --i__; + } else { + e[i__] = 0.; + } + --i__; + } + i__ = *n; + while (i__ >= 1) { + if (ipiv[i__] > 0) { + ip = ipiv[i__]; + if (i__ < *n) { + i__1 = *n; + for (j = i__ + 1; j <= i__1; ++j) { + temp = a[ip + j * a_dim1]; + a[ip + j * a_dim1] = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = temp; + } + } + } else { + ip = -ipiv[i__]; + if (i__ < *n) { + i__1 = *n; + for (j = i__ + 1; j <= i__1; ++j) { + temp = a[ip + j * a_dim1]; + a[ip + j * a_dim1] = a[i__ - 1 + j * a_dim1]; + a[i__ - 1 + j * a_dim1] = temp; + } + } + --i__; + } + --i__; + } + } else { + i__ = 1; + while (i__ <= *n) { + if (ipiv[i__] > 0) { + ip = ipiv[i__]; + if (i__ < *n) { + i__1 = *n; + for (j = i__ + 1; j <= i__1; ++j) { + temp = a[ip + j * a_dim1]; + a[ip + j * a_dim1] = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = temp; + } + } + } else { + ip = -ipiv[i__]; + ++i__; + if (i__ < *n) { + i__1 = *n; + for (j = i__ + 1; j <= i__1; ++j) { + temp = a[ip + j * a_dim1]; + a[ip + j * a_dim1] = a[i__ - 1 + j * a_dim1]; + a[i__ - 1 + j * a_dim1] = temp; + } + } + } + ++i__; + } + i__ = *n; + while (i__ > 1) { + if (ipiv[i__] < 0) { + a[i__ - 1 + i__ * a_dim1] = e[i__]; + --i__; + } + --i__; + } + } + } else { + if (convert) { + i__ = 1; + e[*n] = 0.; + while (i__ <= *n) { + if (i__ < *n && ipiv[i__] < 0) { + e[i__] = a[i__ + 1 + i__ * a_dim1]; + e[i__ + 1] = 0.; + a[i__ + 1 + i__ * a_dim1] = 0.; + ++i__; + } else { + e[i__] = 0.; + } + ++i__; + } + i__ = 1; + while (i__ <= *n) { + if (ipiv[i__] > 0) { + ip = ipiv[i__]; + if (i__ > 1) { + i__1 = i__ - 1; + for (j = 1; j <= i__1; ++j) { + temp = a[ip + j * a_dim1]; + a[ip + j * a_dim1] = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = temp; + } + } + } else { + ip = -ipiv[i__]; + if (i__ > 1) { + i__1 = i__ - 1; + for (j = 1; j <= i__1; ++j) { + temp = a[ip + j * a_dim1]; + a[ip + j * a_dim1] = a[i__ + 1 + j * a_dim1]; + a[i__ + 1 + j * a_dim1] = temp; + } + } + ++i__; + } + ++i__; + } + } else { + i__ = *n; + while (i__ >= 1) { + if (ipiv[i__] > 0) { + ip = ipiv[i__]; + if (i__ > 1) { + i__1 = i__ - 1; + for (j = 1; j <= i__1; ++j) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = a[ip + j * a_dim1]; + a[ip + j * a_dim1] = temp; + } + } + } else { + ip = -ipiv[i__]; + --i__; + if (i__ > 1) { + i__1 = i__ - 1; + for (j = 1; j <= i__1; ++j) { + temp = a[i__ + 1 + j * a_dim1]; + a[i__ + 1 + j * a_dim1] = a[ip + j * a_dim1]; + a[ip + j * a_dim1] = temp; + } + } + } + --i__; + } + i__ = 1; + while (i__ <= *n - 1) { + if (ipiv[i__] < 0) { + a[i__ + 1 + i__ * a_dim1] = e[i__]; + ++i__; + } + ++i__; + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dsyr.cpp b/lib/linalg/dsyr.cpp new file mode 100644 index 0000000000..6806baea29 --- /dev/null +++ b/lib/linalg/dsyr.cpp @@ -0,0 +1,167 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c_n1 = -1; +int dsyr_(char *uplo, integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *a, + integer *lda, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2; + integer i__, j, ix, jx, kx, info; + doublereal temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int xerbla_(char *, integer *, ftnlen); + --x; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + info = 0; + if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*lda < max(1, *n)) { + info = 7; + } + if (info != 0) { + xerbla_((char *)"DSYR ", &info, (ftnlen)6); + return 0; + } + if (*n == 0 || *alpha == 0.) { + return 0; + } + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.) { + temp = *alpha * x[j]; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[i__] * temp; + } + } + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) { + temp = *alpha * x[jx]; + ix = kx; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[ix] * temp; + ix += *incx; + } + } + jx += *incx; + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.) { + temp = *alpha * x[j]; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[i__] * temp; + } + } + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) { + temp = *alpha * x[jx]; + ix = jx; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[ix] * temp; + ix += *incx; + } + } + jx += *incx; + } + } + } + return 0; +} +int dsysv_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, + doublereal *b, integer *ldb, doublereal *work, integer *lwork, integer *info, + ftnlen uplo_len) +{ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int xerbla_(char *, integer *, ftnlen), + dsytrf_(char *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, + integer *, ftnlen); + integer lwkopt; + logical lquery; + extern int dsytrs_(char *, integer *, integer *, doublereal *, integer *, integer *, + doublereal *, integer *, integer *, ftnlen), + dsytrs2_(char *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --work; + *info = 0; + lquery = *lwork == -1; + if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } else if (*ldb < max(1, *n)) { + *info = -8; + } else if (*lwork < 1 && !lquery) { + *info = -10; + } + if (*info == 0) { + if (*n == 0) { + lwkopt = 1; + } else { + dsytrf_(uplo, n, &a[a_offset], lda, &ipiv[1], &work[1], &c_n1, info, (ftnlen)1); + lwkopt = (integer)work[1]; + } + work[1] = (doublereal)lwkopt; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DSYSV ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + dsytrf_(uplo, n, &a[a_offset], lda, &ipiv[1], &work[1], lwork, info, (ftnlen)1); + if (*info == 0) { + if (*lwork < *n) { + dsytrs_(uplo, n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset], ldb, info, (ftnlen)1); + } else { + dsytrs2_(uplo, n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset], ldb, &work[1], info, + (ftnlen)1); + } + } + work[1] = (doublereal)lwkopt; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dsytf2.cpp b/lib/linalg/dsytf2.cpp new file mode 100644 index 0000000000..8b48de1da4 --- /dev/null +++ b/lib/linalg/dsytf2.cpp @@ -0,0 +1,246 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +int dsytf2_(char *uplo, integer *n, doublereal *a, integer *lda, integer *ipiv, integer *info, + ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1, d__2, d__3; + double sqrt(doublereal); + integer i__, j, k; + doublereal t, r1, d11, d12, d21, d22; + integer kk, kp; + doublereal wk, wkm1, wkp1; + integer imax, jmax; + extern int dsyr_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, ftnlen); + doublereal alpha; + extern int dscal_(integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int dswap_(integer *, doublereal *, integer *, doublereal *, integer *); + integer kstep; + logical upper; + doublereal absakk; + extern integer idamax_(integer *, doublereal *, integer *); + extern logical disnan_(doublereal *); + extern int xerbla_(char *, integer *, ftnlen); + doublereal colmax, rowmax; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DSYTF2", &i__1, (ftnlen)6); + return 0; + } + alpha = (sqrt(17.) + 1.) / 8.; + if (upper) { + k = *n; + L10: + if (k < 1) { + goto L70; + } + kstep = 1; + absakk = (d__1 = a[k + k * a_dim1], abs(d__1)); + if (k > 1) { + i__1 = k - 1; + imax = idamax_(&i__1, &a[k * a_dim1 + 1], &c__1); + colmax = (d__1 = a[imax + k * a_dim1], abs(d__1)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0. || disnan_(&absakk)) { + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = k - imax; + jmax = imax + idamax_(&i__1, &a[imax + (imax + 1) * a_dim1], lda); + rowmax = (d__1 = a[imax + jmax * a_dim1], abs(d__1)); + if (imax > 1) { + i__1 = imax - 1; + jmax = idamax_(&i__1, &a[imax * a_dim1 + 1], &c__1); + d__2 = rowmax, d__3 = (d__1 = a[jmax + imax * a_dim1], abs(d__1)); + rowmax = max(d__2, d__3); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else if ((d__1 = a[imax + imax * a_dim1], abs(d__1)) >= alpha * rowmax) { + kp = imax; + } else { + kp = imax; + kstep = 2; + } + } + kk = k - kstep + 1; + if (kp != kk) { + i__1 = kp - 1; + dswap_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &c__1); + i__1 = kk - kp - 1; + dswap_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp + 1) * a_dim1], lda); + t = a[kk + kk * a_dim1]; + a[kk + kk * a_dim1] = a[kp + kp * a_dim1]; + a[kp + kp * a_dim1] = t; + if (kstep == 2) { + t = a[k - 1 + k * a_dim1]; + a[k - 1 + k * a_dim1] = a[kp + k * a_dim1]; + a[kp + k * a_dim1] = t; + } + } + if (kstep == 1) { + r1 = 1. / a[k + k * a_dim1]; + i__1 = k - 1; + d__1 = -r1; + dsyr_(uplo, &i__1, &d__1, &a[k * a_dim1 + 1], &c__1, &a[a_offset], lda, (ftnlen)1); + i__1 = k - 1; + dscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1); + } else { + if (k > 2) { + d12 = a[k - 1 + k * a_dim1]; + d22 = a[k - 1 + (k - 1) * a_dim1] / d12; + d11 = a[k + k * a_dim1] / d12; + t = 1. / (d11 * d22 - 1.); + d12 = t / d12; + for (j = k - 2; j >= 1; --j) { + wkm1 = d12 * (d11 * a[j + (k - 1) * a_dim1] - a[j + k * a_dim1]); + wk = d12 * (d22 * a[j + k * a_dim1] - a[j + (k - 1) * a_dim1]); + for (i__ = j; i__ >= 1; --i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] - a[i__ + k * a_dim1] * wk - + a[i__ + (k - 1) * a_dim1] * wkm1; + } + a[j + k * a_dim1] = wk; + a[j + (k - 1) * a_dim1] = wkm1; + } + } + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k - 1] = -kp; + } + k -= kstep; + goto L10; + } else { + k = 1; + L40: + if (k > *n) { + goto L70; + } + kstep = 1; + absakk = (d__1 = a[k + k * a_dim1], abs(d__1)); + if (k < *n) { + i__1 = *n - k; + imax = k + idamax_(&i__1, &a[k + 1 + k * a_dim1], &c__1); + colmax = (d__1 = a[imax + k * a_dim1], abs(d__1)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0. || disnan_(&absakk)) { + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = imax - k; + jmax = k - 1 + idamax_(&i__1, &a[imax + k * a_dim1], lda); + rowmax = (d__1 = a[imax + jmax * a_dim1], abs(d__1)); + if (imax < *n) { + i__1 = *n - imax; + jmax = imax + idamax_(&i__1, &a[imax + 1 + imax * a_dim1], &c__1); + d__2 = rowmax, d__3 = (d__1 = a[jmax + imax * a_dim1], abs(d__1)); + rowmax = max(d__2, d__3); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else if ((d__1 = a[imax + imax * a_dim1], abs(d__1)) >= alpha * rowmax) { + kp = imax; + } else { + kp = imax; + kstep = 2; + } + } + kk = k + kstep - 1; + if (kp != kk) { + if (kp < *n) { + i__1 = *n - kp; + dswap_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1 + kp * a_dim1], &c__1); + } + i__1 = kp - kk - 1; + dswap_(&i__1, &a[kk + 1 + kk * a_dim1], &c__1, &a[kp + (kk + 1) * a_dim1], lda); + t = a[kk + kk * a_dim1]; + a[kk + kk * a_dim1] = a[kp + kp * a_dim1]; + a[kp + kp * a_dim1] = t; + if (kstep == 2) { + t = a[k + 1 + k * a_dim1]; + a[k + 1 + k * a_dim1] = a[kp + k * a_dim1]; + a[kp + k * a_dim1] = t; + } + } + if (kstep == 1) { + if (k < *n) { + d11 = 1. / a[k + k * a_dim1]; + i__1 = *n - k; + d__1 = -d11; + dsyr_(uplo, &i__1, &d__1, &a[k + 1 + k * a_dim1], &c__1, + &a[k + 1 + (k + 1) * a_dim1], lda, (ftnlen)1); + i__1 = *n - k; + dscal_(&i__1, &d11, &a[k + 1 + k * a_dim1], &c__1); + } + } else { + if (k < *n - 1) { + d21 = a[k + 1 + k * a_dim1]; + d11 = a[k + 1 + (k + 1) * a_dim1] / d21; + d22 = a[k + k * a_dim1] / d21; + t = 1. / (d11 * d22 - 1.); + d21 = t / d21; + i__1 = *n; + for (j = k + 2; j <= i__1; ++j) { + wk = d21 * (d11 * a[j + k * a_dim1] - a[j + (k + 1) * a_dim1]); + wkp1 = d21 * (d22 * a[j + (k + 1) * a_dim1] - a[j + k * a_dim1]); + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] - a[i__ + k * a_dim1] * wk - + a[i__ + (k + 1) * a_dim1] * wkp1; + } + a[j + k * a_dim1] = wk; + a[j + (k + 1) * a_dim1] = wkp1; + } + } + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k + 1] = -kp; + } + k += kstep; + goto L40; + } +L70: + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dsytrf.cpp b/lib/linalg/dsytrf.cpp new file mode 100644 index 0000000000..6bfc84ab87 --- /dev/null +++ b/lib/linalg/dsytrf.cpp @@ -0,0 +1,123 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +int dsytrf_(char *uplo, integer *n, doublereal *a, integer *lda, integer *ipiv, doublereal *work, + integer *lwork, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2; + integer j, k, kb, nb, iws; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nbmin, iinfo; + logical upper; + extern int dsytf2_(char *, integer *, doublereal *, integer *, integer *, integer *, ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + extern int dlasyf_(char *, integer *, integer *, integer *, doublereal *, integer *, integer *, + doublereal *, integer *, integer *, ftnlen); + integer ldwork, lwkopt; + logical lquery; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + --work; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } else if (*lwork < 1 && !lquery) { + *info = -7; + } + if (*info == 0) { + nb = ilaenv_(&c__1, (char *)"DSYTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = *n * nb; + work[1] = (doublereal)lwkopt; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DSYTRF", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + nbmin = 2; + ldwork = *n; + if (nb > 1 && nb < *n) { + iws = ldwork * nb; + if (*lwork < iws) { + i__1 = *lwork / ldwork; + nb = max(i__1, 1); + i__1 = 2, + i__2 = ilaenv_(&c__2, (char *)"DSYTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1, i__2); + } + } else { + iws = 1; + } + if (nb < nbmin) { + nb = *n; + } + if (upper) { + k = *n; + L10: + if (k < 1) { + goto L40; + } + if (k > nb) { + dlasyf_(uplo, &k, &nb, &kb, &a[a_offset], lda, &ipiv[1], &work[1], &ldwork, &iinfo, + (ftnlen)1); + } else { + dsytf2_(uplo, &k, &a[a_offset], lda, &ipiv[1], &iinfo, (ftnlen)1); + kb = k; + } + if (*info == 0 && iinfo > 0) { + *info = iinfo; + } + k -= kb; + goto L10; + } else { + k = 1; + L20: + if (k > *n) { + goto L40; + } + if (k <= *n - nb) { + i__1 = *n - k + 1; + dlasyf_(uplo, &i__1, &nb, &kb, &a[k + k * a_dim1], lda, &ipiv[k], &work[1], &ldwork, + &iinfo, (ftnlen)1); + } else { + i__1 = *n - k + 1; + dsytf2_(uplo, &i__1, &a[k + k * a_dim1], lda, &ipiv[k], &iinfo, (ftnlen)1); + kb = *n - k + 1; + } + if (*info == 0 && iinfo > 0) { + *info = iinfo + k - 1; + } + i__1 = k + kb - 1; + for (j = k; j <= i__1; ++j) { + if (ipiv[j] > 0) { + ipiv[j] = ipiv[j] + k - 1; + } else { + ipiv[j] = ipiv[j] - k + 1; + } + } + k += kb; + goto L20; + } +L40: + work[1] = (doublereal)lwkopt; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dsytrs.cpp b/lib/linalg/dsytrs.cpp new file mode 100644 index 0000000000..c9f849879b --- /dev/null +++ b/lib/linalg/dsytrs.cpp @@ -0,0 +1,214 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublereal c_b7 = -1.; +static integer c__1 = 1; +static doublereal c_b19 = 1.; +int dsytrs_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, + doublereal *b, integer *ldb, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + doublereal d__1; + integer j, k; + doublereal ak, bk; + integer kp; + doublereal akm1, bkm1; + extern int dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *); + doublereal akm1k; + extern int dscal_(integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + doublereal denom; + extern int dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen), + dswap_(integer *, doublereal *, integer *, doublereal *, integer *); + logical upper; + extern int xerbla_(char *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } else if (*ldb < max(1, *n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DSYTRS", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0 || *nrhs == 0) { + return 0; + } + if (upper) { + k = *n; + L10: + if (k < 1) { + goto L30; + } + if (ipiv[k] > 0) { + kp = ipiv[k]; + if (kp != k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + i__1 = k - 1; + dger_(&i__1, nrhs, &c_b7, &a[k * a_dim1 + 1], &c__1, &b[k + b_dim1], ldb, + &b[b_dim1 + 1], ldb); + d__1 = 1. / a[k + k * a_dim1]; + dscal_(nrhs, &d__1, &b[k + b_dim1], ldb); + --k; + } else { + kp = -ipiv[k]; + if (kp != k - 1) { + dswap_(nrhs, &b[k - 1 + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + i__1 = k - 2; + dger_(&i__1, nrhs, &c_b7, &a[k * a_dim1 + 1], &c__1, &b[k + b_dim1], ldb, + &b[b_dim1 + 1], ldb); + i__1 = k - 2; + dger_(&i__1, nrhs, &c_b7, &a[(k - 1) * a_dim1 + 1], &c__1, &b[k - 1 + b_dim1], ldb, + &b[b_dim1 + 1], ldb); + akm1k = a[k - 1 + k * a_dim1]; + akm1 = a[k - 1 + (k - 1) * a_dim1] / akm1k; + ak = a[k + k * a_dim1] / akm1k; + denom = akm1 * ak - 1.; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + bkm1 = b[k - 1 + j * b_dim1] / akm1k; + bk = b[k + j * b_dim1] / akm1k; + b[k - 1 + j * b_dim1] = (ak * bkm1 - bk) / denom; + b[k + j * b_dim1] = (akm1 * bk - bkm1) / denom; + } + k += -2; + } + goto L10; + L30: + k = 1; + L40: + if (k > *n) { + goto L50; + } + if (ipiv[k] > 0) { + i__1 = k - 1; + dgemv_((char *)"Transpose", &i__1, nrhs, &c_b7, &b[b_offset], ldb, &a[k * a_dim1 + 1], &c__1, + &c_b19, &b[k + b_dim1], ldb, (ftnlen)9); + kp = ipiv[k]; + if (kp != k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + ++k; + } else { + i__1 = k - 1; + dgemv_((char *)"Transpose", &i__1, nrhs, &c_b7, &b[b_offset], ldb, &a[k * a_dim1 + 1], &c__1, + &c_b19, &b[k + b_dim1], ldb, (ftnlen)9); + i__1 = k - 1; + dgemv_((char *)"Transpose", &i__1, nrhs, &c_b7, &b[b_offset], ldb, &a[(k + 1) * a_dim1 + 1], + &c__1, &c_b19, &b[k + 1 + b_dim1], ldb, (ftnlen)9); + kp = -ipiv[k]; + if (kp != k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + k += 2; + } + goto L40; + L50:; + } else { + k = 1; + L60: + if (k > *n) { + goto L80; + } + if (ipiv[k] > 0) { + kp = ipiv[k]; + if (kp != k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + if (k < *n) { + i__1 = *n - k; + dger_(&i__1, nrhs, &c_b7, &a[k + 1 + k * a_dim1], &c__1, &b[k + b_dim1], ldb, + &b[k + 1 + b_dim1], ldb); + } + d__1 = 1. / a[k + k * a_dim1]; + dscal_(nrhs, &d__1, &b[k + b_dim1], ldb); + ++k; + } else { + kp = -ipiv[k]; + if (kp != k + 1) { + dswap_(nrhs, &b[k + 1 + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + if (k < *n - 1) { + i__1 = *n - k - 1; + dger_(&i__1, nrhs, &c_b7, &a[k + 2 + k * a_dim1], &c__1, &b[k + b_dim1], ldb, + &b[k + 2 + b_dim1], ldb); + i__1 = *n - k - 1; + dger_(&i__1, nrhs, &c_b7, &a[k + 2 + (k + 1) * a_dim1], &c__1, &b[k + 1 + b_dim1], + ldb, &b[k + 2 + b_dim1], ldb); + } + akm1k = a[k + 1 + k * a_dim1]; + akm1 = a[k + k * a_dim1] / akm1k; + ak = a[k + 1 + (k + 1) * a_dim1] / akm1k; + denom = akm1 * ak - 1.; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + bkm1 = b[k + j * b_dim1] / akm1k; + bk = b[k + 1 + j * b_dim1] / akm1k; + b[k + j * b_dim1] = (ak * bkm1 - bk) / denom; + b[k + 1 + j * b_dim1] = (akm1 * bk - bkm1) / denom; + } + k += 2; + } + goto L60; + L80: + k = *n; + L90: + if (k < 1) { + goto L100; + } + if (ipiv[k] > 0) { + if (k < *n) { + i__1 = *n - k; + dgemv_((char *)"Transpose", &i__1, nrhs, &c_b7, &b[k + 1 + b_dim1], ldb, + &a[k + 1 + k * a_dim1], &c__1, &c_b19, &b[k + b_dim1], ldb, (ftnlen)9); + } + kp = ipiv[k]; + if (kp != k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + --k; + } else { + if (k < *n) { + i__1 = *n - k; + dgemv_((char *)"Transpose", &i__1, nrhs, &c_b7, &b[k + 1 + b_dim1], ldb, + &a[k + 1 + k * a_dim1], &c__1, &c_b19, &b[k + b_dim1], ldb, (ftnlen)9); + i__1 = *n - k; + dgemv_((char *)"Transpose", &i__1, nrhs, &c_b7, &b[k + 1 + b_dim1], ldb, + &a[k + 1 + (k - 1) * a_dim1], &c__1, &c_b19, &b[k - 1 + b_dim1], ldb, + (ftnlen)9); + } + kp = -ipiv[k]; + if (kp != k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + k += -2; + } + goto L90; + L100:; + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dsytrs2.cpp b/lib/linalg/dsytrs2.cpp new file mode 100644 index 0000000000..2d2bc90525 --- /dev/null +++ b/lib/linalg/dsytrs2.cpp @@ -0,0 +1,180 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublereal c_b10 = 1.; +int dsytrs2_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, + doublereal *b, integer *ldb, doublereal *work, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + doublereal d__1; + integer i__, j, k; + doublereal ak, bk; + integer kp; + doublereal akm1, bkm1, akm1k; + extern int dscal_(integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + doublereal denom; + integer iinfo; + extern int dswap_(integer *, doublereal *, integer *, doublereal *, integer *), + dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen); + logical upper; + extern int xerbla_(char *, integer *, ftnlen), + dsyconv_(char *, char *, integer *, doublereal *, integer *, integer *, doublereal *, + integer *, ftnlen, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --work; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } else if (*ldb < max(1, *n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DSYTRS2", &i__1, (ftnlen)7); + return 0; + } + if (*n == 0 || *nrhs == 0) { + return 0; + } + dsyconv_(uplo, (char *)"C", n, &a[a_offset], lda, &ipiv[1], &work[1], &iinfo, (ftnlen)1, (ftnlen)1); + if (upper) { + k = *n; + while (k >= 1) { + if (ipiv[k] > 0) { + kp = ipiv[k]; + if (kp != k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + --k; + } else { + kp = -ipiv[k]; + if (kp == -ipiv[k - 1]) { + dswap_(nrhs, &b[k - 1 + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + k += -2; + } + } + dtrsm_((char *)"L", (char *)"U", (char *)"N", (char *)"U", n, nrhs, &c_b10, &a[a_offset], lda, &b[b_offset], ldb, (ftnlen)1, + (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__ = *n; + while (i__ >= 1) { + if (ipiv[i__] > 0) { + d__1 = 1. / a[i__ + i__ * a_dim1]; + dscal_(nrhs, &d__1, &b[i__ + b_dim1], ldb); + } else if (i__ > 1) { + if (ipiv[i__ - 1] == ipiv[i__]) { + akm1k = work[i__]; + akm1 = a[i__ - 1 + (i__ - 1) * a_dim1] / akm1k; + ak = a[i__ + i__ * a_dim1] / akm1k; + denom = akm1 * ak - 1.; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + bkm1 = b[i__ - 1 + j * b_dim1] / akm1k; + bk = b[i__ + j * b_dim1] / akm1k; + b[i__ - 1 + j * b_dim1] = (ak * bkm1 - bk) / denom; + b[i__ + j * b_dim1] = (akm1 * bk - bkm1) / denom; + } + --i__; + } + } + --i__; + } + dtrsm_((char *)"L", (char *)"U", (char *)"T", (char *)"U", n, nrhs, &c_b10, &a[a_offset], lda, &b[b_offset], ldb, (ftnlen)1, + (ftnlen)1, (ftnlen)1, (ftnlen)1); + k = 1; + while (k <= *n) { + if (ipiv[k] > 0) { + kp = ipiv[k]; + if (kp != k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + ++k; + } else { + kp = -ipiv[k]; + if (k < *n && kp == -ipiv[k + 1]) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + k += 2; + } + } + } else { + k = 1; + while (k <= *n) { + if (ipiv[k] > 0) { + kp = ipiv[k]; + if (kp != k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + ++k; + } else { + kp = -ipiv[k + 1]; + if (kp == -ipiv[k]) { + dswap_(nrhs, &b[k + 1 + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + k += 2; + } + } + dtrsm_((char *)"L", (char *)"L", (char *)"N", (char *)"U", n, nrhs, &c_b10, &a[a_offset], lda, &b[b_offset], ldb, (ftnlen)1, + (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__ = 1; + while (i__ <= *n) { + if (ipiv[i__] > 0) { + d__1 = 1. / a[i__ + i__ * a_dim1]; + dscal_(nrhs, &d__1, &b[i__ + b_dim1], ldb); + } else { + akm1k = work[i__]; + akm1 = a[i__ + i__ * a_dim1] / akm1k; + ak = a[i__ + 1 + (i__ + 1) * a_dim1] / akm1k; + denom = akm1 * ak - 1.; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + bkm1 = b[i__ + j * b_dim1] / akm1k; + bk = b[i__ + 1 + j * b_dim1] / akm1k; + b[i__ + j * b_dim1] = (ak * bkm1 - bk) / denom; + b[i__ + 1 + j * b_dim1] = (akm1 * bk - bkm1) / denom; + } + ++i__; + } + ++i__; + } + dtrsm_((char *)"L", (char *)"L", (char *)"T", (char *)"U", n, nrhs, &c_b10, &a[a_offset], lda, &b[b_offset], ldb, (ftnlen)1, + (ftnlen)1, (ftnlen)1, (ftnlen)1); + k = *n; + while (k >= 1) { + if (ipiv[k] > 0) { + kp = ipiv[k]; + if (kp != k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + --k; + } else { + kp = -ipiv[k]; + if (k > 1 && kp == -ipiv[k - 1]) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + k += -2; + } + } + } + dsyconv_(uplo, (char *)"R", n, &a[a_offset], lda, &ipiv[1], &work[1], &iinfo, (ftnlen)1, (ftnlen)1); + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dtrevc3.cpp b/lib/linalg/dtrevc3.cpp new file mode 100644 index 0000000000..bd1a0a379e --- /dev/null +++ b/lib/linalg/dtrevc3.cpp @@ -0,0 +1,858 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +static doublereal c_b17 = 0.; +static logical c_false = FALSE_; +static doublereal c_b29 = 1.; +static logical c_true = TRUE_; +int dtrevc3_(char *side, char *howmny, logical *select, integer *n, doublereal *t, integer *ldt, + doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, integer *mm, integer *m, + doublereal *work, integer *lwork, integer *info, ftnlen side_len, ftnlen howmny_len) +{ + address a__1[2]; + integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1[2], i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4; + char ch__1[2]; + int s_lmp_cat(char *, char **, integer *, integer *, ftnlen); + double sqrt(doublereal); + integer i__, j, k; + doublereal x[4]; + integer j1, j2, iscomplex[128], nb, ii, ki, ip, is, iv; + doublereal wi, wr; + integer ki2; + doublereal rec, ulp, beta, emax; + logical pair; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, integer *); + logical allv; + integer ierr; + doublereal unfl, ovfl, smin; + logical over; + doublereal vmax; + integer jnxt; + extern int dscal_(integer *, doublereal *, doublereal *, integer *); + doublereal scale; + extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, + ftnlen, ftnlen); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen); + doublereal remax; + extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); + logical leftv, bothv; + extern int daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); + doublereal vcrit; + logical somev; + doublereal xnorm; + extern int dlaln2_(logical *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *), + dlabad_(doublereal *, doublereal *); + extern doublereal dlamch_(char *, ftnlen); + extern integer idamax_(integer *, doublereal *, integer *); + extern int dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + extern int dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, + integer *, ftnlen); + doublereal bignum; + logical rightv; + integer maxwrk; + doublereal smlnum; + logical lquery; + --select; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + bothv = lsame_(side, (char *)"B", (ftnlen)1, (ftnlen)1); + rightv = lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1) || bothv; + leftv = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1) || bothv; + allv = lsame_(howmny, (char *)"A", (ftnlen)1, (ftnlen)1); + over = lsame_(howmny, (char *)"B", (ftnlen)1, (ftnlen)1); + somev = lsame_(howmny, (char *)"S", (ftnlen)1, (ftnlen)1); + *info = 0; + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = howmny; + s_lmp_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, (char *)"DTREVC", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)2); + maxwrk = *n + (*n << 1) * nb; + work[1] = (doublereal)maxwrk; + lquery = *lwork == -1; + if (!rightv && !leftv) { + *info = -1; + } else if (!allv && !over && !somev) { + *info = -2; + } else if (*n < 0) { + *info = -4; + } else if (*ldt < max(1, *n)) { + *info = -6; + } else if (*ldvl < 1 || leftv && *ldvl < *n) { + *info = -8; + } else if (*ldvr < 1 || rightv && *ldvr < *n) { + *info = -10; + } else { + i__2 = 1, i__3 = *n * 3; + if (*lwork < max(i__2, i__3) && !lquery) { + *info = -14; + } else { + if (somev) { + *m = 0; + pair = FALSE_; + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + if (pair) { + pair = FALSE_; + select[j] = FALSE_; + } else { + if (j < *n) { + if (t[j + 1 + j * t_dim1] == 0.) { + if (select[j]) { + ++(*m); + } + } else { + pair = TRUE_; + if (select[j] || select[j + 1]) { + select[j] = TRUE_; + *m += 2; + } + } + } else { + if (select[*n]) { + ++(*m); + } + } + } + } + } else { + *m = *n; + } + if (*mm < *m) { + *info = -11; + } + } + } + if (*info != 0) { + i__2 = -(*info); + xerbla_((char *)"DTREVC3", &i__2, (ftnlen)7); + return 0; + } else if (lquery) { + return 0; + } + if (*n == 0) { + return 0; + } + if (over && *lwork >= *n + (*n << 4)) { + nb = (*lwork - *n) / (*n << 1); + nb = min(nb, 128); + i__2 = (nb << 1) + 1; + dlaset_((char *)"F", n, &i__2, &c_b17, &c_b17, &work[1], n, (ftnlen)1); + } else { + nb = 1; + } + unfl = dlamch_((char *)"Safe minimum", (ftnlen)12); + ovfl = 1. / unfl; + dlabad_(&unfl, &ovfl); + ulp = dlamch_((char *)"Precision", (ftnlen)9); + smlnum = unfl * (*n / ulp); + bignum = (1. - ulp) / smlnum; + work[1] = 0.; + i__2 = *n; + for (j = 2; j <= i__2; ++j) { + work[j] = 0.; + i__3 = j - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + work[j] += (d__1 = t[i__ + j * t_dim1], abs(d__1)); + } + } + if (rightv) { + iv = 2; + if (nb > 2) { + iv = nb; + } + ip = 0; + is = *m; + for (ki = *n; ki >= 1; --ki) { + if (ip == -1) { + ip = 1; + goto L140; + } else if (ki == 1) { + ip = 0; + } else if (t[ki + (ki - 1) * t_dim1] == 0.) { + ip = 0; + } else { + ip = -1; + } + if (somev) { + if (ip == 0) { + if (!select[ki]) { + goto L140; + } + } else { + if (!select[ki - 1]) { + goto L140; + } + } + } + wr = t[ki + ki * t_dim1]; + wi = 0.; + if (ip != 0) { + wi = sqrt((d__1 = t[ki + (ki - 1) * t_dim1], abs(d__1))) * + sqrt((d__2 = t[ki - 1 + ki * t_dim1], abs(d__2))); + } + d__1 = ulp * (abs(wr) + abs(wi)); + smin = max(d__1, smlnum); + if (ip == 0) { + work[ki + iv * *n] = 1.; + i__2 = ki - 1; + for (k = 1; k <= i__2; ++k) { + work[k + iv * *n] = -t[k + ki * t_dim1]; + } + jnxt = ki - 1; + for (j = ki - 1; j >= 1; --j) { + if (j > jnxt) { + goto L60; + } + j1 = j; + j2 = j; + jnxt = j - 1; + if (j > 1) { + if (t[j + (j - 1) * t_dim1] != 0.) { + j1 = j - 1; + jnxt = j - 2; + } + } + if (j1 == j2) { + dlaln2_(&c_false, &c__1, &c__1, &smin, &c_b29, &t[j + j * t_dim1], ldt, + &c_b29, &c_b29, &work[j + iv * *n], n, &wr, &c_b17, x, &c__2, + &scale, &xnorm, &ierr); + if (xnorm > 1.) { + if (work[j] > bignum / xnorm) { + x[0] /= xnorm; + scale /= xnorm; + } + } + if (scale != 1.) { + dscal_(&ki, &scale, &work[iv * *n + 1], &c__1); + } + work[j + iv * *n] = x[0]; + i__2 = j - 1; + d__1 = -x[0]; + daxpy_(&i__2, &d__1, &t[j * t_dim1 + 1], &c__1, &work[iv * *n + 1], &c__1); + } else { + dlaln2_(&c_false, &c__2, &c__1, &smin, &c_b29, &t[j - 1 + (j - 1) * t_dim1], + ldt, &c_b29, &c_b29, &work[j - 1 + iv * *n], n, &wr, &c_b17, x, + &c__2, &scale, &xnorm, &ierr); + if (xnorm > 1.) { + d__1 = work[j - 1], d__2 = work[j]; + beta = max(d__1, d__2); + if (beta > bignum / xnorm) { + x[0] /= xnorm; + x[1] /= xnorm; + scale /= xnorm; + } + } + if (scale != 1.) { + dscal_(&ki, &scale, &work[iv * *n + 1], &c__1); + } + work[j - 1 + iv * *n] = x[0]; + work[j + iv * *n] = x[1]; + i__2 = j - 2; + d__1 = -x[0]; + daxpy_(&i__2, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, &work[iv * *n + 1], + &c__1); + i__2 = j - 2; + d__1 = -x[1]; + daxpy_(&i__2, &d__1, &t[j * t_dim1 + 1], &c__1, &work[iv * *n + 1], &c__1); + } + L60:; + } + if (!over) { + dcopy_(&ki, &work[iv * *n + 1], &c__1, &vr[is * vr_dim1 + 1], &c__1); + ii = idamax_(&ki, &vr[is * vr_dim1 + 1], &c__1); + remax = 1. / (d__1 = vr[ii + is * vr_dim1], abs(d__1)); + dscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + vr[k + is * vr_dim1] = 0.; + } + } else if (nb == 1) { + if (ki > 1) { + i__2 = ki - 1; + dgemv_((char *)"N", n, &i__2, &c_b29, &vr[vr_offset], ldvr, &work[iv * *n + 1], + &c__1, &work[ki + iv * *n], &vr[ki * vr_dim1 + 1], &c__1, (ftnlen)1); + } + ii = idamax_(n, &vr[ki * vr_dim1 + 1], &c__1); + remax = 1. / (d__1 = vr[ii + ki * vr_dim1], abs(d__1)); + dscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); + } else { + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + work[k + iv * *n] = 0.; + } + iscomplex[iv - 1] = ip; + } + } else { + if ((d__1 = t[ki - 1 + ki * t_dim1], abs(d__1)) >= + (d__2 = t[ki + (ki - 1) * t_dim1], abs(d__2))) { + work[ki - 1 + (iv - 1) * *n] = 1.; + work[ki + iv * *n] = wi / t[ki - 1 + ki * t_dim1]; + } else { + work[ki - 1 + (iv - 1) * *n] = -wi / t[ki + (ki - 1) * t_dim1]; + work[ki + iv * *n] = 1.; + } + work[ki + (iv - 1) * *n] = 0.; + work[ki - 1 + iv * *n] = 0.; + i__2 = ki - 2; + for (k = 1; k <= i__2; ++k) { + work[k + (iv - 1) * *n] = + -work[ki - 1 + (iv - 1) * *n] * t[k + (ki - 1) * t_dim1]; + work[k + iv * *n] = -work[ki + iv * *n] * t[k + ki * t_dim1]; + } + jnxt = ki - 2; + for (j = ki - 2; j >= 1; --j) { + if (j > jnxt) { + goto L90; + } + j1 = j; + j2 = j; + jnxt = j - 1; + if (j > 1) { + if (t[j + (j - 1) * t_dim1] != 0.) { + j1 = j - 1; + jnxt = j - 2; + } + } + if (j1 == j2) { + dlaln2_(&c_false, &c__1, &c__2, &smin, &c_b29, &t[j + j * t_dim1], ldt, + &c_b29, &c_b29, &work[j + (iv - 1) * *n], n, &wr, &wi, x, &c__2, + &scale, &xnorm, &ierr); + if (xnorm > 1.) { + if (work[j] > bignum / xnorm) { + x[0] /= xnorm; + x[2] /= xnorm; + scale /= xnorm; + } + } + if (scale != 1.) { + dscal_(&ki, &scale, &work[(iv - 1) * *n + 1], &c__1); + dscal_(&ki, &scale, &work[iv * *n + 1], &c__1); + } + work[j + (iv - 1) * *n] = x[0]; + work[j + iv * *n] = x[2]; + i__2 = j - 1; + d__1 = -x[0]; + daxpy_(&i__2, &d__1, &t[j * t_dim1 + 1], &c__1, &work[(iv - 1) * *n + 1], + &c__1); + i__2 = j - 1; + d__1 = -x[2]; + daxpy_(&i__2, &d__1, &t[j * t_dim1 + 1], &c__1, &work[iv * *n + 1], &c__1); + } else { + dlaln2_(&c_false, &c__2, &c__2, &smin, &c_b29, &t[j - 1 + (j - 1) * t_dim1], + ldt, &c_b29, &c_b29, &work[j - 1 + (iv - 1) * *n], n, &wr, &wi, x, + &c__2, &scale, &xnorm, &ierr); + if (xnorm > 1.) { + d__1 = work[j - 1], d__2 = work[j]; + beta = max(d__1, d__2); + if (beta > bignum / xnorm) { + rec = 1. / xnorm; + x[0] *= rec; + x[2] *= rec; + x[1] *= rec; + x[3] *= rec; + scale *= rec; + } + } + if (scale != 1.) { + dscal_(&ki, &scale, &work[(iv - 1) * *n + 1], &c__1); + dscal_(&ki, &scale, &work[iv * *n + 1], &c__1); + } + work[j - 1 + (iv - 1) * *n] = x[0]; + work[j + (iv - 1) * *n] = x[1]; + work[j - 1 + iv * *n] = x[2]; + work[j + iv * *n] = x[3]; + i__2 = j - 2; + d__1 = -x[0]; + daxpy_(&i__2, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, + &work[(iv - 1) * *n + 1], &c__1); + i__2 = j - 2; + d__1 = -x[1]; + daxpy_(&i__2, &d__1, &t[j * t_dim1 + 1], &c__1, &work[(iv - 1) * *n + 1], + &c__1); + i__2 = j - 2; + d__1 = -x[2]; + daxpy_(&i__2, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, &work[iv * *n + 1], + &c__1); + i__2 = j - 2; + d__1 = -x[3]; + daxpy_(&i__2, &d__1, &t[j * t_dim1 + 1], &c__1, &work[iv * *n + 1], &c__1); + } + L90:; + } + if (!over) { + dcopy_(&ki, &work[(iv - 1) * *n + 1], &c__1, &vr[(is - 1) * vr_dim1 + 1], + &c__1); + dcopy_(&ki, &work[iv * *n + 1], &c__1, &vr[is * vr_dim1 + 1], &c__1); + emax = 0.; + i__2 = ki; + for (k = 1; k <= i__2; ++k) { + d__3 = emax, d__4 = (d__1 = vr[k + (is - 1) * vr_dim1], abs(d__1)) + + (d__2 = vr[k + is * vr_dim1], abs(d__2)); + emax = max(d__3, d__4); + } + remax = 1. / emax; + dscal_(&ki, &remax, &vr[(is - 1) * vr_dim1 + 1], &c__1); + dscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + vr[k + (is - 1) * vr_dim1] = 0.; + vr[k + is * vr_dim1] = 0.; + } + } else if (nb == 1) { + if (ki > 2) { + i__2 = ki - 2; + dgemv_((char *)"N", n, &i__2, &c_b29, &vr[vr_offset], ldvr, + &work[(iv - 1) * *n + 1], &c__1, &work[ki - 1 + (iv - 1) * *n], + &vr[(ki - 1) * vr_dim1 + 1], &c__1, (ftnlen)1); + i__2 = ki - 2; + dgemv_((char *)"N", n, &i__2, &c_b29, &vr[vr_offset], ldvr, &work[iv * *n + 1], + &c__1, &work[ki + iv * *n], &vr[ki * vr_dim1 + 1], &c__1, (ftnlen)1); + } else { + dscal_(n, &work[ki - 1 + (iv - 1) * *n], &vr[(ki - 1) * vr_dim1 + 1], + &c__1); + dscal_(n, &work[ki + iv * *n], &vr[ki * vr_dim1 + 1], &c__1); + } + emax = 0.; + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + d__3 = emax, d__4 = (d__1 = vr[k + (ki - 1) * vr_dim1], abs(d__1)) + + (d__2 = vr[k + ki * vr_dim1], abs(d__2)); + emax = max(d__3, d__4); + } + remax = 1. / emax; + dscal_(n, &remax, &vr[(ki - 1) * vr_dim1 + 1], &c__1); + dscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); + } else { + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + work[k + (iv - 1) * *n] = 0.; + work[k + iv * *n] = 0.; + } + iscomplex[iv - 2] = -ip; + iscomplex[iv - 1] = ip; + --iv; + } + } + if (nb > 1) { + if (ip == 0) { + ki2 = ki; + } else { + ki2 = ki - 1; + } + if (iv <= 2 || ki2 == 1) { + i__2 = nb - iv + 1; + i__3 = ki2 + nb - iv; + dgemm_((char *)"N", (char *)"N", n, &i__2, &i__3, &c_b29, &vr[vr_offset], ldvr, + &work[iv * *n + 1], n, &c_b17, &work[(nb + iv) * *n + 1], n, (ftnlen)1, + (ftnlen)1); + i__2 = nb; + for (k = iv; k <= i__2; ++k) { + if (iscomplex[k - 1] == 0) { + ii = idamax_(n, &work[(nb + k) * *n + 1], &c__1); + remax = 1. / (d__1 = work[ii + (nb + k) * *n], abs(d__1)); + } else if (iscomplex[k - 1] == 1) { + emax = 0.; + i__3 = *n; + for (ii = 1; ii <= i__3; ++ii) { + d__3 = emax, + d__4 = (d__1 = work[ii + (nb + k) * *n], abs(d__1)) + + (d__2 = work[ii + (nb + k + 1) * *n], abs(d__2)); + emax = max(d__3, d__4); + } + remax = 1. / emax; + } + dscal_(n, &remax, &work[(nb + k) * *n + 1], &c__1); + } + i__2 = nb - iv + 1; + dlacpy_((char *)"F", n, &i__2, &work[(nb + iv) * *n + 1], n, &vr[ki2 * vr_dim1 + 1], + ldvr, (ftnlen)1); + iv = nb; + } else { + --iv; + } + } + --is; + if (ip != 0) { + --is; + } + L140:; + } + } + if (leftv) { + iv = 1; + ip = 0; + is = 1; + i__2 = *n; + for (ki = 1; ki <= i__2; ++ki) { + if (ip == 1) { + ip = -1; + goto L260; + } else if (ki == *n) { + ip = 0; + } else if (t[ki + 1 + ki * t_dim1] == 0.) { + ip = 0; + } else { + ip = 1; + } + if (somev) { + if (!select[ki]) { + goto L260; + } + } + wr = t[ki + ki * t_dim1]; + wi = 0.; + if (ip != 0) { + wi = sqrt((d__1 = t[ki + (ki + 1) * t_dim1], abs(d__1))) * + sqrt((d__2 = t[ki + 1 + ki * t_dim1], abs(d__2))); + } + d__1 = ulp * (abs(wr) + abs(wi)); + smin = max(d__1, smlnum); + if (ip == 0) { + work[ki + iv * *n] = 1.; + i__3 = *n; + for (k = ki + 1; k <= i__3; ++k) { + work[k + iv * *n] = -t[ki + k * t_dim1]; + } + vmax = 1.; + vcrit = bignum; + jnxt = ki + 1; + i__3 = *n; + for (j = ki + 1; j <= i__3; ++j) { + if (j < jnxt) { + goto L170; + } + j1 = j; + j2 = j; + jnxt = j + 1; + if (j < *n) { + if (t[j + 1 + j * t_dim1] != 0.) { + j2 = j + 1; + jnxt = j + 2; + } + } + if (j1 == j2) { + if (work[j] > vcrit) { + rec = 1. / vmax; + i__4 = *n - ki + 1; + dscal_(&i__4, &rec, &work[ki + iv * *n], &c__1); + vmax = 1.; + vcrit = bignum; + } + i__4 = j - ki - 1; + work[j + iv * *n] -= ddot_(&i__4, &t[ki + 1 + j * t_dim1], &c__1, + &work[ki + 1 + iv * *n], &c__1); + dlaln2_(&c_false, &c__1, &c__1, &smin, &c_b29, &t[j + j * t_dim1], ldt, + &c_b29, &c_b29, &work[j + iv * *n], n, &wr, &c_b17, x, &c__2, + &scale, &xnorm, &ierr); + if (scale != 1.) { + i__4 = *n - ki + 1; + dscal_(&i__4, &scale, &work[ki + iv * *n], &c__1); + } + work[j + iv * *n] = x[0]; + d__2 = (d__1 = work[j + iv * *n], abs(d__1)); + vmax = max(d__2, vmax); + vcrit = bignum / vmax; + } else { + d__1 = work[j], d__2 = work[j + 1]; + beta = max(d__1, d__2); + if (beta > vcrit) { + rec = 1. / vmax; + i__4 = *n - ki + 1; + dscal_(&i__4, &rec, &work[ki + iv * *n], &c__1); + vmax = 1.; + vcrit = bignum; + } + i__4 = j - ki - 1; + work[j + iv * *n] -= ddot_(&i__4, &t[ki + 1 + j * t_dim1], &c__1, + &work[ki + 1 + iv * *n], &c__1); + i__4 = j - ki - 1; + work[j + 1 + iv * *n] -= ddot_(&i__4, &t[ki + 1 + (j + 1) * t_dim1], &c__1, + &work[ki + 1 + iv * *n], &c__1); + dlaln2_(&c_true, &c__2, &c__1, &smin, &c_b29, &t[j + j * t_dim1], ldt, + &c_b29, &c_b29, &work[j + iv * *n], n, &wr, &c_b17, x, &c__2, + &scale, &xnorm, &ierr); + if (scale != 1.) { + i__4 = *n - ki + 1; + dscal_(&i__4, &scale, &work[ki + iv * *n], &c__1); + } + work[j + iv * *n] = x[0]; + work[j + 1 + iv * *n] = x[1]; + d__3 = (d__1 = work[j + iv * *n], abs(d__1)), + d__4 = (d__2 = work[j + 1 + iv * *n], abs(d__2)), d__3 = max(d__3, d__4); + vmax = max(d__3, vmax); + vcrit = bignum / vmax; + } + L170:; + } + if (!over) { + i__3 = *n - ki + 1; + dcopy_(&i__3, &work[ki + iv * *n], &c__1, &vl[ki + is * vl_dim1], &c__1); + i__3 = *n - ki + 1; + ii = idamax_(&i__3, &vl[ki + is * vl_dim1], &c__1) + ki - 1; + remax = 1. / (d__1 = vl[ii + is * vl_dim1], abs(d__1)); + i__3 = *n - ki + 1; + dscal_(&i__3, &remax, &vl[ki + is * vl_dim1], &c__1); + i__3 = ki - 1; + for (k = 1; k <= i__3; ++k) { + vl[k + is * vl_dim1] = 0.; + } + } else if (nb == 1) { + if (ki < *n) { + i__3 = *n - ki; + dgemv_((char *)"N", n, &i__3, &c_b29, &vl[(ki + 1) * vl_dim1 + 1], ldvl, + &work[ki + 1 + iv * *n], &c__1, &work[ki + iv * *n], + &vl[ki * vl_dim1 + 1], &c__1, (ftnlen)1); + } + ii = idamax_(n, &vl[ki * vl_dim1 + 1], &c__1); + remax = 1. / (d__1 = vl[ii + ki * vl_dim1], abs(d__1)); + dscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); + } else { + i__3 = ki - 1; + for (k = 1; k <= i__3; ++k) { + work[k + iv * *n] = 0.; + } + iscomplex[iv - 1] = ip; + } + } else { + if ((d__1 = t[ki + (ki + 1) * t_dim1], abs(d__1)) >= + (d__2 = t[ki + 1 + ki * t_dim1], abs(d__2))) { + work[ki + iv * *n] = wi / t[ki + (ki + 1) * t_dim1]; + work[ki + 1 + (iv + 1) * *n] = 1.; + } else { + work[ki + iv * *n] = 1.; + work[ki + 1 + (iv + 1) * *n] = -wi / t[ki + 1 + ki * t_dim1]; + } + work[ki + 1 + iv * *n] = 0.; + work[ki + (iv + 1) * *n] = 0.; + i__3 = *n; + for (k = ki + 2; k <= i__3; ++k) { + work[k + iv * *n] = -work[ki + iv * *n] * t[ki + k * t_dim1]; + work[k + (iv + 1) * *n] = + -work[ki + 1 + (iv + 1) * *n] * t[ki + 1 + k * t_dim1]; + } + vmax = 1.; + vcrit = bignum; + jnxt = ki + 2; + i__3 = *n; + for (j = ki + 2; j <= i__3; ++j) { + if (j < jnxt) { + goto L200; + } + j1 = j; + j2 = j; + jnxt = j + 1; + if (j < *n) { + if (t[j + 1 + j * t_dim1] != 0.) { + j2 = j + 1; + jnxt = j + 2; + } + } + if (j1 == j2) { + if (work[j] > vcrit) { + rec = 1. / vmax; + i__4 = *n - ki + 1; + dscal_(&i__4, &rec, &work[ki + iv * *n], &c__1); + i__4 = *n - ki + 1; + dscal_(&i__4, &rec, &work[ki + (iv + 1) * *n], &c__1); + vmax = 1.; + vcrit = bignum; + } + i__4 = j - ki - 2; + work[j + iv * *n] -= ddot_(&i__4, &t[ki + 2 + j * t_dim1], &c__1, + &work[ki + 2 + iv * *n], &c__1); + i__4 = j - ki - 2; + work[j + (iv + 1) * *n] -= ddot_(&i__4, &t[ki + 2 + j * t_dim1], &c__1, + &work[ki + 2 + (iv + 1) * *n], &c__1); + d__1 = -wi; + dlaln2_(&c_false, &c__1, &c__2, &smin, &c_b29, &t[j + j * t_dim1], ldt, + &c_b29, &c_b29, &work[j + iv * *n], n, &wr, &d__1, x, &c__2, &scale, + &xnorm, &ierr); + if (scale != 1.) { + i__4 = *n - ki + 1; + dscal_(&i__4, &scale, &work[ki + iv * *n], &c__1); + i__4 = *n - ki + 1; + dscal_(&i__4, &scale, &work[ki + (iv + 1) * *n], &c__1); + } + work[j + iv * *n] = x[0]; + work[j + (iv + 1) * *n] = x[2]; + d__3 = (d__1 = work[j + iv * *n], abs(d__1)), + d__4 = (d__2 = work[j + (iv + 1) * *n], abs(d__2)), d__3 = max(d__3, d__4); + vmax = max(d__3, vmax); + vcrit = bignum / vmax; + } else { + d__1 = work[j], d__2 = work[j + 1]; + beta = max(d__1, d__2); + if (beta > vcrit) { + rec = 1. / vmax; + i__4 = *n - ki + 1; + dscal_(&i__4, &rec, &work[ki + iv * *n], &c__1); + i__4 = *n - ki + 1; + dscal_(&i__4, &rec, &work[ki + (iv + 1) * *n], &c__1); + vmax = 1.; + vcrit = bignum; + } + i__4 = j - ki - 2; + work[j + iv * *n] -= ddot_(&i__4, &t[ki + 2 + j * t_dim1], &c__1, + &work[ki + 2 + iv * *n], &c__1); + i__4 = j - ki - 2; + work[j + (iv + 1) * *n] -= ddot_(&i__4, &t[ki + 2 + j * t_dim1], &c__1, + &work[ki + 2 + (iv + 1) * *n], &c__1); + i__4 = j - ki - 2; + work[j + 1 + iv * *n] -= ddot_(&i__4, &t[ki + 2 + (j + 1) * t_dim1], &c__1, + &work[ki + 2 + iv * *n], &c__1); + i__4 = j - ki - 2; + work[j + 1 + (iv + 1) * *n] -= + ddot_(&i__4, &t[ki + 2 + (j + 1) * t_dim1], &c__1, + &work[ki + 2 + (iv + 1) * *n], &c__1); + d__1 = -wi; + dlaln2_(&c_true, &c__2, &c__2, &smin, &c_b29, &t[j + j * t_dim1], ldt, + &c_b29, &c_b29, &work[j + iv * *n], n, &wr, &d__1, x, &c__2, &scale, + &xnorm, &ierr); + if (scale != 1.) { + i__4 = *n - ki + 1; + dscal_(&i__4, &scale, &work[ki + iv * *n], &c__1); + i__4 = *n - ki + 1; + dscal_(&i__4, &scale, &work[ki + (iv + 1) * *n], &c__1); + } + work[j + iv * *n] = x[0]; + work[j + (iv + 1) * *n] = x[2]; + work[j + 1 + iv * *n] = x[1]; + work[j + 1 + (iv + 1) * *n] = x[3]; + d__1 = abs(x[0]), d__2 = abs(x[2]), d__1 = max(d__1, d__2), + d__2 = abs(x[1]), d__1 = max(d__1, d__2), d__2 = abs(x[3]), + d__1 = max(d__1, d__2); + vmax = max(d__1, vmax); + vcrit = bignum / vmax; + } + L200:; + } + if (!over) { + i__3 = *n - ki + 1; + dcopy_(&i__3, &work[ki + iv * *n], &c__1, &vl[ki + is * vl_dim1], &c__1); + i__3 = *n - ki + 1; + dcopy_(&i__3, &work[ki + (iv + 1) * *n], &c__1, &vl[ki + (is + 1) * vl_dim1], + &c__1); + emax = 0.; + i__3 = *n; + for (k = ki; k <= i__3; ++k) { + d__3 = emax, d__4 = (d__1 = vl[k + is * vl_dim1], abs(d__1)) + + (d__2 = vl[k + (is + 1) * vl_dim1], abs(d__2)); + emax = max(d__3, d__4); + } + remax = 1. / emax; + i__3 = *n - ki + 1; + dscal_(&i__3, &remax, &vl[ki + is * vl_dim1], &c__1); + i__3 = *n - ki + 1; + dscal_(&i__3, &remax, &vl[ki + (is + 1) * vl_dim1], &c__1); + i__3 = ki - 1; + for (k = 1; k <= i__3; ++k) { + vl[k + is * vl_dim1] = 0.; + vl[k + (is + 1) * vl_dim1] = 0.; + } + } else if (nb == 1) { + if (ki < *n - 1) { + i__3 = *n - ki - 1; + dgemv_((char *)"N", n, &i__3, &c_b29, &vl[(ki + 2) * vl_dim1 + 1], ldvl, + &work[ki + 2 + iv * *n], &c__1, &work[ki + iv * *n], + &vl[ki * vl_dim1 + 1], &c__1, (ftnlen)1); + i__3 = *n - ki - 1; + dgemv_((char *)"N", n, &i__3, &c_b29, &vl[(ki + 2) * vl_dim1 + 1], ldvl, + &work[ki + 2 + (iv + 1) * *n], &c__1, &work[ki + 1 + (iv + 1) * *n], + &vl[(ki + 1) * vl_dim1 + 1], &c__1, (ftnlen)1); + } else { + dscal_(n, &work[ki + iv * *n], &vl[ki * vl_dim1 + 1], &c__1); + dscal_(n, &work[ki + 1 + (iv + 1) * *n], &vl[(ki + 1) * vl_dim1 + 1], + &c__1); + } + emax = 0.; + i__3 = *n; + for (k = 1; k <= i__3; ++k) { + d__3 = emax, d__4 = (d__1 = vl[k + ki * vl_dim1], abs(d__1)) + + (d__2 = vl[k + (ki + 1) * vl_dim1], abs(d__2)); + emax = max(d__3, d__4); + } + remax = 1. / emax; + dscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); + dscal_(n, &remax, &vl[(ki + 1) * vl_dim1 + 1], &c__1); + } else { + i__3 = ki - 1; + for (k = 1; k <= i__3; ++k) { + work[k + iv * *n] = 0.; + work[k + (iv + 1) * *n] = 0.; + } + iscomplex[iv - 1] = ip; + iscomplex[iv] = -ip; + ++iv; + } + } + if (nb > 1) { + if (ip == 0) { + ki2 = ki; + } else { + ki2 = ki + 1; + } + if (iv >= nb - 1 || ki2 == *n) { + i__3 = *n - ki2 + iv; + dgemm_((char *)"N", (char *)"N", n, &iv, &i__3, &c_b29, &vl[(ki2 - iv + 1) * vl_dim1 + 1], ldvl, + &work[ki2 - iv + 1 + *n], n, &c_b17, &work[(nb + 1) * *n + 1], n, + (ftnlen)1, (ftnlen)1); + i__3 = iv; + for (k = 1; k <= i__3; ++k) { + if (iscomplex[k - 1] == 0) { + ii = idamax_(n, &work[(nb + k) * *n + 1], &c__1); + remax = 1. / (d__1 = work[ii + (nb + k) * *n], abs(d__1)); + } else if (iscomplex[k - 1] == 1) { + emax = 0.; + i__4 = *n; + for (ii = 1; ii <= i__4; ++ii) { + d__3 = emax, + d__4 = (d__1 = work[ii + (nb + k) * *n], abs(d__1)) + + (d__2 = work[ii + (nb + k + 1) * *n], abs(d__2)); + emax = max(d__3, d__4); + } + remax = 1. / emax; + } + dscal_(n, &remax, &work[(nb + k) * *n + 1], &c__1); + } + dlacpy_((char *)"F", n, &iv, &work[(nb + 1) * *n + 1], n, + &vl[(ki2 - iv + 1) * vl_dim1 + 1], ldvl, (ftnlen)1); + iv = 1; + } else { + ++iv; + } + } + ++is; + if (ip != 0) { + ++is; + } + L260:; + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dtrexc.cpp b/lib/linalg/dtrexc.cpp new file mode 100644 index 0000000000..07568d6ed2 --- /dev/null +++ b/lib/linalg/dtrexc.cpp @@ -0,0 +1,217 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c__2 = 2; +int dtrexc_(char *compq, integer *n, doublereal *t, integer *ldt, doublereal *q, integer *ldq, + integer *ifst, integer *ilst, doublereal *work, integer *info, ftnlen compq_len) +{ + integer q_dim1, q_offset, t_dim1, t_offset, i__1; + integer nbf, nbl, here; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + logical wantq; + extern int dlaexc_(logical *, integer *, doublereal *, integer *, doublereal *, integer *, + integer *, integer *, integer *, doublereal *, integer *), + xerbla_(char *, integer *, ftnlen); + integer nbnext; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --work; + *info = 0; + wantq = lsame_(compq, (char *)"V", (ftnlen)1, (ftnlen)1); + if (!wantq && !lsame_(compq, (char *)"N", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldt < max(1, *n)) { + *info = -4; + } else if (*ldq < 1 || wantq && *ldq < max(1, *n)) { + *info = -6; + } else if ((*ifst < 1 || *ifst > *n) && *n > 0) { + *info = -7; + } else if ((*ilst < 1 || *ilst > *n) && *n > 0) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DTREXC", &i__1, (ftnlen)6); + return 0; + } + if (*n <= 1) { + return 0; + } + if (*ifst > 1) { + if (t[*ifst + (*ifst - 1) * t_dim1] != 0.) { + --(*ifst); + } + } + nbf = 1; + if (*ifst < *n) { + if (t[*ifst + 1 + *ifst * t_dim1] != 0.) { + nbf = 2; + } + } + if (*ilst > 1) { + if (t[*ilst + (*ilst - 1) * t_dim1] != 0.) { + --(*ilst); + } + } + nbl = 1; + if (*ilst < *n) { + if (t[*ilst + 1 + *ilst * t_dim1] != 0.) { + nbl = 2; + } + } + if (*ifst == *ilst) { + return 0; + } + if (*ifst < *ilst) { + if (nbf == 2 && nbl == 1) { + --(*ilst); + } + if (nbf == 1 && nbl == 2) { + ++(*ilst); + } + here = *ifst; + L10: + if (nbf == 1 || nbf == 2) { + nbnext = 1; + if (here + nbf + 1 <= *n) { + if (t[here + nbf + 1 + (here + nbf) * t_dim1] != 0.) { + nbnext = 2; + } + } + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &here, &nbf, &nbnext, &work[1], + info); + if (*info != 0) { + *ilst = here; + return 0; + } + here += nbnext; + if (nbf == 2) { + if (t[here + 1 + here * t_dim1] == 0.) { + nbf = 3; + } + } + } else { + nbnext = 1; + if (here + 3 <= *n) { + if (t[here + 3 + (here + 2) * t_dim1] != 0.) { + nbnext = 2; + } + } + i__1 = here + 1; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, &c__1, &nbnext, + &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + if (nbnext == 1) { + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &here, &c__1, &nbnext, + &work[1], info); + ++here; + } else { + if (t[here + 2 + (here + 1) * t_dim1] == 0.) { + nbnext = 1; + } + if (nbnext == 2) { + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &here, &c__1, &nbnext, + &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + here += 2; + } else { + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &here, &c__1, &c__1, + &work[1], info); + i__1 = here + 1; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, &c__1, &c__1, + &work[1], info); + here += 2; + } + } + } + if (here < *ilst) { + goto L10; + } + } else { + here = *ifst; + L20: + if (nbf == 1 || nbf == 2) { + nbnext = 1; + if (here >= 3) { + if (t[here - 1 + (here - 2) * t_dim1] != 0.) { + nbnext = 2; + } + } + i__1 = here - nbnext; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, &nbnext, &nbf, &work[1], + info); + if (*info != 0) { + *ilst = here; + return 0; + } + here -= nbnext; + if (nbf == 2) { + if (t[here + 1 + here * t_dim1] == 0.) { + nbf = 3; + } + } + } else { + nbnext = 1; + if (here >= 3) { + if (t[here - 1 + (here - 2) * t_dim1] != 0.) { + nbnext = 2; + } + } + i__1 = here - nbnext; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, &nbnext, &c__1, + &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + if (nbnext == 1) { + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &here, &nbnext, &c__1, + &work[1], info); + --here; + } else { + if (t[here + (here - 1) * t_dim1] == 0.) { + nbnext = 1; + } + if (nbnext == 2) { + i__1 = here - 1; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, &c__2, &c__1, + &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + here += -2; + } else { + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &here, &c__1, &c__1, + &work[1], info); + i__1 = here - 1; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, &c__1, &c__1, + &work[1], info); + here += -2; + } + } + } + if (here > *ilst) { + goto L20; + } + } + *ilst = here; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dtrtrs.cpp b/lib/linalg/dtrtrs.cpp new file mode 100644 index 0000000000..3ef3eac882 --- /dev/null +++ b/lib/linalg/dtrtrs.cpp @@ -0,0 +1,65 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublereal c_b12 = 1.; +int dtrtrs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, doublereal *a, + integer *lda, doublereal *b, integer *ldb, integer *info, ftnlen uplo_len, + ftnlen trans_len, ftnlen diag_len) +{ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen), + xerbla_(char *, integer *, ftnlen); + logical nounit; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + *info = 0; + nounit = lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1); + if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (!lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1) && + !lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1) && + !lsame_(trans, (char *)"C", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (!nounit && !lsame_(diag, (char *)"U", (ftnlen)1, (ftnlen)1)) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*nrhs < 0) { + *info = -5; + } else if (*lda < max(1, *n)) { + *info = -7; + } else if (*ldb < max(1, *n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DTRTRS", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + if (nounit) { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + if (a[*info + *info * a_dim1] == 0.) { + return 0; + } + } + } + *info = 0; + dtrsm_((char *)"Left", uplo, trans, diag, n, nrhs, &c_b12, &a[a_offset], lda, &b[b_offset], ldb, + (ftnlen)4, (ftnlen)1, (ftnlen)1, (ftnlen)1); + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/izamax.cpp b/lib/linalg/izamax.cpp new file mode 100644 index 0000000000..1aebf6ac52 --- /dev/null +++ b/lib/linalg/izamax.cpp @@ -0,0 +1,46 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +integer izamax_(integer *n, doublecomplex *zx, integer *incx) +{ + integer ret_val, i__1; + integer i__, ix; + doublereal dmax__; + extern doublereal dcabs1_(doublecomplex *); + --zx; + ret_val = 0; + if (*n < 1 || *incx <= 0) { + return ret_val; + } + ret_val = 1; + if (*n == 1) { + return ret_val; + } + if (*incx == 1) { + dmax__ = dcabs1_(&zx[1]); + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if (dcabs1_(&zx[i__]) > dmax__) { + ret_val = i__; + dmax__ = dcabs1_(&zx[i__]); + } + } + } else { + ix = 1; + dmax__ = dcabs1_(&zx[1]); + ix += *incx; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if (dcabs1_(&zx[ix]) > dmax__) { + ret_val = i__; + dmax__ = dcabs1_(&zx[ix]); + } + ix += *incx; + } + } + return ret_val; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zcop.cpp b/lib/linalg/zcop.cpp new file mode 100644 index 0000000000..4ec6ae0b78 --- /dev/null +++ b/lib/linalg/zcop.cpp @@ -0,0 +1,43 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int zcopy_(integer *n, doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy) +{ + integer i__1, i__2, i__3; + integer i__, ix, iy; + --zy; + --zx; + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + zy[i__2].r = zx[i__3].r, zy[i__2].i = zx[i__3].i; + } + } else { + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = ix; + zy[i__2].r = zx[i__3].r, zy[i__2].i = zx[i__3].i; + ix += *incx; + iy += *incy; + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zdotu.cpp b/lib/linalg/zdotu.cpp new file mode 100644 index 0000000000..1b284d12c6 --- /dev/null +++ b/lib/linalg/zdotu.cpp @@ -0,0 +1,55 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +VOID zdotu_(doublecomplex *ret_val, integer *n, doublecomplex *zx, integer *incx, doublecomplex *zy, + integer *incy) +{ + integer i__1, i__2, i__3; + doublecomplex z__1, z__2; + integer i__, ix, iy; + doublecomplex ztemp; + --zy; + --zx; + ztemp.r = 0., ztemp.i = 0.; + ret_val->r = 0., ret_val->i = 0.; + if (*n <= 0) { + return; + } + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + z__2.r = zx[i__2].r * zy[i__3].r - zx[i__2].i * zy[i__3].i, + z__2.i = zx[i__2].r * zy[i__3].i + zx[i__2].i * zy[i__3].r; + z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i; + ztemp.r = z__1.r, ztemp.i = z__1.i; + } + } else { + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ix; + i__3 = iy; + z__2.r = zx[i__2].r * zy[i__3].r - zx[i__2].i * zy[i__3].i, + z__2.i = zx[i__2].r * zy[i__3].i + zx[i__2].i * zy[i__3].r; + z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i; + ztemp.r = z__1.r, ztemp.i = z__1.i; + ix += *incx; + iy += *incy; + } + } + ret_val->r = ztemp.r, ret_val->i = ztemp.i; + return; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zgetrf.cpp b/lib/linalg/zgetrf.cpp new file mode 100644 index 0000000000..5fb9182b87 --- /dev/null +++ b/lib/linalg/zgetrf.cpp @@ -0,0 +1,90 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +static integer c_n1 = -1; +int zgetrf_(integer *m, integer *n, doublecomplex *a, integer *lda, integer *ipiv, integer *info) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1; + integer i__, j, jb, nb, iinfo; + extern int zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, ftnlen, ftnlen), + ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + extern int zlaswp_(integer *, doublecomplex *, integer *, integer *, integer *, integer *, + integer *), + zgetrf2_(integer *, integer *, doublecomplex *, integer *, integer *, integer *); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZGETRF", &i__1, (ftnlen)6); + return 0; + } + if (*m == 0 || *n == 0) { + return 0; + } + nb = ilaenv_(&c__1, (char *)"ZGETRF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (nb <= 1 || nb >= min(*m, *n)) { + zgetrf2_(m, n, &a[a_offset], lda, &ipiv[1], info); + } else { + i__1 = min(*m, *n); + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + i__3 = min(*m, *n) - j + 1; + jb = min(i__3, nb); + i__3 = *m - j + 1; + zgetrf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); + if (*info == 0 && iinfo > 0) { + *info = iinfo + j - 1; + } + i__4 = *m, i__5 = j + jb - 1; + i__3 = min(i__4, i__5); + for (i__ = j; i__ <= i__3; ++i__) { + ipiv[i__] = j - 1 + ipiv[i__]; + } + i__3 = j - 1; + i__4 = j + jb - 1; + zlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); + if (j + jb <= *n) { + i__3 = *n - j - jb + 1; + i__4 = j + jb - 1; + zlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, &ipiv[1], &c__1); + i__3 = *n - j - jb + 1; + ztrsm_((char *)"Left", (char *)"Lower", (char *)"No transpose", (char *)"Unit", &jb, &i__3, &c_b1, + &a[j + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda, (ftnlen)4, + (ftnlen)5, (ftnlen)12, (ftnlen)4); + if (j + jb <= *m) { + i__3 = *m - j - jb + 1; + i__4 = *n - j - jb + 1; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"No transpose", (char *)"No transpose", &i__3, &i__4, &jb, &z__1, + &a[j + jb + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda, &c_b1, + &a[j + jb + (j + jb) * a_dim1], lda, (ftnlen)12, (ftnlen)12); + } + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zgetrf2.cpp b/lib/linalg/zgetrf2.cpp new file mode 100644 index 0000000000..805b5810bc --- /dev/null +++ b/lib/linalg/zgetrf2.cpp @@ -0,0 +1,117 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +int zgetrf2_(integer *m, integer *n, doublecomplex *a, integer *lda, integer *ipiv, integer *info) +{ + integer a_dim1, a_offset, i__1, i__2; + doublecomplex z__1; + double z_lmp_abs(doublecomplex *); + void z_lmp_div(doublecomplex *, doublecomplex *, doublecomplex *); + integer i__, n1, n2; + doublecomplex temp; + integer iinfo; + doublereal sfmin; + extern int zscal_(integer *, doublecomplex *, doublecomplex *, integer *), + zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, + ftnlen, ftnlen), + ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen); + extern doublereal dlamch_(char *, ftnlen); + extern int xerbla_(char *, integer *, ftnlen); + extern integer izamax_(integer *, doublecomplex *, integer *); + extern int zlaswp_(integer *, doublecomplex *, integer *, integer *, integer *, integer *, + integer *); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZGETRF2", &i__1, (ftnlen)7); + return 0; + } + if (*m == 0 || *n == 0) { + return 0; + } + if (*m == 1) { + ipiv[1] = 1; + i__1 = a_dim1 + 1; + if (a[i__1].r == 0. && a[i__1].i == 0.) { + *info = 1; + } + } else if (*n == 1) { + sfmin = dlamch_((char *)"S", (ftnlen)1); + i__ = izamax_(m, &a[a_dim1 + 1], &c__1); + ipiv[1] = i__; + i__1 = i__ + a_dim1; + if (a[i__1].r != 0. || a[i__1].i != 0.) { + if (i__ != 1) { + i__1 = a_dim1 + 1; + temp.r = a[i__1].r, temp.i = a[i__1].i; + i__1 = a_dim1 + 1; + i__2 = i__ + a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = i__ + a_dim1; + a[i__1].r = temp.r, a[i__1].i = temp.i; + } + if (z_lmp_abs(&a[a_dim1 + 1]) >= sfmin) { + i__1 = *m - 1; + z_lmp_div(&z__1, &c_b1, &a[a_dim1 + 1]); + zscal_(&i__1, &z__1, &a[a_dim1 + 2], &c__1); + } else { + i__1 = *m - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + 1 + a_dim1; + z_lmp_div(&z__1, &a[i__ + 1 + a_dim1], &a[a_dim1 + 1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + } + } + } else { + *info = 1; + } + } else { + n1 = min(*m, *n) / 2; + n2 = *n - n1; + zgetrf2_(m, &n1, &a[a_offset], lda, &ipiv[1], &iinfo); + if (*info == 0 && iinfo > 0) { + *info = iinfo; + } + zlaswp_(&n2, &a[(n1 + 1) * a_dim1 + 1], lda, &c__1, &n1, &ipiv[1], &c__1); + ztrsm_((char *)"L", (char *)"L", (char *)"N", (char *)"U", &n1, &n2, &c_b1, &a[a_offset], lda, &a[(n1 + 1) * a_dim1 + 1], + lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__1 = *m - n1; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"N", (char *)"N", &i__1, &n2, &n1, &z__1, &a[n1 + 1 + a_dim1], lda, + &a[(n1 + 1) * a_dim1 + 1], lda, &c_b1, &a[n1 + 1 + (n1 + 1) * a_dim1], lda, + (ftnlen)1, (ftnlen)1); + i__1 = *m - n1; + zgetrf2_(&i__1, &n2, &a[n1 + 1 + (n1 + 1) * a_dim1], lda, &ipiv[n1 + 1], &iinfo); + if (*info == 0 && iinfo > 0) { + *info = iinfo + n1; + } + i__1 = min(*m, *n); + for (i__ = n1 + 1; i__ <= i__1; ++i__) { + ipiv[i__] += n1; + } + i__1 = n1 + 1; + i__2 = min(*m, *n); + zlaswp_(&n1, &a[a_dim1 + 1], lda, &i__1, &i__2, &ipiv[1], &c__1); + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zgetri.cpp b/lib/linalg/zgetri.cpp new file mode 100644 index 0000000000..a61e931cb4 --- /dev/null +++ b/lib/linalg/zgetri.cpp @@ -0,0 +1,132 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b2 = {1., 0.}; +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +int zgetri_(integer *n, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *work, + integer *lwork, integer *info) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1; + integer i__, j, jb, nb, jj, jp, nn, iws, nbmin; + extern int zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, ftnlen, ftnlen), + zgemv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen), + zswap_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), + ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + integer ldwork, lwkopt; + logical lquery; + extern int ztrtri_(char *, char *, integer *, doublecomplex *, integer *, integer *, ftnlen, + ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + --work; + *info = 0; + nb = ilaenv_(&c__1, (char *)"ZGETRI", (char *)" ", n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = *n * nb; + work[1].r = (doublereal)lwkopt, work[1].i = 0.; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*lda < max(1, *n)) { + *info = -3; + } else if (*lwork < max(1, *n) && !lquery) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZGETRI", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + if (*n == 0) { + return 0; + } + ztrtri_((char *)"Upper", (char *)"Non-unit", n, &a[a_offset], lda, info, (ftnlen)5, (ftnlen)8); + if (*info > 0) { + return 0; + } + nbmin = 2; + ldwork = *n; + if (nb > 1 && nb < *n) { + i__1 = ldwork * nb; + iws = max(i__1, 1); + if (*lwork < iws) { + nb = *lwork / ldwork; + i__1 = 2, + i__2 = ilaenv_(&c__2, (char *)"ZGETRI", (char *)" ", n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1, i__2); + } + } else { + iws = *n; + } + if (nb < nbmin || nb >= *n) { + for (j = *n; j >= 1; --j) { + i__1 = *n; + for (i__ = j + 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__ + j * a_dim1; + work[i__2].r = a[i__3].r, work[i__2].i = a[i__3].i; + i__2 = i__ + j * a_dim1; + a[i__2].r = 0., a[i__2].i = 0.; + } + if (j < *n) { + i__1 = *n - j; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", n, &i__1, &z__1, &a[(j + 1) * a_dim1 + 1], lda, &work[j + 1], + &c__1, &c_b2, &a[j * a_dim1 + 1], &c__1, (ftnlen)12); + } + } + } else { + nn = (*n - 1) / nb * nb + 1; + i__1 = -nb; + for (j = nn; i__1 < 0 ? j >= 1 : j <= 1; j += i__1) { + i__2 = nb, i__3 = *n - j + 1; + jb = min(i__2, i__3); + i__2 = j + jb - 1; + for (jj = j; jj <= i__2; ++jj) { + i__3 = *n; + for (i__ = jj + 1; i__ <= i__3; ++i__) { + i__4 = i__ + (jj - j) * ldwork; + i__5 = i__ + jj * a_dim1; + work[i__4].r = a[i__5].r, work[i__4].i = a[i__5].i; + i__4 = i__ + jj * a_dim1; + a[i__4].r = 0., a[i__4].i = 0.; + } + } + if (j + jb <= *n) { + i__2 = *n - j - jb + 1; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"No transpose", (char *)"No transpose", n, &jb, &i__2, &z__1, + &a[(j + jb) * a_dim1 + 1], lda, &work[j + jb], &ldwork, &c_b2, + &a[j * a_dim1 + 1], lda, (ftnlen)12, (ftnlen)12); + } + ztrsm_((char *)"Right", (char *)"Lower", (char *)"No transpose", (char *)"Unit", n, &jb, &c_b2, &work[j], &ldwork, + &a[j * a_dim1 + 1], lda, (ftnlen)5, (ftnlen)5, (ftnlen)12, (ftnlen)4); + } + } + for (j = *n - 1; j >= 1; --j) { + jp = ipiv[j]; + if (jp != j) { + zswap_(n, &a[j * a_dim1 + 1], &c__1, &a[jp * a_dim1 + 1], &c__1); + } + } + work[1].r = (doublereal)iws, work[1].i = 0.; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zhegs2.cpp b/lib/linalg/zhegs2.cpp new file mode 100644 index 0000000000..685f548c61 --- /dev/null +++ b/lib/linalg/zhegs2.cpp @@ -0,0 +1,197 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +int zhegs2_(integer *itype, char *uplo, integer *n, doublecomplex *a, integer *lda, + doublecomplex *b, integer *ldb, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + doublereal d__1, d__2; + doublecomplex z__1; + integer k; + doublecomplex ct; + doublereal akk, bkk; + extern int zher2_(char *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + logical upper; + extern int zaxpy_(integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *), + ztrmv_(char *, char *, char *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *, ftnlen, ftnlen, ftnlen), + ztrsv_(char *, char *, char *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *, ftnlen, ftnlen, ftnlen), + xerbla_(char *, integer *, ftnlen), + zdscal_(integer *, doublereal *, doublecomplex *, integer *), + zlacgv_(integer *, doublecomplex *, integer *); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } else if (*ldb < max(1, *n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZHEGS2", &i__1, (ftnlen)6); + return 0; + } + if (*itype == 1) { + if (upper) { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + i__2 = k + k * a_dim1; + akk = a[i__2].r; + i__2 = k + k * b_dim1; + bkk = b[i__2].r; + d__1 = bkk; + akk /= d__1 * d__1; + i__2 = k + k * a_dim1; + a[i__2].r = akk, a[i__2].i = 0.; + if (k < *n) { + i__2 = *n - k; + d__1 = 1. / bkk; + zdscal_(&i__2, &d__1, &a[k + (k + 1) * a_dim1], lda); + d__1 = akk * -.5; + ct.r = d__1, ct.i = 0.; + i__2 = *n - k; + zlacgv_(&i__2, &a[k + (k + 1) * a_dim1], lda); + i__2 = *n - k; + zlacgv_(&i__2, &b[k + (k + 1) * b_dim1], ldb); + i__2 = *n - k; + zaxpy_(&i__2, &ct, &b[k + (k + 1) * b_dim1], ldb, &a[k + (k + 1) * a_dim1], + lda); + i__2 = *n - k; + z__1.r = -1., z__1.i = -0.; + zher2_(uplo, &i__2, &z__1, &a[k + (k + 1) * a_dim1], lda, + &b[k + (k + 1) * b_dim1], ldb, &a[k + 1 + (k + 1) * a_dim1], lda, + (ftnlen)1); + i__2 = *n - k; + zaxpy_(&i__2, &ct, &b[k + (k + 1) * b_dim1], ldb, &a[k + (k + 1) * a_dim1], + lda); + i__2 = *n - k; + zlacgv_(&i__2, &b[k + (k + 1) * b_dim1], ldb); + i__2 = *n - k; + ztrsv_(uplo, (char *)"Conjugate transpose", (char *)"Non-unit", &i__2, + &b[k + 1 + (k + 1) * b_dim1], ldb, &a[k + (k + 1) * a_dim1], lda, + (ftnlen)1, (ftnlen)19, (ftnlen)8); + i__2 = *n - k; + zlacgv_(&i__2, &a[k + (k + 1) * a_dim1], lda); + } + } + } else { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + i__2 = k + k * a_dim1; + akk = a[i__2].r; + i__2 = k + k * b_dim1; + bkk = b[i__2].r; + d__1 = bkk; + akk /= d__1 * d__1; + i__2 = k + k * a_dim1; + a[i__2].r = akk, a[i__2].i = 0.; + if (k < *n) { + i__2 = *n - k; + d__1 = 1. / bkk; + zdscal_(&i__2, &d__1, &a[k + 1 + k * a_dim1], &c__1); + d__1 = akk * -.5; + ct.r = d__1, ct.i = 0.; + i__2 = *n - k; + zaxpy_(&i__2, &ct, &b[k + 1 + k * b_dim1], &c__1, &a[k + 1 + k * a_dim1], + &c__1); + i__2 = *n - k; + z__1.r = -1., z__1.i = -0.; + zher2_(uplo, &i__2, &z__1, &a[k + 1 + k * a_dim1], &c__1, + &b[k + 1 + k * b_dim1], &c__1, &a[k + 1 + (k + 1) * a_dim1], lda, + (ftnlen)1); + i__2 = *n - k; + zaxpy_(&i__2, &ct, &b[k + 1 + k * b_dim1], &c__1, &a[k + 1 + k * a_dim1], + &c__1); + i__2 = *n - k; + ztrsv_(uplo, (char *)"No transpose", (char *)"Non-unit", &i__2, &b[k + 1 + (k + 1) * b_dim1], + ldb, &a[k + 1 + k * a_dim1], &c__1, (ftnlen)1, (ftnlen)12, (ftnlen)8); + } + } + } + } else { + if (upper) { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + i__2 = k + k * a_dim1; + akk = a[i__2].r; + i__2 = k + k * b_dim1; + bkk = b[i__2].r; + i__2 = k - 1; + ztrmv_(uplo, (char *)"No transpose", (char *)"Non-unit", &i__2, &b[b_offset], ldb, + &a[k * a_dim1 + 1], &c__1, (ftnlen)1, (ftnlen)12, (ftnlen)8); + d__1 = akk * .5; + ct.r = d__1, ct.i = 0.; + i__2 = k - 1; + zaxpy_(&i__2, &ct, &b[k * b_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1); + i__2 = k - 1; + zher2_(uplo, &i__2, &c_b1, &a[k * a_dim1 + 1], &c__1, &b[k * b_dim1 + 1], &c__1, + &a[a_offset], lda, (ftnlen)1); + i__2 = k - 1; + zaxpy_(&i__2, &ct, &b[k * b_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1); + i__2 = k - 1; + zdscal_(&i__2, &bkk, &a[k * a_dim1 + 1], &c__1); + i__2 = k + k * a_dim1; + d__2 = bkk; + d__1 = akk * (d__2 * d__2); + a[i__2].r = d__1, a[i__2].i = 0.; + } + } else { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + i__2 = k + k * a_dim1; + akk = a[i__2].r; + i__2 = k + k * b_dim1; + bkk = b[i__2].r; + i__2 = k - 1; + zlacgv_(&i__2, &a[k + a_dim1], lda); + i__2 = k - 1; + ztrmv_(uplo, (char *)"Conjugate transpose", (char *)"Non-unit", &i__2, &b[b_offset], ldb, + &a[k + a_dim1], lda, (ftnlen)1, (ftnlen)19, (ftnlen)8); + d__1 = akk * .5; + ct.r = d__1, ct.i = 0.; + i__2 = k - 1; + zlacgv_(&i__2, &b[k + b_dim1], ldb); + i__2 = k - 1; + zaxpy_(&i__2, &ct, &b[k + b_dim1], ldb, &a[k + a_dim1], lda); + i__2 = k - 1; + zher2_(uplo, &i__2, &c_b1, &a[k + a_dim1], lda, &b[k + b_dim1], ldb, &a[a_offset], + lda, (ftnlen)1); + i__2 = k - 1; + zaxpy_(&i__2, &ct, &b[k + b_dim1], ldb, &a[k + a_dim1], lda); + i__2 = k - 1; + zlacgv_(&i__2, &b[k + b_dim1], ldb); + i__2 = k - 1; + zdscal_(&i__2, &bkk, &a[k + a_dim1], lda); + i__2 = k - 1; + zlacgv_(&i__2, &a[k + a_dim1], lda); + i__2 = k + k * a_dim1; + d__2 = bkk; + d__1 = akk * (d__2 * d__2); + a[i__2].r = d__1, a[i__2].i = 0.; + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zhegst.cpp b/lib/linalg/zhegst.cpp new file mode 100644 index 0000000000..8c9d9434cb --- /dev/null +++ b/lib/linalg/zhegst.cpp @@ -0,0 +1,195 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static doublecomplex c_b2 = {.5, 0.}; +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b18 = 1.; +int zhegst_(integer *itype, char *uplo, integer *n, doublecomplex *a, integer *lda, + doublecomplex *b, integer *ldb, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + doublecomplex z__1; + integer k, kb, nb; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int zhemm_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, ftnlen, ftnlen); + logical upper; + extern int ztrmm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, + ftnlen, ftnlen), + ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen), + zhegs2_(integer *, char *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *, integer *, ftnlen), + zher2k_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, doublecomplex *, integer *, ftnlen, + ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } else if (*ldb < max(1, *n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZHEGST", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + nb = ilaenv_(&c__1, (char *)"ZHEGST", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (nb <= 1 || nb >= *n) { + zhegs2_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info, (ftnlen)1); + } else { + if (*itype == 1) { + if (upper) { + i__1 = *n; + i__2 = nb; + for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { + i__3 = *n - k + 1; + kb = min(i__3, nb); + zhegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + k * b_dim1], ldb, + info, (ftnlen)1); + if (k + kb <= *n) { + i__3 = *n - k - kb + 1; + ztrsm_((char *)"L", uplo, (char *)"C", (char *)"N", &kb, &i__3, &c_b1, &b[k + k * b_dim1], ldb, + &a[k + (k + kb) * a_dim1], lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, + (ftnlen)1); + i__3 = *n - k - kb + 1; + z__1.r = -.5, z__1.i = -0.; + zhemm_((char *)"L", uplo, &kb, &i__3, &z__1, &a[k + k * a_dim1], lda, + &b[k + (k + kb) * b_dim1], ldb, &c_b1, &a[k + (k + kb) * a_dim1], + lda, (ftnlen)1, (ftnlen)1); + i__3 = *n - k - kb + 1; + z__1.r = -1., z__1.i = -0.; + zher2k_(uplo, (char *)"C", &i__3, &kb, &z__1, &a[k + (k + kb) * a_dim1], lda, + &b[k + (k + kb) * b_dim1], ldb, &c_b18, + &a[k + kb + (k + kb) * a_dim1], lda, (ftnlen)1, (ftnlen)1); + i__3 = *n - k - kb + 1; + z__1.r = -.5, z__1.i = -0.; + zhemm_((char *)"L", uplo, &kb, &i__3, &z__1, &a[k + k * a_dim1], lda, + &b[k + (k + kb) * b_dim1], ldb, &c_b1, &a[k + (k + kb) * a_dim1], + lda, (ftnlen)1, (ftnlen)1); + i__3 = *n - k - kb + 1; + ztrsm_((char *)"R", uplo, (char *)"N", (char *)"N", &kb, &i__3, &c_b1, + &b[k + kb + (k + kb) * b_dim1], ldb, &a[k + (k + kb) * a_dim1], lda, + (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } + } + } else { + i__2 = *n; + i__1 = nb; + for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) { + i__3 = *n - k + 1; + kb = min(i__3, nb); + zhegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + k * b_dim1], ldb, + info, (ftnlen)1); + if (k + kb <= *n) { + i__3 = *n - k - kb + 1; + ztrsm_((char *)"R", uplo, (char *)"C", (char *)"N", &i__3, &kb, &c_b1, &b[k + k * b_dim1], ldb, + &a[k + kb + k * a_dim1], lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, + (ftnlen)1); + i__3 = *n - k - kb + 1; + z__1.r = -.5, z__1.i = -0.; + zhemm_((char *)"R", uplo, &i__3, &kb, &z__1, &a[k + k * a_dim1], lda, + &b[k + kb + k * b_dim1], ldb, &c_b1, &a[k + kb + k * a_dim1], lda, + (ftnlen)1, (ftnlen)1); + i__3 = *n - k - kb + 1; + z__1.r = -1., z__1.i = -0.; + zher2k_(uplo, (char *)"N", &i__3, &kb, &z__1, &a[k + kb + k * a_dim1], lda, + &b[k + kb + k * b_dim1], ldb, &c_b18, + &a[k + kb + (k + kb) * a_dim1], lda, (ftnlen)1, (ftnlen)1); + i__3 = *n - k - kb + 1; + z__1.r = -.5, z__1.i = -0.; + zhemm_((char *)"R", uplo, &i__3, &kb, &z__1, &a[k + k * a_dim1], lda, + &b[k + kb + k * b_dim1], ldb, &c_b1, &a[k + kb + k * a_dim1], lda, + (ftnlen)1, (ftnlen)1); + i__3 = *n - k - kb + 1; + ztrsm_((char *)"L", uplo, (char *)"N", (char *)"N", &i__3, &kb, &c_b1, + &b[k + kb + (k + kb) * b_dim1], ldb, &a[k + kb + k * a_dim1], lda, + (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } + } + } + } else { + if (upper) { + i__1 = *n; + i__2 = nb; + for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { + i__3 = *n - k + 1; + kb = min(i__3, nb); + i__3 = k - 1; + ztrmm_((char *)"L", uplo, (char *)"N", (char *)"N", &i__3, &kb, &c_b1, &b[b_offset], ldb, + &a[k * a_dim1 + 1], lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__3 = k - 1; + zhemm_((char *)"R", uplo, &i__3, &kb, &c_b2, &a[k + k * a_dim1], lda, + &b[k * b_dim1 + 1], ldb, &c_b1, &a[k * a_dim1 + 1], lda, (ftnlen)1, + (ftnlen)1); + i__3 = k - 1; + zher2k_(uplo, (char *)"N", &i__3, &kb, &c_b1, &a[k * a_dim1 + 1], lda, + &b[k * b_dim1 + 1], ldb, &c_b18, &a[a_offset], lda, (ftnlen)1, + (ftnlen)1); + i__3 = k - 1; + zhemm_((char *)"R", uplo, &i__3, &kb, &c_b2, &a[k + k * a_dim1], lda, + &b[k * b_dim1 + 1], ldb, &c_b1, &a[k * a_dim1 + 1], lda, (ftnlen)1, + (ftnlen)1); + i__3 = k - 1; + ztrmm_((char *)"R", uplo, (char *)"C", (char *)"N", &i__3, &kb, &c_b1, &b[k + k * b_dim1], ldb, + &a[k * a_dim1 + 1], lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + zhegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + k * b_dim1], ldb, + info, (ftnlen)1); + } + } else { + i__2 = *n; + i__1 = nb; + for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) { + i__3 = *n - k + 1; + kb = min(i__3, nb); + i__3 = k - 1; + ztrmm_((char *)"R", uplo, (char *)"N", (char *)"N", &kb, &i__3, &c_b1, &b[b_offset], ldb, + &a[k + a_dim1], lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__3 = k - 1; + zhemm_((char *)"L", uplo, &kb, &i__3, &c_b2, &a[k + k * a_dim1], lda, &b[k + b_dim1], + ldb, &c_b1, &a[k + a_dim1], lda, (ftnlen)1, (ftnlen)1); + i__3 = k - 1; + zher2k_(uplo, (char *)"C", &i__3, &kb, &c_b1, &a[k + a_dim1], lda, &b[k + b_dim1], ldb, + &c_b18, &a[a_offset], lda, (ftnlen)1, (ftnlen)1); + i__3 = k - 1; + zhemm_((char *)"L", uplo, &kb, &i__3, &c_b2, &a[k + k * a_dim1], lda, &b[k + b_dim1], + ldb, &c_b1, &a[k + a_dim1], lda, (ftnlen)1, (ftnlen)1); + i__3 = k - 1; + ztrmm_((char *)"L", uplo, (char *)"C", (char *)"N", &kb, &i__3, &c_b1, &b[k + k * b_dim1], ldb, + &a[k + a_dim1], lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + zhegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + k * b_dim1], ldb, + info, (ftnlen)1); + } + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zhegv.cpp b/lib/linalg/zhegv.cpp new file mode 100644 index 0000000000..9d85be5132 --- /dev/null +++ b/lib/linalg/zhegv.cpp @@ -0,0 +1,115 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +static integer c_n1 = -1; +int zhegv_(integer *itype, char *jobz, char *uplo, integer *n, doublecomplex *a, integer *lda, + doublecomplex *b, integer *ldb, doublereal *w, doublecomplex *work, integer *lwork, + doublereal *rwork, integer *info, ftnlen jobz_len, ftnlen uplo_len) +{ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + integer nb, neig; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int zheev_(char *, char *, integer *, doublecomplex *, integer *, doublereal *, + doublecomplex *, integer *, doublereal *, integer *, ftnlen, ftnlen); + char trans[1]; + logical upper, wantz; + extern int ztrmm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, + ftnlen, ftnlen), + ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + extern int zhegst_(integer *, char *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *, integer *, ftnlen); + integer lwkopt; + logical lquery; + extern int zpotrf_(char *, integer *, doublecomplex *, integer *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --w; + --work; + --rwork; + wantz = lsame_(jobz, (char *)"V", (ftnlen)1, (ftnlen)1); + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + *info = 0; + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (!(wantz || lsame_(jobz, (char *)"N", (ftnlen)1, (ftnlen)1))) { + *info = -2; + } else if (!(upper || lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*lda < max(1, *n)) { + *info = -6; + } else if (*ldb < max(1, *n)) { + *info = -8; + } + if (*info == 0) { + nb = ilaenv_(&c__1, (char *)"ZHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + i__1 = 1, i__2 = (nb + 1) * *n; + lwkopt = max(i__1, i__2); + work[1].r = (doublereal)lwkopt, work[1].i = 0.; + i__1 = 1, i__2 = (*n << 1) - 1; + if (*lwork < max(i__1, i__2) && !lquery) { + *info = -11; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZHEGV ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + if (*n == 0) { + return 0; + } + zpotrf_(uplo, n, &b[b_offset], ldb, info, (ftnlen)1); + if (*info != 0) { + *info = *n + *info; + return 0; + } + zhegst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info, (ftnlen)1); + zheev_(jobz, uplo, n, &a[a_offset], lda, &w[1], &work[1], lwork, &rwork[1], info, (ftnlen)1, + (ftnlen)1); + if (wantz) { + neig = *n; + if (*info > 0) { + neig = *info - 1; + } + if (*itype == 1 || *itype == 2) { + if (upper) { + *(unsigned char *)trans = 'N'; + } else { + *(unsigned char *)trans = 'C'; + } + ztrsm_((char *)"Left", uplo, trans, (char *)"Non-unit", n, &neig, &c_b1, &b[b_offset], ldb, + &a[a_offset], lda, (ftnlen)4, (ftnlen)1, (ftnlen)1, (ftnlen)8); + } else if (*itype == 3) { + if (upper) { + *(unsigned char *)trans = 'C'; + } else { + *(unsigned char *)trans = 'N'; + } + ztrmm_((char *)"Left", uplo, trans, (char *)"Non-unit", n, &neig, &c_b1, &b[b_offset], ldb, + &a[a_offset], lda, (ftnlen)4, (ftnlen)1, (ftnlen)1, (ftnlen)8); + } + } + work[1].r = (doublereal)lwkopt, work[1].i = 0.; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zhemm.cpp b/lib/linalg/zhemm.cpp new file mode 100644 index 0000000000..3237e16c2c --- /dev/null +++ b/lib/linalg/zhemm.cpp @@ -0,0 +1,271 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int zhemm_(char *side, char *uplo, integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, + integer *lda, doublecomplex *b, integer *ldb, doublecomplex *beta, doublecomplex *c__, + integer *ldc, ftnlen side_len, ftnlen uplo_len) +{ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, + i__6; + doublereal d__1; + doublecomplex z__1, z__2, z__3, z__4, z__5; + void d_lmp_cnjg(doublecomplex *, doublecomplex *); + integer i__, j, k, info; + doublecomplex temp1, temp2; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nrowa; + logical upper; + extern int xerbla_(char *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + if (lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1)) { + nrowa = *m; + } else { + nrowa = *n; + } + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + info = 0; + if (!lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1) && !lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 2; + } else if (*m < 0) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1, nrowa)) { + info = 7; + } else if (*ldb < max(1, *m)) { + info = 9; + } else if (*ldc < max(1, *m)) { + info = 12; + } + if (info != 0) { + xerbla_((char *)"ZHEMM ", &info, (ftnlen)6); + return 0; + } + if (*m == 0 || *n == 0 || + alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && beta->i == 0.)) { + return 0; + } + if (alpha->r == 0. && alpha->i == 0.) { + if (beta->r == 0. && beta->i == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; + } + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, + z__1.i = beta->r * c__[i__4].i + beta->i * c__[i__4].r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } + } + return 0; + } + if (lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1)) { + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, + z__1.i = alpha->r * b[i__3].i + alpha->i * b[i__3].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + i__4 = k + j * c_dim1; + i__5 = k + j * c_dim1; + i__6 = k + i__ * a_dim1; + z__2.r = temp1.r * a[i__6].r - temp1.i * a[i__6].i, + z__2.i = temp1.r * a[i__6].i + temp1.i * a[i__6].r; + z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + z__2.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; + i__4 = k + j * b_dim1; + d_lmp_cnjg(&z__3, &a[k + i__ * a_dim1]); + z__2.r = b[i__4].r * z__3.r - b[i__4].i * z__3.i, + z__2.i = b[i__4].r * z__3.i + b[i__4].i * z__3.r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + } + if (beta->r == 0. && beta->i == 0.) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + i__ * a_dim1; + d__1 = a[i__4].r; + z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; + z__3.r = alpha->r * temp2.r - alpha->i * temp2.i, + z__3.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, + z__3.i = beta->r * c__[i__4].i + beta->i * c__[i__4].r; + i__5 = i__ + i__ * a_dim1; + d__1 = a[i__5].r; + z__4.r = d__1 * temp1.r, z__4.i = d__1 * temp1.i; + z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + z__4.i; + z__5.r = alpha->r * temp2.r - alpha->i * temp2.i, + z__5.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + i__2 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2].i, + z__1.i = alpha->r * b[i__2].i + alpha->i * b[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + i__3 = k + j * c_dim1; + i__4 = k + j * c_dim1; + i__5 = k + i__ * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; + z__1.r = c__[i__4].r + z__2.r, z__1.i = c__[i__4].i + z__2.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + i__3 = k + j * b_dim1; + d_lmp_cnjg(&z__3, &a[k + i__ * a_dim1]); + z__2.r = b[i__3].r * z__3.r - b[i__3].i * z__3.i, + z__2.i = b[i__3].r * z__3.i + b[i__3].i * z__3.r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + } + if (beta->r == 0. && beta->i == 0.) { + i__2 = i__ + j * c_dim1; + i__3 = i__ + i__ * a_dim1; + d__1 = a[i__3].r; + z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; + z__3.r = alpha->r * temp2.r - alpha->i * temp2.i, + z__3.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + } else { + i__2 = i__ + j * c_dim1; + i__3 = i__ + j * c_dim1; + z__3.r = beta->r * c__[i__3].r - beta->i * c__[i__3].i, + z__3.i = beta->r * c__[i__3].i + beta->i * c__[i__3].r; + i__4 = i__ + i__ * a_dim1; + d__1 = a[i__4].r; + z__4.r = d__1 * temp1.r, z__4.i = d__1 * temp1.i; + z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + z__4.i; + z__5.r = alpha->r * temp2.r - alpha->i * temp2.i, + z__5.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + } + } + } + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j + j * a_dim1; + d__1 = a[i__2].r; + z__1.r = d__1 * alpha->r, z__1.i = d__1 * alpha->i; + temp1.r = z__1.r, temp1.i = z__1.i; + if (beta->r == 0. && beta->i == 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * b_dim1; + z__1.r = temp1.r * b[i__4].r - temp1.i * b[i__4].i, + z__1.i = temp1.r * b[i__4].i + temp1.i * b[i__4].r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } else { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__2.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, + z__2.i = beta->r * c__[i__4].i + beta->i * c__[i__4].r; + i__5 = i__ + j * b_dim1; + z__3.r = temp1.r * b[i__5].r - temp1.i * b[i__5].i, + z__3.i = temp1.r * b[i__5].i + temp1.i * b[i__5].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } + i__2 = j - 1; + for (k = 1; k <= i__2; ++k) { + if (upper) { + i__3 = k + j * a_dim1; + z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, + z__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3].r; + temp1.r = z__1.r, temp1.i = z__1.i; + } else { + d_lmp_cnjg(&z__2, &a[j + k * a_dim1]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, + z__1.i = alpha->r * z__2.i + alpha->i * z__2.r; + temp1.r = z__1.r, temp1.i = z__1.i; + } + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + k * b_dim1; + z__2.r = temp1.r * b[i__6].r - temp1.i * b[i__6].i, + z__2.i = temp1.r * b[i__6].i + temp1.i * b[i__6].r; + z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + z__2.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; + } + } + i__2 = *n; + for (k = j + 1; k <= i__2; ++k) { + if (upper) { + d_lmp_cnjg(&z__2, &a[j + k * a_dim1]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, + z__1.i = alpha->r * z__2.i + alpha->i * z__2.r; + temp1.r = z__1.r, temp1.i = z__1.i; + } else { + i__3 = k + j * a_dim1; + z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, + z__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3].r; + temp1.r = z__1.r, temp1.i = z__1.i; + } + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + k * b_dim1; + z__2.r = temp1.r * b[i__6].r - temp1.i * b[i__6].i, + z__2.i = temp1.r * b[i__6].i + temp1.i * b[i__6].r; + z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + z__2.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; + } + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zher.cpp b/lib/linalg/zher.cpp new file mode 100644 index 0000000000..6514a72f65 --- /dev/null +++ b/lib/linalg/zher.cpp @@ -0,0 +1,187 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int zher_(char *uplo, integer *n, doublereal *alpha, doublecomplex *x, integer *incx, + doublecomplex *a, integer *lda, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1; + doublecomplex z__1, z__2; + void d_lmp_cnjg(doublecomplex *, doublecomplex *); + integer i__, j, ix, jx, kx, info; + doublecomplex temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int xerbla_(char *, integer *, ftnlen); + --x; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + info = 0; + if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*lda < max(1, *n)) { + info = 7; + } + if (info != 0) { + xerbla_((char *)"ZHER ", &info, (ftnlen)6); + return 0; + } + if (*n == 0 || *alpha == 0.) { + return 0; + } + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + d_lmp_cnjg(&z__2, &x[j]); + z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, + z__2.i = x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + } + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = j; + z__1.r = x[i__4].r * temp.r - x[i__4].i * temp.i, + z__1.i = x[i__4].r * temp.i + x[i__4].i * temp.r; + d__1 = a[i__3].r + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + d_lmp_cnjg(&z__2, &x[jx]); + z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix = kx; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, + z__2.i = x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + ix += *incx; + } + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = jx; + z__1.r = x[i__4].r * temp.r - x[i__4].i * temp.i, + z__1.i = x[i__4].r * temp.i + x[i__4].i * temp.r; + d__1 = a[i__3].r + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } + jx += *incx; + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + d_lmp_cnjg(&z__2, &x[j]); + z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = j; + z__1.r = temp.r * x[i__4].r - temp.i * x[i__4].i, + z__1.i = temp.r * x[i__4].i + temp.i * x[i__4].r; + d__1 = a[i__3].r + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, + z__2.i = x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + } + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + d_lmp_cnjg(&z__2, &x[jx]); + z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = jx; + z__1.r = temp.r * x[i__4].r - temp.i * x[i__4].i, + z__1.i = temp.r * x[i__4].i + temp.i * x[i__4].r; + d__1 = a[i__3].r + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + ix = jx; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, + z__2.i = x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + } + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } + jx += *incx; + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zherk.cpp b/lib/linalg/zherk.cpp new file mode 100644 index 0000000000..efae201bfa --- /dev/null +++ b/lib/linalg/zherk.cpp @@ -0,0 +1,325 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int zherk_(char *uplo, char *trans, integer *n, integer *k, doublereal *alpha, doublecomplex *a, + integer *lda, doublereal *beta, doublecomplex *c__, integer *ldc, ftnlen uplo_len, + ftnlen trans_len) +{ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6; + doublereal d__1; + doublecomplex z__1, z__2, z__3; + void d_lmp_cnjg(doublecomplex *, doublecomplex *); + integer i__, j, l, info; + doublecomplex temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nrowa; + doublereal rtemp; + logical upper; + extern int xerbla_(char *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + info = 0; + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (!lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1) && + !lsame_(trans, (char *)"C", (ftnlen)1, (ftnlen)1)) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1, nrowa)) { + info = 7; + } else if (*ldc < max(1, *n)) { + info = 10; + } + if (info != 0) { + xerbla_((char *)"ZHERK ", &info, (ftnlen)6); + return 0; + } + if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { + return 0; + } + if (*alpha == 0.) { + if (upper) { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; + } + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } + } + } else { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; + } + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } + } + } + return 0; + } + if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) { + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; + } + } else if (*beta != 1.) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * a_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0.) { + d_lmp_cnjg(&z__2, &a[j + l * a_dim1]); + z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + i__3 = j - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + z__2.i = temp.r * a[i__6].i + temp.i * a[i__6].r; + z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + z__2.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; + } + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + i__5 = i__ + l * a_dim1; + z__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__1.i = temp.r * a[i__5].i + temp.i * a[i__5].r; + d__1 = c__[i__4].r + z__1.r; + c__[i__3].r = d__1, c__[i__3].i = 0.; + } + } + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; + } + } else if (*beta != 1.) { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * a_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0.) { + d_lmp_cnjg(&z__2, &a[j + l * a_dim1]); + z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + i__5 = j + l * a_dim1; + z__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__1.i = temp.r * a[i__5].i + temp.i * a[i__5].r; + d__1 = c__[i__4].r + z__1.r; + c__[i__3].r = d__1, c__[i__3].i = 0.; + i__3 = *n; + for (i__ = j + 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + z__2.i = temp.r * a[i__6].i + temp.i * a[i__6].r; + z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + z__2.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; + } + } + } + } + } + } else { + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0., temp.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + d_lmp_cnjg(&z__3, &a[l + i__ * a_dim1]); + i__4 = l + j * a_dim1; + z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i, + z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (*beta == 0.) { + i__3 = i__ + j * c_dim1; + z__1.r = *alpha * temp.r, z__1.i = *alpha * temp.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + z__2.r = *alpha * temp.r, z__2.i = *alpha * temp.i; + i__4 = i__ + j * c_dim1; + z__3.r = *beta * c__[i__4].r, z__3.i = *beta * c__[i__4].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } + rtemp = 0.; + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + d_lmp_cnjg(&z__3, &a[l + j * a_dim1]); + i__3 = l + j * a_dim1; + z__2.r = z__3.r * a[i__3].r - z__3.i * a[i__3].i, + z__2.i = z__3.r * a[i__3].i + z__3.i * a[i__3].r; + z__1.r = rtemp + z__2.r, z__1.i = z__2.i; + rtemp = z__1.r; + } + if (*beta == 0.) { + i__2 = j + j * c_dim1; + d__1 = *alpha * rtemp; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *alpha * rtemp + *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + rtemp = 0.; + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + d_lmp_cnjg(&z__3, &a[l + j * a_dim1]); + i__3 = l + j * a_dim1; + z__2.r = z__3.r * a[i__3].r - z__3.i * a[i__3].i, + z__2.i = z__3.r * a[i__3].i + z__3.i * a[i__3].r; + z__1.r = rtemp + z__2.r, z__1.i = z__2.i; + rtemp = z__1.r; + } + if (*beta == 0.) { + i__2 = j + j * c_dim1; + d__1 = *alpha * rtemp; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *alpha * rtemp + *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + temp.r = 0., temp.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + d_lmp_cnjg(&z__3, &a[l + i__ * a_dim1]); + i__4 = l + j * a_dim1; + z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i, + z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (*beta == 0.) { + i__3 = i__ + j * c_dim1; + z__1.r = *alpha * temp.r, z__1.i = *alpha * temp.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + z__2.r = *alpha * temp.r, z__2.i = *alpha * temp.i; + i__4 = i__ + j * c_dim1; + z__3.r = *beta * c__[i__4].r, z__3.i = *beta * c__[i__4].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zhetf2.cpp b/lib/linalg/zhetf2.cpp new file mode 100644 index 0000000000..c960a63bc1 --- /dev/null +++ b/lib/linalg/zhetf2.cpp @@ -0,0 +1,439 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +int zhetf2_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, integer *info, + ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; + doublereal d__1, d__2, d__3, d__4; + doublecomplex z__1, z__2, z__3, z__4, z__5, z__6; + double sqrt(doublereal), d_lmp_imag(doublecomplex *); + void d_lmp_cnjg(doublecomplex *, doublecomplex *); + doublereal d__; + integer i__, j, k; + doublecomplex t; + doublereal r1, d11; + doublecomplex d12; + doublereal d22; + doublecomplex d21; + integer kk, kp; + doublecomplex wk; + doublereal tt; + doublecomplex wkm1, wkp1; + integer imax, jmax; + extern int zher_(char *, integer *, doublereal *, doublecomplex *, integer *, doublecomplex *, + integer *, ftnlen); + doublereal alpha; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer kstep; + logical upper; + extern int zswap_(integer *, doublecomplex *, integer *, doublecomplex *, integer *); + extern doublereal dlapy2_(doublereal *, doublereal *); + doublereal absakk; + extern logical disnan_(doublereal *); + extern int xerbla_(char *, integer *, ftnlen), + zdscal_(integer *, doublereal *, doublecomplex *, integer *); + doublereal colmax; + extern integer izamax_(integer *, doublecomplex *, integer *); + doublereal rowmax; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZHETF2", &i__1, (ftnlen)6); + return 0; + } + alpha = (sqrt(17.) + 1.) / 8.; + if (upper) { + k = *n; + L10: + if (k < 1) { + goto L90; + } + kstep = 1; + i__1 = k + k * a_dim1; + absakk = (d__1 = a[i__1].r, abs(d__1)); + if (k > 1) { + i__1 = k - 1; + imax = izamax_(&i__1, &a[k * a_dim1 + 1], &c__1); + i__1 = imax + k * a_dim1; + colmax = + (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&a[imax + k * a_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0. || disnan_(&absakk)) { + if (*info == 0) { + *info = k; + } + kp = k; + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = k - imax; + jmax = imax + izamax_(&i__1, &a[imax + (imax + 1) * a_dim1], lda); + i__1 = imax + jmax * a_dim1; + rowmax = (d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[imax + jmax * a_dim1]), abs(d__2)); + if (imax > 1) { + i__1 = imax - 1; + jmax = izamax_(&i__1, &a[imax * a_dim1 + 1], &c__1); + i__1 = jmax + imax * a_dim1; + d__3 = rowmax, d__4 = (d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[jmax + imax * a_dim1]), abs(d__2)); + rowmax = max(d__3, d__4); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else { + i__1 = imax + imax * a_dim1; + if ((d__1 = a[i__1].r, abs(d__1)) >= alpha * rowmax) { + kp = imax; + } else { + kp = imax; + kstep = 2; + } + } + } + kk = k - kstep + 1; + if (kp != kk) { + i__1 = kp - 1; + zswap_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &c__1); + i__1 = kk - 1; + for (j = kp + 1; j <= i__1; ++j) { + d_lmp_cnjg(&z__1, &a[j + kk * a_dim1]); + t.r = z__1.r, t.i = z__1.i; + i__2 = j + kk * a_dim1; + d_lmp_cnjg(&z__1, &a[kp + j * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = kp + j * a_dim1; + a[i__2].r = t.r, a[i__2].i = t.i; + } + i__1 = kp + kk * a_dim1; + d_lmp_cnjg(&z__1, &a[kp + kk * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = kk + kk * a_dim1; + r1 = a[i__1].r; + i__1 = kk + kk * a_dim1; + i__2 = kp + kp * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = kp + kp * a_dim1; + a[i__1].r = r1, a[i__1].i = 0.; + if (kstep == 2) { + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = k - 1 + k * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = k - 1 + k * a_dim1; + i__2 = kp + k * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + k * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + } + } else { + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + if (kstep == 2) { + i__1 = k - 1 + (k - 1) * a_dim1; + i__2 = k - 1 + (k - 1) * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } + } + if (kstep == 1) { + i__1 = k + k * a_dim1; + r1 = 1. / a[i__1].r; + i__1 = k - 1; + d__1 = -r1; + zher_(uplo, &i__1, &d__1, &a[k * a_dim1 + 1], &c__1, &a[a_offset], lda, (ftnlen)1); + i__1 = k - 1; + zdscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1); + } else { + if (k > 2) { + i__1 = k - 1 + k * a_dim1; + d__1 = a[i__1].r; + d__2 = d_lmp_imag(&a[k - 1 + k * a_dim1]); + d__ = dlapy2_(&d__1, &d__2); + i__1 = k - 1 + (k - 1) * a_dim1; + d22 = a[i__1].r / d__; + i__1 = k + k * a_dim1; + d11 = a[i__1].r / d__; + tt = 1. / (d11 * d22 - 1.); + i__1 = k - 1 + k * a_dim1; + z__1.r = a[i__1].r / d__, z__1.i = a[i__1].i / d__; + d12.r = z__1.r, d12.i = z__1.i; + d__ = tt / d__; + for (j = k - 2; j >= 1; --j) { + i__1 = j + (k - 1) * a_dim1; + z__3.r = d11 * a[i__1].r, z__3.i = d11 * a[i__1].i; + d_lmp_cnjg(&z__5, &d12); + i__2 = j + k * a_dim1; + z__4.r = z__5.r * a[i__2].r - z__5.i * a[i__2].i, + z__4.i = z__5.r * a[i__2].i + z__5.i * a[i__2].r; + z__2.r = z__3.r - z__4.r, z__2.i = z__3.i - z__4.i; + z__1.r = d__ * z__2.r, z__1.i = d__ * z__2.i; + wkm1.r = z__1.r, wkm1.i = z__1.i; + i__1 = j + k * a_dim1; + z__3.r = d22 * a[i__1].r, z__3.i = d22 * a[i__1].i; + i__2 = j + (k - 1) * a_dim1; + z__4.r = d12.r * a[i__2].r - d12.i * a[i__2].i, + z__4.i = d12.r * a[i__2].i + d12.i * a[i__2].r; + z__2.r = z__3.r - z__4.r, z__2.i = z__3.i - z__4.i; + z__1.r = d__ * z__2.r, z__1.i = d__ * z__2.i; + wk.r = z__1.r, wk.i = z__1.i; + for (i__ = j; i__ >= 1; --i__) { + i__1 = i__ + j * a_dim1; + i__2 = i__ + j * a_dim1; + i__3 = i__ + k * a_dim1; + d_lmp_cnjg(&z__4, &wk); + z__3.r = a[i__3].r * z__4.r - a[i__3].i * z__4.i, + z__3.i = a[i__3].r * z__4.i + a[i__3].i * z__4.r; + z__2.r = a[i__2].r - z__3.r, z__2.i = a[i__2].i - z__3.i; + i__4 = i__ + (k - 1) * a_dim1; + d_lmp_cnjg(&z__6, &wkm1); + z__5.r = a[i__4].r * z__6.r - a[i__4].i * z__6.i, + z__5.i = a[i__4].r * z__6.i + a[i__4].i * z__6.r; + z__1.r = z__2.r - z__5.r, z__1.i = z__2.i - z__5.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + i__1 = j + k * a_dim1; + a[i__1].r = wk.r, a[i__1].i = wk.i; + i__1 = j + (k - 1) * a_dim1; + a[i__1].r = wkm1.r, a[i__1].i = wkm1.i; + i__1 = j + j * a_dim1; + i__2 = j + j * a_dim1; + d__1 = a[i__2].r; + z__1.r = d__1, z__1.i = 0.; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + } + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k - 1] = -kp; + } + k -= kstep; + goto L10; + } else { + k = 1; + L50: + if (k > *n) { + goto L90; + } + kstep = 1; + i__1 = k + k * a_dim1; + absakk = (d__1 = a[i__1].r, abs(d__1)); + if (k < *n) { + i__1 = *n - k; + imax = k + izamax_(&i__1, &a[k + 1 + k * a_dim1], &c__1); + i__1 = imax + k * a_dim1; + colmax = + (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&a[imax + k * a_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0. || disnan_(&absakk)) { + if (*info == 0) { + *info = k; + } + kp = k; + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = imax - k; + jmax = k - 1 + izamax_(&i__1, &a[imax + k * a_dim1], lda); + i__1 = imax + jmax * a_dim1; + rowmax = (d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[imax + jmax * a_dim1]), abs(d__2)); + if (imax < *n) { + i__1 = *n - imax; + jmax = imax + izamax_(&i__1, &a[imax + 1 + imax * a_dim1], &c__1); + i__1 = jmax + imax * a_dim1; + d__3 = rowmax, d__4 = (d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[jmax + imax * a_dim1]), abs(d__2)); + rowmax = max(d__3, d__4); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else { + i__1 = imax + imax * a_dim1; + if ((d__1 = a[i__1].r, abs(d__1)) >= alpha * rowmax) { + kp = imax; + } else { + kp = imax; + kstep = 2; + } + } + } + kk = k + kstep - 1; + if (kp != kk) { + if (kp < *n) { + i__1 = *n - kp; + zswap_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1 + kp * a_dim1], &c__1); + } + i__1 = kp - 1; + for (j = kk + 1; j <= i__1; ++j) { + d_lmp_cnjg(&z__1, &a[j + kk * a_dim1]); + t.r = z__1.r, t.i = z__1.i; + i__2 = j + kk * a_dim1; + d_lmp_cnjg(&z__1, &a[kp + j * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = kp + j * a_dim1; + a[i__2].r = t.r, a[i__2].i = t.i; + } + i__1 = kp + kk * a_dim1; + d_lmp_cnjg(&z__1, &a[kp + kk * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = kk + kk * a_dim1; + r1 = a[i__1].r; + i__1 = kk + kk * a_dim1; + i__2 = kp + kp * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = kp + kp * a_dim1; + a[i__1].r = r1, a[i__1].i = 0.; + if (kstep == 2) { + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = k + 1 + k * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = k + 1 + k * a_dim1; + i__2 = kp + k * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + k * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + } + } else { + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + if (kstep == 2) { + i__1 = k + 1 + (k + 1) * a_dim1; + i__2 = k + 1 + (k + 1) * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } + } + if (kstep == 1) { + if (k < *n) { + i__1 = k + k * a_dim1; + r1 = 1. / a[i__1].r; + i__1 = *n - k; + d__1 = -r1; + zher_(uplo, &i__1, &d__1, &a[k + 1 + k * a_dim1], &c__1, + &a[k + 1 + (k + 1) * a_dim1], lda, (ftnlen)1); + i__1 = *n - k; + zdscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1); + } + } else { + if (k < *n - 1) { + i__1 = k + 1 + k * a_dim1; + d__1 = a[i__1].r; + d__2 = d_lmp_imag(&a[k + 1 + k * a_dim1]); + d__ = dlapy2_(&d__1, &d__2); + i__1 = k + 1 + (k + 1) * a_dim1; + d11 = a[i__1].r / d__; + i__1 = k + k * a_dim1; + d22 = a[i__1].r / d__; + tt = 1. / (d11 * d22 - 1.); + i__1 = k + 1 + k * a_dim1; + z__1.r = a[i__1].r / d__, z__1.i = a[i__1].i / d__; + d21.r = z__1.r, d21.i = z__1.i; + d__ = tt / d__; + i__1 = *n; + for (j = k + 2; j <= i__1; ++j) { + i__2 = j + k * a_dim1; + z__3.r = d11 * a[i__2].r, z__3.i = d11 * a[i__2].i; + i__3 = j + (k + 1) * a_dim1; + z__4.r = d21.r * a[i__3].r - d21.i * a[i__3].i, + z__4.i = d21.r * a[i__3].i + d21.i * a[i__3].r; + z__2.r = z__3.r - z__4.r, z__2.i = z__3.i - z__4.i; + z__1.r = d__ * z__2.r, z__1.i = d__ * z__2.i; + wk.r = z__1.r, wk.i = z__1.i; + i__2 = j + (k + 1) * a_dim1; + z__3.r = d22 * a[i__2].r, z__3.i = d22 * a[i__2].i; + d_lmp_cnjg(&z__5, &d21); + i__3 = j + k * a_dim1; + z__4.r = z__5.r * a[i__3].r - z__5.i * a[i__3].i, + z__4.i = z__5.r * a[i__3].i + z__5.i * a[i__3].r; + z__2.r = z__3.r - z__4.r, z__2.i = z__3.i - z__4.i; + z__1.r = d__ * z__2.r, z__1.i = d__ * z__2.i; + wkp1.r = z__1.r, wkp1.i = z__1.i; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__ + k * a_dim1; + d_lmp_cnjg(&z__4, &wk); + z__3.r = a[i__5].r * z__4.r - a[i__5].i * z__4.i, + z__3.i = a[i__5].r * z__4.i + a[i__5].i * z__4.r; + z__2.r = a[i__4].r - z__3.r, z__2.i = a[i__4].i - z__3.i; + i__6 = i__ + (k + 1) * a_dim1; + d_lmp_cnjg(&z__6, &wkp1); + z__5.r = a[i__6].r * z__6.r - a[i__6].i * z__6.i, + z__5.i = a[i__6].r * z__6.i + a[i__6].i * z__6.r; + z__1.r = z__2.r - z__5.r, z__1.i = z__2.i - z__5.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + } + i__2 = j + k * a_dim1; + a[i__2].r = wk.r, a[i__2].i = wk.i; + i__2 = j + (k + 1) * a_dim1; + a[i__2].r = wkp1.r, a[i__2].i = wkp1.i; + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + z__1.r = d__1, z__1.i = 0.; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + } + } + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k + 1] = -kp; + } + k += kstep; + goto L50; + } +L90: + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zhetrf.cpp b/lib/linalg/zhetrf.cpp new file mode 100644 index 0000000000..cb60ff4b7b --- /dev/null +++ b/lib/linalg/zhetrf.cpp @@ -0,0 +1,123 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +int zhetrf_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, + doublecomplex *work, integer *lwork, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2; + integer j, k, kb, nb, iws; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nbmin, iinfo; + logical upper; + extern int zhetf2_(char *, integer *, doublecomplex *, integer *, integer *, integer *, ftnlen), + zlahef_(char *, integer *, integer *, integer *, doublecomplex *, integer *, integer *, + doublecomplex *, integer *, integer *, ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + integer ldwork, lwkopt; + logical lquery; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + --work; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } else if (*lwork < 1 && !lquery) { + *info = -7; + } + if (*info == 0) { + nb = ilaenv_(&c__1, (char *)"ZHETRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = *n * nb; + work[1].r = (doublereal)lwkopt, work[1].i = 0.; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZHETRF", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + nbmin = 2; + ldwork = *n; + if (nb > 1 && nb < *n) { + iws = ldwork * nb; + if (*lwork < iws) { + i__1 = *lwork / ldwork; + nb = max(i__1, 1); + i__1 = 2, + i__2 = ilaenv_(&c__2, (char *)"ZHETRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1, i__2); + } + } else { + iws = 1; + } + if (nb < nbmin) { + nb = *n; + } + if (upper) { + k = *n; + L10: + if (k < 1) { + goto L40; + } + if (k > nb) { + zlahef_(uplo, &k, &nb, &kb, &a[a_offset], lda, &ipiv[1], &work[1], n, &iinfo, + (ftnlen)1); + } else { + zhetf2_(uplo, &k, &a[a_offset], lda, &ipiv[1], &iinfo, (ftnlen)1); + kb = k; + } + if (*info == 0 && iinfo > 0) { + *info = iinfo; + } + k -= kb; + goto L10; + } else { + k = 1; + L20: + if (k > *n) { + goto L40; + } + if (k <= *n - nb) { + i__1 = *n - k + 1; + zlahef_(uplo, &i__1, &nb, &kb, &a[k + k * a_dim1], lda, &ipiv[k], &work[1], n, &iinfo, + (ftnlen)1); + } else { + i__1 = *n - k + 1; + zhetf2_(uplo, &i__1, &a[k + k * a_dim1], lda, &ipiv[k], &iinfo, (ftnlen)1); + kb = *n - k + 1; + } + if (*info == 0 && iinfo > 0) { + *info = iinfo + k - 1; + } + i__1 = k + kb - 1; + for (j = k; j <= i__1; ++j) { + if (ipiv[j] > 0) { + ipiv[j] = ipiv[j] + k - 1; + } else { + ipiv[j] = ipiv[j] - k + 1; + } + } + k += kb; + goto L20; + } +L40: + work[1].r = (doublereal)lwkopt, work[1].i = 0.; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zhetri.cpp b/lib/linalg/zhetri.cpp new file mode 100644 index 0000000000..020b4ce52b --- /dev/null +++ b/lib/linalg/zhetri.cpp @@ -0,0 +1,319 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b2 = {0., 0.}; +static integer c__1 = 1; +int zhetri_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, + doublecomplex *work, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + doublecomplex z__1, z__2; + double z_lmp_abs(doublecomplex *); + void d_lmp_cnjg(doublecomplex *, doublecomplex *); + doublereal d__; + integer j, k; + doublereal t, ak; + integer kp; + doublereal akp1; + doublecomplex temp, akkp1; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern VOID zdotc_(doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *); + integer kstep; + extern int zhemv_(char *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, + ftnlen); + logical upper; + extern int zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), + zswap_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), + xerbla_(char *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + --work; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZHETRI", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + if (upper) { + for (*info = *n; *info >= 1; --(*info)) { + i__1 = *info + *info * a_dim1; + if (ipiv[*info] > 0 && (a[i__1].r == 0. && a[i__1].i == 0.)) { + return 0; + } + } + } else { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + i__2 = *info + *info * a_dim1; + if (ipiv[*info] > 0 && (a[i__2].r == 0. && a[i__2].i == 0.)) { + return 0; + } + } + } + *info = 0; + if (upper) { + k = 1; + L30: + if (k > *n) { + goto L50; + } + if (ipiv[k] > 0) { + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = 1. / a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + if (k > 1) { + i__1 = k - 1; + zcopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &work[1], &c__1); + i__1 = k - 1; + z__1.r = -1., z__1.i = -0.; + zhemv_(uplo, &i__1, &z__1, &a[a_offset], lda, &work[1], &c__1, &c_b2, + &a[k * a_dim1 + 1], &c__1, (ftnlen)1); + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + i__3 = k - 1; + zdotc_(&z__2, &i__3, &work[1], &c__1, &a[k * a_dim1 + 1], &c__1); + d__1 = z__2.r; + z__1.r = a[i__2].r - d__1, z__1.i = a[i__2].i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + kstep = 1; + } else { + t = z_lmp_abs(&a[k + (k + 1) * a_dim1]); + i__1 = k + k * a_dim1; + ak = a[i__1].r / t; + i__1 = k + 1 + (k + 1) * a_dim1; + akp1 = a[i__1].r / t; + i__1 = k + (k + 1) * a_dim1; + z__1.r = a[i__1].r / t, z__1.i = a[i__1].i / t; + akkp1.r = z__1.r, akkp1.i = z__1.i; + d__ = t * (ak * akp1 - 1.); + i__1 = k + k * a_dim1; + d__1 = akp1 / d__; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = k + 1 + (k + 1) * a_dim1; + d__1 = ak / d__; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = k + (k + 1) * a_dim1; + z__2.r = -akkp1.r, z__2.i = -akkp1.i; + z__1.r = z__2.r / d__, z__1.i = z__2.i / d__; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + if (k > 1) { + i__1 = k - 1; + zcopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &work[1], &c__1); + i__1 = k - 1; + z__1.r = -1., z__1.i = -0.; + zhemv_(uplo, &i__1, &z__1, &a[a_offset], lda, &work[1], &c__1, &c_b2, + &a[k * a_dim1 + 1], &c__1, (ftnlen)1); + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + i__3 = k - 1; + zdotc_(&z__2, &i__3, &work[1], &c__1, &a[k * a_dim1 + 1], &c__1); + d__1 = z__2.r; + z__1.r = a[i__2].r - d__1, z__1.i = a[i__2].i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + (k + 1) * a_dim1; + i__2 = k + (k + 1) * a_dim1; + i__3 = k - 1; + zdotc_(&z__2, &i__3, &a[k * a_dim1 + 1], &c__1, &a[(k + 1) * a_dim1 + 1], &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k - 1; + zcopy_(&i__1, &a[(k + 1) * a_dim1 + 1], &c__1, &work[1], &c__1); + i__1 = k - 1; + z__1.r = -1., z__1.i = -0.; + zhemv_(uplo, &i__1, &z__1, &a[a_offset], lda, &work[1], &c__1, &c_b2, + &a[(k + 1) * a_dim1 + 1], &c__1, (ftnlen)1); + i__1 = k + 1 + (k + 1) * a_dim1; + i__2 = k + 1 + (k + 1) * a_dim1; + i__3 = k - 1; + zdotc_(&z__2, &i__3, &work[1], &c__1, &a[(k + 1) * a_dim1 + 1], &c__1); + d__1 = z__2.r; + z__1.r = a[i__2].r - d__1, z__1.i = a[i__2].i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + kstep = 2; + } + kp = (i__1 = ipiv[k], abs(i__1)); + if (kp != k) { + i__1 = kp - 1; + zswap_(&i__1, &a[k * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &c__1); + i__1 = k - 1; + for (j = kp + 1; j <= i__1; ++j) { + d_lmp_cnjg(&z__1, &a[j + k * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + i__2 = j + k * a_dim1; + d_lmp_cnjg(&z__1, &a[kp + j * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = kp + j * a_dim1; + a[i__2].r = temp.r, a[i__2].i = temp.i; + } + i__1 = kp + k * a_dim1; + d_lmp_cnjg(&z__1, &a[kp + k * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + k * a_dim1; + temp.r = a[i__1].r, temp.i = a[i__1].i; + i__1 = k + k * a_dim1; + i__2 = kp + kp * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + kp * a_dim1; + a[i__1].r = temp.r, a[i__1].i = temp.i; + if (kstep == 2) { + i__1 = k + (k + 1) * a_dim1; + temp.r = a[i__1].r, temp.i = a[i__1].i; + i__1 = k + (k + 1) * a_dim1; + i__2 = kp + (k + 1) * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + (k + 1) * a_dim1; + a[i__1].r = temp.r, a[i__1].i = temp.i; + } + } + k += kstep; + goto L30; + L50:; + } else { + k = *n; + L60: + if (k < 1) { + goto L80; + } + if (ipiv[k] > 0) { + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = 1. / a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + if (k < *n) { + i__1 = *n - k; + zcopy_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &work[1], &c__1); + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zhemv_(uplo, &i__1, &z__1, &a[k + 1 + (k + 1) * a_dim1], lda, &work[1], &c__1, + &c_b2, &a[k + 1 + k * a_dim1], &c__1, (ftnlen)1); + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + i__3 = *n - k; + zdotc_(&z__2, &i__3, &work[1], &c__1, &a[k + 1 + k * a_dim1], &c__1); + d__1 = z__2.r; + z__1.r = a[i__2].r - d__1, z__1.i = a[i__2].i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + kstep = 1; + } else { + t = z_lmp_abs(&a[k + (k - 1) * a_dim1]); + i__1 = k - 1 + (k - 1) * a_dim1; + ak = a[i__1].r / t; + i__1 = k + k * a_dim1; + akp1 = a[i__1].r / t; + i__1 = k + (k - 1) * a_dim1; + z__1.r = a[i__1].r / t, z__1.i = a[i__1].i / t; + akkp1.r = z__1.r, akkp1.i = z__1.i; + d__ = t * (ak * akp1 - 1.); + i__1 = k - 1 + (k - 1) * a_dim1; + d__1 = akp1 / d__; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = k + k * a_dim1; + d__1 = ak / d__; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = k + (k - 1) * a_dim1; + z__2.r = -akkp1.r, z__2.i = -akkp1.i; + z__1.r = z__2.r / d__, z__1.i = z__2.i / d__; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + if (k < *n) { + i__1 = *n - k; + zcopy_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &work[1], &c__1); + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zhemv_(uplo, &i__1, &z__1, &a[k + 1 + (k + 1) * a_dim1], lda, &work[1], &c__1, + &c_b2, &a[k + 1 + k * a_dim1], &c__1, (ftnlen)1); + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + i__3 = *n - k; + zdotc_(&z__2, &i__3, &work[1], &c__1, &a[k + 1 + k * a_dim1], &c__1); + d__1 = z__2.r; + z__1.r = a[i__2].r - d__1, z__1.i = a[i__2].i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + (k - 1) * a_dim1; + i__2 = k + (k - 1) * a_dim1; + i__3 = *n - k; + zdotc_(&z__2, &i__3, &a[k + 1 + k * a_dim1], &c__1, &a[k + 1 + (k - 1) * a_dim1], + &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = *n - k; + zcopy_(&i__1, &a[k + 1 + (k - 1) * a_dim1], &c__1, &work[1], &c__1); + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zhemv_(uplo, &i__1, &z__1, &a[k + 1 + (k + 1) * a_dim1], lda, &work[1], &c__1, + &c_b2, &a[k + 1 + (k - 1) * a_dim1], &c__1, (ftnlen)1); + i__1 = k - 1 + (k - 1) * a_dim1; + i__2 = k - 1 + (k - 1) * a_dim1; + i__3 = *n - k; + zdotc_(&z__2, &i__3, &work[1], &c__1, &a[k + 1 + (k - 1) * a_dim1], &c__1); + d__1 = z__2.r; + z__1.r = a[i__2].r - d__1, z__1.i = a[i__2].i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + kstep = 2; + } + kp = (i__1 = ipiv[k], abs(i__1)); + if (kp != k) { + if (kp < *n) { + i__1 = *n - kp; + zswap_(&i__1, &a[kp + 1 + k * a_dim1], &c__1, &a[kp + 1 + kp * a_dim1], &c__1); + } + i__1 = kp - 1; + for (j = k + 1; j <= i__1; ++j) { + d_lmp_cnjg(&z__1, &a[j + k * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + i__2 = j + k * a_dim1; + d_lmp_cnjg(&z__1, &a[kp + j * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = kp + j * a_dim1; + a[i__2].r = temp.r, a[i__2].i = temp.i; + } + i__1 = kp + k * a_dim1; + d_lmp_cnjg(&z__1, &a[kp + k * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + k * a_dim1; + temp.r = a[i__1].r, temp.i = a[i__1].i; + i__1 = k + k * a_dim1; + i__2 = kp + kp * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + kp * a_dim1; + a[i__1].r = temp.r, a[i__1].i = temp.i; + if (kstep == 2) { + i__1 = k + (k - 1) * a_dim1; + temp.r = a[i__1].r, temp.i = a[i__1].i; + i__1 = k + (k - 1) * a_dim1; + i__2 = kp + (k - 1) * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + (k - 1) * a_dim1; + a[i__1].r = temp.r, a[i__1].i = temp.i; + } + } + k -= kstep; + goto L60; + L80:; + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zlahef.cpp b/lib/linalg/zlahef.cpp new file mode 100644 index 0000000000..9a18a455ea --- /dev/null +++ b/lib/linalg/zlahef.cpp @@ -0,0 +1,520 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +int zlahef_(char *uplo, integer *n, integer *nb, integer *kb, doublecomplex *a, integer *lda, + integer *ipiv, doublecomplex *w, integer *ldw, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4; + doublecomplex z__1, z__2, z__3, z__4; + double sqrt(doublereal), d_lmp_imag(doublecomplex *); + void d_lmp_cnjg(doublecomplex *, doublecomplex *), + z_lmp_div(doublecomplex *, doublecomplex *, doublecomplex *); + integer j, k; + doublereal t, r1; + doublecomplex d11, d21, d22; + integer jb, jj, kk, jp, kp, kw, kkw, imax, jmax; + doublereal alpha; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, ftnlen, ftnlen); + integer kstep; + extern int zgemv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, + ftnlen), + zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), + zswap_(integer *, doublecomplex *, integer *, doublecomplex *, integer *); + doublereal absakk; + extern int zdscal_(integer *, doublereal *, doublecomplex *, integer *); + doublereal colmax; + extern int zlacgv_(integer *, doublecomplex *, integer *); + extern integer izamax_(integer *, doublecomplex *, integer *); + doublereal rowmax; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + w_dim1 = *ldw; + w_offset = 1 + w_dim1; + w -= w_offset; + *info = 0; + alpha = (sqrt(17.) + 1.) / 8.; + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + k = *n; + L10: + kw = *nb + k - *n; + if (k <= *n - *nb + 1 && *nb < *n || k < 1) { + goto L30; + } + kstep = 1; + i__1 = k - 1; + zcopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1); + i__1 = k + kw * w_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + w[i__1].r = d__1, w[i__1].i = 0.; + if (k < *n) { + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &k, &i__1, &z__1, &a[(k + 1) * a_dim1 + 1], lda, + &w[k + (kw + 1) * w_dim1], ldw, &c_b1, &w[kw * w_dim1 + 1], &c__1, (ftnlen)12); + i__1 = k + kw * w_dim1; + i__2 = k + kw * w_dim1; + d__1 = w[i__2].r; + w[i__1].r = d__1, w[i__1].i = 0.; + } + i__1 = k + kw * w_dim1; + absakk = (d__1 = w[i__1].r, abs(d__1)); + if (k > 1) { + i__1 = k - 1; + imax = izamax_(&i__1, &w[kw * w_dim1 + 1], &c__1); + i__1 = imax + kw * w_dim1; + colmax = + (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&w[imax + kw * w_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0.) { + if (*info == 0) { + *info = k; + } + kp = k; + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = imax - 1; + zcopy_(&i__1, &a[imax * a_dim1 + 1], &c__1, &w[(kw - 1) * w_dim1 + 1], &c__1); + i__1 = imax + (kw - 1) * w_dim1; + i__2 = imax + imax * a_dim1; + d__1 = a[i__2].r; + w[i__1].r = d__1, w[i__1].i = 0.; + i__1 = k - imax; + zcopy_(&i__1, &a[imax + (imax + 1) * a_dim1], lda, &w[imax + 1 + (kw - 1) * w_dim1], + &c__1); + i__1 = k - imax; + zlacgv_(&i__1, &w[imax + 1 + (kw - 1) * w_dim1], &c__1); + if (k < *n) { + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &k, &i__1, &z__1, &a[(k + 1) * a_dim1 + 1], lda, + &w[imax + (kw + 1) * w_dim1], ldw, &c_b1, &w[(kw - 1) * w_dim1 + 1], + &c__1, (ftnlen)12); + i__1 = imax + (kw - 1) * w_dim1; + i__2 = imax + (kw - 1) * w_dim1; + d__1 = w[i__2].r; + w[i__1].r = d__1, w[i__1].i = 0.; + } + i__1 = k - imax; + jmax = imax + izamax_(&i__1, &w[imax + 1 + (kw - 1) * w_dim1], &c__1); + i__1 = jmax + (kw - 1) * w_dim1; + rowmax = (d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[jmax + (kw - 1) * w_dim1]), abs(d__2)); + if (imax > 1) { + i__1 = imax - 1; + jmax = izamax_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1); + i__1 = jmax + (kw - 1) * w_dim1; + d__3 = rowmax, d__4 = (d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[jmax + (kw - 1) * w_dim1]), abs(d__2)); + rowmax = max(d__3, d__4); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else { + i__1 = imax + (kw - 1) * w_dim1; + if ((d__1 = w[i__1].r, abs(d__1)) >= alpha * rowmax) { + kp = imax; + zcopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1); + } else { + kp = imax; + kstep = 2; + } + } + } + kk = k - kstep + 1; + kkw = *nb + kk - *n; + if (kp != kk) { + i__1 = kp + kp * a_dim1; + i__2 = kk + kk * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = kk - 1 - kp; + zcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp + 1) * a_dim1], lda); + i__1 = kk - 1 - kp; + zlacgv_(&i__1, &a[kp + (kp + 1) * a_dim1], lda); + if (kp > 1) { + i__1 = kp - 1; + zcopy_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &c__1); + } + if (k < *n) { + i__1 = *n - k; + zswap_(&i__1, &a[kk + (k + 1) * a_dim1], lda, &a[kp + (k + 1) * a_dim1], lda); + } + i__1 = *n - kk + 1; + zswap_(&i__1, &w[kk + kkw * w_dim1], ldw, &w[kp + kkw * w_dim1], ldw); + } + if (kstep == 1) { + zcopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1); + if (k > 1) { + i__1 = k + k * a_dim1; + r1 = 1. / a[i__1].r; + i__1 = k - 1; + zdscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1); + i__1 = k - 1; + zlacgv_(&i__1, &w[kw * w_dim1 + 1], &c__1); + } + } else { + if (k > 2) { + i__1 = k - 1 + kw * w_dim1; + d21.r = w[i__1].r, d21.i = w[i__1].i; + d_lmp_cnjg(&z__2, &d21); + z_lmp_div(&z__1, &w[k + kw * w_dim1], &z__2); + d11.r = z__1.r, d11.i = z__1.i; + z_lmp_div(&z__1, &w[k - 1 + (kw - 1) * w_dim1], &d21); + d22.r = z__1.r, d22.i = z__1.i; + z__1.r = d11.r * d22.r - d11.i * d22.i, z__1.i = d11.r * d22.i + d11.i * d22.r; + t = 1. / (z__1.r - 1.); + z__2.r = t, z__2.i = 0.; + z_lmp_div(&z__1, &z__2, &d21); + d21.r = z__1.r, d21.i = z__1.i; + i__1 = k - 2; + for (j = 1; j <= i__1; ++j) { + i__2 = j + (k - 1) * a_dim1; + i__3 = j + (kw - 1) * w_dim1; + z__3.r = d11.r * w[i__3].r - d11.i * w[i__3].i, + z__3.i = d11.r * w[i__3].i + d11.i * w[i__3].r; + i__4 = j + kw * w_dim1; + z__2.r = z__3.r - w[i__4].r, z__2.i = z__3.i - w[i__4].i; + z__1.r = d21.r * z__2.r - d21.i * z__2.i, + z__1.i = d21.r * z__2.i + d21.i * z__2.r; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = j + k * a_dim1; + d_lmp_cnjg(&z__2, &d21); + i__3 = j + kw * w_dim1; + z__4.r = d22.r * w[i__3].r - d22.i * w[i__3].i, + z__4.i = d22.r * w[i__3].i + d22.i * w[i__3].r; + i__4 = j + (kw - 1) * w_dim1; + z__3.r = z__4.r - w[i__4].r, z__3.i = z__4.i - w[i__4].i; + z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, + z__1.i = z__2.r * z__3.i + z__2.i * z__3.r; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + } + } + i__1 = k - 1 + (k - 1) * a_dim1; + i__2 = k - 1 + (kw - 1) * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = k - 1 + k * a_dim1; + i__2 = k - 1 + kw * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = k + k * a_dim1; + i__2 = k + kw * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = k - 1; + zlacgv_(&i__1, &w[kw * w_dim1 + 1], &c__1); + i__1 = k - 2; + zlacgv_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1); + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k - 1] = -kp; + } + k -= kstep; + goto L10; + L30: + i__1 = -(*nb); + for (j = (k - 1) / *nb * *nb + 1; i__1 < 0 ? j >= 1 : j <= 1; j += i__1) { + i__2 = *nb, i__3 = k - j + 1; + jb = min(i__2, i__3); + i__2 = j + jb - 1; + for (jj = j; jj <= i__2; ++jj) { + i__3 = jj + jj * a_dim1; + i__4 = jj + jj * a_dim1; + d__1 = a[i__4].r; + a[i__3].r = d__1, a[i__3].i = 0.; + i__3 = jj - j + 1; + i__4 = *n - k; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__3, &i__4, &z__1, &a[j + (k + 1) * a_dim1], lda, + &w[jj + (kw + 1) * w_dim1], ldw, &c_b1, &a[j + jj * a_dim1], &c__1, + (ftnlen)12); + i__3 = jj + jj * a_dim1; + i__4 = jj + jj * a_dim1; + d__1 = a[i__4].r; + a[i__3].r = d__1, a[i__3].i = 0.; + } + i__2 = j - 1; + i__3 = *n - k; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"No transpose", (char *)"Transpose", &i__2, &jb, &i__3, &z__1, &a[(k + 1) * a_dim1 + 1], + lda, &w[j + (kw + 1) * w_dim1], ldw, &c_b1, &a[j * a_dim1 + 1], lda, (ftnlen)12, + (ftnlen)9); + } + j = k + 1; + L60: + jj = j; + jp = ipiv[j]; + if (jp < 0) { + jp = -jp; + ++j; + } + ++j; + if (jp != jj && j <= *n) { + i__1 = *n - j + 1; + zswap_(&i__1, &a[jp + j * a_dim1], lda, &a[jj + j * a_dim1], lda); + } + if (j < *n) { + goto L60; + } + *kb = *n - k; + } else { + k = 1; + L70: + if (k >= *nb && *nb < *n || k > *n) { + goto L90; + } + kstep = 1; + i__1 = k + k * w_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + w[i__1].r = d__1, w[i__1].i = 0.; + if (k < *n) { + i__1 = *n - k; + zcopy_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &w[k + 1 + k * w_dim1], &c__1); + } + i__1 = *n - k + 1; + i__2 = k - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__1, &i__2, &z__1, &a[k + a_dim1], lda, &w[k + w_dim1], ldw, &c_b1, + &w[k + k * w_dim1], &c__1, (ftnlen)12); + i__1 = k + k * w_dim1; + i__2 = k + k * w_dim1; + d__1 = w[i__2].r; + w[i__1].r = d__1, w[i__1].i = 0.; + i__1 = k + k * w_dim1; + absakk = (d__1 = w[i__1].r, abs(d__1)); + if (k < *n) { + i__1 = *n - k; + imax = k + izamax_(&i__1, &w[k + 1 + k * w_dim1], &c__1); + i__1 = imax + k * w_dim1; + colmax = + (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&w[imax + k * w_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0.) { + if (*info == 0) { + *info = k; + } + kp = k; + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = imax - k; + zcopy_(&i__1, &a[imax + k * a_dim1], lda, &w[k + (k + 1) * w_dim1], &c__1); + i__1 = imax - k; + zlacgv_(&i__1, &w[k + (k + 1) * w_dim1], &c__1); + i__1 = imax + (k + 1) * w_dim1; + i__2 = imax + imax * a_dim1; + d__1 = a[i__2].r; + w[i__1].r = d__1, w[i__1].i = 0.; + if (imax < *n) { + i__1 = *n - imax; + zcopy_(&i__1, &a[imax + 1 + imax * a_dim1], &c__1, + &w[imax + 1 + (k + 1) * w_dim1], &c__1); + } + i__1 = *n - k + 1; + i__2 = k - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__1, &i__2, &z__1, &a[k + a_dim1], lda, &w[imax + w_dim1], + ldw, &c_b1, &w[k + (k + 1) * w_dim1], &c__1, (ftnlen)12); + i__1 = imax + (k + 1) * w_dim1; + i__2 = imax + (k + 1) * w_dim1; + d__1 = w[i__2].r; + w[i__1].r = d__1, w[i__1].i = 0.; + i__1 = imax - k; + jmax = k - 1 + izamax_(&i__1, &w[k + (k + 1) * w_dim1], &c__1); + i__1 = jmax + (k + 1) * w_dim1; + rowmax = (d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[jmax + (k + 1) * w_dim1]), abs(d__2)); + if (imax < *n) { + i__1 = *n - imax; + jmax = imax + izamax_(&i__1, &w[imax + 1 + (k + 1) * w_dim1], &c__1); + i__1 = jmax + (k + 1) * w_dim1; + d__3 = rowmax, d__4 = (d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[jmax + (k + 1) * w_dim1]), abs(d__2)); + rowmax = max(d__3, d__4); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else { + i__1 = imax + (k + 1) * w_dim1; + if ((d__1 = w[i__1].r, abs(d__1)) >= alpha * rowmax) { + kp = imax; + i__1 = *n - k + 1; + zcopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k * w_dim1], &c__1); + } else { + kp = imax; + kstep = 2; + } + } + } + kk = k + kstep - 1; + if (kp != kk) { + i__1 = kp + kp * a_dim1; + i__2 = kk + kk * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = kp - kk - 1; + zcopy_(&i__1, &a[kk + 1 + kk * a_dim1], &c__1, &a[kp + (kk + 1) * a_dim1], lda); + i__1 = kp - kk - 1; + zlacgv_(&i__1, &a[kp + (kk + 1) * a_dim1], lda); + if (kp < *n) { + i__1 = *n - kp; + zcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1 + kp * a_dim1], &c__1); + } + if (k > 1) { + i__1 = k - 1; + zswap_(&i__1, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda); + } + zswap_(&kk, &w[kk + w_dim1], ldw, &w[kp + w_dim1], ldw); + } + if (kstep == 1) { + i__1 = *n - k + 1; + zcopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], &c__1); + if (k < *n) { + i__1 = k + k * a_dim1; + r1 = 1. / a[i__1].r; + i__1 = *n - k; + zdscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1); + i__1 = *n - k; + zlacgv_(&i__1, &w[k + 1 + k * w_dim1], &c__1); + } + } else { + if (k < *n - 1) { + i__1 = k + 1 + k * w_dim1; + d21.r = w[i__1].r, d21.i = w[i__1].i; + z_lmp_div(&z__1, &w[k + 1 + (k + 1) * w_dim1], &d21); + d11.r = z__1.r, d11.i = z__1.i; + d_lmp_cnjg(&z__2, &d21); + z_lmp_div(&z__1, &w[k + k * w_dim1], &z__2); + d22.r = z__1.r, d22.i = z__1.i; + z__1.r = d11.r * d22.r - d11.i * d22.i, z__1.i = d11.r * d22.i + d11.i * d22.r; + t = 1. / (z__1.r - 1.); + z__2.r = t, z__2.i = 0.; + z_lmp_div(&z__1, &z__2, &d21); + d21.r = z__1.r, d21.i = z__1.i; + i__1 = *n; + for (j = k + 2; j <= i__1; ++j) { + i__2 = j + k * a_dim1; + d_lmp_cnjg(&z__2, &d21); + i__3 = j + k * w_dim1; + z__4.r = d11.r * w[i__3].r - d11.i * w[i__3].i, + z__4.i = d11.r * w[i__3].i + d11.i * w[i__3].r; + i__4 = j + (k + 1) * w_dim1; + z__3.r = z__4.r - w[i__4].r, z__3.i = z__4.i - w[i__4].i; + z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, + z__1.i = z__2.r * z__3.i + z__2.i * z__3.r; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = j + (k + 1) * a_dim1; + i__3 = j + (k + 1) * w_dim1; + z__3.r = d22.r * w[i__3].r - d22.i * w[i__3].i, + z__3.i = d22.r * w[i__3].i + d22.i * w[i__3].r; + i__4 = j + k * w_dim1; + z__2.r = z__3.r - w[i__4].r, z__2.i = z__3.i - w[i__4].i; + z__1.r = d21.r * z__2.r - d21.i * z__2.i, + z__1.i = d21.r * z__2.i + d21.i * z__2.r; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + } + } + i__1 = k + k * a_dim1; + i__2 = k + k * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = k + 1 + k * a_dim1; + i__2 = k + 1 + k * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = k + 1 + (k + 1) * a_dim1; + i__2 = k + 1 + (k + 1) * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = *n - k; + zlacgv_(&i__1, &w[k + 1 + k * w_dim1], &c__1); + i__1 = *n - k - 1; + zlacgv_(&i__1, &w[k + 2 + (k + 1) * w_dim1], &c__1); + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k + 1] = -kp; + } + k += kstep; + goto L70; + L90: + i__1 = *n; + i__2 = *nb; + for (j = k; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + i__3 = *nb, i__4 = *n - j + 1; + jb = min(i__3, i__4); + i__3 = j + jb - 1; + for (jj = j; jj <= i__3; ++jj) { + i__4 = jj + jj * a_dim1; + i__5 = jj + jj * a_dim1; + d__1 = a[i__5].r; + a[i__4].r = d__1, a[i__4].i = 0.; + i__4 = j + jb - jj; + i__5 = k - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__4, &i__5, &z__1, &a[jj + a_dim1], lda, &w[jj + w_dim1], + ldw, &c_b1, &a[jj + jj * a_dim1], &c__1, (ftnlen)12); + i__4 = jj + jj * a_dim1; + i__5 = jj + jj * a_dim1; + d__1 = a[i__5].r; + a[i__4].r = d__1, a[i__4].i = 0.; + } + if (j + jb <= *n) { + i__3 = *n - j - jb + 1; + i__4 = k - 1; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"No transpose", (char *)"Transpose", &i__3, &jb, &i__4, &z__1, &a[j + jb + a_dim1], + lda, &w[j + w_dim1], ldw, &c_b1, &a[j + jb + j * a_dim1], lda, (ftnlen)12, + (ftnlen)9); + } + } + j = k - 1; + L120: + jj = j; + jp = ipiv[j]; + if (jp < 0) { + jp = -jp; + --j; + } + --j; + if (jp != jj && j >= 1) { + zswap_(&j, &a[jp + a_dim1], lda, &a[jj + a_dim1], lda); + } + if (j > 1) { + goto L120; + } + *kb = k - 1; + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zlaswp.cpp b/lib/linalg/zlaswp.cpp new file mode 100644 index 0000000000..40e941ffa5 --- /dev/null +++ b/lib/linalg/zlaswp.cpp @@ -0,0 +1,79 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int zlaswp_(integer *n, doublecomplex *a, integer *lda, integer *k1, integer *k2, integer *ipiv, + integer *incx) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; + integer i__, j, k, i1, i2, n32, ip, ix, ix0, inc; + doublecomplex temp; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + if (*incx > 0) { + ix0 = *k1; + i1 = *k1; + i2 = *k2; + inc = 1; + } else if (*incx < 0) { + ix0 = *k1 + (*k1 - *k2) * *incx; + i1 = *k2; + i2 = *k1; + inc = -1; + } else { + return 0; + } + n32 = *n / 32 << 5; + if (n32 != 0) { + i__1 = n32; + for (j = 1; j <= i__1; j += 32) { + ix = ix0; + i__2 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3) { + ip = ipiv[ix]; + if (ip != i__) { + i__4 = j + 31; + for (k = j; k <= i__4; ++k) { + i__5 = i__ + k * a_dim1; + temp.r = a[i__5].r, temp.i = a[i__5].i; + i__5 = i__ + k * a_dim1; + i__6 = ip + k * a_dim1; + a[i__5].r = a[i__6].r, a[i__5].i = a[i__6].i; + i__5 = ip + k * a_dim1; + a[i__5].r = temp.r, a[i__5].i = temp.i; + } + } + ix += *incx; + } + } + } + if (n32 != *n) { + ++n32; + ix = ix0; + i__1 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__3) { + ip = ipiv[ix]; + if (ip != i__) { + i__2 = *n; + for (k = n32; k <= i__2; ++k) { + i__4 = i__ + k * a_dim1; + temp.r = a[i__4].r, temp.i = a[i__4].i; + i__4 = i__ + k * a_dim1; + i__5 = ip + k * a_dim1; + a[i__4].r = a[i__5].r, a[i__4].i = a[i__5].i; + i__4 = ip + k * a_dim1; + a[i__4].r = temp.r, a[i__4].i = temp.i; + } + } + ix += *incx; + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zlasyf.cpp b/lib/linalg/zlasyf.cpp new file mode 100644 index 0000000000..2823d173de --- /dev/null +++ b/lib/linalg/zlasyf.cpp @@ -0,0 +1,431 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +int zlasyf_(char *uplo, integer *n, integer *nb, integer *kb, doublecomplex *a, integer *lda, + integer *ipiv, doublecomplex *w, integer *ldw, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4; + doublecomplex z__1, z__2, z__3; + double sqrt(doublereal), d_lmp_imag(doublecomplex *); + void z_lmp_div(doublecomplex *, doublecomplex *, doublecomplex *); + integer j, k; + doublecomplex t, r1, d11, d21, d22; + integer jb, jj, kk, jp, kp, kw, kkw, imax, jmax; + doublereal alpha; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int zscal_(integer *, doublecomplex *, doublecomplex *, integer *), + zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, + ftnlen, ftnlen); + integer kstep; + extern int zgemv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, + ftnlen), + zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), + zswap_(integer *, doublecomplex *, integer *, doublecomplex *, integer *); + doublereal absakk, colmax; + extern integer izamax_(integer *, doublecomplex *, integer *); + doublereal rowmax; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + w_dim1 = *ldw; + w_offset = 1 + w_dim1; + w -= w_offset; + *info = 0; + alpha = (sqrt(17.) + 1.) / 8.; + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + k = *n; + L10: + kw = *nb + k - *n; + if (k <= *n - *nb + 1 && *nb < *n || k < 1) { + goto L30; + } + zcopy_(&k, &a[k * a_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1); + if (k < *n) { + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &k, &i__1, &z__1, &a[(k + 1) * a_dim1 + 1], lda, + &w[k + (kw + 1) * w_dim1], ldw, &c_b1, &w[kw * w_dim1 + 1], &c__1, (ftnlen)12); + } + kstep = 1; + i__1 = k + kw * w_dim1; + absakk = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&w[k + kw * w_dim1]), abs(d__2)); + if (k > 1) { + i__1 = k - 1; + imax = izamax_(&i__1, &w[kw * w_dim1 + 1], &c__1); + i__1 = imax + kw * w_dim1; + colmax = + (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&w[imax + kw * w_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0.) { + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + zcopy_(&imax, &a[imax * a_dim1 + 1], &c__1, &w[(kw - 1) * w_dim1 + 1], &c__1); + i__1 = k - imax; + zcopy_(&i__1, &a[imax + (imax + 1) * a_dim1], lda, &w[imax + 1 + (kw - 1) * w_dim1], + &c__1); + if (k < *n) { + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &k, &i__1, &z__1, &a[(k + 1) * a_dim1 + 1], lda, + &w[imax + (kw + 1) * w_dim1], ldw, &c_b1, &w[(kw - 1) * w_dim1 + 1], + &c__1, (ftnlen)12); + } + i__1 = k - imax; + jmax = imax + izamax_(&i__1, &w[imax + 1 + (kw - 1) * w_dim1], &c__1); + i__1 = jmax + (kw - 1) * w_dim1; + rowmax = (d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[jmax + (kw - 1) * w_dim1]), abs(d__2)); + if (imax > 1) { + i__1 = imax - 1; + jmax = izamax_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1); + i__1 = jmax + (kw - 1) * w_dim1; + d__3 = rowmax, d__4 = (d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[jmax + (kw - 1) * w_dim1]), abs(d__2)); + rowmax = max(d__3, d__4); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else { + i__1 = imax + (kw - 1) * w_dim1; + if ((d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[imax + (kw - 1) * w_dim1]), abs(d__2)) >= + alpha * rowmax) { + kp = imax; + zcopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1); + } else { + kp = imax; + kstep = 2; + } + } + } + kk = k - kstep + 1; + kkw = *nb + kk - *n; + if (kp != kk) { + i__1 = kp + kp * a_dim1; + i__2 = kk + kk * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kk - 1 - kp; + zcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp + 1) * a_dim1], lda); + if (kp > 1) { + i__1 = kp - 1; + zcopy_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &c__1); + } + if (k < *n) { + i__1 = *n - k; + zswap_(&i__1, &a[kk + (k + 1) * a_dim1], lda, &a[kp + (k + 1) * a_dim1], lda); + } + i__1 = *n - kk + 1; + zswap_(&i__1, &w[kk + kkw * w_dim1], ldw, &w[kp + kkw * w_dim1], ldw); + } + if (kstep == 1) { + zcopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1); + z_lmp_div(&z__1, &c_b1, &a[k + k * a_dim1]); + r1.r = z__1.r, r1.i = z__1.i; + i__1 = k - 1; + zscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1); + } else { + if (k > 2) { + i__1 = k - 1 + kw * w_dim1; + d21.r = w[i__1].r, d21.i = w[i__1].i; + z_lmp_div(&z__1, &w[k + kw * w_dim1], &d21); + d11.r = z__1.r, d11.i = z__1.i; + z_lmp_div(&z__1, &w[k - 1 + (kw - 1) * w_dim1], &d21); + d22.r = z__1.r, d22.i = z__1.i; + z__3.r = d11.r * d22.r - d11.i * d22.i, z__3.i = d11.r * d22.i + d11.i * d22.r; + z__2.r = z__3.r - 1., z__2.i = z__3.i - 0.; + z_lmp_div(&z__1, &c_b1, &z__2); + t.r = z__1.r, t.i = z__1.i; + z_lmp_div(&z__1, &t, &d21); + d21.r = z__1.r, d21.i = z__1.i; + i__1 = k - 2; + for (j = 1; j <= i__1; ++j) { + i__2 = j + (k - 1) * a_dim1; + i__3 = j + (kw - 1) * w_dim1; + z__3.r = d11.r * w[i__3].r - d11.i * w[i__3].i, + z__3.i = d11.r * w[i__3].i + d11.i * w[i__3].r; + i__4 = j + kw * w_dim1; + z__2.r = z__3.r - w[i__4].r, z__2.i = z__3.i - w[i__4].i; + z__1.r = d21.r * z__2.r - d21.i * z__2.i, + z__1.i = d21.r * z__2.i + d21.i * z__2.r; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = j + k * a_dim1; + i__3 = j + kw * w_dim1; + z__3.r = d22.r * w[i__3].r - d22.i * w[i__3].i, + z__3.i = d22.r * w[i__3].i + d22.i * w[i__3].r; + i__4 = j + (kw - 1) * w_dim1; + z__2.r = z__3.r - w[i__4].r, z__2.i = z__3.i - w[i__4].i; + z__1.r = d21.r * z__2.r - d21.i * z__2.i, + z__1.i = d21.r * z__2.i + d21.i * z__2.r; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + } + } + i__1 = k - 1 + (k - 1) * a_dim1; + i__2 = k - 1 + (kw - 1) * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = k - 1 + k * a_dim1; + i__2 = k - 1 + kw * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = k + k * a_dim1; + i__2 = k + kw * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k - 1] = -kp; + } + k -= kstep; + goto L10; + L30: + i__1 = -(*nb); + for (j = (k - 1) / *nb * *nb + 1; i__1 < 0 ? j >= 1 : j <= 1; j += i__1) { + i__2 = *nb, i__3 = k - j + 1; + jb = min(i__2, i__3); + i__2 = j + jb - 1; + for (jj = j; jj <= i__2; ++jj) { + i__3 = jj - j + 1; + i__4 = *n - k; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__3, &i__4, &z__1, &a[j + (k + 1) * a_dim1], lda, + &w[jj + (kw + 1) * w_dim1], ldw, &c_b1, &a[j + jj * a_dim1], &c__1, + (ftnlen)12); + } + i__2 = j - 1; + i__3 = *n - k; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"No transpose", (char *)"Transpose", &i__2, &jb, &i__3, &z__1, &a[(k + 1) * a_dim1 + 1], + lda, &w[j + (kw + 1) * w_dim1], ldw, &c_b1, &a[j * a_dim1 + 1], lda, (ftnlen)12, + (ftnlen)9); + } + j = k + 1; + L60: + jj = j; + jp = ipiv[j]; + if (jp < 0) { + jp = -jp; + ++j; + } + ++j; + if (jp != jj && j <= *n) { + i__1 = *n - j + 1; + zswap_(&i__1, &a[jp + j * a_dim1], lda, &a[jj + j * a_dim1], lda); + } + if (j < *n) { + goto L60; + } + *kb = *n - k; + } else { + k = 1; + L70: + if (k >= *nb && *nb < *n || k > *n) { + goto L90; + } + i__1 = *n - k + 1; + zcopy_(&i__1, &a[k + k * a_dim1], &c__1, &w[k + k * w_dim1], &c__1); + i__1 = *n - k + 1; + i__2 = k - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__1, &i__2, &z__1, &a[k + a_dim1], lda, &w[k + w_dim1], ldw, &c_b1, + &w[k + k * w_dim1], &c__1, (ftnlen)12); + kstep = 1; + i__1 = k + k * w_dim1; + absakk = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&w[k + k * w_dim1]), abs(d__2)); + if (k < *n) { + i__1 = *n - k; + imax = k + izamax_(&i__1, &w[k + 1 + k * w_dim1], &c__1); + i__1 = imax + k * w_dim1; + colmax = + (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&w[imax + k * w_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0.) { + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = imax - k; + zcopy_(&i__1, &a[imax + k * a_dim1], lda, &w[k + (k + 1) * w_dim1], &c__1); + i__1 = *n - imax + 1; + zcopy_(&i__1, &a[imax + imax * a_dim1], &c__1, &w[imax + (k + 1) * w_dim1], &c__1); + i__1 = *n - k + 1; + i__2 = k - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__1, &i__2, &z__1, &a[k + a_dim1], lda, &w[imax + w_dim1], + ldw, &c_b1, &w[k + (k + 1) * w_dim1], &c__1, (ftnlen)12); + i__1 = imax - k; + jmax = k - 1 + izamax_(&i__1, &w[k + (k + 1) * w_dim1], &c__1); + i__1 = jmax + (k + 1) * w_dim1; + rowmax = (d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[jmax + (k + 1) * w_dim1]), abs(d__2)); + if (imax < *n) { + i__1 = *n - imax; + jmax = imax + izamax_(&i__1, &w[imax + 1 + (k + 1) * w_dim1], &c__1); + i__1 = jmax + (k + 1) * w_dim1; + d__3 = rowmax, d__4 = (d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[jmax + (k + 1) * w_dim1]), abs(d__2)); + rowmax = max(d__3, d__4); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else { + i__1 = imax + (k + 1) * w_dim1; + if ((d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[imax + (k + 1) * w_dim1]), abs(d__2)) >= + alpha * rowmax) { + kp = imax; + i__1 = *n - k + 1; + zcopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k * w_dim1], &c__1); + } else { + kp = imax; + kstep = 2; + } + } + } + kk = k + kstep - 1; + if (kp != kk) { + i__1 = kp + kp * a_dim1; + i__2 = kk + kk * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp - kk - 1; + zcopy_(&i__1, &a[kk + 1 + kk * a_dim1], &c__1, &a[kp + (kk + 1) * a_dim1], lda); + if (kp < *n) { + i__1 = *n - kp; + zcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1 + kp * a_dim1], &c__1); + } + if (k > 1) { + i__1 = k - 1; + zswap_(&i__1, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda); + } + zswap_(&kk, &w[kk + w_dim1], ldw, &w[kp + w_dim1], ldw); + } + if (kstep == 1) { + i__1 = *n - k + 1; + zcopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], &c__1); + if (k < *n) { + z_lmp_div(&z__1, &c_b1, &a[k + k * a_dim1]); + r1.r = z__1.r, r1.i = z__1.i; + i__1 = *n - k; + zscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1); + } + } else { + if (k < *n - 1) { + i__1 = k + 1 + k * w_dim1; + d21.r = w[i__1].r, d21.i = w[i__1].i; + z_lmp_div(&z__1, &w[k + 1 + (k + 1) * w_dim1], &d21); + d11.r = z__1.r, d11.i = z__1.i; + z_lmp_div(&z__1, &w[k + k * w_dim1], &d21); + d22.r = z__1.r, d22.i = z__1.i; + z__3.r = d11.r * d22.r - d11.i * d22.i, z__3.i = d11.r * d22.i + d11.i * d22.r; + z__2.r = z__3.r - 1., z__2.i = z__3.i - 0.; + z_lmp_div(&z__1, &c_b1, &z__2); + t.r = z__1.r, t.i = z__1.i; + z_lmp_div(&z__1, &t, &d21); + d21.r = z__1.r, d21.i = z__1.i; + i__1 = *n; + for (j = k + 2; j <= i__1; ++j) { + i__2 = j + k * a_dim1; + i__3 = j + k * w_dim1; + z__3.r = d11.r * w[i__3].r - d11.i * w[i__3].i, + z__3.i = d11.r * w[i__3].i + d11.i * w[i__3].r; + i__4 = j + (k + 1) * w_dim1; + z__2.r = z__3.r - w[i__4].r, z__2.i = z__3.i - w[i__4].i; + z__1.r = d21.r * z__2.r - d21.i * z__2.i, + z__1.i = d21.r * z__2.i + d21.i * z__2.r; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = j + (k + 1) * a_dim1; + i__3 = j + (k + 1) * w_dim1; + z__3.r = d22.r * w[i__3].r - d22.i * w[i__3].i, + z__3.i = d22.r * w[i__3].i + d22.i * w[i__3].r; + i__4 = j + k * w_dim1; + z__2.r = z__3.r - w[i__4].r, z__2.i = z__3.i - w[i__4].i; + z__1.r = d21.r * z__2.r - d21.i * z__2.i, + z__1.i = d21.r * z__2.i + d21.i * z__2.r; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + } + } + i__1 = k + k * a_dim1; + i__2 = k + k * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = k + 1 + k * a_dim1; + i__2 = k + 1 + k * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = k + 1 + (k + 1) * a_dim1; + i__2 = k + 1 + (k + 1) * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k + 1] = -kp; + } + k += kstep; + goto L70; + L90: + i__1 = *n; + i__2 = *nb; + for (j = k; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + i__3 = *nb, i__4 = *n - j + 1; + jb = min(i__3, i__4); + i__3 = j + jb - 1; + for (jj = j; jj <= i__3; ++jj) { + i__4 = j + jb - jj; + i__5 = k - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__4, &i__5, &z__1, &a[jj + a_dim1], lda, &w[jj + w_dim1], + ldw, &c_b1, &a[jj + jj * a_dim1], &c__1, (ftnlen)12); + } + if (j + jb <= *n) { + i__3 = *n - j - jb + 1; + i__4 = k - 1; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"No transpose", (char *)"Transpose", &i__3, &jb, &i__4, &z__1, &a[j + jb + a_dim1], + lda, &w[j + w_dim1], ldw, &c_b1, &a[j + jb + j * a_dim1], lda, (ftnlen)12, + (ftnlen)9); + } + } + j = k - 1; + L120: + jj = j; + jp = ipiv[j]; + if (jp < 0) { + jp = -jp; + --j; + } + --j; + if (jp != jj && j >= 1) { + zswap_(&j, &a[jp + a_dim1], lda, &a[jj + a_dim1], lda); + } + if (j > 1) { + goto L120; + } + *kb = k - 1; + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zlauu2.cpp b/lib/linalg/zlauu2.cpp new file mode 100644 index 0000000000..2e92542d49 --- /dev/null +++ b/lib/linalg/zlauu2.cpp @@ -0,0 +1,100 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +int zlauu2_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + doublecomplex z__1; + integer i__; + doublereal aii; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern VOID zdotc_(doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *); + extern int zgemv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, + ftnlen); + logical upper; + extern int xerbla_(char *, integer *, ftnlen), + zdscal_(integer *, doublereal *, doublecomplex *, integer *), + zlacgv_(integer *, doublecomplex *, integer *); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZLAUU2", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + if (upper) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + aii = a[i__2].r; + if (i__ < *n) { + i__2 = i__ + i__ * a_dim1; + i__3 = *n - i__; + zdotc_(&z__1, &i__3, &a[i__ + (i__ + 1) * a_dim1], lda, + &a[i__ + (i__ + 1) * a_dim1], lda); + d__1 = aii * aii + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + i__2 = *n - i__; + zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ - 1; + i__3 = *n - i__; + z__1.r = aii, z__1.i = 0.; + zgemv_((char *)"No transpose", &i__2, &i__3, &c_b1, &a[(i__ + 1) * a_dim1 + 1], lda, + &a[i__ + (i__ + 1) * a_dim1], lda, &z__1, &a[i__ * a_dim1 + 1], &c__1, + (ftnlen)12); + i__2 = *n - i__; + zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + } else { + zdscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1); + } + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + aii = a[i__2].r; + if (i__ < *n) { + i__2 = i__ + i__ * a_dim1; + i__3 = *n - i__; + zdotc_(&z__1, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &a[i__ + 1 + i__ * a_dim1], + &c__1); + d__1 = aii * aii + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + i__2 = i__ - 1; + zlacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = *n - i__; + i__3 = i__ - 1; + z__1.r = aii, z__1.i = 0.; + zgemv_((char *)"Conjugate transpose", &i__2, &i__3, &c_b1, &a[i__ + 1 + a_dim1], lda, + &a[i__ + 1 + i__ * a_dim1], &c__1, &z__1, &a[i__ + a_dim1], lda, (ftnlen)19); + i__2 = i__ - 1; + zlacgv_(&i__2, &a[i__ + a_dim1], lda); + } else { + zdscal_(&i__, &aii, &a[i__ + a_dim1], lda); + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zlauum.cpp b/lib/linalg/zlauum.cpp new file mode 100644 index 0000000000..e61268ec3d --- /dev/null +++ b/lib/linalg/zlauum.cpp @@ -0,0 +1,103 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b21 = 1.; +int zlauum_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + integer i__, ib, nb; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, ftnlen, ftnlen), + zherk_(char *, char *, integer *, integer *, doublereal *, doublecomplex *, integer *, + doublereal *, doublecomplex *, integer *, ftnlen, ftnlen); + logical upper; + extern int ztrmm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, + ftnlen, ftnlen), + zlauu2_(char *, integer *, doublecomplex *, integer *, integer *, ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZLAUUM", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + nb = ilaenv_(&c__1, (char *)"ZLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (nb <= 1 || nb >= *n) { + zlauu2_(uplo, n, &a[a_offset], lda, info, (ftnlen)1); + } else { + if (upper) { + i__1 = *n; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3, i__4); + i__3 = i__ - 1; + ztrmm_((char *)"Right", (char *)"Upper", (char *)"Conjugate transpose", (char *)"Non-unit", &i__3, &ib, &c_b1, + &a[i__ + i__ * a_dim1], lda, &a[i__ * a_dim1 + 1], lda, (ftnlen)5, (ftnlen)5, + (ftnlen)19, (ftnlen)8); + zlauu2_((char *)"Upper", &ib, &a[i__ + i__ * a_dim1], lda, info, (ftnlen)5); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + zgemm_((char *)"No transpose", (char *)"Conjugate transpose", &i__3, &ib, &i__4, &c_b1, + &a[(i__ + ib) * a_dim1 + 1], lda, &a[i__ + (i__ + ib) * a_dim1], lda, + &c_b1, &a[i__ * a_dim1 + 1], lda, (ftnlen)12, (ftnlen)19); + i__3 = *n - i__ - ib + 1; + zherk_((char *)"Upper", (char *)"No transpose", &ib, &i__3, &c_b21, + &a[i__ + (i__ + ib) * a_dim1], lda, &c_b21, &a[i__ + i__ * a_dim1], lda, + (ftnlen)5, (ftnlen)12); + } + } + } else { + i__2 = *n; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3, i__4); + i__3 = i__ - 1; + ztrmm_((char *)"Left", (char *)"Lower", (char *)"Conjugate transpose", (char *)"Non-unit", &ib, &i__3, &c_b1, + &a[i__ + i__ * a_dim1], lda, &a[i__ + a_dim1], lda, (ftnlen)4, (ftnlen)5, + (ftnlen)19, (ftnlen)8); + zlauu2_((char *)"Lower", &ib, &a[i__ + i__ * a_dim1], lda, info, (ftnlen)5); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + zgemm_((char *)"Conjugate transpose", (char *)"No transpose", &ib, &i__3, &i__4, &c_b1, + &a[i__ + ib + i__ * a_dim1], lda, &a[i__ + ib + a_dim1], lda, &c_b1, + &a[i__ + a_dim1], lda, (ftnlen)19, (ftnlen)12); + i__3 = *n - i__ - ib + 1; + zherk_((char *)"Lower", (char *)"Conjugate transpose", &ib, &i__3, &c_b21, + &a[i__ + ib + i__ * a_dim1], lda, &c_b21, &a[i__ + i__ * a_dim1], lda, + (ftnlen)5, (ftnlen)19); + } + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zpotrf.cpp b/lib/linalg/zpotrf.cpp new file mode 100644 index 0000000000..5679af9ca9 --- /dev/null +++ b/lib/linalg/zpotrf.cpp @@ -0,0 +1,115 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b14 = -1.; +static doublereal c_b15 = 1.; +int zpotrf_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + doublecomplex z__1; + integer j, jb, nb; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, ftnlen, ftnlen), + zherk_(char *, char *, integer *, integer *, doublereal *, doublecomplex *, integer *, + doublereal *, doublecomplex *, integer *, ftnlen, ftnlen); + logical upper; + extern int ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, + ftnlen, ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + extern int zpotrf2_(char *, integer *, doublecomplex *, integer *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZPOTRF", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + nb = ilaenv_(&c__1, (char *)"ZPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (nb <= 1 || nb >= *n) { + zpotrf2_(uplo, n, &a[a_offset], lda, info, (ftnlen)1); + } else { + if (upper) { + i__1 = *n; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3, i__4); + i__3 = j - 1; + zherk_((char *)"Upper", (char *)"Conjugate transpose", &jb, &i__3, &c_b14, &a[j * a_dim1 + 1], lda, + &c_b15, &a[j + j * a_dim1], lda, (ftnlen)5, (ftnlen)19); + zpotrf2_((char *)"Upper", &jb, &a[j + j * a_dim1], lda, info, (ftnlen)5); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + i__3 = *n - j - jb + 1; + i__4 = j - 1; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"Conjugate transpose", (char *)"No transpose", &jb, &i__3, &i__4, &z__1, + &a[j * a_dim1 + 1], lda, &a[(j + jb) * a_dim1 + 1], lda, &c_b1, + &a[j + (j + jb) * a_dim1], lda, (ftnlen)19, (ftnlen)12); + i__3 = *n - j - jb + 1; + ztrsm_((char *)"Left", (char *)"Upper", (char *)"Conjugate transpose", (char *)"Non-unit", &jb, &i__3, &c_b1, + &a[j + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda, (ftnlen)4, + (ftnlen)5, (ftnlen)19, (ftnlen)8); + } + } + } else { + i__2 = *n; + i__1 = nb; + for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3, i__4); + i__3 = j - 1; + zherk_((char *)"Lower", (char *)"No transpose", &jb, &i__3, &c_b14, &a[j + a_dim1], lda, &c_b15, + &a[j + j * a_dim1], lda, (ftnlen)5, (ftnlen)12); + zpotrf2_((char *)"Lower", &jb, &a[j + j * a_dim1], lda, info, (ftnlen)5); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + i__3 = *n - j - jb + 1; + i__4 = j - 1; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"No transpose", (char *)"Conjugate transpose", &i__3, &jb, &i__4, &z__1, + &a[j + jb + a_dim1], lda, &a[j + a_dim1], lda, &c_b1, + &a[j + jb + j * a_dim1], lda, (ftnlen)12, (ftnlen)19); + i__3 = *n - j - jb + 1; + ztrsm_((char *)"Right", (char *)"Lower", (char *)"Conjugate transpose", (char *)"Non-unit", &i__3, &jb, &c_b1, + &a[j + j * a_dim1], lda, &a[j + jb + j * a_dim1], lda, (ftnlen)5, + (ftnlen)5, (ftnlen)19, (ftnlen)8); + } + } + } + } + goto L40; +L30: + *info = *info + j - 1; +L40: + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zpotrf2.cpp b/lib/linalg/zpotrf2.cpp new file mode 100644 index 0000000000..262ea15497 --- /dev/null +++ b/lib/linalg/zpotrf2.cpp @@ -0,0 +1,89 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static doublereal c_b11 = -1.; +static doublereal c_b12 = 1.; +int zpotrf2_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1; + doublereal d__1; + double sqrt(doublereal); + integer n1, n2; + doublereal ajj; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer iinfo; + extern int zherk_(char *, char *, integer *, integer *, doublereal *, doublecomplex *, + integer *, doublereal *, doublecomplex *, integer *, ftnlen, ftnlen); + logical upper; + extern int ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, + ftnlen, ftnlen); + extern logical disnan_(doublereal *); + extern int xerbla_(char *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZPOTRF2", &i__1, (ftnlen)7); + return 0; + } + if (*n == 0) { + return 0; + } + if (*n == 1) { + i__1 = a_dim1 + 1; + ajj = a[i__1].r; + if (ajj <= 0. || disnan_(&ajj)) { + *info = 1; + return 0; + } + i__1 = a_dim1 + 1; + d__1 = sqrt(ajj); + a[i__1].r = d__1, a[i__1].i = 0.; + } else { + n1 = *n / 2; + n2 = *n - n1; + zpotrf2_(uplo, &n1, &a[a_dim1 + 1], lda, &iinfo, (ftnlen)1); + if (iinfo != 0) { + *info = iinfo; + return 0; + } + if (upper) { + ztrsm_((char *)"L", (char *)"U", (char *)"C", (char *)"N", &n1, &n2, &c_b1, &a[a_dim1 + 1], lda, + &a[(n1 + 1) * a_dim1 + 1], lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + zherk_(uplo, (char *)"C", &n2, &n1, &c_b11, &a[(n1 + 1) * a_dim1 + 1], lda, &c_b12, + &a[n1 + 1 + (n1 + 1) * a_dim1], lda, (ftnlen)1, (ftnlen)1); + zpotrf2_(uplo, &n2, &a[n1 + 1 + (n1 + 1) * a_dim1], lda, &iinfo, (ftnlen)1); + if (iinfo != 0) { + *info = iinfo + n1; + return 0; + } + } else { + ztrsm_((char *)"R", (char *)"L", (char *)"C", (char *)"N", &n2, &n1, &c_b1, &a[a_dim1 + 1], lda, &a[n1 + 1 + a_dim1], + lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + zherk_(uplo, (char *)"N", &n2, &n1, &c_b11, &a[n1 + 1 + a_dim1], lda, &c_b12, + &a[n1 + 1 + (n1 + 1) * a_dim1], lda, (ftnlen)1, (ftnlen)1); + zpotrf2_(uplo, &n2, &a[n1 + 1 + (n1 + 1) * a_dim1], lda, &iinfo, (ftnlen)1); + if (iinfo != 0) { + *info = iinfo + n1; + return 0; + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zpotri.cpp b/lib/linalg/zpotri.cpp new file mode 100644 index 0000000000..a13f6fde5c --- /dev/null +++ b/lib/linalg/zpotri.cpp @@ -0,0 +1,40 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int zpotri_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int xerbla_(char *, integer *, ftnlen), + zlauum_(char *, integer *, doublecomplex *, integer *, integer *, ftnlen), + ztrtri_(char *, char *, integer *, doublecomplex *, integer *, integer *, ftnlen, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + *info = 0; + if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZPOTRI", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + ztrtri_(uplo, (char *)"Non-unit", n, &a[a_offset], lda, info, (ftnlen)1, (ftnlen)8); + if (*info > 0) { + return 0; + } + zlauum_(uplo, n, &a[a_offset], lda, info, (ftnlen)1); + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zsymv.cpp b/lib/linalg/zsymv.cpp new file mode 100644 index 0000000000..73e956493b --- /dev/null +++ b/lib/linalg/zsymv.cpp @@ -0,0 +1,263 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int zsymv_(char *uplo, integer *n, doublecomplex *alpha, doublecomplex *a, integer *lda, + doublecomplex *x, integer *incx, doublecomplex *beta, doublecomplex *y, integer *incy, + ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2, z__3, z__4; + integer i__, j, ix, iy, jx, jy, kx, ky, info; + doublecomplex temp1, temp2; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int xerbla_(char *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + info = 0; + if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*lda < max(1, *n)) { + info = 5; + } else if (*incx == 0) { + info = 7; + } else if (*incy == 0) { + info = 10; + } + if (info != 0) { + xerbla_((char *)"ZSYMV ", &info, (ftnlen)6); + return 0; + } + if (*n == 0 || alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && beta->i == 0.)) { + return 0; + } + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + if (beta->r != 1. || beta->i != 0.) { + if (*incy == 1) { + if (beta->r == 0. && beta->i == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + y[i__2].r = 0., y[i__2].i = 0.; + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + z__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + } + } + } else { + iy = ky; + if (beta->r == 0. && beta->i == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + y[i__2].r = 0., y[i__2].i = 0.; + iy += *incy; + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = iy; + z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + z__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + iy += *incy; + } + } + } + } + if (alpha->r == 0. && alpha->i == 0.) { + return 0; + } + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + i__3 = i__ + j * a_dim1; + i__4 = i__; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4].i, + z__2.i = a[i__3].r * x[i__4].i + a[i__3].i * x[i__4].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + } + i__2 = j; + i__3 = j; + i__4 = j + j * a_dim1; + z__3.r = temp1.r * a[i__4].r - temp1.i * a[i__4].i, + z__3.i = temp1.r * a[i__4].i + temp1.i * a[i__4].r; + z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i; + z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, + z__4.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + ix = kx; + iy = ky; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = iy; + i__4 = iy; + i__5 = i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + i__3 = i__ + j * a_dim1; + i__4 = ix; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4].i, + z__2.i = a[i__3].r * x[i__4].i + a[i__3].i * x[i__4].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + ix += *incx; + iy += *incy; + } + i__2 = jy; + i__3 = jy; + i__4 = j + j * a_dim1; + z__3.r = temp1.r * a[i__4].r - temp1.i * a[i__4].i, + z__3.i = temp1.r * a[i__4].i + temp1.i * a[i__4].r; + z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i; + z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, + z__4.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + jx += *incx; + jy += *incy; + } + } + } else { + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__2 = j; + i__3 = j; + i__4 = j + j * a_dim1; + z__2.r = temp1.r * a[i__4].r - temp1.i * a[i__4].i, + z__2.i = temp1.r * a[i__4].i + temp1.i * a[i__4].r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + i__3 = i__ + j * a_dim1; + i__4 = i__; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4].i, + z__2.i = a[i__3].r * x[i__4].i + a[i__3].i * x[i__4].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + } + i__2 = j; + i__3 = j; + z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, + z__2.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__2 = jy; + i__3 = jy; + i__4 = j + j * a_dim1; + z__2.r = temp1.r * a[i__4].r - temp1.i * a[i__4].i, + z__2.i = temp1.r * a[i__4].i + temp1.i * a[i__4].r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + ix = jx; + iy = jy; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + iy += *incy; + i__3 = iy; + i__4 = iy; + i__5 = i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + i__3 = i__ + j * a_dim1; + i__4 = ix; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4].i, + z__2.i = a[i__3].r * x[i__4].i + a[i__3].i * x[i__4].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + } + i__2 = jy; + i__3 = jy; + z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, + z__2.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + jx += *incx; + jy += *incy; + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zsyr.cpp b/lib/linalg/zsyr.cpp new file mode 100644 index 0000000000..5e79f28d94 --- /dev/null +++ b/lib/linalg/zsyr.cpp @@ -0,0 +1,141 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int zsyr_(char *uplo, integer *n, doublecomplex *alpha, doublecomplex *x, integer *incx, + doublecomplex *a, integer *lda, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2; + integer i__, j, ix, jx, kx, info; + doublecomplex temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int xerbla_(char *, integer *, ftnlen); + --x; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + info = 0; + if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*lda < max(1, *n)) { + info = 7; + } + if (info != 0) { + xerbla_((char *)"ZSYR ", &info, (ftnlen)6); + return 0; + } + if (*n == 0 || alpha->r == 0. && alpha->i == 0.) { + return 0; + } + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + i__2 = j; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, + z__2.i = x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + } + } + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + i__2 = jx; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + ix = kx; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, + z__2.i = x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + ix += *incx; + } + } + jx += *incx; + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + i__2 = j; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, + z__2.i = x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + } + } + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + i__2 = jx; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + ix = jx; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, + z__2.i = x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + ix += *incx; + } + } + jx += *incx; + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zsytf2.cpp b/lib/linalg/zsytf2.cpp new file mode 100644 index 0000000000..bce7b51f1d --- /dev/null +++ b/lib/linalg/zsytf2.cpp @@ -0,0 +1,356 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +int zsytf2_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, integer *info, + ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; + doublereal d__1, d__2, d__3, d__4; + doublecomplex z__1, z__2, z__3, z__4; + double sqrt(doublereal), d_lmp_imag(doublecomplex *); + void z_lmp_div(doublecomplex *, doublecomplex *, doublecomplex *); + integer i__, j, k; + doublecomplex t, r1, d11, d12, d21, d22; + integer kk, kp; + doublecomplex wk, wkm1, wkp1; + integer imax, jmax; + extern int zsyr_(char *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, ftnlen); + doublereal alpha; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int zscal_(integer *, doublecomplex *, doublecomplex *, integer *); + integer kstep; + logical upper; + extern int zswap_(integer *, doublecomplex *, integer *, doublecomplex *, integer *); + doublereal absakk; + extern logical disnan_(doublereal *); + extern int xerbla_(char *, integer *, ftnlen); + doublereal colmax; + extern integer izamax_(integer *, doublecomplex *, integer *); + doublereal rowmax; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZSYTF2", &i__1, (ftnlen)6); + return 0; + } + alpha = (sqrt(17.) + 1.) / 8.; + if (upper) { + k = *n; + L10: + if (k < 1) { + goto L70; + } + kstep = 1; + i__1 = k + k * a_dim1; + absakk = (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&a[k + k * a_dim1]), abs(d__2)); + if (k > 1) { + i__1 = k - 1; + imax = izamax_(&i__1, &a[k * a_dim1 + 1], &c__1); + i__1 = imax + k * a_dim1; + colmax = + (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&a[imax + k * a_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0. || disnan_(&absakk)) { + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = k - imax; + jmax = imax + izamax_(&i__1, &a[imax + (imax + 1) * a_dim1], lda); + i__1 = imax + jmax * a_dim1; + rowmax = (d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[imax + jmax * a_dim1]), abs(d__2)); + if (imax > 1) { + i__1 = imax - 1; + jmax = izamax_(&i__1, &a[imax * a_dim1 + 1], &c__1); + i__1 = jmax + imax * a_dim1; + d__3 = rowmax, d__4 = (d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[jmax + imax * a_dim1]), abs(d__2)); + rowmax = max(d__3, d__4); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else { + i__1 = imax + imax * a_dim1; + if ((d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[imax + imax * a_dim1]), abs(d__2)) >= + alpha * rowmax) { + kp = imax; + } else { + kp = imax; + kstep = 2; + } + } + } + kk = k - kstep + 1; + if (kp != kk) { + i__1 = kp - 1; + zswap_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &c__1); + i__1 = kk - kp - 1; + zswap_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp + 1) * a_dim1], lda); + i__1 = kk + kk * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = kk + kk * a_dim1; + i__2 = kp + kp * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + kp * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + if (kstep == 2) { + i__1 = k - 1 + k * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = k - 1 + k * a_dim1; + i__2 = kp + k * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + k * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + } + } + if (kstep == 1) { + z_lmp_div(&z__1, &c_b1, &a[k + k * a_dim1]); + r1.r = z__1.r, r1.i = z__1.i; + i__1 = k - 1; + z__1.r = -r1.r, z__1.i = -r1.i; + zsyr_(uplo, &i__1, &z__1, &a[k * a_dim1 + 1], &c__1, &a[a_offset], lda, (ftnlen)1); + i__1 = k - 1; + zscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1); + } else { + if (k > 2) { + i__1 = k - 1 + k * a_dim1; + d12.r = a[i__1].r, d12.i = a[i__1].i; + z_lmp_div(&z__1, &a[k - 1 + (k - 1) * a_dim1], &d12); + d22.r = z__1.r, d22.i = z__1.i; + z_lmp_div(&z__1, &a[k + k * a_dim1], &d12); + d11.r = z__1.r, d11.i = z__1.i; + z__3.r = d11.r * d22.r - d11.i * d22.i, z__3.i = d11.r * d22.i + d11.i * d22.r; + z__2.r = z__3.r - 1., z__2.i = z__3.i - 0.; + z_lmp_div(&z__1, &c_b1, &z__2); + t.r = z__1.r, t.i = z__1.i; + z_lmp_div(&z__1, &t, &d12); + d12.r = z__1.r, d12.i = z__1.i; + for (j = k - 2; j >= 1; --j) { + i__1 = j + (k - 1) * a_dim1; + z__3.r = d11.r * a[i__1].r - d11.i * a[i__1].i, + z__3.i = d11.r * a[i__1].i + d11.i * a[i__1].r; + i__2 = j + k * a_dim1; + z__2.r = z__3.r - a[i__2].r, z__2.i = z__3.i - a[i__2].i; + z__1.r = d12.r * z__2.r - d12.i * z__2.i, + z__1.i = d12.r * z__2.i + d12.i * z__2.r; + wkm1.r = z__1.r, wkm1.i = z__1.i; + i__1 = j + k * a_dim1; + z__3.r = d22.r * a[i__1].r - d22.i * a[i__1].i, + z__3.i = d22.r * a[i__1].i + d22.i * a[i__1].r; + i__2 = j + (k - 1) * a_dim1; + z__2.r = z__3.r - a[i__2].r, z__2.i = z__3.i - a[i__2].i; + z__1.r = d12.r * z__2.r - d12.i * z__2.i, + z__1.i = d12.r * z__2.i + d12.i * z__2.r; + wk.r = z__1.r, wk.i = z__1.i; + for (i__ = j; i__ >= 1; --i__) { + i__1 = i__ + j * a_dim1; + i__2 = i__ + j * a_dim1; + i__3 = i__ + k * a_dim1; + z__3.r = a[i__3].r * wk.r - a[i__3].i * wk.i, + z__3.i = a[i__3].r * wk.i + a[i__3].i * wk.r; + z__2.r = a[i__2].r - z__3.r, z__2.i = a[i__2].i - z__3.i; + i__4 = i__ + (k - 1) * a_dim1; + z__4.r = a[i__4].r * wkm1.r - a[i__4].i * wkm1.i, + z__4.i = a[i__4].r * wkm1.i + a[i__4].i * wkm1.r; + z__1.r = z__2.r - z__4.r, z__1.i = z__2.i - z__4.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + i__1 = j + k * a_dim1; + a[i__1].r = wk.r, a[i__1].i = wk.i; + i__1 = j + (k - 1) * a_dim1; + a[i__1].r = wkm1.r, a[i__1].i = wkm1.i; + } + } + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k - 1] = -kp; + } + k -= kstep; + goto L10; + } else { + k = 1; + L40: + if (k > *n) { + goto L70; + } + kstep = 1; + i__1 = k + k * a_dim1; + absakk = (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&a[k + k * a_dim1]), abs(d__2)); + if (k < *n) { + i__1 = *n - k; + imax = k + izamax_(&i__1, &a[k + 1 + k * a_dim1], &c__1); + i__1 = imax + k * a_dim1; + colmax = + (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&a[imax + k * a_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0. || disnan_(&absakk)) { + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = imax - k; + jmax = k - 1 + izamax_(&i__1, &a[imax + k * a_dim1], lda); + i__1 = imax + jmax * a_dim1; + rowmax = (d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[imax + jmax * a_dim1]), abs(d__2)); + if (imax < *n) { + i__1 = *n - imax; + jmax = imax + izamax_(&i__1, &a[imax + 1 + imax * a_dim1], &c__1); + i__1 = jmax + imax * a_dim1; + d__3 = rowmax, d__4 = (d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[jmax + imax * a_dim1]), abs(d__2)); + rowmax = max(d__3, d__4); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else { + i__1 = imax + imax * a_dim1; + if ((d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[imax + imax * a_dim1]), abs(d__2)) >= + alpha * rowmax) { + kp = imax; + } else { + kp = imax; + kstep = 2; + } + } + } + kk = k + kstep - 1; + if (kp != kk) { + if (kp < *n) { + i__1 = *n - kp; + zswap_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1 + kp * a_dim1], &c__1); + } + i__1 = kp - kk - 1; + zswap_(&i__1, &a[kk + 1 + kk * a_dim1], &c__1, &a[kp + (kk + 1) * a_dim1], lda); + i__1 = kk + kk * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = kk + kk * a_dim1; + i__2 = kp + kp * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + kp * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + if (kstep == 2) { + i__1 = k + 1 + k * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = k + 1 + k * a_dim1; + i__2 = kp + k * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + k * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + } + } + if (kstep == 1) { + if (k < *n) { + z_lmp_div(&z__1, &c_b1, &a[k + k * a_dim1]); + r1.r = z__1.r, r1.i = z__1.i; + i__1 = *n - k; + z__1.r = -r1.r, z__1.i = -r1.i; + zsyr_(uplo, &i__1, &z__1, &a[k + 1 + k * a_dim1], &c__1, + &a[k + 1 + (k + 1) * a_dim1], lda, (ftnlen)1); + i__1 = *n - k; + zscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1); + } + } else { + if (k < *n - 1) { + i__1 = k + 1 + k * a_dim1; + d21.r = a[i__1].r, d21.i = a[i__1].i; + z_lmp_div(&z__1, &a[k + 1 + (k + 1) * a_dim1], &d21); + d11.r = z__1.r, d11.i = z__1.i; + z_lmp_div(&z__1, &a[k + k * a_dim1], &d21); + d22.r = z__1.r, d22.i = z__1.i; + z__3.r = d11.r * d22.r - d11.i * d22.i, z__3.i = d11.r * d22.i + d11.i * d22.r; + z__2.r = z__3.r - 1., z__2.i = z__3.i - 0.; + z_lmp_div(&z__1, &c_b1, &z__2); + t.r = z__1.r, t.i = z__1.i; + z_lmp_div(&z__1, &t, &d21); + d21.r = z__1.r, d21.i = z__1.i; + i__1 = *n; + for (j = k + 2; j <= i__1; ++j) { + i__2 = j + k * a_dim1; + z__3.r = d11.r * a[i__2].r - d11.i * a[i__2].i, + z__3.i = d11.r * a[i__2].i + d11.i * a[i__2].r; + i__3 = j + (k + 1) * a_dim1; + z__2.r = z__3.r - a[i__3].r, z__2.i = z__3.i - a[i__3].i; + z__1.r = d21.r * z__2.r - d21.i * z__2.i, + z__1.i = d21.r * z__2.i + d21.i * z__2.r; + wk.r = z__1.r, wk.i = z__1.i; + i__2 = j + (k + 1) * a_dim1; + z__3.r = d22.r * a[i__2].r - d22.i * a[i__2].i, + z__3.i = d22.r * a[i__2].i + d22.i * a[i__2].r; + i__3 = j + k * a_dim1; + z__2.r = z__3.r - a[i__3].r, z__2.i = z__3.i - a[i__3].i; + z__1.r = d21.r * z__2.r - d21.i * z__2.i, + z__1.i = d21.r * z__2.i + d21.i * z__2.r; + wkp1.r = z__1.r, wkp1.i = z__1.i; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__ + k * a_dim1; + z__3.r = a[i__5].r * wk.r - a[i__5].i * wk.i, + z__3.i = a[i__5].r * wk.i + a[i__5].i * wk.r; + z__2.r = a[i__4].r - z__3.r, z__2.i = a[i__4].i - z__3.i; + i__6 = i__ + (k + 1) * a_dim1; + z__4.r = a[i__6].r * wkp1.r - a[i__6].i * wkp1.i, + z__4.i = a[i__6].r * wkp1.i + a[i__6].i * wkp1.r; + z__1.r = z__2.r - z__4.r, z__1.i = z__2.i - z__4.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + } + i__2 = j + k * a_dim1; + a[i__2].r = wk.r, a[i__2].i = wk.i; + i__2 = j + (k + 1) * a_dim1; + a[i__2].r = wkp1.r, a[i__2].i = wkp1.i; + } + } + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k + 1] = -kp; + } + k += kstep; + goto L40; + } +L70: + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zsytrf.cpp b/lib/linalg/zsytrf.cpp new file mode 100644 index 0000000000..178193fbaa --- /dev/null +++ b/lib/linalg/zsytrf.cpp @@ -0,0 +1,124 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +int zsytrf_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, + doublecomplex *work, integer *lwork, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2; + integer j, k, kb, nb, iws; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nbmin, iinfo; + logical upper; + extern int zsytf2_(char *, integer *, doublecomplex *, integer *, integer *, integer *, ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + integer ldwork; + extern int zlasyf_(char *, integer *, integer *, integer *, doublecomplex *, integer *, + integer *, doublecomplex *, integer *, integer *, ftnlen); + integer lwkopt; + logical lquery; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + --work; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } else if (*lwork < 1 && !lquery) { + *info = -7; + } + if (*info == 0) { + nb = ilaenv_(&c__1, (char *)"ZSYTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = *n * nb; + work[1].r = (doublereal)lwkopt, work[1].i = 0.; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZSYTRF", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + nbmin = 2; + ldwork = *n; + if (nb > 1 && nb < *n) { + iws = ldwork * nb; + if (*lwork < iws) { + i__1 = *lwork / ldwork; + nb = max(i__1, 1); + i__1 = 2, + i__2 = ilaenv_(&c__2, (char *)"ZSYTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1, i__2); + } + } else { + iws = 1; + } + if (nb < nbmin) { + nb = *n; + } + if (upper) { + k = *n; + L10: + if (k < 1) { + goto L40; + } + if (k > nb) { + zlasyf_(uplo, &k, &nb, &kb, &a[a_offset], lda, &ipiv[1], &work[1], n, &iinfo, + (ftnlen)1); + } else { + zsytf2_(uplo, &k, &a[a_offset], lda, &ipiv[1], &iinfo, (ftnlen)1); + kb = k; + } + if (*info == 0 && iinfo > 0) { + *info = iinfo; + } + k -= kb; + goto L10; + } else { + k = 1; + L20: + if (k > *n) { + goto L40; + } + if (k <= *n - nb) { + i__1 = *n - k + 1; + zlasyf_(uplo, &i__1, &nb, &kb, &a[k + k * a_dim1], lda, &ipiv[k], &work[1], n, &iinfo, + (ftnlen)1); + } else { + i__1 = *n - k + 1; + zsytf2_(uplo, &i__1, &a[k + k * a_dim1], lda, &ipiv[k], &iinfo, (ftnlen)1); + kb = *n - k + 1; + } + if (*info == 0 && iinfo > 0) { + *info = iinfo + k - 1; + } + i__1 = k + kb - 1; + for (j = k; j <= i__1; ++j) { + if (ipiv[j] > 0) { + ipiv[j] = ipiv[j] + k - 1; + } else { + ipiv[j] = ipiv[j] - k + 1; + } + } + k += kb; + goto L20; + } +L40: + work[1].r = (doublereal)lwkopt, work[1].i = 0.; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zsytri.cpp b/lib/linalg/zsytri.cpp new file mode 100644 index 0000000000..3f7d4dea0f --- /dev/null +++ b/lib/linalg/zsytri.cpp @@ -0,0 +1,292 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static doublecomplex c_b2 = {0., 0.}; +static integer c__1 = 1; +int zsytri_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, + doublecomplex *work, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublecomplex z__1, z__2, z__3; + void z_lmp_div(doublecomplex *, doublecomplex *, doublecomplex *); + doublecomplex d__; + integer k; + doublecomplex t, ak; + integer kp; + doublecomplex akp1, temp, akkp1; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer kstep; + logical upper; + extern int zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, integer *); + extern VOID zdotu_(doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *); + extern int zswap_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), + zsymv_(char *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, ftnlen), + xerbla_(char *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + --work; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZSYTRI", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + if (upper) { + for (*info = *n; *info >= 1; --(*info)) { + i__1 = *info + *info * a_dim1; + if (ipiv[*info] > 0 && (a[i__1].r == 0. && a[i__1].i == 0.)) { + return 0; + } + } + } else { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + i__2 = *info + *info * a_dim1; + if (ipiv[*info] > 0 && (a[i__2].r == 0. && a[i__2].i == 0.)) { + return 0; + } + } + } + *info = 0; + if (upper) { + k = 1; + L30: + if (k > *n) { + goto L40; + } + if (ipiv[k] > 0) { + i__1 = k + k * a_dim1; + z_lmp_div(&z__1, &c_b1, &a[k + k * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + if (k > 1) { + i__1 = k - 1; + zcopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &work[1], &c__1); + i__1 = k - 1; + z__1.r = -1., z__1.i = -0.; + zsymv_(uplo, &i__1, &z__1, &a[a_offset], lda, &work[1], &c__1, &c_b2, + &a[k * a_dim1 + 1], &c__1, (ftnlen)1); + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + i__3 = k - 1; + zdotu_(&z__2, &i__3, &work[1], &c__1, &a[k * a_dim1 + 1], &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + kstep = 1; + } else { + i__1 = k + (k + 1) * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + z_lmp_div(&z__1, &a[k + k * a_dim1], &t); + ak.r = z__1.r, ak.i = z__1.i; + z_lmp_div(&z__1, &a[k + 1 + (k + 1) * a_dim1], &t); + akp1.r = z__1.r, akp1.i = z__1.i; + z_lmp_div(&z__1, &a[k + (k + 1) * a_dim1], &t); + akkp1.r = z__1.r, akkp1.i = z__1.i; + z__3.r = ak.r * akp1.r - ak.i * akp1.i, z__3.i = ak.r * akp1.i + ak.i * akp1.r; + z__2.r = z__3.r - 1., z__2.i = z__3.i - 0.; + z__1.r = t.r * z__2.r - t.i * z__2.i, z__1.i = t.r * z__2.i + t.i * z__2.r; + d__.r = z__1.r, d__.i = z__1.i; + i__1 = k + k * a_dim1; + z_lmp_div(&z__1, &akp1, &d__); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + 1 + (k + 1) * a_dim1; + z_lmp_div(&z__1, &ak, &d__); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + (k + 1) * a_dim1; + z__2.r = -akkp1.r, z__2.i = -akkp1.i; + z_lmp_div(&z__1, &z__2, &d__); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + if (k > 1) { + i__1 = k - 1; + zcopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &work[1], &c__1); + i__1 = k - 1; + z__1.r = -1., z__1.i = -0.; + zsymv_(uplo, &i__1, &z__1, &a[a_offset], lda, &work[1], &c__1, &c_b2, + &a[k * a_dim1 + 1], &c__1, (ftnlen)1); + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + i__3 = k - 1; + zdotu_(&z__2, &i__3, &work[1], &c__1, &a[k * a_dim1 + 1], &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + (k + 1) * a_dim1; + i__2 = k + (k + 1) * a_dim1; + i__3 = k - 1; + zdotu_(&z__2, &i__3, &a[k * a_dim1 + 1], &c__1, &a[(k + 1) * a_dim1 + 1], &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k - 1; + zcopy_(&i__1, &a[(k + 1) * a_dim1 + 1], &c__1, &work[1], &c__1); + i__1 = k - 1; + z__1.r = -1., z__1.i = -0.; + zsymv_(uplo, &i__1, &z__1, &a[a_offset], lda, &work[1], &c__1, &c_b2, + &a[(k + 1) * a_dim1 + 1], &c__1, (ftnlen)1); + i__1 = k + 1 + (k + 1) * a_dim1; + i__2 = k + 1 + (k + 1) * a_dim1; + i__3 = k - 1; + zdotu_(&z__2, &i__3, &work[1], &c__1, &a[(k + 1) * a_dim1 + 1], &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + kstep = 2; + } + kp = (i__1 = ipiv[k], abs(i__1)); + if (kp != k) { + i__1 = kp - 1; + zswap_(&i__1, &a[k * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &c__1); + i__1 = k - kp - 1; + zswap_(&i__1, &a[kp + 1 + k * a_dim1], &c__1, &a[kp + (kp + 1) * a_dim1], lda); + i__1 = k + k * a_dim1; + temp.r = a[i__1].r, temp.i = a[i__1].i; + i__1 = k + k * a_dim1; + i__2 = kp + kp * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + kp * a_dim1; + a[i__1].r = temp.r, a[i__1].i = temp.i; + if (kstep == 2) { + i__1 = k + (k + 1) * a_dim1; + temp.r = a[i__1].r, temp.i = a[i__1].i; + i__1 = k + (k + 1) * a_dim1; + i__2 = kp + (k + 1) * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + (k + 1) * a_dim1; + a[i__1].r = temp.r, a[i__1].i = temp.i; + } + } + k += kstep; + goto L30; + L40:; + } else { + k = *n; + L50: + if (k < 1) { + goto L60; + } + if (ipiv[k] > 0) { + i__1 = k + k * a_dim1; + z_lmp_div(&z__1, &c_b1, &a[k + k * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + if (k < *n) { + i__1 = *n - k; + zcopy_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &work[1], &c__1); + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zsymv_(uplo, &i__1, &z__1, &a[k + 1 + (k + 1) * a_dim1], lda, &work[1], &c__1, + &c_b2, &a[k + 1 + k * a_dim1], &c__1, (ftnlen)1); + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + i__3 = *n - k; + zdotu_(&z__2, &i__3, &work[1], &c__1, &a[k + 1 + k * a_dim1], &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + kstep = 1; + } else { + i__1 = k + (k - 1) * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + z_lmp_div(&z__1, &a[k - 1 + (k - 1) * a_dim1], &t); + ak.r = z__1.r, ak.i = z__1.i; + z_lmp_div(&z__1, &a[k + k * a_dim1], &t); + akp1.r = z__1.r, akp1.i = z__1.i; + z_lmp_div(&z__1, &a[k + (k - 1) * a_dim1], &t); + akkp1.r = z__1.r, akkp1.i = z__1.i; + z__3.r = ak.r * akp1.r - ak.i * akp1.i, z__3.i = ak.r * akp1.i + ak.i * akp1.r; + z__2.r = z__3.r - 1., z__2.i = z__3.i - 0.; + z__1.r = t.r * z__2.r - t.i * z__2.i, z__1.i = t.r * z__2.i + t.i * z__2.r; + d__.r = z__1.r, d__.i = z__1.i; + i__1 = k - 1 + (k - 1) * a_dim1; + z_lmp_div(&z__1, &akp1, &d__); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + k * a_dim1; + z_lmp_div(&z__1, &ak, &d__); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + (k - 1) * a_dim1; + z__2.r = -akkp1.r, z__2.i = -akkp1.i; + z_lmp_div(&z__1, &z__2, &d__); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + if (k < *n) { + i__1 = *n - k; + zcopy_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &work[1], &c__1); + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zsymv_(uplo, &i__1, &z__1, &a[k + 1 + (k + 1) * a_dim1], lda, &work[1], &c__1, + &c_b2, &a[k + 1 + k * a_dim1], &c__1, (ftnlen)1); + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + i__3 = *n - k; + zdotu_(&z__2, &i__3, &work[1], &c__1, &a[k + 1 + k * a_dim1], &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + (k - 1) * a_dim1; + i__2 = k + (k - 1) * a_dim1; + i__3 = *n - k; + zdotu_(&z__2, &i__3, &a[k + 1 + k * a_dim1], &c__1, &a[k + 1 + (k - 1) * a_dim1], + &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = *n - k; + zcopy_(&i__1, &a[k + 1 + (k - 1) * a_dim1], &c__1, &work[1], &c__1); + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zsymv_(uplo, &i__1, &z__1, &a[k + 1 + (k + 1) * a_dim1], lda, &work[1], &c__1, + &c_b2, &a[k + 1 + (k - 1) * a_dim1], &c__1, (ftnlen)1); + i__1 = k - 1 + (k - 1) * a_dim1; + i__2 = k - 1 + (k - 1) * a_dim1; + i__3 = *n - k; + zdotu_(&z__2, &i__3, &work[1], &c__1, &a[k + 1 + (k - 1) * a_dim1], &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + kstep = 2; + } + kp = (i__1 = ipiv[k], abs(i__1)); + if (kp != k) { + if (kp < *n) { + i__1 = *n - kp; + zswap_(&i__1, &a[kp + 1 + k * a_dim1], &c__1, &a[kp + 1 + kp * a_dim1], &c__1); + } + i__1 = kp - k - 1; + zswap_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &a[kp + (k + 1) * a_dim1], lda); + i__1 = k + k * a_dim1; + temp.r = a[i__1].r, temp.i = a[i__1].i; + i__1 = k + k * a_dim1; + i__2 = kp + kp * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + kp * a_dim1; + a[i__1].r = temp.r, a[i__1].i = temp.i; + if (kstep == 2) { + i__1 = k + (k - 1) * a_dim1; + temp.r = a[i__1].r, temp.i = a[i__1].i; + i__1 = k + (k - 1) * a_dim1; + i__2 = kp + (k - 1) * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + (k - 1) * a_dim1; + a[i__1].r = temp.r, a[i__1].i = temp.i; + } + } + k -= kstep; + goto L50; + L60:; + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/ztrsm.cpp b/lib/linalg/ztrsm.cpp new file mode 100644 index 0000000000..160b65974a --- /dev/null +++ b/lib/linalg/ztrsm.cpp @@ -0,0 +1,443 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +int ztrsm_(char *side, char *uplo, char *transa, char *diag, integer *m, integer *n, + doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + ftnlen side_len, ftnlen uplo_len, ftnlen transa_len, ftnlen diag_len) +{ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; + doublecomplex z__1, z__2, z__3; + void z_lmp_div(doublecomplex *, doublecomplex *, doublecomplex *), + d_lmp_cnjg(doublecomplex *, doublecomplex *); + integer i__, j, k, info; + doublecomplex temp; + logical lside; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nrowa; + logical upper; + extern int xerbla_(char *, integer *, ftnlen); + logical noconj, nounit; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + lside = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + noconj = lsame_(transa, (char *)"T", (ftnlen)1, (ftnlen)1); + nounit = lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1); + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + info = 0; + if (!lside && !lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 2; + } else if (!lsame_(transa, (char *)"N", (ftnlen)1, (ftnlen)1) && + !lsame_(transa, (char *)"T", (ftnlen)1, (ftnlen)1) && + !lsame_(transa, (char *)"C", (ftnlen)1, (ftnlen)1)) { + info = 3; + } else if (!lsame_(diag, (char *)"U", (ftnlen)1, (ftnlen)1) && + !lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1)) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1, nrowa)) { + info = 9; + } else if (*ldb < max(1, *m)) { + info = 11; + } + if (info != 0) { + xerbla_((char *)"ZTRSM ", &info, (ftnlen)6); + return 0; + } + if (*m == 0 || *n == 0) { + return 0; + } + if (alpha->r == 0. && alpha->i == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + b[i__3].r = 0., b[i__3].i = 0.; + } + } + return 0; + } + if (lside) { + if (lsame_(transa, (char *)"N", (ftnlen)1, (ftnlen)1)) { + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (alpha->r != 1. || alpha->i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4].i, + z__1.i = alpha->r * b[i__4].i + alpha->i * b[i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + } + for (k = *m; k >= 1; --k) { + i__2 = k + j * b_dim1; + if (b[i__2].r != 0. || b[i__2].i != 0.) { + if (nounit) { + i__2 = k + j * b_dim1; + z_lmp_div(&z__1, &b[k + j * b_dim1], &a[k + k * a_dim1]); + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + } + i__2 = k - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = k + j * b_dim1; + i__6 = i__ + k * a_dim1; + z__2.r = b[i__5].r * a[i__6].r - b[i__5].i * a[i__6].i, + z__2.i = b[i__5].r * a[i__6].i + b[i__5].i * a[i__6].r; + z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4].i - z__2.i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + } + } + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (alpha->r != 1. || alpha->i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4].i, + z__1.i = alpha->r * b[i__4].i + alpha->i * b[i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + } + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + i__3 = k + j * b_dim1; + if (b[i__3].r != 0. || b[i__3].i != 0.) { + if (nounit) { + i__3 = k + j * b_dim1; + z_lmp_div(&z__1, &b[k + j * b_dim1], &a[k + k * a_dim1]); + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + i__3 = *m; + for (i__ = k + 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = k + j * b_dim1; + i__7 = i__ + k * a_dim1; + z__2.r = b[i__6].r * a[i__7].r - b[i__6].i * a[i__7].i, + z__2.i = b[i__6].r * a[i__7].i + b[i__6].i * a[i__7].r; + z__1.r = b[i__5].r - z__2.r, z__1.i = b[i__5].i - z__2.i; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; + } + } + } + } + } + } else { + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, + z__1.i = alpha->r * b[i__3].i + alpha->i * b[i__3].r; + temp.r = z__1.r, temp.i = z__1.i; + if (noconj) { + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + i__4 = k + i__ * a_dim1; + i__5 = k + j * b_dim1; + z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5].i, + z__2.i = a[i__4].r * b[i__5].i + a[i__4].i * b[i__5].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (nounit) { + z_lmp_div(&z__1, &temp, &a[i__ + i__ * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + d_lmp_cnjg(&z__3, &a[k + i__ * a_dim1]); + i__4 = k + j * b_dim1; + z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i, + z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (nounit) { + d_lmp_cnjg(&z__2, &a[i__ + i__ * a_dim1]); + z_lmp_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__3 = i__ + j * b_dim1; + b[i__3].r = temp.r, b[i__3].i = temp.i; + } + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + i__2 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2].i, + z__1.i = alpha->r * b[i__2].i + alpha->i * b[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + if (noconj) { + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + i__3 = k + i__ * a_dim1; + i__4 = k + j * b_dim1; + z__2.r = a[i__3].r * b[i__4].r - a[i__3].i * b[i__4].i, + z__2.i = a[i__3].r * b[i__4].i + a[i__3].i * b[i__4].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (nounit) { + z_lmp_div(&z__1, &temp, &a[i__ + i__ * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + d_lmp_cnjg(&z__3, &a[k + i__ * a_dim1]); + i__3 = k + j * b_dim1; + z__2.r = z__3.r * b[i__3].r - z__3.i * b[i__3].i, + z__2.i = z__3.r * b[i__3].i + z__3.i * b[i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (nounit) { + d_lmp_cnjg(&z__2, &a[i__ + i__ * a_dim1]); + z_lmp_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__2 = i__ + j * b_dim1; + b[i__2].r = temp.r, b[i__2].i = temp.i; + } + } + } + } + } else { + if (lsame_(transa, (char *)"N", (ftnlen)1, (ftnlen)1)) { + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (alpha->r != 1. || alpha->i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4].i, + z__1.i = alpha->r * b[i__4].i + alpha->i * b[i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + } + i__2 = j - 1; + for (k = 1; k <= i__2; ++k) { + i__3 = k + j * a_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0.) { + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = k + j * a_dim1; + i__7 = i__ + k * b_dim1; + z__2.r = a[i__6].r * b[i__7].r - a[i__6].i * b[i__7].i, + z__2.i = a[i__6].r * b[i__7].i + a[i__6].i * b[i__7].r; + z__1.r = b[i__5].r - z__2.r, z__1.i = b[i__5].i - z__2.i; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; + } + } + } + if (nounit) { + z_lmp_div(&z__1, &c_b1, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, + z__1.i = temp.r * b[i__4].i + temp.i * b[i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + } + } + } else { + for (j = *n; j >= 1; --j) { + if (alpha->r != 1. || alpha->i != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, + z__1.i = alpha->r * b[i__3].i + alpha->i * b[i__3].r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + } + } + i__1 = *n; + for (k = j + 1; k <= i__1; ++k) { + i__2 = k + j * a_dim1; + if (a[i__2].r != 0. || a[i__2].i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = k + j * a_dim1; + i__6 = i__ + k * b_dim1; + z__2.r = a[i__5].r * b[i__6].r - a[i__5].i * b[i__6].i, + z__2.i = a[i__5].r * b[i__6].i + a[i__5].i * b[i__6].r; + z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4].i - z__2.i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + } + } + if (nounit) { + z_lmp_div(&z__1, &c_b1, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, + z__1.i = temp.r * b[i__3].i + temp.i * b[i__3].r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + } + } + } + } + } else { + if (upper) { + for (k = *n; k >= 1; --k) { + if (nounit) { + if (noconj) { + z_lmp_div(&z__1, &c_b1, &a[k + k * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } else { + d_lmp_cnjg(&z__2, &a[k + k * a_dim1]); + z_lmp_div(&z__1, &c_b1, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + k * b_dim1; + i__3 = i__ + k * b_dim1; + z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, + z__1.i = temp.r * b[i__3].i + temp.i * b[i__3].r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + } + } + i__1 = k - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = j + k * a_dim1; + if (a[i__2].r != 0. || a[i__2].i != 0.) { + if (noconj) { + i__2 = j + k * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + } else { + d_lmp_cnjg(&z__1, &a[j + k * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = i__ + k * b_dim1; + z__2.r = temp.r * b[i__5].r - temp.i * b[i__5].i, + z__2.i = temp.r * b[i__5].i + temp.i * b[i__5].r; + z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4].i - z__2.i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + } + } + if (alpha->r != 1. || alpha->i != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + k * b_dim1; + i__3 = i__ + k * b_dim1; + z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, + z__1.i = alpha->r * b[i__3].i + alpha->i * b[i__3].r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + } + } + } + } else { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + if (nounit) { + if (noconj) { + z_lmp_div(&z__1, &c_b1, &a[k + k * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } else { + d_lmp_cnjg(&z__2, &a[k + k * a_dim1]); + z_lmp_div(&z__1, &c_b1, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + k * b_dim1; + i__4 = i__ + k * b_dim1; + z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, + z__1.i = temp.r * b[i__4].i + temp.i * b[i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + } + i__2 = *n; + for (j = k + 1; j <= i__2; ++j) { + i__3 = j + k * a_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0.) { + if (noconj) { + i__3 = j + k * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + } else { + d_lmp_cnjg(&z__1, &a[j + k * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = i__ + k * b_dim1; + z__2.r = temp.r * b[i__6].r - temp.i * b[i__6].i, + z__2.i = temp.r * b[i__6].i + temp.i * b[i__6].r; + z__1.r = b[i__5].r - z__2.r, z__1.i = b[i__5].i - z__2.i; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; + } + } + } + if (alpha->r != 1. || alpha->i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + k * b_dim1; + i__4 = i__ + k * b_dim1; + z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4].i, + z__1.i = alpha->r * b[i__4].i + alpha->i * b[i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + } + } + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/ztrsv.cpp b/lib/linalg/ztrsv.cpp new file mode 100644 index 0000000000..324416d9e3 --- /dev/null +++ b/lib/linalg/ztrsv.cpp @@ -0,0 +1,330 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int ztrsv_(char *uplo, char *trans, char *diag, integer *n, doublecomplex *a, integer *lda, + doublecomplex *x, integer *incx, ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2, z__3; + void z_lmp_div(doublecomplex *, doublecomplex *, doublecomplex *), + d_lmp_cnjg(doublecomplex *, doublecomplex *); + integer i__, j, ix, jx, kx, info; + doublecomplex temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int xerbla_(char *, integer *, ftnlen); + logical noconj, nounit; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + info = 0; + if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (!lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1) && + !lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1) && + !lsame_(trans, (char *)"C", (ftnlen)1, (ftnlen)1)) { + info = 2; + } else if (!lsame_(diag, (char *)"U", (ftnlen)1, (ftnlen)1) && + !lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1)) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1, *n)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } + if (info != 0) { + xerbla_((char *)"ZTRSV ", &info, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + noconj = lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1); + nounit = lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1); + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) { + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + if (x[i__1].r != 0. || x[i__1].i != 0.) { + if (nounit) { + i__1 = j; + z_lmp_div(&z__1, &x[j], &a[j + j * a_dim1]); + x[i__1].r = z__1.r, x[i__1].i = z__1.i; + } + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + for (i__ = j - 1; i__ >= 1; --i__) { + i__1 = i__; + i__2 = i__; + i__3 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, + z__2.i = temp.r * a[i__3].i + temp.i * a[i__3].r; + z__1.r = x[i__2].r - z__2.r, z__1.i = x[i__2].i - z__2.i; + x[i__1].r = z__1.r, x[i__1].i = z__1.i; + } + } + } + } else { + jx = kx + (*n - 1) * *incx; + for (j = *n; j >= 1; --j) { + i__1 = jx; + if (x[i__1].r != 0. || x[i__1].i != 0.) { + if (nounit) { + i__1 = jx; + z_lmp_div(&z__1, &x[jx], &a[j + j * a_dim1]); + x[i__1].r = z__1.r, x[i__1].i = z__1.i; + } + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + ix = jx; + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + i__1 = ix; + i__2 = ix; + i__3 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, + z__2.i = temp.r * a[i__3].i + temp.i * a[i__3].r; + z__1.r = x[i__2].r - z__2.r, z__1.i = x[i__2].i - z__2.i; + x[i__1].r = z__1.r, x[i__1].i = z__1.i; + } + } + jx -= *incx; + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + if (nounit) { + i__2 = j; + z_lmp_div(&z__1, &x[j], &a[j + j * a_dim1]); + x[i__2].r = z__1.r, x[i__2].i = z__1.i; + } + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__2.i = temp.r * a[i__5].i + temp.i * a[i__5].r; + z__1.r = x[i__4].r - z__2.r, z__1.i = x[i__4].i - z__2.i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + } + } + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + if (nounit) { + i__2 = jx; + z_lmp_div(&z__1, &x[jx], &a[j + j * a_dim1]); + x[i__2].r = z__1.r, x[i__2].i = z__1.i; + } + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + ix = jx; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + i__3 = ix; + i__4 = ix; + i__5 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__2.i = temp.r * a[i__5].i + temp.i * a[i__5].r; + z__1.r = x[i__4].r - z__2.r, z__1.i = x[i__4].i - z__2.i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + } + } + jx += *incx; + } + } + } + } else { + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + if (noconj) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4].i, + z__2.i = a[i__3].r * x[i__4].i + a[i__3].i * x[i__4].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (nounit) { + z_lmp_div(&z__1, &temp, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + d_lmp_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = i__; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, + z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (nounit) { + d_lmp_cnjg(&z__2, &a[j + j * a_dim1]); + z_lmp_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__2 = j; + x[i__2].r = temp.r, x[i__2].i = temp.i; + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + ix = kx; + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + if (noconj) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = ix; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4].i, + z__2.i = a[i__3].r * x[i__4].i + a[i__3].i * x[i__4].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix += *incx; + } + if (nounit) { + z_lmp_div(&z__1, &temp, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + d_lmp_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = ix; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, + z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix += *incx; + } + if (nounit) { + d_lmp_cnjg(&z__2, &a[j + j * a_dim1]); + z_lmp_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__2 = jx; + x[i__2].r = temp.r, x[i__2].i = temp.i; + jx += *incx; + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + if (noconj) { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + i__2 = i__ + j * a_dim1; + i__3 = i__; + z__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[i__3].i, + z__2.i = a[i__2].r * x[i__3].i + a[i__2].i * x[i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (nounit) { + z_lmp_div(&z__1, &temp, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + d_lmp_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__2 = i__; + z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, + z__2.i = z__3.r * x[i__2].i + z__3.i * x[i__2].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (nounit) { + d_lmp_cnjg(&z__2, &a[j + j * a_dim1]); + z_lmp_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__1 = j; + x[i__1].r = temp.r, x[i__1].i = temp.i; + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + ix = kx; + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + if (noconj) { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + i__2 = i__ + j * a_dim1; + i__3 = ix; + z__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[i__3].i, + z__2.i = a[i__2].r * x[i__3].i + a[i__2].i * x[i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix -= *incx; + } + if (nounit) { + z_lmp_div(&z__1, &temp, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + d_lmp_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__2 = ix; + z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, + z__2.i = z__3.r * x[i__2].i + z__3.i * x[i__2].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix -= *incx; + } + if (nounit) { + d_lmp_cnjg(&z__2, &a[j + j * a_dim1]); + z_lmp_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__1 = jx; + x[i__1].r = temp.r, x[i__1].i = temp.i; + jx -= *incx; + } + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/ztrti2.cpp b/lib/linalg/ztrti2.cpp new file mode 100644 index 0000000000..00cb4154b3 --- /dev/null +++ b/lib/linalg/ztrti2.cpp @@ -0,0 +1,88 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +int ztrti2_(char *uplo, char *diag, integer *n, doublecomplex *a, integer *lda, integer *info, + ftnlen uplo_len, ftnlen diag_len) +{ + integer a_dim1, a_offset, i__1, i__2; + doublecomplex z__1; + void z_lmp_div(doublecomplex *, doublecomplex *, doublecomplex *); + integer j; + doublecomplex ajj; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int zscal_(integer *, doublecomplex *, doublecomplex *, integer *); + logical upper; + extern int ztrmv_(char *, char *, char *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, ftnlen, ftnlen, ftnlen), + xerbla_(char *, integer *, ftnlen); + logical nounit; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + nounit = lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (!nounit && !lsame_(diag, (char *)"U", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZTRTI2", &i__1, (ftnlen)6); + return 0; + } + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (nounit) { + i__2 = j + j * a_dim1; + z_lmp_div(&z__1, &c_b1, &a[j + j * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = j + j * a_dim1; + z__1.r = -a[i__2].r, z__1.i = -a[i__2].i; + ajj.r = z__1.r, ajj.i = z__1.i; + } else { + z__1.r = -1., z__1.i = -0.; + ajj.r = z__1.r, ajj.i = z__1.i; + } + i__2 = j - 1; + ztrmv_((char *)"Upper", (char *)"No transpose", diag, &i__2, &a[a_offset], lda, &a[j * a_dim1 + 1], + &c__1, (ftnlen)5, (ftnlen)12, (ftnlen)1); + i__2 = j - 1; + zscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1); + } + } else { + for (j = *n; j >= 1; --j) { + if (nounit) { + i__1 = j + j * a_dim1; + z_lmp_div(&z__1, &c_b1, &a[j + j * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = j + j * a_dim1; + z__1.r = -a[i__1].r, z__1.i = -a[i__1].i; + ajj.r = z__1.r, ajj.i = z__1.i; + } else { + z__1.r = -1., z__1.i = -0.; + ajj.r = z__1.r, ajj.i = z__1.i; + } + if (j < *n) { + i__1 = *n - j; + ztrmv_((char *)"Lower", (char *)"No transpose", diag, &i__1, &a[j + 1 + (j + 1) * a_dim1], lda, + &a[j + 1 + j * a_dim1], &c__1, (ftnlen)5, (ftnlen)12, (ftnlen)1); + i__1 = *n - j; + zscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1); + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/ztrtri.cpp b/lib/linalg/ztrtri.cpp new file mode 100644 index 0000000000..771d54adb7 --- /dev/null +++ b/lib/linalg/ztrtri.cpp @@ -0,0 +1,112 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +int ztrtri_(char *uplo, char *diag, integer *n, doublecomplex *a, integer *lda, integer *info, + ftnlen uplo_len, ftnlen diag_len) +{ + address a__1[2]; + integer a_dim1, a_offset, i__1, i__2, i__3[2], i__4, i__5; + doublecomplex z__1; + char ch__1[2]; + int s_lmp_cat(char *, char **, integer *, integer *, ftnlen); + integer j, jb, nb, nn; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + logical upper; + extern int ztrmm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, + ftnlen, ftnlen), + ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen), + ztrti2_(char *, char *, integer *, doublecomplex *, integer *, integer *, ftnlen, ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + logical nounit; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + nounit = lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (!nounit && !lsame_(diag, (char *)"U", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZTRTRI", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + if (nounit) { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + i__2 = *info + *info * a_dim1; + if (a[i__2].r == 0. && a[i__2].i == 0.) { + return 0; + } + } + *info = 0; + } + i__3[0] = 1, a__1[0] = uplo; + i__3[1] = 1, a__1[1] = diag; + s_lmp_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, (char *)"ZTRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)2); + if (nb <= 1 || nb >= *n) { + ztrti2_(uplo, diag, n, &a[a_offset], lda, info, (ftnlen)1, (ftnlen)1); + } else { + if (upper) { + i__1 = *n; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + i__4 = nb, i__5 = *n - j + 1; + jb = min(i__4, i__5); + i__4 = j - 1; + ztrmm_((char *)"Left", (char *)"Upper", (char *)"No transpose", diag, &i__4, &jb, &c_b1, &a[a_offset], lda, + &a[j * a_dim1 + 1], lda, (ftnlen)4, (ftnlen)5, (ftnlen)12, (ftnlen)1); + i__4 = j - 1; + z__1.r = -1., z__1.i = -0.; + ztrsm_((char *)"Right", (char *)"Upper", (char *)"No transpose", diag, &i__4, &jb, &z__1, + &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1], lda, (ftnlen)5, (ftnlen)5, + (ftnlen)12, (ftnlen)1); + ztrti2_((char *)"Upper", diag, &jb, &a[j + j * a_dim1], lda, info, (ftnlen)5, (ftnlen)1); + } + } else { + nn = (*n - 1) / nb * nb + 1; + i__2 = -nb; + for (j = nn; i__2 < 0 ? j >= 1 : j <= 1; j += i__2) { + i__1 = nb, i__4 = *n - j + 1; + jb = min(i__1, i__4); + if (j + jb <= *n) { + i__1 = *n - j - jb + 1; + ztrmm_((char *)"Left", (char *)"Lower", (char *)"No transpose", diag, &i__1, &jb, &c_b1, + &a[j + jb + (j + jb) * a_dim1], lda, &a[j + jb + j * a_dim1], lda, + (ftnlen)4, (ftnlen)5, (ftnlen)12, (ftnlen)1); + i__1 = *n - j - jb + 1; + z__1.r = -1., z__1.i = -0.; + ztrsm_((char *)"Right", (char *)"Lower", (char *)"No transpose", diag, &i__1, &jb, &z__1, + &a[j + j * a_dim1], lda, &a[j + jb + j * a_dim1], lda, (ftnlen)5, + (ftnlen)5, (ftnlen)12, (ftnlen)1); + } + ztrti2_((char *)"Lower", diag, &jb, &a[j + j * a_dim1], lda, info, (ftnlen)5, (ftnlen)1); + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/src/.gitignore b/src/.gitignore index b0c27849b2..f0554e3bfe 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -988,6 +988,8 @@ /fix_qeq_reaxff.h /fix_qmmm.cpp /fix_qmmm.h +/fix_qtpie_reaxff.cpp +/fix_qtpie_reaxff.h /fix_reaxff.cpp /fix_reaxff.h /fix_reaxff_bonds.cpp diff --git a/src/Depend.sh b/src/Depend.sh index 85542b21c0..9ddb29450d 100755 --- a/src/Depend.sh +++ b/src/Depend.sh @@ -68,6 +68,10 @@ if (test $1 = "COLLOID") then depend OPENMP fi +if (test $1 = "COLVARS") then + depend KOKKOS +fi + if (test $1 = "DIELECTRIC") then depend OPENMP fi diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index ee2e8e61fe..64ba0c6b03 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -131,6 +131,10 @@ action fft3d_kokkos.h fft3d.h action fftdata_kokkos.h fft3d.h action fix_acks2_reaxff_kokkos.cpp fix_acks2_reaxff.cpp action fix_acks2_reaxff_kokkos.h fix_acks2_reaxff.h +action fix_cmap_kokkos.cpp fix_cmap.cpp +action fix_cmap_kokkos.h fix_cmap.h +action fix_colvars_kokkos.cpp fix_colvars.cpp +action fix_colvars_kokkos.h fix_colvars.h action fix_deform_kokkos.cpp action fix_deform_kokkos.h action fix_dpd_energy_kokkos.cpp fix_dpd_energy.cpp @@ -163,6 +167,8 @@ action fix_npt_kokkos.cpp action fix_npt_kokkos.h action fix_nve_kokkos.cpp action fix_nve_kokkos.h +action fix_nve_limit_kokkos.cpp +action fix_nve_limit_kokkos.h action fix_nve_sphere_kokkos.cpp action fix_nve_sphere_kokkos.h action fix_nvt_kokkos.cpp @@ -179,6 +185,8 @@ action compute_reaxff_atom_kokkos.cpp compute_reaxff_atom.cpp action compute_reaxff_atom_kokkos.h compute_reaxff_atom.h action fix_reaxff_species_kokkos.cpp fix_reaxff_species.cpp action fix_reaxff_species_kokkos.h fix_reaxff_species.h +action fix_recenter_kokkos.cpp +action fix_recenter_kokkos.h action fix_rx_kokkos.cpp fix_rx.cpp action fix_rx_kokkos.h fix_rx.h action fix_setforce_kokkos.cpp @@ -205,8 +213,12 @@ action fix_wall_lj93_kokkos.cpp action fix_wall_lj93_kokkos.h action fix_wall_reflect_kokkos.cpp action fix_wall_reflect_kokkos.h +action fix_wall_region_kokkos.cpp +action fix_wall_region_kokkos.h action grid3d_kokkos.cpp fft3d.h action grid3d_kokkos.h fft3d.h +action group_kokkos.cpp +action group_kokkos.h action improper_class2_kokkos.cpp improper_class2.cpp action improper_class2_kokkos.h improper_class2.h action improper_harmonic_kokkos.cpp improper_harmonic.cpp @@ -409,6 +421,8 @@ action rand_pool_wrap_kokkos.cpp action rand_pool_wrap_kokkos.h action region_block_kokkos.cpp action region_block_kokkos.h +action region_sphere_kokkos.cpp +action region_sphere_kokkos.h action remap_kokkos.cpp remap.cpp action remap_kokkos.h remap.h action sna_kokkos_impl.h sna.cpp diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp index e2ae9ffb19..7529921058 100644 --- a/src/KOKKOS/atom_kokkos.cpp +++ b/src/KOKKOS/atom_kokkos.cpp @@ -208,6 +208,8 @@ void AtomKokkos::sort() auto fix_iextra = modify->fix[atom->extra_grow[iextra]]; if (!fix_iextra->sort_device) { flag = 0; + if (comm->me == 0) + error->warning(FLERR,"Fix {} not compatible with Kokkos sorting on device", fix_iextra->style); break; } } diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index 7030f706b8..d4dd68ce18 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -24,17 +24,6 @@ namespace LAMMPS_NS { -union d_ubuf { - double d; - int64_t i; - KOKKOS_INLINE_FUNCTION - d_ubuf(double arg) : d(arg) {} - KOKKOS_INLINE_FUNCTION - d_ubuf(int64_t arg) : i(arg) {} - KOKKOS_INLINE_FUNCTION - d_ubuf(int arg) : i(arg) {} -}; - class AtomVecKokkos : virtual public AtomVec { public: AtomVecKokkos(class LAMMPS *); diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 8f821c3036..4cebf34eb2 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -739,6 +739,8 @@ void CommKokkos::exchange() auto fix_iextra = modify->fix[atom->extra_grow[iextra]]; if (!fix_iextra->exchange_comm_device) { flag = 0; + if (comm->me == 0) + error->warning(FLERR,"Fix {} not compatible with sending data in Kokkos communication", fix_iextra->style); break; } } diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp new file mode 100644 index 0000000000..dd92afe9cc --- /dev/null +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -0,0 +1,962 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Mitch Murphy (alphataubio at gmail) +------------------------------------------------------------------------- */ + +#include "fix_cmap_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "comm.h" +#include "domain.h" +#include "error.h" +#include "input.h" +#include "math_const.h" +#include "memory_kokkos.h" +#include "modify.h" +#include "update.h" +#include "variable.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +static constexpr int CMAPMAX = 6; // max # of CMAP terms stored by one atom +static constexpr int CMAPDIM = 24; // grid map dimension is 24 x 24 +static constexpr double CMAPXMIN2 = -180.0; +static constexpr double CMAPDX = 15.0; // 360/CMAPDIM + + +/* ---------------------------------------------------------------------- */ + +template +FixCMAPKokkos::FixCMAPKokkos(LAMMPS *lmp, int narg, char **arg) : + FixCMAP(lmp, narg, arg) +{ + kokkosable = 1; + exchange_comm_device = sort_device = 1; + atomKK = (AtomKokkos *)atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = EMPTY_MASK; + datamask_modify = EMPTY_MASK; + + // allocate memory for CMAP data + + memoryKK->create_kokkos(k_g_axis,g_axis,CMAPDIM,"cmap:g_axis"); + memoryKK->create_kokkos(k_cmapgrid,cmapgrid,CMAPMAX,CMAPDIM,CMAPDIM,"cmap:grid"); + memoryKK->create_kokkos(k_d1cmapgrid,d1cmapgrid,CMAPMAX,CMAPDIM,CMAPDIM,"cmap:d1grid"); + memoryKK->create_kokkos(k_d2cmapgrid,d2cmapgrid,CMAPMAX,CMAPDIM,CMAPDIM,"cmap:d2grid"); + memoryKK->create_kokkos(k_d12cmapgrid,d12cmapgrid,CMAPMAX,CMAPDIM,CMAPDIM,"cmap:d12grid"); + + d_g_axis = k_g_axis.template view(); + d_cmapgrid = k_cmapgrid.template view(); + d_d1cmapgrid = k_d1cmapgrid.template view(); + d_d2cmapgrid = k_d2cmapgrid.template view(); + d_d12cmapgrid = k_d12cmapgrid.template view(); + + // read and setup CMAP data + + read_grid_map(arg[3]); + + int i = 0; + double angle = -180.0; + + while (angle < 180.0) { + g_axis[i] = angle; + angle += CMAPDX; + i++; + } + + FixCMAPKokkos::grow_arrays(atom->nmax); + + for( int i=0 ; i(); + k_cmapgrid.template sync(); + k_d1cmapgrid.template sync(); + k_d2cmapgrid.template sync(); + k_d12cmapgrid.template sync(); + + d_count = typename AT::t_int_scalar("fix_cmap:count"); + h_count = Kokkos::create_mirror_view(d_count); + +} + +/* ---------------------------------------------------------------------- */ + +template +FixCMAPKokkos::~FixCMAPKokkos() +{ + if (copymode) return; + + memoryKK->destroy_kokkos(k_g_axis,g_axis); + memoryKK->destroy_kokkos(k_cmapgrid,cmapgrid); + memoryKK->destroy_kokkos(k_d1cmapgrid,d1cmapgrid); + memoryKK->destroy_kokkos(k_d2cmapgrid,d2cmapgrid); + memoryKK->destroy_kokkos(k_d12cmapgrid,d12cmapgrid); + + memoryKK->destroy_kokkos(k_num_crossterm,num_crossterm); + memoryKK->destroy_kokkos(k_crossterm_type,crossterm_type); + memoryKK->destroy_kokkos(k_crossterm_atom1,crossterm_atom1); + memoryKK->destroy_kokkos(k_crossterm_atom2,crossterm_atom2); + memoryKK->destroy_kokkos(k_crossterm_atom3,crossterm_atom3); + memoryKK->destroy_kokkos(k_crossterm_atom4,crossterm_atom4); + memoryKK->destroy_kokkos(k_crossterm_atom5,crossterm_atom5); + + memoryKK->destroy_kokkos(d_crosstermlist); +} + +/* ---------------------------------------------------------------------- */ + +template +void FixCMAPKokkos::init() +{ + if (utils::strmatch(update->integrate_style,"^respa")) + error->all(FLERR,"Cannot yet use respa with Kokkos"); + + // on KOKKOS, allocate enough for all crossterms on each GPU to avoid grow operation in device code + + maxcrossterm = ncmap; + memoryKK->create_kokkos(d_crosstermlist,maxcrossterm,CMAPMAX,"cmap:crosstermlist"); +} + +/* ---------------------------------------------------------------------- + store local neighbor list as if newton_bond = OFF, even if actually ON +------------------------------------------------------------------------- */ + +template +void FixCMAPKokkos::pre_neighbor() +{ + atomKK->sync(execution_space,X_MASK); + d_x = atomKK->k_x.view(); + int nlocal = atomKK->nlocal; + + map_style = atom->map_style; + if (map_style == Atom::MAP_ARRAY) { + k_map_array = atomKK->k_map_array; + k_map_array.template sync(); + } else if (map_style == Atom::MAP_HASH) { + k_map_hash = atomKK->k_map_hash; + k_map_hash.template sync(); + } + + atomKK->k_sametag.sync(); + d_sametag = atomKK->k_sametag.view(); + + copymode = 1; + Kokkos::parallel_scan(Kokkos::RangePolicy(0,nlocal),*this,ncrosstermlist); + copymode = 0; +} + +template +KOKKOS_INLINE_FUNCTION +void FixCMAPKokkos::operator()(TagFixCmapPreNeighbor, const int i, int &l_ncrosstermlist, const bool is_final ) const +{ + for( int m = 0; m < d_num_crossterm(i); m++) { + + int atom1 = AtomKokkos::map_kokkos(d_crossterm_atom1(i,m),map_style,k_map_array,k_map_hash); + int atom2 = AtomKokkos::map_kokkos(d_crossterm_atom2(i,m),map_style,k_map_array,k_map_hash); + int atom3 = AtomKokkos::map_kokkos(d_crossterm_atom3(i,m),map_style,k_map_array,k_map_hash); + int atom4 = AtomKokkos::map_kokkos(d_crossterm_atom4(i,m),map_style,k_map_array,k_map_hash); + int atom5 = AtomKokkos::map_kokkos(d_crossterm_atom5(i,m),map_style,k_map_array,k_map_hash); + + if( atom1 == -1 || atom2 == -1 || atom3 == -1 || atom4 == -1 || atom5 == -1) + Kokkos::abort("CMAP atoms missing on proc"); + + atom1 = closest_image(i,atom1); + atom2 = closest_image(i,atom2); + atom3 = closest_image(i,atom3); + atom4 = closest_image(i,atom4); + atom5 = closest_image(i,atom5); + + if( i <= atom1 && i <= atom2 && i <= atom3 && i <= atom4 && i <= atom5) { + if (l_ncrosstermlist > maxcrossterm) Kokkos::abort("l_ncrosstermlist > maxcrossterm"); + if(is_final) { + d_crosstermlist(l_ncrosstermlist,0) = atom1; + d_crosstermlist(l_ncrosstermlist,1) = atom2; + d_crosstermlist(l_ncrosstermlist,2) = atom3; + d_crosstermlist(l_ncrosstermlist,3) = atom4; + d_crosstermlist(l_ncrosstermlist,4) = atom5; + d_crosstermlist(l_ncrosstermlist,5) = d_crossterm_type(i,m); + } + l_ncrosstermlist++; + } + } +} + +/* ---------------------------------------------------------------------- + compute CMAP terms as if newton_bond = OFF, even if actually ON +------------------------------------------------------------------------- */ + +template +void FixCMAPKokkos::post_force(int vflag) +{ + d_x = atomKK->k_x.template view(); + d_f = atomKK->k_f.template view(); + atomKK->sync(execution_space,X_MASK|F_MASK); + + int eflag = eflag_caller; + ev_init(eflag,vflag); + + copymode = 1; + nlocal = atomKK->nlocal; + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,ncrosstermlist),*this,ecmap); + copymode = 0; + atomKK->modified(execution_space,F_MASK); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixCMAPKokkos::operator()(TagFixCmapPostForce, const int n, double &ecmapKK) const +{ + // Definition of cross-term dihedrals + + // phi dihedral + // |--------------------| + // a1-----a2-----a3-----a4-----a5 cross-term atoms + // C N CA C N cross-term atom types + // |--------------------| + // psi dihedral + + int i1 = d_crosstermlist(n,0); + int i2 = d_crosstermlist(n,1); + int i3 = d_crosstermlist(n,2); + int i4 = d_crosstermlist(n,3); + int i5 = d_crosstermlist(n,4); + int type = d_crosstermlist(n,5); + if (type == 0) return; + + // calculate bond vectors for both dihedrals + + // phi + // vb21 = r2 - r1 + + double vb21x = d_x(i2,0) - d_x(i1,0); + double vb21y = d_x(i2,1) - d_x(i1,1); + double vb21z = d_x(i2,2) - d_x(i1,2); + double vb12x = -1.0*vb21x; + double vb12y = -1.0*vb21y; + double vb12z = -1.0*vb21z; + double vb32x = d_x(i3,0) - d_x(i2,0); + double vb32y = d_x(i3,1) - d_x(i2,1); + double vb32z = d_x(i3,2) - d_x(i2,2); + double vb23x = -1.0*vb32x; + double vb23y = -1.0*vb32y; + double vb23z = -1.0*vb32z; + + double vb34x = d_x(i3,0) - d_x(i4,0); + double vb34y = d_x(i3,1) - d_x(i4,1); + double vb34z = d_x(i3,2) - d_x(i4,2); + + // psi + // bond vectors same as for phi: vb32 + + double vb43x = -1.0*vb34x; + double vb43y = -1.0*vb34y; + double vb43z = -1.0*vb34z; + + double vb45x = d_x(i4,0) - d_x(i5,0); + double vb45y = d_x(i4,1) - d_x(i5,1); + double vb45z = d_x(i4,2) - d_x(i5,2); + + // calculate normal vectors for planes that define the dihedral angles + + double a1x = vb12y*vb23z - vb12z*vb23y; + double a1y = vb12z*vb23x - vb12x*vb23z; + double a1z = vb12x*vb23y - vb12y*vb23x; + + double b1x = vb43y*vb23z - vb43z*vb23y; + double b1y = vb43z*vb23x - vb43x*vb23z; + double b1z = vb43x*vb23y - vb43y*vb23x; + + double a2x = vb23y*vb34z - vb23z*vb34y; + double a2y = vb23z*vb34x - vb23x*vb34z; + double a2z = vb23x*vb34y - vb23y*vb34x; + + double b2x = vb45y*vb43z - vb45z*vb43y; + double b2y = vb45z*vb43x - vb45x*vb43z; + double b2z = vb45x*vb43y - vb45y*vb43x; + + // calculate terms used later in calculations + + double r32 = sqrt(vb32x*vb32x + vb32y*vb32y + vb32z*vb32z); + double a1sq = a1x*a1x + a1y*a1y + a1z*a1z; + double b1sq = b1x*b1x + b1y*b1y + b1z*b1z; + + double r43 = sqrt(vb43x*vb43x + vb43y*vb43y + vb43z*vb43z); + double a2sq = a2x*a2x + a2y*a2y + a2z*a2z; + double b2sq = b2x*b2x + b2y*b2y + b2z*b2z; + if (a1sq<0.0001 || b1sq<0.0001 || a2sq<0.0001 || b2sq<0.0001) return; + + // vectors needed to calculate the cross-term dihedral angles + + double dpr21r32 = vb21x*vb32x + vb21y*vb32y + vb21z*vb32z; + double dpr34r32 = vb34x*vb32x + vb34y*vb32y + vb34z*vb32z; + double dpr32r43 = vb32x*vb43x + vb32y*vb43y + vb32z*vb43z; + double dpr45r43 = vb45x*vb43x + vb45y*vb43y + vb45z*vb43z; + + // cross-term dihedral angles + // calculate the backbone dihedral angles as VMD and GROMACS + + double phi = dihedral_angle_atan2(vb21x,vb21y,vb21z,a1x,a1y,a1z,b1x,b1y,b1z,r32); + double psi = dihedral_angle_atan2(vb32x,vb32y,vb32z,a2x,a2y,a2z,b2x,b2y,b2z,r43); + + if (phi == 180.0) phi= -180.0; + if (psi == 180.0) psi= -180.0; + + double phi1 = phi; + if (phi1 < 0.0) phi1 += 360.0; + double psi1 = psi; + if (psi1 < 0.0) psi1 += 360.0; + + // find the neighbor grid point index + + int li1 = int(((phi1+CMAPXMIN2)/CMAPDX)+((CMAPDIM*1.0)/2.0)); + int li2 = int(((psi1+CMAPXMIN2)/CMAPDX)+((CMAPDIM*1.0)/2.0)); + int li3 = int((phi-CMAPXMIN2)/CMAPDX); + int li4 = int((psi-CMAPXMIN2)/CMAPDX); + int mli3 = li3 % CMAPDIM; + int mli4 = li4 % CMAPDIM; + int mli31 = (li3+1) % CMAPDIM; + int mli41 = (li4+1) %CMAPDIM; + int mli1 = li1 % CMAPDIM; + int mli2 = li2 % CMAPDIM; + int mli11 = (li1+1) % CMAPDIM; + int mli21 = (li2+1) %CMAPDIM; + int t1 = type-1; + if (t1 < 0 || t1 > 5) Kokkos::abort("Invalid CMAP crossterm_type"); + + // determine the values and derivatives for the grid square points + + double gs[4],d1gs[4],d2gs[4],d12gs[4]; + + gs[0] = d_cmapgrid(t1,mli3,mli4); + gs[1] = d_cmapgrid(t1,mli31,mli4); + gs[2] = d_cmapgrid(t1,mli31,mli41); + gs[3] = d_cmapgrid(t1,mli3,mli41); + d1gs[0] = d_d1cmapgrid(t1,mli1,mli2); + d1gs[1] = d_d1cmapgrid(t1,mli11,mli2); + d1gs[2] = d_d1cmapgrid(t1,mli11,mli21); + d1gs[3] = d_d1cmapgrid(t1,mli1,mli21); + d2gs[0] = d_d2cmapgrid(t1,mli1,mli2); + d2gs[1] = d_d2cmapgrid(t1,mli11,mli2); + d2gs[2] = d_d2cmapgrid(t1,mli11,mli21); + d2gs[3] = d_d2cmapgrid(t1,mli1,mli21); + d12gs[0] = d_d12cmapgrid(t1,mli1,mli2); + d12gs[1] = d_d12cmapgrid(t1,mli11,mli2); + d12gs[2] = d_d12cmapgrid(t1,mli11,mli21); + d12gs[3] = d_d12cmapgrid(t1,mli1,mli21); + + // calculate the cmap energy and the gradient (dE/dphi,dE/dpsi) + + double E, dEdPhi, dEdPsi; + bc_interpol(phi,psi,li3,li4,gs,d1gs,d2gs,d12gs,E,dEdPhi,dEdPsi); + + // sum up cmap energy contributions + // needed for compute_scalar() + + double engfraction = 0.2 * E; + if (i1 < nlocal) ecmapKK += engfraction; + if (i2 < nlocal) ecmapKK += engfraction; + if (i3 < nlocal) ecmapKK += engfraction; + if (i4 < nlocal) ecmapKK += engfraction; + if (i5 < nlocal) ecmapKK += engfraction; + + // calculate the derivatives dphi/dr_i + + double dphidr1x = 1.0*r32/a1sq*a1x; + double dphidr1y = 1.0*r32/a1sq*a1y; + double dphidr1z = 1.0*r32/a1sq*a1z; + + double dphidr2x = -1.0*r32/a1sq*a1x - dpr21r32/a1sq/r32*a1x + dpr34r32/b1sq/r32*b1x; + double dphidr2y = -1.0*r32/a1sq*a1y - dpr21r32/a1sq/r32*a1y + dpr34r32/b1sq/r32*b1y; + double dphidr2z = -1.0*r32/a1sq*a1z - dpr21r32/a1sq/r32*a1z + dpr34r32/b1sq/r32*b1z; + + double dphidr3x = dpr34r32/b1sq/r32*b1x - dpr21r32/a1sq/r32*a1x - r32/b1sq*b1x; + double dphidr3y = dpr34r32/b1sq/r32*b1y - dpr21r32/a1sq/r32*a1y - r32/b1sq*b1y; + double dphidr3z = dpr34r32/b1sq/r32*b1z - dpr21r32/a1sq/r32*a1z - r32/b1sq*b1z; + + double dphidr4x = r32/b1sq*b1x; + double dphidr4y = r32/b1sq*b1y; + double dphidr4z = r32/b1sq*b1z; + + // calculate the derivatives dpsi/dr_i + + double dpsidr1x = 1.0*r43/a2sq*a2x; + double dpsidr1y = 1.0*r43/a2sq*a2y; + double dpsidr1z = 1.0*r43/a2sq*a2z; + + double dpsidr2x = r43/a2sq*a2x + dpr32r43/a2sq/r43*a2x - dpr45r43/b2sq/r43*b2x; + double dpsidr2y = r43/a2sq*a2y + dpr32r43/a2sq/r43*a2y - dpr45r43/b2sq/r43*b2y; + double dpsidr2z = r43/a2sq*a2z + dpr32r43/a2sq/r43*a2z - dpr45r43/b2sq/r43*b2z; + + double dpsidr3x = dpr45r43/b2sq/r43*b2x - dpr32r43/a2sq/r43*a2x - r43/b2sq*b2x; + double dpsidr3y = dpr45r43/b2sq/r43*b2y - dpr32r43/a2sq/r43*a2y - r43/b2sq*b2y; + double dpsidr3z = dpr45r43/b2sq/r43*b2z - dpr32r43/a2sq/r43*a2z - r43/b2sq*b2z; + + double dpsidr4x = r43/b2sq*b2x; + double dpsidr4y = r43/b2sq*b2y; + double dpsidr4z = r43/b2sq*b2z; + + // calculate forces on cross-term atoms: F = -(dE/dPhi)*(dPhi/dr) + // apply force to each of the 5 atoms + + if (i1 < nlocal) { + Kokkos::atomic_add(&d_f(i1,0), dEdPhi*dphidr1x); + Kokkos::atomic_add(&d_f(i1,1), dEdPhi*dphidr1y); + Kokkos::atomic_add(&d_f(i1,2), dEdPhi*dphidr1z); + } + if (i2 < nlocal) { + Kokkos::atomic_add(&d_f(i2,0), dEdPhi*dphidr2x + dEdPsi*dpsidr1x); + Kokkos::atomic_add(&d_f(i2,1), dEdPhi*dphidr2y + dEdPsi*dpsidr1y); + Kokkos::atomic_add(&d_f(i2,2), dEdPhi*dphidr2z + dEdPsi*dpsidr1z); + } + if (i3 < nlocal) { + Kokkos::atomic_add(&d_f(i3,0), -dEdPhi*dphidr3x - dEdPsi*dpsidr2x); + Kokkos::atomic_add(&d_f(i3,1), -dEdPhi*dphidr3y - dEdPsi*dpsidr2y); + Kokkos::atomic_add(&d_f(i3,2), -dEdPhi*dphidr3z - dEdPsi*dpsidr2z); + } + if (i4 < nlocal) { + Kokkos::atomic_add(&d_f(i4,0), -dEdPhi*dphidr4x - dEdPsi*dpsidr3x); + Kokkos::atomic_add(&d_f(i4,1), -dEdPhi*dphidr4y - dEdPsi*dpsidr3y); + Kokkos::atomic_add(&d_f(i4,2), -dEdPhi*dphidr4z - dEdPsi*dpsidr3z); + } + if (i5 < nlocal) { + Kokkos::atomic_add(&d_f(i5,0), -dEdPsi*dpsidr4x); + Kokkos::atomic_add(&d_f(i5,1), -dEdPsi*dpsidr4y); + Kokkos::atomic_add(&d_f(i5,2), -dEdPsi*dpsidr4z); + } +} + +/* ---------------------------------------------------------------------- + allocate atom-based array +------------------------------------------------------------------------- */ + +template +void FixCMAPKokkos::grow_arrays(int nmax) +{ + k_num_crossterm.template sync(); + k_crossterm_type.template sync(); + k_crossterm_atom1.template sync(); + k_crossterm_atom2.template sync(); + k_crossterm_atom3.template sync(); + k_crossterm_atom4.template sync(); + k_crossterm_atom5.template sync(); + + // force reallocation on host + + k_num_crossterm.template modify(); + k_crossterm_type.template modify(); + k_crossterm_atom1.template modify(); + k_crossterm_atom2.template modify(); + k_crossterm_atom3.template modify(); + k_crossterm_atom4.template modify(); + k_crossterm_atom5.template modify(); + + memoryKK->grow_kokkos(k_num_crossterm,num_crossterm,nmax,"cmap:num_crossterm"); + memoryKK->grow_kokkos(k_crossterm_type,crossterm_type,nmax,CMAPMAX,"cmap:crossterm_type"); + memoryKK->grow_kokkos(k_crossterm_atom1,crossterm_atom1,nmax,CMAPMAX,"cmap:crossterm_atom1"); + memoryKK->grow_kokkos(k_crossterm_atom2,crossterm_atom2,nmax,CMAPMAX,"cmap:crossterm_atom2"); + memoryKK->grow_kokkos(k_crossterm_atom3,crossterm_atom3,nmax,CMAPMAX,"cmap:crossterm_atom3"); + memoryKK->grow_kokkos(k_crossterm_atom4,crossterm_atom4,nmax,CMAPMAX,"cmap:crossterm_atom4"); + memoryKK->grow_kokkos(k_crossterm_atom5,crossterm_atom5,nmax,CMAPMAX,"cmap:crossterm_atom5"); + + d_num_crossterm = k_num_crossterm.template view(); + d_crossterm_type = k_crossterm_type.template view(); + d_crossterm_atom1 = k_crossterm_atom1.template view(); + d_crossterm_atom2 = k_crossterm_atom2.template view(); + d_crossterm_atom3 = k_crossterm_atom3.template view(); + d_crossterm_atom4 = k_crossterm_atom4.template view(); + d_crossterm_atom5 = k_crossterm_atom5.template view(); + + // must initialize num_crossterm to 0 for added atoms + // may never be set for some atoms when data file is read + + for (int i = nmax_previous; i < nmax; i++) k_num_crossterm.h_view(i) = 0; + nmax_previous = nmax; + + k_num_crossterm.template modify(); + k_crossterm_type.template modify(); + k_crossterm_atom1.template modify(); + k_crossterm_atom2.template modify(); + k_crossterm_atom3.template modify(); + k_crossterm_atom4.template modify(); + k_crossterm_atom5.template modify(); +} + +/* ---------------------------------------------------------------------- + copy values within local atom-based array +------------------------------------------------------------------------- */ + +template +void FixCMAPKokkos::copy_arrays(int i, int j, int delflag) +{ + k_num_crossterm.template sync(); + k_crossterm_type.template sync(); + k_crossterm_atom1.template sync(); + k_crossterm_atom2.template sync(); + k_crossterm_atom3.template sync(); + k_crossterm_atom4.template sync(); + k_crossterm_atom5.template sync(); + + FixCMAP::copy_arrays(i,j,delflag); + + k_num_crossterm.template modify(); + k_crossterm_type.template modify(); + k_crossterm_atom1.template modify(); + k_crossterm_atom2.template modify(); + k_crossterm_atom3.template modify(); + k_crossterm_atom4.template modify(); + k_crossterm_atom5.template modify(); +} + +/* ---------------------------------------------------------------------- + sort local atom-based arrays +------------------------------------------------------------------------- */ + +template +void FixCMAPKokkos::sort_kokkos(Kokkos::BinSort &Sorter) +{ + // always sort on the device + + k_num_crossterm.sync_device(); + k_crossterm_type.sync_device(); + k_crossterm_atom1.sync_device(); + k_crossterm_atom2.sync_device(); + k_crossterm_atom3.sync_device(); + k_crossterm_atom4.sync_device(); + k_crossterm_atom5.sync_device(); + + Sorter.sort(LMPDeviceType(), k_num_crossterm.d_view); + Sorter.sort(LMPDeviceType(), k_crossterm_type.d_view); + Sorter.sort(LMPDeviceType(), k_crossterm_atom1.d_view); + Sorter.sort(LMPDeviceType(), k_crossterm_atom2.d_view); + Sorter.sort(LMPDeviceType(), k_crossterm_atom3.d_view); + Sorter.sort(LMPDeviceType(), k_crossterm_atom4.d_view); + Sorter.sort(LMPDeviceType(), k_crossterm_atom5.d_view); + + k_num_crossterm.modify_device(); + k_crossterm_type.modify_device(); + k_crossterm_atom1.modify_device(); + k_crossterm_atom2.modify_device(); + k_crossterm_atom3.modify_device(); + k_crossterm_atom4.modify_device(); + k_crossterm_atom5.modify_device(); +} + +/* ---------------------------------------------------------------------- + initialize one atom's array values, called when atom is created +------------------------------------------------------------------------- */ + +template +void FixCMAPKokkos::set_arrays(int i) +{ + k_num_crossterm.sync_host(); + num_crossterm[i] = 0; + k_num_crossterm.modify_host(); +} + +/* ---------------------------------------------------------------------- + pack values in local atom-based arrays for exchange with another proc +------------------------------------------------------------------------- */ + +template +int FixCMAPKokkos::pack_exchange(int i, double *buf) +{ + k_num_crossterm.sync_host(); + k_crossterm_type.sync_host(); + k_crossterm_atom1.sync_host(); + k_crossterm_atom2.sync_host(); + k_crossterm_atom3.sync_host(); + k_crossterm_atom4.sync_host(); + k_crossterm_atom5.sync_host(); + + int m = FixCMAP::pack_exchange(i,buf); + + k_num_crossterm.modify_host(); + k_crossterm_type.modify_host(); + k_crossterm_atom1.modify_host(); + k_crossterm_atom2.modify_host(); + k_crossterm_atom3.modify_host(); + k_crossterm_atom4.modify_host(); + k_crossterm_atom5.modify_host(); + + return m; +} + +/* ---------------------------------------------------------------------- + unpack values in local atom-based arrays from exchange with another proc +------------------------------------------------------------------------- */ + +template +int FixCMAPKokkos::unpack_exchange(int nlocal, double *buf) +{ + k_num_crossterm.sync_host(); + k_crossterm_type.sync_host(); + k_crossterm_atom1.sync_host(); + k_crossterm_atom2.sync_host(); + k_crossterm_atom3.sync_host(); + k_crossterm_atom4.sync_host(); + k_crossterm_atom5.sync_host(); + + int m = FixCMAP::unpack_exchange(nlocal,buf); + + k_num_crossterm.modify_host(); + k_crossterm_type.modify_host(); + k_crossterm_atom1.modify_host(); + k_crossterm_atom2.modify_host(); + k_crossterm_atom3.modify_host(); + k_crossterm_atom4.modify_host(); + k_crossterm_atom5.modify_host(); + + return m; +} + +/* ---------------------------------------------------------------------- + pack values in local atom-based array for exchange +------------------------------------------------------------------------- */ + +template +int FixCMAPKokkos::pack_exchange_kokkos( + const int &nsend, DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d k_exchange_sendlist, DAT::tdual_int_1d k_copylist, + ExecutionSpace space) +{ + k_buf.template sync(); + k_copylist.template sync(); + k_exchange_sendlist.template sync(); + + k_num_crossterm.template sync(); + k_crossterm_type.template sync(); + k_crossterm_atom1.template sync(); + k_crossterm_atom2.template sync(); + k_crossterm_atom3.template sync(); + k_crossterm_atom4.template sync(); + k_crossterm_atom5.template sync(); + + auto d_buf = typename ArrayTypes::t_xfloat_1d_um( + k_buf.template view().data(), + k_buf.extent(0)*k_buf.extent(1)); + auto d_copylist = k_copylist.template view(); + auto d_exchange_sendlist = k_exchange_sendlist.template view(); + + Kokkos::deep_copy(d_count,0); + + auto l_num_crossterm = d_num_crossterm; + auto l_crossterm_type = d_crossterm_type; + auto l_crossterm_atom1 = d_crossterm_atom1; + auto l_crossterm_atom2 = d_crossterm_atom2; + auto l_crossterm_atom3 = d_crossterm_atom3; + auto l_crossterm_atom4 = d_crossterm_atom4; + auto l_crossterm_atom5 = d_crossterm_atom5; + auto l_count = d_count; + + copymode = 1; + + Kokkos::parallel_scan(nsend, KOKKOS_LAMBDA(const int &mysend, int &offset, const bool &final) { + + const int i = d_exchange_sendlist(mysend); + + if (!final) offset += (1+l_num_crossterm(i)*6); + else { + + int m = nsend + offset; + d_buf(mysend) = d_ubuf(m).d; + d_buf(m++) = d_ubuf(l_num_crossterm(i)).d; + + for (int k = 0; k < l_num_crossterm(i); k++) { + d_buf(m++) = d_ubuf(l_crossterm_type(i,k)).d; + d_buf(m++) = d_ubuf(l_crossterm_atom1(i,k)).d; + d_buf(m++) = d_ubuf(l_crossterm_atom2(i,k)).d; + d_buf(m++) = d_ubuf(l_crossterm_atom3(i,k)).d; + d_buf(m++) = d_ubuf(l_crossterm_atom4(i,k)).d; + d_buf(m++) = d_ubuf(l_crossterm_atom5(i,k)).d; + } + + if (mysend == nsend-1) l_count() = m; + offset = m - nsend; + + const int j = d_copylist(mysend); + if (j > -1) { + l_num_crossterm(i) = l_num_crossterm(j); + for (int k = 0; k < l_num_crossterm(i); k++) { + l_crossterm_type(i,k) = l_crossterm_type(j,k); + l_crossterm_atom1(i,k) = l_crossterm_atom1(j,k); + l_crossterm_atom2(i,k) = l_crossterm_atom2(j,k); + l_crossterm_atom3(i,k) = l_crossterm_atom3(j,k); + l_crossterm_atom4(i,k) = l_crossterm_atom4(j,k); + l_crossterm_atom5(i,k) = l_crossterm_atom5(j,k); + } + } + } + }); + + copymode = 0; + + k_buf.template modify(); + if (space == Host) k_buf.template sync(); + else k_buf.template sync(); + + k_num_crossterm.template modify(); + k_crossterm_type.template modify(); + k_crossterm_atom1.template modify(); + k_crossterm_atom2.template modify(); + k_crossterm_atom3.template modify(); + k_crossterm_atom4.template modify(); + k_crossterm_atom5.template modify(); + + Kokkos::deep_copy(h_count,d_count); + return h_count(); +} + +/* ---------------------------------------------------------------------- + unpack values in local atom-based array from exchange +------------------------------------------------------------------------- */ + +template +void FixCMAPKokkos::unpack_exchange_kokkos( + DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv, + int nrecv1, int nextrarecv1, ExecutionSpace /*space*/) +{ + k_buf.template sync(); + k_indices.template sync(); + + k_num_crossterm.template sync(); + k_crossterm_type.template sync(); + k_crossterm_atom1.template sync(); + k_crossterm_atom2.template sync(); + k_crossterm_atom3.template sync(); + k_crossterm_atom4.template sync(); + k_crossterm_atom5.template sync(); + + auto d_buf = typename ArrayTypes::t_xfloat_1d_um( + k_buf.template view().data(), + k_buf.extent(0)*k_buf.extent(1)); + + auto d_indices = k_indices.template view(); + + auto l_num_crossterm = d_num_crossterm; + auto l_crossterm_type = d_crossterm_type; + auto l_crossterm_atom1 = d_crossterm_atom1; + auto l_crossterm_atom2 = d_crossterm_atom2; + auto l_crossterm_atom3 = d_crossterm_atom3; + auto l_crossterm_atom4 = d_crossterm_atom4; + auto l_crossterm_atom5 = d_crossterm_atom5; + + copymode = 1; + + Kokkos::parallel_for(nrecv, KOKKOS_LAMBDA(const int &i) { + int index = d_indices(i); + if (index > -1) { + int m = d_ubuf(d_buf(i)).i; + if (i >= nrecv1) m = nextrarecv1 + d_ubuf(d_buf(nextrarecv1 + i - nrecv1)).i; + l_num_crossterm(index) = static_cast (d_ubuf(d_buf(m++)).i); + for (int k = 0; k < l_num_crossterm(index); k++) { + l_crossterm_type(index,k) = static_cast (d_ubuf(d_buf(m++)).i); + l_crossterm_atom1(index,k) = static_cast (d_ubuf(d_buf(m++)).i); + l_crossterm_atom2(index,k) = static_cast (d_ubuf(d_buf(m++)).i); + l_crossterm_atom3(index,k) = static_cast (d_ubuf(d_buf(m++)).i); + l_crossterm_atom4(index,k) = static_cast (d_ubuf(d_buf(m++)).i); + l_crossterm_atom5(index,k) = static_cast (d_ubuf(d_buf(m++)).i); + } + } + }); + + copymode = 0; + + k_num_crossterm.template modify(); + k_crossterm_type.template modify(); + k_crossterm_atom1.template modify(); + k_crossterm_atom2.template modify(); + k_crossterm_atom3.template modify(); + k_crossterm_atom4.template modify(); + k_crossterm_atom5.template modify(); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double FixCMAPKokkos::dihedral_angle_atan2(double fx, double fy, double fz, + double ax, double ay, double az, + double bx, double by, double bz, + double absg) const +{ + // calculate the dihedral angle + + double angle = 0.0, arg1, arg2; + + arg1 = absg*(fx*bx+fy*by+fz*bz); + arg2 = ax*bx+ay*by+az*bz; + + if (arg1 == 0 && arg2 == 0) + Kokkos::abort("CMAP: atan2 function cannot take 2 zero arguments"); + else { + angle = Kokkos::atan2(arg1,arg2); + angle = angle*180.0/MY_PI; + } + + return angle; +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixCMAPKokkos::bc_interpol(double x1, double x2, int low1, int low2, double *gs, + double *d1gs, double *d2gs, double *d12gs, + double &E, double &dEdPhi, double &dEdPsi ) const +{ + + // FUSE bc_coeff() and bc_interpol() inline functions for + // KOKKOS version to avoid passing cij[][] array back and forth + + // calculate the bicubic interpolation coefficients c_ij + + const int wt[16][16] = + { {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0}, + {-3, 0, 0, 3, 0, 0, 0, 0,-2, 0, 0,-1, 0, 0, 0, 0}, + {2, 0, 0,-2, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0}, + {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, + {0, 0, 0, 0,-3, 0, 0, 3, 0, 0, 0, 0,-2, 0, 0,-1}, + {0, 0, 0, 0, 2, 0, 0,-2, 0, 0, 0, 0, 1, 0, 0, 1}, + {-3, 3, 0, 0,-2,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0,-3, 3, 0, 0,-2,-1, 0, 0}, + {9,-9, 9,-9, 6, 3,-3,-6, 6,-6,-3, 3, 4, 2, 1, 2}, + {-6, 6,-6, 6,-4,-2, 2, 4,-3, 3, 3,-3,-2,-1,-1,-2}, + {2,-2, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 2,-2, 0, 0, 1, 1, 0, 0}, + {-6, 6,-6, 6,-3,-3, 3, 3,-4, 4, 2,-2,-2,-2,-1,-1}, + {4,-4, 4,-4, 2, 2,-2,-2, 2,-2,-2, 2, 1, 1, 1, 1} + }; + + int i, j, k, in; + double xx, x[16], cij[4][4]; + + for (i = 0; i < 4; i++) { + x[i] = gs[i]; + x[i+4] = d1gs[i]*CMAPDX; + x[i+8] = d2gs[i]*CMAPDX; + x[i+12] = d12gs[i]*CMAPDX*CMAPDX; + } + + in = 0; + for (i = 0; i < 4; i++) { + for (j = 0; j < 4; j++) { + xx = 0.0; + for (k = 0; k < 16; k++) xx += wt[in][k]*x[k]; + in++; + cij[i][j] = xx; + } + } + + // for a given point of interest and its corresponding grid square values, + // gradients and cross-derivatives + // calculate the interpolated value of the point of interest (POI) + + double t, u, gs1l, gs2l; + + // set the interpolation coefficients + // bc_coeff(gs,d1gs,d2gs,d12gs,&cij[0]); + + gs1l = d_g_axis(low1); + gs2l = d_g_axis(low2); + + t = (x1-gs1l)/CMAPDX; + u = (x2-gs2l)/CMAPDX; + + E = dEdPhi = dEdPsi = 0.0; + + for (i = 3; i >= 0; i--) { + E = t*E + ((cij[i][3]*u+cij[i][2])*u+cij[i][1])*u+cij[i][0]; + dEdPhi = u*dEdPhi + (3.0*cij[3][i]*t+2.0*cij[2][i])*t+cij[1][i]; + dEdPsi = t*dEdPsi + (3.0*cij[i][3]*u+2.0*cij[i][2])*u+cij[i][1]; + } + + dEdPhi *= (180.0/MY_PI/CMAPDX); + dEdPsi *= (180.0/MY_PI/CMAPDX); +} + +/* ---------------------------------------------------------------------- + return local index of atom J or any of its images that is closest to atom I + if J is not a valid index like -1, just return it + copied from domain.cpp +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +int FixCMAPKokkos::closest_image(const int i, int j) const +{ + if (j < 0) return j; + + const X_FLOAT xi0 = d_x(i,0); + const X_FLOAT xi1 = d_x(i,1); + const X_FLOAT xi2 = d_x(i,2); + + int closest = j; + X_FLOAT delx = xi0 - d_x(j,0); + X_FLOAT dely = xi1 - d_x(j,1); + X_FLOAT delz = xi2 - d_x(j,2); + X_FLOAT rsqmin = delx*delx + dely*dely + delz*delz; + X_FLOAT rsq; + + while (d_sametag[j] >= 0) { + j = d_sametag[j]; + delx = xi0 - d_x(j,0); + dely = xi1 - d_x(j,1); + delz = xi2 - d_x(j,2); + rsq = delx*delx + dely*dely + delz*delz; + if (rsq < rsqmin) { + rsqmin = rsq; + closest = j; + } + } + + return closest; +} + +namespace LAMMPS_NS { +template class FixCMAPKokkos; +#ifdef LMP_KOKKOS_GPU +template class FixCMAPKokkos; +#endif +} diff --git a/src/KOKKOS/fix_cmap_kokkos.h b/src/KOKKOS/fix_cmap_kokkos.h new file mode 100644 index 0000000000..745b2bcfe2 --- /dev/null +++ b/src/KOKKOS/fix_cmap_kokkos.h @@ -0,0 +1,123 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS +// clang-format off +FixStyle(cmap/kk,FixCMAPKokkos); +FixStyle(cmap/kk/device,FixCMAPKokkos); +FixStyle(cmap/kk/host,FixCMAPKokkos); +// clang-format on +#else + +// clang-format off +#ifndef LMP_FIX_CMAP_KOKKOS_H +#define LMP_FIX_CMAP_KOKKOS_H + +#include "fix_cmap.h" + +#include "kokkos_base.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +struct TagFixCmapPreNeighbor{}; +struct TagFixCmapPostForce{}; + +template +class FixCMAPKokkos : public FixCMAP, public KokkosBase { + typedef ArrayTypes AT; + + public: + FixCMAPKokkos(class LAMMPS *, int, char **); + ~FixCMAPKokkos() override; + + void init() override; + void pre_neighbor() override; + void post_force(int) override; + + KOKKOS_INLINE_FUNCTION + void operator()(TagFixCmapPreNeighbor, const int, int&, const bool) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagFixCmapPostForce, const int, double&) const; + + void grow_arrays(int) override; + void copy_arrays(int, int, int) override; + void sort_kokkos(Kokkos::BinSort &Sorter) override; + void set_arrays(int) override; + int pack_exchange(int, double *) override; + int unpack_exchange(int, double *) override; + + int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space) override; + + void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d &indices,int nrecv, + int nrecv1,int nrecv1extra, + ExecutionSpace space) override; + + protected: + + int nlocal; + + typename AT::t_x_array d_x; + typename AT::t_f_array d_f; + + DAT::tdual_int_1d k_sametag; + typename AT::t_int_1d d_sametag; + int map_style; + DAT::tdual_int_1d k_map_array; + dual_hash_type k_map_hash; + + typename AT::t_int_scalar d_count; + HAT::t_int_scalar h_count; + + DAT::tdual_int_1d k_num_crossterm; + typename AT::t_int_1d d_num_crossterm; + + DAT::tdual_int_2d k_crossterm_type; + typename AT::t_int_2d d_crosstermlist, d_crossterm_type; + + DAT::tdual_tagint_2d k_crossterm_atom1, k_crossterm_atom2, k_crossterm_atom3; + DAT::tdual_tagint_2d k_crossterm_atom4, k_crossterm_atom5; + typename AT::t_tagint_2d d_crossterm_atom1, d_crossterm_atom2, d_crossterm_atom3; + typename AT::t_tagint_2d d_crossterm_atom4, d_crossterm_atom5; + + DAT::tdual_float_1d k_g_axis; + typename AT::t_float_1d d_g_axis; + + DAT::tdual_float_3d k_cmapgrid, k_d1cmapgrid, k_d2cmapgrid, k_d12cmapgrid; + typename AT::t_float_3d d_cmapgrid, d_d1cmapgrid, d_d2cmapgrid, d_d12cmapgrid; + + // calculate dihedral angles + KOKKOS_INLINE_FUNCTION + double dihedral_angle_atan2(double, double, double, double, double, double, double, double, + double, double) const; + + // perform bicubic interpolation at point of interest + KOKKOS_INLINE_FUNCTION + void bc_interpol(double, double, int, int, double *, double *, double *, double *, + double &, double &, double &) const; + + // copied from Domain + KOKKOS_INLINE_FUNCTION + int closest_image(const int, int) const; + +}; + +} // namespace LAMMPS_NS + +#endif // LMP_FIX_CMAP_KOKKOS_H +#endif // FIX_CLASS diff --git a/src/KOKKOS/fix_colvars_kokkos.cpp b/src/KOKKOS/fix_colvars_kokkos.cpp new file mode 100644 index 0000000000..faf738f9e7 --- /dev/null +++ b/src/KOKKOS/fix_colvars_kokkos.cpp @@ -0,0 +1,51 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Mitch Murphy (alphataubio at gmail) +------------------------------------------------------------------------- */ + +#include "fix_colvars_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" + +using namespace LAMMPS_NS; + +FixColvarsKokkos::FixColvarsKokkos(LAMMPS *lmp, int narg, char **arg) : + FixColvars(lmp, narg, arg) +{ + kokkosable = 1; + atomKK = (AtomKokkos *)atom; + datamask_read = EMPTY_MASK; + datamask_modify = EMPTY_MASK; +} + +/* ---------------------------------------------------------------------- */ + +void FixColvarsKokkos::post_force(int vflag) +{ + atomKK->sync(Host,X_MASK|F_MASK|TAG_MASK|IMAGE_MASK); + FixColvars::post_force(vflag); + atomKK->modified(Host,F_MASK); +} + +/* ---------------------------------------------------------------------- */ +void FixColvarsKokkos::end_of_step() +{ + if (store_forces) { + atomKK->sync(Host,F_MASK|TAG_MASK); + FixColvars::end_of_step(); + } +} diff --git a/src/KOKKOS/fix_colvars_kokkos.h b/src/KOKKOS/fix_colvars_kokkos.h new file mode 100644 index 0000000000..fe649c7350 --- /dev/null +++ b/src/KOKKOS/fix_colvars_kokkos.h @@ -0,0 +1,42 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS +// clang-format off +FixStyle(colvars/kk,FixColvarsKokkos); +FixStyle(colvars/kk/device,FixColvarsKokkos); +FixStyle(colvars/kk/host,FixColvarsKokkos); +// clang-format on +#else + +#ifndef LMP_FIX_COLVARS_KOKKOS_H +#define LMP_FIX_COLVARS_KOKKOS_H + +#include "fix_colvars.h" + +namespace LAMMPS_NS { + +class FixColvarsKokkos : public FixColvars { + + public: + FixColvarsKokkos(class LAMMPS *, int, char **); + + void post_force(int) override; + void end_of_step() override; + +}; + +} // namespace LAMMPS_NS + +#endif +#endif diff --git a/src/KOKKOS/fix_efield_kokkos.cpp b/src/KOKKOS/fix_efield_kokkos.cpp index b08542fd17..45af29e36d 100644 --- a/src/KOKKOS/fix_efield_kokkos.cpp +++ b/src/KOKKOS/fix_efield_kokkos.cpp @@ -106,7 +106,7 @@ void FixEfieldKokkos::post_force(int vflag) // update region if necessary if (region) { - if (!utils::strmatch(region->style, "^block")) + if (!(utils::strmatch(region->style, "^block") || utils::strmatch(region->style, "^sphere"))) error->all(FLERR,"Cannot (yet) use {}-style region with fix efield/kk",region->style); region->prematch(); DAT::tdual_int_1d k_match = DAT::tdual_int_1d("efield:k_match",nlocal); diff --git a/src/KOKKOS/fix_momentum_kokkos.cpp b/src/KOKKOS/fix_momentum_kokkos.cpp index fa959cd582..b41a3530cb 100644 --- a/src/KOKKOS/fix_momentum_kokkos.cpp +++ b/src/KOKKOS/fix_momentum_kokkos.cpp @@ -24,7 +24,8 @@ using namespace LAMMPS_NS; using namespace FixConst; /* ---------------------------------------------------------------------- - Contributing author: Dan Ibanez (SNL) + Contributing authors: Dan Ibanez (SNL) + Mitch Murphy (alphataubio at gmail) ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */ @@ -35,6 +36,7 @@ FixMomentumKokkos::FixMomentumKokkos(LAMMPS *lmp, int narg, char **a { kokkosable = 1; atomKK = (AtomKokkos *) atom; + groupKK = (GroupKokkos *)group; execution_space = ExecutionSpaceFromDevice::space; datamask_read = EMPTY_MASK; datamask_modify = EMPTY_MASK; @@ -92,8 +94,7 @@ void FixMomentumKokkos::end_of_step() double ekin_old,ekin_new; ekin_old = ekin_new = 0.0; - if (dynamic) - masstotal = group->mass(igroup); // change once Group is ported to Kokkos + if (dynamic) masstotal = groupKK->mass(igroup); // do nothing if group is empty, i.e. mass is zero; @@ -107,12 +108,8 @@ void FixMomentumKokkos::end_of_step() auto groupbit2 = groupbit; if (linear) { - /* this is needed because Group is not Kokkos-aware ! */ - atomKK->sync(ExecutionSpaceFromDevice::space, - V_MASK | MASK_MASK | TYPE_MASK | RMASS_MASK); - Few tmpvcm; - group->vcm(igroup,masstotal,&tmpvcm[0]); - const Few vcm(tmpvcm); + double vcm[3]; + groupKK->vcm(igroup,masstotal,vcm); // adjust velocities by vcm to zero linear momentum // only adjust a component if flag is set @@ -133,20 +130,11 @@ void FixMomentumKokkos::end_of_step() } if (angular) { - Few tmpxcm, tmpangmom, tmpomega; - double inertia[3][3]; - /* syncs for each Kokkos-unaware Group method */ - atomKK->sync(ExecutionSpaceFromDevice::space, - X_MASK | MASK_MASK | TYPE_MASK | IMAGE_MASK | RMASS_MASK); - group->xcm(igroup,masstotal,&tmpxcm[0]); - atomKK->sync(ExecutionSpaceFromDevice::space, - X_MASK | V_MASK | MASK_MASK | TYPE_MASK | IMAGE_MASK | RMASS_MASK); - group->angmom(igroup,&tmpxcm[0],&tmpangmom[0]); - atomKK->sync(ExecutionSpaceFromDevice::space, - X_MASK | MASK_MASK | TYPE_MASK | IMAGE_MASK | RMASS_MASK); - group->inertia(igroup,&tmpxcm[0],inertia); - group->omega(&tmpangmom[0],inertia,&tmpomega[0]); - const Few xcm(tmpxcm), angmom(tmpangmom), omega(tmpomega); + double xcm[3],angmom[3],omega[3],inertia[3][3]; + groupKK->xcm(igroup,masstotal,xcm); + groupKK->angmom(igroup,xcm,angmom); + groupKK->inertia(igroup,xcm,inertia); + group->omega(angmom,inertia,omega); // adjust velocities to zero omega // vnew_i = v_i - w x r_i @@ -167,10 +155,10 @@ void FixMomentumKokkos::end_of_step() x_i[0] = x(i,0); x_i[1] = x(i,1); x_i[2] = x(i,2); - auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,image(i)); - auto dx = unwrap[0] - xcm[0]; - auto dy = unwrap[1] - xcm[1]; - auto dz = unwrap[2] - xcm[2]; + auto unwrapKK = DomainKokkos::unmap(prd,h,triclinic,x_i,image(i)); + auto dx = unwrapKK[0] - xcm[0]; + auto dy = unwrapKK[1] - xcm[1]; + auto dz = unwrapKK[2] - xcm[2]; v(i,0) -= omega[1]*dz - omega[2]*dy; v(i,1) -= omega[2]*dx - omega[0]*dz; v(i,2) -= omega[0]*dy - omega[1]*dx; diff --git a/src/KOKKOS/fix_momentum_kokkos.h b/src/KOKKOS/fix_momentum_kokkos.h index 3bb46035fe..0ab91c423d 100644 --- a/src/KOKKOS/fix_momentum_kokkos.h +++ b/src/KOKKOS/fix_momentum_kokkos.h @@ -24,6 +24,8 @@ FixStyle(momentum/kk/host,FixMomentumKokkos); #define LMP_FIX_MOMENTUM_KOKKOS_H #include "fix_momentum.h" + +#include "group_kokkos.h" #include "kokkos_type.h" namespace LAMMPS_NS { @@ -35,6 +37,8 @@ class FixMomentumKokkos : public FixMomentum { FixMomentumKokkos(class LAMMPS *, int, char **); void end_of_step() override; + private: + GroupKokkos *groupKK; }; } diff --git a/src/KOKKOS/fix_nve_limit_kokkos.cpp b/src/KOKKOS/fix_nve_limit_kokkos.cpp new file mode 100644 index 0000000000..de77427e49 --- /dev/null +++ b/src/KOKKOS/fix_nve_limit_kokkos.cpp @@ -0,0 +1,202 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Mitch Murphy, alphataubio at gmail +------------------------------------------------------------------------- */ + +#include "fix_nve_limit_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "kokkos_type.h" + +#include + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +template +FixNVELimitKokkos::FixNVELimitKokkos(LAMMPS *lmp, int narg, char **arg) : + FixNVELimit(lmp, narg, arg) +{ + kokkosable = 1; + execution_space = ExecutionSpaceFromDevice::space; + atomKK = (AtomKokkos *) atom; + datamask_read = EMPTY_MASK; + datamask_modify = EMPTY_MASK; +} + +/* ---------------------------------------------------------------------- + allow for both per-type and per-atom mass +------------------------------------------------------------------------- */ + +template +void FixNVELimitKokkos::initial_integrate(int /*vflag*/) +{ + int nlocal = atom->nlocal; + if (igroup == atom->firstgroup) nlocal = atom->nfirst; + + auto d_x = atomKK->k_x.template view(); + auto d_v = atomKK->k_v.template view(); + auto d_f = atomKK->k_f.template view(); + auto d_mask = atomKK->k_mask.template view(); + auto l_groupbit = groupbit; + auto l_dtf = dtf; + auto l_dtv = dtv; + auto l_vlimitsq = vlimitsq; + + int d_ncount; + + if (atomKK->rmass) { + + auto d_rmass = atomKK->k_rmass.template view(); + auto d_type = atomKK->k_type.template view(); + atomKK->sync(execution_space, X_MASK|V_MASK|F_MASK|MASK_MASK|RMASS_MASK ); + + Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) { + if (d_mask[i] & l_groupbit) { + const double dtfm = l_dtf / d_rmass[i]; + d_v(i,0) += dtfm * d_f(i,0); + d_v(i,1) += dtfm * d_f(i,1); + d_v(i,2) += dtfm * d_f(i,2); + + const double vsq = d_v(i,0)*d_v(i,0) + d_v(i,1)*d_v(i,1) + d_v(i,2)*d_v(i,2); + if (vsq > l_vlimitsq) { + l_ncount++; + const double scale = sqrt(l_vlimitsq/vsq); + d_v(i,0) *= scale; + d_v(i,1) *= scale; + d_v(i,2) *= scale; + } + + d_x(i,0) += l_dtv * d_v(i,0); + d_x(i,1) += l_dtv * d_v(i,1); + d_x(i,2) += l_dtv * d_v(i,2); + } + }, d_ncount); + + } else { + + auto d_mass = atomKK->k_mass.template view(); + auto d_type = atomKK->k_type.template view(); + auto l_groupbit = groupbit; + atomKK->sync(execution_space, X_MASK|V_MASK|F_MASK|MASK_MASK|TYPE_MASK ); + + Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) { + if (d_mask[i] & l_groupbit) { + const double dtfm = l_dtf / d_mass[d_type[i]]; + d_v(i,0) += dtfm * d_f(i,0); + d_v(i,1) += dtfm * d_f(i,1); + d_v(i,2) += dtfm * d_f(i,2); + + const double vsq = d_v(i,0)*d_v(i,0) + d_v(i,1)*d_v(i,1) + d_v(i,2)*d_v(i,2); + if (vsq > l_vlimitsq) { + l_ncount++; + const double scale = sqrt(l_vlimitsq/vsq); + d_v(i,0) *= scale; + d_v(i,1) *= scale; + d_v(i,2) *= scale; + } + + d_x(i,0) += l_dtv * d_v(i,0); + d_x(i,1) += l_dtv * d_v(i,1); + d_x(i,2) += l_dtv * d_v(i,2); + } + }, d_ncount); + } + + ncount += d_ncount; + atomKK->modified(execution_space, X_MASK | V_MASK ); +} + +/* ---------------------------------------------------------------------- */ + +template +void FixNVELimitKokkos::final_integrate() +{ + int nlocal = atom->nlocal; + if (igroup == atom->firstgroup) nlocal = atom->nfirst; + + auto d_v = atomKK->k_v.template view(); + auto d_f = atomKK->k_f.template view(); + auto d_mask = atomKK->k_mask.template view(); + auto l_groupbit = groupbit; + auto l_dtf = dtf; + auto l_vlimitsq = vlimitsq; + + int d_ncount; + + if (atomKK->rmass) { + + auto d_rmass = atomKK->k_rmass.template view(); + atomKK->sync(execution_space, V_MASK|F_MASK|MASK_MASK|RMASS_MASK ); + + Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) { + if (d_mask[i] & l_groupbit) { + const double dtfm = l_dtf / d_rmass[i]; + d_v(i,0) += dtfm * d_f(i,0); + d_v(i,1) += dtfm * d_f(i,1); + d_v(i,2) += dtfm * d_f(i,2); + + const double vsq = d_v(i,0)*d_v(i,0) + d_v(i,1)*d_v(i,1) + d_v(i,2)*d_v(i,2); + if (vsq > l_vlimitsq) { + l_ncount++; + const double scale = sqrt(l_vlimitsq/vsq); + d_v(i,0) *= scale; + d_v(i,1) *= scale; + d_v(i,2) *= scale; + } + } + }, d_ncount); + + } else { + + auto d_mass = atomKK->k_mass.template view(); + auto d_type = atomKK->k_type.template view(); + atomKK->sync(execution_space, V_MASK|F_MASK|MASK_MASK|TYPE_MASK ); + + Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) { + if (d_mask[i] & l_groupbit) { + const double dtfm = l_dtf / d_mass[d_type[i]]; + d_v(i,0) += dtfm * d_f(i,0); + d_v(i,1) += dtfm * d_f(i,1); + d_v(i,2) += dtfm * d_f(i,2); + + const double vsq = d_v(i,0)*d_v(i,0) + d_v(i,1)*d_v(i,1) + d_v(i,2)*d_v(i,2); + if (vsq > l_vlimitsq) { + l_ncount++; + const double scale = sqrt(l_vlimitsq/vsq); + d_v(i,0) *= scale; + d_v(i,1) *= scale; + d_v(i,2) *= scale; + } + } + }, d_ncount); + } + + ncount += d_ncount; + atomKK->modified(execution_space, V_MASK ); +} + +/* ---------------------------------------------------------------------- */ + +namespace LAMMPS_NS { +template class FixNVELimitKokkos; +#ifdef LMP_KOKKOS_GPU +template class FixNVELimitKokkos; +#endif +} + diff --git a/src/KOKKOS/fix_nve_limit_kokkos.h b/src/KOKKOS/fix_nve_limit_kokkos.h new file mode 100644 index 0000000000..b611996b66 --- /dev/null +++ b/src/KOKKOS/fix_nve_limit_kokkos.h @@ -0,0 +1,43 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS +// clang-format off +FixStyle(nve/limit/kk,FixNVELimitKokkos); +FixStyle(nve/limit/kk/device,FixNVELimitKokkos); +FixStyle(nve/limit/kk/host,FixNVELimitKokkos); + +// clang-format on +#else + +#ifndef LMP_FIX_NVE_LIMIT_KOKKOS_H +#define LMP_FIX_NVE_LIMIT_KOKKOS_H + +#include "fix_nve_limit.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +template +class FixNVELimitKokkos : public FixNVELimit { + public: + FixNVELimitKokkos(class LAMMPS *, int, char **); + void initial_integrate(int) override; + void final_integrate() override; + +}; + +} // namespace LAMMPS_NS + +#endif +#endif diff --git a/src/KOKKOS/fix_recenter_kokkos.cpp b/src/KOKKOS/fix_recenter_kokkos.cpp new file mode 100644 index 0000000000..607f5ce8d9 --- /dev/null +++ b/src/KOKKOS/fix_recenter_kokkos.cpp @@ -0,0 +1,128 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Mitch Murphy (alphataubio at gmail) +------------------------------------------------------------------------- */ + +#include "fix_recenter_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "input.h" +#include "modify.h" +#include "update.h" +#include "domain.h" +#include "group_kokkos.h" + +using namespace LAMMPS_NS; + +enum{BOX,LATTICE,FRACTION}; + +/* ---------------------------------------------------------------------- */ + +template +FixRecenterKokkos::FixRecenterKokkos(LAMMPS *lmp, int narg, char **arg) : + FixRecenter(lmp, narg, arg) +{ + kokkosable = 1; + atomKK = (AtomKokkos *)atom; + groupKK = (GroupKokkos *)group; + execution_space = ExecutionSpaceFromDevice::space; + + datamask_read = X_MASK | MASK_MASK; + datamask_modify = X_MASK; +} + +/* ---------------------------------------------------------------------- */ + +template +void FixRecenterKokkos::initial_integrate(int /*vflag*/) +{ + atomKK->sync(execution_space,datamask_read); + int nlocal = atomKK->nlocal; + if (igroup == atomKK->firstgroup) nlocal = atomKK->nfirst; + + // target COM + // bounding box around domain works for both orthogonal and triclinic + + double xtarget,ytarget,ztarget; + double *bboxlo,*bboxhi; + + if (scaleflag == FRACTION) { + if (domain->triclinic == 0) { + bboxlo = domain->boxlo; + bboxhi = domain->boxhi; + } else { + bboxlo = domain->boxlo_bound; + bboxhi = domain->boxhi_bound; + } + } + + if (xinitflag) xtarget = xinit; + else if (scaleflag == FRACTION) + xtarget = bboxlo[0] + xcom*(bboxhi[0] - bboxlo[0]); + else xtarget = xcom; + + if (yinitflag) ytarget = yinit; + else if (scaleflag == FRACTION) + ytarget = bboxlo[1] + ycom*(bboxhi[1] - bboxlo[1]); + else ytarget = ycom; + + if (zinitflag) ztarget = zinit; + else if (scaleflag == FRACTION) + ztarget = bboxlo[2] + zcom*(bboxhi[2] - bboxlo[2]); + else ztarget = zcom; + + // current COM + + if (group->dynamic[igroup]) masstotal = groupKK->mass(igroup); + double xcm[3]; + groupKK->xcm(igroup,masstotal,xcm); + + // shift coords by difference between actual COM and requested COM + + shift[0] = xflag ? (xtarget - xcm[0]) : 0.0; + shift[1] = yflag ? (ytarget - xcm[1]) : 0.0; + shift[2] = zflag ? (ztarget - xcm[2]) : 0.0; + distance = sqrt(shift[0]*shift[0] + shift[1]*shift[1] + shift[2]*shift[2]); + + auto d_x = atomKK->k_x.template view(); + auto d_mask = atomKK->k_mask.template view(); + auto l_group2bit = group2bit; + double l_shiftx = shift[0]; + double l_shifty = shift[1]; + double l_shiftz = shift[2]; + + copymode = 1; + + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal), + KOKKOS_LAMBDA(const int i) { + if (d_mask[i] & l_group2bit) { + d_x(i,0) += l_shiftx; + d_x(i,1) += l_shifty; + d_x(i,2) += l_shiftz; + } + }); + + copymode = 0; + atomKK->modified(execution_space,datamask_modify); +} + +namespace LAMMPS_NS { +template class FixRecenterKokkos; +#ifdef LMP_KOKKOS_GPU +template class FixRecenterKokkos; +#endif +} diff --git a/src/KOKKOS/fix_recenter_kokkos.h b/src/KOKKOS/fix_recenter_kokkos.h new file mode 100644 index 0000000000..36e154e05c --- /dev/null +++ b/src/KOKKOS/fix_recenter_kokkos.h @@ -0,0 +1,45 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS +// clang-format off +FixStyle(recenter/kk,FixRecenterKokkos); +FixStyle(recenter/kk/device,FixRecenterKokkos); +FixStyle(recenter/kk/host,FixRecenterKokkos); +// clang-format on +#else + +// clang-format off +#ifndef LMP_FIX_RECENTER_KOKKOS_H +#define LMP_FIX_RECENTER_KOKKOS_H + +#include "fix_recenter.h" + +#include "group_kokkos.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +template +class FixRecenterKokkos : public FixRecenter { + public: + FixRecenterKokkos(class LAMMPS *, int, char **); + void initial_integrate(int) override; + private: + GroupKokkos *groupKK; +}; + +} // namespace LAMMPS_NS + +#endif // LMP_FIX_RECENTER_KOKKOS_H +#endif // FIX_CLASS diff --git a/src/KOKKOS/fix_setforce_kokkos.cpp b/src/KOKKOS/fix_setforce_kokkos.cpp index e8f376643f..8c69c33fcf 100644 --- a/src/KOKKOS/fix_setforce_kokkos.cpp +++ b/src/KOKKOS/fix_setforce_kokkos.cpp @@ -84,7 +84,7 @@ void FixSetForceKokkos::post_force(int /*vflag*/) // update region if necessary if (region) { - if (!utils::strmatch(region->style, "^block")) + if (!(utils::strmatch(region->style, "^block") || utils::strmatch(region->style, "^sphere"))) error->all(FLERR,"Cannot (yet) use {}-style region with fix setforce/kk",region->style); region->prematch(); DAT::tdual_int_1d k_match = DAT::tdual_int_1d("setforce:k_match",nlocal); diff --git a/src/KOKKOS/fix_spring_self_kokkos.cpp b/src/KOKKOS/fix_spring_self_kokkos.cpp index 9ba796b1ab..1b6d45ead7 100644 --- a/src/KOKKOS/fix_spring_self_kokkos.cpp +++ b/src/KOKKOS/fix_spring_self_kokkos.cpp @@ -79,6 +79,9 @@ void FixSpringSelfKokkos::init() { FixSpringSelf::init(); + if (kstyle != CONSTANT) + error->all(FLERR, "Fix spring/self/kk does not support variable spring constants (yet)"); + if (utils::strmatch(update->integrate_style,"^respa")) error->all(FLERR,"Cannot (yet) use respa with Kokkos"); } diff --git a/src/KOKKOS/fix_wall_region_kokkos.cpp b/src/KOKKOS/fix_wall_region_kokkos.cpp new file mode 100644 index 0000000000..ab6f7186a1 --- /dev/null +++ b/src/KOKKOS/fix_wall_region_kokkos.cpp @@ -0,0 +1,357 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Mitch Murphy (alphataubio at gmail) +------------------------------------------------------------------------- */ + +#include "fix_wall_region_kokkos.h" + +#include "atom_masks.h" +#include "atom_kokkos.h" +#include "error.h" +#include "kokkos_base.h" +#include "math_special_kokkos.h" +#include "memory_kokkos.h" +#include "region.h" +#include "region_block_kokkos.h" +#include "region_sphere_kokkos.h" + +using namespace LAMMPS_NS; +using namespace MathSpecialKokkos; + +enum { LJ93, LJ126, LJ1043, COLLOID, HARMONIC, MORSE }; + +/* ---------------------------------------------------------------------- */ + +template +FixWallRegionKokkos::FixWallRegionKokkos(LAMMPS *lmp, int narg, char **arg) : + FixWallRegion(lmp, narg, arg) +{ + kokkosable = 1; + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = X_MASK | V_MASK | MASK_MASK; + datamask_modify = F_MASK; +} + +template +FixWallRegionKokkos::~FixWallRegionKokkos() +{ + if (copymode) return; + memoryKK->destroy_kokkos(k_vatom,vatom); +} + +/* ---------------------------------------------------------------------- */ + +template +void FixWallRegionKokkos::post_force(int vflag) +{ + atomKK->sync(execution_space,datamask_read); + atomKK->modified(execution_space,datamask_modify); + + // virial setup + + v_init(vflag); + + // reallocate per-atom arrays if necessary + + if (vflag_atom) { + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"wall_region:vatom"); + d_vatom = k_vatom.template view(); + } + + d_x = atomKK->k_x.template view(); + d_f = atomKK->k_f.template view(); + if (style == COLLOID) d_radius = atomKK->k_radius.template view(); + d_mask = atomKK->k_mask.template view(); + int nlocal = atomKK->nlocal; + + region->prematch(); + + // virial setup + + v_init(vflag); + + // region->match() ensures particle is in region or on surface, else error + // if returned contact dist r = 0, is on surface, also an error + // in COLLOID case, r <= radius is an error + // initilize ewall after region->prematch(), + // so a dynamic region can access last timestep values + + // energy intialize + // eflag is used to track whether wall energies have been communicated + + eflag = 0; + double result[10]; + copymode = 1; + + if(auto *regionKK = dynamic_cast*>(region)) { + FixWallRegionKokkosFunctor> functor(this,regionKK); + Kokkos::parallel_reduce(nlocal,functor,result); + } else if (auto *regionKK = dynamic_cast*>(region)){ + FixWallRegionKokkosFunctor> functor(this,regionKK); + Kokkos::parallel_reduce(nlocal,functor,result); + } + + copymode = 0; + for( int i=0 ; i<4 ; i++ ) ewall[i] = result[i]; + + if (vflag_global) { + virial[0] += result[4]; + virial[1] += result[5]; + virial[2] += result[6]; + virial[3] += result[7]; + virial[4] += result[8]; + virial[5] += result[9]; + } + + atomKK->modified(execution_space,F_MASK); + + if (vflag_atom) { + k_vatom.template modify(); + k_vatom.template sync(); + } +} + +/* ---------------------------------------------------------------------- + interaction of all particles in group with a wall + m = index of wall coeffs + which = xlo,xhi,ylo,yhi,zlo,zhi + error if any particle is on or behind wall +------------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void FixWallRegionKokkos::wall_particle(T regionKK, const int i, value_type result) const { + if (d_mask(i) & groupbit) { + + if (!regionKK->match_kokkos(d_x(i,0), d_x(i,1), d_x(i,2))) Kokkos::abort("Particle outside surface of region used in fix wall/region"); + + double rinv, tooclose; + + if (style == COLLOID) + tooclose = d_radius(i); + else + tooclose = 0.0; + + int n = regionKK->surface_kokkos(d_x(i,0), d_x(i,1), d_x(i,2), cutoff); + + for ( int m = 0; m < n; m++) { + + double r = regionKK->d_contact[m].r; + double delx = regionKK->d_contact[m].delx; + double dely = regionKK->d_contact[m].dely; + double delz = regionKK->d_contact[m].delz; + + if (r <= tooclose) + Kokkos::abort("Particle outside surface of region used in fix wall/region"); + else + rinv = 1.0 / r; + + double fwallKK, engKK; + + if (style == LJ93) engKK = lj93(r,fwallKK); + else if (style == LJ126) engKK = lj126(r,fwallKK); + else if (style == LJ1043) engKK = lj1043(r,fwallKK); + else if (style == MORSE) engKK = morse(r,fwallKK); + else if (style == COLLOID) engKK = colloid(r,d_radius(i),fwallKK); + else engKK = harmonic(r,fwallKK); + + double fx = fwallKK * delx * rinv; + double fy = fwallKK * dely * rinv; + double fz = fwallKK * delz * rinv; + d_f(i,0) += fx; + d_f(i,1) += fy; + d_f(i,2) += fz; + result[1] -= fx; + result[2] -= fy; + result[3] -= fz; + result[0] += engKK; + if (evflag) { + double v[6] = { + fx * delx, + fy * dely, + fz * delz, + fx * dely, + fx * delz, + fy * delz + }; + v_tally(result,i,v); + } + } + } +} + +/* ---------------------------------------------------------------------- + LJ 9/3 interaction for particle with wall + compute eng and fwall = magnitude of wall force +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double FixWallRegionKokkos::lj93(double r, double& fwallKK) const +{ + double rinv = 1.0 / r; + double r2inv = rinv * rinv; + double r4inv = r2inv * r2inv; + double r10inv = r4inv * r4inv * r2inv; + fwallKK = coeff1 * r10inv - coeff2 * r4inv; + return coeff3 * r4inv * r4inv * rinv - coeff4 * r2inv * rinv - offset; +} + +/* ---------------------------------------------------------------------- + LJ 12/6 interaction for particle with wall + compute eng and fwall = magnitude of wall force +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double FixWallRegionKokkos::lj126(double r, double& fwallKK) const +{ + double rinv = 1.0 / r; + double r2inv = rinv * rinv; + double r6inv = r2inv * r2inv * r2inv; + fwallKK = r6inv * (coeff1 * r6inv - coeff2) * rinv; + return r6inv * (coeff3 * r6inv - coeff4) - offset; +} + +/* ---------------------------------------------------------------------- + LJ 10/4/3 interaction for particle with wall + compute eng and fwall = magnitude of wall force +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double FixWallRegionKokkos::lj1043(double r, double& fwallKK) const +{ + double rinv = 1.0 / r; + double r2inv = rinv * rinv; + double r4inv = r2inv * r2inv; + double r10inv = r4inv * r4inv * r2inv; + fwallKK = coeff5 * r10inv * rinv - coeff6 * r4inv * rinv - coeff7 * powint(r + coeff4, -4); + return coeff1 * r10inv - coeff2 * r4inv - coeff3 * powint(r + coeff4, -3) - offset; +} + +/* ---------------------------------------------------------------------- + Morse interaction for particle with wall + compute eng and fwall = magnitude of wall force +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double FixWallRegionKokkos::morse(double r, double& fwallKK) const +{ + double dr = r - sigma; + double dexp = exp(-alpha * dr); + fwallKK = coeff1 * (dexp * dexp - dexp); + return epsilon * (dexp * dexp - 2.0 * dexp) - offset; +} + +/* ---------------------------------------------------------------------- + colloid interaction for finite-size particle of rad with wall + compute eng and fwall = magnitude of wall force +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double FixWallRegionKokkos::colloid(double r, double rad, double& fwallKK) const +{ + double new_coeff2 = coeff2 * rad * rad * rad; + double diam = 2.0 * rad; + + double rad2 = rad * rad; + double rad4 = rad2 * rad2; + double rad8 = rad4 * rad4; + double delta2 = rad2 - r * r; + double rinv = 1.0 / delta2; + double r2inv = rinv * rinv; + double r4inv = r2inv * r2inv; + double r8inv = r4inv * r4inv; + fwallKK = coeff1 * + (rad8 * rad + 27.0 * rad4 * rad2 * rad * r * r + 63.0 * rad4 * rad * powint(r, 4) + + 21.0 * rad2 * rad * powint(r, 6)) * + r8inv - + new_coeff2 * r2inv; + + double r2 = 0.5 * diam - r; + double rinv2 = 1.0 / r2; + double r2inv2 = rinv2 * rinv2; + double r4inv2 = r2inv2 * r2inv2; + double r3 = r + 0.5 * diam; + double rinv3 = 1.0 / r3; + double r2inv3 = rinv3 * rinv3; + double r4inv3 = r2inv3 * r2inv3; + return coeff3 * + ((-3.5 * diam + r) * r4inv2 * r2inv2 * rinv2 + + (3.5 * diam + r) * r4inv3 * r2inv3 * rinv3) - + coeff4 * ((-diam * r + r2 * r3 * (log(-r2) - log(r3))) * (-rinv2) * rinv3) - offset; +} + +/* ---------------------------------------------------------------------- + harmonic interaction for particle with wall + compute eng and fwall = magnitude of wall force +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double FixWallRegionKokkos::harmonic(double r, double& fwallKK) const +{ + double dr = cutoff - r; + fwallKK = 2.0 * epsilon * dr; + return epsilon * dr * dr; +} + +/* ---------------------------------------------------------------------- + tally virial into global and per-atom accumulators + i = local index of atom + v = total virial for the interaction + increment global virial by v + increment per-atom virial by v + this method can be used when fix computes forces in post_force() + and the force depends on a distance to some external object + e.g. fix wall/lj93: compute virial only on owned atoms +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixWallRegionKokkos::v_tally(value_type result, int i, double *v) const +{ + if (vflag_global) { + result[4] += v[0]; + result[5] += v[1]; + result[6] += v[2]; + result[7] += v[3]; + result[8] += v[4]; + result[9] += v[5]; + } + + if (vflag_atom) { + Kokkos::atomic_add(&(d_vatom(i,0)),v[0]); + Kokkos::atomic_add(&(d_vatom(i,1)),v[1]); + Kokkos::atomic_add(&(d_vatom(i,2)),v[2]); + Kokkos::atomic_add(&(d_vatom(i,3)),v[3]); + Kokkos::atomic_add(&(d_vatom(i,4)),v[4]); + Kokkos::atomic_add(&(d_vatom(i,5)),v[5]); + } +} + +namespace LAMMPS_NS { +template class FixWallRegionKokkos; +#ifdef LMP_KOKKOS_GPU +template class FixWallRegionKokkos; +#endif +} diff --git a/src/KOKKOS/fix_wall_region_kokkos.h b/src/KOKKOS/fix_wall_region_kokkos.h new file mode 100644 index 0000000000..e959ffc42c --- /dev/null +++ b/src/KOKKOS/fix_wall_region_kokkos.h @@ -0,0 +1,108 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS +// clang-format off +FixStyle(wall/region/kk,FixWallRegionKokkos); +FixStyle(wall/region/kk/device,FixWallRegionKokkos); +FixStyle(wall/region/kk/host,FixWallRegionKokkos); +// clang-format on +#else + +// clang-format off +#ifndef LMP_FIX_WALL_REGION_KOKKOS_H +#define LMP_FIX_WALL_REGION_KOKKOS_H + +#include "fix_wall_region.h" + +#include "kokkos_type.h" +#include "region_block_kokkos.h" +#include "region_sphere_kokkos.h" + +namespace LAMMPS_NS { + +template +class FixWallRegionKokkos : public FixWallRegion { + public: + typedef DeviceType device_type; + typedef ArrayTypes AT; + typedef double value_type[]; + + FixWallRegionKokkos(class LAMMPS *, int, char **); + ~FixWallRegionKokkos() override; + void post_force(int) override; + + template + KOKKOS_INLINE_FUNCTION + void wall_particle(T, const int, value_type) const; + + private: + + typename AT::t_x_array d_x; + typename AT::t_f_array d_f; + typename AT::t_float_1d d_radius; + typename AT::t_int_1d d_mask; + + DAT::tdual_virial_array k_vatom; + typename AT::t_virial_array d_vatom; + + KOKKOS_INLINE_FUNCTION + double lj93(double, double&) const; + + KOKKOS_INLINE_FUNCTION + double lj126(double, double&) const; + + KOKKOS_INLINE_FUNCTION + double lj1043(double, double&) const; + + KOKKOS_INLINE_FUNCTION + double morse(double, double&) const; + + KOKKOS_INLINE_FUNCTION + double colloid(double, double, double&) const; + + KOKKOS_INLINE_FUNCTION + double harmonic(double, double&) const; + + KOKKOS_INLINE_FUNCTION + void v_tally(value_type, int, double*) const; + +}; + +template +struct FixWallRegionKokkosFunctor { + typedef DeviceType device_type; + typedef double value_type[]; + const int value_count; + FixWallRegionKokkos c; + T *regionKK; + + FixWallRegionKokkosFunctor(FixWallRegionKokkos* c_ptr, T *regionKK): + value_count(10), c(*c_ptr), regionKK(regionKK) {} + + KOKKOS_INLINE_FUNCTION + void init(value_type result) const { + for (int i=0 ; i<10 ; i++ ) result[i] = 0.0; + } + + KOKKOS_INLINE_FUNCTION + void operator()(const int i, value_type result) const { + c.wall_particle(regionKK,i,result); + } +}; + +} + +#endif +#endif + diff --git a/src/KOKKOS/group_kokkos.cpp b/src/KOKKOS/group_kokkos.cpp new file mode 100644 index 0000000000..b2de2e6a64 --- /dev/null +++ b/src/KOKKOS/group_kokkos.cpp @@ -0,0 +1,363 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Mitch Murphy (alphataubio at gmail) +------------------------------------------------------------------------- */ + +#include "group_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "domain_kokkos.h" +#include "kokkos_few.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +template +GroupKokkos::GroupKokkos(LAMMPS *lmp) : Group(lmp) +{ + atomKK = (AtomKokkos *)atom; + execution_space = ExecutionSpaceFromDevice::space; +} + +// ---------------------------------------------------------------------- +// computations on a group of atoms +// ---------------------------------------------------------------------- + +/* ---------------------------------------------------------------------- + compute the total mass of group of atoms + use either per-type mass or per-atom rmass +------------------------------------------------------------------------- */ + +template +double GroupKokkos::mass(int igroup) +{ + int groupbit = bitmask[igroup]; + auto d_mask = atomKK->k_mask.template view(); + double one = 0.0; + + if (atomKK->rmass) { + + auto d_rmass = atomKK->k_rmass.template view(); + atomKK->sync(execution_space,MASK_MASK|RMASS_MASK); + + Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_one) { + if (d_mask(i) & groupbit) l_one += d_rmass(i); + }, one); + + } else { + + auto d_mass = atomKK->k_mass.template view(); + auto d_type = atomKK->k_type.template view(); + atomKK->sync(execution_space,MASK_MASK|TYPE_MASK); + atomKK->k_mass.template sync(); + + Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_one) { + if (d_mask(i) & groupbit) l_one += d_mass(d_type(i)); + }, one); + + } + + double all; + MPI_Allreduce(&one, &all, 1, MPI_DOUBLE, MPI_SUM, world); + return all; +} + +/* ---------------------------------------------------------------------- + compute the center-of-mass coords of group of atoms + masstotal = total mass + return center-of-mass coords in cm[] + must unwrap atoms to compute center-of-mass correctly +------------------------------------------------------------------------- */ + +template +void GroupKokkos::xcm(int igroup, double masstotal, double *xcm) +{ + int groupbit = bitmask[igroup]; + auto d_x = atomKK->k_x.template view(); + auto d_mask = atomKK->k_mask.template view(); + auto d_image = atomKK->k_image.template view(); + auto l_prd = Few(domain->prd); + auto l_h = Few(domain->h); + auto l_triclinic = domain->triclinic; + double cmone[3] = {0.0, 0.0, 0.0}; + + if (atomKK->rmass) { + + auto d_rmass = atomKK->k_rmass.template view(); + atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK); + + Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_cmx, double &l_cmy, double &l_cmz) { + if (d_mask(i) & groupbit) { + double massone = d_rmass(i); + Few x_i; + x_i[0] = d_x(i,0); + x_i[1] = d_x(i,1); + x_i[2] = d_x(i,2); + auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); + l_cmx += unwrapKK[0] * massone; + l_cmy += unwrapKK[1] * massone; + l_cmz += unwrapKK[2] * massone; + } + }, cmone[0], cmone[1], cmone[2]); + + } else { + + auto d_mass = atomKK->k_mass.template view(); + auto d_type = atomKK->k_type.template view(); + atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK); + atomKK->k_mass.template sync(); + + Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_cmx, double &l_cmy, double &l_cmz) { + if (d_mask(i) & groupbit) { + double massone = d_mass(d_type(i)); + Few x_i; + x_i[0] = d_x(i,0); + x_i[1] = d_x(i,1); + x_i[2] = d_x(i,2); + auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); + l_cmx += unwrapKK[0] * massone; + l_cmy += unwrapKK[1] * massone; + l_cmz += unwrapKK[2] * massone; + } + }, cmone[0], cmone[1], cmone[2]); + + } + + MPI_Allreduce(cmone, xcm, 3, MPI_DOUBLE, MPI_SUM, world); + if (masstotal > 0.0) { + xcm[0] /= masstotal; + xcm[1] /= masstotal; + xcm[2] /= masstotal; + } +} + +/* ---------------------------------------------------------------------- + compute the center-of-mass velocity of group of atoms + masstotal = total mass + return center-of-mass velocity in vcm[] +------------------------------------------------------------------------- */ + +template +void GroupKokkos::vcm(int igroup, double masstotal, double *vcm) +{ + int groupbit = bitmask[igroup]; + auto d_v = atomKK->k_v.template view(); + auto d_mask = atomKK->k_mask.template view(); + auto d_image = atomKK->k_image.template view(); + double p[3] = {0.0, 0.0, 0.0}; + + if (atomKK->rmass) { + + auto d_rmass = atomKK->k_rmass.template view(); + atomKK->sync(execution_space,V_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK); + + Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) { + if (d_mask(i) & groupbit) { + double massone = d_rmass(i); + l_px += d_v(i,0) * massone; + l_py += d_v(i,1) * massone; + l_pz += d_v(i,2) * massone; + } + }, p[0], p[1], p[2]); + + } else { + + auto d_mass = atomKK->k_mass.template view(); + auto d_type = atomKK->k_type.template view(); + atomKK->sync(execution_space,V_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK); + atomKK->k_mass.template sync(); + + Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) { + if (d_mask(i) & groupbit) { + double massone = d_mass(d_type(i)); + l_px += d_v(i,0) * massone; + l_py += d_v(i,1) * massone; + l_pz += d_v(i,2) * massone; + } + }, p[0], p[1], p[2]); + + } + + MPI_Allreduce(p, vcm, 3, MPI_DOUBLE, MPI_SUM, world); + if (masstotal > 0.0) { + vcm[0] /= masstotal; + vcm[1] /= masstotal; + vcm[2] /= masstotal; + } +} + +/* ---------------------------------------------------------------------- + compute the angular momentum L (lmom) of group + around center-of-mass cm + must unwrap atoms to compute L correctly +------------------------------------------------------------------------- */ + +template +void GroupKokkos::angmom(int igroup, double *xcm, double *lmom) +{ + int groupbit = bitmask[igroup]; + auto d_x = atomKK->k_x.template view(); + auto d_v = atomKK->k_v.template view(); + auto d_mask = atomKK->k_mask.template view(); + auto d_image = atomKK->k_image.template view(); + auto l_prd = Few(domain->prd); + auto l_h = Few(domain->h); + auto l_triclinic = domain->triclinic; + auto l_xcm0 = xcm[0]; + auto l_xcm1 = xcm[1]; + auto l_xcm2 = xcm[2]; + double p[3] = {0.0, 0.0, 0.0}; + + if (atomKK->rmass) { + + auto d_rmass = atomKK->k_rmass.template view(); + atomKK->sync(execution_space,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK); + + Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) { + if (d_mask(i) & groupbit) { + double massone = d_rmass(i); + Few x_i; + x_i[0] = d_x(i,0); + x_i[1] = d_x(i,1); + x_i[2] = d_x(i,2); + auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); + double dx = unwrapKK[0] - l_xcm0; + double dy = unwrapKK[1] - l_xcm1; + double dz = unwrapKK[2] - l_xcm2; + l_px += massone * (dy * d_v(i,2) - dz * d_v(i,1)); + l_py += massone * (dz * d_v(i,0) - dx * d_v(i,2)); + l_pz += massone * (dx * d_v(i,1) - dy * d_v(i,0)); + } + }, p[0], p[1], p[2]); + + } else { + + auto d_mass = atomKK->k_mass.template view(); + auto d_type = atomKK->k_type.template view(); + atomKK->sync(execution_space,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK); + atomKK->k_mass.template sync(); + + Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) { + if (d_mask(i) & groupbit) { + double massone = d_mass(d_type(i)); + Few x_i; + x_i[0] = d_x(i,0); + x_i[1] = d_x(i,1); + x_i[2] = d_x(i,2); + auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); + double dx = unwrapKK[0] - l_xcm0; + double dy = unwrapKK[1] - l_xcm1; + double dz = unwrapKK[2] - l_xcm2; + l_px += massone * (dy * d_v(i,2) - dz * d_v(i,1)); + l_py += massone * (dz * d_v(i,0) - dx * d_v(i,2)); + l_pz += massone * (dx * d_v(i,1) - dy * d_v(i,0)); + } + }, p[0], p[1], p[2]); + + } + MPI_Allreduce(p, lmom, 3, MPI_DOUBLE, MPI_SUM, world); +} + +/* ---------------------------------------------------------------------- + compute moment of inertia tensor around center-of-mass xcm of group + must unwrap atoms to compute itensor correctly +------------------------------------------------------------------------- */ + +template +void GroupKokkos::inertia(int igroup, double *xcm, double itensor[3][3]) +{ + int groupbit = bitmask[igroup]; + auto d_x = atomKK->k_x.template view(); + auto d_mask = atomKK->k_mask.template view(); + auto d_image = atomKK->k_image.template view(); + auto l_prd = Few(domain->prd); + auto l_h = Few(domain->h); + auto l_triclinic = domain->triclinic; + auto l_xcm0 = xcm[0]; + auto l_xcm1 = xcm[1]; + auto l_xcm2 = xcm[2]; + + double ione[3][3]; + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) ione[i][j] = 0.0; + + if (atomKK->rmass) { + + auto d_rmass = atomKK->k_rmass.template view(); + atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK); + + Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_i00, double &l_i11, double &l_i22, double &l_i01, double &l_i12, double &l_i02) { + if (d_mask(i) & groupbit) { + double massone = d_rmass(i); + Few x_i; + x_i[0] = d_x(i,0); + x_i[1] = d_x(i,1); + x_i[2] = d_x(i,2); + auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); + double dx = unwrapKK[0] - l_xcm0; + double dy = unwrapKK[1] - l_xcm1; + double dz = unwrapKK[2] - l_xcm2; + l_i00 += massone * (dy * dy + dz * dz); + l_i11 += massone * (dx * dx + dz * dz); + l_i22 += massone * (dx * dx + dy * dy); + l_i01 -= massone * dx * dy; + l_i12 -= massone * dy * dz; + l_i02 -= massone * dx * dz; + } + }, ione[0][0], ione[1][1], ione[2][2], ione[0][1], ione[1][2], ione[0][2]); + + } else { + + auto d_mass = atomKK->k_mass.template view(); + auto d_type = atomKK->k_type.template view(); + atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK); + atomKK->k_mass.template sync(); + + Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_i00, double &l_i11, double &l_i22, double &l_i01, double &l_i12, double &l_i02) { + if (d_mask(i) & groupbit) { + double massone = d_mass(d_type(i)); + Few x_i; + x_i[0] = d_x(i,0); + x_i[1] = d_x(i,1); + x_i[2] = d_x(i,2); + auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); + double dx = unwrapKK[0] - l_xcm0; + double dy = unwrapKK[1] - l_xcm1; + double dz = unwrapKK[2] - l_xcm2; + l_i00 += massone * (dy * dy + dz * dz); + l_i11 += massone * (dx * dx + dz * dz); + l_i22 += massone * (dx * dx + dy * dy); + l_i01 -= massone * dx * dy; + l_i12 -= massone * dy * dz; + l_i02 -= massone * dx * dz; + } + }, ione[0][0], ione[1][1], ione[2][2], ione[0][1], ione[1][2], ione[0][2]); + + } + + ione[1][0] = ione[0][1]; + ione[2][1] = ione[1][2]; + ione[2][0] = ione[0][2]; + MPI_Allreduce(&ione[0][0], &itensor[0][0], 9, MPI_DOUBLE, MPI_SUM, world); +} + +namespace LAMMPS_NS { +template class GroupKokkos; +#ifdef LMP_KOKKOS_GPU +template class GroupKokkos; +#endif +} diff --git a/src/KOKKOS/group_kokkos.h b/src/KOKKOS/group_kokkos.h new file mode 100644 index 0000000000..f23023b17c --- /dev/null +++ b/src/KOKKOS/group_kokkos.h @@ -0,0 +1,38 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifndef LMP_GROUP_KOKKOS_H +#define LMP_GROUP_KOKKOS_H + +#include "group.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +template +class GroupKokkos : public Group { + public: + GroupKokkos(class LAMMPS *); + double mass(int); // total mass of atoms in group + void xcm(int, double, double *); // center-of-mass coords of group + void vcm(int, double, double *); // center-of-mass velocity of group + void angmom(int, double *, double *); // angular momentum of group + void inertia(int, double *, double[3][3]); // inertia tensor + + private: + ExecutionSpace execution_space; +}; + +} // namespace LAMMPS_NS + +#endif diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index 9d3e3fca0f..fd304efd1c 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -41,6 +41,16 @@ constexpr int HALF = 4; #define MAX_TYPES_STACKPARAMS 12 static constexpr LAMMPS_NS::bigint LMP_KOKKOS_AV_DELTA = 10; +namespace LAMMPS_NS { + union d_ubuf { + double d; + int64_t i; + KOKKOS_INLINE_FUNCTION d_ubuf(double arg) : d(arg) {} + KOKKOS_INLINE_FUNCTION d_ubuf(int64_t arg) : i(arg) {} + KOKKOS_INLINE_FUNCTION d_ubuf(int arg) : i(arg) {} + }; +} + namespace Kokkos { static auto NoInit = [](std::string const& label) { return Kokkos::view_alloc(Kokkos::WithoutInitializing, label); @@ -166,7 +176,7 @@ template using KKScatterView = Kokkos::Experimental::ScatterView; -// set ExecutionSpace stuct with variable "space" +// set ExecutionSpace struct with variable "space" template struct ExecutionSpaceFromDevice; @@ -1108,6 +1118,14 @@ typedef tdual_float_2d::t_host_um t_float_2d_um; typedef tdual_float_2d::t_host_const_um t_float_2d_const_um; typedef tdual_float_2d::t_host_const_randomread t_float_2d_randomread; +//3d float array n +typedef Kokkos::DualView tdual_float_3d; +typedef tdual_float_3d::t_host t_float_3d; +typedef tdual_float_3d::t_host_const t_float_3d_const; +typedef tdual_float_3d::t_host_um t_float_3d_um; +typedef tdual_float_3d::t_host_const_um t_float_3d_const_um; +typedef tdual_float_3d::t_host_const_randomread t_float_3d_randomread; + #ifdef LMP_KOKKOS_NO_LEGACY typedef Kokkos::DualView tdual_float_1d_4; #else diff --git a/src/KOKKOS/math_special_kokkos.h b/src/KOKKOS/math_special_kokkos.h index d8af28d33a..1cc35e1969 100644 --- a/src/KOKKOS/math_special_kokkos.h +++ b/src/KOKKOS/math_special_kokkos.h @@ -248,6 +248,29 @@ namespace MathSpecialKokkos { return yy; } + + /* ---------------------------------------------------------------------- + ans = v1 - v2 + ------------------------------------------------------------------------- */ + + KOKKOS_INLINE_FUNCTION + static void sub3(const double *v1, const double *v2, double *ans) + { + ans[0] = v1[0] - v2[0]; + ans[1] = v1[1] - v2[1]; + ans[2] = v1[2] - v2[2]; + } + + /* ---------------------------------------------------------------------- + dot product of 2 vectors + ------------------------------------------------------------------------- */ + + KOKKOS_INLINE_FUNCTION + static double dot3(const double *v1, const double *v2) + { + return v1[0] * v2[0] + v1[1] * v2[1] + v1[2] * v2[2]; + } + } // namespace MathSpecialKokkos } // namespace LAMMPS_NS diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index 4fec623c5d..fb3149ca4b 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -151,7 +151,11 @@ void NPairKokkos::build(NeighList *list_) if (GHOST) nall += atom->nghost; - if (nall == 0) return; + if (nall == 0) { + list->inum = 0; + list->gnum = 0; + return; + } list->grow(nall); diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h index c4bd603041..399142dfaf 100644 --- a/src/KOKKOS/pair_kokkos.h +++ b/src/KOKKOS/pair_kokkos.h @@ -923,6 +923,8 @@ int GetMaxNeighs(NeighStyle* list) maxneigh = MAX(maxneigh,num_neighs); }, Kokkos::Max(maxneigh)); + if (maxneigh < 0) maxneigh = 0; + return maxneigh; } @@ -958,6 +960,7 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&P if (!vectorsize || lastcall < fpair->lmp->neighbor->lastcall) { lastcall = fpair->lmp->update->ntimestep; vectorsize = GetMaxNeighs(list); + if (vectorsize == 0) vectorsize = 1; vectorsize = MathSpecial::powint(2,(int(log2(vectorsize) + 0.5))); // round to nearest power of 2 #if defined(KOKKOS_ENABLE_HIP) diff --git a/src/KOKKOS/region_block_kokkos.cpp b/src/KOKKOS/region_block_kokkos.cpp index 8df33c32db..05ac8eea2a 100644 --- a/src/KOKKOS/region_block_kokkos.cpp +++ b/src/KOKKOS/region_block_kokkos.cpp @@ -13,10 +13,13 @@ ------------------------------------------------------------------------- */ #include "region_block_kokkos.h" + #include "atom_kokkos.h" #include "atom_masks.h" +#include "memory_kokkos.h" using namespace LAMMPS_NS; +using namespace MathSpecialKokkos; /* ---------------------------------------------------------------------- */ @@ -25,135 +28,50 @@ RegBlockKokkos::RegBlockKokkos(LAMMPS *lmp, int narg, char **arg) : RegBlock(lmp, narg, arg) { atomKK = (AtomKokkos*) atom; + memoryKK->create_kokkos(d_contact,6,"region_block:d_contact"); } -/* ---------------------------------------------------------------------- - inside = 1 if x,y,z is inside or on surface - inside = 0 if x,y,z is outside and not on surface -------------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- */ template -KOKKOS_INLINE_FUNCTION -int RegBlockKokkos::k_inside(double x, double y, double z) const +RegBlockKokkos::~RegBlockKokkos() { - if (x >= xlo && x <= xhi && y >= ylo && y <= yhi && z >= zlo && z <= zhi) - return 1; - return 0; + if (copymode) return; + memoryKK->destroy_kokkos(d_contact); } +/* ---------------------------------------------------------------------- */ + template void RegBlockKokkos::match_all_kokkos(int groupbit_in, DAT::tdual_int_1d k_match_in) { groupbit = groupbit_in; d_match = k_match_in.template view(); - auto execution_space = ExecutionSpaceFromDevice::space; atomKK->sync(execution_space, X_MASK | MASK_MASK); - - x = atomKK->k_x.view(); - mask = atomKK->k_mask.view(); + d_x = atomKK->k_x.view(); + d_mask = atomKK->k_mask.view(); int nlocal = atom->nlocal; copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); copymode = 0; - k_match_in.template modify(); } +/* ---------------------------------------------------------------------- */ + template KOKKOS_INLINE_FUNCTION void RegBlockKokkos::operator()(TagRegBlockMatchAll, const int &i) const { - if (mask[i] & groupbit) { - double x_tmp = x(i,0); - double y_tmp = x(i,1); - double z_tmp = x(i,2); - d_match[i] = match(x_tmp,y_tmp,z_tmp); + if (d_mask[i] & groupbit) { + double x_tmp = d_x(i,0); + double y_tmp = d_x(i,1); + double z_tmp = d_x(i,2); + d_match[i] = match_kokkos(x_tmp,y_tmp,z_tmp); } } -/* ---------------------------------------------------------------------- - determine if point x,y,z is a match to region volume - XOR computes 0 if 2 args are the same, 1 if different - note that k_inside() returns 1 for points on surface of region - thus point on surface of exterior region will not match - if region has variable shape, invoke shape_update() once per timestep - if region is dynamic, apply inverse transform to x,y,z - unmove first, then unrotate, so don't have to change rotation point - caller is responsible for wrapping this call with - modify->clearstep_compute() and modify->addstep_compute() if needed -------------------------------------------------------------------------- */ - -template -KOKKOS_INLINE_FUNCTION -int RegBlockKokkos::match(double x, double y, double z) const -{ - if (dynamic) inverse_transform(x,y,z); - return !(k_inside(x,y,z) ^ interior); -} - -/* ---------------------------------------------------------------------- - transform a point x,y,z in moved space back to region space - undisplace first, then unrotate (around original P) -------------------------------------------------------------------------- */ - -template -KOKKOS_INLINE_FUNCTION -void RegBlockKokkos::inverse_transform(double &x, double &y, double &z) const -{ - if (moveflag) { - x -= dx; - y -= dy; - z -= dz; - } - if (rotateflag) rotate(x,y,z,-theta); -} - -/* ---------------------------------------------------------------------- - rotate x,y,z by angle via right-hand rule around point and runit normal - sign of angle determines whether rotating forward/backward in time - return updated x,y,z - R = vector axis of rotation - P = point = point to rotate around - R0 = runit = unit vector for R - X0 = x,y,z = initial coord of atom - D = X0 - P = vector from P to X0 - C = (D dot R0) R0 = projection of D onto R, i.e. Dparallel - A = D - C = vector from R line to X0, i.e. Dperp - B = R0 cross A = vector perp to A in plane of rotation, same len as A - A,B define plane of circular rotation around R line - new x,y,z = P + C + A cos(angle) + B sin(angle) -------------------------------------------------------------------------- */ - -template -KOKKOS_INLINE_FUNCTION -void RegBlockKokkos::rotate(double &x, double &y, double &z, double angle) const -{ - double a[3],b[3],c[3],d[3],disp[3]; - - double sine = sin(angle); - double cosine = cos(angle); - d[0] = x - point[0]; - d[1] = y - point[1]; - d[2] = z - point[2]; - double x0dotr = d[0]*runit[0] + d[1]*runit[1] + d[2]*runit[2]; - c[0] = x0dotr * runit[0]; - c[1] = x0dotr * runit[1]; - c[2] = x0dotr * runit[2]; - a[0] = d[0] - c[0]; - a[1] = d[1] - c[1]; - a[2] = d[2] - c[2]; - b[0] = runit[1]*a[2] - runit[2]*a[1]; - b[1] = runit[2]*a[0] - runit[0]*a[2]; - b[2] = runit[0]*a[1] - runit[1]*a[0]; - disp[0] = a[0]*cosine + b[0]*sine; - disp[1] = a[1]*cosine + b[1]*sine; - disp[2] = a[2]*cosine + b[2]*sine; - x = point[0] + c[0] + disp[0]; - y = point[1] + c[1] + disp[1]; - z = point[2] + c[2] + disp[2]; -} - namespace LAMMPS_NS { template class RegBlockKokkos; #ifdef LMP_KOKKOS_GPU diff --git a/src/KOKKOS/region_block_kokkos.h b/src/KOKKOS/region_block_kokkos.h index 017e4e5ee4..052a6a4bcf 100644 --- a/src/KOKKOS/region_block_kokkos.h +++ b/src/KOKKOS/region_block_kokkos.h @@ -24,15 +24,19 @@ RegionStyle(block/kk/host,RegBlockKokkos); #define LMP_REGION_BLOCK_KOKKOS_H #include "region_block.h" + #include "kokkos_base.h" #include "kokkos_type.h" +#include "math_special_kokkos.h" namespace LAMMPS_NS { +using namespace MathSpecialKokkos; + struct TagRegBlockMatchAll{}; template -class RegBlockKokkos : public RegBlock, public KokkosBase { +class RegBlockKokkos : public RegBlock, public KokkosBase { friend class FixPour; public: @@ -40,27 +44,379 @@ class RegBlockKokkos : public RegBlock, public KokkosBase { typedef ArrayTypes AT; RegBlockKokkos(class LAMMPS *, int, char **); + ~RegBlockKokkos() override; void match_all_kokkos(int, DAT::tdual_int_1d) override; KOKKOS_INLINE_FUNCTION void operator()(TagRegBlockMatchAll, const int&) const; + KOKKOS_INLINE_FUNCTION + int match_kokkos(double x, double y, double z) const + { + if (dynamic) inverse_transform(x,y,z); + if (openflag) return 1; + return !(k_inside(x,y,z) ^ interior); + } + + KOKKOS_INLINE_FUNCTION + int surface_kokkos(double x, double y, double z, double cutoff) + { + int ncontact; + double xs, ys, zs; + double xnear[3], xorig[3]; + + if (dynamic) { + xorig[0] = x; xorig[1] = y; xorig[2] = z; + inverse_transform(x, y, z); + } + + xnear[0] = x; xnear[1] = y; xnear[2] = z; + + if (!openflag) { + if (interior) + ncontact = surface_interior_kokkos(xnear, cutoff); + else + ncontact = surface_exterior_kokkos(xnear, cutoff); + } else { + // one of surface_int/ext() will return 0 + // so no need to worry about offset of contact indices + ncontact = surface_exterior_kokkos(xnear, cutoff) + surface_interior_kokkos(xnear, cutoff); + } + + if (rotateflag && ncontact) { + for (int i = 0; i < ncontact; i++) { + xs = xnear[0] - d_contact[i].delx; + ys = xnear[1] - d_contact[i].dely; + zs = xnear[2] - d_contact[i].delz; + forward_transform(xs, ys, zs); + d_contact[i].delx = xorig[0] - xs; + d_contact[i].dely = xorig[1] - ys; + d_contact[i].delz = xorig[2] - zs; + } + } + + return ncontact; + } + + Kokkos::View d_contact; + private: int groupbit; typename AT::t_int_1d d_match; - - typename AT::t_x_array_randomread x; - typename AT::t_int_1d_randomread mask; + typename AT::t_x_array_randomread d_x; + typename AT::t_int_1d_randomread d_mask; KOKKOS_INLINE_FUNCTION - int k_inside(double, double, double) const; + int surface_interior_kokkos(double *x, double cutoff) + { + double delta; + + // x is exterior to block + + if (x[0] < xlo || x[0] > xhi || x[1] < ylo || x[1] > yhi || x[2] < zlo || x[2] > zhi) return 0; + + // x is interior to block or on its surface + + int n = 0; + + delta = x[0] - xlo; + if (delta < cutoff && !open_faces[0]) { + d_contact[n].r = delta; + d_contact[n].delx = delta; + d_contact[n].dely = d_contact[n].delz = 0.0; + d_contact[n].radius = 0; + d_contact[n].iwall = 0; + n++; + } + delta = xhi - x[0]; + if (delta < cutoff && !open_faces[1]) { + d_contact[n].r = delta; + d_contact[n].delx = -delta; + d_contact[n].dely = d_contact[n].delz = 0.0; + d_contact[n].radius = 0; + d_contact[n].iwall = 1; + n++; + } + + delta = x[1] - ylo; + if (delta < cutoff && !open_faces[2]) { + d_contact[n].r = delta; + d_contact[n].dely = delta; + d_contact[n].delx = d_contact[n].delz = 0.0; + d_contact[n].radius = 0; + d_contact[n].iwall = 2; + n++; + } + delta = yhi - x[1]; + if (delta < cutoff && !open_faces[3]) { + d_contact[n].r = delta; + d_contact[n].dely = -delta; + d_contact[n].delx = d_contact[n].delz = 0.0; + d_contact[n].radius = 0; + d_contact[n].iwall = 3; + n++; + } + + delta = x[2] - zlo; + if (delta < cutoff && !open_faces[4]) { + d_contact[n].r = delta; + d_contact[n].delz = delta; + d_contact[n].delx = d_contact[n].dely = 0.0; + d_contact[n].radius = 0; + d_contact[n].iwall = 4; + n++; + } + delta = zhi - x[2]; + if (delta < cutoff && !open_faces[5]) { + d_contact[n].r = delta; + d_contact[n].delz = -delta; + d_contact[n].delx = d_contact[n].dely = 0.0; + d_contact[n].radius = 0; + d_contact[n].iwall = 5; + n++; + } + + return n; + } + KOKKOS_INLINE_FUNCTION - int match(double, double, double) const; + int surface_exterior_kokkos(double *x, double cutoff) + { + double xp, yp, zp; + double xc, yc, zc, dist, mindist; + + // x is far enough from block that there is no contact + // x is interior to block + + if (x[0] <= xlo - cutoff || x[0] >= xhi + cutoff || x[1] <= ylo - cutoff || + x[1] >= yhi + cutoff || x[2] <= zlo - cutoff || x[2] >= zhi + cutoff) + return 0; + if (x[0] > xlo && x[0] < xhi && x[1] > ylo && x[1] < yhi && x[2] > zlo && x[2] < zhi) return 0; + + // x is exterior to block or on its surface + // xp,yp,zp = point on surface of block that x is closest to + // could be edge or corner pt of block + // do not add contact point if r >= cutoff + + if (!openflag) { + if (x[0] < xlo) + xp = xlo; + else if (x[0] > xhi) + xp = xhi; + else + xp = x[0]; + if (x[1] < ylo) + yp = ylo; + else if (x[1] > yhi) + yp = yhi; + else + yp = x[1]; + if (x[2] < zlo) + zp = zlo; + else if (x[2] > zhi) + zp = zhi; + else + zp = x[2]; + } else { + mindist = MAXDOUBLEINT; + for (int i = 0; i < 6; i++) { + if (open_faces[i]) continue; + dist = find_closest_point(i, x, xc, yc, zc); + if (dist < mindist) { + xp = xc; + yp = yc; + zp = zc; + mindist = dist; + } + } + } + + add_contact(0, x, xp, yp, zp); + d_contact[0].iwall = 0; + if (d_contact[0].r < cutoff) return 1; + return 0; + } + KOKKOS_INLINE_FUNCTION - void inverse_transform(double &, double &, double &) const; + void add_contact(int n, double *x, double xp, double yp, double zp) + { + double delx = x[0] - xp; + double dely = x[1] - yp; + double delz = x[2] - zp; + d_contact[n].r = sqrt(delx * delx + dely * dely + delz * delz); + d_contact[n].radius = 0; + d_contact[n].delx = delx; + d_contact[n].dely = dely; + d_contact[n].delz = delz; + } + KOKKOS_INLINE_FUNCTION - void rotate(double &, double &, double &, double) const; + int k_inside(double x, double y, double z) const + { + if (x >= xlo && x <= xhi && y >= ylo && y <= yhi && z >= zlo && z <= zhi) + return 1; + return 0; + } + + KOKKOS_INLINE_FUNCTION + void forward_transform(double &x, double &y, double &z) const + { + if (rotateflag) rotate(x, y, z, theta); + if (moveflag) { + x += dx; + y += dy; + z += dz; + } + } + + KOKKOS_INLINE_FUNCTION + void inverse_transform(double &x, double &y, double &z) const + { + if (moveflag) { + x -= dx; + y -= dy; + z -= dz; + } + if (rotateflag) rotate(x,y,z,-theta); + } + + KOKKOS_INLINE_FUNCTION + void rotate(double &x, double &y, double &z, double angle) const + { + double a[3],b[3],c[3],d[3],disp[3]; + + double sine = sin(angle); + double cosine = cos(angle); + d[0] = x - point[0]; + d[1] = y - point[1]; + d[2] = z - point[2]; + double x0dotr = d[0]*runit[0] + d[1]*runit[1] + d[2]*runit[2]; + c[0] = x0dotr * runit[0]; + c[1] = x0dotr * runit[1]; + c[2] = x0dotr * runit[2]; + a[0] = d[0] - c[0]; + a[1] = d[1] - c[1]; + a[2] = d[2] - c[2]; + b[0] = runit[1]*a[2] - runit[2]*a[1]; + b[1] = runit[2]*a[0] - runit[0]*a[2]; + b[2] = runit[0]*a[1] - runit[1]*a[0]; + disp[0] = a[0]*cosine + b[0]*sine; + disp[1] = a[1]*cosine + b[1]*sine; + disp[2] = a[2]*cosine + b[2]*sine; + x = point[0] + c[0] + disp[0]; + y = point[1] + c[1] + disp[1]; + z = point[2] + c[2] + disp[2]; + } + + KOKKOS_INLINE_FUNCTION + void point_on_line_segment(double *a, double *b, double *c, double *d) + { + double ba[3], ca[3]; + + sub3(b, a, ba); + sub3(c, a, ca); + double t = dot3(ca, ba) / dot3(ba, ba); + if (t <= 0.0) { + d[0] = a[0]; + d[1] = a[1]; + d[2] = a[2]; + } else if (t >= 1.0) { + d[0] = b[0]; + d[1] = b[1]; + d[2] = b[2]; + } else { + d[0] = a[0] + t * ba[0]; + d[1] = a[1] + t * ba[1]; + d[2] = a[2] + t * ba[2]; + } + } + + KOKKOS_INLINE_FUNCTION + double inside_face(double *xproj, int iface) + { + if (iface < 2) { + if (xproj[1] > 0 && (xproj[1] < yhi - ylo) && xproj[2] > 0 && (xproj[2] < zhi - zlo)) return 1; + } else if (iface < 4) { + if (xproj[0] > 0 && (xproj[0] < (xhi - xlo)) && xproj[2] > 0 && (xproj[2] < (zhi - zlo))) + return 1; + } else { + if (xproj[0] > 0 && xproj[0] < (xhi - xlo) && xproj[1] > 0 && xproj[1] < (yhi - ylo)) return 1; + } + + return 0; + } + + KOKKOS_INLINE_FUNCTION + double find_closest_point(int i, double *x, double &xc, double &yc, double &zc) + { + double dot, d2, d2min; + double xr[3], xproj[3], p[3]; + + xr[0] = x[0] - corners[i][0][0]; + xr[1] = x[1] - corners[i][0][1]; + xr[2] = x[2] - corners[i][0][2]; + dot = face[i][0] * xr[0] + face[i][1] * xr[1] + face[i][2] * xr[2]; + xproj[0] = xr[0] - dot * face[i][0]; + xproj[1] = xr[1] - dot * face[i][1]; + xproj[2] = xr[2] - dot * face[i][2]; + + d2min = MAXDOUBLEINT; + + // check if point projects inside of face + + if (inside_face(xproj, i)) { + d2 = d2min = dot * dot; + xc = xproj[0] + corners[i][0][0]; + yc = xproj[1] + corners[i][0][1]; + zc = xproj[2] + corners[i][0][2]; + + // check each edge + + } else { + point_on_line_segment(corners[i][0], corners[i][1], x, p); + d2 = (p[0] - x[0]) * (p[0] - x[0]) + (p[1] - x[1]) * (p[1] - x[1]) + + (p[2] - x[2]) * (p[2] - x[2]); + if (d2 < d2min) { + d2min = d2; + xc = p[0]; + yc = p[1]; + zc = p[2]; + } + + point_on_line_segment(corners[i][1], corners[i][2], x, p); + d2 = (p[0] - x[0]) * (p[0] - x[0]) + (p[1] - x[1]) * (p[1] - x[1]) + + (p[2] - x[2]) * (p[2] - x[2]); + if (d2 < d2min) { + d2min = d2; + xc = p[0]; + yc = p[1]; + zc = p[2]; + } + + point_on_line_segment(corners[i][2], corners[i][3], x, p); + d2 = (p[0] - x[0]) * (p[0] - x[0]) + (p[1] - x[1]) * (p[1] - x[1]) + + (p[2] - x[2]) * (p[2] - x[2]); + if (d2 < d2min) { + d2min = d2; + xc = p[0]; + yc = p[1]; + zc = p[2]; + } + + point_on_line_segment(corners[i][3], corners[i][0], x, p); + d2 = (p[0] - x[0]) * (p[0] - x[0]) + (p[1] - x[1]) * (p[1] - x[1]) + + (p[2] - x[2]) * (p[2] - x[2]); + if (d2 < d2min) { + d2min = d2; + xc = p[0]; + yc = p[1]; + zc = p[2]; + } + } + + return d2min; + } }; diff --git a/src/KOKKOS/region_sphere_kokkos.cpp b/src/KOKKOS/region_sphere_kokkos.cpp new file mode 100644 index 0000000000..07275ee69e --- /dev/null +++ b/src/KOKKOS/region_sphere_kokkos.cpp @@ -0,0 +1,86 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Mitch Murphy (alphataubio at gmail) +------------------------------------------------------------------------- */ + +#include "region_sphere_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "memory_kokkos.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +template +RegSphereKokkos::RegSphereKokkos(LAMMPS *lmp, int narg, char **arg) + : RegSphere(lmp, narg, arg) +{ + atomKK = (AtomKokkos*) atom; + memoryKK->create_kokkos(d_contact,1,"region_sphere:d_contact"); +} + +/* ---------------------------------------------------------------------- */ + +template +RegSphereKokkos::~RegSphereKokkos() +{ + if (copymode) return; + memoryKK->destroy_kokkos(d_contact); +} + +/* ---------------------------------------------------------------------- */ + +template +void RegSphereKokkos::match_all_kokkos(int groupbit_in, DAT::tdual_int_1d k_match_in) +{ + groupbit = groupbit_in; + d_match = k_match_in.template view(); + auto execution_space = ExecutionSpaceFromDevice::space; + atomKK->sync(execution_space, X_MASK | MASK_MASK); + d_x = atomKK->k_x.view(); + d_mask = atomKK->k_mask.view(); + int nlocal = atom->nlocal; + + copymode = 1; + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); + copymode = 0; + k_match_in.template modify(); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void RegSphereKokkos::operator()(TagRegSphereMatchAll, const int &i) const { + if (d_mask[i] & groupbit) { + double x_tmp = d_x(i,0); + double y_tmp = d_x(i,1); + double z_tmp = d_x(i,2); + d_match[i] = match_kokkos(x_tmp,y_tmp,z_tmp); + } +} + +/* ---------------------------------------------------------------------- */ + +namespace LAMMPS_NS { +template class RegSphereKokkos; +#ifdef LMP_KOKKOS_GPU +template class RegSphereKokkos; +#endif +} + diff --git a/src/KOKKOS/region_sphere_kokkos.h b/src/KOKKOS/region_sphere_kokkos.h new file mode 100644 index 0000000000..08951138c3 --- /dev/null +++ b/src/KOKKOS/region_sphere_kokkos.h @@ -0,0 +1,233 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef REGION_CLASS +// clang-format off +RegionStyle(sphere/kk,RegSphereKokkos); +RegionStyle(sphere/kk/device,RegSphereKokkos); +RegionStyle(sphere/kk/host,RegSphereKokkos); +// clang-format on +#else + +// clang-format off +#ifndef LMP_REGION_SPHERE_KOKKOS_H +#define LMP_REGION_SPHERE_KOKKOS_H + +#include "region_sphere.h" + +#include "kokkos_base.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +struct TagRegSphereMatchAll{}; + +template +class RegSphereKokkos : public RegSphere, public KokkosBase { + friend class FixPour; + + public: + typedef DeviceType device_type; + typedef ArrayTypes AT; + + RegSphereKokkos(class LAMMPS *, int, char **); + ~RegSphereKokkos() override; + + void match_all_kokkos(int, DAT::tdual_int_1d) override; + + KOKKOS_INLINE_FUNCTION + void operator()(TagRegSphereMatchAll, const int&) const; + + KOKKOS_INLINE_FUNCTION + int match_kokkos(double x, double y, double z) const + { + if (dynamic) inverse_transform(x,y,z); + if (openflag) return 1; + return !(k_inside(x,y,z) ^ interior); + } + + KOKKOS_INLINE_FUNCTION + int surface_kokkos(double x, double y, double z, double cutoff) + { + int ncontact; + double xs, ys, zs; + double xnear[3], xorig[3]; + + if (dynamic) { + xorig[0] = x; xorig[1] = y; xorig[2] = z; + inverse_transform(x, y, z); + } + + xnear[0] = x; xnear[1] = y; xnear[2] = z; + + if (!openflag) { + if (interior) ncontact = surface_interior_kokkos(xnear, cutoff); + else + ncontact = surface_exterior_kokkos(xnear, cutoff); + } else { + // one of surface_int/ext() will return 0 + // so no need to worry about offset of contact indices + ncontact = surface_exterior_kokkos(xnear, cutoff) + surface_interior_kokkos(xnear, cutoff); + } + + if (rotateflag && ncontact) { + for (int i = 0; i < ncontact; i++) { + xs = xnear[0] - d_contact[i].delx; + ys = xnear[1] - d_contact[i].dely; + zs = xnear[2] - d_contact[i].delz; + forward_transform(xs, ys, zs); + d_contact[i].delx = xorig[0] - xs; + d_contact[i].dely = xorig[1] - ys; + d_contact[i].delz = xorig[2] - zs; + } + } + + return ncontact; + } + + Kokkos::View d_contact; + + private: + int groupbit; + typename AT::t_int_1d d_match; + typename AT::t_x_array_randomread d_x; + typename AT::t_int_1d_randomread d_mask; + + KOKKOS_INLINE_FUNCTION + int surface_interior_kokkos(double *x, double cutoff) + { + double delx = x[0] - xc; + double dely = x[1] - yc; + double delz = x[2] - zc; + double r = sqrt(delx * delx + dely * dely + delz * delz); + if (r > radius || r == 0.0) return 0; + + double delta = radius - r; + if (delta < cutoff) { + d_contact[0].r = delta; + d_contact[0].delx = delx * (1.0 - radius / r); + d_contact[0].dely = dely * (1.0 - radius / r); + d_contact[0].delz = delz * (1.0 - radius / r); + d_contact[0].radius = -radius; + d_contact[0].iwall = 0; + d_contact[0].varflag = 1; + return 1; + } + return 0; + } + + KOKKOS_INLINE_FUNCTION + int surface_exterior_kokkos(double *x, double cutoff) + { + double delx = x[0] - xc; + double dely = x[1] - yc; + double delz = x[2] - zc; + double r = sqrt(delx * delx + dely * dely + delz * delz); + if (r < radius) return 0; + + double delta = r - radius; + if (delta < cutoff) { + d_contact[0].r = delta; + d_contact[0].delx = delx * (1.0 - radius / r); + d_contact[0].dely = dely * (1.0 - radius / r); + d_contact[0].delz = delz * (1.0 - radius / r); + d_contact[0].radius = radius; + d_contact[0].iwall = 0; + d_contact[0].varflag = 1; + return 1; + } + return 0; + } + + KOKKOS_INLINE_FUNCTION + void add_contact(int n, double *x, double xp, double yp, double zp) + { + double delx = x[0] - xp; + double dely = x[1] - yp; + double delz = x[2] - zp; + d_contact[n].r = sqrt(delx * delx + dely * dely + delz * delz); + d_contact[n].radius = 0; + d_contact[n].delx = delx; + d_contact[n].dely = dely; + d_contact[n].delz = delz; + } + + KOKKOS_INLINE_FUNCTION + int k_inside(double x, double y, double z) const + { + const double delx = x - xc; + const double dely = y - yc; + const double delz = z - zc; + const double r = sqrt(delx * delx + dely * dely + delz * delz); + + if (r <= radius) return 1; + return 0; + } + + KOKKOS_INLINE_FUNCTION + void forward_transform(double &x, double &y, double &z) const + { + if (rotateflag) rotate(x, y, z, theta); + if (moveflag) { + x += dx; + y += dy; + z += dz; + } + } + + KOKKOS_INLINE_FUNCTION + void inverse_transform(double &x, double &y, double &z) const + { + if (moveflag) { + x -= dx; + y -= dy; + z -= dz; + } + if (rotateflag) rotate(x,y,z,-theta); + } + + KOKKOS_INLINE_FUNCTION + void rotate(double &x, double &y, double &z, double angle) const + { + double a[3],b[3],c[3],d[3],disp[3]; + + double sine = sin(angle); + double cosine = cos(angle); + d[0] = x - point[0]; + d[1] = y - point[1]; + d[2] = z - point[2]; + double x0dotr = d[0]*runit[0] + d[1]*runit[1] + d[2]*runit[2]; + c[0] = x0dotr * runit[0]; + c[1] = x0dotr * runit[1]; + c[2] = x0dotr * runit[2]; + a[0] = d[0] - c[0]; + a[1] = d[1] - c[1]; + a[2] = d[2] - c[2]; + b[0] = runit[1]*a[2] - runit[2]*a[1]; + b[1] = runit[2]*a[0] - runit[0]*a[2]; + b[2] = runit[0]*a[1] - runit[1]*a[0]; + disp[0] = a[0]*cosine + b[0]*sine; + disp[1] = a[1]*cosine + b[1]*sine; + disp[2] = a[2]*cosine + b[2]*sine; + x = point[0] + c[0] + disp[0]; + y = point[1] + c[1] + disp[1]; + z = point[2] + c[2] + disp[2]; + } + +}; + +} + +#endif +#endif + diff --git a/src/MAKE/MACHINES/Makefile.perlmutter_kokkos b/src/MAKE/MACHINES/Makefile.perlmutter_kokkos index 81164aa040..decb26b5f6 100644 --- a/src/MAKE/MACHINES/Makefile.perlmutter_kokkos +++ b/src/MAKE/MACHINES/Makefile.perlmutter_kokkos @@ -9,7 +9,7 @@ SHELL = /bin/sh KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd) CC = $(KOKKOS_ABSOLUTE_PATH)/bin/nvcc_wrapper -CCFLAGS = -g -O3 -DNDEBUG -Xcudafe --diag_suppress=unrecognized_pragma -Xcudafe --diag_suppress=128 +CCFLAGS = -g -O3 -DNDEBUG -Xcudafe --diag_suppress=unrecognized_pragma,--diag_suppress=128 SHFLAGS = -fPIC DEPFLAGS = -M diff --git a/src/MAKE/MACHINES/Makefile.summit_kokkos b/src/MAKE/MACHINES/Makefile.summit_kokkos index 57c25702aa..ad91e7e203 100644 --- a/src/MAKE/MACHINES/Makefile.summit_kokkos +++ b/src/MAKE/MACHINES/Makefile.summit_kokkos @@ -9,7 +9,7 @@ SHELL = /bin/sh KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd) CC = $(KOKKOS_ABSOLUTE_PATH)/bin/nvcc_wrapper -CCFLAGS = -g -O3 -DNDEBUG -Xcudafe --diag_suppress=unrecognized_pragma -Xcudafe --diag_suppress=128 +CCFLAGS = -g -O3 -DNDEBUG -Xcudafe --diag_suppress=unrecognized_pragma,--diag_suppress=128 SHFLAGS = -fPIC DEPFLAGS = -M diff --git a/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi b/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi index fd173b5588..d4fafed2dc 100644 --- a/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi +++ b/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi @@ -10,7 +10,7 @@ KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd) export MPICH_CXX = $(KOKKOS_ABSOLUTE_PATH)/bin/nvcc_wrapper export OMPI_CXX = $(KOKKOS_ABSOLUTE_PATH)/bin/nvcc_wrapper CC = mpicxx -CCFLAGS = -g -O3 -DNDEBUG -Xcudafe --diag_suppress=unrecognized_pragma -Xcudafe --diag_suppress=128 +CCFLAGS = -g -O3 -DNDEBUG -Xcudafe --diag_suppress=unrecognized_pragma,--diag_suppress=128 SHFLAGS = -fPIC # uncomment when compiling with Intel 21.5 or older FMTFLAGS = # -std=c++11 diff --git a/src/MANYBODY/pair_sw_angle_table.cpp b/src/MANYBODY/pair_sw_angle_table.cpp index 12592f4af6..9bd0316cf5 100644 --- a/src/MANYBODY/pair_sw_angle_table.cpp +++ b/src/MANYBODY/pair_sw_angle_table.cpp @@ -402,14 +402,18 @@ void PairSWAngleTable::threebody_table(Param *paramij, Param *paramik, ParamTabl rinv12 = 1.0/(r1*r2); cs = (delr1[0]*delr2[0] + delr1[1]*delr2[1] + delr1[2]*delr2[2]) * rinv12; - - var = acos(cs); + cs = MAX(-1.0,MIN(cs,1.0)); // look up energy (f(theta), ftheta) and force (df(theta)/dtheta, fprimetheta) at // angle theta (var) in angle table belonging to parameter set paramijk + + var = acos(cs); uf_lookup(table_paramijk, var, ftheta, fprimetheta); - acosprime = 1.0 / (sqrt(1 - cs*cs ) ); + if ((cs*cs - 1.0) != 0.0) + acosprime = 1.0 / (sqrt(1 - cs*cs ) ); + else + acosprime = 0.0; facradtable = facexp*ftheta; frad1table = facradtable*gsrainvsq1; @@ -724,7 +728,7 @@ double PairSWAngleTable::splint(double *xa, double *ya, double *y2a, int n, doub void PairSWAngleTable::uf_lookup(ParamTable *pm, double x, double &u, double &f) { - if (!std::isfinite(x)) { error->one(FLERR, "Illegal angle in angle style table"); } + if (!std::isfinite(x)) error->one(FLERR, "Illegal angle in pair style sw/angle/table"); double fraction,a,b; diff --git a/src/MC/fix_charge_regulation.cpp b/src/MC/fix_charge_regulation.cpp index ddf14f6804..f5be7a041b 100644 --- a/src/MC/fix_charge_regulation.cpp +++ b/src/MC/fix_charge_regulation.cpp @@ -174,9 +174,9 @@ FixChargeRegulation::~FixChargeRegulation() { neighbor->exclusion_group_group_delete(exclusion_group, igroupall); } - if (groupstrings) { + if (ngroups > 0) { for (int i = 0; i < ngroups; ++i) delete[] groupstrings[i]; - memory->destroy(groupstrings); + memory->sfree(groupstrings); } } diff --git a/src/MOLECULE/fix_cmap.cpp b/src/MOLECULE/fix_cmap.cpp index 631dfe7b4b..02116965b5 100644 --- a/src/MOLECULE/fix_cmap.cpp +++ b/src/MOLECULE/fix_cmap.cpp @@ -127,6 +127,9 @@ FixCMAP::FixCMAP(LAMMPS *lmp, int narg, char **arg) : FixCMAP::~FixCMAP() { + + if (copymode) return; + // unregister callbacks to this fix from Atom class atom->delete_callback(id,Atom::GROW); @@ -413,8 +416,6 @@ void FixCMAP::post_force(int vflag) r43 = sqrt(vb43x*vb43x + vb43y*vb43y + vb43z*vb43z); a2sq = a2x*a2x + a2y*a2y + a2z*a2z; b2sq = b2x*b2x + b2y*b2y + b2z*b2z; - //if (a1sq<0.0001 || b1sq<0.0001 || a2sq<0.0001 || b2sq<0.0001) - // printf("a1sq b1sq a2sq b2sq: %f %f %f %f \n",a1sq,b1sq,a2sq,b2sq); if (a1sq<0.0001 || b1sq<0.0001 || a2sq<0.0001 || b2sq<0.0001) continue; dpr21r32 = vb21x*vb32x + vb21y*vb32y + vb21z*vb32z; dpr34r32 = vb34x*vb32x + vb34y*vb32y + vb34z*vb32z; diff --git a/src/MOLECULE/fix_cmap.h b/src/MOLECULE/fix_cmap.h index 1c6aba95e0..856dc06852 100644 --- a/src/MOLECULE/fix_cmap.h +++ b/src/MOLECULE/fix_cmap.h @@ -64,7 +64,7 @@ class FixCMAP : public Fix { double memory_usage() override; - private: + protected: int eflag_caller; int ctype, ilevel_respa; int ncrosstermtypes, crossterm_per_atom, maxcrossterm; diff --git a/src/OPENMP/pair_reaxff_omp.cpp b/src/OPENMP/pair_reaxff_omp.cpp index 96e6389870..85369cc7bf 100644 --- a/src/OPENMP/pair_reaxff_omp.cpp +++ b/src/OPENMP/pair_reaxff_omp.cpp @@ -106,7 +106,9 @@ void PairReaxFFOMP::init_style() auto acks2_fixes = modify->get_fix_by_style("^acks2/reax"); int have_qeq = modify->get_fix_by_style("^qeq/reax").size() - + modify->get_fix_by_style("^qeq/shielded").size() + acks2_fixes.size(); + + modify->get_fix_by_style("^qeq/shielded").size() + acks2_fixes.size() + + modify->get_fix_by_style("^qtpie/reax").size(); + if (qeqflag && (have_qeq != 1)) error->all(FLERR,"Pair style reaxff/omp requires use of exactly one of the " diff --git a/src/REAXFF/fix_qeq_reaxff.cpp b/src/REAXFF/fix_qeq_reaxff.cpp index 921f6e0261..dc8fbd5afd 100644 --- a/src/REAXFF/fix_qeq_reaxff.cpp +++ b/src/REAXFF/fix_qeq_reaxff.cpp @@ -344,7 +344,7 @@ void FixQEqReaxFF::allocate_matrix() int mincap; double safezone; - if (reaxflag) { + if (reaxflag && reaxff) { mincap = reaxff->api->system->mincap; safezone = reaxff->api->system->safezone; } else { @@ -1158,7 +1158,7 @@ void FixQEqReaxFF::get_chi_field() for (int i = 0; i < nlocal; i++) { if (mask[i] & efgroupbit) { if (region && !region->match(x[i][0],x[i][1],x[i][2])) continue; - chi_field[i] = -efield->efield[i][3]; + chi_field[i] = efield->efield[i][3]; } } } diff --git a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp new file mode 100644 index 0000000000..0279032c7f --- /dev/null +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -0,0 +1,1195 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: + Efstratios M Kritikos, California Institute of Technology + (Implemented original version in LAMMMPS Aug 2019) + Navraj S Lalli, Imperial College London + (Reimplemented QTPIE as a new fix in LAMMPS Aug 2024 and extended functionality) +------------------------------------------------------------------------- */ + +#include "fix_qtpie_reaxff.h" + +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "error.h" +#include "fix_efield.h" +#include "force.h" +#include "group.h" +#include "modify.h" +#include "neigh_list.h" +#include "neighbor.h" +#include "pair.h" +#include "region.h" +#include "respa.h" +#include "text_file_reader.h" +#include "update.h" + +#include "pair_reaxff.h" +#include "reaxff_api.h" + +#include +#include +#include + +using namespace LAMMPS_NS; +using namespace FixConst; + +static constexpr double CONV_TO_EV = 14.4; +static constexpr double SMALL = 1.0e-14; +static constexpr double QSUMSMALL = 0.00001; +static constexpr double ANGSTROM_TO_BOHRRADIUS = 1.8897261259; + +/* ---------------------------------------------------------------------- */ + +FixQtpieReaxFF::FixQtpieReaxFF(LAMMPS *lmp, int narg, char **arg) : + Fix(lmp, narg, arg), matvecs(0), pertype_option(nullptr), gauss_file(nullptr) +{ + // this fix returns a global scalar (the number of iterations) + scalar_flag = 1; + extscalar = 0; + + // this fix returns a per-atom vector (the effective electronegativity) + peratom_flag = 1; + size_peratom_cols = 0; + + imax = 200; + maxwarn = 1; + + if ((narg < 9) || (narg > 12)) error->all(FLERR,"Illegal fix {} command", style); + + nevery = utils::inumeric(FLERR,arg[3],false,lmp); + if (nevery <= 0) error->all(FLERR,"Illegal fix {} command", style); + + swa = utils::numeric(FLERR,arg[4],false,lmp); + swb = utils::numeric(FLERR,arg[5],false,lmp); + tolerance = utils::numeric(FLERR,arg[6],false,lmp); + pertype_option = utils::strdup(arg[7]); + gauss_file = utils::strdup(arg[8]); + + int iarg = 9; + while (iarg < narg) { + if (strcmp(arg[iarg],"nowarn") == 0) maxwarn = 0; + else if (strcmp(arg[iarg],"maxiter") == 0) { + if (iarg+1 > narg-1) + error->all(FLERR,"Illegal fix {} command", style); + imax = utils::numeric(FLERR,arg[iarg+1],false,lmp); + iarg++; + } else error->all(FLERR,"Illegal fix {} command", style); + iarg++; + } + shld = nullptr; + + nn = nt = n_cap = 0; + nmax = 0; + m_fill = m_cap = 0; + pack_flag = 0; + s = nullptr; + t = nullptr; + nprev = 4; + + Hdia_inv = nullptr; + b_s = nullptr; + chi_eff = nullptr; + b_t = nullptr; + b_prc = nullptr; + b_prm = nullptr; + + // CG + + p = nullptr; + q = nullptr; + r = nullptr; + d = nullptr; + + // H matrix + + H.firstnbr = nullptr; + H.numnbrs = nullptr; + H.jlist = nullptr; + H.val = nullptr; + + comm_forward = comm_reverse = 1; + + // perform initial allocation of atom-based arrays + // register with Atom class + + reaxff = dynamic_cast(force->pair_match("^reaxff",0)); + + s_hist = t_hist = nullptr; + atom->add_callback(Atom::GROW); +} + +/* ---------------------------------------------------------------------- */ + +FixQtpieReaxFF::~FixQtpieReaxFF() +{ + if (copymode) return; + + delete[] pertype_option; + delete[] gauss_file; + + // unregister callbacks to this fix from Atom class + + atom->delete_callback(id,Atom::GROW); + + memory->destroy(s_hist); + memory->destroy(t_hist); + + FixQtpieReaxFF::deallocate_storage(); + FixQtpieReaxFF::deallocate_matrix(); + + memory->destroy(shld); + + memory->destroy(gauss_exp); + if (!reaxflag) { + memory->destroy(chi); + memory->destroy(eta); + memory->destroy(gamma); + } +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::post_constructor() +{ + grow_arrays(atom->nmax); + for (int i = 0; i < atom->nmax; i++) + for (int j = 0; j < nprev; ++j) + s_hist[i][j] = t_hist[i][j] = 0; + + pertype_parameters(pertype_option); +} + +/* ---------------------------------------------------------------------- */ + +int FixQtpieReaxFF::setmask() +{ + int mask = 0; + mask |= PRE_FORCE; + mask |= PRE_FORCE_RESPA; + mask |= MIN_PRE_FORCE; + return mask; +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::pertype_parameters(char *arg) +{ + const int nlocal = atom->nlocal; + const int *mask = atom->mask; + const int *type = atom->type; + const int ntypes = atom->ntypes; + + // read gaussian orbital exponents + memory->create(gauss_exp,ntypes+1,"qtpie/reaxff:gauss_exp"); + if (comm->me == 0) { + gauss_exp[0] = 0.0; + try { + FILE *fp = utils::open_potential(gauss_file, lmp, nullptr); + if (!fp) throw TokenizerException("Fix qtpie/reaxff: could not open gauss file", gauss_file); + TextFileReader reader(fp,"qtpie/reaxff gaussian exponents"); + reader.ignore_comments = true; + for (int i = 1; i <= ntypes; i++) { + const char *line = reader.next_line(); + if (!line) + throw TokenizerException("Fix qtpie/reaxff: Incorrect number of atom types in gauss file",""); + ValueTokenizer values(line); + + if (values.count() != 2) + throw TokenizerException("Fix qtpie/reaxff: Incorrect number of values per line " + "in gauss file",std::to_string(values.count())); + + int itype = values.next_int(); + if ((itype < 1) || (itype > ntypes)) + throw TokenizerException("Fix qtpie/reaxff: Invalid atom type in gauss file", + std::to_string(itype)); + + double exp = values.next_double(); + if (exp < 0) + throw TokenizerException("Fix qtpie/reaxff: Invalid orbital exponent in gauss file", + std::to_string(exp)); + gauss_exp[itype] = exp; + } + } catch (std::exception &e) { + error->one(FLERR,e.what()); + } + } + + MPI_Bcast(gauss_exp,ntypes+1,MPI_DOUBLE,0,world); + + // define a cutoff distance (in atomic units) beyond which overlap integrals are neglected + // in calc_chi_eff() + const double exp_min = find_min_exp(gauss_exp,ntypes+1); + const int olap_cut = 10; // overlap integrals are neglected if less than pow(10,-olap_cut) + dist_cutoff = sqrt(2*olap_cut/exp_min*log(10.0)); + + // read chi, eta and gamma + + if (utils::strmatch(arg,"^reaxff")) { + reaxflag = 1; + Pair *pair = force->pair_match("^reaxff",0); + if (!pair) error->all(FLERR,"No reaxff pair style for fix qtpie/reaxff"); + + int tmp, tmp_all; + chi = (double *) pair->extract("chi",tmp); + eta = (double *) pair->extract("eta",tmp); + gamma = (double *) pair->extract("gamma",tmp); + if ((chi == nullptr) || (eta == nullptr) || (gamma == nullptr)) + error->all(FLERR, "Fix qtpie/reaxff could not extract qtpie parameters from pair reaxff"); + tmp = tmp_all = 0; + for (int i = 0; i < nlocal; ++i) { + if (mask[i] & groupbit) { + if ((chi[type[i]] == 0.0) && (eta[type[i]] == 0.0) && (gamma[type[i]] == 0.0)) + tmp = type[i]; + } + } + MPI_Allreduce(&tmp, &tmp_all, 1, MPI_INT, MPI_MAX, world); + if (tmp_all) + error->all(FLERR, "No qtpie parameters for atom type {} provided by pair reaxff", tmp_all); + return; + } else if (utils::strmatch(arg,"^reax/c")) { + error->all(FLERR, "Fix qtpie/reaxff keyword 'reax/c' is obsolete; please use 'reaxff'"); + } else if (platform::file_is_readable(arg)) { + ; // arg is readable file. will read below + } else { + error->all(FLERR, "Unknown fix qtpie/reaxff keyword {}", arg); + } + + reaxflag = 0; + + memory->create(chi,ntypes+1,"qtpie/reaxff:chi"); + memory->create(eta,ntypes+1,"qtpie/reaxff:eta"); + memory->create(gamma,ntypes+1,"qtpie/reaxff:gamma"); + + if (comm->me == 0) { + chi[0] = eta[0] = gamma[0] = 0.0; + try { + TextFileReader reader(arg,"qtpie/reaxff parameter"); + reader.ignore_comments = false; + for (int i = 1; i <= ntypes; i++) { + const char *line = reader.next_line(); + if (!line) + throw TokenizerException("Fix qtpie/reaxff: Invalid param file format",""); + ValueTokenizer values(line); + + if (values.count() != 4) + throw TokenizerException("Fix qtpie/reaxff: Incorrect format of param file",""); + + int itype = values.next_int(); + if ((itype < 1) || (itype > ntypes)) + throw TokenizerException("Fix qtpie/reaxff: Invalid atom type in param file", + std::to_string(itype)); + + chi[itype] = values.next_double(); + eta[itype] = values.next_double(); + gamma[itype] = values.next_double(); + } + } catch (std::exception &e) { + error->one(FLERR,e.what()); + } + } + + MPI_Bcast(chi,ntypes+1,MPI_DOUBLE,0,world); + MPI_Bcast(eta,ntypes+1,MPI_DOUBLE,0,world); + MPI_Bcast(gamma,ntypes+1,MPI_DOUBLE,0,world); +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::allocate_storage() +{ + nmax = atom->nmax; + + memory->create(s,nmax,"qtpie:s"); + memory->create(t,nmax,"qtpie:t"); + + memory->create(Hdia_inv,nmax,"qtpie:Hdia_inv"); + memory->create(b_s,nmax,"qtpie:b_s"); + memory->create(chi_eff,nmax,"qtpie:chi_eff"); + vector_atom = chi_eff; + memory->create(b_t,nmax,"qtpie:b_t"); + memory->create(b_prc,nmax,"qtpie:b_prc"); + memory->create(b_prm,nmax,"qtpie:b_prm"); + + int size = nmax; + + memory->create(p,size,"qtpie:p"); + memory->create(q,size,"qtpie:q"); + memory->create(r,size,"qtpie:r"); + memory->create(d,size,"qtpie:d"); +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::deallocate_storage() +{ + memory->destroy(s); + memory->destroy(t); + + memory->destroy(Hdia_inv); + memory->destroy(b_s); + memory->destroy(b_t); + memory->destroy(b_prc); + memory->destroy(b_prm); + memory->destroy(chi_eff); + + memory->destroy(p); + memory->destroy(q); + memory->destroy(r); + memory->destroy(d); +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::reallocate_storage() +{ + deallocate_storage(); + allocate_storage(); + init_storage(); +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::allocate_matrix() +{ + int i,ii; + bigint m; + + int mincap; + double safezone; + + if (reaxflag && reaxff) { + mincap = reaxff->api->system->mincap; + safezone = reaxff->api->system->safezone; + } else { + mincap = REAX_MIN_CAP; + safezone = REAX_SAFE_ZONE; + } + + n_cap = MAX((int)(atom->nlocal * safezone), mincap); + + // determine the total space for the H matrix + + m = 0; + for (ii = 0; ii < nn; ii++) { + i = ilist[ii]; + m += numneigh[i]; + } + bigint m_cap_big = (bigint)MAX(m * safezone, mincap * REAX_MIN_NBRS); + if (m_cap_big > MAXSMALLINT) + error->one(FLERR,"Too many neighbors in fix {}",style); + m_cap = m_cap_big; + + H.n = n_cap; + H.m = m_cap; + memory->create(H.firstnbr,n_cap,"qtpie:H.firstnbr"); + memory->create(H.numnbrs,n_cap,"qtpie:H.numnbrs"); + memory->create(H.jlist,m_cap,"qtpie:H.jlist"); + memory->create(H.val,m_cap,"qtpie:H.val"); +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::deallocate_matrix() +{ + memory->destroy(H.firstnbr); + memory->destroy(H.numnbrs); + memory->destroy(H.jlist); + memory->destroy(H.val); +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::reallocate_matrix() +{ + deallocate_matrix(); + allocate_matrix(); +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::init() +{ + if (!atom->q_flag) + error->all(FLERR,"Fix {} requires atom attribute q", style); + + if (group->count(igroup) == 0) + error->all(FLERR,"Fix {} group has no atoms", style); + + // compute net charge and print warning if too large + double qsum_local = 0.0, qsum = 0.0; + for (int i = 0; i < atom->nlocal; i++) { + if (atom->mask[i] & groupbit) + qsum_local += atom->q[i]; + } + MPI_Allreduce(&qsum_local,&qsum,1,MPI_DOUBLE,MPI_SUM,world); + + if ((comm->me == 0) && (fabs(qsum) > QSUMSMALL)) + error->warning(FLERR,"Fix {} group is not charge neutral, net charge = {:.8}", style, qsum); + + // get pointer to fix efield if present. there may be at most one instance of fix efield in use. + efield = nullptr; + auto fixes = modify->get_fix_by_style("^efield"); + if (fixes.size() == 1) efield = dynamic_cast(fixes.front()); + else if (fixes.size() > 1) + error->all(FLERR, "There may be only one fix efield instance used with fix {}", style); + + // ensure that fix efield is properly initialized before accessing its data and check some settings + if (efield) { + efield->init(); + if (strcmp(update->unit_style,"real") != 0) + error->all(FLERR,"Must use unit_style real with fix {} and external fields", style); + + if (efield->groupbit != 1){ // if efield is not applied to all atoms + error->all(FLERR,"Must use group id all for fix efield when using fix {}", style); + } + + if (efield->region){ // if efield is not applied to all atoms + error->all(FLERR,"Keyword region not supported for fix efield when using fix {}", style); + } + + if (efield->varflag == FixEfield::ATOM && efield->pstyle != FixEfield::ATOM) + error->all(FLERR,"Atom-style external electric field requires atom-style " + "potential variable when used with fix {}", style); + } + + // we need a half neighbor list w/ Newton off + // built whenever re-neighboring occurs + + neighbor->add_request(this, NeighConst::REQ_NEWTON_OFF); + + init_shielding(); + init_taper(); + + if (utils::strmatch(update->integrate_style,"^respa")) + nlevels_respa = (dynamic_cast(update->integrate))->nlevels; +} + +/* ---------------------------------------------------------------------- */ + +double FixQtpieReaxFF::compute_scalar() +{ + return matvecs/2.0; +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::init_list(int /*id*/, NeighList *ptr) +{ + list = ptr; +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::init_shielding() +{ + int i,j; + int ntypes; + + ntypes = atom->ntypes; + if (shld == nullptr) + memory->create(shld,ntypes+1,ntypes+1,"qtpie:shielding"); + + for (i = 1; i <= ntypes; ++i) + for (j = 1; j <= ntypes; ++j) + shld[i][j] = pow(gamma[i] * gamma[j], -1.5); +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::init_taper() +{ + double d7, swa2, swa3, swb2, swb3; + + if (fabs(swa) > 0.01 && comm->me == 0) + error->warning(FLERR,"Fix qtpie/reaxff has non-zero lower Taper radius cutoff"); + if (swb < 0) + error->all(FLERR, "Fix qtpie/reaxff has negative upper Taper radius cutoff"); + else if (swb < 5 && comm->me == 0) + error->warning(FLERR,"Fix qtpie/reaxff has very low Taper radius cutoff"); + + d7 = pow(swb - swa, 7); + swa2 = SQR(swa); + swa3 = CUBE(swa); + swb2 = SQR(swb); + swb3 = CUBE(swb); + + Tap[7] = 20.0 / d7; + Tap[6] = -70.0 * (swa + swb) / d7; + Tap[5] = 84.0 * (swa2 + 3.0*swa*swb + swb2) / d7; + Tap[4] = -35.0 * (swa3 + 9.0*swa2*swb + 9.0*swa*swb2 + swb3) / d7; + Tap[3] = 140.0 * (swa3*swb + 3.0*swa2*swb2 + swa*swb3) / d7; + Tap[2] =-210.0 * (swa3*swb2 + swa2*swb3) / d7; + Tap[1] = 140.0 * swa3 * swb3 / d7; + Tap[0] = (-35.0*swa3*swb2*swb2 + 21.0*swa2*swb3*swb2 - + 7.0*swa*swb3*swb3 + swb3*swb3*swb) / d7; +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::setup_pre_force(int vflag) +{ + if (reaxff) { + nn = reaxff->list->inum; + nt = reaxff->list->inum + reaxff->list->gnum; + ilist = reaxff->list->ilist; + numneigh = reaxff->list->numneigh; + firstneigh = reaxff->list->firstneigh; + } else { + nn = list->inum; + nt = list->inum + list->gnum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + } + + deallocate_storage(); + allocate_storage(); + + init_storage(); + + deallocate_matrix(); + allocate_matrix(); + + pre_force(vflag); +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::setup_pre_force_respa(int vflag, int ilevel) +{ + if (ilevel < nlevels_respa-1) return; + setup_pre_force(vflag); +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::min_setup_pre_force(int vflag) +{ + setup_pre_force(vflag); +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::init_storage() +{ + calc_chi_eff(); + + for (int ii = 0; ii < nn; ii++) { + int i = ilist[ii]; + if (atom->mask[i] & groupbit) { + Hdia_inv[i] = 1. / eta[atom->type[i]]; + b_s[i] = -chi_eff[i]; + b_t[i] = -1.0; + b_prc[i] = 0; + b_prm[i] = 0; + s[i] = t[i] = 0; + } + } +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::pre_force(int /*vflag*/) +{ + if (update->ntimestep % nevery) return; + + int n = atom->nlocal; + + if (reaxff) { + nn = reaxff->list->inum; + nt = reaxff->list->inum + reaxff->list->gnum; + ilist = reaxff->list->ilist; + numneigh = reaxff->list->numneigh; + firstneigh = reaxff->list->firstneigh; + } else { + nn = list->inum; + nt = list->inum + list->gnum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + } + + // grow arrays if necessary + // need to be atom->nmax in length + + if (atom->nmax > nmax) reallocate_storage(); + if (n > n_cap*DANGER_ZONE || m_fill > m_cap*DANGER_ZONE) + reallocate_matrix(); + + calc_chi_eff(); + + init_matvec(); + + matvecs_s = CG(b_s, s); // CG on s - parallel + matvecs_t = CG(b_t, t); // CG on t - parallel + matvecs = matvecs_s + matvecs_t; + + calculate_Q(); +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::pre_force_respa(int vflag, int ilevel, int /*iloop*/) +{ + if (ilevel == nlevels_respa-1) pre_force(vflag); +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::min_pre_force(int vflag) +{ + pre_force(vflag); +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::init_matvec() +{ + /* fill-in H matrix */ + compute_H(); + + int ii, i; + + for (ii = 0; ii < nn; ++ii) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) { + + /* init pre-conditioner for H and init solution vectors */ + Hdia_inv[i] = 1. / eta[atom->type[i]]; + b_s[i] = -chi_eff[i]; + b_t[i] = -1.0; + + /* quadratic extrapolation for s & t from previous solutions */ + t[i] = t_hist[i][2] + 3 * (t_hist[i][0] - t_hist[i][1]); + + /* cubic extrapolation for s & t from previous solutions */ + s[i] = 4*(s_hist[i][0]+s_hist[i][2])-(6*s_hist[i][1]+s_hist[i][3]); + } + } + + pack_flag = 2; + comm->forward_comm(this); //Dist_vector(s); + pack_flag = 3; + comm->forward_comm(this); //Dist_vector(t); +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::compute_H() +{ + int jnum; + int i, j, ii, jj, flag; + double dx, dy, dz, r_sqr; + constexpr double EPSILON = 0.0001; + + int *type = atom->type; + tagint *tag = atom->tag; + double **x = atom->x; + int *mask = atom->mask; + + // fill in the H matrix + m_fill = 0; + r_sqr = 0; + for (ii = 0; ii < nn; ii++) { + i = ilist[ii]; + if (mask[i] & groupbit) { + jlist = firstneigh[i]; + jnum = numneigh[i]; + H.firstnbr[i] = m_fill; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + dx = x[j][0] - x[i][0]; + dy = x[j][1] - x[i][1]; + dz = x[j][2] - x[i][2]; + r_sqr = SQR(dx) + SQR(dy) + SQR(dz); + + flag = 0; + if (r_sqr <= SQR(swb)) { + if (j < atom->nlocal) flag = 1; + else if (tag[i] < tag[j]) flag = 1; + else if (tag[i] == tag[j]) { + if (dz > EPSILON) flag = 1; + else if (fabs(dz) < EPSILON) { + if (dy > EPSILON) flag = 1; + else if (fabs(dy) < EPSILON && dx > EPSILON) + flag = 1; + } + } + } + + if (flag) { + H.jlist[m_fill] = j; + H.val[m_fill] = calculate_H(sqrt(r_sqr), shld[type[i]][type[j]]); + m_fill++; + } + } + H.numnbrs[i] = m_fill - H.firstnbr[i]; + } + } + + if (m_fill >= H.m) + error->all(FLERR,"Fix qtpie/reaxff H matrix size has been exceeded: m_fill={} H.m={}\n", + m_fill, H.m); +} + +/* ---------------------------------------------------------------------- */ + +double FixQtpieReaxFF::calculate_H(double r, double gamma) +{ + double Taper, denom; + + Taper = Tap[7] * r + Tap[6]; + Taper = Taper * r + Tap[5]; + Taper = Taper * r + Tap[4]; + Taper = Taper * r + Tap[3]; + Taper = Taper * r + Tap[2]; + Taper = Taper * r + Tap[1]; + Taper = Taper * r + Tap[0]; + + denom = r * r * r + gamma; + denom = pow(denom,1.0/3.0); + + return Taper * CONV_TO_EV / denom; +} + +/* ---------------------------------------------------------------------- */ + +int FixQtpieReaxFF::CG(double *b, double *x) +{ + int i, j; + double tmp, alpha, beta, b_norm; + double sig_old, sig_new; + + int jj; + + pack_flag = 1; + sparse_matvec(&H, x, q); + comm->reverse_comm(this); //Coll_Vector(q); + + vector_sum(r , 1., b, -1., q, nn); + + for (jj = 0; jj < nn; ++jj) { + j = ilist[jj]; + if (atom->mask[j] & groupbit) + d[j] = r[j] * Hdia_inv[j]; //pre-condition + } + + b_norm = parallel_norm(b, nn); + sig_new = parallel_dot(r, d, nn); + + for (i = 1; i < imax && sqrt(sig_new) / b_norm > tolerance; ++i) { + comm->forward_comm(this); //Dist_vector(d); + sparse_matvec(&H, d, q); + comm->reverse_comm(this); //Coll_vector(q); + + tmp = parallel_dot(d, q, nn); + alpha = sig_new / tmp; + + vector_add(x, alpha, d, nn); + vector_add(r, -alpha, q, nn); + + // pre-conditioning + for (jj = 0; jj < nn; ++jj) { + j = ilist[jj]; + if (atom->mask[j] & groupbit) + p[j] = r[j] * Hdia_inv[j]; + } + + sig_old = sig_new; + sig_new = parallel_dot(r, p, nn); + + beta = sig_new / sig_old; + vector_sum(d, 1., p, beta, d, nn); + } + + if ((i >= imax) && maxwarn && (comm->me == 0)) + error->warning(FLERR, "Fix qtpie/reaxff CG convergence failed after {} iterations at step {}", + i,update->ntimestep); + return i; +} + + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::sparse_matvec(sparse_matrix *A, double *x, double *b) +{ + int i, j, itr_j; + int ii; + + for (ii = 0; ii < nn; ++ii) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) + b[i] = eta[atom->type[i]] * x[i]; + } + + int nall = atom->nlocal + atom->nghost; + for (i = atom->nlocal; i < nall; ++i) + b[i] = 0; + + for (ii = 0; ii < nn; ++ii) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) { + for (itr_j=A->firstnbr[i]; itr_jfirstnbr[i]+A->numnbrs[i]; itr_j++) { + j = A->jlist[itr_j]; + b[i] += A->val[itr_j] * x[j]; + b[j] += A->val[itr_j] * x[i]; + } + } + } + +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::calculate_Q() +{ + int i, k; + double u, s_sum, t_sum; + double *q = atom->q; + + int ii; + + s_sum = parallel_vector_acc(s, nn); + t_sum = parallel_vector_acc(t, nn); + u = s_sum / t_sum; + + for (ii = 0; ii < nn; ++ii) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) { + q[i] = s[i] - u * t[i]; + + /* backup s & t */ + for (k = nprev-1; k > 0; --k) { + s_hist[i][k] = s_hist[i][k-1]; + t_hist[i][k] = t_hist[i][k-1]; + } + s_hist[i][0] = s[i]; + t_hist[i][0] = t[i]; + } + } + + pack_flag = 4; + comm->forward_comm(this); //Dist_vector(atom->q); +} + +/* ---------------------------------------------------------------------- */ + +int FixQtpieReaxFF::pack_forward_comm(int n, int *list, double *buf, + int /*pbc_flag*/, int * /*pbc*/) +{ + int m; + + if (pack_flag == 1) + for (m = 0; m < n; m++) buf[m] = d[list[m]]; + else if (pack_flag == 2) + for (m = 0; m < n; m++) buf[m] = s[list[m]]; + else if (pack_flag == 3) + for (m = 0; m < n; m++) buf[m] = t[list[m]]; + else if (pack_flag == 4) + for (m = 0; m < n; m++) buf[m] = atom->q[list[m]]; + return n; +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::unpack_forward_comm(int n, int first, double *buf) +{ + int i, m; + + if (pack_flag == 1) + for (m = 0, i = first; m < n; m++, i++) d[i] = buf[m]; + else if (pack_flag == 2) + for (m = 0, i = first; m < n; m++, i++) s[i] = buf[m]; + else if (pack_flag == 3) + for (m = 0, i = first; m < n; m++, i++) t[i] = buf[m]; + else if (pack_flag == 4) + for (m = 0, i = first; m < n; m++, i++) atom->q[i] = buf[m]; +} + +/* ---------------------------------------------------------------------- */ + +int FixQtpieReaxFF::pack_reverse_comm(int n, int first, double *buf) +{ + int i, m; + for (m = 0, i = first; m < n; m++, i++) buf[m] = q[i]; + return n; +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::unpack_reverse_comm(int n, int *list, double *buf) +{ + for (int m = 0; m < n; m++) q[list[m]] += buf[m]; +} + +/* ---------------------------------------------------------------------- + memory usage of local atom-based arrays +------------------------------------------------------------------------- */ + +double FixQtpieReaxFF::memory_usage() +{ + double bytes; + + bytes = (double)atom->nmax*nprev*2 * sizeof(double); // s_hist & t_hist + bytes += (double)atom->nmax*11 * sizeof(double); // storage + bytes += (double)n_cap*2 * sizeof(int); // matrix... + bytes += (double)m_cap * sizeof(int); + bytes += (double)m_cap * sizeof(double); + + return bytes; +} + +/* ---------------------------------------------------------------------- + allocate fictitious charge arrays +------------------------------------------------------------------------- */ + +void FixQtpieReaxFF::grow_arrays(int nmax) +{ + memory->grow(s_hist,nmax,nprev,"qtpie:s_hist"); + memory->grow(t_hist,nmax,nprev,"qtpie:t_hist"); +} + +/* ---------------------------------------------------------------------- + copy values within fictitious charge arrays +------------------------------------------------------------------------- */ + +void FixQtpieReaxFF::copy_arrays(int i, int j, int /*delflag*/) +{ + for (int m = 0; m < nprev; m++) { + s_hist[j][m] = s_hist[i][m]; + t_hist[j][m] = t_hist[i][m]; + } +} + +/* ---------------------------------------------------------------------- + pack values in local atom-based array for exchange with another proc +------------------------------------------------------------------------- */ + +int FixQtpieReaxFF::pack_exchange(int i, double *buf) +{ + for (int m = 0; m < nprev; m++) buf[m] = s_hist[i][m]; + for (int m = 0; m < nprev; m++) buf[nprev+m] = t_hist[i][m]; + return nprev*2; +} + +/* ---------------------------------------------------------------------- + unpack values in local atom-based array from exchange with another proc +------------------------------------------------------------------------- */ + +int FixQtpieReaxFF::unpack_exchange(int nlocal, double *buf) +{ + for (int m = 0; m < nprev; m++) s_hist[nlocal][m] = buf[m]; + for (int m = 0; m < nprev; m++) t_hist[nlocal][m] = buf[nprev+m]; + return nprev*2; +} + +/* ---------------------------------------------------------------------- */ + +double FixQtpieReaxFF::parallel_norm(double *v, int n) +{ + int i; + double my_sum, norm_sqr; + + int ii; + + my_sum = 0.0; + norm_sqr = 0.0; + for (ii = 0; ii < n; ++ii) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) + my_sum += SQR(v[i]); + } + + MPI_Allreduce(&my_sum, &norm_sqr, 1, MPI_DOUBLE, MPI_SUM, world); + + return sqrt(norm_sqr); +} + +/* ---------------------------------------------------------------------- */ + +double FixQtpieReaxFF::parallel_dot(double *v1, double *v2, int n) +{ + int i; + double my_dot, res; + + int ii; + + my_dot = 0.0; + res = 0.0; + for (ii = 0; ii < n; ++ii) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) + my_dot += v1[i] * v2[i]; + } + + MPI_Allreduce(&my_dot, &res, 1, MPI_DOUBLE, MPI_SUM, world); + + return res; +} + +/* ---------------------------------------------------------------------- */ + +double FixQtpieReaxFF::parallel_vector_acc(double *v, int n) +{ + int i; + double my_acc, res; + + int ii; + + my_acc = 0.0; + res = 0.0; + for (ii = 0; ii < n; ++ii) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) + my_acc += v[i]; + } + + MPI_Allreduce(&my_acc, &res, 1, MPI_DOUBLE, MPI_SUM, world); + + return res; +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::vector_sum(double* dest, double c, double* v, + double d, double* y, int k) +{ + int kk; + + for (--k; k>=0; --k) { + kk = ilist[k]; + if (atom->mask[kk] & groupbit) + dest[kk] = c * v[kk] + d * y[kk]; + } +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::vector_add(double* dest, double c, double* v, int k) +{ + int kk; + + for (--k; k>=0; --k) { + kk = ilist[k]; + if (atom->mask[kk] & groupbit) + dest[kk] += c * v[kk]; + } +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::calc_chi_eff() +{ + memset(&chi_eff[0],0,atom->nmax*sizeof(double)); + + const auto x = (const double * const *)atom->x; + const int ntypes = atom->ntypes; + const int *type = atom->type; + + double dist,overlap,sum_n,sum_d,expa,expb,chia,chib,phia,phib,p,m; + int i,j; + + // check ghost atoms are stored up to the distance cutoff for overlap integrals + const double comm_cutoff = MAX(neighbor->cutneighmax,comm->cutghostuser); + if(comm_cutoff < dist_cutoff/ANGSTROM_TO_BOHRRADIUS) { + error->all(FLERR,"comm cutoff = {} Angstrom is smaller than distance cutoff = {} Angstrom " + "for overlap integrals in {}. Increase comm cutoff with comm_modify", + comm_cutoff, dist_cutoff/ANGSTROM_TO_BOHRRADIUS, style); + } + + // efield energy is in real units of kcal/mol, factor needed for conversion to eV + const double qe2f = force->qe2f; + const double factor = 1.0/qe2f; + + if (efield) { + if (efield->varflag != FixEfield::CONSTANT) + efield->update_efield_variables(); + } + + // compute chi_eff for each local atom + for (i = 0; i < nn; i++) { + expa = gauss_exp[type[i]]; + chia = chi[type[i]]; + if (efield) { + if (efield->varflag != FixEfield::ATOM) { + phia = -factor*(x[i][0]*efield->ex + x[i][1]*efield->ey + x[i][2]*efield->ez); + } else { // atom-style potential from FixEfield + phia = efield->efield[i][3]; + } + } + + sum_n = 0.0; + sum_d = 0.0; + + for (j = 0; j < nt; j++) { + dist = distance(x[i],x[j])*ANGSTROM_TO_BOHRRADIUS; // in atomic units + + if (dist < dist_cutoff) { + expb = gauss_exp[type[j]]; + chib = chi[type[j]]; + + // overlap integral of two normalised 1s Gaussian type orbitals + p = expa + expb; + m = expa * expb / p; + overlap = pow((4.0*m/p),0.75) * exp(-m*dist*dist); + + if (efield) { + if (efield->varflag != FixEfield::ATOM) { + phib = -factor*(x[j][0]*efield->ex + x[j][1]*efield->ey + x[j][2]*efield->ez); + } else { // atom-style potential from FixEfield + phib = efield->efield[j][3]; + } + sum_n += (chia - chib + phia - phib) * overlap; + } else { + sum_n += (chia - chib) * overlap; + } + sum_d += overlap; + } + } + + chi_eff[i] = sum_n / sum_d; + } +} + +/* ---------------------------------------------------------------------- */ + +double FixQtpieReaxFF::find_min_exp(const double *array, const int array_length) +{ + // index of first gaussian orbital exponent is 1 + double exp_min = array[1]; + for (int i = 2; i < array_length; i++) + { + if (array[i] < exp_min) + exp_min = array[i]; + } + return exp_min; +} + +/* ---------------------------------------------------------------------- */ + +double FixQtpieReaxFF::distance(const double *posa, const double *posb) +{ + double dx, dy, dz; + dx = posb[0] - posa[0]; + dy = posb[1] - posa[1]; + dz = posb[2] - posa[2]; + return sqrt(dx*dx + dy*dy + dz*dz); +} diff --git a/src/REAXFF/fix_qtpie_reaxff.h b/src/REAXFF/fix_qtpie_reaxff.h new file mode 100644 index 0000000000..2f86e27a7a --- /dev/null +++ b/src/REAXFF/fix_qtpie_reaxff.h @@ -0,0 +1,141 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS +// clang-format off +FixStyle(qtpie/reaxff,FixQtpieReaxFF); +// clang-format on +#else + +#ifndef LMP_FIX_QTPIE_REAXFF_H +#define LMP_FIX_QTPIE_REAXFF_H + +#include "fix.h" + +namespace LAMMPS_NS { + +class FixQtpieReaxFF : public Fix { + public: + FixQtpieReaxFF(class LAMMPS *, int, char **); + ~FixQtpieReaxFF() override; + int setmask() override; + void post_constructor() override; + void init() override; + void init_list(int, class NeighList *) override; + void init_storage(); + void setup_pre_force(int) override; + void pre_force(int) override; + + void setup_pre_force_respa(int, int) override; + void pre_force_respa(int, int, int) override; + + void min_setup_pre_force(int); + void min_pre_force(int) override; + + double compute_scalar() override; + + protected: + int nevery, reaxflag; + int matvecs; + int nn, nt, m_fill; + int n_cap, nmax, m_cap; + int pack_flag; + int nlevels_respa; + class NeighList *list; + class PairReaxFF *reaxff; + class FixEfield *efield; + int *ilist, *jlist, *numneigh, **firstneigh; + + double swa, swb; // lower/upper Taper cutoff radius + double Tap[8]; // Taper function + double tolerance; // tolerance for the norm of the rel residual in CG + + double *chi, *eta, *gamma; // qeq parameters + double **shld; + + // fictitious charges + + double *s, *t; + double **s_hist, **t_hist; + int nprev; + + typedef struct { + int n, m; + int *firstnbr; + int *numnbrs; + int *jlist; + double *val; + } sparse_matrix; + + sparse_matrix H; + double *Hdia_inv; + double *b_s, *b_t; + double *b_prc, *b_prm; + double *chi_eff; // array of effective electronegativities + + //CG storage + double *p, *q, *r, *d; + int imax, maxwarn; + + char *pertype_option; // argument to determine how per-type info is obtained + char *gauss_file; // input file for gaussian orbital exponents + double *gauss_exp; // array of gaussian orbital exponents for each atom type + double dist_cutoff; // separation distance beyond which to neglect overlap integrals + + void pertype_parameters(char *); + void init_shielding(); + void init_taper(); + void allocate_storage(); + void deallocate_storage(); + void reallocate_storage(); + void allocate_matrix(); + void deallocate_matrix(); + void reallocate_matrix(); + + void init_matvec(); + void init_H(); + void compute_H(); + double calculate_H(double, double); + void calculate_Q(); + + int CG(double *, double *); + void sparse_matvec(sparse_matrix *, double *, double *); + + int pack_forward_comm(int, int *, double *, int, int *) override; + void unpack_forward_comm(int, int, double *) override; + int pack_reverse_comm(int, int, double *) override; + void unpack_reverse_comm(int, int *, double *) override; + double memory_usage() override; + void grow_arrays(int) override; + void copy_arrays(int, int, int) override; + int pack_exchange(int, double *) override; + int unpack_exchange(int, double *) override; + + double parallel_norm(double *, int); + double parallel_dot(double *, double *, int); + double parallel_vector_acc(double *, int); + + void vector_sum(double *, double, double *, double, double *, int); + void vector_add(double *, double, double *, int); + + void calc_chi_eff(); + double find_min_exp(const double*, const int); + double distance(const double*, const double*); + + int matvecs_s, matvecs_t; // Iteration count for each system +}; + +} // namespace LAMMPS_NS + +#endif +#endif diff --git a/src/REAXFF/fix_reaxff_species.cpp b/src/REAXFF/fix_reaxff_species.cpp index 0183d2670b..915b2b0dd0 100644 --- a/src/REAXFF/fix_reaxff_species.cpp +++ b/src/REAXFF/fix_reaxff_species.cpp @@ -143,9 +143,10 @@ FixReaxFFSpecies::FixReaxFFSpecies(LAMMPS *lmp, int narg, char **arg) : x0 = nullptr; clusterID = nullptr; - int ntmp = 1; + int ntmp = atom->nmax; memory->create(x0, ntmp, "reaxff/species:x0"); memory->create(clusterID, ntmp, "reaxff/species:clusterID"); + memset(clusterID, 0, sizeof(double) * ntmp); vector_atom = clusterID; nmax = 0; @@ -193,8 +194,7 @@ FixReaxFFSpecies::FixReaxFFSpecies(LAMMPS *lmp, int narg, char **arg) : if (iarg + ntypes + 1 > narg) utils::missing_cmd_args(FLERR, "fix reaxff/species element", error); - for (int i = 0; i < ntypes; i++) - eletype[i] = arg[iarg + 1 + i]; + for (int i = 0; i < ntypes; i++) eletype[i] = arg[iarg + 1 + i]; GetUniqueElements(); iarg += ntypes + 1; @@ -348,8 +348,7 @@ void FixReaxFFSpecies::setup(int /*vflag*/) ntotal = static_cast(atom->natoms); if (!eleflag) { - for (int i = 0; i < ntypes; i++) - eletype[i] = reaxff->eletype[i+1]; + for (int i = 0; i < ntypes; i++) eletype[i] = reaxff->eletype[i + 1]; GetUniqueElements(); } memory->destroy(Name); @@ -441,6 +440,7 @@ void FixReaxFFSpecies::Output_ReaxFF_Bonds(bigint ntimestep, FILE * /*fp*/) memory->destroy(clusterID); memory->create(x0, nmax, "reaxff/species:x0"); memory->create(clusterID, nmax, "reaxff/species:clusterID"); + memset(clusterID, 0, sizeof(double) * nmax); vector_atom = clusterID; } diff --git a/src/REAXFF/pair_reaxff.cpp b/src/REAXFF/pair_reaxff.cpp index 08e90933b2..ca60e5248f 100644 --- a/src/REAXFF/pair_reaxff.cpp +++ b/src/REAXFF/pair_reaxff.cpp @@ -340,11 +340,13 @@ void PairReaxFF::init_style() auto acks2_fixes = modify->get_fix_by_style("^acks2/reax"); int have_qeq = modify->get_fix_by_style("^qeq/reax").size() - + modify->get_fix_by_style("^qeq/shielded").size() + acks2_fixes.size(); + + modify->get_fix_by_style("^qeq/shielded").size() + acks2_fixes.size() + + modify->get_fix_by_style("^qtpie/reax").size(); if (qeqflag && (have_qeq != 1)) error->all(FLERR,"Pair style reaxff requires use of exactly one of the " - "fix qeq/reaxff or fix qeq/shielded or fix acks2/reaxff commands"); + "fix qeq/reaxff or fix qeq/shielded or fix acks2/reaxff or " + "fix qtpie/reaxff commands"); api->system->acks2_flag = acks2_fixes.size(); if (api->system->acks2_flag) diff --git a/src/REPLICA/neb.cpp b/src/REPLICA/neb.cpp index 4023f2c0b3..b14748565b 100644 --- a/src/REPLICA/neb.cpp +++ b/src/REPLICA/neb.cpp @@ -95,7 +95,7 @@ NEB::NEB(LAMMPS *lmp, double etol_in, double ftol_in, int n1steps_in, int n2step NEB::~NEB() { - MPI_Comm_free(&roots); + if (roots != MPI_COMM_NULL) MPI_Comm_free(&roots); memory->destroy(all); delete[] rdist; if (fp) { diff --git a/src/SPIN/neb_spin.cpp b/src/SPIN/neb_spin.cpp index 6d6ec1bbfd..b1b9dc077e 100644 --- a/src/SPIN/neb_spin.cpp +++ b/src/SPIN/neb_spin.cpp @@ -79,7 +79,7 @@ NEBSpin::NEBSpin(LAMMPS *lmp) : Command(lmp), fp(nullptr) { NEBSpin::~NEBSpin() { - MPI_Comm_free(&roots); + if (roots != MPI_COMM_NULL) MPI_Comm_free(&roots); memory->destroy(all); delete[] rdist; if (fp) { @@ -164,8 +164,10 @@ void NEBSpin::run() // create MPI communicator for root proc from each world int color; - if (me == 0) color = 0; - else color = 1; + if (me == 0) + color = 0; + else + color = MPI_UNDEFINED; MPI_Comm_split(uworld,color,0,&roots); // search for neb_spin fix, allocate it @@ -728,19 +730,21 @@ void NEBSpin::print_status() local_norm_inf = MAX(temp_inf,local_norm_inf); } - double fmaxreplica; - MPI_Allreduce(&tnorm2,&fmaxreplica,1,MPI_DOUBLE,MPI_MAX,roots); + double fmaxreplica = 0.0; + double fmaxatom = 0.0; double fnorminf = 0.0; MPI_Allreduce(&local_norm_inf,&fnorminf,1,MPI_DOUBLE,MPI_MAX,world); - double fmaxatom; - MPI_Allreduce(&fnorminf,&fmaxatom,1,MPI_DOUBLE,MPI_MAX,roots); - if (verbose) { - freplica = new double[nreplica]; - MPI_Allgather(&tnorm2,1,MPI_DOUBLE,&freplica[0],1,MPI_DOUBLE,roots); - fmaxatomInRepl = new double[nreplica]; - MPI_Allgather(&fnorminf,1,MPI_DOUBLE,&fmaxatomInRepl[0],1,MPI_DOUBLE,roots); + if (me == 0) { + MPI_Allreduce(&tnorm2,&fmaxreplica,1,MPI_DOUBLE,MPI_MAX,roots); + MPI_Allreduce(&fnorminf,&fmaxatom,1,MPI_DOUBLE,MPI_MAX,roots); + if (verbose) { + freplica = new double[nreplica]; + MPI_Allgather(&tnorm2,1,MPI_DOUBLE,&freplica[0],1,MPI_DOUBLE,roots); + fmaxatomInRepl = new double[nreplica]; + MPI_Allgather(&fnorminf,1,MPI_DOUBLE,&fmaxatomInRepl[0],1,MPI_DOUBLE,roots); + } } double one[7]; @@ -828,5 +832,9 @@ void NEBSpin::print_status() fprintf(ulogfile,"\n"); fflush(ulogfile); } + if ((me == 0) && verbose) { + delete[] freplica; + delete[] fmaxatomInRepl; + } } } diff --git a/src/atom.cpp b/src/atom.cpp index e0fceffe9c..04a21b9c52 100644 --- a/src/atom.cpp +++ b/src/atom.cpp @@ -3276,7 +3276,7 @@ int Atom::extract_datatype(const char *name) * \verbatim embed:rst -.. versionadded:: TBD +.. versionadded:: 19Nov2024 \endverbatim * diff --git a/src/command.h b/src/command.h index 23f315fc27..0122727463 100644 --- a/src/command.h +++ b/src/command.h @@ -20,8 +20,12 @@ namespace LAMMPS_NS { class Command : protected Pointers { public: - Command(class LAMMPS *lmp) : Pointers(lmp) {}; + Command(class LAMMPS *lmp) : Pointers(lmp), copymode(0) {}; virtual void command(int, char **) = 0; + + protected: + int copymode; // if set, do not deallocate during destruction + // required when classes are used as functors by Kokkos }; } // namespace LAMMPS_NS diff --git a/src/delete_atoms.h b/src/delete_atoms.h index 0aef095327..a1825dab93 100644 --- a/src/delete_atoms.h +++ b/src/delete_atoms.h @@ -30,7 +30,7 @@ class DeleteAtoms : public Command { DeleteAtoms(class LAMMPS *); void command(int, char **) override; - private: + protected: int *dlist; int allflag, compress_flag, bond_flag, mol_flag; std::map *hash; diff --git a/src/dump_image.cpp b/src/dump_image.cpp index f8403e7c71..9610ef4d9a 100644 --- a/src/dump_image.cpp +++ b/src/dump_image.cpp @@ -1097,7 +1097,7 @@ void DumpImage::create_image() color = colortype[itype]; } - ibonus = body[i]; + ibonus = body[j]; n = bptr->image(ibonus,bodyflag1,bodyflag2,bodyvec,bodyarray); for (k = 0; k < n; k++) { if (bodyvec[k] == SPHERE) diff --git a/src/fix_efield.h b/src/fix_efield.h index 72fd204898..108395cc2c 100644 --- a/src/fix_efield.h +++ b/src/fix_efield.h @@ -26,6 +26,7 @@ namespace LAMMPS_NS { class FixEfield : public Fix { friend class FixQEqReaxFF; + friend class FixQtpieReaxFF; public: FixEfield(class LAMMPS *, int, char **); diff --git a/src/fix_nve_limit.h b/src/fix_nve_limit.h index 2a32aee975..e8a07d815a 100644 --- a/src/fix_nve_limit.h +++ b/src/fix_nve_limit.h @@ -36,7 +36,7 @@ class FixNVELimit : public Fix { void reset_dt() override; double compute_scalar() override; - private: + protected: double dtv, dtf; double *step_respa; int ncount; diff --git a/src/fix_recenter.h b/src/fix_recenter.h index a45f0201bf..dfdb48b8d2 100644 --- a/src/fix_recenter.h +++ b/src/fix_recenter.h @@ -34,7 +34,7 @@ class FixRecenter : public Fix { double compute_scalar() override; double compute_vector(int) override; - private: + protected: int group2bit, scaleflag; int xflag, yflag, zflag; int xinitflag, yinitflag, zinitflag; diff --git a/src/fix_spring_self.cpp b/src/fix_spring_self.cpp index df00a2ba8c..d0c7ed8f94 100644 --- a/src/fix_spring_self.cpp +++ b/src/fix_spring_self.cpp @@ -21,9 +21,11 @@ #include "atom.h" #include "domain.h" #include "error.h" +#include "input.h" #include "memory.h" #include "respa.h" #include "update.h" +#include "variable.h" #include @@ -33,8 +35,7 @@ using namespace FixConst; /* ---------------------------------------------------------------------- */ FixSpringSelf::FixSpringSelf(LAMMPS *lmp, int narg, char **arg) : - Fix(lmp, narg, arg), - xoriginal(nullptr) + Fix(lmp, narg, arg), xoriginal(nullptr), kstr(nullptr), kval(nullptr) { if ((narg < 4) || (narg > 5)) error->all(FLERR,"Illegal fix spring/self command"); @@ -45,9 +46,17 @@ FixSpringSelf::FixSpringSelf(LAMMPS *lmp, int narg, char **arg) : extscalar = 1; energy_global_flag = 1; respa_level_support = 1; + maxatom = 0; + kvar = -1; - k = utils::numeric(FLERR,arg[3],false,lmp); - if (k <= 0.0) error->all(FLERR,"Illegal fix spring/self command"); + if (utils::strmatch(arg[3], "^v_")) { + kstr = utils::strdup(arg[3] + 2); + kstyle = NONE; + } else { + k = utils::numeric(FLERR,arg[3],false,lmp); + kstyle = CONSTANT; + if (k <= 0.0) error->all(FLERR,"Illegal force constant for fix spring/self command"); + } xflag = yflag = zflag = 1; @@ -103,8 +112,10 @@ FixSpringSelf::~FixSpringSelf() atom->delete_callback(id,Atom::GROW); atom->delete_callback(id,Atom::RESTART); - // delete locally stored array + // delete locally stored arrays + delete[] kstr; + memory->destroy(kval); memory->destroy(xoriginal); } @@ -123,6 +134,25 @@ int FixSpringSelf::setmask() void FixSpringSelf::init() { + // check variable + + if (kstr) { + kvar = input->variable->find(kstr); + if (kvar < 0) error->all(FLERR, "Variable {} for fix spring/self does not exist", kstr); + if (input->variable->equalstyle(kvar)) + kstyle = EQUAL; + else if (input->variable->atomstyle(kvar)) + kstyle = ATOM; + else + error->all(FLERR, "Variable {} for fix spring/self is invalid style", kstr); + } + + + if ((kstyle == ATOM) && (atom->nmax > maxatom)) { + maxatom = atom->nmax; + memory->destroy(kval); + memory->create(kval, maxatom, "sprint/self:kval"); + } if (utils::strmatch(update->integrate_style,"^respa")) { ilevel_respa = (dynamic_cast(update->integrate))->nlevels-1; if (respa_level >= 0) ilevel_respa = MIN(respa_level,ilevel_respa); @@ -162,24 +192,58 @@ void FixSpringSelf::post_force(int /*vflag*/) double dx,dy,dz; double unwrap[3]; + // reallocate kval array if necessary + + if ((kstyle == ATOM) && (atom->nmax > maxatom)) { + maxatom = atom->nmax; + memory->destroy(kval); + memory->create(kval, maxatom, "sprint/self:kval"); + } + espring = 0.0; - for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit) { - domain->unmap(x[i],image[i],unwrap); - dx = unwrap[0] - xoriginal[i][0]; - dy = unwrap[1] - xoriginal[i][1]; - dz = unwrap[2] - xoriginal[i][2]; - if (!xflag) dx = 0.0; - if (!yflag) dy = 0.0; - if (!zflag) dz = 0.0; - f[i][0] -= k*dx; - f[i][1] -= k*dy; - f[i][2] -= k*dz; - espring += k * (dx*dx + dy*dy + dz*dz); + if ((kstyle == CONSTANT) || (kstyle == EQUAL)) { + // update k if equal style variable + if (kstyle == EQUAL) { + k = input->variable->compute_equal(kvar); + if (k < 0.0) + error->all(FLERR,"Evaluation of {} gave bad value {} for fix spring/self", kstr, k); } + for (int i = 0; i < nlocal; i++) + if (mask[i] & groupbit) { + domain->unmap(x[i],image[i],unwrap); + dx = unwrap[0] - xoriginal[i][0]; + dy = unwrap[1] - xoriginal[i][1]; + dz = unwrap[2] - xoriginal[i][2]; + if (!xflag) dx = 0.0; + if (!yflag) dy = 0.0; + if (!zflag) dz = 0.0; + f[i][0] -= k*dx; + f[i][1] -= k*dy; + f[i][2] -= k*dz; + espring += k * (dx*dx + dy*dy + dz*dz); + } + espring *= 0.5; + } else { + // update kval for kstyle == ATOM + input->variable->compute_atom(kvar, igroup, kval, 1, 0); + for (int i = 0; i < nlocal; i++) + if (mask[i] & groupbit) { + domain->unmap(x[i],image[i],unwrap); + dx = unwrap[0] - xoriginal[i][0]; + dy = unwrap[1] - xoriginal[i][1]; + dz = unwrap[2] - xoriginal[i][2]; + if (!xflag) dx = 0.0; + if (!yflag) dy = 0.0; + if (!zflag) dz = 0.0; + f[i][0] -= kval[i]*dx; + f[i][1] -= kval[i]*dy; + f[i][2] -= kval[i]*dz; + espring += kval[i] * (dx*dx + dy*dy + dz*dz); + } - espring *= 0.5; + espring *= 0.5; + } } /* ---------------------------------------------------------------------- */ @@ -213,7 +277,8 @@ double FixSpringSelf::compute_scalar() double FixSpringSelf::memory_usage() { - double bytes = (double)atom->nmax*3 * sizeof(double); + double bytes = (double)atom->nmax*4 * sizeof(double); + if (kstyle == ATOM) bytes += (double)atom->nmax * sizeof(double); return bytes; } diff --git a/src/fix_spring_self.h b/src/fix_spring_self.h index f13f2be918..9699ca33f3 100644 --- a/src/fix_spring_self.h +++ b/src/fix_spring_self.h @@ -26,6 +26,7 @@ namespace LAMMPS_NS { class FixSpringSelf : public Fix { public: + enum { NONE, CONSTANT, EQUAL, ATOM }; FixSpringSelf(class LAMMPS *, int, char **); ~FixSpringSelf() override; int setmask() override; @@ -50,8 +51,11 @@ class FixSpringSelf : public Fix { protected: double k, espring; double **xoriginal; // original coords of atoms + char *kstr; // name of variable for K + double *kval; // per-atom variable values for K + int kvar, kstyle; int xflag, yflag, zflag; - int ilevel_respa; + int ilevel_respa, maxatom; }; } // namespace LAMMPS_NS diff --git a/src/fix_wall_region.cpp b/src/fix_wall_region.cpp index 3d817f34f4..8c245a3ed6 100644 --- a/src/fix_wall_region.cpp +++ b/src/fix_wall_region.cpp @@ -100,6 +100,7 @@ FixWallRegion::FixWallRegion(LAMMPS *lmp, int narg, char **arg) : FixWallRegion::~FixWallRegion() { + if (copymode) return; delete[] idregion; } diff --git a/src/fix_wall_region.h b/src/fix_wall_region.h index 77b82d012c..fb07f00ee9 100644 --- a/src/fix_wall_region.h +++ b/src/fix_wall_region.h @@ -38,7 +38,7 @@ class FixWallRegion : public Fix { double compute_scalar() override; double compute_vector(int) override; - private: + protected: int style; double epsilon, sigma, cutoff; double alpha; diff --git a/src/group.cpp b/src/group.cpp index 909e741c6b..136faa4059 100644 --- a/src/group.cpp +++ b/src/group.cpp @@ -1141,10 +1141,10 @@ void Group::xcm(int igroup, double masstotal, double *cm, Region *region) /* ---------------------------------------------------------------------- compute the center-of-mass velocity of group of atoms masstotal = total mass - return center-of-mass velocity in cm[] + return center-of-mass velocity in vcm[] ------------------------------------------------------------------------- */ -void Group::vcm(int igroup, double masstotal, double *cm) +void Group::vcm(int igroup, double masstotal, double *vcm) { int groupbit = bitmask[igroup]; @@ -1176,21 +1176,21 @@ void Group::vcm(int igroup, double masstotal, double *cm) } } - MPI_Allreduce(p, cm, 3, MPI_DOUBLE, MPI_SUM, world); + MPI_Allreduce(p, vcm, 3, MPI_DOUBLE, MPI_SUM, world); if (masstotal > 0.0) { - cm[0] /= masstotal; - cm[1] /= masstotal; - cm[2] /= masstotal; + vcm[0] /= masstotal; + vcm[1] /= masstotal; + vcm[2] /= masstotal; } } /* ---------------------------------------------------------------------- compute the center-of-mass velocity of group of atoms in region masstotal = total mass - return center-of-mass velocity in cm[] + return center-of-mass velocity in vcm[] ------------------------------------------------------------------------- */ -void Group::vcm(int igroup, double masstotal, double *cm, Region *region) +void Group::vcm(int igroup, double masstotal, double *vcm, Region *region) { int groupbit = bitmask[igroup]; region->prematch(); @@ -1224,11 +1224,11 @@ void Group::vcm(int igroup, double masstotal, double *cm, Region *region) } } - MPI_Allreduce(p, cm, 3, MPI_DOUBLE, MPI_SUM, world); + MPI_Allreduce(p, vcm, 3, MPI_DOUBLE, MPI_SUM, world); if (masstotal > 0.0) { - cm[0] /= masstotal; - cm[1] /= masstotal; - cm[2] /= masstotal; + vcm[0] /= masstotal; + vcm[1] /= masstotal; + vcm[2] /= masstotal; } } @@ -1236,7 +1236,7 @@ void Group::vcm(int igroup, double masstotal, double *cm, Region *region) compute the total force on group of atoms ------------------------------------------------------------------------- */ -void Group::fcm(int igroup, double *cm) +void Group::fcm(int igroup, double *fcm) { int groupbit = bitmask[igroup]; @@ -1254,14 +1254,14 @@ void Group::fcm(int igroup, double *cm) flocal[2] += f[i][2]; } - MPI_Allreduce(flocal, cm, 3, MPI_DOUBLE, MPI_SUM, world); + MPI_Allreduce(flocal, fcm, 3, MPI_DOUBLE, MPI_SUM, world); } /* ---------------------------------------------------------------------- compute the total force on group of atoms in region ------------------------------------------------------------------------- */ -void Group::fcm(int igroup, double *cm, Region *region) +void Group::fcm(int igroup, double *fcm, Region *region) { int groupbit = bitmask[igroup]; region->prematch(); @@ -1281,7 +1281,7 @@ void Group::fcm(int igroup, double *cm, Region *region) flocal[2] += f[i][2]; } - MPI_Allreduce(flocal, cm, 3, MPI_DOUBLE, MPI_SUM, world); + MPI_Allreduce(flocal, fcm, 3, MPI_DOUBLE, MPI_SUM, world); } /* ---------------------------------------------------------------------- diff --git a/src/label_map.cpp b/src/label_map.cpp index c4865d5ace..9934868c49 100644 --- a/src/label_map.cpp +++ b/src/label_map.cpp @@ -43,8 +43,6 @@ LabelMap::LabelMap(LAMMPS *_lmp, int _natomtypes, int _nbondtypes, int _nanglety Pointers(_lmp), natomtypes(_natomtypes), nbondtypes(_nbondtypes), nangletypes(_nangletypes), ndihedraltypes(_ndihedraltypes), nimpropertypes(_nimpropertypes) { - if (lmp->citeme) lmp->citeme->add(cite_type_label_framework); - lmap2lmap.atom = lmap2lmap.bond = lmap2lmap.angle = lmap2lmap.dihedral = lmap2lmap.improper = nullptr; reset_type_labels(); @@ -112,6 +110,8 @@ void LabelMap::modify_lmap(int narg, char **arg) if ((narg < 1) || ((narg > 2) && ((narg % 2) == 0))) error->all(FLERR, "Incorrect number of arguments for labelmap command"); + if (lmp->citeme) lmp->citeme->add(cite_type_label_framework); + int ntypes; std::vector *labels; std::unordered_map *labels_map; @@ -238,6 +238,8 @@ int LabelMap::find_or_create(const std::string &mylabel, std::vectorsecond; + if (lmp->citeme) lmp->citeme->add(cite_type_label_framework); + // if no match found, create new label at next available index // label map assumed to be intialized with numeric index // user labels are assumed to be alphanumeric (not a number) diff --git a/src/library.cpp b/src/library.cpp index 0065f10454..9876d363e5 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -2159,7 +2159,7 @@ int lammps_extract_atom_datatype(void *handle, const char *name) * \verbatim embed:rst -.. versionadded:: TBD +.. versionadded:: 19Nov2024 This function returns an integer with the size of the per-atom property with the specified name. This allows to accurately determine diff --git a/src/molecule.cpp b/src/molecule.cpp index e7656acbbc..2a9b8a944d 100644 --- a/src/molecule.cpp +++ b/src/molecule.cpp @@ -958,7 +958,7 @@ void Molecule::diameters(char *line) } /* ---------------------------------------------------------------------- - read charges from file + read dipoles from file ------------------------------------------------------------------------- */ void Molecule::dipoles(char *line) diff --git a/src/region.cpp b/src/region.cpp index 0017186c5d..7399b14adb 100644 --- a/src/region.cpp +++ b/src/region.cpp @@ -53,7 +53,6 @@ Region::~Region() delete[] id; delete[] style; - delete[] xstr; delete[] ystr; delete[] zstr; diff --git a/src/region_block.cpp b/src/region_block.cpp index 9376016843..36c38f517c 100644 --- a/src/region_block.cpp +++ b/src/region_block.cpp @@ -262,6 +262,7 @@ RegBlock::RegBlock(LAMMPS *lmp, int narg, char **arg) : RegBlock::~RegBlock() { if (copymode) return; + delete[] xlostr; delete[] xhistr; delete[] ylostr; diff --git a/src/region_sphere.cpp b/src/region_sphere.cpp index f449978938..ec472c031c 100644 --- a/src/region_sphere.cpp +++ b/src/region_sphere.cpp @@ -101,6 +101,8 @@ RegSphere::RegSphere(LAMMPS *lmp, int narg, char **arg) : RegSphere::~RegSphere() { + if (copymode) return; + delete[] xstr; delete[] ystr; delete[] zstr; diff --git a/src/region_sphere.h b/src/region_sphere.h index f0e0bd195c..3053d98edb 100644 --- a/src/region_sphere.h +++ b/src/region_sphere.h @@ -36,7 +36,7 @@ class RegSphere : public Region { void set_velocity_shape() override; void velocity_contact_shape(double *, double *) override; - private: + protected: double xc, yc, zc; double radius; int xstyle, xvar; diff --git a/src/version.h b/src/version.h index 9c382b3768..88a65b1657 100644 --- a/src/version.h +++ b/src/version.h @@ -1,2 +1 @@ -#define LAMMPS_VERSION "29 Aug 2024" -#define LAMMPS_UPDATE "Development" +#define LAMMPS_VERSION "19 Nov 2024" diff --git a/tools/regression-tests/get_kokkos_input.py b/tools/regression-tests/get_kokkos_input.py index c03a813456..f94ca42c8e 100644 --- a/tools/regression-tests/get_kokkos_input.py +++ b/tools/regression-tests/get_kokkos_input.py @@ -7,6 +7,7 @@ # These 4 files will be read in by the regression tester run_tests.py from argparse import ArgumentParser +import random import subprocess import sys @@ -55,20 +56,37 @@ if __name__ == "__main__": parser = ArgumentParser() parser.add_argument("--examples-top-level", dest="example_toplevel", default="", help="Examples top-level") parser.add_argument("--filter-out", dest="filter_out", default="", help="Filter out input scripts that contain strings") + parser.add_argument("--batch-size", dest="batch_size", default=50, help="Batch size of scripts per input list") args = parser.parse_args() example_toplevel = args.example_toplevel filter_out = args.filter_out.split(";") - + batch_size = int(args.batch_size) + # print the list of the input scripts that has each feature to a separate file features = [ 'pair', 'fix', 'compute' ] for feature in features: input_list = [] generate_list(feature, example_toplevel, filter_out, input_list) - with open(f"input-list-{feature}-kk.txt", "w") as f: - for input in input_list: - if input != "": - f.write(f"{input}\n") + + num_batches = int(len(input_list) / batch_size) + if num_batches < 2: + with open(f"input-list-{feature}-kk.txt", "w") as f: + for input in input_list: + if input != "": + f.write(f"{input}\n") + else: + for idx in range(num_batches): + with open(f"input-list-{feature}-{idx}-kk.txt", "w") as f: + if len(input_list) > batch_size: + sampled = random.sample(input_list, batch_size) + else: + sampled = input_list + for input in sampled: + if input != "": + if input in input_list: + input_list.remove(input) + f.write(f"{input}\n") # combine the list of the input scripts that have these feature to a single file input-list-misc-kk.txt features = [ 'angle', 'bond', 'dihedral', 'improper', 'min' ] diff --git a/unittest/force-styles/test_fix_timestep.cpp b/unittest/force-styles/test_fix_timestep.cpp index 069f07abad..957226d22b 100644 --- a/unittest/force-styles/test_fix_timestep.cpp +++ b/unittest/force-styles/test_fix_timestep.cpp @@ -430,7 +430,8 @@ TEST(FixTimestep, plain) // fix nve/limit cannot work with r-RESPA ifix = lmp->modify->get_fix_by_id("test"); if (ifix && !utils::strmatch(ifix->style, "^rigid") && - !utils::strmatch(ifix->style, "^nve/limit")) { + !utils::strmatch(ifix->style, "^nve/limit") && + !utils::strmatch(ifix->style, "^recenter")) { if (!verbose) ::testing::internal::CaptureStdout(); cleanup_lammps(lmp, test_config); if (!verbose) ::testing::internal::GetCapturedStdout(); diff --git a/unittest/force-styles/tests/atomic-pair-reaxff-qtpie.yaml b/unittest/force-styles/tests/atomic-pair-reaxff-qtpie.yaml new file mode 100644 index 0000000000..00524a037d --- /dev/null +++ b/unittest/force-styles/tests/atomic-pair-reaxff-qtpie.yaml @@ -0,0 +1,177 @@ +--- +lammps_version: 29 Aug 2024 +tags: slow, unstable +date_generated: Fri Nov 15 13:22:57 2024 +epsilon: 2e-10 +skip_tests: kokkos_omp +prerequisites: ! | + pair reaxff + fix qtpie/reaxff +pre_commands: ! | + echo screen + variable newton_pair delete + variable newton_pair index on + atom_modify map array + units real + atom_style charge + lattice diamond 3.77 + region box block 0 2 0 2 0 2 + create_box 3 box + create_atoms 1 box + displace_atoms all random 0.1 0.1 0.1 623426 + mass 1 1.0 + mass 2 12.0 + mass 3 16.0 + set type 1 type/fraction 2 0.5 998877 + set type 2 type/fraction 3 0.5 887766 + set type 1 charge 0.00 + set type 2 charge 0.01 + set type 3 charge -0.01 + velocity all create 100 4534624 loop geom +post_commands: ! | + fix qeq all qtpie/reaxff 1 0.0 8.0 1.0e-20 reaxff ${input_dir}/gauss_exp.txt +input_file: in.empty +pair_style: reaxff NULL checkqeq yes +pair_coeff: ! | + * * ffield.reax.mattsson H C O +extract: ! "" +natoms: 64 +init_vdwl: -3296.3503506624793 +init_coul: -268.63677950571287 +init_stress: ! |- + -1.0115766891336298e+03 -1.2088131753999489e+03 -8.2791874767348656e+02 -2.3899453277851464e+02 1.9901049958702231e+02 -6.5090424115686358e+02 +init_forces: ! |2 + 1 -8.6436073169042203e+01 -2.4400814471028966e+01 1.0905778351740135e+02 + 2 -1.0912360851161381e+02 -1.7785390915789219e+02 -2.2174396944101937e+02 + 3 -1.7205686042941795e+02 1.8323463801374010e+02 1.3330073949860362e+01 + 4 3.5074883698050790e+01 -5.4658573523678996e+01 8.8946983653047340e+01 + 5 1.8037666038924445e+02 1.5258130212658664e+01 -8.1473951707266664e+01 + 6 1.3687016587487102e+02 -2.9993359047535205e+02 3.1610843762057812e+01 + 7 -5.3040478511967407e+01 -1.2906564054460426e+02 -1.6364626011721845e+02 + 8 -1.5098093043741406e+02 4.1651970160590970e+01 1.5340103366817354e+02 + 9 1.7340129928959293e+01 8.8054766076132950e+01 2.4310262794648661e+01 + 10 8.3732473664111822e+01 1.4163802420547773e+02 1.2086953212785974e+02 + 11 -4.2433035390388874e+01 6.5812517236648006e+01 1.3461956725415703e+02 + 12 -9.6266601388748128e+01 -2.5379001755851835e+01 7.7659919394842500e+00 + 13 -6.5393078499150931e+01 -4.9454699468880946e+01 -3.8254305288102898e+01 + 14 8.2607951316806449e+01 -6.8984439086417567e+01 -9.2468559306204710e+01 + 15 -6.6187434150450684e+01 2.1289685674401704e+02 7.8580288164394176e+01 + 16 1.8897543273751987e+02 5.3397684583320228e+01 6.3460608305143431e+01 + 17 1.4554759813765031e+02 -3.9370375612854211e+01 -9.7056561289015221e+01 + 18 -4.5104710861585204e+01 -1.8896520586433442e+02 1.1164278206176432e+02 + 19 2.6896526778434691e+02 3.6948523216182610e+02 -3.3986956263391414e+02 + 20 -7.5462858727084978e+01 -8.6024949892680127e+01 -4.6529428267542672e-01 + 21 -7.3095936427312608e+01 -5.7463824581552551e+01 -1.1787940398446622e+02 + 22 1.5517223233172072e+02 -1.2975611741340879e+02 8.0541352960578152e+01 + 23 7.9113780255105098e+01 -1.3159747874504722e+01 -2.5876270623789040e+01 + 24 -2.0721357932150298e+02 2.1426998940883044e+02 -1.2404916276617425e+02 + 25 -1.1375172605965993e+02 1.9145650691697844e+02 -8.3221527665980318e+01 + 26 2.8613004810395404e+02 -2.1814761019305757e+02 2.3221022419460809e+02 + 27 -6.4957092731555079e+01 6.8730894090822545e+01 1.7879679090575814e+02 + 28 -3.5591147454501368e+00 3.8919120211497962e+01 3.2899891202019738e+01 + 29 -7.1006654872719238e+01 3.4395396834237154e+01 2.5490290273424105e+01 + 30 -1.7028325267142560e+02 -1.9865767607180553e+02 -1.1525322636144483e+02 + 31 -1.4030343203666592e+02 1.6505252880438636e+02 -1.2536824728321348e+02 + 32 2.8083940532792852e+01 -5.9613080258602423e+01 -1.7589243121123940e+02 + 33 -6.0786787938223988e+01 -6.2914083425882403e+01 6.1724187337270543e+01 + 34 -2.2381506031982109e+01 1.0258481649700494e+02 -7.2581426601092289e+01 + 35 2.6826897000164860e+02 -2.1830766483611634e+02 1.2929910897210587e+02 + 36 1.0617788606545010e+02 1.8844480945948908e+02 -1.9952637621677195e+02 + 37 -1.8500771626166176e+02 1.2691494490100189e+02 -6.1799194093629900e+01 + 38 -2.9568079732966703e+02 1.0466330586524869e+02 1.5539102098367138e+02 + 39 9.0313772894092821e+01 -5.7763893527493209e+01 2.4405485803219346e+01 + 40 2.0368385039374967e+01 -1.0212243725288241e+02 5.8977256992383225e+01 + 41 -5.4472403223120203e+01 1.3664060494004097e+01 -7.0498722233072925e+01 + 42 -1.0110613172512772e+02 3.7733470557342038e+01 -7.1384152705391784e+01 + 43 2.2496360806160274e+02 9.5374846176895645e+01 1.2314600313131321e+02 + 44 8.9930414141415653e+01 -9.8548000406723986e+01 -8.6016793720029909e+01 + 45 1.8042451487789609e+01 1.2291681595021672e+02 4.8750513798444771e+01 + 46 1.4360528923605511e+01 -2.8393705665522205e+01 2.5000665776800748e+01 + 47 3.3430180733495789e+01 -1.9134003719608592e+02 -6.9378872162649785e+01 + 48 6.5977481507414737e+01 -2.0569144157798024e+02 -2.3305633430859448e+01 + 49 -3.6805309882726834e+02 -2.5080697208449138e+01 1.0725330582435194e+02 + 50 3.3568380074196661e+01 -7.2887959381521910e+01 8.4247363622347109e+01 + 51 1.3937571489940009e+02 -1.0241922204671263e+02 1.5837793886668237e+02 + 52 -1.1015736921987484e+02 1.2143381508599626e+02 -1.3301954780997897e+02 + 53 -5.5053511032747757e+01 3.3318719859853354e+02 -5.4695756604566270e-02 + 54 -9.3809048702850468e+00 6.3003589530582012e+01 1.8485428176102951e+02 + 55 1.1268442794862931e+02 -1.7912573126673601e+02 7.6216834182693233e+01 + 56 -4.5797371929680743e+01 -1.3685285921363297e+02 4.1670118112644616e+01 + 57 9.5486141883804535e+01 7.3675172824019725e+01 -6.4017779318177091e+01 + 58 -2.6940387352055815e+01 -9.1882719416077734e+00 -1.6496876672222488e+01 + 59 -6.1110604213739784e+01 1.0339564891274583e+02 8.0387279300896552e+01 + 60 -1.3438307290047304e+01 6.2821560489016619e+01 -1.4960910695536089e+02 + 61 -3.7436807177941901e+01 -1.0060440409572418e+02 -7.2129567761936670e+01 + 62 1.5913069360497735e+01 -7.4587444709681003e+00 2.7411930168532034e+01 + 63 1.3132402121938716e+02 -1.2633666867998599e+02 -5.9237822651056007e+01 + 64 1.0528416436964088e+02 1.1185676743727093e+02 7.6927706681792429e+01 +run_vdwl: -3296.3468820327244 +run_coul: -268.63661874354375 +run_stress: ! |- + -1.0114879837664473e+03 -1.2087430287015966e+03 -8.2783958944769802e+02 -2.3908224870200783e+02 1.9895929948999779e+02 -6.5093393277361292e+02 +run_forces: ! |2 + 1 -8.6437645087040067e+01 -2.4400562180957351e+01 1.0906066254553539e+02 + 2 -1.0912275665241053e+02 -1.7786252590616218e+02 -2.2173930761917526e+02 + 3 -1.7205763157578019e+02 1.8323288100722453e+02 1.3329429976402201e+01 + 4 3.5076762638460409e+01 -5.4659870228622246e+01 8.8949923949362500e+01 + 5 1.8037851727311079e+02 1.5258819157296621e+01 -8.1471824505046186e+01 + 6 1.3693758232722558e+02 -2.9996076554788232e+02 3.1544103577304767e+01 + 7 -5.3040940882157734e+01 -1.2906590024224255e+02 -1.6365072194268137e+02 + 8 -1.5098199162393547e+02 4.1652246872814757e+01 1.5340185354669362e+02 + 9 1.7338937131206805e+01 8.8052623671376480e+01 2.4311464733168261e+01 + 10 8.3731216255426517e+01 1.4163544349001512e+02 1.2086773194932107e+02 + 11 -4.2431645167783358e+01 6.5809284879610303e+01 1.3461058585127651e+02 + 12 -9.6268502974108401e+01 -2.5380508031933715e+01 7.7613919613213387e+00 + 13 -6.5394480799184038e+01 -4.9454690134569510e+01 -3.8251921533789378e+01 + 14 8.2607648143581144e+01 -6.8981518310093449e+01 -9.2465919648906919e+01 + 15 -6.6184911564685194e+01 2.1289827790699042e+02 7.8579764996083128e+01 + 16 1.8897212100631526e+02 5.3401557991805042e+01 6.3454265397142500e+01 + 17 1.4554879673647156e+02 -3.9369818695332363e+01 -9.7059433050641857e+01 + 18 -4.5104364652465691e+01 -1.8896630466961875e+02 1.1164283921546559e+02 + 19 2.6896544439509694e+02 3.6948380976016932e+02 -3.3986849873937234e+02 + 20 -7.5474652635907873e+01 -8.6015213272245504e+01 -4.8259676480768809e-01 + 21 -7.3095940937548960e+01 -5.7464788467137694e+01 -1.1787855066724634e+02 + 22 1.5517351794732136e+02 -1.2975689546114967e+02 8.0540859481818742e+01 + 23 7.9115865455975012e+01 -1.3150472802357561e+01 -2.5876503956346053e+01 + 24 -2.0722276003223385e+02 2.1428270990874043e+02 -1.2405357611708547e+02 + 25 -1.1375429959489580e+02 1.9146061264686494e+02 -8.3222256641294663e+01 + 26 2.8613970510624404e+02 -2.1815485596507278e+02 2.3221441960858138e+02 + 27 -6.4953545994003377e+01 6.8732504333245046e+01 1.7879777515705371e+02 + 28 -3.5609706858523174e+00 3.8914845051174211e+01 3.2895014258375397e+01 + 29 -7.1006962970615220e+01 3.4395946962585107e+01 2.5490302918559674e+01 + 30 -1.7028397298399719e+02 -1.9865780309887722e+02 -1.1525330878428163e+02 + 31 -1.4030595583104076e+02 1.6505149109610096e+02 -1.2536961295119134e+02 + 32 2.8084746879715730e+01 -5.9606705649399501e+01 -1.7588207877218977e+02 + 33 -6.0786242960574022e+01 -6.2913885176828828e+01 6.1724433850103161e+01 + 34 -2.2381168039316268e+01 1.0258398798459802e+02 -7.2580420302975753e+01 + 35 2.6830990081622787e+02 -2.1835204404249015e+02 1.2931018843147504e+02 + 36 1.0617963217845744e+02 1.8843796909876622e+02 -1.9952176446060335e+02 + 37 -1.8503402227685811e+02 1.2695504061699209e+02 -6.1823411355500355e+01 + 38 -2.9568399770704622e+02 1.0466662636959578e+02 1.5539009610875451e+02 + 39 9.0314570392701199e+01 -5.7764115394488343e+01 2.4405095735494033e+01 + 40 2.0369156227530599e+01 -1.0212187254286960e+02 5.8977622919450276e+01 + 41 -5.4470510869266015e+01 1.3662227244573693e+01 -7.0490380604491506e+01 + 42 -1.0110530047858049e+02 3.7731202989968210e+01 -7.1386374496910719e+01 + 43 2.2495960799582232e+02 9.5372714343303912e+01 1.2314241456430595e+02 + 44 8.9934882887196963e+01 -9.8543527246862695e+01 -8.6022172212180365e+01 + 45 1.8033520031139599e+01 1.2290938260195539e+02 4.8756688104804162e+01 + 46 1.4361839506594013e+01 -2.8393917262125949e+01 2.5000422631182474e+01 + 47 3.3429877782459641e+01 -1.9133540583883391e+02 -6.9377262749149708e+01 + 48 6.5990004078562464e+01 -2.0569338794984392e+02 -2.3314637198125183e+01 + 49 -3.6809322631363733e+02 -2.5025981981696823e+01 1.0734144239898833e+02 + 50 3.3569219922950602e+01 -7.2886698417202680e+01 8.4245785013977056e+01 + 51 1.3937840161131851e+02 -1.0242257034381630e+02 1.5838167570232804e+02 + 52 -1.1015632519571759e+02 1.2143397586854937e+02 -1.3302347496060023e+02 + 53 -5.5089426238213143e+01 3.3316281975316286e+02 -7.4271240172002706e-02 + 54 -9.3687742955109137e+00 6.2990741970756417e+01 1.8487238090605700e+02 + 55 1.1268678311310606e+02 -1.7912913164827566e+02 7.6217416717889748e+01 + 56 -4.5806154616486943e+01 -1.3684453580988600e+02 4.1676759373489766e+01 + 57 9.5485704227965911e+01 7.3672454441368231e+01 -6.4019152364471736e+01 + 58 -2.6940596532815384e+01 -9.1863734979434319e+00 -1.6497051289566816e+01 + 59 -6.1118963218972965e+01 1.0340128135495597e+02 8.0392569432165885e+01 + 60 -1.3439582130914836e+01 6.2820376074347955e+01 -1.4960914576393716e+02 + 61 -3.7435810911347573e+01 -1.0060262070691427e+02 -7.2129466739698870e+01 + 62 1.5913491338811767e+01 -7.4593609075659995e+00 2.7412825126279046e+01 + 63 1.3132146995609114e+02 -1.2633718643910591e+02 -5.9233334916534574e+01 + 64 1.0528511106781713e+02 1.1185795842149707e+02 7.6928226208764087e+01 +... diff --git a/unittest/force-styles/tests/data.cmap b/unittest/force-styles/tests/data.cmap new file mode 100644 index 0000000000..ad86b1c3ed --- /dev/null +++ b/unittest/force-styles/tests/data.cmap @@ -0,0 +1,232 @@ +LAMMPS data file via write_data, version 5 May 2020, timestep = 0 + +29 atoms +5 atom types +24 bonds +5 bond types +30 angles +4 angle types +31 dihedrals +5 dihedral types +2 impropers +2 improper types +5 crossterms + + -6.024572 8.975428 xlo xhi + -7.692866 7.307134 ylo yhi + -8.086924 6.913076 zlo zhi + +Masses + +1 12.0107 +2 4.00794 +3 14.0067 +4 15.9994 +5 15.9994 + +Pair Coeffs # zero + +1 +2 +3 +4 +5 + +Bond Coeffs # zero + +1 1.5 +2 1.1 +3 1.3 +4 1.2 +5 1 + +Angle Coeffs # zero + +1 110.1 +2 111 +3 120 +4 108.5 + +Dihedral Coeffs # zero + +1 +2 +3 +4 +5 + +Improper Coeffs # zero + +1 +2 + +Atoms # full + +10 2 1 7.0000000000000007e-02 2.0185283555536988e+00 -1.4283966846517357e+00 -9.6733527271133024e-01 0 0 0 +11 2 2 8.9999999999999997e-02 1.7929780509347666e+00 -1.9871047540768743e+00 -1.8840626643185674e+00 0 0 0 +12 2 1 -2.7000000000000002e-01 3.0030247876861225e+00 -4.8923319967572748e-01 -1.6188658531537248e+00 0 0 0 +13 2 2 8.9999999999999997e-02 4.0447273787895934e+00 -9.0131998547446246e-01 -1.6384447268320836e+00 0 0 0 +14 2 2 8.9999999999999997e-02 2.6033152817257075e+00 -4.0789761505963579e-01 -2.6554413538823063e+00 0 0 0 + 2 1 2 3.1000000000000000e-01 3.0197083955402204e-01 2.9515239068888608e+00 -8.5689735572907566e-01 0 0 0 + 3 1 1 -2.0000000000000000e-02 -6.9435377880558602e-01 1.2440473127136711e+00 -6.2233801468892025e-01 0 0 0 + 4 1 2 8.9999999999999997e-02 -1.5771614164685133e+00 1.4915333140468066e+00 -1.2487126845040522e+00 0 0 0 + 6 1 1 5.1000000000000001e-01 2.9412607937706009e-01 2.2719282656652909e-01 -1.2843094067857870e+00 0 0 0 + 7 1 4 -5.1000000000000001e-01 3.4019871062879609e-01 -9.1277350075786561e-03 -2.4633113224304561e+00 0 0 0 +19 3 2 4.2359999999999998e-01 1.5349125211132961e+00 2.6315969880333707e+00 -4.2472859440220647e+00 0 0 0 +15 2 2 8.9999999999999997e-02 2.9756315249791303e+00 5.6334269722969288e-01 -1.2437650754599008e+00 0 0 0 +18 3 4 -8.4719999999999995e-01 2.1384791188033843e+00 3.0177261773770208e+00 -3.5160827596876225e+00 0 0 0 +20 3 2 4.2359999999999998e-01 2.7641167828863153e+00 3.6833419064000221e+00 -3.9380850623312638e+00 0 0 0 + 8 2 3 -4.6999999999999997e-01 1.1641187171852805e+00 -4.8375305955385234e-01 -6.7659823767368688e-01 0 0 0 + 9 2 2 3.1000000000000000e-01 1.3777459838125838e+00 -2.5366338669522998e-01 2.6877644730326306e-01 0 0 0 +16 2 1 5.1000000000000001e-01 2.6517554244980306e+00 -2.3957110424978438e+00 3.2908335999178327e-02 0 0 0 +17 2 4 -5.1000000000000001e-01 2.2309964792710639e+00 -2.1022918943319384e+00 1.1491948328949437e+00 0 0 0 + 1 1 3 -4.6999999999999997e-01 -2.7993683669226832e-01 2.4726588069312840e+00 -1.7200860244148433e-01 0 0 0 + 5 1 2 8.9999999999999997e-02 -8.9501761359359255e-01 9.3568128743071344e-01 4.0227731871484346e-01 0 0 0 +21 4 5 -8.4719999999999995e-01 4.9064454390208301e+00 -4.0751205255383196e+00 -3.6215576073601046e+00 0 0 0 +22 4 2 4.2359999999999998e-01 4.3687453488627543e+00 -4.2054270536772504e+00 -4.4651491269372565e+00 0 0 0 +23 4 2 4.2359999999999998e-01 5.7374928154769504e+00 -3.5763355905184966e+00 -3.8820297194230728e+00 0 0 0 +24 5 5 -8.4719999999999995e-01 2.0684115301174013e+00 3.1518221747664397e+00 3.1554242678474576e+00 0 0 0 +25 5 2 4.2359999999999998e-01 1.2998381073113014e+00 3.2755513587518097e+00 2.5092990173114837e+00 0 0 0 +26 5 2 4.2359999999999998e-01 2.5807438597688113e+00 4.0120175892854135e+00 3.2133398379059099e+00 0 0 0 +27 6 5 -8.4719999999999995e-01 -1.9613581876744359e+00 -4.3556300596085160e+00 2.1101467673534788e+00 0 0 0 +28 6 2 4.2359999999999998e-01 -2.7406520384725965e+00 -4.0207251278130975e+00 1.5828689861678511e+00 0 0 0 +29 6 2 4.2359999999999998e-01 -1.3108232656499081e+00 -3.5992986322410760e+00 2.2680459788743503e+00 0 0 0 + +Velocities + +1 7.7867804888392077e-04 5.8970331623292821e-04 -2.2179517633030531e-04 +2 2.7129529964126462e-03 4.6286427111164284e-03 3.5805549693846352e-03 +3 -1.2736791029204805e-03 1.6108674226414498e-03 -3.3618185901550799e-04 +4 -9.2828595122009308e-04 -1.2537885319521818e-03 -4.1204974953432108e-03 +5 -1.1800848061603740e-03 7.5424401975844038e-04 6.9023177964912290e-05 +6 -3.0914004879905335e-04 1.2755385764678133e-03 7.9574303350202582e-04 +7 -1.1037894966874103e-04 -7.6764845099077425e-04 -7.7217630460203659e-04 +8 3.9060281273221989e-04 -8.1444231918053418e-04 1.5134641148324972e-04 +9 1.2475530960659720e-03 -2.6608454451432528e-03 1.1117602907112732e-03 +10 4.5008983776042893e-04 4.9530197647538077e-04 -2.3336234361093645e-04 +11 -3.6977669078869707e-04 -1.5289071951960539e-03 -2.9176389881837113e-03 +12 1.0850834530183159e-03 -6.4965897903201833e-04 -1.2971152622619948e-03 +13 4.0754559196230639e-03 3.5043502394946119e-03 -7.8324487687854666e-04 +14 -1.3837220448746613e-04 -4.0656048637594394e-03 -3.9333461173944500e-03 +15 -4.3301707382721859e-03 -3.1802661664634938e-03 3.2037919043360571e-03 +16 -9.6715751018414326e-05 -5.0016572678960377e-04 1.4945658875149626e-03 +17 6.5692180538157174e-04 3.6635779995305095e-04 8.3495414466050911e-04 +18 -6.0936815808025862e-04 -9.3774557532468582e-04 -3.3558072507805731e-04 +19 -6.9919768291957119e-04 -3.6060777270430031e-03 4.2833405289822791e-03 +20 4.7777805013736515e-03 5.1003745845520452e-03 1.8002873923729241e-03 +21 -9.5568188553430398e-04 1.6594630943762931e-04 -1.8199788009966615e-04 +22 -3.3137518957653462e-03 -2.8683968287936054e-03 3.6384389958326871e-03 +23 2.4209481134686401e-04 -4.5457709985051130e-03 2.7663581642115042e-03 +24 2.5447450568861086e-04 4.8412447786110117e-04 -4.8021914527341357e-04 +25 4.3722771097312743e-03 -4.5184411669545515e-03 2.5200952006556795e-03 +26 -1.9250110555001179e-03 -3.0342169883610837e-03 3.5062814567984532e-03 +27 -2.6510179146429716e-04 3.6306203629019116e-04 -5.6235585400647747e-04 +28 -2.3068708109787484e-04 -8.5663070212203200e-04 2.1302563179109169e-03 +29 -2.5054744388303732e-03 -1.6773997805290820e-04 2.8436699761004796e-03 + +Bonds + +1 5 1 2 +2 3 1 3 +3 2 3 4 +4 2 3 5 +5 1 3 6 +6 3 6 8 +7 4 6 7 +8 5 8 9 +9 3 8 10 +10 2 10 11 +11 1 10 12 +12 1 10 16 +13 2 12 13 +14 2 12 14 +15 2 12 15 +16 4 16 17 +17 5 18 19 +18 5 18 20 +19 5 21 22 +20 5 21 23 +21 5 24 25 +22 5 24 26 +23 5 27 28 +24 5 27 29 + +Angles + +1 4 2 1 3 +2 4 1 3 5 +3 4 1 3 4 +4 4 1 3 6 +5 4 4 3 5 +6 2 5 3 6 +7 2 4 3 6 +8 3 3 6 7 +9 3 3 6 8 +10 3 7 6 8 +11 2 6 8 9 +12 2 9 8 10 +13 3 6 8 10 +14 2 8 10 11 +15 3 8 10 16 +16 2 11 10 12 +17 1 12 10 16 +18 1 8 10 12 +19 2 11 10 16 +20 2 10 12 15 +21 2 10 12 14 +22 2 10 12 13 +23 4 13 12 15 +24 4 13 12 14 +25 4 14 12 15 +26 4 10 16 17 +27 1 19 18 20 +28 1 22 21 23 +29 1 25 24 26 +30 1 28 27 29 + +Dihedrals + +1 2 2 1 3 6 +2 2 2 1 3 4 +3 3 2 1 3 5 +4 1 1 3 6 8 +5 1 1 3 6 7 +6 5 4 3 6 8 +7 5 4 3 6 7 +8 5 5 3 6 8 +9 5 5 3 6 7 +10 4 3 6 8 9 +11 3 3 6 8 10 +12 3 7 6 8 9 +13 4 7 6 8 10 +14 2 6 8 10 12 +15 2 6 8 10 16 +16 2 6 8 10 11 +17 2 9 8 10 12 +18 4 9 8 10 16 +19 5 9 8 10 11 +20 5 8 10 12 13 +21 1 8 10 12 14 +22 5 8 10 12 15 +23 4 8 10 16 17 +24 5 11 10 12 13 +25 5 11 10 12 14 +26 5 11 10 12 15 +27 2 11 10 16 17 +28 2 12 10 16 17 +29 5 16 10 12 13 +30 5 16 10 12 14 +31 5 16 10 12 15 + +Impropers + +1 1 6 3 8 7 +2 2 8 6 10 9 + +CMAP + +1 1 8 10 12 18 20 +2 2 18 20 22 25 27 +3 3 2 4 5 6 7 +4 4 10 11 12 13 14 +5 5 5 10 15 20 25 diff --git a/unittest/force-styles/tests/fix-timestep-efield_const.yaml b/unittest/force-styles/tests/fix-timestep-efield_const.yaml index 932f11179e..21bfc808da 100644 --- a/unittest/force-styles/tests/fix-timestep-efield_const.yaml +++ b/unittest/force-styles/tests/fix-timestep-efield_const.yaml @@ -1,6 +1,5 @@ --- lammps_version: 27 Jun 2024 -tags: generated date_generated: Tue Aug 6 02:04:43 2024 epsilon: 2e-13 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-efield_dipole_const.yaml b/unittest/force-styles/tests/fix-timestep-efield_dipole_const.yaml index 503f7253f1..aa2323fc03 100644 --- a/unittest/force-styles/tests/fix-timestep-efield_dipole_const.yaml +++ b/unittest/force-styles/tests/fix-timestep-efield_dipole_const.yaml @@ -1,6 +1,5 @@ --- lammps_version: 27 Jun 2024 -tags: generated date_generated: Sun Aug 4 21:46:33 2024 epsilon: 2e-13 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-efield_dipole_variable.yaml b/unittest/force-styles/tests/fix-timestep-efield_dipole_variable.yaml index 069526c4c0..b6e636d719 100644 --- a/unittest/force-styles/tests/fix-timestep-efield_dipole_variable.yaml +++ b/unittest/force-styles/tests/fix-timestep-efield_dipole_variable.yaml @@ -1,6 +1,5 @@ --- lammps_version: 27 Jun 2024 -tags: generated date_generated: Sun Aug 4 22:46:18 2024 epsilon: 2e-13 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-efield_region.yaml b/unittest/force-styles/tests/fix-timestep-efield_region_block.yaml similarity index 99% rename from unittest/force-styles/tests/fix-timestep-efield_region.yaml rename to unittest/force-styles/tests/fix-timestep-efield_region_block.yaml index d0770a1ab8..a155343e6d 100644 --- a/unittest/force-styles/tests/fix-timestep-efield_region.yaml +++ b/unittest/force-styles/tests/fix-timestep-efield_region_block.yaml @@ -1,6 +1,5 @@ --- lammps_version: 27 Jun 2024 -tags: generated date_generated: Sat Aug 3 05:18:10 2024 epsilon: 2e-13 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-efield_region_sphere.yaml b/unittest/force-styles/tests/fix-timestep-efield_region_sphere.yaml new file mode 100644 index 0000000000..ac3748ae11 --- /dev/null +++ b/unittest/force-styles/tests/fix-timestep-efield_region_sphere.yaml @@ -0,0 +1,82 @@ +--- +lammps_version: 29 Aug 2024 +date_generated: Wed Oct 9 18:02:13 2024 +epsilon: 2e-13 +skip_tests: +prerequisites: ! | + atom full + fix efield +pre_commands: ! "" +post_commands: ! | + region 1 sphere 0 0 0 1 + fix move all nve + fix test solute efield 0.1 0.1 0.1 region 1 + fix_modify test virial yes +input_file: in.fourmol +natoms: 29 +run_stress: ! |2- + 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +global_scalar: 0 +global_vector: ! |- + 3 0 0 0 +run_pos: ! |2 + 1 -2.7045559775384032e-01 2.4912159905679729e+00 -1.6695851791541888e-01 + 2 3.1004029573899528e-01 2.9612354631094391e+00 -8.5466363037021464e-01 + 3 -7.0398551400789477e-01 1.2305509955830618e+00 -6.2777526944456274e-01 + 4 -1.5818159336499285e+00 1.4837407818929933e+00 -1.2538710836062004e+00 + 5 -9.0719763672789266e-01 9.2652103885675297e-01 3.9954210488374786e-01 + 6 2.4831720524855985e-01 2.8313021497871271e-01 -1.2314233331711453e+00 + 7 3.4143527641386412e-01 -2.2646551041391422e-02 -2.5292291414903052e+00 + 8 1.1743552229100009e+00 -4.8863228565853950e-01 -6.3783432910825522e-01 + 9 1.3800524229500313e+00 -2.5274721030406683e-01 2.8353985887095157e-01 + 10 2.0510765220543883e+00 -1.4604063740302866e+00 -9.8323745081712954e-01 + 11 1.7878031944442556e+00 -1.9921863272948861e+00 -1.8890602447625777e+00 + 12 3.0063007039340053e+00 -4.9013350496963293e-01 -1.6231898107386229e+00 + 13 4.0515402959192999e+00 -8.9202011606653986e-01 -1.6400005529924957e+00 + 14 2.6066963345543819e+00 -4.1789253965514156e-01 -2.6634003608794394e+00 + 15 2.9695287185712913e+00 5.5422613165234036e-01 -1.2342022021790127e+00 + 16 2.6747029695228521e+00 -2.4124119054564295e+00 -2.3435746150616148e-02 + 17 2.2153577785283796e+00 -2.0897985186907717e+00 1.1963150794479436e+00 + 18 2.1369701704115704e+00 3.0158507413630606e+00 -3.5179348337215015e+00 + 19 1.5355837136087378e+00 2.6255292355375675e+00 -4.2353987779879052e+00 + 20 2.7727573005678776e+00 3.6923910449610169e+00 -3.9330842459133493e+00 + 21 4.9040128073204299e+00 -4.0752348172957946e+00 -3.6210314709891711e+00 + 22 4.3582355554440841e+00 -4.2126119427287048e+00 -4.4612844196314052e+00 + 23 5.7439382849307599e+00 -3.5821957939275029e+00 -3.8766361295935821e+00 + 24 2.0689243582422630e+00 3.1513346907271012e+00 3.1550389754828800e+00 + 25 1.3045351331492134e+00 3.2665125705842848e+00 2.5111855257433504e+00 + 26 2.5809237402711274e+00 4.0117602605482832e+00 3.2212060529089896e+00 + 27 -1.9611343130357228e+00 -4.3563411931359752e+00 2.1098293115523705e+00 + 28 -2.7473562684513411e+00 -4.0200819932379330e+00 1.5830052163433954e+00 + 29 -1.3126000191359855e+00 -3.5962518039482929e+00 2.2746342468737835e+00 +run_vel: ! |2 + 1 8.1705744183262364e-03 1.6516406176274284e-02 4.7902264318912908e-03 + 2 5.4501493445687802e-03 5.1791699408496447e-03 -1.4372931530376594e-03 + 3 -8.2298292722385660e-03 -1.2926551614621379e-02 -4.0984181178163794e-03 + 4 -3.7699042590093549e-03 -6.5722892098813894e-03 -1.1184640360133316e-03 + 5 -1.1021961004346582e-02 -9.8906780939336109e-03 -2.8410737829284421e-03 + 6 -3.9676663166400034e-02 4.6817061464710256e-02 3.7148491979476124e-02 + 7 9.1033953013898601e-04 -1.0128524411938794e-02 -5.1568251805019748e-02 + 8 7.9064712058855690e-03 -3.3507254552631780e-03 3.4557098492564636e-02 + 9 1.5644176117320919e-03 3.7365546102722177e-03 1.5047408822037646e-02 + 10 2.9201446820573178e-02 -2.9249578745486140e-02 -1.5018077424322538e-02 + 11 -4.7835961513517560e-03 -3.7481385134185202e-03 -2.3464104142290089e-03 + 12 2.2696451841920581e-03 -3.4774154398129452e-04 -3.0640770327796884e-03 + 13 2.7531740451953108e-03 5.8171061612840597e-03 -7.9467454022159748e-04 + 14 3.5246182371994170e-03 -5.7939995585585503e-03 -3.9478431172751327e-03 + 15 -1.8547943640122978e-03 -5.8554729942777769e-03 6.2938485140538701e-03 + 16 1.8681499973445235e-02 -1.3262466204585334e-02 -4.5638651457003243e-02 + 17 -1.2896269981100382e-02 9.7527665265956451e-03 3.7296535360836762e-02 + 18 -8.0065794848261610e-04 -8.6270473212554308e-04 -1.4483040697508777e-03 + 19 1.2452390836182583e-03 -2.5061097118772749e-03 7.2998631009713062e-03 + 20 3.5930060229597072e-03 3.6938860309252974e-03 3.2322732687893115e-03 + 21 -1.4689220370766539e-03 -2.7352129761527648e-04 7.0581624215243120e-04 + 22 -7.0694199254630382e-03 -4.2577148924878598e-03 2.8079117614252034e-04 + 23 6.0446963117374939e-03 -1.4000131614795382e-03 2.5819754847014320e-03 + 24 3.1926367902287864e-04 -9.9445664749276113e-04 1.4999996959365281e-04 + 25 1.3789754514814445e-04 -4.4335894884532700e-03 -8.1808136725080140e-04 + 26 2.0485904035217606e-03 2.7813358633835958e-03 4.3245727149206761e-03 + 27 4.5604120293369819e-04 -1.0305523026921102e-03 2.1188058381358391e-04 + 28 -6.2544520861855151e-03 1.4127711176146864e-03 -1.8429821884794260e-03 + 29 6.4110631534402261e-04 3.1273432719593807e-03 3.7253671105656745e-03 +... diff --git a/unittest/force-styles/tests/fix-timestep-efield_variable.yaml b/unittest/force-styles/tests/fix-timestep-efield_variable.yaml index 7680373091..1ec1e4098a 100644 --- a/unittest/force-styles/tests/fix-timestep-efield_variable.yaml +++ b/unittest/force-styles/tests/fix-timestep-efield_variable.yaml @@ -1,6 +1,5 @@ --- lammps_version: 27 Jun 2024 -tags: generated date_generated: Sat Aug 3 05:18:19 2024 epsilon: 2e-13 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-gravity.yaml b/unittest/force-styles/tests/fix-timestep-gravity.yaml index c151723e8a..8195dd2643 100644 --- a/unittest/force-styles/tests/fix-timestep-gravity.yaml +++ b/unittest/force-styles/tests/fix-timestep-gravity.yaml @@ -1,6 +1,5 @@ --- lammps_version: 17 Apr 2024 -tags: generated date_generated: Fri Jun 7 18:18:25 2024 epsilon: 5e-14 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-nph_sphere.yaml b/unittest/force-styles/tests/fix-timestep-nph_sphere.yaml index 40a4fc1c6e..b95a0265de 100644 --- a/unittest/force-styles/tests/fix-timestep-nph_sphere.yaml +++ b/unittest/force-styles/tests/fix-timestep-nph_sphere.yaml @@ -1,6 +1,5 @@ --- lammps_version: 27 Jun 2024 -tags: generated date_generated: Sun Aug 4 23:15:54 2024 epsilon: 1e-12 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-npt_sphere_aniso.yaml b/unittest/force-styles/tests/fix-timestep-npt_sphere_aniso.yaml index 807fcdc46f..12022224df 100644 --- a/unittest/force-styles/tests/fix-timestep-npt_sphere_aniso.yaml +++ b/unittest/force-styles/tests/fix-timestep-npt_sphere_aniso.yaml @@ -1,6 +1,5 @@ --- lammps_version: 27 Jun 2024 -tags: generated date_generated: Sun Aug 4 23:03:14 2024 epsilon: 4e-13 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-npt_sphere_iso.yaml b/unittest/force-styles/tests/fix-timestep-npt_sphere_iso.yaml index f3ee607578..717ab63162 100644 --- a/unittest/force-styles/tests/fix-timestep-npt_sphere_iso.yaml +++ b/unittest/force-styles/tests/fix-timestep-npt_sphere_iso.yaml @@ -1,6 +1,5 @@ --- lammps_version: 27 Jun 2024 -tags: generated date_generated: Sun Aug 4 23:03:30 2024 epsilon: 1e-12 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-npt_sphere_tri.yaml b/unittest/force-styles/tests/fix-timestep-npt_sphere_tri.yaml index b31284da57..3724246956 100644 --- a/unittest/force-styles/tests/fix-timestep-npt_sphere_tri.yaml +++ b/unittest/force-styles/tests/fix-timestep-npt_sphere_tri.yaml @@ -1,6 +1,5 @@ --- lammps_version: 27 Jun 2024 -tags: generated date_generated: Sun Aug 4 23:05:01 2024 epsilon: 1e-12 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-nve_sphere.yaml b/unittest/force-styles/tests/fix-timestep-nve_sphere.yaml index 0eaa00db36..b794fb2416 100644 --- a/unittest/force-styles/tests/fix-timestep-nve_sphere.yaml +++ b/unittest/force-styles/tests/fix-timestep-nve_sphere.yaml @@ -1,6 +1,5 @@ --- lammps_version: 27 Jun 2024 -tags: generated date_generated: Sun Aug 4 23:05:44 2024 epsilon: 5e-14 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-nve_sphere_dipole.yaml b/unittest/force-styles/tests/fix-timestep-nve_sphere_dipole.yaml index 4ea520aba9..5d03cc7a05 100644 --- a/unittest/force-styles/tests/fix-timestep-nve_sphere_dipole.yaml +++ b/unittest/force-styles/tests/fix-timestep-nve_sphere_dipole.yaml @@ -1,6 +1,5 @@ --- lammps_version: 27 Jun 2024 -tags: generated date_generated: Sun Aug 4 23:06:12 2024 epsilon: 5e-14 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-nve_sphere_dipole_dlm.yaml b/unittest/force-styles/tests/fix-timestep-nve_sphere_dipole_dlm.yaml index d95542744f..aa39b8f4c2 100644 --- a/unittest/force-styles/tests/fix-timestep-nve_sphere_dipole_dlm.yaml +++ b/unittest/force-styles/tests/fix-timestep-nve_sphere_dipole_dlm.yaml @@ -1,6 +1,5 @@ --- lammps_version: 27 Jun 2024 -tags: generated date_generated: Sun Aug 4 23:06:24 2024 epsilon: 1e-09 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-nvt_sphere.yaml b/unittest/force-styles/tests/fix-timestep-nvt_sphere.yaml index 41f021897a..ff8020cc27 100644 --- a/unittest/force-styles/tests/fix-timestep-nvt_sphere.yaml +++ b/unittest/force-styles/tests/fix-timestep-nvt_sphere.yaml @@ -1,6 +1,5 @@ --- lammps_version: 27 Jun 2024 -tags: generated date_generated: Sun Aug 4 23:11:49 2024 epsilon: 5e-14 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-recenter-coords.yaml b/unittest/force-styles/tests/fix-timestep-recenter-coords.yaml new file mode 100644 index 0000000000..31c682fc07 --- /dev/null +++ b/unittest/force-styles/tests/fix-timestep-recenter-coords.yaml @@ -0,0 +1,79 @@ +--- +lammps_version: 29 Aug 2024 +date_generated: Tue Oct 1 12:45:25 2024 +epsilon: 2e-13 +skip_tests: +prerequisites: ! | + atom full + fix nve + fix recenter +pre_commands: ! "" +post_commands: ! | + fix move all nve + fix test solute recenter 0.45 0.5 0.55 shift all units fraction +input_file: in.fourmol +natoms: 29 +global_scalar: 4.868395120178045e-05 +global_vector: ! |- + 3 -4.5648814136156624e-05 -3.3198799039568794e-06 1.6591903748613968e-05 +run_pos: ! |2 + 1 -7.5132769500671226e-01 2.4986248943444171e+00 7.9802680459215103e-01 + 2 -1.7083180151387659e-01 2.9686443668858833e+00 1.1032169213735510e-01 + 3 -1.1848576112607669e+00 1.2379598993595067e+00 3.3721005306300700e-01 + 4 -2.0626880309028004e+00 1.4911496856694382e+00 -2.8888576109863040e-01 + 5 -1.3880697339807648e+00 9.3392994263319751e-01 1.3645274273913177e+00 + 6 -2.3255489200431190e-01 2.9053911875515726e-01 -2.6643801066357575e-01 + 7 -1.3943682083900774e-01 -1.5237647264946797e-02 -1.5642438189827357e+00 + 8 6.9348312565712877e-01 -4.8122338188209490e-01 3.2715099339931453e-01 + 9 8.9918032569715922e-01 -2.4533830652762220e-01 1.2485251813785214e+00 + 10 1.5702044248015166e+00 -1.4529974702538417e+00 -1.8252128309559742e-02 + 11 1.3069310971913839e+00 -1.9847774235184412e+00 -9.2407492225500776e-01 + 12 2.5254286066811331e+00 -4.8272460119318827e-01 -6.5820448823105326e-01 + 13 3.5706681986664290e+00 -8.8461121229009532e-01 -6.7501523048492595e-01 + 14 2.1258242373015102e+00 -4.1048363587869696e-01 -1.6984150383718699e+00 + 15 2.4886566213184191e+00 5.6163503542878490e-01 -2.6921687967144270e-01 + 16 2.1938308722699804e+00 -2.4050030016799853e+00 9.4154957635695380e-01 + 17 1.7344856812755074e+00 -2.0823896149143275e+00 2.1613004019555140e+00 + 18 1.6560980731586978e+00 3.0232596451395048e+00 -2.5529495112139315e+00 + 19 1.0547116163558656e+00 2.6329381393140117e+00 -3.2704134554803344e+00 + 20 2.2918852033150059e+00 3.6997999487374611e+00 -2.9680989234057789e+00 + 21 4.4231407100675586e+00 -4.0678259135193509e+00 -2.6560461484816007e+00 + 22 3.8773634581912133e+00 -4.2052030389522610e+00 -3.4962990971238348e+00 + 23 5.2630661876778886e+00 -3.5747868901510587e+00 -2.9116508070860116e+00 + 24 1.5880522609893917e+00 3.1587435945035454e+00 4.1200242979904500e+00 + 25 8.2366303589634160e-01 3.2739214743607290e+00 3.4761708482509213e+00 + 26 2.1000516430182552e+00 4.0191691643247269e+00 4.1861913754165592e+00 + 27 -2.4420064102885943e+00 -4.3489322893595315e+00 3.0748146340599405e+00 + 28 -3.2282283657042123e+00 -4.0126730894614893e+00 2.5479905388509652e+00 + 29 -1.7934721163888576e+00 -3.5888429001718487e+00 3.2396195693813534e+00 +run_vel: ! |2 + 1 8.1705744183262520e-03 1.6516406176274312e-02 4.7902264318912926e-03 + 2 5.4501493445687594e-03 5.1791699408496325e-03 -1.4372931530376343e-03 + 3 -8.2298292722385816e-03 -1.2926551614621412e-02 -4.0984181178163994e-03 + 4 -3.7699042590093445e-03 -6.5722892098813860e-03 -1.1184640360133362e-03 + 5 -1.1021961004346569e-02 -9.8906780939336126e-03 -2.8410737829284312e-03 + 6 -3.9676663166400034e-02 4.6817061464710250e-02 3.7148491979475992e-02 + 7 9.1033953013898092e-04 -1.0128524411938771e-02 -5.1568251805019630e-02 + 8 7.9064712058855725e-03 -3.3507254552631849e-03 3.4557098492564643e-02 + 9 1.5644176117320875e-03 3.7365546102722203e-03 1.5047408822037638e-02 + 10 2.9201446820573192e-02 -2.9249578745486140e-02 -1.5018077424322537e-02 + 11 -4.7835961513517603e-03 -3.7481385134185263e-03 -2.3464104142290176e-03 + 12 2.2696451841920920e-03 -3.4774154398130042e-04 -3.0640770327796979e-03 + 13 2.7531740451952145e-03 5.8171061612840866e-03 -7.9467454022159282e-04 + 14 3.5246182371994239e-03 -5.7939995585585425e-03 -3.9478431172750902e-03 + 15 -1.8547943640123080e-03 -5.8554729942777830e-03 6.2938485140538606e-03 + 16 1.8681499973445179e-02 -1.3262466204585288e-02 -4.5638651457003097e-02 + 17 -1.2896269981100333e-02 9.7527665265956121e-03 3.7296535360836651e-02 + 18 -8.0065794848260612e-04 -8.6270473212553646e-04 -1.4483040697508677e-03 + 19 1.2452390836182341e-03 -2.5061097118772879e-03 7.2998631009712671e-03 + 20 3.5930060229596903e-03 3.6938860309252827e-03 3.2322732687893093e-03 + 21 -1.4689220370766394e-03 -2.7352129761527014e-04 7.0581624215243781e-04 + 22 -7.0694199254630772e-03 -4.2577148924878711e-03 2.8079117614247622e-04 + 23 6.0446963117374731e-03 -1.4000131614795503e-03 2.5819754847014498e-03 + 24 3.1926367902286048e-04 -9.9445664749277913e-04 1.4999996959364468e-04 + 25 1.3789754514817134e-04 -4.4335894884532752e-03 -8.1808136725077560e-04 + 26 2.0485904035218057e-03 2.7813358633836725e-03 4.3245727149206822e-03 + 27 4.5604120293371412e-04 -1.0305523026921115e-03 2.1188058381359324e-04 + 28 -6.2544520861855810e-03 1.4127711176147026e-03 -1.8429821884794668e-03 + 29 6.4110631534402326e-04 3.1273432719593694e-03 3.7253671105656788e-03 +... diff --git a/unittest/force-styles/tests/fix-timestep-recenter-init.yaml b/unittest/force-styles/tests/fix-timestep-recenter-init.yaml new file mode 100644 index 0000000000..ca539aa911 --- /dev/null +++ b/unittest/force-styles/tests/fix-timestep-recenter-init.yaml @@ -0,0 +1,79 @@ +--- +lammps_version: 29 Aug 2024 +date_generated: Tue Oct 1 12:45:46 2024 +epsilon: 1e-12 +skip_tests: +prerequisites: ! | + atom full + fix nve + fix recenter +pre_commands: ! "" +post_commands: ! | + fix move all nve + fix test solute recenter INIT INIT INIT shift all units box +input_file: in.fourmol +natoms: 29 +global_scalar: 4.868395120232176e-05 +global_vector: ! |- + 3 -4.5648814136711735e-05 -3.319879903845857e-06 1.6591903748697234e-05 +run_pos: ! |2 + 1 -2.7082064032386499e-01 2.4911895589422826e+00 -1.6682586822486273e-01 + 2 3.0967525316897071e-01 2.9612090314837487e+00 -8.5453098067965849e-01 + 3 -7.0435055657791923e-01 1.2305245639573710e+00 -6.2764261975400659e-01 + 4 -1.5821809762199530e+00 1.4837143502673025e+00 -1.2537384339156445e+00 + 5 -9.0756267929791723e-01 9.2649460723106236e-01 3.9967475457430401e-01 + 6 2.4795216267853529e-01 2.8310378335302211e-01 -1.2312906834805892e+00 + 7 3.4107023384383955e-01 -2.2672982667081999e-02 -2.5290964917997489e+00 + 8 1.1739901803399764e+00 -4.8865871728423016e-01 -6.3770167941769906e-01 + 9 1.3796873803800067e+00 -2.5277364192975738e-01 2.8367250856150772e-01 + 10 2.0507114794843648e+00 -1.4604328056559772e+00 -9.8310480112657339e-01 + 11 1.7874381518742311e+00 -1.9922127589205765e+00 -1.8889275950720215e+00 + 12 3.0059356613639814e+00 -4.9015993659532353e-01 -1.6230571610480671e+00 + 13 4.0511752533492755e+00 -8.9204654769223046e-01 -1.6398679033019394e+00 + 14 2.6063312919843575e+00 -4.1791897128083211e-01 -2.6632677111888836e+00 + 15 2.9691636760012665e+00 5.5419970002664964e-01 -1.2340695524884564e+00 + 16 2.6743379269528269e+00 -2.4124383370821203e+00 -2.3303096460060031e-02 + 17 2.2149927359583548e+00 -2.0898249503164625e+00 1.1964477291384996e+00 + 18 2.1366051278415465e+00 3.0158243097373703e+00 -3.5178021840309452e+00 + 19 1.5352186710387132e+00 2.6255028039118771e+00 -4.2352661282973489e+00 + 20 2.7723922579978524e+00 3.6923646133353265e+00 -3.9329515962227930e+00 + 21 4.9036477647504055e+00 -4.0752612489214854e+00 -3.6208988212986148e+00 + 22 4.3578705128740589e+00 -4.2126383743543956e+00 -4.4611517699408489e+00 + 23 5.7435732423607346e+00 -3.5822222255531937e+00 -3.8765034799030262e+00 + 24 2.0685593156722382e+00 3.1513082591014108e+00 3.1551716251734359e+00 + 25 1.3041700905791889e+00 3.2664861389585940e+00 2.5113181754339067e+00 + 26 2.5805586977011035e+00 4.0117338289225923e+00 3.2213387025995455e+00 + 27 -1.9614993556057472e+00 -4.3563676247616661e+00 2.1099619612429263e+00 + 28 -2.7477213110213659e+00 -4.0201084248636239e+00 1.5831378660339515e+00 + 29 -1.3129650617060100e+00 -3.5962782355739837e+00 2.2747668965643393e+00 +run_vel: ! |2 + 1 8.1705744183262104e-03 1.6516406176274218e-02 4.7902264318912665e-03 + 2 5.4501493445687794e-03 5.1791699408496421e-03 -1.4372931530376607e-03 + 3 -8.2298292722385487e-03 -1.2926551614621277e-02 -4.0984181178163560e-03 + 4 -3.7699042590093415e-03 -6.5722892098814042e-03 -1.1184640360133158e-03 + 5 -1.1021961004346589e-02 -9.8906780939336161e-03 -2.8410737829284308e-03 + 6 -3.9676663166400034e-02 4.6817061464710229e-02 3.7148491979476020e-02 + 7 9.1033953013898157e-04 -1.0128524411938776e-02 -5.1568251805019651e-02 + 8 7.9064712058856471e-03 -3.3507254552632795e-03 3.4557098492564615e-02 + 9 1.5644176117320901e-03 3.7365546102722182e-03 1.5047408822037646e-02 + 10 2.9201446820573056e-02 -2.9249578745486018e-02 -1.5018077424322512e-02 + 11 -4.7835961513517386e-03 -3.7481385134185206e-03 -2.3464104142289959e-03 + 12 2.2696451841920360e-03 -3.4774154398128042e-04 -3.0640770327796927e-03 + 13 2.7531740451953762e-03 5.8171061612840589e-03 -7.9467454022160203e-04 + 14 3.5246182371994326e-03 -5.7939995585585581e-03 -3.9478431172751110e-03 + 15 -1.8547943640122733e-03 -5.8554729942777882e-03 6.2938485140538684e-03 + 16 1.8681499973445276e-02 -1.3262466204585354e-02 -4.5638651457003278e-02 + 17 -1.2896269981100394e-02 9.7527665265956520e-03 3.7296535360836797e-02 + 18 -8.0065794848264635e-04 -8.6270473212556715e-04 -1.4483040697508916e-03 + 19 1.2452390836183188e-03 -2.5061097118772376e-03 7.2998631009713894e-03 + 20 3.5930060229597644e-03 3.6938860309253564e-03 3.2322732687892846e-03 + 21 -1.4689220370766513e-03 -2.7352129761527480e-04 7.0581624215242762e-04 + 22 -7.0694199254630373e-03 -4.2577148924878580e-03 2.8079117614252934e-04 + 23 6.0446963117374757e-03 -1.4000131614795444e-03 2.5819754847014359e-03 + 24 3.1926367902287810e-04 -9.9445664749280038e-04 1.4999996959366859e-04 + 25 1.3789754514808927e-04 -4.4335894884532361e-03 -8.1808136725085713e-04 + 26 2.0485904035218191e-03 2.7813358633837193e-03 4.3245727149206674e-03 + 27 4.5604120293371239e-04 -1.0305523026920900e-03 2.1188058381358600e-04 + 28 -6.2544520861855203e-03 1.4127711176146766e-03 -1.8429821884794249e-03 + 29 6.4110631534397057e-04 3.1273432719593091e-03 3.7253671105656658e-03 +... diff --git a/unittest/force-styles/tests/fix-timestep-recenter-null.yaml b/unittest/force-styles/tests/fix-timestep-recenter-null.yaml new file mode 100644 index 0000000000..a860180cd7 --- /dev/null +++ b/unittest/force-styles/tests/fix-timestep-recenter-null.yaml @@ -0,0 +1,79 @@ +--- +lammps_version: 29 Aug 2024 +date_generated: Tue Oct 1 12:45:37 2024 +epsilon: 2e-13 +skip_tests: +prerequisites: ! | + atom full + fix nve + fix recenter +pre_commands: ! "" +post_commands: ! | + fix move all nve + fix test solute recenter NULL NULL NULL shift all units lattice +input_file: in.fourmol +natoms: 29 +global_scalar: 0 +global_vector: ! |- + 3 0 0 0 +run_pos: ! |2 + 1 -2.7045559775384032e-01 2.4912159905679729e+00 -1.6695851791541888e-01 + 2 3.1004029573899528e-01 2.9612354631094391e+00 -8.5466363037021464e-01 + 3 -7.0398551400789477e-01 1.2305509955830618e+00 -6.2777526944456274e-01 + 4 -1.5818159336499285e+00 1.4837407818929933e+00 -1.2538710836062004e+00 + 5 -9.0719763672789266e-01 9.2652103885675297e-01 3.9954210488374786e-01 + 6 2.4831720524855985e-01 2.8313021497871271e-01 -1.2314233331711453e+00 + 7 3.4143527641386412e-01 -2.2646551041391422e-02 -2.5292291414903052e+00 + 8 1.1743552229100009e+00 -4.8863228565853950e-01 -6.3783432910825522e-01 + 9 1.3800524229500313e+00 -2.5274721030406683e-01 2.8353985887095157e-01 + 10 2.0510765220543883e+00 -1.4604063740302866e+00 -9.8323745081712954e-01 + 11 1.7878031944442556e+00 -1.9921863272948861e+00 -1.8890602447625777e+00 + 12 3.0063007039340053e+00 -4.9013350496963293e-01 -1.6231898107386229e+00 + 13 4.0515402959192999e+00 -8.9202011606653986e-01 -1.6400005529924957e+00 + 14 2.6066963345543819e+00 -4.1789253965514156e-01 -2.6634003608794394e+00 + 15 2.9695287185712913e+00 5.5422613165234036e-01 -1.2342022021790127e+00 + 16 2.6747029695228521e+00 -2.4124119054564295e+00 -2.3435746150616148e-02 + 17 2.2153577785283796e+00 -2.0897985186907717e+00 1.1963150794479436e+00 + 18 2.1369701704115704e+00 3.0158507413630606e+00 -3.5179348337215015e+00 + 19 1.5355837136087378e+00 2.6255292355375675e+00 -4.2353987779879052e+00 + 20 2.7727573005678776e+00 3.6923910449610169e+00 -3.9330842459133493e+00 + 21 4.9040128073204299e+00 -4.0752348172957946e+00 -3.6210314709891711e+00 + 22 4.3582355554440841e+00 -4.2126119427287048e+00 -4.4612844196314052e+00 + 23 5.7439382849307599e+00 -3.5821957939275029e+00 -3.8766361295935821e+00 + 24 2.0689243582422630e+00 3.1513346907271012e+00 3.1550389754828800e+00 + 25 1.3045351331492134e+00 3.2665125705842848e+00 2.5111855257433504e+00 + 26 2.5809237402711274e+00 4.0117602605482832e+00 3.2212060529089896e+00 + 27 -1.9611343130357228e+00 -4.3563411931359752e+00 2.1098293115523705e+00 + 28 -2.7473562684513411e+00 -4.0200819932379330e+00 1.5830052163433954e+00 + 29 -1.3126000191359855e+00 -3.5962518039482929e+00 2.2746342468737835e+00 +run_vel: ! |2 + 1 8.1705744183262364e-03 1.6516406176274284e-02 4.7902264318912908e-03 + 2 5.4501493445687802e-03 5.1791699408496447e-03 -1.4372931530376594e-03 + 3 -8.2298292722385660e-03 -1.2926551614621379e-02 -4.0984181178163794e-03 + 4 -3.7699042590093549e-03 -6.5722892098813894e-03 -1.1184640360133316e-03 + 5 -1.1021961004346582e-02 -9.8906780939336109e-03 -2.8410737829284421e-03 + 6 -3.9676663166400034e-02 4.6817061464710256e-02 3.7148491979476124e-02 + 7 9.1033953013898601e-04 -1.0128524411938794e-02 -5.1568251805019748e-02 + 8 7.9064712058855690e-03 -3.3507254552631780e-03 3.4557098492564636e-02 + 9 1.5644176117320919e-03 3.7365546102722177e-03 1.5047408822037646e-02 + 10 2.9201446820573178e-02 -2.9249578745486140e-02 -1.5018077424322538e-02 + 11 -4.7835961513517560e-03 -3.7481385134185202e-03 -2.3464104142290089e-03 + 12 2.2696451841920581e-03 -3.4774154398129452e-04 -3.0640770327796884e-03 + 13 2.7531740451953108e-03 5.8171061612840597e-03 -7.9467454022159748e-04 + 14 3.5246182371994170e-03 -5.7939995585585503e-03 -3.9478431172751327e-03 + 15 -1.8547943640122978e-03 -5.8554729942777769e-03 6.2938485140538701e-03 + 16 1.8681499973445235e-02 -1.3262466204585334e-02 -4.5638651457003243e-02 + 17 -1.2896269981100382e-02 9.7527665265956451e-03 3.7296535360836762e-02 + 18 -8.0065794848261610e-04 -8.6270473212554308e-04 -1.4483040697508777e-03 + 19 1.2452390836182583e-03 -2.5061097118772749e-03 7.2998631009713062e-03 + 20 3.5930060229597072e-03 3.6938860309252974e-03 3.2322732687893115e-03 + 21 -1.4689220370766539e-03 -2.7352129761527648e-04 7.0581624215243120e-04 + 22 -7.0694199254630382e-03 -4.2577148924878598e-03 2.8079117614252034e-04 + 23 6.0446963117374939e-03 -1.4000131614795382e-03 2.5819754847014320e-03 + 24 3.1926367902287864e-04 -9.9445664749276113e-04 1.4999996959365281e-04 + 25 1.3789754514814445e-04 -4.4335894884532700e-03 -8.1808136725080140e-04 + 26 2.0485904035217606e-03 2.7813358633835958e-03 4.3245727149206761e-03 + 27 4.5604120293369819e-04 -1.0305523026921102e-03 2.1188058381358391e-04 + 28 -6.2544520861855151e-03 1.4127711176146864e-03 -1.8429821884794260e-03 + 29 6.4110631534402261e-04 3.1273432719593807e-03 3.7253671105656745e-03 +... diff --git a/unittest/force-styles/tests/fix-timestep-setforce_region.yaml b/unittest/force-styles/tests/fix-timestep-setforce_region_block.yaml similarity index 100% rename from unittest/force-styles/tests/fix-timestep-setforce_region.yaml rename to unittest/force-styles/tests/fix-timestep-setforce_region_block.yaml diff --git a/unittest/force-styles/tests/fix-timestep-setforce_region_sphere.yaml b/unittest/force-styles/tests/fix-timestep-setforce_region_sphere.yaml new file mode 100644 index 0000000000..25990ef478 --- /dev/null +++ b/unittest/force-styles/tests/fix-timestep-setforce_region_sphere.yaml @@ -0,0 +1,78 @@ +--- +lammps_version: 29 Aug 2024 +date_generated: Wed Oct 9 17:59:56 2024 +epsilon: 5e-12 +skip_tests: +prerequisites: ! | + atom full + fix setforce +pre_commands: ! "" +post_commands: ! | + region 1 sphere 0 0 0 1 + fix move all nve + fix test solute setforce 0.0 0.0 0.0 region 1 +input_file: in.fourmol +natoms: 29 +global_vector: ! |- + 3 0 0 0 +run_pos: ! |2 + 1 -2.7045559775384032e-01 2.4912159905679729e+00 -1.6695851791541888e-01 + 2 3.1004029573899528e-01 2.9612354631094391e+00 -8.5466363037021464e-01 + 3 -7.0398551400789477e-01 1.2305509955830618e+00 -6.2777526944456274e-01 + 4 -1.5818159336499285e+00 1.4837407818929933e+00 -1.2538710836062004e+00 + 5 -9.0719763672789266e-01 9.2652103885675297e-01 3.9954210488374786e-01 + 6 2.4831720524855985e-01 2.8313021497871271e-01 -1.2314233331711453e+00 + 7 3.4143527641386412e-01 -2.2646551041391422e-02 -2.5292291414903052e+00 + 8 1.1743552229100009e+00 -4.8863228565853950e-01 -6.3783432910825522e-01 + 9 1.3800524229500313e+00 -2.5274721030406683e-01 2.8353985887095157e-01 + 10 2.0510765220543883e+00 -1.4604063740302866e+00 -9.8323745081712954e-01 + 11 1.7878031944442556e+00 -1.9921863272948861e+00 -1.8890602447625777e+00 + 12 3.0063007039340053e+00 -4.9013350496963293e-01 -1.6231898107386229e+00 + 13 4.0515402959192999e+00 -8.9202011606653986e-01 -1.6400005529924957e+00 + 14 2.6066963345543819e+00 -4.1789253965514156e-01 -2.6634003608794394e+00 + 15 2.9695287185712913e+00 5.5422613165234036e-01 -1.2342022021790127e+00 + 16 2.6747029695228521e+00 -2.4124119054564295e+00 -2.3435746150616148e-02 + 17 2.2153577785283796e+00 -2.0897985186907717e+00 1.1963150794479436e+00 + 18 2.1369701704115704e+00 3.0158507413630606e+00 -3.5179348337215015e+00 + 19 1.5355837136087378e+00 2.6255292355375675e+00 -4.2353987779879052e+00 + 20 2.7727573005678776e+00 3.6923910449610169e+00 -3.9330842459133493e+00 + 21 4.9040128073204299e+00 -4.0752348172957946e+00 -3.6210314709891711e+00 + 22 4.3582355554440841e+00 -4.2126119427287048e+00 -4.4612844196314052e+00 + 23 5.7439382849307599e+00 -3.5821957939275029e+00 -3.8766361295935821e+00 + 24 2.0689243582422630e+00 3.1513346907271012e+00 3.1550389754828800e+00 + 25 1.3045351331492134e+00 3.2665125705842848e+00 2.5111855257433504e+00 + 26 2.5809237402711274e+00 4.0117602605482832e+00 3.2212060529089896e+00 + 27 -1.9611343130357228e+00 -4.3563411931359752e+00 2.1098293115523705e+00 + 28 -2.7473562684513411e+00 -4.0200819932379330e+00 1.5830052163433954e+00 + 29 -1.3126000191359855e+00 -3.5962518039482929e+00 2.2746342468737835e+00 +run_vel: ! |2 + 1 8.1705744183262364e-03 1.6516406176274284e-02 4.7902264318912908e-03 + 2 5.4501493445687802e-03 5.1791699408496447e-03 -1.4372931530376594e-03 + 3 -8.2298292722385660e-03 -1.2926551614621379e-02 -4.0984181178163794e-03 + 4 -3.7699042590093549e-03 -6.5722892098813894e-03 -1.1184640360133316e-03 + 5 -1.1021961004346582e-02 -9.8906780939336109e-03 -2.8410737829284421e-03 + 6 -3.9676663166400034e-02 4.6817061464710256e-02 3.7148491979476124e-02 + 7 9.1033953013898601e-04 -1.0128524411938794e-02 -5.1568251805019748e-02 + 8 7.9064712058855690e-03 -3.3507254552631780e-03 3.4557098492564636e-02 + 9 1.5644176117320919e-03 3.7365546102722177e-03 1.5047408822037646e-02 + 10 2.9201446820573178e-02 -2.9249578745486140e-02 -1.5018077424322538e-02 + 11 -4.7835961513517560e-03 -3.7481385134185202e-03 -2.3464104142290089e-03 + 12 2.2696451841920581e-03 -3.4774154398129452e-04 -3.0640770327796884e-03 + 13 2.7531740451953108e-03 5.8171061612840597e-03 -7.9467454022159748e-04 + 14 3.5246182371994170e-03 -5.7939995585585503e-03 -3.9478431172751327e-03 + 15 -1.8547943640122978e-03 -5.8554729942777769e-03 6.2938485140538701e-03 + 16 1.8681499973445235e-02 -1.3262466204585334e-02 -4.5638651457003243e-02 + 17 -1.2896269981100382e-02 9.7527665265956451e-03 3.7296535360836762e-02 + 18 -8.0065794848261610e-04 -8.6270473212554308e-04 -1.4483040697508777e-03 + 19 1.2452390836182583e-03 -2.5061097118772749e-03 7.2998631009713062e-03 + 20 3.5930060229597072e-03 3.6938860309252974e-03 3.2322732687893115e-03 + 21 -1.4689220370766539e-03 -2.7352129761527648e-04 7.0581624215243120e-04 + 22 -7.0694199254630382e-03 -4.2577148924878598e-03 2.8079117614252034e-04 + 23 6.0446963117374939e-03 -1.4000131614795382e-03 2.5819754847014320e-03 + 24 3.1926367902287864e-04 -9.9445664749276113e-04 1.4999996959365281e-04 + 25 1.3789754514814445e-04 -4.4335894884532700e-03 -8.1808136725080140e-04 + 26 2.0485904035217606e-03 2.7813358633835958e-03 4.3245727149206761e-03 + 27 4.5604120293369819e-04 -1.0305523026921102e-03 2.1188058381358391e-04 + 28 -6.2544520861855151e-03 1.4127711176146864e-03 -1.8429821884794260e-03 + 29 6.4110631534402261e-04 3.1273432719593807e-03 3.7253671105656745e-03 +... diff --git a/unittest/force-styles/tests/fix-timestep-spring_self_atom.yaml b/unittest/force-styles/tests/fix-timestep-spring_self_atom.yaml new file mode 100644 index 0000000000..e8b35e6455 --- /dev/null +++ b/unittest/force-styles/tests/fix-timestep-spring_self_atom.yaml @@ -0,0 +1,77 @@ +--- +lammps_version: 17 Feb 2022 +date_generated: Fri Mar 18 22:18:01 2022 +epsilon: 5e-14 +skip_tests: kokkos_omp +prerequisites: ! | + atom full + fix spring/self +pre_commands: ! | + variable kvar atom 10.0 +post_commands: ! | + fix move all nve + fix test solute spring/self v_kvar xyz +input_file: in.fourmol +natoms: 29 +global_scalar: 0.12623705370750438 +run_pos: ! |2 + 1 -2.7045669792379945e-01 2.4912140072031601e+00 -1.6695897908630153e-01 + 2 3.1003572362014503e-01 2.9612290242130319e+00 -8.5466689099875615e-01 + 3 -7.0398410505917419e-01 1.2305522448803927e+00 -6.2777452858703953e-01 + 4 -1.5818137373512378e+00 1.4837442978868092e+00 -1.2538665277734968e+00 + 5 -9.0719266809604937e-01 9.2652365891304722e-01 3.9954299416556610e-01 + 6 2.4832271436421161e-01 2.8312320769893828e-01 -1.2314303818391423e+00 + 7 3.4143518373063453e-01 -2.2645187306940717e-02 -2.5292229406165907e+00 + 8 1.1743540824610306e+00 -4.8863162985819381e-01 -6.3783828665914544e-01 + 9 1.3800508883119953e+00 -2.5274565574966718e-01 2.8353436538531862e-01 + 10 2.0510726036138696e+00 -1.4604027090169940e+00 -9.8323554549077119e-01 + 11 1.7878052123899795e+00 -1.9921835931655048e+00 -1.8890566136917575e+00 + 12 3.0063000763288268e+00 -4.9013323763786637e-01 -1.6231890083142151e+00 + 13 4.0515352007695906e+00 -8.9202569828585798e-01 -1.6399995011867139e+00 + 14 2.6066952690925067e+00 -4.1788633645045253e-01 -2.6633949696742012e+00 + 15 2.9695337662435719e+00 5.5423141568538492e-01 -1.2342076641871542e+00 + 16 2.6747001492056977e+00 -2.4124097322472577e+00 -2.3429048072365732e-02 + 17 2.2153591049025310e+00 -2.0897997214660862e+00 1.1963106355359285e+00 + 18 2.1369701704115056e+00 3.0158507413628213e+00 -3.5179348337213843e+00 + 19 1.5355837136087338e+00 2.6255292355375399e+00 -4.2353987779878857e+00 + 20 2.7727573005678758e+00 3.6923910449610102e+00 -3.9330842459133470e+00 + 21 4.9040128073205524e+00 -4.0752348172959030e+00 -3.6210314709893159e+00 + 22 4.3582355554440877e+00 -4.2126119427287101e+00 -4.4612844196314150e+00 + 23 5.7439382849307670e+00 -3.5821957939275060e+00 -3.8766361295935892e+00 + 24 2.0689243582422914e+00 3.1513346907271247e+00 3.1550389754829422e+00 + 25 1.3045351331492516e+00 3.2665125705842941e+00 2.5111855257434352e+00 + 26 2.5809237402711318e+00 4.0117602605482858e+00 3.2212060529089945e+00 + 27 -1.9611343130357277e+00 -4.3563411931359841e+00 2.1098293115523528e+00 + 28 -2.7473562684513411e+00 -4.0200819932379330e+00 1.5830052163433952e+00 + 29 -1.3126000191359812e+00 -3.5962518039482934e+00 2.2746342468737817e+00 +run_vel: ! |2 + 1 8.1685220941861477e-03 1.6512578512542727e-02 4.7892799147935001e-03 + 2 5.4427456394786321e-03 5.1693257879352533e-03 -1.4414043022813649e-03 + 3 -8.2272458036248362e-03 -1.2923813188884230e-02 -4.0970749471144546e-03 + 4 -3.7660861920462349e-03 -6.5659911420830365e-03 -1.1120922532834726e-03 + 5 -1.1012635909013241e-02 -9.8847866026321157e-03 -2.8391869073674538e-03 + 6 -3.9665990411620729e-02 4.6803722380071487e-02 3.7135522426802389e-02 + 7 9.1016763589152859e-04 -1.0126055720737583e-02 -5.1556610019025714e-02 + 8 7.9043585267658430e-03 -3.3496064544244345e-03 3.4549326598010660e-02 + 9 1.5620907286754389e-03 3.7378245105921431e-03 1.5036774253075934e-02 + 10 2.9193799040059056e-02 -2.9242248165535563e-02 -1.5014281912567770e-02 + 11 -4.7797459644718264e-03 -3.7436196398511232e-03 -2.3410499103477603e-03 + 12 2.2686069875175316e-03 -3.4732729502899497e-04 -3.0627334265471650e-03 + 13 2.7456854188010020e-03 5.8081889921879817e-03 -7.9308949311655092e-04 + 14 3.5223319737918667e-03 -5.7842699330258648e-03 -3.9396805101296825e-03 + 15 -1.8475459117759364e-03 -5.8469790281561471e-03 6.2849983323582511e-03 + 16 1.8676069228413028e-02 -1.3258381729410438e-02 -4.5625616778429308e-02 + 17 -1.2893668780819389e-02 9.7505325833410258e-03 3.7288200735675299e-02 + 18 -8.0065794869105819e-04 -8.6270473288011819e-04 -1.4483040693746142e-03 + 19 1.2452390836051499e-03 -2.5061097119616180e-03 7.2998631010316650e-03 + 20 3.5930060229538143e-03 3.6938860309035470e-03 3.2322732687958995e-03 + 21 -1.4689220366910704e-03 -2.7352129796142532e-04 7.0581624168175334e-04 + 22 -7.0694199254520765e-03 -4.2577148925037030e-03 2.8079117611209205e-04 + 23 6.0446963117617505e-03 -1.4000131614895336e-03 2.5819754846773601e-03 + 24 3.1926367911308686e-04 -9.9445664741642462e-04 1.4999996978363057e-04 + 25 1.3789754526895179e-04 -4.4335894884219599e-03 -8.1808136698604454e-04 + 26 2.0485904035342870e-03 2.7813358633902757e-03 4.3245727149365584e-03 + 27 4.5604120291626942e-04 -1.0305523027244966e-03 2.1188058375789067e-04 + 28 -6.2544520861839200e-03 1.4127711176141612e-03 -1.8429821884806304e-03 + 29 6.4110631535703737e-04 3.1273432719578029e-03 3.7253671105604122e-03 +... diff --git a/unittest/force-styles/tests/fix-timestep-spring_self_equal.yaml b/unittest/force-styles/tests/fix-timestep-spring_self_equal.yaml new file mode 100644 index 0000000000..382cc2cc73 --- /dev/null +++ b/unittest/force-styles/tests/fix-timestep-spring_self_equal.yaml @@ -0,0 +1,77 @@ +--- +lammps_version: 17 Feb 2022 +date_generated: Fri Mar 18 22:18:01 2022 +epsilon: 5e-14 +skip_tests: kokkos_omp +prerequisites: ! | + atom full + fix spring/self +pre_commands: ! | + variable kvar equal 10.0 +post_commands: ! | + fix move all nve + fix test solute spring/self v_kvar xyz +input_file: in.fourmol +natoms: 29 +global_scalar: 0.12623705370750438 +run_pos: ! |2 + 1 -2.7045669792379945e-01 2.4912140072031601e+00 -1.6695897908630153e-01 + 2 3.1003572362014503e-01 2.9612290242130319e+00 -8.5466689099875615e-01 + 3 -7.0398410505917419e-01 1.2305522448803927e+00 -6.2777452858703953e-01 + 4 -1.5818137373512378e+00 1.4837442978868092e+00 -1.2538665277734968e+00 + 5 -9.0719266809604937e-01 9.2652365891304722e-01 3.9954299416556610e-01 + 6 2.4832271436421161e-01 2.8312320769893828e-01 -1.2314303818391423e+00 + 7 3.4143518373063453e-01 -2.2645187306940717e-02 -2.5292229406165907e+00 + 8 1.1743540824610306e+00 -4.8863162985819381e-01 -6.3783828665914544e-01 + 9 1.3800508883119953e+00 -2.5274565574966718e-01 2.8353436538531862e-01 + 10 2.0510726036138696e+00 -1.4604027090169940e+00 -9.8323554549077119e-01 + 11 1.7878052123899795e+00 -1.9921835931655048e+00 -1.8890566136917575e+00 + 12 3.0063000763288268e+00 -4.9013323763786637e-01 -1.6231890083142151e+00 + 13 4.0515352007695906e+00 -8.9202569828585798e-01 -1.6399995011867139e+00 + 14 2.6066952690925067e+00 -4.1788633645045253e-01 -2.6633949696742012e+00 + 15 2.9695337662435719e+00 5.5423141568538492e-01 -1.2342076641871542e+00 + 16 2.6747001492056977e+00 -2.4124097322472577e+00 -2.3429048072365732e-02 + 17 2.2153591049025310e+00 -2.0897997214660862e+00 1.1963106355359285e+00 + 18 2.1369701704115056e+00 3.0158507413628213e+00 -3.5179348337213843e+00 + 19 1.5355837136087338e+00 2.6255292355375399e+00 -4.2353987779878857e+00 + 20 2.7727573005678758e+00 3.6923910449610102e+00 -3.9330842459133470e+00 + 21 4.9040128073205524e+00 -4.0752348172959030e+00 -3.6210314709893159e+00 + 22 4.3582355554440877e+00 -4.2126119427287101e+00 -4.4612844196314150e+00 + 23 5.7439382849307670e+00 -3.5821957939275060e+00 -3.8766361295935892e+00 + 24 2.0689243582422914e+00 3.1513346907271247e+00 3.1550389754829422e+00 + 25 1.3045351331492516e+00 3.2665125705842941e+00 2.5111855257434352e+00 + 26 2.5809237402711318e+00 4.0117602605482858e+00 3.2212060529089945e+00 + 27 -1.9611343130357277e+00 -4.3563411931359841e+00 2.1098293115523528e+00 + 28 -2.7473562684513411e+00 -4.0200819932379330e+00 1.5830052163433952e+00 + 29 -1.3126000191359812e+00 -3.5962518039482934e+00 2.2746342468737817e+00 +run_vel: ! |2 + 1 8.1685220941861477e-03 1.6512578512542727e-02 4.7892799147935001e-03 + 2 5.4427456394786321e-03 5.1693257879352533e-03 -1.4414043022813649e-03 + 3 -8.2272458036248362e-03 -1.2923813188884230e-02 -4.0970749471144546e-03 + 4 -3.7660861920462349e-03 -6.5659911420830365e-03 -1.1120922532834726e-03 + 5 -1.1012635909013241e-02 -9.8847866026321157e-03 -2.8391869073674538e-03 + 6 -3.9665990411620729e-02 4.6803722380071487e-02 3.7135522426802389e-02 + 7 9.1016763589152859e-04 -1.0126055720737583e-02 -5.1556610019025714e-02 + 8 7.9043585267658430e-03 -3.3496064544244345e-03 3.4549326598010660e-02 + 9 1.5620907286754389e-03 3.7378245105921431e-03 1.5036774253075934e-02 + 10 2.9193799040059056e-02 -2.9242248165535563e-02 -1.5014281912567770e-02 + 11 -4.7797459644718264e-03 -3.7436196398511232e-03 -2.3410499103477603e-03 + 12 2.2686069875175316e-03 -3.4732729502899497e-04 -3.0627334265471650e-03 + 13 2.7456854188010020e-03 5.8081889921879817e-03 -7.9308949311655092e-04 + 14 3.5223319737918667e-03 -5.7842699330258648e-03 -3.9396805101296825e-03 + 15 -1.8475459117759364e-03 -5.8469790281561471e-03 6.2849983323582511e-03 + 16 1.8676069228413028e-02 -1.3258381729410438e-02 -4.5625616778429308e-02 + 17 -1.2893668780819389e-02 9.7505325833410258e-03 3.7288200735675299e-02 + 18 -8.0065794869105819e-04 -8.6270473288011819e-04 -1.4483040693746142e-03 + 19 1.2452390836051499e-03 -2.5061097119616180e-03 7.2998631010316650e-03 + 20 3.5930060229538143e-03 3.6938860309035470e-03 3.2322732687958995e-03 + 21 -1.4689220366910704e-03 -2.7352129796142532e-04 7.0581624168175334e-04 + 22 -7.0694199254520765e-03 -4.2577148925037030e-03 2.8079117611209205e-04 + 23 6.0446963117617505e-03 -1.4000131614895336e-03 2.5819754846773601e-03 + 24 3.1926367911308686e-04 -9.9445664741642462e-04 1.4999996978363057e-04 + 25 1.3789754526895179e-04 -4.4335894884219599e-03 -8.1808136698604454e-04 + 26 2.0485904035342870e-03 2.7813358633902757e-03 4.3245727149365584e-03 + 27 4.5604120291626942e-04 -1.0305523027244966e-03 2.1188058375789067e-04 + 28 -6.2544520861839200e-03 1.4127711176141612e-03 -1.8429821884806304e-03 + 29 6.4110631535703737e-04 3.1273432719578029e-03 3.7253671105604122e-03 +... diff --git a/unittest/force-styles/tests/fix-timestep-wall_lj93_const.yaml b/unittest/force-styles/tests/fix-timestep-wall_lj93_const.yaml index a5eff76773..5431a8e0a8 100644 --- a/unittest/force-styles/tests/fix-timestep-wall_lj93_const.yaml +++ b/unittest/force-styles/tests/fix-timestep-wall_lj93_const.yaml @@ -1,6 +1,5 @@ --- lammps_version: 27 Jun 2024 -tags: generated date_generated: Fri Aug 2 23:56:34 2024 epsilon: 2e-14 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-wall_reflect.yaml b/unittest/force-styles/tests/fix-timestep-wall_reflect.yaml index ed9ac69350..56b9de4462 100644 --- a/unittest/force-styles/tests/fix-timestep-wall_reflect.yaml +++ b/unittest/force-styles/tests/fix-timestep-wall_reflect.yaml @@ -1,6 +1,5 @@ --- lammps_version: 17 Apr 2024 -tags: generated date_generated: Fri Jun 7 18:23:44 2024 epsilon: 4e-14 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-wall_region_harmonic_const.yaml b/unittest/force-styles/tests/fix-timestep-wall_region_harmonic.yaml similarity index 63% rename from unittest/force-styles/tests/fix-timestep-wall_region_harmonic_const.yaml rename to unittest/force-styles/tests/fix-timestep-wall_region_harmonic.yaml index 5ae9df3a06..46455f68a0 100644 --- a/unittest/force-styles/tests/fix-timestep-wall_region_harmonic_const.yaml +++ b/unittest/force-styles/tests/fix-timestep-wall_region_harmonic.yaml @@ -1,6 +1,6 @@ --- -lammps_version: 3 Aug 2022 -date_generated: Mon Aug 15 01:14:02 2022 +lammps_version: 29 Aug 2024 +date_generated: Tue Oct 1 12:59:45 2024 epsilon: 4e-14 skip_tests: prerequisites: ! | @@ -51,33 +51,33 @@ run_pos: ! |2 28 -2.7473562684513411e+00 -4.0200819932379330e+00 1.5830052163433954e+00 29 -1.3126000191359855e+00 -3.5962518039482929e+00 2.2746342468737835e+00 run_vel: ! |2 - 1 8.1705744183262832e-03 1.6516406176274298e-02 4.7902264318913212e-03 - 2 5.4501493445687828e-03 5.1791699408496412e-03 -1.4372931530376577e-03 - 3 -8.2298292722385591e-03 -1.2926551614621364e-02 -4.0984181178163734e-03 - 4 -3.7699042590093506e-03 -6.5722892098813894e-03 -1.1184640360133316e-03 - 5 -1.1021961004346575e-02 -9.8906780939336039e-03 -2.8410737829284390e-03 + 1 8.1705744183262832e-03 1.6516406176274298e-02 4.7902264318913203e-03 + 2 5.4501493445687828e-03 5.1791699408496447e-03 -1.4372931530376549e-03 + 3 -8.2298292722385574e-03 -1.2926551614621364e-02 -4.0984181178163699e-03 + 4 -3.7699042590093523e-03 -6.5722892098813894e-03 -1.1184640360133299e-03 + 5 -1.1021961004346582e-02 -9.8906780939336091e-03 -2.8410737829284408e-03 6 -3.9676663166400027e-02 4.6817061464710263e-02 3.7148491979476131e-02 - 7 9.1033953013898753e-04 -1.0128524411938794e-02 -5.1568251805019748e-02 - 8 7.9064712058855742e-03 -3.3507254552631585e-03 3.4557098492564643e-02 - 9 1.5644176117320938e-03 3.7365546102722208e-03 1.5047408822037651e-02 - 10 2.9201446820573192e-02 -2.9249578745486140e-02 -1.5018077424322544e-02 - 11 -4.7835961513517542e-03 -3.7481385134185211e-03 -2.3464104142290089e-03 - 12 2.2696451841920672e-03 -3.4774154398129641e-04 -3.0640770327796966e-03 - 13 2.7531740451953164e-03 5.8171061612840502e-03 -7.9467454022160669e-04 - 14 3.5246182371994205e-03 -5.7939995585585538e-03 -3.9478431172751361e-03 - 15 -1.8547943640122950e-03 -5.8554729942777778e-03 6.2938485140538675e-03 + 7 9.1033953013898742e-04 -1.0128524411938794e-02 -5.1568251805019748e-02 + 8 7.9064712058855742e-03 -3.3507254552631576e-03 3.4557098492564650e-02 + 9 1.5644176117320932e-03 3.7365546102722212e-03 1.5047408822037651e-02 + 10 2.9201446820573192e-02 -2.9249578745486147e-02 -1.5018077424322544e-02 + 11 -4.7835961513517542e-03 -3.7481385134185202e-03 -2.3464104142290089e-03 + 12 2.2696451841920694e-03 -3.4774154398129690e-04 -3.0640770327796979e-03 + 13 2.7531740451953164e-03 5.8171061612840493e-03 -7.9467454022160377e-04 + 14 3.5246182371994183e-03 -5.7939995585585503e-03 -3.9478431172751344e-03 + 15 -1.8547943640122972e-03 -5.8554729942777778e-03 6.2938485140538692e-03 16 1.8681499973445252e-02 -1.3262466204585332e-02 -4.5638651457003250e-02 17 -1.2896269981100378e-02 9.7527665265956451e-03 3.7296535360836762e-02 - 18 -8.0065795274987550e-04 -8.6270473974390637e-04 -1.4483040536385791e-03 - 19 1.2452390067376827e-03 -2.5061097800836321e-03 7.2998639311871857e-03 - 20 3.5930058460518109e-03 3.6938852051849871e-03 3.2322738480194727e-03 - 21 -1.4689219756961610e-03 -2.7352107824530291e-04 7.0581625180892197e-04 - 22 -7.0694199165145105e-03 -4.2577148692717545e-03 2.8079117911323598e-04 - 23 6.0446963236685230e-03 -1.4000131545098772e-03 2.5819754799379716e-03 - 24 3.1926368451268083e-04 -9.9445664487428820e-04 1.4999960207062409e-04 - 25 1.3789752933078488e-04 -4.4335894831520756e-03 -8.1808138106080120e-04 - 26 2.0485904023409989e-03 2.7813358660936129e-03 4.3245726853349256e-03 - 27 4.5604120293369840e-04 -1.0305523026921111e-03 2.1188058381358413e-04 - 28 -6.2544520861855151e-03 1.4127711176146879e-03 -1.8429821884794260e-03 - 29 6.4110631534402174e-04 3.1273432719593824e-03 3.7253671105656736e-03 + 18 -8.0065795274987550e-04 -8.6270473974390605e-04 -1.4483040536385806e-03 + 19 1.2452390067376805e-03 -2.5061097800836356e-03 7.2998639311871892e-03 + 20 3.5930058460518109e-03 3.6938852051849871e-03 3.2322738480194770e-03 + 21 -1.4689219756961604e-03 -2.7352107824530231e-04 7.0581625180892046e-04 + 22 -7.0694199165145140e-03 -4.2577148692717554e-03 2.8079117911323815e-04 + 23 6.0446963236685256e-03 -1.4000131545098772e-03 2.5819754799379755e-03 + 24 3.1926368451268056e-04 -9.9445664487428712e-04 1.4999960207062358e-04 + 25 1.3789752933078488e-04 -4.4335894831520773e-03 -8.1808138106080109e-04 + 26 2.0485904023410002e-03 2.7813358660936120e-03 4.3245726853349290e-03 + 27 4.5604120293369819e-04 -1.0305523026921102e-03 2.1188058381358391e-04 + 28 -6.2544520861855151e-03 1.4127711176146864e-03 -1.8429821884794260e-03 + 29 6.4110631534402261e-04 3.1273432719593807e-03 3.7253671105656745e-03 ... diff --git a/unittest/force-styles/tests/fix-timestep-wall_region_lj1043.yaml b/unittest/force-styles/tests/fix-timestep-wall_region_lj1043.yaml new file mode 100644 index 0000000000..7b900ce6a3 --- /dev/null +++ b/unittest/force-styles/tests/fix-timestep-wall_region_lj1043.yaml @@ -0,0 +1,83 @@ +--- +lammps_version: 29 Aug 2024 +date_generated: Tue Oct 1 13:00:12 2024 +epsilon: 4e-14 +skip_tests: +prerequisites: ! | + atom full + fix wall/region +pre_commands: ! | + boundary f f f +post_commands: ! | + fix move all nve + region box block EDGE EDGE EDGE EDGE EDGE EDGE + fix test solute wall/region box lj1043 1.0 1.0 2.5 + fix_modify test virial yes +input_file: in.fourmol +natoms: 29 +run_stress: ! |2- + 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +global_scalar: 0 +global_vector: ! |- + 3 0 0 0 +run_pos: ! |2 + 1 -2.7045559775384026e-01 2.4912159905679729e+00 -1.6695851791541885e-01 + 2 3.1004029573899528e-01 2.9612354631094391e+00 -8.5466363037021464e-01 + 3 -7.0398551400789466e-01 1.2305509955830618e+00 -6.2777526944456274e-01 + 4 -1.5818159336499285e+00 1.4837407818929933e+00 -1.2538710836062004e+00 + 5 -9.0719763672789266e-01 9.2652103885675297e-01 3.9954210488374786e-01 + 6 2.4831720524855988e-01 2.8313021497871271e-01 -1.2314233331711453e+00 + 7 3.4143527641386412e-01 -2.2646551041391422e-02 -2.5292291414903052e+00 + 8 1.1743552229100009e+00 -4.8863228565853944e-01 -6.3783432910825522e-01 + 9 1.3800524229500313e+00 -2.5274721030406683e-01 2.8353985887095157e-01 + 10 2.0510765220543883e+00 -1.4604063740302866e+00 -9.8323745081712954e-01 + 11 1.7878031944442556e+00 -1.9921863272948861e+00 -1.8890602447625777e+00 + 12 3.0063007039340053e+00 -4.9013350496963298e-01 -1.6231898107386231e+00 + 13 4.0515402959192999e+00 -8.9202011606653986e-01 -1.6400005529924957e+00 + 14 2.6066963345543819e+00 -4.1789253965514156e-01 -2.6634003608794394e+00 + 15 2.9695287185712913e+00 5.5422613165234036e-01 -1.2342022021790127e+00 + 16 2.6747029695228521e+00 -2.4124119054564295e+00 -2.3435746150616152e-02 + 17 2.2153577785283796e+00 -2.0897985186907717e+00 1.1963150794479436e+00 + 18 2.1369701704094664e+00 3.0158507413593139e+00 -3.5179348337135590e+00 + 19 1.5355837135395243e+00 2.6255292354730009e+00 -4.2353987771401354e+00 + 20 2.7727573003748263e+00 3.6923910441179069e+00 -3.9330842453167185e+00 + 21 4.9040128073837339e+00 -4.0752348170758461e+00 -3.6210314709795299e+00 + 22 4.3582355554510048e+00 -4.2126119427061379e+00 -4.4612844196307497e+00 + 23 5.7439382849366911e+00 -3.5821957939240279e+00 -3.8766361295959513e+00 + 24 2.0689243582454213e+00 3.1513346907303501e+00 3.1550389751128463e+00 + 25 1.3045351331414130e+00 3.2665125705869009e+00 2.5111855257365274e+00 + 26 2.5809237402714267e+00 4.0117602605512728e+00 3.2212060528800821e+00 + 27 -1.9611343130357228e+00 -4.3563411931359752e+00 2.1098293115523705e+00 + 28 -2.7473562684513411e+00 -4.0200819932379330e+00 1.5830052163433954e+00 + 29 -1.3126000191359855e+00 -3.5962518039482929e+00 2.2746342468737835e+00 +run_vel: ! |2 + 1 8.1705744183262832e-03 1.6516406176274298e-02 4.7902264318913203e-03 + 2 5.4501493445687828e-03 5.1791699408496447e-03 -1.4372931530376549e-03 + 3 -8.2298292722385574e-03 -1.2926551614621364e-02 -4.0984181178163699e-03 + 4 -3.7699042590093523e-03 -6.5722892098813894e-03 -1.1184640360133299e-03 + 5 -1.1021961004346582e-02 -9.8906780939336091e-03 -2.8410737829284408e-03 + 6 -3.9676663166400027e-02 4.6817061464710263e-02 3.7148491979476131e-02 + 7 9.1033953013898742e-04 -1.0128524411938794e-02 -5.1568251805019748e-02 + 8 7.9064712058855742e-03 -3.3507254552631576e-03 3.4557098492564650e-02 + 9 1.5644176117320932e-03 3.7365546102722212e-03 1.5047408822037651e-02 + 10 2.9201446820573192e-02 -2.9249578745486147e-02 -1.5018077424322544e-02 + 11 -4.7835961513517542e-03 -3.7481385134185202e-03 -2.3464104142290089e-03 + 12 2.2696451841920694e-03 -3.4774154398129690e-04 -3.0640770327796979e-03 + 13 2.7531740451953164e-03 5.8171061612840493e-03 -7.9467454022160377e-04 + 14 3.5246182371994183e-03 -5.7939995585585503e-03 -3.9478431172751344e-03 + 15 -1.8547943640122972e-03 -5.8554729942777778e-03 6.2938485140538692e-03 + 16 1.8681499973445252e-02 -1.3262466204585332e-02 -4.5638651457003250e-02 + 17 -1.2896269981100378e-02 9.7527665265956451e-03 3.7296535360836762e-02 + 18 -8.0065795274987550e-04 -8.6270473974390605e-04 -1.4483040536385806e-03 + 19 1.2452390067376805e-03 -2.5061097800836356e-03 7.2998639311871892e-03 + 20 3.5930058460518109e-03 3.6938852051849871e-03 3.2322738480194770e-03 + 21 -1.4689219756961604e-03 -2.7352107824530231e-04 7.0581625180892046e-04 + 22 -7.0694199165145140e-03 -4.2577148692717554e-03 2.8079117911323815e-04 + 23 6.0446963236685256e-03 -1.4000131545098772e-03 2.5819754799379755e-03 + 24 3.1926368451268056e-04 -9.9445664487428712e-04 1.4999960207062358e-04 + 25 1.3789752933078488e-04 -4.4335894831520773e-03 -8.1808138106080109e-04 + 26 2.0485904023410002e-03 2.7813358660936120e-03 4.3245726853349290e-03 + 27 4.5604120293369819e-04 -1.0305523026921102e-03 2.1188058381358391e-04 + 28 -6.2544520861855151e-03 1.4127711176146864e-03 -1.8429821884794260e-03 + 29 6.4110631534402261e-04 3.1273432719593807e-03 3.7253671105656745e-03 +... diff --git a/unittest/force-styles/tests/fix-timestep-wall_region_lj126.yaml b/unittest/force-styles/tests/fix-timestep-wall_region_lj126.yaml new file mode 100644 index 0000000000..2dcb9eccbb --- /dev/null +++ b/unittest/force-styles/tests/fix-timestep-wall_region_lj126.yaml @@ -0,0 +1,83 @@ +--- +lammps_version: 29 Aug 2024 +date_generated: Tue Oct 1 12:59:56 2024 +epsilon: 4e-14 +skip_tests: +prerequisites: ! | + atom full + fix wall/region +pre_commands: ! | + boundary f f f +post_commands: ! | + fix move all nve + region box block EDGE EDGE EDGE EDGE EDGE EDGE + fix test solute wall/region box lj126 1.0 1.0 2.5 + fix_modify test virial yes +input_file: in.fourmol +natoms: 29 +run_stress: ! |2- + 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +global_scalar: 0 +global_vector: ! |- + 3 0 0 0 +run_pos: ! |2 + 1 -2.7045559775384026e-01 2.4912159905679729e+00 -1.6695851791541885e-01 + 2 3.1004029573899528e-01 2.9612354631094391e+00 -8.5466363037021464e-01 + 3 -7.0398551400789466e-01 1.2305509955830618e+00 -6.2777526944456274e-01 + 4 -1.5818159336499285e+00 1.4837407818929933e+00 -1.2538710836062004e+00 + 5 -9.0719763672789266e-01 9.2652103885675297e-01 3.9954210488374786e-01 + 6 2.4831720524855988e-01 2.8313021497871271e-01 -1.2314233331711453e+00 + 7 3.4143527641386412e-01 -2.2646551041391422e-02 -2.5292291414903052e+00 + 8 1.1743552229100009e+00 -4.8863228565853944e-01 -6.3783432910825522e-01 + 9 1.3800524229500313e+00 -2.5274721030406683e-01 2.8353985887095157e-01 + 10 2.0510765220543883e+00 -1.4604063740302866e+00 -9.8323745081712954e-01 + 11 1.7878031944442556e+00 -1.9921863272948861e+00 -1.8890602447625777e+00 + 12 3.0063007039340053e+00 -4.9013350496963298e-01 -1.6231898107386231e+00 + 13 4.0515402959192999e+00 -8.9202011606653986e-01 -1.6400005529924957e+00 + 14 2.6066963345543819e+00 -4.1789253965514156e-01 -2.6634003608794394e+00 + 15 2.9695287185712913e+00 5.5422613165234036e-01 -1.2342022021790127e+00 + 16 2.6747029695228521e+00 -2.4124119054564295e+00 -2.3435746150616152e-02 + 17 2.2153577785283796e+00 -2.0897985186907717e+00 1.1963150794479436e+00 + 18 2.1369701704094664e+00 3.0158507413593139e+00 -3.5179348337135590e+00 + 19 1.5355837135395243e+00 2.6255292354730009e+00 -4.2353987771401354e+00 + 20 2.7727573003748263e+00 3.6923910441179069e+00 -3.9330842453167185e+00 + 21 4.9040128073837339e+00 -4.0752348170758461e+00 -3.6210314709795299e+00 + 22 4.3582355554510048e+00 -4.2126119427061379e+00 -4.4612844196307497e+00 + 23 5.7439382849366911e+00 -3.5821957939240279e+00 -3.8766361295959513e+00 + 24 2.0689243582454213e+00 3.1513346907303501e+00 3.1550389751128463e+00 + 25 1.3045351331414130e+00 3.2665125705869009e+00 2.5111855257365274e+00 + 26 2.5809237402714267e+00 4.0117602605512728e+00 3.2212060528800821e+00 + 27 -1.9611343130357228e+00 -4.3563411931359752e+00 2.1098293115523705e+00 + 28 -2.7473562684513411e+00 -4.0200819932379330e+00 1.5830052163433954e+00 + 29 -1.3126000191359855e+00 -3.5962518039482929e+00 2.2746342468737835e+00 +run_vel: ! |2 + 1 8.1705744183262832e-03 1.6516406176274298e-02 4.7902264318913203e-03 + 2 5.4501493445687828e-03 5.1791699408496447e-03 -1.4372931530376549e-03 + 3 -8.2298292722385574e-03 -1.2926551614621364e-02 -4.0984181178163699e-03 + 4 -3.7699042590093523e-03 -6.5722892098813894e-03 -1.1184640360133299e-03 + 5 -1.1021961004346582e-02 -9.8906780939336091e-03 -2.8410737829284408e-03 + 6 -3.9676663166400027e-02 4.6817061464710263e-02 3.7148491979476131e-02 + 7 9.1033953013898742e-04 -1.0128524411938794e-02 -5.1568251805019748e-02 + 8 7.9064712058855742e-03 -3.3507254552631576e-03 3.4557098492564650e-02 + 9 1.5644176117320932e-03 3.7365546102722212e-03 1.5047408822037651e-02 + 10 2.9201446820573192e-02 -2.9249578745486147e-02 -1.5018077424322544e-02 + 11 -4.7835961513517542e-03 -3.7481385134185202e-03 -2.3464104142290089e-03 + 12 2.2696451841920694e-03 -3.4774154398129690e-04 -3.0640770327796979e-03 + 13 2.7531740451953164e-03 5.8171061612840493e-03 -7.9467454022160377e-04 + 14 3.5246182371994183e-03 -5.7939995585585503e-03 -3.9478431172751344e-03 + 15 -1.8547943640122972e-03 -5.8554729942777778e-03 6.2938485140538692e-03 + 16 1.8681499973445252e-02 -1.3262466204585332e-02 -4.5638651457003250e-02 + 17 -1.2896269981100378e-02 9.7527665265956451e-03 3.7296535360836762e-02 + 18 -8.0065795274987550e-04 -8.6270473974390605e-04 -1.4483040536385806e-03 + 19 1.2452390067376805e-03 -2.5061097800836356e-03 7.2998639311871892e-03 + 20 3.5930058460518109e-03 3.6938852051849871e-03 3.2322738480194770e-03 + 21 -1.4689219756961604e-03 -2.7352107824530231e-04 7.0581625180892046e-04 + 22 -7.0694199165145140e-03 -4.2577148692717554e-03 2.8079117911323815e-04 + 23 6.0446963236685256e-03 -1.4000131545098772e-03 2.5819754799379755e-03 + 24 3.1926368451268056e-04 -9.9445664487428712e-04 1.4999960207062358e-04 + 25 1.3789752933078488e-04 -4.4335894831520773e-03 -8.1808138106080109e-04 + 26 2.0485904023410002e-03 2.7813358660936120e-03 4.3245726853349290e-03 + 27 4.5604120293369819e-04 -1.0305523026921102e-03 2.1188058381358391e-04 + 28 -6.2544520861855151e-03 1.4127711176146864e-03 -1.8429821884794260e-03 + 29 6.4110631534402261e-04 3.1273432719593807e-03 3.7253671105656745e-03 +... diff --git a/unittest/force-styles/tests/fix-timestep-wall_region_lj93.yaml b/unittest/force-styles/tests/fix-timestep-wall_region_lj93.yaml new file mode 100644 index 0000000000..cc74769cb2 --- /dev/null +++ b/unittest/force-styles/tests/fix-timestep-wall_region_lj93.yaml @@ -0,0 +1,83 @@ +--- +lammps_version: 29 Aug 2024 +date_generated: Tue Oct 1 13:00:05 2024 +epsilon: 4e-14 +skip_tests: +prerequisites: ! | + atom full + fix wall/region +pre_commands: ! | + boundary f f f +post_commands: ! | + fix move all nve + region box block EDGE EDGE EDGE EDGE EDGE EDGE + fix test solute wall/region box lj93 1.0 1.0 2.5 + fix_modify test virial yes +input_file: in.fourmol +natoms: 29 +run_stress: ! |2- + 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +global_scalar: 0 +global_vector: ! |- + 3 0 0 0 +run_pos: ! |2 + 1 -2.7045559775384026e-01 2.4912159905679729e+00 -1.6695851791541885e-01 + 2 3.1004029573899528e-01 2.9612354631094391e+00 -8.5466363037021464e-01 + 3 -7.0398551400789466e-01 1.2305509955830618e+00 -6.2777526944456274e-01 + 4 -1.5818159336499285e+00 1.4837407818929933e+00 -1.2538710836062004e+00 + 5 -9.0719763672789266e-01 9.2652103885675297e-01 3.9954210488374786e-01 + 6 2.4831720524855988e-01 2.8313021497871271e-01 -1.2314233331711453e+00 + 7 3.4143527641386412e-01 -2.2646551041391422e-02 -2.5292291414903052e+00 + 8 1.1743552229100009e+00 -4.8863228565853944e-01 -6.3783432910825522e-01 + 9 1.3800524229500313e+00 -2.5274721030406683e-01 2.8353985887095157e-01 + 10 2.0510765220543883e+00 -1.4604063740302866e+00 -9.8323745081712954e-01 + 11 1.7878031944442556e+00 -1.9921863272948861e+00 -1.8890602447625777e+00 + 12 3.0063007039340053e+00 -4.9013350496963298e-01 -1.6231898107386231e+00 + 13 4.0515402959192999e+00 -8.9202011606653986e-01 -1.6400005529924957e+00 + 14 2.6066963345543819e+00 -4.1789253965514156e-01 -2.6634003608794394e+00 + 15 2.9695287185712913e+00 5.5422613165234036e-01 -1.2342022021790127e+00 + 16 2.6747029695228521e+00 -2.4124119054564295e+00 -2.3435746150616152e-02 + 17 2.2153577785283796e+00 -2.0897985186907717e+00 1.1963150794479436e+00 + 18 2.1369701704094664e+00 3.0158507413593139e+00 -3.5179348337135590e+00 + 19 1.5355837135395243e+00 2.6255292354730009e+00 -4.2353987771401354e+00 + 20 2.7727573003748263e+00 3.6923910441179069e+00 -3.9330842453167185e+00 + 21 4.9040128073837339e+00 -4.0752348170758461e+00 -3.6210314709795299e+00 + 22 4.3582355554510048e+00 -4.2126119427061379e+00 -4.4612844196307497e+00 + 23 5.7439382849366911e+00 -3.5821957939240279e+00 -3.8766361295959513e+00 + 24 2.0689243582454213e+00 3.1513346907303501e+00 3.1550389751128463e+00 + 25 1.3045351331414130e+00 3.2665125705869009e+00 2.5111855257365274e+00 + 26 2.5809237402714267e+00 4.0117602605512728e+00 3.2212060528800821e+00 + 27 -1.9611343130357228e+00 -4.3563411931359752e+00 2.1098293115523705e+00 + 28 -2.7473562684513411e+00 -4.0200819932379330e+00 1.5830052163433954e+00 + 29 -1.3126000191359855e+00 -3.5962518039482929e+00 2.2746342468737835e+00 +run_vel: ! |2 + 1 8.1705744183262832e-03 1.6516406176274298e-02 4.7902264318913203e-03 + 2 5.4501493445687828e-03 5.1791699408496447e-03 -1.4372931530376549e-03 + 3 -8.2298292722385574e-03 -1.2926551614621364e-02 -4.0984181178163699e-03 + 4 -3.7699042590093523e-03 -6.5722892098813894e-03 -1.1184640360133299e-03 + 5 -1.1021961004346582e-02 -9.8906780939336091e-03 -2.8410737829284408e-03 + 6 -3.9676663166400027e-02 4.6817061464710263e-02 3.7148491979476131e-02 + 7 9.1033953013898742e-04 -1.0128524411938794e-02 -5.1568251805019748e-02 + 8 7.9064712058855742e-03 -3.3507254552631576e-03 3.4557098492564650e-02 + 9 1.5644176117320932e-03 3.7365546102722212e-03 1.5047408822037651e-02 + 10 2.9201446820573192e-02 -2.9249578745486147e-02 -1.5018077424322544e-02 + 11 -4.7835961513517542e-03 -3.7481385134185202e-03 -2.3464104142290089e-03 + 12 2.2696451841920694e-03 -3.4774154398129690e-04 -3.0640770327796979e-03 + 13 2.7531740451953164e-03 5.8171061612840493e-03 -7.9467454022160377e-04 + 14 3.5246182371994183e-03 -5.7939995585585503e-03 -3.9478431172751344e-03 + 15 -1.8547943640122972e-03 -5.8554729942777778e-03 6.2938485140538692e-03 + 16 1.8681499973445252e-02 -1.3262466204585332e-02 -4.5638651457003250e-02 + 17 -1.2896269981100378e-02 9.7527665265956451e-03 3.7296535360836762e-02 + 18 -8.0065795274987550e-04 -8.6270473974390605e-04 -1.4483040536385806e-03 + 19 1.2452390067376805e-03 -2.5061097800836356e-03 7.2998639311871892e-03 + 20 3.5930058460518109e-03 3.6938852051849871e-03 3.2322738480194770e-03 + 21 -1.4689219756961604e-03 -2.7352107824530231e-04 7.0581625180892046e-04 + 22 -7.0694199165145140e-03 -4.2577148692717554e-03 2.8079117911323815e-04 + 23 6.0446963236685256e-03 -1.4000131545098772e-03 2.5819754799379755e-03 + 24 3.1926368451268056e-04 -9.9445664487428712e-04 1.4999960207062358e-04 + 25 1.3789752933078488e-04 -4.4335894831520773e-03 -8.1808138106080109e-04 + 26 2.0485904023410002e-03 2.7813358660936120e-03 4.3245726853349290e-03 + 27 4.5604120293369819e-04 -1.0305523026921102e-03 2.1188058381358391e-04 + 28 -6.2544520861855151e-03 1.4127711176146864e-03 -1.8429821884794260e-03 + 29 6.4110631534402261e-04 3.1273432719593807e-03 3.7253671105656745e-03 +... diff --git a/unittest/force-styles/tests/fix-timestep-wall_region_morse.yaml b/unittest/force-styles/tests/fix-timestep-wall_region_morse.yaml new file mode 100644 index 0000000000..6bb6e8ce40 --- /dev/null +++ b/unittest/force-styles/tests/fix-timestep-wall_region_morse.yaml @@ -0,0 +1,83 @@ +--- +lammps_version: 29 Aug 2024 +date_generated: Tue Oct 1 13:00:18 2024 +epsilon: 4e-14 +skip_tests: +prerequisites: ! | + atom full + fix wall/region +pre_commands: ! | + boundary f f f +post_commands: ! | + fix move all nve + region box block EDGE EDGE EDGE EDGE EDGE EDGE + fix test solute wall/region box morse 1.0 1.0 1.5 3.0 + fix_modify test virial yes +input_file: in.fourmol +natoms: 29 +run_stress: ! |2- + 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +global_scalar: 0 +global_vector: ! |- + 3 0 0 0 +run_pos: ! |2 + 1 -2.7045559775384026e-01 2.4912159905679729e+00 -1.6695851791541885e-01 + 2 3.1004029573899528e-01 2.9612354631094391e+00 -8.5466363037021464e-01 + 3 -7.0398551400789466e-01 1.2305509955830618e+00 -6.2777526944456274e-01 + 4 -1.5818159336499285e+00 1.4837407818929933e+00 -1.2538710836062004e+00 + 5 -9.0719763672789266e-01 9.2652103885675297e-01 3.9954210488374786e-01 + 6 2.4831720524855988e-01 2.8313021497871271e-01 -1.2314233331711453e+00 + 7 3.4143527641386412e-01 -2.2646551041391422e-02 -2.5292291414903052e+00 + 8 1.1743552229100009e+00 -4.8863228565853944e-01 -6.3783432910825522e-01 + 9 1.3800524229500313e+00 -2.5274721030406683e-01 2.8353985887095157e-01 + 10 2.0510765220543883e+00 -1.4604063740302866e+00 -9.8323745081712954e-01 + 11 1.7878031944442556e+00 -1.9921863272948861e+00 -1.8890602447625777e+00 + 12 3.0063007039340053e+00 -4.9013350496963298e-01 -1.6231898107386231e+00 + 13 4.0515402959192999e+00 -8.9202011606653986e-01 -1.6400005529924957e+00 + 14 2.6066963345543819e+00 -4.1789253965514156e-01 -2.6634003608794394e+00 + 15 2.9695287185712913e+00 5.5422613165234036e-01 -1.2342022021790127e+00 + 16 2.6747029695228521e+00 -2.4124119054564295e+00 -2.3435746150616152e-02 + 17 2.2153577785283796e+00 -2.0897985186907717e+00 1.1963150794479436e+00 + 18 2.1369701704094664e+00 3.0158507413593139e+00 -3.5179348337135590e+00 + 19 1.5355837135395243e+00 2.6255292354730009e+00 -4.2353987771401354e+00 + 20 2.7727573003748263e+00 3.6923910441179069e+00 -3.9330842453167185e+00 + 21 4.9040128073837339e+00 -4.0752348170758461e+00 -3.6210314709795299e+00 + 22 4.3582355554510048e+00 -4.2126119427061379e+00 -4.4612844196307497e+00 + 23 5.7439382849366911e+00 -3.5821957939240279e+00 -3.8766361295959513e+00 + 24 2.0689243582454213e+00 3.1513346907303501e+00 3.1550389751128463e+00 + 25 1.3045351331414130e+00 3.2665125705869009e+00 2.5111855257365274e+00 + 26 2.5809237402714267e+00 4.0117602605512728e+00 3.2212060528800821e+00 + 27 -1.9611343130357228e+00 -4.3563411931359752e+00 2.1098293115523705e+00 + 28 -2.7473562684513411e+00 -4.0200819932379330e+00 1.5830052163433954e+00 + 29 -1.3126000191359855e+00 -3.5962518039482929e+00 2.2746342468737835e+00 +run_vel: ! |2 + 1 8.1705744183262832e-03 1.6516406176274298e-02 4.7902264318913203e-03 + 2 5.4501493445687828e-03 5.1791699408496447e-03 -1.4372931530376549e-03 + 3 -8.2298292722385574e-03 -1.2926551614621364e-02 -4.0984181178163699e-03 + 4 -3.7699042590093523e-03 -6.5722892098813894e-03 -1.1184640360133299e-03 + 5 -1.1021961004346582e-02 -9.8906780939336091e-03 -2.8410737829284408e-03 + 6 -3.9676663166400027e-02 4.6817061464710263e-02 3.7148491979476131e-02 + 7 9.1033953013898742e-04 -1.0128524411938794e-02 -5.1568251805019748e-02 + 8 7.9064712058855742e-03 -3.3507254552631576e-03 3.4557098492564650e-02 + 9 1.5644176117320932e-03 3.7365546102722212e-03 1.5047408822037651e-02 + 10 2.9201446820573192e-02 -2.9249578745486147e-02 -1.5018077424322544e-02 + 11 -4.7835961513517542e-03 -3.7481385134185202e-03 -2.3464104142290089e-03 + 12 2.2696451841920694e-03 -3.4774154398129690e-04 -3.0640770327796979e-03 + 13 2.7531740451953164e-03 5.8171061612840493e-03 -7.9467454022160377e-04 + 14 3.5246182371994183e-03 -5.7939995585585503e-03 -3.9478431172751344e-03 + 15 -1.8547943640122972e-03 -5.8554729942777778e-03 6.2938485140538692e-03 + 16 1.8681499973445252e-02 -1.3262466204585332e-02 -4.5638651457003250e-02 + 17 -1.2896269981100378e-02 9.7527665265956451e-03 3.7296535360836762e-02 + 18 -8.0065795274987550e-04 -8.6270473974390605e-04 -1.4483040536385806e-03 + 19 1.2452390067376805e-03 -2.5061097800836356e-03 7.2998639311871892e-03 + 20 3.5930058460518109e-03 3.6938852051849871e-03 3.2322738480194770e-03 + 21 -1.4689219756961604e-03 -2.7352107824530231e-04 7.0581625180892046e-04 + 22 -7.0694199165145140e-03 -4.2577148692717554e-03 2.8079117911323815e-04 + 23 6.0446963236685256e-03 -1.4000131545098772e-03 2.5819754799379755e-03 + 24 3.1926368451268056e-04 -9.9445664487428712e-04 1.4999960207062358e-04 + 25 1.3789752933078488e-04 -4.4335894831520773e-03 -8.1808138106080109e-04 + 26 2.0485904023410002e-03 2.7813358660936120e-03 4.3245726853349290e-03 + 27 4.5604120293369819e-04 -1.0305523026921102e-03 2.1188058381358391e-04 + 28 -6.2544520861855151e-03 1.4127711176146864e-03 -1.8429821884794260e-03 + 29 6.4110631534402261e-04 3.1273432719593807e-03 3.7253671105656745e-03 +... diff --git a/unittest/force-styles/tests/fix-timestep-wall_region_sphere.yaml b/unittest/force-styles/tests/fix-timestep-wall_region_sphere.yaml new file mode 100644 index 0000000000..cf7348194e --- /dev/null +++ b/unittest/force-styles/tests/fix-timestep-wall_region_sphere.yaml @@ -0,0 +1,83 @@ +--- +lammps_version: 29 Aug 2024 +date_generated: Mon Oct 7 17:02:09 2024 +epsilon: 4e-14 +skip_tests: +prerequisites: ! | + atom full + fix wall/region +pre_commands: ! | + boundary f f f +post_commands: ! | + fix move all nve + region 1 sphere 0 0 0 10 + fix test solute wall/region 1 lj93 1.0 1.0 2.5 + fix_modify test virial yes +input_file: in.fourmol +natoms: 29 +run_stress: ! |2- + 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +global_scalar: 0 +global_vector: ! |- + 3 0 0 0 +run_pos: ! |2 + 1 -2.7045559775384026e-01 2.4912159905679729e+00 -1.6695851791541885e-01 + 2 3.1004029573899528e-01 2.9612354631094391e+00 -8.5466363037021464e-01 + 3 -7.0398551400789466e-01 1.2305509955830618e+00 -6.2777526944456274e-01 + 4 -1.5818159336499285e+00 1.4837407818929933e+00 -1.2538710836062004e+00 + 5 -9.0719763672789266e-01 9.2652103885675297e-01 3.9954210488374786e-01 + 6 2.4831720524855988e-01 2.8313021497871271e-01 -1.2314233331711453e+00 + 7 3.4143527641386412e-01 -2.2646551041391422e-02 -2.5292291414903052e+00 + 8 1.1743552229100009e+00 -4.8863228565853944e-01 -6.3783432910825522e-01 + 9 1.3800524229500313e+00 -2.5274721030406683e-01 2.8353985887095157e-01 + 10 2.0510765220543883e+00 -1.4604063740302866e+00 -9.8323745081712954e-01 + 11 1.7878031944442556e+00 -1.9921863272948861e+00 -1.8890602447625777e+00 + 12 3.0063007039340053e+00 -4.9013350496963298e-01 -1.6231898107386231e+00 + 13 4.0515402959192999e+00 -8.9202011606653986e-01 -1.6400005529924957e+00 + 14 2.6066963345543819e+00 -4.1789253965514156e-01 -2.6634003608794394e+00 + 15 2.9695287185712913e+00 5.5422613165234036e-01 -1.2342022021790127e+00 + 16 2.6747029695228521e+00 -2.4124119054564295e+00 -2.3435746150616152e-02 + 17 2.2153577785283796e+00 -2.0897985186907717e+00 1.1963150794479436e+00 + 18 2.1369701704094664e+00 3.0158507413593139e+00 -3.5179348337135590e+00 + 19 1.5355837135395243e+00 2.6255292354730009e+00 -4.2353987771401354e+00 + 20 2.7727573003748263e+00 3.6923910441179069e+00 -3.9330842453167185e+00 + 21 4.9040128073837339e+00 -4.0752348170758461e+00 -3.6210314709795299e+00 + 22 4.3582355554510048e+00 -4.2126119427061379e+00 -4.4612844196307497e+00 + 23 5.7439382849366911e+00 -3.5821957939240279e+00 -3.8766361295959513e+00 + 24 2.0689243582454213e+00 3.1513346907303501e+00 3.1550389751128463e+00 + 25 1.3045351331414130e+00 3.2665125705869009e+00 2.5111855257365274e+00 + 26 2.5809237402714267e+00 4.0117602605512728e+00 3.2212060528800821e+00 + 27 -1.9611343130357228e+00 -4.3563411931359752e+00 2.1098293115523705e+00 + 28 -2.7473562684513411e+00 -4.0200819932379330e+00 1.5830052163433954e+00 + 29 -1.3126000191359855e+00 -3.5962518039482929e+00 2.2746342468737835e+00 +run_vel: ! |2 + 1 8.1705744183262832e-03 1.6516406176274298e-02 4.7902264318913203e-03 + 2 5.4501493445687828e-03 5.1791699408496447e-03 -1.4372931530376549e-03 + 3 -8.2298292722385574e-03 -1.2926551614621364e-02 -4.0984181178163699e-03 + 4 -3.7699042590093523e-03 -6.5722892098813894e-03 -1.1184640360133299e-03 + 5 -1.1021961004346582e-02 -9.8906780939336091e-03 -2.8410737829284408e-03 + 6 -3.9676663166400027e-02 4.6817061464710263e-02 3.7148491979476131e-02 + 7 9.1033953013898742e-04 -1.0128524411938794e-02 -5.1568251805019748e-02 + 8 7.9064712058855742e-03 -3.3507254552631576e-03 3.4557098492564650e-02 + 9 1.5644176117320932e-03 3.7365546102722212e-03 1.5047408822037651e-02 + 10 2.9201446820573192e-02 -2.9249578745486147e-02 -1.5018077424322544e-02 + 11 -4.7835961513517542e-03 -3.7481385134185202e-03 -2.3464104142290089e-03 + 12 2.2696451841920694e-03 -3.4774154398129690e-04 -3.0640770327796979e-03 + 13 2.7531740451953164e-03 5.8171061612840493e-03 -7.9467454022160377e-04 + 14 3.5246182371994183e-03 -5.7939995585585503e-03 -3.9478431172751344e-03 + 15 -1.8547943640122972e-03 -5.8554729942777778e-03 6.2938485140538692e-03 + 16 1.8681499973445252e-02 -1.3262466204585332e-02 -4.5638651457003250e-02 + 17 -1.2896269981100378e-02 9.7527665265956451e-03 3.7296535360836762e-02 + 18 -8.0065795274987550e-04 -8.6270473974390605e-04 -1.4483040536385806e-03 + 19 1.2452390067376805e-03 -2.5061097800836356e-03 7.2998639311871892e-03 + 20 3.5930058460518109e-03 3.6938852051849871e-03 3.2322738480194770e-03 + 21 -1.4689219756961604e-03 -2.7352107824530231e-04 7.0581625180892046e-04 + 22 -7.0694199165145140e-03 -4.2577148692717554e-03 2.8079117911323815e-04 + 23 6.0446963236685256e-03 -1.4000131545098772e-03 2.5819754799379755e-03 + 24 3.1926368451268056e-04 -9.9445664487428712e-04 1.4999960207062358e-04 + 25 1.3789752933078488e-04 -4.4335894831520773e-03 -8.1808138106080109e-04 + 26 2.0485904023410002e-03 2.7813358660936120e-03 4.3245726853349290e-03 + 27 4.5604120293369819e-04 -1.0305523026921102e-03 2.1188058381358391e-04 + 28 -6.2544520861855151e-03 1.4127711176146864e-03 -1.8429821884794260e-03 + 29 6.4110631534402261e-04 3.1273432719593807e-03 3.7253671105656745e-03 +... diff --git a/unittest/force-styles/tests/gauss_exp.txt b/unittest/force-styles/tests/gauss_exp.txt new file mode 100644 index 0000000000..30c637d124 --- /dev/null +++ b/unittest/force-styles/tests/gauss_exp.txt @@ -0,0 +1,6 @@ +# Gaussian orbital exponents (required for fix qtpie/reaxff) taken from Table 2.2 +# of Chen, J. (2009). Theory and applications of fluctuating-charge models. +# The units of the exponents are 1 / (Bohr radius)^2 . +1 0.5434 # H +2 0.2069 # C +3 0.2240 # O diff --git a/unittest/force-styles/tests/in.cmap b/unittest/force-styles/tests/in.cmap new file mode 100644 index 0000000000..6a731ea759 --- /dev/null +++ b/unittest/force-styles/tests/in.cmap @@ -0,0 +1,33 @@ +variable newton_pair index on +variable newton_bond index on +variable bond_factor index 0.10 +variable angle_factor index 0.25 +variable dihedral_factor index 0.50 +variable units index real +variable input_dir index . +variable data_file index ${input_dir}/data.cmap +variable pair_style index 'zero 8.0' +variable bond_style index zero +variable angle_style index zero +variable dihedral_style index zero +variable improper_style index zero +variable t_target index 100.0 + +atom_style full +atom_modify map array +neigh_modify delay 2 every 2 check no +units ${units} +timestep 0.1 +newton ${newton_pair} ${newton_bond} +special_bonds lj/coul ${bond_factor} ${angle_factor} ${dihedral_factor} + +pair_style ${pair_style} +bond_style ${bond_style} +angle_style ${angle_style} +dihedral_style ${dihedral_style} +improper_style ${improper_style} + +fix cmap all cmap charmm36.cmap +fix_modify cmap energy yes + +read_data ${data_file} fix cmap crossterm CMAP diff --git a/unittest/formats/test_atom_styles.cpp b/unittest/formats/test_atom_styles.cpp index 921d469e31..f41584876b 100644 --- a/unittest/formats/test_atom_styles.cpp +++ b/unittest/formats/test_atom_styles.cpp @@ -347,8 +347,8 @@ void ASSERT_ATOM_STATE_EQ(Atom *atom, const AtomState &expected) ASSERT_ARRAY_ALLOCATED(atom->x, expected.has_x); ASSERT_ARRAY_ALLOCATED(atom->v, expected.has_v); ASSERT_ARRAY_ALLOCATED(atom->f, expected.has_f); - ASSERT_ARRAY_ALLOCATED(atom->q, expected.q_flag); ASSERT_ARRAY_ALLOCATED(atom->mu, expected.mu_flag); + ASSERT_ARRAY_ALLOCATED(atom->q, expected.q_flag); ASSERT_ARRAY_ALLOCATED(atom->omega, expected.omega_flag); ASSERT_ARRAY_ALLOCATED(atom->angmom, expected.angmom_flag);