Merge pull request #4330 from akohlmey/rheo-gsl-to-lapack

Convert RHEO package to use plain LAPACK instead of GSL
This commit is contained in:
Axel Kohlmeyer
2024-09-21 00:00:13 -04:00
committed by GitHub
22 changed files with 327 additions and 76 deletions

View File

@ -27,9 +27,9 @@ jobs:
- name: Install extra packages - name: Install extra packages
run: | run: |
sudo apt-get update
sudo apt-get install -y ccache \ sudo apt-get install -y ccache \
libeigen3-dev \ libeigen3-dev \
libgsl-dev \
libcurl4-openssl-dev \ libcurl4-openssl-dev \
mold \ mold \
mpi-default-bin \ mpi-default-bin \

View File

@ -30,8 +30,9 @@ jobs:
- name: Install extra packages - name: Install extra packages
run: | run: |
sudo apt-get update
sudo apt-get install -y ccache ninja-build libeigen3-dev \ sudo apt-get install -y ccache ninja-build libeigen3-dev \
libgsl-dev libcurl4-openssl-dev python3-dev \ libcurl4-openssl-dev python3-dev \
mpi-default-bin mpi-default-dev mpi-default-bin mpi-default-dev
- name: Create Build Environment - name: Create Build Environment

View File

@ -34,8 +34,9 @@ jobs:
- name: Install extra packages - name: Install extra packages
run: | run: |
sudo apt-get update
sudo apt-get install -y ccache ninja-build libeigen3-dev \ sudo apt-get install -y ccache ninja-build libeigen3-dev \
libgsl-dev libcurl4-openssl-dev python3-dev \ libcurl4-openssl-dev python3-dev \
mpi-default-bin mpi-default-dev mpi-default-bin mpi-default-dev
- name: Create Build Environment - name: Create Build Environment

View File

@ -31,9 +31,9 @@ jobs:
- name: Install extra packages - name: Install extra packages
run: | run: |
sudo apt-get update
sudo apt-get install -y ccache \ sudo apt-get install -y ccache \
libeigen3-dev \ libeigen3-dev \
libgsl-dev \
libcurl4-openssl-dev \ libcurl4-openssl-dev \
mold \ mold \
ninja-build \ ninja-build \

View File

@ -497,7 +497,7 @@ if((CMAKE_CXX_COMPILER_ID STREQUAL "Intel") AND (CMAKE_CXX_STANDARD GREATER_EQUA
PROPERTIES COMPILE_OPTIONS "-std=c++14") PROPERTIES COMPILE_OPTIONS "-std=c++14")
endif() endif()
if(PKG_ATC OR PKG_AWPMD OR PKG_ML-QUIP OR PKG_ML-POD OR PKG_ELECTRODE OR BUILD_TOOLS) if(PKG_ATC OR PKG_AWPMD OR PKG_ML-QUIP OR PKG_ML-POD OR PKG_ELECTRODE OR PKG_RHEO OR BUILD_TOOLS)
enable_language(C) enable_language(C)
if (NOT USE_INTERNAL_LINALG) if (NOT USE_INTERNAL_LINALG)
find_package(LAPACK) find_package(LAPACK)
@ -572,7 +572,7 @@ else()
endif() endif()
foreach(PKG_WITH_INCL KSPACE PYTHON ML-IAP VORONOI COLVARS ML-HDNNP MDI MOLFILE NETCDF foreach(PKG_WITH_INCL KSPACE PYTHON ML-IAP VORONOI COLVARS ML-HDNNP MDI MOLFILE NETCDF
PLUMED QMMM ML-QUIP SCAFACOS MACHDYN VTK KIM COMPRESS ML-PACE LEPTON RHEO EXTRA-COMMAND) PLUMED QMMM ML-QUIP SCAFACOS MACHDYN VTK KIM COMPRESS ML-PACE LEPTON EXTRA-COMMAND)
if(PKG_${PKG_WITH_INCL}) if(PKG_${PKG_WITH_INCL})
include(Packages/${PKG_WITH_INCL}) include(Packages/${PKG_WITH_INCL})
endif() endif()

View File

@ -1,2 +0,0 @@
find_package(GSL 2.6 REQUIRED)
target_link_libraries(lammps PRIVATE GSL::gsl)

View File

@ -67,6 +67,7 @@ set(WIN_PACKAGES
REACTION REACTION
REAXFF REAXFF
REPLICA REPLICA
RHEO
RIGID RIGID
SHOCK SHOCK
SMTBQ SMTBQ

View File

@ -60,6 +60,7 @@ set(ALL_PACKAGES
REACTION REACTION
REAXFF REAXFF
REPLICA REPLICA
RHEO
RIGID RIGID
SHOCK SHOCK
SPH SPH

View File

@ -60,6 +60,7 @@ set(WIN_PACKAGES
REACTION REACTION
REAXFF REAXFF
REPLICA REPLICA
RHEO
RIGID RIGID
SHOCK SHOCK
SMTBQ SMTBQ

View File

@ -2251,28 +2251,38 @@ verified to work in February 2020 with Quantum Espresso versions 6.3 to
RHEO package RHEO package
------------ ------------
To build with this package you must have the `GNU Scientific Library This package depends on the BPM package.
(GSL) <https://www.gnu.org/software/gsl/>` installed in locations that
are accessible in your environment. The GSL library should be at least
version 2.7.
.. tabs:: .. tabs::
.. tab:: CMake build .. tab:: CMake build
If CMake cannot find the GSL library or include files, you can set:
.. code-block:: bash .. code-block:: bash
-D GSL_ROOT_DIR=path # path to root of GSL installation -D PKG_RHEO=yes # enable the package itself
-D PKG_BPM=yes # the RHEO package requires BPM
-D USE_INTERNAL_LINALG=value # prefer internal LAPACK if true
Some features in the RHEO package are dependent on code in the BPM
package so the latter one *must* be enabled as well.
The RHEO package also requires LAPACK (and BLAS) and CMake
can identify their locations and pass that info to the RHEO
build script. But on some systems this may cause problems when
linking or the dependency is not desired. By using the setting
``-D USE_INTERNAL_LINALG=yes`` when running the CMake
configuration, you will select compiling and linking the bundled
linear algebra library and work around the limitations.
.. tab:: Traditional make .. tab:: Traditional make
LAMMPS will try to auto-detect the GSL compiler and linker flags The RHEO package requires LAPACK (and BLAS) which can be either
from the corresponding ``pkg-config`` file (``gsl.pc``), otherwise a system provided library or the bundled "linalg" library. This
you can edit the file ``lib/rheo/Makefile.lammps`` is a subset of LAPACK translated to C++. For that, one of the
to specify the paths and library names where indicated by comments. provided ``Makefile.lammps.<config>`` files needs to be copied
This must be done **before** the package is installed. to ``Makefile.lammps`` and edited as needed. The default file
uses the bundled "linalg" library, which can be built by
``make lib-linalg args='-m serial'`` in the ``src`` folder.
---------- ----------

77
lib/linalg/dlauu2.cpp Normal file
View File

@ -0,0 +1,77 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static doublereal c_b7 = 1.;
static integer c__1 = 1;
int dlauu2_(char *uplo, integer *n, doublereal *a, integer *lda, integer *info, ftnlen uplo_len)
{
integer a_dim1, a_offset, i__1, i__2, i__3;
integer i__;
doublereal aii;
extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, integer *);
extern int dscal_(integer *, doublereal *, doublereal *, integer *);
extern logical lsame_(char *, char *, ftnlen, ftnlen);
extern int dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *,
doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen);
logical upper;
extern int xerbla_(char *, integer *, ftnlen);
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
*info = 0;
upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1);
if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*lda < max(1, *n)) {
*info = -4;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DLAUU2", &i__1, (ftnlen)6);
return 0;
}
if (*n == 0) {
return 0;
}
if (upper) {
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
aii = a[i__ + i__ * a_dim1];
if (i__ < *n) {
i__2 = *n - i__ + 1;
a[i__ + i__ * a_dim1] =
ddot_(&i__2, &a[i__ + i__ * a_dim1], lda, &a[i__ + i__ * a_dim1], lda);
i__2 = i__ - 1;
i__3 = *n - i__;
dgemv_((char *)"No transpose", &i__2, &i__3, &c_b7, &a[(i__ + 1) * a_dim1 + 1], lda,
&a[i__ + (i__ + 1) * a_dim1], lda, &aii, &a[i__ * a_dim1 + 1], &c__1,
(ftnlen)12);
} else {
dscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1);
}
}
} else {
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
aii = a[i__ + i__ * a_dim1];
if (i__ < *n) {
i__2 = *n - i__ + 1;
a[i__ + i__ * a_dim1] =
ddot_(&i__2, &a[i__ + i__ * a_dim1], &c__1, &a[i__ + i__ * a_dim1], &c__1);
i__2 = *n - i__;
i__3 = i__ - 1;
dgemv_((char *)"Transpose", &i__2, &i__3, &c_b7, &a[i__ + 1 + a_dim1], lda,
&a[i__ + 1 + i__ * a_dim1], &c__1, &aii, &a[i__ + a_dim1], lda, (ftnlen)9);
} else {
dscal_(&i__, &aii, &a[i__ + a_dim1], lda);
}
}
}
return 0;
}
#ifdef __cplusplus
}
#endif

101
lib/linalg/dlauum.cpp Normal file
View File

@ -0,0 +1,101 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
static integer c__1 = 1;
static integer c_n1 = -1;
static doublereal c_b15 = 1.;
int dlauum_(char *uplo, integer *n, doublereal *a, integer *lda, integer *info, ftnlen uplo_len)
{
integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
integer i__, ib, nb;
extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *,
integer *, doublereal *, integer *, doublereal *, doublereal *, integer *,
ftnlen, ftnlen);
extern logical lsame_(char *, char *, ftnlen, ftnlen);
extern int dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *,
doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen,
ftnlen);
logical upper;
extern int dsyrk_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *,
doublereal *, doublereal *, integer *, ftnlen, ftnlen),
dlauu2_(char *, integer *, doublereal *, integer *, integer *, ftnlen),
xerbla_(char *, integer *, ftnlen);
extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *,
ftnlen, ftnlen);
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
*info = 0;
upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1);
if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*lda < max(1, *n)) {
*info = -4;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DLAUUM", &i__1, (ftnlen)6);
return 0;
}
if (*n == 0) {
return 0;
}
nb = ilaenv_(&c__1, (char *)"DLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
if (nb <= 1 || nb >= *n) {
dlauu2_(uplo, n, &a[a_offset], lda, info, (ftnlen)1);
} else {
if (upper) {
i__1 = *n;
i__2 = nb;
for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
i__3 = nb, i__4 = *n - i__ + 1;
ib = min(i__3, i__4);
i__3 = i__ - 1;
dtrmm_((char *)"Right", (char *)"Upper", (char *)"Transpose", (char *)"Non-unit", &i__3, &ib, &c_b15,
&a[i__ + i__ * a_dim1], lda, &a[i__ * a_dim1 + 1], lda, (ftnlen)5, (ftnlen)5,
(ftnlen)9, (ftnlen)8);
dlauu2_((char *)"Upper", &ib, &a[i__ + i__ * a_dim1], lda, info, (ftnlen)5);
if (i__ + ib <= *n) {
i__3 = i__ - 1;
i__4 = *n - i__ - ib + 1;
dgemm_((char *)"No transpose", (char *)"Transpose", &i__3, &ib, &i__4, &c_b15,
&a[(i__ + ib) * a_dim1 + 1], lda, &a[i__ + (i__ + ib) * a_dim1], lda,
&c_b15, &a[i__ * a_dim1 + 1], lda, (ftnlen)12, (ftnlen)9);
i__3 = *n - i__ - ib + 1;
dsyrk_((char *)"Upper", (char *)"No transpose", &ib, &i__3, &c_b15,
&a[i__ + (i__ + ib) * a_dim1], lda, &c_b15, &a[i__ + i__ * a_dim1], lda,
(ftnlen)5, (ftnlen)12);
}
}
} else {
i__2 = *n;
i__1 = nb;
for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
i__3 = nb, i__4 = *n - i__ + 1;
ib = min(i__3, i__4);
i__3 = i__ - 1;
dtrmm_((char *)"Left", (char *)"Lower", (char *)"Transpose", (char *)"Non-unit", &ib, &i__3, &c_b15,
&a[i__ + i__ * a_dim1], lda, &a[i__ + a_dim1], lda, (ftnlen)4, (ftnlen)5,
(ftnlen)9, (ftnlen)8);
dlauu2_((char *)"Lower", &ib, &a[i__ + i__ * a_dim1], lda, info, (ftnlen)5);
if (i__ + ib <= *n) {
i__3 = i__ - 1;
i__4 = *n - i__ - ib + 1;
dgemm_((char *)"Transpose", (char *)"No transpose", &ib, &i__3, &i__4, &c_b15,
&a[i__ + ib + i__ * a_dim1], lda, &a[i__ + ib + a_dim1], lda, &c_b15,
&a[i__ + a_dim1], lda, (ftnlen)9, (ftnlen)12);
i__3 = *n - i__ - ib + 1;
dsyrk_((char *)"Lower", (char *)"Transpose", &ib, &i__3, &c_b15, &a[i__ + ib + i__ * a_dim1],
lda, &c_b15, &a[i__ + i__ * a_dim1], lda, (ftnlen)5, (ftnlen)9);
}
}
}
}
return 0;
}
#ifdef __cplusplus
}
#endif

40
lib/linalg/dpotri.cpp Normal file
View File

@ -0,0 +1,40 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "lmp_f2c.h"
int dpotri_(char *uplo, integer *n, doublereal *a, integer *lda, integer *info, ftnlen uplo_len)
{
integer a_dim1, a_offset, i__1;
extern logical lsame_(char *, char *, ftnlen, ftnlen);
extern int xerbla_(char *, integer *, ftnlen),
dlauum_(char *, integer *, doublereal *, integer *, integer *, ftnlen),
dtrtri_(char *, char *, integer *, doublereal *, integer *, integer *, ftnlen, ftnlen);
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
*info = 0;
if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*lda < max(1, *n)) {
*info = -4;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_((char *)"DPOTRI", &i__1, (ftnlen)6);
return 0;
}
if (*n == 0) {
return 0;
}
dtrtri_(uplo, (char *)"Non-unit", n, &a[a_offset], lda, info, (ftnlen)1, (ftnlen)8);
if (*info > 0) {
return 0;
}
dlauum_(uplo, n, &a[a_offset], lda, info, (ftnlen)1);
return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,14 +1,5 @@
# Settings that the LAMMPS build will import when this package is installed # Settings that the LAMMPS build will import when this package library is used
ifeq ($(strip $(shell pkg-config --version)),) rheo_SYSINC =
# manual configuration w/o pkg-config/pkgconf rheo_SYSLIB = -llinalg
# change this to -I/path/to/your/lib/gsl/include/ rheo_SYSPATH = -L../../lib/linalg$(LIBOBJDIR)
rheo_SYSINC = -I../../lib/rheo/gsl/include/
# change this to -L/path/to/your/lib/gsl/lib/
rheo_SYSLIB = -L../../lib/rheo/gsl/lib/ -lgsl -lgslcblas
else
# autodetect GSL settings from pkg-config/pkgconf
rheo_SYSINC = $(shell pkg-config --cflags gsl)
rheo_SYSLIB = $(shell pkg-config --libs gsl)
endif

View File

@ -0,0 +1,5 @@
# Settings that the LAMMPS build will import when this package library is used
rheo_SYSINC =
rheo_SYSLIB =
rheo_SYSPATH =

View File

@ -0,0 +1,5 @@
# Settings that the LAMMPS build will import when this package library is used
rheo_SYSINC =
rheo_SYSLIB = -lblas -llapack
rheo_SYSPATH =

View File

@ -0,0 +1,5 @@
# Settings that the LAMMPS build will import when this package library is used
rheo_SYSINC =
rheo_SYSLIB = -llinalg
rheo_SYSPATH = -L../../lib/linalg$(LIBOBJDIR)

View File

@ -1,7 +1,5 @@
This directory has a Makefile.lammps file with settings that allows LAMMPS to This directory has multiple Makefile.lammps variant files with settings that
dynamically link to the GSL library. This is required to use the RHEO package allows LAMMPS to link with a BLAS/LAPACK or compatible library or the bundled
in a LAMMPS input script. If you have the pkg-config command available, it linalg library (which is subset of BLAS/LAPACK). Copy the suitable file
will automatically import the GSL settings. Otherwise they will have to be to Makefile.lammps and edit, if needed.
added manually. This is required to use the RHEO package in a LAMMPS input script.
See the header of Makefile.lammps for more info.

View File

@ -47,6 +47,7 @@ if (test $1 = 1) then
sed -i -e 's/[^ \t]*rheo[^ \t]* //' ../Makefile.package sed -i -e 's/[^ \t]*rheo[^ \t]* //' ../Makefile.package
sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(rheo_SYSINC) |' ../Makefile.package sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(rheo_SYSINC) |' ../Makefile.package
sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(rheo_SYSLIB) |' ../Makefile.package sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(rheo_SYSLIB) |' ../Makefile.package
sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(rheo_SYSPATH) |' ../Makefile.package
fi fi
if (test -e ../Makefile.package.settings) then if (test -e ../Makefile.package.settings) then

View File

@ -3,8 +3,9 @@ multiphase fluid systems. The authors include Joel Clemmer (Sandia), Thomas
O'Connor (Carnegie Mellon), and Eric Palermo (Carnegie Mellon). O'Connor (Carnegie Mellon), and Eric Palermo (Carnegie Mellon).
Bond style rheo/shell, compute style rheo/property/atom, and fix style Bond style rheo/shell, compute style rheo/property/atom, and fix style
rheo/temperature all depend on the BPM package. rheo/temperature depend on the BPM package, so it is required to install
the BPM package with RHEO.
This package requires the GNU scientific library (GSL). We recommend version This package requires the BLAS/LAPACK. This can be either a seperate installation
2.7 or later. To build this package, one must first separately install GSL in or you can use the bundled "linalg" library. Please see the LAMMPS manual at
a location that can be found by your environment. https://docs.lammps.org/Build_extras.html#rheo for details.

View File

@ -37,10 +37,6 @@
#include "utils.h" #include "utils.h"
#include <cmath> #include <cmath>
#include <gsl/gsl_cblas.h>
#include <gsl/gsl_errno.h>
#include <gsl/gsl_linalg.h>
#include <gsl/gsl_matrix.h>
using namespace LAMMPS_NS; using namespace LAMMPS_NS;
using namespace RHEO_NS; using namespace RHEO_NS;
@ -50,6 +46,13 @@ using namespace MathExtra;
// max value of Mdim 1 + dim + dim * (dim + 1) / 2 with dim = 3 // max value of Mdim 1 + dim + dim * (dim + 1) / 2 with dim = 3
static constexpr int MAX_MDIM = 12; static constexpr int MAX_MDIM = 12;
// declare LAPACK functions
extern "C" {
void dpotrf_(const char *uplo, const int *n, double *a, const int *lda, int *info);
void dpotri_(const char *uplo, const int *n, double *a, const int *lda, int *info);
}
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
ComputeRHEOKernel::ComputeRHEOKernel(LAMMPS *lmp, int narg, char **arg) : ComputeRHEOKernel::ComputeRHEOKernel(LAMMPS *lmp, int narg, char **arg) :
@ -89,7 +92,7 @@ ComputeRHEOKernel::ComputeRHEOKernel(LAMMPS *lmp, int narg, char **arg) :
comm_forward_save = comm_forward; comm_forward_save = comm_forward;
corrections_calculated = 0; corrections_calculated = 0;
gsl_error_flag = 0; lapack_error_flag = 0;
} }
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
@ -156,9 +159,9 @@ void ComputeRHEOKernel::init_list(int /*id*/, NeighList *ptr)
int ComputeRHEOKernel::check_corrections(int i) int ComputeRHEOKernel::check_corrections(int i)
{ {
// Skip if there were gsl errors for this atom // Skip if there were lapack errors for this atom
if (gsl_error_flag) if (lapack_error_flag)
if (gsl_error_tags.find(atom->tag[i]) != gsl_error_tags.end()) return 0; if (lapack_error_tags.find(atom->tag[i]) != lapack_error_tags.end()) return 0;
// Skip if undercoordinated // Skip if undercoordinated
if (coordination[i] < zmin) return 0; if (coordination[i] < zmin) return 0;
@ -558,19 +561,15 @@ void ComputeRHEOKernel::calc_dw_rk2(int i, double delx, double dely, double delz
void ComputeRHEOKernel::compute_peratom() void ComputeRHEOKernel::compute_peratom()
{ {
gsl_error_flag = 0; lapack_error_flag = 0;
gsl_error_tags.clear(); lapack_error_tags.clear();
if (kernel_style == QUINTIC) return; if (kernel_style == QUINTIC) return;
corrections_calculated = 1; corrections_calculated = 1;
int i, j, ii, jj, inum, jnum, a, b, gsl_error; int i, j, ii, jj, inum, jnum, a, b, lapack_error;
double xtmp, ytmp, ztmp, r, rsq, w, vj, rhoj; double xtmp, ytmp, ztmp, r, rsq, w, vj, rhoj;
double dx[3]; double dx[3];
gsl_matrix_view gM;
// Turn off GSL error handler, revert RK to Quintic when insufficient neighbors
gsl_set_error_handler_off();
double **x = atom->x; double **x = atom->x;
int *type = atom->type; int *type = atom->type;
@ -633,7 +632,7 @@ void ComputeRHEOKernel::compute_peratom()
} }
} else if (correction_order > 0) { } else if (correction_order > 0) {
// Moment matrix M and polynomial basis vector cut (1d for gsl compatibility) // Moment matrix M and polynomial basis vector cut (1d for LAPACK compatibility)
double H[MAX_MDIM], M[MAX_MDIM * MAX_MDIM]; double H[MAX_MDIM], M[MAX_MDIM * MAX_MDIM];
for (ii = 0; ii < inum; ii++) { for (ii = 0; ii < inum; ii++) {
@ -647,7 +646,9 @@ void ComputeRHEOKernel::compute_peratom()
// Zero upper-triangle M and cut (will be symmetric): // Zero upper-triangle M and cut (will be symmetric):
for (a = 0; a < Mdim; a++) { for (a = 0; a < Mdim; a++) {
for (b = a; b < Mdim; b++) { M[a * Mdim + b] = 0; } for (b = a; b < Mdim; b++) {
M[a * Mdim + b] = 0;
}
} }
for (jj = 0; jj < jnum; jj++) { for (jj = 0; jj < jnum; jj++) {
@ -700,37 +701,50 @@ void ComputeRHEOKernel::compute_peratom()
// Populate the upper triangle // Populate the upper triangle
for (a = 0; a < Mdim; a++) { for (a = 0; a < Mdim; a++) {
for (b = a; b < Mdim; b++) { M[a * Mdim + b] += H[a] * H[b] * w * vj; } for (b = a; b < Mdim; b++) {
M[a * Mdim + b] += H[a] * H[b] * w * vj;
}
} }
} }
} }
// Populate the lower triangle from the symmetric entries of M: // Populate the lower triangle from the symmetric entries of M:
for (a = 0; a < Mdim; a++) { for (a = 0; a < Mdim; a++) {
for (b = a; b < Mdim; b++) { M[b * Mdim + a] = M[a * Mdim + b]; } for (b = a; b < Mdim; b++) {
M[b * Mdim + a] = M[a * Mdim + b];
}
} }
// Skip if undercoordinated // Skip if undercoordinated
if (coordination[i] < zmin) continue; if (coordination[i] < zmin) continue;
// Use gsl to get Minv, use Cholesky decomposition since the // Use LAPACK to get Minv, use Cholesky decomposition since the
// polynomials are independent, M is symmetrix & positive-definite // polynomials are independent, M is symmetrix & positive-definite
gM = gsl_matrix_view_array(M, Mdim, Mdim); const char uplo = 'U';
gsl_error = gsl_linalg_cholesky_decomp1(&gM.matrix); dpotrf_(&uplo, &Mdim, M, &Mdim, &lapack_error);
if (gsl_error) { if (lapack_error) {
//Revert to uncorrected SPH for this particle // Revert to uncorrected SPH for this particle
gsl_error_flag = 1; lapack_error_flag = 1;
gsl_error_tags.insert(tag[i]); lapack_error_tags.insert(tag[i]);
//check if not positive-definite // check if not positive-definite
if (gsl_error != GSL_EDOM) if (lapack_error > 0)
error->warning(FLERR, "Failed decomposition in rheo/kernel, gsl_error = {}", gsl_error); error->warning(FLERR, "Failed DPOTRF2 decomposition in rheo/kernel, info = {}",
lapack_error);
continue; continue;
} }
gsl_linalg_cholesky_invert(&gM.matrix); //M is now M^-1 // M is now M^-1
dpotri_(&uplo, &Mdim, M, &Mdim, &lapack_error);
// make result matrix symmetric
for (int i = 0; i < Mdim; ++i) {
for (int j = i+1; j < Mdim; ++j) {
M[i * Mdim + j] = M[j * Mdim + i];
}
}
// Correction coefficients are columns of M^-1 multiplied by an appropriate coefficient // Correction coefficients are columns of M^-1 multiplied by an appropriate coefficient
// Solve the linear system several times to get coefficientns // Solve the linear system several times to get coefficientns

View File

@ -53,8 +53,8 @@ class ComputeRHEOKernel : public Compute {
private: private:
int comm_stage, comm_forward_save; int comm_stage, comm_forward_save;
int interface_flag; int interface_flag;
int gsl_error_flag; int lapack_error_flag;
std::unordered_set<tagint> gsl_error_tags; std::unordered_set<tagint> lapack_error_tags;
int corrections_calculated; int corrections_calculated;
int kernel_style, zmin, dim, Mdim, ncor; int kernel_style, zmin, dim, Mdim, ncor;