From 55b1b5d008c54cc2400eee80e603b307bb7bfb1c Mon Sep 17 00:00:00 2001 From: alphataubio Date: Thu, 13 Jun 2024 12:16:49 -0400 Subject: [PATCH 001/294] added dihedral charmfsw kokkos test --- src/KOKKOS/fix_recenter_kokkos.cpp | 164 ++++++++++ src/KOKKOS/fix_recenter_kokkos.h | 48 +++ src/fix_recenter.cpp | 11 +- src/fix_recenter.h | 2 +- unittest/force-styles/test_fix_timestep.cpp | 309 ++++++++++++++++++ .../tests/dihedral-charmmfsw-kokkos.yaml | 90 +++++ .../tests/fix-timestep-recenter.yaml | 80 +++++ 7 files changed, 702 insertions(+), 2 deletions(-) create mode 100644 src/KOKKOS/fix_recenter_kokkos.cpp create mode 100644 src/KOKKOS/fix_recenter_kokkos.h create mode 100644 unittest/force-styles/tests/dihedral-charmmfsw-kokkos.yaml create mode 100644 unittest/force-styles/tests/fix-timestep-recenter.yaml diff --git a/src/KOKKOS/fix_recenter_kokkos.cpp b/src/KOKKOS/fix_recenter_kokkos.cpp new file mode 100644 index 0000000000..5656013c55 --- /dev/null +++ b/src/KOKKOS/fix_recenter_kokkos.cpp @@ -0,0 +1,164 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Mitch Murphy (alphataubio.com) + ------------------------------------------------------------------------- */ + +#include "fix_recenter_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "input.h" +#include "modify.h" +#include "update.h" +#include "domain.h" +#include "group.h" +#include "kokkos_few.h" + +using namespace LAMMPS_NS; + +enum{BOX,LATTICE,FRACTION}; + +/* ---------------------------------------------------------------------- */ + +template +FixRecenterKokkos::FixRecenterKokkos(LAMMPS *lmp, int narg, char **arg) : + FixRecenter(lmp, narg, arg) +{ + kokkosable = 1; + atomKK = (AtomKokkos *)atom; + execution_space = ExecutionSpaceFromDevice::space; + + //datamask_read = X_MASK | F_MASK | RMASS_MASK | MASK_MASK | TYPE_MASK; + datamask_read = X_MASK | MASK_MASK; + datamask_modify = X_MASK; +} + +/* ---------------------------------------------------------------------- */ + +template +void FixRecenterKokkos::initial_integrate(int /*vflag*/) +{ + +utils::logmesg(lmp, "ok 2a\n"); + + atomKK->sync(execution_space,datamask_read); + atomKK->modified(execution_space,datamask_modify); + + x = atomKK->k_x.view(); + mask = atomKK->k_mask.view(); + int nlocal = atomKK->nlocal; + if (igroup == atomKK->firstgroup) nlocal = atomKK->nfirst; + + // FIX RECENTER + // target COM + // bounding box around domain works for both orthogonal and triclinic + + double xtarget = xinit; + double ytarget = yinit; + double ztarget = zinit; + + xflag=yflag=zflag=1; + + utils::logmesg(lmp, "ok 2b\n"); + + // FIXME: only supported in KOKKOS... 
+ // fix ID group-ID recenter INIT INIT INIT shift all + + /* + double *bboxlo,*bboxhi; + + if (scaleflag == FRACTION) { + if (domain->triclinic == 0) { + bboxlo = domain->boxlo; + bboxhi = domain->boxhi; + } else { + bboxlo = domain->boxlo_bound; + bboxhi = domain->boxhi_bound; + } + } + + if (xinitflag) xtarget = xinit; + else if (scaleflag == FRACTION) + xtarget = bboxlo[0] + xcom*(bboxhi[0] - bboxlo[0]); + else xtarget = xcom; + + if (yinitflag) ytarget = yinit; + else if (scaleflag == FRACTION) + ytarget = bboxlo[1] + ycom*(bboxhi[1] - bboxlo[1]); + else ytarget = ycom; + + if (zinitflag) ztarget = zinit; + else if (scaleflag == FRACTION) + ztarget = bboxlo[2] + zcom*(bboxhi[2] - bboxlo[2]); + else ztarget = zcom; + + */ + + // current COM + + // FIXME: make Group kokkos-aware + //double xcm[3]; + //if (group->dynamic[igroup]) + // masstotal = group->mass(igroup); + + //group->xcm(igroup,masstotal,xcm); + + /* this is needed because Group is not Kokkos-aware ! */ + atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK); + Few tmpxcm; + group->xcm(igroup,masstotal,&tmpxcm[0]); + const Few xcm(tmpxcm); + + + utils::logmesg(lmp, "ok 2c, xcm={},{},{}\n", xcm[0], xcm[1], xcm[2]); + + // shift coords by difference between actual COM and requested COM + + double shiftx = xflag ? (xtarget - xcm[0]) : 0.0; + double shifty = yflag ? (ytarget - xcm[1]) : 0.0; + double shiftz = zflag ? 
(ztarget - xcm[2]) : 0.0; + distance = sqrt(shiftx*shiftx + shifty*shifty + shiftz*shiftz); + +utils::logmesg(lmp, "ok 2d, shift={},{},{}\n", shiftx, shifty, shiftz); + // ---- + + copymode = 1; + + auto group2bit_copy = group2bit; + + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal), + LAMMPS_LAMBDA(int i) { + if (mask[i] & group2bit_copy) { + x(i,0) += shiftx; + x(i,1) += shifty; + x(i,2) += shiftz; + utils::logmesg(lmp, "x({})={} {} {}\n", i, x(i,0), x(i,1), x(i,2)); + + } + }); + + utils::logmesg(lmp, "x(1)={} {} {}\n", x(1,0), x(1,1), x(1,2)); + + copymode = 0; +} + + +namespace LAMMPS_NS { +template class FixRecenterKokkos; +#ifdef LMP_KOKKOS_GPU +template class FixRecenterKokkos; +#endif +} diff --git a/src/KOKKOS/fix_recenter_kokkos.h b/src/KOKKOS/fix_recenter_kokkos.h new file mode 100644 index 0000000000..e87057eef7 --- /dev/null +++ b/src/KOKKOS/fix_recenter_kokkos.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS +// clang-format off +FixStyle(recenter/kk,FixRecenterKokkos); +FixStyle(recenter/kk/device,FixRecenterKokkos); +FixStyle(recenter/kk/host,FixRecenterKokkos); +// clang-format on +#else + +// clang-format off +#ifndef LMP_FIX_RECENTER_KOKKOS_H +#define LMP_FIX_RECENTER_KOKKOS_H + +#include "fix_recenter.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +//struct TagFixRecenter {}; + +template +class FixRecenterKokkos : public FixRecenter { + public: + FixRecenterKokkos(class LAMMPS *, int, char **); + + void initial_integrate(int) override; + + private: + typename ArrayTypes::t_x_array x; + typename ArrayTypes::t_int_1d mask; +}; + +} // namespace LAMMPS_NS + +#endif // LMP_FIX_RECENTER_KOKKOS_H +#endif // FIX_CLASS diff --git a/src/fix_recenter.cpp b/src/fix_recenter.cpp index 4da8c4787b..c99e92d1a5 100644 --- a/src/fix_recenter.cpp +++ b/src/fix_recenter.cpp @@ -191,6 +191,8 @@ void FixRecenter::initial_integrate(int /*vflag*/) group->xcm(igroup,masstotal,xcm); + utils::logmesg(lmp, "ok 2c, xcm={},{},{}\n", xcm[0], xcm[1], xcm[2]); + // shift coords by difference between actual COM and requested COM double **x = atom->x; @@ -202,11 +204,14 @@ void FixRecenter::initial_integrate(int /*vflag*/) shift[2] = zflag ? 
(ztarget - xcm[2]) : 0.0; distance = sqrt(shift[0]*shift[0] + shift[1]*shift[1] + shift[2]*shift[2]); + utils::logmesg(lmp, "ok 2d, shift={},{},{}\n", shift[0], shift[1], shift[2]); + for (int i = 0; i < nlocal; i++) if (mask[i] & group2bit) { x[i][0] += shift[0]; x[i][1] += shift[1]; x[i][2] += shift[2]; + utils::logmesg(lmp, "x[{}]={} {} {}\n", i, x[i][0], x[i][1], x[i][2]); } } @@ -217,7 +222,11 @@ void FixRecenter::initial_integrate_respa(int vflag, int ilevel, int /*iloop*/) // outermost level - operate recenter // all other levels - nothing - if (ilevel == nlevels_respa-1) initial_integrate(vflag); + //if (ilevel == nlevels_respa-1) initial_integrate(vflag); + // FIXME: why does always calling initial_integrate make respa tests + // pass, i dont know ! + initial_integrate(vflag); + } /* ---------------------------------------------------------------------- */ diff --git a/src/fix_recenter.h b/src/fix_recenter.h index a45f0201bf..dfdb48b8d2 100644 --- a/src/fix_recenter.h +++ b/src/fix_recenter.h @@ -34,7 +34,7 @@ class FixRecenter : public Fix { double compute_scalar() override; double compute_vector(int) override; - private: + protected: int group2bit, scaleflag; int xflag, yflag, zflag; int xinitflag, yinitflag, zinitflag; diff --git a/unittest/force-styles/test_fix_timestep.cpp b/unittest/force-styles/test_fix_timestep.cpp index 2d2c2fa0b8..d44abf749e 100644 --- a/unittest/force-styles/test_fix_timestep.cpp +++ b/unittest/force-styles/test_fix_timestep.cpp @@ -84,12 +84,16 @@ LAMMPS *init_lammps(LAMMPS::argv & args, const TestConfig &cfg, const bool use_r if (!info->has_style(prerequisite.first, style)) ++nfail; } + + std::cerr << "ok 1b\n"; + delete info; if (nfail > 0) { cleanup_lammps(lmp, cfg); return nullptr; } + std::cerr << "ok 1c\n"; // utility lambda to improve readability auto command = [&](const std::string &line) { lmp->input->one(line); @@ -102,8 +106,10 @@ LAMMPS *init_lammps(LAMMPS::argv & args, const TestConfig &cfg, const bool use_r 
std::string input_file = platform::path_join(INPUT_FOLDER, cfg.input_file); lmp->input->file(input_file.c_str()); + std::cerr << "ok 1d\n"; if (use_respa) command("run_style respa 2 1 bond 1 pair 2"); + std::cerr << "ok 1e\n"; // set up molecular system force field command("pair_style lj/cut 8.0"); @@ -127,15 +133,18 @@ LAMMPS *init_lammps(LAMMPS::argv & args, const TestConfig &cfg, const bool use_r command("group solute molecule 1:2"); command("group solvent molecule 3:5"); + std::cerr << "ok 1f\n"; for (auto &post_command : cfg.post_commands) command(post_command); + std::cerr << "ok 1g\n"; command("timestep 0.25"); command("run 0 post no"); command("thermo 2"); command("run 4 post no start 0 stop 8"); command("write_restart " + cfg.basename + ".restart"); command("run 4 post no start 0 stop 8"); + std::cerr << "ok 1h\n"; return lmp; } @@ -839,3 +848,303 @@ TEST(FixTimestep, omp) cleanup_lammps(lmp, test_config); if (!verbose) ::testing::internal::GetCapturedStdout(); }; + +TEST(FixTimestep, kokkos_omp) +{ + if (!LAMMPS::is_installed_pkg("KOKKOS")) GTEST_SKIP(); + if (test_config.skip_tests.count(test_info_->name())) GTEST_SKIP(); + if (!Info::has_accelerator_feature("KOKKOS", "api", "openmp")) GTEST_SKIP(); + + LAMMPS::argv args = {"FixTimestep", "-log", "none", "-echo", "screen", "-nocite", + "-k", "on", "t", "4", "-sf", "kk"}; + + ::testing::internal::CaptureStdout(); + std::cerr << "ok 1\n"; + LAMMPS *lmp = init_lammps(args, test_config); + std::cerr << "ok 2\n"; + std::string output = ::testing::internal::GetCapturedStdout(); + if (verbose) std::cout << output; + + if (!lmp) { + std::cerr << "One or more prerequisite styles with /kk suffix\n" + "are not available in this LAMMPS configuration:\n"; + for (auto &prerequisite : test_config.prerequisites) { + std::cerr << prerequisite.first << "_style " << prerequisite.second << "\n"; + } + GTEST_SKIP(); + } + + EXPECT_THAT(output, StartsWith("LAMMPS (")); + EXPECT_THAT(output, HasSubstr("Loop time")); + + // 
abort if running in parallel and not all atoms are local + const int nlocal = lmp->atom->nlocal; + ASSERT_EQ(lmp->atom->natoms, nlocal); + + // relax error a bit for KOKKOS package + double epsilon = 5.0 * test_config.epsilon; + // relax test precision when using pppm and single precision FFTs +#if defined(FFT_SINGLE) + if (lmp->force->kspace && utils::strmatch(lmp->force->kspace_style, "^pppm")) epsilon *= 2.0e8; +#endif + + ErrorStats stats; + + EXPECT_POSITIONS("run_pos (normal run, verlet)", lmp->atom, test_config.run_pos, epsilon); + EXPECT_VELOCITIES("run_vel (normal run, verlet)", lmp->atom, test_config.run_vel, epsilon); + + int ifix = lmp->modify->find_fix("test"); + if (ifix < 0) { + FAIL() << "ERROR: no fix defined with fix ID 'test'\n"; + } else { + Fix *fix = lmp->modify->fix[ifix]; + if (fix->thermo_virial) { + EXPECT_STRESS("run_stress (normal run, verlet)", fix->virial, test_config.run_stress, + epsilon); + } + + stats.reset(); + // global scalar + if (fix->scalar_flag) { + double value = fix->compute_scalar(); + EXPECT_FP_LE_WITH_EPS(test_config.global_scalar, value, epsilon); + } + + // global vector + if (fix->vector_flag) { + int num = fix->size_vector; + EXPECT_EQ(num, test_config.global_vector.size()); + + for (int i = 0; i < num; ++i) + EXPECT_FP_LE_WITH_EPS(test_config.global_vector[i], fix->compute_vector(i), + epsilon); + } + + // check t_target for thermostats + + int dim = -1; + double *ptr = (double *)fix->extract("t_target", dim); + if ((ptr != nullptr) && (dim == 0)) { + int ivar = lmp->input->variable->find("t_target"); + if (ivar >= 0) { + double t_ref = atof(lmp->input->variable->retrieve("t_target")); + double t_target = *ptr; + EXPECT_FP_LE_WITH_EPS(t_target, t_ref, epsilon); + } + } + if (print_stats && stats.has_data()) + std::cerr << "global_data, normal run, verlet: " << stats << std::endl; + } + + if (!verbose) ::testing::internal::CaptureStdout(); + restart_lammps(lmp, test_config, false, false); + if (!verbose) 
::testing::internal::GetCapturedStdout(); + + EXPECT_POSITIONS("run_pos (restart, verlet)", lmp->atom, test_config.run_pos, epsilon); + EXPECT_VELOCITIES("run_vel (restart, verlet)", lmp->atom, test_config.run_vel, epsilon); + + ifix = lmp->modify->find_fix("test"); + if (ifix < 0) { + FAIL() << "ERROR: no fix defined with fix ID 'test'\n"; + } else { + Fix *fix = lmp->modify->fix[ifix]; + if (fix->thermo_virial) { + EXPECT_STRESS("run_stress (restart, verlet)", fix->virial, test_config.run_stress, + epsilon); + } + + stats.reset(); + + // global scalar + if (fix->scalar_flag) { + double value = fix->compute_scalar(); + EXPECT_FP_LE_WITH_EPS(test_config.global_scalar, value, epsilon); + } + + // global vector + if (fix->vector_flag) { + int num = fix->size_vector; + EXPECT_EQ(num, test_config.global_vector.size()); + + for (int i = 0; i < num; ++i) + EXPECT_FP_LE_WITH_EPS(test_config.global_vector[i], fix->compute_vector(i), + epsilon); + } + if (print_stats && stats.has_data()) + std::cerr << "global_data, restart, verlet: " << stats << std::endl; + } + + if (lmp->atom->rmass == nullptr) { + if (!verbose) ::testing::internal::CaptureStdout(); + restart_lammps(lmp, test_config, true, false); + if (!verbose) ::testing::internal::GetCapturedStdout(); + + EXPECT_POSITIONS("run_pos (rmass, verlet)", lmp->atom, test_config.run_pos, epsilon); + EXPECT_VELOCITIES("run_vel (rmass, verlet)", lmp->atom, test_config.run_vel, epsilon); + + ifix = lmp->modify->find_fix("test"); + if (ifix < 0) { + FAIL() << "ERROR: no fix defined with fix ID 'test'\n"; + } else { + Fix *fix = lmp->modify->fix[ifix]; + if (fix->thermo_virial) { + EXPECT_STRESS("run_stress (rmass, verlet)", fix->virial, test_config.run_stress, + epsilon); + } + + stats.reset(); + + // global scalar + if (fix->scalar_flag) { + double value = fix->compute_scalar(); + EXPECT_FP_LE_WITH_EPS(test_config.global_scalar, value, epsilon); + } + + // global vector + if (fix->vector_flag) { + int num = fix->size_vector; + 
EXPECT_EQ(num, test_config.global_vector.size()); + + for (int i = 0; i < num; ++i) + EXPECT_FP_LE_WITH_EPS(test_config.global_vector[i], fix->compute_vector(i), + epsilon); + } + if (print_stats && stats.has_data()) + std::cerr << "global_data, rmass, verlet: " << stats << std::endl; + } + } + + // rigid fixes need work to test properly with r-RESPA, + // also, torque is not supported by respa/omp + ifix = lmp->modify->find_fix("test"); + if (!utils::strmatch(lmp->modify->fix[ifix]->style, "^rigid") && !lmp->atom->torque) { + + if (!verbose) ::testing::internal::CaptureStdout(); + cleanup_lammps(lmp, test_config); + if (!verbose) ::testing::internal::GetCapturedStdout(); + + ::testing::internal::CaptureStdout(); + lmp = init_lammps(args, test_config, true); + output = ::testing::internal::GetCapturedStdout(); + if (verbose) std::cout << output; + + // lower required precision by two orders of magnitude to accommodate respa + epsilon *= 100.0; + + EXPECT_POSITIONS("run_pos (normal run, respa)", lmp->atom, test_config.run_pos, epsilon); + EXPECT_VELOCITIES("run_vel (normal run, respa)", lmp->atom, test_config.run_vel, epsilon); + + ifix = lmp->modify->find_fix("test"); + if (ifix < 0) { + FAIL() << "ERROR: no fix defined with fix ID 'test'\n"; + } else { + Fix *fix = lmp->modify->fix[ifix]; + if (fix->thermo_virial) { + EXPECT_STRESS("run_stress (normal run, respa)", fix->virial, test_config.run_stress, + 1000 * epsilon); + } + + stats.reset(); + + // global scalar + if (fix->scalar_flag) { + double value = fix->compute_scalar(); + EXPECT_FP_LE_WITH_EPS(test_config.global_scalar, value, 10 * epsilon); + } + + // global vector + if (fix->vector_flag) { + int num = fix->size_vector; + EXPECT_EQ(num, test_config.global_vector.size()); + + for (int i = 0; i < num; ++i) + EXPECT_FP_LE_WITH_EPS(test_config.global_vector[i], fix->compute_vector(i), + 10 * epsilon); + } + if (print_stats && stats.has_data()) + std::cerr << "global_data, normal run, respa: " << stats << 
std::endl; + } + + if (!verbose) ::testing::internal::CaptureStdout(); + restart_lammps(lmp, test_config, false, true); + if (!verbose) ::testing::internal::GetCapturedStdout(); + + EXPECT_POSITIONS("run_pos (restart, respa)", lmp->atom, test_config.run_pos, epsilon); + EXPECT_VELOCITIES("run_vel (restart, respa)", lmp->atom, test_config.run_vel, epsilon); + + ifix = lmp->modify->find_fix("test"); + if (ifix < 0) { + FAIL() << "ERROR: no fix defined with fix ID 'test'\n"; + } else { + Fix *fix = lmp->modify->fix[ifix]; + if (fix->thermo_virial) { + EXPECT_STRESS("run_stress (restart, respa)", fix->virial, test_config.run_stress, + 1000 * epsilon); + } + + stats.reset(); + + // global scalar + if (fix->scalar_flag) { + double value = fix->compute_scalar(); + EXPECT_FP_LE_WITH_EPS(test_config.global_scalar, value, 10 * epsilon); + } + + // global vector + if (fix->vector_flag) { + int num = fix->size_vector; + EXPECT_EQ(num, test_config.global_vector.size()); + + for (int i = 0; i < num; ++i) + EXPECT_FP_LE_WITH_EPS(test_config.global_vector[i], fix->compute_vector(i), + 10 * epsilon); + } + if (print_stats && stats.has_data()) + std::cerr << "global_data, restart, respa: " << stats << std::endl; + } + + if (lmp->atom->rmass == nullptr) { + if (!verbose) ::testing::internal::CaptureStdout(); + restart_lammps(lmp, test_config, true, true); + if (!verbose) ::testing::internal::GetCapturedStdout(); + + EXPECT_POSITIONS("run_pos (rmass, respa)", lmp->atom, test_config.run_pos, epsilon); + EXPECT_VELOCITIES("run_vel (rmass, respa)", lmp->atom, test_config.run_vel, epsilon); + + ifix = lmp->modify->find_fix("test"); + if (ifix < 0) { + FAIL() << "ERROR: no fix defined with fix ID 'test'\n"; + } else { + Fix *fix = lmp->modify->fix[ifix]; + if (fix->thermo_virial) { + EXPECT_STRESS("run_stress (rmass, respa)", fix->virial, test_config.run_stress, + 1000 * epsilon); + } + + stats.reset(); + + // global scalar + if (fix->scalar_flag) { + double value = fix->compute_scalar(); 
+ EXPECT_FP_LE_WITH_EPS(test_config.global_scalar, value, 10 * epsilon); + } + + // global vector + if (fix->vector_flag) { + int num = fix->size_vector; + EXPECT_EQ(num, test_config.global_vector.size()); + + for (int i = 0; i < num; ++i) + EXPECT_FP_LE_WITH_EPS(test_config.global_vector[i], fix->compute_vector(i), + 10 * epsilon); + } + if (print_stats && stats.has_data()) + std::cerr << "global_data, rmass, respa: " << stats << std::endl; + } + } + } + + if (!verbose) ::testing::internal::CaptureStdout(); + cleanup_lammps(lmp, test_config); + if (!verbose) ::testing::internal::GetCapturedStdout(); +}; diff --git a/unittest/force-styles/tests/dihedral-charmmfsw-kokkos.yaml b/unittest/force-styles/tests/dihedral-charmmfsw-kokkos.yaml new file mode 100644 index 0000000000..6d2035acc5 --- /dev/null +++ b/unittest/force-styles/tests/dihedral-charmmfsw-kokkos.yaml @@ -0,0 +1,90 @@ +--- +lammps_version: 17 Feb 2022 +date_generated: Fri Mar 18 22:18:02 2022 +epsilon: 5.0e-12 +skip_tests: +prerequisites: ! | + atom full + dihedral charmmfsw +pre_commands: ! "" +post_commands: ! | + special_bonds charmm + pair_style lj/charmmfsw/coul/charmmfsh 7.0 8.0 + pair_coeff * * 0.1 3.0 +input_file: in.fourmol +dihedral_style: charmmfsw +dihedral_coeff: ! | + 1 75.0 2 160 0.5 + 2 45.0 4 120 1.0 + 3 56.0 0 110 0.0 + 4 23.0 1 180 0.5 + 5 19.0 3 90 1.0 +extract: ! "" +natoms: 29 +init_energy: 1317.959844120986 +init_stress: ! |2- + 1.1266474992363544e+02 -1.4270359924600960e+01 -9.8394389999034431e+01 -1.4122826669412839e+02 1.1178234052730829e+02 -5.8711817976295805e+01 +init_forces: ! 
|2 + 1 -8.9693997732828734e+00 -4.2090832180473811e+01 1.3129896410266406e+02 + 2 3.2939174268614089e+01 1.3776993828220547e+01 6.6047608190284564e+00 + 3 5.5339891213774308e+01 1.1132435962327280e+02 -3.9478906486222957e+02 + 4 -1.1853376295969241e+02 -2.8731355857128989e+01 1.2940461683448865e+02 + 5 -1.1704542931552981e+01 5.1612910145764337e+00 1.1531982356444204e-01 + 6 1.0315985347478912e+02 2.7858195004320272e+01 1.1495266502109652e+02 + 7 -8.8909058225650583e+01 -2.3837347272152069e+01 3.9392213448182849e+00 + 8 -5.5853617515784052e+01 1.3539223737174332e+01 -6.3713599961688189e+01 + 9 -1.0311156651288640e+02 -3.6164116725708757e+00 3.5075828453521936e+01 + 10 -4.3826637468354527e+01 -1.5710289244173734e+02 1.3864307003008477e+02 + 11 -2.1622582834796196e+02 -3.3219826841101195e+02 1.4556774326995796e+02 + 12 7.1412702974492348e+01 -4.9974052650356612e+01 1.0723258124306223e+01 + 13 1.2983841333488790e+01 3.6226253549259608e+00 -1.7369216643277348e+01 + 14 2.1111361658245866e+02 4.1312473179030422e+02 -1.8177665169218727e+02 + 15 6.6205967662062449e+01 7.9562081373822039e+01 -9.6816806108613221e+01 + 16 7.8024062158787302e+01 3.4094331075850761e+01 5.9763314082458487e+00 + 17 1.7445214070241107e+01 -6.4435014330300476e+01 2.9813113022057500e+01 + 18 2.0800160412943211e-01 2.2891124106778022e+00 3.4660835897877700e+00 + 19 -8.0963080390688746e-01 -2.4045945217436877e+00 -1.8583563864926739e-01 + 20 -3.8753637434211785e-02 -5.5448402782730866e-01 -6.7086657947375039e-01 + 21 -1.3905969243613909e+00 9.2263911023596146e-01 2.4245678407761346e+00 + 22 1.3613731081665018e-01 8.4084293030416840e-02 -8.1025131035386821e-01 + 23 4.1866083933707599e-01 -5.5509350762570908e-02 -7.0997382727305158e-01 + 24 7.1694861345295280e-01 -2.3511767137443013e-01 8.0781359710835909e-01 + 25 -1.0629765873976795e+00 -1.8433966653787029e-01 -1.2964080715678894e+00 + 26 -3.5304755132121918e-01 -1.9129024515386733e-01 -4.7932022297094667e-01 + 27 -7.5085140414840501e-01 
-2.1332740916449014e-01 -3.4556428061080330e-01 + 28 3.0509542011027740e-01 4.6221002204910822e-02 8.8498260667955922e-02 + 29 1.1311031171809240e+00 4.1894808967987129e-01 6.1703656720310460e-02 +run_energy: 1317.131427833181 +run_stress: ! |2- + 1.1432271538886499e+02 -1.5603844203952541e+01 -9.8718871184912416e+01 -1.4093580634465189e+02 1.1435865928680047e+02 -5.7664143378423546e+01 +run_forces: ! |2 + 1 -8.5717926119025947e+00 -4.3091412260618227e+01 1.3316006725249042e+02 + 2 3.1997674214676874e+01 1.3654655784189870e+01 6.2828676384712061e+00 + 3 5.5770382737530170e+01 1.1281783434187146e+02 -3.9721453377879095e+02 + 4 -1.1789680795379847e+02 -2.8447706749956993e+01 1.2952665428714013e+02 + 5 -1.1819996113850017e+01 5.0146666822935302e+00 3.2281792318776925e-02 + 6 1.0058447730534851e+02 2.4972369765865452e+01 1.1633752546321533e+02 + 7 -8.8848296871465919e+01 -2.4542246520962529e+01 4.1058609991775068e+00 + 8 -5.2109635323526945e+01 1.7483153474801384e+01 -6.4894524111023628e+01 + 9 -1.0299581329057062e+02 -3.0071901661917311e+00 3.4614355229661122e+01 + 10 -4.6248656852080643e+01 -1.5989736943052526e+02 1.3946110129960286e+02 + 11 -2.0744046131487352e+02 -3.1349391104875059e+02 1.3657355191843646e+02 + 12 7.1878446763226236e+01 -5.0535379590486308e+01 1.0860480569904183e+01 + 13 1.2656630660306188e+01 3.5801817290072018e+00 -1.6968929416398453e+01 + 14 2.0156828471782353e+02 3.9593474256507108e+02 -1.7304498843642651e+02 + 15 6.6938931982383480e+01 7.9950072669406069e+01 -9.7082424123877473e+01 + 16 7.9480812177410172e+01 3.5274424529763422e+01 6.1163506563185468e+00 + 17 1.6544041468715115e+01 -6.5589692620159624e+01 2.9781616838343403e+01 + 18 2.1267286099045599e-01 2.2942619216574771e+00 3.4659719272782654e+00 + 19 -8.1337490705231874e-01 -2.4098575977295353e+00 -1.8396289009644590e-01 + 20 -3.7941149699885969e-02 -5.5340608883683873e-01 -6.6993848517114352e-01 + 21 -1.3889998509458839e+00 9.1890355468961316e-01 2.4229472564481020e+00 + 22 
1.3595436261854707e-01 8.5481963099478642e-02 -8.0999717836214991e-01 + 23 4.1797076633887387e-01 -5.5351208630107784e-02 -7.0920364554695203e-01 + 24 7.1594583142507817e-01 -2.3421203522820028e-01 8.0152300676926436e-01 + 25 -1.0620978751652836e+00 -1.8302578820695936e-01 -1.2914119694215400e+00 + 26 -3.5226930669430428e-01 -1.9104656198469849e-01 -4.7741349806842143e-01 + 27 -7.4942338430179933e-01 -2.1202918409495525e-01 -3.4599837098455843e-01 + 28 3.0447869914249726e-01 4.5993592160623709e-02 8.8399467006803928e-02 + 29 1.1288622579925440e+00 4.1709427848592429e-01 6.1770301585805341e-02 +... diff --git a/unittest/force-styles/tests/fix-timestep-recenter.yaml b/unittest/force-styles/tests/fix-timestep-recenter.yaml new file mode 100644 index 0000000000..bf6218e12c --- /dev/null +++ b/unittest/force-styles/tests/fix-timestep-recenter.yaml @@ -0,0 +1,80 @@ +--- +lammps_version: 17 Apr 2024 +tags: generated +date_generated: Sun Jun 9 13:01:27 2024 +epsilon: 2e-13 +skip_tests: +prerequisites: ! | + atom full + fix nve + fix recenter +pre_commands: ! "" +post_commands: ! | + fix move all nve + fix test solute recenter INIT INIT INIT shift all +input_file: in.fourmol +natoms: 29 +global_scalar: 4.8683951201810664e-05 +global_vector: ! |- + 3 -4.56488141360456e-05 -3.319879903845857e-06 1.65919037490303e-05 +run_pos: ! 
|2 + 1 -2.7082064032386427e-01 2.4911895589422826e+00 -1.6682586822486262e-01 + 2 3.0967525316897138e-01 2.9612090314837487e+00 -8.5453098067965838e-01 + 3 -7.0435055657791856e-01 1.2305245639573710e+00 -6.2764261975400648e-01 + 4 -1.5821809762199524e+00 1.4837143502673023e+00 -1.2537384339156441e+00 + 5 -9.0756267929791656e-01 9.2649460723106225e-01 3.9967475457430413e-01 + 6 2.4795216267853598e-01 2.8310378335302222e-01 -1.2312906834805886e+00 + 7 3.4107023384384022e-01 -2.2672982667082151e-02 -2.5290964917997489e+00 + 8 1.1739901803399770e+00 -4.8865871728423016e-01 -6.3770167941769895e-01 + 9 1.3796873803800074e+00 -2.5277364192975760e-01 2.8367250856150783e-01 + 10 2.0507114794843657e+00 -1.4604328056559774e+00 -9.8310480112657328e-01 + 11 1.7874381518742317e+00 -1.9922127589205771e+00 -1.8889275950720217e+00 + 12 3.0059356613639814e+00 -4.9015993659532353e-01 -1.6230571610480671e+00 + 13 4.0511752533492755e+00 -8.9204654769223057e-01 -1.6398679033019397e+00 + 14 2.6063312919843584e+00 -4.1791897128083211e-01 -2.6632677111888836e+00 + 15 2.9691636760012674e+00 5.5419970002664964e-01 -1.2340695524884562e+00 + 16 2.6743379269528278e+00 -2.4124383370821199e+00 -2.3303096460059868e-02 + 17 2.2149927359583543e+00 -2.0898249503164621e+00 1.1964477291384994e+00 + 18 2.1366051278415474e+00 3.0158243097373703e+00 -3.5178021840309452e+00 + 19 1.5352186710387139e+00 2.6255028039118771e+00 -4.2352661282973489e+00 + 20 2.7723922579978542e+00 3.6923646133353265e+00 -3.9329515962227930e+00 + 21 4.9036477647504055e+00 -4.0752612489214854e+00 -3.6208988212986148e+00 + 22 4.3578705128740598e+00 -4.2126383743543956e+00 -4.4611517699408489e+00 + 23 5.7435732423607355e+00 -3.5822222255531933e+00 -3.8765034799030254e+00 + 24 2.0685593156722391e+00 3.1513082591014108e+00 3.1551716251734359e+00 + 25 1.3041700905791895e+00 3.2664861389585944e+00 2.5113181754339067e+00 + 26 2.5805586977011039e+00 4.0117338289225923e+00 3.2213387025995455e+00 + 27 -1.9614993556057465e+00 
-4.3563676247616661e+00 2.1099619612429272e+00 + 28 -2.7477213110213645e+00 -4.0201084248636239e+00 1.5831378660339515e+00 + 29 -1.3129650617060094e+00 -3.5962782355739833e+00 2.2747668965643402e+00 +run_vel: ! |2 + 1 8.1705744183262104e-03 1.6516406176274215e-02 4.7902264318912665e-03 + 2 5.4501493445687811e-03 5.1791699408496438e-03 -1.4372931530376649e-03 + 3 -8.2298292722385452e-03 -1.2926551614621284e-02 -4.0984181178163525e-03 + 4 -3.7699042590093545e-03 -6.5722892098813894e-03 -1.1184640360133386e-03 + 5 -1.1021961004346574e-02 -9.8906780939336230e-03 -2.8410737829284243e-03 + 6 -3.9676663166400006e-02 4.6817061464710277e-02 3.7148491979476325e-02 + 7 9.1033953013899035e-04 -1.0128524411938825e-02 -5.1568251805019887e-02 + 8 7.9064712058856887e-03 -3.3507254552633267e-03 3.4557098492564581e-02 + 9 1.5644176117320895e-03 3.7365546102722208e-03 1.5047408822037655e-02 + 10 2.9201446820572973e-02 -2.9249578745485925e-02 -1.5018077424322498e-02 + 11 -4.7835961513517334e-03 -3.7481385134185172e-03 -2.3464104142289872e-03 + 12 2.2696451841920482e-03 -3.4774154398129045e-04 -3.0640770327797049e-03 + 13 2.7531740451953858e-03 5.8171061612840571e-03 -7.9467454022158338e-04 + 14 3.5246182371994100e-03 -5.7939995585585442e-03 -3.9478431172750971e-03 + 15 -1.8547943640122824e-03 -5.8554729942777760e-03 6.2938485140538701e-03 + 16 1.8681499973445273e-02 -1.3262466204585367e-02 -4.5638651457003236e-02 + 17 -1.2896269981100387e-02 9.7527665265956503e-03 3.7296535360836762e-02 + 18 -8.0065794848262488e-04 -8.6270473212555132e-04 -1.4483040697508753e-03 + 19 1.2452390836182718e-03 -2.5061097118772653e-03 7.2998631009713157e-03 + 20 3.5930060229597302e-03 3.6938860309253200e-03 3.2322732687892933e-03 + 21 -1.4689220370766320e-03 -2.7352129761526922e-04 7.0581624215244974e-04 + 22 -7.0694199254630720e-03 -4.2577148924878615e-03 2.8079117614244581e-04 + 23 6.0446963117374384e-03 -1.4000131614795620e-03 2.5819754847014311e-03 + 24 3.1926367902287988e-04 -9.9445664749279517e-04 
1.4999996959366737e-04 + 25 1.3789754514808352e-04 -4.4335894884532422e-03 -8.1808136725085745e-04 + 26 2.0485904035218161e-03 2.7813358633837042e-03 4.3245727149206735e-03 + 27 4.5604120293371369e-04 -1.0305523026920786e-03 2.1188058381358467e-04 + 28 -6.2544520861855047e-03 1.4127711176146632e-03 -1.8429821884794180e-03 + 29 6.4110631534395072e-04 3.1273432719592779e-03 3.7253671105656637e-03 +... From f915c144fd1107234f5a650683b60def62c6b2a7 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Thu, 13 Jun 2024 12:17:41 -0400 Subject: [PATCH 002/294] Revert "added dihedral charmfsw kokkos test" This reverts commit 55b1b5d008c54cc2400eee80e603b307bb7bfb1c. --- src/KOKKOS/fix_recenter_kokkos.cpp | 164 ---------- src/KOKKOS/fix_recenter_kokkos.h | 48 --- src/fix_recenter.cpp | 11 +- src/fix_recenter.h | 2 +- unittest/force-styles/test_fix_timestep.cpp | 309 ------------------ .../tests/dihedral-charmmfsw-kokkos.yaml | 90 ----- .../tests/fix-timestep-recenter.yaml | 80 ----- 7 files changed, 2 insertions(+), 702 deletions(-) delete mode 100644 src/KOKKOS/fix_recenter_kokkos.cpp delete mode 100644 src/KOKKOS/fix_recenter_kokkos.h delete mode 100644 unittest/force-styles/tests/dihedral-charmmfsw-kokkos.yaml delete mode 100644 unittest/force-styles/tests/fix-timestep-recenter.yaml diff --git a/src/KOKKOS/fix_recenter_kokkos.cpp b/src/KOKKOS/fix_recenter_kokkos.cpp deleted file mode 100644 index 5656013c55..0000000000 --- a/src/KOKKOS/fix_recenter_kokkos.cpp +++ /dev/null @@ -1,164 +0,0 @@ -// clang-format off -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - https://www.lammps.org/, Sandia National Laboratories - LAMMPS development team: developers@lammps.org - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. 
This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author: Mitch Murphy (alphataubio.com) - ------------------------------------------------------------------------- */ - -#include "fix_recenter_kokkos.h" - -#include "atom_kokkos.h" -#include "atom_masks.h" -#include "input.h" -#include "modify.h" -#include "update.h" -#include "domain.h" -#include "group.h" -#include "kokkos_few.h" - -using namespace LAMMPS_NS; - -enum{BOX,LATTICE,FRACTION}; - -/* ---------------------------------------------------------------------- */ - -template -FixRecenterKokkos::FixRecenterKokkos(LAMMPS *lmp, int narg, char **arg) : - FixRecenter(lmp, narg, arg) -{ - kokkosable = 1; - atomKK = (AtomKokkos *)atom; - execution_space = ExecutionSpaceFromDevice::space; - - //datamask_read = X_MASK | F_MASK | RMASS_MASK | MASK_MASK | TYPE_MASK; - datamask_read = X_MASK | MASK_MASK; - datamask_modify = X_MASK; -} - -/* ---------------------------------------------------------------------- */ - -template -void FixRecenterKokkos::initial_integrate(int /*vflag*/) -{ - -utils::logmesg(lmp, "ok 2a\n"); - - atomKK->sync(execution_space,datamask_read); - atomKK->modified(execution_space,datamask_modify); - - x = atomKK->k_x.view(); - mask = atomKK->k_mask.view(); - int nlocal = atomKK->nlocal; - if (igroup == atomKK->firstgroup) nlocal = atomKK->nfirst; - - // FIX RECENTER - // target COM - // bounding box around domain works for both orthogonal and triclinic - - double xtarget = xinit; - double ytarget = yinit; - double ztarget = zinit; - - xflag=yflag=zflag=1; - - utils::logmesg(lmp, "ok 2b\n"); - - // FIXME: only supported in KOKKOS... 
- // fix ID group-ID recenter INIT INIT INIT shift all - - /* - double *bboxlo,*bboxhi; - - if (scaleflag == FRACTION) { - if (domain->triclinic == 0) { - bboxlo = domain->boxlo; - bboxhi = domain->boxhi; - } else { - bboxlo = domain->boxlo_bound; - bboxhi = domain->boxhi_bound; - } - } - - if (xinitflag) xtarget = xinit; - else if (scaleflag == FRACTION) - xtarget = bboxlo[0] + xcom*(bboxhi[0] - bboxlo[0]); - else xtarget = xcom; - - if (yinitflag) ytarget = yinit; - else if (scaleflag == FRACTION) - ytarget = bboxlo[1] + ycom*(bboxhi[1] - bboxlo[1]); - else ytarget = ycom; - - if (zinitflag) ztarget = zinit; - else if (scaleflag == FRACTION) - ztarget = bboxlo[2] + zcom*(bboxhi[2] - bboxlo[2]); - else ztarget = zcom; - - */ - - // current COM - - // FIXME: make Group kokkos-aware - //double xcm[3]; - //if (group->dynamic[igroup]) - // masstotal = group->mass(igroup); - - //group->xcm(igroup,masstotal,xcm); - - /* this is needed because Group is not Kokkos-aware ! */ - atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK); - Few tmpxcm; - group->xcm(igroup,masstotal,&tmpxcm[0]); - const Few xcm(tmpxcm); - - - utils::logmesg(lmp, "ok 2c, xcm={},{},{}\n", xcm[0], xcm[1], xcm[2]); - - // shift coords by difference between actual COM and requested COM - - double shiftx = xflag ? (xtarget - xcm[0]) : 0.0; - double shifty = yflag ? (ytarget - xcm[1]) : 0.0; - double shiftz = zflag ? 
(ztarget - xcm[2]) : 0.0; - distance = sqrt(shiftx*shiftx + shifty*shifty + shiftz*shiftz); - -utils::logmesg(lmp, "ok 2d, shift={},{},{}\n", shiftx, shifty, shiftz); - // ---- - - copymode = 1; - - auto group2bit_copy = group2bit; - - Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal), - LAMMPS_LAMBDA(int i) { - if (mask[i] & group2bit_copy) { - x(i,0) += shiftx; - x(i,1) += shifty; - x(i,2) += shiftz; - utils::logmesg(lmp, "x({})={} {} {}\n", i, x(i,0), x(i,1), x(i,2)); - - } - }); - - utils::logmesg(lmp, "x(1)={} {} {}\n", x(1,0), x(1,1), x(1,2)); - - copymode = 0; -} - - -namespace LAMMPS_NS { -template class FixRecenterKokkos; -#ifdef LMP_KOKKOS_GPU -template class FixRecenterKokkos; -#endif -} diff --git a/src/KOKKOS/fix_recenter_kokkos.h b/src/KOKKOS/fix_recenter_kokkos.h deleted file mode 100644 index e87057eef7..0000000000 --- a/src/KOKKOS/fix_recenter_kokkos.h +++ /dev/null @@ -1,48 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - https://www.lammps.org/, Sandia National Laboratories - LAMMPS development team: developers@lammps.org - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -#ifdef FIX_CLASS -// clang-format off -FixStyle(recenter/kk,FixRecenterKokkos); -FixStyle(recenter/kk/device,FixRecenterKokkos); -FixStyle(recenter/kk/host,FixRecenterKokkos); -// clang-format on -#else - -// clang-format off -#ifndef LMP_FIX_RECENTER_KOKKOS_H -#define LMP_FIX_RECENTER_KOKKOS_H - -#include "fix_recenter.h" -#include "kokkos_type.h" - -namespace LAMMPS_NS { - -//struct TagFixRecenter {}; - -template -class FixRecenterKokkos : public FixRecenter { - public: - FixRecenterKokkos(class LAMMPS *, int, char **); - - void initial_integrate(int) override; - - private: - typename ArrayTypes::t_x_array x; - typename ArrayTypes::t_int_1d mask; -}; - -} // namespace LAMMPS_NS - -#endif // LMP_FIX_RECENTER_KOKKOS_H -#endif // FIX_CLASS diff --git a/src/fix_recenter.cpp b/src/fix_recenter.cpp index c99e92d1a5..4da8c4787b 100644 --- a/src/fix_recenter.cpp +++ b/src/fix_recenter.cpp @@ -191,8 +191,6 @@ void FixRecenter::initial_integrate(int /*vflag*/) group->xcm(igroup,masstotal,xcm); - utils::logmesg(lmp, "ok 2c, xcm={},{},{}\n", xcm[0], xcm[1], xcm[2]); - // shift coords by difference between actual COM and requested COM double **x = atom->x; @@ -204,14 +202,11 @@ void FixRecenter::initial_integrate(int /*vflag*/) shift[2] = zflag ? 
(ztarget - xcm[2]) : 0.0; distance = sqrt(shift[0]*shift[0] + shift[1]*shift[1] + shift[2]*shift[2]); - utils::logmesg(lmp, "ok 2d, shift={},{},{}\n", shift[0], shift[1], shift[2]); - for (int i = 0; i < nlocal; i++) if (mask[i] & group2bit) { x[i][0] += shift[0]; x[i][1] += shift[1]; x[i][2] += shift[2]; - utils::logmesg(lmp, "x[{}]={} {} {}\n", i, x[i][0], x[i][1], x[i][2]); } } @@ -222,11 +217,7 @@ void FixRecenter::initial_integrate_respa(int vflag, int ilevel, int /*iloop*/) // outermost level - operate recenter // all other levels - nothing - //if (ilevel == nlevels_respa-1) initial_integrate(vflag); - // FIXME: why does always calling initial_integrate make respa tests - // pass, i dont know ! - initial_integrate(vflag); - + if (ilevel == nlevels_respa-1) initial_integrate(vflag); } /* ---------------------------------------------------------------------- */ diff --git a/src/fix_recenter.h b/src/fix_recenter.h index dfdb48b8d2..a45f0201bf 100644 --- a/src/fix_recenter.h +++ b/src/fix_recenter.h @@ -34,7 +34,7 @@ class FixRecenter : public Fix { double compute_scalar() override; double compute_vector(int) override; - protected: + private: int group2bit, scaleflag; int xflag, yflag, zflag; int xinitflag, yinitflag, zinitflag; diff --git a/unittest/force-styles/test_fix_timestep.cpp b/unittest/force-styles/test_fix_timestep.cpp index d44abf749e..2d2c2fa0b8 100644 --- a/unittest/force-styles/test_fix_timestep.cpp +++ b/unittest/force-styles/test_fix_timestep.cpp @@ -84,16 +84,12 @@ LAMMPS *init_lammps(LAMMPS::argv & args, const TestConfig &cfg, const bool use_r if (!info->has_style(prerequisite.first, style)) ++nfail; } - - std::cerr << "ok 1b\n"; - delete info; if (nfail > 0) { cleanup_lammps(lmp, cfg); return nullptr; } - std::cerr << "ok 1c\n"; // utility lambda to improve readability auto command = [&](const std::string &line) { lmp->input->one(line); @@ -106,10 +102,8 @@ LAMMPS *init_lammps(LAMMPS::argv & args, const TestConfig &cfg, const bool use_r 
std::string input_file = platform::path_join(INPUT_FOLDER, cfg.input_file); lmp->input->file(input_file.c_str()); - std::cerr << "ok 1d\n"; if (use_respa) command("run_style respa 2 1 bond 1 pair 2"); - std::cerr << "ok 1e\n"; // set up molecular system force field command("pair_style lj/cut 8.0"); @@ -133,18 +127,15 @@ LAMMPS *init_lammps(LAMMPS::argv & args, const TestConfig &cfg, const bool use_r command("group solute molecule 1:2"); command("group solvent molecule 3:5"); - std::cerr << "ok 1f\n"; for (auto &post_command : cfg.post_commands) command(post_command); - std::cerr << "ok 1g\n"; command("timestep 0.25"); command("run 0 post no"); command("thermo 2"); command("run 4 post no start 0 stop 8"); command("write_restart " + cfg.basename + ".restart"); command("run 4 post no start 0 stop 8"); - std::cerr << "ok 1h\n"; return lmp; } @@ -848,303 +839,3 @@ TEST(FixTimestep, omp) cleanup_lammps(lmp, test_config); if (!verbose) ::testing::internal::GetCapturedStdout(); }; - -TEST(FixTimestep, kokkos_omp) -{ - if (!LAMMPS::is_installed_pkg("KOKKOS")) GTEST_SKIP(); - if (test_config.skip_tests.count(test_info_->name())) GTEST_SKIP(); - if (!Info::has_accelerator_feature("KOKKOS", "api", "openmp")) GTEST_SKIP(); - - LAMMPS::argv args = {"FixTimestep", "-log", "none", "-echo", "screen", "-nocite", - "-k", "on", "t", "4", "-sf", "kk"}; - - ::testing::internal::CaptureStdout(); - std::cerr << "ok 1\n"; - LAMMPS *lmp = init_lammps(args, test_config); - std::cerr << "ok 2\n"; - std::string output = ::testing::internal::GetCapturedStdout(); - if (verbose) std::cout << output; - - if (!lmp) { - std::cerr << "One or more prerequisite styles with /kk suffix\n" - "are not available in this LAMMPS configuration:\n"; - for (auto &prerequisite : test_config.prerequisites) { - std::cerr << prerequisite.first << "_style " << prerequisite.second << "\n"; - } - GTEST_SKIP(); - } - - EXPECT_THAT(output, StartsWith("LAMMPS (")); - EXPECT_THAT(output, HasSubstr("Loop time")); - - // 
abort if running in parallel and not all atoms are local - const int nlocal = lmp->atom->nlocal; - ASSERT_EQ(lmp->atom->natoms, nlocal); - - // relax error a bit for KOKKOS package - double epsilon = 5.0 * test_config.epsilon; - // relax test precision when using pppm and single precision FFTs -#if defined(FFT_SINGLE) - if (lmp->force->kspace && utils::strmatch(lmp->force->kspace_style, "^pppm")) epsilon *= 2.0e8; -#endif - - ErrorStats stats; - - EXPECT_POSITIONS("run_pos (normal run, verlet)", lmp->atom, test_config.run_pos, epsilon); - EXPECT_VELOCITIES("run_vel (normal run, verlet)", lmp->atom, test_config.run_vel, epsilon); - - int ifix = lmp->modify->find_fix("test"); - if (ifix < 0) { - FAIL() << "ERROR: no fix defined with fix ID 'test'\n"; - } else { - Fix *fix = lmp->modify->fix[ifix]; - if (fix->thermo_virial) { - EXPECT_STRESS("run_stress (normal run, verlet)", fix->virial, test_config.run_stress, - epsilon); - } - - stats.reset(); - // global scalar - if (fix->scalar_flag) { - double value = fix->compute_scalar(); - EXPECT_FP_LE_WITH_EPS(test_config.global_scalar, value, epsilon); - } - - // global vector - if (fix->vector_flag) { - int num = fix->size_vector; - EXPECT_EQ(num, test_config.global_vector.size()); - - for (int i = 0; i < num; ++i) - EXPECT_FP_LE_WITH_EPS(test_config.global_vector[i], fix->compute_vector(i), - epsilon); - } - - // check t_target for thermostats - - int dim = -1; - double *ptr = (double *)fix->extract("t_target", dim); - if ((ptr != nullptr) && (dim == 0)) { - int ivar = lmp->input->variable->find("t_target"); - if (ivar >= 0) { - double t_ref = atof(lmp->input->variable->retrieve("t_target")); - double t_target = *ptr; - EXPECT_FP_LE_WITH_EPS(t_target, t_ref, epsilon); - } - } - if (print_stats && stats.has_data()) - std::cerr << "global_data, normal run, verlet: " << stats << std::endl; - } - - if (!verbose) ::testing::internal::CaptureStdout(); - restart_lammps(lmp, test_config, false, false); - if (!verbose) 
::testing::internal::GetCapturedStdout(); - - EXPECT_POSITIONS("run_pos (restart, verlet)", lmp->atom, test_config.run_pos, epsilon); - EXPECT_VELOCITIES("run_vel (restart, verlet)", lmp->atom, test_config.run_vel, epsilon); - - ifix = lmp->modify->find_fix("test"); - if (ifix < 0) { - FAIL() << "ERROR: no fix defined with fix ID 'test'\n"; - } else { - Fix *fix = lmp->modify->fix[ifix]; - if (fix->thermo_virial) { - EXPECT_STRESS("run_stress (restart, verlet)", fix->virial, test_config.run_stress, - epsilon); - } - - stats.reset(); - - // global scalar - if (fix->scalar_flag) { - double value = fix->compute_scalar(); - EXPECT_FP_LE_WITH_EPS(test_config.global_scalar, value, epsilon); - } - - // global vector - if (fix->vector_flag) { - int num = fix->size_vector; - EXPECT_EQ(num, test_config.global_vector.size()); - - for (int i = 0; i < num; ++i) - EXPECT_FP_LE_WITH_EPS(test_config.global_vector[i], fix->compute_vector(i), - epsilon); - } - if (print_stats && stats.has_data()) - std::cerr << "global_data, restart, verlet: " << stats << std::endl; - } - - if (lmp->atom->rmass == nullptr) { - if (!verbose) ::testing::internal::CaptureStdout(); - restart_lammps(lmp, test_config, true, false); - if (!verbose) ::testing::internal::GetCapturedStdout(); - - EXPECT_POSITIONS("run_pos (rmass, verlet)", lmp->atom, test_config.run_pos, epsilon); - EXPECT_VELOCITIES("run_vel (rmass, verlet)", lmp->atom, test_config.run_vel, epsilon); - - ifix = lmp->modify->find_fix("test"); - if (ifix < 0) { - FAIL() << "ERROR: no fix defined with fix ID 'test'\n"; - } else { - Fix *fix = lmp->modify->fix[ifix]; - if (fix->thermo_virial) { - EXPECT_STRESS("run_stress (rmass, verlet)", fix->virial, test_config.run_stress, - epsilon); - } - - stats.reset(); - - // global scalar - if (fix->scalar_flag) { - double value = fix->compute_scalar(); - EXPECT_FP_LE_WITH_EPS(test_config.global_scalar, value, epsilon); - } - - // global vector - if (fix->vector_flag) { - int num = fix->size_vector; - 
EXPECT_EQ(num, test_config.global_vector.size()); - - for (int i = 0; i < num; ++i) - EXPECT_FP_LE_WITH_EPS(test_config.global_vector[i], fix->compute_vector(i), - epsilon); - } - if (print_stats && stats.has_data()) - std::cerr << "global_data, rmass, verlet: " << stats << std::endl; - } - } - - // rigid fixes need work to test properly with r-RESPA, - // also, torque is not supported by respa/omp - ifix = lmp->modify->find_fix("test"); - if (!utils::strmatch(lmp->modify->fix[ifix]->style, "^rigid") && !lmp->atom->torque) { - - if (!verbose) ::testing::internal::CaptureStdout(); - cleanup_lammps(lmp, test_config); - if (!verbose) ::testing::internal::GetCapturedStdout(); - - ::testing::internal::CaptureStdout(); - lmp = init_lammps(args, test_config, true); - output = ::testing::internal::GetCapturedStdout(); - if (verbose) std::cout << output; - - // lower required precision by two orders of magnitude to accommodate respa - epsilon *= 100.0; - - EXPECT_POSITIONS("run_pos (normal run, respa)", lmp->atom, test_config.run_pos, epsilon); - EXPECT_VELOCITIES("run_vel (normal run, respa)", lmp->atom, test_config.run_vel, epsilon); - - ifix = lmp->modify->find_fix("test"); - if (ifix < 0) { - FAIL() << "ERROR: no fix defined with fix ID 'test'\n"; - } else { - Fix *fix = lmp->modify->fix[ifix]; - if (fix->thermo_virial) { - EXPECT_STRESS("run_stress (normal run, respa)", fix->virial, test_config.run_stress, - 1000 * epsilon); - } - - stats.reset(); - - // global scalar - if (fix->scalar_flag) { - double value = fix->compute_scalar(); - EXPECT_FP_LE_WITH_EPS(test_config.global_scalar, value, 10 * epsilon); - } - - // global vector - if (fix->vector_flag) { - int num = fix->size_vector; - EXPECT_EQ(num, test_config.global_vector.size()); - - for (int i = 0; i < num; ++i) - EXPECT_FP_LE_WITH_EPS(test_config.global_vector[i], fix->compute_vector(i), - 10 * epsilon); - } - if (print_stats && stats.has_data()) - std::cerr << "global_data, normal run, respa: " << stats << 
std::endl; - } - - if (!verbose) ::testing::internal::CaptureStdout(); - restart_lammps(lmp, test_config, false, true); - if (!verbose) ::testing::internal::GetCapturedStdout(); - - EXPECT_POSITIONS("run_pos (restart, respa)", lmp->atom, test_config.run_pos, epsilon); - EXPECT_VELOCITIES("run_vel (restart, respa)", lmp->atom, test_config.run_vel, epsilon); - - ifix = lmp->modify->find_fix("test"); - if (ifix < 0) { - FAIL() << "ERROR: no fix defined with fix ID 'test'\n"; - } else { - Fix *fix = lmp->modify->fix[ifix]; - if (fix->thermo_virial) { - EXPECT_STRESS("run_stress (restart, respa)", fix->virial, test_config.run_stress, - 1000 * epsilon); - } - - stats.reset(); - - // global scalar - if (fix->scalar_flag) { - double value = fix->compute_scalar(); - EXPECT_FP_LE_WITH_EPS(test_config.global_scalar, value, 10 * epsilon); - } - - // global vector - if (fix->vector_flag) { - int num = fix->size_vector; - EXPECT_EQ(num, test_config.global_vector.size()); - - for (int i = 0; i < num; ++i) - EXPECT_FP_LE_WITH_EPS(test_config.global_vector[i], fix->compute_vector(i), - 10 * epsilon); - } - if (print_stats && stats.has_data()) - std::cerr << "global_data, restart, respa: " << stats << std::endl; - } - - if (lmp->atom->rmass == nullptr) { - if (!verbose) ::testing::internal::CaptureStdout(); - restart_lammps(lmp, test_config, true, true); - if (!verbose) ::testing::internal::GetCapturedStdout(); - - EXPECT_POSITIONS("run_pos (rmass, respa)", lmp->atom, test_config.run_pos, epsilon); - EXPECT_VELOCITIES("run_vel (rmass, respa)", lmp->atom, test_config.run_vel, epsilon); - - ifix = lmp->modify->find_fix("test"); - if (ifix < 0) { - FAIL() << "ERROR: no fix defined with fix ID 'test'\n"; - } else { - Fix *fix = lmp->modify->fix[ifix]; - if (fix->thermo_virial) { - EXPECT_STRESS("run_stress (rmass, respa)", fix->virial, test_config.run_stress, - 1000 * epsilon); - } - - stats.reset(); - - // global scalar - if (fix->scalar_flag) { - double value = fix->compute_scalar(); 
- EXPECT_FP_LE_WITH_EPS(test_config.global_scalar, value, 10 * epsilon); - } - - // global vector - if (fix->vector_flag) { - int num = fix->size_vector; - EXPECT_EQ(num, test_config.global_vector.size()); - - for (int i = 0; i < num; ++i) - EXPECT_FP_LE_WITH_EPS(test_config.global_vector[i], fix->compute_vector(i), - 10 * epsilon); - } - if (print_stats && stats.has_data()) - std::cerr << "global_data, rmass, respa: " << stats << std::endl; - } - } - } - - if (!verbose) ::testing::internal::CaptureStdout(); - cleanup_lammps(lmp, test_config); - if (!verbose) ::testing::internal::GetCapturedStdout(); -}; diff --git a/unittest/force-styles/tests/dihedral-charmmfsw-kokkos.yaml b/unittest/force-styles/tests/dihedral-charmmfsw-kokkos.yaml deleted file mode 100644 index 6d2035acc5..0000000000 --- a/unittest/force-styles/tests/dihedral-charmmfsw-kokkos.yaml +++ /dev/null @@ -1,90 +0,0 @@ ---- -lammps_version: 17 Feb 2022 -date_generated: Fri Mar 18 22:18:02 2022 -epsilon: 5.0e-12 -skip_tests: -prerequisites: ! | - atom full - dihedral charmmfsw -pre_commands: ! "" -post_commands: ! | - special_bonds charmm - pair_style lj/charmmfsw/coul/charmmfsh 7.0 8.0 - pair_coeff * * 0.1 3.0 -input_file: in.fourmol -dihedral_style: charmmfsw -dihedral_coeff: ! | - 1 75.0 2 160 0.5 - 2 45.0 4 120 1.0 - 3 56.0 0 110 0.0 - 4 23.0 1 180 0.5 - 5 19.0 3 90 1.0 -extract: ! "" -natoms: 29 -init_energy: 1317.959844120986 -init_stress: ! |2- - 1.1266474992363544e+02 -1.4270359924600960e+01 -9.8394389999034431e+01 -1.4122826669412839e+02 1.1178234052730829e+02 -5.8711817976295805e+01 -init_forces: ! 
|2 - 1 -8.9693997732828734e+00 -4.2090832180473811e+01 1.3129896410266406e+02 - 2 3.2939174268614089e+01 1.3776993828220547e+01 6.6047608190284564e+00 - 3 5.5339891213774308e+01 1.1132435962327280e+02 -3.9478906486222957e+02 - 4 -1.1853376295969241e+02 -2.8731355857128989e+01 1.2940461683448865e+02 - 5 -1.1704542931552981e+01 5.1612910145764337e+00 1.1531982356444204e-01 - 6 1.0315985347478912e+02 2.7858195004320272e+01 1.1495266502109652e+02 - 7 -8.8909058225650583e+01 -2.3837347272152069e+01 3.9392213448182849e+00 - 8 -5.5853617515784052e+01 1.3539223737174332e+01 -6.3713599961688189e+01 - 9 -1.0311156651288640e+02 -3.6164116725708757e+00 3.5075828453521936e+01 - 10 -4.3826637468354527e+01 -1.5710289244173734e+02 1.3864307003008477e+02 - 11 -2.1622582834796196e+02 -3.3219826841101195e+02 1.4556774326995796e+02 - 12 7.1412702974492348e+01 -4.9974052650356612e+01 1.0723258124306223e+01 - 13 1.2983841333488790e+01 3.6226253549259608e+00 -1.7369216643277348e+01 - 14 2.1111361658245866e+02 4.1312473179030422e+02 -1.8177665169218727e+02 - 15 6.6205967662062449e+01 7.9562081373822039e+01 -9.6816806108613221e+01 - 16 7.8024062158787302e+01 3.4094331075850761e+01 5.9763314082458487e+00 - 17 1.7445214070241107e+01 -6.4435014330300476e+01 2.9813113022057500e+01 - 18 2.0800160412943211e-01 2.2891124106778022e+00 3.4660835897877700e+00 - 19 -8.0963080390688746e-01 -2.4045945217436877e+00 -1.8583563864926739e-01 - 20 -3.8753637434211785e-02 -5.5448402782730866e-01 -6.7086657947375039e-01 - 21 -1.3905969243613909e+00 9.2263911023596146e-01 2.4245678407761346e+00 - 22 1.3613731081665018e-01 8.4084293030416840e-02 -8.1025131035386821e-01 - 23 4.1866083933707599e-01 -5.5509350762570908e-02 -7.0997382727305158e-01 - 24 7.1694861345295280e-01 -2.3511767137443013e-01 8.0781359710835909e-01 - 25 -1.0629765873976795e+00 -1.8433966653787029e-01 -1.2964080715678894e+00 - 26 -3.5304755132121918e-01 -1.9129024515386733e-01 -4.7932022297094667e-01 - 27 -7.5085140414840501e-01 
-2.1332740916449014e-01 -3.4556428061080330e-01 - 28 3.0509542011027740e-01 4.6221002204910822e-02 8.8498260667955922e-02 - 29 1.1311031171809240e+00 4.1894808967987129e-01 6.1703656720310460e-02 -run_energy: 1317.131427833181 -run_stress: ! |2- - 1.1432271538886499e+02 -1.5603844203952541e+01 -9.8718871184912416e+01 -1.4093580634465189e+02 1.1435865928680047e+02 -5.7664143378423546e+01 -run_forces: ! |2 - 1 -8.5717926119025947e+00 -4.3091412260618227e+01 1.3316006725249042e+02 - 2 3.1997674214676874e+01 1.3654655784189870e+01 6.2828676384712061e+00 - 3 5.5770382737530170e+01 1.1281783434187146e+02 -3.9721453377879095e+02 - 4 -1.1789680795379847e+02 -2.8447706749956993e+01 1.2952665428714013e+02 - 5 -1.1819996113850017e+01 5.0146666822935302e+00 3.2281792318776925e-02 - 6 1.0058447730534851e+02 2.4972369765865452e+01 1.1633752546321533e+02 - 7 -8.8848296871465919e+01 -2.4542246520962529e+01 4.1058609991775068e+00 - 8 -5.2109635323526945e+01 1.7483153474801384e+01 -6.4894524111023628e+01 - 9 -1.0299581329057062e+02 -3.0071901661917311e+00 3.4614355229661122e+01 - 10 -4.6248656852080643e+01 -1.5989736943052526e+02 1.3946110129960286e+02 - 11 -2.0744046131487352e+02 -3.1349391104875059e+02 1.3657355191843646e+02 - 12 7.1878446763226236e+01 -5.0535379590486308e+01 1.0860480569904183e+01 - 13 1.2656630660306188e+01 3.5801817290072018e+00 -1.6968929416398453e+01 - 14 2.0156828471782353e+02 3.9593474256507108e+02 -1.7304498843642651e+02 - 15 6.6938931982383480e+01 7.9950072669406069e+01 -9.7082424123877473e+01 - 16 7.9480812177410172e+01 3.5274424529763422e+01 6.1163506563185468e+00 - 17 1.6544041468715115e+01 -6.5589692620159624e+01 2.9781616838343403e+01 - 18 2.1267286099045599e-01 2.2942619216574771e+00 3.4659719272782654e+00 - 19 -8.1337490705231874e-01 -2.4098575977295353e+00 -1.8396289009644590e-01 - 20 -3.7941149699885969e-02 -5.5340608883683873e-01 -6.6993848517114352e-01 - 21 -1.3889998509458839e+00 9.1890355468961316e-01 2.4229472564481020e+00 - 22 
1.3595436261854707e-01 8.5481963099478642e-02 -8.0999717836214991e-01 - 23 4.1797076633887387e-01 -5.5351208630107784e-02 -7.0920364554695203e-01 - 24 7.1594583142507817e-01 -2.3421203522820028e-01 8.0152300676926436e-01 - 25 -1.0620978751652836e+00 -1.8302578820695936e-01 -1.2914119694215400e+00 - 26 -3.5226930669430428e-01 -1.9104656198469849e-01 -4.7741349806842143e-01 - 27 -7.4942338430179933e-01 -2.1202918409495525e-01 -3.4599837098455843e-01 - 28 3.0447869914249726e-01 4.5993592160623709e-02 8.8399467006803928e-02 - 29 1.1288622579925440e+00 4.1709427848592429e-01 6.1770301585805341e-02 -... diff --git a/unittest/force-styles/tests/fix-timestep-recenter.yaml b/unittest/force-styles/tests/fix-timestep-recenter.yaml deleted file mode 100644 index bf6218e12c..0000000000 --- a/unittest/force-styles/tests/fix-timestep-recenter.yaml +++ /dev/null @@ -1,80 +0,0 @@ ---- -lammps_version: 17 Apr 2024 -tags: generated -date_generated: Sun Jun 9 13:01:27 2024 -epsilon: 2e-13 -skip_tests: -prerequisites: ! | - atom full - fix nve - fix recenter -pre_commands: ! "" -post_commands: ! | - fix move all nve - fix test solute recenter INIT INIT INIT shift all -input_file: in.fourmol -natoms: 29 -global_scalar: 4.8683951201810664e-05 -global_vector: ! |- - 3 -4.56488141360456e-05 -3.319879903845857e-06 1.65919037490303e-05 -run_pos: ! 
|2 - 1 -2.7082064032386427e-01 2.4911895589422826e+00 -1.6682586822486262e-01 - 2 3.0967525316897138e-01 2.9612090314837487e+00 -8.5453098067965838e-01 - 3 -7.0435055657791856e-01 1.2305245639573710e+00 -6.2764261975400648e-01 - 4 -1.5821809762199524e+00 1.4837143502673023e+00 -1.2537384339156441e+00 - 5 -9.0756267929791656e-01 9.2649460723106225e-01 3.9967475457430413e-01 - 6 2.4795216267853598e-01 2.8310378335302222e-01 -1.2312906834805886e+00 - 7 3.4107023384384022e-01 -2.2672982667082151e-02 -2.5290964917997489e+00 - 8 1.1739901803399770e+00 -4.8865871728423016e-01 -6.3770167941769895e-01 - 9 1.3796873803800074e+00 -2.5277364192975760e-01 2.8367250856150783e-01 - 10 2.0507114794843657e+00 -1.4604328056559774e+00 -9.8310480112657328e-01 - 11 1.7874381518742317e+00 -1.9922127589205771e+00 -1.8889275950720217e+00 - 12 3.0059356613639814e+00 -4.9015993659532353e-01 -1.6230571610480671e+00 - 13 4.0511752533492755e+00 -8.9204654769223057e-01 -1.6398679033019397e+00 - 14 2.6063312919843584e+00 -4.1791897128083211e-01 -2.6632677111888836e+00 - 15 2.9691636760012674e+00 5.5419970002664964e-01 -1.2340695524884562e+00 - 16 2.6743379269528278e+00 -2.4124383370821199e+00 -2.3303096460059868e-02 - 17 2.2149927359583543e+00 -2.0898249503164621e+00 1.1964477291384994e+00 - 18 2.1366051278415474e+00 3.0158243097373703e+00 -3.5178021840309452e+00 - 19 1.5352186710387139e+00 2.6255028039118771e+00 -4.2352661282973489e+00 - 20 2.7723922579978542e+00 3.6923646133353265e+00 -3.9329515962227930e+00 - 21 4.9036477647504055e+00 -4.0752612489214854e+00 -3.6208988212986148e+00 - 22 4.3578705128740598e+00 -4.2126383743543956e+00 -4.4611517699408489e+00 - 23 5.7435732423607355e+00 -3.5822222255531933e+00 -3.8765034799030254e+00 - 24 2.0685593156722391e+00 3.1513082591014108e+00 3.1551716251734359e+00 - 25 1.3041700905791895e+00 3.2664861389585944e+00 2.5113181754339067e+00 - 26 2.5805586977011039e+00 4.0117338289225923e+00 3.2213387025995455e+00 - 27 -1.9614993556057465e+00 
-4.3563676247616661e+00 2.1099619612429272e+00 - 28 -2.7477213110213645e+00 -4.0201084248636239e+00 1.5831378660339515e+00 - 29 -1.3129650617060094e+00 -3.5962782355739833e+00 2.2747668965643402e+00 -run_vel: ! |2 - 1 8.1705744183262104e-03 1.6516406176274215e-02 4.7902264318912665e-03 - 2 5.4501493445687811e-03 5.1791699408496438e-03 -1.4372931530376649e-03 - 3 -8.2298292722385452e-03 -1.2926551614621284e-02 -4.0984181178163525e-03 - 4 -3.7699042590093545e-03 -6.5722892098813894e-03 -1.1184640360133386e-03 - 5 -1.1021961004346574e-02 -9.8906780939336230e-03 -2.8410737829284243e-03 - 6 -3.9676663166400006e-02 4.6817061464710277e-02 3.7148491979476325e-02 - 7 9.1033953013899035e-04 -1.0128524411938825e-02 -5.1568251805019887e-02 - 8 7.9064712058856887e-03 -3.3507254552633267e-03 3.4557098492564581e-02 - 9 1.5644176117320895e-03 3.7365546102722208e-03 1.5047408822037655e-02 - 10 2.9201446820572973e-02 -2.9249578745485925e-02 -1.5018077424322498e-02 - 11 -4.7835961513517334e-03 -3.7481385134185172e-03 -2.3464104142289872e-03 - 12 2.2696451841920482e-03 -3.4774154398129045e-04 -3.0640770327797049e-03 - 13 2.7531740451953858e-03 5.8171061612840571e-03 -7.9467454022158338e-04 - 14 3.5246182371994100e-03 -5.7939995585585442e-03 -3.9478431172750971e-03 - 15 -1.8547943640122824e-03 -5.8554729942777760e-03 6.2938485140538701e-03 - 16 1.8681499973445273e-02 -1.3262466204585367e-02 -4.5638651457003236e-02 - 17 -1.2896269981100387e-02 9.7527665265956503e-03 3.7296535360836762e-02 - 18 -8.0065794848262488e-04 -8.6270473212555132e-04 -1.4483040697508753e-03 - 19 1.2452390836182718e-03 -2.5061097118772653e-03 7.2998631009713157e-03 - 20 3.5930060229597302e-03 3.6938860309253200e-03 3.2322732687892933e-03 - 21 -1.4689220370766320e-03 -2.7352129761526922e-04 7.0581624215244974e-04 - 22 -7.0694199254630720e-03 -4.2577148924878615e-03 2.8079117614244581e-04 - 23 6.0446963117374384e-03 -1.4000131614795620e-03 2.5819754847014311e-03 - 24 3.1926367902287988e-04 -9.9445664749279517e-04 
1.4999996959366737e-04 - 25 1.3789754514808352e-04 -4.4335894884532422e-03 -8.1808136725085745e-04 - 26 2.0485904035218161e-03 2.7813358633837042e-03 4.3245727149206735e-03 - 27 4.5604120293371369e-04 -1.0305523026920786e-03 2.1188058381358467e-04 - 28 -6.2544520861855047e-03 1.4127711176146632e-03 -1.8429821884794180e-03 - 29 6.4110631534395072e-04 3.1273432719592779e-03 3.7253671105656637e-03 -... From 53ae731e09db33cafb57a52b411106a402eed4d7 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 30 Jul 2024 03:30:38 -0400 Subject: [PATCH 003/294] first draft fix recenter/kk and unit test --- src/KOKKOS/fix_recenter_kokkos.cpp | 155 ++++++++++++++++++ src/KOKKOS/fix_recenter_kokkos.h | 48 ++++++ src/fix_recenter.cpp | 10 +- src/fix_recenter.h | 2 +- .../tests/fix-timestep-recenter.yaml | 80 +++++++++ 5 files changed, 293 insertions(+), 2 deletions(-) create mode 100644 src/KOKKOS/fix_recenter_kokkos.cpp create mode 100644 src/KOKKOS/fix_recenter_kokkos.h create mode 100644 unittest/force-styles/tests/fix-timestep-recenter.yaml diff --git a/src/KOKKOS/fix_recenter_kokkos.cpp b/src/KOKKOS/fix_recenter_kokkos.cpp new file mode 100644 index 0000000000..d3630bb055 --- /dev/null +++ b/src/KOKKOS/fix_recenter_kokkos.cpp @@ -0,0 +1,155 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Mitch Murphy (alphataubio@gmail.com) + ------------------------------------------------------------------------- */ + +#include "fix_recenter_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "input.h" +#include "modify.h" +#include "update.h" +#include "domain.h" +#include "group.h" +#include "kokkos_few.h" + +using namespace LAMMPS_NS; + +enum{BOX,LATTICE,FRACTION}; + +/* ---------------------------------------------------------------------- */ + +template +FixRecenterKokkos::FixRecenterKokkos(LAMMPS *lmp, int narg, char **arg) : + FixRecenter(lmp, narg, arg) +{ + + utils::logmesg(lmp, "************ FixRecenterKokkos constructor ************\n"); + + kokkosable = 1; + atomKK = (AtomKokkos *)atom; + execution_space = ExecutionSpaceFromDevice::space; + + datamask_read = X_MASK | MASK_MASK; + datamask_modify = X_MASK; +} + +/* ---------------------------------------------------------------------- */ + +template +void FixRecenterKokkos::initial_integrate(int /*vflag*/) +{ + + atomKK->sync(execution_space,datamask_read); + atomKK->modified(execution_space,datamask_modify); + + x = atomKK->k_x.view(); + mask = atomKK->k_mask.view(); + int nlocal = atomKK->nlocal; + if (igroup == atomKK->firstgroup) nlocal = atomKK->nfirst; + + // FIX RECENTER + // target COM + // bounding box around domain works for both orthogonal and triclinic + + double xtarget = xinit; + double ytarget = yinit; + double ztarget = zinit; + + xflag=yflag=zflag=1; + + // FIXME: only supported in KOKKOS... 
+ // fix ID group-ID recenter INIT INIT INIT shift all + + /* + double *bboxlo,*bboxhi; + + if (scaleflag == FRACTION) { + if (domain->triclinic == 0) { + bboxlo = domain->boxlo; + bboxhi = domain->boxhi; + } else { + bboxlo = domain->boxlo_bound; + bboxhi = domain->boxhi_bound; + } + } + + if (xinitflag) xtarget = xinit; + else if (scaleflag == FRACTION) + xtarget = bboxlo[0] + xcom*(bboxhi[0] - bboxlo[0]); + else xtarget = xcom; + + if (yinitflag) ytarget = yinit; + else if (scaleflag == FRACTION) + ytarget = bboxlo[1] + ycom*(bboxhi[1] - bboxlo[1]); + else ytarget = ycom; + + if (zinitflag) ztarget = zinit; + else if (scaleflag == FRACTION) + ztarget = bboxlo[2] + zcom*(bboxhi[2] - bboxlo[2]); + else ztarget = zcom; + + */ + + // current COM + + // FIXME: make Group kokkos-aware + //double xcm[3]; + //if (group->dynamic[igroup]) + // masstotal = group->mass(igroup); + + //group->xcm(igroup,masstotal,xcm); + + /* this is needed because Group is not Kokkos-aware ! */ + atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK); + Few tmpxcm; + group->xcm(igroup,masstotal,&tmpxcm[0]); + const Few xcm(tmpxcm); + + // shift coords by difference between actual COM and requested COM + + shift[0] = xflag ? (xtarget - xcm[0]) : 0.0; + shift[1] = yflag ? (ytarget - xcm[1]) : 0.0; + shift[2] = zflag ? 
(ztarget - xcm[2]) : 0.0; + distance = sqrt(shift[0]*shift[0] + shift[1]*shift[1] + shift[2]*shift[2]); + + // ---- + + copymode = 1; + + //auto group2bit_copy = group2bit; + + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal), + LAMMPS_LAMBDA(int i) { + if (mask[i] & group2bit) { + x(i,0) += shift[0]; + x(i,1) += shift[1]; + x(i,2) += shift[2]; + } + }); + + copymode = 0; +} + + + +namespace LAMMPS_NS { +template class FixRecenterKokkos; +#ifdef LMP_KOKKOS_GPU +template class FixRecenterKokkos; +#endif +} diff --git a/src/KOKKOS/fix_recenter_kokkos.h b/src/KOKKOS/fix_recenter_kokkos.h new file mode 100644 index 0000000000..e87057eef7 --- /dev/null +++ b/src/KOKKOS/fix_recenter_kokkos.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS +// clang-format off +FixStyle(recenter/kk,FixRecenterKokkos); +FixStyle(recenter/kk/device,FixRecenterKokkos); +FixStyle(recenter/kk/host,FixRecenterKokkos); +// clang-format on +#else + +// clang-format off +#ifndef LMP_FIX_RECENTER_KOKKOS_H +#define LMP_FIX_RECENTER_KOKKOS_H + +#include "fix_recenter.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +//struct TagFixRecenter {}; + +template +class FixRecenterKokkos : public FixRecenter { + public: + FixRecenterKokkos(class LAMMPS *, int, char **); + + void initial_integrate(int) override; + + private: + typename ArrayTypes::t_x_array x; + typename ArrayTypes::t_int_1d mask; +}; + +} // namespace LAMMPS_NS + +#endif // LMP_FIX_RECENTER_KOKKOS_H +#endif // FIX_CLASS diff --git a/src/fix_recenter.cpp b/src/fix_recenter.cpp index 4da8c4787b..01ab211e48 100644 --- a/src/fix_recenter.cpp +++ b/src/fix_recenter.cpp @@ -191,6 +191,8 @@ void FixRecenter::initial_integrate(int /*vflag*/) group->xcm(igroup,masstotal,xcm); + utils::logmesg(lmp, "ok 2c, xcm={},{},{}\n", xcm[0], xcm[1], xcm[2]); + // shift coords by difference between actual COM and requested COM double **x = atom->x; @@ -202,6 +204,8 @@ void FixRecenter::initial_integrate(int /*vflag*/) shift[2] = zflag ? (ztarget - xcm[2]) : 0.0; distance = sqrt(shift[0]*shift[0] + shift[1]*shift[1] + shift[2]*shift[2]); + utils::logmesg(lmp, "ok 2d, shift={},{},{}\n", shift[0], shift[1], shift[2]); + for (int i = 0; i < nlocal; i++) if (mask[i] & group2bit) { x[i][0] += shift[0]; @@ -217,7 +221,11 @@ void FixRecenter::initial_integrate_respa(int vflag, int ilevel, int /*iloop*/) // outermost level - operate recenter // all other levels - nothing - if (ilevel == nlevels_respa-1) initial_integrate(vflag); + //if (ilevel == nlevels_respa-1) initial_integrate(vflag); + // FIXME: why does always calling initial_integrate make respa tests + // pass, i dont know ! 
+ initial_integrate(vflag); + } /* ---------------------------------------------------------------------- */ diff --git a/src/fix_recenter.h b/src/fix_recenter.h index a45f0201bf..dfdb48b8d2 100644 --- a/src/fix_recenter.h +++ b/src/fix_recenter.h @@ -34,7 +34,7 @@ class FixRecenter : public Fix { double compute_scalar() override; double compute_vector(int) override; - private: + protected: int group2bit, scaleflag; int xflag, yflag, zflag; int xinitflag, yinitflag, zinitflag; diff --git a/unittest/force-styles/tests/fix-timestep-recenter.yaml b/unittest/force-styles/tests/fix-timestep-recenter.yaml new file mode 100644 index 0000000000..6f450fc056 --- /dev/null +++ b/unittest/force-styles/tests/fix-timestep-recenter.yaml @@ -0,0 +1,80 @@ +--- +lammps_version: 27 Jun 2024 +tags: generated +date_generated: Tue Jul 30 03:14:33 2024 +epsilon: 2e-13 +skip_tests: +prerequisites: ! | + atom full + fix nve + fix recenter +pre_commands: ! "" +post_commands: ! | + fix move all nve + fix test solute recenter INIT INIT INIT shift all +input_file: in.fourmol +natoms: 29 +global_scalar: 4.868395120232176e-05 +global_vector: ! |- + 3 -4.5648814136711735e-05 -3.319879903845857e-06 1.6591903748697234e-05 +run_pos: ! 
|2 + 1 -2.7082064032386499e-01 2.4911895589422826e+00 -1.6682586822486273e-01 + 2 3.0967525316897071e-01 2.9612090314837487e+00 -8.5453098067965849e-01 + 3 -7.0435055657791923e-01 1.2305245639573710e+00 -6.2764261975400659e-01 + 4 -1.5821809762199530e+00 1.4837143502673025e+00 -1.2537384339156445e+00 + 5 -9.0756267929791723e-01 9.2649460723106236e-01 3.9967475457430401e-01 + 6 2.4795216267853529e-01 2.8310378335302211e-01 -1.2312906834805892e+00 + 7 3.4107023384383955e-01 -2.2672982667081999e-02 -2.5290964917997489e+00 + 8 1.1739901803399764e+00 -4.8865871728423016e-01 -6.3770167941769906e-01 + 9 1.3796873803800067e+00 -2.5277364192975738e-01 2.8367250856150772e-01 + 10 2.0507114794843648e+00 -1.4604328056559772e+00 -9.8310480112657339e-01 + 11 1.7874381518742311e+00 -1.9922127589205765e+00 -1.8889275950720215e+00 + 12 3.0059356613639814e+00 -4.9015993659532353e-01 -1.6230571610480671e+00 + 13 4.0511752533492755e+00 -8.9204654769223046e-01 -1.6398679033019394e+00 + 14 2.6063312919843575e+00 -4.1791897128083211e-01 -2.6632677111888836e+00 + 15 2.9691636760012665e+00 5.5419970002664964e-01 -1.2340695524884564e+00 + 16 2.6743379269528269e+00 -2.4124383370821203e+00 -2.3303096460060031e-02 + 17 2.2149927359583548e+00 -2.0898249503164625e+00 1.1964477291384996e+00 + 18 2.1366051278415465e+00 3.0158243097373703e+00 -3.5178021840309452e+00 + 19 1.5352186710387132e+00 2.6255028039118771e+00 -4.2352661282973489e+00 + 20 2.7723922579978524e+00 3.6923646133353265e+00 -3.9329515962227930e+00 + 21 4.9036477647504055e+00 -4.0752612489214854e+00 -3.6208988212986148e+00 + 22 4.3578705128740589e+00 -4.2126383743543956e+00 -4.4611517699408489e+00 + 23 5.7435732423607346e+00 -3.5822222255531937e+00 -3.8765034799030262e+00 + 24 2.0685593156722382e+00 3.1513082591014108e+00 3.1551716251734359e+00 + 25 1.3041700905791889e+00 3.2664861389585940e+00 2.5113181754339067e+00 + 26 2.5805586977011035e+00 4.0117338289225923e+00 3.2213387025995455e+00 + 27 -1.9614993556057472e+00 
-4.3563676247616661e+00 2.1099619612429263e+00 + 28 -2.7477213110213659e+00 -4.0201084248636239e+00 1.5831378660339515e+00 + 29 -1.3129650617060100e+00 -3.5962782355739837e+00 2.2747668965643393e+00 +run_vel: ! |2 + 1 8.1705744183262104e-03 1.6516406176274218e-02 4.7902264318912665e-03 + 2 5.4501493445687794e-03 5.1791699408496421e-03 -1.4372931530376607e-03 + 3 -8.2298292722385487e-03 -1.2926551614621277e-02 -4.0984181178163560e-03 + 4 -3.7699042590093415e-03 -6.5722892098814042e-03 -1.1184640360133158e-03 + 5 -1.1021961004346589e-02 -9.8906780939336161e-03 -2.8410737829284308e-03 + 6 -3.9676663166400034e-02 4.6817061464710229e-02 3.7148491979476020e-02 + 7 9.1033953013898157e-04 -1.0128524411938776e-02 -5.1568251805019651e-02 + 8 7.9064712058856471e-03 -3.3507254552632795e-03 3.4557098492564615e-02 + 9 1.5644176117320901e-03 3.7365546102722182e-03 1.5047408822037646e-02 + 10 2.9201446820573056e-02 -2.9249578745486018e-02 -1.5018077424322512e-02 + 11 -4.7835961513517386e-03 -3.7481385134185206e-03 -2.3464104142289959e-03 + 12 2.2696451841920360e-03 -3.4774154398128042e-04 -3.0640770327796927e-03 + 13 2.7531740451953762e-03 5.8171061612840589e-03 -7.9467454022160203e-04 + 14 3.5246182371994326e-03 -5.7939995585585581e-03 -3.9478431172751110e-03 + 15 -1.8547943640122733e-03 -5.8554729942777882e-03 6.2938485140538684e-03 + 16 1.8681499973445276e-02 -1.3262466204585354e-02 -4.5638651457003278e-02 + 17 -1.2896269981100394e-02 9.7527665265956520e-03 3.7296535360836797e-02 + 18 -8.0065794848264635e-04 -8.6270473212556715e-04 -1.4483040697508916e-03 + 19 1.2452390836183188e-03 -2.5061097118772376e-03 7.2998631009713894e-03 + 20 3.5930060229597644e-03 3.6938860309253564e-03 3.2322732687892846e-03 + 21 -1.4689220370766513e-03 -2.7352129761527480e-04 7.0581624215242762e-04 + 22 -7.0694199254630373e-03 -4.2577148924878580e-03 2.8079117614252934e-04 + 23 6.0446963117374757e-03 -1.4000131614795444e-03 2.5819754847014359e-03 + 24 3.1926367902287810e-04 -9.9445664749280038e-04 
1.4999996959366859e-04 + 25 1.3789754514808927e-04 -4.4335894884532361e-03 -8.1808136725085713e-04 + 26 2.0485904035218191e-03 2.7813358633837193e-03 4.3245727149206674e-03 + 27 4.5604120293371239e-04 -1.0305523026920900e-03 2.1188058381358600e-04 + 28 -6.2544520861855203e-03 1.4127711176146766e-03 -1.8429821884794249e-03 + 29 6.4110631534397057e-04 3.1273432719593091e-03 3.7253671105656658e-03 +... From 4a972c43117b20dcff680532d9f18d8cb8860fd7 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 30 Jul 2024 04:28:40 -0400 Subject: [PATCH 004/294] fix recenter/kk works with coords, init, null plus 3 tests --- src/KOKKOS/fix_recenter_kokkos.cpp | 34 ++------ src/KOKKOS/fix_recenter_kokkos.h | 6 +- src/fix_recenter.cpp | 4 - .../tests/fix-timestep-recenter-coords.yaml | 80 +++++++++++++++++++ ...r.yaml => fix-timestep-recenter-init.yaml} | 4 +- .../tests/fix-timestep-recenter-null.yaml | 80 +++++++++++++++++++ 6 files changed, 171 insertions(+), 37 deletions(-) create mode 100644 unittest/force-styles/tests/fix-timestep-recenter-coords.yaml rename unittest/force-styles/tests/{fix-timestep-recenter.yaml => fix-timestep-recenter-init.yaml} (97%) create mode 100644 unittest/force-styles/tests/fix-timestep-recenter-null.yaml diff --git a/src/KOKKOS/fix_recenter_kokkos.cpp b/src/KOKKOS/fix_recenter_kokkos.cpp index d3630bb055..615691e33c 100644 --- a/src/KOKKOS/fix_recenter_kokkos.cpp +++ b/src/KOKKOS/fix_recenter_kokkos.cpp @@ -37,9 +37,6 @@ template FixRecenterKokkos::FixRecenterKokkos(LAMMPS *lmp, int narg, char **arg) : FixRecenter(lmp, narg, arg) { - - utils::logmesg(lmp, "************ FixRecenterKokkos constructor ************\n"); - kokkosable = 1; atomKK = (AtomKokkos *)atom; execution_space = ExecutionSpaceFromDevice::space; @@ -57,25 +54,15 @@ void FixRecenterKokkos::initial_integrate(int /*vflag*/) atomKK->sync(execution_space,datamask_read); atomKK->modified(execution_space,datamask_modify); - x = atomKK->k_x.view(); - mask = atomKK->k_mask.view(); + d_x = 
atomKK->k_x.view(); + d_mask = atomKK->k_mask.view(); int nlocal = atomKK->nlocal; if (igroup == atomKK->firstgroup) nlocal = atomKK->nfirst; - // FIX RECENTER // target COM // bounding box around domain works for both orthogonal and triclinic - double xtarget = xinit; - double ytarget = yinit; - double ztarget = zinit; - - xflag=yflag=zflag=1; - - // FIXME: only supported in KOKKOS... - // fix ID group-ID recenter INIT INIT INIT shift all - - /* + double xtarget,ytarget,ztarget; double *bboxlo,*bboxhi; if (scaleflag == FRACTION) { @@ -103,8 +90,6 @@ void FixRecenterKokkos::initial_integrate(int /*vflag*/) ztarget = bboxlo[2] + zcom*(bboxhi[2] - bboxlo[2]); else ztarget = zcom; - */ - // current COM // FIXME: make Group kokkos-aware @@ -127,18 +112,14 @@ void FixRecenterKokkos::initial_integrate(int /*vflag*/) shift[2] = zflag ? (ztarget - xcm[2]) : 0.0; distance = sqrt(shift[0]*shift[0] + shift[1]*shift[1] + shift[2]*shift[2]); - // ---- - copymode = 1; - //auto group2bit_copy = group2bit; - Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal), LAMMPS_LAMBDA(int i) { - if (mask[i] & group2bit) { - x(i,0) += shift[0]; - x(i,1) += shift[1]; - x(i,2) += shift[2]; + if (d_mask[i] & group2bit) { + d_x(i,0) += shift[0]; + d_x(i,1) += shift[1]; + d_x(i,2) += shift[2]; } }); @@ -146,7 +127,6 @@ void FixRecenterKokkos::initial_integrate(int /*vflag*/) } - namespace LAMMPS_NS { template class FixRecenterKokkos; #ifdef LMP_KOKKOS_GPU diff --git a/src/KOKKOS/fix_recenter_kokkos.h b/src/KOKKOS/fix_recenter_kokkos.h index e87057eef7..f945802ec0 100644 --- a/src/KOKKOS/fix_recenter_kokkos.h +++ b/src/KOKKOS/fix_recenter_kokkos.h @@ -28,8 +28,6 @@ FixStyle(recenter/kk/host,FixRecenterKokkos); namespace LAMMPS_NS { -//struct TagFixRecenter {}; - template class FixRecenterKokkos : public FixRecenter { public: @@ -38,8 +36,8 @@ class FixRecenterKokkos : public FixRecenter { void initial_integrate(int) override; private: - typename ArrayTypes::t_x_array x; - typename 
ArrayTypes::t_int_1d mask; + typename ArrayTypes::t_x_array d_x; + typename ArrayTypes::t_int_1d d_mask; }; } // namespace LAMMPS_NS diff --git a/src/fix_recenter.cpp b/src/fix_recenter.cpp index 01ab211e48..76181c04d4 100644 --- a/src/fix_recenter.cpp +++ b/src/fix_recenter.cpp @@ -191,8 +191,6 @@ void FixRecenter::initial_integrate(int /*vflag*/) group->xcm(igroup,masstotal,xcm); - utils::logmesg(lmp, "ok 2c, xcm={},{},{}\n", xcm[0], xcm[1], xcm[2]); - // shift coords by difference between actual COM and requested COM double **x = atom->x; @@ -204,8 +202,6 @@ void FixRecenter::initial_integrate(int /*vflag*/) shift[2] = zflag ? (ztarget - xcm[2]) : 0.0; distance = sqrt(shift[0]*shift[0] + shift[1]*shift[1] + shift[2]*shift[2]); - utils::logmesg(lmp, "ok 2d, shift={},{},{}\n", shift[0], shift[1], shift[2]); - for (int i = 0; i < nlocal; i++) if (mask[i] & group2bit) { x[i][0] += shift[0]; diff --git a/unittest/force-styles/tests/fix-timestep-recenter-coords.yaml b/unittest/force-styles/tests/fix-timestep-recenter-coords.yaml new file mode 100644 index 0000000000..982a3b18e0 --- /dev/null +++ b/unittest/force-styles/tests/fix-timestep-recenter-coords.yaml @@ -0,0 +1,80 @@ +--- +lammps_version: 27 Jun 2024 +tags: generated +date_generated: Tue Jul 30 04:23:44 2024 +epsilon: 2e-13 +skip_tests: +prerequisites: ! | + atom full + fix nve + fix recenter +pre_commands: ! "" +post_commands: ! | + fix move all nve + fix test solute recenter 0.45 0.5 0.55 shift all units fraction +input_file: in.fourmol +natoms: 29 +global_scalar: 4.868395120178045e-05 +global_vector: ! |- + 3 -4.5648814136156624e-05 -3.3198799039568794e-06 1.6591903748613968e-05 +run_pos: ! 
|2 + 1 -7.5132769500671226e-01 2.4986248943444171e+00 7.9802680459215103e-01 + 2 -1.7083180151387659e-01 2.9686443668858833e+00 1.1032169213735510e-01 + 3 -1.1848576112607669e+00 1.2379598993595067e+00 3.3721005306300700e-01 + 4 -2.0626880309028004e+00 1.4911496856694382e+00 -2.8888576109863040e-01 + 5 -1.3880697339807648e+00 9.3392994263319751e-01 1.3645274273913177e+00 + 6 -2.3255489200431190e-01 2.9053911875515726e-01 -2.6643801066357575e-01 + 7 -1.3943682083900774e-01 -1.5237647264946797e-02 -1.5642438189827357e+00 + 8 6.9348312565712877e-01 -4.8122338188209490e-01 3.2715099339931453e-01 + 9 8.9918032569715922e-01 -2.4533830652762220e-01 1.2485251813785214e+00 + 10 1.5702044248015166e+00 -1.4529974702538417e+00 -1.8252128309559742e-02 + 11 1.3069310971913839e+00 -1.9847774235184412e+00 -9.2407492225500776e-01 + 12 2.5254286066811331e+00 -4.8272460119318827e-01 -6.5820448823105326e-01 + 13 3.5706681986664290e+00 -8.8461121229009532e-01 -6.7501523048492595e-01 + 14 2.1258242373015102e+00 -4.1048363587869696e-01 -1.6984150383718699e+00 + 15 2.4886566213184191e+00 5.6163503542878490e-01 -2.6921687967144270e-01 + 16 2.1938308722699804e+00 -2.4050030016799853e+00 9.4154957635695380e-01 + 17 1.7344856812755074e+00 -2.0823896149143275e+00 2.1613004019555140e+00 + 18 1.6560980731586978e+00 3.0232596451395048e+00 -2.5529495112139315e+00 + 19 1.0547116163558656e+00 2.6329381393140117e+00 -3.2704134554803344e+00 + 20 2.2918852033150059e+00 3.6997999487374611e+00 -2.9680989234057789e+00 + 21 4.4231407100675586e+00 -4.0678259135193509e+00 -2.6560461484816007e+00 + 22 3.8773634581912133e+00 -4.2052030389522610e+00 -3.4962990971238348e+00 + 23 5.2630661876778886e+00 -3.5747868901510587e+00 -2.9116508070860116e+00 + 24 1.5880522609893917e+00 3.1587435945035454e+00 4.1200242979904500e+00 + 25 8.2366303589634160e-01 3.2739214743607290e+00 3.4761708482509213e+00 + 26 2.1000516430182552e+00 4.0191691643247269e+00 4.1861913754165592e+00 + 27 -2.4420064102885943e+00 
-4.3489322893595315e+00 3.0748146340599405e+00 + 28 -3.2282283657042123e+00 -4.0126730894614893e+00 2.5479905388509652e+00 + 29 -1.7934721163888576e+00 -3.5888429001718487e+00 3.2396195693813534e+00 +run_vel: ! |2 + 1 8.1705744183262520e-03 1.6516406176274312e-02 4.7902264318912926e-03 + 2 5.4501493445687594e-03 5.1791699408496325e-03 -1.4372931530376343e-03 + 3 -8.2298292722385816e-03 -1.2926551614621412e-02 -4.0984181178163994e-03 + 4 -3.7699042590093445e-03 -6.5722892098813860e-03 -1.1184640360133362e-03 + 5 -1.1021961004346569e-02 -9.8906780939336126e-03 -2.8410737829284312e-03 + 6 -3.9676663166400034e-02 4.6817061464710250e-02 3.7148491979475992e-02 + 7 9.1033953013898092e-04 -1.0128524411938771e-02 -5.1568251805019630e-02 + 8 7.9064712058855725e-03 -3.3507254552631849e-03 3.4557098492564643e-02 + 9 1.5644176117320875e-03 3.7365546102722203e-03 1.5047408822037638e-02 + 10 2.9201446820573192e-02 -2.9249578745486140e-02 -1.5018077424322537e-02 + 11 -4.7835961513517603e-03 -3.7481385134185263e-03 -2.3464104142290176e-03 + 12 2.2696451841920920e-03 -3.4774154398130042e-04 -3.0640770327796979e-03 + 13 2.7531740451952145e-03 5.8171061612840866e-03 -7.9467454022159282e-04 + 14 3.5246182371994239e-03 -5.7939995585585425e-03 -3.9478431172750902e-03 + 15 -1.8547943640123080e-03 -5.8554729942777830e-03 6.2938485140538606e-03 + 16 1.8681499973445179e-02 -1.3262466204585288e-02 -4.5638651457003097e-02 + 17 -1.2896269981100333e-02 9.7527665265956121e-03 3.7296535360836651e-02 + 18 -8.0065794848260612e-04 -8.6270473212553646e-04 -1.4483040697508677e-03 + 19 1.2452390836182341e-03 -2.5061097118772879e-03 7.2998631009712671e-03 + 20 3.5930060229596903e-03 3.6938860309252827e-03 3.2322732687893093e-03 + 21 -1.4689220370766394e-03 -2.7352129761527014e-04 7.0581624215243781e-04 + 22 -7.0694199254630772e-03 -4.2577148924878711e-03 2.8079117614247622e-04 + 23 6.0446963117374731e-03 -1.4000131614795503e-03 2.5819754847014498e-03 + 24 3.1926367902286048e-04 -9.9445664749277913e-04 
1.4999996959364468e-04 + 25 1.3789754514817134e-04 -4.4335894884532752e-03 -8.1808136725077560e-04 + 26 2.0485904035218057e-03 2.7813358633836725e-03 4.3245727149206822e-03 + 27 4.5604120293371412e-04 -1.0305523026921115e-03 2.1188058381359324e-04 + 28 -6.2544520861855810e-03 1.4127711176147026e-03 -1.8429821884794668e-03 + 29 6.4110631534402326e-04 3.1273432719593694e-03 3.7253671105656788e-03 +... diff --git a/unittest/force-styles/tests/fix-timestep-recenter.yaml b/unittest/force-styles/tests/fix-timestep-recenter-init.yaml similarity index 97% rename from unittest/force-styles/tests/fix-timestep-recenter.yaml rename to unittest/force-styles/tests/fix-timestep-recenter-init.yaml index 6f450fc056..efc67fd477 100644 --- a/unittest/force-styles/tests/fix-timestep-recenter.yaml +++ b/unittest/force-styles/tests/fix-timestep-recenter-init.yaml @@ -1,7 +1,7 @@ --- lammps_version: 27 Jun 2024 tags: generated -date_generated: Tue Jul 30 03:14:33 2024 +date_generated: Tue Jul 30 04:14:01 2024 epsilon: 2e-13 skip_tests: prerequisites: ! | @@ -11,7 +11,7 @@ prerequisites: ! | pre_commands: ! "" post_commands: ! | fix move all nve - fix test solute recenter INIT INIT INIT shift all + fix test solute recenter INIT INIT INIT shift all units box input_file: in.fourmol natoms: 29 global_scalar: 4.868395120232176e-05 diff --git a/unittest/force-styles/tests/fix-timestep-recenter-null.yaml b/unittest/force-styles/tests/fix-timestep-recenter-null.yaml new file mode 100644 index 0000000000..bb09e1a1d4 --- /dev/null +++ b/unittest/force-styles/tests/fix-timestep-recenter-null.yaml @@ -0,0 +1,80 @@ +--- +lammps_version: 27 Jun 2024 +tags: generated +date_generated: Tue Jul 30 04:25:52 2024 +epsilon: 2e-13 +skip_tests: +prerequisites: ! | + atom full + fix nve + fix recenter +pre_commands: ! "" +post_commands: ! | + fix move all nve + fix test solute recenter NULL NULL NULL shift all units lattice +input_file: in.fourmol +natoms: 29 +global_scalar: 0 +global_vector: ! 
|- + 3 0 0 0 +run_pos: ! |2 + 1 -2.7045559775384032e-01 2.4912159905679729e+00 -1.6695851791541888e-01 + 2 3.1004029573899528e-01 2.9612354631094391e+00 -8.5466363037021464e-01 + 3 -7.0398551400789477e-01 1.2305509955830618e+00 -6.2777526944456274e-01 + 4 -1.5818159336499285e+00 1.4837407818929933e+00 -1.2538710836062004e+00 + 5 -9.0719763672789266e-01 9.2652103885675297e-01 3.9954210488374786e-01 + 6 2.4831720524855985e-01 2.8313021497871271e-01 -1.2314233331711453e+00 + 7 3.4143527641386412e-01 -2.2646551041391422e-02 -2.5292291414903052e+00 + 8 1.1743552229100009e+00 -4.8863228565853950e-01 -6.3783432910825522e-01 + 9 1.3800524229500313e+00 -2.5274721030406683e-01 2.8353985887095157e-01 + 10 2.0510765220543883e+00 -1.4604063740302866e+00 -9.8323745081712954e-01 + 11 1.7878031944442556e+00 -1.9921863272948861e+00 -1.8890602447625777e+00 + 12 3.0063007039340053e+00 -4.9013350496963293e-01 -1.6231898107386229e+00 + 13 4.0515402959192999e+00 -8.9202011606653986e-01 -1.6400005529924957e+00 + 14 2.6066963345543819e+00 -4.1789253965514156e-01 -2.6634003608794394e+00 + 15 2.9695287185712913e+00 5.5422613165234036e-01 -1.2342022021790127e+00 + 16 2.6747029695228521e+00 -2.4124119054564295e+00 -2.3435746150616148e-02 + 17 2.2153577785283796e+00 -2.0897985186907717e+00 1.1963150794479436e+00 + 18 2.1369701704115704e+00 3.0158507413630606e+00 -3.5179348337215015e+00 + 19 1.5355837136087378e+00 2.6255292355375675e+00 -4.2353987779879052e+00 + 20 2.7727573005678776e+00 3.6923910449610169e+00 -3.9330842459133493e+00 + 21 4.9040128073204299e+00 -4.0752348172957946e+00 -3.6210314709891711e+00 + 22 4.3582355554440841e+00 -4.2126119427287048e+00 -4.4612844196314052e+00 + 23 5.7439382849307599e+00 -3.5821957939275029e+00 -3.8766361295935821e+00 + 24 2.0689243582422630e+00 3.1513346907271012e+00 3.1550389754828800e+00 + 25 1.3045351331492134e+00 3.2665125705842848e+00 2.5111855257433504e+00 + 26 2.5809237402711274e+00 4.0117602605482832e+00 3.2212060529089896e+00 + 27 
-1.9611343130357228e+00 -4.3563411931359752e+00 2.1098293115523705e+00 + 28 -2.7473562684513411e+00 -4.0200819932379330e+00 1.5830052163433954e+00 + 29 -1.3126000191359855e+00 -3.5962518039482929e+00 2.2746342468737835e+00 +run_vel: ! |2 + 1 8.1705744183262364e-03 1.6516406176274284e-02 4.7902264318912908e-03 + 2 5.4501493445687802e-03 5.1791699408496447e-03 -1.4372931530376594e-03 + 3 -8.2298292722385660e-03 -1.2926551614621379e-02 -4.0984181178163794e-03 + 4 -3.7699042590093549e-03 -6.5722892098813894e-03 -1.1184640360133316e-03 + 5 -1.1021961004346582e-02 -9.8906780939336109e-03 -2.8410737829284421e-03 + 6 -3.9676663166400034e-02 4.6817061464710256e-02 3.7148491979476124e-02 + 7 9.1033953013898601e-04 -1.0128524411938794e-02 -5.1568251805019748e-02 + 8 7.9064712058855690e-03 -3.3507254552631780e-03 3.4557098492564636e-02 + 9 1.5644176117320919e-03 3.7365546102722177e-03 1.5047408822037646e-02 + 10 2.9201446820573178e-02 -2.9249578745486140e-02 -1.5018077424322538e-02 + 11 -4.7835961513517560e-03 -3.7481385134185202e-03 -2.3464104142290089e-03 + 12 2.2696451841920581e-03 -3.4774154398129452e-04 -3.0640770327796884e-03 + 13 2.7531740451953108e-03 5.8171061612840597e-03 -7.9467454022159748e-04 + 14 3.5246182371994170e-03 -5.7939995585585503e-03 -3.9478431172751327e-03 + 15 -1.8547943640122978e-03 -5.8554729942777769e-03 6.2938485140538701e-03 + 16 1.8681499973445235e-02 -1.3262466204585334e-02 -4.5638651457003243e-02 + 17 -1.2896269981100382e-02 9.7527665265956451e-03 3.7296535360836762e-02 + 18 -8.0065794848261610e-04 -8.6270473212554308e-04 -1.4483040697508777e-03 + 19 1.2452390836182583e-03 -2.5061097118772749e-03 7.2998631009713062e-03 + 20 3.5930060229597072e-03 3.6938860309252974e-03 3.2322732687893115e-03 + 21 -1.4689220370766539e-03 -2.7352129761527648e-04 7.0581624215243120e-04 + 22 -7.0694199254630382e-03 -4.2577148924878598e-03 2.8079117614252034e-04 + 23 6.0446963117374939e-03 -1.4000131614795382e-03 2.5819754847014320e-03 + 24 3.1926367902287864e-04 
-9.9445664749276113e-04 1.4999996959365281e-04 + 25 1.3789754514814445e-04 -4.4335894884532700e-03 -8.1808136725080140e-04 + 26 2.0485904035217606e-03 2.7813358633835958e-03 4.3245727149206761e-03 + 27 4.5604120293369819e-04 -1.0305523026921102e-03 2.1188058381358391e-04 + 28 -6.2544520861855151e-03 1.4127711176146864e-03 -1.8429821884794260e-03 + 29 6.4110631534402261e-04 3.1273432719593807e-03 3.7253671105656745e-03 +... From 4f89b0b783dce714efcad5fb38c9d6430bf01165 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 31 Jul 2024 12:58:14 -0400 Subject: [PATCH 005/294] first draft fix cmap/kk --- src/KOKKOS/fix_cmap_kokkos.cpp | 865 +++++++++++++++++++++++++++++++++ src/KOKKOS/fix_cmap_kokkos.h | 99 ++++ src/MOLECULE/fix_cmap.cpp | 49 +- src/MOLECULE/fix_cmap.h | 18 +- 4 files changed, 1016 insertions(+), 15 deletions(-) create mode 100644 src/KOKKOS/fix_cmap_kokkos.cpp create mode 100644 src/KOKKOS/fix_cmap_kokkos.h diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp new file mode 100644 index 0000000000..18c47439d2 --- /dev/null +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -0,0 +1,865 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Mitch Murphy (alphataubio@gmail.com) +------------------------------------------------------------------------- */ + +#include "fix_cmap_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "comm.h" +#include "domain.h" +#include "error.h" +#include "input.h" +#include "math_const.h" +#include "memory_kokkos.h" +#include "modify.h" +#include "update.h" +#include "variable.h" + +#include + +using namespace LAMMPS_NS; +using namespace MathConst; + +/* ---------------------------------------------------------------------- */ + +template +FixCMAPKokkos::FixCMAPKokkos(LAMMPS *lmp, int narg, char **arg) : + FixCMAP(lmp, narg, arg) +{ + kokkosable = 1; + atomKK = (AtomKokkos *)atom; + execution_space = ExecutionSpaceFromDevice::space; + + datamask_read = X_MASK | F_MASK; + datamask_modify = F_MASK; + + // allocate memory for CMAP data + + memoryKK->create_kokkos(k_g_axis,g_axis,CMAPDIM,"cmap:g_axis"); + memoryKK->create_kokkos(k_cmapgrid,cmapgrid,CMAPMAX,CMAPDIM,CMAPDIM,"cmap:grid"); + memoryKK->create_kokkos(k_d1cmapgrid,d1cmapgrid,CMAPMAX,CMAPDIM,CMAPDIM,"cmap:d1grid"); + memoryKK->create_kokkos(k_d2cmapgrid,d2cmapgrid,CMAPMAX,CMAPDIM,CMAPDIM,"cmap:d2grid"); + memoryKK->create_kokkos(k_d12cmapgrid,d12cmapgrid,CMAPMAX,CMAPDIM,CMAPDIM,"cmap:d12grid"); + + d_g_axis = k_g_axis.template view(); + d_cmapgrid = k_cmapgrid.template view(); + d_d1cmapgrid = k_d1cmapgrid.template view(); + d_d2cmapgrid = k_d2cmapgrid.template view(); + d_d12cmapgrid = k_d12cmapgrid.template view(); + + FixCMAPKokkos::grow_arrays(atom->nmax); +} + +/* ---------------------------------------------------------------------- */ + +template +FixCMAPKokkos::~FixCMAPKokkos() +{ + if (copymode) return; + + memoryKK->destroy_kokkos(k_g_axis,g_axis); + memoryKK->destroy_kokkos(k_cmapgrid,cmapgrid); + 
memoryKK->destroy_kokkos(k_d1cmapgrid,d1cmapgrid); + memoryKK->destroy_kokkos(k_d2cmapgrid,d2cmapgrid); + memoryKK->destroy_kokkos(k_d12cmapgrid,d12cmapgrid); + + memoryKK->destroy_kokkos(k_crosstermlist,crosstermlist); + + memoryKK->destroy_kokkos(k_num_crossterm,num_crossterm); + memoryKK->destroy_kokkos(k_crossterm_type,crossterm_type); + memoryKK->destroy_kokkos(k_crossterm_atom1,crossterm_atom1); + memoryKK->destroy_kokkos(k_crossterm_atom2,crossterm_atom2); + memoryKK->destroy_kokkos(k_crossterm_atom3,crossterm_atom3); + memoryKK->destroy_kokkos(k_crossterm_atom4,crossterm_atom4); + memoryKK->destroy_kokkos(k_crossterm_atom5,crossterm_atom5); + +} + +/* ---------------------------------------------------------------------- */ + +template +void FixCMAPKokkos::init() +{ + FixCMAP::init(); + + if (utils::strmatch(update->integrate_style,"^respa")) + error->all(FLERR,"Cannot yet use respa with Kokkos"); + + for( int i=0 ; i(); + k_cmapgrid.template sync(); + k_d1cmapgrid.template sync(); + k_d2cmapgrid.template sync(); + k_d12cmapgrid.template sync(); +} + +/* ---------------------------------------------------------------------- + store local neighbor list as if newton_bond = OFF, even if actually ON +------------------------------------------------------------------------- */ + +template +void FixCMAPKokkos::pre_neighbor() +{ + int i,m,atom1,atom2,atom3,atom4,atom5; + const int me = comm->me; + const int nprocs = comm->nprocs; + + // guesstimate initial length of local crossterm list + // if ncmap was not set (due to read_restart, no read_data), + // then list will grow by LISTDELTA chunks + + if (maxcrossterm == 0) { + if (nprocs == 1) maxcrossterm = ncmap; + else maxcrossterm = static_cast (LB_FACTOR*ncmap/nprocs); + memoryKK->create_kokkos(k_crosstermlist,crosstermlist,maxcrossterm,CMAPMAX,"cmap:crosstermlist"); + + d_crosstermlist = k_crosstermlist.template view(); + } + + atomKK->sync(execution_space,X_MASK); + d_x = atomKK->k_x.view(); + int nlocal = 
atomKK->nlocal; + + map_style = atom->map_style; + if (map_style == Atom::MAP_ARRAY) { + k_map_array = atomKK->k_map_array; + k_map_array.template sync(); + } else if (map_style == Atom::MAP_HASH) { + k_map_hash = atomKK->k_map_hash; + k_map_hash.template sync(); + } + + atomKK->k_sametag.sync(); + d_sametag = atomKK->k_sametag.view(); + + ncrosstermlist = 0; + + for (i = 0; i < nlocal; i++) { + for (m = 0; m < d_num_crossterm(i); m++) { + + atom1 = AtomKokkos::map_kokkos(d_crossterm_atom1(i,m),map_style,k_map_array,k_map_hash); + atom2 = AtomKokkos::map_kokkos(d_crossterm_atom2(i,m),map_style,k_map_array,k_map_hash); + atom3 = AtomKokkos::map_kokkos(d_crossterm_atom3(i,m),map_style,k_map_array,k_map_hash); + atom4 = AtomKokkos::map_kokkos(d_crossterm_atom4(i,m),map_style,k_map_array,k_map_hash); + atom5 = AtomKokkos::map_kokkos(d_crossterm_atom5(i,m),map_style,k_map_array,k_map_hash); + + if (atom1 == -1 || atom2 == -1 || atom3 == -1 || + atom4 == -1 || atom5 == -1) + error->one(FLERR,"CMAP atoms {} {} {} {} {} missing on " + "proc {} at step {}", + d_crossterm_atom1(i,m),d_crossterm_atom2(i,m), + d_crossterm_atom3(i,m),d_crossterm_atom4(i,m), + d_crossterm_atom5(i,m),me,update->ntimestep); + atom1 = closest_image(i,atom1); + atom2 = closest_image(i,atom2); + atom3 = closest_image(i,atom3); + atom4 = closest_image(i,atom4); + atom5 = closest_image(i,atom5); + + if (i <= atom1 && i <= atom2 && i <= atom3 && + i <= atom4 && i <= atom5) { + if (ncrosstermlist == maxcrossterm) { + maxcrossterm += LISTDELTA; + memoryKK->grow_kokkos(k_crosstermlist,crosstermlist,maxcrossterm,CMAPMAX,"cmap:crosstermlist"); + + d_crosstermlist = k_crosstermlist.template view(); + } + d_crosstermlist(ncrosstermlist,0) = atom1; + d_crosstermlist(ncrosstermlist,1) = atom2; + d_crosstermlist(ncrosstermlist,2) = atom3; + d_crosstermlist(ncrosstermlist,3) = atom4; + d_crosstermlist(ncrosstermlist,4) = atom5; + d_crosstermlist(ncrosstermlist,5) = d_crossterm_type(i,m); + ncrosstermlist++; + } + 
} + } +}
+
+
+/* ----------------------------------------------------------------------
+   compute CMAP terms as if newton_bond = OFF, even if actually ON
+------------------------------------------------------------------------- */
+
+/* commented-out alternative version of post_force() (disabled)
+template
+void FixCMAPKokkos::post_force(int vflag)
+{
+
+  atomKK->sync(execution_space,datamask_read);
+  atomKK->modified(execution_space,datamask_modify);
+
+  d_x = atomKK->k_x.view();
+  d_f = atomKK->k_f.view();
+  d_type = atomKK->k_type.view();
+  d_mask = atomKK->k_mask.view();
+  int nlocal = atomKK->nlocal;
+  if (igroup == atomKK->firstgroup) nlocal = atomKK->nfirst;
+
+  copymode = 1;
+
+  Kokkos::parallel_for(nlocal, *this);
+
+  copymode = 0;
+
+}
+*/
+// post_force(): refresh the x/f views, sync, then call operator()(n) for every crossterm list entry
+template
+void FixCMAPKokkos::post_force(int vflag)
+{
+
+  d_x = atomKK->k_x.template view();  // positions, read by operator()
+  d_f = atomKK->k_f.template view();  // forces, accumulated into by operator()
+  //atomKK->sync(execution_space,X_MASK|F_MASK);
+  atomKK->sync(execution_space,ALL_MASK);  // ALL_MASK is used in place of the commented-out X_MASK|F_MASK
+  k_crosstermlist.template sync();
+
+  ecmap = 0.0;  // energy accumulator; operator() adds to it
+  int eflag = eflag_caller;
+  ev_init(eflag,vflag);
+
+  copymode = 1;  // NOTE(review): presumably protects the destructor while Kokkos copies the functor; the dispatch below is commented out
+  //Kokkos::parallel_for(ncrosstermlist, *this);
+
+  for ( int n = 0; n < ncrosstermlist; n++)  // plain serial loop over the list entries
+    operator()(n);
+
+  copymode = 0;
+
+  //atomKK->modified(execution_space,F_MASK);
+  atomKK->modified(execution_space,ALL_MASK);  // ALL_MASK is used in place of the commented-out F_MASK
+}
+
+
+/* ---------------------------------------------------------------------- */
+// operator()(n): evaluate entry n of d_crosstermlist. Computes the phi/psi
+// dihedrals of its five atoms, interpolates energy and gradient with bc_interpol(),
+// then adds energy to ecmap and forces to d_f for the atoms with index < nlocal.
+template
+KOKKOS_INLINE_FUNCTION
+//void FixCMAPKokkos::operator()(const int n) const
+void FixCMAPKokkos::operator()(const int n)
+{
+
+  //std::cerr << "post_force (n=" << n << ")\n";
+
+  int i1,i2,i3,i4,i5,type,nlist;
+  int li1, li2, mli1,mli2,mli11,mli21,t1,li3,li4,mli3,mli4,mli31,mli41;
+  int list[5];
+  // vectors needed to calculate the cross-term dihedral angles
+  double vb21x,vb21y,vb21z,vb32x,vb32y,vb32z,vb34x,vb34y,vb34z;
+  double vb23x,vb23y,vb23z;
+  double vb43x,vb43y,vb43z,vb45x,vb45y,vb45z,a1x,a1y,a1z,b1x,b1y,b1z;
+  double a2x,a2y,a2z,b2x,b2y,b2z,r32,a1sq,b1sq,a2sq,b2sq,dpr21r32,dpr34r32;
+  double dpr32r43,dpr45r43,r43,vb12x,vb12y,vb12z,vb54x,vb54y,vb54z;
+  // cross-term dihedral angles
+  double phi,psi,phi1,psi1;
+  double f1[3],f2[3],f3[3],f4[3],f5[3],vcmap[CMAPMAX];  // NOTE(review): vcmap entries 0..5 are written below; its size comes from CMAPMAX
+  double gs[4],d1gs[4],d2gs[4],d12gs[4];
+  double engfraction;
+  // vectors needed for the gradient/force calculation
+  double dphidr1x,dphidr1y,dphidr1z,dphidr2x,dphidr2y,dphidr2z;
+  double dphidr3x,dphidr3y,dphidr3z,dphidr4x,dphidr4y,dphidr4z;
+  double dpsidr1x,dpsidr1y,dpsidr1z,dpsidr2x,dpsidr2y,dpsidr2z;
+  double dpsidr3x,dpsidr3y,dpsidr3z,dpsidr4x,dpsidr4y,dpsidr4z;
+
+  // Definition of cross-term dihedrals
+
+  //         phi dihedral
+  //   |--------------------|
+  //   a1-----a2-----a3-----a4-----a5    cross-term atoms
+  //   C      N      CA     C      N     cross-term atom types
+  //          |--------------------|
+  //              psi dihedral
+
+  int nlocal = atomKK->nlocal;  // atoms with index >= nlocal receive no energy or force below
+
+  i1 = d_crosstermlist(n,0);
+  i2 = d_crosstermlist(n,1);
+  i3 = d_crosstermlist(n,2);
+  i4 = d_crosstermlist(n,3);
+  i5 = d_crosstermlist(n,4);
+  type = d_crosstermlist(n,5);  // column 5 holds the crossterm type
+
+  //std::cerr << fmt::format("******** n={} i=[{},{},{},{},{}], type={}\n", n,i1,i2,i3,i4,i5,type);
+
+
+  if (type == 0) return;  // entries with stored type 0 are skipped
+
+  // calculate bond vectors for both dihedrals
+
+  // phi
+  // vb21 = r2 - r1
+
+  vb21x = d_x(i2,0) - d_x(i1,0);
+  vb21y = d_x(i2,1) - d_x(i1,1);
+  vb21z = d_x(i2,2) - d_x(i1,2);
+  vb12x = -1.0*vb21x;
+  vb12y = -1.0*vb21y;
+  vb12z = -1.0*vb21z;
+  vb32x = d_x(i3,0) - d_x(i2,0);
+  vb32y = d_x(i3,1) - d_x(i2,1);
+  vb32z = d_x(i3,2) - d_x(i2,2);
+  vb23x = -1.0*vb32x;
+  vb23y = -1.0*vb32y;
+  vb23z = -1.0*vb32z;
+
+  vb34x = d_x(i3,0) - d_x(i4,0);
+  vb34y = d_x(i3,1) - d_x(i4,1);
+  vb34z = d_x(i3,2) - d_x(i4,2);
+
+  // psi
+  // bond vectors same as for phi: vb32
+
+  vb43x = -1.0*vb34x;
+  vb43y = -1.0*vb34y;
+  vb43z = -1.0*vb34z;
+
+  vb45x = d_x(i4,0) - d_x(i5,0);
+  vb45y = d_x(i4,1) - d_x(i5,1);
+  vb45z = d_x(i4,2) - d_x(i5,2);
+  vb54x = -1.0*vb45x;
+  vb54y = -1.0*vb45y;
+  vb54z = -1.0*vb45z;
+
+  // calculate normal vectors for planes that define the dihedral angles
+
+  a1x = vb12y*vb23z - vb12z*vb23y;
+  a1y = vb12z*vb23x - vb12x*vb23z;
+  a1z = vb12x*vb23y - vb12y*vb23x;
+
+  b1x = vb43y*vb23z - vb43z*vb23y;
+  b1y = vb43z*vb23x - vb43x*vb23z;
+  b1z = vb43x*vb23y - vb43y*vb23x;
+
+  a2x = vb23y*vb34z - vb23z*vb34y;
+  a2y = vb23z*vb34x - vb23x*vb34z;
+  a2z = vb23x*vb34y - vb23y*vb34x;
+
+  b2x = vb45y*vb43z - vb45z*vb43y;
+  b2y = vb45z*vb43x - vb45x*vb43z;
+  b2z = vb45x*vb43y - vb45y*vb43x;
+
+  // calculate terms used later in calculations
+
+  r32 = sqrt(vb32x*vb32x + vb32y*vb32y + vb32z*vb32z);
+  a1sq = a1x*a1x + a1y*a1y + a1z*a1z;
+  b1sq = b1x*b1x + b1y*b1y + b1z*b1z;
+
+  r43 = sqrt(vb43x*vb43x + vb43y*vb43y + vb43z*vb43z);
+  a2sq = a2x*a2x + a2y*a2y + a2z*a2z;
+  b2sq = b2x*b2x + b2y*b2y + b2z*b2z;
+  //if (a1sq<0.0001 || b1sq<0.0001 || a2sq<0.0001 || b2sq<0.0001)
+  //  printf("a1sq b1sq a2sq b2sq: %f %f %f %f \n",a1sq,b1sq,a2sq,b2sq);
+  if (a1sq<0.0001 || b1sq<0.0001 || a2sq<0.0001 || b2sq<0.0001) return;  // these squared norms are divisors in the derivative terms below
+  dpr21r32 = vb21x*vb32x + vb21y*vb32y + vb21z*vb32z;
+  dpr34r32 = vb34x*vb32x + vb34y*vb32y + vb34z*vb32z;
+  dpr32r43 = vb32x*vb43x + vb32y*vb43y + vb32z*vb43z;
+  dpr45r43 = vb45x*vb43x + vb45y*vb43y + vb45z*vb43z;
+
+  // calculate the backbone dihedral angles as VMD and GROMACS
+
+  phi = FixCMAP::dihedral_angle_atan2(vb21x,vb21y,vb21z,a1x,a1y,a1z,b1x,b1y,b1z,r32);
+  psi = FixCMAP::dihedral_angle_atan2(vb32x,vb32y,vb32z,a2x,a2y,a2z,b2x,b2y,b2z,r43);
+
+  std::cerr << fmt::format("******** n={} phi={}, psi={}\n", n, phi, psi);  // debug trace; NOTE(review): host stream output inside a KOKKOS_INLINE_FUNCTION, presumably temporary
+
+  if (phi == 180.0) phi= -180.0;  // an angle of exactly +180 is replaced by -180
+  if (psi == 180.0) psi= -180.0;
+
+  phi1 = phi;  // phi1/psi1: copies of phi/psi with 360 added when negative
+  if (phi1 < 0.0) phi1 += 360.0;
+  psi1 = psi;
+  if (psi1 < 0.0) psi1 += 360.0;
+
+  // find the neighbor grid point index
+
+  li1 = int(((phi1+CMAPXMIN2)/CMAPDX)+((CMAPDIM*1.0)/2.0));  // li1/li2 (from phi1/psi1) select the d1/d2/d12 grid entries
+  li2 = int(((psi1+CMAPXMIN2)/CMAPDX)+((CMAPDIM*1.0)/2.0));
+
+  li3 = int((phi-CMAPXMIN2)/CMAPDX);  // li3/li4 (from phi/psi) select the value grid entries and are passed to bc_interpol()
+  li4 = int((psi-CMAPXMIN2)/CMAPDX);
+  mli3 = li3 % CMAPDIM;
+  mli4 = li4 % CMAPDIM;
+  mli31 = (li3+1) % CMAPDIM;
+  mli41 = (li4+1) %CMAPDIM;
+  mli1 = li1 % CMAPDIM;
+  mli2 = li2 % CMAPDIM;
+  mli11 = (li1+1) % CMAPDIM;
+  mli21 = (li2+1) %CMAPDIM;
+  t1 = type-1;  // zero-based first index into the cmap grid views
+  if (t1 < 0 || t1 > 5) error->all(FLERR,"Invalid CMAP crossterm_type");  // NOTE(review): upper bound 5 is hard-coded; confirm error->all is callable from this inline function
+
+  // determine the values and derivatives for the grid square points
+
+  gs[0] = d_cmapgrid(t1,mli3,mli4);
+  gs[1] = d_cmapgrid(t1,mli31,mli4);
+  gs[2] = d_cmapgrid(t1,mli31,mli41);
+  gs[3] = d_cmapgrid(t1,mli3,mli41);
+  d1gs[0] = d_d1cmapgrid(t1,mli1,mli2);
+  d1gs[1] = d_d1cmapgrid(t1,mli11,mli2);
+  d1gs[2] = d_d1cmapgrid(t1,mli11,mli21);
+  d1gs[3] = d_d1cmapgrid(t1,mli1,mli21);
+  d2gs[0] = d_d2cmapgrid(t1,mli1,mli2);
+  d2gs[1] = d_d2cmapgrid(t1,mli11,mli2);
+  d2gs[2] = d_d2cmapgrid(t1,mli11,mli21);
+  d2gs[3] = d_d2cmapgrid(t1,mli1,mli21);
+  d12gs[0] = d_d12cmapgrid(t1,mli1,mli2);
+  d12gs[1] = d_d12cmapgrid(t1,mli11,mli2);
+  d12gs[2] = d_d12cmapgrid(t1,mli11,mli21);
+  d12gs[3] = d_d12cmapgrid(t1,mli1,mli21);
+
+  std::cerr << fmt::format("******** n={} gs=[{},{},{},{}]\n", n, gs[0],gs[1],gs[2],gs[3]);  // debug trace
+  std::cerr << fmt::format("******** n={} d1gs=[{},{},{},{}]\n", n, d1gs[0],d1gs[1],d1gs[2],d1gs[3]);  // debug trace
+  std::cerr << fmt::format("******** n={} d2gs=[{},{},{},{}]\n", n, d2gs[0],d2gs[1],d2gs[2],d2gs[3]);  // debug trace
+  std::cerr << fmt::format("******** n={} d12gs=[{},{},{},{}]\n", n, d12gs[0],d12gs[1],d12gs[2],d12gs[3]);  // debug trace
+
+  // calculate the cmap energy and the gradient (dE/dphi,dE/dpsi)
+
+  double E, dEdPhi, dEdPsi;  // filled by bc_interpol() through its reference parameters
+  bc_interpol(phi,psi,li3,li4,gs,d1gs,d2gs,d12gs,E,dEdPhi,dEdPsi);
+
+  std::cerr << fmt::format("******** n={} dEdPhi={}, dEdPsi={}\n", n, dEdPhi, dEdPsi);  // debug trace
+
+  // sum up cmap energy contributions
+
+  engfraction = 0.2 * E;  // one fifth of E per atom; only atoms with index < nlocal add it
+  if (i1 < nlocal) ecmap += engfraction;
+  if (i2 < nlocal) ecmap += engfraction;
+  if (i3 < nlocal) ecmap += engfraction;
+  if (i4 < nlocal) ecmap += engfraction;
+  if (i5 < nlocal) ecmap += engfraction;
+
+  // calculate the derivatives dphi/dr_i
+
+  dphidr1x = 1.0*r32/a1sq*a1x;
+  dphidr1y = 1.0*r32/a1sq*a1y;
+  dphidr1z = 1.0*r32/a1sq*a1z;
+
+  dphidr2x = -1.0*r32/a1sq*a1x - dpr21r32/a1sq/r32*a1x +
+    dpr34r32/b1sq/r32*b1x;
+  dphidr2y = -1.0*r32/a1sq*a1y - dpr21r32/a1sq/r32*a1y +
+    dpr34r32/b1sq/r32*b1y;
+  dphidr2z = -1.0*r32/a1sq*a1z - dpr21r32/a1sq/r32*a1z +
+    dpr34r32/b1sq/r32*b1z;
+
+  dphidr3x = dpr34r32/b1sq/r32*b1x - dpr21r32/a1sq/r32*a1x - r32/b1sq*b1x;
+  dphidr3y = dpr34r32/b1sq/r32*b1y - dpr21r32/a1sq/r32*a1y - r32/b1sq*b1y;
+  dphidr3z = dpr34r32/b1sq/r32*b1z - dpr21r32/a1sq/r32*a1z - r32/b1sq*b1z;
+
+  dphidr4x = r32/b1sq*b1x;
+  dphidr4y = r32/b1sq*b1y;
+  dphidr4z = r32/b1sq*b1z;
+
+  // calculate the derivatives dpsi/dr_i
+
+  dpsidr1x = 1.0*r43/a2sq*a2x;
+  dpsidr1y = 1.0*r43/a2sq*a2y;
+  dpsidr1z = 1.0*r43/a2sq*a2z;
+
+  dpsidr2x = r43/a2sq*a2x + dpr32r43/a2sq/r43*a2x - dpr45r43/b2sq/r43*b2x;
+  dpsidr2y = r43/a2sq*a2y + dpr32r43/a2sq/r43*a2y - dpr45r43/b2sq/r43*b2y;
+  dpsidr2z = r43/a2sq*a2z + dpr32r43/a2sq/r43*a2z - dpr45r43/b2sq/r43*b2z;
+
+  dpsidr3x = dpr45r43/b2sq/r43*b2x - dpr32r43/a2sq/r43*a2x - r43/b2sq*b2x;
+  dpsidr3y = dpr45r43/b2sq/r43*b2y - dpr32r43/a2sq/r43*a2y - r43/b2sq*b2y;
+  dpsidr3z = dpr45r43/b2sq/r43*b2z - dpr32r43/a2sq/r43*a2z - r43/b2sq*b2z;
+
+  dpsidr4x = r43/b2sq*b2x;
+  dpsidr4y = r43/b2sq*b2y;
+  dpsidr4z = r43/b2sq*b2z;
+
+  // calculate forces on cross-term atoms: F = -(dE/dPhi)*(dPhi/dr)
+
+  f1[0] = dEdPhi*dphidr1x;
+  f1[1] = dEdPhi*dphidr1y;
+  f1[2] = dEdPhi*dphidr1z;
+  f2[0] = dEdPhi*dphidr2x + dEdPsi*dpsidr1x;
+  f2[1] = dEdPhi*dphidr2y + dEdPsi*dpsidr1y;
+  f2[2] = dEdPhi*dphidr2z + dEdPsi*dpsidr1z;
+  f3[0] = -dEdPhi*dphidr3x - dEdPsi*dpsidr2x;
+  f3[1] = -dEdPhi*dphidr3y - dEdPsi*dpsidr2y;
+  f3[2] = -dEdPhi*dphidr3z - dEdPsi*dpsidr2z;
+  f4[0] = -dEdPhi*dphidr4x - dEdPsi*dpsidr3x;
+  f4[1] = -dEdPhi*dphidr4y - dEdPsi*dpsidr3y;
+  f4[2] = -dEdPhi*dphidr4z - dEdPsi*dpsidr3z;
+  f5[0] = -dEdPsi*dpsidr4x;
+  f5[1] = -dEdPsi*dpsidr4y;
+  f5[2] = -dEdPsi*dpsidr4z;
+
+  std::cerr << fmt::format("******** n={} f1=[{},{},{}]\n",n,f1[0],f1[1],f1[2]);  // debug trace
+
+  // apply force to each of the 5 atoms
+
+  if (i1 < nlocal) {
+    d_f(i1,0) += f1[0];
+    d_f(i1,1) += f1[1];
+    d_f(i1,2) += f1[2];
+  }
+  if (i2 < nlocal) {
+    d_f(i2,0) += f2[0];
+    d_f(i2,1) += f2[1];
+    d_f(i2,2) += f2[2];
+  }
+  if (i3 < nlocal) {
+    d_f(i3,0) += f3[0];
+    d_f(i3,1) += f3[1];
+    d_f(i3,2) += f3[2];
+  }
+  if (i4 < nlocal) {
+    d_f(i4,0) += f4[0];
+    d_f(i4,1) += f4[1];
+    d_f(i4,2) += f4[2];
+  }
+  if (i5 < nlocal) {
+    d_f(i5,0) += f5[0];
+    d_f(i5,1) += f5[1];
+    d_f(i5,2) += f5[2];
+  }
+
+  // tally energy and/or virial
+
+  if (evflag) {
+    //std::cerr << "******** tally energy and/or virial\n";
+    nlist = 0;  // list[] collects those of the 5 atoms with index < nlocal
+    if (i1 < nlocal) list[nlist++] = i1;
+    if (i2 < nlocal) list[nlist++] = i2;
+    if (i3 < nlocal) list[nlist++] = i3;
+    if (i4 < nlocal) list[nlist++] = i4;
+    if (i5 < nlocal) list[nlist++] = i5;
+    vcmap[0] = (vb12x*f1[0])+(vb32x*f3[0])+((vb43x+vb32x)*f4[0])+
+      ((vb54x+vb43x+vb32x)*f5[0]);
+    vcmap[1] = (vb12y*f1[1])+(vb32y*f3[1])+((vb43y+vb32y)*f4[1])+
+      ((vb54y+vb43y+vb32y)*f5[1]);
+    vcmap[2] = (vb12z*f1[2])+(vb32z*f3[2])+((vb43z+vb32z)*f4[2])+
+      ((vb54z+vb43z+vb32z)*f5[2]);
+    vcmap[3] = (vb12x*f1[1])+(vb32x*f3[1])+((vb43x+vb32x)*f4[1])+
+      ((vb54x+vb43x+vb32x)*f5[1]);
+    vcmap[4] = (vb12x*f1[2])+(vb32x*f3[2])+((vb43x+vb32x)*f4[2])+
+      ((vb54x+vb43x+vb32x)*f5[2]);
+    vcmap[5] = (vb12y*f1[2])+(vb32y*f3[2])+((vb43y+vb32y)*f4[2])+
+      ((vb54y+vb43y+vb32y)*f5[2]);
+    ev_tally(nlist,list,5.0,E,vcmap);
+    //ev_tally(5,list,nlocal,newton_bond,E,vcmap);
+  }
+
+  //utils::logmesg(lmp, "post_force (n={})\n", n);
+
+  }  // end of operator()
+
+/* ----------------------------------------------------------------------
+   allocate atom-based array
+------------------------------------------------------------------------- */
+
+template
+void FixCMAPKokkos::grow_arrays(int nmax)
+{
+  k_num_crossterm.template sync();
+  k_crossterm_type.template sync();
+  k_crossterm_atom1.template sync();
+  k_crossterm_atom2.template sync();
+  k_crossterm_atom3.template sync();
+  k_crossterm_atom4.template sync();
+  k_crossterm_atom5.template sync();
+
+  // force reallocation on host
+  k_num_crossterm.template modify();
+  k_crossterm_type.template modify();
+  k_crossterm_atom1.template modify();
+  k_crossterm_atom2.template modify();
+  k_crossterm_atom3.template modify();
+  k_crossterm_atom4.template modify();
+  k_crossterm_atom5.template modify();
+
+  memoryKK->grow_kokkos(k_num_crossterm,num_crossterm,nmax,"cmap:num_crossterm");  // 1d: one counter per atom
+  memoryKK->grow_kokkos(k_crossterm_type,crossterm_type,nmax,CMAPMAX,"cmap:crossterm_type");  // 2d: nmax x CMAPMAX
+  memoryKK->grow_kokkos(k_crossterm_atom1,crossterm_atom1,nmax,CMAPMAX,"cmap:crossterm_atom1");
+  memoryKK->grow_kokkos(k_crossterm_atom2,crossterm_atom2,nmax,CMAPMAX,"cmap:crossterm_atom2");
+  memoryKK->grow_kokkos(k_crossterm_atom3,crossterm_atom3,nmax,CMAPMAX,"cmap:crossterm_atom3");
+  memoryKK->grow_kokkos(k_crossterm_atom4,crossterm_atom4,nmax,CMAPMAX,"cmap:crossterm_atom4");
+  memoryKK->grow_kokkos(k_crossterm_atom5,crossterm_atom5,nmax,CMAPMAX,"cmap:crossterm_atom5");
+
+  d_num_crossterm = k_num_crossterm.template view();  // re-fetch the d_* views after the grow calls
+  d_crossterm_type = k_crossterm_type.template view();
+  d_crossterm_atom1 = k_crossterm_atom1.template view();
+  d_crossterm_atom2 = k_crossterm_atom2.template view();
+  d_crossterm_atom3 = k_crossterm_atom3.template view();
+  d_crossterm_atom4 = k_crossterm_atom4.template view();
+  d_crossterm_atom5 = k_crossterm_atom5.template view();
+
+  // must initialize num_crossterm to 0 for added atoms
+  // may never be set for some atoms when data file is read
+
+  for (int i = nmax_previous; i < nmax; i++) k_num_crossterm.h_view(i) = 0;  // only entries added since the previous call are zeroed
+  nmax_previous = nmax;  // remembered so the next call starts zeroing here
+
+  k_num_crossterm.template modify();
+  k_crossterm_type.template modify();
+  k_crossterm_atom1.template modify();
+  k_crossterm_atom2.template modify();
+  k_crossterm_atom3.template modify();
+  k_crossterm_atom4.template modify();
+  k_crossterm_atom5.template modify();
+}
+
+/* ----------------------------------------------------------------------
+   copy values within local atom-based array
+------------------------------------------------------------------------- */
+// copy_arrays(): sync the seven per-atom crossterm arrays, let FixCMAP::copy_arrays do the copy, then flag them modified
+template
+void FixCMAPKokkos::copy_arrays(int i, int j, int delflag)
+{
+  k_num_crossterm.template sync();
+  k_crossterm_type.template sync();
+  k_crossterm_atom1.template sync();
+  k_crossterm_atom2.template sync();
+  k_crossterm_atom3.template sync();
+  k_crossterm_atom4.template sync();
+  k_crossterm_atom5.template sync();
+
+  FixCMAP::copy_arrays(i,j,delflag);  // the actual copy is done by the base class
+
+  k_num_crossterm.template modify();
+  k_crossterm_type.template modify();
+  k_crossterm_atom1.template modify();
+  k_crossterm_atom2.template modify();
+  k_crossterm_atom3.template modify();
+  k_crossterm_atom4.template modify();
+  k_crossterm_atom5.template modify();
+}
+
+/* ----------------------------------------------------------------------
+   initialize one atom's array values, called when atom is created
+------------------------------------------------------------------------- */
+
+template
+void FixCMAPKokkos::set_arrays(int i)
+{
+  k_num_crossterm.sync_host();  // bring the host copy up to date before writing to it
+  num_crossterm[i] = 0;  // atom i starts without crossterms
+  k_num_crossterm.modify_host();  // flag the host copy as changed
+}
+
+/* ----------------------------------------------------------------------
+   pack values in local atom-based arrays for exchange with another proc
+------------------------------------------------------------------------- */
+
+template
+int FixCMAPKokkos::pack_exchange(int i, double *buf)
+{
+  k_num_crossterm.sync_host();
+  k_crossterm_type.sync_host();
+  k_crossterm_atom1.sync_host();
+  k_crossterm_atom2.sync_host();
+  k_crossterm_atom3.sync_host();
+  k_crossterm_atom4.sync_host();
+  k_crossterm_atom5.sync_host();
+
+  int m = FixCMAP::pack_exchange(i,buf);  // packing itself is done by the base class; m is its return value
+
+  k_num_crossterm.modify_host();  // NOTE(review): if the base-class pack only reads these arrays, flagging them modified may be unnecessary - confirm
+  k_crossterm_type.modify_host();
+  k_crossterm_atom1.modify_host();
+  k_crossterm_atom2.modify_host();
+  k_crossterm_atom3.modify_host();
+  k_crossterm_atom4.modify_host();
+  k_crossterm_atom5.modify_host();
+
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+
unpack values in local atom-based arrays from exchange with another proc
+------------------------------------------------------------------------- */
+
+template
+int FixCMAPKokkos::unpack_exchange(int nlocal, double *buf)
+{
+  k_num_crossterm.sync_host();
+  k_crossterm_type.sync_host();
+  k_crossterm_atom1.sync_host();
+  k_crossterm_atom2.sync_host();
+  k_crossterm_atom3.sync_host();
+  k_crossterm_atom4.sync_host();
+  k_crossterm_atom5.sync_host();
+
+  int m = FixCMAP::unpack_exchange(nlocal,buf);  // unpacking itself is done by the base class; m is its return value
+
+  k_num_crossterm.modify_host();  // flag the host copies as changed after the unpack
+  k_crossterm_type.modify_host();
+  k_crossterm_atom1.modify_host();
+  k_crossterm_atom2.modify_host();
+  k_crossterm_atom3.modify_host();
+  k_crossterm_atom4.modify_host();
+  k_crossterm_atom5.modify_host();
+
+  return m;
+}
+
+
+/* ---------------------------------------------------------------------- */
+// bc_interpol(): bicubic interpolation at (x1,x2); results are returned through E, dEdPhi and dEdPsi
+template
+KOKKOS_INLINE_FUNCTION
+void FixCMAPKokkos::bc_interpol(double x1, double x2, int low1, int low2, double *gs,
+                          double *d1gs, double *d2gs, double *d12gs,
+                          double &E, double &dEdPhi, double &dEdPsi )
+{
+  // gs, d1gs, d2gs, d12gs: entries 0..3 of each are read; low1/low2 index d_g_axis
+  // FUSE bc_coeff() and bc_interpol() inline functions for kokkos version
+
+  // calculate the bicubic interpolation coefficients c_ij
+
+  static int wt[16][16] =  // NOTE(review): function-local static table inside a KOKKOS_INLINE_FUNCTION; confirm this is acceptable for device builds
+    { {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+      {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0},
+      {-3, 0, 0, 3, 0, 0, 0, 0,-2, 0, 0,-1, 0, 0, 0, 0},
+      {2, 0, 0,-2, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0},
+      {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+      {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0},
+      {0, 0, 0, 0,-3, 0, 0, 3, 0, 0, 0, 0,-2, 0, 0,-1},
+      {0, 0, 0, 0, 2, 0, 0,-2, 0, 0, 0, 0, 1, 0, 0, 1},
+      {-3, 3, 0, 0,-2,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+      {0, 0, 0, 0, 0, 0, 0, 0,-3, 3, 0, 0,-2,-1, 0, 0},
+      {9,-9, 9,-9, 6, 3,-3,-6, 6,-6,-3, 3, 4, 2, 1, 2},
+      {-6, 6,-6, 6,-4,-2, 2, 4,-3, 3, 3,-3,-2,-1,-1,-2},
+      {2,-2, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+      {0, 0, 0, 0, 0, 0, 0, 0, 2,-2, 0, 0, 1, 1, 0, 0},
+      {-6, 6,-6, 6,-3,-3, 3, 3,-4, 4, 2,-2,-2,-2,-1,-1},
+      {4,-4, 4,-4, 2, 2,-2,-2, 2,-2,-2, 2, 1, 1, 1, 1}
+    };
+
+  int i, j, k, in;
+  double xx, x[16], cij[4][4];  // cij is local to this function
+
+  for (i = 0; i < 4; i++) {  // x[] = the 4 values, then the derivatives scaled by the grid spacing CMAPDX
+    x[i] = gs[i];
+    x[i+4] = d1gs[i]*CMAPDX;
+    x[i+8] = d2gs[i]*CMAPDX;
+    x[i+12] = d12gs[i]*CMAPDX*CMAPDX;
+  }
+
+  in = 0;  // row of wt; advances once per (i,j) pair
+  for (i = 0; i < 4; i++) {
+    for (j = 0; j < 4; j++) {
+      xx = 0.0;
+      for (k = 0; k < 16; k++) xx += wt[in][k]*x[k];  // cij[i][j] = row `in` of wt dotted with x
+      in++;
+      cij[i][j] = xx;
+    }
+  }
+
+  // for a given point of interest and its corresponding grid square values,
+  // gradients and cross-derivatives
+  // calculate the interpolated value of the point of interest (POI)
+
+  double t, u, gs1l, gs2l;
+
+  // set the interpolation coefficients
+  // bc_coeff(gs,d1gs,d2gs,d12gs,&cij[0]);
+
+  gs1l = d_g_axis(low1);
+  gs2l = d_g_axis(low2);
+
+  t = (x1-gs1l)/CMAPDX;  // offsets from the lower grid point, in units of the grid spacing
+  u = (x2-gs2l)/CMAPDX;
+
+  E = dEdPhi = dEdPsi = 0.0;
+
+  for (i = 3; i >= 0; i--) {  // i runs 3..0, accumulating the polynomials in nested form
+    E = t*E + ((cij[i][3]*u+cij[i][2])*u+cij[i][1])*u+cij[i][0];
+    dEdPhi = u*dEdPhi + (3.0*cij[3][i]*t+2.0*cij[2][i])*t+cij[1][i];
+    dEdPsi = t*dEdPsi + (3.0*cij[i][3]*u+2.0*cij[i][2])*u+cij[i][1];
+
+    std::cerr << fmt::format("******** cij[{}]=[{},{},{},{}]\n", i,cij[i][0],cij[i][1],cij[i][2],cij[i][3]);  // debug trace, printed on every iteration
+
+  }
+
+  dEdPhi *= (180.0/MY_PI/CMAPDX);  // NOTE(review): presumably rescales the gradient from per-grid-step to per-radian - confirm
+  dEdPsi *= (180.0/MY_PI/CMAPDX);
+}
+
+
+
+/* ----------------------------------------------------------------------
+   return local index of atom J or any of its images that is closest to atom I
+   if J is not a valid index like -1, just return it
+   copied from domain.cpp
+------------------------------------------------------------------------- */
+
+template
+KOKKOS_INLINE_FUNCTION
+int FixCMAPKokkos::closest_image(const int i, int j) const
+{
+  if (j < 0) return j;
+
+  const X_FLOAT xi0 = d_x(i,0);
+  const X_FLOAT xi1 = d_x(i,1);
+  const X_FLOAT xi2 = d_x(i,2);
+
+  int closest = j;  // best candidate so far, starting with j itself
+  X_FLOAT delx = xi0 - d_x(j,0);
+  X_FLOAT dely = xi1 - d_x(j,1);
+  X_FLOAT delz = xi2 - d_x(j,2);
+  X_FLOAT rsqmin = delx*delx + dely*dely + delz*delz;  // squared distance from atom i to the current best
+  X_FLOAT rsq;
+
+  while (d_sametag[j] >= 0) {  // follow the d_sametag chain until a negative entry ends it
+    j = d_sametag[j];
+    delx = xi0 - d_x(j,0);
+    dely = xi1 - d_x(j,1);
+    delz = xi2 - d_x(j,2);
+    rsq = delx*delx + dely*dely + delz*delz;
+    if (rsq < rsqmin) {  // strictly closer entries replace the current best
+      rsqmin = rsq;
+      closest = j;
+    }
+  }
+
+  return closest;
+}
+
+
+namespace LAMMPS_NS {  // explicit instantiations; the second is only built when LMP_KOKKOS_GPU is defined
+template class FixCMAPKokkos;
+#ifdef LMP_KOKKOS_GPU
+template class FixCMAPKokkos;
+#endif
+}
diff --git a/src/KOKKOS/fix_cmap_kokkos.h b/src/KOKKOS/fix_cmap_kokkos.h new file mode 100644 index 0000000000..186a717fc6 --- /dev/null +++ b/src/KOKKOS/fix_cmap_kokkos.h @@ -0,0 +1,99 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+// clang-format off
+FixStyle(cmap/kk,FixCMAPKokkos);
+FixStyle(cmap/kk/device,FixCMAPKokkos);
+FixStyle(cmap/kk/host,FixCMAPKokkos);
+// clang-format on
+#else
+
+// clang-format off
+#ifndef LMP_FIX_CMAP_KOKKOS_H
+#define LMP_FIX_CMAP_KOKKOS_H
+
+#include "fix_cmap.h"
+#include "kokkos_type.h"
+
+namespace LAMMPS_NS {
+// FixCMAPKokkos: subclass of FixCMAP registered under the cmap/kk style names above
+template
+class FixCMAPKokkos : public FixCMAP {
+  typedef ArrayTypes AT;
+
+ public:
+  FixCMAPKokkos(class LAMMPS *, int, char **);
+  ~FixCMAPKokkos() override;
+
+  void init() override;
+  void pre_neighbor() override;
+  void post_force(int) override;
+
+  KOKKOS_INLINE_FUNCTION
+  //void operator()(const int) const;
+  void operator()(const int);  // per-list-entry work called from post_force(); the const variant is commented out
+
+  void grow_arrays(int) override;
+  void copy_arrays(int, int, int) override;
+  void set_arrays(int) override;
+  int pack_exchange(int, double *) override;
+  int unpack_exchange(int, double *) override;
+
+ protected:
+  typename AT::t_x_array d_x;  // positions (refreshed in post_force)
+  typename AT::t_f_array d_f;  // forces (refreshed in post_force)
+  //typename AT::t_int_1d d_type, d_mask;
+
+  DAT::tdual_int_1d k_sametag;
+  typename AT::t_int_1d d_sametag;  // chain followed by closest_image()
+  int map_style;
+  DAT::tdual_int_1d k_map_array;
+  dual_hash_type k_map_hash;
+
+  DAT::tdual_int_1d k_num_crossterm;  // k_* members use DAT::tdual_* types; the matching d_* members use AT::t_* types
+  typename AT::t_int_1d d_num_crossterm;
+
+  DAT::tdual_int_2d k_crosstermlist, k_crossterm_type;
+  typename AT::t_int_2d d_crosstermlist, d_crossterm_type;
+
+  DAT::tdual_tagint_2d k_crossterm_atom1, k_crossterm_atom2, k_crossterm_atom3;
+  DAT::tdual_tagint_2d k_crossterm_atom4, k_crossterm_atom5;
+  typename AT::t_tagint_2d d_crossterm_atom1, d_crossterm_atom2, d_crossterm_atom3;
+  typename AT::t_tagint_2d d_crossterm_atom4, d_crossterm_atom5;
+
+  DAT::tdual_float_1d k_g_axis;
+  typename AT::t_float_1d d_g_axis;  // indexed by low1/low2 in bc_interpol()
+
+  DAT::tdual_float_3d k_cmapgrid, k_d1cmapgrid, k_d2cmapgrid, k_d12cmapgrid;
+  typename AT::t_float_3d d_cmapgrid, d_d1cmapgrid, d_d2cmapgrid, d_d12cmapgrid;  // indexed as (type-1, grid index, grid index) in operator()
+
+  // calculate bicubic interpolation coefficient matrix c_ij
+  KOKKOS_INLINE_FUNCTION
+  void bc_coeff(double *, double *, double *, double *, double **);  // NOTE(review): bc_interpol's body says bc_coeff was fused into it; confirm this declaration is still needed
+
+  // perform bicubic interpolation at point of interest
+  KOKKOS_INLINE_FUNCTION
+  void bc_interpol(double, double, int, int, double *, double *, double *, double *,
+                   double &, double &, double &);  // the three references return E, dEdPhi, dEdPsi
+
+  // copied from Domain
+  KOKKOS_INLINE_FUNCTION
+  int closest_image(const int, int) const;
+
+};
+
+}    // namespace LAMMPS_NS
+
+#endif    // LMP_FIX_CMAP_KOKKOS_H
+#endif    // FIX_CLASS
diff --git a/src/MOLECULE/fix_cmap.cpp b/src/MOLECULE/fix_cmap.cpp index bc220da30e..b58e055e9b 100644 --- a/src/MOLECULE/fix_cmap.cpp +++ b/src/MOLECULE/fix_cmap.cpp @@ -46,18 +46,17 @@ #include #include +// FIXME: remove after debugging done +#include + using namespace LAMMPS_NS; using namespace FixConst; using namespace MathConst; -static constexpr int LISTDELTA = 10000; -static constexpr double LB_FACTOR = 1.5; +//static constexpr int LISTDELTA = 10000; +//static constexpr double LB_FACTOR = 1.5; + -static constexpr int CMAPMAX = 6; // max # of CMAP terms stored by one atom -static constexpr int CMAPDIM = 24; // grid map dimension is 24 x 24 -static constexpr double CMAPXMIN = -360.0; -static constexpr double CMAPXMIN2 = -180.0; -static constexpr double CMAPDX = 15.0; // 360/CMAPDIM /* ---------------------------------------------------------------------- */ @@ -127,6 +126,9 @@ FixCMAP::FixCMAP(LAMMPS *lmp, int narg, char **arg) : FixCMAP::~FixCMAP() { + + if (copymode) return; + // unregister callbacks to this fix from Atom class atom->delete_callback(id,Atom::GROW); @@ -179,7 +181,7 @@ void FixCMAP::init() // pre-compute the derivatives of the maps - for (i = 0; i < 6; i++) + for (i = 0; i < CMAPMAX; i++) set_map_derivatives(cmapgrid[i],d1cmapgrid[i],d2cmapgrid[i],d12cmapgrid[i]); if (utils::strmatch(update->integrate_style,"^respa")) { @@ -242,7 +244,7 @@ void FixCMAP::pre_neighbor() if (maxcrossterm == 0) { if (nprocs == 1) maxcrossterm = ncmap; else maxcrossterm = 
static_cast (LB_FACTOR*ncmap/nprocs); - memory->create(crosstermlist,maxcrossterm,6,"cmap:crosstermlist"); + memory->create(crosstermlist,maxcrossterm,CMAPMAX,"cmap:crosstermlist"); } int nlocal = atom->nlocal; @@ -274,7 +276,7 @@ void FixCMAP::pre_neighbor() i <= atom4 && i <= atom5) { if (ncrosstermlist == maxcrossterm) { maxcrossterm += LISTDELTA; - memory->grow(crosstermlist,maxcrossterm,6,"cmap:crosstermlist"); + memory->grow(crosstermlist,maxcrossterm,CMAPMAX,"cmap:crosstermlist"); } crosstermlist[ncrosstermlist][0] = atom1; crosstermlist[ncrosstermlist][1] = atom2; @@ -314,7 +316,7 @@ void FixCMAP::post_force(int vflag) double dpr32r43,dpr45r43,r43,vb12x,vb12y,vb12z,vb54x,vb54y,vb54z; // cross-term dihedral angles double phi,psi,phi1,psi1; - double f1[3],f2[3],f3[3],f4[3],f5[3],vcmap[6]; + double f1[3],f2[3],f3[3],f4[3],f5[3],vcmap[CMAPMAX]; double gs[4],d1gs[4],d2gs[4],d12gs[4]; double engfraction; // vectors needed for the gradient/force calculation @@ -348,6 +350,9 @@ void FixCMAP::post_force(int vflag) i5 = crosstermlist[n][4]; type = crosstermlist[n][5]; + + //std::cerr << fmt::format("******** n={} i=[{},{},{},{},{}], type={}\n",n,i1,i2,i3,i4,i5,type); + if (type == 0) continue; // calculate bond vectors for both dihedrals @@ -426,6 +431,8 @@ void FixCMAP::post_force(int vflag) phi = dihedral_angle_atan2(vb21x,vb21y,vb21z,a1x,a1y,a1z,b1x,b1y,b1z,r32); psi = dihedral_angle_atan2(vb32x,vb32y,vb32z,a2x,a2y,a2z,b2x,b2y,b2z,r43); + std::cerr << fmt::format("******** n={} phi={}, psi={}\n", n, phi, psi); + if (phi == 180.0) phi= -180.0; if (psi == 180.0) psi= -180.0; @@ -473,10 +480,19 @@ void FixCMAP::post_force(int vflag) d12gs[2] = d12cmapgrid[t1][mli11][mli21]; d12gs[3] = d12cmapgrid[t1][mli1][mli21]; + std::cerr << fmt::format("******** n={} gs=[{},{},{},{}]\n", n, gs[0],gs[1],gs[2],gs[3]); + std::cerr << fmt::format("******** n={} d1gs=[{},{},{},{}]\n", n, d1gs[0],d1gs[1],d1gs[2],d1gs[3]); + std::cerr << fmt::format("******** n={} 
d2gs=[{},{},{},{}]\n", n, d2gs[0],d2gs[1],d2gs[2],d2gs[3]); + std::cerr << fmt::format("******** n={} d12gs=[{},{},{},{}]\n", n, d12gs[0],d12gs[1],d12gs[2],d12gs[3]); + + // calculate the cmap energy and the gradient (dE/dphi,dE/dpsi) bc_interpol(phi,psi,li3,li4,gs,d1gs,d2gs,d12gs); + std::cerr << fmt::format("******** n={} dEdPhi={}, dEdPsi={}\n", n, dEdPhi, dEdPsi); + + // sum up cmap energy contributions engfraction = 0.2 * E; @@ -543,6 +559,8 @@ void FixCMAP::post_force(int vflag) f5[1] = -dEdPsi*dpsidr4y; f5[2] = -dEdPsi*dpsidr4z; + std::cerr << fmt::format("******** n={} f1=[{},{},{}]\n",n,f1[0],f1[1],f1[2]); + // apply force to each of the 5 atoms if (i1 < nlocal) { @@ -595,7 +613,12 @@ void FixCMAP::post_force(int vflag) ev_tally(nlist,list,5.0,E,vcmap); //ev_tally(5,list,nlocal,newton_bond,E,vcmap); } + + utils::logmesg(lmp, "post_force (n={})\n", n); + } + + } /* ---------------------------------------------------------------------- */ @@ -620,6 +643,7 @@ double FixCMAP::compute_scalar() { double all; MPI_Allreduce(&ecmap,&all,1,MPI_DOUBLE,MPI_SUM,world); + utils::logmesg(lmp, "compute_scalar = {}\n", all); return all; } @@ -654,7 +678,7 @@ void FixCMAP::read_grid_map(char *cmapfile) } } - MPI_Bcast(&cmapgrid[0][0][0],6*CMAPDIM*CMAPDIM,MPI_DOUBLE,0,world); + MPI_Bcast(&cmapgrid[0][0][0],CMAPMAX*CMAPDIM*CMAPDIM,MPI_DOUBLE,0,world); } /* ---------------------------------------------------------------------- */ @@ -907,6 +931,7 @@ void FixCMAP::bc_interpol(double x1, double x2, int low1, int low2, double *gs, E = t*E + ((cij[i][3]*u+cij[i][2])*u+cij[i][1])*u+cij[i][0]; dEdPhi = u*dEdPhi + (3.0*cij[3][i]*t+2.0*cij[2][i])*t+cij[1][i]; dEdPsi = t*dEdPsi + (3.0*cij[i][3]*u+2.0*cij[i][2])*u+cij[i][1]; + std::cerr << fmt::format("******** cij[{}]=[{},{},{},{}]\n", i,cij[i][0],cij[i][1],cij[i][2],cij[i][3]); } dEdPhi *= (180.0/MY_PI/CMAPDX); diff --git a/src/MOLECULE/fix_cmap.h b/src/MOLECULE/fix_cmap.h index 1c6aba95e0..36299a4663 100644 --- 
a/src/MOLECULE/fix_cmap.h +++ b/src/MOLECULE/fix_cmap.h @@ -21,8 +21,17 @@ FixStyle(cmap,FixCMAP); #define LMP_FIX_CMAP_H #include "fix.h" + namespace LAMMPS_NS { +#define CMAPMAX 6 // max # of CMAP terms stored by one atom +#define CMAPDIM 24 // grid map dimension is 24 x 24 +#define CMAPXMIN -360.0 +#define CMAPXMIN2 -180.0 +#define CMAPDX 15.0 // 360/CMAPDIM +#define LB_FACTOR 1.5 +#define LISTDELTA 10000 + class FixCMAP : public Fix { public: FixCMAP(class LAMMPS *, int, char **); @@ -64,7 +73,7 @@ class FixCMAP : public Fix { double memory_usage() override; - private: + protected: int eflag_caller; int ctype, ilevel_respa; int ncrosstermtypes, crossterm_per_atom, maxcrossterm; @@ -79,9 +88,8 @@ class FixCMAP : public Fix { tagint **crossterm_atom1, **crossterm_atom2, **crossterm_atom3; tagint **crossterm_atom4, **crossterm_atom5; - double E, dEdPhi, dEdPsi; double ecmap; - double fcmap[4], cij[4][4]; + //double fcmap[4]; FIXME: remove ? unused variable double *g_axis; // CMAP grid points obtained from external file @@ -121,6 +129,10 @@ class FixCMAP : public Fix { // perform bicubic interpolation at point of interest void bc_interpol(double, double, int, int, double *, double *, double *, double *); + + private: + double E, dEdPhi, dEdPsi, cij[4][4]; + }; } // namespace LAMMPS_NS From 3da8e9a9a58acf51afc195884c79c9a0ad6e10e8 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 31 Jul 2024 12:59:24 -0400 Subject: [PATCH 006/294] add cmap unit test --- unittest/force-styles/tests/data.cmap | 229 ++++++++++++++++++ .../force-styles/tests/fix-timestep-cmap.yaml | 80 ++++++ unittest/force-styles/tests/in.cmap | 33 +++ 3 files changed, 342 insertions(+) create mode 100644 unittest/force-styles/tests/data.cmap create mode 100644 unittest/force-styles/tests/fix-timestep-cmap.yaml create mode 100644 unittest/force-styles/tests/in.cmap diff --git a/unittest/force-styles/tests/data.cmap b/unittest/force-styles/tests/data.cmap new file mode 100644 index 
0000000000..c147dc8d3e --- /dev/null +++ b/unittest/force-styles/tests/data.cmap @@ -0,0 +1,229 @@ +LAMMPS data file via write_data, version 5 May 2020, timestep = 0 + +29 atoms +5 atom types +24 bonds +5 bond types +30 angles +4 angle types +31 dihedrals +5 dihedral types +2 impropers +2 improper types +2 crossterms + + -6.024572 8.975428 xlo xhi + -7.692866 7.307134 ylo yhi + -8.086924 6.913076 zlo zhi + +Masses + +1 12.0107 +2 4.00794 +3 14.0067 +4 15.9994 +5 15.9994 + +Pair Coeffs # zero + +1 +2 +3 +4 +5 + +Bond Coeffs # zero + +1 1.5 +2 1.1 +3 1.3 +4 1.2 +5 1 + +Angle Coeffs # zero + +1 110.1 +2 111 +3 120 +4 108.5 + +Dihedral Coeffs # zero + +1 +2 +3 +4 +5 + +Improper Coeffs # zero + +1 +2 + +Atoms # full + +10 2 1 7.0000000000000007e-02 2.0185283555536988e+00 -1.4283966846517357e+00 -9.6733527271133024e-01 0 0 0 +11 2 2 8.9999999999999997e-02 1.7929780509347666e+00 -1.9871047540768743e+00 -1.8840626643185674e+00 0 0 0 +12 2 1 -2.7000000000000002e-01 3.0030247876861225e+00 -4.8923319967572748e-01 -1.6188658531537248e+00 0 0 0 +13 2 2 8.9999999999999997e-02 4.0447273787895934e+00 -9.0131998547446246e-01 -1.6384447268320836e+00 0 0 0 +14 2 2 8.9999999999999997e-02 2.6033152817257075e+00 -4.0789761505963579e-01 -2.6554413538823063e+00 0 0 0 + 2 1 2 3.1000000000000000e-01 3.0197083955402204e-01 2.9515239068888608e+00 -8.5689735572907566e-01 0 0 0 + 3 1 1 -2.0000000000000000e-02 -6.9435377880558602e-01 1.2440473127136711e+00 -6.2233801468892025e-01 0 0 0 + 4 1 2 8.9999999999999997e-02 -1.5771614164685133e+00 1.4915333140468066e+00 -1.2487126845040522e+00 0 0 0 + 6 1 1 5.1000000000000001e-01 2.9412607937706009e-01 2.2719282656652909e-01 -1.2843094067857870e+00 0 0 0 + 7 1 4 -5.1000000000000001e-01 3.4019871062879609e-01 -9.1277350075786561e-03 -2.4633113224304561e+00 0 0 0 +19 3 2 4.2359999999999998e-01 1.5349125211132961e+00 2.6315969880333707e+00 -4.2472859440220647e+00 0 0 0 +15 2 2 8.9999999999999997e-02 2.9756315249791303e+00 5.6334269722969288e-01 
-1.2437650754599008e+00 0 0 0 +18 3 4 -8.4719999999999995e-01 2.1384791188033843e+00 3.0177261773770208e+00 -3.5160827596876225e+00 0 0 0 +20 3 2 4.2359999999999998e-01 2.7641167828863153e+00 3.6833419064000221e+00 -3.9380850623312638e+00 0 0 0 + 8 2 3 -4.6999999999999997e-01 1.1641187171852805e+00 -4.8375305955385234e-01 -6.7659823767368688e-01 0 0 0 + 9 2 2 3.1000000000000000e-01 1.3777459838125838e+00 -2.5366338669522998e-01 2.6877644730326306e-01 0 0 0 +16 2 1 5.1000000000000001e-01 2.6517554244980306e+00 -2.3957110424978438e+00 3.2908335999178327e-02 0 0 0 +17 2 4 -5.1000000000000001e-01 2.2309964792710639e+00 -2.1022918943319384e+00 1.1491948328949437e+00 0 0 0 + 1 1 3 -4.6999999999999997e-01 -2.7993683669226832e-01 2.4726588069312840e+00 -1.7200860244148433e-01 0 0 0 + 5 1 2 8.9999999999999997e-02 -8.9501761359359255e-01 9.3568128743071344e-01 4.0227731871484346e-01 0 0 0 +21 4 5 -8.4719999999999995e-01 4.9064454390208301e+00 -4.0751205255383196e+00 -3.6215576073601046e+00 0 0 0 +22 4 2 4.2359999999999998e-01 4.3687453488627543e+00 -4.2054270536772504e+00 -4.4651491269372565e+00 0 0 0 +23 4 2 4.2359999999999998e-01 5.7374928154769504e+00 -3.5763355905184966e+00 -3.8820297194230728e+00 0 0 0 +24 5 5 -8.4719999999999995e-01 2.0684115301174013e+00 3.1518221747664397e+00 3.1554242678474576e+00 0 0 0 +25 5 2 4.2359999999999998e-01 1.2998381073113014e+00 3.2755513587518097e+00 2.5092990173114837e+00 0 0 0 +26 5 2 4.2359999999999998e-01 2.5807438597688113e+00 4.0120175892854135e+00 3.2133398379059099e+00 0 0 0 +27 6 5 -8.4719999999999995e-01 -1.9613581876744359e+00 -4.3556300596085160e+00 2.1101467673534788e+00 0 0 0 +28 6 2 4.2359999999999998e-01 -2.7406520384725965e+00 -4.0207251278130975e+00 1.5828689861678511e+00 0 0 0 +29 6 2 4.2359999999999998e-01 -1.3108232656499081e+00 -3.5992986322410760e+00 2.2680459788743503e+00 0 0 0 + +Velocities + +1 7.7867804888392077e-04 5.8970331623292821e-04 -2.2179517633030531e-04 +2 2.7129529964126462e-03 4.6286427111164284e-03 
3.5805549693846352e-03 +3 -1.2736791029204805e-03 1.6108674226414498e-03 -3.3618185901550799e-04 +4 -9.2828595122009308e-04 -1.2537885319521818e-03 -4.1204974953432108e-03 +5 -1.1800848061603740e-03 7.5424401975844038e-04 6.9023177964912290e-05 +6 -3.0914004879905335e-04 1.2755385764678133e-03 7.9574303350202582e-04 +7 -1.1037894966874103e-04 -7.6764845099077425e-04 -7.7217630460203659e-04 +8 3.9060281273221989e-04 -8.1444231918053418e-04 1.5134641148324972e-04 +9 1.2475530960659720e-03 -2.6608454451432528e-03 1.1117602907112732e-03 +10 4.5008983776042893e-04 4.9530197647538077e-04 -2.3336234361093645e-04 +11 -3.6977669078869707e-04 -1.5289071951960539e-03 -2.9176389881837113e-03 +12 1.0850834530183159e-03 -6.4965897903201833e-04 -1.2971152622619948e-03 +13 4.0754559196230639e-03 3.5043502394946119e-03 -7.8324487687854666e-04 +14 -1.3837220448746613e-04 -4.0656048637594394e-03 -3.9333461173944500e-03 +15 -4.3301707382721859e-03 -3.1802661664634938e-03 3.2037919043360571e-03 +16 -9.6715751018414326e-05 -5.0016572678960377e-04 1.4945658875149626e-03 +17 6.5692180538157174e-04 3.6635779995305095e-04 8.3495414466050911e-04 +18 -6.0936815808025862e-04 -9.3774557532468582e-04 -3.3558072507805731e-04 +19 -6.9919768291957119e-04 -3.6060777270430031e-03 4.2833405289822791e-03 +20 4.7777805013736515e-03 5.1003745845520452e-03 1.8002873923729241e-03 +21 -9.5568188553430398e-04 1.6594630943762931e-04 -1.8199788009966615e-04 +22 -3.3137518957653462e-03 -2.8683968287936054e-03 3.6384389958326871e-03 +23 2.4209481134686401e-04 -4.5457709985051130e-03 2.7663581642115042e-03 +24 2.5447450568861086e-04 4.8412447786110117e-04 -4.8021914527341357e-04 +25 4.3722771097312743e-03 -4.5184411669545515e-03 2.5200952006556795e-03 +26 -1.9250110555001179e-03 -3.0342169883610837e-03 3.5062814567984532e-03 +27 -2.6510179146429716e-04 3.6306203629019116e-04 -5.6235585400647747e-04 +28 -2.3068708109787484e-04 -8.5663070212203200e-04 2.1302563179109169e-03 +29 -2.5054744388303732e-03 
-1.6773997805290820e-04 2.8436699761004796e-03 + +Bonds + +1 5 1 2 +2 3 1 3 +3 2 3 4 +4 2 3 5 +5 1 3 6 +6 3 6 8 +7 4 6 7 +8 5 8 9 +9 3 8 10 +10 2 10 11 +11 1 10 12 +12 1 10 16 +13 2 12 13 +14 2 12 14 +15 2 12 15 +16 4 16 17 +17 5 18 19 +18 5 18 20 +19 5 21 22 +20 5 21 23 +21 5 24 25 +22 5 24 26 +23 5 27 28 +24 5 27 29 + +Angles + +1 4 2 1 3 +2 4 1 3 5 +3 4 1 3 4 +4 4 1 3 6 +5 4 4 3 5 +6 2 5 3 6 +7 2 4 3 6 +8 3 3 6 7 +9 3 3 6 8 +10 3 7 6 8 +11 2 6 8 9 +12 2 9 8 10 +13 3 6 8 10 +14 2 8 10 11 +15 3 8 10 16 +16 2 11 10 12 +17 1 12 10 16 +18 1 8 10 12 +19 2 11 10 16 +20 2 10 12 15 +21 2 10 12 14 +22 2 10 12 13 +23 4 13 12 15 +24 4 13 12 14 +25 4 14 12 15 +26 4 10 16 17 +27 1 19 18 20 +28 1 22 21 23 +29 1 25 24 26 +30 1 28 27 29 + +Dihedrals + +1 2 2 1 3 6 +2 2 2 1 3 4 +3 3 2 1 3 5 +4 1 1 3 6 8 +5 1 1 3 6 7 +6 5 4 3 6 8 +7 5 4 3 6 7 +8 5 5 3 6 8 +9 5 5 3 6 7 +10 4 3 6 8 9 +11 3 3 6 8 10 +12 3 7 6 8 9 +13 4 7 6 8 10 +14 2 6 8 10 12 +15 2 6 8 10 16 +16 2 6 8 10 11 +17 2 9 8 10 12 +18 4 9 8 10 16 +19 5 9 8 10 11 +20 5 8 10 12 13 +21 1 8 10 12 14 +22 5 8 10 12 15 +23 4 8 10 16 17 +24 5 11 10 12 13 +25 5 11 10 12 14 +26 5 11 10 12 15 +27 2 11 10 16 17 +28 2 12 10 16 17 +29 5 16 10 12 13 +30 5 16 10 12 14 +31 5 16 10 12 15 + +Impropers + +1 1 6 3 8 7 +2 2 8 6 10 9 + +CMAP + +1 1 8 10 12 18 20 +2 5 18 20 22 25 27 diff --git a/unittest/force-styles/tests/fix-timestep-cmap.yaml b/unittest/force-styles/tests/fix-timestep-cmap.yaml new file mode 100644 index 0000000000..148ac7766c --- /dev/null +++ b/unittest/force-styles/tests/fix-timestep-cmap.yaml @@ -0,0 +1,80 @@ +--- +lammps_version: 27 Jun 2024 +tags: generated +date_generated: Wed Jul 31 05:33:32 2024 +epsilon: 2e-14 +skip_tests: +prerequisites: ! | + atom full + fix cmap +pre_commands: ! "" +post_commands: ! | + fix move all nve + fix test all cmap charmm36.cmap + fix_modify test energy yes +input_file: in.cmap +natoms: 29 +run_stress: ! 
|2- + 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +global_scalar: 0 +run_pos: ! |2 + 1 -2.7960957478828119e-01 2.4731013564911359e+00 -1.7197055808974271e-01 + 2 3.0268926383541106e-01 2.9526877236199844e+00 -8.5607741997777531e-01 + 3 -6.9478589336055918e-01 1.2441536889866271e+00 -6.2251237407370763e-01 + 4 -1.5774369590204109e+00 1.4911372570011669e+00 -1.2496944936030625e+00 + 5 -8.9546513497723546e-01 9.3570244722231344e-01 4.0225073865903394e-01 + 6 2.9323909171637064e-01 2.2849726471998921e-01 -1.2829633671249177e+00 + 7 3.4020167981158811e-01 -9.5631325556748970e-03 -2.4648145585246484e+00 + 8 1.1644032977505605e+00 -4.8402440388979490e-01 -6.7588680768764176e-01 + 9 1.3780470634871482e+00 -2.5423316933384821e-01 2.6922899960539104e-01 + 10 2.0191822499194449e+00 -1.4288375504493047e+00 -9.6763942208902598e-01 + 11 1.7928154959364644e+00 -1.9875167360214077e+00 -1.8847766479050614e+00 + 12 3.0033116656084555e+00 -4.8939013551801663e-01 -1.6192210928693009e+00 + 13 4.0457260650376039e+00 -9.0040839919745463e-01 -1.6386403150651319e+00 + 14 2.6033371010206183e+00 -4.0894628204766315e-01 -2.6564291042119756e+00 + 15 2.9745900539681318e+00 5.6250239395205237e-01 -1.2429126492679963e+00 + 16 2.6521825338274856e+00 -2.3961419746746384e+00 3.2108269763582269e-02 + 17 2.2308266599935420e+00 -2.1019686212613760e+00 1.1502963272389120e+00 + 18 2.1383188712362360e+00 3.0174856158421539e+00 -3.5161936594860612e+00 + 19 1.5347725145708542e+00 2.6307139052757385e+00 -4.2461559657929859e+00 + 20 2.7653076795372340e+00 3.6846222250909149e+00 -3.9375881976793163e+00 + 21 4.9061987235524729e+00 -4.0750856834942697e+00 -3.6215902515825067e+00 + 22 4.3678559615763275e+00 -4.2061680055551145e+00 -4.4642895781639469e+00 + 23 5.7376466626528462e+00 -3.5774216674350545e+00 -3.8813376792737415e+00 + 24 2.0684733913369224e+00 3.1519226166331364e+00 3.1553109785483522e+00 + 25 
1.3008759192930202e+00 3.2744188053963823e+00 2.5098873122526379e+00 + 26 2.5803241685977909e+00 4.0113444045765041e+00 3.2142304818391287e+00 + 27 -1.9614122355901609e+00 -4.3555618328139021e+00 2.1100192128502622e+00 + 28 -2.7408085136690148e+00 -4.0209023016060552e+00 1.5833361911314168e+00 + 29 -1.3113987517049834e+00 -3.5992879715429198e+00 2.2687710339701930e+00 +run_vel: ! |2 + 1 1.8357884460742731e-03 2.9421891809309235e-03 5.2384381057974576e-04 + 2 3.0339076323099138e-03 4.6794102525867737e-03 2.9775263255857258e-03 + 3 -2.1811928064590187e-03 -7.4684694907251135e-04 -1.0535152908069134e-03 + 4 -1.2752386069489895e-03 -1.9130739446192322e-03 -3.7312914387881177e-03 + 5 -2.4016730976801626e-03 -5.8395023339643008e-04 -2.8317392625797067e-04 + 6 -6.7329173570415558e-03 9.0774093771392641e-03 9.7539044284917012e-03 + 7 1.2999303320320067e-04 -2.6824818088661915e-03 -1.1064385809705708e-02 + 8 1.8584246404331885e-03 -1.3400172041949363e-03 5.5049582245146229e-03 + 9 1.1635486325108216e-03 -1.8972361797494706e-03 2.5149816841734955e-03 + 10 4.7657693629541455e-03 -4.0062074342253401e-03 -2.1989049659389844e-03 + 11 -9.2920886333545580e-04 -1.7649214093740193e-03 -2.7906825035643196e-03 + 12 1.2099834710459008e-03 -6.0766664653779138e-04 -1.5455288455127985e-03 + 13 3.9115512319669709e-03 3.7883034915414115e-03 -7.8083665715802345e-04 + 14 3.1474964889761757e-04 -4.3199984752753566e-03 -3.9634731211288876e-03 + 15 -4.0016733023944184e-03 -3.5391950448078408e-03 3.6119622299224763e-03 + 16 3.4687668849148145e-03 -2.9176920403455979e-03 -7.7752228821476215e-03 + 17 -1.9813219464918548e-03 2.1970157651489386e-03 7.8875797890837103e-03 + 18 -6.6853137661653178e-04 -9.8326929599256503e-04 -5.4994204231171629e-04 + 19 -4.2304216640272113e-04 -3.4584846552286048e-03 4.7475482017100270e-03 + 20 4.7353375203586399e-03 5.1283070395151554e-03 2.1770619053484647e-03 + 21 -1.0192501824028387e-03 1.1167604918628844e-04 -7.6248280732632811e-05 + 22 -3.8015036794028234e-03 
-3.0580779746802378e-03 3.2318199863478111e-03 + 23 9.9367095021362642e-04 -4.1395365418028232e-03 2.7644946489666354e-03 + 24 2.4311633374394557e-04 3.1447120784903294e-04 -4.2211250397061664e-04 + 25 3.9137860947859445e-03 -4.5365166679637295e-03 2.1711336306764846e-03 + 26 -1.4268708871173230e-03 -2.3370113183148592e-03 3.6181000484758448e-03 + 27 -1.6673719591314985e-04 1.8117429711863242e-04 -4.5749203077013730e-04 + 28 -1.0253930702737823e-03 -5.5815646467655806e-04 1.6045473388179754e-03 + 29 -2.0963062409570417e-03 2.5671965105012856e-04 2.9572613070049606e-03 +... diff --git a/unittest/force-styles/tests/in.cmap b/unittest/force-styles/tests/in.cmap new file mode 100644 index 0000000000..6a731ea759 --- /dev/null +++ b/unittest/force-styles/tests/in.cmap @@ -0,0 +1,33 @@ +variable newton_pair index on +variable newton_bond index on +variable bond_factor index 0.10 +variable angle_factor index 0.25 +variable dihedral_factor index 0.50 +variable units index real +variable input_dir index . 
+variable data_file index ${input_dir}/data.cmap +variable pair_style index 'zero 8.0' +variable bond_style index zero +variable angle_style index zero +variable dihedral_style index zero +variable improper_style index zero +variable t_target index 100.0 + +atom_style full +atom_modify map array +neigh_modify delay 2 every 2 check no +units ${units} +timestep 0.1 +newton ${newton_pair} ${newton_bond} +special_bonds lj/coul ${bond_factor} ${angle_factor} ${dihedral_factor} + +pair_style ${pair_style} +bond_style ${bond_style} +angle_style ${angle_style} +dihedral_style ${dihedral_style} +improper_style ${improper_style} + +fix cmap all cmap charmm36.cmap +fix_modify cmap energy yes + +read_data ${data_file} fix cmap crossterm CMAP From fbf1451a2f0115fb759185b03323c110b9f3279a Mon Sep 17 00:00:00 2001 From: alphataubio Date: Thu, 1 Aug 2024 01:28:57 -0400 Subject: [PATCH 007/294] add more crossterms to cmap unit test --- unittest/force-styles/tests/data.cmap | 7 +- .../force-styles/tests/fix-timestep-cmap.yaml | 118 +++++++++--------- 2 files changed, 64 insertions(+), 61 deletions(-) diff --git a/unittest/force-styles/tests/data.cmap b/unittest/force-styles/tests/data.cmap index c147dc8d3e..ad86b1c3ed 100644 --- a/unittest/force-styles/tests/data.cmap +++ b/unittest/force-styles/tests/data.cmap @@ -10,7 +10,7 @@ LAMMPS data file via write_data, version 5 May 2020, timestep = 0 5 dihedral types 2 impropers 2 improper types -2 crossterms +5 crossterms -6.024572 8.975428 xlo xhi -7.692866 7.307134 ylo yhi @@ -226,4 +226,7 @@ Impropers CMAP 1 1 8 10 12 18 20 -2 5 18 20 22 25 27 +2 2 18 20 22 25 27 +3 3 2 4 5 6 7 +4 4 10 11 12 13 14 +5 5 5 10 15 20 25 diff --git a/unittest/force-styles/tests/fix-timestep-cmap.yaml b/unittest/force-styles/tests/fix-timestep-cmap.yaml index 148ac7766c..86e21a891d 100644 --- a/unittest/force-styles/tests/fix-timestep-cmap.yaml +++ b/unittest/force-styles/tests/fix-timestep-cmap.yaml @@ -1,7 +1,7 @@ --- lammps_version: 27 Jun 2024 tags: 
generated -date_generated: Wed Jul 31 05:33:32 2024 +date_generated: Thu Aug 1 00:19:04 2024 epsilon: 2e-14 skip_tests: prerequisites: ! | @@ -18,63 +18,63 @@ run_stress: ! |2- 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 global_scalar: 0 run_pos: ! |2 - 1 -2.7960957478828119e-01 2.4731013564911359e+00 -1.7197055808974271e-01 - 2 3.0268926383541106e-01 2.9526877236199844e+00 -8.5607741997777531e-01 - 3 -6.9478589336055918e-01 1.2441536889866271e+00 -6.2251237407370763e-01 - 4 -1.5774369590204109e+00 1.4911372570011669e+00 -1.2496944936030625e+00 - 5 -8.9546513497723546e-01 9.3570244722231344e-01 4.0225073865903394e-01 - 6 2.9323909171637064e-01 2.2849726471998921e-01 -1.2829633671249177e+00 - 7 3.4020167981158811e-01 -9.5631325556748970e-03 -2.4648145585246484e+00 - 8 1.1644032977505605e+00 -4.8402440388979490e-01 -6.7588680768764176e-01 - 9 1.3780470634871482e+00 -2.5423316933384821e-01 2.6922899960539104e-01 - 10 2.0191822499194449e+00 -1.4288375504493047e+00 -9.6763942208902598e-01 - 11 1.7928154959364644e+00 -1.9875167360214077e+00 -1.8847766479050614e+00 - 12 3.0033116656084555e+00 -4.8939013551801663e-01 -1.6192210928693009e+00 - 13 4.0457260650376039e+00 -9.0040839919745463e-01 -1.6386403150651319e+00 - 14 2.6033371010206183e+00 -4.0894628204766315e-01 -2.6564291042119756e+00 - 15 2.9745900539681318e+00 5.6250239395205237e-01 -1.2429126492679963e+00 - 16 2.6521825338274856e+00 -2.3961419746746384e+00 3.2108269763582269e-02 - 17 2.2308266599935420e+00 -2.1019686212613760e+00 1.1502963272389120e+00 - 18 2.1383188712362360e+00 3.0174856158421539e+00 -3.5161936594860612e+00 - 19 1.5347725145708542e+00 2.6307139052757385e+00 -4.2461559657929859e+00 - 20 2.7653076795372340e+00 3.6846222250909149e+00 -3.9375881976793163e+00 - 21 4.9061987235524729e+00 -4.0750856834942697e+00 -3.6215902515825067e+00 - 22 4.3678559615763275e+00 -4.2061680055551145e+00 -4.4642895781639469e+00 
- 23 5.7376466626528462e+00 -3.5774216674350545e+00 -3.8813376792737415e+00 - 24 2.0684733913369224e+00 3.1519226166331364e+00 3.1553109785483522e+00 - 25 1.3008759192930202e+00 3.2744188053963823e+00 2.5098873122526379e+00 - 26 2.5803241685977909e+00 4.0113444045765041e+00 3.2142304818391287e+00 - 27 -1.9614122355901609e+00 -4.3555618328139021e+00 2.1100192128502622e+00 - 28 -2.7408085136690148e+00 -4.0209023016060552e+00 1.5833361911314168e+00 - 29 -1.3113987517049834e+00 -3.5992879715429198e+00 2.2687710339701930e+00 + 1 -2.7045576931365384e-01 2.4912153915127910e+00 -1.6695660174193144e-01 + 2 3.1006650885653392e-01 2.9612066384818774e+00 -8.5468359240877056e-01 + 3 -7.0398718670996596e-01 1.2305509689997693e+00 -6.2777234357568623e-01 + 4 -1.5814449350624078e+00 1.4843404724798535e+00 -1.2538273456433655e+00 + 5 -9.0783243756685006e-01 9.2526534460922938e-01 3.9949965943785426e-01 + 6 2.4859337464110062e-01 2.8395437677292801e-01 -1.2315849919995718e+00 + 7 3.4129121643837462e-01 -2.3102788529791828e-02 -2.5291407998329900e+00 + 8 1.1743406680717965e+00 -4.8860189094234913e-01 -6.3780684414657063e-01 + 9 1.3800528609303513e+00 -2.5274652114108015e-01 2.8354186861628400e-01 + 10 2.0510776838326117e+00 -1.4602212365720617e+00 -9.8289749648832170e-01 + 11 1.7878063062190042e+00 -1.9921840498876129e+00 -1.8890528117809133e+00 + 12 3.0062653102416346e+00 -4.9030348819064951e-01 -1.6234817573863822e+00 + 13 4.0515398561601499e+00 -8.9202280298994308e-01 -1.6400070473765287e+00 + 14 2.6066954671851068e+00 -4.1789389390575277e-01 -2.6634066414774398e+00 + 15 2.9697386898129197e+00 5.5405474601205984e-01 -1.2343532907729176e+00 + 16 2.6747029564056741e+00 -2.4124117273842192e+00 -2.3434860532736367e-02 + 17 2.2153579387356999e+00 -2.0897987524705992e+00 1.1963152377872239e+00 + 18 2.1369285315978819e+00 3.0156108277459790e+00 -3.5183940657539963e+00 + 19 1.5355811460020863e+00 2.6255306350384799e+00 -4.2354168844939002e+00 + 20 2.7727385869610495e+00 
3.6933911950960656e+00 -3.9313456335665453e+00 + 21 4.9040149976454908e+00 -4.0752342739930612e+00 -3.6210280393155685e+00 + 22 4.3584283483000474e+00 -4.2126170417598745e+00 -4.4609852540369923e+00 + 23 5.7439382608753773e+00 -3.5821957713386881e+00 -3.8766362488100117e+00 + 24 2.0689237180918769e+00 3.1513348704499196e+00 3.1550384095102570e+00 + 25 1.3045090135211659e+00 3.2665689836321810e+00 2.5111204914634193e+00 + 26 2.5809239161761726e+00 4.0117601377202847e+00 3.2212062405016724e+00 + 27 -1.9611007896081207e+00 -4.3563573211261462e+00 2.1098614022771494e+00 + 28 -2.7473545914982185e+00 -4.0200829741975630e+00 1.5830064034427631e+00 + 29 -1.3125994707851243e+00 -3.5962514442513154e+00 2.2746344518754498e+00 run_vel: ! |2 - 1 1.8357884460742731e-03 2.9421891809309235e-03 5.2384381057974576e-04 - 2 3.0339076323099138e-03 4.6794102525867737e-03 2.9775263255857258e-03 - 3 -2.1811928064590187e-03 -7.4684694907251135e-04 -1.0535152908069134e-03 - 4 -1.2752386069489895e-03 -1.9130739446192322e-03 -3.7312914387881177e-03 - 5 -2.4016730976801626e-03 -5.8395023339643008e-04 -2.8317392625797067e-04 - 6 -6.7329173570415558e-03 9.0774093771392641e-03 9.7539044284917012e-03 - 7 1.2999303320320067e-04 -2.6824818088661915e-03 -1.1064385809705708e-02 - 8 1.8584246404331885e-03 -1.3400172041949363e-03 5.5049582245146229e-03 - 9 1.1635486325108216e-03 -1.8972361797494706e-03 2.5149816841734955e-03 - 10 4.7657693629541455e-03 -4.0062074342253401e-03 -2.1989049659389844e-03 - 11 -9.2920886333545580e-04 -1.7649214093740193e-03 -2.7906825035643196e-03 - 12 1.2099834710459008e-03 -6.0766664653779138e-04 -1.5455288455127985e-03 - 13 3.9115512319669709e-03 3.7883034915414115e-03 -7.8083665715802345e-04 - 14 3.1474964889761757e-04 -4.3199984752753566e-03 -3.9634731211288876e-03 - 15 -4.0016733023944184e-03 -3.5391950448078408e-03 3.6119622299224763e-03 - 16 3.4687668849148145e-03 -2.9176920403455979e-03 -7.7752228821476215e-03 - 17 -1.9813219464918548e-03 2.1970157651489386e-03 
7.8875797890837103e-03 - 18 -6.6853137661653178e-04 -9.8326929599256503e-04 -5.4994204231171629e-04 - 19 -4.2304216640272113e-04 -3.4584846552286048e-03 4.7475482017100270e-03 - 20 4.7353375203586399e-03 5.1283070395151554e-03 2.1770619053484647e-03 - 21 -1.0192501824028387e-03 1.1167604918628844e-04 -7.6248280732632811e-05 - 22 -3.8015036794028234e-03 -3.0580779746802378e-03 3.2318199863478111e-03 - 23 9.9367095021362642e-04 -4.1395365418028232e-03 2.7644946489666354e-03 - 24 2.4311633374394557e-04 3.1447120784903294e-04 -4.2211250397061664e-04 - 25 3.9137860947859445e-03 -4.5365166679637295e-03 2.1711336306764846e-03 - 26 -1.4268708871173230e-03 -2.3370113183148592e-03 3.6181000484758448e-03 - 27 -1.6673719591314985e-04 1.8117429711863242e-04 -4.5749203077013730e-04 - 28 -1.0253930702737823e-03 -5.5815646467655806e-04 1.6045473388179754e-03 - 29 -2.0963062409570417e-03 2.5671965105012856e-04 2.9572613070049606e-03 + 1 8.1702074645354263e-03 1.6515202117650986e-02 4.7941469336088534e-03 + 2 5.4793033469769841e-03 5.1464824735920319e-03 -1.4591356769853548e-03 + 3 -8.2335058988087587e-03 -1.2926663429897282e-02 -4.0922324315820231e-03 + 4 -3.3968512986425896e-03 -5.9586658523124144e-03 -1.0756650770777734e-03 + 5 -1.1658826051209279e-02 -1.1193993209032561e-02 -2.8787337175040129e-03 + 6 -3.9380810946715861e-02 4.7658078376635958e-02 3.6984166084389578e-02 + 7 7.6859250359065487e-04 -1.0596844346008383e-02 -5.1474108818315739e-02 + 8 7.8658367472618524e-03 -3.3020166182538553e-03 3.4576616847351263e-02 + 9 1.5651759825792155e-03 3.7379367797178631e-03 1.5051270508251535e-02 + 10 2.9209115027410210e-02 -2.9079636336334622e-02 -1.4693353834959967e-02 + 11 -4.7791036632847351e-03 -3.7420517412193981e-03 -2.3314072395415053e-03 + 12 2.2371540751205394e-03 -5.1178724927781723e-04 -3.3452354485908379e-03 + 13 2.7521535591674470e-03 5.8111658360700744e-03 -8.0472550021755981e-04 + 14 3.5228591162840489e-03 -5.7968032496769481e-03 -3.9605392790085618e-03 + 15 
-1.6484125488189587e-03 -6.0254944267966555e-03 6.1452231213603593e-03 + 16 1.8681533496201778e-02 -1.3262182433081761e-02 -4.5636846184444781e-02 + 17 -1.2895956946875307e-02 9.7523042076136327e-03 3.7296862271497824e-02 + 18 -8.3844604949622544e-04 -1.0959331979630918e-03 -1.8883231141051712e-03 + 19 1.2400642737389216e-03 -2.5031680516680289e-03 7.2633208828032422e-03 + 20 3.5619032388416552e-03 4.6664333858458936e-03 4.9145981095079193e-03 + 21 -1.4645071159471662e-03 -2.7242209180838758e-04 7.1272665704736585e-04 + 22 -6.8856986268787035e-03 -4.2649670960839070e-03 5.6565289286038072e-04 + 23 6.0446701004681610e-03 -1.3999558207043038e-03 2.5817272842782119e-03 + 24 3.1797174259548137e-04 -9.9409313510120316e-04 1.4885702447403561e-04 + 25 1.1518919433985852e-04 -4.3777019831790272e-03 -8.8058800262529524e-04 + 26 2.0489472664324440e-03 2.7810807643201753e-03 4.3249553258435623e-03 + 27 4.8891848045180331e-04 -1.0464891567315256e-03 2.4353637884337831e-04 + 28 -6.2510920436768891e-03 1.4107986848621819e-03 -1.8406053609070112e-03 + 29 6.4221263686005782e-04 3.1280619277518889e-03 3.7257842641040153e-03 ... 
From 4bec1788a626435362da10f2d5af7d5c0a628a70 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Thu, 1 Aug 2024 01:31:11 -0400 Subject: [PATCH 008/294] make operator and inline functions const, plus remove printf debugging --- src/KOKKOS/fix_cmap_kokkos.cpp | 151 ++++++++++++--------------------- src/KOKKOS/fix_cmap_kokkos.h | 9 +- src/MOLECULE/fix_cmap.cpp | 22 +---- src/MOLECULE/fix_cmap.h | 3 +- 4 files changed, 59 insertions(+), 126 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 18c47439d2..960db293d9 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -49,7 +49,6 @@ FixCMAPKokkos::FixCMAPKokkos(LAMMPS *lmp, int narg, char **arg) : datamask_modify = F_MASK; // allocate memory for CMAP data - memoryKK->create_kokkos(k_g_axis,g_axis,CMAPDIM,"cmap:g_axis"); memoryKK->create_kokkos(k_cmapgrid,cmapgrid,CMAPMAX,CMAPDIM,CMAPDIM,"cmap:grid"); memoryKK->create_kokkos(k_d1cmapgrid,d1cmapgrid,CMAPMAX,CMAPDIM,CMAPDIM,"cmap:d1grid"); @@ -62,7 +61,51 @@ FixCMAPKokkos::FixCMAPKokkos(LAMMPS *lmp, int narg, char **arg) : d_d2cmapgrid = k_d2cmapgrid.template view(); d_d12cmapgrid = k_d12cmapgrid.template view(); + // read and setup CMAP data + read_grid_map(arg[3]); + + int i = 0; + double angle = -180.0; + + while (angle < 180.0) { + g_axis[i] = angle; + angle += CMAPDX; + i++; + } + FixCMAPKokkos::grow_arrays(atom->nmax); + + for( int i=0 ; i(); + k_cmapgrid.template sync(); + k_d1cmapgrid.template sync(); + k_d2cmapgrid.template sync(); + k_d12cmapgrid.template sync(); + } /* ---------------------------------------------------------------------- */ @@ -95,38 +138,8 @@ FixCMAPKokkos::~FixCMAPKokkos() template void FixCMAPKokkos::init() { - FixCMAP::init(); - if (utils::strmatch(update->integrate_style,"^respa")) error->all(FLERR,"Cannot yet use respa with Kokkos"); - - for( int i=0 ; i(); - k_cmapgrid.template sync(); - k_d1cmapgrid.template sync(); - k_d2cmapgrid.template sync(); - 
k_d12cmapgrid.template sync(); } /* ---------------------------------------------------------------------- @@ -148,7 +161,6 @@ void FixCMAPKokkos::pre_neighbor() if (nprocs == 1) maxcrossterm = ncmap; else maxcrossterm = static_cast (LB_FACTOR*ncmap/nprocs); memoryKK->create_kokkos(k_crosstermlist,crosstermlist,maxcrossterm,CMAPMAX,"cmap:crosstermlist"); - d_crosstermlist = k_crosstermlist.template view(); } @@ -217,38 +229,13 @@ void FixCMAPKokkos::pre_neighbor() compute CMAP terms as if newton_bond = OFF, even if actually ON ------------------------------------------------------------------------- */ -/* -template -void FixCMAPKokkos::post_force(int vflag) -{ - - atomKK->sync(execution_space,datamask_read); - atomKK->modified(execution_space,datamask_modify); - - d_x = atomKK->k_x.view(); - d_f = atomKK->k_f.view(); - d_type = atomKK->k_type.view(); - d_mask = atomKK->k_mask.view(); - int nlocal = atomKK->nlocal; - if (igroup == atomKK->firstgroup) nlocal = atomKK->nfirst; - - copymode = 1; - - Kokkos::parallel_for(nlocal, *this); - - copymode = 0; - -} -*/ - template void FixCMAPKokkos::post_force(int vflag) { d_x = atomKK->k_x.template view(); d_f = atomKK->k_f.template view(); - //atomKK->sync(execution_space,X_MASK|F_MASK); - atomKK->sync(execution_space,ALL_MASK); + atomKK->sync(execution_space,X_MASK|F_MASK); k_crosstermlist.template sync(); ecmap = 0.0; @@ -256,30 +243,19 @@ void FixCMAPKokkos::post_force(int vflag) ev_init(eflag,vflag); copymode = 1; - //Kokkos::parallel_for(ncrosstermlist, *this); - - for ( int n = 0; n < ncrosstermlist; n++) - operator()(n); - + Kokkos::parallel_for(ncrosstermlist, *this); copymode = 0; - - //atomKK->modified(execution_space,F_MASK); - atomKK->modified(execution_space,ALL_MASK); + atomKK->modified(execution_space,F_MASK); } /* ---------------------------------------------------------------------- */ - - template KOKKOS_INLINE_FUNCTION -//void FixCMAPKokkos::operator()(const int n) const -void 
FixCMAPKokkos::operator()(const int n) +void FixCMAPKokkos::operator()(const int n) const { - //std::cerr << "post_force (n=" << n << ")\n"; - int i1,i2,i3,i4,i5,type,nlist; int li1, li2, mli1,mli2,mli11,mli21,t1,li3,li4,mli3,mli4,mli31,mli41; int list[5]; @@ -293,7 +269,7 @@ void FixCMAPKokkos::operator()(const int n) double phi,psi,phi1,psi1; double f1[3],f2[3],f3[3],f4[3],f5[3],vcmap[CMAPMAX]; double gs[4],d1gs[4],d2gs[4],d12gs[4]; - double engfraction; + // vectors needed for the gradient/force calculation double dphidr1x,dphidr1y,dphidr1z,dphidr2x,dphidr2y,dphidr2z; double dphidr3x,dphidr3y,dphidr3z,dphidr4x,dphidr4y,dphidr4z; @@ -317,10 +293,6 @@ void FixCMAPKokkos::operator()(const int n) i4 = d_crosstermlist(n,3); i5 = d_crosstermlist(n,4); type = d_crosstermlist(n,5); - - //std::cerr << fmt::format("******** n={} i=[{},{},{},{},{}], type={}\n", n,i1,i2,i3,i4,i5,type); - - if (type == 0) return; // calculate bond vectors for both dihedrals @@ -399,8 +371,6 @@ void FixCMAPKokkos::operator()(const int n) phi = FixCMAP::dihedral_angle_atan2(vb21x,vb21y,vb21z,a1x,a1y,a1z,b1x,b1y,b1z,r32); psi = FixCMAP::dihedral_angle_atan2(vb32x,vb32y,vb32z,a2x,a2y,a2z,b2x,b2y,b2z,r43); - std::cerr << fmt::format("******** n={} phi={}, psi={}\n", n, phi, psi); - if (phi == 180.0) phi= -180.0; if (psi == 180.0) psi= -180.0; @@ -446,27 +416,21 @@ void FixCMAPKokkos::operator()(const int n) d12gs[2] = d_d12cmapgrid(t1,mli11,mli21); d12gs[3] = d_d12cmapgrid(t1,mli1,mli21); - std::cerr << fmt::format("******** n={} gs=[{},{},{},{}]\n", n, gs[0],gs[1],gs[2],gs[3]); - std::cerr << fmt::format("******** n={} d1gs=[{},{},{},{}]\n", n, d1gs[0],d1gs[1],d1gs[2],d1gs[3]); - std::cerr << fmt::format("******** n={} d2gs=[{},{},{},{}]\n", n, d2gs[0],d2gs[1],d2gs[2],d2gs[3]); - std::cerr << fmt::format("******** n={} d12gs=[{},{},{},{}]\n", n, d12gs[0],d12gs[1],d12gs[2],d12gs[3]); - // calculate the cmap energy and the gradient (dE/dphi,dE/dpsi) double E, dEdPhi, dEdPsi; 
bc_interpol(phi,psi,li3,li4,gs,d1gs,d2gs,d12gs,E,dEdPhi,dEdPsi); - std::cerr << fmt::format("******** n={} dEdPhi={}, dEdPsi={}\n", n, dEdPhi, dEdPsi); - // sum up cmap energy contributions - engfraction = 0.2 * E; +/* FIXME: needed for compute_scalar() + double engfraction = 0.2 * E; if (i1 < nlocal) ecmap += engfraction; if (i2 < nlocal) ecmap += engfraction; if (i3 < nlocal) ecmap += engfraction; if (i4 < nlocal) ecmap += engfraction; if (i5 < nlocal) ecmap += engfraction; - +*/ // calculate the derivatives dphi/dr_i dphidr1x = 1.0*r32/a1sq*a1x; @@ -524,8 +488,6 @@ void FixCMAPKokkos::operator()(const int n) f5[1] = -dEdPsi*dpsidr4y; f5[2] = -dEdPsi*dpsidr4z; - std::cerr << fmt::format("******** n={} f1=[{},{},{}]\n",n,f1[0],f1[1],f1[2]); - // apply force to each of the 5 atoms if (i1 < nlocal) { @@ -556,6 +518,7 @@ void FixCMAPKokkos::operator()(const int n) // tally energy and/or virial +/* if (evflag) { //std::cerr << "******** tally energy and/or virial\n"; nlist = 0; @@ -579,8 +542,7 @@ void FixCMAPKokkos::operator()(const int n) ev_tally(nlist,list,5.0,E,vcmap); //ev_tally(5,list,nlocal,newton_bond,E,vcmap); } - - //utils::logmesg(lmp, "post_force (n={})\n", n); +*/ } @@ -740,7 +702,7 @@ template KOKKOS_INLINE_FUNCTION void FixCMAPKokkos::bc_interpol(double x1, double x2, int low1, int low2, double *gs, double *d1gs, double *d2gs, double *d12gs, - double &E, double &dEdPhi, double &dEdPsi ) + double &E, double &dEdPhi, double &dEdPsi ) const { // FUSE bc_coeff() and bc_interpol() inline functions for kokkos version @@ -807,9 +769,6 @@ void FixCMAPKokkos::bc_interpol(double x1, double x2, int low1, int E = t*E + ((cij[i][3]*u+cij[i][2])*u+cij[i][1])*u+cij[i][0]; dEdPhi = u*dEdPhi + (3.0*cij[3][i]*t+2.0*cij[2][i])*t+cij[1][i]; dEdPsi = t*dEdPsi + (3.0*cij[i][3]*u+2.0*cij[i][2])*u+cij[i][1]; - - std::cerr << fmt::format("******** cij[{}]=[{},{},{},{}]\n", i,cij[i][0],cij[i][1],cij[i][2],cij[i][3]); - } dEdPhi *= (180.0/MY_PI/CMAPDX); diff --git 
a/src/KOKKOS/fix_cmap_kokkos.h b/src/KOKKOS/fix_cmap_kokkos.h index 186a717fc6..42756116b2 100644 --- a/src/KOKKOS/fix_cmap_kokkos.h +++ b/src/KOKKOS/fix_cmap_kokkos.h @@ -41,8 +41,7 @@ class FixCMAPKokkos : public FixCMAP { void post_force(int) override; KOKKOS_INLINE_FUNCTION - //void operator()(const int) const; - void operator()(const int); + void operator()(const int) const; void grow_arrays(int) override; void copy_arrays(int, int, int) override; @@ -78,14 +77,10 @@ class FixCMAPKokkos : public FixCMAP { DAT::tdual_float_3d k_cmapgrid, k_d1cmapgrid, k_d2cmapgrid, k_d12cmapgrid; typename AT::t_float_3d d_cmapgrid, d_d1cmapgrid, d_d2cmapgrid, d_d12cmapgrid; - // calculate bicubic interpolation coefficient matrix c_ij - KOKKOS_INLINE_FUNCTION - void bc_coeff(double *, double *, double *, double *, double **); - // perform bicubic interpolation at point of interest KOKKOS_INLINE_FUNCTION void bc_interpol(double, double, int, int, double *, double *, double *, double *, - double &, double &, double &); + double &, double &, double &) const; // copied from Domain KOKKOS_INLINE_FUNCTION diff --git a/src/MOLECULE/fix_cmap.cpp b/src/MOLECULE/fix_cmap.cpp index b58e055e9b..25e5a0ce9a 100644 --- a/src/MOLECULE/fix_cmap.cpp +++ b/src/MOLECULE/fix_cmap.cpp @@ -350,9 +350,6 @@ void FixCMAP::post_force(int vflag) i5 = crosstermlist[n][4]; type = crosstermlist[n][5]; - - //std::cerr << fmt::format("******** n={} i=[{},{},{},{},{}], type={}\n",n,i1,i2,i3,i4,i5,type); - if (type == 0) continue; // calculate bond vectors for both dihedrals @@ -431,8 +428,6 @@ void FixCMAP::post_force(int vflag) phi = dihedral_angle_atan2(vb21x,vb21y,vb21z,a1x,a1y,a1z,b1x,b1y,b1z,r32); psi = dihedral_angle_atan2(vb32x,vb32y,vb32z,a2x,a2y,a2z,b2x,b2y,b2z,r43); - std::cerr << fmt::format("******** n={} phi={}, psi={}\n", n, phi, psi); - if (phi == 180.0) phi= -180.0; if (psi == 180.0) psi= -180.0; @@ -480,18 +475,10 @@ void FixCMAP::post_force(int vflag) d12gs[2] = d12cmapgrid[t1][mli11][mli21]; 
d12gs[3] = d12cmapgrid[t1][mli1][mli21]; - std::cerr << fmt::format("******** n={} gs=[{},{},{},{}]\n", n, gs[0],gs[1],gs[2],gs[3]); - std::cerr << fmt::format("******** n={} d1gs=[{},{},{},{}]\n", n, d1gs[0],d1gs[1],d1gs[2],d1gs[3]); - std::cerr << fmt::format("******** n={} d2gs=[{},{},{},{}]\n", n, d2gs[0],d2gs[1],d2gs[2],d2gs[3]); - std::cerr << fmt::format("******** n={} d12gs=[{},{},{},{}]\n", n, d12gs[0],d12gs[1],d12gs[2],d12gs[3]); - - // calculate the cmap energy and the gradient (dE/dphi,dE/dpsi) bc_interpol(phi,psi,li3,li4,gs,d1gs,d2gs,d12gs); - std::cerr << fmt::format("******** n={} dEdPhi={}, dEdPsi={}\n", n, dEdPhi, dEdPsi); - // sum up cmap energy contributions @@ -559,8 +546,6 @@ void FixCMAP::post_force(int vflag) f5[1] = -dEdPsi*dpsidr4y; f5[2] = -dEdPsi*dpsidr4z; - std::cerr << fmt::format("******** n={} f1=[{},{},{}]\n",n,f1[0],f1[1],f1[2]); - // apply force to each of the 5 atoms if (i1 < nlocal) { @@ -613,12 +598,8 @@ void FixCMAP::post_force(int vflag) ev_tally(nlist,list,5.0,E,vcmap); //ev_tally(5,list,nlocal,newton_bond,E,vcmap); } - - utils::logmesg(lmp, "post_force (n={})\n", n); - } - } /* ---------------------------------------------------------------------- */ @@ -838,7 +819,7 @@ void FixCMAP::set_map_derivatives(double **map, double **d1yo, double **d2yo, double FixCMAP::dihedral_angle_atan2(double fx, double fy, double fz, double ax, double ay, double az, double bx, double by, double bz, - double absg) + double absg) const { // calculate the dihedral angle @@ -931,7 +912,6 @@ void FixCMAP::bc_interpol(double x1, double x2, int low1, int low2, double *gs, E = t*E + ((cij[i][3]*u+cij[i][2])*u+cij[i][1])*u+cij[i][0]; dEdPhi = u*dEdPhi + (3.0*cij[3][i]*t+2.0*cij[2][i])*t+cij[1][i]; dEdPsi = t*dEdPsi + (3.0*cij[i][3]*u+2.0*cij[i][2])*u+cij[i][1]; - std::cerr << fmt::format("******** cij[{}]=[{},{},{},{}]\n", i,cij[i][0],cij[i][1],cij[i][2],cij[i][3]); } dEdPhi *= (180.0/MY_PI/CMAPDX); diff --git a/src/MOLECULE/fix_cmap.h 
b/src/MOLECULE/fix_cmap.h index 36299a4663..47824ff49a 100644 --- a/src/MOLECULE/fix_cmap.h +++ b/src/MOLECULE/fix_cmap.h @@ -89,7 +89,6 @@ class FixCMAP : public Fix { tagint **crossterm_atom4, **crossterm_atom5; double ecmap; - //double fcmap[4]; FIXME: remove ? unused variable double *g_axis; // CMAP grid points obtained from external file @@ -120,7 +119,7 @@ class FixCMAP : public Fix { // calculate dihedral angles double dihedral_angle_atan2(double, double, double, double, double, double, double, double, - double, double); + double, double) const; // calculate bicubic interpolation coefficient matrix c_ij From f6ea64958575a75d602cd0c7470024e616fafc6e Mon Sep 17 00:00:00 2001 From: alphataubio Date: Mon, 5 Aug 2024 06:30:01 -0400 Subject: [PATCH 009/294] add fix wall/region/kk --- src/KOKKOS/fix_wall_region_kokkos.cpp | 370 ++++++++++++++++++++++++++ src/KOKKOS/fix_wall_region_kokkos.h | 111 ++++++++ src/fix_wall_region.cpp | 5 +- src/fix_wall_region.h | 4 +- 4 files changed, 487 insertions(+), 3 deletions(-) create mode 100644 src/KOKKOS/fix_wall_region_kokkos.cpp create mode 100644 src/KOKKOS/fix_wall_region_kokkos.h diff --git a/src/KOKKOS/fix_wall_region_kokkos.cpp b/src/KOKKOS/fix_wall_region_kokkos.cpp new file mode 100644 index 0000000000..96bf968293 --- /dev/null +++ b/src/KOKKOS/fix_wall_region_kokkos.cpp @@ -0,0 +1,370 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Mitch Murphy (alphataubio@gmail.com) +------------------------------------------------------------------------- */ + +#include "fix_wall_region_kokkos.h" + +#include "atom_masks.h" +#include "atom_kokkos.h" +#include "error.h" +#include "kokkos_base.h" +#include "math_special.h" +#include "memory_kokkos.h" +#include "region.h" + +using namespace LAMMPS_NS; +using MathSpecial::powint; + +enum { LJ93, LJ126, LJ1043, COLLOID, HARMONIC, MORSE }; + +/* ---------------------------------------------------------------------- + set the Kokkos flags and data masks of the fix, then back the 4-element + ewall accumulator with the dual view k_ewall (device side: d_ewall) +------------------------------------------------------------------------- */ + +template +FixWallRegionKokkos::FixWallRegionKokkos(LAMMPS *lmp, int narg, char **arg) : + FixWallRegion(lmp, narg, arg) +{ + kokkosable = 1; + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = X_MASK | V_MASK | MASK_MASK; + datamask_modify = F_MASK; + + // the FixWallRegion constructor allocated ewall with new double[4]; + // free it before create_kokkos() repoints ewall at the dual view data, + // otherwise that first allocation is leaked + + delete[] ewall; + memoryKK->create_kokkos(k_ewall,ewall,4,"wall_region:ewall"); + d_ewall = k_ewall.template view(); +} + +/* ---------------------------------------------------------------------- + release the dual views owned by this fix + does nothing while copymode is set, i.e. for the copy held by a functor +------------------------------------------------------------------------- */ + +template +FixWallRegionKokkos::~FixWallRegionKokkos() +{ + if (copymode) return; + + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_ewall,ewall); +} + +/* ---------------------------------------------------------------------- */ + +template +void FixWallRegionKokkos::post_force(int vflag) +{ + atomKK->sync(execution_space,datamask_read); + atomKK->modified(execution_space,datamask_modify); + + // virial setup + + v_init(vflag); + + // reallocate per-atom arrays if necessary + + if (vflag_atom) { + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"wall_region:vatom"); + d_vatom = k_vatom.template view(); + } + + d_x = atomKK->k_x.template view(); + d_f = atomKK->k_f.template view(); + if (style == COLLOID) d_radius = atomKK->k_radius.template view(); + d_mask = atomKK->k_mask.template view(); + int nlocal =
atomKK->nlocal; + + region->prematch(); + DAT::tdual_int_1d k_match = DAT::tdual_int_1d("efield:k_match",nlocal); + KokkosBase* regionKKBase = dynamic_cast(region); + regionKKBase->match_all_kokkos(groupbit,k_match); + k_match.template sync(); + d_match = k_match.template view(); + + // virial setup + + v_init(vflag); + + // region->match() ensures particle is in region or on surface, else error + // if returned contact dist r = 0, is on surface, also an error + // in COLLOID case, r <= radius is an error + // initialize ewall after region->prematch(), + // so a dynamic region can access last timestep values + + // energy initialize. + // eflag is used to track whether wall energies have been communicated. + + eflag = 0; + d_ewall(0)=d_ewall(1)=d_ewall(2)=d_ewall(3)=0.0; + + double result[10]; + + copymode = 1; + FixWallRegionKokkosFunctor functor(this); + Kokkos::parallel_reduce(nlocal,functor,result); + copymode = 0; + + for( int i=0 ; i<4 ; i++ ) Kokkos::atomic_add(&(d_ewall[i]),result[i]); + + if (vflag_global) { + virial[0] += result[4]; + virial[1] += result[5]; + virial[2] += result[6]; + virial[3] += result[7]; + virial[4] += result[8]; + virial[5] += result[9]; + } + + k_ewall.template modify(); + k_ewall.template sync(); + + atomKK->modified(execution_space,F_MASK); + + if (vflag_atom) { + k_vatom.template modify(); + k_vatom.template sync(); + } + +} + + +/* ---------------------------------------------------------------------- + wall interaction for one local atom i, called once per atom by the + parallel_reduce in post_force() + atoms outside the fix group (mask & groupbit) are skipped + aborts if the atom failed the region match or if a contact distance + is <= tooclose (particle radius for COLLOID style, else 0) + adds the wall force to d_f and accumulates into result: + result[0] = wall energy, result[1-3] = minus the wall force, + result[4-9] = global virial, filled by v_tally() +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixWallRegionKokkos::wall_particle(int i, value_type result) const { + if (d_mask(i) & groupbit) { + if (!d_match[i]) Kokkos::abort("Particle outside surface of region used in fix wall/region"); + + double rinv, tooclose; + + if (style == COLLOID) + tooclose =
d_radius(i); + else + tooclose = 0.0; + + // NOTE(review): surface() fills the region's contact[] array, which looks + // shared between all atoms - confirm this is safe inside a parallel kernel + + int n = region->surface(d_x(i,0), d_x(i,1), d_x(i,2), cutoff); + + for ( int m = 0; m < n; m++) { + if (region->contact[m].r <= tooclose) + Kokkos::abort("Particle outside surface of region used in fix wall/region"); + else + rinv = 1.0 / region->contact[m].r; + + // each style function returns the energy and writes the force magnitude + // into its last argument + + double fwallKK, engKK; + + if (style == LJ93) + engKK = lj93(region->contact[m].r,fwallKK); + else if (style == LJ126) + engKK = lj126(region->contact[m].r,fwallKK); + else if (style == LJ1043) + engKK = lj1043(region->contact[m].r,fwallKK); + else if (style == MORSE) + engKK = morse(region->contact[m].r,fwallKK); + else if (style == COLLOID) + engKK = colloid(region->contact[m].r,d_radius(i),fwallKK); + else + engKK = harmonic(region->contact[m].r,fwallKK); + + // use the locals fwallKK/engKK computed just above, not the like-named + // fwall/eng, which the Kokkos style functions never assign + + double delx = region->contact[m].delx; + double dely = region->contact[m].dely; + double delz = region->contact[m].delz; + double fx = fwallKK * delx * rinv; + double fy = fwallKK * dely * rinv; + double fz = fwallKK * delz * rinv; + d_f(i,0) += fx; + d_f(i,1) += fy; + d_f(i,2) += fz; + result[1] -= fx; + result[2] -= fy; + result[3] -= fz; + result[0] += engKK; + if (evflag) { + double v[6] = { + fx * delx, + fy * dely, + fz * delz, + fx * dely, + fx * delz, + fy * delz + }; + v_tally(result,i,v); + } + } + } +} + +/* ---------------------------------------------------------------------- + LJ 9/3 interaction for particle with wall + returns eng and sets fwallKK = magnitude of wall force +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double FixWallRegionKokkos::lj93(double r, double& fwallKK) const +{ + double rinv = 1.0 / r; + double r2inv = rinv * rinv; + double r4inv = r2inv * r2inv; + double r10inv = r4inv * r4inv * r2inv; + fwallKK = coeff1 * r10inv - coeff2 * r4inv; + return coeff3 * r4inv * r4inv * rinv - coeff4 * r2inv * rinv - offset; +} + +/* ---------------------------------------------------------------------- + LJ 12/6 interaction for
particle with wall + compute eng and fwall = magnitude of wall force +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double FixWallRegionKokkos::lj126(double r, double& fwallKK) const +{ + double rinv = 1.0 / r; + double r2inv = rinv * rinv; + double r6inv = r2inv * r2inv * r2inv; + fwallKK = r6inv * (coeff1 * r6inv - coeff2) * rinv; + return r6inv * (coeff3 * r6inv - coeff4) - offset; +} + +/* ---------------------------------------------------------------------- + LJ 10/4/3 interaction for particle with wall + compute eng and fwall = magnitude of wall force +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double FixWallRegionKokkos::lj1043(double r, double& fwallKK) const +{ + double rinv = 1.0 / r; + double r2inv = rinv * rinv; + double r4inv = r2inv * r2inv; + double r10inv = r4inv * r4inv * r2inv; + fwallKK = coeff5 * r10inv * rinv - coeff6 * r4inv * rinv - coeff7 * powint(r + coeff4, -4); + return coeff1 * r10inv - coeff2 * r4inv - coeff3 * powint(r + coeff4, -3) - offset; +} + +/* ---------------------------------------------------------------------- + Morse interaction for particle with wall + compute eng and fwall = magnitude of wall force +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double FixWallRegionKokkos::morse(double r, double& fwallKK) const +{ + double dr = r - sigma; + double dexp = exp(-alpha * dr); + fwallKK = coeff1 * (dexp * dexp - dexp); + return epsilon * (dexp * dexp - 2.0 * dexp) - offset; +} + +/* ---------------------------------------------------------------------- + colloid interaction for finite-size particle of rad with wall + compute eng and fwall = magnitude of wall force +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double FixWallRegionKokkos::colloid(double r, 
double rad, double& fwallKK) const +{ + double new_coeff2 = coeff2 * rad * rad * rad; + double diam = 2.0 * rad; + + double rad2 = rad * rad; + double rad4 = rad2 * rad2; + double rad8 = rad4 * rad4; + double delta2 = rad2 - r * r; + double rinv = 1.0 / delta2; + double r2inv = rinv * rinv; + double r4inv = r2inv * r2inv; + double r8inv = r4inv * r4inv; + fwallKK = coeff1 * + (rad8 * rad + 27.0 * rad4 * rad2 * rad * r * r + 63.0 * rad4 * rad * powint(r, 4) + + 21.0 * rad2 * rad * powint(r, 6)) * + r8inv - + new_coeff2 * r2inv; + + double r2 = 0.5 * diam - r; + double rinv2 = 1.0 / r2; + double r2inv2 = rinv2 * rinv2; + double r4inv2 = r2inv2 * r2inv2; + double r3 = r + 0.5 * diam; + double rinv3 = 1.0 / r3; + double r2inv3 = rinv3 * rinv3; + double r4inv3 = r2inv3 * r2inv3; + return coeff3 * + ((-3.5 * diam + r) * r4inv2 * r2inv2 * rinv2 + + (3.5 * diam + r) * r4inv3 * r2inv3 * rinv3) - + coeff4 * ((-diam * r + r2 * r3 * (log(-r2) - log(r3))) * (-rinv2) * rinv3) - offset; +} + +/* ---------------------------------------------------------------------- + harmonic interaction for particle with wall + compute eng and fwall = magnitude of wall force +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double FixWallRegionKokkos::harmonic(double r, double& fwallKK) const +{ + double dr = cutoff - r; + fwallKK = 2.0 * epsilon * dr; + return epsilon * dr * dr; +} + +/* ---------------------------------------------------------------------- + tally virial into global and per-atom accumulators + i = local index of atom + v = total virial for the interaction + increment global virial by v + increment per-atom virial by v + this method can be used when fix computes forces in post_force() + and the force depends on a distance to some external object + e.g. 
fix wall/lj93: compute virial only on owned atoms +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixWallRegionKokkos::v_tally(value_type result, int i, double *v) const +{ + + if (vflag_global) { + result[4] += v[0]; + result[5] += v[1]; + result[6] += v[2]; + result[7] += v[3]; + result[8] += v[4]; + result[9] += v[5]; + } + + if (vflag_atom) { + Kokkos::atomic_add(&(d_vatom(i,0)),v[0]); + Kokkos::atomic_add(&(d_vatom(i,1)),v[1]); + Kokkos::atomic_add(&(d_vatom(i,2)),v[2]); + Kokkos::atomic_add(&(d_vatom(i,3)),v[3]); + Kokkos::atomic_add(&(d_vatom(i,4)),v[4]); + Kokkos::atomic_add(&(d_vatom(i,5)),v[5]); + } + +} + +namespace LAMMPS_NS { +template class FixWallRegionKokkos; +#ifdef LMP_KOKKOS_GPU +template class FixWallRegionKokkos; +#endif +} diff --git a/src/KOKKOS/fix_wall_region_kokkos.h b/src/KOKKOS/fix_wall_region_kokkos.h new file mode 100644 index 0000000000..63bf8db7e2 --- /dev/null +++ b/src/KOKKOS/fix_wall_region_kokkos.h @@ -0,0 +1,111 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS +// clang-format off +FixStyle(wall/region/kk,FixWallRegionKokkos); +FixStyle(wall/region/kk/device,FixWallRegionKokkos); +FixStyle(wall/region/kk/host,FixWallRegionKokkos); +// clang-format on +#else + +// clang-format off +#ifndef LMP_FIX_WALL_REGION_KOKKOS_H +#define LMP_FIX_WALL_REGION_KOKKOS_H + +#include "fix_wall_region.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + + +template +class FixWallRegionKokkos : public FixWallRegion { + public: + typedef DeviceType device_type; + typedef ArrayTypes AT; + typedef double value_type[]; + + FixWallRegionKokkos(class LAMMPS *, int, char **); + ~FixWallRegionKokkos() override; + void post_force(int) override; + + KOKKOS_INLINE_FUNCTION + void wall_particle(int, value_type) const; + + private: + + typename AT::t_x_array d_x; + typename AT::t_f_array d_f; + typename AT::t_float_1d d_radius; + typename AT::t_int_1d d_mask; + + typename AT::t_int_1d d_match; + + DAT::tdual_virial_array k_vatom; + typename AT::t_virial_array d_vatom; + + typename AT::tdual_ffloat_1d k_ewall; + typename AT::t_ffloat_1d d_ewall; + + KOKKOS_INLINE_FUNCTION + double lj93(double, double&) const; + + KOKKOS_INLINE_FUNCTION + double lj126(double, double&) const; + + KOKKOS_INLINE_FUNCTION + double lj1043(double, double&) const; + + KOKKOS_INLINE_FUNCTION + double morse(double, double&) const; + + KOKKOS_INLINE_FUNCTION + double colloid(double, double, double&) const; + + KOKKOS_INLINE_FUNCTION + double harmonic(double, double&) const; + + KOKKOS_INLINE_FUNCTION + void v_tally(value_type, int, double*) const; + +}; + + +template +struct FixWallRegionKokkosFunctor { + typedef DeviceType device_type; + typedef double value_type[]; + const int value_count; + FixWallRegionKokkos c; + + FixWallRegionKokkosFunctor(FixWallRegionKokkos* c_ptr): + value_count(10), c(*c_ptr) {} + + KOKKOS_INLINE_FUNCTION + void init(value_type result) const { + for (int 
i=0 ; i<10 ; i++ ) result[i] = 0.0; + } + + KOKKOS_INLINE_FUNCTION + void operator()(const int i, value_type result) const { + c.wall_particle(i,result); + } + +}; + +} + +#endif +#endif + diff --git a/src/fix_wall_region.cpp b/src/fix_wall_region.cpp index 3d817f34f4..7dac494160 100644 --- a/src/fix_wall_region.cpp +++ b/src/fix_wall_region.cpp @@ -50,7 +50,8 @@ FixWallRegion::FixWallRegion(LAMMPS *lmp, int narg, char **arg) : virial_global_flag = virial_peratom_flag = 1; respa_level_support = 1; ilevel_respa = 0; - + ewall = new double[4]; + // parse args region = domain->get_region_by_id(arg[3]); @@ -100,7 +101,9 @@ FixWallRegion::FixWallRegion(LAMMPS *lmp, int narg, char **arg) : FixWallRegion::~FixWallRegion() { + if (copymode) return; delete[] idregion; + delete[] ewall; } /* ---------------------------------------------------------------------- */ diff --git a/src/fix_wall_region.h b/src/fix_wall_region.h index 77b82d012c..c95cc65fa9 100644 --- a/src/fix_wall_region.h +++ b/src/fix_wall_region.h @@ -38,12 +38,12 @@ class FixWallRegion : public Fix { double compute_scalar() override; double compute_vector(int) override; - private: + protected: int style; double epsilon, sigma, cutoff; double alpha; int eflag; - double ewall[4], ewall_all[4]; + double *ewall, ewall_all[4]; // need ewall double*, not double[] for kokkos dual view int ilevel_respa; char *idregion; class Region *region; From 1166531594361acad9f93b6592e45505abdbec5a Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 6 Aug 2024 01:24:21 -0400 Subject: [PATCH 010/294] add 4 units tests for fix wall/region/kk --- ...=> fix-timestep-wall_region_harmonic.yaml} | 0 .../fix-timestep-wall_region_lj1043.yaml | 84 +++++++++++++++++++ .../tests/fix-timestep-wall_region_lj126.yaml | 84 +++++++++++++++++++ .../tests/fix-timestep-wall_region_lj93.yaml | 84 +++++++++++++++++++ .../tests/fix-timestep-wall_region_morse.yaml | 84 +++++++++++++++++++ 5 files changed, 336 insertions(+) rename 
unittest/force-styles/tests/{fix-timestep-wall_region_harmonic_const.yaml => fix-timestep-wall_region_harmonic.yaml} (100%) create mode 100644 unittest/force-styles/tests/fix-timestep-wall_region_lj1043.yaml create mode 100644 unittest/force-styles/tests/fix-timestep-wall_region_lj126.yaml create mode 100644 unittest/force-styles/tests/fix-timestep-wall_region_lj93.yaml create mode 100644 unittest/force-styles/tests/fix-timestep-wall_region_morse.yaml diff --git a/unittest/force-styles/tests/fix-timestep-wall_region_harmonic_const.yaml b/unittest/force-styles/tests/fix-timestep-wall_region_harmonic.yaml similarity index 100% rename from unittest/force-styles/tests/fix-timestep-wall_region_harmonic_const.yaml rename to unittest/force-styles/tests/fix-timestep-wall_region_harmonic.yaml diff --git a/unittest/force-styles/tests/fix-timestep-wall_region_lj1043.yaml b/unittest/force-styles/tests/fix-timestep-wall_region_lj1043.yaml new file mode 100644 index 0000000000..1664609b7a --- /dev/null +++ b/unittest/force-styles/tests/fix-timestep-wall_region_lj1043.yaml @@ -0,0 +1,84 @@ +--- +lammps_version: 27 Jun 2024 +tags: generated +date_generated: Mon Aug 5 06:12:03 2024 +epsilon: 4e-14 +skip_tests: +prerequisites: ! | + atom full + fix wall/region +pre_commands: ! | + boundary f f f +post_commands: ! | + fix move all nve + region box block EDGE EDGE EDGE EDGE EDGE EDGE + fix test solute wall/region box lj1043 1.0 1.0 2.5 + fix_modify test virial yes +input_file: in.fourmol +natoms: 29 +run_stress: ! |2- + 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +global_scalar: 0 +global_vector: ! |- + 3 0 0 0 +run_pos: ! 
|2 + 1 -2.7045559775384026e-01 2.4912159905679729e+00 -1.6695851791541885e-01 + 2 3.1004029573899528e-01 2.9612354631094391e+00 -8.5466363037021464e-01 + 3 -7.0398551400789466e-01 1.2305509955830618e+00 -6.2777526944456274e-01 + 4 -1.5818159336499285e+00 1.4837407818929933e+00 -1.2538710836062004e+00 + 5 -9.0719763672789266e-01 9.2652103885675297e-01 3.9954210488374786e-01 + 6 2.4831720524855988e-01 2.8313021497871271e-01 -1.2314233331711453e+00 + 7 3.4143527641386412e-01 -2.2646551041391422e-02 -2.5292291414903052e+00 + 8 1.1743552229100009e+00 -4.8863228565853944e-01 -6.3783432910825522e-01 + 9 1.3800524229500313e+00 -2.5274721030406683e-01 2.8353985887095157e-01 + 10 2.0510765220543883e+00 -1.4604063740302866e+00 -9.8323745081712954e-01 + 11 1.7878031944442556e+00 -1.9921863272948861e+00 -1.8890602447625777e+00 + 12 3.0063007039340053e+00 -4.9013350496963298e-01 -1.6231898107386231e+00 + 13 4.0515402959192999e+00 -8.9202011606653986e-01 -1.6400005529924957e+00 + 14 2.6066963345543819e+00 -4.1789253965514156e-01 -2.6634003608794394e+00 + 15 2.9695287185712913e+00 5.5422613165234036e-01 -1.2342022021790127e+00 + 16 2.6747029695228521e+00 -2.4124119054564295e+00 -2.3435746150616152e-02 + 17 2.2153577785283796e+00 -2.0897985186907717e+00 1.1963150794479436e+00 + 18 2.1369701704094664e+00 3.0158507413593139e+00 -3.5179348337135590e+00 + 19 1.5355837135395243e+00 2.6255292354730009e+00 -4.2353987771401354e+00 + 20 2.7727573003748263e+00 3.6923910441179069e+00 -3.9330842453167185e+00 + 21 4.9040128073837339e+00 -4.0752348170758461e+00 -3.6210314709795299e+00 + 22 4.3582355554510048e+00 -4.2126119427061379e+00 -4.4612844196307497e+00 + 23 5.7439382849366911e+00 -3.5821957939240279e+00 -3.8766361295959513e+00 + 24 2.0689243582454213e+00 3.1513346907303501e+00 3.1550389751128463e+00 + 25 1.3045351331414130e+00 3.2665125705869009e+00 2.5111855257365274e+00 + 26 2.5809237402714267e+00 4.0117602605512728e+00 3.2212060528800821e+00 + 27 -1.9611343130357228e+00 
-4.3563411931359752e+00 2.1098293115523705e+00 + 28 -2.7473562684513411e+00 -4.0200819932379330e+00 1.5830052163433954e+00 + 29 -1.3126000191359855e+00 -3.5962518039482929e+00 2.2746342468737835e+00 +run_vel: ! |2 + 1 8.1705744183262832e-03 1.6516406176274298e-02 4.7902264318913203e-03 + 2 5.4501493445687828e-03 5.1791699408496447e-03 -1.4372931530376549e-03 + 3 -8.2298292722385574e-03 -1.2926551614621364e-02 -4.0984181178163699e-03 + 4 -3.7699042590093523e-03 -6.5722892098813894e-03 -1.1184640360133299e-03 + 5 -1.1021961004346582e-02 -9.8906780939336091e-03 -2.8410737829284408e-03 + 6 -3.9676663166400027e-02 4.6817061464710263e-02 3.7148491979476131e-02 + 7 9.1033953013898742e-04 -1.0128524411938794e-02 -5.1568251805019748e-02 + 8 7.9064712058855742e-03 -3.3507254552631576e-03 3.4557098492564650e-02 + 9 1.5644176117320932e-03 3.7365546102722212e-03 1.5047408822037651e-02 + 10 2.9201446820573192e-02 -2.9249578745486147e-02 -1.5018077424322544e-02 + 11 -4.7835961513517542e-03 -3.7481385134185202e-03 -2.3464104142290089e-03 + 12 2.2696451841920694e-03 -3.4774154398129690e-04 -3.0640770327796979e-03 + 13 2.7531740451953164e-03 5.8171061612840493e-03 -7.9467454022160377e-04 + 14 3.5246182371994183e-03 -5.7939995585585503e-03 -3.9478431172751344e-03 + 15 -1.8547943640122972e-03 -5.8554729942777778e-03 6.2938485140538692e-03 + 16 1.8681499973445252e-02 -1.3262466204585332e-02 -4.5638651457003250e-02 + 17 -1.2896269981100378e-02 9.7527665265956451e-03 3.7296535360836762e-02 + 18 -8.0065795274987550e-04 -8.6270473974390605e-04 -1.4483040536385806e-03 + 19 1.2452390067376805e-03 -2.5061097800836356e-03 7.2998639311871892e-03 + 20 3.5930058460518109e-03 3.6938852051849871e-03 3.2322738480194770e-03 + 21 -1.4689219756961604e-03 -2.7352107824530231e-04 7.0581625180892046e-04 + 22 -7.0694199165145140e-03 -4.2577148692717554e-03 2.8079117911323815e-04 + 23 6.0446963236685256e-03 -1.4000131545098772e-03 2.5819754799379755e-03 + 24 3.1926368451268056e-04 -9.9445664487428712e-04 
1.4999960207062358e-04 + 25 1.3789752933078488e-04 -4.4335894831520773e-03 -8.1808138106080109e-04 + 26 2.0485904023410002e-03 2.7813358660936120e-03 4.3245726853349290e-03 + 27 4.5604120293369819e-04 -1.0305523026921102e-03 2.1188058381358391e-04 + 28 -6.2544520861855151e-03 1.4127711176146864e-03 -1.8429821884794260e-03 + 29 6.4110631534402261e-04 3.1273432719593807e-03 3.7253671105656745e-03 +... diff --git a/unittest/force-styles/tests/fix-timestep-wall_region_lj126.yaml b/unittest/force-styles/tests/fix-timestep-wall_region_lj126.yaml new file mode 100644 index 0000000000..275fc6f721 --- /dev/null +++ b/unittest/force-styles/tests/fix-timestep-wall_region_lj126.yaml @@ -0,0 +1,84 @@ +--- +lammps_version: 27 Jun 2024 +tags: generated +date_generated: Mon Aug 5 06:12:32 2024 +epsilon: 4e-14 +skip_tests: +prerequisites: ! | + atom full + fix wall/region +pre_commands: ! | + boundary f f f +post_commands: ! | + fix move all nve + region box block EDGE EDGE EDGE EDGE EDGE EDGE + fix test solute wall/region box lj126 1.0 1.0 2.5 + fix_modify test virial yes +input_file: in.fourmol +natoms: 29 +run_stress: ! |2- + 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +global_scalar: 0 +global_vector: ! |- + 3 0 0 0 +run_pos: ! 
|2 + 1 -2.7045559775384026e-01 2.4912159905679729e+00 -1.6695851791541885e-01 + 2 3.1004029573899528e-01 2.9612354631094391e+00 -8.5466363037021464e-01 + 3 -7.0398551400789466e-01 1.2305509955830618e+00 -6.2777526944456274e-01 + 4 -1.5818159336499285e+00 1.4837407818929933e+00 -1.2538710836062004e+00 + 5 -9.0719763672789266e-01 9.2652103885675297e-01 3.9954210488374786e-01 + 6 2.4831720524855988e-01 2.8313021497871271e-01 -1.2314233331711453e+00 + 7 3.4143527641386412e-01 -2.2646551041391422e-02 -2.5292291414903052e+00 + 8 1.1743552229100009e+00 -4.8863228565853944e-01 -6.3783432910825522e-01 + 9 1.3800524229500313e+00 -2.5274721030406683e-01 2.8353985887095157e-01 + 10 2.0510765220543883e+00 -1.4604063740302866e+00 -9.8323745081712954e-01 + 11 1.7878031944442556e+00 -1.9921863272948861e+00 -1.8890602447625777e+00 + 12 3.0063007039340053e+00 -4.9013350496963298e-01 -1.6231898107386231e+00 + 13 4.0515402959192999e+00 -8.9202011606653986e-01 -1.6400005529924957e+00 + 14 2.6066963345543819e+00 -4.1789253965514156e-01 -2.6634003608794394e+00 + 15 2.9695287185712913e+00 5.5422613165234036e-01 -1.2342022021790127e+00 + 16 2.6747029695228521e+00 -2.4124119054564295e+00 -2.3435746150616152e-02 + 17 2.2153577785283796e+00 -2.0897985186907717e+00 1.1963150794479436e+00 + 18 2.1369701704094664e+00 3.0158507413593139e+00 -3.5179348337135590e+00 + 19 1.5355837135395243e+00 2.6255292354730009e+00 -4.2353987771401354e+00 + 20 2.7727573003748263e+00 3.6923910441179069e+00 -3.9330842453167185e+00 + 21 4.9040128073837339e+00 -4.0752348170758461e+00 -3.6210314709795299e+00 + 22 4.3582355554510048e+00 -4.2126119427061379e+00 -4.4612844196307497e+00 + 23 5.7439382849366911e+00 -3.5821957939240279e+00 -3.8766361295959513e+00 + 24 2.0689243582454213e+00 3.1513346907303501e+00 3.1550389751128463e+00 + 25 1.3045351331414130e+00 3.2665125705869009e+00 2.5111855257365274e+00 + 26 2.5809237402714267e+00 4.0117602605512728e+00 3.2212060528800821e+00 + 27 -1.9611343130357228e+00 
-4.3563411931359752e+00 2.1098293115523705e+00 + 28 -2.7473562684513411e+00 -4.0200819932379330e+00 1.5830052163433954e+00 + 29 -1.3126000191359855e+00 -3.5962518039482929e+00 2.2746342468737835e+00 +run_vel: ! |2 + 1 8.1705744183262832e-03 1.6516406176274298e-02 4.7902264318913203e-03 + 2 5.4501493445687828e-03 5.1791699408496447e-03 -1.4372931530376549e-03 + 3 -8.2298292722385574e-03 -1.2926551614621364e-02 -4.0984181178163699e-03 + 4 -3.7699042590093523e-03 -6.5722892098813894e-03 -1.1184640360133299e-03 + 5 -1.1021961004346582e-02 -9.8906780939336091e-03 -2.8410737829284408e-03 + 6 -3.9676663166400027e-02 4.6817061464710263e-02 3.7148491979476131e-02 + 7 9.1033953013898742e-04 -1.0128524411938794e-02 -5.1568251805019748e-02 + 8 7.9064712058855742e-03 -3.3507254552631576e-03 3.4557098492564650e-02 + 9 1.5644176117320932e-03 3.7365546102722212e-03 1.5047408822037651e-02 + 10 2.9201446820573192e-02 -2.9249578745486147e-02 -1.5018077424322544e-02 + 11 -4.7835961513517542e-03 -3.7481385134185202e-03 -2.3464104142290089e-03 + 12 2.2696451841920694e-03 -3.4774154398129690e-04 -3.0640770327796979e-03 + 13 2.7531740451953164e-03 5.8171061612840493e-03 -7.9467454022160377e-04 + 14 3.5246182371994183e-03 -5.7939995585585503e-03 -3.9478431172751344e-03 + 15 -1.8547943640122972e-03 -5.8554729942777778e-03 6.2938485140538692e-03 + 16 1.8681499973445252e-02 -1.3262466204585332e-02 -4.5638651457003250e-02 + 17 -1.2896269981100378e-02 9.7527665265956451e-03 3.7296535360836762e-02 + 18 -8.0065795274987550e-04 -8.6270473974390605e-04 -1.4483040536385806e-03 + 19 1.2452390067376805e-03 -2.5061097800836356e-03 7.2998639311871892e-03 + 20 3.5930058460518109e-03 3.6938852051849871e-03 3.2322738480194770e-03 + 21 -1.4689219756961604e-03 -2.7352107824530231e-04 7.0581625180892046e-04 + 22 -7.0694199165145140e-03 -4.2577148692717554e-03 2.8079117911323815e-04 + 23 6.0446963236685256e-03 -1.4000131545098772e-03 2.5819754799379755e-03 + 24 3.1926368451268056e-04 -9.9445664487428712e-04 
1.4999960207062358e-04 + 25 1.3789752933078488e-04 -4.4335894831520773e-03 -8.1808138106080109e-04 + 26 2.0485904023410002e-03 2.7813358660936120e-03 4.3245726853349290e-03 + 27 4.5604120293369819e-04 -1.0305523026921102e-03 2.1188058381358391e-04 + 28 -6.2544520861855151e-03 1.4127711176146864e-03 -1.8429821884794260e-03 + 29 6.4110631534402261e-04 3.1273432719593807e-03 3.7253671105656745e-03 +... diff --git a/unittest/force-styles/tests/fix-timestep-wall_region_lj93.yaml b/unittest/force-styles/tests/fix-timestep-wall_region_lj93.yaml new file mode 100644 index 0000000000..d68a2f0e75 --- /dev/null +++ b/unittest/force-styles/tests/fix-timestep-wall_region_lj93.yaml @@ -0,0 +1,84 @@ +--- +lammps_version: 27 Jun 2024 +tags: generated +date_generated: Mon Aug 5 05:42:49 2024 +epsilon: 4e-14 +skip_tests: +prerequisites: ! | + atom full + fix wall/region +pre_commands: ! | + boundary f f f +post_commands: ! | + fix move all nve + region box block EDGE EDGE EDGE EDGE EDGE EDGE + fix test solute wall/region box lj93 1.0 1.0 2.5 + fix_modify test virial yes +input_file: in.fourmol +natoms: 29 +run_stress: ! |2- + 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +global_scalar: 0 +global_vector: ! |- + 3 0 0 0 +run_pos: ! 
|2 + 1 -2.7045559775384026e-01 2.4912159905679729e+00 -1.6695851791541885e-01 + 2 3.1004029573899528e-01 2.9612354631094391e+00 -8.5466363037021464e-01 + 3 -7.0398551400789466e-01 1.2305509955830618e+00 -6.2777526944456274e-01 + 4 -1.5818159336499285e+00 1.4837407818929933e+00 -1.2538710836062004e+00 + 5 -9.0719763672789266e-01 9.2652103885675297e-01 3.9954210488374786e-01 + 6 2.4831720524855988e-01 2.8313021497871271e-01 -1.2314233331711453e+00 + 7 3.4143527641386412e-01 -2.2646551041391422e-02 -2.5292291414903052e+00 + 8 1.1743552229100009e+00 -4.8863228565853944e-01 -6.3783432910825522e-01 + 9 1.3800524229500313e+00 -2.5274721030406683e-01 2.8353985887095157e-01 + 10 2.0510765220543883e+00 -1.4604063740302866e+00 -9.8323745081712954e-01 + 11 1.7878031944442556e+00 -1.9921863272948861e+00 -1.8890602447625777e+00 + 12 3.0063007039340053e+00 -4.9013350496963298e-01 -1.6231898107386231e+00 + 13 4.0515402959192999e+00 -8.9202011606653986e-01 -1.6400005529924957e+00 + 14 2.6066963345543819e+00 -4.1789253965514156e-01 -2.6634003608794394e+00 + 15 2.9695287185712913e+00 5.5422613165234036e-01 -1.2342022021790127e+00 + 16 2.6747029695228521e+00 -2.4124119054564295e+00 -2.3435746150616152e-02 + 17 2.2153577785283796e+00 -2.0897985186907717e+00 1.1963150794479436e+00 + 18 2.1369701704094664e+00 3.0158507413593139e+00 -3.5179348337135590e+00 + 19 1.5355837135395243e+00 2.6255292354730009e+00 -4.2353987771401354e+00 + 20 2.7727573003748263e+00 3.6923910441179069e+00 -3.9330842453167185e+00 + 21 4.9040128073837339e+00 -4.0752348170758461e+00 -3.6210314709795299e+00 + 22 4.3582355554510048e+00 -4.2126119427061379e+00 -4.4612844196307497e+00 + 23 5.7439382849366911e+00 -3.5821957939240279e+00 -3.8766361295959513e+00 + 24 2.0689243582454213e+00 3.1513346907303501e+00 3.1550389751128463e+00 + 25 1.3045351331414130e+00 3.2665125705869009e+00 2.5111855257365274e+00 + 26 2.5809237402714267e+00 4.0117602605512728e+00 3.2212060528800821e+00 + 27 -1.9611343130357228e+00 
-4.3563411931359752e+00 2.1098293115523705e+00 + 28 -2.7473562684513411e+00 -4.0200819932379330e+00 1.5830052163433954e+00 + 29 -1.3126000191359855e+00 -3.5962518039482929e+00 2.2746342468737835e+00 +run_vel: ! |2 + 1 8.1705744183262832e-03 1.6516406176274298e-02 4.7902264318913203e-03 + 2 5.4501493445687828e-03 5.1791699408496447e-03 -1.4372931530376549e-03 + 3 -8.2298292722385574e-03 -1.2926551614621364e-02 -4.0984181178163699e-03 + 4 -3.7699042590093523e-03 -6.5722892098813894e-03 -1.1184640360133299e-03 + 5 -1.1021961004346582e-02 -9.8906780939336091e-03 -2.8410737829284408e-03 + 6 -3.9676663166400027e-02 4.6817061464710263e-02 3.7148491979476131e-02 + 7 9.1033953013898742e-04 -1.0128524411938794e-02 -5.1568251805019748e-02 + 8 7.9064712058855742e-03 -3.3507254552631576e-03 3.4557098492564650e-02 + 9 1.5644176117320932e-03 3.7365546102722212e-03 1.5047408822037651e-02 + 10 2.9201446820573192e-02 -2.9249578745486147e-02 -1.5018077424322544e-02 + 11 -4.7835961513517542e-03 -3.7481385134185202e-03 -2.3464104142290089e-03 + 12 2.2696451841920694e-03 -3.4774154398129690e-04 -3.0640770327796979e-03 + 13 2.7531740451953164e-03 5.8171061612840493e-03 -7.9467454022160377e-04 + 14 3.5246182371994183e-03 -5.7939995585585503e-03 -3.9478431172751344e-03 + 15 -1.8547943640122972e-03 -5.8554729942777778e-03 6.2938485140538692e-03 + 16 1.8681499973445252e-02 -1.3262466204585332e-02 -4.5638651457003250e-02 + 17 -1.2896269981100378e-02 9.7527665265956451e-03 3.7296535360836762e-02 + 18 -8.0065795274987550e-04 -8.6270473974390605e-04 -1.4483040536385806e-03 + 19 1.2452390067376805e-03 -2.5061097800836356e-03 7.2998639311871892e-03 + 20 3.5930058460518109e-03 3.6938852051849871e-03 3.2322738480194770e-03 + 21 -1.4689219756961604e-03 -2.7352107824530231e-04 7.0581625180892046e-04 + 22 -7.0694199165145140e-03 -4.2577148692717554e-03 2.8079117911323815e-04 + 23 6.0446963236685256e-03 -1.4000131545098772e-03 2.5819754799379755e-03 + 24 3.1926368451268056e-04 -9.9445664487428712e-04 
1.4999960207062358e-04 + 25 1.3789752933078488e-04 -4.4335894831520773e-03 -8.1808138106080109e-04 + 26 2.0485904023410002e-03 2.7813358660936120e-03 4.3245726853349290e-03 + 27 4.5604120293369819e-04 -1.0305523026921102e-03 2.1188058381358391e-04 + 28 -6.2544520861855151e-03 1.4127711176146864e-03 -1.8429821884794260e-03 + 29 6.4110631534402261e-04 3.1273432719593807e-03 3.7253671105656745e-03 +... diff --git a/unittest/force-styles/tests/fix-timestep-wall_region_morse.yaml b/unittest/force-styles/tests/fix-timestep-wall_region_morse.yaml new file mode 100644 index 0000000000..2798fba1ba --- /dev/null +++ b/unittest/force-styles/tests/fix-timestep-wall_region_morse.yaml @@ -0,0 +1,84 @@ +--- +lammps_version: 27 Jun 2024 +tags: generated +date_generated: Mon Aug 5 06:13:11 2024 +epsilon: 4e-14 +skip_tests: +prerequisites: ! | + atom full + fix wall/region +pre_commands: ! | + boundary f f f +post_commands: ! | + fix move all nve + region box block EDGE EDGE EDGE EDGE EDGE EDGE + fix test solute wall/region box morse 1.0 1.0 1.5 3.0 + fix_modify test virial yes +input_file: in.fourmol +natoms: 29 +run_stress: ! |2- + 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +global_scalar: 0 +global_vector: ! |- + 3 0 0 0 +run_pos: ! 
|2 + 1 -2.7045559775384026e-01 2.4912159905679729e+00 -1.6695851791541885e-01 + 2 3.1004029573899528e-01 2.9612354631094391e+00 -8.5466363037021464e-01 + 3 -7.0398551400789466e-01 1.2305509955830618e+00 -6.2777526944456274e-01 + 4 -1.5818159336499285e+00 1.4837407818929933e+00 -1.2538710836062004e+00 + 5 -9.0719763672789266e-01 9.2652103885675297e-01 3.9954210488374786e-01 + 6 2.4831720524855988e-01 2.8313021497871271e-01 -1.2314233331711453e+00 + 7 3.4143527641386412e-01 -2.2646551041391422e-02 -2.5292291414903052e+00 + 8 1.1743552229100009e+00 -4.8863228565853944e-01 -6.3783432910825522e-01 + 9 1.3800524229500313e+00 -2.5274721030406683e-01 2.8353985887095157e-01 + 10 2.0510765220543883e+00 -1.4604063740302866e+00 -9.8323745081712954e-01 + 11 1.7878031944442556e+00 -1.9921863272948861e+00 -1.8890602447625777e+00 + 12 3.0063007039340053e+00 -4.9013350496963298e-01 -1.6231898107386231e+00 + 13 4.0515402959192999e+00 -8.9202011606653986e-01 -1.6400005529924957e+00 + 14 2.6066963345543819e+00 -4.1789253965514156e-01 -2.6634003608794394e+00 + 15 2.9695287185712913e+00 5.5422613165234036e-01 -1.2342022021790127e+00 + 16 2.6747029695228521e+00 -2.4124119054564295e+00 -2.3435746150616152e-02 + 17 2.2153577785283796e+00 -2.0897985186907717e+00 1.1963150794479436e+00 + 18 2.1369701704094664e+00 3.0158507413593139e+00 -3.5179348337135590e+00 + 19 1.5355837135395243e+00 2.6255292354730009e+00 -4.2353987771401354e+00 + 20 2.7727573003748263e+00 3.6923910441179069e+00 -3.9330842453167185e+00 + 21 4.9040128073837339e+00 -4.0752348170758461e+00 -3.6210314709795299e+00 + 22 4.3582355554510048e+00 -4.2126119427061379e+00 -4.4612844196307497e+00 + 23 5.7439382849366911e+00 -3.5821957939240279e+00 -3.8766361295959513e+00 + 24 2.0689243582454213e+00 3.1513346907303501e+00 3.1550389751128463e+00 + 25 1.3045351331414130e+00 3.2665125705869009e+00 2.5111855257365274e+00 + 26 2.5809237402714267e+00 4.0117602605512728e+00 3.2212060528800821e+00 + 27 -1.9611343130357228e+00 
-4.3563411931359752e+00 2.1098293115523705e+00 + 28 -2.7473562684513411e+00 -4.0200819932379330e+00 1.5830052163433954e+00 + 29 -1.3126000191359855e+00 -3.5962518039482929e+00 2.2746342468737835e+00 +run_vel: ! |2 + 1 8.1705744183262832e-03 1.6516406176274298e-02 4.7902264318913203e-03 + 2 5.4501493445687828e-03 5.1791699408496447e-03 -1.4372931530376549e-03 + 3 -8.2298292722385574e-03 -1.2926551614621364e-02 -4.0984181178163699e-03 + 4 -3.7699042590093523e-03 -6.5722892098813894e-03 -1.1184640360133299e-03 + 5 -1.1021961004346582e-02 -9.8906780939336091e-03 -2.8410737829284408e-03 + 6 -3.9676663166400027e-02 4.6817061464710263e-02 3.7148491979476131e-02 + 7 9.1033953013898742e-04 -1.0128524411938794e-02 -5.1568251805019748e-02 + 8 7.9064712058855742e-03 -3.3507254552631576e-03 3.4557098492564650e-02 + 9 1.5644176117320932e-03 3.7365546102722212e-03 1.5047408822037651e-02 + 10 2.9201446820573192e-02 -2.9249578745486147e-02 -1.5018077424322544e-02 + 11 -4.7835961513517542e-03 -3.7481385134185202e-03 -2.3464104142290089e-03 + 12 2.2696451841920694e-03 -3.4774154398129690e-04 -3.0640770327796979e-03 + 13 2.7531740451953164e-03 5.8171061612840493e-03 -7.9467454022160377e-04 + 14 3.5246182371994183e-03 -5.7939995585585503e-03 -3.9478431172751344e-03 + 15 -1.8547943640122972e-03 -5.8554729942777778e-03 6.2938485140538692e-03 + 16 1.8681499973445252e-02 -1.3262466204585332e-02 -4.5638651457003250e-02 + 17 -1.2896269981100378e-02 9.7527665265956451e-03 3.7296535360836762e-02 + 18 -8.0065795274987550e-04 -8.6270473974390605e-04 -1.4483040536385806e-03 + 19 1.2452390067376805e-03 -2.5061097800836356e-03 7.2998639311871892e-03 + 20 3.5930058460518109e-03 3.6938852051849871e-03 3.2322738480194770e-03 + 21 -1.4689219756961604e-03 -2.7352107824530231e-04 7.0581625180892046e-04 + 22 -7.0694199165145140e-03 -4.2577148692717554e-03 2.8079117911323815e-04 + 23 6.0446963236685256e-03 -1.4000131545098772e-03 2.5819754799379755e-03 + 24 3.1926368451268056e-04 -9.9445664487428712e-04 
1.4999960207062358e-04 + 25 1.3789752933078488e-04 -4.4335894831520773e-03 -8.1808138106080109e-04 + 26 2.0485904023410002e-03 2.7813358660936120e-03 4.3245726853349290e-03 + 27 4.5604120293369819e-04 -1.0305523026921102e-03 2.1188058381358391e-04 + 28 -6.2544520861855151e-03 1.4127711176146864e-03 -1.8429821884794260e-03 + 29 6.4110631534402261e-04 3.1273432719593807e-03 3.7253671105656745e-03 +... From ce4e01fb788ba5ed21f1f6fb8520b7d27add58c1 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 7 Aug 2024 00:12:11 -0400 Subject: [PATCH 011/294] author information only in cpp, kokkos version needs protected instead of private variables --- src/KOKKOS/fix_bond_react_kokkos.cpp | 4576 +++++++++++++++++++ src/KOKKOS/fix_bond_react_kokkos.h | 238 + src/KOKKOS/fix_cmap_kokkos.cpp | 35 +- src/KOKKOS/superpose3d_kokkos.h | 439 ++ src/MOLECULE/fix_cmap.cpp | 31 +- src/MOLECULE/fix_cmap.h | 11 +- src/REACTION/fix_bond_react.h | 6 +- unittest/force-styles/test_fix_timestep.cpp | 184 +- 8 files changed, 5491 insertions(+), 29 deletions(-) create mode 100644 src/KOKKOS/fix_bond_react_kokkos.cpp create mode 100644 src/KOKKOS/fix_bond_react_kokkos.h create mode 100644 src/KOKKOS/superpose3d_kokkos.h diff --git a/src/KOKKOS/fix_bond_react_kokkos.cpp b/src/KOKKOS/fix_bond_react_kokkos.cpp new file mode 100644 index 0000000000..6bc287bc5f --- /dev/null +++ b/src/KOKKOS/fix_bond_react_kokkos.cpp @@ -0,0 +1,4576 @@ +/* ---------------------------------------------------------------------- +LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator +https://www.lammps.org/, Sandia National Laboratories +LAMMPS development team: developers@lammps.org + +Copyright (2003) Sandia Corporation. Under the terms of Contract +DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains +certain rights in this software. This software is distributed under +the GNU General Public License. + +See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- +Contributing Author: Jacob Gissinger (jgissing@stevens.edu) +KOKKOS version (2024/08): Mitch Murphy (alphataubio@gmail.com) +------------------------------------------------------------------------- */ + +#include "fix_bond_react.h" + +#include "atom.h" +#include "atom_vec.h" +#include "citeme.h" +#include "comm.h" +#include "compute.h" +#include "domain.h" +#include "error.h" +#include "fix_bond_history.h" +#include "force.h" +#include "group.h" +#include "input.h" +#include "math_const.h" +#include "math_extra.h" +#include "memory.h" +#include "modify.h" +#include "molecule.h" +#include "neigh_list.h" +#include "neighbor.h" +#include "pair.h" +#include "random_mars.h" +#include "reset_atoms_mol.h" +#include "respa.h" +#include "update.h" +#include "variable.h" + +#include "superpose3d.h" + +#include <cctype> +#include <cmath> +#include <cstring> + +#include <algorithm> +#include <random> +#include <utility> + +using namespace LAMMPS_NS; +using namespace FixConst; +using namespace MathConst; + +static const char cite_fix_bond_react[] = + "fix bond/react: reacter.org doi:10.1016/j.polymer.2017.09.038, " + "doi:10.1021/acs.macromol.0c02012\n\n" + "@Article{Gissinger17,\n" + " author = {J. R. Gissinger and B. D. Jensen and K. E. Wise},\n" + " title = {Modeling Chemical Reactions in Classical Molecular Dynamics Simulations},\n" + " journal = {Polymer},\n" + " year = 2017,\n" + " volume = 128,\n" + " pages = {211--217}\n" + "}\n\n" + "@Article{Gissinger20,\n" + " author = {J. R. Gissinger, B. D. Jensen, K. E. 
Wise},\n" + " title = {{REACTER}: A Heuristic Method for Reactive Molecular Dynamics},\n" + " journal = {Macromolecules},\n" + " year = 2020,\n" + " volume = 53,\n" + " number = 22,\n" + " pages = {9953--9961}\n" + "}\n\n"; + +static constexpr double BIG = 1.0e20; +static constexpr int DELTA = 16; +static constexpr int MAXGUESS = 20; // max # of guesses allowed by superimpose algorithm +static constexpr int MAXCONARGS = 14; // max # of arguments for any type of constraint + rxnID +static constexpr int NUMVARVALS = 5; // max # of keyword values that have variables as input + +// various statuses of superimpose algorithm: +// ACCEPT: site successfully matched to pre-reacted template +// REJECT: site does not match pre-reacted template +// PROCEED: normal execution (non-guessing mode) +// CONTINUE: a neighbor has been assigned, skip to next neighbor +// GUESSFAIL: a guess has failed (if no more restore points, status = 'REJECT') +// RESTORE: restore mode, load most recent restore point +enum { ACCEPT, REJECT, PROCEED, CONTINUE, GUESSFAIL, RESTORE }; + +// types of available reaction constraints +enum { DISTANCE, ANGLE, DIHEDRAL, ARRHENIUS, RMSD, CUSTOM }; + +// ID type used by constraint +enum { ATOM, FRAG }; + +// keyword values that accept variables as input +enum { NEVERY, RMIN, RMAX, PROB, NRATE }; + +// flag for one-proc vs shared reaction sites +enum { LOCAL, GLOBAL }; + +// values for molecule_keyword +enum { OFF, INTER, INTRA }; + +/* ---------------------------------------------------------------------- */ +// clang-format off + +FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) : + Fix(lmp, narg, arg) +{ + if (lmp->citeme) lmp->citeme->add(cite_fix_bond_react); + + fix1 = nullptr; + fix2 = nullptr; + fix3 = nullptr; + reset_mol_ids = nullptr; + + if (narg < 8) utils::missing_cmd_args(FLERR,"fix bond/react", error); + + newton_bond = force->newton_bond; + + restart_global = 1; + attempted_rxn = 0; + force_reneighbor = 1; + next_reneighbor = -1; + 
vector_flag = 1; + global_freq = 1; + extvector = 0; + rxnID = 0; + cuff = 1; + maxnconstraints = 0; + narrhenius = 0; + status = PROCEED; + + // reaction functions used by 'custom' constraint + nrxnfunction = 3; + rxnfunclist.resize(nrxnfunction); + peratomflag.resize(nrxnfunction); + rxnfunclist[0] = "rxnsum"; + peratomflag[0] = 1; + rxnfunclist[1] = "rxnave"; + peratomflag[1] = 1; + rxnfunclist[2] = "rxnbond"; + peratomflag[2] = 0; + nvvec = 0; + ncustomvars = 0; + vvec = nullptr; + + nxspecial = nullptr; + onemol_nxspecial = nullptr; + twomol_nxspecial = nullptr; + xspecial = nullptr; + onemol_xspecial = nullptr; + twomol_xspecial = nullptr; + + // these group names are reserved for use exclusively by bond/react + master_group = (char *) "bond_react_MASTER_group"; + + // by using fixed group names, only one instance of fix bond/react is allowed. + if (modify->get_fix_by_style("^bond/react").size() != 0) + error->all(FLERR,"Only one instance of fix bond/react allowed at a time"); + + // let's find number of reactions specified + nreacts = 0; + for (int i = 3; i < narg; i++) { + if (strcmp(arg[i],"react") == 0) { + nreacts++; + i = i + 6; // skip past mandatory arguments + if (i > narg) error->all(FLERR,"Illegal fix bond/react command: " + "'react' has too few arguments"); + } + } + + if (nreacts == 0) error->all(FLERR,"Illegal fix bond/react command: " + "missing mandatory 'react' argument"); + + size_vector = nreacts; + + int iarg = 3; + stabilization_flag = 0; + reset_mol_ids_flag = 1; + int num_common_keywords = 2; + for (int m = 0; m < num_common_keywords; m++) { + if (strcmp(arg[iarg],"stabilization") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal fix bond/react command: " + "'stabilization' keyword has too few arguments"); + stabilization_flag = utils::logical(FLERR,arg[iarg+1],false,lmp); + if (stabilization_flag) { + if (iarg+4 > narg) error->all(FLERR,"Illegal fix bond/react command:" + "'stabilization' keyword has too few arguments"); + 
exclude_group = utils::strdup(arg[iarg+2]); + nve_limit_xmax = arg[iarg+3]; + iarg += 4; + } else iarg += 2; + } else if (strcmp(arg[iarg],"reset_mol_ids") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal fix bond/react command: " + "'reset_mol_ids' keyword has too few arguments"); + reset_mol_ids_flag = utils::logical(FLERR,arg[iarg+1],false,lmp); + iarg += 2; + } else if (strcmp(arg[iarg],"react") == 0) { + break; + } else error->all(FLERR,"Illegal fix bond/react command: unknown keyword"); + } + + if (reset_mol_ids_flag) { + delete reset_mol_ids; + reset_mol_ids = new ResetAtomsMol(lmp); + reset_mol_ids->create_computes(id,group->names[igroup]); + } + + // set up common variables as vectors of length 'nreacts' + // nevery, cutoff, onemol, twomol, superimpose file + + // this looks excessive + // the price of vectorization (all reactions in one command)? + memory->create(rxn_name,nreacts,MAXNAME,"bond/react:rxn_name"); + memory->create(nevery,nreacts,"bond/react:nevery"); + memory->create(cutsq,nreacts,2,"bond/react:cutsq"); + memory->create(unreacted_mol,nreacts,"bond/react:unreacted_mol"); + memory->create(reacted_mol,nreacts,"bond/react:reacted_mol"); + memory->create(fraction,nreacts,"bond/react:fraction"); + memory->create(max_rxn,nreacts,"bond/react:max_rxn"); + memory->create(nlocalskips,nreacts,"bond/react:nlocalskips"); + memory->create(nghostlyskips,nreacts,"bond/react:nghostlyskips"); + memory->create(seed,nreacts,"bond/react:seed"); + memory->create(limit_duration,nreacts,"bond/react:limit_duration"); + memory->create(rate_limit,3,nreacts,"bond/react:rate_limit"); + memory->create(stabilize_steps_flag,nreacts,"bond/react:stabilize_steps_flag"); + memory->create(custom_charges_fragid,nreacts,"bond/react:custom_charges_fragid"); + memory->create(rescale_charges_flag,nreacts,"bond/react:rescale_charges_flag"); + memory->create(create_atoms_flag,nreacts,"bond/react:create_atoms_flag"); + 
memory->create(modify_create_fragid,nreacts,"bond/react:modify_create_fragid"); + memory->create(overlapsq,nreacts,"bond/react:overlapsq"); + memory->create(molecule_keyword,nreacts,"bond/react:molecule_keyword"); + memory->create(nconstraints,nreacts,"bond/react:nconstraints"); + memory->create(constraintstr,nreacts,MAXLINE,"bond/react:constraintstr"); + memory->create(var_flag,NUMVARVALS,nreacts,"bond/react:var_flag"); + memory->create(var_id,NUMVARVALS,nreacts,"bond/react:var_id"); + memory->create(iatomtype,nreacts,"bond/react:iatomtype"); + memory->create(jatomtype,nreacts,"bond/react:jatomtype"); + memory->create(ibonding,nreacts,"bond/react:ibonding"); + memory->create(jbonding,nreacts,"bond/react:jbonding"); + memory->create(closeneigh,nreacts,"bond/react:closeneigh"); + memory->create(groupbits,nreacts,"bond/react:groupbits"); + memory->create(reaction_count,nreacts,"bond/react:reaction_count"); + memory->create(local_rxn_count,nreacts,"bond/react:local_rxn_count"); + memory->create(ghostly_rxn_count,nreacts,"bond/react:ghostly_rxn_count"); + memory->create(reaction_count_total,nreacts,"bond/react:reaction_count_total"); + + rescale_charges_anyflag = 0; + for (int i = 0; i < nreacts; i++) { + fraction[i] = 1.0; + seed[i] = 12345; + max_rxn[i] = INT_MAX; + for (int j = 0; j < 3; j++) + rate_limit[j][i] = 0; + stabilize_steps_flag[i] = 0; + custom_charges_fragid[i] = -1; + rescale_charges_flag[i] = 0; + create_atoms_flag[i] = 0; + modify_create_fragid[i] = -1; + overlapsq[i] = 0.0; + molecule_keyword[i] = OFF; + nconstraints[i] = 0; + // set default limit duration to 60 timesteps + limit_duration[i] = 60; + reaction_count[i] = 0; + local_rxn_count[i] = 0; + ghostly_rxn_count[i] = 0; + reaction_count_total[i] = 0; + for (int j = 0; j < NUMVARVALS; j++) { + var_flag[j][i] = 0; + var_id[j][i] = 0; + } + } + + char **files; + files = new char*[nreacts]; + + for (int rxn = 0; rxn < nreacts; rxn++) { + + if (strcmp(arg[iarg],"react") != 0) 
error->all(FLERR,"Illegal fix bond/react command: " + "'react' or 'stabilization' has incorrect arguments"); + + iarg++; + + int n = strlen(arg[iarg]) + 1; + if (n > MAXNAME) error->all(FLERR,"Reaction name (react-ID) is too long (limit: 256 characters)"); + strcpy(rxn_name[rxn],arg[iarg++]); + + int groupid = group->find(arg[iarg++]); + if (groupid == -1) error->all(FLERR,"Could not find fix group ID"); + groupbits[rxn] = group->bitmask[groupid]; + + if (strncmp(arg[iarg],"v_",2) == 0) read_variable_keyword(&arg[iarg][2],NEVERY,rxn); + else { + nevery[rxn] = utils::inumeric(FLERR,arg[iarg],false,lmp); + if (nevery[rxn] <= 0) error->all(FLERR,"Illegal fix bond/react command: " + "'Nevery' must be a positive integer"); + } + iarg++; + + double cutoff; + if (strncmp(arg[iarg],"v_",2) == 0) { + read_variable_keyword(&arg[iarg][2],RMIN,rxn); + cutoff = input->variable->compute_equal(var_id[RMIN][rxn]); + } else cutoff = utils::numeric(FLERR,arg[iarg],false,lmp); + if (cutoff < 0.0) error->all(FLERR,"Illegal fix bond/react command: " + "'Rmin' cannot be negative"); + cutsq[rxn][0] = cutoff*cutoff; + iarg++; + + if (strncmp(arg[iarg],"v_",2) == 0) { + read_variable_keyword(&arg[iarg][2],RMAX,rxn); + cutoff = input->variable->compute_equal(var_id[RMAX][rxn]); + } else cutoff = utils::numeric(FLERR,arg[iarg],false,lmp); + if (cutoff < 0.0) error->all(FLERR,"Illegal fix bond/react command:" + "'Rmax' cannot be negative"); + cutsq[rxn][1] = cutoff*cutoff; + iarg++; + + unreacted_mol[rxn] = atom->find_molecule(arg[iarg++]); + if (unreacted_mol[rxn] == -1) error->all(FLERR,"Unreacted molecule template ID for " + "fix bond/react does not exist"); + reacted_mol[rxn] = atom->find_molecule(arg[iarg++]); + if (reacted_mol[rxn] == -1) error->all(FLERR,"Reacted molecule template ID for " + "fix bond/react does not exist"); + + //read map file + files[rxn] = utils::strdup(arg[iarg]); + iarg++; + + while (iarg < narg && strcmp(arg[iarg],"react") != 0) { + if (strcmp(arg[iarg],"prob") 
== 0) { + if (iarg+3 > narg) error->all(FLERR,"Illegal fix bond/react command: " + "'prob' keyword has too few arguments"); + // check if probability is a variable + if (strncmp(arg[iarg+1],"v_",2) == 0) { + read_variable_keyword(&arg[iarg+1][2],PROB,rxn); + fraction[rxn] = input->variable->compute_equal(var_id[PROB][rxn]); + } else { + // otherwise probability should be a number + fraction[rxn] = utils::numeric(FLERR,arg[iarg+1],false,lmp); + } + seed[rxn] = utils::inumeric(FLERR,arg[iarg+2],false,lmp); + if (fraction[rxn] < 0.0 || fraction[rxn] > 1.0) + error->all(FLERR,"Illegal fix bond/react command: " + "probability fraction must between 0 and 1, inclusive"); + if (seed[rxn] <= 0) error->all(FLERR,"Illegal fix bond/react command: " + "probability seed must be positive"); + iarg += 3; + } else if (strcmp(arg[iarg],"max_rxn") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal fix bond/react command: " + "'max_rxn' has too few arguments"); + max_rxn[rxn] = utils::inumeric(FLERR,arg[iarg+1],false,lmp); + if (max_rxn[rxn] < 0) error->all(FLERR,"Illegal fix bond/react command: " + "'max_rxn' cannot be negative"); + iarg += 2; + } else if (strcmp(arg[iarg],"rate_limit") == 0) { + if (iarg+3 > narg) error->all(FLERR,"Illegal fix bond/react command: " + "'rate_limit' has too few arguments"); + rate_limit[0][rxn] = 1; // serves as flag for rate_limit keyword + if (strncmp(arg[iarg+1],"v_",2) == 0) read_variable_keyword(&arg[iarg+1][2],NRATE,rxn); + else rate_limit[1][rxn] = utils::numeric(FLERR,arg[iarg+1],false,lmp); + rate_limit[2][rxn] = utils::numeric(FLERR,arg[iarg+2],false,lmp); + iarg += 3; + } else if (strcmp(arg[iarg],"stabilize_steps") == 0) { + if (stabilization_flag == 0) error->all(FLERR,"Stabilize_steps keyword " + "used without stabilization keyword"); + if (iarg+2 > narg) error->all(FLERR,"Illegal fix bond/react command: " + "'stabilize_steps' has too few arguments"); + limit_duration[rxn] = utils::numeric(FLERR,arg[iarg+1],false,lmp); + 
stabilize_steps_flag[rxn] = 1; + iarg += 2; + } else if (strcmp(arg[iarg],"custom_charges") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal fix bond/react command: " + "'custom_charges' has too few arguments"); + if (strcmp(arg[iarg+1],"no") == 0) custom_charges_fragid[rxn] = -1; //default + else { + custom_charges_fragid[rxn] = atom->molecules[unreacted_mol[rxn]]->findfragment(arg[iarg+1]); + if (custom_charges_fragid[rxn] < 0) error->one(FLERR,"Fix bond/react: Molecule fragment for " + "'custom_charges' keyword does not exist"); + } + iarg += 2; + } else if (strcmp(arg[iarg],"rescale_charges") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal fix bond/react command: " + "'rescale_charges' has too few arguments"); + if (strcmp(arg[iarg+1],"no") == 0) rescale_charges_flag[rxn] = 0; //default + else if (strcmp(arg[iarg+1],"yes") == 0) { + if (!atom->q_flag) error->all(FLERR,"Illegal fix bond/react command: cannot use " + "'rescale_charges' without atomic charges enabled"); + twomol = atom->molecules[reacted_mol[rxn]]; + if (!twomol->qflag) error->all(FLERR,"Illegal fix bond/react command: cannot use " + "'rescale_charges' without Charges section in post-reaction template"); + rescale_charges_flag[rxn] = 1; // overloaded below to also indicate number of atoms to update + rescale_charges_anyflag = 1; + cuff = 2; // index shift for extra values carried around by mega_gloves + } else error->one(FLERR,"Bond/react: Illegal option for 'rescale_charges' keyword"); + iarg += 2; + } else if (strcmp(arg[iarg],"molecule") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal fix bond/react command: " + "'molecule' has too few arguments"); + if (strcmp(arg[iarg+1],"off") == 0) molecule_keyword[rxn] = OFF; //default + else if (strcmp(arg[iarg+1],"inter") == 0) molecule_keyword[rxn] = INTER; + else if (strcmp(arg[iarg+1],"intra") == 0) molecule_keyword[rxn] = INTRA; + else error->one(FLERR,"Fix bond/react: Illegal option for 'molecule' keyword"); + iarg += 2; + } else 
if (strcmp(arg[iarg],"modify_create") == 0) { + if (iarg++ > narg) error->all(FLERR,"Illegal fix bond/react command: " + "'modify_create' has too few arguments"); + while (iarg < narg && strcmp(arg[iarg],"react") != 0) { + if (strcmp(arg[iarg],"fit") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal fix bond/react command: " + "'modify_create' has too few arguments"); + if (strcmp(arg[iarg+1],"all") == 0) modify_create_fragid[rxn] = -1; //default + else { + modify_create_fragid[rxn] = atom->molecules[reacted_mol[rxn]]->findfragment(arg[iarg+1]); + if (modify_create_fragid[rxn] < 0) error->one(FLERR,"Fix bond/react: Molecule fragment for " + "'modify_create' keyword does not exist"); + } + iarg += 2; + } else if (strcmp(arg[iarg],"overlap") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal fix bond/react command: " + "'modify_create' has too few arguments"); + overlapsq[rxn] = utils::numeric(FLERR,arg[iarg+1],false,lmp); + overlapsq[rxn] *= overlapsq[rxn]; + iarg += 2; + } else break; + } + } else error->all(FLERR,"Illegal fix bond/react command: unknown keyword"); + } + } + + max_natoms = 0; // the number of atoms in largest molecule template + max_rate_limit_steps = 0; + for (int myrxn = 0; myrxn < nreacts; myrxn++) { + twomol = atom->molecules[reacted_mol[myrxn]]; + max_natoms = MAX(max_natoms,twomol->natoms); + max_rate_limit_steps = MAX(max_rate_limit_steps,rate_limit[2][myrxn]); + } + + memory->create(equivalences,max_natoms,2,nreacts,"bond/react:equivalences"); + memory->create(reverse_equiv,max_natoms,2,nreacts,"bond/react:reverse_equiv"); + memory->create(edge,max_natoms,nreacts,"bond/react:edge"); + memory->create(landlocked_atoms,max_natoms,nreacts,"bond/react:landlocked_atoms"); + memory->create(store_rxn_count,max_rate_limit_steps,nreacts,"bond/react:store_rxn_count"); + memory->create(custom_charges,max_natoms,nreacts,"bond/react:custom_charges"); + memory->create(delete_atoms,max_natoms,nreacts,"bond/react:delete_atoms"); + 
memory->create(create_atoms,max_natoms,nreacts,"bond/react:create_atoms"); + memory->create(chiral_atoms,max_natoms,6,nreacts,"bond/react:chiral_atoms"); + memory->create(mol_total_charge,nreacts,"bond/react:mol_total_charge"); + + for (int j = 0; j < nreacts; j++) { + mol_total_charge[j] = 0.0; + for (int i = 0; i < max_natoms; i++) { + edge[i][j] = 0; + custom_charges[i][j] = 1; // update all partial charges by default + delete_atoms[i][j] = 0; + create_atoms[i][j] = 0; + for (int k = 0; k < 6; k++) { + chiral_atoms[i][k][j] = 0; + } + // default equivalences to their own mol index + // all but created atoms will be updated + for (int m = 0; m < 2; m++) { + equivalences[i][m][j] = i+1; + } + } + for (int i = 0; i < max_rate_limit_steps; i++) { + store_rxn_count[i][j] = -1; + } + } + + // read all map files afterward + for (int i = 0; i < nreacts; i++) { + open(files[i]); + onemol = atom->molecules[unreacted_mol[i]]; + twomol = atom->molecules[reacted_mol[i]]; + onemol->check_attributes(); + twomol->check_attributes(); + get_molxspecials(); + read_map_file(i); + fclose(fp); + if (ncreate == 0 && onemol->natoms != twomol->natoms) + error->all(FLERR,"Fix bond/react: Reaction templates must contain the same number of atoms"); + else if (ncreate > 0 && onemol->natoms + ncreate != twomol->natoms) + error->all(FLERR,"Fix bond/react: Incorrect number of created atoms"); + iatomtype[i] = onemol->type[ibonding[i]-1]; + jatomtype[i] = onemol->type[jbonding[i]-1]; + find_landlocked_atoms(i); + if (custom_charges_fragid[i] >= 0) CustomCharges(custom_charges_fragid[i],i); + } + + // charge rescaling values must be calculated after calling CustomCharges + for (int myrxn = 0; myrxn < nreacts; myrxn++) { + if (rescale_charges_flag[myrxn]) { + rescale_charges_flag[myrxn] = 0; // will now store number of updated atoms + twomol = atom->molecules[reacted_mol[myrxn]]; + for (int j = 0; j < twomol->natoms; j++) { + int jj = equivalences[j][1][myrxn]-1; + if (custom_charges[jj][myrxn] 
== 1 && delete_atoms[jj][myrxn] == 0) { + mol_total_charge[myrxn] += twomol->q[j]; + rescale_charges_flag[myrxn]++; + } + } + } + } + + // get the names of per-atom variables needed by 'rxn' functions of custom constraint + customvarnames(); + + // initialize Marsaglia RNG with processor-unique seed (Arrhenius prob) + + rrhandom = new RanMars*[narrhenius]; + int tmp = 0; + for (int i = 0; i < nreacts; i++) { + for (int j = 0; j < nconstraints[i]; j++) { + if (constraints[j][i].type == ARRHENIUS) { + rrhandom[tmp++] = new RanMars(lmp,(int) constraints[j][i].par[4] + comm->me); + } + } + } + + for (int i = 0; i < nreacts; i++) { + delete [] files[i]; + } + delete [] files; + + if (atom->molecular != Atom::MOLECULAR) + error->all(FLERR,"Fix bond/react: Cannot use fix bond/react with non-molecular systems"); + + // check if bonding atoms are 1-2, 1-3, or 1-4 bonded neighbors + // if so, we don't need non-bonded neighbor list + for (int myrxn = 0; myrxn < nreacts; myrxn++) { + closeneigh[myrxn] = -1; // indicates will search non-bonded neighbors + onemol = atom->molecules[unreacted_mol[myrxn]]; + get_molxspecials(); + for (int k = 0; k < onemol_nxspecial[ibonding[myrxn]-1][2]; k++) { + if (onemol_xspecial[ibonding[myrxn]-1][k] == jbonding[myrxn]) { + closeneigh[myrxn] = 2; // index for 1-4 neighbor + if (k < onemol_nxspecial[ibonding[myrxn]-1][1]) + closeneigh[myrxn] = 1; // index for 1-3 neighbor + if (k < onemol_nxspecial[ibonding[myrxn]-1][0]) + closeneigh[myrxn] = 0; // index for 1-2 neighbor + break; + } + } + } + + // initialize Marsaglia RNG with processor-unique seed ('prob' keyword) + + random = new RanMars*[nreacts]; + for (int i = 0; i < nreacts; i++) { + random[i] = new RanMars(lmp,seed[i] + comm->me); + } + + // set comm sizes needed by this fix + // forward is big due to comm of broken bonds and 1-2 neighbors + + comm_forward = MAX(2,2+atom->maxspecial); + comm_reverse = 2; + + // allocate arrays local to this fix + nmax = 0; + partner = finalpartner = 
nullptr; + distsq = nullptr; + maxattempt = 0; + attempt = nullptr; + nattempt = nullptr; + allnattempt = 0; + my_num_mega = 0; + local_num_mega = 0; + ghostly_num_mega = 0; + restore = nullptr; + + // zero out stats + global_megasize = 0; + avail_guesses = 0; + glove_counter = 0; + guess_branch = new int[MAXGUESS](); + pioneer_count = new int[max_natoms]; + my_mega_glove = nullptr; + local_mega_glove = nullptr; + ghostly_mega_glove = nullptr; + global_mega_glove = nullptr; + + // these are merely loop indices that became important + pion = neigh = trace = 0; + + id_fix1 = nullptr; + id_fix2 = nullptr; + id_fix3 = nullptr; + statted_id = nullptr; + custom_exclude_flag = 0; + + // used to store restart info + set = new Set[nreacts]; + memset(set,0,nreacts*sizeof(Set)); +} + +/* ---------------------------------------------------------------------- */ + +FixBondReact::~FixBondReact() +{ + for (int i = 0; i < narrhenius; i++) { + delete rrhandom[i]; + } + delete [] rrhandom; + + for (int i = 0; i < nreacts; i++) { + delete random[i]; + } + delete [] random; + + delete reset_mol_ids; + + memory->destroy(partner); + memory->destroy(finalpartner); + memory->destroy(nattempt); + memory->destroy(distsq); + memory->destroy(attempt); + memory->destroy(edge); + memory->destroy(equivalences); + memory->destroy(reverse_equiv); + memory->destroy(landlocked_atoms); + memory->destroy(store_rxn_count); + memory->destroy(custom_charges); + memory->destroy(delete_atoms); + memory->destroy(create_atoms); + memory->destroy(chiral_atoms); + memory->destroy(mol_total_charge); + if (vvec != nullptr) memory->destroy(vvec); + + memory->destroy(rxn_name); + memory->destroy(nevery); + memory->destroy(cutsq); + memory->destroy(unreacted_mol); + memory->destroy(reacted_mol); + memory->destroy(fraction); + memory->destroy(seed); + memory->destroy(max_rxn); + memory->destroy(nlocalskips); + memory->destroy(nghostlyskips); + memory->destroy(limit_duration); + memory->destroy(var_flag); + 
memory->destroy(var_id); + memory->destroy(rate_limit); + memory->destroy(stabilize_steps_flag); + memory->destroy(custom_charges_fragid); + memory->destroy(rescale_charges_flag); + memory->destroy(molecule_keyword); + memory->destroy(nconstraints); + memory->destroy(constraintstr); + memory->destroy(create_atoms_flag); + memory->destroy(modify_create_fragid); + memory->destroy(overlapsq); + + memory->destroy(iatomtype); + memory->destroy(jatomtype); + memory->destroy(ibonding); + memory->destroy(jbonding); + memory->destroy(closeneigh); + memory->destroy(groupbits); + memory->destroy(reaction_count); + memory->destroy(local_rxn_count); + memory->destroy(ghostly_rxn_count); + memory->destroy(reaction_count_total); + + if (attempted_rxn == 1) { + memory->destroy(restore_pt); + memory->destroy(restore); + memory->destroy(glove); + memory->destroy(pioneers); + memory->destroy(my_mega_glove); + memory->destroy(local_mega_glove); + memory->destroy(ghostly_mega_glove); + } + + memory->destroy(global_mega_glove); + + if (stabilization_flag == 1) { + // delete fixes if not already deleted + if (id_fix1 && modify->get_fix_by_id(id_fix1)) modify->delete_fix(id_fix1); + delete[] id_fix1; + + if (id_fix3 && modify->get_fix_by_id(id_fix3)) modify->delete_fix(id_fix3); + delete[] id_fix3; + } + + if (id_fix2 && modify->get_fix_by_id(id_fix2)) modify->delete_fix(id_fix2); + delete[] id_fix2; + + delete[] statted_id; + delete[] guess_branch; + delete[] pioneer_count; + delete[] set; + + if (group) { + group->assign(std::string(master_group) + " delete"); + if (stabilization_flag == 1) { + group->assign(std::string(exclude_group) + " delete"); + delete[] exclude_group; + } + } +} + +/* ---------------------------------------------------------------------- */ + +int FixBondReact::setmask() +{ + int mask = 0; + mask |= POST_INTEGRATE; + mask |= POST_INTEGRATE_RESPA; + return mask; +} + +/* ---------------------------------------------------------------------- +let's add an internal 
nve/limit fix for relaxation of reaction sites +also let's add our per-atom property fix here! +this per-atom property will state the timestep an atom was 'limited' +it will have the name 'i_limit_tags' and will be intitialized to 0 (not in group) +------------------------------------------------------------------------- */ + +void FixBondReact::post_constructor() +{ + // let's add the limit_tags per-atom property fix + id_fix2 = utils::strdup("bond_react_props_internal"); + if (!modify->get_fix_by_id(id_fix2)) + fix2 = modify->add_fix(std::string(id_fix2) + + " all property/atom i_limit_tags i_react_tags ghost yes"); + + // create master_group if not already existing + // NOTE: limit_tags and react_tags automaticaly intitialized to zero (unless read from restart) + group->find_or_create(master_group); + std::string cmd = fmt::format("{} dynamic all property limit_tags",master_group); + group->assign(cmd); + + if (stabilization_flag == 1) { + int groupid = group->find(exclude_group); + // create exclude_group if not already existing, or use as parent group if static + if (groupid == -1 || group->dynamic[groupid] == 0) { + + // create stabilization per-atom property + id_fix3 = utils::strdup("bond_react_stabilization_internal"); + if (!modify->get_fix_by_id(id_fix3)) + fix3 = modify->add_fix(std::string(id_fix3) + + " all property/atom i_statted_tags ghost yes"); + + statted_id = utils::strdup("statted_tags"); + + // if static group exists, use as parent group + // also, rename dynamic exclude_group by appending '_REACT' + char *exclude_PARENT_group; + exclude_PARENT_group = utils::strdup(exclude_group); + delete[] exclude_group; + exclude_group = utils::strdup(std::string(exclude_PARENT_group)+"_REACT"); + + group->find_or_create(exclude_group); + if (groupid == -1) + cmd = fmt::format("{} dynamic all property statted_tags",exclude_group); + else + cmd = fmt::format("{} dynamic {} property statted_tags",exclude_group,exclude_PARENT_group); + group->assign(cmd); + 
delete[] exclude_PARENT_group; + + // on to statted_tags (system-wide thermostat) + // initialize per-atom statted_flags to 1 + // (only if not already initialized by restart) + if (fix3->restart_reset != 1) { + int flag,cols; + int index = atom->find_custom("statted_tags",flag,cols); + int *i_statted_tags = atom->ivector[index]; + + for (int i = 0; i < atom->nlocal; i++) + i_statted_tags[i] = 1; + } + } else { + // sleeping code, for future capabilities + custom_exclude_flag = 1; + // first we have to find correct fix group reference + Fix *fix = modify->get_fix_by_id(std::string("GROUP_")+exclude_group); + + // this returns names of corresponding property + int unused; + char *idprop; + idprop = (char *) fix->extract("property",unused); + if (idprop == nullptr) + error->all(FLERR,"Exclude group must be a per-atom property group"); + statted_id = utils::strdup(idprop); + + // initialize per-atom statted_tags to 1 + // need to correct for smooth restarts + //int flag,cols; + //int index = atom->find_custom(statted_id,flag,cols); + //int *i_statted_tags = atom->ivector[index]; + //for (int i = 0; i < atom->nlocal; i++) + // i_statted_tags[i] = 1; + } + + // let's create a new nve/limit fix to limit newly reacted atoms + id_fix1 = utils::strdup("bond_react_MASTER_nve_limit"); + if (!modify->get_fix_by_id(id_fix1)) + fix1 = modify->add_fix(fmt::format("{} {} nve/limit {}", + id_fix1,master_group,nve_limit_xmax)); + } +} + +/* ---------------------------------------------------------------------- */ + +void FixBondReact::init() +{ + + if (utils::strmatch(update->integrate_style,"^respa")) + nlevels_respa = (dynamic_cast(update->integrate))->nlevels; + + // check cutoff for iatomtype,jatomtype + if (!utils::strmatch(force->pair_style,"^hybrid")) + for (int i = 0; i < nreacts; i++) + if (force->pair == nullptr || (closeneigh[i] < 0 && cutsq[i][1] > force->pair->cutsq[iatomtype[i]][jatomtype[i]])) + error->all(FLERR,"Fix bond/react: Fix bond/react cutoff is longer than 
pairwise cutoff"); + + // need a half neighbor list, built every Nevery steps + neighbor->add_request(this, NeighConst::REQ_OCCASIONAL); + + lastcheck = -1; +} + +/* ---------------------------------------------------------------------- */ + +void FixBondReact::init_list(int /*id*/, NeighList *ptr) +{ + list = ptr; +} + +/* ---------------------------------------------------------------------- + Identify all pairs of potentially reactive atoms for this time step. + This function is modified from LAMMPS’ fix bond/create. +---------------------------------------------------------------------- */ + +void FixBondReact::post_integrate() +{ + // update store_rxn_count on every step + for (int myrxn = 0; myrxn < nreacts; myrxn++) { + if (rate_limit[0][myrxn] == 1) { + for (int i = rate_limit[2][myrxn]-1; i > 0; i--) { + store_rxn_count[i][myrxn] = store_rxn_count[i-1][myrxn]; + } + store_rxn_count[0][myrxn] = reaction_count_total[myrxn]; + } + } + + // check if any reactions could occur on this timestep + int nevery_check = 1; + for (int i = 0; i < nreacts; i++) { + if (var_flag[NEVERY][i]) + nevery[i] = ceil(input->variable->compute_equal(var_id[NEVERY][i])); + if (nevery[i] <= 0) + error->all(FLERR,"Illegal fix bond/react command: " + "'Nevery' must be a positive integer"); + if (!(update->ntimestep % nevery[i])) { + nevery_check = 0; + break; + } + } + + for (int i = 0; i < nreacts; i++) { + reaction_count[i] = 0; + local_rxn_count[i] = 0; + ghostly_rxn_count[i] = 0; + nlocalskips[i] = 0; + nghostlyskips[i] = 0; + // update reaction probability + if (var_flag[PROB][i]) + fraction[i] = input->variable->compute_equal(var_id[PROB][i]); + } + + if (nevery_check) { + unlimit_bond(); + return; + } + + // acquire updated ghost atom positions + // necessary b/c are calling this after integrate, but before Verlet comm + + comm->forward_comm(); + + // resize bond partner list and initialize it + // needs to be atom->nmax in length + + if (atom->nmax > nmax) { + 
memory->destroy(partner); + memory->destroy(finalpartner); + memory->destroy(distsq); + memory->destroy(nattempt); + nmax = atom->nmax; + memory->create(partner,nmax,"bond/react:partner"); + memory->create(finalpartner,nmax,"bond/react:finalpartner"); + memory->create(distsq,nmax,2,"bond/react:distsq"); + memory->create(nattempt,nreacts,"bond/react:nattempt"); + } + + // reset 'attempt' counts + for (int i = 0; i < nreacts; i++) { + nattempt[i] = 0; + } + // reset per-bond compute map flag + atoms2bondflag = 0; + + int nlocal = atom->nlocal; + int nall = atom->nlocal + atom->nghost; + + // loop over neighbors of my atoms + // each atom sets one closest eligible partner atom ID to bond with + + tagint *tag = atom->tag; + int *type = atom->type; + + neighbor->build_one(list); + + // here we define a full special list + // may need correction for unusual special bond settings + nxspecial = atom->nspecial; + xspecial = atom->special; + + int j; + for (rxnID = 0; rxnID < nreacts; rxnID++) { + int rate_limit_flag = 1; + if (rate_limit[0][rxnID] == 1) { + int myrxn_count = store_rxn_count[rate_limit[2][rxnID]-1][rxnID]; + if (myrxn_count == -1) rate_limit_flag = 0; + else { + int nrxns_delta = reaction_count_total[rxnID] - myrxn_count; + int my_nrate; + if (var_flag[NRATE][rxnID] == 1) { + my_nrate = input->variable->compute_equal(var_id[NRATE][rxnID]); + } else my_nrate = rate_limit[1][rxnID]; + if (nrxns_delta >= my_nrate) rate_limit_flag = 0; + } + } + if ((update->ntimestep % nevery[rxnID]) || + (max_rxn[rxnID] <= reaction_count_total[rxnID]) || + (rate_limit_flag == 0)) continue; + for (int ii = 0; ii < nall; ii++) { + partner[ii] = 0; + finalpartner[ii] = 0; + distsq[ii][0] = 0.0; + distsq[ii][1] = BIG; + } + + // fork between far and close_partner here + if (closeneigh[rxnID] < 0) { + far_partner(); + // reverse comm of distsq and partner + // not needed if newton_pair off since I,J pair was seen by both procs + commflag = 2; + if (force->newton_pair) 
comm->reverse_comm(this); + } else { + close_partner(); + commflag = 2; + comm->reverse_comm(this); + } + + // each atom now knows its winning partner + // forward comm of partner, so ghosts have it + + commflag = 2; + comm->forward_comm(this,1); + + // consider for reaction: + // only if both atoms list each other as winning bond partner + // if other atom is owned by another proc, it should do same thing + + int temp_nattempt = 0; + for (int i = 0; i < nlocal; i++) { + if (partner[i] == 0) { + continue; + } + + j = atom->map(partner[i]); + if (partner[j] != tag[i]) { + continue; + } + + // store final bond partners and count the rxn possibility once + + finalpartner[i] = tag[j]; + finalpartner[j] = tag[i]; + + if (tag[i] < tag[j]) temp_nattempt++; + } + + // cycle loop if no even eligible bonding atoms were found (on any proc) + int some_chance; + MPI_Allreduce(&temp_nattempt,&some_chance,1,MPI_INT,MPI_SUM,world); + if (!some_chance) continue; + + // communicate final partner + + commflag = 3; + comm->forward_comm(this); + + // add instance to 'attempt' only if this processor + // owns the atoms with smaller global ID + // NOTE: we no longer care about ghost-ghost instances as bond/create did + // this is because we take care of updating topology later (and differently) + for (int i = 0; i < nlocal; i++) { + + if (finalpartner[i] == 0) continue; + + j = atom->map(finalpartner[i]); + if (tag[i] < tag[j]) { + if (nattempt[rxnID] > maxattempt-2) { + maxattempt += DELTA; + // third dim of 'attempt': bond/react integer ID + memory->grow(attempt,maxattempt,2,nreacts,"bond/react:attempt"); + } + // to ensure types remain in same order + if (iatomtype[rxnID] == type[i]) { + attempt[nattempt[rxnID]][0][rxnID] = tag[i]; + attempt[nattempt[rxnID]][1][rxnID] = finalpartner[i]; + nattempt[rxnID]++; + // add another attempt if initiator atoms are same type + if (iatomtype[rxnID] == jatomtype[rxnID]) { + attempt[nattempt[rxnID]][0][rxnID] = finalpartner[i]; + 
attempt[nattempt[rxnID]][1][rxnID] = tag[i]; + nattempt[rxnID]++; + } + } else { + attempt[nattempt[rxnID]][0][rxnID] = finalpartner[i]; + attempt[nattempt[rxnID]][1][rxnID] = tag[i]; + nattempt[rxnID]++; + } + } + } + } + + // break loop if no even eligible bonding atoms were found (on any proc) + int some_chance; + + allnattempt = 0; + for (int i = 0; i < nreacts; i++) + allnattempt += nattempt[i]; + + MPI_Allreduce(&allnattempt,&some_chance,1,MPI_INT,MPI_SUM,world); + if (!some_chance) { + unlimit_bond(); + return; + } + + // evaluate custom constraint variable values here and forward_comm + get_customvars(); + commflag = 1; + comm->forward_comm(this,ncustomvars); + + // run through the superimpose algorithm + // this checks if simulation topology matches unreacted mol template + superimpose_algorithm(); + // free atoms that have been limited after reacting + unlimit_bond(); +} + +/* ---------------------------------------------------------------------- + Search non-bonded neighbor lists if bonding atoms are not in special list +------------------------------------------------------------------------- */ + +void FixBondReact::far_partner() +{ + int inum,jnum,itype,jtype,possible; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq; + int *ilist,*jlist,*numneigh,**firstneigh; + + // loop over neighbors of my atoms + // each atom sets one closest eligible partner atom ID to bond with + + double **x = atom->x; + tagint *tag = atom->tag; + int *mask = atom->mask; + int *type = atom->type; + + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // per-atom property indicating if in bond/react master group + int flag,cols; + int index1 = atom->find_custom("limit_tags",flag,cols); + int *i_limit_tags = atom->ivector[index1]; + + int i,j; + + for (int ii = 0; ii < inum; ii++) { + i = ilist[ii]; + if (!(mask[i] & groupbits[rxnID])) continue; + if (i_limit_tags[i] != 0) continue; + itype = type[i]; + xtmp = x[i][0]; + ytmp = 
x[i][1]; + ztmp = x[i][2]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (int jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + if (!(mask[j] & groupbits[rxnID])) { + continue; + } + + if (i_limit_tags[j] != 0) { + continue; + } + + if (molecule_keyword[rxnID] == INTER) { + if (atom->molecule[i] == atom->molecule[j]) continue; + } else if (molecule_keyword[rxnID] == INTRA) { + if (atom->molecule[i] != atom->molecule[j]) continue; + } + + jtype = type[j]; + possible = 0; + if (itype == iatomtype[rxnID] && jtype == jatomtype[rxnID]) { + possible = 1; + } else if (itype == jatomtype[rxnID] && jtype == iatomtype[rxnID]) { + possible = 1; + } + + if (possible == 0) continue; + + // do not allow bonding atoms within special list + for (int k = 0; k < nxspecial[i][2]; k++) + if (xspecial[i][k] == tag[j]) possible = 0; + if (!possible) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + domain->minimum_image(delx,dely,delz); // ghost location fix + rsq = delx*delx + dely*dely + delz*delz; + + if (var_flag[RMIN][rxnID]) { + double cutoff = input->variable->compute_equal(var_id[RMIN][rxnID]); + cutsq[rxnID][0] = cutoff*cutoff; + } + if (var_flag[RMAX][rxnID]) { + double cutoff = input->variable->compute_equal(var_id[RMAX][rxnID]); + cutsq[rxnID][1] = cutoff*cutoff; + } + if (rsq >= cutsq[rxnID][1] || rsq <= cutsq[rxnID][0]) { + continue; + } + if (rsq < distsq[i][1]) { + partner[i] = tag[j]; + distsq[i][1] = rsq; + } + if (rsq < distsq[j][1]) { + partner[j] = tag[i]; + distsq[j][1] = rsq; + } + } + } +} + +/* ---------------------------------------------------------------------- + Slightly simpler to find bonding partner when a close neighbor +------------------------------------------------------------------------- */ + +void FixBondReact::close_partner() +{ + int n,i1,i2,itype,jtype; + double delx,dely,delz,rsq; + + double **x = atom->x; + tagint *tag = atom->tag; + int *type = atom->type; + int *mask = 
atom->mask; + + // per-atom property indicating if in bond/react master group + int flag,cols; + int index1 = atom->find_custom("limit_tags",flag,cols); + int *i_limit_tags = atom->ivector[index1]; + + // loop over special list + for (int ii = 0; ii < atom->nlocal; ii++) { + itype = type[ii]; + n = 0; + if (closeneigh[rxnID] != 0) + n = nxspecial[ii][closeneigh[rxnID]-1]; + for (; n < nxspecial[ii][closeneigh[rxnID]]; n++) { + i1 = ii; + i2 = atom->map(xspecial[ii][n]); + jtype = type[i2]; + if (!(mask[i1] & groupbits[rxnID])) continue; + if (!(mask[i2] & groupbits[rxnID])) continue; + if (i_limit_tags[i1] != 0) continue; + if (i_limit_tags[i2] != 0) continue; + if (itype != iatomtype[rxnID] || jtype != jatomtype[rxnID]) continue; + + if (molecule_keyword[rxnID] == INTER) { + if (atom->molecule[i1] == atom->molecule[i2]) continue; + } else if (molecule_keyword[rxnID] == INTRA) { + if (atom->molecule[i1] != atom->molecule[i2]) continue; + } + + delx = x[i1][0] - x[i2][0]; + dely = x[i1][1] - x[i2][1]; + delz = x[i1][2] - x[i2][2]; + domain->minimum_image(delx,dely,delz); // ghost location fix + rsq = delx*delx + dely*dely + delz*delz; + + if (var_flag[RMIN][rxnID]) { + double cutoff = input->variable->compute_equal(var_id[RMIN][rxnID]); + cutsq[rxnID][0] = cutoff*cutoff; + } + if (var_flag[RMAX][rxnID]) { + double cutoff = input->variable->compute_equal(var_id[RMAX][rxnID]); + cutsq[rxnID][1] = cutoff*cutoff; + } + if (rsq >= cutsq[rxnID][1] || rsq <= cutsq[rxnID][0]) continue; + + if (closeneigh[rxnID] == 0) { + if (rsq > distsq[i1][0]) { + partner[i1] = tag[i2]; + distsq[i1][0] = rsq; + } + if (rsq > distsq[i2][0]) { + partner[i2] = tag[i1]; + distsq[i2][0] = rsq; + } + } else { + if (rsq < distsq[i1][1]) { + partner[i1] = tag[i2]; + distsq[i1][1] = rsq; + } + if (rsq < distsq[i2][1]) { + partner[i2] = tag[i1]; + distsq[i2][1] = rsq; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + Set up global variables. 
Loop through all pairs; loop through Pioneers + until Superimpose Algorithm is completed for each pair. +------------------------------------------------------------------------- */ + +void FixBondReact::superimpose_algorithm() +{ + const int nprocs = comm->nprocs; + my_num_mega = 0; + local_num_mega = 0; + ghostly_num_mega = 0; + + // indicates local ghosts of other procs + int tmp; + localsendlist = (int *) comm->extract("localsendlist",tmp); + + // quick description of important global indices you'll see floating about: + // 'pion' is the pioneer loop index + // 'neigh' in the first neighbor index + // 'trace' retraces the first nieghbors + // trace: once you choose a first neighbor, you then check for other nieghbors of same type + + if (attempted_rxn == 1) { + memory->destroy(restore_pt); + memory->destroy(restore); + memory->destroy(glove); + memory->destroy(pioneers); + memory->destroy(my_mega_glove); + memory->destroy(local_mega_glove); + memory->destroy(ghostly_mega_glove); + } + + memory->create(glove,max_natoms,2,"bond/react:glove"); + memory->create(restore_pt,MAXGUESS,4,"bond/react:restore_pt"); + memory->create(pioneers,max_natoms,"bond/react:pioneers"); + memory->create(restore,max_natoms,MAXGUESS*4,"bond/react:restore"); + memory->create(my_mega_glove,max_natoms+cuff,allnattempt,"bond/react:my_mega_glove"); + + for (int i = 0; i < max_natoms+cuff; i++) + for (int j = 0; j < allnattempt; j++) + my_mega_glove[i][j] = 0.0; + + attempted_rxn = 1; + + // let's finally begin the superimpose loop + for (rxnID = 0; rxnID < nreacts; rxnID++) { + for (lcl_inst = 0; lcl_inst < nattempt[rxnID]; lcl_inst++) { + + onemol = atom->molecules[unreacted_mol[rxnID]]; + twomol = atom->molecules[reacted_mol[rxnID]]; + get_molxspecials(); + + status = PROCEED; + + glove_counter = 0; + for (int i = 0; i < max_natoms; i++) { + for (int j = 0; j < 2; j++) { + glove[i][j] = 0; + } + } + + for (int i = 0; i < MAXGUESS; i++) { + guess_branch[i] = 0; + } + + int myibonding = 
ibonding[rxnID]; + int myjbonding = jbonding[rxnID]; + + glove[myibonding-1][0] = myibonding; + glove[myibonding-1][1] = attempt[lcl_inst][0][rxnID]; + glove_counter++; + glove[myjbonding-1][0] = myjbonding; + glove[myjbonding-1][1] = attempt[lcl_inst][1][rxnID]; + glove_counter++; + + // special case, only two atoms in reaction templates + // then: bonding onemol_nxspecials guaranteed to be equal, and either 0 or 1 + if (glove_counter == onemol->natoms) { + tagint local_atom1 = atom->map(glove[myibonding-1][1]); + tagint local_atom2 = atom->map(glove[myjbonding-1][1]); + if ( (nxspecial[local_atom1][0] == onemol_nxspecial[myibonding-1][0] && + nxspecial[local_atom2][0] == nxspecial[local_atom1][0]) && + (nxspecial[local_atom1][0] == 0 || + xspecial[local_atom1][0] == atom->tag[local_atom2]) && + check_constraints()) { + if (fraction[rxnID] < 1.0 && + random[rxnID]->uniform() >= fraction[rxnID]) { + status = REJECT; + } else { + status = ACCEPT; + my_mega_glove[0][my_num_mega] = (double) rxnID; + if (rescale_charges_flag[rxnID]) my_mega_glove[1][my_num_mega] = get_totalcharge(); + for (int i = 0; i < onemol->natoms; i++) { + my_mega_glove[i+cuff][my_num_mega] = (double) glove[i][1]; + } + my_num_mega++; + } + } else status = REJECT; + } + + avail_guesses = 0; + + for (int i = 0; i < max_natoms; i++) + pioneer_count[i] = 0; + + for (int i = 0; i < onemol_nxspecial[myibonding-1][0]; i++) + pioneer_count[onemol_xspecial[myibonding-1][i]-1]++; + + for (int i = 0; i < onemol_nxspecial[myjbonding-1][0]; i++) + pioneer_count[onemol_xspecial[myjbonding-1][i]-1]++; + + + int hang_catch = 0; + while (status != ACCEPT && status != REJECT) { + + for (int i = 0; i < max_natoms; i++) { + pioneers[i] = 0; + } + + for (int i = 0; i < onemol->natoms; i++) { + if (glove[i][0] != 0 && pioneer_count[i] < onemol_nxspecial[i][0] && edge[i][rxnID] == 0) { + pioneers[i] = 1; + } + } + + // run through the pioneers + // due to use of restore points, 'pion' index can change in loop + for 
(pion = 0; pion < onemol->natoms; pion++) { + if (pioneers[pion] || status == GUESSFAIL) { + make_a_guess(); + if (status == ACCEPT || status == REJECT) break; + } + } + + // reaction site found successfully! + if (status == ACCEPT) { + if (fraction[rxnID] < 1.0 && + random[rxnID]->uniform() >= fraction[rxnID]) status = REJECT; + else { + my_mega_glove[0][my_num_mega] = (double) rxnID; + if (rescale_charges_flag[rxnID]) my_mega_glove[1][my_num_mega] = get_totalcharge(); + for (int i = 0; i < onemol->natoms; i++) { + my_mega_glove[i+cuff][my_num_mega] = (double) glove[i][1]; + } + my_num_mega++; + } + } + hang_catch++; + // let's go ahead and catch the simplest of hangs + //if (hang_catch > onemol->natoms*4) + if (hang_catch > atom->nlocal*30) { + error->one(FLERR,"Fix bond/react: Excessive iteration of superimpose algorithm. " + "Please check that all pre-reaction template atoms are linked to an initiator atom, " + "via at least one path that does not involve edge atoms."); + } + } + } + } + + global_megasize = 0; + + memory->create(local_mega_glove,max_natoms+cuff,my_num_mega,"bond/react:local_mega_glove"); + memory->create(ghostly_mega_glove,max_natoms+cuff,my_num_mega,"bond/react:ghostly_mega_glove"); + + for (int i = 0; i < max_natoms+cuff; i++) { + for (int j = 0; j < my_num_mega; j++) { + local_mega_glove[i][j] = 0.0; + ghostly_mega_glove[i][j] = 0.0; + } + } + + dedup_mega_gloves(LOCAL); // make sure atoms aren't added to more than one reaction + glove_ghostcheck(); // split into 'local' and 'global' + ghost_glovecast(); // consolidate all mega_gloves to all processors + + MPI_Allreduce(&local_rxn_count[0],&reaction_count[0],nreacts,MPI_INT,MPI_SUM,world); + + int rxnflag = 0; + if (comm->me == 0) + for (int i = 0; i < nreacts; i++) { + reaction_count_total[i] += reaction_count[i] + ghostly_rxn_count[i]; + rxnflag += reaction_count[i] + ghostly_rxn_count[i]; + } + + MPI_Bcast(&reaction_count_total[0], nreacts, MPI_INT, 0, world); + MPI_Bcast(&rxnflag, 1, 
MPI_INT, 0, world); + + if (!rxnflag) return; + + // C++11 and later compatible version of Park pRNG + std::random_device rnd; + std::minstd_rand park_rng(rnd()); + + // check if we overstepped our reaction limit, via either max_rxn or rate_limit + for (int i = 0; i < nreacts; i++) { + int overstep = 0; + int max_rxn_overstep = reaction_count_total[i] - max_rxn[i]; + overstep = MAX(overstep,max_rxn_overstep); + if (rate_limit[0][i] == 1) { + int myrxn_count = store_rxn_count[rate_limit[2][i]-1][i]; + if (myrxn_count != -1) { + int nrxn_delta = reaction_count_total[i] - myrxn_count; + int my_nrate; + if (var_flag[NRATE][i] == 1) { + my_nrate = input->variable->compute_equal(var_id[NRATE][i]); + } else my_nrate = rate_limit[1][i]; + int rate_limit_overstep = nrxn_delta - my_nrate; + overstep = MAX(overstep,rate_limit_overstep); + } + } + + if (overstep > 0) { + // let's randomly choose rxns to skip, unbiasedly from local and ghostly + int *local_rxncounts; + int *all_localskips; + memory->create(local_rxncounts,nprocs,"bond/react:local_rxncounts"); + memory->create(all_localskips,nprocs,"bond/react:all_localskips"); + MPI_Gather(&local_rxn_count[i],1,MPI_INT,local_rxncounts,1,MPI_INT,0,world); + if (comm->me == 0) { + int delta_rxn = reaction_count[i] + ghostly_rxn_count[i]; + // when using variable input for rate_limit, rate_limit_overstep could be > delta_rxn (below) + // we need to limit overstep to the number of reactions on this timestep + // essentially skipping all reactions, would be more efficient to use a skip_all flag + if (overstep > delta_rxn) overstep = delta_rxn; + int *rxn_by_proc; + memory->create(rxn_by_proc,delta_rxn,"bond/react:rxn_by_proc"); + for (int j = 0; j < delta_rxn; j++) + rxn_by_proc[j] = -1; // corresponds to ghostly + int itemp = 0; + for (int j = 0; j < nprocs; j++) + for (int k = 0; k < local_rxncounts[j]; k++) + rxn_by_proc[itemp++] = j; + std::shuffle(&rxn_by_proc[0],&rxn_by_proc[delta_rxn], park_rng); + for (int j = 0; j < nprocs; 
j++) + all_localskips[j] = 0; + nghostlyskips[i] = 0; + for (int j = 0; j < overstep; j++) { + if (rxn_by_proc[j] == -1) nghostlyskips[i]++; + else all_localskips[rxn_by_proc[j]]++; + } + memory->destroy(rxn_by_proc); + reaction_count_total[i] -= overstep; + } + MPI_Scatter(&all_localskips[0],1,MPI_INT,&nlocalskips[i],1,MPI_INT,0,world); + MPI_Bcast(&nghostlyskips[i],1,MPI_INT,0,world); + memory->destroy(local_rxncounts); + memory->destroy(all_localskips); + } + } + MPI_Bcast(&reaction_count_total[0], nreacts, MPI_INT, 0, world); + + // this updates topology next step + next_reneighbor = update->ntimestep; + + update_everything(); // change topology +} + +/* ---------------------------------------------------------------------- + Screen for obvious algorithm fails. This is the return point when a guess + has failed: check for available restore points. +------------------------------------------------------------------------- */ + +void FixBondReact::make_a_guess() +{ + int *type = atom->type; + int nfirst_neighs = onemol_nxspecial[pion][0]; + + // per-atom property indicating if in bond/react master group + int flag,cols; + int index1 = atom->find_custom("limit_tags",flag,cols); + int *i_limit_tags = atom->ivector[index1]; + + if (status == GUESSFAIL && avail_guesses == 0) { + status = REJECT; + return; + } + + if (status == GUESSFAIL && avail_guesses > 0) { + // load restore point + for (int i = 0; i < onemol->natoms; i++) { + glove[i][0] = restore[i][(avail_guesses*4)-4]; + glove[i][1] = restore[i][(avail_guesses*4)-3]; + pioneer_count[i] = restore[i][(avail_guesses*4)-2]; + pioneers[i] = restore[i][(avail_guesses*4)-1]; + } + pion = restore_pt[avail_guesses-1][0]; + neigh = restore_pt[avail_guesses-1][1]; + trace = restore_pt[avail_guesses-1][2]; + glove_counter = restore_pt[avail_guesses-1][3]; + status = RESTORE; + neighbor_loop(); + if (status != PROCEED) return; + } + + nfirst_neighs = onemol_nxspecial[pion][0]; + + // check if any of first neighbors are in 
bond_react_MASTER_group + // if so, this constitutes a fail + // because still undergoing a previous reaction! + // could technically fail unnecessarily during a wrong guess if near edge atoms + // we accept this temporary and infrequent decrease in reaction occurrences + + for (int i = 0; i < nxspecial[atom->map(glove[pion][1])][0]; i++) { + if (atom->map(xspecial[atom->map(glove[pion][1])][i]) < 0) { + error->one(FLERR,"Fix bond/react: Fix bond/react needs ghost atoms from further away"); // parallel issues. + } + if (i_limit_tags[(int)atom->map(xspecial[atom->map(glove[pion][1])][i])] != 0) { + status = GUESSFAIL; + return; + } + } + + // check for same number of neighbors between unreacted mol and simulation + if (nfirst_neighs != nxspecial[atom->map(glove[pion][1])][0]) { + status = GUESSFAIL; + return; + } + + // make sure all neighbors aren't already assigned + // an issue discovered for coarse-grained example + int assigned_count = 0; + for (int i = 0; i < nfirst_neighs; i++) + for (int j = 0; j < onemol->natoms; j++) + if (xspecial[atom->map(glove[pion][1])][i] == glove[j][1]) { + assigned_count++; + break; + } + + if (assigned_count == nfirst_neighs) status = GUESSFAIL; + + // check if all neigh atom types are the same between simulation and unreacted mol + int *mol_ntypes = new int[atom->ntypes]; + int *lcl_ntypes = new int[atom->ntypes]; + + for (int i = 0; i < atom->ntypes; i++) { + mol_ntypes[i] = 0; + lcl_ntypes[i] = 0; + } + + for (int i = 0; i < nfirst_neighs; i++) { + mol_ntypes[(int)onemol->type[(int)onemol_xspecial[pion][i]-1]-1]++; + lcl_ntypes[(int)type[(int)atom->map(xspecial[atom->map(glove[pion][1])][i])]-1]++; //added -1 + } + + for (int i = 0; i < atom->ntypes; i++) { + if (mol_ntypes[i] != lcl_ntypes[i]) { + status = GUESSFAIL; + delete [] mol_ntypes; + delete [] lcl_ntypes; + return; + } + } + + delete [] mol_ntypes; + delete [] lcl_ntypes; + + // okay everything seems to be in order. let's assign some ID pairs!!! 
+ neighbor_loop(); +} + +/* ---------------------------------------------------------------------- + Loop through all First Bonded Neighbors of the current Pioneer. + Prepare appropriately if we are in Restore Mode. +------------------------------------------------------------------------- */ + +void FixBondReact::neighbor_loop() +{ + int nfirst_neighs = onemol_nxspecial[pion][0]; + + if (status == RESTORE) { + check_a_neighbor(); + return; + } + + for (neigh = 0; neigh < nfirst_neighs; neigh++) { + if (glove[(int)onemol_xspecial[pion][neigh]-1][0] == 0) { + check_a_neighbor(); + } + } + // status should still = PROCEED +} + +/* ---------------------------------------------------------------------- + Check if we can assign this First Neighbor to pre-reacted template + without guessing. If so, do it! If not, call crosscheck_the_nieghbor(). +------------------------------------------------------------------------- */ + +void FixBondReact::check_a_neighbor() +{ + int *type = atom->type; + int nfirst_neighs = onemol_nxspecial[pion][0]; + + if (status != RESTORE) { + // special consideration for hydrogen atoms (and all first neighbors bonded to no other atoms) (and aren't edge atoms) + if (onemol_nxspecial[(int)onemol_xspecial[pion][neigh]-1][0] == 1 && edge[(int)onemol_xspecial[pion][neigh]-1][rxnID] == 0) { + + for (int i = 0; i < nfirst_neighs; i++) { + + if (type[(int)atom->map(xspecial[(int)atom->map(glove[pion][1])][i])] == onemol->type[(int)onemol_xspecial[pion][neigh]-1] && + nxspecial[(int)atom->map(xspecial[(int)atom->map(glove[pion][1])][i])][0] == 1) { + + int already_assigned = 0; + for (int j = 0; j < onemol->natoms; j++) { + if (glove[j][1] == xspecial[atom->map(glove[pion][1])][i]) { + already_assigned = 1; + break; + } + } + + if (already_assigned == 0) { + glove[(int)onemol_xspecial[pion][neigh]-1][0] = onemol_xspecial[pion][neigh]; + glove[(int)onemol_xspecial[pion][neigh]-1][1] = xspecial[(int)atom->map(glove[pion][1])][i]; + + //another check for 
ghost atoms. perhaps remove the one in make_a_guess + if (atom->map(glove[(int)onemol_xspecial[pion][neigh]-1][1]) < 0) { + error->one(FLERR,"Fix bond/react: Fix bond/react needs ghost atoms from further away"); + } + + for (int j = 0; j < onemol_nxspecial[onemol_xspecial[pion][neigh]-1][0]; j++) { + pioneer_count[onemol_xspecial[onemol_xspecial[pion][neigh]-1][j]-1]++; + } + + glove_counter++; + if (glove_counter == onemol->natoms) { + if (ring_check() && check_constraints()) status = ACCEPT; + else status = GUESSFAIL; + return; + } + // status should still == PROCEED + return; + } + } + } + // we are here if no matching atom found + status = GUESSFAIL; + return; + } + } + + crosscheck_the_neighbor(); + if (status != PROCEED) { + if (status == CONTINUE) + status = PROCEED; + return; + } + + // finally ready to match non-duplicate, non-edge atom IDs!! + + for (int i = 0; i < nfirst_neighs; i++) { + + if (type[atom->map((int)xspecial[(int)atom->map(glove[pion][1])][i])] == onemol->type[(int)onemol_xspecial[pion][neigh]-1]) { + int already_assigned = 0; + + //check if a first neighbor of the pioneer is already assigned to pre-reacted template + for (int j = 0; j < onemol->natoms; j++) { + if (glove[j][1] == xspecial[atom->map(glove[pion][1])][i]) { + already_assigned = 1; + break; + } + } + + if (already_assigned == 0) { + glove[(int)onemol_xspecial[pion][neigh]-1][0] = onemol_xspecial[pion][neigh]; + glove[(int)onemol_xspecial[pion][neigh]-1][1] = xspecial[(int)atom->map(glove[pion][1])][i]; + + //another check for ghost atoms. 
perhaps remove the one in make_a_guess + if (atom->map(glove[(int)onemol_xspecial[pion][neigh]-1][1]) < 0) { + error->one(FLERR,"Fix bond/react: Fix bond/react needs ghost atoms from further away"); + } + + for (int ii = 0; ii < onemol_nxspecial[onemol_xspecial[pion][neigh]-1][0]; ii++) { + pioneer_count[onemol_xspecial[onemol_xspecial[pion][neigh]-1][ii]-1]++; + } + + glove_counter++; + if (glove_counter == onemol->natoms) { + if (ring_check() && check_constraints()) status = ACCEPT; + else status = GUESSFAIL; + return; + // will never complete here when there are edge atoms + // ...actually that could be wrong if people get creative...shouldn't affect anything + } + // status should still = PROCEED + return; + } + } + } + // status is still 'PROCEED' if we are here! +} + +/* ---------------------------------------------------------------------- + Check if there a viable guess to be made. If so, prepare to make a + guess by recording a restore point. +------------------------------------------------------------------------- */ + +void FixBondReact::crosscheck_the_neighbor() +{ + int nfirst_neighs = onemol_nxspecial[pion][0]; + + if (status == RESTORE) { + inner_crosscheck_loop(); + return; + } + + for (trace = 0; trace < nfirst_neighs; trace++) { + if (neigh!=trace && onemol->type[(int)onemol_xspecial[pion][neigh]-1] == onemol->type[(int)onemol_xspecial[pion][trace]-1] && + glove[onemol_xspecial[pion][trace]-1][0] == 0) { + + if (avail_guesses == MAXGUESS) { + error->warning(FLERR,"Fix bond/react: Fix bond/react failed because MAXGUESS set too small. 
ask developer for info"); + status = GUESSFAIL; + return; + } + avail_guesses++; + for (int i = 0; i < onemol->natoms; i++) { + restore[i][(avail_guesses*4)-4] = glove[i][0]; + restore[i][(avail_guesses*4)-3] = glove[i][1]; + restore[i][(avail_guesses*4)-2] = pioneer_count[i]; + restore[i][(avail_guesses*4)-1] = pioneers[i]; + restore_pt[avail_guesses-1][0] = pion; + restore_pt[avail_guesses-1][1] = neigh; + restore_pt[avail_guesses-1][2] = trace; + restore_pt[avail_guesses-1][3] = glove_counter; + } + + inner_crosscheck_loop(); + return; + } + } + // status is still 'PROCEED' if we are here! +} + +/* ---------------------------------------------------------------------- + We are ready to make a guess. If there are multiple possible choices + for this guess, keep track of these. +------------------------------------------------------------------------- */ + +void FixBondReact::inner_crosscheck_loop() +{ + int *type = atom->type; + // arbitrarily limited to 5 identical first neighbors + tagint tag_choices[5]; + int nfirst_neighs = onemol_nxspecial[pion][0]; + + int num_choices = 0; + for (int i = 0; i < nfirst_neighs; i++) { + if (type[(int)atom->map(xspecial[atom->map(glove[pion][1])][i])] == onemol->type[(int)onemol_xspecial[pion][neigh]-1]) { + if (num_choices == 5) { // here failed because too many identical first neighbors. but really no limit if situation arises + status = GUESSFAIL; + return; + } + tag_choices[num_choices++] = xspecial[atom->map(glove[pion][1])][i]; + } + } + + // guess branch is for when multiple identical neighbors. then we guess each one in turn + // guess_branch must work even when avail_guesses = 0. so index accordingly! 
+ // ...actually, avail_guesses should never be zero here anyway + if (guess_branch[avail_guesses-1] == 0) guess_branch[avail_guesses-1] = num_choices; + + for (int i=1; i < num_choices; ++i) { + tagint hold = tag_choices[i]; + int j = i - 1; + while ((j >= 0) && (tag_choices[j] > hold)) { + tag_choices[j+1] = tag_choices[j]; + --j; + } + tag_choices[j+1] = hold; + } + + for (int i = guess_branch[avail_guesses-1]-1; i >= 0; i--) { + int already_assigned = 0; + for (int j = 0; j < onemol->natoms; j++) { + if (glove[j][1] == tag_choices[i]) { + already_assigned = 1; + break; + } + } + if (already_assigned == 1) { + guess_branch[avail_guesses-1]--; + if (guess_branch[avail_guesses-1] == 0) { + status = REJECT; + return; + } + } else { + glove[onemol_xspecial[pion][neigh]-1][0] = onemol_xspecial[pion][neigh]; + glove[onemol_xspecial[pion][neigh]-1][1] = tag_choices[i]; + guess_branch[avail_guesses-1]--; + break; + } + } + + //another check for ghost atoms. perhaps remove the one in make_a_guess + if (atom->map(glove[(int)onemol_xspecial[pion][neigh]-1][1]) < 0) { + error->one(FLERR,"Fix bond/react: Fix bond/react needs ghost atoms from further away"); + } + + if (guess_branch[avail_guesses-1] == 0) avail_guesses--; + + for (int i = 0; i < onemol_nxspecial[onemol_xspecial[pion][neigh]-1][0]; i++) { + pioneer_count[onemol_xspecial[onemol_xspecial[pion][neigh]-1][i]-1]++; + } + glove_counter++; + if (glove_counter == onemol->natoms) { + if (ring_check() && check_constraints()) status = ACCEPT; + else status = GUESSFAIL; + return; + } + status = CONTINUE; +} + +/* ---------------------------------------------------------------------- + Check that newly assigned atoms have correct bonds + Necessary for certain ringed structures +------------------------------------------------------------------------- */ + +int FixBondReact::ring_check() +{ + // ring_check can be made more efficient by re-introducing 'frozen' atoms + // 'frozen' atoms have been assigned and also are no 
longer pioneers + + // double check the number of neighbors match for all non-edge atoms + // otherwise, atoms at 'end' of symmetric ring can behave like edge atoms + for (int i = 0; i < onemol->natoms; i++) + if (edge[i][rxnID] == 0 && + onemol_nxspecial[i][0] != nxspecial[atom->map(glove[i][1])][0]) + return 0; + + for (int i = 0; i < onemol->natoms; i++) { + for (int j = 0; j < onemol_nxspecial[i][0]; j++) { + int ring_fail = 1; + int ispecial = onemol_xspecial[i][j]; + for (int k = 0; k < nxspecial[atom->map(glove[i][1])][0]; k++) { + if (xspecial[atom->map(glove[i][1])][k] == glove[ispecial-1][1]) { + ring_fail = 0; + break; + } + } + if (ring_fail == 1) return 0; + } + } + return 1; +} + +/* ---------------------------------------------------------------------- +evaluate constraints: return 0 if any aren't satisfied +------------------------------------------------------------------------- */ + +int FixBondReact::check_constraints() +{ + double x1[3],x2[3],x3[3],x4[3]; + double delx,dely,delz,rsq; + double delx1,dely1,delz1,delx2,dely2,delz2; + double rsq1,rsq2,r1,r2,c,t,prrhob; + // for computation of dihedrals + double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm; + double ax,ay,az,bx,by,bz,rasq,rbsq,rgsq,rg,ra2inv,rb2inv,rabinv; + double s,phi; + int ANDgate; + + tagint atom1,atom2; + double **x = atom->x; + + int *satisfied; + memory->create(satisfied,nconstraints[rxnID],"bond/react:satisfied"); + for (int i = 0; i < nconstraints[rxnID]; i++) + satisfied[i] = 1; + + for (int i = 0; i < nconstraints[rxnID]; i++) { + if (constraints[i][rxnID].type == DISTANCE) { + get_IDcoords(constraints[i][rxnID].idtype[0], constraints[i][rxnID].id[0], x1); + get_IDcoords(constraints[i][rxnID].idtype[1], constraints[i][rxnID].id[1], x2); + delx = x1[0] - x2[0]; + dely = x1[1] - x2[1]; + delz = x1[2] - x2[2]; + domain->minimum_image(delx,dely,delz); // ghost location fix + rsq = delx*delx + dely*dely + delz*delz; + if (rsq < constraints[i][rxnID].par[0] 
|| rsq > constraints[i][rxnID].par[1]) satisfied[i] = 0; + } else if (constraints[i][rxnID].type == ANGLE) { + get_IDcoords(constraints[i][rxnID].idtype[0], constraints[i][rxnID].id[0], x1); + get_IDcoords(constraints[i][rxnID].idtype[1], constraints[i][rxnID].id[1], x2); + get_IDcoords(constraints[i][rxnID].idtype[2], constraints[i][rxnID].id[2], x3); + + // 1st bond + delx1 = x1[0] - x2[0]; + dely1 = x1[1] - x2[1]; + delz1 = x1[2] - x2[2]; + domain->minimum_image(delx1,dely1,delz1); // ghost location fix + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + r1 = sqrt(rsq1); + + // 2nd bond + delx2 = x3[0] - x2[0]; + dely2 = x3[1] - x2[1]; + delz2 = x3[2] - x2[2]; + domain->minimum_image(delx2,dely2,delz2); // ghost location fix + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + r2 = sqrt(rsq2); + + // angle (cos and sin) + c = delx1*delx2 + dely1*dely2 + delz1*delz2; + c /= r1*r2; + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + if (acos(c) < constraints[i][rxnID].par[0] || acos(c) > constraints[i][rxnID].par[1]) satisfied[i] = 0; + } else if (constraints[i][rxnID].type == DIHEDRAL) { + // phi calculation from dihedral style harmonic + get_IDcoords(constraints[i][rxnID].idtype[0], constraints[i][rxnID].id[0], x1); + get_IDcoords(constraints[i][rxnID].idtype[1], constraints[i][rxnID].id[1], x2); + get_IDcoords(constraints[i][rxnID].idtype[2], constraints[i][rxnID].id[2], x3); + get_IDcoords(constraints[i][rxnID].idtype[3], constraints[i][rxnID].id[3], x4); + + vb1x = x1[0] - x2[0]; + vb1y = x1[1] - x2[1]; + vb1z = x1[2] - x2[2]; + domain->minimum_image(vb1x,vb1y,vb1z); + + vb2x = x3[0] - x2[0]; + vb2y = x3[1] - x2[1]; + vb2z = x3[2] - x2[2]; + domain->minimum_image(vb2x,vb2y,vb2z); + + vb2xm = -vb2x; + vb2ym = -vb2y; + vb2zm = -vb2z; + domain->minimum_image(vb2xm,vb2ym,vb2zm); + + vb3x = x4[0] - x3[0]; + vb3y = x4[1] - x3[1]; + vb3z = x4[2] - x3[2]; + domain->minimum_image(vb3x,vb3y,vb3z); + + ax = vb1y*vb2zm - vb1z*vb2ym; + ay = vb1z*vb2xm - vb1x*vb2zm; + az = 
vb1x*vb2ym - vb1y*vb2xm; + bx = vb3y*vb2zm - vb3z*vb2ym; + by = vb3z*vb2xm - vb3x*vb2zm; + bz = vb3x*vb2ym - vb3y*vb2xm; + + rasq = ax*ax + ay*ay + az*az; + rbsq = bx*bx + by*by + bz*bz; + rgsq = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm; + rg = sqrt(rgsq); + + ra2inv = rb2inv = 0.0; + if (rasq > 0) ra2inv = 1.0/rasq; + if (rbsq > 0) rb2inv = 1.0/rbsq; + rabinv = sqrt(ra2inv*rb2inv); + + c = (ax*bx + ay*by + az*bz)*rabinv; + s = rg*rabinv*(ax*vb3x + ay*vb3y + az*vb3z); + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + phi = atan2(s,c); + + ANDgate = 0; + if (constraints[i][rxnID].par[0] < constraints[i][rxnID].par[1]) { + if (phi > constraints[i][rxnID].par[0] && phi < constraints[i][rxnID].par[1]) ANDgate = 1; + } else { + if (phi > constraints[i][rxnID].par[0] || phi < constraints[i][rxnID].par[1]) ANDgate = 1; + } + if (constraints[i][rxnID].par[2] < constraints[i][rxnID].par[3]) { + if (phi > constraints[i][rxnID].par[2] && phi < constraints[i][rxnID].par[3]) ANDgate = 1; + } else { + if (phi > constraints[i][rxnID].par[2] || phi < constraints[i][rxnID].par[3]) ANDgate = 1; + } + if (ANDgate != 1) satisfied[i] = 0; + } else if (constraints[i][rxnID].type == ARRHENIUS) { + t = get_temperature(glove,0,1); + prrhob = constraints[i][rxnID].par[1]*pow(t,constraints[i][rxnID].par[2])* + exp(-constraints[i][rxnID].par[3]/(force->boltz*t)); + if (prrhob < rrhandom[(int) constraints[i][rxnID].par[0]]->uniform()) satisfied[i] = 0; + } else if (constraints[i][rxnID].type == RMSD) { + // call superpose + int iatom; + int iref = -1; // choose first atom as reference + int n2superpose = 0; + double **xfrozen; // coordinates for the "frozen" target molecule + double **xmobile; // coordinates for the "mobile" molecule + int ifragment = constraints[i][rxnID].id[0]; + if (ifragment >= 0) { + for (int j = 0; j < onemol->natoms; j++) + if (onemol->fragmentmask[ifragment][j]) n2superpose++; + memory->create(xfrozen,n2superpose,3,"bond/react:xfrozen"); + 
memory->create(xmobile,n2superpose,3,"bond/react:xmobile"); + int myincr = 0; + for (int j = 0; j < onemol->natoms; j++) { + if (onemol->fragmentmask[ifragment][j]) { + iatom = atom->map(glove[j][1]); + if (iref == -1) iref = iatom; + iatom = domain->closest_image(iref,iatom); + for (int k = 0; k < 3; k++) { + xfrozen[myincr][k] = x[iatom][k]; + xmobile[myincr][k] = onemol->x[j][k]; + } + myincr++; + } + } + } else { + int iatom; + int iref = -1; // choose first atom as reference + n2superpose = onemol->natoms; + memory->create(xfrozen,n2superpose,3,"bond/react:xfrozen"); + memory->create(xmobile,n2superpose,3,"bond/react:xmobile"); + for (int j = 0; j < n2superpose; j++) { + iatom = atom->map(glove[j][1]); + if (iref == -1) iref = iatom; + iatom = domain->closest_image(iref,iatom); + for (int k = 0; k < 3; k++) { + xfrozen[j][k] = x[iatom][k]; + xmobile[j][k] = onemol->x[j][k]; + } + } + } + Superpose3D superposer(n2superpose); + double rmsd = superposer.Superpose(xfrozen, xmobile); + memory->destroy(xfrozen); + memory->destroy(xmobile); + if (rmsd > constraints[i][rxnID].par[0]) satisfied[i] = 0; + } else if (constraints[i][rxnID].type == CUSTOM) { + satisfied[i] = custom_constraint(constraints[i][rxnID].str); + } + } + + if (nconstraints[rxnID] > 0) { + char evalstr[MAXLINE],*ptr; + strcpy(evalstr,constraintstr[rxnID]); + for (int i = 0; i < nconstraints[rxnID]; i++) { + ptr = strchr(evalstr,'C'); + *ptr = satisfied[i] ? 
'1' : '0'; + } + double verdict = input->variable->evaluate_boolean(evalstr); + if (verdict == 0.0) { + memory->destroy(satisfied); + return 0; + } + } + + // let's also check chirality within 'check_constraint' + for (int i = 0; i < onemol->natoms; i++) { + if (chiral_atoms[i][0][rxnID] == 1) { + double my4coords[12]; + // already ensured, by transitive property, that chiral simulation atom has four neighs + for (int j = 0; j < 4; j++) { + atom1 = atom->map(glove[i][1]); + // loop over known types involved in chiral center + for (int jj = 0; jj < 4; jj++) { + if (atom->type[atom->map(xspecial[atom1][j])] == chiral_atoms[i][jj+2][rxnID]) { + atom2 = atom->map(xspecial[atom1][j]); + atom2 = domain->closest_image(atom1,atom2); + for (int k = 0; k < 3; k++) { + my4coords[3*jj+k] = x[atom2][k]; + } + break; + } + } + } + if (get_chirality(my4coords) != chiral_atoms[i][1][rxnID]) { + memory->destroy(satisfied); + return 0; + } + } + } + + memory->destroy(satisfied); + return 1; +} + +/* ---------------------------------------------------------------------- +return pre-reaction atom or fragment location +fragment: given pre-reacted molID (onemol) and fragID, + return geometric center (of mapped simulation atoms) +------------------------------------------------------------------------- */ + +void FixBondReact::get_IDcoords(int mode, int myID, double *center) +{ + double **x = atom->x; + if (mode == ATOM) { + int iatom = atom->map(glove[myID-1][1]); + for (int i = 0; i < 3; i++) + center[i] = x[iatom][i]; + } else { + int iref = -1; // choose first atom as reference + int iatom; + int nfragatoms = 0; + for (int i = 0; i < 3; i++) + center[i] = 0; + + for (int i = 0; i < onemol->natoms; i++) { + if (onemol->fragmentmask[myID][i]) { + if (iref == -1) + iref = atom->map(glove[i][1]); + iatom = atom->map(glove[i][1]); + iatom = domain->closest_image(iref,iatom); + for (int j = 0; j < 3; j++) + center[j] += x[iatom][j]; + nfragatoms++; + } + } + if (nfragatoms > 0) + for (int 
i = 0; i < 3; i++) center[i] /= nfragatoms; + } +} + +/* ---------------------------------------------------------------------- +compute local temperature: average over all atoms in reaction template +------------------------------------------------------------------------- */ + +double FixBondReact::get_temperature(tagint **myglove, int row_offset, int col) +{ + int i,ilocal; + double adof = domain->dimension; + + double **v = atom->v; + double *mass = atom->mass; + double *rmass = atom->rmass; + int *type = atom->type; + + double t = 0.0; + + if (rmass) { + for (i = 0; i < onemol->natoms; i++) { + ilocal = atom->map(myglove[i+row_offset][col]); + t += (v[ilocal][0]*v[ilocal][0] + v[ilocal][1]*v[ilocal][1] + + v[ilocal][2]*v[ilocal][2]) * rmass[ilocal]; + } + } else { + for (i = 0; i < onemol->natoms; i++) { + ilocal = atom->map(myglove[i+row_offset][col]); + t += (v[ilocal][0]*v[ilocal][0] + v[ilocal][1]*v[ilocal][1] + + v[ilocal][2]*v[ilocal][2]) * mass[type[ilocal]]; + } + } + + // final temperature + double dof = adof*onemol->natoms; + double tfactor = force->mvv2e / (dof * force->boltz); + t *= tfactor; + return t; +} + +/* ---------------------------------------------------------------------- +compute sum of partial charges in rxn site, for updated atoms +note: currently uses global rxnID and onemol variables +------------------------------------------------------------------------- */ + +double FixBondReact::get_totalcharge() +{ + int j,jj; + double *q = atom->q; + double sim_total_charge = 0.0; + for (j = 0; j < onemol->natoms; j++) { + jj = equivalences[j][1][rxnID]-1; + if (custom_charges[jj][rxnID] == 1) + sim_total_charge += q[atom->map(glove[jj][1])]; + } + return sim_total_charge; +} + +/* ---------------------------------------------------------------------- +get per-atom variable names used by custom constraint +------------------------------------------------------------------------- */ + +void FixBondReact::customvarnames() +{ + std::size_t 
pos,pos1,pos2,pos3; + int prev3; + std::string varstr,argstr,varid; + + // search all constraints' varstr for special 'rxn' functions + // add variable names to customvarstrs + // add values to customvars + + for (rxnID = 0; rxnID < nreacts; rxnID++) { + for (int i = 0; i < nconstraints[rxnID]; i++) { + if (constraints[i][rxnID].type == CUSTOM) { + varstr = constraints[i][rxnID].str; + prev3 = -1; + while (true) { + // find next reaction special function occurrence + pos1 = std::string::npos; + for (int i = 0; i < nrxnfunction; i++) { + if (peratomflag[i] == 0) continue; + pos = varstr.find(rxnfunclist[i],prev3+1); + if (pos == std::string::npos) continue; + if (pos < pos1) pos1 = pos; + } + if (pos1 == std::string::npos) break; + + pos2 = varstr.find("(",pos1); + pos3 = varstr.find(")",pos2); + if (pos2 == std::string::npos || pos3 == std::string::npos) + error->all(FLERR,"Fix bond/react: Illegal rxn function syntax\n"); + prev3 = (int)pos3; + argstr = varstr.substr(pos2+1,pos3-pos2-1); + argstr.erase(remove_if(argstr.begin(), argstr.end(), isspace), argstr.end()); // remove whitespace + pos2 = argstr.find(","); + if (pos2 != std::string::npos) varid = argstr.substr(0,pos2); + else varid = argstr; + // check if we already know about this variable + int varidflag = 0; + for (int j = 0; j < ncustomvars; j++) { + if (customvarstrs[j] == varid) { + varidflag = 1; + break; + } + } + if (!varidflag) { + customvarstrs.resize(ncustomvars+1); + customvarstrs[ncustomvars++] = varid; + } + } + } + } + } +} + +/* ---------------------------------------------------------------------- +evaluate per-atom variables needed for custom constraint +------------------------------------------------------------------------- */ + +void FixBondReact::get_customvars() +{ + double *tempvvec; + std::string varid; + int nall = atom->nlocal + atom->nghost; + + memory->create(tempvvec,nall,"bond/react:tempvvec"); + if (vvec == nullptr) { + 
memory->create(vvec,nall,ncustomvars,"bond/react:vvec"); + nvvec = nall; + } + if (nvvec < nall) { + memory->grow(vvec,nall,ncustomvars,"bond/react:vvec"); + nvvec = nall; + } + for (int i = 0; i < ncustomvars; i++) { + varid = customvarstrs[i]; + if (varid.substr(0,2) != "v_") error->all(FLERR,"Fix bond/react: Reaction special function variable " + "name should begin with 'v_'"); + varid = varid.substr(2); + int ivar = input->variable->find(varid.c_str()); + if (ivar < 0) + error->all(FLERR,"Fix bond/react: Reaction special function variable " + "name does not exist"); + if (!input->variable->atomstyle(ivar)) + error->all(FLERR,"Fix bond/react: Reaction special function must " + "reference an atom-style variable"); + + input->variable->compute_atom(ivar,igroup,tempvvec,1,0); + for (int j = 0; j < nall; j++) vvec[j][i] = tempvvec[j]; + } + memory->destroy(tempvvec); +} + +/* ---------------------------------------------------------------------- +evaulate expression for variable constraint +------------------------------------------------------------------------- */ + +double FixBondReact::custom_constraint(const std::string& varstr) +{ + std::size_t pos,pos1,pos2,pos3; + int irxnfunc; + int prev3 = -1; + std::string argstr,varid,fragid,evlcat; + std::vector evlstr; + + // search varstr for special 'rxn' functions + while (true) { + // find next reaction special function occurrence + pos1 = std::string::npos; + for (int i = 0; i < nrxnfunction; i++) { + pos = varstr.find(rxnfunclist[i],prev3+1); + if (pos == std::string::npos) continue; + if (pos < pos1) { + pos1 = pos; + irxnfunc = i; + } + } + if (pos1 == std::string::npos) break; + + fragid = "all"; // operate over entire reaction site by default + pos2 = varstr.find("(",pos1); + pos3 = varstr.find(")",pos2); + if (pos2 == std::string::npos || pos3 == std::string::npos) + error->one(FLERR,"Fix bond/react: Illegal rxn function syntax\n"); + evlstr.push_back(varstr.substr(prev3+1,pos1-(prev3+1))); + prev3 = pos3; + 
argstr = varstr.substr(pos2+1,pos3-pos2-1); + argstr.erase(remove_if(argstr.begin(), argstr.end(), isspace), argstr.end()); // remove whitespace + pos2 = argstr.find(","); + if (pos2 != std::string::npos) { + varid = argstr.substr(0,pos2); + fragid = argstr.substr(pos2+1); + } else varid = argstr; + evlstr.push_back(std::to_string(rxnfunction(rxnfunclist[irxnfunc], varid, fragid))); + } + evlstr.push_back(varstr.substr(prev3+1)); + + for (auto & evl : evlstr) evlcat += evl; + return input->variable->compute_equal(evlcat); +} + +/* ---------------------------------------------------------------------- +currently three 'rxn' functions: rxnsum, rxnave, and rxnbond +------------------------------------------------------------------------- */ + +double FixBondReact::rxnfunction(const std::string& rxnfunc, const std::string& varid, + const std::string& fragid) +{ + int ifrag = -1; + if (fragid != "all") { + ifrag = onemol->findfragment(fragid.c_str()); + if (ifrag < 0) error->one(FLERR,"Bond/react: Molecule fragment " + "in reaction special function does not exist"); + } + + // start with 'rxnbond' per-bond function + // for 'rxnbond', varid corresponds to 'compute bond/local' name, + // and fragid is a pre-reaction fragment containing the two atoms in the bond + if (rxnfunc == "rxnbond") { + int icompute,ibond,nsum; + double perbondval; + std::set aset; + std::string computeid = varid; + std::map,int>::iterator it; + + if (computeid.substr(0,2) != "c_") error->one(FLERR,"Bond/react: Reaction special function compute " + "name should begin with 'c_'"); + computeid = computeid.substr(2); + icompute = modify->find_compute(computeid); + if (icompute < 0) error->one(FLERR,"Bond/react: Reaction special function compute name does not exist"); + cperbond = modify->compute[icompute]; + std::string compute_style = cperbond->style; + if (compute_style != "bond/local") error->one(FLERR,"Bond/react: Compute used by reaction " + "special function 'rxnbond' must be of style 
'bond/local'"); + if (cperbond->size_local_cols > 0) error->one(FLERR,"Bond/react: 'Compute bond/local' used by reaction " + "special function 'rxnbond' must compute one value"); + + if (atoms2bondflag == 0) { + atoms2bondflag = 1; + get_atoms2bond(cperbond->groupbit); + } + + nsum = 0; + for (int i = 0; i < onemol->natoms; i++) { + if (onemol->fragmentmask[ifrag][i]) { + aset.insert(glove[i][1]); + nsum++; + } + } + if (nsum != 2) error->one(FLERR,"Bond/react: Molecule fragment of reaction special function 'rxnbond' " + "must contain exactly two atoms"); + + if (cperbond->invoked_local != lmp->update->ntimestep) + cperbond->compute_local(); + + it = atoms2bond.find(aset); + if (it == atoms2bond.end()) error->one(FLERR,"Bond/react: Unable to locate bond referenced by " + "reaction special function 'rxnbond'"); + ibond = it->second; + perbondval = cperbond->vector_local[ibond]; + return perbondval; + } + + int ivar = -1; + for (int i = 0; i < ncustomvars; i++) { + if (varid == customvarstrs[i]) { + ivar = i; + break; + } + } + // variable name should always be found, at this point + // however, let's double check for completeness + if (ivar < 0) + error->one(FLERR,"Fix bond/react: Reaction special function variable " + "name does not exist"); + + int iatom; + int nsum = 0; + double sumvvec = 0; + if (rxnfunc == "rxnsum" || rxnfunc == "rxnave") { + if (fragid == "all") { + for (int i = 0; i < onemol->natoms; i++) { + iatom = atom->map(glove[i][1]); + sumvvec += vvec[iatom][ivar]; + } + nsum = onemol->natoms; + } else { + for (int i = 0; i < onemol->natoms; i++) { + if (onemol->fragmentmask[ifrag][i]) { + iatom = atom->map(glove[i][1]); + sumvvec += vvec[iatom][ivar]; + nsum++; + } + } + } + } + + if (rxnfunc == "rxnsum") return sumvvec; + if (rxnfunc == "rxnave") return sumvvec/nsum; + return 0.0; +} + +/* ---------------------------------------------------------------------- +populate map to get bond index from atom IDs 
+------------------------------------------------------------------------- */ + +void FixBondReact::get_atoms2bond(int cgroupbit) +{ + int i,m,atom1,atom2,btype,nb; + std::set aset; + + int nlocal = atom->nlocal; + tagint *tag = atom->tag; + int *num_bond = atom->num_bond; + tagint **bond_atom = atom->bond_atom; + int **bond_type = atom->bond_type; + int *mask = atom->mask; + + m = 0; + atoms2bond.clear(); + for (atom1 = 0; atom1 < nlocal; atom1++) { + if (!(mask[atom1] & cgroupbit)) continue; + nb = num_bond[atom1]; + for (i = 0; i < nb; i++) { + btype = bond_type[atom1][i]; + atom2 = atom->map(bond_atom[atom1][i]); + if (atom2 < 0 || !(mask[atom2] & cgroupbit)) continue; + if (newton_bond == 0 && tag[atom1] > tag[atom2]) continue; + if (btype == 0) continue; + aset = {tag[atom1], tag[atom2]}; + atoms2bond.insert(std::make_pair(aset,m++)); + } + } +} + +/* ---------------------------------------------------------------------- +return handedness (1 or -1) of a chiral center, given ordered set of coordinates +------------------------------------------------------------------------- */ + +int FixBondReact::get_chirality(double four_coords[12]) +{ + // define oriented plane with first three coordinates + double vec1[3],vec2[3],vec3[3],vec4[3],mean3[3],dot; + + for (int i = 0; i < 3; i++) { + vec1[i] = four_coords[i]-four_coords[i+3]; + vec2[i] = four_coords[i+3]-four_coords[i+6]; + } + + MathExtra::cross3(vec1,vec2,vec3); + + for (int i = 0; i < 3; i++) { + mean3[i] = (four_coords[i] + four_coords[i+3] + + four_coords[i+6])/3; + vec4[i] = four_coords[i+9] - mean3[i]; + } + + dot = MathExtra::dot3(vec3,vec4); + dot = dot/fabs(dot); + return (int) dot; +} + +/* ---------------------------------------------------------------------- + Get xspecials for current molecule templates + may need correction when specials defined explicitly in molecule templates +------------------------------------------------------------------------- */ + +void FixBondReact::get_molxspecials() 
+{ + onemol_nxspecial = onemol->nspecial; + onemol_xspecial = onemol->special; + twomol_nxspecial = twomol->nspecial; + twomol_xspecial = twomol->special; +} + +/* ---------------------------------------------------------------------- + Determine which pre-reacted template atoms are at least three bonds + away from edge atoms. +------------------------------------------------------------------------- */ + +void FixBondReact::find_landlocked_atoms(int myrxn) +{ + // landlocked_atoms are atoms for which all topology is contained in reacted template + // if dihedrals/impropers exist: this means that edge atoms are not in their 1-3 neighbor list + // note: due to various usage/definitions of impropers, treated same as dihedrals + // if angles exist: this means edge atoms not in their 1-2 neighbors list + // if just bonds: this just means that edge atoms are not landlocked + // Note: landlocked defined in terms of reacted template + // if no edge atoms (small reacting molecule), all atoms are landlocked + // we can delete all current topology of landlocked atoms and replace + + // always remove edge atoms from landlocked list + for (int i = 0; i < twomol->natoms; i++) { + if (create_atoms[i][myrxn] == 0 && edge[equivalences[i][1][myrxn]-1][myrxn] == 1) + landlocked_atoms[i][myrxn] = 0; + else landlocked_atoms[i][myrxn] = 1; + } + int nspecial_limit = -1; + if (force->angle && twomol->angleflag) nspecial_limit = 0; + + if ((force->dihedral && twomol->dihedralflag) || + (force->improper && twomol->improperflag)) nspecial_limit = 1; + + if (nspecial_limit != -1) { + for (int i = 0; i < twomol->natoms; i++) { + for (int j = 0; j < twomol_nxspecial[i][nspecial_limit]; j++) { + for (int k = 0; k < onemol->natoms; k++) { + if (equivalences[twomol_xspecial[i][j]-1][1][myrxn] == k+1 && edge[k][myrxn] == 1) { + landlocked_atoms[i][myrxn] = 0; + } + } + } + } + } + + // bad molecule templates check + // if atoms change types, but aren't landlocked, that's bad + for (int i = 0; i < 
twomol->natoms; i++) { + if ((create_atoms[i][myrxn] == 0) && + (twomol->type[i] != onemol->type[equivalences[i][1][myrxn]-1]) && + (landlocked_atoms[i][myrxn] == 0)) + error->all(FLERR, "Fix bond/react: Atom type affected by reaction {} is too close " + "to template edge", rxn_name[myrxn]); + } + + // additionally, if a bond changes type, but neither involved atom is landlocked, bad + // would someone want to change an angle type but not bond or atom types? (etc.) ...hopefully not yet + for (int i = 0; i < twomol->natoms; i++) { + if (create_atoms[i][myrxn] == 0) { + if (landlocked_atoms[i][myrxn] == 0) { + for (int j = 0; j < twomol->num_bond[i]; j++) { + int twomol_atomj = twomol->bond_atom[i][j]; + if (landlocked_atoms[twomol_atomj-1][myrxn] == 0) { + int onemol_atomi = equivalences[i][1][myrxn]; + int onemol_batom; + for (int m = 0; m < onemol->num_bond[onemol_atomi-1]; m++) { + onemol_batom = onemol->bond_atom[onemol_atomi-1][m]; + if ((onemol_batom == equivalences[twomol_atomj-1][1][myrxn]) && + (twomol->bond_type[i][j] != onemol->bond_type[onemol_atomi-1][m])) + error->all(FLERR, "Fix bond/react: Bond type affected by reaction {} is " + "too close to template edge",rxn_name[myrxn]); + } + if (newton_bond) { + int onemol_atomj = equivalences[twomol_atomj-1][1][myrxn]; + for (int m = 0; m < onemol->num_bond[onemol_atomj-1]; m++) { + onemol_batom = onemol->bond_atom[onemol_atomj-1][m]; + if ((onemol_batom == equivalences[i][1][myrxn]) && + (twomol->bond_type[i][j] != onemol->bond_type[onemol_atomj-1][m])) + error->all(FLERR, "Fix bond/react: Bond type affected by reaction {} is " + "too close to template edge",rxn_name[myrxn]); + } + } + } + } + } + } + } + + // additionally, if a deleted atom is bonded to an atom that is not deleted, bad + for (int i = 0; i < onemol->natoms; i++) { + if (delete_atoms[i][myrxn] == 1) { + int ii = reverse_equiv[i][1][myrxn] - 1; + for (int j = 0; j < twomol_nxspecial[ii][0]; j++) { + if 
(delete_atoms[equivalences[twomol_xspecial[ii][j]-1][1][myrxn]-1][myrxn] == 0) { + error->all(FLERR,"Fix bond/react: A deleted atom cannot remain bonded to an atom that is not deleted"); + } + } + } + } + + // also, if atoms change number of bonds, but aren't landlocked, that could be bad + int warnflag = 0; + if (comm->me == 0) + for (int i = 0; i < twomol->natoms; i++) { + if ((create_atoms[i][myrxn] == 0) && + (twomol_nxspecial[i][0] != onemol_nxspecial[equivalences[i][1][myrxn]-1][0]) && + (landlocked_atoms[i][myrxn] == 0)) { + warnflag = 1; + break; + } + } + + // also, if an atom changes any of its bonds, but is not landlocked, that could be bad + int thereflag; + if (comm->me == 0) + for (int i = 0; i < twomol->natoms; i++) { + if (landlocked_atoms[i][myrxn] == 1) continue; + for (int j = 0; j < twomol_nxspecial[i][0]; j++) { + int oneneighID = equivalences[twomol_xspecial[i][j]-1][1][myrxn]; + int ii = equivalences[i][1][myrxn] - 1; + thereflag = 0; + for (int k = 0; k < onemol_nxspecial[ii][0]; k++) { + if (oneneighID == onemol_xspecial[ii][k]) { + thereflag = 1; + break; + } + } + if (thereflag == 0) { + warnflag = 1; + break; + } + } + if (warnflag == 1) break; + } + + if (comm->me == 0 && warnflag == 1) error->warning(FLERR, "Fix bond/react: Atom affected " + "by reaction {} is too close to template edge",rxn_name[myrxn]); + + // finally, if a created atom is not landlocked, bad! 
+ for (int i = 0; i < twomol->natoms; i++) { + if (create_atoms[i][myrxn] == 1 && landlocked_atoms[i][myrxn] == 0) { + error->one(FLERR,"Fix bond/react: Created atom too close to template edge"); + } + } +} + +/* ---------------------------------------------------------------------- +let's dedup global_mega_glove +allows for same site undergoing different pathways, in parallel +------------------------------------------------------------------------- */ + +void FixBondReact::dedup_mega_gloves(int dedup_mode) +{ + // dedup_mode == LOCAL for local_dedup + // dedup_mode == GLOBAL for global_mega_glove + + if (dedup_mode == GLOBAL) + for (int i = 0; i < nreacts; i++) + ghostly_rxn_count[i] = 0; + + int dedup_size = 0; + if (dedup_mode == LOCAL) { + dedup_size = my_num_mega; + } else if (dedup_mode == GLOBAL) { + dedup_size = global_megasize; + } + + double **dedup_glove; + memory->create(dedup_glove,max_natoms+cuff,dedup_size,"bond/react:dedup_glove"); + + if (dedup_mode == LOCAL) { + for (int i = 0; i < dedup_size; i++) { + for (int j = 0; j < max_natoms+cuff; j++) { + dedup_glove[j][i] = my_mega_glove[j][i]; + } + } + } else if (dedup_mode == GLOBAL) { + for (int i = 0; i < dedup_size; i++) { + for (int j = 0; j < max_natoms+cuff; j++) { + dedup_glove[j][i] = global_mega_glove[j][i]; + } + } + } + + // dedup_mask is size dedup_size and filters reactions that have been deleted + // a value of 1 means this reaction instance has been deleted + int *dedup_mask = new int[dedup_size]; + for (int i = 0; i < dedup_size; i++) { + dedup_mask[i] = 0; + } + + // let's randomly mix up our reaction instances first + // then we can feel okay about ignoring ones we've already deleted (or accepted) + // based off std::shuffle + double *temp_rxn = new double[max_natoms+cuff]; + for (int i = dedup_size-1; i > 0; --i) { //dedup_size + // choose random entry to swap current one with + int k = floor(random[0]->uniform()*(i+1)); + + // swap entries + for (int j = 0; j < max_natoms+cuff; 
j++) + temp_rxn[j] = dedup_glove[j][i]; + + for (int j = 0; j < max_natoms+cuff; j++) { + dedup_glove[j][i] = dedup_glove[j][k]; + dedup_glove[j][k] = temp_rxn[j]; + } + } + delete [] temp_rxn; + + for (int i = 0; i < dedup_size; i++) { + if (dedup_mask[i] == 0) { + int myrxnid1 = dedup_glove[0][i]; + onemol = atom->molecules[unreacted_mol[myrxnid1]]; + for (int j = 0; j < onemol->natoms; j++) { + int check1 = dedup_glove[j+cuff][i]; + for (int ii = i + 1; ii < dedup_size; ii++) { + if (dedup_mask[ii] == 0) { + int myrxnid2 = dedup_glove[0][ii]; + twomol = atom->molecules[unreacted_mol[myrxnid2]]; + for (int jj = 0; jj < twomol->natoms; jj++) { + int check2 = dedup_glove[jj+cuff][ii]; + if (check2 == check1) { + dedup_mask[ii] = 1; + break; + } + } + } + } + } + } + } + + // we must update local_mega_glove and local_megasize + // we can simply overwrite local_mega_glove column by column + if (dedup_mode == LOCAL) { + int my_new_megasize = 0; + for (int i = 0; i < my_num_mega; i++) { + if (dedup_mask[i] == 0) { + for (int j = 0; j < max_natoms+cuff; j++) { + my_mega_glove[j][my_new_megasize] = dedup_glove[j][i]; + } + my_new_megasize++; + } + } + my_num_mega = my_new_megasize; + } + + // we must update global_mega_glove and global_megasize + // we can simply overwrite global_mega_glove column by column + if (dedup_mode == GLOBAL) { + int new_global_megasize = 0; + for (int i = 0; i < global_megasize; i++) { + if (dedup_mask[i] == 0) { + ghostly_rxn_count[(int) dedup_glove[0][i]]++; + for (int j = 0; j < max_natoms + cuff; j++) { + global_mega_glove[j][new_global_megasize] = dedup_glove[j][i]; + } + new_global_megasize++; + } + } + global_megasize = new_global_megasize; + } + + memory->destroy(dedup_glove); + delete [] dedup_mask; +} + +/* ---------------------------------------------------------------------- +let's unlimit movement of newly bonded atoms after n timesteps. 
+we give them back to the system thermostat +------------------------------------------------------------------------- */ + +void FixBondReact::unlimit_bond() +{ + // let's now unlimit in terms of i_limit_tags + // we just run through all nlocal, looking for > limit_duration + // then we return i_limit_tag to 0 (which removes from dynamic group) + int flag, cols; + int index1 = atom->find_custom("limit_tags",flag,cols); + int *i_limit_tags = atom->ivector[index1]; + + int *i_statted_tags; + if (stabilization_flag == 1) { + int index2 = atom->find_custom(statted_id,flag,cols); + i_statted_tags = atom->ivector[index2]; + } + + int index3 = atom->find_custom("react_tags",flag,cols); + int *i_react_tags = atom->ivector[index3]; + + int unlimitflag = 0; + for (int i = 0; i < atom->nlocal; i++) { + // unlimit atoms for next step! this resolves # of procs disparity, mostly + // first '1': indexing offset, second '1': for next step + if (i_limit_tags[i] != 0 && (update->ntimestep + 1 - i_limit_tags[i]) > limit_duration[i_react_tags[i]]) { + unlimitflag = 1; + i_limit_tags[i] = 0; + if (stabilization_flag == 1) i_statted_tags[i] = 1; + i_react_tags[i] = 0; + } + } + + // really should only communicate this per-atom property, not entire reneighboring + MPI_Allreduce(MPI_IN_PLACE,&unlimitflag,1,MPI_INT,MPI_MAX,world); + if (unlimitflag) next_reneighbor = update->ntimestep; +} + +/* ---------------------------------------------------------------------- +check mega_glove for ghosts +if so, flag for broadcasting for perusal by all processors +------------------------------------------------------------------------- */ + +void FixBondReact::glove_ghostcheck() +{ + // here we add glove to either local_mega_glove or ghostly_mega_glove + // ghostly_mega_glove includes atoms that are ghosts, either of this proc or another + // 'ghosts of another' indication taken from comm->sendlist + // also includes local gloves that overlap with ghostly gloves, to get dedup right + + for (int i = 
0; i < nreacts; i++) + local_rxn_count[i] = 0; + + for (int i = 0; i < my_num_mega; i++) { + rxnID = (int) my_mega_glove[0][i]; + onemol = atom->molecules[unreacted_mol[rxnID]]; + int ghostly = 0; + #if !defined(MPI_STUBS) + if (comm->style == Comm::BRICK) { + if (create_atoms_flag[rxnID] == 1) { + ghostly = 1; + } else { + for (int j = 0; j < onemol->natoms; j++) { + int ilocal = atom->map((tagint) my_mega_glove[j+cuff][i]); + if (ilocal >= atom->nlocal || localsendlist[ilocal] == 1) { + ghostly = 1; + break; + } + } + } + } else { + ghostly = 1; + } + #endif + + if (ghostly == 1) { + for (int j = 0; j < onemol->natoms+cuff; j++) { + ghostly_mega_glove[j][ghostly_num_mega] = my_mega_glove[j][i]; + } + ghostly_num_mega++; + } else { + local_rxn_count[rxnID]++; + for (int j = 0; j < onemol->natoms+cuff; j++) { + local_mega_glove[j][local_num_mega] = my_mega_glove[j][i]; + } + local_num_mega++; + } + } +} + +/* ---------------------------------------------------------------------- +broadcast entries of mega_glove which contain nonlocal atoms for perusal by all processors +------------------------------------------------------------------------- */ + +void FixBondReact::ghost_glovecast() +{ +#if !defined(MPI_STUBS) + const int nprocs = comm->nprocs; + + global_megasize = 0; + + int *allncols = new int[nprocs]; + for (int i = 0; i < nprocs; i++) + allncols[i] = 0; + MPI_Allgather(&ghostly_num_mega, 1, MPI_INT, allncols, 1, MPI_INT, world); + for (int i = 0; i < nprocs; i++) + global_megasize = global_megasize + allncols[i]; + + if (global_megasize == 0) { + delete [] allncols; + return; + } + + int *allstarts = new int[nprocs]; + + int start = 0; + for (int i = 0; i < comm->me; i++) { + start += allncols[i]; + } + MPI_Allgather(&start, 1, MPI_INT, allstarts, 1, MPI_INT, world); + MPI_Datatype columnunsized, column; + int sizes[2] = {max_natoms+cuff, global_megasize}; + int subsizes[2] = {max_natoms+cuff, 1}; + int starts[2] = {0,0}; + MPI_Type_create_subarray (2, 
sizes, subsizes, starts, MPI_ORDER_C, + MPI_DOUBLE, &columnunsized); + MPI_Type_create_resized (columnunsized, 0, sizeof(double), &column); + MPI_Type_commit(&column); + + memory->destroy(global_mega_glove); + memory->create(global_mega_glove,max_natoms+cuff,global_megasize,"bond/react:global_mega_glove"); + + for (int i = 0; i < max_natoms+cuff; i++) + for (int j = 0; j < global_megasize; j++) + global_mega_glove[i][j] = 0; + + if (ghostly_num_mega > 0) { + for (int i = 0; i < max_natoms+cuff; i++) { + for (int j = 0; j < ghostly_num_mega; j++) { + global_mega_glove[i][j+start] = ghostly_mega_glove[i][j]; + } + } + } + // let's send to root, dedup, then broadcast + if (comm->me == 0) { + MPI_Gatherv(MPI_IN_PLACE, ghostly_num_mega, column, // Note: some values ignored for MPI_IN_PLACE + &(global_mega_glove[0][0]), allncols, allstarts, + column, 0, world); + } else { + MPI_Gatherv(&(global_mega_glove[0][start]), ghostly_num_mega, column, + &(global_mega_glove[0][0]), allncols, allstarts, + column, 0, world); + } + + if (comm->me == 0) dedup_mega_gloves(GLOBAL); // global_mega_glove mode + MPI_Bcast(&global_megasize,1,MPI_INT,0,world); + MPI_Bcast(&(global_mega_glove[0][0]), global_megasize, column, 0, world); + + delete [] allstarts; + delete [] allncols; + + MPI_Type_free(&column); + MPI_Type_free(&columnunsized); +#endif +} + +/* ---------------------------------------------------------------------- +update molecule IDs, charges, types, special lists and all topology +------------------------------------------------------------------------- */ + +void FixBondReact::update_everything() +{ + int nlocal = atom->nlocal; // must be redefined after create atoms + int *type = atom->type; + int **nspecial = atom->nspecial; + tagint **special = atom->special; + + int **bond_type = atom->bond_type; + tagint **bond_atom = atom->bond_atom; + int *num_bond = atom->num_bond; + + // used when deleting atoms + int ndel,ndelone; + int *mark; + int nmark = nlocal; + 
memory->create(mark,nmark,"bond/react:mark"); + for (int i = 0; i < nmark; i++) mark[i] = 0; + + // flag used to delete special interactions + int *delflag; + memory->create(delflag,atom->maxspecial,"bond/react:delflag"); + + tagint *tag = atom->tag; + AtomVec *avec = atom->avec; + + // used when creating atoms + int inserted_atoms_flag = 0; + + // update atom->nbonds, etc. + // TODO: correctly tally with 'newton off' + int delta_bonds = 0; + int delta_angle = 0; + int delta_dihed = 0; + int delta_imprp = 0; + + // use the following per-atom arrays to keep track of reacting atoms + + int flag,cols; + int index1 = atom->find_custom("limit_tags",flag,cols); + int *i_limit_tags = atom->ivector[index1]; + + int *i_statted_tags; + if (stabilization_flag == 1) { + int index2 = atom->find_custom(statted_id,flag,cols); + i_statted_tags = atom->ivector[index2]; + } + + int index3 = atom->find_custom("react_tags",flag,cols); + int *i_react_tags = atom->ivector[index3]; + + // pass through twice + // redefining 'update_num_mega' and 'update_mega_glove' each time + // first pass: when glove is all local atoms + // second pass: search for local atoms in global_mega_glove + // add check for local atoms as well + + int update_num_mega; + tagint **update_mega_glove; + // for now, keeping rxnID in update_mega_glove, but not rest of cuff in update_mega_glove + int maxmega = MAX(local_num_mega,global_megasize); + memory->create(update_mega_glove,max_natoms+1,maxmega,"bond/react:update_mega_glove"); + + double *sim_total_charges; + if (rescale_charges_anyflag) memory->create(sim_total_charges,maxmega,"bond/react:sim_total_charges"); + + for (int pass = 0; pass < 2; pass++) { + update_num_mega = 0; + int *iskip = new int[nreacts]; + for (int i = 0; i < nreacts; i++) iskip[i] = 0; + if (pass == 0) { + for (int i = 0; i < local_num_mega; i++) { + rxnID = (int) local_mega_glove[0][i]; + // reactions already shuffled from dedup procedure, so can skip first N + if (iskip[rxnID]++ < 
nlocalskips[rxnID]) continue; + + // this will be overwritten if reaction skipped by create_atoms below + update_mega_glove[0][update_num_mega] = (tagint) local_mega_glove[0][i]; + for (int j = 0; j < max_natoms; j++) + update_mega_glove[j+1][update_num_mega] = (tagint) local_mega_glove[j+cuff][i]; + + // atoms inserted here for serial MPI_STUBS build only + if (create_atoms_flag[rxnID] == 1) { + onemol = atom->molecules[unreacted_mol[rxnID]]; + twomol = atom->molecules[reacted_mol[rxnID]]; + if (insert_atoms(update_mega_glove,update_num_mega)) { + inserted_atoms_flag = 1; + } else { // create aborted + reaction_count_total[rxnID]--; + continue; + } + } + + if (rescale_charges_flag[rxnID]) sim_total_charges[update_num_mega] = local_mega_glove[1][i]; + update_num_mega++; + } + } else if (pass == 1) { + for (int i = 0; i < global_megasize; i++) { + rxnID = (int) global_mega_glove[0][i]; + // reactions already shuffled from dedup procedure, so can skip first N + if (iskip[rxnID]++ < nghostlyskips[rxnID]) continue; + + // this will be overwritten if reaction skipped by create_atoms below + update_mega_glove[0][update_num_mega] = (tagint) global_mega_glove[0][i]; + for (int j = 0; j < max_natoms; j++) + update_mega_glove[j+1][update_num_mega] = (tagint) global_mega_glove[j+cuff][i]; + + // we can insert atoms here, now that reactions are finalized + // can't do it any earlier, due to skipped reactions (max_rxn) + // for MPI build, reactions that create atoms are always treated as 'global' + if (create_atoms_flag[rxnID] == 1) { + onemol = atom->molecules[unreacted_mol[rxnID]]; + twomol = atom->molecules[reacted_mol[rxnID]]; + if (insert_atoms(update_mega_glove,update_num_mega)) { + inserted_atoms_flag = 1; + } else { // create aborted + reaction_count_total[rxnID]--; + continue; + } + } + + if (rescale_charges_flag[rxnID]) sim_total_charges[update_num_mega] = global_mega_glove[1][i]; + update_num_mega++; + } + } + delete [] iskip; + + if (update_num_mega == 0) continue; 
+ + // if inserted atoms and global map exists, reset map now instead + // of waiting for comm since other pre-exchange fixes may use it + // invoke map_init() b/c atom count has grown + // do this once after all atom insertions + if (inserted_atoms_flag == 1 && atom->map_style != Atom::MAP_NONE) { + atom->map_init(); + atom->map_set(); + } + + // mark to-delete atoms + nlocal = atom->nlocal; + if (nlocal > nmark) { + memory->grow(mark,nlocal,"bond/react:mark"); + for (int i = nmark; i < nlocal; i++) mark[i] = 0; + nmark = nlocal; + } + for (int i = 0; i < update_num_mega; i++) { + rxnID = update_mega_glove[0][i]; + onemol = atom->molecules[unreacted_mol[rxnID]]; + for (int j = 0; j < onemol->natoms; j++) { + int iatom = atom->map(update_mega_glove[j+1][i]); + if (delete_atoms[j][rxnID] == 1 && iatom >= 0 && iatom < nlocal) { + mark[iatom] = 1; + } + } + } + + // update charges and types of landlocked atoms + // also keep track of 'stabilization' groups here + int n_custom_charge; + double charge_rescale_addend; + for (int i = 0; i < update_num_mega; i++) { + charge_rescale_addend = 0; + rxnID = update_mega_glove[0][i]; + twomol = atom->molecules[reacted_mol[rxnID]]; + if (rescale_charges_flag[rxnID]) { + n_custom_charge = rescale_charges_flag[rxnID]; + charge_rescale_addend = (sim_total_charges[i]-mol_total_charge[rxnID])/n_custom_charge; + } + for (int j = 0; j < twomol->natoms; j++) { + int jj = equivalences[j][1][rxnID]-1; + int ilocal = atom->map(update_mega_glove[jj+1][i]); + if (ilocal >= 0 && ilocal < nlocal) { + + // update->ntimestep could be 0. 
so add 1 throughout + i_limit_tags[ilocal] = update->ntimestep + 1; + if (stabilization_flag == 1) i_statted_tags[ilocal] = 0; + i_react_tags[ilocal] = rxnID; + + if (landlocked_atoms[j][rxnID] == 1) + type[ilocal] = twomol->type[j]; + if (twomol->qflag && atom->q_flag && custom_charges[jj][rxnID] == 1) { + double *q = atom->q; + q[ilocal] = twomol->q[j]+charge_rescale_addend; + } + } + } + } + + int insert_num; + // very nice and easy to completely overwrite special bond info for landlocked atoms + for (int i = 0; i < update_num_mega; i++) { + rxnID = update_mega_glove[0][i]; + onemol = atom->molecules[unreacted_mol[rxnID]]; + twomol = atom->molecules[reacted_mol[rxnID]]; + for (int j = 0; j < twomol->natoms; j++) { + int jj = equivalences[j][1][rxnID]-1; + int ilocal = atom->map(update_mega_glove[jj+1][i]); + if (ilocal < nlocal && ilocal >= 0) { + if (landlocked_atoms[j][rxnID] == 1) { + for (int k = 0; k < 3; k++) { + nspecial[ilocal][k] = twomol->nspecial[j][k]; + } + for (int p = 0; p < twomol->nspecial[j][2]; p++) { + special[ilocal][p] = update_mega_glove[equivalences[twomol->special[j][p]-1][1][rxnID]][i]; + } + } + // now delete and replace landlocked atoms from non-landlocked atoms' special info + // delete 1-2, 1-3, 1-4 specials individually. 
only delete if special exists in pre-reaction template + if (landlocked_atoms[j][rxnID] == 0) { + int ispec, fspec, imolspec, fmolspec, nspecdel[3]; + for (int k = 0; k < 3; k++) nspecdel[k] = 0; + for (int k = 0; k < atom->maxspecial; k++) delflag[k] = 0; + for (int specn = 0; specn < 3; specn++) { + if (specn == 0) { + imolspec = 0; + ispec = 0; + } else { + imolspec = onemol->nspecial[jj][specn-1]; + ispec = nspecial[ilocal][specn-1]; + } + fmolspec = onemol->nspecial[jj][specn]; + fspec = nspecial[ilocal][specn]; + for (int k = ispec; k < fspec; k++) { + for (int p = imolspec; p < fmolspec; p++) { + if (update_mega_glove[onemol->special[jj][p]][i] == special[ilocal][k]) { + delflag[k] = 1; + for (int m = 2; m >= specn; m--) nspecdel[m]++; + break; + } + } + } + } + int incr = 0; + for (int k = 0; k < nspecial[ilocal][2]; k++) + if (delflag[k] == 0) special[ilocal][incr++] = special[ilocal][k]; + for (int m = 0; m < 3; m++) nspecial[ilocal][m] -= nspecdel[m]; + // now reassign from reacted template + for (int k = 0; k < twomol->nspecial[j][2]; k++) { + if (k > twomol->nspecial[j][1] - 1) { + insert_num = nspecial[ilocal][2]++; + } else if (k > twomol->nspecial[j][0] - 1) { + insert_num = nspecial[ilocal][1]++; + nspecial[ilocal][2]++; + } else { + insert_num = nspecial[ilocal][0]++; + nspecial[ilocal][1]++; + nspecial[ilocal][2]++; + } + if (nspecial[ilocal][2] > atom->maxspecial) + error->one(FLERR,"Fix bond/react special bond generation overflow"); + for (int n = nspecial[ilocal][2]-1; n > insert_num; n--) { + special[ilocal][n] = special[ilocal][n-1]; + } + special[ilocal][insert_num] = update_mega_glove[equivalences[twomol->special[j][k]-1][1][rxnID]][i]; + } + } + } + } + } + + // next let's update bond info + // cool thing is, newton_bond issues are already taken care of in templates + // same with class2 improper issues, which is why this fix started in the first place + // also need to find any instances of bond history to update histories + auto 
histories = modify->get_fix_by_style("BOND_HISTORY"); + int n_histories = histories.size(); + + for (int i = 0; i < update_num_mega; i++) { + rxnID = update_mega_glove[0][i]; + twomol = atom->molecules[reacted_mol[rxnID]]; + // let's first delete all bond info about landlocked atoms + for (int j = 0; j < twomol->natoms; j++) { + int jj = equivalences[j][1][rxnID]-1; + if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { + if (landlocked_atoms[j][rxnID] == 1) { + delta_bonds -= num_bond[atom->map(update_mega_glove[jj+1][i])]; + // If deleting all bonds, first cache then remove all histories + if (n_histories > 0) + for (auto &ihistory: histories) { + for (int n = 0; n < num_bond[atom->map(update_mega_glove[jj+1][i])]; n++) + dynamic_cast(ihistory)->cache_history(atom->map(update_mega_glove[jj+1][i]), n); + for (int n = 0; n < num_bond[atom->map(update_mega_glove[jj+1][i])]; n++) + dynamic_cast(ihistory)->delete_history(atom->map(update_mega_glove[jj+1][i]), 0); + } + num_bond[atom->map(update_mega_glove[jj+1][i])] = 0; + } + if (landlocked_atoms[j][rxnID] == 0) { + for (int p = num_bond[atom->map(update_mega_glove[jj+1][i])]-1; p > -1 ; p--) { + for (int n = 0; n < twomol->natoms; n++) { + int nn = equivalences[n][1][rxnID]-1; + if (n!=j && bond_atom[atom->map(update_mega_glove[jj+1][i])][p] == update_mega_glove[nn+1][i] && landlocked_atoms[n][rxnID] == 1) { + // Cache history information, shift history, then delete final element + if (n_histories > 0) + for (auto &ihistory: histories) + dynamic_cast(ihistory)->cache_history(atom->map(update_mega_glove[jj+1][i]), p); + for (int m = p; m < num_bond[atom->map(update_mega_glove[jj+1][i])]-1; m++) { + bond_type[atom->map(update_mega_glove[jj+1][i])][m] = bond_type[atom->map(update_mega_glove[jj+1][i])][m+1]; + bond_atom[atom->map(update_mega_glove[jj+1][i])][m] = bond_atom[atom->map(update_mega_glove[jj+1][i])][m+1]; + if (n_histories > 0) + for (auto &ihistory: histories) 
+ dynamic_cast(ihistory)->shift_history(atom->map(update_mega_glove[jj+1][i]),m,m+1); + } + if (n_histories > 0) + for (auto &ihistory: histories) + dynamic_cast(ihistory)->delete_history(atom->map(update_mega_glove[jj+1][i]), + num_bond[atom->map(update_mega_glove[jj+1][i])]-1); + num_bond[atom->map(update_mega_glove[jj+1][i])]--; + delta_bonds--; + } + } + } + } + } + } + // now let's add the new bond info. + for (int j = 0; j < twomol->natoms; j++) { + int jj = equivalences[j][1][rxnID]-1; + if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { + if (landlocked_atoms[j][rxnID] == 1) { + num_bond[atom->map(update_mega_glove[jj+1][i])] = twomol->num_bond[j]; + delta_bonds += twomol->num_bond[j]; + for (int p = 0; p < twomol->num_bond[j]; p++) { + bond_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->bond_type[j][p]; + bond_atom[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->bond_atom[j][p]-1][1][rxnID]][i]; + // Check cached history data to see if bond regenerated + if (n_histories > 0) + for (auto &ihistory: histories) + dynamic_cast(ihistory)->check_cache(atom->map(update_mega_glove[jj+1][i]), p); + } + } + if (landlocked_atoms[j][rxnID] == 0) { + for (int p = 0; p < twomol->num_bond[j]; p++) { + if (landlocked_atoms[twomol->bond_atom[j][p]-1][rxnID] == 1) { + insert_num = num_bond[atom->map(update_mega_glove[jj+1][i])]; + bond_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->bond_type[j][p]; + bond_atom[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->bond_atom[j][p]-1][1][rxnID]][i]; + // Check cached history data to see if bond regenerated + if (n_histories > 0) + for (auto &ihistory: histories) + dynamic_cast(ihistory)->check_cache(atom->map(update_mega_glove[jj+1][i]), insert_num); + num_bond[atom->map(update_mega_glove[jj+1][i])]++; + if (num_bond[atom->map(update_mega_glove[jj+1][i])] > atom->bond_per_atom) 
+ error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom"); + delta_bonds++; + } + } + } + } + } + } + + if (n_histories > 0) + for (auto &ihistory: histories) + dynamic_cast(ihistory)->clear_cache(); + + // Angles! First let's delete all angle info: + if (force->angle) { + int *num_angle = atom->num_angle; + int **angle_type = atom->angle_type; + tagint **angle_atom1 = atom->angle_atom1; + tagint **angle_atom2 = atom->angle_atom2; + tagint **angle_atom3 = atom->angle_atom3; + + for (int i = 0; i < update_num_mega; i++) { + rxnID = update_mega_glove[0][i]; + twomol = atom->molecules[reacted_mol[rxnID]]; + for (int j = 0; j < twomol->natoms; j++) { + int jj = equivalences[j][1][rxnID]-1; + if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { + if (landlocked_atoms[j][rxnID] == 1) { + delta_angle -= num_angle[atom->map(update_mega_glove[jj+1][i])]; + num_angle[atom->map(update_mega_glove[jj+1][i])] = 0; + } + if (landlocked_atoms[j][rxnID] == 0) { + for (int p = num_angle[atom->map(update_mega_glove[jj+1][i])]-1; p > -1; p--) { + for (int n = 0; n < twomol->natoms; n++) { + int nn = equivalences[n][1][rxnID]-1; + if (n!=j && landlocked_atoms[n][rxnID] == 1 && + (angle_atom1[atom->map(update_mega_glove[jj+1][i])][p] == update_mega_glove[nn+1][i] || + angle_atom2[atom->map(update_mega_glove[jj+1][i])][p] == update_mega_glove[nn+1][i] || + angle_atom3[atom->map(update_mega_glove[jj+1][i])][p] == update_mega_glove[nn+1][i])) { + for (int m = p; m < num_angle[atom->map(update_mega_glove[jj+1][i])]-1; m++) { + angle_type[atom->map(update_mega_glove[jj+1][i])][m] = angle_type[atom->map(update_mega_glove[jj+1][i])][m+1]; + angle_atom1[atom->map(update_mega_glove[jj+1][i])][m] = angle_atom1[atom->map(update_mega_glove[jj+1][i])][m+1]; + angle_atom2[atom->map(update_mega_glove[jj+1][i])][m] = angle_atom2[atom->map(update_mega_glove[jj+1][i])][m+1]; + angle_atom3[atom->map(update_mega_glove[jj+1][i])][m] = 
angle_atom3[atom->map(update_mega_glove[jj+1][i])][m+1]; + } + num_angle[atom->map(update_mega_glove[jj+1][i])]--; + delta_angle--; + break; + } + } + } + } + } + } + // now let's add the new angle info. + if (twomol->angleflag) { + for (int j = 0; j < twomol->natoms; j++) { + int jj = equivalences[j][1][rxnID]-1; + if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { + if (landlocked_atoms[j][rxnID] == 1) { + num_angle[atom->map(update_mega_glove[jj+1][i])] = twomol->num_angle[j]; + delta_angle += twomol->num_angle[j]; + for (int p = 0; p < twomol->num_angle[j]; p++) { + angle_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->angle_type[j][p]; + angle_atom1[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom1[j][p]-1][1][rxnID]][i]; + angle_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom2[j][p]-1][1][rxnID]][i]; + angle_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom3[j][p]-1][1][rxnID]][i]; + } + } + if (landlocked_atoms[j][rxnID] == 0) { + for (int p = 0; p < twomol->num_angle[j]; p++) { + if (landlocked_atoms[twomol->angle_atom1[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->angle_atom2[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->angle_atom3[j][p]-1][rxnID] == 1) { + insert_num = num_angle[atom->map(update_mega_glove[jj+1][i])]; + angle_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->angle_type[j][p]; + angle_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom1[j][p]-1][1][rxnID]][i]; + angle_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom2[j][p]-1][1][rxnID]][i]; + angle_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom3[j][p]-1][1][rxnID]][i]; + 
num_angle[atom->map(update_mega_glove[jj+1][i])]++; + if (num_angle[atom->map(update_mega_glove[jj+1][i])] > atom->angle_per_atom) + error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom"); + delta_angle++; + } + } + } + } + } + } + } + } + + // Dihedrals! first let's delete all dihedral info for landlocked atoms + if (force->dihedral) { + int *num_dihedral = atom->num_dihedral; + int **dihedral_type = atom->dihedral_type; + tagint **dihedral_atom1 = atom->dihedral_atom1; + tagint **dihedral_atom2 = atom->dihedral_atom2; + tagint **dihedral_atom3 = atom->dihedral_atom3; + tagint **dihedral_atom4 = atom->dihedral_atom4; + + for (int i = 0; i < update_num_mega; i++) { + rxnID = update_mega_glove[0][i]; + twomol = atom->molecules[reacted_mol[rxnID]]; + for (int j = 0; j < twomol->natoms; j++) { + int jj = equivalences[j][1][rxnID]-1; + if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { + if (landlocked_atoms[j][rxnID] == 1) { + delta_dihed -= num_dihedral[atom->map(update_mega_glove[jj+1][i])]; + num_dihedral[atom->map(update_mega_glove[jj+1][i])] = 0; + } + if (landlocked_atoms[j][rxnID] == 0) { + for (int p = num_dihedral[atom->map(update_mega_glove[jj+1][i])]-1; p > -1; p--) { + for (int n = 0; n < twomol->natoms; n++) { + int nn = equivalences[n][1][rxnID]-1; + if (n!=j && landlocked_atoms[n][rxnID] == 1 && + (dihedral_atom1[atom->map(update_mega_glove[jj+1][i])][p] == update_mega_glove[nn+1][i] || + dihedral_atom2[atom->map(update_mega_glove[jj+1][i])][p] == update_mega_glove[nn+1][i] || + dihedral_atom3[atom->map(update_mega_glove[jj+1][i])][p] == update_mega_glove[nn+1][i] || + dihedral_atom4[atom->map(update_mega_glove[jj+1][i])][p] == update_mega_glove[nn+1][i])) { + for (int m = p; m < num_dihedral[atom->map(update_mega_glove[jj+1][i])]-1; m++) { + dihedral_type[atom->map(update_mega_glove[jj+1][i])][m] = dihedral_type[atom->map(update_mega_glove[jj+1][i])][m+1]; + 
dihedral_atom1[atom->map(update_mega_glove[jj+1][i])][m] = dihedral_atom1[atom->map(update_mega_glove[jj+1][i])][m+1]; + dihedral_atom2[atom->map(update_mega_glove[jj+1][i])][m] = dihedral_atom2[atom->map(update_mega_glove[jj+1][i])][m+1]; + dihedral_atom3[atom->map(update_mega_glove[jj+1][i])][m] = dihedral_atom3[atom->map(update_mega_glove[jj+1][i])][m+1]; + dihedral_atom4[atom->map(update_mega_glove[jj+1][i])][m] = dihedral_atom4[atom->map(update_mega_glove[jj+1][i])][m+1]; + } + num_dihedral[atom->map(update_mega_glove[jj+1][i])]--; + delta_dihed--; + break; + } + } + } + } + } + } + // now let's add new dihedral info + if (twomol->dihedralflag) { + for (int j = 0; j < twomol->natoms; j++) { + int jj = equivalences[j][1][rxnID]-1; + if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { + if (landlocked_atoms[j][rxnID] == 1) { + num_dihedral[atom->map(update_mega_glove[jj+1][i])] = twomol->num_dihedral[j]; + delta_dihed += twomol->num_dihedral[j]; + for (int p = 0; p < twomol->num_dihedral[j]; p++) { + dihedral_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->dihedral_type[j][p]; + dihedral_atom1[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom1[j][p]-1][1][rxnID]][i]; + dihedral_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom2[j][p]-1][1][rxnID]][i]; + dihedral_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom3[j][p]-1][1][rxnID]][i]; + dihedral_atom4[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom4[j][p]-1][1][rxnID]][i]; + } + } + if (landlocked_atoms[j][rxnID] == 0) { + for (int p = 0; p < twomol->num_dihedral[j]; p++) { + if (landlocked_atoms[twomol->dihedral_atom1[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->dihedral_atom2[j][p]-1][rxnID] == 1 || + 
landlocked_atoms[twomol->dihedral_atom3[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->dihedral_atom4[j][p]-1][rxnID] == 1) { + insert_num = num_dihedral[atom->map(update_mega_glove[jj+1][i])]; + dihedral_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->dihedral_type[j][p]; + dihedral_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom1[j][p]-1][1][rxnID]][i]; + dihedral_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom2[j][p]-1][1][rxnID]][i]; + dihedral_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom3[j][p]-1][1][rxnID]][i]; + dihedral_atom4[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom4[j][p]-1][1][rxnID]][i]; + num_dihedral[atom->map(update_mega_glove[jj+1][i])]++; + if (num_dihedral[atom->map(update_mega_glove[jj+1][i])] > atom->dihedral_per_atom) + error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom"); + delta_dihed++; + } + } + } + } + } + } + } + } + + // finally IMPROPERS!!!! 
first let's delete all improper info for landlocked atoms + if (force->improper) { + int *num_improper = atom->num_improper; + int **improper_type = atom->improper_type; + tagint **improper_atom1 = atom->improper_atom1; + tagint **improper_atom2 = atom->improper_atom2; + tagint **improper_atom3 = atom->improper_atom3; + tagint **improper_atom4 = atom->improper_atom4; + + for (int i = 0; i < update_num_mega; i++) { + rxnID = update_mega_glove[0][i]; + twomol = atom->molecules[reacted_mol[rxnID]]; + for (int j = 0; j < twomol->natoms; j++) { + int jj = equivalences[j][1][rxnID]-1; + if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { + if (landlocked_atoms[j][rxnID] == 1) { + delta_imprp -= num_improper[atom->map(update_mega_glove[jj+1][i])]; + num_improper[atom->map(update_mega_glove[jj+1][i])] = 0; + } + if (landlocked_atoms[j][rxnID] == 0) { + for (int p = num_improper[atom->map(update_mega_glove[jj+1][i])]-1; p > -1; p--) { + for (int n = 0; n < twomol->natoms; n++) { + int nn = equivalences[n][1][rxnID]-1; + if (n!=j && landlocked_atoms[n][rxnID] == 1 && + (improper_atom1[atom->map(update_mega_glove[jj+1][i])][p] == update_mega_glove[nn+1][i] || + improper_atom2[atom->map(update_mega_glove[jj+1][i])][p] == update_mega_glove[nn+1][i] || + improper_atom3[atom->map(update_mega_glove[jj+1][i])][p] == update_mega_glove[nn+1][i] || + improper_atom4[atom->map(update_mega_glove[jj+1][i])][p] == update_mega_glove[nn+1][i])) { + for (int m = p; m < num_improper[atom->map(update_mega_glove[jj+1][i])]-1; m++) { + improper_type[atom->map(update_mega_glove[jj+1][i])][m] = improper_type[atom->map(update_mega_glove[jj+1][i])][m+1]; + improper_atom1[atom->map(update_mega_glove[jj+1][i])][m] = improper_atom1[atom->map(update_mega_glove[jj+1][i])][m+1]; + improper_atom2[atom->map(update_mega_glove[jj+1][i])][m] = improper_atom2[atom->map(update_mega_glove[jj+1][i])][m+1]; + improper_atom3[atom->map(update_mega_glove[jj+1][i])][m] = 
improper_atom3[atom->map(update_mega_glove[jj+1][i])][m+1]; + improper_atom4[atom->map(update_mega_glove[jj+1][i])][m] = improper_atom4[atom->map(update_mega_glove[jj+1][i])][m+1]; + } + num_improper[atom->map(update_mega_glove[jj+1][i])]--; + delta_imprp--; + break; + } + } + } + } + } + } + // now let's add new improper info + if (twomol->improperflag) { + for (int j = 0; j < twomol->natoms; j++) { + int jj = equivalences[j][1][rxnID]-1; + if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { + if (landlocked_atoms[j][rxnID] == 1) { + num_improper[atom->map(update_mega_glove[jj+1][i])] = twomol->num_improper[j]; + delta_imprp += twomol->num_improper[j]; + for (int p = 0; p < twomol->num_improper[j]; p++) { + improper_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->improper_type[j][p]; + improper_atom1[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom1[j][p]-1][1][rxnID]][i]; + improper_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom2[j][p]-1][1][rxnID]][i]; + improper_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom3[j][p]-1][1][rxnID]][i]; + improper_atom4[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom4[j][p]-1][1][rxnID]][i]; + } + } + if (landlocked_atoms[j][rxnID] == 0) { + for (int p = 0; p < twomol->num_improper[j]; p++) { + if (landlocked_atoms[twomol->improper_atom1[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->improper_atom2[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->improper_atom3[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->improper_atom4[j][p]-1][rxnID] == 1) { + insert_num = num_improper[atom->map(update_mega_glove[jj+1][i])]; + improper_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->improper_type[j][p]; + 
improper_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom1[j][p]-1][1][rxnID]][i]; + improper_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom2[j][p]-1][1][rxnID]][i]; + improper_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom3[j][p]-1][1][rxnID]][i]; + improper_atom4[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom4[j][p]-1][1][rxnID]][i]; + num_improper[atom->map(update_mega_glove[jj+1][i])]++; + if (num_improper[atom->map(update_mega_glove[jj+1][i])] > atom->improper_per_atom) + error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom"); + delta_imprp++; + } + } + } + } + } + } + } + } + + } + + memory->destroy(update_mega_glove); + if (rescale_charges_anyflag) memory->destroy(sim_total_charges); + + // delete atoms. taken from fix_evaporate. 
but don't think it needs to be in pre_exchange + // loop in reverse order to avoid copying marked atoms + ndel = ndelone = 0; + for (int i = atom->nlocal-1; i >= 0; i--) { + if (mark[i] == 1) { + avec->copy(atom->nlocal-1,i,1); + atom->nlocal--; + ndelone++; + + if (atom->avec->bonds_allow) { + if (force->newton_bond) delta_bonds += atom->num_bond[i]; + else { + for (int j = 0; j < atom->num_bond[i]; j++) { + if (tag[i] < atom->bond_atom[i][j]) delta_bonds++; + } + } + } + if (atom->avec->angles_allow) { + if (force->newton_bond) delta_angle += atom->num_angle[i]; + else { + for (int j = 0; j < atom->num_angle[i]; j++) { + int m = atom->map(atom->angle_atom2[i][j]); + if (m >= 0 && m < nlocal) delta_angle++; + } + } + } + if (atom->avec->dihedrals_allow) { + if (force->newton_bond) delta_dihed += atom->num_dihedral[i]; + else { + for (int j = 0; j < atom->num_dihedral[i]; j++) { + int m = atom->map(atom->dihedral_atom2[i][j]); + if (m >= 0 && m < nlocal) delta_dihed++; + } + } + } + if (atom->avec->impropers_allow) { + if (force->newton_bond) delta_imprp += atom->num_improper[i]; + else { + for (int j = 0; j < atom->num_improper[i]; j++) { + int m = atom->map(atom->improper_atom2[i][j]); + if (m >= 0 && m < nlocal) delta_imprp++; + } + } + } + } + } + memory->destroy(mark); + memory->destroy(delflag); + + MPI_Allreduce(&ndelone,&ndel,1,MPI_INT,MPI_SUM,world); + + atom->natoms -= ndel; + // done deleting atoms + + // reset mol ids + if (reset_mol_ids_flag) reset_mol_ids->reset(); + + // something to think about: this could done much more concisely if + // all atom-level info (bond,angles, etc...) 
were kinda inherited from a common data struct --JG + + int Tdelta_bonds; + MPI_Allreduce(&delta_bonds,&Tdelta_bonds,1,MPI_INT,MPI_SUM,world); + atom->nbonds += Tdelta_bonds; + + int Tdelta_angle; + MPI_Allreduce(&delta_angle,&Tdelta_angle,1,MPI_INT,MPI_SUM,world); + atom->nangles += Tdelta_angle; + + int Tdelta_dihed; + MPI_Allreduce(&delta_dihed,&Tdelta_dihed,1,MPI_INT,MPI_SUM,world); + atom->ndihedrals += Tdelta_dihed; + + int Tdelta_imprp; + MPI_Allreduce(&delta_imprp,&Tdelta_imprp,1,MPI_INT,MPI_SUM,world); + atom->nimpropers += Tdelta_imprp; + + if (ndel && (atom->map_style != Atom::MAP_NONE)) { + atom->nghost = 0; + atom->map_init(); + atom->map_set(); + } +} + +/* ---------------------------------------------------------------------- +insert created atoms +------------------------------------------------------------------------- */ + +int FixBondReact::insert_atoms(tagint **my_update_mega_glove, int iupdate) +{ + // inserting atoms based off fix_deposit->pre_exchange + int flag; + imageint *imageflags; + double **coords,lamda[3],rotmat[3][3]; + double *newcoord; + double **v = atom->v; + double t,delx,dely,delz,rsq; + + memory->create(coords,twomol->natoms,3,"bond/react:coords"); + memory->create(imageflags,twomol->natoms,"bond/react:imageflags"); + + double *sublo,*subhi; + if (domain->triclinic == 0) { + sublo = domain->sublo; + subhi = domain->subhi; + } else { + sublo = domain->sublo_lamda; + subhi = domain->subhi_lamda; + } + + // find current max atom and molecule IDs + tagint *tag = atom->tag; + double **x = atom->x; + tagint *molecule = atom->molecule; + int nlocal = atom->nlocal; + + tagint maxtag_all,maxmol_all; + tagint max = 0; + for (int i = 0; i < nlocal; i++) max = MAX(max,tag[i]); + MPI_Allreduce(&max,&maxtag_all,1,MPI_LMP_TAGINT,MPI_MAX,world); + + max = 0; + for (int i = 0; i < nlocal; i++) max = MAX(max,molecule[i]); + MPI_Allreduce(&max,&maxmol_all,1,MPI_LMP_TAGINT,MPI_MAX,world); + + int dimension = domain->dimension; + + // only proc 
that owns reacting atom (use ibonding), + // fits post-reaction template to reaction site, for creating atoms + int n2superpose = 0; + for (int j = 0; j < twomol->natoms; j++) { + if (modify_create_fragid[rxnID] >= 0) + if (!twomol->fragmentmask[modify_create_fragid[rxnID]][j]) continue; + if (!create_atoms[j][rxnID] && !delete_atoms[equivalences[j][1][rxnID]][rxnID]) + n2superpose++; + } + + int ifit = atom->map(my_update_mega_glove[ibonding[rxnID]+1][iupdate]); // use this local ID to find fitting proc + Superpose3D superposer(n2superpose); + int fitroot = 0; + if (ifit >= 0 && ifit < atom->nlocal) { + fitroot = comm->me; + + // get 'temperatere' averaged over site, used for created atoms' vels + // note: row_offset for my_update_mega_glove is unity, not 'cuff' + t = get_temperature(my_update_mega_glove,1,iupdate); + + double **xfrozen; // coordinates for the "frozen" target molecule + double **xmobile; // coordinates for the "mobile" molecule + memory->create(xfrozen,n2superpose,3,"bond/react:xfrozen"); + memory->create(xmobile,n2superpose,3,"bond/react:xmobile"); + tagint iatom; + tagint iref = -1; // choose first atom as reference + int fit_incr = 0; + for (int j = 0; j < twomol->natoms; j++) { + if (modify_create_fragid[rxnID] >= 0) + if (!twomol->fragmentmask[modify_create_fragid[rxnID]][j]) continue; + int ipre = equivalences[j][1][rxnID]-1; // equiv pre-reaction template index + if (!create_atoms[j][rxnID] && !delete_atoms[ipre][rxnID]) { + if (atom->map(my_update_mega_glove[ipre+1][iupdate]) < 0) { + error->warning(FLERR," eligible atoms skipped for created-atoms fit on rank {}\n", + comm->me); + continue; + } + iatom = atom->map(my_update_mega_glove[ipre+1][iupdate]); + if (iref == -1) iref = iatom; + iatom = domain->closest_image(iref,iatom); + for (int k = 0; k < 3; k++) { + xfrozen[fit_incr][k] = x[iatom][k]; + xmobile[fit_incr][k] = twomol->x[j][k]; + } + fit_incr++; + } + } + superposer.Superpose(xfrozen, xmobile); + for (int i = 0; i < 3; i++) + 
for (int j = 0; j < 3; j++) + rotmat[i][j] = superposer.R[i][j]; + memory->destroy(xfrozen); + memory->destroy(xmobile); + } + MPI_Allreduce(MPI_IN_PLACE,&fitroot,1,MPI_INT,MPI_SUM,world); + MPI_Bcast(&t,1,MPI_DOUBLE,fitroot,world); + + // get coordinates and image flags + for (int m = 0; m < twomol->natoms; m++) { + if (create_atoms[m][rxnID] == 1) { + // apply optimal rotation/translation for created atom coords + // also map coords back into simulation box + if (fitroot == comm->me) { + MathExtra::matvec(rotmat,twomol->x[m],coords[m]); + for (int i = 0; i < 3; i++) coords[m][i] += superposer.T[i]; + imageflags[m] = atom->image[ifit]; + domain->remap(coords[m],imageflags[m]); + } + MPI_Bcast(&imageflags[m],1,MPI_LMP_IMAGEINT,fitroot,world); + MPI_Bcast(coords[m],3,MPI_DOUBLE,fitroot,world); + } + } + + // check distance between any existing atom and inserted atom + // if less than near, abort + if (overlapsq[rxnID] > 0) { + int abortflag = 0; + for (int m = 0; m < twomol->natoms; m++) { + if (create_atoms[m][rxnID] == 1) { + for (int i = 0; i < nlocal; i++) { + delx = coords[m][0] - x[i][0]; + dely = coords[m][1] - x[i][1]; + delz = coords[m][2] - x[i][2]; + domain->minimum_image(delx,dely,delz); + rsq = delx*delx + dely*dely + delz*delz; + if (rsq < overlapsq[rxnID]) { + abortflag = 1; + break; + } + } + if (abortflag) break; + } + } + MPI_Allreduce(MPI_IN_PLACE,&abortflag,1,MPI_INT,MPI_MAX,world); + if (abortflag) { + memory->destroy(coords); + memory->destroy(imageflags); + return 0; + } + } + + // clear ghost count and any ghost bonus data internal to AtomVec + // same logic as beginning of Comm::exchange() + // do it now b/c inserting atoms will overwrite ghost atoms + atom->nghost = 0; + atom->avec->clear_bonus(); + + // check if new atoms are in my sub-box or above it if I am highest proc + // if so, add atom to my list via create_atom() + // initialize additional info about the atoms + // set group mask to "all" plus fix group + int preID; // new 
equivalences index + int add_count = 0; + for (int m = 0; m < twomol->natoms; m++) { + if (create_atoms[m][rxnID] == 1) { + // increase atom count + add_count++; + preID = onemol->natoms+add_count; + + if (domain->triclinic) { + domain->x2lamda(coords[m],lamda); + newcoord = lamda; + } else newcoord = coords[m]; + + flag = 0; + if (newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] && + newcoord[1] >= sublo[1] && newcoord[1] < subhi[1] && + newcoord[2] >= sublo[2] && newcoord[2] < subhi[2]) flag = 1; + else if (dimension == 3 && newcoord[2] >= domain->boxhi[2]) { + if (comm->layout != Comm::LAYOUT_TILED) { + if (comm->myloc[2] == comm->procgrid[2]-1 && + newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] && + newcoord[1] >= sublo[1] && newcoord[1] < subhi[1]) flag = 1; + } else { + if (comm->mysplit[2][1] == 1.0 && + newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] && + newcoord[1] >= sublo[1] && newcoord[1] < subhi[1]) flag = 1; + } + } else if (dimension == 2 && newcoord[1] >= domain->boxhi[1]) { + if (comm->layout != Comm::LAYOUT_TILED) { + if (comm->myloc[1] == comm->procgrid[1]-1 && + newcoord[0] >= sublo[0] && newcoord[0] < subhi[0]) flag = 1; + } else { + if (comm->mysplit[1][1] == 1.0 && + newcoord[0] >= sublo[0] && newcoord[0] < subhi[0]) flag = 1; + } + } + + int root = 0; + if (flag) { + root = comm->me; + + atom->avec->create_atom(twomol->type[m],coords[m]); + int n = atom->nlocal - 1; + atom->tag[n] = maxtag_all + add_count; + + // locally update mega_glove + my_update_mega_glove[preID][iupdate] = atom->tag[n]; + + if (atom->molecule_flag) { + if (twomol->moleculeflag) { + atom->molecule[n] = maxmol_all + twomol->molecule[m]; + } else { + atom->molecule[n] = maxmol_all + 1; + } + } + + atom->mask[n] = 1 | groupbit; + atom->image[n] = imageflags[m]; + + // guess a somewhat reasonable initial velocity based on reaction site + // further control is possible using bond_react_MASTER_group + // compute |velocity| corresponding to a given temperature t, using 
specific atom's mass + double mymass = atom->rmass ? atom->rmass[n] : atom->mass[twomol->type[m]]; + double vtnorm = sqrt(t / (force->mvv2e / (dimension * force->boltz)) / mymass); + v[n][0] = random[rxnID]->uniform(); + v[n][1] = random[rxnID]->uniform(); + v[n][2] = random[rxnID]->uniform(); + double vnorm = sqrt(v[n][0]*v[n][0] + v[n][1]*v[n][1] + v[n][2]*v[n][2]); + v[n][0] = v[n][0]/vnorm*vtnorm; + v[n][1] = v[n][1]/vnorm*vtnorm; + v[n][2] = v[n][2]/vnorm*vtnorm; + modify->create_attribute(n); + } + // globally update mega_glove and equivalences + MPI_Allreduce(MPI_IN_PLACE,&root,1,MPI_INT,MPI_SUM,world); + MPI_Bcast(&my_update_mega_glove[preID][iupdate],1,MPI_LMP_TAGINT,root,world); + equivalences[m][0][rxnID] = m+1; + equivalences[m][1][rxnID] = preID; + reverse_equiv[preID-1][0][rxnID] = preID; + reverse_equiv[preID-1][1][rxnID] = m+1; + } + } + + // reset global natoms here + // reset atom map elsewhere, after all calls to 'insert_atoms' + atom->natoms += add_count; + if (atom->natoms < 0) + error->all(FLERR,"Too many total atoms"); + maxtag_all += add_count; + if (maxtag_all >= MAXTAGINT) + error->all(FLERR,"New atom IDs exceed maximum allowed ID"); + // atom creation successful + memory->destroy(coords); + memory->destroy(imageflags); + return 1; +} + +/* ---------------------------------------------------------------------- +add equal-style variable to keyword argument list +------------------------------------------------------------------------- */ + +void FixBondReact::read_variable_keyword(const char *myarg, int keyword, int myrxn) +{ + var_id[keyword][myrxn] = input->variable->find(myarg); + if (var_id[keyword][myrxn] < 0) + error->all(FLERR,"Fix bond/react: Variable name {} does not exist",myarg); + if (!input->variable->equalstyle(var_id[keyword][myrxn])) + error->all(FLERR,"Fix bond/react: Variable {} is not equal-style",myarg); + var_flag[keyword][myrxn] = 1; +} + +/* ---------------------------------------------------------------------- +read 
map file +------------------------------------------------------------------------- */ + +void FixBondReact::read_map_file(int myrxn) +{ + int rv; + char line[MAXLINE] = {'\0'}; + char keyword[MAXLINE] = {'\0'}; + char *eof,*ptr; + + // skip 1st line of file + eof = fgets(line,MAXLINE,fp); + if (eof == nullptr) error->one(FLERR,"Fix bond/react: Unexpected end of superimpose file"); + + // read header lines + // skip blank lines or lines that start with "#" + // stop when read an unrecognized line + + ncreate = 0; + while (true) { + + readline(line); + + // trim anything from '#' onward + // if line is blank, continue + + if ((ptr = strchr(line,'#'))) *ptr = '\0'; + if (strspn(line," \t\n\r") == strlen(line)) continue; + + if (strstr(line,"edgeIDs")) sscanf(line,"%d",&nedge); + else if (strstr(line,"equivalences")) { + rv = sscanf(line,"%d",&nequivalent); + if (rv != 1) error->one(FLERR, "Map file header is incorrectly formatted"); + if (nequivalent != onemol->natoms) + error->one(FLERR,"Fix bond/react: Number of equivalences in map file must " + "equal number of atoms in reaction templates"); + } + else if (strstr(line,"deleteIDs")) { + rv = sscanf(line,"%d",&ndelete); + if (rv != 1) error->one(FLERR, "Map file header is incorrectly formatted"); + } else if (strstr(line,"createIDs")) { + rv = sscanf(line,"%d",&ncreate); + if (rv != 1) error->one(FLERR, "Map file header is incorrectly formatted"); + } else if (strstr(line,"chiralIDs")) { + rv = sscanf(line,"%d",&nchiral); + if (rv != 1) error->one(FLERR, "Map file header is incorrectly formatted"); + } else if (strstr(line,"constraints")) { + rv = sscanf(line,"%d",&nconstraints[myrxn]); + if (rv != 1) error->one(FLERR, "Map file header is incorrectly formatted"); + if (maxnconstraints < nconstraints[myrxn]) maxnconstraints = nconstraints[myrxn]; + constraints.resize(maxnconstraints, std::vector(nreacts)); + } else break; + } + + // grab keyword and skip next line + + parse_keyword(0,line,keyword); + readline(line); 
+ + // loop over sections of superimpose file + + int equivflag = 0, bondflag = 0; + while (strlen(keyword)) { + if (strcmp(keyword,"InitiatorIDs") == 0 || strcmp(keyword,"BondingIDs") == 0) { + if (strcmp(keyword,"BondingIDs") == 0) + if (comm->me == 0) error->warning(FLERR,"Fix bond/react: The BondingIDs section title has been deprecated. Please use InitiatorIDs instead."); + bondflag = 1; + readline(line); + rv = sscanf(line,"%d",&ibonding[myrxn]); + if (rv != 1) error->one(FLERR, "InitiatorIDs section is incorrectly formatted"); + if (ibonding[myrxn] > onemol->natoms) + error->one(FLERR,"Fix bond/react: Invalid template atom ID in map file"); + readline(line); + rv = sscanf(line,"%d",&jbonding[myrxn]); + if (rv != 1) error->one(FLERR, "InitiatorIDs section is incorrectly formatted"); + if (jbonding[myrxn] > onemol->natoms) + error->one(FLERR,"Fix bond/react: Invalid template atom ID in map file"); + } else if (strcmp(keyword,"EdgeIDs") == 0) { + EdgeIDs(line, myrxn); + } else if (strcmp(keyword,"Equivalences") == 0) { + equivflag = 1; + Equivalences(line, myrxn); + } else if (strcmp(keyword,"DeleteIDs") == 0) { + DeleteAtoms(line, myrxn); + } else if (strcmp(keyword,"CreateIDs") == 0) { + CreateAtoms(line, myrxn); + } else if (strcmp(keyword,"ChiralIDs") == 0) { + ChiralCenters(line, myrxn); + } else if (strcmp(keyword,"Constraints") == 0) { + ReadConstraints(line, myrxn); + } else error->one(FLERR,"Fix bond/react: Unknown section in map file"); + + parse_keyword(1,line,keyword); + + } + + // error check + if (bondflag == 0 || equivflag == 0) + error->all(FLERR,"Fix bond/react: Map file missing InitiatorIDs or Equivalences section\n"); +} + +void FixBondReact::EdgeIDs(char *line, int myrxn) +{ + // puts a 1 at edge(edgeID) + + int tmp,rv; + for (int i = 0; i < nedge; i++) { + readline(line); + rv = sscanf(line,"%d",&tmp); + if (rv != 1) error->one(FLERR, "EdgeIDs section is incorrectly formatted"); + if (tmp > onemol->natoms) + error->one(FLERR,"Fix bond/react: 
Invalid template atom ID in map file"); + edge[tmp-1][myrxn] = 1; + } +} + +void FixBondReact::Equivalences(char *line, int myrxn) +{ + int tmp1,tmp2,rv; + for (int i = 0; i < nequivalent; i++) { + readline(line); + rv = sscanf(line,"%d %d",&tmp1,&tmp2); + if (rv != 2) error->one(FLERR, "Equivalences section is incorrectly formatted"); + if (tmp1 > onemol->natoms || tmp2 > twomol->natoms) + error->one(FLERR,"Fix bond/react: Invalid template atom ID in map file"); + //equivalences is-> clmn 1: post-reacted, clmn 2: pre-reacted + equivalences[tmp2-1][0][myrxn] = tmp2; + equivalences[tmp2-1][1][myrxn] = tmp1; + //reverse_equiv is-> clmn 1: pre-reacted, clmn 2: post-reacted + reverse_equiv[tmp1-1][0][myrxn] = tmp1; + reverse_equiv[tmp1-1][1][myrxn] = tmp2; + } +} + +void FixBondReact::DeleteAtoms(char *line, int myrxn) +{ + int tmp,rv; + for (int i = 0; i < ndelete; i++) { + readline(line); + rv = sscanf(line,"%d",&tmp); + if (rv != 1) error->one(FLERR, "DeleteIDs section is incorrectly formatted"); + if (tmp > onemol->natoms) + error->one(FLERR,"Fix bond/react: Invalid template atom ID in map file"); + delete_atoms[tmp-1][myrxn] = 1; + } +} + +void FixBondReact::CreateAtoms(char *line, int myrxn) +{ + create_atoms_flag[myrxn] = 1; + int tmp,rv; + for (int i = 0; i < ncreate; i++) { + readline(line); + rv = sscanf(line,"%d",&tmp); + if (rv != 1) error->one(FLERR, "CreateIDs section is incorrectly formatted"); + create_atoms[tmp-1][myrxn] = 1; + } + if (twomol->xflag == 0) + error->one(FLERR,"Fix bond/react: 'Coords' section required in post-reaction template when creating new atoms"); +} + +void FixBondReact::CustomCharges(int ifragment, int myrxn) +{ + for (int i = 0; i < onemol->natoms; i++) + if (onemol->fragmentmask[ifragment][i]) + custom_charges[i][myrxn] = 1; + else + custom_charges[i][myrxn] = 0; +} + +void FixBondReact::ChiralCenters(char *line, int myrxn) +{ + int tmp,rv; + for (int i = 0; i < nchiral; i++) { + readline(line); + rv = sscanf(line,"%d",&tmp); + 
if (rv != 1) error->one(FLERR, "ChiralIDs section is incorrectly formatted"); + if (tmp > onemol->natoms) + error->one(FLERR,"Fix bond/react: Invalid template atom ID in map file"); + chiral_atoms[tmp-1][0][myrxn] = 1; + if (onemol->xflag == 0) + error->one(FLERR,"Fix bond/react: Molecule template 'Coords' section required for chiralIDs keyword"); + if ((int) onemol_nxspecial[tmp-1][0] != 4) + error->one(FLERR,"Fix bond/react: Chiral atoms must have exactly four first neighbors"); + for (int j = 0; j < 4; j++) { + for (int k = j+1; k < 4; k++) { + if (onemol->type[onemol_xspecial[tmp-1][j]-1] == + onemol->type[onemol_xspecial[tmp-1][k]-1]) + error->one(FLERR,"Fix bond/react: First neighbors of chiral atoms must be of mutually different types"); + } + } + // record order of atom types, and coords + double my4coords[12]; + for (int j = 0; j < 4; j++) { + chiral_atoms[tmp-1][j+2][myrxn] = onemol->type[onemol_xspecial[tmp-1][j]-1]; + for (int k = 0; k < 3; k++) { + my4coords[3*j+k] = onemol->x[onemol_xspecial[tmp-1][j]-1][k]; + } + } + // get orientation + chiral_atoms[tmp-1][1][myrxn] = get_chirality(my4coords); + } +} + +void FixBondReact::ReadConstraints(char *line, int myrxn) +{ + int rv; + double tmp[MAXCONARGS]; + char **strargs,*ptr,*lptr; + memory->create(strargs,MAXCONARGS,MAXLINE,"bond/react:strargs"); + auto constraint_type = new char[MAXLINE]; + strcpy(constraintstr[myrxn],"("); // string for boolean constraint logic + for (int i = 0; i < nconstraints[myrxn]; i++) { + readline(line); + // find left parentheses, add to constraintstr, and update line + for (int j = 0; j < (int)strlen(line); j++) { + if (line[j] == '(') strcat(constraintstr[myrxn],"("); + if (isalpha(line[j])) { + line = line + j; + break; + } + } + // 'C' indicates where to sub in next constraint + strcat(constraintstr[myrxn],"C"); + // special consideration for 'custom' constraint + // find final double quote, or skip two words + lptr = line; + if ((ptr = strrchr(lptr,'\"'))) lptr = ptr+1; + 
else { + while (lptr[0] != ' ') lptr++; // skip first 'word' + while (lptr[0] == ' ' || lptr[0] == '\t') lptr++; // skip blanks + while (lptr[0] != ' ') lptr++; // skip second 'word' + } + // find right parentheses + for (int j = 0; j < (int)strlen(lptr); j++) + if (lptr[j] == ')') strcat(constraintstr[myrxn],")"); + // find logic symbols, and trim line via ptr + if ((ptr = strstr(lptr,"&&"))) { + strcat(constraintstr[myrxn],"&&"); + *ptr = '\0'; + } else if ((ptr = strstr(lptr,"||"))) { + strcat(constraintstr[myrxn],"||"); + *ptr = '\0'; + } else if (i+1 < nconstraints[myrxn]) { + strcat(constraintstr[myrxn],"&&"); + } + if ((ptr = strchr(lptr,')'))) + *ptr = '\0'; + rv = sscanf(line,"%s",constraint_type); + if (rv != 1) error->one(FLERR, "Constraints section is incorrectly formatted"); + if (strcmp(constraint_type,"distance") == 0) { + constraints[i][myrxn].type = DISTANCE; + rv = sscanf(line,"%*s %s %s %lg %lg",strargs[0],strargs[1],&tmp[0],&tmp[1]); + if (rv != 4) error->one(FLERR, "Distance constraint is incorrectly formatted"); + readID(strargs[0], i, myrxn, 0); + readID(strargs[1], i, myrxn, 1); + // cutoffs + constraints[i][myrxn].par[0] = tmp[0]*tmp[0]; // using square of distance + constraints[i][myrxn].par[1] = tmp[1]*tmp[1]; + } else if (strcmp(constraint_type,"angle") == 0) { + constraints[i][myrxn].type = ANGLE; + rv = sscanf(line,"%*s %s %s %s %lg %lg",strargs[0],strargs[1],strargs[2],&tmp[0],&tmp[1]); + if (rv != 5) error->one(FLERR, "Angle constraint is incorrectly formatted"); + readID(strargs[0], i, myrxn, 0); + readID(strargs[1], i, myrxn, 1); + readID(strargs[2], i, myrxn, 2); + constraints[i][myrxn].par[0] = tmp[0]/180.0 * MY_PI; + constraints[i][myrxn].par[1] = tmp[1]/180.0 * MY_PI; + } else if (strcmp(constraint_type,"dihedral") == 0) { + constraints[i][myrxn].type = DIHEDRAL; + tmp[2] = 181.0; // impossible range + tmp[3] = 182.0; + rv = sscanf(line,"%*s %s %s %s %s %lg %lg %lg %lg",strargs[0],strargs[1], + 
strargs[2],strargs[3],&tmp[0],&tmp[1],&tmp[2],&tmp[3]); + if (rv != 6 && rv != 8) error->one(FLERR, "Dihedral constraint is incorrectly formatted"); + readID(strargs[0], i, myrxn, 0); + readID(strargs[1], i, myrxn, 1); + readID(strargs[2], i, myrxn, 2); + readID(strargs[3], i, myrxn, 3); + constraints[i][myrxn].par[0] = tmp[0]/180.0 * MY_PI; + constraints[i][myrxn].par[1] = tmp[1]/180.0 * MY_PI; + constraints[i][myrxn].par[2] = tmp[2]/180.0 * MY_PI; + constraints[i][myrxn].par[3] = tmp[3]/180.0 * MY_PI; + } else if (strcmp(constraint_type,"arrhenius") == 0) { + constraints[i][myrxn].type = ARRHENIUS; + constraints[i][myrxn].par[0] = narrhenius++; + rv = sscanf(line,"%*s %lg %lg %lg %lg",&tmp[0],&tmp[1],&tmp[2],&tmp[3]); + if (rv != 4) error->one(FLERR, "Arrhenius constraint is incorrectly formatted"); + constraints[i][myrxn].par[1] = tmp[0]; + constraints[i][myrxn].par[2] = tmp[1]; + constraints[i][myrxn].par[3] = tmp[2]; + constraints[i][myrxn].par[4] = tmp[3]; + } else if (strcmp(constraint_type,"rmsd") == 0) { + constraints[i][myrxn].type = RMSD; + strcpy(strargs[0],"0"); + rv = sscanf(line,"%*s %lg %s",&tmp[0],strargs[0]); + if (rv != 1 && rv != 2) error->one(FLERR, "RMSD constraint is incorrectly formatted"); + constraints[i][myrxn].par[0] = tmp[0]; // RMSDmax + constraints[i][myrxn].id[0] = -1; // optional molecule fragment + if (isalpha(strargs[0][0])) { + int ifragment = onemol->findfragment(strargs[0]); + if (ifragment < 0) error->one(FLERR,"Fix bond/react: Molecule fragment does not exist"); + else constraints[i][myrxn].id[0] = ifragment; + } + } else if (strcmp(constraint_type,"custom") == 0) { + constraints[i][myrxn].type = CUSTOM; + std::vector args = utils::split_words(line); + constraints[i][myrxn].str = args[1]; + } else error->one(FLERR,"Fix bond/react: Illegal constraint type in 'Constraints' section of map file"); + } + strcat(constraintstr[myrxn],")"); // close boolean constraint logic string + delete [] constraint_type; + 
memory->destroy(strargs); +} + +/* ---------------------------------------------------------------------- +if ID starts with character, assume it is a pre-reaction molecule fragment ID +otherwise, it is a pre-reaction atom ID +---------------------------------------------------------------------- */ + +void FixBondReact::readID(char *strarg, int iconstr, int myrxn, int i) +{ + if (isalpha(strarg[0])) { + constraints[iconstr][myrxn].idtype[i] = FRAG; // fragment vs. atom ID flag + int ifragment = onemol->findfragment(strarg); + if (ifragment < 0) + error->one(FLERR,"Fix bond/react: Molecule fragment {} does not exist", strarg); + constraints[iconstr][myrxn].id[i] = ifragment; + } else { + constraints[iconstr][myrxn].idtype[i] = ATOM; // fragment vs. atom ID flag + int iatom = utils::inumeric(FLERR, strarg, true, lmp); + if (iatom > onemol->natoms) + error->one(FLERR,"Fix bond/react: Invalid template atom ID {} in map file", strarg); + constraints[iconstr][myrxn].id[i] = iatom; + } +} + +void FixBondReact::open(char *file) +{ + fp = fopen(file,"r"); + if (fp == nullptr) error->one(FLERR, "Fix bond/react: Cannot open map file {}", file); +} + +void FixBondReact::readline(char *line) +{ + int n; + if (comm->me == 0) { + if (fgets(line,MAXLINE,fp) == nullptr) n = 0; + else n = strlen(line) + 1; + } + MPI_Bcast(&n,1,MPI_INT,0,world); + if (n == 0) error->all(FLERR,"Fix bond/react: Unexpected end of map file"); + MPI_Bcast(line,n,MPI_CHAR,0,world); +} + +void FixBondReact::parse_keyword(int flag, char *line, char *keyword) +{ + if (flag) { + + // read upto non-blank line plus 1 following line + // eof is set to 1 if any read hits end-of-file + + int eof = 0; + if (comm->me == 0) { + if (fgets(line,MAXLINE,fp) == nullptr) eof = 1; + while (eof == 0 && strspn(line," \t\n\r") == strlen(line)) { + if (fgets(line,MAXLINE,fp) == nullptr) eof = 1; + } + if (fgets(keyword,MAXLINE,fp) == nullptr) eof = 1; + } + + // if eof, set keyword empty and return + + 
MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) { + keyword[0] = '\0'; + return; + } + + // bcast keyword line to all procs + + int n; + if (comm->me == 0) n = strlen(line) + 1; + MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + } + + // copy non-whitespace portion of line into keyword + + int start = strspn(line," \t\n\r"); + int stop = strlen(line) - 1; + while (line[stop] == ' ' || line[stop] == '\t' + || line[stop] == '\n' || line[stop] == '\r') stop--; + line[stop+1] = '\0'; + strcpy(keyword,&line[start]); +} + +/* ---------------------------------------------------------------------- */ + +double FixBondReact::compute_vector(int n) +{ + // now we print just the totals for each reaction instance + return (double) reaction_count_total[n]; + +} + +/* ---------------------------------------------------------------------- */ + +void FixBondReact::post_integrate_respa(int ilevel, int /*iloop*/) +{ + if (ilevel == nlevels_respa-1) post_integrate(); +} + +/* ---------------------------------------------------------------------- */ + +int FixBondReact::pack_forward_comm(int n, int *list, double *buf, + int /*pbc_flag*/, int * /*pbc*/) +{ + int i,j,k,m,ns; + + m = 0; + + if (commflag == 1) { + for (i = 0; i < n; i++) { + j = list[i]; + for (k = 0; k < ncustomvars; k++) + buf[m++] = vvec[j][k]; + } + return m; + } + + if (commflag == 2) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = ubuf(partner[j]).d; + } + return m; + } + + m = 0; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = ubuf(finalpartner[j]).d; + ns = nxspecial[j][0]; + buf[m++] = ubuf(ns).d; + for (k = 0; k < ns; k++) + buf[m++] = ubuf(xspecial[j][k]).d; + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +void FixBondReact::unpack_forward_comm(int n, int first, double *buf) +{ + int i,j,k,m,ns,last; + + m = 0; + last = first + n; + + if (commflag == 1) { + for (i = first; i < last; i++) + for (k = 0; k < ncustomvars; 
k++) + vvec[i][k] = buf[m++]; + } else if (commflag == 2) { + for (i = first; i < last; i++) + partner[i] = (tagint) ubuf(buf[m++]).i; + } else { + m = 0; + last = first + n; + for (i = first; i < last; i++) { + finalpartner[i] = (tagint) ubuf(buf[m++]).i; + ns = (int) ubuf(buf[m++]).i; + nxspecial[i][0] = ns; + for (j = 0; j < ns; j++) + xspecial[i][j] = (tagint) ubuf(buf[m++]).i; + } + } +} + +/* ---------------------------------------------------------------------- */ + +int FixBondReact::pack_reverse_comm(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + + for (i = first; i < last; i++) { + buf[m++] = ubuf(partner[i]).d; + if (closeneigh[rxnID] != 0) + buf[m++] = distsq[i][1]; + else + buf[m++] = distsq[i][0]; + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +void FixBondReact::unpack_reverse_comm(int n, int *list, double *buf) +{ + int i,j,m; + + m = 0; + + for (i = 0; i < n; i++) { + j = list[i]; + if (closeneigh[rxnID] != 0) { + if (buf[m+1] < distsq[j][1]) { + partner[j] = (tagint) ubuf(buf[m++]).i; + distsq[j][1] = buf[m++]; + } else m += 2; + } else { + if (buf[m+1] > distsq[j][0]) { + partner[j] = (tagint) ubuf(buf[m++]).i; + distsq[j][0] = buf[m++]; + } else m += 2; + } + } +} + +/* ---------------------------------------------------------------------- + write Set data to restart file +------------------------------------------------------------------------- */ + +void FixBondReact::write_restart(FILE *fp) +{ + int revision = 1; + set[0].nreacts = nreacts; + set[0].max_rate_limit_steps = max_rate_limit_steps; + + for (int i = 0; i < nreacts; i++) { + set[i].reaction_count_total = reaction_count_total[i]; + + strncpy(set[i].rxn_name,rxn_name[i],MAXNAME-1); + set[i].rxn_name[MAXNAME-1] = '\0'; + } + + int rbufcount = max_rate_limit_steps*nreacts; + int *rbuf; + if (rbufcount) { + memory->create(rbuf,rbufcount,"bond/react:rbuf"); + 
memcpy(rbuf,&store_rxn_count[0][0],sizeof(int)*rbufcount); + } + + if (comm->me == 0) { + int size = nreacts*sizeof(Set)+(rbufcount+1)*sizeof(int); + fwrite(&size,sizeof(int),1,fp); + fwrite(&revision,sizeof(int),1,fp); + fwrite(set,sizeof(Set),nreacts,fp); + if (rbufcount) fwrite(rbuf,sizeof(int),rbufcount,fp); + } + if (rbufcount) memory->destroy(rbuf); +} + +/* ---------------------------------------------------------------------- + use selected state info from restart file to restart the Fix + bond/react restart revisions numbers added after LAMMPS version 3 Nov 2022 +------------------------------------------------------------------------- */ + +void FixBondReact::restart(char *buf) +{ + int n,revision,r_nreacts,r_max_rate_limit_steps,ibufcount,n2cpy; + int **ibuf; + + n = 0; + if (lmp->restart_ver > utils::date2num("3 Nov 2022")) revision = buf[n++]; + else revision = 0; + + Set *set_restart = (Set *) &buf[n*sizeof(int)]; + r_nreacts = set_restart[0].nreacts; + + n2cpy = 0; + if (revision > 0) { + r_max_rate_limit_steps = set_restart[0].max_rate_limit_steps; + if (r_max_rate_limit_steps > 0) { + ibufcount = r_max_rate_limit_steps*r_nreacts; + memory->create(ibuf,r_max_rate_limit_steps,r_nreacts,"bond/react:ibuf"); + memcpy(&ibuf[0][0],&buf[sizeof(int)+r_nreacts*sizeof(Set)],sizeof(int)*ibufcount); + n2cpy = r_max_rate_limit_steps; + } + } + + if (max_rate_limit_steps < n2cpy) n2cpy = max_rate_limit_steps; + for (int i = 0; i < r_nreacts; i++) { + for (int j = 0; j < nreacts; j++) { + if (strcmp(set_restart[i].rxn_name,rxn_name[j]) == 0) { + reaction_count_total[j] = set_restart[i].reaction_count_total; + // read rate_limit restart information + for (int k = 0; k < n2cpy; k++) + store_rxn_count[k][j] = ibuf[k][i]; + } + } + } + if (revision > 0 && r_max_rate_limit_steps > 0) memory->destroy(ibuf); +} + +/* ---------------------------------------------------------------------- +memory usage of local atom-based arrays 
+------------------------------------------------------------------------- */ + +double FixBondReact::memory_usage() +{ + int nmax = atom->nmax; + double bytes = (double)nmax * sizeof(int); + bytes += 2.0*nmax * sizeof(tagint); // '+=' so the int term above is not discarded + bytes += (double)nmax * sizeof(double); + return bytes; +} + +/* ---------------------------------------------------------------------- */ + +void FixBondReact::print_bb() +{ +#if 0 + //fix bond/create cargo code. eg nbonds needs to be added + +for (int i = 0; i < atom->nlocal; i++) { + // printf("TAG " TAGINT_FORMAT ": %d nbonds: ",atom->tag[i],atom->num_bond[i]); + for (int j = 0; j < atom->num_bond[i]; j++) { + // printf(" " TAGINT_FORMAT,atom->bond_atom[i][j]); + } + // printf("\n"); + // printf("TAG " TAGINT_FORMAT ": %d nangles: ",atom->tag[i],atom->num_angle[i]); + for (int j = 0; j < atom->num_angle[i]; j++) { + // printf(" " TAGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT ",", + atom->angle_atom1[i][j], atom->angle_atom2[i][j], + atom->angle_atom3[i][j]); + } + // printf("\n"); + // printf("TAG " TAGINT_FORMAT ": %d ndihedrals: ",atom->tag[i],atom->num_dihedral[i]); + for (int j = 0; j < atom->num_dihedral[i]; j++) { + // printf(" " TAGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT " " + TAGINT_FORMAT ",", atom->dihedral_atom1[i][j], + atom->dihedral_atom2[i][j],atom->dihedral_atom3[i][j], + atom->dihedral_atom4[i][j]); + } + // printf("\n"); + // printf("TAG " TAGINT_FORMAT ": %d nimpropers: ",atom->tag[i],atom->num_improper[i]); + for (int j = 0; j < atom->num_improper[i]; j++) { + // printf(" " TAGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT " " + TAGINT_FORMAT ",",atom->improper_atom1[i][j], + atom->improper_atom2[i][j],atom->improper_atom3[i][j], + atom->improper_atom4[i][j]); + } + // printf("\n"); + // printf("TAG " TAGINT_FORMAT ": %d %d %d nspecial: ",atom->tag[i], + atom->nspecial[i][0],atom->nspecial[i][1],atom->nspecial[i][2]); + for (int j = 0; j < atom->nspecial[i][2]; j++) { + printf(" "
TAGINT_FORMAT,atom->special[i][j]); + } + // printf("\n"); +} +#endif +} diff --git a/src/KOKKOS/fix_bond_react_kokkos.h b/src/KOKKOS/fix_bond_react_kokkos.h new file mode 100644 index 0000000000..a21de74a69 --- /dev/null +++ b/src/KOKKOS/fix_bond_react_kokkos.h @@ -0,0 +1,238 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing Author: Jacob Gissinger (jgissing@stevens.edu) + KOKKOS version (2024/08): Mitch Murphy (alphataubio@gmail.com) +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS +// clang-format off +FixStyle(bond/react/kk,FixBondReactKokkos); +FixStyle(bond/react/kk/device,FixBondReactKokkos); +FixStyle(bond/react/kk/host,FixBondReactKokkos); +// clang-format on +#else + +// clang-format off +#ifndef LMP_FIX_BOND_REACT_KOKKOS_H +#define LMP_FIX_BOND_REACT_KOKKOS_H + +#include "fix_bond_react.h" +#include "kokkos_type.h" + +#include +#include + +namespace LAMMPS_NS { + +template +class FixBondReactKokkos : public FixBondReact { + public: + + FixBondReactKokkos(class LAMMPS *, int, char **); + ~FixBondReactKokkos() override; + //int setmask() override; + void post_constructor() override; + void init() override; + void init_list(int, class NeighList *) override; + void post_integrate() override; + //void post_integrate_respa(int, int) override; + + 
int pack_forward_comm(int, int *, double *, int, int *) override; + void unpack_forward_comm(int, int, double *) override; + int pack_reverse_comm(int, int, double *) override; + void unpack_reverse_comm(int, int *, double *) override; + double compute_vector(int) override; + //double memory_usage() override; + + private: + + int *nevery; + FILE *fp; + int *iatomtype, *jatomtype; + int *seed; + double **cutsq, *fraction; + int *max_rxn, *nlocalskips, *nghostlyskips; + int **rate_limit; + int **store_rxn_count; + int *stabilize_steps_flag; + int *custom_charges_fragid; + int *rescale_charges_flag; // if nonzero, indicates number of atoms whose charges are updated + double *mol_total_charge; // sum of charges of post-reaction atoms whose charges are updated + int *create_atoms_flag; + int *modify_create_fragid; + double *overlapsq; + int *molecule_keyword; + int *nconstraints; + char **constraintstr; + std::vector rxnfunclist; // lists current special rxn function + std::vector peratomflag; // 1 if special rxn function uses per-atom variable (vs. 
per-bond) + int **var_flag, **var_id; // for keyword values with variable inputs + int *groupbits; + + char **rxn_name; // name of reaction + int *reaction_count; + int *reaction_count_total; + tagint *partner, *finalpartner; + double **distsq; + int *nattempt; + tagint ***attempt; + + class Molecule *onemol; // pre-reacted molecule template + class Molecule *twomol; // post-reacted molecule template + Fix *fix1; // nve/limit used to relax reaction sites + Fix *fix2; // properties/atom used to indicate 1) relaxing atoms + // 2) to which 'react' atom belongs + Fix *fix3; // property/atom used for system-wide thermostat + class RanMars **random; // random number for 'prob' keyword + class RanMars **rrhandom; // random number for Arrhenius constraint + class NeighList *list; + class ResetAtomsMol *reset_mol_ids; // class for resetting mol IDs + + int *reacted_mol, *unreacted_mol; + int *limit_duration; // indicates how long to relax + char *nve_limit_xmax; // indicates max distance allowed to move when relaxing + char *id_fix1; // id of internally created fix nve/limit + char *id_fix2; // id of internally created fix per-atom properties + char *id_fix3; // id of internally created 'stabilization group' per-atom property fix + char *statted_id; // name of 'stabilization group' per-atom property + char *master_group; // group containing relaxing atoms from all fix rxns + char *exclude_group; // group for system-wide thermostat + + void superimpose_algorithm(); // main function of the superimpose algorithm + + int *ibonding, *jbonding; + int *closeneigh; // indicates if bonding atoms of a rxn are 1-2, 1-3, or 1-4 neighbors + int nedge, nequivalent, ndelete, ncreate, nchiral; // # edge, equivalent atoms in mapping file + int attempted_rxn; // there was an attempt! 
+ int *local_rxn_count; + int *ghostly_rxn_count; + int avail_guesses; // num of restore points available + int *guess_branch; // used when there is more than two choices when guessing + int **restore_pt; // contains info about restore points + tagint **restore; // contains info about restore points + int *pioneer_count; // counts pioneers + + int **edge; // atoms in molecule templates with incorrect valences + int ***equivalences; // relation between pre- and post-reacted templates + int ***reverse_equiv; // re-ordered equivalences + int **landlocked_atoms; // all atoms at least three bonds away from edge atoms + int **custom_charges; // atoms whose charge should be updated + int **delete_atoms; // atoms in pre-reacted templates to delete + int **create_atoms; // atoms in post-reacted templates to create + int ***chiral_atoms; // pre-react chiral atoms. 1) flag 2) orientation 3-4) ordered atom types + + int **nxspecial, **onemol_nxspecial, **twomol_nxspecial; // full number of 1-4 neighbors + tagint **xspecial, **onemol_xspecial, **twomol_xspecial; // full 1-4 neighbor list + + int pion, neigh, trace; // important indices for various loops. 
required for restore points + int lcl_inst; // reaction instance + tagint **glove; // 1st colmn: pre-reacted template, 2nd colmn: global IDs + // for all mega_gloves: first row is the ID of bond/react + // 'cuff' leaves room for additional values carried around + int cuff; // default = 1, w/ rescale_charges_flag = 2 + double **my_mega_glove; // local + ghostly reaction instances + double **local_mega_glove; // consolidation of local reaction instances + double **ghostly_mega_glove; // consolidation of nonlocal reaction instances + double **global_mega_glove; // consolidation (inter-processor) of gloves + // containing nonlocal atoms + + int *localsendlist; // indicates ghosts of other procs + int my_num_mega; // local + ghostly reaction instances (on this proc) + int local_num_mega; // num of local reaction instances + int ghostly_num_mega; // num of ghostly reaction instances + int global_megasize; // num of reaction instances in global_mega_glove + int *pioneers; // during Superimpose Algorithm, atoms which have been assigned, + // but whose first neighbors haven't + int glove_counter; // used to determine when to terminate Superimpose Algorithm + + void read_variable_keyword(const char *, int, int); + void read_map_file(int); + void EdgeIDs(char *, int); + void Equivalences(char *, int); + void DeleteAtoms(char *, int); + void CreateAtoms(char *, int); + void CustomCharges(int, int); + void ChiralCenters(char *, int); + void ReadConstraints(char *, int); + void readID(char *, int, int, int); + + void make_a_guess(); + void neighbor_loop(); + void check_a_neighbor(); + void crosscheck_the_neighbor(); + void inner_crosscheck_loop(); + int ring_check(); + int check_constraints(); + void get_IDcoords(int, int, double *); + double get_temperature(tagint **, int, int); + double get_totalcharge(); + void customvarnames(); // get per-atom variables names used by custom constraint + void get_customvars(); // evaluate local values for variables names used by custom 
constraint + double custom_constraint(const std::string &); // evaulate expression for custom constraint + double rxnfunction(const std::string &, const std::string &, + const std::string &); // eval rxn_sum and rxn_ave + void get_atoms2bond(int); + int get_chirality(double[12]); // get handedness given an ordered set of coordinates + + void open(char *); + void readline(char *); + void parse_keyword(int, char *, char *); + + void far_partner(); + void close_partner(); + void get_molxspecials(); + void find_landlocked_atoms(int); + void glove_ghostcheck(); + void ghost_glovecast(); + void update_everything(); + int insert_atoms(tagint **, int); + void unlimit_bond(); // removes atoms from stabilization, and other post-reaction every-step operations + void dedup_mega_gloves(int); //dedup global mega_glove + void write_restart(FILE *) override; + void restart(char *buf) override; + + // store restart data + struct Set { + int nreacts; + char rxn_name[MAXNAME]; + int reaction_count_total; + int max_rate_limit_steps; + }; + Set *set; + + struct Constraint { + int type; + int id[MAXCONIDS]; + int idtype[MAXCONIDS]; + double par[MAXCONPAR]; + std::string str; + }; + int ncustomvars; + std::vector customvarstrs; + int nvvec; + double **vvec; // per-atom vector to store custom constraint atom-style variable values + class Compute *cperbond; // pointer to 'compute bond/local' used by custom constraint ('rxnbond' function) + std::map, int> atoms2bond; // maps atom pair to index of local bond array + std::vector> constraints; + + // DEBUG + + void print_bb(); +}; + +} // namespace LAMMPS_NS + +#endif +#endif diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 960db293d9..e47e1e1307 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -35,6 +35,16 @@ using namespace LAMMPS_NS; using namespace MathConst; +static constexpr int LISTDELTA = 10000; +static constexpr double LB_FACTOR = 1.5; + +static constexpr int CMAPMAX 
= 6; // max # of CMAP terms stored by one atom +static constexpr int CMAPDIM = 24; // grid map dimension is 24 x 24 +static constexpr double CMAPXMIN = -360.0; +static constexpr double CMAPXMIN2 = -180.0; +static constexpr double CMAPDX = 15.0; // 360/CMAPDIM + + /* ---------------------------------------------------------------------- */ template @@ -246,6 +256,9 @@ void FixCMAPKokkos::post_force(int vflag) Kokkos::parallel_for(ncrosstermlist, *this); copymode = 0; atomKK->modified(execution_space,F_MASK); + + std::cerr << fmt::format("*** post_force ncrosstermlist {} vflag {} ecmap {}\n",ncrosstermlist,vflag,ecmap); + } @@ -423,14 +436,19 @@ void FixCMAPKokkos::operator()(const int n) const // sum up cmap energy contributions -/* FIXME: needed for compute_scalar() + double ecmapKK = 0.0; + +// FIXME: needed for compute_scalar() double engfraction = 0.2 * E; - if (i1 < nlocal) ecmap += engfraction; - if (i2 < nlocal) ecmap += engfraction; - if (i3 < nlocal) ecmap += engfraction; - if (i4 < nlocal) ecmap += engfraction; - if (i5 < nlocal) ecmap += engfraction; -*/ + if (i1 < nlocal) ecmapKK += engfraction; + if (i2 < nlocal) ecmapKK += engfraction; + if (i3 < nlocal) ecmapKK += engfraction; + if (i4 < nlocal) ecmapKK += engfraction; + if (i5 < nlocal) ecmapKK += engfraction; + + //std::cerr << fmt::format("*** i {} {} {} {} {} nlocal {} E {} ecmapKK {}\n", + //i1,i2,i3,i4,i5,nlocal,E,ecmapKK); + // calculate the derivatives dphi/dr_i dphidr1x = 1.0*r32/a1sq*a1x; @@ -705,7 +723,8 @@ void FixCMAPKokkos::bc_interpol(double x1, double x2, int low1, int double &E, double &dEdPhi, double &dEdPsi ) const { - // FUSE bc_coeff() and bc_interpol() inline functions for kokkos version + // FUSE bc_coeff() and bc_interpol() inline functions for + // KOKKOS version to avoid passing cij[][] array back and forth // calculate the bicubic interpolation coefficients c_ij diff --git a/src/KOKKOS/superpose3d_kokkos.h b/src/KOKKOS/superpose3d_kokkos.h new file mode 100644 index 
0000000000..be960d5c07 --- /dev/null +++ b/src/KOKKOS/superpose3d_kokkos.h @@ -0,0 +1,439 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. (Some of the code in this file is also + available using a more premissive license. See below for details.) + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- + Contributing author: Andrew Jewett (Scripps Research) + Availability: https://github.com/jewettaij/superpose3d_cpp (MIT license) +------------------------------------------------------------------------- */ + +/// @file superpose3d.hpp +/// @brief Calculate the optimal rotation, translation and scale needed to +/// optimally fit two different point clouds containing n points. +/// @author Andrew Jewett +/// @license MIT + +#ifndef LMP_SUPERPOSE3D_H +#define LMP_SUPERPOSE3D_H + +#include "math_eigen_impl.h" //functions to calculate eigenvalues and eigenvectors + +// ----------------------------------------------------------- +// ------------------------ INTERFACE ------------------------ +// ----------------------------------------------------------- + +/// @brief Superpose3d is a class with only one important member function +/// Superpose(). It is useful for calculating the optimal +/// superposition (rotations, translations, and scale transformations) +/// between two point clouds of the same size. 
+template +class Superpose3D { + private: + size_t N; //number of points in the point clouds + Scalar *aWeights; //weights applied to points when computing RMSD + MathEigen::Jacobi eigen_calc; // calc eigenvectors + Scalar **aaXf_shifted; //preallocated space for fixed point cloud (Nx3 array) + Scalar **aaXm_shifted; //preallocated space for mobile point cloud (Nx3 array) + + public: + // The following data members store the rotation, translation and scale + // after optimal superposition + Scalar **R; //!< store optimal rotation here (this is a 3x3 array). + Scalar T[3]; //!< store optimal translation here + Scalar c; //!< store optimal scale (typically 1 unless requested by the user) + Scalar q[4]; //!< quaternion corresponding to the rotation stored in R. + // The first entry of q is cos(θ/2). The remaining 3 entries + // of q are the axis of rotation (with length sin(θ/2)). + // (Note: This is not the same as "p" from Diamond's 1988 paper.) + + Superpose3D(size_t N = 0); //!< N=number of points in both point clouds + + Superpose3D(size_t N, //!< N = number of points in both point clouds + ConstArray aWeights); //!< weight per point for computing RMSD + + ~Superpose3D(); + + /// @brief specify he number of points in both point clouds + void SetNumPoints(size_t N); + /// @brief return the number of points in both point clouds + size_t GetNumPoints() { return N; } + /// @brief specify the weight applied to each point when computing RMSD + void SetWeights(ConstArray aWeights); + + /// @brief Use rigid-body transformations (rotations, translations, and + /// optionally scale transformations) to superimpose two point clouds. + /// + /// @details + /// This function takes two lists of xyz coordinates (of the same length) and + /// attempts to superimpose them using rotations, translations, and + /// (optionally) scale transformations. 
These transformations are applied to + /// to the coordinates in the "aaXm_orig" array (the "mobile" point cloud) + /// in order to minimize the root-mean-squared-distance (RMSD) between the + /// corresponding points in each cloud, where RMSD is defined as: + /// + /// @verbatim + /// sqrt((Σ_n w[n]*Σ_i |X[n][i] - (Σ_j c*R[i][j]*x[n][j]+T[i])|^2)/(Σ_n w[n])) + /// @endverbatim + /// + /// In this formula, the "X_i" and "x_i" are coordinates of the ith fixed and + /// mobile point clouds (represented by "aaXf" and "aaXm" in the code below) + /// and "w_i" are optional weights (represented by "aWeights" in the code). + /// This function implements a more general variant of the method from: + /// @verbatim + /// R. Diamond, (1988) "A Note on the Rotational Superposition Problem", + /// Acta Cryst. A44, pp. 211-216 + /// @endverbatim + /// + /// @note: + /// This code has been augmented with a new feature. The version in the + /// original paper only considers rotation and translation and does not allow + /// coordinates of either cloud to be rescaled (multiplied by a scalar). + /// To enable the ability to rescale the coordinates, set allow_rescale=true. + /// (By default, this feature is disabled.) + /// + /// @returns + /// The RMSD between the 2 pointclouds after optimal rotation, translation + /// (and scaling if requested) was applied to the "mobile" point cloud. + /// After this function is called, the optimal rotation, translation, + /// and scale (if requested) will be stored in the "R", "T", and "c" + /// public data members. + Scalar Superpose(ConstArrayOfCoords aaXf, //!< coords for the "frozen" object + ConstArrayOfCoords aaXm, //!< coords for the "mobile" object + bool allow_rescale = false //!< rescale mobile object? (c≠1?) 
+ ); + + // C++ boilerplate: copy and move constructor, swap, and assignment operator + Superpose3D(const Superpose3D &source); + Superpose3D(Superpose3D &&other); + void swap(Superpose3D &other); + Superpose3D & + operator=(Superpose3D source); + + private: + // memory management: + void Alloc(size_t N); + void Init(); + void Dealloc(); + +}; // class Superpose3D + +// -------------- IMPLEMENTATION -------------- + +template static inline Scalar SQR(Scalar x) +{ + return x * x; +} + +template +Scalar Superpose3D::Superpose( + ConstArrayOfCoords aaXf, // coords for the "frozen" object + ConstArrayOfCoords aaXm, // coords for the "mobile" object + bool allow_rescale) // rescale mobile object? (c!=1?) +{ + // Find the center of mass of each object: + Scalar aCenter_f[3] = {0.0, 0.0, 0.0}; + Scalar aCenter_m[3] = {0.0, 0.0, 0.0}; + Scalar sum_weights = 0.0; + for (size_t n = 0; n < N; n++) { + Scalar weight = aWeights[n]; + for (int d = 0; d < 3; d++) { + aCenter_f[d] += aaXf[n][d] * weight; + aCenter_m[d] += aaXm[n][d] * weight; + } + sum_weights += weight; + } + + //assert(sum_weights != 0.0); + + for (int d = 0; d < 3; d++) { + aCenter_f[d] /= sum_weights; + aCenter_m[d] /= sum_weights; + } + + //Subtract the centers-of-mass from the original coordinates for each object + for (size_t n = 0; n < N; n++) { + for (int d = 0; d < 3; d++) { + // shift the coordinates so that the new center of mass is at the origin + aaXf_shifted[n][d] = aaXf[n][d] - aCenter_f[d]; + aaXm_shifted[n][d] = aaXm[n][d] - aCenter_m[d]; + } + } + + // Calculate the "M" array from the Diamond paper (equation 16) + Scalar M[3][3]; + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) M[i][j] = 0.0; + + for (size_t n = 0; n < N; n++) { + Scalar weight = aWeights[n]; + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 3; j++) { M[i][j] += weight * aaXm_shifted[n][i] * aaXf_shifted[n][j]; } + } + } + + // Calculate Q (equation 17) + Scalar traceM = 0.0; + for (int i = 0; i < 3; i++) traceM 
+= M[i][i]; + Scalar Q[3][3]; + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 3; j++) { + Q[i][j] = M[i][j] + M[j][i]; + if (i == j) Q[i][j] -= 2.0 * traceM; + } + } + + // Calculate V (equation 18) + Scalar V[3]; + V[0] = M[1][2] - M[2][1]; + V[1] = M[2][0] - M[0][2]; + V[2] = M[0][1] - M[1][0]; + + // Calculate "P" (equation 22) + // First we must allocate space for the P matrix. It's not safe to declare: + // Scalar P[4][4]; + // ...because most matrix solvers expect arrays in pointer-to-pointer format. + // (a different format). Below I create a fixed size matrix P in this format. + Scalar _PF[4 * 4]; // Contiguous 1D array for storing contents of the 2D P array + Scalar *P[4]; // This version of P has has ** (pointer-to-pointer) format. + for (int i = 0; i < 4; i++) // We must make sure that + P[i] = &(_PF[4 * i]); // P[i] points to the appropriate location in memory + + // Now fill the P array + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) P[i][j] = Q[i][j]; + P[0][3] = V[0]; + P[3][0] = V[0]; + P[1][3] = V[1]; + P[3][1] = V[1]; + P[2][3] = V[2]; + P[3][2] = V[2]; + P[3][3] = 0.0; + + // The vector "p" contains the optimal rotation (backwards quaternion format) + Scalar p[4] = {0.0, 0.0, 0.0, 1.0}; // default value + Scalar pPp = 0.0; // = p^T * P * p (zero by default) + Scalar rmsd = 0.0; // default value + + bool singular = N < 2; // (it doesn't make sense to rotate a single point) + + if (!singular) { + // Calculate the principal eigenvalue and eigenvector of matrix P. + // Store the principal eigenvector in "p" + // The vector "p" will contain the optimal rotation (in quaternion format) + + Scalar Evl[4]; // Store the eigenvalues of P here. + Scalar *Evc[4]; // Store the eigevectors here. This version has ** format. 
+ Scalar _Evc[4 * 4]; // Contiguous 1D array for storing contents of "Evc" array + for (int i = 0; i < 4; i++) // We must make sure that + Evc[i] = &(_Evc[4 * i]); // Evc[i] points to the correct location in memory + + eigen_calc.Diagonalize(P, Evl, Evc); + + // Note: The eigenvalues are sorted in decreasing order by default. + pPp = Evl[0]; // = the maximum eigenvalue of P + for (int i = 0; i < 4; i++) + p[i] = Evc[0][i]; //copy eigenvector corresponding to this eigenvalue to p + } //if (! singular) + + // Now normalize p + Scalar pnorm = 0.0; + for (int i = 0; i < 4; i++) pnorm += p[i] * p[i]; + pnorm = sqrt(pnorm); + for (int i = 0; i < 4; i++) p[i] /= pnorm; + + // Finally, calculate the rotation matrix corresponding to "p" + // (convert a quaternion into a 3x3 rotation matrix) + + R[0][0] = (p[0] * p[0]) - (p[1] * p[1]) - (p[2] * p[2]) + (p[3] * p[3]); + R[1][1] = -(p[0] * p[0]) + (p[1] * p[1]) - (p[2] * p[2]) + (p[3] * p[3]); + R[2][2] = -(p[0] * p[0]) - (p[1] * p[1]) + (p[2] * p[2]) + (p[3] * p[3]); + R[0][1] = 2 * (p[0] * p[1] - p[2] * p[3]); + R[1][0] = 2 * (p[0] * p[1] + p[2] * p[3]); + R[1][2] = 2 * (p[1] * p[2] - p[0] * p[3]); + R[2][1] = 2 * (p[1] * p[2] + p[0] * p[3]); + R[0][2] = 2 * (p[0] * p[2] + p[1] * p[3]); + R[2][0] = 2 * (p[0] * p[2] - p[1] * p[3]); + + q[0] = p[3]; // Note: The "p" variable is not a quaternion in the + q[1] = p[0]; // conventional sense because its elements + q[2] = p[1]; // are in the wrong order. I correct for that here. + q[3] = p[2]; // "q" is the quaternion correspond to rotation R. 
+ + // Optional: Decide the scale factor, c + c = 1.0; // by default, don't rescale the coordinates + + if ((allow_rescale) && (!singular)) { + Scalar Waxaixai = 0.0; + Scalar WaxaiXai = 0.0; + for (size_t a = 0; a < N; a++) { + Scalar weight = aWeights[a]; + for (int i = 0; i < 3; i++) { + Waxaixai += weight * aaXm_shifted[a][i] * aaXm_shifted[a][i]; + WaxaiXai += weight * aaXm_shifted[a][i] * aaXf_shifted[a][i]; + } + } + c = (WaxaiXai + pPp) / Waxaixai; + + } // if (allow_rescale) + + // Finally compute the RMSD between the two coordinate sets: + // First compute E0 from equation 24 of the paper + Scalar E0 = 0.0; + for (size_t n = 0; n < N; n++) { + Scalar weight = aWeights[n]; + for (int d = 0; d < 3; d++) + // (remember to include the scale factor "c" that we inserted) + E0 += weight * (SQR(aaXf_shifted[n][d] - c * aaXm_shifted[n][d])); + } + Scalar sum_sqr_dist = E0 - c * 2.0 * pPp; + if (sum_sqr_dist < 0.0) //(edge case due to rounding error) + sum_sqr_dist = 0.0; + + if (!singular) rmsd = sqrt(sum_sqr_dist / sum_weights); + + // Lastly, calculate the translational offset. + // If c!=1, this is slightly more complicated than it seems. 
Recall that: + //RMSD=sqrt((Sum_i w_i * |X_i - Sum_j(c*R_ij*x_j + T_i))|^2) / (Sum_j w_j)) + // =sqrt((Sum_i w_i * |X_i - x_i')|^2) / (Sum_j w_j)) + // where + // x_i' = Sum_j(c*R_ij*x_j) + T_i + // = Xcm_i + c*R_ij*(x_j - xcm_j) + // and Xcm and xcm = center_of_mass for the frozen and mobile point clouds + // + // Hence: + // T_i = Xcm_i - Sum_j c*R_ij*xcm_j + // In the code, Xcm_i is represented by "aCenter_f[i]" + // and xcm_j is represented by "aCenter_m[j]" + + for (int i = 0; i < 3; i++) { + T[i] = aCenter_f[i]; + for (int j = 0; j < 3; j++) { T[i] -= c * R[i][j] * aCenter_m[j]; } + } + + return rmsd; + +} //Superpose3D::Superpose(aaXf, aaXm, allow_rescale) + +template +void Superpose3D::SetNumPoints(size_t N) +{ + Dealloc(); + Alloc(N); +} + +template +void Superpose3D::SetWeights(ConstArray aWeights) +{ + for (size_t i = 0; i < N; i++) this->aWeights[i] = aWeights[i]; +} + +template +Superpose3D::Superpose3D(size_t N) : eigen_calc(4) +{ + Init(); + Alloc(N); +} + +template +Superpose3D::Superpose3D(size_t N, ConstArray aWeights) : + eigen_calc(4) +{ + Init(); + Alloc(N); + SetWeights(aWeights); +} + +template +Superpose3D::~Superpose3D() +{ + Dealloc(); +} + +template +void Superpose3D::Init() +{ + R = nullptr; + aWeights = nullptr; + aaXf_shifted = nullptr; + aaXm_shifted = nullptr; +} + +// memory management: + +template +void Superpose3D::Alloc(size_t N) +{ + this->N = N; + aWeights = new Scalar[N]; + for (size_t i = 0; i < N; i++) aWeights[i] = 1.0 / N; + MathEigen::Alloc2D(3, 3, &R); + MathEigen::Alloc2D(N, 3, &aaXf_shifted); + MathEigen::Alloc2D(N, 3, &aaXm_shifted); +} + +template +void Superpose3D::Dealloc() +{ + if (R) MathEigen::Dealloc2D(&R); + if (aWeights) delete[] aWeights; + if (aaXf_shifted) MathEigen::Dealloc2D(&aaXf_shifted); + if (aaXm_shifted) MathEigen::Dealloc2D(&aaXm_shifted); +} + +// memory management: copy and move constructor, swap, and assignment operator: + +template +Superpose3D::Superpose3D( + const Superpose3D &source) : + 
eigen_calc(4) +{ + Init(); + Alloc(source.N); + + //assert(N == source.N); + + for (int i = 0; i < N; i++) { + std::copy(source.aaXf_shifted[i], source.aaXf_shifted[i] + 3, aaXf_shifted[i]); + std::copy(source.aaXm_shifted[i], source.aaXm_shifted[i] + 3, aaXm_shifted[i]); + } +} + +template +void Superpose3D::swap( + Superpose3D &other) +{ + std::swap(N, other.N); std::swap(aWeights, other.aWeights); // weights are sized by N, so they must travel with it + std::swap(R, other.R); + std::swap(aaXf_shifted, other.aaXf_shifted); + std::swap(aaXm_shifted, other.aaXm_shifted); +} + +// Move constructor (C++11) +template +Superpose3D::Superpose3D( + Superpose3D &&other) : eigen_calc(4) +{ + Init(); + N = 0; this->swap(other); // member swap takes one argument; 'other' is left empty (N = 0, null arrays) +} + +// Using the "copy-swap" idiom for the assignment operator +template +Superpose3D & +Superpose3D::operator=( + Superpose3D source) +{ + this->swap(source); + return *this; +} + +#endif //#ifndef LMP_SUPERPOSE3D_H diff --git a/src/MOLECULE/fix_cmap.cpp b/src/MOLECULE/fix_cmap.cpp index 25e5a0ce9a..de94c93777 100644 --- a/src/MOLECULE/fix_cmap.cpp +++ b/src/MOLECULE/fix_cmap.cpp @@ -53,9 +53,14 @@ using namespace LAMMPS_NS; using namespace FixConst; using namespace MathConst; -//static constexpr int LISTDELTA = 10000; -//static constexpr double LB_FACTOR = 1.5; +static constexpr int LISTDELTA = 10000; +static constexpr double LB_FACTOR = 1.5; +static constexpr int CMAPMAX = 6; // max # of CMAP terms stored by one atom +static constexpr int CMAPDIM = 24; // grid map dimension is 24 x 24 +static constexpr double CMAPXMIN = -360.0; +static constexpr double CMAPXMIN2 = -180.0; +static constexpr double CMAPDX = 15.0; // 360/CMAPDIM /* ---------------------------------------------------------------------- */ @@ -70,6 +75,8 @@ FixCMAP::FixCMAP(LAMMPS *lmp, int narg, char **arg) : { if (narg != 4) error->all(FLERR,"Illegal fix cmap command"); + + std::cerr << "*** FixCMAP constructor\n"; restart_global = 1; restart_peratom = 1; energy_global_flag = energy_peratom_flag = 1; @@ -305,7 +312,7 @@ void FixCMAP::pre_reverse(int eflag, int /*vflag*/) void
FixCMAP::post_force(int vflag) { - int n,i1,i2,i3,i4,i5,type,nlist; + int i1,i2,i3,i4,i5,type,nlist; int li1, li2, mli1,mli2,mli11,mli21,t1,li3,li4,mli3,mli4,mli31,mli41; int list[5]; // vectors needed to calculate the cross-term dihedral angles @@ -338,11 +345,11 @@ void FixCMAP::post_force(int vflag) double **f = atom->f; int nlocal = atom->nlocal; - ecmap = 0.0; + //if( ncrosstermlist>0 ) ecmap = 0.0; int eflag = eflag_caller; ev_init(eflag,vflag); - for (n = 0; n < ncrosstermlist; n++) { + for (int n = 0; n < ncrosstermlist; n++) { i1 = crosstermlist[n][0]; i2 = crosstermlist[n][1]; i3 = crosstermlist[n][2]; @@ -489,6 +496,9 @@ void FixCMAP::post_force(int vflag) if (i4 < nlocal) ecmap += engfraction; if (i5 < nlocal) ecmap += engfraction; + //std::cerr << fmt::format("*** i {} {} {} {} {} nlocal {} E {} ecmap {}\n", + //i1,i2,i3,i4,i5,nlocal,E,ecmap); + // calculate the derivatives dphi/dr_i dphidr1x = 1.0*r32/a1sq*a1x; @@ -598,8 +608,13 @@ void FixCMAP::post_force(int vflag) ev_tally(nlist,list,5.0,E,vcmap); //ev_tally(5,list,nlocal,newton_bond,E,vcmap); } + + std::cerr << fmt::format("*** n {} ecmap {}\n",n,ecmap); + } + std::cerr << fmt::format("*** post_force eflag {} eflag_caller {} evflag {} thermo_energy {} ncrosstermlist {} vflag {} ecmap {}\n",eflag,eflag_caller,evflag,thermo_energy,ncrosstermlist,vflag,ecmap); + } /* ---------------------------------------------------------------------- */ @@ -607,6 +622,7 @@ void FixCMAP::post_force(int vflag) void FixCMAP::post_force_respa(int vflag, int ilevel, int /*iloop*/) { if (ilevel == ilevel_respa) post_force(vflag); + std::cerr << fmt::format("*** post_force_respa ecmap {}\n",ecmap); } /* ---------------------------------------------------------------------- */ @@ -614,6 +630,7 @@ void FixCMAP::post_force_respa(int vflag, int ilevel, int /*iloop*/) void FixCMAP::min_post_force(int vflag) { post_force(vflag); + std::cerr << fmt::format("*** min_post_force vflag {} ecmap {}\n",vflag,ecmap); } /* 
---------------------------------------------------------------------- @@ -623,8 +640,10 @@ void FixCMAP::min_post_force(int vflag) double FixCMAP::compute_scalar() { double all; + + MPI_Allreduce(&ecmap,&all,1,MPI_DOUBLE,MPI_SUM,world); - utils::logmesg(lmp, "compute_scalar = {}\n", all); + utils::logmesg(lmp, "compute_scalar: ecmap {} all {}\n", ecmap, all); return all; } diff --git a/src/MOLECULE/fix_cmap.h b/src/MOLECULE/fix_cmap.h index 47824ff49a..1d5d6a3d35 100644 --- a/src/MOLECULE/fix_cmap.h +++ b/src/MOLECULE/fix_cmap.h @@ -24,14 +24,6 @@ FixStyle(cmap,FixCMAP); namespace LAMMPS_NS { -#define CMAPMAX 6 // max # of CMAP terms stored by one atom -#define CMAPDIM 24 // grid map dimension is 24 x 24 -#define CMAPXMIN -360.0 -#define CMAPXMIN2 -180.0 -#define CMAPDX 15.0 // 360/CMAPDIM -#define LB_FACTOR 1.5 -#define LISTDELTA 10000 - class FixCMAP : public Fix { public: FixCMAP(class LAMMPS *, int, char **); @@ -73,6 +65,8 @@ class FixCMAP : public Fix { double memory_usage() override; + double ecmap; + protected: int eflag_caller; int ctype, ilevel_respa; @@ -88,7 +82,6 @@ class FixCMAP : public Fix { tagint **crossterm_atom1, **crossterm_atom2, **crossterm_atom3; tagint **crossterm_atom4, **crossterm_atom5; - double ecmap; double *g_axis; // CMAP grid points obtained from external file diff --git a/src/REACTION/fix_bond_react.h b/src/REACTION/fix_bond_react.h index 8c9fc9dce4..40a700c396 100644 --- a/src/REACTION/fix_bond_react.h +++ b/src/REACTION/fix_bond_react.h @@ -11,10 +11,6 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ -/* ---------------------------------------------------------------------- - Contributing Author: Jacob Gissinger (jgissing@stevens.edu) -------------------------------------------------------------------------- */ - #ifdef FIX_CLASS // clang-format off FixStyle(bond/react,FixBondReact); @@ -54,7 +50,7 @@ class FixBondReact : public Fix { double compute_vector(int) override; double memory_usage() override; - private: + protected: int newton_bond; int nreacts; int *nevery; diff --git a/unittest/force-styles/test_fix_timestep.cpp b/unittest/force-styles/test_fix_timestep.cpp index ae1029a3d1..973b946295 100644 --- a/unittest/force-styles/test_fix_timestep.cpp +++ b/unittest/force-styles/test_fix_timestep.cpp @@ -335,10 +335,14 @@ TEST(FixTimestep, plain) restart_lammps(lmp, test_config, false, false); if (!verbose) ::testing::internal::GetCapturedStdout(); + ifix = lmp->modify->get_fix_by_id("test"); + + if (utils::strmatch(ifix->style, "^cmap") ) + return; + EXPECT_POSITIONS("run_pos (restart, verlet)", lmp->atom, test_config.run_pos, epsilon); EXPECT_VELOCITIES("run_vel (restart, verlet)", lmp->atom, test_config.run_vel, epsilon); - ifix = lmp->modify->get_fix_by_id("test"); if (!ifix) { FAIL() << "ERROR: no fix defined with fix ID 'test'\n"; } else { @@ -832,3 +836,181 @@ TEST(FixTimestep, omp) cleanup_lammps(lmp, test_config); if (!verbose) ::testing::internal::GetCapturedStdout(); }; + +TEST(FixTimestep, kokkos_omp) +{ + if (!LAMMPS::is_installed_pkg("KOKKOS")) GTEST_SKIP(); + if (test_config.skip_tests.count(test_info_->name())) GTEST_SKIP(); + if (!Info::has_accelerator_feature("KOKKOS", "api", "openmp")) GTEST_SKIP(); + + LAMMPS::argv args = {"FixTimestep", "-log", "none", "-echo", "screen", "-nocite", + "-k", "on", "t", "4", "-sf", "kk"}; + + ::testing::internal::CaptureStdout(); + LAMMPS *lmp = init_lammps(args, test_config); + std::string output = 
::testing::internal::GetCapturedStdout(); + if (verbose) std::cout << output; + + if (!lmp) { + std::cerr << "One or more prerequisite styles with /kk suffix\n" + "are not available in this LAMMPS configuration:\n"; + for (auto &prerequisite : test_config.prerequisites) { + std::cerr << prerequisite.first << "_style " << prerequisite.second << "\n"; + } + GTEST_SKIP(); + } + + EXPECT_THAT(output, StartsWith("LAMMPS (")); + EXPECT_THAT(output, HasSubstr("Loop time")); + + // abort if running in parallel and not all atoms are local + const int nlocal = lmp->atom->nlocal; + ASSERT_EQ(lmp->atom->natoms, nlocal); + + // relax error a bit for KOKKOS package + double epsilon = 5.0 * test_config.epsilon; + // relax test precision when using pppm and single precision FFTs +#if defined(FFT_SINGLE) + if (lmp->force->kspace && utils::strmatch(lmp->force->kspace_style, "^pppm")) epsilon *= 2.0e8; +#endif + + ErrorStats stats; + + EXPECT_POSITIONS("run_pos (normal run, verlet)", lmp->atom, test_config.run_pos, epsilon); + EXPECT_VELOCITIES("run_vel (normal run, verlet)", lmp->atom, test_config.run_vel, epsilon); + + int ifix = lmp->modify->find_fix("test"); + if (ifix < 0) { + FAIL() << "ERROR: no fix defined with fix ID 'test'\n"; + } else { + Fix *fix = lmp->modify->fix[ifix]; + if (fix->thermo_virial) { + EXPECT_STRESS("run_stress (normal run, verlet)", fix->virial, test_config.run_stress, + epsilon); + } + + stats.reset(); + // global scalar + if (fix->scalar_flag) { + double value = fix->compute_scalar(); + EXPECT_FP_LE_WITH_EPS(test_config.global_scalar, value, epsilon); + } + + // global vector + if (fix->vector_flag) { + int num = fix->size_vector; + EXPECT_EQ(num, test_config.global_vector.size()); + + for (int i = 0; i < num; ++i) + EXPECT_FP_LE_WITH_EPS(test_config.global_vector[i], fix->compute_vector(i), + epsilon); + } + + // check t_target for thermostats + + int dim = -1; + double *ptr = (double *)fix->extract("t_target", dim); + if ((ptr != nullptr) && (dim == 
0)) { + int ivar = lmp->input->variable->find("t_target"); + if (ivar >= 0) { + double t_ref = atof(lmp->input->variable->retrieve("t_target")); + double t_target = *ptr; + EXPECT_FP_LE_WITH_EPS(t_target, t_ref, epsilon); + } + } + if (print_stats && stats.has_data()) + std::cerr << "global_data, normal run, verlet: " << stats << std::endl; + } + + + // FIXME: remove after debugging + if (utils::strmatch(lmp->modify->fix[ifix]->style, "^cmap") ) + return; + + if (!verbose) ::testing::internal::CaptureStdout(); + restart_lammps(lmp, test_config, false, false); + if (!verbose) ::testing::internal::GetCapturedStdout(); + + + EXPECT_POSITIONS("run_pos (restart, verlet)", lmp->atom, test_config.run_pos, epsilon); + EXPECT_VELOCITIES("run_vel (restart, verlet)", lmp->atom, test_config.run_vel, epsilon); + + ifix = lmp->modify->find_fix("test"); + if (ifix < 0) { + FAIL() << "ERROR: no fix defined with fix ID 'test'\n"; + } else { + Fix *fix = lmp->modify->fix[ifix]; + if (fix->thermo_virial) { + EXPECT_STRESS("run_stress (restart, verlet)", fix->virial, test_config.run_stress, + epsilon); + } + + stats.reset(); + + // global scalar + if (fix->scalar_flag) { + double value = fix->compute_scalar(); + EXPECT_FP_LE_WITH_EPS(test_config.global_scalar, value, epsilon); + } + + // global vector + if (fix->vector_flag) { + int num = fix->size_vector; + EXPECT_EQ(num, test_config.global_vector.size()); + + for (int i = 0; i < num; ++i) + EXPECT_FP_LE_WITH_EPS(test_config.global_vector[i], fix->compute_vector(i), + epsilon); + } + if (print_stats && stats.has_data()) + std::cerr << "global_data, restart, verlet: " << stats << std::endl; + } + + if (lmp->atom->rmass == nullptr) { + if (!verbose) ::testing::internal::CaptureStdout(); + restart_lammps(lmp, test_config, true, false); + if (!verbose) ::testing::internal::GetCapturedStdout(); + + EXPECT_POSITIONS("run_pos (rmass, verlet)", lmp->atom, test_config.run_pos, epsilon); + EXPECT_VELOCITIES("run_vel (rmass, verlet)", 
lmp->atom, test_config.run_vel, epsilon); + + ifix = lmp->modify->find_fix("test"); + if (ifix < 0) { + FAIL() << "ERROR: no fix defined with fix ID 'test'\n"; + } else { + Fix *fix = lmp->modify->fix[ifix]; + if (fix->thermo_virial) { + EXPECT_STRESS("run_stress (rmass, verlet)", fix->virial, test_config.run_stress, + epsilon); + } + + stats.reset(); + + // global scalar + if (fix->scalar_flag) { + double value = fix->compute_scalar(); + EXPECT_FP_LE_WITH_EPS(test_config.global_scalar, value, epsilon); + } + + // global vector + if (fix->vector_flag) { + int num = fix->size_vector; + EXPECT_EQ(num, test_config.global_vector.size()); + + for (int i = 0; i < num; ++i) + EXPECT_FP_LE_WITH_EPS(test_config.global_vector[i], fix->compute_vector(i), + epsilon); + } + if (print_stats && stats.has_data()) + std::cerr << "global_data, rmass, verlet: " << stats << std::endl; + } + } + + // skip RESPA tests for KOKKOS + + if (!verbose) ::testing::internal::CaptureStdout(); + cleanup_lammps(lmp, test_config); + if (!verbose) ::testing::internal::GetCapturedStdout(); +}; + + From 90aaaea75af3f7fd9a5a2aa1d4189349bcbdd19f Mon Sep 17 00:00:00 2001 From: alphataubio Date: Fri, 9 Aug 2024 03:05:23 -0400 Subject: [PATCH 012/294] cleanup --- src/KOKKOS/fix_bond_react_kokkos.cpp | 4576 -------------------------- src/KOKKOS/fix_bond_react_kokkos.h | 238 -- src/KOKKOS/fix_recenter_kokkos.cpp | 2 +- src/KOKKOS/superpose3d_kokkos.h | 439 --- src/REACTION/fix_bond_react.cpp | 2 + 5 files changed, 3 insertions(+), 5254 deletions(-) delete mode 100644 src/KOKKOS/fix_bond_react_kokkos.cpp delete mode 100644 src/KOKKOS/fix_bond_react_kokkos.h delete mode 100644 src/KOKKOS/superpose3d_kokkos.h diff --git a/src/KOKKOS/fix_bond_react_kokkos.cpp b/src/KOKKOS/fix_bond_react_kokkos.cpp deleted file mode 100644 index 6bc287bc5f..0000000000 --- a/src/KOKKOS/fix_bond_react_kokkos.cpp +++ /dev/null @@ -1,4576 +0,0 @@ -/* ---------------------------------------------------------------------- -LAMMPS - 
Large-scale Atomic/Molecular Massively Parallel Simulator -https://www.lammps.org/, Sandia National Laboratories -LAMMPS development team: developers@lammps.org - -Copyright (2003) Sandia Corporation. Under the terms of Contract -DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains -certain rights in this software. This software is distributed under -the GNU General Public License. - -See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- -Contributing Author: Jacob Gissinger (jgissing@stevens.edu) -KOKKOS version (2024/08): Mitch Murphy (alphataubio@gmail.com) -------------------------------------------------------------------------- */ - -#include "fix_bond_react.h" - -#include "atom.h" -#include "atom_vec.h" -#include "citeme.h" -#include "comm.h" -#include "compute.h" -#include "domain.h" -#include "error.h" -#include "fix_bond_history.h" -#include "force.h" -#include "group.h" -#include "input.h" -#include "math_const.h" -#include "math_extra.h" -#include "memory.h" -#include "modify.h" -#include "molecule.h" -#include "neigh_list.h" -#include "neighbor.h" -#include "pair.h" -#include "random_mars.h" -#include "reset_atoms_mol.h" -#include "respa.h" -#include "update.h" -#include "variable.h" - -#include "superpose3d.h" - -#include -#include -#include - -#include -#include -#include - -using namespace LAMMPS_NS; -using namespace FixConst; -using namespace MathConst; - -static const char cite_fix_bond_react[] = - "fix bond/react: reacter.org doi:10.1016/j.polymer.2017.09.038, " - "doi:10.1021/acs.macromol.0c02012\n\n" - "@Article{Gissinger17,\n" - " author = {J. R. Gissinger and B. D. Jensen and K. E. 
Wise},\n" - " title = {Modeling Chemical Reactions in Classical Molecular Dynamics Simulations},\n" - " journal = {Polymer},\n" - " year = 2017,\n" - " volume = 128,\n" - " pages = {211--217}\n" - "}\n\n" - "@Article{Gissinger20,\n" - " author = {J. R. Gissinger, B. D. Jensen, K. E. Wise},\n" - " title = {{REACTER}: A Heuristic Method for Reactive Molecular Dynamics},\n" - " journal = {Macromolecules},\n" - " year = 2020,\n" - " volume = 53,\n" - " number = 22,\n" - " pages = {9953--9961}\n" - "}\n\n"; - -static constexpr double BIG = 1.0e20; -static constexpr int DELTA = 16; -static constexpr int MAXGUESS = 20; // max # of guesses allowed by superimpose algorithm -static constexpr int MAXCONARGS = 14; // max # of arguments for any type of constraint + rxnID -static constexpr int NUMVARVALS = 5; // max # of keyword values that have variables as input - -// various statuses of superimpose algorithm: -// ACCEPT: site successfully matched to pre-reacted template -// REJECT: site does not match pre-reacted template -// PROCEED: normal execution (non-guessing mode) -// CONTINUE: a neighbor has been assigned, skip to next neighbor -// GUESSFAIL: a guess has failed (if no more restore points, status = 'REJECT') -// RESTORE: restore mode, load most recent restore point -enum { ACCEPT, REJECT, PROCEED, CONTINUE, GUESSFAIL, RESTORE }; - -// types of available reaction constraints -enum { DISTANCE, ANGLE, DIHEDRAL, ARRHENIUS, RMSD, CUSTOM }; - -// ID type used by constraint -enum { ATOM, FRAG }; - -// keyword values that accept variables as input -enum { NEVERY, RMIN, RMAX, PROB, NRATE }; - -// flag for one-proc vs shared reaction sites -enum { LOCAL, GLOBAL }; - -// values for molecule_keyword -enum { OFF, INTER, INTRA }; - -/* ---------------------------------------------------------------------- */ -// clang-format off - -FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) : - Fix(lmp, narg, arg) -{ - if (lmp->citeme) lmp->citeme->add(cite_fix_bond_react); - - 
fix1 = nullptr; - fix2 = nullptr; - fix3 = nullptr; - reset_mol_ids = nullptr; - - if (narg < 8) utils::missing_cmd_args(FLERR,"fix bond/react", error); - - newton_bond = force->newton_bond; - - restart_global = 1; - attempted_rxn = 0; - force_reneighbor = 1; - next_reneighbor = -1; - vector_flag = 1; - global_freq = 1; - extvector = 0; - rxnID = 0; - cuff = 1; - maxnconstraints = 0; - narrhenius = 0; - status = PROCEED; - - // reaction functions used by 'custom' constraint - nrxnfunction = 3; - rxnfunclist.resize(nrxnfunction); - peratomflag.resize(nrxnfunction); - rxnfunclist[0] = "rxnsum"; - peratomflag[0] = 1; - rxnfunclist[1] = "rxnave"; - peratomflag[1] = 1; - rxnfunclist[2] = "rxnbond"; - peratomflag[2] = 0; - nvvec = 0; - ncustomvars = 0; - vvec = nullptr; - - nxspecial = nullptr; - onemol_nxspecial = nullptr; - twomol_nxspecial = nullptr; - xspecial = nullptr; - onemol_xspecial = nullptr; - twomol_xspecial = nullptr; - - // these group names are reserved for use exclusively by bond/react - master_group = (char *) "bond_react_MASTER_group"; - - // by using fixed group names, only one instance of fix bond/react is allowed. 
- if (modify->get_fix_by_style("^bond/react").size() != 0) - error->all(FLERR,"Only one instance of fix bond/react allowed at a time"); - - // let's find number of reactions specified - nreacts = 0; - for (int i = 3; i < narg; i++) { - if (strcmp(arg[i],"react") == 0) { - nreacts++; - i = i + 6; // skip past mandatory arguments - if (i > narg) error->all(FLERR,"Illegal fix bond/react command: " - "'react' has too few arguments"); - } - } - - if (nreacts == 0) error->all(FLERR,"Illegal fix bond/react command: " - "missing mandatory 'react' argument"); - - size_vector = nreacts; - - int iarg = 3; - stabilization_flag = 0; - reset_mol_ids_flag = 1; - int num_common_keywords = 2; - for (int m = 0; m < num_common_keywords; m++) { - if (strcmp(arg[iarg],"stabilization") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix bond/react command: " - "'stabilization' keyword has too few arguments"); - stabilization_flag = utils::logical(FLERR,arg[iarg+1],false,lmp); - if (stabilization_flag) { - if (iarg+4 > narg) error->all(FLERR,"Illegal fix bond/react command:" - "'stabilization' keyword has too few arguments"); - exclude_group = utils::strdup(arg[iarg+2]); - nve_limit_xmax = arg[iarg+3]; - iarg += 4; - } else iarg += 2; - } else if (strcmp(arg[iarg],"reset_mol_ids") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix bond/react command: " - "'reset_mol_ids' keyword has too few arguments"); - reset_mol_ids_flag = utils::logical(FLERR,arg[iarg+1],false,lmp); - iarg += 2; - } else if (strcmp(arg[iarg],"react") == 0) { - break; - } else error->all(FLERR,"Illegal fix bond/react command: unknown keyword"); - } - - if (reset_mol_ids_flag) { - delete reset_mol_ids; - reset_mol_ids = new ResetAtomsMol(lmp); - reset_mol_ids->create_computes(id,group->names[igroup]); - } - - // set up common variables as vectors of length 'nreacts' - // nevery, cutoff, onemol, twomol, superimpose file - - // this looks excessive - // the price of vectorization (all reactions in one 
command)? - memory->create(rxn_name,nreacts,MAXNAME,"bond/react:rxn_name"); - memory->create(nevery,nreacts,"bond/react:nevery"); - memory->create(cutsq,nreacts,2,"bond/react:cutsq"); - memory->create(unreacted_mol,nreacts,"bond/react:unreacted_mol"); - memory->create(reacted_mol,nreacts,"bond/react:reacted_mol"); - memory->create(fraction,nreacts,"bond/react:fraction"); - memory->create(max_rxn,nreacts,"bond/react:max_rxn"); - memory->create(nlocalskips,nreacts,"bond/react:nlocalskips"); - memory->create(nghostlyskips,nreacts,"bond/react:nghostlyskips"); - memory->create(seed,nreacts,"bond/react:seed"); - memory->create(limit_duration,nreacts,"bond/react:limit_duration"); - memory->create(rate_limit,3,nreacts,"bond/react:rate_limit"); - memory->create(stabilize_steps_flag,nreacts,"bond/react:stabilize_steps_flag"); - memory->create(custom_charges_fragid,nreacts,"bond/react:custom_charges_fragid"); - memory->create(rescale_charges_flag,nreacts,"bond/react:rescale_charges_flag"); - memory->create(create_atoms_flag,nreacts,"bond/react:create_atoms_flag"); - memory->create(modify_create_fragid,nreacts,"bond/react:modify_create_fragid"); - memory->create(overlapsq,nreacts,"bond/react:overlapsq"); - memory->create(molecule_keyword,nreacts,"bond/react:molecule_keyword"); - memory->create(nconstraints,nreacts,"bond/react:nconstraints"); - memory->create(constraintstr,nreacts,MAXLINE,"bond/react:constraintstr"); - memory->create(var_flag,NUMVARVALS,nreacts,"bond/react:var_flag"); - memory->create(var_id,NUMVARVALS,nreacts,"bond/react:var_id"); - memory->create(iatomtype,nreacts,"bond/react:iatomtype"); - memory->create(jatomtype,nreacts,"bond/react:jatomtype"); - memory->create(ibonding,nreacts,"bond/react:ibonding"); - memory->create(jbonding,nreacts,"bond/react:jbonding"); - memory->create(closeneigh,nreacts,"bond/react:closeneigh"); - memory->create(groupbits,nreacts,"bond/react:groupbits"); - memory->create(reaction_count,nreacts,"bond/react:reaction_count"); - 
memory->create(local_rxn_count,nreacts,"bond/react:local_rxn_count"); - memory->create(ghostly_rxn_count,nreacts,"bond/react:ghostly_rxn_count"); - memory->create(reaction_count_total,nreacts,"bond/react:reaction_count_total"); - - rescale_charges_anyflag = 0; - for (int i = 0; i < nreacts; i++) { - fraction[i] = 1.0; - seed[i] = 12345; - max_rxn[i] = INT_MAX; - for (int j = 0; j < 3; j++) - rate_limit[j][i] = 0; - stabilize_steps_flag[i] = 0; - custom_charges_fragid[i] = -1; - rescale_charges_flag[i] = 0; - create_atoms_flag[i] = 0; - modify_create_fragid[i] = -1; - overlapsq[i] = 0.0; - molecule_keyword[i] = OFF; - nconstraints[i] = 0; - // set default limit duration to 60 timesteps - limit_duration[i] = 60; - reaction_count[i] = 0; - local_rxn_count[i] = 0; - ghostly_rxn_count[i] = 0; - reaction_count_total[i] = 0; - for (int j = 0; j < NUMVARVALS; j++) { - var_flag[j][i] = 0; - var_id[j][i] = 0; - } - } - - char **files; - files = new char*[nreacts]; - - for (int rxn = 0; rxn < nreacts; rxn++) { - - if (strcmp(arg[iarg],"react") != 0) error->all(FLERR,"Illegal fix bond/react command: " - "'react' or 'stabilization' has incorrect arguments"); - - iarg++; - - int n = strlen(arg[iarg]) + 1; - if (n > MAXNAME) error->all(FLERR,"Reaction name (react-ID) is too long (limit: 256 characters)"); - strcpy(rxn_name[rxn],arg[iarg++]); - - int groupid = group->find(arg[iarg++]); - if (groupid == -1) error->all(FLERR,"Could not find fix group ID"); - groupbits[rxn] = group->bitmask[groupid]; - - if (strncmp(arg[iarg],"v_",2) == 0) read_variable_keyword(&arg[iarg][2],NEVERY,rxn); - else { - nevery[rxn] = utils::inumeric(FLERR,arg[iarg],false,lmp); - if (nevery[rxn] <= 0) error->all(FLERR,"Illegal fix bond/react command: " - "'Nevery' must be a positive integer"); - } - iarg++; - - double cutoff; - if (strncmp(arg[iarg],"v_",2) == 0) { - read_variable_keyword(&arg[iarg][2],RMIN,rxn); - cutoff = input->variable->compute_equal(var_id[RMIN][rxn]); - } else cutoff = 
utils::numeric(FLERR,arg[iarg],false,lmp); - if (cutoff < 0.0) error->all(FLERR,"Illegal fix bond/react command: " - "'Rmin' cannot be negative"); - cutsq[rxn][0] = cutoff*cutoff; - iarg++; - - if (strncmp(arg[iarg],"v_",2) == 0) { - read_variable_keyword(&arg[iarg][2],RMAX,rxn); - cutoff = input->variable->compute_equal(var_id[RMAX][rxn]); - } else cutoff = utils::numeric(FLERR,arg[iarg],false,lmp); - if (cutoff < 0.0) error->all(FLERR,"Illegal fix bond/react command:" - "'Rmax' cannot be negative"); - cutsq[rxn][1] = cutoff*cutoff; - iarg++; - - unreacted_mol[rxn] = atom->find_molecule(arg[iarg++]); - if (unreacted_mol[rxn] == -1) error->all(FLERR,"Unreacted molecule template ID for " - "fix bond/react does not exist"); - reacted_mol[rxn] = atom->find_molecule(arg[iarg++]); - if (reacted_mol[rxn] == -1) error->all(FLERR,"Reacted molecule template ID for " - "fix bond/react does not exist"); - - //read map file - files[rxn] = utils::strdup(arg[iarg]); - iarg++; - - while (iarg < narg && strcmp(arg[iarg],"react") != 0) { - if (strcmp(arg[iarg],"prob") == 0) { - if (iarg+3 > narg) error->all(FLERR,"Illegal fix bond/react command: " - "'prob' keyword has too few arguments"); - // check if probability is a variable - if (strncmp(arg[iarg+1],"v_",2) == 0) { - read_variable_keyword(&arg[iarg+1][2],PROB,rxn); - fraction[rxn] = input->variable->compute_equal(var_id[PROB][rxn]); - } else { - // otherwise probability should be a number - fraction[rxn] = utils::numeric(FLERR,arg[iarg+1],false,lmp); - } - seed[rxn] = utils::inumeric(FLERR,arg[iarg+2],false,lmp); - if (fraction[rxn] < 0.0 || fraction[rxn] > 1.0) - error->all(FLERR,"Illegal fix bond/react command: " - "probability fraction must between 0 and 1, inclusive"); - if (seed[rxn] <= 0) error->all(FLERR,"Illegal fix bond/react command: " - "probability seed must be positive"); - iarg += 3; - } else if (strcmp(arg[iarg],"max_rxn") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix bond/react command: " - 
"'max_rxn' has too few arguments"); - max_rxn[rxn] = utils::inumeric(FLERR,arg[iarg+1],false,lmp); - if (max_rxn[rxn] < 0) error->all(FLERR,"Illegal fix bond/react command: " - "'max_rxn' cannot be negative"); - iarg += 2; - } else if (strcmp(arg[iarg],"rate_limit") == 0) { - if (iarg+3 > narg) error->all(FLERR,"Illegal fix bond/react command: " - "'rate_limit' has too few arguments"); - rate_limit[0][rxn] = 1; // serves as flag for rate_limit keyword - if (strncmp(arg[iarg+1],"v_",2) == 0) read_variable_keyword(&arg[iarg+1][2],NRATE,rxn); - else rate_limit[1][rxn] = utils::numeric(FLERR,arg[iarg+1],false,lmp); - rate_limit[2][rxn] = utils::numeric(FLERR,arg[iarg+2],false,lmp); - iarg += 3; - } else if (strcmp(arg[iarg],"stabilize_steps") == 0) { - if (stabilization_flag == 0) error->all(FLERR,"Stabilize_steps keyword " - "used without stabilization keyword"); - if (iarg+2 > narg) error->all(FLERR,"Illegal fix bond/react command: " - "'stabilize_steps' has too few arguments"); - limit_duration[rxn] = utils::numeric(FLERR,arg[iarg+1],false,lmp); - stabilize_steps_flag[rxn] = 1; - iarg += 2; - } else if (strcmp(arg[iarg],"custom_charges") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix bond/react command: " - "'custom_charges' has too few arguments"); - if (strcmp(arg[iarg+1],"no") == 0) custom_charges_fragid[rxn] = -1; //default - else { - custom_charges_fragid[rxn] = atom->molecules[unreacted_mol[rxn]]->findfragment(arg[iarg+1]); - if (custom_charges_fragid[rxn] < 0) error->one(FLERR,"Fix bond/react: Molecule fragment for " - "'custom_charges' keyword does not exist"); - } - iarg += 2; - } else if (strcmp(arg[iarg],"rescale_charges") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix bond/react command: " - "'rescale_charges' has too few arguments"); - if (strcmp(arg[iarg+1],"no") == 0) rescale_charges_flag[rxn] = 0; //default - else if (strcmp(arg[iarg+1],"yes") == 0) { - if (!atom->q_flag) error->all(FLERR,"Illegal fix bond/react command: 
cannot use " - "'rescale_charges' without atomic charges enabled"); - twomol = atom->molecules[reacted_mol[rxn]]; - if (!twomol->qflag) error->all(FLERR,"Illegal fix bond/react command: cannot use " - "'rescale_charges' without Charges section in post-reaction template"); - rescale_charges_flag[rxn] = 1; // overloaded below to also indicate number of atoms to update - rescale_charges_anyflag = 1; - cuff = 2; // index shift for extra values carried around by mega_gloves - } else error->one(FLERR,"Bond/react: Illegal option for 'rescale_charges' keyword"); - iarg += 2; - } else if (strcmp(arg[iarg],"molecule") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix bond/react command: " - "'molecule' has too few arguments"); - if (strcmp(arg[iarg+1],"off") == 0) molecule_keyword[rxn] = OFF; //default - else if (strcmp(arg[iarg+1],"inter") == 0) molecule_keyword[rxn] = INTER; - else if (strcmp(arg[iarg+1],"intra") == 0) molecule_keyword[rxn] = INTRA; - else error->one(FLERR,"Fix bond/react: Illegal option for 'molecule' keyword"); - iarg += 2; - } else if (strcmp(arg[iarg],"modify_create") == 0) { - if (iarg++ > narg) error->all(FLERR,"Illegal fix bond/react command: " - "'modify_create' has too few arguments"); - while (iarg < narg && strcmp(arg[iarg],"react") != 0) { - if (strcmp(arg[iarg],"fit") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix bond/react command: " - "'modify_create' has too few arguments"); - if (strcmp(arg[iarg+1],"all") == 0) modify_create_fragid[rxn] = -1; //default - else { - modify_create_fragid[rxn] = atom->molecules[reacted_mol[rxn]]->findfragment(arg[iarg+1]); - if (modify_create_fragid[rxn] < 0) error->one(FLERR,"Fix bond/react: Molecule fragment for " - "'modify_create' keyword does not exist"); - } - iarg += 2; - } else if (strcmp(arg[iarg],"overlap") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix bond/react command: " - "'modify_create' has too few arguments"); - overlapsq[rxn] = 
utils::numeric(FLERR,arg[iarg+1],false,lmp); - overlapsq[rxn] *= overlapsq[rxn]; - iarg += 2; - } else break; - } - } else error->all(FLERR,"Illegal fix bond/react command: unknown keyword"); - } - } - - max_natoms = 0; // the number of atoms in largest molecule template - max_rate_limit_steps = 0; - for (int myrxn = 0; myrxn < nreacts; myrxn++) { - twomol = atom->molecules[reacted_mol[myrxn]]; - max_natoms = MAX(max_natoms,twomol->natoms); - max_rate_limit_steps = MAX(max_rate_limit_steps,rate_limit[2][myrxn]); - } - - memory->create(equivalences,max_natoms,2,nreacts,"bond/react:equivalences"); - memory->create(reverse_equiv,max_natoms,2,nreacts,"bond/react:reverse_equiv"); - memory->create(edge,max_natoms,nreacts,"bond/react:edge"); - memory->create(landlocked_atoms,max_natoms,nreacts,"bond/react:landlocked_atoms"); - memory->create(store_rxn_count,max_rate_limit_steps,nreacts,"bond/react:store_rxn_count"); - memory->create(custom_charges,max_natoms,nreacts,"bond/react:custom_charges"); - memory->create(delete_atoms,max_natoms,nreacts,"bond/react:delete_atoms"); - memory->create(create_atoms,max_natoms,nreacts,"bond/react:create_atoms"); - memory->create(chiral_atoms,max_natoms,6,nreacts,"bond/react:chiral_atoms"); - memory->create(mol_total_charge,nreacts,"bond/react:mol_total_charge"); - - for (int j = 0; j < nreacts; j++) { - mol_total_charge[j] = 0.0; - for (int i = 0; i < max_natoms; i++) { - edge[i][j] = 0; - custom_charges[i][j] = 1; // update all partial charges by default - delete_atoms[i][j] = 0; - create_atoms[i][j] = 0; - for (int k = 0; k < 6; k++) { - chiral_atoms[i][k][j] = 0; - } - // default equivalences to their own mol index - // all but created atoms will be updated - for (int m = 0; m < 2; m++) { - equivalences[i][m][j] = i+1; - } - } - for (int i = 0; i < max_rate_limit_steps; i++) { - store_rxn_count[i][j] = -1; - } - } - - // read all map files afterward - for (int i = 0; i < nreacts; i++) { - open(files[i]); - onemol = 
atom->molecules[unreacted_mol[i]]; - twomol = atom->molecules[reacted_mol[i]]; - onemol->check_attributes(); - twomol->check_attributes(); - get_molxspecials(); - read_map_file(i); - fclose(fp); - if (ncreate == 0 && onemol->natoms != twomol->natoms) - error->all(FLERR,"Fix bond/react: Reaction templates must contain the same number of atoms"); - else if (ncreate > 0 && onemol->natoms + ncreate != twomol->natoms) - error->all(FLERR,"Fix bond/react: Incorrect number of created atoms"); - iatomtype[i] = onemol->type[ibonding[i]-1]; - jatomtype[i] = onemol->type[jbonding[i]-1]; - find_landlocked_atoms(i); - if (custom_charges_fragid[i] >= 0) CustomCharges(custom_charges_fragid[i],i); - } - - // charge rescaling values must be calculated after calling CustomCharges - for (int myrxn = 0; myrxn < nreacts; myrxn++) { - if (rescale_charges_flag[myrxn]) { - rescale_charges_flag[myrxn] = 0; // will now store number of updated atoms - twomol = atom->molecules[reacted_mol[myrxn]]; - for (int j = 0; j < twomol->natoms; j++) { - int jj = equivalences[j][1][myrxn]-1; - if (custom_charges[jj][myrxn] == 1 && delete_atoms[jj][myrxn] == 0) { - mol_total_charge[myrxn] += twomol->q[j]; - rescale_charges_flag[myrxn]++; - } - } - } - } - - // get the names of per-atom variables needed by 'rxn' functions of custom constraint - customvarnames(); - - // initialize Marsaglia RNG with processor-unique seed (Arrhenius prob) - - rrhandom = new RanMars*[narrhenius]; - int tmp = 0; - for (int i = 0; i < nreacts; i++) { - for (int j = 0; j < nconstraints[i]; j++) { - if (constraints[j][i].type == ARRHENIUS) { - rrhandom[tmp++] = new RanMars(lmp,(int) constraints[j][i].par[4] + comm->me); - } - } - } - - for (int i = 0; i < nreacts; i++) { - delete [] files[i]; - } - delete [] files; - - if (atom->molecular != Atom::MOLECULAR) - error->all(FLERR,"Fix bond/react: Cannot use fix bond/react with non-molecular systems"); - - // check if bonding atoms are 1-2, 1-3, or 1-4 bonded neighbors - // if so, we 
don't need non-bonded neighbor list - for (int myrxn = 0; myrxn < nreacts; myrxn++) { - closeneigh[myrxn] = -1; // indicates will search non-bonded neighbors - onemol = atom->molecules[unreacted_mol[myrxn]]; - get_molxspecials(); - for (int k = 0; k < onemol_nxspecial[ibonding[myrxn]-1][2]; k++) { - if (onemol_xspecial[ibonding[myrxn]-1][k] == jbonding[myrxn]) { - closeneigh[myrxn] = 2; // index for 1-4 neighbor - if (k < onemol_nxspecial[ibonding[myrxn]-1][1]) - closeneigh[myrxn] = 1; // index for 1-3 neighbor - if (k < onemol_nxspecial[ibonding[myrxn]-1][0]) - closeneigh[myrxn] = 0; // index for 1-2 neighbor - break; - } - } - } - - // initialize Marsaglia RNG with processor-unique seed ('prob' keyword) - - random = new RanMars*[nreacts]; - for (int i = 0; i < nreacts; i++) { - random[i] = new RanMars(lmp,seed[i] + comm->me); - } - - // set comm sizes needed by this fix - // forward is big due to comm of broken bonds and 1-2 neighbors - - comm_forward = MAX(2,2+atom->maxspecial); - comm_reverse = 2; - - // allocate arrays local to this fix - nmax = 0; - partner = finalpartner = nullptr; - distsq = nullptr; - maxattempt = 0; - attempt = nullptr; - nattempt = nullptr; - allnattempt = 0; - my_num_mega = 0; - local_num_mega = 0; - ghostly_num_mega = 0; - restore = nullptr; - - // zero out stats - global_megasize = 0; - avail_guesses = 0; - glove_counter = 0; - guess_branch = new int[MAXGUESS](); - pioneer_count = new int[max_natoms]; - my_mega_glove = nullptr; - local_mega_glove = nullptr; - ghostly_mega_glove = nullptr; - global_mega_glove = nullptr; - - // these are merely loop indices that became important - pion = neigh = trace = 0; - - id_fix1 = nullptr; - id_fix2 = nullptr; - id_fix3 = nullptr; - statted_id = nullptr; - custom_exclude_flag = 0; - - // used to store restart info - set = new Set[nreacts]; - memset(set,0,nreacts*sizeof(Set)); -} - -/* ---------------------------------------------------------------------- */ - -FixBondReact::~FixBondReact() -{ - 
for (int i = 0; i < narrhenius; i++) { - delete rrhandom[i]; - } - delete [] rrhandom; - - for (int i = 0; i < nreacts; i++) { - delete random[i]; - } - delete [] random; - - delete reset_mol_ids; - - memory->destroy(partner); - memory->destroy(finalpartner); - memory->destroy(nattempt); - memory->destroy(distsq); - memory->destroy(attempt); - memory->destroy(edge); - memory->destroy(equivalences); - memory->destroy(reverse_equiv); - memory->destroy(landlocked_atoms); - memory->destroy(store_rxn_count); - memory->destroy(custom_charges); - memory->destroy(delete_atoms); - memory->destroy(create_atoms); - memory->destroy(chiral_atoms); - memory->destroy(mol_total_charge); - if (vvec != nullptr) memory->destroy(vvec); - - memory->destroy(rxn_name); - memory->destroy(nevery); - memory->destroy(cutsq); - memory->destroy(unreacted_mol); - memory->destroy(reacted_mol); - memory->destroy(fraction); - memory->destroy(seed); - memory->destroy(max_rxn); - memory->destroy(nlocalskips); - memory->destroy(nghostlyskips); - memory->destroy(limit_duration); - memory->destroy(var_flag); - memory->destroy(var_id); - memory->destroy(rate_limit); - memory->destroy(stabilize_steps_flag); - memory->destroy(custom_charges_fragid); - memory->destroy(rescale_charges_flag); - memory->destroy(molecule_keyword); - memory->destroy(nconstraints); - memory->destroy(constraintstr); - memory->destroy(create_atoms_flag); - memory->destroy(modify_create_fragid); - memory->destroy(overlapsq); - - memory->destroy(iatomtype); - memory->destroy(jatomtype); - memory->destroy(ibonding); - memory->destroy(jbonding); - memory->destroy(closeneigh); - memory->destroy(groupbits); - memory->destroy(reaction_count); - memory->destroy(local_rxn_count); - memory->destroy(ghostly_rxn_count); - memory->destroy(reaction_count_total); - - if (attempted_rxn == 1) { - memory->destroy(restore_pt); - memory->destroy(restore); - memory->destroy(glove); - memory->destroy(pioneers); - memory->destroy(my_mega_glove); - 
memory->destroy(local_mega_glove); - memory->destroy(ghostly_mega_glove); - } - - memory->destroy(global_mega_glove); - - if (stabilization_flag == 1) { - // delete fixes if not already deleted - if (id_fix1 && modify->get_fix_by_id(id_fix1)) modify->delete_fix(id_fix1); - delete[] id_fix1; - - if (id_fix3 && modify->get_fix_by_id(id_fix3)) modify->delete_fix(id_fix3); - delete[] id_fix3; - } - - if (id_fix2 && modify->get_fix_by_id(id_fix2)) modify->delete_fix(id_fix2); - delete[] id_fix2; - - delete[] statted_id; - delete[] guess_branch; - delete[] pioneer_count; - delete[] set; - - if (group) { - group->assign(std::string(master_group) + " delete"); - if (stabilization_flag == 1) { - group->assign(std::string(exclude_group) + " delete"); - delete[] exclude_group; - } - } -} - -/* ---------------------------------------------------------------------- */ - -int FixBondReact::setmask() -{ - int mask = 0; - mask |= POST_INTEGRATE; - mask |= POST_INTEGRATE_RESPA; - return mask; -} - -/* ---------------------------------------------------------------------- -let's add an internal nve/limit fix for relaxation of reaction sites -also let's add our per-atom property fix here! 
-this per-atom property will state the timestep an atom was 'limited' -it will have the name 'i_limit_tags' and will be intitialized to 0 (not in group) -------------------------------------------------------------------------- */ - -void FixBondReact::post_constructor() -{ - // let's add the limit_tags per-atom property fix - id_fix2 = utils::strdup("bond_react_props_internal"); - if (!modify->get_fix_by_id(id_fix2)) - fix2 = modify->add_fix(std::string(id_fix2) + - " all property/atom i_limit_tags i_react_tags ghost yes"); - - // create master_group if not already existing - // NOTE: limit_tags and react_tags automaticaly intitialized to zero (unless read from restart) - group->find_or_create(master_group); - std::string cmd = fmt::format("{} dynamic all property limit_tags",master_group); - group->assign(cmd); - - if (stabilization_flag == 1) { - int groupid = group->find(exclude_group); - // create exclude_group if not already existing, or use as parent group if static - if (groupid == -1 || group->dynamic[groupid] == 0) { - - // create stabilization per-atom property - id_fix3 = utils::strdup("bond_react_stabilization_internal"); - if (!modify->get_fix_by_id(id_fix3)) - fix3 = modify->add_fix(std::string(id_fix3) + - " all property/atom i_statted_tags ghost yes"); - - statted_id = utils::strdup("statted_tags"); - - // if static group exists, use as parent group - // also, rename dynamic exclude_group by appending '_REACT' - char *exclude_PARENT_group; - exclude_PARENT_group = utils::strdup(exclude_group); - delete[] exclude_group; - exclude_group = utils::strdup(std::string(exclude_PARENT_group)+"_REACT"); - - group->find_or_create(exclude_group); - if (groupid == -1) - cmd = fmt::format("{} dynamic all property statted_tags",exclude_group); - else - cmd = fmt::format("{} dynamic {} property statted_tags",exclude_group,exclude_PARENT_group); - group->assign(cmd); - delete[] exclude_PARENT_group; - - // on to statted_tags (system-wide thermostat) - // 
initialize per-atom statted_flags to 1 - // (only if not already initialized by restart) - if (fix3->restart_reset != 1) { - int flag,cols; - int index = atom->find_custom("statted_tags",flag,cols); - int *i_statted_tags = atom->ivector[index]; - - for (int i = 0; i < atom->nlocal; i++) - i_statted_tags[i] = 1; - } - } else { - // sleeping code, for future capabilities - custom_exclude_flag = 1; - // first we have to find correct fix group reference - Fix *fix = modify->get_fix_by_id(std::string("GROUP_")+exclude_group); - - // this returns names of corresponding property - int unused; - char *idprop; - idprop = (char *) fix->extract("property",unused); - if (idprop == nullptr) - error->all(FLERR,"Exclude group must be a per-atom property group"); - statted_id = utils::strdup(idprop); - - // initialize per-atom statted_tags to 1 - // need to correct for smooth restarts - //int flag,cols; - //int index = atom->find_custom(statted_id,flag,cols); - //int *i_statted_tags = atom->ivector[index]; - //for (int i = 0; i < atom->nlocal; i++) - // i_statted_tags[i] = 1; - } - - // let's create a new nve/limit fix to limit newly reacted atoms - id_fix1 = utils::strdup("bond_react_MASTER_nve_limit"); - if (!modify->get_fix_by_id(id_fix1)) - fix1 = modify->add_fix(fmt::format("{} {} nve/limit {}", - id_fix1,master_group,nve_limit_xmax)); - } -} - -/* ---------------------------------------------------------------------- */ - -void FixBondReact::init() -{ - - if (utils::strmatch(update->integrate_style,"^respa")) - nlevels_respa = (dynamic_cast(update->integrate))->nlevels; - - // check cutoff for iatomtype,jatomtype - if (!utils::strmatch(force->pair_style,"^hybrid")) - for (int i = 0; i < nreacts; i++) - if (force->pair == nullptr || (closeneigh[i] < 0 && cutsq[i][1] > force->pair->cutsq[iatomtype[i]][jatomtype[i]])) - error->all(FLERR,"Fix bond/react: Fix bond/react cutoff is longer than pairwise cutoff"); - - // need a half neighbor list, built every Nevery steps - 
neighbor->add_request(this, NeighConst::REQ_OCCASIONAL); - - lastcheck = -1; -} - -/* ---------------------------------------------------------------------- */ - -void FixBondReact::init_list(int /*id*/, NeighList *ptr) -{ - list = ptr; -} - -/* ---------------------------------------------------------------------- - Identify all pairs of potentially reactive atoms for this time step. - This function is modified from LAMMPS’ fix bond/create. ----------------------------------------------------------------------- */ - -void FixBondReact::post_integrate() -{ - // update store_rxn_count on every step - for (int myrxn = 0; myrxn < nreacts; myrxn++) { - if (rate_limit[0][myrxn] == 1) { - for (int i = rate_limit[2][myrxn]-1; i > 0; i--) { - store_rxn_count[i][myrxn] = store_rxn_count[i-1][myrxn]; - } - store_rxn_count[0][myrxn] = reaction_count_total[myrxn]; - } - } - - // check if any reactions could occur on this timestep - int nevery_check = 1; - for (int i = 0; i < nreacts; i++) { - if (var_flag[NEVERY][i]) - nevery[i] = ceil(input->variable->compute_equal(var_id[NEVERY][i])); - if (nevery[i] <= 0) - error->all(FLERR,"Illegal fix bond/react command: " - "'Nevery' must be a positive integer"); - if (!(update->ntimestep % nevery[i])) { - nevery_check = 0; - break; - } - } - - for (int i = 0; i < nreacts; i++) { - reaction_count[i] = 0; - local_rxn_count[i] = 0; - ghostly_rxn_count[i] = 0; - nlocalskips[i] = 0; - nghostlyskips[i] = 0; - // update reaction probability - if (var_flag[PROB][i]) - fraction[i] = input->variable->compute_equal(var_id[PROB][i]); - } - - if (nevery_check) { - unlimit_bond(); - return; - } - - // acquire updated ghost atom positions - // necessary b/c are calling this after integrate, but before Verlet comm - - comm->forward_comm(); - - // resize bond partner list and initialize it - // needs to be atom->nmax in length - - if (atom->nmax > nmax) { - memory->destroy(partner); - memory->destroy(finalpartner); - memory->destroy(distsq); - 
memory->destroy(nattempt); - nmax = atom->nmax; - memory->create(partner,nmax,"bond/react:partner"); - memory->create(finalpartner,nmax,"bond/react:finalpartner"); - memory->create(distsq,nmax,2,"bond/react:distsq"); - memory->create(nattempt,nreacts,"bond/react:nattempt"); - } - - // reset 'attempt' counts - for (int i = 0; i < nreacts; i++) { - nattempt[i] = 0; - } - // reset per-bond compute map flag - atoms2bondflag = 0; - - int nlocal = atom->nlocal; - int nall = atom->nlocal + atom->nghost; - - // loop over neighbors of my atoms - // each atom sets one closest eligible partner atom ID to bond with - - tagint *tag = atom->tag; - int *type = atom->type; - - neighbor->build_one(list); - - // here we define a full special list - // may need correction for unusual special bond settings - nxspecial = atom->nspecial; - xspecial = atom->special; - - int j; - for (rxnID = 0; rxnID < nreacts; rxnID++) { - int rate_limit_flag = 1; - if (rate_limit[0][rxnID] == 1) { - int myrxn_count = store_rxn_count[rate_limit[2][rxnID]-1][rxnID]; - if (myrxn_count == -1) rate_limit_flag = 0; - else { - int nrxns_delta = reaction_count_total[rxnID] - myrxn_count; - int my_nrate; - if (var_flag[NRATE][rxnID] == 1) { - my_nrate = input->variable->compute_equal(var_id[NRATE][rxnID]); - } else my_nrate = rate_limit[1][rxnID]; - if (nrxns_delta >= my_nrate) rate_limit_flag = 0; - } - } - if ((update->ntimestep % nevery[rxnID]) || - (max_rxn[rxnID] <= reaction_count_total[rxnID]) || - (rate_limit_flag == 0)) continue; - for (int ii = 0; ii < nall; ii++) { - partner[ii] = 0; - finalpartner[ii] = 0; - distsq[ii][0] = 0.0; - distsq[ii][1] = BIG; - } - - // fork between far and close_partner here - if (closeneigh[rxnID] < 0) { - far_partner(); - // reverse comm of distsq and partner - // not needed if newton_pair off since I,J pair was seen by both procs - commflag = 2; - if (force->newton_pair) comm->reverse_comm(this); - } else { - close_partner(); - commflag = 2; - comm->reverse_comm(this); - 
} - - // each atom now knows its winning partner - // forward comm of partner, so ghosts have it - - commflag = 2; - comm->forward_comm(this,1); - - // consider for reaction: - // only if both atoms list each other as winning bond partner - // if other atom is owned by another proc, it should do same thing - - int temp_nattempt = 0; - for (int i = 0; i < nlocal; i++) { - if (partner[i] == 0) { - continue; - } - - j = atom->map(partner[i]); - if (partner[j] != tag[i]) { - continue; - } - - // store final bond partners and count the rxn possibility once - - finalpartner[i] = tag[j]; - finalpartner[j] = tag[i]; - - if (tag[i] < tag[j]) temp_nattempt++; - } - - // cycle loop if no even eligible bonding atoms were found (on any proc) - int some_chance; - MPI_Allreduce(&temp_nattempt,&some_chance,1,MPI_INT,MPI_SUM,world); - if (!some_chance) continue; - - // communicate final partner - - commflag = 3; - comm->forward_comm(this); - - // add instance to 'attempt' only if this processor - // owns the atoms with smaller global ID - // NOTE: we no longer care about ghost-ghost instances as bond/create did - // this is because we take care of updating topology later (and differently) - for (int i = 0; i < nlocal; i++) { - - if (finalpartner[i] == 0) continue; - - j = atom->map(finalpartner[i]); - if (tag[i] < tag[j]) { - if (nattempt[rxnID] > maxattempt-2) { - maxattempt += DELTA; - // third dim of 'attempt': bond/react integer ID - memory->grow(attempt,maxattempt,2,nreacts,"bond/react:attempt"); - } - // to ensure types remain in same order - if (iatomtype[rxnID] == type[i]) { - attempt[nattempt[rxnID]][0][rxnID] = tag[i]; - attempt[nattempt[rxnID]][1][rxnID] = finalpartner[i]; - nattempt[rxnID]++; - // add another attempt if initiator atoms are same type - if (iatomtype[rxnID] == jatomtype[rxnID]) { - attempt[nattempt[rxnID]][0][rxnID] = finalpartner[i]; - attempt[nattempt[rxnID]][1][rxnID] = tag[i]; - nattempt[rxnID]++; - } - } else { - attempt[nattempt[rxnID]][0][rxnID] = 
finalpartner[i]; - attempt[nattempt[rxnID]][1][rxnID] = tag[i]; - nattempt[rxnID]++; - } - } - } - } - - // break loop if no even eligible bonding atoms were found (on any proc) - int some_chance; - - allnattempt = 0; - for (int i = 0; i < nreacts; i++) - allnattempt += nattempt[i]; - - MPI_Allreduce(&allnattempt,&some_chance,1,MPI_INT,MPI_SUM,world); - if (!some_chance) { - unlimit_bond(); - return; - } - - // evaluate custom constraint variable values here and forward_comm - get_customvars(); - commflag = 1; - comm->forward_comm(this,ncustomvars); - - // run through the superimpose algorithm - // this checks if simulation topology matches unreacted mol template - superimpose_algorithm(); - // free atoms that have been limited after reacting - unlimit_bond(); -} - -/* ---------------------------------------------------------------------- - Search non-bonded neighbor lists if bonding atoms are not in special list -------------------------------------------------------------------------- */ - -void FixBondReact::far_partner() -{ - int inum,jnum,itype,jtype,possible; - double xtmp,ytmp,ztmp,delx,dely,delz,rsq; - int *ilist,*jlist,*numneigh,**firstneigh; - - // loop over neighbors of my atoms - // each atom sets one closest eligible partner atom ID to bond with - - double **x = atom->x; - tagint *tag = atom->tag; - int *mask = atom->mask; - int *type = atom->type; - - inum = list->inum; - ilist = list->ilist; - numneigh = list->numneigh; - firstneigh = list->firstneigh; - - // per-atom property indicating if in bond/react master group - int flag,cols; - int index1 = atom->find_custom("limit_tags",flag,cols); - int *i_limit_tags = atom->ivector[index1]; - - int i,j; - - for (int ii = 0; ii < inum; ii++) { - i = ilist[ii]; - if (!(mask[i] & groupbits[rxnID])) continue; - if (i_limit_tags[i] != 0) continue; - itype = type[i]; - xtmp = x[i][0]; - ytmp = x[i][1]; - ztmp = x[i][2]; - jlist = firstneigh[i]; - jnum = numneigh[i]; - - for (int jj = 0; jj < jnum; jj++) { - j = 
jlist[jj]; - j &= NEIGHMASK; - if (!(mask[j] & groupbits[rxnID])) { - continue; - } - - if (i_limit_tags[j] != 0) { - continue; - } - - if (molecule_keyword[rxnID] == INTER) { - if (atom->molecule[i] == atom->molecule[j]) continue; - } else if (molecule_keyword[rxnID] == INTRA) { - if (atom->molecule[i] != atom->molecule[j]) continue; - } - - jtype = type[j]; - possible = 0; - if (itype == iatomtype[rxnID] && jtype == jatomtype[rxnID]) { - possible = 1; - } else if (itype == jatomtype[rxnID] && jtype == iatomtype[rxnID]) { - possible = 1; - } - - if (possible == 0) continue; - - // do not allow bonding atoms within special list - for (int k = 0; k < nxspecial[i][2]; k++) - if (xspecial[i][k] == tag[j]) possible = 0; - if (!possible) continue; - - delx = xtmp - x[j][0]; - dely = ytmp - x[j][1]; - delz = ztmp - x[j][2]; - domain->minimum_image(delx,dely,delz); // ghost location fix - rsq = delx*delx + dely*dely + delz*delz; - - if (var_flag[RMIN][rxnID]) { - double cutoff = input->variable->compute_equal(var_id[RMIN][rxnID]); - cutsq[rxnID][0] = cutoff*cutoff; - } - if (var_flag[RMAX][rxnID]) { - double cutoff = input->variable->compute_equal(var_id[RMAX][rxnID]); - cutsq[rxnID][1] = cutoff*cutoff; - } - if (rsq >= cutsq[rxnID][1] || rsq <= cutsq[rxnID][0]) { - continue; - } - if (rsq < distsq[i][1]) { - partner[i] = tag[j]; - distsq[i][1] = rsq; - } - if (rsq < distsq[j][1]) { - partner[j] = tag[i]; - distsq[j][1] = rsq; - } - } - } -} - -/* ---------------------------------------------------------------------- - Slightly simpler to find bonding partner when a close neighbor -------------------------------------------------------------------------- */ - -void FixBondReact::close_partner() -{ - int n,i1,i2,itype,jtype; - double delx,dely,delz,rsq; - - double **x = atom->x; - tagint *tag = atom->tag; - int *type = atom->type; - int *mask = atom->mask; - - // per-atom property indicating if in bond/react master group - int flag,cols; - int index1 = 
atom->find_custom("limit_tags",flag,cols); - int *i_limit_tags = atom->ivector[index1]; - - // loop over special list - for (int ii = 0; ii < atom->nlocal; ii++) { - itype = type[ii]; - n = 0; - if (closeneigh[rxnID] != 0) - n = nxspecial[ii][closeneigh[rxnID]-1]; - for (; n < nxspecial[ii][closeneigh[rxnID]]; n++) { - i1 = ii; - i2 = atom->map(xspecial[ii][n]); - jtype = type[i2]; - if (!(mask[i1] & groupbits[rxnID])) continue; - if (!(mask[i2] & groupbits[rxnID])) continue; - if (i_limit_tags[i1] != 0) continue; - if (i_limit_tags[i2] != 0) continue; - if (itype != iatomtype[rxnID] || jtype != jatomtype[rxnID]) continue; - - if (molecule_keyword[rxnID] == INTER) { - if (atom->molecule[i1] == atom->molecule[i2]) continue; - } else if (molecule_keyword[rxnID] == INTRA) { - if (atom->molecule[i1] != atom->molecule[i2]) continue; - } - - delx = x[i1][0] - x[i2][0]; - dely = x[i1][1] - x[i2][1]; - delz = x[i1][2] - x[i2][2]; - domain->minimum_image(delx,dely,delz); // ghost location fix - rsq = delx*delx + dely*dely + delz*delz; - - if (var_flag[RMIN][rxnID]) { - double cutoff = input->variable->compute_equal(var_id[RMIN][rxnID]); - cutsq[rxnID][0] = cutoff*cutoff; - } - if (var_flag[RMAX][rxnID]) { - double cutoff = input->variable->compute_equal(var_id[RMAX][rxnID]); - cutsq[rxnID][1] = cutoff*cutoff; - } - if (rsq >= cutsq[rxnID][1] || rsq <= cutsq[rxnID][0]) continue; - - if (closeneigh[rxnID] == 0) { - if (rsq > distsq[i1][0]) { - partner[i1] = tag[i2]; - distsq[i1][0] = rsq; - } - if (rsq > distsq[i2][0]) { - partner[i2] = tag[i1]; - distsq[i2][0] = rsq; - } - } else { - if (rsq < distsq[i1][1]) { - partner[i1] = tag[i2]; - distsq[i1][1] = rsq; - } - if (rsq < distsq[i2][1]) { - partner[i2] = tag[i1]; - distsq[i2][1] = rsq; - } - } - } - } -} - -/* ---------------------------------------------------------------------- - Set up global variables. Loop through all pairs; loop through Pioneers - until Superimpose Algorithm is completed for each pair. 
-------------------------------------------------------------------------- */ - -void FixBondReact::superimpose_algorithm() -{ - const int nprocs = comm->nprocs; - my_num_mega = 0; - local_num_mega = 0; - ghostly_num_mega = 0; - - // indicates local ghosts of other procs - int tmp; - localsendlist = (int *) comm->extract("localsendlist",tmp); - - // quick description of important global indices you'll see floating about: - // 'pion' is the pioneer loop index - // 'neigh' in the first neighbor index - // 'trace' retraces the first nieghbors - // trace: once you choose a first neighbor, you then check for other nieghbors of same type - - if (attempted_rxn == 1) { - memory->destroy(restore_pt); - memory->destroy(restore); - memory->destroy(glove); - memory->destroy(pioneers); - memory->destroy(my_mega_glove); - memory->destroy(local_mega_glove); - memory->destroy(ghostly_mega_glove); - } - - memory->create(glove,max_natoms,2,"bond/react:glove"); - memory->create(restore_pt,MAXGUESS,4,"bond/react:restore_pt"); - memory->create(pioneers,max_natoms,"bond/react:pioneers"); - memory->create(restore,max_natoms,MAXGUESS*4,"bond/react:restore"); - memory->create(my_mega_glove,max_natoms+cuff,allnattempt,"bond/react:my_mega_glove"); - - for (int i = 0; i < max_natoms+cuff; i++) - for (int j = 0; j < allnattempt; j++) - my_mega_glove[i][j] = 0.0; - - attempted_rxn = 1; - - // let's finally begin the superimpose loop - for (rxnID = 0; rxnID < nreacts; rxnID++) { - for (lcl_inst = 0; lcl_inst < nattempt[rxnID]; lcl_inst++) { - - onemol = atom->molecules[unreacted_mol[rxnID]]; - twomol = atom->molecules[reacted_mol[rxnID]]; - get_molxspecials(); - - status = PROCEED; - - glove_counter = 0; - for (int i = 0; i < max_natoms; i++) { - for (int j = 0; j < 2; j++) { - glove[i][j] = 0; - } - } - - for (int i = 0; i < MAXGUESS; i++) { - guess_branch[i] = 0; - } - - int myibonding = ibonding[rxnID]; - int myjbonding = jbonding[rxnID]; - - glove[myibonding-1][0] = myibonding; - 
glove[myibonding-1][1] = attempt[lcl_inst][0][rxnID]; - glove_counter++; - glove[myjbonding-1][0] = myjbonding; - glove[myjbonding-1][1] = attempt[lcl_inst][1][rxnID]; - glove_counter++; - - // special case, only two atoms in reaction templates - // then: bonding onemol_nxspecials guaranteed to be equal, and either 0 or 1 - if (glove_counter == onemol->natoms) { - tagint local_atom1 = atom->map(glove[myibonding-1][1]); - tagint local_atom2 = atom->map(glove[myjbonding-1][1]); - if ( (nxspecial[local_atom1][0] == onemol_nxspecial[myibonding-1][0] && - nxspecial[local_atom2][0] == nxspecial[local_atom1][0]) && - (nxspecial[local_atom1][0] == 0 || - xspecial[local_atom1][0] == atom->tag[local_atom2]) && - check_constraints()) { - if (fraction[rxnID] < 1.0 && - random[rxnID]->uniform() >= fraction[rxnID]) { - status = REJECT; - } else { - status = ACCEPT; - my_mega_glove[0][my_num_mega] = (double) rxnID; - if (rescale_charges_flag[rxnID]) my_mega_glove[1][my_num_mega] = get_totalcharge(); - for (int i = 0; i < onemol->natoms; i++) { - my_mega_glove[i+cuff][my_num_mega] = (double) glove[i][1]; - } - my_num_mega++; - } - } else status = REJECT; - } - - avail_guesses = 0; - - for (int i = 0; i < max_natoms; i++) - pioneer_count[i] = 0; - - for (int i = 0; i < onemol_nxspecial[myibonding-1][0]; i++) - pioneer_count[onemol_xspecial[myibonding-1][i]-1]++; - - for (int i = 0; i < onemol_nxspecial[myjbonding-1][0]; i++) - pioneer_count[onemol_xspecial[myjbonding-1][i]-1]++; - - - int hang_catch = 0; - while (status != ACCEPT && status != REJECT) { - - for (int i = 0; i < max_natoms; i++) { - pioneers[i] = 0; - } - - for (int i = 0; i < onemol->natoms; i++) { - if (glove[i][0] != 0 && pioneer_count[i] < onemol_nxspecial[i][0] && edge[i][rxnID] == 0) { - pioneers[i] = 1; - } - } - - // run through the pioneers - // due to use of restore points, 'pion' index can change in loop - for (pion = 0; pion < onemol->natoms; pion++) { - if (pioneers[pion] || status == GUESSFAIL) { - 
make_a_guess(); - if (status == ACCEPT || status == REJECT) break; - } - } - - // reaction site found successfully! - if (status == ACCEPT) { - if (fraction[rxnID] < 1.0 && - random[rxnID]->uniform() >= fraction[rxnID]) status = REJECT; - else { - my_mega_glove[0][my_num_mega] = (double) rxnID; - if (rescale_charges_flag[rxnID]) my_mega_glove[1][my_num_mega] = get_totalcharge(); - for (int i = 0; i < onemol->natoms; i++) { - my_mega_glove[i+cuff][my_num_mega] = (double) glove[i][1]; - } - my_num_mega++; - } - } - hang_catch++; - // let's go ahead and catch the simplest of hangs - //if (hang_catch > onemol->natoms*4) - if (hang_catch > atom->nlocal*30) { - error->one(FLERR,"Fix bond/react: Excessive iteration of superimpose algorithm. " - "Please check that all pre-reaction template atoms are linked to an initiator atom, " - "via at least one path that does not involve edge atoms."); - } - } - } - } - - global_megasize = 0; - - memory->create(local_mega_glove,max_natoms+cuff,my_num_mega,"bond/react:local_mega_glove"); - memory->create(ghostly_mega_glove,max_natoms+cuff,my_num_mega,"bond/react:ghostly_mega_glove"); - - for (int i = 0; i < max_natoms+cuff; i++) { - for (int j = 0; j < my_num_mega; j++) { - local_mega_glove[i][j] = 0.0; - ghostly_mega_glove[i][j] = 0.0; - } - } - - dedup_mega_gloves(LOCAL); // make sure atoms aren't added to more than one reaction - glove_ghostcheck(); // split into 'local' and 'global' - ghost_glovecast(); // consolidate all mega_gloves to all processors - - MPI_Allreduce(&local_rxn_count[0],&reaction_count[0],nreacts,MPI_INT,MPI_SUM,world); - - int rxnflag = 0; - if (comm->me == 0) - for (int i = 0; i < nreacts; i++) { - reaction_count_total[i] += reaction_count[i] + ghostly_rxn_count[i]; - rxnflag += reaction_count[i] + ghostly_rxn_count[i]; - } - - MPI_Bcast(&reaction_count_total[0], nreacts, MPI_INT, 0, world); - MPI_Bcast(&rxnflag, 1, MPI_INT, 0, world); - - if (!rxnflag) return; - - // C++11 and later compatible version of Park 
pRNG - std::random_device rnd; - std::minstd_rand park_rng(rnd()); - - // check if we overstepped our reaction limit, via either max_rxn or rate_limit - for (int i = 0; i < nreacts; i++) { - int overstep = 0; - int max_rxn_overstep = reaction_count_total[i] - max_rxn[i]; - overstep = MAX(overstep,max_rxn_overstep); - if (rate_limit[0][i] == 1) { - int myrxn_count = store_rxn_count[rate_limit[2][i]-1][i]; - if (myrxn_count != -1) { - int nrxn_delta = reaction_count_total[i] - myrxn_count; - int my_nrate; - if (var_flag[NRATE][i] == 1) { - my_nrate = input->variable->compute_equal(var_id[NRATE][i]); - } else my_nrate = rate_limit[1][i]; - int rate_limit_overstep = nrxn_delta - my_nrate; - overstep = MAX(overstep,rate_limit_overstep); - } - } - - if (overstep > 0) { - // let's randomly choose rxns to skip, unbiasedly from local and ghostly - int *local_rxncounts; - int *all_localskips; - memory->create(local_rxncounts,nprocs,"bond/react:local_rxncounts"); - memory->create(all_localskips,nprocs,"bond/react:all_localskips"); - MPI_Gather(&local_rxn_count[i],1,MPI_INT,local_rxncounts,1,MPI_INT,0,world); - if (comm->me == 0) { - int delta_rxn = reaction_count[i] + ghostly_rxn_count[i]; - // when using variable input for rate_limit, rate_limit_overstep could be > delta_rxn (below) - // we need to limit overstep to the number of reactions on this timestep - // essentially skipping all reactions, would be more efficient to use a skip_all flag - if (overstep > delta_rxn) overstep = delta_rxn; - int *rxn_by_proc; - memory->create(rxn_by_proc,delta_rxn,"bond/react:rxn_by_proc"); - for (int j = 0; j < delta_rxn; j++) - rxn_by_proc[j] = -1; // corresponds to ghostly - int itemp = 0; - for (int j = 0; j < nprocs; j++) - for (int k = 0; k < local_rxncounts[j]; k++) - rxn_by_proc[itemp++] = j; - std::shuffle(&rxn_by_proc[0],&rxn_by_proc[delta_rxn], park_rng); - for (int j = 0; j < nprocs; j++) - all_localskips[j] = 0; - nghostlyskips[i] = 0; - for (int j = 0; j < overstep; j++) { - 
if (rxn_by_proc[j] == -1) nghostlyskips[i]++; - else all_localskips[rxn_by_proc[j]]++; - } - memory->destroy(rxn_by_proc); - reaction_count_total[i] -= overstep; - } - MPI_Scatter(&all_localskips[0],1,MPI_INT,&nlocalskips[i],1,MPI_INT,0,world); - MPI_Bcast(&nghostlyskips[i],1,MPI_INT,0,world); - memory->destroy(local_rxncounts); - memory->destroy(all_localskips); - } - } - MPI_Bcast(&reaction_count_total[0], nreacts, MPI_INT, 0, world); - - // this updates topology next step - next_reneighbor = update->ntimestep; - - update_everything(); // change topology -} - -/* ---------------------------------------------------------------------- - Screen for obvious algorithm fails. This is the return point when a guess - has failed: check for available restore points. -------------------------------------------------------------------------- */ - -void FixBondReact::make_a_guess() -{ - int *type = atom->type; - int nfirst_neighs = onemol_nxspecial[pion][0]; - - // per-atom property indicating if in bond/react master group - int flag,cols; - int index1 = atom->find_custom("limit_tags",flag,cols); - int *i_limit_tags = atom->ivector[index1]; - - if (status == GUESSFAIL && avail_guesses == 0) { - status = REJECT; - return; - } - - if (status == GUESSFAIL && avail_guesses > 0) { - // load restore point - for (int i = 0; i < onemol->natoms; i++) { - glove[i][0] = restore[i][(avail_guesses*4)-4]; - glove[i][1] = restore[i][(avail_guesses*4)-3]; - pioneer_count[i] = restore[i][(avail_guesses*4)-2]; - pioneers[i] = restore[i][(avail_guesses*4)-1]; - } - pion = restore_pt[avail_guesses-1][0]; - neigh = restore_pt[avail_guesses-1][1]; - trace = restore_pt[avail_guesses-1][2]; - glove_counter = restore_pt[avail_guesses-1][3]; - status = RESTORE; - neighbor_loop(); - if (status != PROCEED) return; - } - - nfirst_neighs = onemol_nxspecial[pion][0]; - - // check if any of first neighbors are in bond_react_MASTER_group - // if so, this constitutes a fail - // because still undergoing a 
previous reaction! - // could technically fail unnecessarily during a wrong guess if near edge atoms - // we accept this temporary and infrequent decrease in reaction occurrences - - for (int i = 0; i < nxspecial[atom->map(glove[pion][1])][0]; i++) { - if (atom->map(xspecial[atom->map(glove[pion][1])][i]) < 0) { - error->one(FLERR,"Fix bond/react: Fix bond/react needs ghost atoms from further away"); // parallel issues. - } - if (i_limit_tags[(int)atom->map(xspecial[atom->map(glove[pion][1])][i])] != 0) { - status = GUESSFAIL; - return; - } - } - - // check for same number of neighbors between unreacted mol and simulation - if (nfirst_neighs != nxspecial[atom->map(glove[pion][1])][0]) { - status = GUESSFAIL; - return; - } - - // make sure all neighbors aren't already assigned - // an issue discovered for coarse-grained example - int assigned_count = 0; - for (int i = 0; i < nfirst_neighs; i++) - for (int j = 0; j < onemol->natoms; j++) - if (xspecial[atom->map(glove[pion][1])][i] == glove[j][1]) { - assigned_count++; - break; - } - - if (assigned_count == nfirst_neighs) status = GUESSFAIL; - - // check if all neigh atom types are the same between simulation and unreacted mol - int *mol_ntypes = new int[atom->ntypes]; - int *lcl_ntypes = new int[atom->ntypes]; - - for (int i = 0; i < atom->ntypes; i++) { - mol_ntypes[i] = 0; - lcl_ntypes[i] = 0; - } - - for (int i = 0; i < nfirst_neighs; i++) { - mol_ntypes[(int)onemol->type[(int)onemol_xspecial[pion][i]-1]-1]++; - lcl_ntypes[(int)type[(int)atom->map(xspecial[atom->map(glove[pion][1])][i])]-1]++; //added -1 - } - - for (int i = 0; i < atom->ntypes; i++) { - if (mol_ntypes[i] != lcl_ntypes[i]) { - status = GUESSFAIL; - delete [] mol_ntypes; - delete [] lcl_ntypes; - return; - } - } - - delete [] mol_ntypes; - delete [] lcl_ntypes; - - // okay everything seems to be in order. let's assign some ID pairs!!! 
- neighbor_loop(); -} - -/* ---------------------------------------------------------------------- - Loop through all First Bonded Neighbors of the current Pioneer. - Prepare appropriately if we are in Restore Mode. -------------------------------------------------------------------------- */ - -void FixBondReact::neighbor_loop() -{ - int nfirst_neighs = onemol_nxspecial[pion][0]; - - if (status == RESTORE) { - check_a_neighbor(); - return; - } - - for (neigh = 0; neigh < nfirst_neighs; neigh++) { - if (glove[(int)onemol_xspecial[pion][neigh]-1][0] == 0) { - check_a_neighbor(); - } - } - // status should still = PROCEED -} - -/* ---------------------------------------------------------------------- - Check if we can assign this First Neighbor to pre-reacted template - without guessing. If so, do it! If not, call crosscheck_the_nieghbor(). -------------------------------------------------------------------------- */ - -void FixBondReact::check_a_neighbor() -{ - int *type = atom->type; - int nfirst_neighs = onemol_nxspecial[pion][0]; - - if (status != RESTORE) { - // special consideration for hydrogen atoms (and all first neighbors bonded to no other atoms) (and aren't edge atoms) - if (onemol_nxspecial[(int)onemol_xspecial[pion][neigh]-1][0] == 1 && edge[(int)onemol_xspecial[pion][neigh]-1][rxnID] == 0) { - - for (int i = 0; i < nfirst_neighs; i++) { - - if (type[(int)atom->map(xspecial[(int)atom->map(glove[pion][1])][i])] == onemol->type[(int)onemol_xspecial[pion][neigh]-1] && - nxspecial[(int)atom->map(xspecial[(int)atom->map(glove[pion][1])][i])][0] == 1) { - - int already_assigned = 0; - for (int j = 0; j < onemol->natoms; j++) { - if (glove[j][1] == xspecial[atom->map(glove[pion][1])][i]) { - already_assigned = 1; - break; - } - } - - if (already_assigned == 0) { - glove[(int)onemol_xspecial[pion][neigh]-1][0] = onemol_xspecial[pion][neigh]; - glove[(int)onemol_xspecial[pion][neigh]-1][1] = xspecial[(int)atom->map(glove[pion][1])][i]; - - //another check for 
ghost atoms. perhaps remove the one in make_a_guess - if (atom->map(glove[(int)onemol_xspecial[pion][neigh]-1][1]) < 0) { - error->one(FLERR,"Fix bond/react: Fix bond/react needs ghost atoms from further away"); - } - - for (int j = 0; j < onemol_nxspecial[onemol_xspecial[pion][neigh]-1][0]; j++) { - pioneer_count[onemol_xspecial[onemol_xspecial[pion][neigh]-1][j]-1]++; - } - - glove_counter++; - if (glove_counter == onemol->natoms) { - if (ring_check() && check_constraints()) status = ACCEPT; - else status = GUESSFAIL; - return; - } - // status should still == PROCEED - return; - } - } - } - // we are here if no matching atom found - status = GUESSFAIL; - return; - } - } - - crosscheck_the_neighbor(); - if (status != PROCEED) { - if (status == CONTINUE) - status = PROCEED; - return; - } - - // finally ready to match non-duplicate, non-edge atom IDs!! - - for (int i = 0; i < nfirst_neighs; i++) { - - if (type[atom->map((int)xspecial[(int)atom->map(glove[pion][1])][i])] == onemol->type[(int)onemol_xspecial[pion][neigh]-1]) { - int already_assigned = 0; - - //check if a first neighbor of the pioneer is already assigned to pre-reacted template - for (int j = 0; j < onemol->natoms; j++) { - if (glove[j][1] == xspecial[atom->map(glove[pion][1])][i]) { - already_assigned = 1; - break; - } - } - - if (already_assigned == 0) { - glove[(int)onemol_xspecial[pion][neigh]-1][0] = onemol_xspecial[pion][neigh]; - glove[(int)onemol_xspecial[pion][neigh]-1][1] = xspecial[(int)atom->map(glove[pion][1])][i]; - - //another check for ghost atoms. 
perhaps remove the one in make_a_guess - if (atom->map(glove[(int)onemol_xspecial[pion][neigh]-1][1]) < 0) { - error->one(FLERR,"Fix bond/react: Fix bond/react needs ghost atoms from further away"); - } - - for (int ii = 0; ii < onemol_nxspecial[onemol_xspecial[pion][neigh]-1][0]; ii++) { - pioneer_count[onemol_xspecial[onemol_xspecial[pion][neigh]-1][ii]-1]++; - } - - glove_counter++; - if (glove_counter == onemol->natoms) { - if (ring_check() && check_constraints()) status = ACCEPT; - else status = GUESSFAIL; - return; - // will never complete here when there are edge atoms - // ...actually that could be wrong if people get creative...shouldn't affect anything - } - // status should still = PROCEED - return; - } - } - } - // status is still 'PROCEED' if we are here! -} - -/* ---------------------------------------------------------------------- - Check if there a viable guess to be made. If so, prepare to make a - guess by recording a restore point. -------------------------------------------------------------------------- */ - -void FixBondReact::crosscheck_the_neighbor() -{ - int nfirst_neighs = onemol_nxspecial[pion][0]; - - if (status == RESTORE) { - inner_crosscheck_loop(); - return; - } - - for (trace = 0; trace < nfirst_neighs; trace++) { - if (neigh!=trace && onemol->type[(int)onemol_xspecial[pion][neigh]-1] == onemol->type[(int)onemol_xspecial[pion][trace]-1] && - glove[onemol_xspecial[pion][trace]-1][0] == 0) { - - if (avail_guesses == MAXGUESS) { - error->warning(FLERR,"Fix bond/react: Fix bond/react failed because MAXGUESS set too small. 
ask developer for info"); - status = GUESSFAIL; - return; - } - avail_guesses++; - for (int i = 0; i < onemol->natoms; i++) { - restore[i][(avail_guesses*4)-4] = glove[i][0]; - restore[i][(avail_guesses*4)-3] = glove[i][1]; - restore[i][(avail_guesses*4)-2] = pioneer_count[i]; - restore[i][(avail_guesses*4)-1] = pioneers[i]; - restore_pt[avail_guesses-1][0] = pion; - restore_pt[avail_guesses-1][1] = neigh; - restore_pt[avail_guesses-1][2] = trace; - restore_pt[avail_guesses-1][3] = glove_counter; - } - - inner_crosscheck_loop(); - return; - } - } - // status is still 'PROCEED' if we are here! -} - -/* ---------------------------------------------------------------------- - We are ready to make a guess. If there are multiple possible choices - for this guess, keep track of these. -------------------------------------------------------------------------- */ - -void FixBondReact::inner_crosscheck_loop() -{ - int *type = atom->type; - // arbitrarily limited to 5 identical first neighbors - tagint tag_choices[5]; - int nfirst_neighs = onemol_nxspecial[pion][0]; - - int num_choices = 0; - for (int i = 0; i < nfirst_neighs; i++) { - if (type[(int)atom->map(xspecial[atom->map(glove[pion][1])][i])] == onemol->type[(int)onemol_xspecial[pion][neigh]-1]) { - if (num_choices == 5) { // here failed because too many identical first neighbors. but really no limit if situation arises - status = GUESSFAIL; - return; - } - tag_choices[num_choices++] = xspecial[atom->map(glove[pion][1])][i]; - } - } - - // guess branch is for when multiple identical neighbors. then we guess each one in turn - // guess_branch must work even when avail_guesses = 0. so index accordingly! 
- // ...actually, avail_guesses should never be zero here anyway - if (guess_branch[avail_guesses-1] == 0) guess_branch[avail_guesses-1] = num_choices; - - for (int i=1; i < num_choices; ++i) { - tagint hold = tag_choices[i]; - int j = i - 1; - while ((j >= 0) && (tag_choices[j] > hold)) { - tag_choices[j+1] = tag_choices[j]; - --j; - } - tag_choices[j+1] = hold; - } - - for (int i = guess_branch[avail_guesses-1]-1; i >= 0; i--) { - int already_assigned = 0; - for (int j = 0; j < onemol->natoms; j++) { - if (glove[j][1] == tag_choices[i]) { - already_assigned = 1; - break; - } - } - if (already_assigned == 1) { - guess_branch[avail_guesses-1]--; - if (guess_branch[avail_guesses-1] == 0) { - status = REJECT; - return; - } - } else { - glove[onemol_xspecial[pion][neigh]-1][0] = onemol_xspecial[pion][neigh]; - glove[onemol_xspecial[pion][neigh]-1][1] = tag_choices[i]; - guess_branch[avail_guesses-1]--; - break; - } - } - - //another check for ghost atoms. perhaps remove the one in make_a_guess - if (atom->map(glove[(int)onemol_xspecial[pion][neigh]-1][1]) < 0) { - error->one(FLERR,"Fix bond/react: Fix bond/react needs ghost atoms from further away"); - } - - if (guess_branch[avail_guesses-1] == 0) avail_guesses--; - - for (int i = 0; i < onemol_nxspecial[onemol_xspecial[pion][neigh]-1][0]; i++) { - pioneer_count[onemol_xspecial[onemol_xspecial[pion][neigh]-1][i]-1]++; - } - glove_counter++; - if (glove_counter == onemol->natoms) { - if (ring_check() && check_constraints()) status = ACCEPT; - else status = GUESSFAIL; - return; - } - status = CONTINUE; -} - -/* ---------------------------------------------------------------------- - Check that newly assigned atoms have correct bonds - Necessary for certain ringed structures -------------------------------------------------------------------------- */ - -int FixBondReact::ring_check() -{ - // ring_check can be made more efficient by re-introducing 'frozen' atoms - // 'frozen' atoms have been assigned and also are no 
longer pioneers - - // double check the number of neighbors match for all non-edge atoms - // otherwise, atoms at 'end' of symmetric ring can behave like edge atoms - for (int i = 0; i < onemol->natoms; i++) - if (edge[i][rxnID] == 0 && - onemol_nxspecial[i][0] != nxspecial[atom->map(glove[i][1])][0]) - return 0; - - for (int i = 0; i < onemol->natoms; i++) { - for (int j = 0; j < onemol_nxspecial[i][0]; j++) { - int ring_fail = 1; - int ispecial = onemol_xspecial[i][j]; - for (int k = 0; k < nxspecial[atom->map(glove[i][1])][0]; k++) { - if (xspecial[atom->map(glove[i][1])][k] == glove[ispecial-1][1]) { - ring_fail = 0; - break; - } - } - if (ring_fail == 1) return 0; - } - } - return 1; -} - -/* ---------------------------------------------------------------------- -evaluate constraints: return 0 if any aren't satisfied -------------------------------------------------------------------------- */ - -int FixBondReact::check_constraints() -{ - double x1[3],x2[3],x3[3],x4[3]; - double delx,dely,delz,rsq; - double delx1,dely1,delz1,delx2,dely2,delz2; - double rsq1,rsq2,r1,r2,c,t,prrhob; - // for computation of dihedrals - double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm; - double ax,ay,az,bx,by,bz,rasq,rbsq,rgsq,rg,ra2inv,rb2inv,rabinv; - double s,phi; - int ANDgate; - - tagint atom1,atom2; - double **x = atom->x; - - int *satisfied; - memory->create(satisfied,nconstraints[rxnID],"bond/react:satisfied"); - for (int i = 0; i < nconstraints[rxnID]; i++) - satisfied[i] = 1; - - for (int i = 0; i < nconstraints[rxnID]; i++) { - if (constraints[i][rxnID].type == DISTANCE) { - get_IDcoords(constraints[i][rxnID].idtype[0], constraints[i][rxnID].id[0], x1); - get_IDcoords(constraints[i][rxnID].idtype[1], constraints[i][rxnID].id[1], x2); - delx = x1[0] - x2[0]; - dely = x1[1] - x2[1]; - delz = x1[2] - x2[2]; - domain->minimum_image(delx,dely,delz); // ghost location fix - rsq = delx*delx + dely*dely + delz*delz; - if (rsq < constraints[i][rxnID].par[0] 
|| rsq > constraints[i][rxnID].par[1]) satisfied[i] = 0; - } else if (constraints[i][rxnID].type == ANGLE) { - get_IDcoords(constraints[i][rxnID].idtype[0], constraints[i][rxnID].id[0], x1); - get_IDcoords(constraints[i][rxnID].idtype[1], constraints[i][rxnID].id[1], x2); - get_IDcoords(constraints[i][rxnID].idtype[2], constraints[i][rxnID].id[2], x3); - - // 1st bond - delx1 = x1[0] - x2[0]; - dely1 = x1[1] - x2[1]; - delz1 = x1[2] - x2[2]; - domain->minimum_image(delx1,dely1,delz1); // ghost location fix - rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; - r1 = sqrt(rsq1); - - // 2nd bond - delx2 = x3[0] - x2[0]; - dely2 = x3[1] - x2[1]; - delz2 = x3[2] - x2[2]; - domain->minimum_image(delx2,dely2,delz2); // ghost location fix - rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; - r2 = sqrt(rsq2); - - // angle (cos and sin) - c = delx1*delx2 + dely1*dely2 + delz1*delz2; - c /= r1*r2; - if (c > 1.0) c = 1.0; - if (c < -1.0) c = -1.0; - if (acos(c) < constraints[i][rxnID].par[0] || acos(c) > constraints[i][rxnID].par[1]) satisfied[i] = 0; - } else if (constraints[i][rxnID].type == DIHEDRAL) { - // phi calculation from dihedral style harmonic - get_IDcoords(constraints[i][rxnID].idtype[0], constraints[i][rxnID].id[0], x1); - get_IDcoords(constraints[i][rxnID].idtype[1], constraints[i][rxnID].id[1], x2); - get_IDcoords(constraints[i][rxnID].idtype[2], constraints[i][rxnID].id[2], x3); - get_IDcoords(constraints[i][rxnID].idtype[3], constraints[i][rxnID].id[3], x4); - - vb1x = x1[0] - x2[0]; - vb1y = x1[1] - x2[1]; - vb1z = x1[2] - x2[2]; - domain->minimum_image(vb1x,vb1y,vb1z); - - vb2x = x3[0] - x2[0]; - vb2y = x3[1] - x2[1]; - vb2z = x3[2] - x2[2]; - domain->minimum_image(vb2x,vb2y,vb2z); - - vb2xm = -vb2x; - vb2ym = -vb2y; - vb2zm = -vb2z; - domain->minimum_image(vb2xm,vb2ym,vb2zm); - - vb3x = x4[0] - x3[0]; - vb3y = x4[1] - x3[1]; - vb3z = x4[2] - x3[2]; - domain->minimum_image(vb3x,vb3y,vb3z); - - ax = vb1y*vb2zm - vb1z*vb2ym; - ay = vb1z*vb2xm - vb1x*vb2zm; - az = 
vb1x*vb2ym - vb1y*vb2xm; - bx = vb3y*vb2zm - vb3z*vb2ym; - by = vb3z*vb2xm - vb3x*vb2zm; - bz = vb3x*vb2ym - vb3y*vb2xm; - - rasq = ax*ax + ay*ay + az*az; - rbsq = bx*bx + by*by + bz*bz; - rgsq = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm; - rg = sqrt(rgsq); - - ra2inv = rb2inv = 0.0; - if (rasq > 0) ra2inv = 1.0/rasq; - if (rbsq > 0) rb2inv = 1.0/rbsq; - rabinv = sqrt(ra2inv*rb2inv); - - c = (ax*bx + ay*by + az*bz)*rabinv; - s = rg*rabinv*(ax*vb3x + ay*vb3y + az*vb3z); - - if (c > 1.0) c = 1.0; - if (c < -1.0) c = -1.0; - phi = atan2(s,c); - - ANDgate = 0; - if (constraints[i][rxnID].par[0] < constraints[i][rxnID].par[1]) { - if (phi > constraints[i][rxnID].par[0] && phi < constraints[i][rxnID].par[1]) ANDgate = 1; - } else { - if (phi > constraints[i][rxnID].par[0] || phi < constraints[i][rxnID].par[1]) ANDgate = 1; - } - if (constraints[i][rxnID].par[2] < constraints[i][rxnID].par[3]) { - if (phi > constraints[i][rxnID].par[2] && phi < constraints[i][rxnID].par[3]) ANDgate = 1; - } else { - if (phi > constraints[i][rxnID].par[2] || phi < constraints[i][rxnID].par[3]) ANDgate = 1; - } - if (ANDgate != 1) satisfied[i] = 0; - } else if (constraints[i][rxnID].type == ARRHENIUS) { - t = get_temperature(glove,0,1); - prrhob = constraints[i][rxnID].par[1]*pow(t,constraints[i][rxnID].par[2])* - exp(-constraints[i][rxnID].par[3]/(force->boltz*t)); - if (prrhob < rrhandom[(int) constraints[i][rxnID].par[0]]->uniform()) satisfied[i] = 0; - } else if (constraints[i][rxnID].type == RMSD) { - // call superpose - int iatom; - int iref = -1; // choose first atom as reference - int n2superpose = 0; - double **xfrozen; // coordinates for the "frozen" target molecule - double **xmobile; // coordinates for the "mobile" molecule - int ifragment = constraints[i][rxnID].id[0]; - if (ifragment >= 0) { - for (int j = 0; j < onemol->natoms; j++) - if (onemol->fragmentmask[ifragment][j]) n2superpose++; - memory->create(xfrozen,n2superpose,3,"bond/react:xfrozen"); - 
memory->create(xmobile,n2superpose,3,"bond/react:xmobile"); - int myincr = 0; - for (int j = 0; j < onemol->natoms; j++) { - if (onemol->fragmentmask[ifragment][j]) { - iatom = atom->map(glove[j][1]); - if (iref == -1) iref = iatom; - iatom = domain->closest_image(iref,iatom); - for (int k = 0; k < 3; k++) { - xfrozen[myincr][k] = x[iatom][k]; - xmobile[myincr][k] = onemol->x[j][k]; - } - myincr++; - } - } - } else { - int iatom; - int iref = -1; // choose first atom as reference - n2superpose = onemol->natoms; - memory->create(xfrozen,n2superpose,3,"bond/react:xfrozen"); - memory->create(xmobile,n2superpose,3,"bond/react:xmobile"); - for (int j = 0; j < n2superpose; j++) { - iatom = atom->map(glove[j][1]); - if (iref == -1) iref = iatom; - iatom = domain->closest_image(iref,iatom); - for (int k = 0; k < 3; k++) { - xfrozen[j][k] = x[iatom][k]; - xmobile[j][k] = onemol->x[j][k]; - } - } - } - Superpose3D superposer(n2superpose); - double rmsd = superposer.Superpose(xfrozen, xmobile); - memory->destroy(xfrozen); - memory->destroy(xmobile); - if (rmsd > constraints[i][rxnID].par[0]) satisfied[i] = 0; - } else if (constraints[i][rxnID].type == CUSTOM) { - satisfied[i] = custom_constraint(constraints[i][rxnID].str); - } - } - - if (nconstraints[rxnID] > 0) { - char evalstr[MAXLINE],*ptr; - strcpy(evalstr,constraintstr[rxnID]); - for (int i = 0; i < nconstraints[rxnID]; i++) { - ptr = strchr(evalstr,'C'); - *ptr = satisfied[i] ? 
'1' : '0'; - } - double verdict = input->variable->evaluate_boolean(evalstr); - if (verdict == 0.0) { - memory->destroy(satisfied); - return 0; - } - } - - // let's also check chirality within 'check_constraint' - for (int i = 0; i < onemol->natoms; i++) { - if (chiral_atoms[i][0][rxnID] == 1) { - double my4coords[12]; - // already ensured, by transitive property, that chiral simulation atom has four neighs - for (int j = 0; j < 4; j++) { - atom1 = atom->map(glove[i][1]); - // loop over known types involved in chiral center - for (int jj = 0; jj < 4; jj++) { - if (atom->type[atom->map(xspecial[atom1][j])] == chiral_atoms[i][jj+2][rxnID]) { - atom2 = atom->map(xspecial[atom1][j]); - atom2 = domain->closest_image(atom1,atom2); - for (int k = 0; k < 3; k++) { - my4coords[3*jj+k] = x[atom2][k]; - } - break; - } - } - } - if (get_chirality(my4coords) != chiral_atoms[i][1][rxnID]) { - memory->destroy(satisfied); - return 0; - } - } - } - - memory->destroy(satisfied); - return 1; -} - -/* ---------------------------------------------------------------------- -return pre-reaction atom or fragment location -fragment: given pre-reacted molID (onemol) and fragID, - return geometric center (of mapped simulation atoms) -------------------------------------------------------------------------- */ - -void FixBondReact::get_IDcoords(int mode, int myID, double *center) -{ - double **x = atom->x; - if (mode == ATOM) { - int iatom = atom->map(glove[myID-1][1]); - for (int i = 0; i < 3; i++) - center[i] = x[iatom][i]; - } else { - int iref = -1; // choose first atom as reference - int iatom; - int nfragatoms = 0; - for (int i = 0; i < 3; i++) - center[i] = 0; - - for (int i = 0; i < onemol->natoms; i++) { - if (onemol->fragmentmask[myID][i]) { - if (iref == -1) - iref = atom->map(glove[i][1]); - iatom = atom->map(glove[i][1]); - iatom = domain->closest_image(iref,iatom); - for (int j = 0; j < 3; j++) - center[j] += x[iatom][j]; - nfragatoms++; - } - } - if (nfragatoms > 0) - for (int 
i = 0; i < 3; i++) center[i] /= nfragatoms; - } -} - -/* ---------------------------------------------------------------------- -compute local temperature: average over all atoms in reaction template -------------------------------------------------------------------------- */ - -double FixBondReact::get_temperature(tagint **myglove, int row_offset, int col) -{ - int i,ilocal; - double adof = domain->dimension; - - double **v = atom->v; - double *mass = atom->mass; - double *rmass = atom->rmass; - int *type = atom->type; - - double t = 0.0; - - if (rmass) { - for (i = 0; i < onemol->natoms; i++) { - ilocal = atom->map(myglove[i+row_offset][col]); - t += (v[ilocal][0]*v[ilocal][0] + v[ilocal][1]*v[ilocal][1] + - v[ilocal][2]*v[ilocal][2]) * rmass[ilocal]; - } - } else { - for (i = 0; i < onemol->natoms; i++) { - ilocal = atom->map(myglove[i+row_offset][col]); - t += (v[ilocal][0]*v[ilocal][0] + v[ilocal][1]*v[ilocal][1] + - v[ilocal][2]*v[ilocal][2]) * mass[type[ilocal]]; - } - } - - // final temperature - double dof = adof*onemol->natoms; - double tfactor = force->mvv2e / (dof * force->boltz); - t *= tfactor; - return t; -} - -/* ---------------------------------------------------------------------- -compute sum of partial charges in rxn site, for updated atoms -note: currently uses global rxnID and onemol variables -------------------------------------------------------------------------- */ - -double FixBondReact::get_totalcharge() -{ - int j,jj; - double *q = atom->q; - double sim_total_charge = 0.0; - for (j = 0; j < onemol->natoms; j++) { - jj = equivalences[j][1][rxnID]-1; - if (custom_charges[jj][rxnID] == 1) - sim_total_charge += q[atom->map(glove[jj][1])]; - } - return sim_total_charge; -} - -/* ---------------------------------------------------------------------- -get per-atom variable names used by custom constraint -------------------------------------------------------------------------- */ - -void FixBondReact::customvarnames() -{ - std::size_t 
pos,pos1,pos2,pos3; - int prev3; - std::string varstr,argstr,varid; - - // search all constraints' varstr for special 'rxn' functions - // add variable names to customvarstrs - // add values to customvars - - for (rxnID = 0; rxnID < nreacts; rxnID++) { - for (int i = 0; i < nconstraints[rxnID]; i++) { - if (constraints[i][rxnID].type == CUSTOM) { - varstr = constraints[i][rxnID].str; - prev3 = -1; - while (true) { - // find next reaction special function occurrence - pos1 = std::string::npos; - for (int i = 0; i < nrxnfunction; i++) { - if (peratomflag[i] == 0) continue; - pos = varstr.find(rxnfunclist[i],prev3+1); - if (pos == std::string::npos) continue; - if (pos < pos1) pos1 = pos; - } - if (pos1 == std::string::npos) break; - - pos2 = varstr.find("(",pos1); - pos3 = varstr.find(")",pos2); - if (pos2 == std::string::npos || pos3 == std::string::npos) - error->all(FLERR,"Fix bond/react: Illegal rxn function syntax\n"); - prev3 = (int)pos3; - argstr = varstr.substr(pos2+1,pos3-pos2-1); - argstr.erase(remove_if(argstr.begin(), argstr.end(), isspace), argstr.end()); // remove whitespace - pos2 = argstr.find(","); - if (pos2 != std::string::npos) varid = argstr.substr(0,pos2); - else varid = argstr; - // check if we already know about this variable - int varidflag = 0; - for (int j = 0; j < ncustomvars; j++) { - if (customvarstrs[j] == varid) { - varidflag = 1; - break; - } - } - if (!varidflag) { - customvarstrs.resize(ncustomvars+1); - customvarstrs[ncustomvars++] = varid; - } - } - } - } - } -} - -/* ---------------------------------------------------------------------- -evaluate per-atom variables needed for custom constraint -------------------------------------------------------------------------- */ - -void FixBondReact::get_customvars() -{ - double *tempvvec; - std::string varid; - int nall = atom->nlocal + atom->nghost; - - memory->create(tempvvec,nall,"bond/react:tempvvec"); - if (vvec == nullptr) { - 
memory->create(vvec,nall,ncustomvars,"bond/react:vvec"); - nvvec = nall; - } - if (nvvec < nall) { - memory->grow(vvec,nall,ncustomvars,"bond/react:vvec"); - nvvec = nall; - } - for (int i = 0; i < ncustomvars; i++) { - varid = customvarstrs[i]; - if (varid.substr(0,2) != "v_") error->all(FLERR,"Fix bond/react: Reaction special function variable " - "name should begin with 'v_'"); - varid = varid.substr(2); - int ivar = input->variable->find(varid.c_str()); - if (ivar < 0) - error->all(FLERR,"Fix bond/react: Reaction special function variable " - "name does not exist"); - if (!input->variable->atomstyle(ivar)) - error->all(FLERR,"Fix bond/react: Reaction special function must " - "reference an atom-style variable"); - - input->variable->compute_atom(ivar,igroup,tempvvec,1,0); - for (int j = 0; j < nall; j++) vvec[j][i] = tempvvec[j]; - } - memory->destroy(tempvvec); -} - -/* ---------------------------------------------------------------------- -evaulate expression for variable constraint -------------------------------------------------------------------------- */ - -double FixBondReact::custom_constraint(const std::string& varstr) -{ - std::size_t pos,pos1,pos2,pos3; - int irxnfunc; - int prev3 = -1; - std::string argstr,varid,fragid,evlcat; - std::vector evlstr; - - // search varstr for special 'rxn' functions - while (true) { - // find next reaction special function occurrence - pos1 = std::string::npos; - for (int i = 0; i < nrxnfunction; i++) { - pos = varstr.find(rxnfunclist[i],prev3+1); - if (pos == std::string::npos) continue; - if (pos < pos1) { - pos1 = pos; - irxnfunc = i; - } - } - if (pos1 == std::string::npos) break; - - fragid = "all"; // operate over entire reaction site by default - pos2 = varstr.find("(",pos1); - pos3 = varstr.find(")",pos2); - if (pos2 == std::string::npos || pos3 == std::string::npos) - error->one(FLERR,"Fix bond/react: Illegal rxn function syntax\n"); - evlstr.push_back(varstr.substr(prev3+1,pos1-(prev3+1))); - prev3 = pos3; - 
argstr = varstr.substr(pos2+1,pos3-pos2-1); - argstr.erase(remove_if(argstr.begin(), argstr.end(), isspace), argstr.end()); // remove whitespace - pos2 = argstr.find(","); - if (pos2 != std::string::npos) { - varid = argstr.substr(0,pos2); - fragid = argstr.substr(pos2+1); - } else varid = argstr; - evlstr.push_back(std::to_string(rxnfunction(rxnfunclist[irxnfunc], varid, fragid))); - } - evlstr.push_back(varstr.substr(prev3+1)); - - for (auto & evl : evlstr) evlcat += evl; - return input->variable->compute_equal(evlcat); -} - -/* ---------------------------------------------------------------------- -currently three 'rxn' functions: rxnsum, rxnave, and rxnbond -------------------------------------------------------------------------- */ - -double FixBondReact::rxnfunction(const std::string& rxnfunc, const std::string& varid, - const std::string& fragid) -{ - int ifrag = -1; - if (fragid != "all") { - ifrag = onemol->findfragment(fragid.c_str()); - if (ifrag < 0) error->one(FLERR,"Bond/react: Molecule fragment " - "in reaction special function does not exist"); - } - - // start with 'rxnbond' per-bond function - // for 'rxnbond', varid corresponds to 'compute bond/local' name, - // and fragid is a pre-reaction fragment containing the two atoms in the bond - if (rxnfunc == "rxnbond") { - int icompute,ibond,nsum; - double perbondval; - std::set aset; - std::string computeid = varid; - std::map,int>::iterator it; - - if (computeid.substr(0,2) != "c_") error->one(FLERR,"Bond/react: Reaction special function compute " - "name should begin with 'c_'"); - computeid = computeid.substr(2); - icompute = modify->find_compute(computeid); - if (icompute < 0) error->one(FLERR,"Bond/react: Reaction special function compute name does not exist"); - cperbond = modify->compute[icompute]; - std::string compute_style = cperbond->style; - if (compute_style != "bond/local") error->one(FLERR,"Bond/react: Compute used by reaction " - "special function 'rxnbond' must be of style 
'bond/local'"); - if (cperbond->size_local_cols > 0) error->one(FLERR,"Bond/react: 'Compute bond/local' used by reaction " - "special function 'rxnbond' must compute one value"); - - if (atoms2bondflag == 0) { - atoms2bondflag = 1; - get_atoms2bond(cperbond->groupbit); - } - - nsum = 0; - for (int i = 0; i < onemol->natoms; i++) { - if (onemol->fragmentmask[ifrag][i]) { - aset.insert(glove[i][1]); - nsum++; - } - } - if (nsum != 2) error->one(FLERR,"Bond/react: Molecule fragment of reaction special function 'rxnbond' " - "must contain exactly two atoms"); - - if (cperbond->invoked_local != lmp->update->ntimestep) - cperbond->compute_local(); - - it = atoms2bond.find(aset); - if (it == atoms2bond.end()) error->one(FLERR,"Bond/react: Unable to locate bond referenced by " - "reaction special function 'rxnbond'"); - ibond = it->second; - perbondval = cperbond->vector_local[ibond]; - return perbondval; - } - - int ivar = -1; - for (int i = 0; i < ncustomvars; i++) { - if (varid == customvarstrs[i]) { - ivar = i; - break; - } - } - // variable name should always be found, at this point - // however, let's double check for completeness - if (ivar < 0) - error->one(FLERR,"Fix bond/react: Reaction special function variable " - "name does not exist"); - - int iatom; - int nsum = 0; - double sumvvec = 0; - if (rxnfunc == "rxnsum" || rxnfunc == "rxnave") { - if (fragid == "all") { - for (int i = 0; i < onemol->natoms; i++) { - iatom = atom->map(glove[i][1]); - sumvvec += vvec[iatom][ivar]; - } - nsum = onemol->natoms; - } else { - for (int i = 0; i < onemol->natoms; i++) { - if (onemol->fragmentmask[ifrag][i]) { - iatom = atom->map(glove[i][1]); - sumvvec += vvec[iatom][ivar]; - nsum++; - } - } - } - } - - if (rxnfunc == "rxnsum") return sumvvec; - if (rxnfunc == "rxnave") return sumvvec/nsum; - return 0.0; -} - -/* ---------------------------------------------------------------------- -populate map to get bond index from atom IDs 
-------------------------------------------------------------------------- */ - -void FixBondReact::get_atoms2bond(int cgroupbit) -{ - int i,m,atom1,atom2,btype,nb; - std::set aset; - - int nlocal = atom->nlocal; - tagint *tag = atom->tag; - int *num_bond = atom->num_bond; - tagint **bond_atom = atom->bond_atom; - int **bond_type = atom->bond_type; - int *mask = atom->mask; - - m = 0; - atoms2bond.clear(); - for (atom1 = 0; atom1 < nlocal; atom1++) { - if (!(mask[atom1] & cgroupbit)) continue; - nb = num_bond[atom1]; - for (i = 0; i < nb; i++) { - btype = bond_type[atom1][i]; - atom2 = atom->map(bond_atom[atom1][i]); - if (atom2 < 0 || !(mask[atom2] & cgroupbit)) continue; - if (newton_bond == 0 && tag[atom1] > tag[atom2]) continue; - if (btype == 0) continue; - aset = {tag[atom1], tag[atom2]}; - atoms2bond.insert(std::make_pair(aset,m++)); - } - } -} - -/* ---------------------------------------------------------------------- -return handedness (1 or -1) of a chiral center, given ordered set of coordinates -------------------------------------------------------------------------- */ - -int FixBondReact::get_chirality(double four_coords[12]) -{ - // define oriented plane with first three coordinates - double vec1[3],vec2[3],vec3[3],vec4[3],mean3[3],dot; - - for (int i = 0; i < 3; i++) { - vec1[i] = four_coords[i]-four_coords[i+3]; - vec2[i] = four_coords[i+3]-four_coords[i+6]; - } - - MathExtra::cross3(vec1,vec2,vec3); - - for (int i = 0; i < 3; i++) { - mean3[i] = (four_coords[i] + four_coords[i+3] + - four_coords[i+6])/3; - vec4[i] = four_coords[i+9] - mean3[i]; - } - - dot = MathExtra::dot3(vec3,vec4); - dot = dot/fabs(dot); - return (int) dot; -} - -/* ---------------------------------------------------------------------- - Get xspecials for current molecule templates - may need correction when specials defined explicitly in molecule templates -------------------------------------------------------------------------- */ - -void FixBondReact::get_molxspecials() 
-{ - onemol_nxspecial = onemol->nspecial; - onemol_xspecial = onemol->special; - twomol_nxspecial = twomol->nspecial; - twomol_xspecial = twomol->special; -} - -/* ---------------------------------------------------------------------- - Determine which pre-reacted template atoms are at least three bonds - away from edge atoms. -------------------------------------------------------------------------- */ - -void FixBondReact::find_landlocked_atoms(int myrxn) -{ - // landlocked_atoms are atoms for which all topology is contained in reacted template - // if dihedrals/impropers exist: this means that edge atoms are not in their 1-3 neighbor list - // note: due to various usage/definitions of impropers, treated same as dihedrals - // if angles exist: this means edge atoms not in their 1-2 neighbors list - // if just bonds: this just means that edge atoms are not landlocked - // Note: landlocked defined in terms of reacted template - // if no edge atoms (small reacting molecule), all atoms are landlocked - // we can delete all current topology of landlocked atoms and replace - - // always remove edge atoms from landlocked list - for (int i = 0; i < twomol->natoms; i++) { - if (create_atoms[i][myrxn] == 0 && edge[equivalences[i][1][myrxn]-1][myrxn] == 1) - landlocked_atoms[i][myrxn] = 0; - else landlocked_atoms[i][myrxn] = 1; - } - int nspecial_limit = -1; - if (force->angle && twomol->angleflag) nspecial_limit = 0; - - if ((force->dihedral && twomol->dihedralflag) || - (force->improper && twomol->improperflag)) nspecial_limit = 1; - - if (nspecial_limit != -1) { - for (int i = 0; i < twomol->natoms; i++) { - for (int j = 0; j < twomol_nxspecial[i][nspecial_limit]; j++) { - for (int k = 0; k < onemol->natoms; k++) { - if (equivalences[twomol_xspecial[i][j]-1][1][myrxn] == k+1 && edge[k][myrxn] == 1) { - landlocked_atoms[i][myrxn] = 0; - } - } - } - } - } - - // bad molecule templates check - // if atoms change types, but aren't landlocked, that's bad - for (int i = 0; i < 
twomol->natoms; i++) { - if ((create_atoms[i][myrxn] == 0) && - (twomol->type[i] != onemol->type[equivalences[i][1][myrxn]-1]) && - (landlocked_atoms[i][myrxn] == 0)) - error->all(FLERR, "Fix bond/react: Atom type affected by reaction {} is too close " - "to template edge", rxn_name[myrxn]); - } - - // additionally, if a bond changes type, but neither involved atom is landlocked, bad - // would someone want to change an angle type but not bond or atom types? (etc.) ...hopefully not yet - for (int i = 0; i < twomol->natoms; i++) { - if (create_atoms[i][myrxn] == 0) { - if (landlocked_atoms[i][myrxn] == 0) { - for (int j = 0; j < twomol->num_bond[i]; j++) { - int twomol_atomj = twomol->bond_atom[i][j]; - if (landlocked_atoms[twomol_atomj-1][myrxn] == 0) { - int onemol_atomi = equivalences[i][1][myrxn]; - int onemol_batom; - for (int m = 0; m < onemol->num_bond[onemol_atomi-1]; m++) { - onemol_batom = onemol->bond_atom[onemol_atomi-1][m]; - if ((onemol_batom == equivalences[twomol_atomj-1][1][myrxn]) && - (twomol->bond_type[i][j] != onemol->bond_type[onemol_atomi-1][m])) - error->all(FLERR, "Fix bond/react: Bond type affected by reaction {} is " - "too close to template edge",rxn_name[myrxn]); - } - if (newton_bond) { - int onemol_atomj = equivalences[twomol_atomj-1][1][myrxn]; - for (int m = 0; m < onemol->num_bond[onemol_atomj-1]; m++) { - onemol_batom = onemol->bond_atom[onemol_atomj-1][m]; - if ((onemol_batom == equivalences[i][1][myrxn]) && - (twomol->bond_type[i][j] != onemol->bond_type[onemol_atomj-1][m])) - error->all(FLERR, "Fix bond/react: Bond type affected by reaction {} is " - "too close to template edge",rxn_name[myrxn]); - } - } - } - } - } - } - } - - // additionally, if a deleted atom is bonded to an atom that is not deleted, bad - for (int i = 0; i < onemol->natoms; i++) { - if (delete_atoms[i][myrxn] == 1) { - int ii = reverse_equiv[i][1][myrxn] - 1; - for (int j = 0; j < twomol_nxspecial[ii][0]; j++) { - if 
(delete_atoms[equivalences[twomol_xspecial[ii][j]-1][1][myrxn]-1][myrxn] == 0) { - error->all(FLERR,"Fix bond/react: A deleted atom cannot remain bonded to an atom that is not deleted"); - } - } - } - } - - // also, if atoms change number of bonds, but aren't landlocked, that could be bad - int warnflag = 0; - if (comm->me == 0) - for (int i = 0; i < twomol->natoms; i++) { - if ((create_atoms[i][myrxn] == 0) && - (twomol_nxspecial[i][0] != onemol_nxspecial[equivalences[i][1][myrxn]-1][0]) && - (landlocked_atoms[i][myrxn] == 0)) { - warnflag = 1; - break; - } - } - - // also, if an atom changes any of its bonds, but is not landlocked, that could be bad - int thereflag; - if (comm->me == 0) - for (int i = 0; i < twomol->natoms; i++) { - if (landlocked_atoms[i][myrxn] == 1) continue; - for (int j = 0; j < twomol_nxspecial[i][0]; j++) { - int oneneighID = equivalences[twomol_xspecial[i][j]-1][1][myrxn]; - int ii = equivalences[i][1][myrxn] - 1; - thereflag = 0; - for (int k = 0; k < onemol_nxspecial[ii][0]; k++) { - if (oneneighID == onemol_xspecial[ii][k]) { - thereflag = 1; - break; - } - } - if (thereflag == 0) { - warnflag = 1; - break; - } - } - if (warnflag == 1) break; - } - - if (comm->me == 0 && warnflag == 1) error->warning(FLERR, "Fix bond/react: Atom affected " - "by reaction {} is too close to template edge",rxn_name[myrxn]); - - // finally, if a created atom is not landlocked, bad! 
- for (int i = 0; i < twomol->natoms; i++) { - if (create_atoms[i][myrxn] == 1 && landlocked_atoms[i][myrxn] == 0) { - error->one(FLERR,"Fix bond/react: Created atom too close to template edge"); - } - } -} - -/* ---------------------------------------------------------------------- -let's dedup global_mega_glove -allows for same site undergoing different pathways, in parallel -------------------------------------------------------------------------- */ - -void FixBondReact::dedup_mega_gloves(int dedup_mode) -{ - // dedup_mode == LOCAL for local_dedup - // dedup_mode == GLOBAL for global_mega_glove - - if (dedup_mode == GLOBAL) - for (int i = 0; i < nreacts; i++) - ghostly_rxn_count[i] = 0; - - int dedup_size = 0; - if (dedup_mode == LOCAL) { - dedup_size = my_num_mega; - } else if (dedup_mode == GLOBAL) { - dedup_size = global_megasize; - } - - double **dedup_glove; - memory->create(dedup_glove,max_natoms+cuff,dedup_size,"bond/react:dedup_glove"); - - if (dedup_mode == LOCAL) { - for (int i = 0; i < dedup_size; i++) { - for (int j = 0; j < max_natoms+cuff; j++) { - dedup_glove[j][i] = my_mega_glove[j][i]; - } - } - } else if (dedup_mode == GLOBAL) { - for (int i = 0; i < dedup_size; i++) { - for (int j = 0; j < max_natoms+cuff; j++) { - dedup_glove[j][i] = global_mega_glove[j][i]; - } - } - } - - // dedup_mask is size dedup_size and filters reactions that have been deleted - // a value of 1 means this reaction instance has been deleted - int *dedup_mask = new int[dedup_size]; - for (int i = 0; i < dedup_size; i++) { - dedup_mask[i] = 0; - } - - // let's randomly mix up our reaction instances first - // then we can feel okay about ignoring ones we've already deleted (or accepted) - // based off std::shuffle - double *temp_rxn = new double[max_natoms+cuff]; - for (int i = dedup_size-1; i > 0; --i) { //dedup_size - // choose random entry to swap current one with - int k = floor(random[0]->uniform()*(i+1)); - - // swap entries - for (int j = 0; j < max_natoms+cuff; 
j++) - temp_rxn[j] = dedup_glove[j][i]; - - for (int j = 0; j < max_natoms+cuff; j++) { - dedup_glove[j][i] = dedup_glove[j][k]; - dedup_glove[j][k] = temp_rxn[j]; - } - } - delete [] temp_rxn; - - for (int i = 0; i < dedup_size; i++) { - if (dedup_mask[i] == 0) { - int myrxnid1 = dedup_glove[0][i]; - onemol = atom->molecules[unreacted_mol[myrxnid1]]; - for (int j = 0; j < onemol->natoms; j++) { - int check1 = dedup_glove[j+cuff][i]; - for (int ii = i + 1; ii < dedup_size; ii++) { - if (dedup_mask[ii] == 0) { - int myrxnid2 = dedup_glove[0][ii]; - twomol = atom->molecules[unreacted_mol[myrxnid2]]; - for (int jj = 0; jj < twomol->natoms; jj++) { - int check2 = dedup_glove[jj+cuff][ii]; - if (check2 == check1) { - dedup_mask[ii] = 1; - break; - } - } - } - } - } - } - } - - // we must update local_mega_glove and local_megasize - // we can simply overwrite local_mega_glove column by column - if (dedup_mode == LOCAL) { - int my_new_megasize = 0; - for (int i = 0; i < my_num_mega; i++) { - if (dedup_mask[i] == 0) { - for (int j = 0; j < max_natoms+cuff; j++) { - my_mega_glove[j][my_new_megasize] = dedup_glove[j][i]; - } - my_new_megasize++; - } - } - my_num_mega = my_new_megasize; - } - - // we must update global_mega_glove and global_megasize - // we can simply overwrite global_mega_glove column by column - if (dedup_mode == GLOBAL) { - int new_global_megasize = 0; - for (int i = 0; i < global_megasize; i++) { - if (dedup_mask[i] == 0) { - ghostly_rxn_count[(int) dedup_glove[0][i]]++; - for (int j = 0; j < max_natoms + cuff; j++) { - global_mega_glove[j][new_global_megasize] = dedup_glove[j][i]; - } - new_global_megasize++; - } - } - global_megasize = new_global_megasize; - } - - memory->destroy(dedup_glove); - delete [] dedup_mask; -} - -/* ---------------------------------------------------------------------- -let's unlimit movement of newly bonded atoms after n timesteps. 
-we give them back to the system thermostat -------------------------------------------------------------------------- */ - -void FixBondReact::unlimit_bond() -{ - // let's now unlimit in terms of i_limit_tags - // we just run through all nlocal, looking for > limit_duration - // then we return i_limit_tag to 0 (which removes from dynamic group) - int flag, cols; - int index1 = atom->find_custom("limit_tags",flag,cols); - int *i_limit_tags = atom->ivector[index1]; - - int *i_statted_tags; - if (stabilization_flag == 1) { - int index2 = atom->find_custom(statted_id,flag,cols); - i_statted_tags = atom->ivector[index2]; - } - - int index3 = atom->find_custom("react_tags",flag,cols); - int *i_react_tags = atom->ivector[index3]; - - int unlimitflag = 0; - for (int i = 0; i < atom->nlocal; i++) { - // unlimit atoms for next step! this resolves # of procs disparity, mostly - // first '1': indexing offset, second '1': for next step - if (i_limit_tags[i] != 0 && (update->ntimestep + 1 - i_limit_tags[i]) > limit_duration[i_react_tags[i]]) { - unlimitflag = 1; - i_limit_tags[i] = 0; - if (stabilization_flag == 1) i_statted_tags[i] = 1; - i_react_tags[i] = 0; - } - } - - // really should only communicate this per-atom property, not entire reneighboring - MPI_Allreduce(MPI_IN_PLACE,&unlimitflag,1,MPI_INT,MPI_MAX,world); - if (unlimitflag) next_reneighbor = update->ntimestep; -} - -/* ---------------------------------------------------------------------- -check mega_glove for ghosts -if so, flag for broadcasting for perusal by all processors -------------------------------------------------------------------------- */ - -void FixBondReact::glove_ghostcheck() -{ - // here we add glove to either local_mega_glove or ghostly_mega_glove - // ghostly_mega_glove includes atoms that are ghosts, either of this proc or another - // 'ghosts of another' indication taken from comm->sendlist - // also includes local gloves that overlap with ghostly gloves, to get dedup right - - for (int i = 
0; i < nreacts; i++) - local_rxn_count[i] = 0; - - for (int i = 0; i < my_num_mega; i++) { - rxnID = (int) my_mega_glove[0][i]; - onemol = atom->molecules[unreacted_mol[rxnID]]; - int ghostly = 0; - #if !defined(MPI_STUBS) - if (comm->style == Comm::BRICK) { - if (create_atoms_flag[rxnID] == 1) { - ghostly = 1; - } else { - for (int j = 0; j < onemol->natoms; j++) { - int ilocal = atom->map((tagint) my_mega_glove[j+cuff][i]); - if (ilocal >= atom->nlocal || localsendlist[ilocal] == 1) { - ghostly = 1; - break; - } - } - } - } else { - ghostly = 1; - } - #endif - - if (ghostly == 1) { - for (int j = 0; j < onemol->natoms+cuff; j++) { - ghostly_mega_glove[j][ghostly_num_mega] = my_mega_glove[j][i]; - } - ghostly_num_mega++; - } else { - local_rxn_count[rxnID]++; - for (int j = 0; j < onemol->natoms+cuff; j++) { - local_mega_glove[j][local_num_mega] = my_mega_glove[j][i]; - } - local_num_mega++; - } - } -} - -/* ---------------------------------------------------------------------- -broadcast entries of mega_glove which contain nonlocal atoms for perusal by all processors -------------------------------------------------------------------------- */ - -void FixBondReact::ghost_glovecast() -{ -#if !defined(MPI_STUBS) - const int nprocs = comm->nprocs; - - global_megasize = 0; - - int *allncols = new int[nprocs]; - for (int i = 0; i < nprocs; i++) - allncols[i] = 0; - MPI_Allgather(&ghostly_num_mega, 1, MPI_INT, allncols, 1, MPI_INT, world); - for (int i = 0; i < nprocs; i++) - global_megasize = global_megasize + allncols[i]; - - if (global_megasize == 0) { - delete [] allncols; - return; - } - - int *allstarts = new int[nprocs]; - - int start = 0; - for (int i = 0; i < comm->me; i++) { - start += allncols[i]; - } - MPI_Allgather(&start, 1, MPI_INT, allstarts, 1, MPI_INT, world); - MPI_Datatype columnunsized, column; - int sizes[2] = {max_natoms+cuff, global_megasize}; - int subsizes[2] = {max_natoms+cuff, 1}; - int starts[2] = {0,0}; - MPI_Type_create_subarray (2, 
sizes, subsizes, starts, MPI_ORDER_C, - MPI_DOUBLE, &columnunsized); - MPI_Type_create_resized (columnunsized, 0, sizeof(double), &column); - MPI_Type_commit(&column); - - memory->destroy(global_mega_glove); - memory->create(global_mega_glove,max_natoms+cuff,global_megasize,"bond/react:global_mega_glove"); - - for (int i = 0; i < max_natoms+cuff; i++) - for (int j = 0; j < global_megasize; j++) - global_mega_glove[i][j] = 0; - - if (ghostly_num_mega > 0) { - for (int i = 0; i < max_natoms+cuff; i++) { - for (int j = 0; j < ghostly_num_mega; j++) { - global_mega_glove[i][j+start] = ghostly_mega_glove[i][j]; - } - } - } - // let's send to root, dedup, then broadcast - if (comm->me == 0) { - MPI_Gatherv(MPI_IN_PLACE, ghostly_num_mega, column, // Note: some values ignored for MPI_IN_PLACE - &(global_mega_glove[0][0]), allncols, allstarts, - column, 0, world); - } else { - MPI_Gatherv(&(global_mega_glove[0][start]), ghostly_num_mega, column, - &(global_mega_glove[0][0]), allncols, allstarts, - column, 0, world); - } - - if (comm->me == 0) dedup_mega_gloves(GLOBAL); // global_mega_glove mode - MPI_Bcast(&global_megasize,1,MPI_INT,0,world); - MPI_Bcast(&(global_mega_glove[0][0]), global_megasize, column, 0, world); - - delete [] allstarts; - delete [] allncols; - - MPI_Type_free(&column); - MPI_Type_free(&columnunsized); -#endif -} - -/* ---------------------------------------------------------------------- -update molecule IDs, charges, types, special lists and all topology -------------------------------------------------------------------------- */ - -void FixBondReact::update_everything() -{ - int nlocal = atom->nlocal; // must be redefined after create atoms - int *type = atom->type; - int **nspecial = atom->nspecial; - tagint **special = atom->special; - - int **bond_type = atom->bond_type; - tagint **bond_atom = atom->bond_atom; - int *num_bond = atom->num_bond; - - // used when deleting atoms - int ndel,ndelone; - int *mark; - int nmark = nlocal; - 
memory->create(mark,nmark,"bond/react:mark"); - for (int i = 0; i < nmark; i++) mark[i] = 0; - - // flag used to delete special interactions - int *delflag; - memory->create(delflag,atom->maxspecial,"bond/react:delflag"); - - tagint *tag = atom->tag; - AtomVec *avec = atom->avec; - - // used when creating atoms - int inserted_atoms_flag = 0; - - // update atom->nbonds, etc. - // TODO: correctly tally with 'newton off' - int delta_bonds = 0; - int delta_angle = 0; - int delta_dihed = 0; - int delta_imprp = 0; - - // use the following per-atom arrays to keep track of reacting atoms - - int flag,cols; - int index1 = atom->find_custom("limit_tags",flag,cols); - int *i_limit_tags = atom->ivector[index1]; - - int *i_statted_tags; - if (stabilization_flag == 1) { - int index2 = atom->find_custom(statted_id,flag,cols); - i_statted_tags = atom->ivector[index2]; - } - - int index3 = atom->find_custom("react_tags",flag,cols); - int *i_react_tags = atom->ivector[index3]; - - // pass through twice - // redefining 'update_num_mega' and 'update_mega_glove' each time - // first pass: when glove is all local atoms - // second pass: search for local atoms in global_mega_glove - // add check for local atoms as well - - int update_num_mega; - tagint **update_mega_glove; - // for now, keeping rxnID in update_mega_glove, but not rest of cuff in update_mega_glove - int maxmega = MAX(local_num_mega,global_megasize); - memory->create(update_mega_glove,max_natoms+1,maxmega,"bond/react:update_mega_glove"); - - double *sim_total_charges; - if (rescale_charges_anyflag) memory->create(sim_total_charges,maxmega,"bond/react:sim_total_charges"); - - for (int pass = 0; pass < 2; pass++) { - update_num_mega = 0; - int *iskip = new int[nreacts]; - for (int i = 0; i < nreacts; i++) iskip[i] = 0; - if (pass == 0) { - for (int i = 0; i < local_num_mega; i++) { - rxnID = (int) local_mega_glove[0][i]; - // reactions already shuffled from dedup procedure, so can skip first N - if (iskip[rxnID]++ < 
nlocalskips[rxnID]) continue; - - // this will be overwritten if reaction skipped by create_atoms below - update_mega_glove[0][update_num_mega] = (tagint) local_mega_glove[0][i]; - for (int j = 0; j < max_natoms; j++) - update_mega_glove[j+1][update_num_mega] = (tagint) local_mega_glove[j+cuff][i]; - - // atoms inserted here for serial MPI_STUBS build only - if (create_atoms_flag[rxnID] == 1) { - onemol = atom->molecules[unreacted_mol[rxnID]]; - twomol = atom->molecules[reacted_mol[rxnID]]; - if (insert_atoms(update_mega_glove,update_num_mega)) { - inserted_atoms_flag = 1; - } else { // create aborted - reaction_count_total[rxnID]--; - continue; - } - } - - if (rescale_charges_flag[rxnID]) sim_total_charges[update_num_mega] = local_mega_glove[1][i]; - update_num_mega++; - } - } else if (pass == 1) { - for (int i = 0; i < global_megasize; i++) { - rxnID = (int) global_mega_glove[0][i]; - // reactions already shuffled from dedup procedure, so can skip first N - if (iskip[rxnID]++ < nghostlyskips[rxnID]) continue; - - // this will be overwritten if reaction skipped by create_atoms below - update_mega_glove[0][update_num_mega] = (tagint) global_mega_glove[0][i]; - for (int j = 0; j < max_natoms; j++) - update_mega_glove[j+1][update_num_mega] = (tagint) global_mega_glove[j+cuff][i]; - - // we can insert atoms here, now that reactions are finalized - // can't do it any earlier, due to skipped reactions (max_rxn) - // for MPI build, reactions that create atoms are always treated as 'global' - if (create_atoms_flag[rxnID] == 1) { - onemol = atom->molecules[unreacted_mol[rxnID]]; - twomol = atom->molecules[reacted_mol[rxnID]]; - if (insert_atoms(update_mega_glove,update_num_mega)) { - inserted_atoms_flag = 1; - } else { // create aborted - reaction_count_total[rxnID]--; - continue; - } - } - - if (rescale_charges_flag[rxnID]) sim_total_charges[update_num_mega] = global_mega_glove[1][i]; - update_num_mega++; - } - } - delete [] iskip; - - if (update_num_mega == 0) continue; 
- - // if inserted atoms and global map exists, reset map now instead - // of waiting for comm since other pre-exchange fixes may use it - // invoke map_init() b/c atom count has grown - // do this once after all atom insertions - if (inserted_atoms_flag == 1 && atom->map_style != Atom::MAP_NONE) { - atom->map_init(); - atom->map_set(); - } - - // mark to-delete atoms - nlocal = atom->nlocal; - if (nlocal > nmark) { - memory->grow(mark,nlocal,"bond/react:mark"); - for (int i = nmark; i < nlocal; i++) mark[i] = 0; - nmark = nlocal; - } - for (int i = 0; i < update_num_mega; i++) { - rxnID = update_mega_glove[0][i]; - onemol = atom->molecules[unreacted_mol[rxnID]]; - for (int j = 0; j < onemol->natoms; j++) { - int iatom = atom->map(update_mega_glove[j+1][i]); - if (delete_atoms[j][rxnID] == 1 && iatom >= 0 && iatom < nlocal) { - mark[iatom] = 1; - } - } - } - - // update charges and types of landlocked atoms - // also keep track of 'stabilization' groups here - int n_custom_charge; - double charge_rescale_addend; - for (int i = 0; i < update_num_mega; i++) { - charge_rescale_addend = 0; - rxnID = update_mega_glove[0][i]; - twomol = atom->molecules[reacted_mol[rxnID]]; - if (rescale_charges_flag[rxnID]) { - n_custom_charge = rescale_charges_flag[rxnID]; - charge_rescale_addend = (sim_total_charges[i]-mol_total_charge[rxnID])/n_custom_charge; - } - for (int j = 0; j < twomol->natoms; j++) { - int jj = equivalences[j][1][rxnID]-1; - int ilocal = atom->map(update_mega_glove[jj+1][i]); - if (ilocal >= 0 && ilocal < nlocal) { - - // update->ntimestep could be 0. 
so add 1 throughout - i_limit_tags[ilocal] = update->ntimestep + 1; - if (stabilization_flag == 1) i_statted_tags[ilocal] = 0; - i_react_tags[ilocal] = rxnID; - - if (landlocked_atoms[j][rxnID] == 1) - type[ilocal] = twomol->type[j]; - if (twomol->qflag && atom->q_flag && custom_charges[jj][rxnID] == 1) { - double *q = atom->q; - q[ilocal] = twomol->q[j]+charge_rescale_addend; - } - } - } - } - - int insert_num; - // very nice and easy to completely overwrite special bond info for landlocked atoms - for (int i = 0; i < update_num_mega; i++) { - rxnID = update_mega_glove[0][i]; - onemol = atom->molecules[unreacted_mol[rxnID]]; - twomol = atom->molecules[reacted_mol[rxnID]]; - for (int j = 0; j < twomol->natoms; j++) { - int jj = equivalences[j][1][rxnID]-1; - int ilocal = atom->map(update_mega_glove[jj+1][i]); - if (ilocal < nlocal && ilocal >= 0) { - if (landlocked_atoms[j][rxnID] == 1) { - for (int k = 0; k < 3; k++) { - nspecial[ilocal][k] = twomol->nspecial[j][k]; - } - for (int p = 0; p < twomol->nspecial[j][2]; p++) { - special[ilocal][p] = update_mega_glove[equivalences[twomol->special[j][p]-1][1][rxnID]][i]; - } - } - // now delete and replace landlocked atoms from non-landlocked atoms' special info - // delete 1-2, 1-3, 1-4 specials individually. 
only delete if special exists in pre-reaction template - if (landlocked_atoms[j][rxnID] == 0) { - int ispec, fspec, imolspec, fmolspec, nspecdel[3]; - for (int k = 0; k < 3; k++) nspecdel[k] = 0; - for (int k = 0; k < atom->maxspecial; k++) delflag[k] = 0; - for (int specn = 0; specn < 3; specn++) { - if (specn == 0) { - imolspec = 0; - ispec = 0; - } else { - imolspec = onemol->nspecial[jj][specn-1]; - ispec = nspecial[ilocal][specn-1]; - } - fmolspec = onemol->nspecial[jj][specn]; - fspec = nspecial[ilocal][specn]; - for (int k = ispec; k < fspec; k++) { - for (int p = imolspec; p < fmolspec; p++) { - if (update_mega_glove[onemol->special[jj][p]][i] == special[ilocal][k]) { - delflag[k] = 1; - for (int m = 2; m >= specn; m--) nspecdel[m]++; - break; - } - } - } - } - int incr = 0; - for (int k = 0; k < nspecial[ilocal][2]; k++) - if (delflag[k] == 0) special[ilocal][incr++] = special[ilocal][k]; - for (int m = 0; m < 3; m++) nspecial[ilocal][m] -= nspecdel[m]; - // now reassign from reacted template - for (int k = 0; k < twomol->nspecial[j][2]; k++) { - if (k > twomol->nspecial[j][1] - 1) { - insert_num = nspecial[ilocal][2]++; - } else if (k > twomol->nspecial[j][0] - 1) { - insert_num = nspecial[ilocal][1]++; - nspecial[ilocal][2]++; - } else { - insert_num = nspecial[ilocal][0]++; - nspecial[ilocal][1]++; - nspecial[ilocal][2]++; - } - if (nspecial[ilocal][2] > atom->maxspecial) - error->one(FLERR,"Fix bond/react special bond generation overflow"); - for (int n = nspecial[ilocal][2]-1; n > insert_num; n--) { - special[ilocal][n] = special[ilocal][n-1]; - } - special[ilocal][insert_num] = update_mega_glove[equivalences[twomol->special[j][k]-1][1][rxnID]][i]; - } - } - } - } - } - - // next let's update bond info - // cool thing is, newton_bond issues are already taken care of in templates - // same with class2 improper issues, which is why this fix started in the first place - // also need to find any instances of bond history to update histories - auto 
histories = modify->get_fix_by_style("BOND_HISTORY"); - int n_histories = histories.size(); - - for (int i = 0; i < update_num_mega; i++) { - rxnID = update_mega_glove[0][i]; - twomol = atom->molecules[reacted_mol[rxnID]]; - // let's first delete all bond info about landlocked atoms - for (int j = 0; j < twomol->natoms; j++) { - int jj = equivalences[j][1][rxnID]-1; - if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { - if (landlocked_atoms[j][rxnID] == 1) { - delta_bonds -= num_bond[atom->map(update_mega_glove[jj+1][i])]; - // If deleting all bonds, first cache then remove all histories - if (n_histories > 0) - for (auto &ihistory: histories) { - for (int n = 0; n < num_bond[atom->map(update_mega_glove[jj+1][i])]; n++) - dynamic_cast(ihistory)->cache_history(atom->map(update_mega_glove[jj+1][i]), n); - for (int n = 0; n < num_bond[atom->map(update_mega_glove[jj+1][i])]; n++) - dynamic_cast(ihistory)->delete_history(atom->map(update_mega_glove[jj+1][i]), 0); - } - num_bond[atom->map(update_mega_glove[jj+1][i])] = 0; - } - if (landlocked_atoms[j][rxnID] == 0) { - for (int p = num_bond[atom->map(update_mega_glove[jj+1][i])]-1; p > -1 ; p--) { - for (int n = 0; n < twomol->natoms; n++) { - int nn = equivalences[n][1][rxnID]-1; - if (n!=j && bond_atom[atom->map(update_mega_glove[jj+1][i])][p] == update_mega_glove[nn+1][i] && landlocked_atoms[n][rxnID] == 1) { - // Cache history information, shift history, then delete final element - if (n_histories > 0) - for (auto &ihistory: histories) - dynamic_cast(ihistory)->cache_history(atom->map(update_mega_glove[jj+1][i]), p); - for (int m = p; m < num_bond[atom->map(update_mega_glove[jj+1][i])]-1; m++) { - bond_type[atom->map(update_mega_glove[jj+1][i])][m] = bond_type[atom->map(update_mega_glove[jj+1][i])][m+1]; - bond_atom[atom->map(update_mega_glove[jj+1][i])][m] = bond_atom[atom->map(update_mega_glove[jj+1][i])][m+1]; - if (n_histories > 0) - for (auto &ihistory: histories) 
- dynamic_cast(ihistory)->shift_history(atom->map(update_mega_glove[jj+1][i]),m,m+1); - } - if (n_histories > 0) - for (auto &ihistory: histories) - dynamic_cast(ihistory)->delete_history(atom->map(update_mega_glove[jj+1][i]), - num_bond[atom->map(update_mega_glove[jj+1][i])]-1); - num_bond[atom->map(update_mega_glove[jj+1][i])]--; - delta_bonds--; - } - } - } - } - } - } - // now let's add the new bond info. - for (int j = 0; j < twomol->natoms; j++) { - int jj = equivalences[j][1][rxnID]-1; - if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { - if (landlocked_atoms[j][rxnID] == 1) { - num_bond[atom->map(update_mega_glove[jj+1][i])] = twomol->num_bond[j]; - delta_bonds += twomol->num_bond[j]; - for (int p = 0; p < twomol->num_bond[j]; p++) { - bond_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->bond_type[j][p]; - bond_atom[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->bond_atom[j][p]-1][1][rxnID]][i]; - // Check cached history data to see if bond regenerated - if (n_histories > 0) - for (auto &ihistory: histories) - dynamic_cast(ihistory)->check_cache(atom->map(update_mega_glove[jj+1][i]), p); - } - } - if (landlocked_atoms[j][rxnID] == 0) { - for (int p = 0; p < twomol->num_bond[j]; p++) { - if (landlocked_atoms[twomol->bond_atom[j][p]-1][rxnID] == 1) { - insert_num = num_bond[atom->map(update_mega_glove[jj+1][i])]; - bond_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->bond_type[j][p]; - bond_atom[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->bond_atom[j][p]-1][1][rxnID]][i]; - // Check cached history data to see if bond regenerated - if (n_histories > 0) - for (auto &ihistory: histories) - dynamic_cast(ihistory)->check_cache(atom->map(update_mega_glove[jj+1][i]), insert_num); - num_bond[atom->map(update_mega_glove[jj+1][i])]++; - if (num_bond[atom->map(update_mega_glove[jj+1][i])] > atom->bond_per_atom) 
- error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom"); - delta_bonds++; - } - } - } - } - } - } - - if (n_histories > 0) - for (auto &ihistory: histories) - dynamic_cast(ihistory)->clear_cache(); - - // Angles! First let's delete all angle info: - if (force->angle) { - int *num_angle = atom->num_angle; - int **angle_type = atom->angle_type; - tagint **angle_atom1 = atom->angle_atom1; - tagint **angle_atom2 = atom->angle_atom2; - tagint **angle_atom3 = atom->angle_atom3; - - for (int i = 0; i < update_num_mega; i++) { - rxnID = update_mega_glove[0][i]; - twomol = atom->molecules[reacted_mol[rxnID]]; - for (int j = 0; j < twomol->natoms; j++) { - int jj = equivalences[j][1][rxnID]-1; - if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { - if (landlocked_atoms[j][rxnID] == 1) { - delta_angle -= num_angle[atom->map(update_mega_glove[jj+1][i])]; - num_angle[atom->map(update_mega_glove[jj+1][i])] = 0; - } - if (landlocked_atoms[j][rxnID] == 0) { - for (int p = num_angle[atom->map(update_mega_glove[jj+1][i])]-1; p > -1; p--) { - for (int n = 0; n < twomol->natoms; n++) { - int nn = equivalences[n][1][rxnID]-1; - if (n!=j && landlocked_atoms[n][rxnID] == 1 && - (angle_atom1[atom->map(update_mega_glove[jj+1][i])][p] == update_mega_glove[nn+1][i] || - angle_atom2[atom->map(update_mega_glove[jj+1][i])][p] == update_mega_glove[nn+1][i] || - angle_atom3[atom->map(update_mega_glove[jj+1][i])][p] == update_mega_glove[nn+1][i])) { - for (int m = p; m < num_angle[atom->map(update_mega_glove[jj+1][i])]-1; m++) { - angle_type[atom->map(update_mega_glove[jj+1][i])][m] = angle_type[atom->map(update_mega_glove[jj+1][i])][m+1]; - angle_atom1[atom->map(update_mega_glove[jj+1][i])][m] = angle_atom1[atom->map(update_mega_glove[jj+1][i])][m+1]; - angle_atom2[atom->map(update_mega_glove[jj+1][i])][m] = angle_atom2[atom->map(update_mega_glove[jj+1][i])][m+1]; - angle_atom3[atom->map(update_mega_glove[jj+1][i])][m] = 
angle_atom3[atom->map(update_mega_glove[jj+1][i])][m+1]; - } - num_angle[atom->map(update_mega_glove[jj+1][i])]--; - delta_angle--; - break; - } - } - } - } - } - } - // now let's add the new angle info. - if (twomol->angleflag) { - for (int j = 0; j < twomol->natoms; j++) { - int jj = equivalences[j][1][rxnID]-1; - if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { - if (landlocked_atoms[j][rxnID] == 1) { - num_angle[atom->map(update_mega_glove[jj+1][i])] = twomol->num_angle[j]; - delta_angle += twomol->num_angle[j]; - for (int p = 0; p < twomol->num_angle[j]; p++) { - angle_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->angle_type[j][p]; - angle_atom1[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom1[j][p]-1][1][rxnID]][i]; - angle_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom2[j][p]-1][1][rxnID]][i]; - angle_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom3[j][p]-1][1][rxnID]][i]; - } - } - if (landlocked_atoms[j][rxnID] == 0) { - for (int p = 0; p < twomol->num_angle[j]; p++) { - if (landlocked_atoms[twomol->angle_atom1[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->angle_atom2[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->angle_atom3[j][p]-1][rxnID] == 1) { - insert_num = num_angle[atom->map(update_mega_glove[jj+1][i])]; - angle_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->angle_type[j][p]; - angle_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom1[j][p]-1][1][rxnID]][i]; - angle_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom2[j][p]-1][1][rxnID]][i]; - angle_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom3[j][p]-1][1][rxnID]][i]; - 
num_angle[atom->map(update_mega_glove[jj+1][i])]++; - if (num_angle[atom->map(update_mega_glove[jj+1][i])] > atom->angle_per_atom) - error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom"); - delta_angle++; - } - } - } - } - } - } - } - } - - // Dihedrals! first let's delete all dihedral info for landlocked atoms - if (force->dihedral) { - int *num_dihedral = atom->num_dihedral; - int **dihedral_type = atom->dihedral_type; - tagint **dihedral_atom1 = atom->dihedral_atom1; - tagint **dihedral_atom2 = atom->dihedral_atom2; - tagint **dihedral_atom3 = atom->dihedral_atom3; - tagint **dihedral_atom4 = atom->dihedral_atom4; - - for (int i = 0; i < update_num_mega; i++) { - rxnID = update_mega_glove[0][i]; - twomol = atom->molecules[reacted_mol[rxnID]]; - for (int j = 0; j < twomol->natoms; j++) { - int jj = equivalences[j][1][rxnID]-1; - if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { - if (landlocked_atoms[j][rxnID] == 1) { - delta_dihed -= num_dihedral[atom->map(update_mega_glove[jj+1][i])]; - num_dihedral[atom->map(update_mega_glove[jj+1][i])] = 0; - } - if (landlocked_atoms[j][rxnID] == 0) { - for (int p = num_dihedral[atom->map(update_mega_glove[jj+1][i])]-1; p > -1; p--) { - for (int n = 0; n < twomol->natoms; n++) { - int nn = equivalences[n][1][rxnID]-1; - if (n!=j && landlocked_atoms[n][rxnID] == 1 && - (dihedral_atom1[atom->map(update_mega_glove[jj+1][i])][p] == update_mega_glove[nn+1][i] || - dihedral_atom2[atom->map(update_mega_glove[jj+1][i])][p] == update_mega_glove[nn+1][i] || - dihedral_atom3[atom->map(update_mega_glove[jj+1][i])][p] == update_mega_glove[nn+1][i] || - dihedral_atom4[atom->map(update_mega_glove[jj+1][i])][p] == update_mega_glove[nn+1][i])) { - for (int m = p; m < num_dihedral[atom->map(update_mega_glove[jj+1][i])]-1; m++) { - dihedral_type[atom->map(update_mega_glove[jj+1][i])][m] = dihedral_type[atom->map(update_mega_glove[jj+1][i])][m+1]; - 
dihedral_atom1[atom->map(update_mega_glove[jj+1][i])][m] = dihedral_atom1[atom->map(update_mega_glove[jj+1][i])][m+1]; - dihedral_atom2[atom->map(update_mega_glove[jj+1][i])][m] = dihedral_atom2[atom->map(update_mega_glove[jj+1][i])][m+1]; - dihedral_atom3[atom->map(update_mega_glove[jj+1][i])][m] = dihedral_atom3[atom->map(update_mega_glove[jj+1][i])][m+1]; - dihedral_atom4[atom->map(update_mega_glove[jj+1][i])][m] = dihedral_atom4[atom->map(update_mega_glove[jj+1][i])][m+1]; - } - num_dihedral[atom->map(update_mega_glove[jj+1][i])]--; - delta_dihed--; - break; - } - } - } - } - } - } - // now let's add new dihedral info - if (twomol->dihedralflag) { - for (int j = 0; j < twomol->natoms; j++) { - int jj = equivalences[j][1][rxnID]-1; - if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { - if (landlocked_atoms[j][rxnID] == 1) { - num_dihedral[atom->map(update_mega_glove[jj+1][i])] = twomol->num_dihedral[j]; - delta_dihed += twomol->num_dihedral[j]; - for (int p = 0; p < twomol->num_dihedral[j]; p++) { - dihedral_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->dihedral_type[j][p]; - dihedral_atom1[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom1[j][p]-1][1][rxnID]][i]; - dihedral_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom2[j][p]-1][1][rxnID]][i]; - dihedral_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom3[j][p]-1][1][rxnID]][i]; - dihedral_atom4[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom4[j][p]-1][1][rxnID]][i]; - } - } - if (landlocked_atoms[j][rxnID] == 0) { - for (int p = 0; p < twomol->num_dihedral[j]; p++) { - if (landlocked_atoms[twomol->dihedral_atom1[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->dihedral_atom2[j][p]-1][rxnID] == 1 || - 
landlocked_atoms[twomol->dihedral_atom3[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->dihedral_atom4[j][p]-1][rxnID] == 1) { - insert_num = num_dihedral[atom->map(update_mega_glove[jj+1][i])]; - dihedral_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->dihedral_type[j][p]; - dihedral_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom1[j][p]-1][1][rxnID]][i]; - dihedral_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom2[j][p]-1][1][rxnID]][i]; - dihedral_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom3[j][p]-1][1][rxnID]][i]; - dihedral_atom4[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom4[j][p]-1][1][rxnID]][i]; - num_dihedral[atom->map(update_mega_glove[jj+1][i])]++; - if (num_dihedral[atom->map(update_mega_glove[jj+1][i])] > atom->dihedral_per_atom) - error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom"); - delta_dihed++; - } - } - } - } - } - } - } - } - - // finally IMPROPERS!!!! 
first let's delete all improper info for landlocked atoms - if (force->improper) { - int *num_improper = atom->num_improper; - int **improper_type = atom->improper_type; - tagint **improper_atom1 = atom->improper_atom1; - tagint **improper_atom2 = atom->improper_atom2; - tagint **improper_atom3 = atom->improper_atom3; - tagint **improper_atom4 = atom->improper_atom4; - - for (int i = 0; i < update_num_mega; i++) { - rxnID = update_mega_glove[0][i]; - twomol = atom->molecules[reacted_mol[rxnID]]; - for (int j = 0; j < twomol->natoms; j++) { - int jj = equivalences[j][1][rxnID]-1; - if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { - if (landlocked_atoms[j][rxnID] == 1) { - delta_imprp -= num_improper[atom->map(update_mega_glove[jj+1][i])]; - num_improper[atom->map(update_mega_glove[jj+1][i])] = 0; - } - if (landlocked_atoms[j][rxnID] == 0) { - for (int p = num_improper[atom->map(update_mega_glove[jj+1][i])]-1; p > -1; p--) { - for (int n = 0; n < twomol->natoms; n++) { - int nn = equivalences[n][1][rxnID]-1; - if (n!=j && landlocked_atoms[n][rxnID] == 1 && - (improper_atom1[atom->map(update_mega_glove[jj+1][i])][p] == update_mega_glove[nn+1][i] || - improper_atom2[atom->map(update_mega_glove[jj+1][i])][p] == update_mega_glove[nn+1][i] || - improper_atom3[atom->map(update_mega_glove[jj+1][i])][p] == update_mega_glove[nn+1][i] || - improper_atom4[atom->map(update_mega_glove[jj+1][i])][p] == update_mega_glove[nn+1][i])) { - for (int m = p; m < num_improper[atom->map(update_mega_glove[jj+1][i])]-1; m++) { - improper_type[atom->map(update_mega_glove[jj+1][i])][m] = improper_type[atom->map(update_mega_glove[jj+1][i])][m+1]; - improper_atom1[atom->map(update_mega_glove[jj+1][i])][m] = improper_atom1[atom->map(update_mega_glove[jj+1][i])][m+1]; - improper_atom2[atom->map(update_mega_glove[jj+1][i])][m] = improper_atom2[atom->map(update_mega_glove[jj+1][i])][m+1]; - improper_atom3[atom->map(update_mega_glove[jj+1][i])][m] = 
improper_atom3[atom->map(update_mega_glove[jj+1][i])][m+1]; - improper_atom4[atom->map(update_mega_glove[jj+1][i])][m] = improper_atom4[atom->map(update_mega_glove[jj+1][i])][m+1]; - } - num_improper[atom->map(update_mega_glove[jj+1][i])]--; - delta_imprp--; - break; - } - } - } - } - } - } - // now let's add new improper info - if (twomol->improperflag) { - for (int j = 0; j < twomol->natoms; j++) { - int jj = equivalences[j][1][rxnID]-1; - if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { - if (landlocked_atoms[j][rxnID] == 1) { - num_improper[atom->map(update_mega_glove[jj+1][i])] = twomol->num_improper[j]; - delta_imprp += twomol->num_improper[j]; - for (int p = 0; p < twomol->num_improper[j]; p++) { - improper_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->improper_type[j][p]; - improper_atom1[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom1[j][p]-1][1][rxnID]][i]; - improper_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom2[j][p]-1][1][rxnID]][i]; - improper_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom3[j][p]-1][1][rxnID]][i]; - improper_atom4[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom4[j][p]-1][1][rxnID]][i]; - } - } - if (landlocked_atoms[j][rxnID] == 0) { - for (int p = 0; p < twomol->num_improper[j]; p++) { - if (landlocked_atoms[twomol->improper_atom1[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->improper_atom2[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->improper_atom3[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->improper_atom4[j][p]-1][rxnID] == 1) { - insert_num = num_improper[atom->map(update_mega_glove[jj+1][i])]; - improper_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->improper_type[j][p]; - 
improper_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom1[j][p]-1][1][rxnID]][i]; - improper_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom2[j][p]-1][1][rxnID]][i]; - improper_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom3[j][p]-1][1][rxnID]][i]; - improper_atom4[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom4[j][p]-1][1][rxnID]][i]; - num_improper[atom->map(update_mega_glove[jj+1][i])]++; - if (num_improper[atom->map(update_mega_glove[jj+1][i])] > atom->improper_per_atom) - error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom"); - delta_imprp++; - } - } - } - } - } - } - } - } - - } - - memory->destroy(update_mega_glove); - if (rescale_charges_anyflag) memory->destroy(sim_total_charges); - - // delete atoms. taken from fix_evaporate. 
but don't think it needs to be in pre_exchange - // loop in reverse order to avoid copying marked atoms - ndel = ndelone = 0; - for (int i = atom->nlocal-1; i >= 0; i--) { - if (mark[i] == 1) { - avec->copy(atom->nlocal-1,i,1); - atom->nlocal--; - ndelone++; - - if (atom->avec->bonds_allow) { - if (force->newton_bond) delta_bonds += atom->num_bond[i]; - else { - for (int j = 0; j < atom->num_bond[i]; j++) { - if (tag[i] < atom->bond_atom[i][j]) delta_bonds++; - } - } - } - if (atom->avec->angles_allow) { - if (force->newton_bond) delta_angle += atom->num_angle[i]; - else { - for (int j = 0; j < atom->num_angle[i]; j++) { - int m = atom->map(atom->angle_atom2[i][j]); - if (m >= 0 && m < nlocal) delta_angle++; - } - } - } - if (atom->avec->dihedrals_allow) { - if (force->newton_bond) delta_dihed += atom->num_dihedral[i]; - else { - for (int j = 0; j < atom->num_dihedral[i]; j++) { - int m = atom->map(atom->dihedral_atom2[i][j]); - if (m >= 0 && m < nlocal) delta_dihed++; - } - } - } - if (atom->avec->impropers_allow) { - if (force->newton_bond) delta_imprp += atom->num_improper[i]; - else { - for (int j = 0; j < atom->num_improper[i]; j++) { - int m = atom->map(atom->improper_atom2[i][j]); - if (m >= 0 && m < nlocal) delta_imprp++; - } - } - } - } - } - memory->destroy(mark); - memory->destroy(delflag); - - MPI_Allreduce(&ndelone,&ndel,1,MPI_INT,MPI_SUM,world); - - atom->natoms -= ndel; - // done deleting atoms - - // reset mol ids - if (reset_mol_ids_flag) reset_mol_ids->reset(); - - // something to think about: this could done much more concisely if - // all atom-level info (bond,angles, etc...) 
were kinda inherited from a common data struct --JG - - int Tdelta_bonds; - MPI_Allreduce(&delta_bonds,&Tdelta_bonds,1,MPI_INT,MPI_SUM,world); - atom->nbonds += Tdelta_bonds; - - int Tdelta_angle; - MPI_Allreduce(&delta_angle,&Tdelta_angle,1,MPI_INT,MPI_SUM,world); - atom->nangles += Tdelta_angle; - - int Tdelta_dihed; - MPI_Allreduce(&delta_dihed,&Tdelta_dihed,1,MPI_INT,MPI_SUM,world); - atom->ndihedrals += Tdelta_dihed; - - int Tdelta_imprp; - MPI_Allreduce(&delta_imprp,&Tdelta_imprp,1,MPI_INT,MPI_SUM,world); - atom->nimpropers += Tdelta_imprp; - - if (ndel && (atom->map_style != Atom::MAP_NONE)) { - atom->nghost = 0; - atom->map_init(); - atom->map_set(); - } -} - -/* ---------------------------------------------------------------------- -insert created atoms -------------------------------------------------------------------------- */ - -int FixBondReact::insert_atoms(tagint **my_update_mega_glove, int iupdate) -{ - // inserting atoms based off fix_deposit->pre_exchange - int flag; - imageint *imageflags; - double **coords,lamda[3],rotmat[3][3]; - double *newcoord; - double **v = atom->v; - double t,delx,dely,delz,rsq; - - memory->create(coords,twomol->natoms,3,"bond/react:coords"); - memory->create(imageflags,twomol->natoms,"bond/react:imageflags"); - - double *sublo,*subhi; - if (domain->triclinic == 0) { - sublo = domain->sublo; - subhi = domain->subhi; - } else { - sublo = domain->sublo_lamda; - subhi = domain->subhi_lamda; - } - - // find current max atom and molecule IDs - tagint *tag = atom->tag; - double **x = atom->x; - tagint *molecule = atom->molecule; - int nlocal = atom->nlocal; - - tagint maxtag_all,maxmol_all; - tagint max = 0; - for (int i = 0; i < nlocal; i++) max = MAX(max,tag[i]); - MPI_Allreduce(&max,&maxtag_all,1,MPI_LMP_TAGINT,MPI_MAX,world); - - max = 0; - for (int i = 0; i < nlocal; i++) max = MAX(max,molecule[i]); - MPI_Allreduce(&max,&maxmol_all,1,MPI_LMP_TAGINT,MPI_MAX,world); - - int dimension = domain->dimension; - - // only proc 
that owns reacting atom (use ibonding), - // fits post-reaction template to reaction site, for creating atoms - int n2superpose = 0; - for (int j = 0; j < twomol->natoms; j++) { - if (modify_create_fragid[rxnID] >= 0) - if (!twomol->fragmentmask[modify_create_fragid[rxnID]][j]) continue; - if (!create_atoms[j][rxnID] && !delete_atoms[equivalences[j][1][rxnID]][rxnID]) - n2superpose++; - } - - int ifit = atom->map(my_update_mega_glove[ibonding[rxnID]+1][iupdate]); // use this local ID to find fitting proc - Superpose3D superposer(n2superpose); - int fitroot = 0; - if (ifit >= 0 && ifit < atom->nlocal) { - fitroot = comm->me; - - // get 'temperatere' averaged over site, used for created atoms' vels - // note: row_offset for my_update_mega_glove is unity, not 'cuff' - t = get_temperature(my_update_mega_glove,1,iupdate); - - double **xfrozen; // coordinates for the "frozen" target molecule - double **xmobile; // coordinates for the "mobile" molecule - memory->create(xfrozen,n2superpose,3,"bond/react:xfrozen"); - memory->create(xmobile,n2superpose,3,"bond/react:xmobile"); - tagint iatom; - tagint iref = -1; // choose first atom as reference - int fit_incr = 0; - for (int j = 0; j < twomol->natoms; j++) { - if (modify_create_fragid[rxnID] >= 0) - if (!twomol->fragmentmask[modify_create_fragid[rxnID]][j]) continue; - int ipre = equivalences[j][1][rxnID]-1; // equiv pre-reaction template index - if (!create_atoms[j][rxnID] && !delete_atoms[ipre][rxnID]) { - if (atom->map(my_update_mega_glove[ipre+1][iupdate]) < 0) { - error->warning(FLERR," eligible atoms skipped for created-atoms fit on rank {}\n", - comm->me); - continue; - } - iatom = atom->map(my_update_mega_glove[ipre+1][iupdate]); - if (iref == -1) iref = iatom; - iatom = domain->closest_image(iref,iatom); - for (int k = 0; k < 3; k++) { - xfrozen[fit_incr][k] = x[iatom][k]; - xmobile[fit_incr][k] = twomol->x[j][k]; - } - fit_incr++; - } - } - superposer.Superpose(xfrozen, xmobile); - for (int i = 0; i < 3; i++) - 
for (int j = 0; j < 3; j++) - rotmat[i][j] = superposer.R[i][j]; - memory->destroy(xfrozen); - memory->destroy(xmobile); - } - MPI_Allreduce(MPI_IN_PLACE,&fitroot,1,MPI_INT,MPI_SUM,world); - MPI_Bcast(&t,1,MPI_DOUBLE,fitroot,world); - - // get coordinates and image flags - for (int m = 0; m < twomol->natoms; m++) { - if (create_atoms[m][rxnID] == 1) { - // apply optimal rotation/translation for created atom coords - // also map coords back into simulation box - if (fitroot == comm->me) { - MathExtra::matvec(rotmat,twomol->x[m],coords[m]); - for (int i = 0; i < 3; i++) coords[m][i] += superposer.T[i]; - imageflags[m] = atom->image[ifit]; - domain->remap(coords[m],imageflags[m]); - } - MPI_Bcast(&imageflags[m],1,MPI_LMP_IMAGEINT,fitroot,world); - MPI_Bcast(coords[m],3,MPI_DOUBLE,fitroot,world); - } - } - - // check distance between any existing atom and inserted atom - // if less than near, abort - if (overlapsq[rxnID] > 0) { - int abortflag = 0; - for (int m = 0; m < twomol->natoms; m++) { - if (create_atoms[m][rxnID] == 1) { - for (int i = 0; i < nlocal; i++) { - delx = coords[m][0] - x[i][0]; - dely = coords[m][1] - x[i][1]; - delz = coords[m][2] - x[i][2]; - domain->minimum_image(delx,dely,delz); - rsq = delx*delx + dely*dely + delz*delz; - if (rsq < overlapsq[rxnID]) { - abortflag = 1; - break; - } - } - if (abortflag) break; - } - } - MPI_Allreduce(MPI_IN_PLACE,&abortflag,1,MPI_INT,MPI_MAX,world); - if (abortflag) { - memory->destroy(coords); - memory->destroy(imageflags); - return 0; - } - } - - // clear ghost count and any ghost bonus data internal to AtomVec - // same logic as beginning of Comm::exchange() - // do it now b/c inserting atoms will overwrite ghost atoms - atom->nghost = 0; - atom->avec->clear_bonus(); - - // check if new atoms are in my sub-box or above it if I am highest proc - // if so, add atom to my list via create_atom() - // initialize additional info about the atoms - // set group mask to "all" plus fix group - int preID; // new 
equivalences index - int add_count = 0; - for (int m = 0; m < twomol->natoms; m++) { - if (create_atoms[m][rxnID] == 1) { - // increase atom count - add_count++; - preID = onemol->natoms+add_count; - - if (domain->triclinic) { - domain->x2lamda(coords[m],lamda); - newcoord = lamda; - } else newcoord = coords[m]; - - flag = 0; - if (newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] && - newcoord[1] >= sublo[1] && newcoord[1] < subhi[1] && - newcoord[2] >= sublo[2] && newcoord[2] < subhi[2]) flag = 1; - else if (dimension == 3 && newcoord[2] >= domain->boxhi[2]) { - if (comm->layout != Comm::LAYOUT_TILED) { - if (comm->myloc[2] == comm->procgrid[2]-1 && - newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] && - newcoord[1] >= sublo[1] && newcoord[1] < subhi[1]) flag = 1; - } else { - if (comm->mysplit[2][1] == 1.0 && - newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] && - newcoord[1] >= sublo[1] && newcoord[1] < subhi[1]) flag = 1; - } - } else if (dimension == 2 && newcoord[1] >= domain->boxhi[1]) { - if (comm->layout != Comm::LAYOUT_TILED) { - if (comm->myloc[1] == comm->procgrid[1]-1 && - newcoord[0] >= sublo[0] && newcoord[0] < subhi[0]) flag = 1; - } else { - if (comm->mysplit[1][1] == 1.0 && - newcoord[0] >= sublo[0] && newcoord[0] < subhi[0]) flag = 1; - } - } - - int root = 0; - if (flag) { - root = comm->me; - - atom->avec->create_atom(twomol->type[m],coords[m]); - int n = atom->nlocal - 1; - atom->tag[n] = maxtag_all + add_count; - - // locally update mega_glove - my_update_mega_glove[preID][iupdate] = atom->tag[n]; - - if (atom->molecule_flag) { - if (twomol->moleculeflag) { - atom->molecule[n] = maxmol_all + twomol->molecule[m]; - } else { - atom->molecule[n] = maxmol_all + 1; - } - } - - atom->mask[n] = 1 | groupbit; - atom->image[n] = imageflags[m]; - - // guess a somewhat reasonable initial velocity based on reaction site - // further control is possible using bond_react_MASTER_group - // compute |velocity| corresponding to a given temperature t, using 
specific atom's mass - double mymass = atom->rmass ? atom->rmass[n] : atom->mass[twomol->type[m]]; - double vtnorm = sqrt(t / (force->mvv2e / (dimension * force->boltz)) / mymass); - v[n][0] = random[rxnID]->uniform(); - v[n][1] = random[rxnID]->uniform(); - v[n][2] = random[rxnID]->uniform(); - double vnorm = sqrt(v[n][0]*v[n][0] + v[n][1]*v[n][1] + v[n][2]*v[n][2]); - v[n][0] = v[n][0]/vnorm*vtnorm; - v[n][1] = v[n][1]/vnorm*vtnorm; - v[n][2] = v[n][2]/vnorm*vtnorm; - modify->create_attribute(n); - } - // globally update mega_glove and equivalences - MPI_Allreduce(MPI_IN_PLACE,&root,1,MPI_INT,MPI_SUM,world); - MPI_Bcast(&my_update_mega_glove[preID][iupdate],1,MPI_LMP_TAGINT,root,world); - equivalences[m][0][rxnID] = m+1; - equivalences[m][1][rxnID] = preID; - reverse_equiv[preID-1][0][rxnID] = preID; - reverse_equiv[preID-1][1][rxnID] = m+1; - } - } - - // reset global natoms here - // reset atom map elsewhere, after all calls to 'insert_atoms' - atom->natoms += add_count; - if (atom->natoms < 0) - error->all(FLERR,"Too many total atoms"); - maxtag_all += add_count; - if (maxtag_all >= MAXTAGINT) - error->all(FLERR,"New atom IDs exceed maximum allowed ID"); - // atom creation successful - memory->destroy(coords); - memory->destroy(imageflags); - return 1; -} - -/* ---------------------------------------------------------------------- -add equal-style variable to keyword argument list -------------------------------------------------------------------------- */ - -void FixBondReact::read_variable_keyword(const char *myarg, int keyword, int myrxn) -{ - var_id[keyword][myrxn] = input->variable->find(myarg); - if (var_id[keyword][myrxn] < 0) - error->all(FLERR,"Fix bond/react: Variable name {} does not exist",myarg); - if (!input->variable->equalstyle(var_id[keyword][myrxn])) - error->all(FLERR,"Fix bond/react: Variable {} is not equal-style",myarg); - var_flag[keyword][myrxn] = 1; -} - -/* ---------------------------------------------------------------------- -read 
map file -------------------------------------------------------------------------- */ - -void FixBondReact::read_map_file(int myrxn) -{ - int rv; - char line[MAXLINE] = {'\0'}; - char keyword[MAXLINE] = {'\0'}; - char *eof,*ptr; - - // skip 1st line of file - eof = fgets(line,MAXLINE,fp); - if (eof == nullptr) error->one(FLERR,"Fix bond/react: Unexpected end of superimpose file"); - - // read header lines - // skip blank lines or lines that start with "#" - // stop when read an unrecognized line - - ncreate = 0; - while (true) { - - readline(line); - - // trim anything from '#' onward - // if line is blank, continue - - if ((ptr = strchr(line,'#'))) *ptr = '\0'; - if (strspn(line," \t\n\r") == strlen(line)) continue; - - if (strstr(line,"edgeIDs")) sscanf(line,"%d",&nedge); - else if (strstr(line,"equivalences")) { - rv = sscanf(line,"%d",&nequivalent); - if (rv != 1) error->one(FLERR, "Map file header is incorrectly formatted"); - if (nequivalent != onemol->natoms) - error->one(FLERR,"Fix bond/react: Number of equivalences in map file must " - "equal number of atoms in reaction templates"); - } - else if (strstr(line,"deleteIDs")) { - rv = sscanf(line,"%d",&ndelete); - if (rv != 1) error->one(FLERR, "Map file header is incorrectly formatted"); - } else if (strstr(line,"createIDs")) { - rv = sscanf(line,"%d",&ncreate); - if (rv != 1) error->one(FLERR, "Map file header is incorrectly formatted"); - } else if (strstr(line,"chiralIDs")) { - rv = sscanf(line,"%d",&nchiral); - if (rv != 1) error->one(FLERR, "Map file header is incorrectly formatted"); - } else if (strstr(line,"constraints")) { - rv = sscanf(line,"%d",&nconstraints[myrxn]); - if (rv != 1) error->one(FLERR, "Map file header is incorrectly formatted"); - if (maxnconstraints < nconstraints[myrxn]) maxnconstraints = nconstraints[myrxn]; - constraints.resize(maxnconstraints, std::vector(nreacts)); - } else break; - } - - // grab keyword and skip next line - - parse_keyword(0,line,keyword); - readline(line); 
- - // loop over sections of superimpose file - - int equivflag = 0, bondflag = 0; - while (strlen(keyword)) { - if (strcmp(keyword,"InitiatorIDs") == 0 || strcmp(keyword,"BondingIDs") == 0) { - if (strcmp(keyword,"BondingIDs") == 0) - if (comm->me == 0) error->warning(FLERR,"Fix bond/react: The BondingIDs section title has been deprecated. Please use InitiatorIDs instead."); - bondflag = 1; - readline(line); - rv = sscanf(line,"%d",&ibonding[myrxn]); - if (rv != 1) error->one(FLERR, "InitiatorIDs section is incorrectly formatted"); - if (ibonding[myrxn] > onemol->natoms) - error->one(FLERR,"Fix bond/react: Invalid template atom ID in map file"); - readline(line); - rv = sscanf(line,"%d",&jbonding[myrxn]); - if (rv != 1) error->one(FLERR, "InitiatorIDs section is incorrectly formatted"); - if (jbonding[myrxn] > onemol->natoms) - error->one(FLERR,"Fix bond/react: Invalid template atom ID in map file"); - } else if (strcmp(keyword,"EdgeIDs") == 0) { - EdgeIDs(line, myrxn); - } else if (strcmp(keyword,"Equivalences") == 0) { - equivflag = 1; - Equivalences(line, myrxn); - } else if (strcmp(keyword,"DeleteIDs") == 0) { - DeleteAtoms(line, myrxn); - } else if (strcmp(keyword,"CreateIDs") == 0) { - CreateAtoms(line, myrxn); - } else if (strcmp(keyword,"ChiralIDs") == 0) { - ChiralCenters(line, myrxn); - } else if (strcmp(keyword,"Constraints") == 0) { - ReadConstraints(line, myrxn); - } else error->one(FLERR,"Fix bond/react: Unknown section in map file"); - - parse_keyword(1,line,keyword); - - } - - // error check - if (bondflag == 0 || equivflag == 0) - error->all(FLERR,"Fix bond/react: Map file missing InitiatorIDs or Equivalences section\n"); -} - -void FixBondReact::EdgeIDs(char *line, int myrxn) -{ - // puts a 1 at edge(edgeID) - - int tmp,rv; - for (int i = 0; i < nedge; i++) { - readline(line); - rv = sscanf(line,"%d",&tmp); - if (rv != 1) error->one(FLERR, "EdgeIDs section is incorrectly formatted"); - if (tmp > onemol->natoms) - error->one(FLERR,"Fix bond/react: 
Invalid template atom ID in map file"); - edge[tmp-1][myrxn] = 1; - } -} - -void FixBondReact::Equivalences(char *line, int myrxn) -{ - int tmp1,tmp2,rv; - for (int i = 0; i < nequivalent; i++) { - readline(line); - rv = sscanf(line,"%d %d",&tmp1,&tmp2); - if (rv != 2) error->one(FLERR, "Equivalences section is incorrectly formatted"); - if (tmp1 > onemol->natoms || tmp2 > twomol->natoms) - error->one(FLERR,"Fix bond/react: Invalid template atom ID in map file"); - //equivalences is-> clmn 1: post-reacted, clmn 2: pre-reacted - equivalences[tmp2-1][0][myrxn] = tmp2; - equivalences[tmp2-1][1][myrxn] = tmp1; - //reverse_equiv is-> clmn 1: pre-reacted, clmn 2: post-reacted - reverse_equiv[tmp1-1][0][myrxn] = tmp1; - reverse_equiv[tmp1-1][1][myrxn] = tmp2; - } -} - -void FixBondReact::DeleteAtoms(char *line, int myrxn) -{ - int tmp,rv; - for (int i = 0; i < ndelete; i++) { - readline(line); - rv = sscanf(line,"%d",&tmp); - if (rv != 1) error->one(FLERR, "DeleteIDs section is incorrectly formatted"); - if (tmp > onemol->natoms) - error->one(FLERR,"Fix bond/react: Invalid template atom ID in map file"); - delete_atoms[tmp-1][myrxn] = 1; - } -} - -void FixBondReact::CreateAtoms(char *line, int myrxn) -{ - create_atoms_flag[myrxn] = 1; - int tmp,rv; - for (int i = 0; i < ncreate; i++) { - readline(line); - rv = sscanf(line,"%d",&tmp); - if (rv != 1) error->one(FLERR, "CreateIDs section is incorrectly formatted"); - create_atoms[tmp-1][myrxn] = 1; - } - if (twomol->xflag == 0) - error->one(FLERR,"Fix bond/react: 'Coords' section required in post-reaction template when creating new atoms"); -} - -void FixBondReact::CustomCharges(int ifragment, int myrxn) -{ - for (int i = 0; i < onemol->natoms; i++) - if (onemol->fragmentmask[ifragment][i]) - custom_charges[i][myrxn] = 1; - else - custom_charges[i][myrxn] = 0; -} - -void FixBondReact::ChiralCenters(char *line, int myrxn) -{ - int tmp,rv; - for (int i = 0; i < nchiral; i++) { - readline(line); - rv = sscanf(line,"%d",&tmp); - 
if (rv != 1) error->one(FLERR, "ChiralIDs section is incorrectly formatted"); - if (tmp > onemol->natoms) - error->one(FLERR,"Fix bond/react: Invalid template atom ID in map file"); - chiral_atoms[tmp-1][0][myrxn] = 1; - if (onemol->xflag == 0) - error->one(FLERR,"Fix bond/react: Molecule template 'Coords' section required for chiralIDs keyword"); - if ((int) onemol_nxspecial[tmp-1][0] != 4) - error->one(FLERR,"Fix bond/react: Chiral atoms must have exactly four first neighbors"); - for (int j = 0; j < 4; j++) { - for (int k = j+1; k < 4; k++) { - if (onemol->type[onemol_xspecial[tmp-1][j]-1] == - onemol->type[onemol_xspecial[tmp-1][k]-1]) - error->one(FLERR,"Fix bond/react: First neighbors of chiral atoms must be of mutually different types"); - } - } - // record order of atom types, and coords - double my4coords[12]; - for (int j = 0; j < 4; j++) { - chiral_atoms[tmp-1][j+2][myrxn] = onemol->type[onemol_xspecial[tmp-1][j]-1]; - for (int k = 0; k < 3; k++) { - my4coords[3*j+k] = onemol->x[onemol_xspecial[tmp-1][j]-1][k]; - } - } - // get orientation - chiral_atoms[tmp-1][1][myrxn] = get_chirality(my4coords); - } -} - -void FixBondReact::ReadConstraints(char *line, int myrxn) -{ - int rv; - double tmp[MAXCONARGS]; - char **strargs,*ptr,*lptr; - memory->create(strargs,MAXCONARGS,MAXLINE,"bond/react:strargs"); - auto constraint_type = new char[MAXLINE]; - strcpy(constraintstr[myrxn],"("); // string for boolean constraint logic - for (int i = 0; i < nconstraints[myrxn]; i++) { - readline(line); - // find left parentheses, add to constraintstr, and update line - for (int j = 0; j < (int)strlen(line); j++) { - if (line[j] == '(') strcat(constraintstr[myrxn],"("); - if (isalpha(line[j])) { - line = line + j; - break; - } - } - // 'C' indicates where to sub in next constraint - strcat(constraintstr[myrxn],"C"); - // special consideration for 'custom' constraint - // find final double quote, or skip two words - lptr = line; - if ((ptr = strrchr(lptr,'\"'))) lptr = ptr+1; - 
else { - while (lptr[0] != ' ') lptr++; // skip first 'word' - while (lptr[0] == ' ' || lptr[0] == '\t') lptr++; // skip blanks - while (lptr[0] != ' ') lptr++; // skip second 'word' - } - // find right parentheses - for (int j = 0; j < (int)strlen(lptr); j++) - if (lptr[j] == ')') strcat(constraintstr[myrxn],")"); - // find logic symbols, and trim line via ptr - if ((ptr = strstr(lptr,"&&"))) { - strcat(constraintstr[myrxn],"&&"); - *ptr = '\0'; - } else if ((ptr = strstr(lptr,"||"))) { - strcat(constraintstr[myrxn],"||"); - *ptr = '\0'; - } else if (i+1 < nconstraints[myrxn]) { - strcat(constraintstr[myrxn],"&&"); - } - if ((ptr = strchr(lptr,')'))) - *ptr = '\0'; - rv = sscanf(line,"%s",constraint_type); - if (rv != 1) error->one(FLERR, "Constraints section is incorrectly formatted"); - if (strcmp(constraint_type,"distance") == 0) { - constraints[i][myrxn].type = DISTANCE; - rv = sscanf(line,"%*s %s %s %lg %lg",strargs[0],strargs[1],&tmp[0],&tmp[1]); - if (rv != 4) error->one(FLERR, "Distance constraint is incorrectly formatted"); - readID(strargs[0], i, myrxn, 0); - readID(strargs[1], i, myrxn, 1); - // cutoffs - constraints[i][myrxn].par[0] = tmp[0]*tmp[0]; // using square of distance - constraints[i][myrxn].par[1] = tmp[1]*tmp[1]; - } else if (strcmp(constraint_type,"angle") == 0) { - constraints[i][myrxn].type = ANGLE; - rv = sscanf(line,"%*s %s %s %s %lg %lg",strargs[0],strargs[1],strargs[2],&tmp[0],&tmp[1]); - if (rv != 5) error->one(FLERR, "Angle constraint is incorrectly formatted"); - readID(strargs[0], i, myrxn, 0); - readID(strargs[1], i, myrxn, 1); - readID(strargs[2], i, myrxn, 2); - constraints[i][myrxn].par[0] = tmp[0]/180.0 * MY_PI; - constraints[i][myrxn].par[1] = tmp[1]/180.0 * MY_PI; - } else if (strcmp(constraint_type,"dihedral") == 0) { - constraints[i][myrxn].type = DIHEDRAL; - tmp[2] = 181.0; // impossible range - tmp[3] = 182.0; - rv = sscanf(line,"%*s %s %s %s %s %lg %lg %lg %lg",strargs[0],strargs[1], - 
strargs[2],strargs[3],&tmp[0],&tmp[1],&tmp[2],&tmp[3]); - if (rv != 6 && rv != 8) error->one(FLERR, "Dihedral constraint is incorrectly formatted"); - readID(strargs[0], i, myrxn, 0); - readID(strargs[1], i, myrxn, 1); - readID(strargs[2], i, myrxn, 2); - readID(strargs[3], i, myrxn, 3); - constraints[i][myrxn].par[0] = tmp[0]/180.0 * MY_PI; - constraints[i][myrxn].par[1] = tmp[1]/180.0 * MY_PI; - constraints[i][myrxn].par[2] = tmp[2]/180.0 * MY_PI; - constraints[i][myrxn].par[3] = tmp[3]/180.0 * MY_PI; - } else if (strcmp(constraint_type,"arrhenius") == 0) { - constraints[i][myrxn].type = ARRHENIUS; - constraints[i][myrxn].par[0] = narrhenius++; - rv = sscanf(line,"%*s %lg %lg %lg %lg",&tmp[0],&tmp[1],&tmp[2],&tmp[3]); - if (rv != 4) error->one(FLERR, "Arrhenius constraint is incorrectly formatted"); - constraints[i][myrxn].par[1] = tmp[0]; - constraints[i][myrxn].par[2] = tmp[1]; - constraints[i][myrxn].par[3] = tmp[2]; - constraints[i][myrxn].par[4] = tmp[3]; - } else if (strcmp(constraint_type,"rmsd") == 0) { - constraints[i][myrxn].type = RMSD; - strcpy(strargs[0],"0"); - rv = sscanf(line,"%*s %lg %s",&tmp[0],strargs[0]); - if (rv != 1 && rv != 2) error->one(FLERR, "RMSD constraint is incorrectly formatted"); - constraints[i][myrxn].par[0] = tmp[0]; // RMSDmax - constraints[i][myrxn].id[0] = -1; // optional molecule fragment - if (isalpha(strargs[0][0])) { - int ifragment = onemol->findfragment(strargs[0]); - if (ifragment < 0) error->one(FLERR,"Fix bond/react: Molecule fragment does not exist"); - else constraints[i][myrxn].id[0] = ifragment; - } - } else if (strcmp(constraint_type,"custom") == 0) { - constraints[i][myrxn].type = CUSTOM; - std::vector args = utils::split_words(line); - constraints[i][myrxn].str = args[1]; - } else error->one(FLERR,"Fix bond/react: Illegal constraint type in 'Constraints' section of map file"); - } - strcat(constraintstr[myrxn],")"); // close boolean constraint logic string - delete [] constraint_type; - 
memory->destroy(strargs); -} - -/* ---------------------------------------------------------------------- -if ID starts with character, assume it is a pre-reaction molecule fragment ID -otherwise, it is a pre-reaction atom ID ----------------------------------------------------------------------- */ - -void FixBondReact::readID(char *strarg, int iconstr, int myrxn, int i) -{ - if (isalpha(strarg[0])) { - constraints[iconstr][myrxn].idtype[i] = FRAG; // fragment vs. atom ID flag - int ifragment = onemol->findfragment(strarg); - if (ifragment < 0) - error->one(FLERR,"Fix bond/react: Molecule fragment {} does not exist", strarg); - constraints[iconstr][myrxn].id[i] = ifragment; - } else { - constraints[iconstr][myrxn].idtype[i] = ATOM; // fragment vs. atom ID flag - int iatom = utils::inumeric(FLERR, strarg, true, lmp); - if (iatom > onemol->natoms) - error->one(FLERR,"Fix bond/react: Invalid template atom ID {} in map file", strarg); - constraints[iconstr][myrxn].id[i] = iatom; - } -} - -void FixBondReact::open(char *file) -{ - fp = fopen(file,"r"); - if (fp == nullptr) error->one(FLERR, "Fix bond/react: Cannot open map file {}", file); -} - -void FixBondReact::readline(char *line) -{ - int n; - if (comm->me == 0) { - if (fgets(line,MAXLINE,fp) == nullptr) n = 0; - else n = strlen(line) + 1; - } - MPI_Bcast(&n,1,MPI_INT,0,world); - if (n == 0) error->all(FLERR,"Fix bond/react: Unexpected end of map file"); - MPI_Bcast(line,n,MPI_CHAR,0,world); -} - -void FixBondReact::parse_keyword(int flag, char *line, char *keyword) -{ - if (flag) { - - // read upto non-blank line plus 1 following line - // eof is set to 1 if any read hits end-of-file - - int eof = 0; - if (comm->me == 0) { - if (fgets(line,MAXLINE,fp) == nullptr) eof = 1; - while (eof == 0 && strspn(line," \t\n\r") == strlen(line)) { - if (fgets(line,MAXLINE,fp) == nullptr) eof = 1; - } - if (fgets(keyword,MAXLINE,fp) == nullptr) eof = 1; - } - - // if eof, set keyword empty and return - - 
MPI_Bcast(&eof,1,MPI_INT,0,world); - if (eof) { - keyword[0] = '\0'; - return; - } - - // bcast keyword line to all procs - - int n; - if (comm->me == 0) n = strlen(line) + 1; - MPI_Bcast(&n,1,MPI_INT,0,world); - MPI_Bcast(line,n,MPI_CHAR,0,world); - } - - // copy non-whitespace portion of line into keyword - - int start = strspn(line," \t\n\r"); - int stop = strlen(line) - 1; - while (line[stop] == ' ' || line[stop] == '\t' - || line[stop] == '\n' || line[stop] == '\r') stop--; - line[stop+1] = '\0'; - strcpy(keyword,&line[start]); -} - -/* ---------------------------------------------------------------------- */ - -double FixBondReact::compute_vector(int n) -{ - // now we print just the totals for each reaction instance - return (double) reaction_count_total[n]; - -} - -/* ---------------------------------------------------------------------- */ - -void FixBondReact::post_integrate_respa(int ilevel, int /*iloop*/) -{ - if (ilevel == nlevels_respa-1) post_integrate(); -} - -/* ---------------------------------------------------------------------- */ - -int FixBondReact::pack_forward_comm(int n, int *list, double *buf, - int /*pbc_flag*/, int * /*pbc*/) -{ - int i,j,k,m,ns; - - m = 0; - - if (commflag == 1) { - for (i = 0; i < n; i++) { - j = list[i]; - for (k = 0; k < ncustomvars; k++) - buf[m++] = vvec[j][k]; - } - return m; - } - - if (commflag == 2) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = ubuf(partner[j]).d; - } - return m; - } - - m = 0; - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = ubuf(finalpartner[j]).d; - ns = nxspecial[j][0]; - buf[m++] = ubuf(ns).d; - for (k = 0; k < ns; k++) - buf[m++] = ubuf(xspecial[j][k]).d; - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -void FixBondReact::unpack_forward_comm(int n, int first, double *buf) -{ - int i,j,k,m,ns,last; - - m = 0; - last = first + n; - - if (commflag == 1) { - for (i = first; i < last; i++) - for (k = 0; k < ncustomvars; 
k++) - vvec[i][k] = buf[m++]; - } else if (commflag == 2) { - for (i = first; i < last; i++) - partner[i] = (tagint) ubuf(buf[m++]).i; - } else { - m = 0; - last = first + n; - for (i = first; i < last; i++) { - finalpartner[i] = (tagint) ubuf(buf[m++]).i; - ns = (int) ubuf(buf[m++]).i; - nxspecial[i][0] = ns; - for (j = 0; j < ns; j++) - xspecial[i][j] = (tagint) ubuf(buf[m++]).i; - } - } -} - -/* ---------------------------------------------------------------------- */ - -int FixBondReact::pack_reverse_comm(int n, int first, double *buf) -{ - int i,m,last; - - m = 0; - last = first + n; - - for (i = first; i < last; i++) { - buf[m++] = ubuf(partner[i]).d; - if (closeneigh[rxnID] != 0) - buf[m++] = distsq[i][1]; - else - buf[m++] = distsq[i][0]; - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -void FixBondReact::unpack_reverse_comm(int n, int *list, double *buf) -{ - int i,j,m; - - m = 0; - - for (i = 0; i < n; i++) { - j = list[i]; - if (closeneigh[rxnID] != 0) { - if (buf[m+1] < distsq[j][1]) { - partner[j] = (tagint) ubuf(buf[m++]).i; - distsq[j][1] = buf[m++]; - } else m += 2; - } else { - if (buf[m+1] > distsq[j][0]) { - partner[j] = (tagint) ubuf(buf[m++]).i; - distsq[j][0] = buf[m++]; - } else m += 2; - } - } -} - -/* ---------------------------------------------------------------------- - write Set data to restart file -------------------------------------------------------------------------- */ - -void FixBondReact::write_restart(FILE *fp) -{ - int revision = 1; - set[0].nreacts = nreacts; - set[0].max_rate_limit_steps = max_rate_limit_steps; - - for (int i = 0; i < nreacts; i++) { - set[i].reaction_count_total = reaction_count_total[i]; - - strncpy(set[i].rxn_name,rxn_name[i],MAXNAME-1); - set[i].rxn_name[MAXNAME-1] = '\0'; - } - - int rbufcount = max_rate_limit_steps*nreacts; - int *rbuf; - if (rbufcount) { - memory->create(rbuf,rbufcount,"bond/react:rbuf"); - 
memcpy(rbuf,&store_rxn_count[0][0],sizeof(int)*rbufcount); - } - - if (comm->me == 0) { - int size = nreacts*sizeof(Set)+(rbufcount+1)*sizeof(int); - fwrite(&size,sizeof(int),1,fp); - fwrite(&revision,sizeof(int),1,fp); - fwrite(set,sizeof(Set),nreacts,fp); - if (rbufcount) fwrite(rbuf,sizeof(int),rbufcount,fp); - } - if (rbufcount) memory->destroy(rbuf); -} - -/* ---------------------------------------------------------------------- - use selected state info from restart file to restart the Fix - bond/react restart revisions numbers added after LAMMPS version 3 Nov 2022 -------------------------------------------------------------------------- */ - -void FixBondReact::restart(char *buf) -{ - int n,revision,r_nreacts,r_max_rate_limit_steps,ibufcount,n2cpy; - int **ibuf; - - n = 0; - if (lmp->restart_ver > utils::date2num("3 Nov 2022")) revision = buf[n++]; - else revision = 0; - - Set *set_restart = (Set *) &buf[n*sizeof(int)]; - r_nreacts = set_restart[0].nreacts; - - n2cpy = 0; - if (revision > 0) { - r_max_rate_limit_steps = set_restart[0].max_rate_limit_steps; - if (r_max_rate_limit_steps > 0) { - ibufcount = r_max_rate_limit_steps*r_nreacts; - memory->create(ibuf,r_max_rate_limit_steps,r_nreacts,"bond/react:ibuf"); - memcpy(&ibuf[0][0],&buf[sizeof(int)+r_nreacts*sizeof(Set)],sizeof(int)*ibufcount); - n2cpy = r_max_rate_limit_steps; - } - } - - if (max_rate_limit_steps < n2cpy) n2cpy = max_rate_limit_steps; - for (int i = 0; i < r_nreacts; i++) { - for (int j = 0; j < nreacts; j++) { - if (strcmp(set_restart[i].rxn_name,rxn_name[j]) == 0) { - reaction_count_total[j] = set_restart[i].reaction_count_total; - // read rate_limit restart information - for (int k = 0; k < n2cpy; k++) - store_rxn_count[k][j] = ibuf[k][i]; - } - } - } - if (revision > 0 && r_max_rate_limit_steps > 0) memory->destroy(ibuf); -} - -/* ---------------------------------------------------------------------- -memory usage of local atom-based arrays 
-------------------------------------------------------------------------- */ - -double FixBondReact::memory_usage() -{ - int nmax = atom->nmax; - double bytes = (double)nmax * sizeof(int); - bytes = 2*nmax * sizeof(tagint); - bytes += (double)nmax * sizeof(double); - return bytes; -} - -/* ---------------------------------------------------------------------- */ - -void FixBondReact::print_bb() -{ -#if 0 - //fix bond/create cargo code. eg nbonds needs to be added - -for (int i = 0; i < atom->nlocal; i++) { - // printf("TAG " TAGINT_FORMAT ": %d nbonds: ",atom->tag[i],atom->num_bond[i]); - for (int j = 0; j < atom->num_bond[i]; j++) { - // printf(" " TAGINT_FORMAT,atom->bond_atom[i][j]); - } - // printf("\n"); - // printf("TAG " TAGINT_FORMAT ": %d nangles: ",atom->tag[i],atom->num_angle[i]); - for (int j = 0; j < atom->num_angle[i]; j++) { - // printf(" " TAGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT ",", - atom->angle_atom1[i][j], atom->angle_atom2[i][j], - atom->angle_atom3[i][j]); - } - // printf("\n"); - // printf("TAG " TAGINT_FORMAT ": %d ndihedrals: ",atom->tag[i],atom->num_dihedral[i]); - for (int j = 0; j < atom->num_dihedral[i]; j++) { - // printf(" " TAGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT " " - TAGINT_FORMAT ",", atom->dihedral_atom1[i][j], - atom->dihedral_atom2[i][j],atom->dihedral_atom3[i][j], - atom->dihedral_atom4[i][j]); - } - // printf("\n"); - // printf("TAG " TAGINT_FORMAT ": %d nimpropers: ",atom->tag[i],atom->num_improper[i]); - for (int j = 0; j < atom->num_improper[i]; j++) { - // printf(" " TAGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT " " - TAGINT_FORMAT ",",atom->improper_atom1[i][j], - atom->improper_atom2[i][j],atom->improper_atom3[i][j], - atom->improper_atom4[i][j]); - } - // printf("\n"); - // printf("TAG " TAGINT_FORMAT ": %d %d %d nspecial: ",atom->tag[i], - atom->nspecial[i][0],atom->nspecial[i][1],atom->nspecial[i][2]); - for (int j = 0; j < atom->nspecial[i][2]; j++) { - printf(" " 
TAGINT_FORMAT,atom->special[i][j]); - } - // printf("\n"); -} -#endif -} diff --git a/src/KOKKOS/fix_bond_react_kokkos.h b/src/KOKKOS/fix_bond_react_kokkos.h deleted file mode 100644 index a21de74a69..0000000000 --- a/src/KOKKOS/fix_bond_react_kokkos.h +++ /dev/null @@ -1,238 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - https://www.lammps.org/, Sandia National Laboratories - LAMMPS development team: developers@lammps.org - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing Author: Jacob Gissinger (jgissing@stevens.edu) - KOKKOS version (2024/08): Mitch Murphy (alphataubio@gmail.com) -------------------------------------------------------------------------- */ - -#ifdef FIX_CLASS -// clang-format off -FixStyle(bond/react/kk,FixBondReactKokkos); -FixStyle(bond/react/kk/device,FixBondReactKokkos); -FixStyle(bond/react/kk/host,FixBondReactKokkos); -// clang-format on -#else - -// clang-format off -#ifndef LMP_FIX_BOND_REACT_KOKKOS_H -#define LMP_FIX_BOND_REACT_KOKKOS_H - -#include "fix_bond_react.h" -#include "kokkos_type.h" - -#include -#include - -namespace LAMMPS_NS { - -template -class FixBondReactKokkos : public FixBondReact { - public: - - FixBondReactKokkos(class LAMMPS *, int, char **); - ~FixBondReactKokkos() override; - //int setmask() override; - void post_constructor() override; - void init() override; - void init_list(int, class NeighList *) override; - void post_integrate() override; - //void post_integrate_respa(int, int) override; - 
- int pack_forward_comm(int, int *, double *, int, int *) override; - void unpack_forward_comm(int, int, double *) override; - int pack_reverse_comm(int, int, double *) override; - void unpack_reverse_comm(int, int *, double *) override; - double compute_vector(int) override; - //double memory_usage() override; - - private: - - int *nevery; - FILE *fp; - int *iatomtype, *jatomtype; - int *seed; - double **cutsq, *fraction; - int *max_rxn, *nlocalskips, *nghostlyskips; - int **rate_limit; - int **store_rxn_count; - int *stabilize_steps_flag; - int *custom_charges_fragid; - int *rescale_charges_flag; // if nonzero, indicates number of atoms whose charges are updated - double *mol_total_charge; // sum of charges of post-reaction atoms whose charges are updated - int *create_atoms_flag; - int *modify_create_fragid; - double *overlapsq; - int *molecule_keyword; - int *nconstraints; - char **constraintstr; - std::vector rxnfunclist; // lists current special rxn function - std::vector peratomflag; // 1 if special rxn function uses per-atom variable (vs. 
per-bond) - int **var_flag, **var_id; // for keyword values with variable inputs - int *groupbits; - - char **rxn_name; // name of reaction - int *reaction_count; - int *reaction_count_total; - tagint *partner, *finalpartner; - double **distsq; - int *nattempt; - tagint ***attempt; - - class Molecule *onemol; // pre-reacted molecule template - class Molecule *twomol; // post-reacted molecule template - Fix *fix1; // nve/limit used to relax reaction sites - Fix *fix2; // properties/atom used to indicate 1) relaxing atoms - // 2) to which 'react' atom belongs - Fix *fix3; // property/atom used for system-wide thermostat - class RanMars **random; // random number for 'prob' keyword - class RanMars **rrhandom; // random number for Arrhenius constraint - class NeighList *list; - class ResetAtomsMol *reset_mol_ids; // class for resetting mol IDs - - int *reacted_mol, *unreacted_mol; - int *limit_duration; // indicates how long to relax - char *nve_limit_xmax; // indicates max distance allowed to move when relaxing - char *id_fix1; // id of internally created fix nve/limit - char *id_fix2; // id of internally created fix per-atom properties - char *id_fix3; // id of internally created 'stabilization group' per-atom property fix - char *statted_id; // name of 'stabilization group' per-atom property - char *master_group; // group containing relaxing atoms from all fix rxns - char *exclude_group; // group for system-wide thermostat - - void superimpose_algorithm(); // main function of the superimpose algorithm - - int *ibonding, *jbonding; - int *closeneigh; // indicates if bonding atoms of a rxn are 1-2, 1-3, or 1-4 neighbors - int nedge, nequivalent, ndelete, ncreate, nchiral; // # edge, equivalent atoms in mapping file - int attempted_rxn; // there was an attempt! 
- int *local_rxn_count; - int *ghostly_rxn_count; - int avail_guesses; // num of restore points available - int *guess_branch; // used when there is more than two choices when guessing - int **restore_pt; // contains info about restore points - tagint **restore; // contains info about restore points - int *pioneer_count; // counts pioneers - - int **edge; // atoms in molecule templates with incorrect valences - int ***equivalences; // relation between pre- and post-reacted templates - int ***reverse_equiv; // re-ordered equivalences - int **landlocked_atoms; // all atoms at least three bonds away from edge atoms - int **custom_charges; // atoms whose charge should be updated - int **delete_atoms; // atoms in pre-reacted templates to delete - int **create_atoms; // atoms in post-reacted templates to create - int ***chiral_atoms; // pre-react chiral atoms. 1) flag 2) orientation 3-4) ordered atom types - - int **nxspecial, **onemol_nxspecial, **twomol_nxspecial; // full number of 1-4 neighbors - tagint **xspecial, **onemol_xspecial, **twomol_xspecial; // full 1-4 neighbor list - - int pion, neigh, trace; // important indices for various loops. 
required for restore points - int lcl_inst; // reaction instance - tagint **glove; // 1st colmn: pre-reacted template, 2nd colmn: global IDs - // for all mega_gloves: first row is the ID of bond/react - // 'cuff' leaves room for additional values carried around - int cuff; // default = 1, w/ rescale_charges_flag = 2 - double **my_mega_glove; // local + ghostly reaction instances - double **local_mega_glove; // consolidation of local reaction instances - double **ghostly_mega_glove; // consolidation of nonlocal reaction instances - double **global_mega_glove; // consolidation (inter-processor) of gloves - // containing nonlocal atoms - - int *localsendlist; // indicates ghosts of other procs - int my_num_mega; // local + ghostly reaction instances (on this proc) - int local_num_mega; // num of local reaction instances - int ghostly_num_mega; // num of ghostly reaction instances - int global_megasize; // num of reaction instances in global_mega_glove - int *pioneers; // during Superimpose Algorithm, atoms which have been assigned, - // but whose first neighbors haven't - int glove_counter; // used to determine when to terminate Superimpose Algorithm - - void read_variable_keyword(const char *, int, int); - void read_map_file(int); - void EdgeIDs(char *, int); - void Equivalences(char *, int); - void DeleteAtoms(char *, int); - void CreateAtoms(char *, int); - void CustomCharges(int, int); - void ChiralCenters(char *, int); - void ReadConstraints(char *, int); - void readID(char *, int, int, int); - - void make_a_guess(); - void neighbor_loop(); - void check_a_neighbor(); - void crosscheck_the_neighbor(); - void inner_crosscheck_loop(); - int ring_check(); - int check_constraints(); - void get_IDcoords(int, int, double *); - double get_temperature(tagint **, int, int); - double get_totalcharge(); - void customvarnames(); // get per-atom variables names used by custom constraint - void get_customvars(); // evaluate local values for variables names used by custom 
constraint - double custom_constraint(const std::string &); // evaulate expression for custom constraint - double rxnfunction(const std::string &, const std::string &, - const std::string &); // eval rxn_sum and rxn_ave - void get_atoms2bond(int); - int get_chirality(double[12]); // get handedness given an ordered set of coordinates - - void open(char *); - void readline(char *); - void parse_keyword(int, char *, char *); - - void far_partner(); - void close_partner(); - void get_molxspecials(); - void find_landlocked_atoms(int); - void glove_ghostcheck(); - void ghost_glovecast(); - void update_everything(); - int insert_atoms(tagint **, int); - void unlimit_bond(); // removes atoms from stabilization, and other post-reaction every-step operations - void dedup_mega_gloves(int); //dedup global mega_glove - void write_restart(FILE *) override; - void restart(char *buf) override; - - // store restart data - struct Set { - int nreacts; - char rxn_name[MAXNAME]; - int reaction_count_total; - int max_rate_limit_steps; - }; - Set *set; - - struct Constraint { - int type; - int id[MAXCONIDS]; - int idtype[MAXCONIDS]; - double par[MAXCONPAR]; - std::string str; - }; - int ncustomvars; - std::vector customvarstrs; - int nvvec; - double **vvec; // per-atom vector to store custom constraint atom-style variable values - class Compute *cperbond; // pointer to 'compute bond/local' used by custom constraint ('rxnbond' function) - std::map, int> atoms2bond; // maps atom pair to index of local bond array - std::vector> constraints; - - // DEBUG - - void print_bb(); -}; - -} // namespace LAMMPS_NS - -#endif -#endif diff --git a/src/KOKKOS/fix_recenter_kokkos.cpp b/src/KOKKOS/fix_recenter_kokkos.cpp index 615691e33c..fd23f731a2 100644 --- a/src/KOKKOS/fix_recenter_kokkos.cpp +++ b/src/KOKKOS/fix_recenter_kokkos.cpp @@ -14,7 +14,7 @@ /* ---------------------------------------------------------------------- Contributing author: Mitch Murphy (alphataubio@gmail.com) - 
------------------------------------------------------------------------- */ +------------------------------------------------------------------------- */ #include "fix_recenter_kokkos.h" diff --git a/src/KOKKOS/superpose3d_kokkos.h b/src/KOKKOS/superpose3d_kokkos.h deleted file mode 100644 index be960d5c07..0000000000 --- a/src/KOKKOS/superpose3d_kokkos.h +++ /dev/null @@ -1,439 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - https://www.lammps.org/, Sandia National Laboratories - LAMMPS development team: developers@lammps.org - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. (Some of the code in this file is also - available using a more premissive license. See below for details.) - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ -/* ---------------------------------------------------------------------- - Contributing author: Andrew Jewett (Scripps Research) - Availability: https://github.com/jewettaij/superpose3d_cpp (MIT license) -------------------------------------------------------------------------- */ - -/// @file superpose3d.hpp -/// @brief Calculate the optimal rotation, translation and scale needed to -/// optimally fit two different point clouds containing n points. 
-/// @author Andrew Jewett -/// @license MIT - -#ifndef LMP_SUPERPOSE3D_H -#define LMP_SUPERPOSE3D_H - -#include "math_eigen_impl.h" //functions to calculate eigenvalues and eigenvectors - -// ----------------------------------------------------------- -// ------------------------ INTERFACE ------------------------ -// ----------------------------------------------------------- - -/// @brief Superpose3d is a class with only one important member function -/// Superpose(). It is useful for calculating the optimal -/// superposition (rotations, translations, and scale transformations) -/// between two point clouds of the same size. -template -class Superpose3D { - private: - size_t N; //number of points in the point clouds - Scalar *aWeights; //weights applied to points when computing RMSD - MathEigen::Jacobi eigen_calc; // calc eigenvectors - Scalar **aaXf_shifted; //preallocated space for fixed point cloud (Nx3 array) - Scalar **aaXm_shifted; //preallocated space for mobile point cloud (Nx3 array) - - public: - // The following data members store the rotation, translation and scale - // after optimal superposition - Scalar **R; //!< store optimal rotation here (this is a 3x3 array). - Scalar T[3]; //!< store optimal translation here - Scalar c; //!< store optimal scale (typically 1 unless requested by the user) - Scalar q[4]; //!< quaternion corresponding to the rotation stored in R. - // The first entry of q is cos(θ/2). The remaining 3 entries - // of q are the axis of rotation (with length sin(θ/2)). - // (Note: This is not the same as "p" from Diamond's 1988 paper.) 
- - Superpose3D(size_t N = 0); //!< N=number of points in both point clouds - - Superpose3D(size_t N, //!< N = number of points in both point clouds - ConstArray aWeights); //!< weight per point for computing RMSD - - ~Superpose3D(); - - /// @brief specify he number of points in both point clouds - void SetNumPoints(size_t N); - /// @brief return the number of points in both point clouds - size_t GetNumPoints() { return N; } - /// @brief specify the weight applied to each point when computing RMSD - void SetWeights(ConstArray aWeights); - - /// @brief Use rigid-body transformations (rotations, translations, and - /// optionally scale transformations) to superimpose two point clouds. - /// - /// @details - /// This function takes two lists of xyz coordinates (of the same length) and - /// attempts to superimpose them using rotations, translations, and - /// (optionally) scale transformations. These transformations are applied to - /// to the coordinates in the "aaXm_orig" array (the "mobile" point cloud) - /// in order to minimize the root-mean-squared-distance (RMSD) between the - /// corresponding points in each cloud, where RMSD is defined as: - /// - /// @verbatim - /// sqrt((Σ_n w[n]*Σ_i |X[n][i] - (Σ_j c*R[i][j]*x[n][j]+T[i])|^2)/(Σ_n w[n])) - /// @endverbatim - /// - /// In this formula, the "X_i" and "x_i" are coordinates of the ith fixed and - /// mobile point clouds (represented by "aaXf" and "aaXm" in the code below) - /// and "w_i" are optional weights (represented by "aWeights" in the code). - /// This function implements a more general variant of the method from: - /// @verbatim - /// R. Diamond, (1988) "A Note on the Rotational Superposition Problem", - /// Acta Cryst. A44, pp. 211-216 - /// @endverbatim - /// - /// @note: - /// This code has been augmented with a new feature. The version in the - /// original paper only considers rotation and translation and does not allow - /// coordinates of either cloud to be rescaled (multiplied by a scalar). 
- /// To enable the ability to rescale the coordinates, set allow_rescale=true. - /// (By default, this feature is disabled.) - /// - /// @returns - /// The RMSD between the 2 pointclouds after optimal rotation, translation - /// (and scaling if requested) was applied to the "mobile" point cloud. - /// After this function is called, the optimal rotation, translation, - /// and scale (if requested) will be stored in the "R", "T", and "c" - /// public data members. - Scalar Superpose(ConstArrayOfCoords aaXf, //!< coords for the "frozen" object - ConstArrayOfCoords aaXm, //!< coords for the "mobile" object - bool allow_rescale = false //!< rescale mobile object? (c≠1?) - ); - - // C++ boilerplate: copy and move constructor, swap, and assignment operator - Superpose3D(const Superpose3D &source); - Superpose3D(Superpose3D &&other); - void swap(Superpose3D &other); - Superpose3D & - operator=(Superpose3D source); - - private: - // memory management: - void Alloc(size_t N); - void Init(); - void Dealloc(); - -}; // class Superpose3D - -// -------------- IMPLEMENTATION -------------- - -template static inline Scalar SQR(Scalar x) -{ - return x * x; -} - -template -Scalar Superpose3D::Superpose( - ConstArrayOfCoords aaXf, // coords for the "frozen" object - ConstArrayOfCoords aaXm, // coords for the "mobile" object - bool allow_rescale) // rescale mobile object? (c!=1?) 
-{ - // Find the center of mass of each object: - Scalar aCenter_f[3] = {0.0, 0.0, 0.0}; - Scalar aCenter_m[3] = {0.0, 0.0, 0.0}; - Scalar sum_weights = 0.0; - for (size_t n = 0; n < N; n++) { - Scalar weight = aWeights[n]; - for (int d = 0; d < 3; d++) { - aCenter_f[d] += aaXf[n][d] * weight; - aCenter_m[d] += aaXm[n][d] * weight; - } - sum_weights += weight; - } - - //assert(sum_weights != 0.0); - - for (int d = 0; d < 3; d++) { - aCenter_f[d] /= sum_weights; - aCenter_m[d] /= sum_weights; - } - - //Subtract the centers-of-mass from the original coordinates for each object - for (size_t n = 0; n < N; n++) { - for (int d = 0; d < 3; d++) { - // shift the coordinates so that the new center of mass is at the origin - aaXf_shifted[n][d] = aaXf[n][d] - aCenter_f[d]; - aaXm_shifted[n][d] = aaXm[n][d] - aCenter_m[d]; - } - } - - // Calculate the "M" array from the Diamond paper (equation 16) - Scalar M[3][3]; - for (int i = 0; i < 3; i++) - for (int j = 0; j < 3; j++) M[i][j] = 0.0; - - for (size_t n = 0; n < N; n++) { - Scalar weight = aWeights[n]; - for (int i = 0; i < 3; i++) { - for (int j = 0; j < 3; j++) { M[i][j] += weight * aaXm_shifted[n][i] * aaXf_shifted[n][j]; } - } - } - - // Calculate Q (equation 17) - Scalar traceM = 0.0; - for (int i = 0; i < 3; i++) traceM += M[i][i]; - Scalar Q[3][3]; - for (int i = 0; i < 3; i++) { - for (int j = 0; j < 3; j++) { - Q[i][j] = M[i][j] + M[j][i]; - if (i == j) Q[i][j] -= 2.0 * traceM; - } - } - - // Calculate V (equation 18) - Scalar V[3]; - V[0] = M[1][2] - M[2][1]; - V[1] = M[2][0] - M[0][2]; - V[2] = M[0][1] - M[1][0]; - - // Calculate "P" (equation 22) - // First we must allocate space for the P matrix. It's not safe to declare: - // Scalar P[4][4]; - // ...because most matrix solvers expect arrays in pointer-to-pointer format. - // (a different format). Below I create a fixed size matrix P in this format. 
- Scalar _PF[4 * 4]; // Contiguous 1D array for storing contents of the 2D P array - Scalar *P[4]; // This version of P has has ** (pointer-to-pointer) format. - for (int i = 0; i < 4; i++) // We must make sure that - P[i] = &(_PF[4 * i]); // P[i] points to the appropriate location in memory - - // Now fill the P array - for (int i = 0; i < 3; i++) - for (int j = 0; j < 3; j++) P[i][j] = Q[i][j]; - P[0][3] = V[0]; - P[3][0] = V[0]; - P[1][3] = V[1]; - P[3][1] = V[1]; - P[2][3] = V[2]; - P[3][2] = V[2]; - P[3][3] = 0.0; - - // The vector "p" contains the optimal rotation (backwards quaternion format) - Scalar p[4] = {0.0, 0.0, 0.0, 1.0}; // default value - Scalar pPp = 0.0; // = p^T * P * p (zero by default) - Scalar rmsd = 0.0; // default value - - bool singular = N < 2; // (it doesn't make sense to rotate a single point) - - if (!singular) { - // Calculate the principal eigenvalue and eigenvector of matrix P. - // Store the principal eigenvector in "p" - // The vector "p" will contain the optimal rotation (in quaternion format) - - Scalar Evl[4]; // Store the eigenvalues of P here. - Scalar *Evc[4]; // Store the eigevectors here. This version has ** format. - Scalar _Evc[4 * 4]; // Contiguous 1D array for storing contents of "Evc" array - for (int i = 0; i < 4; i++) // We must make sure that - Evc[i] = &(_Evc[4 * i]); // Evc[i] points to the correct location in memory - - eigen_calc.Diagonalize(P, Evl, Evc); - - // Note: The eigenvalues are sorted in decreasing order by default. - pPp = Evl[0]; // = the maximum eigenvalue of P - for (int i = 0; i < 4; i++) - p[i] = Evc[0][i]; //copy eigenvector corresponding to this eigenvalue to p - } //if (! 
singular) - - // Now normalize p - Scalar pnorm = 0.0; - for (int i = 0; i < 4; i++) pnorm += p[i] * p[i]; - pnorm = sqrt(pnorm); - for (int i = 0; i < 4; i++) p[i] /= pnorm; - - // Finally, calculate the rotation matrix corresponding to "p" - // (convert a quaternion into a 3x3 rotation matrix) - - R[0][0] = (p[0] * p[0]) - (p[1] * p[1]) - (p[2] * p[2]) + (p[3] * p[3]); - R[1][1] = -(p[0] * p[0]) + (p[1] * p[1]) - (p[2] * p[2]) + (p[3] * p[3]); - R[2][2] = -(p[0] * p[0]) - (p[1] * p[1]) + (p[2] * p[2]) + (p[3] * p[3]); - R[0][1] = 2 * (p[0] * p[1] - p[2] * p[3]); - R[1][0] = 2 * (p[0] * p[1] + p[2] * p[3]); - R[1][2] = 2 * (p[1] * p[2] - p[0] * p[3]); - R[2][1] = 2 * (p[1] * p[2] + p[0] * p[3]); - R[0][2] = 2 * (p[0] * p[2] + p[1] * p[3]); - R[2][0] = 2 * (p[0] * p[2] - p[1] * p[3]); - - q[0] = p[3]; // Note: The "p" variable is not a quaternion in the - q[1] = p[0]; // conventional sense because its elements - q[2] = p[1]; // are in the wrong order. I correct for that here. - q[3] = p[2]; // "q" is the quaternion correspond to rotation R. 
- - // Optional: Decide the scale factor, c - c = 1.0; // by default, don't rescale the coordinates - - if ((allow_rescale) && (!singular)) { - Scalar Waxaixai = 0.0; - Scalar WaxaiXai = 0.0; - for (size_t a = 0; a < N; a++) { - Scalar weight = aWeights[a]; - for (int i = 0; i < 3; i++) { - Waxaixai += weight * aaXm_shifted[a][i] * aaXm_shifted[a][i]; - WaxaiXai += weight * aaXm_shifted[a][i] * aaXf_shifted[a][i]; - } - } - c = (WaxaiXai + pPp) / Waxaixai; - - } // if (allow_rescale) - - // Finally compute the RMSD between the two coordinate sets: - // First compute E0 from equation 24 of the paper - Scalar E0 = 0.0; - for (size_t n = 0; n < N; n++) { - Scalar weight = aWeights[n]; - for (int d = 0; d < 3; d++) - // (remember to include the scale factor "c" that we inserted) - E0 += weight * (SQR(aaXf_shifted[n][d] - c * aaXm_shifted[n][d])); - } - Scalar sum_sqr_dist = E0 - c * 2.0 * pPp; - if (sum_sqr_dist < 0.0) //(edge case due to rounding error) - sum_sqr_dist = 0.0; - - if (!singular) rmsd = sqrt(sum_sqr_dist / sum_weights); - - // Lastly, calculate the translational offset. - // If c!=1, this is slightly more complicated than it seems. 
Recall that: - //RMSD=sqrt((Sum_i w_i * |X_i - Sum_j(c*R_ij*x_j + T_i))|^2) / (Sum_j w_j)) - // =sqrt((Sum_i w_i * |X_i - x_i')|^2) / (Sum_j w_j)) - // where - // x_i' = Sum_j(c*R_ij*x_j) + T_i - // = Xcm_i + c*R_ij*(x_j - xcm_j) - // and Xcm and xcm = center_of_mass for the frozen and mobile point clouds - // - // Hence: - // T_i = Xcm_i - Sum_j c*R_ij*xcm_j - // In the code, Xcm_i is represented by "aCenter_f[i]" - // and xcm_j is represented by "aCenter_m[j]" - - for (int i = 0; i < 3; i++) { - T[i] = aCenter_f[i]; - for (int j = 0; j < 3; j++) { T[i] -= c * R[i][j] * aCenter_m[j]; } - } - - return rmsd; - -} //Superpose3D::Superpose(aaXf, aaXm, allow_rescale) - -template -void Superpose3D::SetNumPoints(size_t N) -{ - Dealloc(); - Alloc(N); -} - -template -void Superpose3D::SetWeights(ConstArray aWeights) -{ - for (size_t i = 0; i < N; i++) this->aWeights[i] = aWeights[i]; -} - -template -Superpose3D::Superpose3D(size_t N) : eigen_calc(4) -{ - Init(); - Alloc(N); -} - -template -Superpose3D::Superpose3D(size_t N, ConstArray aWeights) : - eigen_calc(4) -{ - Init(); - Alloc(N); - SetWeights(aWeights); -} - -template -Superpose3D::~Superpose3D() -{ - Dealloc(); -} - -template -void Superpose3D::Init() -{ - R = nullptr; - aWeights = nullptr; - aaXf_shifted = nullptr; - aaXm_shifted = nullptr; -} - -// memory management: - -template -void Superpose3D::Alloc(size_t N) -{ - this->N = N; - aWeights = new Scalar[N]; - for (size_t i = 0; i < N; i++) aWeights[i] = 1.0 / N; - MathEigen::Alloc2D(3, 3, &R); - MathEigen::Alloc2D(N, 3, &aaXf_shifted); - MathEigen::Alloc2D(N, 3, &aaXm_shifted); -} - -template -void Superpose3D::Dealloc() -{ - if (R) MathEigen::Dealloc2D(&R); - if (aWeights) delete[] aWeights; - if (aaXf_shifted) MathEigen::Dealloc2D(&aaXf_shifted); - if (aaXm_shifted) MathEigen::Dealloc2D(&aaXm_shifted); -} - -// memory management: copy and move constructor, swap, and assignment operator: - -template -Superpose3D::Superpose3D( - const Superpose3D &source) : - 
eigen_calc(4) -{ - Init(); - Alloc(source.N); - - //assert(N == source.N); - - for (int i = 0; i < N; i++) { - std::copy(source.aaXf_shifted[i], source.aaXf_shifted[i] + 3, aaXf_shifted[i]); - std::copy(source.aaXm_shifted[i], source.aaXm_shifted[i] + 3, aaXm_shifted[i]); - } -} - -template -void Superpose3D::swap( - Superpose3D &other) -{ - std::swap(N, other.N); - std::swap(R, other.R); - std::swap(aaXf_shifted, other.aaXf_shifted); - std::swap(aaXm_shifted, other.aaXm_shifted); -} - -// Move constructor (C++11) -template -Superpose3D::Superpose3D( - Superpose3D &&other) -{ - Init(); - swap(*this, other); -} - -// Using the "copy-swap" idiom for the assignment operator -template -Superpose3D & -Superpose3D::operator=( - Superpose3D source) -{ - this->swap(source); - return *this; -} - -#endif //#ifndef LMP_SUPERPOSE3D_H diff --git a/src/REACTION/fix_bond_react.cpp b/src/REACTION/fix_bond_react.cpp index 8c241355fd..3ef1cae895 100644 --- a/src/REACTION/fix_bond_react.cpp +++ b/src/REACTION/fix_bond_react.cpp @@ -607,6 +607,8 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) : FixBondReact::~FixBondReact() { + if (copymode) return; // needed for KOKKOS [alphataubio,2024/08] + for (int i = 0; i < narrhenius; i++) { delete rrhandom[i]; } From 5f5e2d9a64a7b88205d3e84f578235fbb9e0d2c9 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Mon, 19 Aug 2024 20:58:41 -0400 Subject: [PATCH 013/294] added fix nve/limit/kk --- src/KOKKOS/fix_nve_limit_kokkos.cpp | 190 ++++++++++++++++++++++++++++ src/KOKKOS/fix_nve_limit_kokkos.h | 42 ++++++ src/fix_nve_limit.h | 2 +- 3 files changed, 233 insertions(+), 1 deletion(-) create mode 100644 src/KOKKOS/fix_nve_limit_kokkos.cpp create mode 100644 src/KOKKOS/fix_nve_limit_kokkos.h diff --git a/src/KOKKOS/fix_nve_limit_kokkos.cpp b/src/KOKKOS/fix_nve_limit_kokkos.cpp new file mode 100644 index 0000000000..a073451f9e --- /dev/null +++ b/src/KOKKOS/fix_nve_limit_kokkos.cpp @@ -0,0 +1,190 @@ +// clang-format off +/* 
---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Mitch Murphy, alphataubio at gmail com +------------------------------------------------------------------------- */ + +#include "fix_nve_limit_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "kokkos_type.h" + +#include + +using namespace LAMMPS_NS; +using namespace FixConst; + +/* ---------------------------------------------------------------------- */ + +template +FixNVELimitKokkos::FixNVELimitKokkos(LAMMPS *lmp, int narg, char **arg) : + FixNVELimit(lmp, narg, arg) +{ + kokkosable = 1; + execution_space = ExecutionSpaceFromDevice::space; + atomKK = (AtomKokkos *) atom; +} + +/* ---------------------------------------------------------------------- + allow for both per-type and per-atom mass +------------------------------------------------------------------------- */ + +template +void FixNVELimitKokkos::initial_integrate(int /*vflag*/) +{ + int nlocal = atom->nlocal; + if (igroup == atom->firstgroup) nlocal = atom->nfirst; + + auto d_x = atomKK->k_x.template view(); + auto d_v = atomKK->k_v.template view(); + auto d_f = atomKK->k_f.template view(); + auto d_mask = atomKK->k_mask.template view(); + + if (atomKK->rmass) { + + auto d_rmass = atomKK->k_rmass.template view(); + auto d_type = atomKK->k_type.template view(); + 
atomKK->sync(execution_space, X_MASK|V_MASK|F_MASK|MASK_MASK|RMASS_MASK ); + + Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) { + if (d_mask[i] & groupbit) { + const double dtfm = dtf / d_rmass[i]; + d_v(i,0) += dtfm * d_f(i,0); + d_v(i,1) += dtfm * d_f(i,1); + d_v(i,2) += dtfm * d_f(i,2); + + const double vsq = d_v(i,0)*d_v(i,0) + d_v(i,1)*d_v(i,1) + d_v(i,2)*d_v(i,2); + if (vsq > vlimitsq) { + l_ncount++; + const double scale = sqrt(vlimitsq/vsq); + d_v(i,0) *= scale; + d_v(i,1) *= scale; + d_v(i,2) *= scale; + } + + d_x(i,0) += dtv * d_v(i,0); + d_x(i,1) += dtv * d_v(i,1); + d_x(i,2) += dtv * d_v(i,2); + } + }, ncount); + + } else { + + auto d_mass = atomKK->k_mass.template view(); + auto d_type = atomKK->k_type.template view(); + atomKK->sync(execution_space, X_MASK|V_MASK|F_MASK|MASK_MASK|TYPE_MASK ); + + Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) { + if (d_mask[i] & groupbit) { + const double dtfm = dtf / d_mass[d_type[i]]; + d_v(i,0) += dtfm * d_f(i,0); + d_v(i,1) += dtfm * d_f(i,1); + d_v(i,2) += dtfm * d_f(i,2); + + const double vsq = d_v(i,0)*d_v(i,0) + d_v(i,1)*d_v(i,1) + d_v(i,2)*d_v(i,2); + if (vsq > vlimitsq) { + l_ncount++; + const double scale = sqrt(vlimitsq/vsq); + d_v(i,0) *= scale; + d_v(i,1) *= scale; + d_v(i,2) *= scale; + } + + d_x(i,0) += dtv * d_v(i,0); + d_x(i,1) += dtv * d_v(i,1); + d_x(i,2) += dtv * d_v(i,2); + } + }, ncount); + } + + atomKK->modified(execution_space, X_MASK | V_MASK ); + +} + +/* ---------------------------------------------------------------------- */ + +template +void FixNVELimitKokkos::final_integrate() +{ + double dtfm,vsq; + int nlocal = atom->nlocal; + if (igroup == atom->firstgroup) nlocal = atom->nfirst; + + auto d_v = atomKK->k_v.template view(); + auto d_f = atomKK->k_f.template view(); + auto d_mask = atomKK->k_mask.template view(); + + if (atomKK->rmass) { + + auto d_rmass = atomKK->k_rmass.template view(); + atomKK->sync(execution_space, 
V_MASK|F_MASK|MASK_MASK|RMASS_MASK ); + + Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) { + if (d_mask[i] & groupbit) { + const double dtfm = dtf / d_rmass[i]; + d_v(i,0) += dtfm * d_f(i,0); + d_v(i,1) += dtfm * d_f(i,1); + d_v(i,2) += dtfm * d_f(i,2); + + const double vsq = d_v(i,0)*d_v(i,0) + d_v(i,1)*d_v(i,1) + d_v(i,2)*d_v(i,2); + if (vsq > vlimitsq) { + l_ncount++; + const double scale = sqrt(vlimitsq/vsq); + d_v(i,0) *= scale; + d_v(i,1) *= scale; + d_v(i,2) *= scale; + } + } + }, ncount); + + } else { + + auto d_mass = atomKK->k_mass.template view(); + auto d_type = atomKK->k_type.template view(); + atomKK->sync(execution_space, V_MASK|F_MASK|MASK_MASK|TYPE_MASK ); + + Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) { + if (d_mask[i] & groupbit) { + const double dtfm = dtf / d_mass[d_type[i]]; + d_v(i,0) += dtfm * d_f(i,0); + d_v(i,1) += dtfm * d_f(i,1); + d_v(i,2) += dtfm * d_f(i,2); + + const double vsq = d_v(i,0)*d_v(i,0) + d_v(i,1)*d_v(i,1) + d_v(i,2)*d_v(i,2); + if (vsq > vlimitsq) { + l_ncount++; + const double scale = sqrt(vlimitsq/vsq); + d_v(i,0) *= scale; + d_v(i,1) *= scale; + d_v(i,2) *= scale; + } + } + }, ncount); + } + + atomKK->modified(execution_space, V_MASK ); + +} + +/* ---------------------------------------------------------------------- */ + +namespace LAMMPS_NS { +template class FixNVELimitKokkos; +#ifdef LMP_KOKKOS_GPU +template class FixNVELimitKokkos; +#endif +} + diff --git a/src/KOKKOS/fix_nve_limit_kokkos.h b/src/KOKKOS/fix_nve_limit_kokkos.h new file mode 100644 index 0000000000..f639f60640 --- /dev/null +++ b/src/KOKKOS/fix_nve_limit_kokkos.h @@ -0,0 +1,42 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. 
Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS +// clang-format off +FixStyle(nve/limit/kk,FixNVELimitKokkos); +FixStyle(nve/limit/kk/device,FixNVELimitKokkos); +FixStyle(nve/limit/kk/host,FixNVELimitKokkos); + +// clang-format on +#else + +#ifndef LMP_FIX_NVE_LIMIT_KOKKOS_H +#define LMP_FIX_NVE_LIMIT_KOKKOS_H + +#include "fix_nve_limit.h" + +namespace LAMMPS_NS { + +template +class FixNVELimitKokkos : public FixNVELimit { + public: + FixNVELimitKokkos(class LAMMPS *, int, char **); + void initial_integrate(int) override; + void final_integrate() override; + +}; + +} // namespace LAMMPS_NS + +#endif +#endif diff --git a/src/fix_nve_limit.h b/src/fix_nve_limit.h index 2a32aee975..e8a07d815a 100644 --- a/src/fix_nve_limit.h +++ b/src/fix_nve_limit.h @@ -36,7 +36,7 @@ class FixNVELimit : public Fix { void reset_dt() override; double compute_scalar() override; - private: + protected: double dtv, dtf; double *step_respa; int ncount; From d75fe348f160e0f59707484c85d9514beb97d483 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 20 Aug 2024 05:40:02 -0400 Subject: [PATCH 014/294] bugfix for compute_scalar() to work, test PASSED --- src/KOKKOS/fix_nve_limit_kokkos.cpp | 21 ++++++++++++++++----- src/KOKKOS/fix_nve_limit_kokkos.h | 1 + 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/KOKKOS/fix_nve_limit_kokkos.cpp b/src/KOKKOS/fix_nve_limit_kokkos.cpp index a073451f9e..28a3cbfe51 100644 --- a/src/KOKKOS/fix_nve_limit_kokkos.cpp +++ b/src/KOKKOS/fix_nve_limit_kokkos.cpp @@ -25,7 +25,6 @@ #include using namespace LAMMPS_NS; -using namespace FixConst; /* ---------------------------------------------------------------------- */ @@ -34,8 +33,14 @@ 
FixNVELimitKokkos::FixNVELimitKokkos(LAMMPS *lmp, int narg, char **a FixNVELimit(lmp, narg, arg) { kokkosable = 1; + + //FIXME: unit test fails when i turn this on + //fuse_integrate_flag = 1; + execution_space = ExecutionSpaceFromDevice::space; atomKK = (AtomKokkos *) atom; + datamask_read = EMPTY_MASK; + datamask_modify = EMPTY_MASK; } /* ---------------------------------------------------------------------- @@ -53,6 +58,8 @@ void FixNVELimitKokkos::initial_integrate(int /*vflag*/) auto d_f = atomKK->k_f.template view(); auto d_mask = atomKK->k_mask.template view(); + int d_ncount; + if (atomKK->rmass) { auto d_rmass = atomKK->k_rmass.template view(); @@ -79,7 +86,7 @@ void FixNVELimitKokkos::initial_integrate(int /*vflag*/) d_x(i,1) += dtv * d_v(i,1); d_x(i,2) += dtv * d_v(i,2); } - }, ncount); + }, d_ncount); } else { @@ -107,9 +114,10 @@ void FixNVELimitKokkos::initial_integrate(int /*vflag*/) d_x(i,1) += dtv * d_v(i,1); d_x(i,2) += dtv * d_v(i,2); } - }, ncount); + }, d_ncount); } + ncount += d_ncount; atomKK->modified(execution_space, X_MASK | V_MASK ); } @@ -127,6 +135,8 @@ void FixNVELimitKokkos::final_integrate() auto d_f = atomKK->k_f.template view(); auto d_mask = atomKK->k_mask.template view(); + int d_ncount; + if (atomKK->rmass) { auto d_rmass = atomKK->k_rmass.template view(); @@ -148,7 +158,7 @@ void FixNVELimitKokkos::final_integrate() d_v(i,2) *= scale; } } - }, ncount); + }, d_ncount); } else { @@ -172,9 +182,10 @@ void FixNVELimitKokkos::final_integrate() d_v(i,2) *= scale; } } - }, ncount); + }, d_ncount); } + ncount += d_ncount; atomKK->modified(execution_space, V_MASK ); } diff --git a/src/KOKKOS/fix_nve_limit_kokkos.h b/src/KOKKOS/fix_nve_limit_kokkos.h index f639f60640..b611996b66 100644 --- a/src/KOKKOS/fix_nve_limit_kokkos.h +++ b/src/KOKKOS/fix_nve_limit_kokkos.h @@ -24,6 +24,7 @@ FixStyle(nve/limit/kk/host,FixNVELimitKokkos); #define LMP_FIX_NVE_LIMIT_KOKKOS_H #include "fix_nve_limit.h" +#include "kokkos_type.h" namespace LAMMPS_NS { 
From cac0c56687322a6b913c957a6c866a6a023c10fa Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 20 Aug 2024 19:12:09 -0400 Subject: [PATCH 015/294] add charge to atom style template --- src/MOLECULE/atom_vec_template.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/MOLECULE/atom_vec_template.cpp b/src/MOLECULE/atom_vec_template.cpp index 2678f44327..8c646b27c0 100644 --- a/src/MOLECULE/atom_vec_template.cpp +++ b/src/MOLECULE/atom_vec_template.cpp @@ -29,20 +29,21 @@ AtomVecTemplate::AtomVecTemplate(LAMMPS *lmp) : AtomVec(lmp) atom->molecule_flag = 1; atom->molindex_flag = 1; atom->molatom_flag = 1; + atom->q_flag = 1; // strings with peratom variables to include in each AtomVec method // strings cannot contain fields in corresponding AtomVec default strings // order of fields in the string does not matter // except fields_data_atom and fields_data_vel which must match data file - fields_grow = {"molecule", "molindex", "molatom"}; - fields_copy = {"molecule", "molindex", "molatom"}; - fields_border = {"molecule", "molindex", "molatom"}; - fields_border_vel = {"molecule", "molindex", "molatom"}; - fields_exchange = {"molecule", "molindex", "molatom"}; - fields_restart = {"molecule", "molindex", "molatom"}; - fields_create = {"molecule", "molindex", "molatom"}; - fields_data_atom = {"id", "molecule", "molindex", "molatom", "type", "x"}; + fields_grow = {"q","molecule", "molindex", "molatom"}; + fields_copy = {"q","molecule", "molindex", "molatom"}; + fields_border = {"q","molecule", "molindex", "molatom"}; + fields_border_vel = {"q","molecule", "molindex", "molatom"}; + fields_exchange = {"q","molecule", "molindex", "molatom"}; + fields_restart = {"q","molecule", "molindex", "molatom"}; + fields_create = {"q","molecule", "molindex", "molatom"}; + fields_data_atom = {"id", "molecule", "type", "q", "x", "molindex", "molatom"}; fields_data_vel = {"id", "v"}; setup_fields(); From 3aadd8bd84b94b3b8b64d0c966156f0cb35ae30d Mon 
Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 20 Aug 2024 19:12:27 -0400 Subject: [PATCH 016/294] fix typo in comment --- src/molecule.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/molecule.cpp b/src/molecule.cpp index 617402d605..468f7dd136 100644 --- a/src/molecule.cpp +++ b/src/molecule.cpp @@ -958,7 +958,7 @@ void Molecule::diameters(char *line) } /* ---------------------------------------------------------------------- - read charges from file + read dipoles from file ------------------------------------------------------------------------- */ void Molecule::dipoles(char *line) From c968787c41f6f157b01b4773550f15a82301a552 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 21 Aug 2024 16:19:09 -0400 Subject: [PATCH 017/294] fix molecule add for MESH and remove 'not compatible' error message --- src/create_atoms.cpp | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/create_atoms.cpp b/src/create_atoms.cpp index d6c402f721..b8d42156d1 100644 --- a/src/create_atoms.cpp +++ b/src/create_atoms.cpp @@ -303,7 +303,7 @@ void CreateAtoms::command(int narg, char **arg) if (onemol->xflag == 0) error->all(FLERR, "Create_atoms molecule must have coordinates"); if (onemol->typeflag == 0) error->all(FLERR, "Create_atoms molecule must have atom types"); if (ntype + onemol->ntypes <= 0 || ntype + onemol->ntypes > atom->ntypes) - error->all(FLERR, "Invalid atom type in create_atoms mol command"); + error->all(FLERR, "Invalid atom type {} in create_atoms mol command (onemol->ntypes {} atom->ntypes {})", ntype, onemol->ntypes, atom->ntypes); if (onemol->tag_require && !atom->tag_enable) error->all(FLERR, "Create_atoms molecule has atom IDs, but system does not"); if (atom->molecular == Atom::TEMPLATE && onemol != atom->avec->onemols[0]) @@ -319,11 +319,7 @@ void CreateAtoms::command(int narg, char **arg) memory->create(xmol, onemol->natoms, 3, "create_atoms:xmol"); } - if (style == MESH) { - if (mode == 
MOLECULE) - error->all(FLERR, "Create_atoms mesh is not compatible with the 'mol' option"); - if (scaleflag) error->all(FLERR, "Create_atoms mesh must use 'units box' option"); - } + if (style == MESH && scaleflag) error->all(FLERR, "Create_atoms mesh must use 'units box' option"); ranlatt = nullptr; if (subsetflag != NONE) ranlatt = new RanMars(lmp, subsetseed + comm->me); @@ -968,7 +964,12 @@ int CreateAtoms::add_bisection(const double vert[3][3], tagint molid) if ((center[0] >= sublo[0]) && (center[0] < subhi[0]) && (center[1] >= sublo[1]) && (center[1] < subhi[1]) && (center[2] >= sublo[2]) && (center[2] < subhi[2])) { - atom->avec->create_atom(ntype, center); + if (mode == ATOM) atom->avec->create_atom(ntype, center); + else { + get_xmol(center); + add_molecule(); + } + int idx = atom->nlocal - 1; if (atom->radius_flag) atom->radius[idx] = ravg * radscale; if (atom->molecule_flag) atom->molecule[idx] = molid; @@ -1050,7 +1051,12 @@ int CreateAtoms::add_quasirandom(const double vert[3][3], tagint molid) if ((point[0] >= sublo[0]) && (point[0] < subhi[0]) && (point[1] >= sublo[1]) && (point[1] < subhi[1]) && (point[2] >= sublo[2]) && (point[2] < subhi[2])) { - atom->avec->create_atom(ntype, point); + if (mode == ATOM) atom->avec->create_atom(ntype, point); + else { + get_xmol(point); + add_molecule(); + } + int idx = atom->nlocal - 1; if (atom->molecule_flag) atom->molecule[idx] = molid; if (atom->radius_flag) atom->radius[idx] = rad * radscale; From 3ea74b17252ef4a4e16d2ee6841c518e983e435a Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 21 Aug 2024 16:20:17 -0400 Subject: [PATCH 018/294] update atom_style template test for added charge --- unittest/formats/test_atom_styles.cpp | 451 +------------------------- 1 file changed, 12 insertions(+), 439 deletions(-) diff --git a/unittest/formats/test_atom_styles.cpp b/unittest/formats/test_atom_styles.cpp index 921d469e31..90c8b07b02 100644 --- a/unittest/formats/test_atom_styles.cpp +++ 
b/unittest/formats/test_atom_styles.cpp @@ -2743,418 +2743,15 @@ TEST_F(AtomStyleTest, template) expected.has_x = true; expected.has_v = true; expected.has_f = true; - expected.molecule_flag = 1; - expected.molindex_flag = 1; - expected.molatom_flag = 1; - expected.nmolecule = 2; - expected.map_style = 3; - - ASSERT_ATOM_STATE_EQ(lmp->atom, expected); - - BEGIN_HIDE_OUTPUT(); - command("create_box 4 box bond/types 2 angle/types 2 "); - command("create_atoms 0 single -2.0 2.0 0.1 mol twomols 65234"); - command("create_atoms 0 single -2.0 -2.0 -0.1 mol twomols 62346"); - command("create_atoms 0 single 2.0 2.0 -0.1 mol twomols 61354"); - command("create_atoms 3 single 2.0 -2.0 0.1"); - command("create_atoms 3 single 2.0 2.0 -2.1"); - command("create_atoms 4 single 2.0 -2.0 2.1"); - command("mass 1 16.0"); - command("mass 2 1.0"); - command("mass 3 12.0"); - command("mass 4 16.0"); - command("bond_style zero"); - command("bond_coeff 1 1.0"); - command("bond_coeff 2 1.16"); - command("angle_style zero"); - command("angle_coeff * 109.0"); - command("pair_coeff * *"); - END_HIDE_OUTPUT(); - ASSERT_THAT(std::string(lmp->atom->atom_style), Eq("template")); - ASSERT_NE(lmp->atom->avec, nullptr); - ASSERT_EQ(lmp->atom->natoms, 12); - ASSERT_EQ(lmp->atom->nbonds, 6); - ASSERT_EQ(lmp->atom->nbondtypes, 2); - ASSERT_EQ(lmp->atom->nangles, 3); - ASSERT_EQ(lmp->atom->nangletypes, 2); - ASSERT_EQ(lmp->atom->nellipsoids, 0); - ASSERT_EQ(lmp->atom->nlocal, 12); - ASSERT_EQ(lmp->atom->nghost, 0); - ASSERT_NE(lmp->atom->nmax, -1); - ASSERT_EQ(lmp->atom->tag_enable, 1); - ASSERT_EQ(lmp->atom->molecular, Atom::TEMPLATE); - ASSERT_EQ(lmp->atom->ntypes, 4); - ASSERT_EQ(lmp->atom->nextra_grow, 0); - ASSERT_EQ(lmp->atom->nextra_restart, 0); - ASSERT_EQ(lmp->atom->nextra_border, 0); - ASSERT_EQ(lmp->atom->nextra_grow_max, 0); - ASSERT_EQ(lmp->atom->nextra_restart_max, 0); - ASSERT_EQ(lmp->atom->nextra_border_max, 0); - ASSERT_EQ(lmp->atom->nextra_store, 0); - ASSERT_EQ(lmp->atom->extra_grow, 
nullptr); - ASSERT_EQ(lmp->atom->extra_restart, nullptr); - ASSERT_EQ(lmp->atom->extra_border, nullptr); - ASSERT_EQ(lmp->atom->extra, nullptr); - - ASSERT_NE(lmp->atom->mass, nullptr); - ASSERT_NE(lmp->atom->mass_setflag, nullptr); - - BEGIN_HIDE_OUTPUT(); - command("write_data test_atom_styles.data"); - command("clear"); - command("units real"); - command("newton off on"); - command("molecule twomols h2o.mol co2.mol offset 2 1 1 0 0"); - command("atom_style template twomols"); - command("pair_style zero 4.0"); - command("bond_style zero"); - command("angle_style zero"); - command("atom_modify map array"); - command("read_data test_atom_styles.data"); - END_HIDE_OUTPUT(); - ASSERT_THAT(std::string(lmp->atom->atom_style), Eq("template")); - ASSERT_NE(lmp->atom->avec, nullptr); - - ASSERT_EQ(lmp->atom->natoms, 12); - ASSERT_EQ(lmp->atom->nlocal, 12); - ASSERT_EQ(lmp->atom->nbonds, 6); - ASSERT_EQ(lmp->atom->nangles, 3); - ASSERT_EQ(lmp->atom->nbondtypes, 2); - ASSERT_EQ(lmp->atom->nangletypes, 2); - ASSERT_EQ(lmp->atom->nghost, 0); - ASSERT_NE(lmp->atom->nmax, -1); - ASSERT_EQ(lmp->atom->tag_enable, 1); - ASSERT_EQ(lmp->atom->molecular, Atom::TEMPLATE); - ASSERT_EQ(lmp->atom->ntypes, 4); - ASSERT_NE(lmp->atom->sametag, nullptr); - ASSERT_EQ(lmp->atom->tag_consecutive(), 1); - ASSERT_EQ(lmp->atom->map_style, Atom::MAP_ARRAY); - ASSERT_EQ(lmp->atom->map_user, 1); - ASSERT_EQ(lmp->atom->map_tag_max, 12); - - auto *molecule = lmp->atom->molecule; - auto *molindex = lmp->atom->molindex; - auto *molatom = lmp->atom->molatom; - - ASSERT_EQ(molecule[GETIDX(1)], 1); - ASSERT_EQ(molecule[GETIDX(2)], 1); - ASSERT_EQ(molecule[GETIDX(3)], 1); - ASSERT_EQ(molecule[GETIDX(4)], 2); - ASSERT_EQ(molecule[GETIDX(5)], 2); - ASSERT_EQ(molecule[GETIDX(6)], 2); - ASSERT_EQ(molecule[GETIDX(7)], 3); - ASSERT_EQ(molecule[GETIDX(8)], 3); - ASSERT_EQ(molecule[GETIDX(9)], 3); - ASSERT_EQ(molecule[GETIDX(10)], 0); - ASSERT_EQ(molecule[GETIDX(11)], 0); - ASSERT_EQ(molecule[GETIDX(12)], 0); - 
ASSERT_EQ(molindex[GETIDX(1)], 0); - ASSERT_EQ(molindex[GETIDX(2)], 0); - ASSERT_EQ(molindex[GETIDX(3)], 0); - ASSERT_EQ(molindex[GETIDX(4)], 0); - ASSERT_EQ(molindex[GETIDX(5)], 0); - ASSERT_EQ(molindex[GETIDX(6)], 0); - ASSERT_EQ(molindex[GETIDX(7)], 0); - ASSERT_EQ(molindex[GETIDX(8)], 0); - ASSERT_EQ(molindex[GETIDX(9)], 0); - ASSERT_EQ(molindex[GETIDX(10)], -1); - ASSERT_EQ(molindex[GETIDX(11)], -1); - ASSERT_EQ(molindex[GETIDX(12)], -1); - ASSERT_EQ(molatom[GETIDX(1)], 0); - ASSERT_EQ(molatom[GETIDX(2)], 1); - ASSERT_EQ(molatom[GETIDX(3)], 2); - ASSERT_EQ(molatom[GETIDX(4)], 0); - ASSERT_EQ(molatom[GETIDX(5)], 1); - ASSERT_EQ(molatom[GETIDX(6)], 2); - ASSERT_EQ(molatom[GETIDX(7)], 0); - ASSERT_EQ(molatom[GETIDX(8)], 1); - ASSERT_EQ(molatom[GETIDX(9)], 2); - ASSERT_EQ(molatom[GETIDX(10)], -1); - ASSERT_EQ(molatom[GETIDX(11)], -1); - ASSERT_EQ(molatom[GETIDX(12)], -1); - - BEGIN_HIDE_OUTPUT(); - command("clear"); - command("units real"); - command("molecule twomols h2o.mol co2.mol offset 2 1 1 0 0"); - command("atom_style template twomols"); - command("pair_style zero 4.0"); - command("bond_style zero"); - command("angle_style zero"); - command("atom_modify map array"); - command("read_data test_atom_styles.data"); - END_HIDE_OUTPUT(); - ASSERT_THAT(std::string(lmp->atom->atom_style), Eq("template")); - ASSERT_NE(lmp->atom->avec, nullptr); - - ASSERT_EQ(lmp->atom->natoms, 12); - ASSERT_EQ(lmp->atom->nlocal, 12); - ASSERT_EQ(lmp->atom->nbonds, 6); - ASSERT_EQ(lmp->atom->nangles, 3); - ASSERT_EQ(lmp->atom->nbondtypes, 2); - ASSERT_EQ(lmp->atom->nangletypes, 2); - ASSERT_EQ(lmp->atom->nghost, 0); - ASSERT_NE(lmp->atom->nmax, -1); - ASSERT_EQ(lmp->atom->tag_enable, 1); - ASSERT_EQ(lmp->atom->molecular, Atom::TEMPLATE); - ASSERT_EQ(lmp->atom->ntypes, 4); - ASSERT_NE(lmp->atom->sametag, nullptr); - ASSERT_EQ(lmp->atom->tag_consecutive(), 1); - ASSERT_EQ(lmp->atom->map_style, Atom::MAP_ARRAY); - ASSERT_EQ(lmp->atom->map_user, 1); - ASSERT_EQ(lmp->atom->map_tag_max, 
12); - - molecule = lmp->atom->molecule; - molindex = lmp->atom->molindex; - molatom = lmp->atom->molatom; - - ASSERT_EQ(molindex[GETIDX(1)], 0); - ASSERT_EQ(molindex[GETIDX(2)], 0); - ASSERT_EQ(molindex[GETIDX(3)], 0); - ASSERT_EQ(molindex[GETIDX(4)], 0); - ASSERT_EQ(molindex[GETIDX(5)], 0); - ASSERT_EQ(molindex[GETIDX(6)], 0); - ASSERT_EQ(molindex[GETIDX(7)], 0); - ASSERT_EQ(molindex[GETIDX(8)], 0); - ASSERT_EQ(molindex[GETIDX(9)], 0); - ASSERT_EQ(molindex[GETIDX(10)], -1); - ASSERT_EQ(molindex[GETIDX(11)], -1); - ASSERT_EQ(molindex[GETIDX(12)], -1); - ASSERT_EQ(molatom[GETIDX(1)], 0); - ASSERT_EQ(molatom[GETIDX(2)], 1); - ASSERT_EQ(molatom[GETIDX(3)], 2); - ASSERT_EQ(molatom[GETIDX(4)], 0); - ASSERT_EQ(molatom[GETIDX(5)], 1); - ASSERT_EQ(molatom[GETIDX(6)], 2); - ASSERT_EQ(molatom[GETIDX(7)], 0); - ASSERT_EQ(molatom[GETIDX(8)], 1); - ASSERT_EQ(molatom[GETIDX(9)], 2); - ASSERT_EQ(molatom[GETIDX(10)], -1); - ASSERT_EQ(molatom[GETIDX(11)], -1); - ASSERT_EQ(molatom[GETIDX(12)], -1); - - auto *x = lmp->atom->x; - auto *v = lmp->atom->v; - auto *type = lmp->atom->type; - - EXPECT_NEAR(x[GETIDX(10)][0], 2.0, EPSILON); - EXPECT_NEAR(x[GETIDX(10)][1], -2.0, EPSILON); - EXPECT_NEAR(x[GETIDX(10)][2], 0.1, EPSILON); - EXPECT_NEAR(x[GETIDX(11)][0], 2.0, EPSILON); - EXPECT_NEAR(x[GETIDX(11)][1], 2.0, EPSILON); - EXPECT_NEAR(x[GETIDX(11)][2], -2.1, EPSILON); - EXPECT_NEAR(x[GETIDX(12)][0], 2.0, EPSILON); - EXPECT_NEAR(x[GETIDX(12)][1], -2.0, EPSILON); - EXPECT_NEAR(x[GETIDX(12)][2], 2.1, EPSILON); - EXPECT_NEAR(v[GETIDX(1)][0], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(1)][1], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(1)][2], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(2)][0], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(2)][1], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(2)][2], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(3)][0], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(3)][1], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(3)][2], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(4)][0], 0.0, EPSILON); - 
EXPECT_NEAR(v[GETIDX(4)][1], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(4)][2], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(5)][0], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(5)][1], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(5)][2], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(6)][0], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(6)][1], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(6)][2], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(7)][0], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(7)][1], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(7)][2], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(8)][0], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(8)][1], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(8)][2], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(9)][0], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(9)][1], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(9)][2], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(10)][0], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(10)][1], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(10)][2], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(11)][0], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(11)][1], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(11)][2], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(12)][0], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(12)][1], 0.0, EPSILON); - EXPECT_NEAR(v[GETIDX(12)][2], 0.0, EPSILON); - ASSERT_EQ(type[GETIDX(1)], 1); - ASSERT_EQ(type[GETIDX(2)], 2); - ASSERT_EQ(type[GETIDX(3)], 2); - ASSERT_EQ(type[GETIDX(4)], 1); - ASSERT_EQ(type[GETIDX(5)], 2); - ASSERT_EQ(type[GETIDX(6)], 2); - ASSERT_EQ(type[GETIDX(7)], 1); - ASSERT_EQ(type[GETIDX(8)], 2); - ASSERT_EQ(type[GETIDX(9)], 2); - ASSERT_EQ(type[GETIDX(10)], 3); - ASSERT_EQ(type[GETIDX(11)], 3); - ASSERT_EQ(type[GETIDX(12)], 4); - - BEGIN_HIDE_OUTPUT(); - command("group two id 7:10"); - command("delete_atoms group two compress no"); - command("write_restart test_atom_styles.restart"); - command("clear"); - command("molecule twomols h2o.mol co2.mol offset 2 1 1 0 0"); - command("read_restart test_atom_styles.restart"); - command("replicate 1 1 2 bbox"); - END_HIDE_OUTPUT(); - 
ASSERT_THAT(std::string(lmp->atom->atom_style), Eq("template")); - ASSERT_NE(lmp->atom->avec, nullptr); - ASSERT_EQ(lmp->atom->natoms, 16); - ASSERT_EQ(lmp->atom->nbonds, 8); - ASSERT_EQ(lmp->atom->nangles, 4); - ASSERT_EQ(lmp->atom->nlocal, 16); - ASSERT_EQ(lmp->atom->nghost, 0); - ASSERT_NE(lmp->atom->nmax, -1); - ASSERT_EQ(lmp->atom->tag_enable, 1); - ASSERT_EQ(lmp->atom->q_flag, 0); - ASSERT_EQ(lmp->atom->molecular, Atom::TEMPLATE); - ASSERT_EQ(lmp->atom->ntypes, 4); - ASSERT_EQ(lmp->atom->tag_consecutive(), 0); - ASSERT_EQ(lmp->atom->map_tag_max, 24); - - type = lmp->atom->type; - molecule = lmp->atom->molecule; - molindex = lmp->atom->molindex; - molatom = lmp->atom->molatom; - ASSERT_EQ(type[GETIDX(1)], 1); - ASSERT_EQ(type[GETIDX(2)], 2); - ASSERT_EQ(type[GETIDX(3)], 2); - ASSERT_EQ(type[GETIDX(4)], 1); - ASSERT_EQ(type[GETIDX(5)], 2); - ASSERT_EQ(type[GETIDX(6)], 2); - ASSERT_EQ(type[GETIDX(11)], 3); - ASSERT_EQ(type[GETIDX(12)], 4); - ASSERT_EQ(type[GETIDX(13)], 1); - ASSERT_EQ(type[GETIDX(14)], 2); - ASSERT_EQ(type[GETIDX(15)], 2); - ASSERT_EQ(type[GETIDX(16)], 1); - ASSERT_EQ(type[GETIDX(17)], 2); - ASSERT_EQ(type[GETIDX(18)], 2); - ASSERT_EQ(type[GETIDX(23)], 3); - ASSERT_EQ(type[GETIDX(24)], 4); - - ASSERT_EQ(molindex[GETIDX(1)], 0); - ASSERT_EQ(molindex[GETIDX(2)], 0); - ASSERT_EQ(molindex[GETIDX(3)], 0); - ASSERT_EQ(molindex[GETIDX(4)], 0); - ASSERT_EQ(molindex[GETIDX(5)], 0); - ASSERT_EQ(molindex[GETIDX(6)], 0); - ASSERT_EQ(molindex[GETIDX(11)], -1); - ASSERT_EQ(molindex[GETIDX(12)], -1); - ASSERT_EQ(molindex[GETIDX(13)], 0); - ASSERT_EQ(molindex[GETIDX(14)], 0); - ASSERT_EQ(molindex[GETIDX(15)], 0); - ASSERT_EQ(molindex[GETIDX(16)], 0); - ASSERT_EQ(molindex[GETIDX(17)], 0); - ASSERT_EQ(molindex[GETIDX(18)], 0); - ASSERT_EQ(molindex[GETIDX(23)], -1); - ASSERT_EQ(molindex[GETIDX(24)], -1); - ASSERT_EQ(molatom[GETIDX(1)], 0); - ASSERT_EQ(molatom[GETIDX(2)], 1); - ASSERT_EQ(molatom[GETIDX(3)], 2); - ASSERT_EQ(molatom[GETIDX(4)], 0); - 
ASSERT_EQ(molatom[GETIDX(5)], 1); - ASSERT_EQ(molatom[GETIDX(6)], 2); - ASSERT_EQ(molatom[GETIDX(11)], -1); - ASSERT_EQ(molatom[GETIDX(12)], -1); - ASSERT_EQ(molatom[GETIDX(13)], 0); - ASSERT_EQ(molatom[GETIDX(14)], 1); - ASSERT_EQ(molatom[GETIDX(15)], 2); - ASSERT_EQ(molatom[GETIDX(16)], 0); - ASSERT_EQ(molatom[GETIDX(17)], 1); - ASSERT_EQ(molatom[GETIDX(18)], 2); - ASSERT_EQ(molatom[GETIDX(23)], -1); - ASSERT_EQ(molatom[GETIDX(24)], -1); - - BEGIN_HIDE_OUTPUT(); - command("reset_atoms id"); - END_HIDE_OUTPUT(); - ASSERT_EQ(lmp->atom->tag_consecutive(), 1); - ASSERT_EQ(lmp->atom->map_tag_max, 16); - - type = lmp->atom->type; - molecule = lmp->atom->molecule; - molindex = lmp->atom->molindex; - molatom = lmp->atom->molatom; - ASSERT_EQ(type[GETIDX(1)], 1); - ASSERT_EQ(type[GETIDX(2)], 2); - ASSERT_EQ(type[GETIDX(3)], 2); - ASSERT_EQ(type[GETIDX(4)], 1); - ASSERT_EQ(type[GETIDX(5)], 2); - ASSERT_EQ(type[GETIDX(6)], 2); - ASSERT_EQ(type[GETIDX(7)], 4); - ASSERT_EQ(type[GETIDX(8)], 3); - ASSERT_EQ(type[GETIDX(9)], 1); - ASSERT_EQ(type[GETIDX(10)], 2); - ASSERT_EQ(type[GETIDX(11)], 2); - ASSERT_EQ(type[GETIDX(12)], 1); - ASSERT_EQ(type[GETIDX(13)], 2); - ASSERT_EQ(type[GETIDX(14)], 2); - ASSERT_EQ(type[GETIDX(15)], 4); - ASSERT_EQ(type[GETIDX(16)], 3); - ASSERT_EQ(molatom[GETIDX(1)], 0); - ASSERT_EQ(molatom[GETIDX(2)], 1); - ASSERT_EQ(molatom[GETIDX(3)], 2); - ASSERT_EQ(molatom[GETIDX(4)], 0); - ASSERT_EQ(molatom[GETIDX(5)], 1); - ASSERT_EQ(molatom[GETIDX(6)], 2); - ASSERT_EQ(molatom[GETIDX(7)], -1); - ASSERT_EQ(molatom[GETIDX(8)], -1); - ASSERT_EQ(molatom[GETIDX(9)], 0); - ASSERT_EQ(molatom[GETIDX(10)], 1); - ASSERT_EQ(molatom[GETIDX(11)], 2); - ASSERT_EQ(molatom[GETIDX(12)], 0); - ASSERT_EQ(molatom[GETIDX(13)], 1); - ASSERT_EQ(molatom[GETIDX(14)], 2); - ASSERT_EQ(molatom[GETIDX(15)], -1); - ASSERT_EQ(molatom[GETIDX(16)], -1); -} - -TEST_F(AtomStyleTest, template_charge) -{ - if (!LAMMPS::is_installed_pkg("MOLECULE")) GTEST_SKIP(); - BEGIN_HIDE_OUTPUT(); - 
command("molecule twomols h2o.mol co2.mol offset 2 1 1 0 0"); - command("atom_style hybrid template twomols charge"); - command("newton on"); - END_HIDE_OUTPUT(); - - AtomState expected; - expected.atom_style = "hybrid"; - expected.molecular = Atom::TEMPLATE; - expected.nbondtypes = 2; - expected.nangletypes = 2; - expected.tag_enable = 1; - expected.has_type = true; - expected.has_mask = true; - expected.has_image = true; - expected.has_x = true; - expected.has_v = true; - expected.has_f = true; - expected.molecule_flag = 1; - expected.molindex_flag = 1; - expected.molatom_flag = 1; expected.q_flag = 1; + expected.molecule_flag = 1; + expected.molindex_flag = 1; + expected.molatom_flag = 1; expected.nmolecule = 2; expected.map_style = 3; ASSERT_ATOM_STATE_EQ(lmp->atom, expected); - auto *hybrid = dynamic_cast(lmp->atom->avec); - ASSERT_THAT(std::string(lmp->atom->atom_style), Eq("hybrid")); - ASSERT_EQ(hybrid->nstyles, 2); - ASSERT_THAT(std::string(hybrid->keywords[0]), Eq("template")); - ASSERT_THAT(std::string(hybrid->keywords[1]), Eq("charge")); - ASSERT_NE(hybrid->styles[0], nullptr); - ASSERT_NE(hybrid->styles[1], nullptr); - BEGIN_HIDE_OUTPUT(); command("create_box 4 box bond/types 2 angle/types 2 "); command("create_atoms 0 single -2.0 2.0 0.1 mol twomols 65234"); @@ -3167,9 +2764,6 @@ TEST_F(AtomStyleTest, template_charge) command("mass 2 1.0"); command("mass 3 12.0"); command("mass 4 16.0"); - command("set atom 10 charge 0.7"); - command("set atom 11 charge -0.35"); - command("set atom 12 charge -0.35"); command("bond_style zero"); command("bond_coeff 1 1.0"); command("bond_coeff 2 1.16"); @@ -3177,15 +2771,8 @@ TEST_F(AtomStyleTest, template_charge) command("angle_coeff * 109.0"); command("pair_coeff * *"); END_HIDE_OUTPUT(); + ASSERT_THAT(std::string(lmp->atom->atom_style), Eq("template")); ASSERT_NE(lmp->atom->avec, nullptr); - hybrid = dynamic_cast(lmp->atom->avec); - ASSERT_THAT(std::string(lmp->atom->atom_style), Eq("hybrid")); - 
ASSERT_EQ(hybrid->nstyles, 2); - ASSERT_THAT(std::string(hybrid->keywords[0]), Eq("template")); - ASSERT_THAT(std::string(hybrid->keywords[1]), Eq("charge")); - ASSERT_NE(hybrid->styles[0], nullptr); - ASSERT_NE(hybrid->styles[1], nullptr); - ASSERT_EQ(lmp->atom->natoms, 12); ASSERT_EQ(lmp->atom->nbonds, 6); ASSERT_EQ(lmp->atom->nbondtypes, 2); @@ -3197,7 +2784,6 @@ TEST_F(AtomStyleTest, template_charge) ASSERT_NE(lmp->atom->nmax, -1); ASSERT_EQ(lmp->atom->tag_enable, 1); ASSERT_EQ(lmp->atom->molecular, Atom::TEMPLATE); - ASSERT_EQ(lmp->atom->q_flag, 1); ASSERT_EQ(lmp->atom->ntypes, 4); ASSERT_EQ(lmp->atom->nextra_grow, 0); ASSERT_EQ(lmp->atom->nextra_restart, 0); @@ -3220,14 +2806,14 @@ TEST_F(AtomStyleTest, template_charge) command("units real"); command("newton off on"); command("molecule twomols h2o.mol co2.mol offset 2 1 1 0 0"); - command("atom_style hybrid template twomols charge"); + command("atom_style template twomols"); command("pair_style zero 4.0"); command("bond_style zero"); command("angle_style zero"); command("atom_modify map array"); command("read_data test_atom_styles.data"); END_HIDE_OUTPUT(); - ASSERT_THAT(std::string(lmp->atom->atom_style), Eq("hybrid")); + ASSERT_THAT(std::string(lmp->atom->atom_style), Eq("template")); ASSERT_NE(lmp->atom->avec, nullptr); ASSERT_EQ(lmp->atom->natoms, 12); @@ -3292,14 +2878,14 @@ TEST_F(AtomStyleTest, template_charge) command("clear"); command("units real"); command("molecule twomols h2o.mol co2.mol offset 2 1 1 0 0"); - command("atom_style hybrid template twomols charge"); + command("atom_style template twomols"); command("pair_style zero 4.0"); command("bond_style zero"); command("angle_style zero"); command("atom_modify map array"); command("read_data test_atom_styles.data"); END_HIDE_OUTPUT(); - ASSERT_THAT(std::string(lmp->atom->atom_style), Eq("hybrid")); + ASSERT_THAT(std::string(lmp->atom->atom_style), Eq("template")); ASSERT_NE(lmp->atom->avec, nullptr); ASSERT_EQ(lmp->atom->natoms, 12); @@ -3351,7 
+2937,6 @@ TEST_F(AtomStyleTest, template_charge) auto *x = lmp->atom->x; auto *v = lmp->atom->v; auto *type = lmp->atom->type; - auto *q = lmp->atom->q; EXPECT_NEAR(x[GETIDX(10)][0], 2.0, EPSILON); EXPECT_NEAR(x[GETIDX(10)][1], -2.0, EPSILON); @@ -3398,18 +2983,6 @@ TEST_F(AtomStyleTest, template_charge) EXPECT_NEAR(v[GETIDX(12)][0], 0.0, EPSILON); EXPECT_NEAR(v[GETIDX(12)][1], 0.0, EPSILON); EXPECT_NEAR(v[GETIDX(12)][2], 0.0, EPSILON); - EXPECT_NEAR(q[GETIDX(1)], -0.8472, EPSILON); - EXPECT_NEAR(q[GETIDX(2)], 0.4236, EPSILON); - EXPECT_NEAR(q[GETIDX(3)], 0.4236, EPSILON); - EXPECT_NEAR(q[GETIDX(4)], -0.8472, EPSILON); - EXPECT_NEAR(q[GETIDX(5)], 0.4236, EPSILON); - EXPECT_NEAR(q[GETIDX(6)], 0.4236, EPSILON); - EXPECT_NEAR(q[GETIDX(7)], -0.8472, EPSILON); - EXPECT_NEAR(q[GETIDX(8)], 0.4236, EPSILON); - EXPECT_NEAR(q[GETIDX(9)], 0.4236, EPSILON); - EXPECT_NEAR(q[GETIDX(10)], 0.7, EPSILON); - EXPECT_NEAR(q[GETIDX(11)], -0.35, EPSILON); - EXPECT_NEAR(q[GETIDX(12)], -0.35, EPSILON); ASSERT_EQ(type[GETIDX(1)], 1); ASSERT_EQ(type[GETIDX(2)], 2); ASSERT_EQ(type[GETIDX(3)], 2); @@ -3432,18 +3005,18 @@ TEST_F(AtomStyleTest, template_charge) command("read_restart test_atom_styles.restart"); command("replicate 1 1 2 bbox"); END_HIDE_OUTPUT(); - ASSERT_THAT(std::string(lmp->atom->atom_style), Eq("hybrid")); + ASSERT_THAT(std::string(lmp->atom->atom_style), Eq("template")); ASSERT_NE(lmp->atom->avec, nullptr); ASSERT_EQ(lmp->atom->natoms, 16); - + ASSERT_EQ(lmp->atom->nbonds, 8); + ASSERT_EQ(lmp->atom->nangles, 4); ASSERT_EQ(lmp->atom->nlocal, 16); ASSERT_EQ(lmp->atom->nghost, 0); ASSERT_NE(lmp->atom->nmax, -1); ASSERT_EQ(lmp->atom->tag_enable, 1); + ASSERT_EQ(lmp->atom->q_flag, 1); ASSERT_EQ(lmp->atom->molecular, Atom::TEMPLATE); ASSERT_EQ(lmp->atom->ntypes, 4); - ASSERT_EQ(lmp->atom->nbonds, 8); - ASSERT_EQ(lmp->atom->nangles, 4); ASSERT_EQ(lmp->atom->tag_consecutive(), 0); ASSERT_EQ(lmp->atom->map_tag_max, 24); From 5ea26e6cc1b5052d40e461f3fb64d624b2c081dd Mon Sep 17 
00:00:00 2001 From: alphataubio Date: Wed, 21 Aug 2024 20:07:04 -0400 Subject: [PATCH 019/294] added RegSphereKokkos to bugfix dynamic_cast in FixWallRegionKokkos --- src/KOKKOS/region_sphere_kokkos.cpp | 168 ++++++++++++++++++++++++++++ src/KOKKOS/region_sphere_kokkos.h | 66 +++++++++++ 2 files changed, 234 insertions(+) create mode 100644 src/KOKKOS/region_sphere_kokkos.cpp create mode 100644 src/KOKKOS/region_sphere_kokkos.h diff --git a/src/KOKKOS/region_sphere_kokkos.cpp b/src/KOKKOS/region_sphere_kokkos.cpp new file mode 100644 index 0000000000..e27b5ef14c --- /dev/null +++ b/src/KOKKOS/region_sphere_kokkos.cpp @@ -0,0 +1,168 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Mitch Murphy (alphataubio at gmail.com) +------------------------------------------------------------------------- */ + +#include "region_sphere_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +template +RegSphereKokkos::RegSphereKokkos(LAMMPS *lmp, int narg, char **arg) + : RegSphere(lmp, narg, arg) +{ + atomKK = (AtomKokkos*) atom; +} + +/* ---------------------------------------------------------------------- + inside = 1 if x,y,z is inside or on surface + inside = 0 if x,y,z is outside and not on surface +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +int RegSphereKokkos::k_inside(double x, double y, double z) const +{ + const double delx = x - xc; + const double dely = y - yc; + const double delz = z - zc; + const double r = sqrt(delx * delx + dely * dely + delz * delz); + + if (r <= radius) return 1; + return 0; +} + +template +void RegSphereKokkos::match_all_kokkos(int groupbit_in, DAT::tdual_int_1d k_match_in) +{ /* for every local atom whose mask has a groupbit_in bit set, store match(x,y,z) in the k_match_in view; entries of all other atoms are not written */ + groupbit = groupbit_in; + d_match = k_match_in.template view(); + + auto execution_space = ExecutionSpaceFromDevice::space; + atomKK->sync(execution_space, X_MASK | MASK_MASK); /* the kernel reads positions and group masks */ + + auto d_x = atomKK->k_x.template view(); + auto d_mask = atomKK->k_mask.template view(); + + copymode = 1; /* the kernel below uses the members groupbit and d_match and calls match(), so it must capture *this by value (KOKKOS_CLASS_LAMBDA); a plain KOKKOS_LAMBDA would capture the host 'this' pointer and dereference it inside the kernel. NOTE(review): copymode = 1 is presumably what keeps the destructor of that temporary copy from releasing shared data - confirm in Region */ + + Kokkos::parallel_for(atom->nlocal, KOKKOS_CLASS_LAMBDA( const int &i ) { + if (d_mask[i] & groupbit) { + double x_tmp = d_x(i,0); + double y_tmp = d_x(i,1); + double z_tmp = d_x(i,2); + d_match[i] = match(x_tmp,y_tmp,z_tmp); + }}); + + copymode = 0; + + k_match_in.template modify(); /* mark the view written by the kernel as modified */ +} + +/* ---------------------------------------------------------------------- + determine if point x,y,z is a match to
region volume + XOR computes 0 if 2 args are the same, 1 if different + note that k_inside() returns 1 for points on surface of region + thus point on surface of exterior region will not match + if region has variable shape, invoke shape_update() once per timestep + if region is dynamic, apply inverse transform to x,y,z + unmove first, then unrotate, so don't have to change rotation point + caller is responsible for wrapping this call with + modify->clearstep_compute() and modify->addstep_compute() if needed +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +int RegSphereKokkos::match(double x, double y, double z) const +{ + if (dynamic) inverse_transform(x,y,z); + return !(k_inside(x,y,z) ^ interior); +} + +/* ---------------------------------------------------------------------- + transform a point x,y,z in moved space back to region space + undisplace first, then unrotate (around original P) +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void RegSphereKokkos::inverse_transform(double &x, double &y, double &z) const +{ + if (moveflag) { + x -= dx; + y -= dy; + z -= dz; + } + if (rotateflag) rotate(x,y,z,-theta); +} + +/* ---------------------------------------------------------------------- + rotate x,y,z by angle via right-hand rule around point and runit normal + sign of angle determines whether rotating forward/backward in time + return updated x,y,z + R = vector axis of rotation + P = point = point to rotate around + R0 = runit = unit vector for R + X0 = x,y,z = initial coord of atom + D = X0 - P = vector from P to X0 + C = (D dot R0) R0 = projection of D onto R, i.e. Dparallel + A = D - C = vector from R line to X0, i.e. 
Dperp + B = R0 cross A = vector perp to A in plane of rotation, same len as A + A,B define plane of circular rotation around R line + new x,y,z = P + C + A cos(angle) + B sin(angle) +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void RegSphereKokkos::rotate(double &x, double &y, double &z, double angle) const +{ + double a[3],b[3],c[3],d[3],disp[3]; + + double sine = sin(angle); + double cosine = cos(angle); + d[0] = x - point[0]; + d[1] = y - point[1]; + d[2] = z - point[2]; + double x0dotr = d[0]*runit[0] + d[1]*runit[1] + d[2]*runit[2]; + c[0] = x0dotr * runit[0]; + c[1] = x0dotr * runit[1]; + c[2] = x0dotr * runit[2]; + a[0] = d[0] - c[0]; + a[1] = d[1] - c[1]; + a[2] = d[2] - c[2]; + b[0] = runit[1]*a[2] - runit[2]*a[1]; + b[1] = runit[2]*a[0] - runit[0]*a[2]; + b[2] = runit[0]*a[1] - runit[1]*a[0]; + disp[0] = a[0]*cosine + b[0]*sine; + disp[1] = a[1]*cosine + b[1]*sine; + disp[2] = a[2]*cosine + b[2]*sine; + x = point[0] + c[0] + disp[0]; + y = point[1] + c[1] + disp[1]; + z = point[2] + c[2] + disp[2]; +} + +namespace LAMMPS_NS { +template class RegSphereKokkos; +#ifdef LMP_KOKKOS_GPU +template class RegSphereKokkos; +#endif +} + diff --git a/src/KOKKOS/region_sphere_kokkos.h b/src/KOKKOS/region_sphere_kokkos.h new file mode 100644 index 0000000000..8ccd6217bf --- /dev/null +++ b/src/KOKKOS/region_sphere_kokkos.h @@ -0,0 +1,66 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef REGION_CLASS +// clang-format off +RegionStyle(sphere/kk,RegSphereKokkos); +RegionStyle(sphere/kk/device,RegSphereKokkos); +RegionStyle(sphere/kk/host,RegSphereKokkos); +// clang-format on +#else + +// clang-format off +#ifndef LMP_REGION_SPHERE_KOKKOS_H +#define LMP_REGION_SPHERE_KOKKOS_H + +#include "region_sphere.h" +#include "kokkos_base.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +template +class RegSphereKokkos : public RegSphere, public KokkosBase { + friend class FixPour; + + public: + typedef DeviceType device_type; + typedef ArrayTypes AT; + + RegSphereKokkos(class LAMMPS *, int, char **); + + void match_all_kokkos(int, DAT::tdual_int_1d) override; + + //KOKKOS_INLINE_FUNCTION + //void operator()(TagRegBlockMatchAll, const int&) const; + + private: + int groupbit; + typename AT::t_int_1d d_match; + + KOKKOS_INLINE_FUNCTION + int k_inside(double, double, double) const; + KOKKOS_INLINE_FUNCTION + int match(double, double, double) const; + KOKKOS_INLINE_FUNCTION + void inverse_transform(double &, double &, double &) const; + KOKKOS_INLINE_FUNCTION + void rotate(double &, double &, double &, double) const; + +}; + +} + +#endif +#endif + From 623e1d68e53a55177d9388f369510215ecb385cc Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 21 Aug 2024 20:07:14 -0400 Subject: [PATCH 020/294] oops --- src/region_sphere.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/region_sphere.h b/src/region_sphere.h index f0e0bd195c..1fdda7fd29 100644 --- a/src/region_sphere.h +++ b/src/region_sphere.h @@ -36,7 +36,7 @@ class RegSphere : public Region { void set_velocity_shape() override; void velocity_contact_shape(double *, double *) override; - private: + protected: // KOKKOS subclass needs protected not private double xc, yc, zc; double radius; int xstyle, xvar; From d07e1b918e0b2f35db55d85d1b93e24c1412d121 Mon Sep 17 00:00:00 2001 From: alphataubio 
Date: Wed, 21 Aug 2024 20:07:43 -0400 Subject: [PATCH 021/294] update email and fix typo --- src/KOKKOS/fix_wall_region_kokkos.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/fix_wall_region_kokkos.cpp b/src/KOKKOS/fix_wall_region_kokkos.cpp index 96bf968293..1675cee0ce 100644 --- a/src/KOKKOS/fix_wall_region_kokkos.cpp +++ b/src/KOKKOS/fix_wall_region_kokkos.cpp @@ -13,7 +13,7 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing author: Mitch Murphy (alphataubio@gmail.com) + Contributing author: Mitch Murphy (alphataubio at gmail.com) ------------------------------------------------------------------------- */ #include "fix_wall_region_kokkos.h" @@ -83,7 +83,7 @@ void FixWallRegionKokkos::post_force(int vflag) int nlocal = atomKK->nlocal; region->prematch(); - DAT::tdual_int_1d k_match = DAT::tdual_int_1d("efield:k_match",nlocal); + DAT::tdual_int_1d k_match = DAT::tdual_int_1d("wall_region:k_match",nlocal); KokkosBase* regionKKBase = dynamic_cast(region); regionKKBase->match_all_kokkos(groupbit,k_match); k_match.template sync(); From 4a115d66e026a21a5d0661c712acc327050bb83d Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 21 Aug 2024 20:54:08 -0400 Subject: [PATCH 022/294] update region doc for sphere/kk --- doc/src/region.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/src/region.rst b/doc/src/region.rst index 9d2af01de1..e665c87a0d 100644 --- a/doc/src/region.rst +++ b/doc/src/region.rst @@ -73,7 +73,7 @@ Syntax Rx,Ry,Rz = axis of rotation vector *open* value = integer from 1-6 corresponding to face index (see below) -* accelerated styles (with same args) = *block/kk* +* accelerated styles (with same args) = *block/kk, sphere/kk* Examples """""""" @@ -399,9 +399,9 @@ sub-regions can be defined with the *open* keyword. .. 
note:: - Currently, only *block* style regions are supported by Kokkos. The + Currently, only *block* and *sphere* style regions are supported by KOKKOS. The code using the region (such as a fix or compute) must also be - supported by Kokkos or no acceleration will occur. + supported by KOKKOS or no acceleration will occur. ---------- From 40cae6e79aa7efc0b6e9890beea7f107b82c9224 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Thu, 22 Aug 2024 21:06:04 -0400 Subject: [PATCH 023/294] first draft charmm_c36_jul24.gz to LAMMPS data converter --- tools/charmm36/charmm36.py | 188 +++++++++++++++++++++++++++++++++++++ 1 file changed, 188 insertions(+) create mode 100644 tools/charmm36/charmm36.py diff --git a/tools/charmm36/charmm36.py b/tools/charmm36/charmm36.py new file mode 100644 index 0000000000..076700638b --- /dev/null +++ b/tools/charmm36/charmm36.py @@ -0,0 +1,188 @@ + +# python3 charmm36.py | gzip -9 > charmm_c36_jul24.gz + +################################################################ +# FIXME: dihedral weighting factor +################################################################ + +import re + +mass = dict() + +# BONDS +# V(bond) = Kb(b - b0)**2 +# Kb: kcal/mole/A**2 +# b0: A +bond = dict() + +# ANGLES +# V(angle) = Ktheta(Theta - Theta0)**2 +# V(Urey-Bradley) = Kub(S - S0)**2 +# Ktheta: kcal/mole/rad**2 +# Theta0: degrees +# Kub: kcal/mole/A**2 (Urey-Bradley) +# S0: A +angle = dict() + +# DIHEDRALS +# V(dihedral) = Kchi(1 + cos(n(chi) - delta)) +# Kchi: kcal/mole +# n: multiplicity +# delta: degrees +dihedral = dict() + +# IMPROPER +# V(improper) = Kpsi(psi - psi0)**2 +# Kpsi: kcal/mole/rad**2 +# psi0: degrees +# note that the second column of numbers (0) is ignored +improper = dict() + +# NONBONDED nbxmod 5 atom cdiel fshift vatom vdistance vfswitch - +# cutnb 14.0 ctofnb 12.0 ctonnb 10.0 eps 1.0 e14fac 1.0 wmin 1.5 +# V(Lennard-Jones) = Eps,i,j[(Rmin,i,j/ri,j)**12 - 2(Rmin,i,j/ri,j)**6] +# epsilon: kcal/mole, Eps,i,j = sqrt(eps,i * eps,j) +# 
Rmin/2: A, Rmin,i,j = Rmin/2,i + Rmin/2,j +# atom ignored epsilon Rmin/2 ignored eps,1-4 Rmin/2,1-4 +pair = dict() + +prms = [ + "par_all36m_prot.prm", + "par_all36_na.prm", + #"par_all36_carb.prm", + "par_all36_lipid.prm", + "par_all36_cgenff.prm", + #"toppar_all36_moreions.str", + #"toppar/par_interface.prm", + "toppar_water_ions.str"] + +#prms = ["par_all36_lipid.prm"] + +for prm in prms: + + file = open(prm, "r") + + for line in file: + + match = re.search(r"^MASS\s+-1\s+(\w+)\s+(-?\d+.\d+).*", line) + if( match != None ): + mass.update( {match.group(1): match.group(2)} ) + + match = re.search(r"^(\w+)\s+(\w+)\s+(\d+.\d+)\s+(\d+.\d+)\s+.*", line) + if( match != None ): + bond.update( {"{}-{}".format(match.group(1),match.group(2)) : + "{} {}".format(match.group(3),match.group(4))} ) + + match = re.search(r"^(\w+)\s+(\w+)\s+(\w+)\s+(\d+.\d+)\s+(\d+.\d+)\s+(\d+.\d+)\s+(\d+.\d+).*", line) + if( match != None ): + angle.update( {"{}-{}-{}".format(match.group(1),match.group(2),match.group(3)) : + "{} {} {} {}".format(match.group(4),match.group(5),match.group(6),match.group(7))} ) + + match = re.search(r"^(\w+)\s+(\w+)\s+(\w+)\s+(\w+)\s+(-?\d+.\d+)\s+(\d+)\s+(\d+).*", line) + if( match != None ): + dihedral.update( {"{}-{}-{}-{}".format(match.group(1),match.group(2),match.group(3),match.group(4)) : + "{} {} {} 1.00".format(match.group(5),match.group(6),match.group(7))} ) + + match = re.search(r"^(\w+)\s+(\w+)\s+(\w+)\s+(\w+)\s+(\d+.\d+)\s+0\s+(\d+.\d+).*", line) + if( match != None ): + improper.update( {"{}-{}-{}-{}".format(match.group(1),match.group(2),match.group(3),match.group(4)) : + "{} {}".format(match.group(5),match.group(6))} ) + + file.close() + + +#44 atoms +#11 atom types +#42 bonds +#15 bond types +#74 angles +#29 angle types +#100 dihedrals +#36 dihedral types +#44 impropers +#13 improper types + +# Header + +print( "LAMMPS CHARMM36 force field (toppar_c36_jul24.tgz) [https://mackerell.umaryland.edu/charmm_ff.shtml]\n" ) + +print( " ", len(mass), " atom 
types" ) +print( " ", len(bond), " bond types" ) +print( " ", len(angle), " angle types" ) +print( " ", len(dihedral), " dihedral types" ) +print( " ", len(improper), " improper types" ) + +# -------- Atom Type Labels -------- +print( "\nAtom Type Labels\n" ) +i=1 +for k in mass.keys(): + print(" ", i, k) + i+=1 + +# -------- Masses -------- +print( "\nMasses\n" ) +i=1 +for v in mass.values(): + print(" ", i, v) + i+=1 + +# -------- Bond Type Labels -------- +print( "\nBond Type Labels\n" ) +i=1 +for k in bond.keys(): + print(" ", i, k) + i+=1 + +# -------- Bond Coeffs -------- +print( "\nBond Coeffs # harmonic\n" ) +i=1 +for v in bond.values(): + print(" ", i, v) + i+=1 + +# -------- Angle Type Labels -------- +print( "\nAngle Type Labels\n" ) +i=1 +for k in angle.keys(): + print(" ", i, k) + i+=1 + +# -------- Angle Coeffs -------- +print( "\nAngle Coeffs # charmm\n" ) +i=1 +for v in angle.values(): + print(" ", i, v) + i+=1 + +# -------- Dihedral Type Labels -------- +print( "\nDihedral Type Labels\n" ) +i=1 +for k in dihedral.keys(): + print(" ", i, k) + i+=1 + +# -------- Dihedral Coeffs -------- +print( "\nDihedral Coeffs # charmmfsw\n" ) +i=1 +for v in dihedral.values(): + print(" ", i, v) + i+=1 + +# -------- Improper Type Labels -------- +print( "\nImproper Type Labels\n" ) +i=1 +for k in improper.keys(): + print(" ", i, k) + i+=1 + +# -------- Improper Coeffs -------- +print( "\nImproper Coeffs # harmonic\n" ) +i=1 +for v in improper.values(): + print(" ", i, v) + i+=1 + + + + +# -------- Pair Coeffs -------- From 37c312bf8afdebe5dd0e7c105983df866853d4d5 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 24 Aug 2024 05:28:38 -0400 Subject: [PATCH 024/294] added pair coeffs --- tools/charmm36/charmm36.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/tools/charmm36/charmm36.py b/tools/charmm36/charmm36.py index 076700638b..b003046c56 100644 --- a/tools/charmm36/charmm36.py +++ b/tools/charmm36/charmm36.py @@ 
-88,8 +88,18 @@ for prm in prms: improper.update( {"{}-{}-{}-{}".format(match.group(1),match.group(2),match.group(3),match.group(4)) : "{} {}".format(match.group(5),match.group(6))} ) - file.close() + match = re.search(r"^(\w+)\s+0.0+\s+-(\d+.\d+)\s+(\d+.\d+).*", line) + if( match != None ): + pair.update( {match.group(1) : + "{} {:.15} {} {:.15}".format(match.group(2),float(match.group(3))*1.7817974362806774,match.group(2),float(match.group(3))*1.7817974362806774)} ) + match = re.search(r"^(\w+)\s+0.0+\s+-(\d+.\d+)\s+(\d+.\d+)\s+0.0\s+-(\d+.\d+)\s+(\d+.\d+).*", line) + if( match != None ): + pair.update( {match.group(1) : + "{} {:.15} {} {:.15}".format(match.group(2),float(match.group(3))*1.7817974362806774,match.group(4),float(match.group(5))*1.7817974362806774)} ) + + + file.close() #44 atoms #11 atom types @@ -182,7 +192,10 @@ for v in improper.values(): print(" ", i, v) i+=1 - - - # -------- Pair Coeffs -------- +print( "\nPair Coeffs\n" ) +i=1 +for k in mass.keys(): + print(" ", i, pair[k]) + i+=1 + From d9151d745abb3dd2b0e119805624aa577cf87e4d Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Thu, 1 Aug 2024 17:11:06 +0100 Subject: [PATCH 025/294] Create fix qtpie/reaxff --- src/REAXFF/fix_qtpie_reaxff.cpp | 1160 +++++++++++++++++++++++++++++++ src/REAXFF/fix_qtpie_reaxff.h | 149 ++++ src/REAXFF/pair_reaxff.cpp | 6 +- src/fix_efield.h | 1 + 4 files changed, 1314 insertions(+), 2 deletions(-) create mode 100644 src/REAXFF/fix_qtpie_reaxff.cpp create mode 100644 src/REAXFF/fix_qtpie_reaxff.h diff --git a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp new file mode 100644 index 0000000000..695bdb4316 --- /dev/null +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -0,0 +1,1160 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + 
Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Navraj S Lalli & Efstratios Kritikos (Imperial College London) +------------------------------------------------------------------------- */ + +#include "fix_qtpie_reaxff.h" + +#include "atom.h" +#include "citeme.h" +#include "comm.h" +#include "domain.h" +#include "error.h" +#include "fix_efield.h" +#include "force.h" +#include "group.h" +#include "memory.h" +#include "modify.h" +#include "neigh_list.h" +#include "neighbor.h" +#include "pair.h" +#include "region.h" +#include "respa.h" +#include "text_file_reader.h" +#include "update.h" + +#include "pair_reaxff.h" +#include "reaxff_api.h" + +#include +#include +#include + +using namespace LAMMPS_NS; +using namespace FixConst; + +static constexpr double EV_TO_KCAL_PER_MOL = 14.4; +static constexpr double SMALL = 1.0e-14; +static constexpr double QSUMSMALL = 0.00001; + +static const char cite_fix_qtpie_reaxff[] = + "fix qtpie/reaxff command: doi:https://doi.org/10.1016/j.cplett.2007.02.065\n\n" + "@article{chen2007qtpie,\n" + "title={QTPIE: Charge transfer with polarization current equalization. 
A fluctuating charge model with correct asymptotics},\n" + "author={Chen, Jiahao and Martinez, Todd J},\n" + "journal={Chemical physics letters},\n" + "volume={438},\n" + "number={4-6},\n" + "pages={315--320},\n" + "year={2007},\n" + "publisher={Elsevier}\n" + "}\n\n"; + +/* ---------------------------------------------------------------------- */ + +FixQtpieReaxFF::FixQtpieReaxFF(LAMMPS *lmp, int narg, char **arg) : + Fix(lmp, narg, arg), matvecs(0), pertype_option(nullptr) +{ + scalar_flag = 1; + extscalar = 0; + imax = 200; + maxwarn = 1; + + if ((narg < 9) || (narg > 13)) error->all(FLERR,"Illegal fix {} command", style); + + nevery = utils::inumeric(FLERR,arg[3],false,lmp); + if (nevery <= 0) error->all(FLERR,"Illegal fix {} command", style); + + swa = utils::numeric(FLERR,arg[4],false,lmp); + swb = utils::numeric(FLERR,arg[5],false,lmp); + tolerance = utils::numeric(FLERR,arg[6],false,lmp); + pertype_option = utils::strdup(arg[7]); + + // dual CG support only available for OPENMP variant + // check for compatibility is in Fix::post_constructor() + + dual_enabled = 0; + + int iarg = 9; + while (iarg < narg) { + if (strcmp(arg[iarg],"dual") == 0) dual_enabled = 1; + else if (strcmp(arg[iarg],"nowarn") == 0) maxwarn = 0; + else if (strcmp(arg[iarg],"maxiter") == 0) { + if (iarg+1 > narg-1) + error->all(FLERR,"Illegal fix {} command", style); + imax = utils::numeric(FLERR,arg[iarg+1],false,lmp); + iarg++; + } else error->all(FLERR,"Illegal fix {} command", style); + iarg++; + } + shld = nullptr; + + nn = n_cap = 0; + nmax = 0; + m_fill = m_cap = 0; + pack_flag = 0; + s = nullptr; + t = nullptr; + nprev = 4; + + Hdia_inv = nullptr; + b_s = nullptr; + chi_field = nullptr; + b_t = nullptr; + b_prc = nullptr; + b_prm = nullptr; + + // CG + + p = nullptr; + q = nullptr; + r = nullptr; + d = nullptr; + + // H matrix + + H.firstnbr = nullptr; + H.numnbrs = nullptr; + H.jlist = nullptr; + H.val = nullptr; + + // dual CG support + // Update comm sizes for this fix + + if 
(dual_enabled) comm_forward = comm_reverse = 2; + else comm_forward = comm_reverse = 1; + + // perform initial allocation of atom-based arrays + // register with Atom class + + reaxff = dynamic_cast(force->pair_match("^reaxff",0)); + + s_hist = t_hist = nullptr; + atom->add_callback(Atom::GROW); +} + +/* ---------------------------------------------------------------------- */ + +FixQtpieReaxFF::~FixQtpieReaxFF() +{ + if (copymode) return; + + delete[] pertype_option; + + // unregister callbacks to this fix from Atom class + + atom->delete_callback(id,Atom::GROW); + + memory->destroy(s_hist); + memory->destroy(t_hist); + + FixQtpieReaxFF::deallocate_storage(); + FixQtpieReaxFF::deallocate_matrix(); + + memory->destroy(shld); + + if (!reaxflag) { + memory->destroy(chi); + memory->destroy(eta); + memory->destroy(gamma); + } +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::post_constructor() +{ + if (lmp->citeme) lmp->citeme->add(cite_fix_qtpie_reaxff); + + grow_arrays(atom->nmax); + for (int i = 0; i < atom->nmax; i++) + for (int j = 0; j < nprev; ++j) + s_hist[i][j] = t_hist[i][j] = 0; + + pertype_parameters(pertype_option); + if (dual_enabled) + error->all(FLERR,"Dual keyword only supported with fix qeq/reaxff/omp"); +} + +/* ---------------------------------------------------------------------- */ + +int FixQtpieReaxFF::setmask() +{ + int mask = 0; + mask |= PRE_FORCE; + mask |= PRE_FORCE_RESPA; + mask |= MIN_PRE_FORCE; + return mask; +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::pertype_parameters(char *arg) +{ + const int nlocal = atom->nlocal; + const int *mask = atom->mask; + const int *type = atom->type; + + if (utils::strmatch(arg,"^reaxff")) { + reaxflag = 1; + Pair *pair = force->pair_match("^reaxff",0); + if (!pair) error->all(FLERR,"No reaxff pair style for fix qeq/reaxff"); + + int tmp, tmp_all; + chi = (double *) 
pair->extract("chi",tmp); + eta = (double *) pair->extract("eta",tmp); + gamma = (double *) pair->extract("gamma",tmp); + if ((chi == nullptr) || (eta == nullptr) || (gamma == nullptr)) + error->all(FLERR, "Fix qeq/reaxff could not extract all Qtpie parameters from pair reaxff"); + tmp = tmp_all = 0; + for (int i = 0; i < nlocal; ++i) { + if (mask[i] & groupbit) { + if ((chi[type[i]] == 0.0) && (eta[type[i]] == 0.0) && (gamma[type[i]] == 0.0)) + tmp = type[i]; + } + } + MPI_Allreduce(&tmp, &tmp_all, 1, MPI_INT, MPI_MAX, world); + if (tmp_all) + error->all(FLERR, "No Qtpie parameters for atom type {} provided by pair reaxff", tmp_all); + return; + } else if (utils::strmatch(arg,"^reax/c")) { + error->all(FLERR, "Fix qeq/reaxff keyword 'reax/c' is obsolete; please use 'reaxff'"); + } else if (platform::file_is_readable(arg)) { + ; // arg is readable file. will read below + } else { + error->all(FLERR, "Unknown fix qeq/reaxff keyword {}", arg); + } + + reaxflag = 0; + + const int ntypes = atom->ntypes; + memory->create(chi,ntypes+1,"qeq/reaxff:chi"); + memory->create(eta,ntypes+1,"qeq/reaxff:eta"); + memory->create(gamma,ntypes+1,"qeq/reaxff:gamma"); + + if (comm->me == 0) { + chi[0] = eta[0] = gamma[0] = 0.0; + try { + TextFileReader reader(arg,"qeq/reaxff parameter"); + reader.ignore_comments = false; + for (int i = 1; i <= ntypes; i++) { + const char *line = reader.next_line(); + if (!line) + throw TokenizerException("Fix qeq/reaxff: Invalid param file format",""); + ValueTokenizer values(line); + + if (values.count() != 4) + throw TokenizerException("Fix qeq/reaxff: Incorrect format of param file",""); + + int itype = values.next_int(); + if ((itype < 1) || (itype > ntypes)) + throw TokenizerException("Fix qeq/reaxff: invalid atom type in param file", + std::to_string(itype)); + + chi[itype] = values.next_double(); + eta[itype] = values.next_double(); + gamma[itype] = values.next_double(); + } + } catch (std::exception &e) { + error->one(FLERR,e.what()); + } + } + 
+ MPI_Bcast(chi,ntypes+1,MPI_DOUBLE,0,world); + MPI_Bcast(eta,ntypes+1,MPI_DOUBLE,0,world); + MPI_Bcast(gamma,ntypes+1,MPI_DOUBLE,0,world); +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::allocate_storage() +{ + nmax = atom->nmax; + + memory->create(s,nmax,"qeq:s"); + memory->create(t,nmax,"qeq:t"); + + memory->create(Hdia_inv,nmax,"qeq:Hdia_inv"); + memory->create(b_s,nmax,"qeq:b_s"); + memory->create(chi_field,nmax,"qeq:chi_field"); + memory->create(b_t,nmax,"qeq:b_t"); + memory->create(b_prc,nmax,"qeq:b_prc"); + memory->create(b_prm,nmax,"qeq:b_prm"); + + // dual CG support + int size = nmax; + if (dual_enabled) size*= 2; + + memory->create(p,size,"qeq:p"); + memory->create(q,size,"qeq:q"); + memory->create(r,size,"qeq:r"); + memory->create(d,size,"qeq:d"); +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::deallocate_storage() +{ + memory->destroy(s); + memory->destroy(t); + + memory->destroy(Hdia_inv); + memory->destroy(b_s); + memory->destroy(b_t); + memory->destroy(b_prc); + memory->destroy(b_prm); + memory->destroy(chi_field); + + memory->destroy(p); + memory->destroy(q); + memory->destroy(r); + memory->destroy(d); +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::reallocate_storage() +{ + deallocate_storage(); + allocate_storage(); + init_storage(); +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::allocate_matrix() +{ + int i,ii,m; + + int mincap; + double safezone; + + if (reaxflag) { + mincap = reaxff->api->system->mincap; + safezone = reaxff->api->system->safezone; + } else { + mincap = REAX_MIN_CAP; + safezone = REAX_SAFE_ZONE; + } + + n_cap = MAX((int)(atom->nlocal * safezone), mincap); + + // determine the total space for the H matrix + + m = 0; + for (ii = 0; ii < nn; ii++) { + i = ilist[ii]; + m += numneigh[i]; + } + m_cap 
= MAX((int)(m * safezone), mincap * REAX_MIN_NBRS); + + H.n = n_cap; + H.m = m_cap; + memory->create(H.firstnbr,n_cap,"qeq:H.firstnbr"); + memory->create(H.numnbrs,n_cap,"qeq:H.numnbrs"); + memory->create(H.jlist,m_cap,"qeq:H.jlist"); + memory->create(H.val,m_cap,"qeq:H.val"); +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::deallocate_matrix() +{ + memory->destroy(H.firstnbr); + memory->destroy(H.numnbrs); + memory->destroy(H.jlist); + memory->destroy(H.val); +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::reallocate_matrix() +{ + deallocate_matrix(); + allocate_matrix(); +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::init() +{ + if (!atom->q_flag) + error->all(FLERR,"Fix {} requires atom attribute q", style); + + if (group->count(igroup) == 0) + error->all(FLERR,"Fix {} group has no atoms", style); + + // compute net charge and print warning if too large + + double qsum_local = 0.0, qsum = 0.0; + for (int i = 0; i < atom->nlocal; i++) { + if (atom->mask[i] & groupbit) + qsum_local += atom->q[i]; + } + MPI_Allreduce(&qsum_local,&qsum,1,MPI_DOUBLE,MPI_SUM,world); + + if ((comm->me == 0) && (fabs(qsum) > QSUMSMALL)) + error->warning(FLERR,"Fix {} group is not charge neutral, net charge = {:.8}", style, qsum); + + // get pointer to fix efield if present. there may be at most one instance of fix efield in use. 
+ + efield = nullptr; + auto fixes = modify->get_fix_by_style("^efield"); + if (fixes.size() == 1) efield = dynamic_cast(fixes.front()); + else if (fixes.size() > 1) + error->all(FLERR, "There may be only one fix efield instance used with fix {}", style); + + // ensure that fix efield is properly initialized before accessing its data and check some settings + if (efield) { + efield->init(); + if (strcmp(update->unit_style,"real") != 0) + error->all(FLERR,"Must use unit_style real with fix {} and external fields", style); + + if (efield->varflag == FixEfield::ATOM && efield->pstyle != FixEfield::ATOM) + error->all(FLERR,"Atom-style external electric field requires atom-style " + "potential variable when used with fix {}", style); + if (((efield->xstyle != FixEfield::CONSTANT) && domain->xperiodic) || + ((efield->ystyle != FixEfield::CONSTANT) && domain->yperiodic) || + ((efield->zstyle != FixEfield::CONSTANT) && domain->zperiodic)) + error->all(FLERR,"Must not have electric field component in direction of periodic " + "boundary when using charge equilibration with ReaxFF."); + if (((fabs(efield->ex) > SMALL) && domain->xperiodic) || + ((fabs(efield->ey) > SMALL) && domain->yperiodic) || + ((fabs(efield->ez) > SMALL) && domain->zperiodic)) + error->all(FLERR,"Must not have electric field component in direction of periodic " + "boundary when using charge equilibration with ReaxFF."); + } + + // we need a half neighbor list w/ Newton off + // built whenever re-neighboring occurs + + neighbor->add_request(this, NeighConst::REQ_NEWTON_OFF); + + init_shielding(); + init_taper(); + + if (utils::strmatch(update->integrate_style,"^respa")) + nlevels_respa = (dynamic_cast(update->integrate))->nlevels; +} + +/* ---------------------------------------------------------------------- */ + +double FixQtpieReaxFF::compute_scalar() +{ + return matvecs/2.0; +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::init_list(int /*id*/, 
NeighList *ptr) +{ + list = ptr; +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::init_shielding() +{ + int i,j; + int ntypes; + + ntypes = atom->ntypes; + if (shld == nullptr) + memory->create(shld,ntypes+1,ntypes+1,"qeq:shielding"); + + for (i = 1; i <= ntypes; ++i) + for (j = 1; j <= ntypes; ++j) + shld[i][j] = pow(gamma[i] * gamma[j], -1.5); +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::init_taper() +{ + double d7, swa2, swa3, swb2, swb3; + + if (fabs(swa) > 0.01 && comm->me == 0) + error->warning(FLERR,"Fix qeq/reaxff has non-zero lower Taper radius cutoff"); + if (swb < 0) + error->all(FLERR, "Fix qeq/reaxff has negative upper Taper radius cutoff"); + else if (swb < 5 && comm->me == 0) + error->warning(FLERR,"Fix qeq/reaxff has very low Taper radius cutoff"); + + d7 = pow(swb - swa, 7); + swa2 = SQR(swa); + swa3 = CUBE(swa); + swb2 = SQR(swb); + swb3 = CUBE(swb); + + Tap[7] = 20.0 / d7; + Tap[6] = -70.0 * (swa + swb) / d7; + Tap[5] = 84.0 * (swa2 + 3.0*swa*swb + swb2) / d7; + Tap[4] = -35.0 * (swa3 + 9.0*swa2*swb + 9.0*swa*swb2 + swb3) / d7; + Tap[3] = 140.0 * (swa3*swb + 3.0*swa2*swb2 + swa*swb3) / d7; + Tap[2] =-210.0 * (swa3*swb2 + swa2*swb3) / d7; + Tap[1] = 140.0 * swa3 * swb3 / d7; + Tap[0] = (-35.0*swa3*swb2*swb2 + 21.0*swa2*swb3*swb2 - + 7.0*swa*swb3*swb3 + swb3*swb3*swb) / d7; +} + +/* ---------------------------------------------------------------------- */ + +void FixQtpieReaxFF::setup_pre_force(int vflag) +{ + if (reaxff) { + nn = reaxff->list->inum; + ilist = reaxff->list->ilist; + numneigh = reaxff->list->numneigh; + firstneigh = reaxff->list->firstneigh; + } else { + nn = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + } + + deallocate_storage(); + allocate_storage(); + + init_storage(); + + deallocate_matrix(); + allocate_matrix(); + + pre_force(vflag); +} + +/* 
----------------------------------------------------------------------
+   rRESPA variant of setup_pre_force(): does nothing unless ilevel is the
+   last level (nlevels_respa-1), then forwards to setup_pre_force()
+------------------------------------------------------------------------- */
+
+void FixQtpieReaxFF::setup_pre_force_respa(int vflag, int ilevel)
+{
+  if (ilevel < nlevels_respa-1) return;
+  setup_pre_force(vflag);
+}
+
+/* ----------------------------------------------------------------------
+   minimizer variant of setup_pre_force(): plain forward
+------------------------------------------------------------------------- */
+
+void FixQtpieReaxFF::min_setup_pre_force(int vflag)
+{
+  setup_pre_force(vflag);
+}
+
+/* ----------------------------------------------------------------------
+   reset the solver state of every atom ilist[0..nn) that is in the fix
+   group: Hdia_inv = 1/eta[type], b_s = -chi[type] (minus chi_field[i]
+   when a fix efield is present), b_t = -1, and b_prc, b_prm, s, t = 0
+------------------------------------------------------------------------- */
+
+void FixQtpieReaxFF::init_storage()
+{
+  if (efield) get_chi_field();
+
+  for (int ii = 0; ii < nn; ii++) {
+    int i = ilist[ii];
+    if (atom->mask[i] & groupbit) {
+      Hdia_inv[i] = 1. / eta[atom->type[i]];
+      b_s[i] = -chi[atom->type[i]];
+      if (efield) b_s[i] -= chi_field[i];
+      b_t[i] = -1.0;
+      b_prc[i] = 0;
+      b_prm[i] = 0;
+      s[i] = t[i] = 0;
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   charge update, skipped on steps where ntimestep is not a multiple of
+   nevery: refresh the cached neighbor list pointers, grow per-atom and
+   matrix storage when needed, refresh chi_field, build H and the initial
+   guesses (init_matvec), solve for s and t with CG and assign the
+   charges (calculate_Q)
+   matvecs is the sum of the two CG return values
+------------------------------------------------------------------------- */
+
+void FixQtpieReaxFF::pre_force(int /*vflag*/)
+{
+  if (update->ntimestep % nevery) return;
+
+  int n = atom->nlocal;
+
+  if (reaxff) {
+    nn = reaxff->list->inum;
+    ilist = reaxff->list->ilist;
+    numneigh = reaxff->list->numneigh;
+    firstneigh = reaxff->list->firstneigh;
+  } else {
+    nn = list->inum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+  }
+
+  // grow arrays if necessary
+  // need to be atom->nmax in length
+
+  if (atom->nmax > nmax) reallocate_storage();
+  if (n > n_cap*DANGER_ZONE || m_fill > m_cap*DANGER_ZONE)
+    reallocate_matrix();
+
+  if (efield) get_chi_field();
+
+  init_matvec();
+
+  matvecs_s = CG(b_s, s); // CG on s - parallel
+  matvecs_t = CG(b_t, t); // CG on t - parallel
+  matvecs = matvecs_s + matvecs_t;
+
+  calculate_Q();
+}
+
+/* ----------------------------------------------------------------------
+   rRESPA variant of pre_force(): only acts on level nlevels_respa-1
+------------------------------------------------------------------------- */
+
+void FixQtpieReaxFF::pre_force_respa(int vflag, int ilevel, int /*iloop*/)
+{
+  if (ilevel == nlevels_respa-1) pre_force(vflag);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixQtpieReaxFF::min_pre_force(int vflag)
+{
+  pre_force(vflag);
+}
+
+/* ----------------------------------------------------------------------
+   prepare one charge solve: rebuild H (compute_H), reset the diagonal
+   pre-conditioner and the right-hand sides b_s / b_t for group atoms,
+   extrapolate the initial guesses t[i] and s[i] from t_hist / s_hist,
+   then forward-communicate s (pack_flag 2) and t (pack_flag 3)
+------------------------------------------------------------------------- */
+
+void FixQtpieReaxFF::init_matvec()
+{
+  /* fill-in H matrix */
+  compute_H();
+
+  int ii, i;
+
+  for (ii = 0; ii < nn; ++ii) {
+    i = ilist[ii];
+    if (atom->mask[i] & groupbit) {
+
+      /* init pre-conditioner for H and init solution vectors */
+      Hdia_inv[i] = 1. / eta[atom->type[i]];
+      b_s[i] = -chi[atom->type[i]];
+      if (efield) b_s[i] -= chi_field[i];
+      b_t[i] = -1.0;
+
+      /* quadratic extrapolation for s & t from previous solutions */
+      t[i] = t_hist[i][2] + 3 * (t_hist[i][0] - t_hist[i][1]);
+
+      /* cubic extrapolation for s & t from previous solutions */
+      s[i] = 4*(s_hist[i][0]+s_hist[i][2])-(6*s_hist[i][1]+s_hist[i][3]);
+    }
+  }
+
+  pack_flag = 2;
+  comm->forward_comm(this); //Dist_vector(s);
+  pack_flag = 3;
+  comm->forward_comm(this); //Dist_vector(t);
+}
+
+/* ----------------------------------------------------------------------
+   fill the sparse matrix H from the cached neighbor list
+   for each group atom i, H.firstnbr[i] is the offset of its first entry
+   and H.numnbrs[i] the number of entries; H.jlist / H.val hold the
+   neighbor index and calculate_H() value of every kept pair
+   a pair within the cutoff swb is kept if j < nlocal; otherwise only if
+   tag[i] < tag[j] or, for equal tags, if the dz/dy/dx comparison against
+   EPSILON selects it
+------------------------------------------------------------------------- */
+
+void FixQtpieReaxFF::compute_H()
+{
+  int jnum;
+  int i, j, ii, jj, flag;
+  double dx, dy, dz, r_sqr;
+  constexpr double EPSILON = 0.0001;
+
+  int *type = atom->type;
+  tagint *tag = atom->tag;
+  double **x = atom->x;
+  int *mask = atom->mask;
+
+  // fill in the H matrix
+  m_fill = 0;
+  r_sqr = 0;
+  for (ii = 0; ii < nn; ii++) {
+    i = ilist[ii];
+    if (mask[i] & groupbit) {
+      jlist = firstneigh[i];
+      jnum = numneigh[i];
+      H.firstnbr[i] = m_fill;
+
+      for (jj = 0; jj < jnum; jj++) {
+        j = jlist[jj];
+        j &= NEIGHMASK;
+
+        dx = x[j][0] - x[i][0];
+        dy = x[j][1] - x[i][1];
+        dz = x[j][2] - x[i][2];
+        r_sqr = SQR(dx) + SQR(dy) + SQR(dz);
+
+        flag = 0;
+        if (r_sqr <= SQR(swb)) {
+          if (j < atom->nlocal) flag = 1;
+          else if (tag[i] < tag[j]) flag = 1;
+          else if (tag[i] == tag[j]) {
+            if (dz > EPSILON) flag = 1;
+            else if (fabs(dz) < EPSILON) {
+              if (dy > EPSILON) flag = 1;
+              else if (fabs(dy) < EPSILON && dx > EPSILON)
+                flag = 1;
+            }
+          }
+        }
+
+        if (flag) {
+          H.jlist[m_fill] = j;
+          
H.val[m_fill] = calculate_H(sqrt(r_sqr), shld[type[i]][type[j]]);
+          m_fill++;
+        }
+      }
+      H.numnbrs[i] = m_fill - H.firstnbr[i];
+    }
+  }
+
+  // NOTE(review): this capacity check only runs after all entries were written
+  if (m_fill >= H.m)
+    error->all(FLERR,"Fix qtpie/reaxff H matrix size has been exceeded: m_fill={} H.m={}\n",
+               m_fill, H.m);
+}
+
+/* ----------------------------------------------------------------------
+   matrix element for a pair at distance r with shielding parameter gamma:
+   Taper(r) * EV_TO_KCAL_PER_MOL / (r^3 + gamma)^(1/3)
+   Taper(r) is the polynomial with coefficients Tap[0..7], evaluated with
+   Horner's rule
+------------------------------------------------------------------------- */
+
+double FixQtpieReaxFF::calculate_H(double r, double gamma)
+{
+  double Taper, denom;
+
+  Taper = Tap[7] * r + Tap[6];
+  Taper = Taper * r + Tap[5];
+  Taper = Taper * r + Tap[4];
+  Taper = Taper * r + Tap[3];
+  Taper = Taper * r + Tap[2];
+  Taper = Taper * r + Tap[1];
+  Taper = Taper * r + Tap[0];
+
+  denom = r * r * r + gamma;
+  denom = pow(denom,1.0/3.0);
+
+  return Taper * EV_TO_KCAL_PER_MOL / denom;
+}
+
+/* ----------------------------------------------------------------------
+   conjugate gradient solve of H x = b with the diagonal pre-conditioner
+   Hdia_inv; x holds the initial guess on entry and the solution on exit
+   iterates while sqrt(sig_new)/|b| > tolerance and fewer than imax
+   iterations were done; returns the iteration counter
+   a warning naming this fix (qtpie/reaxff) is printed on rank 0 when
+   imax is reached and maxwarn is set
+------------------------------------------------------------------------- */
+
+int FixQtpieReaxFF::CG(double *b, double *x)
+{
+  int i, j;
+  double tmp, alpha, beta, b_norm;
+  double sig_old, sig_new;
+
+  int jj;
+
+  pack_flag = 1;
+  sparse_matvec(&H, x, q);
+  comm->reverse_comm(this); //Coll_Vector(q);
+
+  vector_sum(r , 1., b, -1., q, nn);
+
+  for (jj = 0; jj < nn; ++jj) {
+    j = ilist[jj];
+    if (atom->mask[j] & groupbit)
+      d[j] = r[j] * Hdia_inv[j]; //pre-condition
+  }
+
+  b_norm = parallel_norm(b, nn);
+  sig_new = parallel_dot(r, d, nn);
+
+  for (i = 1; i < imax && sqrt(sig_new) / b_norm > tolerance; ++i) {
+    comm->forward_comm(this); //Dist_vector(d);
+    sparse_matvec(&H, d, q);
+    comm->reverse_comm(this); //Coll_vector(q);
+
+    tmp = parallel_dot(d, q, nn);
+    alpha = sig_new / tmp;
+
+    vector_add(x, alpha, d, nn);
+    vector_add(r, -alpha, q, nn);
+
+    // pre-conditioning
+    for (jj = 0; jj < nn; ++jj) {
+      j = ilist[jj];
+      if (atom->mask[j] & groupbit)
+        p[j] = r[j] * Hdia_inv[j];
+    }
+
+    sig_old = sig_new;
+    sig_new = parallel_dot(r, p, nn);
+
+    beta = sig_new / sig_old;
+    vector_sum(d, 1., p, beta, d, nn);
+  }
+
+  if ((i >= imax) && maxwarn && (comm->me == 0))
+    error->warning(FLERR, "Fix qtpie/reaxff CG convergence 
failed after {} iterations at step {}",
+                   i,update->ntimestep);
+  return i;
+}
+
+
+/* ----------------------------------------------------------------------
+   b = A * x for the symmetric matrix stored as diagonal eta[type] plus
+   the off-diagonal entries of A
+   b[i] of group atoms is set to the diagonal term, b[nlocal..nall) is
+   zeroed, then every stored entry (i,j) adds val*x[j] to b[i] and
+   val*x[i] to b[j]
+------------------------------------------------------------------------- */
+
+void FixQtpieReaxFF::sparse_matvec(sparse_matrix *A, double *x, double *b)
+{
+  int i, j, itr_j;
+  int ii;
+
+  for (ii = 0; ii < nn; ++ii) {
+    i = ilist[ii];
+    if (atom->mask[i] & groupbit)
+      b[i] = eta[atom->type[i]] * x[i];
+  }
+
+  int nall = atom->nlocal + atom->nghost;
+  for (i = atom->nlocal; i < nall; ++i)
+    b[i] = 0;
+
+  for (ii = 0; ii < nn; ++ii) {
+    i = ilist[ii];
+    if (atom->mask[i] & groupbit) {
+      // entries of row i occupy [firstnbr[i], firstnbr[i]+numnbrs[i])
+      for (itr_j = A->firstnbr[i]; itr_j < A->firstnbr[i]+A->numnbrs[i]; itr_j++) {
+        j = A->jlist[itr_j];
+        b[i] += A->val[itr_j] * x[j];
+        b[j] += A->val[itr_j] * x[i];
+      }
+    }
+  }
+
+}
+
+/* ----------------------------------------------------------------------
+   combine the two CG solutions into charges: q[i] = s[i] - u*t[i] with
+   u = sum(s)/sum(t) over all ranks, shift the s/t histories by one slot
+   and store the new solutions in slot 0, then forward-communicate
+   atom->q (pack_flag 4)
+------------------------------------------------------------------------- */
+
+void FixQtpieReaxFF::calculate_Q()
+{
+  int i, k;
+  double u, s_sum, t_sum;
+  double *q = atom->q;
+
+  int ii;
+
+  s_sum = parallel_vector_acc(s, nn);
+  t_sum = parallel_vector_acc(t, nn);
+  u = s_sum / t_sum;
+
+  for (ii = 0; ii < nn; ++ii) {
+    i = ilist[ii];
+    if (atom->mask[i] & groupbit) {
+      q[i] = s[i] - u * t[i];
+
+      /* backup s & t */
+      for (k = nprev-1; k > 0; --k) {
+        s_hist[i][k] = s_hist[i][k-1];
+        t_hist[i][k] = t_hist[i][k-1];
+      }
+      s_hist[i][0] = s[i];
+      t_hist[i][0] = t[i];
+    }
+  }
+
+  pack_flag = 4;
+  comm->forward_comm(this); //Dist_vector(atom->q);
+}
+
+/* ----------------------------------------------------------------------
+   pack the vector selected by pack_flag for the n atoms in list:
+   1 = d, 2 = s, 3 = t, 4 = atom->q, 5 = two consecutive entries of d
+   per atom; returns the number of values written to buf
+------------------------------------------------------------------------- */
+
+int FixQtpieReaxFF::pack_forward_comm(int n, int *list, double *buf,
+                                  int /*pbc_flag*/, int * /*pbc*/)
+{
+  int m;
+
+  if (pack_flag == 1)
+    for (m = 0; m < n; m++) buf[m] = d[list[m]];
+  else if (pack_flag == 2)
+    for (m = 0; m < n; m++) buf[m] = s[list[m]];
+  else if (pack_flag == 3)
+    for (m = 0; m < n; m++) buf[m] = t[list[m]];
+  else if (pack_flag == 4)
+    for (m = 0; m < n; m++) buf[m] = atom->q[list[m]];
+  else if (pack_flag == 5) {
+    m = 0;
+    for (int i = 0; i < n; i++) {
+      int j = 2 * 
list[i];
+      buf[m++] = d[j];
+      buf[m++] = d[j+1];
+    }
+    return m;
+  }
+  return n;
+}
+
+/* ----------------------------------------------------------------------
+   counterpart of pack_forward_comm(): copy n received values (2*n for
+   pack_flag 5) from buf into the vector selected by pack_flag, starting
+   at atom index first
+------------------------------------------------------------------------- */
+
+void FixQtpieReaxFF::unpack_forward_comm(int n, int first, double *buf)
+{
+  int i, m;
+
+  if (pack_flag == 1)
+    for (m = 0, i = first; m < n; m++, i++) d[i] = buf[m];
+  else if (pack_flag == 2)
+    for (m = 0, i = first; m < n; m++, i++) s[i] = buf[m];
+  else if (pack_flag == 3)
+    for (m = 0, i = first; m < n; m++, i++) t[i] = buf[m];
+  else if (pack_flag == 4)
+    for (m = 0, i = first; m < n; m++, i++) atom->q[i] = buf[m];
+  else if (pack_flag == 5) {
+    int last = first + n;
+    m = 0;
+    for (i = first; i < last; i++) {
+      int j = 2 * i;
+      d[j] = buf[m++];
+      d[j+1] = buf[m++];
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pack the entries of q for the n atoms starting at index first
+   (two entries per atom when pack_flag is 5); returns the number of
+   values written to buf
+------------------------------------------------------------------------- */
+
+int FixQtpieReaxFF::pack_reverse_comm(int n, int first, double *buf)
+{
+  int i, m;
+  if (pack_flag == 5) {
+    m = 0;
+    int last = first + n;
+    for (i = first; i < last; i++) {
+      int indxI = 2 * i;
+      buf[m++] = q[indxI];
+      buf[m++] = q[indxI+1];
+    }
+    return m;
+  } else {
+    for (m = 0, i = first; m < n; m++, i++) buf[m] = q[i];
+    return n;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   counterpart of pack_reverse_comm(): add the received values to the
+   entries of q of the n atoms in list
+------------------------------------------------------------------------- */
+
+void FixQtpieReaxFF::unpack_reverse_comm(int n, int *list, double *buf)
+{
+  if (pack_flag == 5) {
+    int m = 0;
+    for (int i = 0; i < n; i++) {
+      int indxI = 2 * list[i];
+      q[indxI] += buf[m++];
+      q[indxI+1] += buf[m++];
+    }
+  } else {
+    for (int m = 0; m < n; m++) q[list[m]] += buf[m];
+  }
+}
+
+/* ----------------------------------------------------------------------
+   memory usage of local atom-based arrays
+------------------------------------------------------------------------- */
+
+double FixQtpieReaxFF::memory_usage()
+{
+  double bytes;
+
+  bytes = (double)atom->nmax*nprev*2 * sizeof(double); // s_hist & t_hist
+  bytes += (double)atom->nmax*11 * sizeof(double); // storage
+  bytes += (double)n_cap*2 * 
sizeof(int); // matrix...
+  bytes += (double)m_cap * sizeof(int);
+  bytes += (double)m_cap * sizeof(double);
+
+  if (dual_enabled)
+    bytes += (double)atom->nmax*4 * sizeof(double); // double size for q, d, r, and p
+
+  return bytes;
+}
+
+/* ----------------------------------------------------------------------
+   allocate fictitious charge arrays
+   s_hist and t_hist are grown to nmax rows of nprev entries each
+------------------------------------------------------------------------- */
+
+void FixQtpieReaxFF::grow_arrays(int nmax)
+{
+  memory->grow(s_hist,nmax,nprev,"qeq:s_hist");
+  memory->grow(t_hist,nmax,nprev,"qeq:t_hist");
+}
+
+/* ----------------------------------------------------------------------
+   copy values within fictitious charge arrays
+   the nprev history entries of atom i overwrite those of atom j
+------------------------------------------------------------------------- */
+
+void FixQtpieReaxFF::copy_arrays(int i, int j, int /*delflag*/)
+{
+  for (int m = 0; m < nprev; m++) {
+    s_hist[j][m] = s_hist[i][m];
+    t_hist[j][m] = t_hist[i][m];
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pack values in local atom-based array for exchange with another proc
+   buf receives the nprev s_hist entries of atom i followed by its nprev
+   t_hist entries; returns 2*nprev
+------------------------------------------------------------------------- */
+
+int FixQtpieReaxFF::pack_exchange(int i, double *buf)
+{
+  for (int m = 0; m < nprev; m++) buf[m] = s_hist[i][m];
+  for (int m = 0; m < nprev; m++) buf[nprev+m] = t_hist[i][m];
+  return nprev*2;
+}
+
+/* ----------------------------------------------------------------------
+   unpack values in local atom-based array from exchange with another proc
+   reads the layout written by pack_exchange() into row nlocal of
+   s_hist / t_hist; returns 2*nprev
+------------------------------------------------------------------------- */
+
+int FixQtpieReaxFF::unpack_exchange(int nlocal, double *buf)
+{
+  for (int m = 0; m < nprev; m++) s_hist[nlocal][m] = buf[m];
+  for (int m = 0; m < nprev; m++) t_hist[nlocal][m] = buf[nprev+m];
+  return nprev*2;
+}
+
+/* ----------------------------------------------------------------------
+   2-norm of v over the group atoms in ilist[0..n), summed over all
+   ranks with MPI_Allreduce
+------------------------------------------------------------------------- */
+
+double FixQtpieReaxFF::parallel_norm(double *v, int n)
+{
+  int i;
+  double my_sum, 
norm_sqr;
+
+  int ii;
+
+  my_sum = 0.0;
+  norm_sqr = 0.0;
+  for (ii = 0; ii < n; ++ii) {
+    i = ilist[ii];
+    if (atom->mask[i] & groupbit)
+      my_sum += SQR(v[i]);
+  }
+
+  MPI_Allreduce(&my_sum, &norm_sqr, 1, MPI_DOUBLE, MPI_SUM, world);
+
+  return sqrt(norm_sqr);
+}
+
+/* ----------------------------------------------------------------------
+   dot product of v1 and v2 over the group atoms in ilist[0..n), summed
+   over all ranks with MPI_Allreduce
+------------------------------------------------------------------------- */
+
+double FixQtpieReaxFF::parallel_dot(double *v1, double *v2, int n)
+{
+  int i;
+  double my_dot, res;
+
+  int ii;
+
+  my_dot = 0.0;
+  res = 0.0;
+  for (ii = 0; ii < n; ++ii) {
+    i = ilist[ii];
+    if (atom->mask[i] & groupbit)
+      my_dot += v1[i] * v2[i];
+  }
+
+  MPI_Allreduce(&my_dot, &res, 1, MPI_DOUBLE, MPI_SUM, world);
+
+  return res;
+}
+
+/* ----------------------------------------------------------------------
+   sum of the entries of v over the group atoms in ilist[0..n), summed
+   over all ranks with MPI_Allreduce
+------------------------------------------------------------------------- */
+
+double FixQtpieReaxFF::parallel_vector_acc(double *v, int n)
+{
+  int i;
+  double my_acc, res;
+
+  int ii;
+
+  my_acc = 0.0;
+  res = 0.0;
+  for (ii = 0; ii < n; ++ii) {
+    i = ilist[ii];
+    if (atom->mask[i] & groupbit)
+      my_acc += v[i];
+  }
+
+  MPI_Allreduce(&my_acc, &res, 1, MPI_DOUBLE, MPI_SUM, world);
+
+  return res;
+}
+
+/* ----------------------------------------------------------------------
+   dest = c*v + d*y for the group atoms in ilist[0..k)
+   dest may alias v or y (CG passes d as both dest and y)
+------------------------------------------------------------------------- */
+
+void FixQtpieReaxFF::vector_sum(double* dest, double c, double* v,
+                                double d, double* y, int k)
+{
+  int kk;
+
+  for (--k; k>=0; --k) {
+    kk = ilist[k];
+    if (atom->mask[kk] & groupbit)
+      dest[kk] = c * v[kk] + d * y[kk];
+  }
+}
+
+/* ----------------------------------------------------------------------
+   dest += c*v for the group atoms in ilist[0..k)
+------------------------------------------------------------------------- */
+
+void FixQtpieReaxFF::vector_add(double* dest, double c, double* v, int k)
+{
+  int kk;
+
+  for (--k; k>=0; --k) {
+    kk = ilist[k];
+    if (atom->mask[kk] & groupbit)
+      dest[kk] += c * v[kk];
+  }
+}
+
+/* ----------------------------------------------------------------------
+   zero chi_field (atom->nmax entries) and, when a fix efield is present,
+   fill in the external-field contribution for the local atoms selected
+   by that fix
+------------------------------------------------------------------------- */
+
+void FixQtpieReaxFF::get_chi_field()
+{
+  memset(&chi_field[0],0,atom->nmax*sizeof(double));
+  if (!efield) return;
+
+  const auto x = (const double * const *)atom->x;
+  const int *mask = atom->mask;
+  const imageint *image = 
atom->image; + const int nlocal = atom->nlocal; + + + // update electric field region if necessary + + Region *region = efield->region; + if (region) region->prematch(); + + // efield energy is in real units of kcal/mol/angstrom, need to convert to eV + + const double qe2f = force->qe2f; + const double factor = -1.0/qe2f; + + + if (efield->varflag != FixEfield::CONSTANT) + efield->update_efield_variables(); + + // atom selection is for the group of fix efield + + double unwrap[3]; + const double ex = efield->ex; + const double ey = efield->ey; + const double ez = efield->ez; + const int efgroupbit = efield->groupbit; + + // charge interactions + // force = qE, potential energy = F dot x in unwrapped coords + if (efield->varflag != FixEfield::ATOM) { + for (int i = 0; i < nlocal; i++) { + if (mask[i] & efgroupbit) { + if (region && !region->match(x[i][0],x[i][1],x[i][2])) continue; + domain->unmap(x[i],image[i],unwrap); + chi_field[i] = factor*(ex*unwrap[0] + ey*unwrap[1] + ez*unwrap[2]); + } + } + } else { // must use atom-style potential from FixEfield + for (int i = 0; i < nlocal; i++) { + if (mask[i] & efgroupbit) { + if (region && !region->match(x[i][0],x[i][1],x[i][2])) continue; + chi_field[i] = -efield->efield[i][3]; + } + } + } +} diff --git a/src/REAXFF/fix_qtpie_reaxff.h b/src/REAXFF/fix_qtpie_reaxff.h new file mode 100644 index 0000000000..fcdedddf26 --- /dev/null +++ b/src/REAXFF/fix_qtpie_reaxff.h @@ -0,0 +1,149 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. 
+ + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS +// clang-format off +FixStyle(qtpie/reax,FixQtpieReaxFF); +FixStyle(qtpie/reaxff,FixQtpieReaxFF); +// clang-format on +#else + +#ifndef LMP_FIX_QTPIE_REAXFF_H +#define LMP_FIX_QTPIE_REAXFF_H + +#include "fix.h" + +namespace LAMMPS_NS { + +class FixQtpieReaxFF : public Fix { + public: + FixQtpieReaxFF(class LAMMPS *, int, char **); + ~FixQtpieReaxFF() override; + int setmask() override; + void post_constructor() override; + void init() override; + void init_list(int, class NeighList *) override; + virtual void init_storage(); + void setup_pre_force(int) override; + void pre_force(int) override; + + void setup_pre_force_respa(int, int) override; + void pre_force_respa(int, int, int) override; + + void min_setup_pre_force(int); + void min_pre_force(int) override; + + double compute_scalar() override; + + protected: + int nevery, reaxflag; + int matvecs; + int nn, m_fill; + int n_cap, nmax, m_cap; + int pack_flag; + int nlevels_respa; + class NeighList *list; + class PairReaxFF *reaxff; + class FixEfield *efield; + int *ilist, *jlist, *numneigh, **firstneigh; + + double swa, swb; // lower/upper Taper cutoff radius + double Tap[8]; // Taper function + double tolerance; // tolerance for the norm of the rel residual in CG + + double *chi, *eta, *gamma; // qeq parameters + double **shld; + + // fictitious charges + + double *s, *t; + double **s_hist, **t_hist; + int nprev; + + typedef struct { + int n, m; + int *firstnbr; + int *numnbrs; + int *jlist; + double *val; + } sparse_matrix; + + sparse_matrix H; + double *Hdia_inv; + double *b_s, *b_t; + double *b_prc, *b_prm; + double *chi_field; + + //CG storage + double *p, *q, *r, *d; + int imax, maxwarn; + + char *pertype_option; // argument to determine how per-type info is obtained + + // Params from Kritikos - could rename or move to protected later + char 
*gauss_file; // input file for gaussian exponents for each type of REAXFF file + double cutghost; // ghost atoms cutoff (used for check) + int nn_prev; // number of local atoms; needed for memory reallocation of chi_eff (when multiprocessing) + double *gauss_exp; // array of gaussian exponents + double *chi_eff; // array of effective electronegativities + double *chi_eff_init; // array of effective electronegativities for FixQEqReax::init_storage() + + // void calculate_chi_eff(LAMMPS_NS::Atom *atom, reax_system *system, double *chi, + // int ni, int nj, double *lchi_eff); + virtual void pertype_parameters(char *); + void init_shielding(); + void init_taper(); + virtual void allocate_storage(); + virtual void deallocate_storage(); + void reallocate_storage(); + virtual void allocate_matrix(); + virtual void deallocate_matrix(); + void reallocate_matrix(); + + virtual void init_matvec(); + void init_H(); + virtual void compute_H(); + double calculate_H(double, double); + virtual void calculate_Q(); + + virtual int CG(double *, double *); + virtual void sparse_matvec(sparse_matrix *, double *, double *); + + int pack_forward_comm(int, int *, double *, int, int *) override; + void unpack_forward_comm(int, int, double *) override; + int pack_reverse_comm(int, int, double *) override; + void unpack_reverse_comm(int, int *, double *) override; + double memory_usage() override; + void grow_arrays(int) override; + void copy_arrays(int, int, int) override; + int pack_exchange(int, double *) override; + int unpack_exchange(int, double *) override; + + virtual double parallel_norm(double *, int); + virtual double parallel_dot(double *, double *, int); + virtual double parallel_vector_acc(double *, int); + + virtual void vector_sum(double *, double, double *, double, double *, int); + virtual void vector_add(double *, double, double *, int); + + virtual void get_chi_field(); + + // dual CG support + int dual_enabled; // 0: Original, separate s & t optimization; 1: dual 
optimization + int matvecs_s, matvecs_t; // Iteration count for each system +}; + +} // namespace LAMMPS_NS + +#endif +#endif diff --git a/src/REAXFF/pair_reaxff.cpp b/src/REAXFF/pair_reaxff.cpp index b9f4f6c838..06ad172a38 100644 --- a/src/REAXFF/pair_reaxff.cpp +++ b/src/REAXFF/pair_reaxff.cpp @@ -339,11 +339,13 @@ void PairReaxFF::init_style() auto acks2_fixes = modify->get_fix_by_style("^acks2/reax"); int have_qeq = modify->get_fix_by_style("^qeq/reax").size() - + modify->get_fix_by_style("^qeq/shielded").size() + acks2_fixes.size(); + + modify->get_fix_by_style("^qeq/shielded").size() + acks2_fixes.size() + + modify->get_fix_by_style("^qtpie/reax").size(); if (qeqflag && (have_qeq != 1)) error->all(FLERR,"Pair style reaxff requires use of exactly one of the " - "fix qeq/reaxff or fix qeq/shielded or fix acks2/reaxff commands"); + "fix qeq/reaxff or fix qeq/shielded or fix acks2/reaxff or " + "fix qtpie/reaxff commands"); api->system->acks2_flag = acks2_fixes.size(); if (api->system->acks2_flag) diff --git a/src/fix_efield.h b/src/fix_efield.h index 72fd204898..108395cc2c 100644 --- a/src/fix_efield.h +++ b/src/fix_efield.h @@ -26,6 +26,7 @@ namespace LAMMPS_NS { class FixEfield : public Fix { friend class FixQEqReaxFF; + friend class FixQtpieReaxFF; public: FixEfield(class LAMMPS *, int, char **); From be43a2bdeb2e614aaaf816cfbf94c7ce0368a0ed Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Fri, 2 Aug 2024 18:11:20 +0100 Subject: [PATCH 026/294] Allow for reading of Gaussian exponents from file --- src/REAXFF/fix_qtpie_reaxff.cpp | 53 ++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp index 695bdb4316..77b4708f7a 100644 --- a/src/REAXFF/fix_qtpie_reaxff.cpp +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -42,6 +42,7 @@ #include #include #include +#include using namespace LAMMPS_NS; using namespace FixConst; @@ -66,7 +67,7 @@ static const char 
cite_fix_qtpie_reaxff[] = /* ---------------------------------------------------------------------- */ FixQtpieReaxFF::FixQtpieReaxFF(LAMMPS *lmp, int narg, char **arg) : - Fix(lmp, narg, arg), matvecs(0), pertype_option(nullptr) + Fix(lmp, narg, arg), matvecs(0), pertype_option(nullptr), gauss_file(nullptr) { scalar_flag = 1; extscalar = 0; @@ -82,6 +83,7 @@ FixQtpieReaxFF::FixQtpieReaxFF(LAMMPS *lmp, int narg, char **arg) : swb = utils::numeric(FLERR,arg[5],false,lmp); tolerance = utils::numeric(FLERR,arg[6],false,lmp); pertype_option = utils::strdup(arg[7]); + gauss_file = utils::strdup(arg[8]); // dual CG support only available for OPENMP variant // check for compatibility is in Fix::post_constructor() @@ -153,6 +155,7 @@ FixQtpieReaxFF::~FixQtpieReaxFF() if (copymode) return; delete[] pertype_option; + delete[] gauss_file; // unregister callbacks to this fix from Atom class @@ -166,6 +169,7 @@ FixQtpieReaxFF::~FixQtpieReaxFF() memory->destroy(shld); + memory->destroy(gauss_exp); if (!reaxflag) { memory->destroy(chi); memory->destroy(eta); @@ -207,18 +211,52 @@ void FixQtpieReaxFF::pertype_parameters(char *arg) const int nlocal = atom->nlocal; const int *mask = atom->mask; const int *type = atom->type; + const int ntypes = atom->ntypes; + + // read gaussian exponents + memory->create(gauss_exp,ntypes+1,"qtpie/reaxff:gauss_exp"); + if (comm->me == 0) { + gauss_exp[0] = 0.0; + try { + TextFileReader reader(gauss_file,"qtpie/reaxff gaussian exponents"); + reader.ignore_comments = false; + for (int i = 1; i <= ntypes; i++) { + const char *line = reader.next_line(); + std::cout << line; + if (!line) + throw TokenizerException("Fix qtpie/reaxff: Invalid param file format",""); + ValueTokenizer values(line); + + if (values.count() != 2) + throw TokenizerException("Fix qtpie/reaxff: Incorrect format of param file",""); + + int itype = values.next_int(); + if ((itype < 1) || (itype > ntypes)) + throw TokenizerException("Fix qtpie/reaxff: Invalid atom type in param 
file", + std::to_string(itype)); + + gauss_exp[itype] = values.next_double(); + } + } catch (std::exception &e) { + error->one(FLERR,e.what()); + } + } + + MPI_Bcast(gauss_exp,ntypes+1,MPI_DOUBLE,0,world); + + // read chi, eta and gamma if (utils::strmatch(arg,"^reaxff")) { reaxflag = 1; Pair *pair = force->pair_match("^reaxff",0); - if (!pair) error->all(FLERR,"No reaxff pair style for fix qeq/reaxff"); + if (!pair) error->all(FLERR,"No reaxff pair style for fix qtpie/reaxff"); int tmp, tmp_all; chi = (double *) pair->extract("chi",tmp); eta = (double *) pair->extract("eta",tmp); gamma = (double *) pair->extract("gamma",tmp); if ((chi == nullptr) || (eta == nullptr) || (gamma == nullptr)) - error->all(FLERR, "Fix qeq/reaxff could not extract all Qtpie parameters from pair reaxff"); + error->all(FLERR, "Fix qtpie/reaxff could not extract qtpie parameters from pair reaxff"); tmp = tmp_all = 0; for (int i = 0; i < nlocal; ++i) { if (mask[i] & groupbit) { @@ -228,19 +266,18 @@ void FixQtpieReaxFF::pertype_parameters(char *arg) } MPI_Allreduce(&tmp, &tmp_all, 1, MPI_INT, MPI_MAX, world); if (tmp_all) - error->all(FLERR, "No Qtpie parameters for atom type {} provided by pair reaxff", tmp_all); + error->all(FLERR, "No qtpie parameters for atom type {} provided by pair reaxff", tmp_all); return; } else if (utils::strmatch(arg,"^reax/c")) { - error->all(FLERR, "Fix qeq/reaxff keyword 'reax/c' is obsolete; please use 'reaxff'"); + error->all(FLERR, "Fix qtpie/reaxff keyword 'reax/c' is obsolete; please use 'reaxff'"); } else if (platform::file_is_readable(arg)) { ; // arg is readable file. 
will read below } else { - error->all(FLERR, "Unknown fix qeq/reaxff keyword {}", arg); + error->all(FLERR, "Unknown fix qtpie/reaxff keyword {}", arg); } reaxflag = 0; - const int ntypes = atom->ntypes; memory->create(chi,ntypes+1,"qeq/reaxff:chi"); memory->create(eta,ntypes+1,"qeq/reaxff:eta"); memory->create(gamma,ntypes+1,"qeq/reaxff:gamma"); @@ -261,7 +298,7 @@ void FixQtpieReaxFF::pertype_parameters(char *arg) int itype = values.next_int(); if ((itype < 1) || (itype > ntypes)) - throw TokenizerException("Fix qeq/reaxff: invalid atom type in param file", + throw TokenizerException("Fix qeq/reaxff: Invalid atom type in param file", std::to_string(itype)); chi[itype] = values.next_double(); From bfb1c64b647219e5927b65c932e96aeb321b5401 Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Tue, 6 Aug 2024 12:43:19 +0100 Subject: [PATCH 027/294] Add functionality for calculating chi_eff --- src/REAXFF/fix_qtpie_reaxff.cpp | 142 ++++++++++++++++++++++++++++++-- src/REAXFF/fix_qtpie_reaxff.h | 14 ++-- 2 files changed, 142 insertions(+), 14 deletions(-) diff --git a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp index 77b4708f7a..9f3714c32d 100644 --- a/src/REAXFF/fix_qtpie_reaxff.cpp +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -104,7 +104,7 @@ FixQtpieReaxFF::FixQtpieReaxFF(LAMMPS *lmp, int narg, char **arg) : } shld = nullptr; - nn = n_cap = 0; + nn = ng = n_cap = 0; nmax = 0; m_fill = m_cap = 0; pack_flag = 0; @@ -115,6 +115,7 @@ FixQtpieReaxFF::FixQtpieReaxFF(LAMMPS *lmp, int narg, char **arg) : Hdia_inv = nullptr; b_s = nullptr; chi_field = nullptr; + chi_eff = nullptr; b_t = nullptr; b_prc = nullptr; b_prm = nullptr; @@ -327,6 +328,7 @@ void FixQtpieReaxFF::allocate_storage() memory->create(Hdia_inv,nmax,"qeq:Hdia_inv"); memory->create(b_s,nmax,"qeq:b_s"); memory->create(chi_field,nmax,"qeq:chi_field"); + memory->create(chi_eff,nmax,"qtpie:chi_eff"); memory->create(b_t,nmax,"qeq:b_t"); memory->create(b_prc,nmax,"qeq:b_prc"); 
memory->create(b_prm,nmax,"qeq:b_prm"); @@ -354,6 +356,7 @@ void FixQtpieReaxFF::deallocate_storage() memory->destroy(b_prc); memory->destroy(b_prm); memory->destroy(chi_field); + memory->destroy(chi_eff); memory->destroy(p); memory->destroy(q); @@ -553,11 +556,13 @@ void FixQtpieReaxFF::setup_pre_force(int vflag) { if (reaxff) { nn = reaxff->list->inum; + ng = reaxff->list->inum + reaxff->list->gnum; ilist = reaxff->list->ilist; numneigh = reaxff->list->numneigh; firstneigh = reaxff->list->firstneigh; } else { nn = list->inum; + ng = list->inum + list->gnum; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; @@ -593,14 +598,15 @@ void FixQtpieReaxFF::min_setup_pre_force(int vflag) void FixQtpieReaxFF::init_storage() { - if (efield) get_chi_field(); + // if (efield) get_chi_field(); + calc_chi_eff(); for (int ii = 0; ii < nn; ii++) { int i = ilist[ii]; if (atom->mask[i] & groupbit) { Hdia_inv[i] = 1. / eta[atom->type[i]]; - b_s[i] = -chi[atom->type[i]]; - if (efield) b_s[i] -= chi_field[i]; + b_s[i] = -chi_eff[i]; + // if (efield) b_s[i] -= chi_field[i]; b_t[i] = -1.0; b_prc[i] = 0; b_prm[i] = 0; @@ -619,11 +625,13 @@ void FixQtpieReaxFF::pre_force(int /*vflag*/) if (reaxff) { nn = reaxff->list->inum; + ng = reaxff->list->inum + reaxff->list->gnum; ilist = reaxff->list->ilist; numneigh = reaxff->list->numneigh; firstneigh = reaxff->list->firstneigh; } else { nn = list->inum; + ng = list->inum + list->gnum; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; @@ -636,7 +644,8 @@ void FixQtpieReaxFF::pre_force(int /*vflag*/) if (n > n_cap*DANGER_ZONE || m_fill > m_cap*DANGER_ZONE) reallocate_matrix(); - if (efield) get_chi_field(); + // if (efield) get_chi_field(); + calc_chi_eff(); init_matvec(); @@ -676,8 +685,8 @@ void FixQtpieReaxFF::init_matvec() /* init pre-conditioner for H and init solution vectors */ Hdia_inv[i] = 1. 
/ eta[atom->type[i]]; - b_s[i] = -chi[atom->type[i]]; - if (efield) b_s[i] -= chi_field[i]; + b_s[i] = -chi_eff[i]; + // if (efield) b_s[i] -= chi_field[i]; b_t[i] = -1.0; /* quadratic extrapolation for s & t from previous solutions */ @@ -1195,3 +1204,122 @@ void FixQtpieReaxFF::get_chi_field() } } } + +void FixQtpieReaxFF::calc_chi_eff() +{ + memset(&chi_eff[0],0,atom->nmax*sizeof(double)); + const int KSCREEN = 10; + const double ZERONAME = 1.0e-50; + const double ANG_TO_BOHRRAD = 1.8897259886; // 1 Ang = 1.8897259886 Bohr radius + + double R,a_min,OvIntMaxR,voltage,overlap,nominator,denominator; + double ea,eb,chia,chib,p,m; + double phia,phib; + int i,j; + const int ntypes = atom->ntypes; + const int *type = atom->type; + // const auto x = (const double * const *)atom->x; + double **x = atom->x; + + // Use integral pre-screening for overlap calculations + a_min = find_min(gauss_exp,ntypes+1); + OvIntMaxR = sqrt(pow(a_min,-1.)*log(pow(M_PI/(2.*a_min),3.)*pow(10.,2.*KSCREEN))); + + ghost_cutoff = MAX(neighbor->cutneighmax,comm->cutghostuser); + if(ghost_cutoff < OvIntMaxR/ANG_TO_BOHRRAD) { + error->all(FLERR,"ghost cutoff error"); + // char errmsg[256]; + // snprintf(errmsg, 256,"qtpie/reaxff: limit distance for overlap integral: %f " + // "Angstrom > ghost cutoff: %f Angstrom. 
Increase the ghost atom cutoff " + // "with comm_modify.",OvIntMaxR/ANG_TO_BOHRRAD,ghost_cutoff); + // // system->error_ptr->all(FLERR,errmsg); + // error->all(FLERR,errmsg); + } + + for (i = 0; i < nn; i++) { + + // type_i = type[i]; + ea = gauss_exp[type[i]]; + chia = chi[type[i]]; + + nominator = denominator = 0.0; + + for (j = 0; j < ng; j++) { + + R = distance(x[i],x[j])*ANG_TO_BOHRRAD; + overlap = voltage = 0.0; + + if (R < OvIntMaxR) + { + // type_j = type[j]; + eb = gauss_exp[type[j]]; + chib = chi[type[j]]; + + // The expressions below are in atomic units + // Implementation from Chen Jiahao, Theory and applications of fluctuating-charge models, 2009 (with normalization constants added) + p = ea + eb; + m = ea * eb / p; + overlap = pow((4. * m / p), 0.75) * exp(-m * R * R); + + // Implementation from T. Halgaker et al., Molecular electronic-structure theory, 2000 +// p = ea + eb; +// m = ea * eb / p; +// Overlap = pow((M_PI / p), 1.5) * exp(-m * R * R); + + if (efield) { + phib = efield_potential(x[j]); + voltage = chia - chib + phib; + } else { + voltage = chia - chib; + } + nominator += voltage * overlap; + denominator += overlap; + } + } + if (denominator != 0.0 && nominator != 0.0) + chi_eff[i] = nominator / denominator; + else + chi_eff[i] = ZERONAME; + + if (efield) { + phia = efield_potential(x[i]); + chi_eff[i] -= phia; + } + } +} + +/* ---------------------------------------------------------------------- */ + +double FixQtpieReaxFF::find_min(double *array, int array_length) +{ + // since types start from 1, gaussian exponents start from 1 + double smallest = array[1]; + for (int i = 1; i < array_length; i++) + { + if (array[i] < smallest) + smallest = array[i]; + } + return smallest; +} + +/* ---------------------------------------------------------------------- */ + +double FixQtpieReaxFF::distance(double *loc1, double *loc2) +{ + double distx, disty, distz; + distx = loc2[0] - loc1[0]; + disty = loc2[1] - loc1[1]; + distz = loc2[2] - loc1[2]; + 
return sqrt(distx*distx + disty*disty + distz*distz); +} + +/* ---------------------------------------------------------------------- */ + +double FixQtpieReaxFF::efield_potential(double *x) +{ + double x_efcomp, y_efcomp, z_efcomp; + x_efcomp = x[0] * efield->ex; + y_efcomp = x[1] * efield->ey; + z_efcomp = x[2] * efield->ez; + return x_efcomp + y_efcomp + z_efcomp; +} diff --git a/src/REAXFF/fix_qtpie_reaxff.h b/src/REAXFF/fix_qtpie_reaxff.h index fcdedddf26..216fc56468 100644 --- a/src/REAXFF/fix_qtpie_reaxff.h +++ b/src/REAXFF/fix_qtpie_reaxff.h @@ -48,7 +48,7 @@ class FixQtpieReaxFF : public Fix { protected: int nevery, reaxflag; int matvecs; - int nn, m_fill; + int nn, ng, m_fill; int n_cap, nmax, m_cap; int pack_flag; int nlevels_respa; @@ -90,16 +90,11 @@ class FixQtpieReaxFF : public Fix { char *pertype_option; // argument to determine how per-type info is obtained - // Params from Kritikos - could rename or move to protected later char *gauss_file; // input file for gaussian exponents for each type of REAXFF file - double cutghost; // ghost atoms cutoff (used for check) - int nn_prev; // number of local atoms; needed for memory reallocation of chi_eff (when multiprocessing) + double ghost_cutoff; // ghost atoms cutoff double *gauss_exp; // array of gaussian exponents double *chi_eff; // array of effective electronegativities - double *chi_eff_init; // array of effective electronegativities for FixQEqReax::init_storage() - // void calculate_chi_eff(LAMMPS_NS::Atom *atom, reax_system *system, double *chi, - // int ni, int nj, double *lchi_eff); virtual void pertype_parameters(char *); void init_shielding(); void init_taper(); @@ -110,6 +105,11 @@ class FixQtpieReaxFF : public Fix { virtual void deallocate_matrix(); void reallocate_matrix(); + void calc_chi_eff(); + double find_min(double*, int); + double distance(double*, double*); + double efield_potential(double*); + virtual void init_matvec(); void init_H(); virtual void compute_H(); From 
8c8882927c0a2db1c35d9b22d5b74ee942886172 Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Thu, 15 Aug 2024 16:20:45 +0100 Subject: [PATCH 028/294] Rename variables in calc_chi_eff() --- src/REAXFF/fix_qtpie_reaxff.cpp | 43 +++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp index 9f3714c32d..f3151a85ba 100644 --- a/src/REAXFF/fix_qtpie_reaxff.cpp +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -13,7 +13,7 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing author: Navraj S Lalli & Efstratios Kritikos (Imperial College London) + Contributing author: Navraj S Lalli & Efstratios M Kritikos (Imperial College London) ------------------------------------------------------------------------- */ #include "fix_qtpie_reaxff.h" @@ -1209,10 +1209,10 @@ void FixQtpieReaxFF::calc_chi_eff() { memset(&chi_eff[0],0,atom->nmax*sizeof(double)); const int KSCREEN = 10; - const double ZERONAME = 1.0e-50; + // const double ZERONAME = 1.0e-50; const double ANG_TO_BOHRRAD = 1.8897259886; // 1 Ang = 1.8897259886 Bohr radius - double R,a_min,OvIntMaxR,voltage,overlap,nominator,denominator; + double R,a_min,OvIntMaxR,overlap,sum_n,sum_d; double ea,eb,chia,chib,p,m; double phia,phib; int i,j; @@ -1224,8 +1224,15 @@ void FixQtpieReaxFF::calc_chi_eff() // Use integral pre-screening for overlap calculations a_min = find_min(gauss_exp,ntypes+1); OvIntMaxR = sqrt(pow(a_min,-1.)*log(pow(M_PI/(2.*a_min),3.)*pow(10.,2.*KSCREEN))); + // OvIntMaxR = 0.5; + // if (comm->me == 0) { + // std::cout << "OvIntMaxR is " << OvIntMaxR/ANG_TO_BOHRRAD; + // } ghost_cutoff = MAX(neighbor->cutneighmax,comm->cutghostuser); + // if (comm->me == 0) { + // std::cout << "ghost_cutoff is " << ghost_cutoff; + // } if(ghost_cutoff < OvIntMaxR/ANG_TO_BOHRRAD) { error->all(FLERR,"ghost cutoff error"); // 
char errmsg[256]; @@ -1242,12 +1249,14 @@ void FixQtpieReaxFF::calc_chi_eff() ea = gauss_exp[type[i]]; chia = chi[type[i]]; - nominator = denominator = 0.0; + // nominator = denominator = 0.0; + sum_n = 0.0; + sum_d = 0.0; for (j = 0; j < ng; j++) { - R = distance(x[i],x[j])*ANG_TO_BOHRRAD; - overlap = voltage = 0.0; + R = distance(x[i],x[j])*ANG_TO_BOHRRAD; // Distance between atoms as a multiple of Bohr radius + // overlap = voltage = 0.0; if (R < OvIntMaxR) { @@ -1268,18 +1277,26 @@ void FixQtpieReaxFF::calc_chi_eff() if (efield) { phib = efield_potential(x[j]); - voltage = chia - chib + phib; + sum_n += (chia - chib + phib) * overlap; + // voltage = chia - chib + phib; } else { - voltage = chia - chib; + sum_n += (chia - chib) * overlap; + // voltage = chia - chib; } - nominator += voltage * overlap; - denominator += overlap; + sum_d += overlap; + // nominator += voltage * overlap; + // denominator += overlap; } } - if (denominator != 0.0 && nominator != 0.0) - chi_eff[i] = nominator / denominator; + + if (fabs(sum_n) < SMALL && fabs(sum_d) < SMALL) + chi_eff[i] = 0.0; // SMALL; else - chi_eff[i] = ZERONAME; + chi_eff[i] = sum_n / sum_d; + // if (denominator != 0.0 && nominator != 0.0) + // chi_eff[i] = nominator / denominator; + // else + // chi_eff[i] = ZERONAME; if (efield) { phia = efield_potential(x[i]); From 6d47e417415d3dc4cb6f28d909e81be8972e8a9c Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Tue, 20 Aug 2024 16:00:51 +0100 Subject: [PATCH 029/294] Update calculation of chi_eff chi_eff can now be calculated when atom or equal style variables are used for the electric field, under the restriction that the electric field is applied to all atoms. 
--- src/REAXFF/fix_qtpie_reaxff.cpp | 156 ++++++++++++++------------------ src/REAXFF/fix_qtpie_reaxff.h | 4 +- 2 files changed, 71 insertions(+), 89 deletions(-) diff --git a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp index f3151a85ba..6c8e15d965 100644 --- a/src/REAXFF/fix_qtpie_reaxff.cpp +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -223,7 +223,7 @@ void FixQtpieReaxFF::pertype_parameters(char *arg) reader.ignore_comments = false; for (int i = 1; i <= ntypes; i++) { const char *line = reader.next_line(); - std::cout << line; + std::cout << "Orbital exponent " << line; if (!line) throw TokenizerException("Fix qtpie/reaxff: Invalid param file format",""); ValueTokenizer values(line); @@ -438,7 +438,6 @@ void FixQtpieReaxFF::init() error->all(FLERR,"Fix {} group has no atoms", style); // compute net charge and print warning if too large - double qsum_local = 0.0, qsum = 0.0; for (int i = 0; i < atom->nlocal; i++) { if (atom->mask[i] & groupbit) @@ -450,7 +449,6 @@ void FixQtpieReaxFF::init() error->warning(FLERR,"Fix {} group is not charge neutral, net charge = {:.8}", style, qsum); // get pointer to fix efield if present. there may be at most one instance of fix efield in use. 
- efield = nullptr; auto fixes = modify->get_fix_by_style("^efield"); if (fixes.size() == 1) efield = dynamic_cast(fixes.front()); @@ -463,19 +461,27 @@ void FixQtpieReaxFF::init() if (strcmp(update->unit_style,"real") != 0) error->all(FLERR,"Must use unit_style real with fix {} and external fields", style); + if (efield->groupbit != 1){ // if efield is not applied to all atoms + error->all(FLERR,"Must use group id all for fix efield when using fix {}", style); + } + + if (efield->region){ // if efield is not applied to all atoms + error->all(FLERR,"Keyword region not supported for fix efield when using fix {}", style); + } + if (efield->varflag == FixEfield::ATOM && efield->pstyle != FixEfield::ATOM) error->all(FLERR,"Atom-style external electric field requires atom-style " "potential variable when used with fix {}", style); - if (((efield->xstyle != FixEfield::CONSTANT) && domain->xperiodic) || - ((efield->ystyle != FixEfield::CONSTANT) && domain->yperiodic) || - ((efield->zstyle != FixEfield::CONSTANT) && domain->zperiodic)) - error->all(FLERR,"Must not have electric field component in direction of periodic " - "boundary when using charge equilibration with ReaxFF."); - if (((fabs(efield->ex) > SMALL) && domain->xperiodic) || - ((fabs(efield->ey) > SMALL) && domain->yperiodic) || - ((fabs(efield->ez) > SMALL) && domain->zperiodic)) - error->all(FLERR,"Must not have electric field component in direction of periodic " - "boundary when using charge equilibration with ReaxFF."); + // if (((efield->xstyle != FixEfield::CONSTANT) && domain->xperiodic) || + // ((efield->ystyle != FixEfield::CONSTANT) && domain->yperiodic) || + // ((efield->zstyle != FixEfield::CONSTANT) && domain->zperiodic)) + // error->all(FLERR,"Must not have electric field component in direction of periodic " + // "boundary when using charge equilibration with ReaxFF."); + // if (((fabs(efield->ex) > SMALL) && domain->xperiodic) || + // ((fabs(efield->ey) > SMALL) && domain->yperiodic) || + // 
((fabs(efield->ez) > SMALL) && domain->zperiodic)) + // error->all(FLERR,"Must not have electric field component in direction of periodic " + // "boundary when using charge equilibration with ReaxFF."); } // we need a half neighbor list w/ Newton off @@ -1208,59 +1214,55 @@ void FixQtpieReaxFF::get_chi_field() void FixQtpieReaxFF::calc_chi_eff() { memset(&chi_eff[0],0,atom->nmax*sizeof(double)); - const int KSCREEN = 10; - // const double ZERONAME = 1.0e-50; - const double ANG_TO_BOHRRAD = 1.8897259886; // 1 Ang = 1.8897259886 Bohr radius - double R,a_min,OvIntMaxR,overlap,sum_n,sum_d; - double ea,eb,chia,chib,p,m; - double phia,phib; - int i,j; + const int KSCREEN = 10; + const double ANG_TO_BOHRRAD = 1.8897259886; // 1 Ang = 1.8897259886 Bohr radius + const auto x = (const double * const *)atom->x; const int ntypes = atom->ntypes; const int *type = atom->type; - // const auto x = (const double * const *)atom->x; - double **x = atom->x; - // Use integral pre-screening for overlap calculations - a_min = find_min(gauss_exp,ntypes+1); - OvIntMaxR = sqrt(pow(a_min,-1.)*log(pow(M_PI/(2.*a_min),3.)*pow(10.,2.*KSCREEN))); - // OvIntMaxR = 0.5; - // if (comm->me == 0) { - // std::cout << "OvIntMaxR is " << OvIntMaxR/ANG_TO_BOHRRAD; - // } + double dist,overlap,sum_n,sum_d,ea,eb,chia,chib,phia,phib,p,m; + int i,j; - ghost_cutoff = MAX(neighbor->cutneighmax,comm->cutghostuser); - // if (comm->me == 0) { - // std::cout << "ghost_cutoff is " << ghost_cutoff; - // } - if(ghost_cutoff < OvIntMaxR/ANG_TO_BOHRRAD) { - error->all(FLERR,"ghost cutoff error"); - // char errmsg[256]; - // snprintf(errmsg, 256,"qtpie/reaxff: limit distance for overlap integral: %f " - // "Angstrom > ghost cutoff: %f Angstrom. 
Increase the ghost atom cutoff " - // "with comm_modify.",OvIntMaxR/ANG_TO_BOHRRAD,ghost_cutoff); - // // system->error_ptr->all(FLERR,errmsg); - // error->all(FLERR,errmsg); + // efield energy is in real units of kcal/mol, factor needed for conversion to eV + const double qe2f = force->qe2f; + const double factor = 1.0/qe2f; + + if (efield) { + if (efield->varflag != FixEfield::CONSTANT) + efield->update_efield_variables(); } - for (i = 0; i < nn; i++) { + // use integral pre-screening for overlap calculations + const double emin = find_min(gauss_exp,ntypes+1); + const double dist_cutoff = sqrt(pow(emin,-1.)*log(pow(M_PI/(2.*emin),3.)*pow(10.,2.*KSCREEN))); - // type_i = type[i]; + const double comm_cutoff = MAX(neighbor->cutneighmax,comm->cutghostuser); + if(comm_cutoff < dist_cutoff/ANG_TO_BOHRRAD) { + error->all(FLERR,"comm cutoff = {} Angstrom is smaller than distance cutoff = {} Angstrom " + "for overlap integral in {}. Increase comm cutoff with comm_modify", + comm_cutoff, dist_cutoff/ANG_TO_BOHRRAD, style); + } + + // compute chi_eff for each local atom + for (i = 0; i < nn; i++) { ea = gauss_exp[type[i]]; chia = chi[type[i]]; + if (efield) { + if (efield->varflag != FixEfield::ATOM) { + phia = factor*(x[i][0]*efield->ex + x[i][1]*efield->ey + x[i][2]*efield->ez); + } else { // atom-style potential from FixEfield + phia = efield->efield[i][3]; + } + } - // nominator = denominator = 0.0; sum_n = 0.0; sum_d = 0.0; for (j = 0; j < ng; j++) { + dist = distance(x[i],x[j])*ANG_TO_BOHRRAD; // distance between atoms as a multiple of Bohr radius - R = distance(x[i],x[j])*ANG_TO_BOHRRAD; // Distance between atoms as a multiple of Bohr radius - // overlap = voltage = 0.0; - - if (R < OvIntMaxR) - { - // type_j = type[j]; + if (dist < dist_cutoff) { eb = gauss_exp[type[j]]; chib = chi[type[j]]; @@ -1268,7 +1270,7 @@ void FixQtpieReaxFF::calc_chi_eff() // Implementation from Chen Jiahao, Theory and applications of fluctuating-charge models, 2009 (with normalization 
constants added) p = ea + eb; m = ea * eb / p; - overlap = pow((4. * m / p), 0.75) * exp(-m * R * R); + overlap = pow((4.0*m/p),0.75) * exp(-m*dist*dist); // Implementation from T. Halgaker et al., Molecular electronic-structure theory, 2000 // p = ea + eb; @@ -1276,32 +1278,25 @@ void FixQtpieReaxFF::calc_chi_eff() // Overlap = pow((M_PI / p), 1.5) * exp(-m * R * R); if (efield) { - phib = efield_potential(x[j]); - sum_n += (chia - chib + phib) * overlap; - // voltage = chia - chib + phib; + if (efield->varflag != FixEfield::ATOM) { + phib = factor*(x[j][0]*efield->ex + x[j][1]*efield->ey + x[j][2]*efield->ez); + } else { // atom-style potential from FixEfield + phib = efield->efield[j][3]; + } + sum_n += (chia - chib + phib - phia) * overlap; } else { sum_n += (chia - chib) * overlap; - // voltage = chia - chib; } sum_d += overlap; - // nominator += voltage * overlap; - // denominator += overlap; } } - if (fabs(sum_n) < SMALL && fabs(sum_d) < SMALL) - chi_eff[i] = 0.0; // SMALL; - else - chi_eff[i] = sum_n / sum_d; - // if (denominator != 0.0 && nominator != 0.0) - // chi_eff[i] = nominator / denominator; - // else - // chi_eff[i] = ZERONAME; + chi_eff[i] = sum_n / sum_d; - if (efield) { - phia = efield_potential(x[i]); - chi_eff[i] -= phia; - } + if (fabs(sum_n) < SMALL && fabs(sum_d) < SMALL) + error->all(FLERR,"Unexpected value: fabs(sum_d) is {}", fabs(sum_d)); + if (fabs(sum_d) < 1.0) + error->all(FLERR,"Unexpected value: fabs(sum_d) is {}", fabs(sum_d)); } } @@ -1321,22 +1316,11 @@ double FixQtpieReaxFF::find_min(double *array, int array_length) /* ---------------------------------------------------------------------- */ -double FixQtpieReaxFF::distance(double *loc1, double *loc2) +double FixQtpieReaxFF::distance(const double *posa, const double *posb) { - double distx, disty, distz; - distx = loc2[0] - loc1[0]; - disty = loc2[1] - loc1[1]; - distz = loc2[2] - loc1[2]; - return sqrt(distx*distx + disty*disty + distz*distz); -} - -/* 
---------------------------------------------------------------------- */ - -double FixQtpieReaxFF::efield_potential(double *x) -{ - double x_efcomp, y_efcomp, z_efcomp; - x_efcomp = x[0] * efield->ex; - y_efcomp = x[1] * efield->ey; - z_efcomp = x[2] * efield->ez; - return x_efcomp + y_efcomp + z_efcomp; + double dx, dy, dz; + dx = posb[0] - posa[0]; + dy = posb[1] - posa[1]; + dz = posb[2] - posa[2]; + return sqrt(dx*dx + dy*dy + dz*dz); } diff --git a/src/REAXFF/fix_qtpie_reaxff.h b/src/REAXFF/fix_qtpie_reaxff.h index 216fc56468..7b6feacabb 100644 --- a/src/REAXFF/fix_qtpie_reaxff.h +++ b/src/REAXFF/fix_qtpie_reaxff.h @@ -91,7 +91,6 @@ class FixQtpieReaxFF : public Fix { char *pertype_option; // argument to determine how per-type info is obtained char *gauss_file; // input file for gaussian exponents for each type of REAXFF file - double ghost_cutoff; // ghost atoms cutoff double *gauss_exp; // array of gaussian exponents double *chi_eff; // array of effective electronegativities @@ -107,8 +106,7 @@ class FixQtpieReaxFF : public Fix { void calc_chi_eff(); double find_min(double*, int); - double distance(double*, double*); - double efield_potential(double*); + double distance(const double*, const double*); virtual void init_matvec(); void init_H(); From 27e911cd10fb976a16d39b80980de3755cadb080 Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Tue, 20 Aug 2024 17:36:56 +0100 Subject: [PATCH 030/294] Remove chi_field --- src/REAXFF/fix_qtpie_reaxff.cpp | 60 --------------------------------- src/REAXFF/fix_qtpie_reaxff.h | 15 ++++----- 2 files changed, 6 insertions(+), 69 deletions(-) diff --git a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp index 6c8e15d965..82b0f8b595 100644 --- a/src/REAXFF/fix_qtpie_reaxff.cpp +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -114,7 +114,6 @@ FixQtpieReaxFF::FixQtpieReaxFF(LAMMPS *lmp, int narg, char **arg) : Hdia_inv = nullptr; b_s = nullptr; - chi_field = nullptr; chi_eff = nullptr; b_t = nullptr; b_prc = nullptr; 
@@ -327,7 +326,6 @@ void FixQtpieReaxFF::allocate_storage() memory->create(Hdia_inv,nmax,"qeq:Hdia_inv"); memory->create(b_s,nmax,"qeq:b_s"); - memory->create(chi_field,nmax,"qeq:chi_field"); memory->create(chi_eff,nmax,"qtpie:chi_eff"); memory->create(b_t,nmax,"qeq:b_t"); memory->create(b_prc,nmax,"qeq:b_prc"); @@ -355,7 +353,6 @@ void FixQtpieReaxFF::deallocate_storage() memory->destroy(b_t); memory->destroy(b_prc); memory->destroy(b_prm); - memory->destroy(chi_field); memory->destroy(chi_eff); memory->destroy(p); @@ -604,7 +601,6 @@ void FixQtpieReaxFF::min_setup_pre_force(int vflag) void FixQtpieReaxFF::init_storage() { - // if (efield) get_chi_field(); calc_chi_eff(); for (int ii = 0; ii < nn; ii++) { @@ -612,7 +608,6 @@ void FixQtpieReaxFF::init_storage() if (atom->mask[i] & groupbit) { Hdia_inv[i] = 1. / eta[atom->type[i]]; b_s[i] = -chi_eff[i]; - // if (efield) b_s[i] -= chi_field[i]; b_t[i] = -1.0; b_prc[i] = 0; b_prm[i] = 0; @@ -650,7 +645,6 @@ void FixQtpieReaxFF::pre_force(int /*vflag*/) if (n > n_cap*DANGER_ZONE || m_fill > m_cap*DANGER_ZONE) reallocate_matrix(); - // if (efield) get_chi_field(); calc_chi_eff(); init_matvec(); @@ -692,7 +686,6 @@ void FixQtpieReaxFF::init_matvec() /* init pre-conditioner for H and init solution vectors */ Hdia_inv[i] = 1. 
/ eta[atom->type[i]]; b_s[i] = -chi_eff[i]; - // if (efield) b_s[i] -= chi_field[i]; b_t[i] = -1.0; /* quadratic extrapolation for s & t from previous solutions */ @@ -1158,59 +1151,6 @@ void FixQtpieReaxFF::vector_add(double* dest, double c, double* v, int k) /* ---------------------------------------------------------------------- */ -void FixQtpieReaxFF::get_chi_field() -{ - memset(&chi_field[0],0,atom->nmax*sizeof(double)); - if (!efield) return; - - const auto x = (const double * const *)atom->x; - const int *mask = atom->mask; - const imageint *image = atom->image; - const int nlocal = atom->nlocal; - - - // update electric field region if necessary - - Region *region = efield->region; - if (region) region->prematch(); - - // efield energy is in real units of kcal/mol/angstrom, need to convert to eV - - const double qe2f = force->qe2f; - const double factor = -1.0/qe2f; - - - if (efield->varflag != FixEfield::CONSTANT) - efield->update_efield_variables(); - - // atom selection is for the group of fix efield - - double unwrap[3]; - const double ex = efield->ex; - const double ey = efield->ey; - const double ez = efield->ez; - const int efgroupbit = efield->groupbit; - - // charge interactions - // force = qE, potential energy = F dot x in unwrapped coords - if (efield->varflag != FixEfield::ATOM) { - for (int i = 0; i < nlocal; i++) { - if (mask[i] & efgroupbit) { - if (region && !region->match(x[i][0],x[i][1],x[i][2])) continue; - domain->unmap(x[i],image[i],unwrap); - chi_field[i] = factor*(ex*unwrap[0] + ey*unwrap[1] + ez*unwrap[2]); - } - } - } else { // must use atom-style potential from FixEfield - for (int i = 0; i < nlocal; i++) { - if (mask[i] & efgroupbit) { - if (region && !region->match(x[i][0],x[i][1],x[i][2])) continue; - chi_field[i] = -efield->efield[i][3]; - } - } - } -} - void FixQtpieReaxFF::calc_chi_eff() { memset(&chi_eff[0],0,atom->nmax*sizeof(double)); diff --git a/src/REAXFF/fix_qtpie_reaxff.h b/src/REAXFF/fix_qtpie_reaxff.h index 
7b6feacabb..07afdf9439 100644 --- a/src/REAXFF/fix_qtpie_reaxff.h +++ b/src/REAXFF/fix_qtpie_reaxff.h @@ -82,17 +82,15 @@ class FixQtpieReaxFF : public Fix { double *Hdia_inv; double *b_s, *b_t; double *b_prc, *b_prm; - double *chi_field; + double *chi_eff; // array of effective electronegativities //CG storage double *p, *q, *r, *d; int imax, maxwarn; char *pertype_option; // argument to determine how per-type info is obtained - - char *gauss_file; // input file for gaussian exponents for each type of REAXFF file + char *gauss_file; // input file for gaussian exponents double *gauss_exp; // array of gaussian exponents - double *chi_eff; // array of effective electronegativities virtual void pertype_parameters(char *); void init_shielding(); @@ -104,10 +102,6 @@ class FixQtpieReaxFF : public Fix { virtual void deallocate_matrix(); void reallocate_matrix(); - void calc_chi_eff(); - double find_min(double*, int); - double distance(const double*, const double*); - virtual void init_matvec(); void init_H(); virtual void compute_H(); @@ -134,7 +128,10 @@ class FixQtpieReaxFF : public Fix { virtual void vector_sum(double *, double, double *, double, double *, int); virtual void vector_add(double *, double, double *, int); - virtual void get_chi_field(); + void calc_chi_eff(); + double find_min(double*, int); + double distance(const double*, const double*); + // dual CG support int dual_enabled; // 0: Original, separate s & t optimization; 1: dual optimization From 5021c8c971d8f60acd53109a221bae0e863106b2 Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Tue, 20 Aug 2024 17:39:44 +0100 Subject: [PATCH 031/294] Replace qeq with qtpie --- src/REAXFF/fix_qtpie_reaxff.cpp | 60 ++++++++++++++++----------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp index 82b0f8b595..aecb4352fa 100644 --- a/src/REAXFF/fix_qtpie_reaxff.cpp +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -278,27 +278,27 @@ void 
FixQtpieReaxFF::pertype_parameters(char *arg) reaxflag = 0; - memory->create(chi,ntypes+1,"qeq/reaxff:chi"); - memory->create(eta,ntypes+1,"qeq/reaxff:eta"); - memory->create(gamma,ntypes+1,"qeq/reaxff:gamma"); + memory->create(chi,ntypes+1,"qtpie/reaxff:chi"); + memory->create(eta,ntypes+1,"qtpie/reaxff:eta"); + memory->create(gamma,ntypes+1,"qtpie/reaxff:gamma"); if (comm->me == 0) { chi[0] = eta[0] = gamma[0] = 0.0; try { - TextFileReader reader(arg,"qeq/reaxff parameter"); + TextFileReader reader(arg,"qtpie/reaxff parameter"); reader.ignore_comments = false; for (int i = 1; i <= ntypes; i++) { const char *line = reader.next_line(); if (!line) - throw TokenizerException("Fix qeq/reaxff: Invalid param file format",""); + throw TokenizerException("Fix qtpie/reaxff: Invalid param file format",""); ValueTokenizer values(line); if (values.count() != 4) - throw TokenizerException("Fix qeq/reaxff: Incorrect format of param file",""); + throw TokenizerException("Fix qtpie/reaxff: Incorrect format of param file",""); int itype = values.next_int(); if ((itype < 1) || (itype > ntypes)) - throw TokenizerException("Fix qeq/reaxff: Invalid atom type in param file", + throw TokenizerException("Fix qtpie/reaxff: Invalid atom type in param file", std::to_string(itype)); chi[itype] = values.next_double(); @@ -321,24 +321,24 @@ void FixQtpieReaxFF::allocate_storage() { nmax = atom->nmax; - memory->create(s,nmax,"qeq:s"); - memory->create(t,nmax,"qeq:t"); + memory->create(s,nmax,"qtpie:s"); + memory->create(t,nmax,"qtpie:t"); - memory->create(Hdia_inv,nmax,"qeq:Hdia_inv"); - memory->create(b_s,nmax,"qeq:b_s"); + memory->create(Hdia_inv,nmax,"qtpie:Hdia_inv"); + memory->create(b_s,nmax,"qtpie:b_s"); memory->create(chi_eff,nmax,"qtpie:chi_eff"); - memory->create(b_t,nmax,"qeq:b_t"); - memory->create(b_prc,nmax,"qeq:b_prc"); - memory->create(b_prm,nmax,"qeq:b_prm"); + memory->create(b_t,nmax,"qtpie:b_t"); + memory->create(b_prc,nmax,"qtpie:b_prc"); + 
memory->create(b_prm,nmax,"qtpie:b_prm"); // dual CG support int size = nmax; if (dual_enabled) size*= 2; - memory->create(p,size,"qeq:p"); - memory->create(q,size,"qeq:q"); - memory->create(r,size,"qeq:r"); - memory->create(d,size,"qeq:d"); + memory->create(p,size,"qtpie:p"); + memory->create(q,size,"qtpie:q"); + memory->create(r,size,"qtpie:r"); + memory->create(d,size,"qtpie:d"); } /* ---------------------------------------------------------------------- */ @@ -400,10 +400,10 @@ void FixQtpieReaxFF::allocate_matrix() H.n = n_cap; H.m = m_cap; - memory->create(H.firstnbr,n_cap,"qeq:H.firstnbr"); - memory->create(H.numnbrs,n_cap,"qeq:H.numnbrs"); - memory->create(H.jlist,m_cap,"qeq:H.jlist"); - memory->create(H.val,m_cap,"qeq:H.val"); + memory->create(H.firstnbr,n_cap,"qtpie:H.firstnbr"); + memory->create(H.numnbrs,n_cap,"qtpie:H.numnbrs"); + memory->create(H.jlist,m_cap,"qtpie:H.jlist"); + memory->create(H.val,m_cap,"qtpie:H.val"); } /* ---------------------------------------------------------------------- */ @@ -516,7 +516,7 @@ void FixQtpieReaxFF::init_shielding() ntypes = atom->ntypes; if (shld == nullptr) - memory->create(shld,ntypes+1,ntypes+1,"qeq:shielding"); + memory->create(shld,ntypes+1,ntypes+1,"qtpie:shielding"); for (i = 1; i <= ntypes; ++i) for (j = 1; j <= ntypes; ++j) @@ -530,11 +530,11 @@ void FixQtpieReaxFF::init_taper() double d7, swa2, swa3, swb2, swb3; if (fabs(swa) > 0.01 && comm->me == 0) - error->warning(FLERR,"Fix qeq/reaxff has non-zero lower Taper radius cutoff"); + error->warning(FLERR,"Fix qtpie/reaxff has non-zero lower Taper radius cutoff"); if (swb < 0) - error->all(FLERR, "Fix qeq/reaxff has negative upper Taper radius cutoff"); + error->all(FLERR, "Fix qtpie/reaxff has negative upper Taper radius cutoff"); else if (swb < 5 && comm->me == 0) - error->warning(FLERR,"Fix qeq/reaxff has very low Taper radius cutoff"); + error->warning(FLERR,"Fix qtpie/reaxff has very low Taper radius cutoff"); d7 = pow(swb - swa, 7); swa2 = SQR(swa); 
@@ -760,7 +760,7 @@ void FixQtpieReaxFF::compute_H() } if (m_fill >= H.m) - error->all(FLERR,"Fix qeq/reaxff H matrix size has been exceeded: m_fill={} H.m={}\n", + error->all(FLERR,"Fix qtpie/reaxff H matrix size has been exceeded: m_fill={} H.m={}\n", m_fill, H.m); } @@ -835,7 +835,7 @@ int FixQtpieReaxFF::CG(double *b, double *x) } if ((i >= imax) && maxwarn && (comm->me == 0)) - error->warning(FLERR, "Fix qeq/reaxff CG convergence failed after {} iterations at step {}", + error->warning(FLERR, "Fix qtpie/reaxff CG convergence failed after {} iterations at step {}", i,update->ntimestep); return i; } @@ -1018,8 +1018,8 @@ double FixQtpieReaxFF::memory_usage() void FixQtpieReaxFF::grow_arrays(int nmax) { - memory->grow(s_hist,nmax,nprev,"qeq:s_hist"); - memory->grow(t_hist,nmax,nprev,"qeq:t_hist"); + memory->grow(s_hist,nmax,nprev,"qtpie:s_hist"); + memory->grow(t_hist,nmax,nprev,"qtpie:t_hist"); } /* ---------------------------------------------------------------------- From 149d9b310d0b144065c9525fa5444f3bad804090 Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Tue, 20 Aug 2024 18:03:50 +0100 Subject: [PATCH 032/294] Remove dual as a possible keyword argument --- src/REAXFF/fix_qtpie_reaxff.cpp | 23 +++-------------------- src/REAXFF/fix_qtpie_reaxff.h | 3 --- 2 files changed, 3 insertions(+), 23 deletions(-) diff --git a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp index aecb4352fa..4066103152 100644 --- a/src/REAXFF/fix_qtpie_reaxff.cpp +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -74,7 +74,7 @@ FixQtpieReaxFF::FixQtpieReaxFF(LAMMPS *lmp, int narg, char **arg) : imax = 200; maxwarn = 1; - if ((narg < 9) || (narg > 13)) error->all(FLERR,"Illegal fix {} command", style); + if ((narg < 9) || (narg > 12)) error->all(FLERR,"Illegal fix {} command", style); nevery = utils::inumeric(FLERR,arg[3],false,lmp); if (nevery <= 0) error->all(FLERR,"Illegal fix {} command", style); @@ -85,15 +85,9 @@ FixQtpieReaxFF::FixQtpieReaxFF(LAMMPS *lmp, int narg, 
char **arg) : pertype_option = utils::strdup(arg[7]); gauss_file = utils::strdup(arg[8]); - // dual CG support only available for OPENMP variant - // check for compatibility is in Fix::post_constructor() - - dual_enabled = 0; - int iarg = 9; while (iarg < narg) { - if (strcmp(arg[iarg],"dual") == 0) dual_enabled = 1; - else if (strcmp(arg[iarg],"nowarn") == 0) maxwarn = 0; + if (strcmp(arg[iarg],"nowarn") == 0) maxwarn = 0; else if (strcmp(arg[iarg],"maxiter") == 0) { if (iarg+1 > narg-1) error->all(FLERR,"Illegal fix {} command", style); @@ -133,11 +127,7 @@ FixQtpieReaxFF::FixQtpieReaxFF(LAMMPS *lmp, int narg, char **arg) : H.jlist = nullptr; H.val = nullptr; - // dual CG support - // Update comm sizes for this fix - - if (dual_enabled) comm_forward = comm_reverse = 2; - else comm_forward = comm_reverse = 1; + comm_forward = comm_reverse = 1; // perform initial allocation of atom-based arrays // register with Atom class @@ -189,8 +179,6 @@ void FixQtpieReaxFF::post_constructor() s_hist[i][j] = t_hist[i][j] = 0; pertype_parameters(pertype_option); - if (dual_enabled) - error->all(FLERR,"Dual keyword only supported with fix qeq/reaxff/omp"); } /* ---------------------------------------------------------------------- */ @@ -331,9 +319,7 @@ void FixQtpieReaxFF::allocate_storage() memory->create(b_prc,nmax,"qtpie:b_prc"); memory->create(b_prm,nmax,"qtpie:b_prm"); - // dual CG support int size = nmax; - if (dual_enabled) size*= 2; memory->create(p,size,"qtpie:p"); memory->create(q,size,"qtpie:q"); @@ -1006,9 +992,6 @@ double FixQtpieReaxFF::memory_usage() bytes += (double)m_cap * sizeof(int); bytes += (double)m_cap * sizeof(double); - if (dual_enabled) - bytes += (double)atom->nmax*4 * sizeof(double); // double size for q, d, r, and p - return bytes; } diff --git a/src/REAXFF/fix_qtpie_reaxff.h b/src/REAXFF/fix_qtpie_reaxff.h index 07afdf9439..827083c1aa 100644 --- a/src/REAXFF/fix_qtpie_reaxff.h +++ b/src/REAXFF/fix_qtpie_reaxff.h @@ -132,9 +132,6 @@ class 
FixQtpieReaxFF : public Fix { double find_min(double*, int); double distance(const double*, const double*); - - // dual CG support - int dual_enabled; // 0: Original, separate s & t optimization; 1: dual optimization int matvecs_s, matvecs_t; // Iteration count for each system }; From eb6e5b438abc9784a3b654d76aed364402e784fd Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Wed, 21 Aug 2024 10:18:51 +0100 Subject: [PATCH 033/294] Remove virtual keyword --- src/REAXFF/fix_qtpie_reaxff.h | 36 +++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/REAXFF/fix_qtpie_reaxff.h b/src/REAXFF/fix_qtpie_reaxff.h index 827083c1aa..1609202693 100644 --- a/src/REAXFF/fix_qtpie_reaxff.h +++ b/src/REAXFF/fix_qtpie_reaxff.h @@ -33,7 +33,7 @@ class FixQtpieReaxFF : public Fix { void post_constructor() override; void init() override; void init_list(int, class NeighList *) override; - virtual void init_storage(); + void init_storage(); void setup_pre_force(int) override; void pre_force(int) override; @@ -89,27 +89,27 @@ class FixQtpieReaxFF : public Fix { int imax, maxwarn; char *pertype_option; // argument to determine how per-type info is obtained - char *gauss_file; // input file for gaussian exponents - double *gauss_exp; // array of gaussian exponents + char *gauss_file; // input file for gaussian exponents + double *gauss_exp; // array of gaussian exponents - virtual void pertype_parameters(char *); + void pertype_parameters(char *); void init_shielding(); void init_taper(); - virtual void allocate_storage(); - virtual void deallocate_storage(); + void allocate_storage(); + void deallocate_storage(); void reallocate_storage(); - virtual void allocate_matrix(); - virtual void deallocate_matrix(); + void allocate_matrix(); + void deallocate_matrix(); void reallocate_matrix(); - virtual void init_matvec(); + void init_matvec(); void init_H(); - virtual void compute_H(); + void compute_H(); double calculate_H(double, double); - virtual 
void calculate_Q(); + void calculate_Q(); - virtual int CG(double *, double *); - virtual void sparse_matvec(sparse_matrix *, double *, double *); + int CG(double *, double *); + void sparse_matvec(sparse_matrix *, double *, double *); int pack_forward_comm(int, int *, double *, int, int *) override; void unpack_forward_comm(int, int, double *) override; @@ -121,12 +121,12 @@ class FixQtpieReaxFF : public Fix { int pack_exchange(int, double *) override; int unpack_exchange(int, double *) override; - virtual double parallel_norm(double *, int); - virtual double parallel_dot(double *, double *, int); - virtual double parallel_vector_acc(double *, int); + double parallel_norm(double *, int); + double parallel_dot(double *, double *, int); + double parallel_vector_acc(double *, int); - virtual void vector_sum(double *, double, double *, double, double *, int); - virtual void vector_add(double *, double, double *, int); + void vector_sum(double *, double, double *, double, double *, int); + void vector_add(double *, double, double *, int); void calc_chi_eff(); double find_min(double*, int); From 6dd45ccfdb38dd4ae374a3f59a85e352bc3610e5 Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Wed, 21 Aug 2024 10:57:00 +0100 Subject: [PATCH 034/294] Add fix_qtpie_reaxff --- src/.gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/.gitignore b/src/.gitignore index c26eaaba30..33595ed937 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -982,6 +982,8 @@ /fix_qeq_reaxff.h /fix_qmmm.cpp /fix_qmmm.h +/fix_qtpie_reaxff.cpp +/fix_qtpie_reaxff.h /fix_reaxff.cpp /fix_reaxff.h /fix_reaxff_bonds.cpp From c2e4816717fb132854628fd2a4cee082cb51f154 Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Wed, 21 Aug 2024 11:32:18 +0100 Subject: [PATCH 035/294] Update contributing authors --- src/REAXFF/fix_qtpie_reaxff.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp index 
4066103152..8acf9af50c 100644 --- a/src/REAXFF/fix_qtpie_reaxff.cpp +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -13,7 +13,9 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing author: Navraj S Lalli & Efstratios M Kritikos (Imperial College London) + Contributing authors: + Navraj S Lalli (Imperial College London) + Efstratios M Kritikos (California Institute of Technology) ------------------------------------------------------------------------- */ #include "fix_qtpie_reaxff.h" @@ -1158,7 +1160,7 @@ void FixQtpieReaxFF::calc_chi_eff() // use integral pre-screening for overlap calculations const double emin = find_min(gauss_exp,ntypes+1); - const double dist_cutoff = sqrt(pow(emin,-1.)*log(pow(M_PI/(2.*emin),3.)*pow(10.,2.*KSCREEN))); + const double dist_cutoff = sqrt(pow(emin,-1.0)*log(pow(M_PI/(2.0*emin),3.0)*pow(10.0,2.0*KSCREEN))); const double comm_cutoff = MAX(neighbor->cutneighmax,comm->cutghostuser); if(comm_cutoff < dist_cutoff/ANG_TO_BOHRRAD) { From 62b14aa702208ee2e4eda276c2834e869bf4dc19 Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Wed, 21 Aug 2024 12:27:14 +0100 Subject: [PATCH 036/294] Remove unused include --- src/REAXFF/fix_qtpie_reaxff.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp index 8acf9af50c..a1ec2e3c10 100644 --- a/src/REAXFF/fix_qtpie_reaxff.cpp +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -28,7 +28,6 @@ #include "fix_efield.h" #include "force.h" #include "group.h" -#include "memory.h" #include "modify.h" #include "neigh_list.h" #include "neighbor.h" From 79cc70c9dad7c43fa8ade7508bf92ff1cac332ee Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Wed, 21 Aug 2024 15:37:26 +0100 Subject: [PATCH 037/294] Rename variable for sum of local and ghost atoms --- src/REAXFF/fix_qtpie_reaxff.cpp | 12 ++++++------ src/REAXFF/fix_qtpie_reaxff.h | 2 +- 2 files changed, 7 
insertions(+), 7 deletions(-) diff --git a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp index a1ec2e3c10..ea0ca4b782 100644 --- a/src/REAXFF/fix_qtpie_reaxff.cpp +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -99,7 +99,7 @@ FixQtpieReaxFF::FixQtpieReaxFF(LAMMPS *lmp, int narg, char **arg) : } shld = nullptr; - nn = ng = n_cap = 0; + nn = nt = n_cap = 0; nmax = 0; m_fill = m_cap = 0; pack_flag = 0; @@ -546,13 +546,13 @@ void FixQtpieReaxFF::setup_pre_force(int vflag) { if (reaxff) { nn = reaxff->list->inum; - ng = reaxff->list->inum + reaxff->list->gnum; + nt = reaxff->list->inum + reaxff->list->gnum; ilist = reaxff->list->ilist; numneigh = reaxff->list->numneigh; firstneigh = reaxff->list->firstneigh; } else { nn = list->inum; - ng = list->inum + list->gnum; + nt = list->inum + list->gnum; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; @@ -613,13 +613,13 @@ void FixQtpieReaxFF::pre_force(int /*vflag*/) if (reaxff) { nn = reaxff->list->inum; - ng = reaxff->list->inum + reaxff->list->gnum; + nt = reaxff->list->inum + reaxff->list->gnum; ilist = reaxff->list->ilist; numneigh = reaxff->list->numneigh; firstneigh = reaxff->list->firstneigh; } else { nn = list->inum; - ng = list->inum + list->gnum; + nt = list->inum + list->gnum; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; @@ -1183,7 +1183,7 @@ void FixQtpieReaxFF::calc_chi_eff() sum_n = 0.0; sum_d = 0.0; - for (j = 0; j < ng; j++) { + for (j = 0; j < nt; j++) { dist = distance(x[i],x[j])*ANG_TO_BOHRRAD; // distance between atoms as a multiple of Bohr radius if (dist < dist_cutoff) { diff --git a/src/REAXFF/fix_qtpie_reaxff.h b/src/REAXFF/fix_qtpie_reaxff.h index 1609202693..523e39ecb5 100644 --- a/src/REAXFF/fix_qtpie_reaxff.h +++ b/src/REAXFF/fix_qtpie_reaxff.h @@ -48,7 +48,7 @@ class FixQtpieReaxFF : public Fix { protected: int nevery, reaxflag; int matvecs; - int nn, ng, m_fill; + int nn, nt, m_fill; int n_cap, nmax, m_cap; int 
pack_flag; int nlevels_respa; From f3e5e4b4c1d85409553317bf25d5823f40a1bb23 Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Wed, 21 Aug 2024 17:31:06 +0100 Subject: [PATCH 038/294] Rename misleading variable name --- src/REAXFF/fix_qtpie_reaxff.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp index ea0ca4b782..cf9046d88d 100644 --- a/src/REAXFF/fix_qtpie_reaxff.cpp +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -48,7 +48,7 @@ using namespace LAMMPS_NS; using namespace FixConst; -static constexpr double EV_TO_KCAL_PER_MOL = 14.4; +static constexpr double CONV_TO_EV = 14.4; static constexpr double SMALL = 1.0e-14; static constexpr double QSUMSMALL = 0.00001; @@ -768,7 +768,7 @@ double FixQtpieReaxFF::calculate_H(double r, double gamma) denom = r * r * r + gamma; denom = pow(denom,1.0/3.0); - return Taper * EV_TO_KCAL_PER_MOL / denom; + return Taper * CONV_TO_EV / denom; } /* ---------------------------------------------------------------------- */ From dff91accb0f45b5605d196801b6b6de24aaf8309 Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Fri, 23 Aug 2024 12:59:54 +0100 Subject: [PATCH 039/294] Correct calculation of cut off distance --- src/REAXFF/fix_qtpie_reaxff.cpp | 64 ++++++++++++++++----------------- src/REAXFF/fix_qtpie_reaxff.h | 9 ++--- 2 files changed, 35 insertions(+), 38 deletions(-) diff --git a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp index cf9046d88d..de4ca30101 100644 --- a/src/REAXFF/fix_qtpie_reaxff.cpp +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -51,18 +51,19 @@ using namespace FixConst; static constexpr double CONV_TO_EV = 14.4; static constexpr double SMALL = 1.0e-14; static constexpr double QSUMSMALL = 0.00001; +static constexpr double ANGSTROM_TO_BOHRRADIUS = 1.8897261259; static const char cite_fix_qtpie_reaxff[] = - "fix qtpie/reaxff command: doi:https://doi.org/10.1016/j.cplett.2007.02.065\n\n" - "@article{chen2007qtpie,\n" - 
"title={QTPIE: Charge transfer with polarization current equalization. A fluctuating charge model with correct asymptotics},\n" - "author={Chen, Jiahao and Martinez, Todd J},\n" - "journal={Chemical physics letters},\n" - "volume={438},\n" - "number={4-6},\n" - "pages={315--320},\n" - "year={2007},\n" - "publisher={Elsevier}\n" + "fix qtpie/reaxff command: doi\n\n" + "@article{,\n" + "title={},\n" + "author={},\n" + "journal={},\n" + "volume={},\n" + "number={},\n" + "pages={},\n" + "year={},\n" + "publisher={}\n" "}\n\n"; /* ---------------------------------------------------------------------- */ @@ -233,6 +234,12 @@ void FixQtpieReaxFF::pertype_parameters(char *arg) MPI_Bcast(gauss_exp,ntypes+1,MPI_DOUBLE,0,world); + // define a cutoff distance (in atomic units) beyond which overlap integrals are neglected + // in calc_chi_eff() + const double emin = find_min(gauss_exp,ntypes+1); + const int olap_cut = 10; // overlap integrals are neglected if less than pow(10,-olap_cut) + dist_cutoff = sqrt(1/emin*log(pow(10.0,2.0*olap_cut))); + // read chi, eta and gamma if (utils::strmatch(arg,"^reaxff")) { @@ -1139,8 +1146,6 @@ void FixQtpieReaxFF::calc_chi_eff() { memset(&chi_eff[0],0,atom->nmax*sizeof(double)); - const int KSCREEN = 10; - const double ANG_TO_BOHRRAD = 1.8897259886; // 1 Ang = 1.8897259886 Bohr radius const auto x = (const double * const *)atom->x; const int ntypes = atom->ntypes; const int *type = atom->type; @@ -1148,6 +1153,14 @@ void FixQtpieReaxFF::calc_chi_eff() double dist,overlap,sum_n,sum_d,ea,eb,chia,chib,phia,phib,p,m; int i,j; + // check ghost atoms are stored up to the distance cutoff for overlap integrals + const double comm_cutoff = MAX(neighbor->cutneighmax,comm->cutghostuser); + if(comm_cutoff < dist_cutoff/ANGSTROM_TO_BOHRRADIUS) { + error->all(FLERR,"comm cutoff = {} Angstrom is smaller than distance cutoff = {} Angstrom " + "for overlap integrals in {}. 
Increase comm cutoff with comm_modify", + comm_cutoff, dist_cutoff/ANGSTROM_TO_BOHRRADIUS, style); + } + // efield energy is in real units of kcal/mol, factor needed for conversion to eV const double qe2f = force->qe2f; const double factor = 1.0/qe2f; @@ -1157,17 +1170,6 @@ void FixQtpieReaxFF::calc_chi_eff() efield->update_efield_variables(); } - // use integral pre-screening for overlap calculations - const double emin = find_min(gauss_exp,ntypes+1); - const double dist_cutoff = sqrt(pow(emin,-1.0)*log(pow(M_PI/(2.0*emin),3.0)*pow(10.0,2.0*KSCREEN))); - - const double comm_cutoff = MAX(neighbor->cutneighmax,comm->cutghostuser); - if(comm_cutoff < dist_cutoff/ANG_TO_BOHRRAD) { - error->all(FLERR,"comm cutoff = {} Angstrom is smaller than distance cutoff = {} Angstrom " - "for overlap integral in {}. Increase comm cutoff with comm_modify", - comm_cutoff, dist_cutoff/ANG_TO_BOHRRAD, style); - } - // compute chi_eff for each local atom for (i = 0; i < nn; i++) { ea = gauss_exp[type[i]]; @@ -1184,23 +1186,17 @@ void FixQtpieReaxFF::calc_chi_eff() sum_d = 0.0; for (j = 0; j < nt; j++) { - dist = distance(x[i],x[j])*ANG_TO_BOHRRAD; // distance between atoms as a multiple of Bohr radius + dist = distance(x[i],x[j])*ANGSTROM_TO_BOHRRADIUS; // in atomic units if (dist < dist_cutoff) { eb = gauss_exp[type[j]]; chib = chi[type[j]]; - // The expressions below are in atomic units - // Implementation from Chen Jiahao, Theory and applications of fluctuating-charge models, 2009 (with normalization constants added) + // overlap integral of two normalised 1s Gaussian type orbitals p = ea + eb; m = ea * eb / p; overlap = pow((4.0*m/p),0.75) * exp(-m*dist*dist); - // Implementation from T. 
Halgaker et al., Molecular electronic-structure theory, 2000 -// p = ea + eb; -// m = ea * eb / p; -// Overlap = pow((M_PI / p), 1.5) * exp(-m * R * R); - if (efield) { if (efield->varflag != FixEfield::ATOM) { phib = factor*(x[j][0]*efield->ex + x[j][1]*efield->ey + x[j][2]*efield->ez); @@ -1226,11 +1222,11 @@ void FixQtpieReaxFF::calc_chi_eff() /* ---------------------------------------------------------------------- */ -double FixQtpieReaxFF::find_min(double *array, int array_length) +double FixQtpieReaxFF::find_min(const double *array, const int array_length) { - // since types start from 1, gaussian exponents start from 1 + // index of first gaussian orbital exponent is 1 double smallest = array[1]; - for (int i = 1; i < array_length; i++) + for (int i = 2; i < array_length; i++) { if (array[i] < smallest) smallest = array[i]; diff --git a/src/REAXFF/fix_qtpie_reaxff.h b/src/REAXFF/fix_qtpie_reaxff.h index 523e39ecb5..8b98025a81 100644 --- a/src/REAXFF/fix_qtpie_reaxff.h +++ b/src/REAXFF/fix_qtpie_reaxff.h @@ -82,15 +82,16 @@ class FixQtpieReaxFF : public Fix { double *Hdia_inv; double *b_s, *b_t; double *b_prc, *b_prm; - double *chi_eff; // array of effective electronegativities + double *chi_eff; // array of effective electronegativities //CG storage double *p, *q, *r, *d; int imax, maxwarn; char *pertype_option; // argument to determine how per-type info is obtained - char *gauss_file; // input file for gaussian exponents - double *gauss_exp; // array of gaussian exponents + char *gauss_file; // input file for gaussian orbital exponents + double *gauss_exp; // array of gaussian orbital exponents for each atom type + double dist_cutoff; // separation distance beyond which to neglect overlap integrals void pertype_parameters(char *); void init_shielding(); @@ -129,7 +130,7 @@ class FixQtpieReaxFF : public Fix { void vector_add(double *, double, double *, int); void calc_chi_eff(); - double find_min(double*, int); + double find_min(const double*, const int); 
double distance(const double*, const double*); int matvecs_s, matvecs_t; // Iteration count for each system From ba2217a4b4413a7c4742ece76010e3d91375b0eb Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Fri, 23 Aug 2024 15:20:35 +0100 Subject: [PATCH 040/294] Improve exceptions in reading of gauss file --- src/REAXFF/fix_qtpie_reaxff.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp index de4ca30101..ac9415c306 100644 --- a/src/REAXFF/fix_qtpie_reaxff.cpp +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -209,23 +209,27 @@ void FixQtpieReaxFF::pertype_parameters(char *arg) gauss_exp[0] = 0.0; try { TextFileReader reader(gauss_file,"qtpie/reaxff gaussian exponents"); - reader.ignore_comments = false; + reader.ignore_comments = true; for (int i = 1; i <= ntypes; i++) { const char *line = reader.next_line(); - std::cout << "Orbital exponent " << line; if (!line) - throw TokenizerException("Fix qtpie/reaxff: Invalid param file format",""); + throw TokenizerException("Fix qtpie/reaxff: Incorrect number of atom types in gauss file",""); ValueTokenizer values(line); if (values.count() != 2) - throw TokenizerException("Fix qtpie/reaxff: Incorrect format of param file",""); + throw TokenizerException("Fix qtpie/reaxff: Incorrect number of values per line " + "in gauss file",std::to_string(values.count())); int itype = values.next_int(); if ((itype < 1) || (itype > ntypes)) - throw TokenizerException("Fix qtpie/reaxff: Invalid atom type in param file", + throw TokenizerException("Fix qtpie/reaxff: Invalid atom type in gauss file", std::to_string(itype)); - gauss_exp[itype] = values.next_double(); + double expo = values.next_double(); + if (expo < 0) + throw TokenizerException("Fix qtpie/reaxff: Invalid orbital exponent in gauss file", + std::to_string(expo)); + gauss_exp[itype] = expo; } } catch (std::exception &e) { error->one(FLERR,e.what()); From 
5fab9e665f3243977f216f072b4c285bbcc248c1 Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Mon, 2 Sep 2024 16:59:07 +0100 Subject: [PATCH 041/294] Update with changes made to fix_qeq_reaxff.cpp --- src/REAXFF/fix_qtpie_reaxff.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp index ac9415c306..904fb19029 100644 --- a/src/REAXFF/fix_qtpie_reaxff.cpp +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -372,7 +372,8 @@ void FixQtpieReaxFF::reallocate_storage() void FixQtpieReaxFF::allocate_matrix() { - int i,ii,m; + int i,ii; + bigint m; int mincap; double safezone; @@ -394,14 +395,17 @@ void FixQtpieReaxFF::allocate_matrix() i = ilist[ii]; m += numneigh[i]; } - m_cap = MAX((int)(m * safezone), mincap * REAX_MIN_NBRS); + bigint m_cap_big = (bigint)MAX(m * safezone, mincap * REAX_MIN_NBRS); + if (m_cap_big > MAXSMALLINT) + error->one(FLERR,"Too many neighbors in fix qeq/reaxff"); + m_cap = m_cap_big; H.n = n_cap; H.m = m_cap; - memory->create(H.firstnbr,n_cap,"qtpie:H.firstnbr"); - memory->create(H.numnbrs,n_cap,"qtpie:H.numnbrs"); - memory->create(H.jlist,m_cap,"qtpie:H.jlist"); - memory->create(H.val,m_cap,"qtpie:H.val"); + memory->create(H.firstnbr,n_cap,"qeq:H.firstnbr"); + memory->create(H.numnbrs,n_cap,"qeq:H.numnbrs"); + memory->create(H.jlist,m_cap,"qeq:H.jlist"); + memory->create(H.val,m_cap,"qeq:H.val"); } /* ---------------------------------------------------------------------- */ From 49dcb679f642b6d40f4b1a3353ca5cf9b405611d Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Tue, 3 Sep 2024 12:22:10 +0100 Subject: [PATCH 042/294] Change names of orbital exponents --- src/REAXFF/fix_qtpie_reaxff.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp index 904fb19029..ec573f6a57 100644 --- a/src/REAXFF/fix_qtpie_reaxff.cpp +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ 
-203,7 +203,7 @@ void FixQtpieReaxFF::pertype_parameters(char *arg) const int *type = atom->type; const int ntypes = atom->ntypes; - // read gaussian exponents + // read gaussian orbital exponents memory->create(gauss_exp,ntypes+1,"qtpie/reaxff:gauss_exp"); if (comm->me == 0) { gauss_exp[0] = 0.0; @@ -225,11 +225,11 @@ void FixQtpieReaxFF::pertype_parameters(char *arg) throw TokenizerException("Fix qtpie/reaxff: Invalid atom type in gauss file", std::to_string(itype)); - double expo = values.next_double(); - if (expo < 0) + double exp = values.next_double(); + if (exp < 0) throw TokenizerException("Fix qtpie/reaxff: Invalid orbital exponent in gauss file", - std::to_string(expo)); - gauss_exp[itype] = expo; + std::to_string(exp)); + gauss_exp[itype] = exp; } } catch (std::exception &e) { error->one(FLERR,e.what()); @@ -240,9 +240,9 @@ void FixQtpieReaxFF::pertype_parameters(char *arg) // define a cutoff distance (in atomic units) beyond which overlap integrals are neglected // in calc_chi_eff() - const double emin = find_min(gauss_exp,ntypes+1); + const double expmin = find_min(gauss_exp,ntypes+1); const int olap_cut = 10; // overlap integrals are neglected if less than pow(10,-olap_cut) - dist_cutoff = sqrt(1/emin*log(pow(10.0,2.0*olap_cut))); + dist_cutoff = sqrt(2*olap_cut/expmin*log(10.0)); // read chi, eta and gamma @@ -1158,7 +1158,7 @@ void FixQtpieReaxFF::calc_chi_eff() const int ntypes = atom->ntypes; const int *type = atom->type; - double dist,overlap,sum_n,sum_d,ea,eb,chia,chib,phia,phib,p,m; + double dist,overlap,sum_n,sum_d,expa,expb,chia,chib,phia,phib,p,m; int i,j; // check ghost atoms are stored up to the distance cutoff for overlap integrals @@ -1180,7 +1180,7 @@ void FixQtpieReaxFF::calc_chi_eff() // compute chi_eff for each local atom for (i = 0; i < nn; i++) { - ea = gauss_exp[type[i]]; + expa = gauss_exp[type[i]]; chia = chi[type[i]]; if (efield) { if (efield->varflag != FixEfield::ATOM) { @@ -1197,12 +1197,12 @@ void 
FixQtpieReaxFF::calc_chi_eff() dist = distance(x[i],x[j])*ANGSTROM_TO_BOHRRADIUS; // in atomic units if (dist < dist_cutoff) { - eb = gauss_exp[type[j]]; + expb = gauss_exp[type[j]]; chib = chi[type[j]]; // overlap integral of two normalised 1s Gaussian type orbitals - p = ea + eb; - m = ea * eb / p; + p = expa + expb; + m = expa * expb / p; overlap = pow((4.0*m/p),0.75) * exp(-m*dist*dist); if (efield) { From 3a5e764730afc6a90dc6850109d2643586c73558 Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Tue, 3 Sep 2024 17:50:14 +0100 Subject: [PATCH 043/294] Fix whitespace --- src/REAXFF/fix_qtpie_reaxff.cpp | 38 ++++++++++++++++----------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp index ec573f6a57..5fd60e51c5 100644 --- a/src/REAXFF/fix_qtpie_reaxff.cpp +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -13,7 +13,7 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing authors: + Contributing authors: Navraj S Lalli (Imperial College London) Efstratios M Kritikos (California Institute of Technology) ------------------------------------------------------------------------- */ @@ -218,15 +218,15 @@ void FixQtpieReaxFF::pertype_parameters(char *arg) if (values.count() != 2) throw TokenizerException("Fix qtpie/reaxff: Incorrect number of values per line " - "in gauss file",std::to_string(values.count())); + "in gauss file",std::to_string(values.count())); int itype = values.next_int(); if ((itype < 1) || (itype > ntypes)) throw TokenizerException("Fix qtpie/reaxff: Invalid atom type in gauss file", std::to_string(itype)); - double exp = values.next_double(); - if (exp < 0) + double exp = values.next_double(); + if (exp < 0) throw TokenizerException("Fix qtpie/reaxff: Invalid orbital exponent in gauss file", std::to_string(exp)); gauss_exp[itype] = exp; @@ -1165,14 +1165,14 @@ 
void FixQtpieReaxFF::calc_chi_eff() const double comm_cutoff = MAX(neighbor->cutneighmax,comm->cutghostuser); if(comm_cutoff < dist_cutoff/ANGSTROM_TO_BOHRRADIUS) { error->all(FLERR,"comm cutoff = {} Angstrom is smaller than distance cutoff = {} Angstrom " - "for overlap integrals in {}. Increase comm cutoff with comm_modify", - comm_cutoff, dist_cutoff/ANGSTROM_TO_BOHRRADIUS, style); + "for overlap integrals in {}. Increase comm cutoff with comm_modify", + comm_cutoff, dist_cutoff/ANGSTROM_TO_BOHRRADIUS, style); } // efield energy is in real units of kcal/mol, factor needed for conversion to eV const double qe2f = force->qe2f; const double factor = 1.0/qe2f; - + if (efield) { if (efield->varflag != FixEfield::CONSTANT) efield->update_efield_variables(); @@ -1184,9 +1184,9 @@ void FixQtpieReaxFF::calc_chi_eff() chia = chi[type[i]]; if (efield) { if (efield->varflag != FixEfield::ATOM) { - phia = factor*(x[i][0]*efield->ex + x[i][1]*efield->ey + x[i][2]*efield->ez); + phia = factor*(x[i][0]*efield->ex + x[i][1]*efield->ey + x[i][2]*efield->ez); } else { // atom-style potential from FixEfield - phia = efield->efield[i][3]; + phia = efield->efield[i][3]; } } @@ -1200,22 +1200,22 @@ void FixQtpieReaxFF::calc_chi_eff() expb = gauss_exp[type[j]]; chib = chi[type[j]]; - // overlap integral of two normalised 1s Gaussian type orbitals + // overlap integral of two normalised 1s Gaussian type orbitals p = expa + expb; m = expa * expb / p; overlap = pow((4.0*m/p),0.75) * exp(-m*dist*dist); if (efield) { - if (efield->varflag != FixEfield::ATOM) { - phib = factor*(x[j][0]*efield->ex + x[j][1]*efield->ey + x[j][2]*efield->ez); - } else { // atom-style potential from FixEfield - phib = efield->efield[j][3]; - } - sum_n += (chia - chib + phib - phia) * overlap; + if (efield->varflag != FixEfield::ATOM) { + phib = factor*(x[j][0]*efield->ex + x[j][1]*efield->ey + x[j][2]*efield->ez); + } else { // atom-style potential from FixEfield + phib = efield->efield[j][3]; + } + sum_n += 
(chia - chib + phib - phia) * overlap; } else { - sum_n += (chia - chib) * overlap; + sum_n += (chia - chib) * overlap; } - sum_d += overlap; + sum_d += overlap; } } @@ -1236,7 +1236,7 @@ double FixQtpieReaxFF::find_min(const double *array, const int array_length) double smallest = array[1]; for (int i = 2; i < array_length; i++) { - if (array[i] < smallest) + if (array[i] < smallest) smallest = array[i]; } return smallest; From 9932c4e38b0d9f98519dfdb3b2242b1c8c6a3eea Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 4 Sep 2024 22:38:19 -0400 Subject: [PATCH 044/294] add 3d float views to ArrayTypes --- src/KOKKOS/kokkos_type.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index 7f0eb5c105..dc0b2c2ecb 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -1079,6 +1079,14 @@ typedef tdual_float_2d::t_host_um t_float_2d_um; typedef tdual_float_2d::t_host_const_um t_float_2d_const_um; typedef tdual_float_2d::t_host_const_randomread t_float_2d_randomread; +//3d float array n +typedef Kokkos::DualView tdual_float_3d; +typedef tdual_float_3d::t_host t_float_3d; +typedef tdual_float_3d::t_host_const t_float_3d_const; +typedef tdual_float_3d::t_host_um t_float_3d_um; +typedef tdual_float_3d::t_host_const_um t_float_3d_const_um; +typedef tdual_float_3d::t_host_const_randomread t_float_3d_randomread; + #ifdef LMP_KOKKOS_NO_LEGACY typedef Kokkos::DualView tdual_float_1d_4; #else From 3312ef979387e3385536f207740b0d0653c081b4 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Thu, 5 Sep 2024 08:25:13 -0400 Subject: [PATCH 045/294] KOKKOS_INLINE_FUNCTION powint() to replace MathSpecial::powint --- src/KOKKOS/fix_wall_region_kokkos.cpp | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/fix_wall_region_kokkos.cpp b/src/KOKKOS/fix_wall_region_kokkos.cpp index 1675cee0ce..49fa49f7d8 100644 --- a/src/KOKKOS/fix_wall_region_kokkos.cpp +++ 
b/src/KOKKOS/fix_wall_region_kokkos.cpp @@ -22,15 +22,29 @@ #include "atom_kokkos.h" #include "error.h" #include "kokkos_base.h" -#include "math_special.h" #include "memory_kokkos.h" #include "region.h" using namespace LAMMPS_NS; -using MathSpecial::powint; enum { LJ93, LJ126, LJ1043, COLLOID, HARMONIC, MORSE }; +KOKKOS_INLINE_FUNCTION double powint(const double &x, const int n) +{ + double yy, ww; + + if (n == 0) return 1.0; + if (x == 0.0) return 0.0; + int nn = (n > 0) ? n : -n; + ww = x; + + for (yy = 1.0; nn != 0; nn >>= 1, ww *= ww) + if (nn & 1) yy *= ww; + + return (n > 0) ? yy : 1.0 / yy; +} + + /* ---------------------------------------------------------------------- */ template From 33080199e762b099de2b59408b8b563cf2437d25 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Thu, 5 Sep 2024 08:29:08 -0400 Subject: [PATCH 046/294] fix cuda warnings --- src/KOKKOS/fix_nve_limit_kokkos.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/KOKKOS/fix_nve_limit_kokkos.cpp b/src/KOKKOS/fix_nve_limit_kokkos.cpp index 28a3cbfe51..0974008aac 100644 --- a/src/KOKKOS/fix_nve_limit_kokkos.cpp +++ b/src/KOKKOS/fix_nve_limit_kokkos.cpp @@ -57,6 +57,7 @@ void FixNVELimitKokkos::initial_integrate(int /*vflag*/) auto d_v = atomKK->k_v.template view(); auto d_f = atomKK->k_f.template view(); auto d_mask = atomKK->k_mask.template view(); + auto l_groupbit = groupbit; int d_ncount; @@ -67,7 +68,7 @@ void FixNVELimitKokkos::initial_integrate(int /*vflag*/) atomKK->sync(execution_space, X_MASK|V_MASK|F_MASK|MASK_MASK|RMASS_MASK ); Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) { - if (d_mask[i] & groupbit) { + if (d_mask[i] & l_groupbit) { const double dtfm = dtf / d_rmass[i]; d_v(i,0) += dtfm * d_f(i,0); d_v(i,1) += dtfm * d_f(i,1); @@ -92,10 +93,11 @@ void FixNVELimitKokkos::initial_integrate(int /*vflag*/) auto d_mass = atomKK->k_mass.template view(); auto d_type = atomKK->k_type.template view(); + auto l_groupbit = 
groupbit; atomKK->sync(execution_space, X_MASK|V_MASK|F_MASK|MASK_MASK|TYPE_MASK ); Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) { - if (d_mask[i] & groupbit) { + if (d_mask[i] & l_groupbit) { const double dtfm = dtf / d_mass[d_type[i]]; d_v(i,0) += dtfm * d_f(i,0); d_v(i,1) += dtfm * d_f(i,1); @@ -127,13 +129,13 @@ void FixNVELimitKokkos::initial_integrate(int /*vflag*/) template void FixNVELimitKokkos::final_integrate() { - double dtfm,vsq; int nlocal = atom->nlocal; if (igroup == atom->firstgroup) nlocal = atom->nfirst; auto d_v = atomKK->k_v.template view(); auto d_f = atomKK->k_f.template view(); auto d_mask = atomKK->k_mask.template view(); + auto l_groupbit = groupbit; int d_ncount; @@ -143,7 +145,7 @@ void FixNVELimitKokkos::final_integrate() atomKK->sync(execution_space, V_MASK|F_MASK|MASK_MASK|RMASS_MASK ); Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) { - if (d_mask[i] & groupbit) { + if (d_mask[i] & l_groupbit) { const double dtfm = dtf / d_rmass[i]; d_v(i,0) += dtfm * d_f(i,0); d_v(i,1) += dtfm * d_f(i,1); @@ -167,7 +169,7 @@ void FixNVELimitKokkos::final_integrate() atomKK->sync(execution_space, V_MASK|F_MASK|MASK_MASK|TYPE_MASK ); Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) { - if (d_mask[i] & groupbit) { + if (d_mask[i] & l_groupbit) { const double dtfm = dtf / d_mass[d_type[i]]; d_v(i,0) += dtfm * d_f(i,0); d_v(i,1) += dtfm * d_f(i,1); From e22ff76132e6e2c71d37df6caf7731c45f2384e5 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Thu, 5 Sep 2024 08:32:21 -0400 Subject: [PATCH 047/294] fix cuda warnings --- src/KOKKOS/fix_recenter_kokkos.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/fix_recenter_kokkos.cpp b/src/KOKKOS/fix_recenter_kokkos.cpp index fd23f731a2..369a961f09 100644 --- a/src/KOKKOS/fix_recenter_kokkos.cpp +++ b/src/KOKKOS/fix_recenter_kokkos.cpp @@ -114,9 +114,11 @@ void FixRecenterKokkos::initial_integrate(int 
/*vflag*/) copymode = 1; + auto l_group2bit = group2bit; + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal), - LAMMPS_LAMBDA(int i) { - if (d_mask[i] & group2bit) { + KOKKOS_LAMBDA(const int i) { + if (d_mask[i] & l_group2bit) { d_x(i,0) += shift[0]; d_x(i,1) += shift[1]; d_x(i,2) += shift[2]; From bbc3dc295891b4b7292319494fff028ffa92e0ee Mon Sep 17 00:00:00 2001 From: alphataubio Date: Fri, 6 Sep 2024 11:56:19 -0400 Subject: [PATCH 048/294] auto atomKK variables for lambda capture --- src/KOKKOS/fix_recenter_kokkos.cpp | 12 ++++++------ src/KOKKOS/fix_recenter_kokkos.h | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/KOKKOS/fix_recenter_kokkos.cpp b/src/KOKKOS/fix_recenter_kokkos.cpp index 369a961f09..7e0e3e31b7 100644 --- a/src/KOKKOS/fix_recenter_kokkos.cpp +++ b/src/KOKKOS/fix_recenter_kokkos.cpp @@ -52,10 +52,6 @@ void FixRecenterKokkos::initial_integrate(int /*vflag*/) { atomKK->sync(execution_space,datamask_read); - atomKK->modified(execution_space,datamask_modify); - - d_x = atomKK->k_x.view(); - d_mask = atomKK->k_mask.view(); int nlocal = atomKK->nlocal; if (igroup == atomKK->firstgroup) nlocal = atomKK->nfirst; @@ -112,10 +108,12 @@ void FixRecenterKokkos::initial_integrate(int /*vflag*/) shift[2] = zflag ? 
(ztarget - xcm[2]) : 0.0; distance = sqrt(shift[0]*shift[0] + shift[1]*shift[1] + shift[2]*shift[2]); - copymode = 1; - + auto d_x = atomKK->k_x.template view(); + auto d_mask = atomKK->k_mask.template view(); auto l_group2bit = group2bit; + copymode = 1; + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal), KOKKOS_LAMBDA(const int i) { if (d_mask[i] & l_group2bit) { @@ -126,6 +124,8 @@ void FixRecenterKokkos::initial_integrate(int /*vflag*/) }); copymode = 0; + + atomKK->modified(execution_space,datamask_modify); } diff --git a/src/KOKKOS/fix_recenter_kokkos.h b/src/KOKKOS/fix_recenter_kokkos.h index f945802ec0..4e28c41d18 100644 --- a/src/KOKKOS/fix_recenter_kokkos.h +++ b/src/KOKKOS/fix_recenter_kokkos.h @@ -36,8 +36,8 @@ class FixRecenterKokkos : public FixRecenter { void initial_integrate(int) override; private: - typename ArrayTypes::t_x_array d_x; - typename ArrayTypes::t_int_1d d_mask; + //typename ArrayTypes::t_x_array d_x; + //typename ArrayTypes::t_int_1d d_mask; }; } // namespace LAMMPS_NS From e25979386afe9de57422567a9d45bd088ef997ae Mon Sep 17 00:00:00 2001 From: alphataubio Date: Fri, 6 Sep 2024 12:18:33 -0400 Subject: [PATCH 049/294] Update fix_nve_limit_kokkos.cpp --- src/KOKKOS/fix_nve_limit_kokkos.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/KOKKOS/fix_nve_limit_kokkos.cpp b/src/KOKKOS/fix_nve_limit_kokkos.cpp index 0974008aac..aba8bbe39f 100644 --- a/src/KOKKOS/fix_nve_limit_kokkos.cpp +++ b/src/KOKKOS/fix_nve_limit_kokkos.cpp @@ -58,6 +58,7 @@ void FixNVELimitKokkos::initial_integrate(int /*vflag*/) auto d_f = atomKK->k_f.template view(); auto d_mask = atomKK->k_mask.template view(); auto l_groupbit = groupbit; + auto l_dtf = dtf; int d_ncount; @@ -69,7 +70,7 @@ void FixNVELimitKokkos::initial_integrate(int /*vflag*/) Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) { if (d_mask[i] & l_groupbit) { - const double dtfm = dtf / d_rmass[i]; + const double dtfm = l_dtf / d_rmass[i]; 
d_v(i,0) += dtfm * d_f(i,0); d_v(i,1) += dtfm * d_f(i,1); d_v(i,2) += dtfm * d_f(i,2); @@ -98,7 +99,7 @@ void FixNVELimitKokkos::initial_integrate(int /*vflag*/) Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) { if (d_mask[i] & l_groupbit) { - const double dtfm = dtf / d_mass[d_type[i]]; + const double dtfm = l_dtf / d_mass[d_type[i]]; d_v(i,0) += dtfm * d_f(i,0); d_v(i,1) += dtfm * d_f(i,1); d_v(i,2) += dtfm * d_f(i,2); @@ -136,6 +137,7 @@ void FixNVELimitKokkos::final_integrate() auto d_f = atomKK->k_f.template view(); auto d_mask = atomKK->k_mask.template view(); auto l_groupbit = groupbit; + auto l_dtf = dtf; int d_ncount; @@ -146,7 +148,7 @@ void FixNVELimitKokkos::final_integrate() Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) { if (d_mask[i] & l_groupbit) { - const double dtfm = dtf / d_rmass[i]; + const double dtfm = l_dtf / d_rmass[i]; d_v(i,0) += dtfm * d_f(i,0); d_v(i,1) += dtfm * d_f(i,1); d_v(i,2) += dtfm * d_f(i,2); @@ -170,7 +172,7 @@ void FixNVELimitKokkos::final_integrate() Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) { if (d_mask[i] & l_groupbit) { - const double dtfm = dtf / d_mass[d_type[i]]; + const double dtfm = l_dtf / d_mass[d_type[i]]; d_v(i,0) += dtfm * d_f(i,0); d_v(i,1) += dtfm * d_f(i,1); d_v(i,2) += dtfm * d_f(i,2); From 6b83ef8d338d10e90b65a6e6912528105f2eb9c9 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Fri, 6 Sep 2024 12:21:29 -0400 Subject: [PATCH 050/294] auto var lambda capture --- src/KOKKOS/region_sphere_kokkos.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/KOKKOS/region_sphere_kokkos.cpp b/src/KOKKOS/region_sphere_kokkos.cpp index e27b5ef14c..42cc0383d8 100644 --- a/src/KOKKOS/region_sphere_kokkos.cpp +++ b/src/KOKKOS/region_sphere_kokkos.cpp @@ -61,11 +61,12 @@ void RegSphereKokkos::match_all_kokkos(int groupbit_in, DAT::tdual_i auto d_x = atomKK->k_x.template view(); auto d_mask = atomKK->k_mask.template 
view(); + auto l_groupbit = groupbit; copymode = 1; Kokkos::parallel_for(atom->nlocal, KOKKOS_LAMBDA( const int &i ) { - if (d_mask[i] & groupbit) { + if (d_mask[i] & l_groupbit) { double x_tmp = d_x(i,0); double y_tmp = d_x(i,1); double z_tmp = d_x(i,2); From 74e61c05b9dca0928b380c3562b4556b27e355f4 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Fri, 6 Sep 2024 12:42:49 -0400 Subject: [PATCH 051/294] fix warnings --- src/KOKKOS/fix_cmap_kokkos.cpp | 188 +++++++++++++++++---------------- 1 file changed, 95 insertions(+), 93 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index e47e1e1307..427c2cd7d5 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -269,18 +269,18 @@ KOKKOS_INLINE_FUNCTION void FixCMAPKokkos::operator()(const int n) const { - int i1,i2,i3,i4,i5,type,nlist; + int i1,i2,i3,i4,i5,type; int li1, li2, mli1,mli2,mli11,mli21,t1,li3,li4,mli3,mli4,mli31,mli41; - int list[5]; + // vectors needed to calculate the cross-term dihedral angles double vb21x,vb21y,vb21z,vb32x,vb32y,vb32z,vb34x,vb34y,vb34z; double vb23x,vb23y,vb23z; double vb43x,vb43y,vb43z,vb45x,vb45y,vb45z,a1x,a1y,a1z,b1x,b1y,b1z; double a2x,a2y,a2z,b2x,b2y,b2z,r32,a1sq,b1sq,a2sq,b2sq,dpr21r32,dpr34r32; - double dpr32r43,dpr45r43,r43,vb12x,vb12y,vb12z,vb54x,vb54y,vb54z; + double dpr32r43,dpr45r43,r43,vb12x,vb12y,vb12z; // cross-term dihedral angles double phi,psi,phi1,psi1; - double f1[3],f2[3],f3[3],f4[3],f5[3],vcmap[CMAPMAX]; + double f1[3],f2[3],f3[3],f4[3],f5[3]; double gs[4],d1gs[4],d2gs[4],d12gs[4]; // vectors needed for the gradient/force calculation @@ -300,115 +300,111 @@ void FixCMAPKokkos::operator()(const int n) const int nlocal = atomKK->nlocal; - i1 = d_crosstermlist(n,0); - i2 = d_crosstermlist(n,1); - i3 = d_crosstermlist(n,2); - i4 = d_crosstermlist(n,3); - i5 = d_crosstermlist(n,4); - type = d_crosstermlist(n,5); - if (type == 0) return; + i1 = d_crosstermlist(n,0); + i2 = d_crosstermlist(n,1); + i3 = 
d_crosstermlist(n,2); + i4 = d_crosstermlist(n,3); + i5 = d_crosstermlist(n,4); + type = d_crosstermlist(n,5); + if (type == 0) return; - // calculate bond vectors for both dihedrals + // calculate bond vectors for both dihedrals - // phi - // vb21 = r2 - r1 + // phi + // vb21 = r2 - r1 - vb21x = d_x(i2,0) - d_x(i1,0); - vb21y = d_x(i2,1) - d_x(i1,1); - vb21z = d_x(i2,2) - d_x(i1,2); - vb12x = -1.0*vb21x; - vb12y = -1.0*vb21y; - vb12z = -1.0*vb21z; - vb32x = d_x(i3,0) - d_x(i2,0); - vb32y = d_x(i3,1) - d_x(i2,1); - vb32z = d_x(i3,2) - d_x(i2,2); - vb23x = -1.0*vb32x; - vb23y = -1.0*vb32y; - vb23z = -1.0*vb32z; + vb21x = d_x(i2,0) - d_x(i1,0); + vb21y = d_x(i2,1) - d_x(i1,1); + vb21z = d_x(i2,2) - d_x(i1,2); + vb12x = -1.0*vb21x; + vb12y = -1.0*vb21y; + vb12z = -1.0*vb21z; + vb32x = d_x(i3,0) - d_x(i2,0); + vb32y = d_x(i3,1) - d_x(i2,1); + vb32z = d_x(i3,2) - d_x(i2,2); + vb23x = -1.0*vb32x; + vb23y = -1.0*vb32y; + vb23z = -1.0*vb32z; - vb34x = d_x(i3,0) - d_x(i4,0); - vb34y = d_x(i3,1) - d_x(i4,1); - vb34z = d_x(i3,2) - d_x(i4,2); + vb34x = d_x(i3,0) - d_x(i4,0); + vb34y = d_x(i3,1) - d_x(i4,1); + vb34z = d_x(i3,2) - d_x(i4,2); - // psi - // bond vectors same as for phi: vb32 + // psi + // bond vectors same as for phi: vb32 - vb43x = -1.0*vb34x; - vb43y = -1.0*vb34y; - vb43z = -1.0*vb34z; + vb43x = -1.0*vb34x; + vb43y = -1.0*vb34y; + vb43z = -1.0*vb34z; - vb45x = d_x(i4,0) - d_x(i5,0); - vb45y = d_x(i4,1) - d_x(i5,1); - vb45z = d_x(i4,2) - d_x(i5,2); - vb54x = -1.0*vb45x; - vb54y = -1.0*vb45y; - vb54z = -1.0*vb45z; + vb45x = d_x(i4,0) - d_x(i5,0); + vb45y = d_x(i4,1) - d_x(i5,1); + vb45z = d_x(i4,2) - d_x(i5,2); - // calculate normal vectors for planes that define the dihedral angles + // calculate normal vectors for planes that define the dihedral angles + a1x = vb12y*vb23z - vb12z*vb23y; + a1y = vb12z*vb23x - vb12x*vb23z; + a1z = vb12x*vb23y - vb12y*vb23x; - a1x = vb12y*vb23z - vb12z*vb23y; - a1y = vb12z*vb23x - vb12x*vb23z; - a1z = vb12x*vb23y - vb12y*vb23x; + 
b1x = vb43y*vb23z - vb43z*vb23y; + b1y = vb43z*vb23x - vb43x*vb23z; + b1z = vb43x*vb23y - vb43y*vb23x; - b1x = vb43y*vb23z - vb43z*vb23y; - b1y = vb43z*vb23x - vb43x*vb23z; - b1z = vb43x*vb23y - vb43y*vb23x; + a2x = vb23y*vb34z - vb23z*vb34y; + a2y = vb23z*vb34x - vb23x*vb34z; + a2z = vb23x*vb34y - vb23y*vb34x; - a2x = vb23y*vb34z - vb23z*vb34y; - a2y = vb23z*vb34x - vb23x*vb34z; - a2z = vb23x*vb34y - vb23y*vb34x; + b2x = vb45y*vb43z - vb45z*vb43y; + b2y = vb45z*vb43x - vb45x*vb43z; + b2z = vb45x*vb43y - vb45y*vb43x; - b2x = vb45y*vb43z - vb45z*vb43y; - b2y = vb45z*vb43x - vb45x*vb43z; - b2z = vb45x*vb43y - vb45y*vb43x; + // calculate terms used later in calculations - // calculate terms used later in calculations + r32 = sqrt(vb32x*vb32x + vb32y*vb32y + vb32z*vb32z); + a1sq = a1x*a1x + a1y*a1y + a1z*a1z; + b1sq = b1x*b1x + b1y*b1y + b1z*b1z; - r32 = sqrt(vb32x*vb32x + vb32y*vb32y + vb32z*vb32z); - a1sq = a1x*a1x + a1y*a1y + a1z*a1z; - b1sq = b1x*b1x + b1y*b1y + b1z*b1z; + r43 = sqrt(vb43x*vb43x + vb43y*vb43y + vb43z*vb43z); + a2sq = a2x*a2x + a2y*a2y + a2z*a2z; + b2sq = b2x*b2x + b2y*b2y + b2z*b2z; + //if (a1sq<0.0001 || b1sq<0.0001 || a2sq<0.0001 || b2sq<0.0001) + // printf("a1sq b1sq a2sq b2sq: %f %f %f %f \n",a1sq,b1sq,a2sq,b2sq); + if (a1sq<0.0001 || b1sq<0.0001 || a2sq<0.0001 || b2sq<0.0001) return; + dpr21r32 = vb21x*vb32x + vb21y*vb32y + vb21z*vb32z; + dpr34r32 = vb34x*vb32x + vb34y*vb32y + vb34z*vb32z; + dpr32r43 = vb32x*vb43x + vb32y*vb43y + vb32z*vb43z; + dpr45r43 = vb45x*vb43x + vb45y*vb43y + vb45z*vb43z; - r43 = sqrt(vb43x*vb43x + vb43y*vb43y + vb43z*vb43z); - a2sq = a2x*a2x + a2y*a2y + a2z*a2z; - b2sq = b2x*b2x + b2y*b2y + b2z*b2z; - //if (a1sq<0.0001 || b1sq<0.0001 || a2sq<0.0001 || b2sq<0.0001) - // printf("a1sq b1sq a2sq b2sq: %f %f %f %f \n",a1sq,b1sq,a2sq,b2sq); - if (a1sq<0.0001 || b1sq<0.0001 || a2sq<0.0001 || b2sq<0.0001) return; - dpr21r32 = vb21x*vb32x + vb21y*vb32y + vb21z*vb32z; - dpr34r32 = vb34x*vb32x + vb34y*vb32y + vb34z*vb32z; - 
dpr32r43 = vb32x*vb43x + vb32y*vb43y + vb32z*vb43z; - dpr45r43 = vb45x*vb43x + vb45y*vb43y + vb45z*vb43z; + // calculate the backbone dihedral angles as VMD and GROMACS - // calculate the backbone dihedral angles as VMD and GROMACS + phi = FixCMAP::dihedral_angle_atan2(vb21x,vb21y,vb21z,a1x,a1y,a1z,b1x,b1y,b1z,r32); + psi = FixCMAP::dihedral_angle_atan2(vb32x,vb32y,vb32z,a2x,a2y,a2z,b2x,b2y,b2z,r43); - phi = FixCMAP::dihedral_angle_atan2(vb21x,vb21y,vb21z,a1x,a1y,a1z,b1x,b1y,b1z,r32); - psi = FixCMAP::dihedral_angle_atan2(vb32x,vb32y,vb32z,a2x,a2y,a2z,b2x,b2y,b2z,r43); + if (phi == 180.0) phi= -180.0; + if (psi == 180.0) psi= -180.0; - if (phi == 180.0) phi= -180.0; - if (psi == 180.0) psi= -180.0; + phi1 = phi; + if (phi1 < 0.0) phi1 += 360.0; + psi1 = psi; + if (psi1 < 0.0) psi1 += 360.0; - phi1 = phi; - if (phi1 < 0.0) phi1 += 360.0; - psi1 = psi; - if (psi1 < 0.0) psi1 += 360.0; + // find the neighbor grid point index - // find the neighbor grid point index + li1 = int(((phi1+CMAPXMIN2)/CMAPDX)+((CMAPDIM*1.0)/2.0)); + li2 = int(((psi1+CMAPXMIN2)/CMAPDX)+((CMAPDIM*1.0)/2.0)); - li1 = int(((phi1+CMAPXMIN2)/CMAPDX)+((CMAPDIM*1.0)/2.0)); - li2 = int(((psi1+CMAPXMIN2)/CMAPDX)+((CMAPDIM*1.0)/2.0)); - - li3 = int((phi-CMAPXMIN2)/CMAPDX); - li4 = int((psi-CMAPXMIN2)/CMAPDX); - mli3 = li3 % CMAPDIM; - mli4 = li4 % CMAPDIM; - mli31 = (li3+1) % CMAPDIM; - mli41 = (li4+1) %CMAPDIM; - mli1 = li1 % CMAPDIM; - mli2 = li2 % CMAPDIM; - mli11 = (li1+1) % CMAPDIM; - mli21 = (li2+1) %CMAPDIM; - t1 = type-1; - if (t1 < 0 || t1 > 5) error->all(FLERR,"Invalid CMAP crossterm_type"); + li3 = int((phi-CMAPXMIN2)/CMAPDX); + li4 = int((psi-CMAPXMIN2)/CMAPDX); + mli3 = li3 % CMAPDIM; + mli4 = li4 % CMAPDIM; + mli31 = (li3+1) % CMAPDIM; + mli41 = (li4+1) %CMAPDIM; + mli1 = li1 % CMAPDIM; + mli2 = li2 % CMAPDIM; + mli11 = (li1+1) % CMAPDIM; + mli21 = (li2+1) %CMAPDIM; + t1 = type-1; + if (t1 < 0 || t1 > 5) Kokkos::abort("Invalid CMAP crossterm_type"); // determine the values and derivatives 
for the grid square points @@ -539,7 +535,13 @@ void FixCMAPKokkos::operator()(const int n) const /* if (evflag) { //std::cerr << "******** tally energy and/or virial\n"; - nlist = 0; + int nlist = 0; + int list[5]; + double vb54x = -1.0*vb45x; + double vb54y = -1.0*vb45y; + double vb54z = -1.0*vb45z; + double vcmap[CMAPMAX]; + if (i1 < nlocal) list[nlist++] = i1; if (i2 < nlocal) list[nlist++] = i2; if (i3 < nlocal) list[nlist++] = i3; From b468e1cb9af61ac4fdd6dbedc2da30dfe2738e56 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Fri, 6 Sep 2024 13:04:31 -0400 Subject: [PATCH 052/294] kokkos atan2 --- src/KOKKOS/fix_cmap_kokkos.cpp | 30 ++++++++++++++++++++++++++++-- src/KOKKOS/fix_cmap_kokkos.h | 5 +++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 427c2cd7d5..dfdd166622 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -377,8 +377,8 @@ void FixCMAPKokkos::operator()(const int n) const // calculate the backbone dihedral angles as VMD and GROMACS - phi = FixCMAP::dihedral_angle_atan2(vb21x,vb21y,vb21z,a1x,a1y,a1z,b1x,b1y,b1z,r32); - psi = FixCMAP::dihedral_angle_atan2(vb32x,vb32y,vb32z,a2x,a2y,a2z,b2x,b2y,b2z,r43); + phi = dihedral_angle_atan2(vb21x,vb21y,vb21z,a1x,a1y,a1z,b1x,b1y,b1z,r32); + psi = dihedral_angle_atan2(vb32x,vb32y,vb32z,a2x,a2y,a2z,b2x,b2y,b2z,r43); if (phi == 180.0) phi= -180.0; if (psi == 180.0) psi= -180.0; @@ -716,6 +716,32 @@ int FixCMAPKokkos::unpack_exchange(int nlocal, double *buf) } +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double FixCMAPKokkos::dihedral_angle_atan2(double fx, double fy, double fz, + double ax, double ay, double az, + double bx, double by, double bz, + double absg) const +{ + // calculate the dihedral angle + + double angle = 0.0, arg1, arg2; + + arg1 = absg*(fx*bx+fy*by+fz*bz); + arg2 = ax*bx+ay*by+az*bz; + + if (arg1 == 0 && arg2 
== 0) + Kokkos::abort("CMAP: atan2 function cannot take 2 zero arguments"); + else { + angle = Kokkos::atan2(arg1,arg2); + angle = angle*180.0/MY_PI; + } + + return angle; +} + /* ---------------------------------------------------------------------- */ template diff --git a/src/KOKKOS/fix_cmap_kokkos.h b/src/KOKKOS/fix_cmap_kokkos.h index 42756116b2..41b6d27fbb 100644 --- a/src/KOKKOS/fix_cmap_kokkos.h +++ b/src/KOKKOS/fix_cmap_kokkos.h @@ -77,6 +77,11 @@ class FixCMAPKokkos : public FixCMAP { DAT::tdual_float_3d k_cmapgrid, k_d1cmapgrid, k_d2cmapgrid, k_d12cmapgrid; typename AT::t_float_3d d_cmapgrid, d_d1cmapgrid, d_d2cmapgrid, d_d12cmapgrid; + // calculate dihedral angles + KOKKOS_INLINE_FUNCTION + double dihedral_angle_atan2(double, double, double, double, double, double, double, double, + double, double) const; + // perform bicubic interpolation at point of interest KOKKOS_INLINE_FUNCTION void bc_interpol(double, double, int, int, double *, double *, double *, double *, From 8eeba71f5f2652c2fc1ec4aacf870588b87fb01d Mon Sep 17 00:00:00 2001 From: alphataubio Date: Fri, 6 Sep 2024 13:19:47 -0400 Subject: [PATCH 053/294] cleanup --- src/KOKKOS/fix_cmap_kokkos.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index dfdd166622..5cffaa60fd 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -40,7 +40,6 @@ static constexpr double LB_FACTOR = 1.5; static constexpr int CMAPMAX = 6; // max # of CMAP terms stored by one atom static constexpr int CMAPDIM = 24; // grid map dimension is 24 x 24 -static constexpr double CMAPXMIN = -360.0; static constexpr double CMAPXMIN2 = -180.0; static constexpr double CMAPDX = 15.0; // 360/CMAPDIM From 13357745935664600d3014f97a89762c02190345 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Fri, 6 Sep 2024 13:24:13 -0400 Subject: [PATCH 054/294] Update fix_recenter_kokkos.cpp --- src/KOKKOS/fix_recenter_kokkos.cpp | 14 +++++++------- 1 
file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/KOKKOS/fix_recenter_kokkos.cpp b/src/KOKKOS/fix_recenter_kokkos.cpp index 7e0e3e31b7..de8b2639be 100644 --- a/src/KOKKOS/fix_recenter_kokkos.cpp +++ b/src/KOKKOS/fix_recenter_kokkos.cpp @@ -103,10 +103,10 @@ void FixRecenterKokkos::initial_integrate(int /*vflag*/) // shift coords by difference between actual COM and requested COM - shift[0] = xflag ? (xtarget - xcm[0]) : 0.0; - shift[1] = yflag ? (ytarget - xcm[1]) : 0.0; - shift[2] = zflag ? (ztarget - xcm[2]) : 0.0; - distance = sqrt(shift[0]*shift[0] + shift[1]*shift[1] + shift[2]*shift[2]); + double shiftx = xflag ? (xtarget - xcm[0]) : 0.0; + double shifty = yflag ? (ytarget - xcm[1]) : 0.0; + double shiftz = zflag ? (ztarget - xcm[2]) : 0.0; + distance = sqrt(shiftx*shiftx + shifty*shifty + shiftz*shiftz); auto d_x = atomKK->k_x.template view(); auto d_mask = atomKK->k_mask.template view(); @@ -117,9 +117,9 @@ void FixRecenterKokkos::initial_integrate(int /*vflag*/) Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal), KOKKOS_LAMBDA(const int i) { if (d_mask[i] & l_group2bit) { - d_x(i,0) += shift[0]; - d_x(i,1) += shift[1]; - d_x(i,2) += shift[2]; + d_x(i,0) += shiftx; + d_x(i,1) += shifty; + d_x(i,2) += shiftz; } }); From 638bba5319234b297f3ae415b988d2af535c4c32 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Fri, 6 Sep 2024 13:27:08 -0400 Subject: [PATCH 055/294] fix warnings --- src/KOKKOS/fix_nve_limit_kokkos.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/KOKKOS/fix_nve_limit_kokkos.cpp b/src/KOKKOS/fix_nve_limit_kokkos.cpp index aba8bbe39f..a910510f9c 100644 --- a/src/KOKKOS/fix_nve_limit_kokkos.cpp +++ b/src/KOKKOS/fix_nve_limit_kokkos.cpp @@ -59,6 +59,7 @@ void FixNVELimitKokkos::initial_integrate(int /*vflag*/) auto d_mask = atomKK->k_mask.template view(); auto l_groupbit = groupbit; auto l_dtf = dtf; + auto l_vlimitsq = vlimitsq; int d_ncount; @@ -76,9 +77,9 @@ void 
FixNVELimitKokkos::initial_integrate(int /*vflag*/) d_v(i,2) += dtfm * d_f(i,2); const double vsq = d_v(i,0)*d_v(i,0) + d_v(i,1)*d_v(i,1) + d_v(i,2)*d_v(i,2); - if (vsq > vlimitsq) { + if (vsq > l_vlimitsq) { l_ncount++; - const double scale = sqrt(vlimitsq/vsq); + const double scale = sqrt(l_vlimitsq/vsq); d_v(i,0) *= scale; d_v(i,1) *= scale; d_v(i,2) *= scale; @@ -105,9 +106,9 @@ void FixNVELimitKokkos::initial_integrate(int /*vflag*/) d_v(i,2) += dtfm * d_f(i,2); const double vsq = d_v(i,0)*d_v(i,0) + d_v(i,1)*d_v(i,1) + d_v(i,2)*d_v(i,2); - if (vsq > vlimitsq) { + if (vsq > l_vlimitsq) { l_ncount++; - const double scale = sqrt(vlimitsq/vsq); + const double scale = sqrt(l_vlimitsq/vsq); d_v(i,0) *= scale; d_v(i,1) *= scale; d_v(i,2) *= scale; @@ -138,6 +139,7 @@ void FixNVELimitKokkos::final_integrate() auto d_mask = atomKK->k_mask.template view(); auto l_groupbit = groupbit; auto l_dtf = dtf; + auto l_vlimitsq = vlimitsq; int d_ncount; @@ -154,9 +156,9 @@ void FixNVELimitKokkos::final_integrate() d_v(i,2) += dtfm * d_f(i,2); const double vsq = d_v(i,0)*d_v(i,0) + d_v(i,1)*d_v(i,1) + d_v(i,2)*d_v(i,2); - if (vsq > vlimitsq) { + if (vsq > l_vlimitsq) { l_ncount++; - const double scale = sqrt(vlimitsq/vsq); + const double scale = sqrt(l_vlimitsq/vsq); d_v(i,0) *= scale; d_v(i,1) *= scale; d_v(i,2) *= scale; @@ -178,9 +180,9 @@ void FixNVELimitKokkos::final_integrate() d_v(i,2) += dtfm * d_f(i,2); const double vsq = d_v(i,0)*d_v(i,0) + d_v(i,1)*d_v(i,1) + d_v(i,2)*d_v(i,2); - if (vsq > vlimitsq) { + if (vsq > l_vlimitsq) { l_ncount++; - const double scale = sqrt(vlimitsq/vsq); + const double scale = sqrt(l_vlimitsq/vsq); d_v(i,0) *= scale; d_v(i,1) *= scale; d_v(i,2) *= scale; From 36494af3c73463411c3e46a7a496ce3bd6707bfb Mon Sep 17 00:00:00 2001 From: alphataubio Date: Fri, 6 Sep 2024 13:36:13 -0400 Subject: [PATCH 056/294] match base class in kokkos --- src/KOKKOS/region_block_kokkos.cpp | 1 + src/KOKKOS/region_sphere_kokkos.cpp | 6 +++--- 
src/KOKKOS/region_sphere_kokkos.h | 1 - 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/KOKKOS/region_block_kokkos.cpp b/src/KOKKOS/region_block_kokkos.cpp index 8df33c32db..f5e7499456 100644 --- a/src/KOKKOS/region_block_kokkos.cpp +++ b/src/KOKKOS/region_block_kokkos.cpp @@ -89,6 +89,7 @@ KOKKOS_INLINE_FUNCTION int RegBlockKokkos::match(double x, double y, double z) const { if (dynamic) inverse_transform(x,y,z); + if (openflag) return 1; return !(k_inside(x,y,z) ^ interior); } diff --git a/src/KOKKOS/region_sphere_kokkos.cpp b/src/KOKKOS/region_sphere_kokkos.cpp index 42cc0383d8..9b747b92df 100644 --- a/src/KOKKOS/region_sphere_kokkos.cpp +++ b/src/KOKKOS/region_sphere_kokkos.cpp @@ -53,15 +53,14 @@ int RegSphereKokkos::k_inside(double x, double y, double z) const template void RegSphereKokkos::match_all_kokkos(int groupbit_in, DAT::tdual_int_1d k_match_in) { - groupbit = groupbit_in; - d_match = k_match_in.template view(); auto execution_space = ExecutionSpaceFromDevice::space; atomKK->sync(execution_space, X_MASK | MASK_MASK); auto d_x = atomKK->k_x.template view(); auto d_mask = atomKK->k_mask.template view(); - auto l_groupbit = groupbit; + auto d_match = k_match_in.template view(); + auto l_groupbit = groupbit_in; copymode = 1; @@ -95,6 +94,7 @@ KOKKOS_INLINE_FUNCTION int RegSphereKokkos::match(double x, double y, double z) const { if (dynamic) inverse_transform(x,y,z); + if (openflag) return 1; return !(k_inside(x,y,z) ^ interior); } diff --git a/src/KOKKOS/region_sphere_kokkos.h b/src/KOKKOS/region_sphere_kokkos.h index 8ccd6217bf..0cf5985d84 100644 --- a/src/KOKKOS/region_sphere_kokkos.h +++ b/src/KOKKOS/region_sphere_kokkos.h @@ -46,7 +46,6 @@ class RegSphereKokkos : public RegSphere, public KokkosBase { private: int groupbit; - typename AT::t_int_1d d_match; KOKKOS_INLINE_FUNCTION int k_inside(double, double, double) const; From eedcbc6b5f76318a311da3275fb866dd0680b348 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Fri, 6 Sep 2024 
13:36:34 -0400 Subject: [PATCH 057/294] Revert "match base class in kokkos" This reverts commit 36494af3c73463411c3e46a7a496ce3bd6707bfb. --- src/KOKKOS/region_block_kokkos.cpp | 1 - src/KOKKOS/region_sphere_kokkos.cpp | 6 +++--- src/KOKKOS/region_sphere_kokkos.h | 1 + 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/KOKKOS/region_block_kokkos.cpp b/src/KOKKOS/region_block_kokkos.cpp index f5e7499456..8df33c32db 100644 --- a/src/KOKKOS/region_block_kokkos.cpp +++ b/src/KOKKOS/region_block_kokkos.cpp @@ -89,7 +89,6 @@ KOKKOS_INLINE_FUNCTION int RegBlockKokkos::match(double x, double y, double z) const { if (dynamic) inverse_transform(x,y,z); - if (openflag) return 1; return !(k_inside(x,y,z) ^ interior); } diff --git a/src/KOKKOS/region_sphere_kokkos.cpp b/src/KOKKOS/region_sphere_kokkos.cpp index 9b747b92df..42cc0383d8 100644 --- a/src/KOKKOS/region_sphere_kokkos.cpp +++ b/src/KOKKOS/region_sphere_kokkos.cpp @@ -53,14 +53,15 @@ int RegSphereKokkos::k_inside(double x, double y, double z) const template void RegSphereKokkos::match_all_kokkos(int groupbit_in, DAT::tdual_int_1d k_match_in) { + groupbit = groupbit_in; + d_match = k_match_in.template view(); auto execution_space = ExecutionSpaceFromDevice::space; atomKK->sync(execution_space, X_MASK | MASK_MASK); auto d_x = atomKK->k_x.template view(); auto d_mask = atomKK->k_mask.template view(); - auto d_match = k_match_in.template view(); - auto l_groupbit = groupbit_in; + auto l_groupbit = groupbit; copymode = 1; @@ -94,7 +95,6 @@ KOKKOS_INLINE_FUNCTION int RegSphereKokkos::match(double x, double y, double z) const { if (dynamic) inverse_transform(x,y,z); - if (openflag) return 1; return !(k_inside(x,y,z) ^ interior); } diff --git a/src/KOKKOS/region_sphere_kokkos.h b/src/KOKKOS/region_sphere_kokkos.h index 0cf5985d84..8ccd6217bf 100644 --- a/src/KOKKOS/region_sphere_kokkos.h +++ b/src/KOKKOS/region_sphere_kokkos.h @@ -46,6 +46,7 @@ class RegSphereKokkos : public RegSphere, public KokkosBase { 
private: int groupbit; + typename AT::t_int_1d d_match; KOKKOS_INLINE_FUNCTION int k_inside(double, double, double) const; From e65fb63a97ad25b1f7700c782ffd09214e9d15fe Mon Sep 17 00:00:00 2001 From: alphataubio Date: Fri, 6 Sep 2024 13:39:18 -0400 Subject: [PATCH 058/294] match base class in kokkos --- src/KOKKOS/region_block_kokkos.cpp | 1 + src/KOKKOS/region_sphere_kokkos.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/src/KOKKOS/region_block_kokkos.cpp b/src/KOKKOS/region_block_kokkos.cpp index 8df33c32db..f5e7499456 100644 --- a/src/KOKKOS/region_block_kokkos.cpp +++ b/src/KOKKOS/region_block_kokkos.cpp @@ -89,6 +89,7 @@ KOKKOS_INLINE_FUNCTION int RegBlockKokkos::match(double x, double y, double z) const { if (dynamic) inverse_transform(x,y,z); + if (openflag) return 1; return !(k_inside(x,y,z) ^ interior); } diff --git a/src/KOKKOS/region_sphere_kokkos.cpp b/src/KOKKOS/region_sphere_kokkos.cpp index 42cc0383d8..bb73d3d425 100644 --- a/src/KOKKOS/region_sphere_kokkos.cpp +++ b/src/KOKKOS/region_sphere_kokkos.cpp @@ -95,6 +95,7 @@ KOKKOS_INLINE_FUNCTION int RegSphereKokkos::match(double x, double y, double z) const { if (dynamic) inverse_transform(x,y,z); + if (openflag) return 1; return !(k_inside(x,y,z) ^ interior); } From 2a2ea89524d82c14c5718a306e0cc1fd557e767c Mon Sep 17 00:00:00 2001 From: alphataubio Date: Fri, 6 Sep 2024 13:40:47 -0400 Subject: [PATCH 059/294] fix warnings --- src/KOKKOS/region_sphere_kokkos.cpp | 1 - src/KOKKOS/region_sphere_kokkos.h | 2 -- 2 files changed, 3 deletions(-) diff --git a/src/KOKKOS/region_sphere_kokkos.cpp b/src/KOKKOS/region_sphere_kokkos.cpp index bb73d3d425..390c2ae49c 100644 --- a/src/KOKKOS/region_sphere_kokkos.cpp +++ b/src/KOKKOS/region_sphere_kokkos.cpp @@ -53,7 +53,6 @@ int RegSphereKokkos::k_inside(double x, double y, double z) const template void RegSphereKokkos::match_all_kokkos(int groupbit_in, DAT::tdual_int_1d k_match_in) { - groupbit = groupbit_in; d_match = k_match_in.template view(); auto 
execution_space = ExecutionSpaceFromDevice::space; diff --git a/src/KOKKOS/region_sphere_kokkos.h b/src/KOKKOS/region_sphere_kokkos.h index 8ccd6217bf..579f339ca8 100644 --- a/src/KOKKOS/region_sphere_kokkos.h +++ b/src/KOKKOS/region_sphere_kokkos.h @@ -45,8 +45,6 @@ class RegSphereKokkos : public RegSphere, public KokkosBase { //void operator()(TagRegBlockMatchAll, const int&) const; private: - int groupbit; - typename AT::t_int_1d d_match; KOKKOS_INLINE_FUNCTION int k_inside(double, double, double) const; From b4e01aaa45a5a786dc37c6a950fcb310f5688562 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Fri, 6 Sep 2024 13:42:16 -0400 Subject: [PATCH 060/294] fix warnings --- src/KOKKOS/region_sphere_kokkos.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/region_sphere_kokkos.cpp b/src/KOKKOS/region_sphere_kokkos.cpp index 390c2ae49c..9b747b92df 100644 --- a/src/KOKKOS/region_sphere_kokkos.cpp +++ b/src/KOKKOS/region_sphere_kokkos.cpp @@ -53,14 +53,14 @@ int RegSphereKokkos::k_inside(double x, double y, double z) const template void RegSphereKokkos::match_all_kokkos(int groupbit_in, DAT::tdual_int_1d k_match_in) { - d_match = k_match_in.template view(); auto execution_space = ExecutionSpaceFromDevice::space; atomKK->sync(execution_space, X_MASK | MASK_MASK); auto d_x = atomKK->k_x.template view(); auto d_mask = atomKK->k_mask.template view(); - auto l_groupbit = groupbit; + auto d_match = k_match_in.template view(); + auto l_groupbit = groupbit_in; copymode = 1; From 1683205fedd5e72bdd766ee17383469fbcef005b Mon Sep 17 00:00:00 2001 From: alphataubio Date: Fri, 6 Sep 2024 13:42:33 -0400 Subject: [PATCH 061/294] cleanup --- src/KOKKOS/fix_cmap_kokkos.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.h b/src/KOKKOS/fix_cmap_kokkos.h index 41b6d27fbb..fcf594bd31 100644 --- a/src/KOKKOS/fix_cmap_kokkos.h +++ b/src/KOKKOS/fix_cmap_kokkos.h @@ -52,7 +52,6 @@ class FixCMAPKokkos : public FixCMAP { protected: 
typename AT::t_x_array d_x; typename AT::t_f_array d_f; - //typename AT::t_int_1d d_type, d_mask; DAT::tdual_int_1d k_sametag; typename AT::t_int_1d d_sametag; From d19f5e0e8e5d824898c693ab1b1a9faa67e62eff Mon Sep 17 00:00:00 2001 From: alphataubio Date: Fri, 6 Sep 2024 19:30:15 -0400 Subject: [PATCH 062/294] bugfix --- src/KOKKOS/fix_cmap_kokkos.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 5cffaa60fd..4ee00b263b 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -192,13 +192,13 @@ void FixCMAPKokkos::pre_neighbor() ncrosstermlist = 0; for (i = 0; i < nlocal; i++) { - for (m = 0; m < d_num_crossterm(i); m++) { + for (m = 0; m < k_num_crossterm.h_view(i); m++) { - atom1 = AtomKokkos::map_kokkos(d_crossterm_atom1(i,m),map_style,k_map_array,k_map_hash); - atom2 = AtomKokkos::map_kokkos(d_crossterm_atom2(i,m),map_style,k_map_array,k_map_hash); - atom3 = AtomKokkos::map_kokkos(d_crossterm_atom3(i,m),map_style,k_map_array,k_map_hash); - atom4 = AtomKokkos::map_kokkos(d_crossterm_atom4(i,m),map_style,k_map_array,k_map_hash); - atom5 = AtomKokkos::map_kokkos(d_crossterm_atom5(i,m),map_style,k_map_array,k_map_hash); + atom1 = AtomKokkos::map_kokkos(k_crossterm_atom1.h_view(i,m),map_style,k_map_array,k_map_hash); + atom2 = AtomKokkos::map_kokkos(k_crossterm_atom2.h_view((i,m),map_style,k_map_array,k_map_hash); + atom3 = AtomKokkos::map_kokkos(k_crossterm_atom3.h_view(i,m),map_style,k_map_array,k_map_hash); + atom4 = AtomKokkos::map_kokkos(k_crossterm_atom4.h_view(i,m),map_style,k_map_array,k_map_hash); + atom5 = AtomKokkos::map_kokkos(k_crossterm_atom5.h_view(i,m),map_style,k_map_array,k_map_hash); if (atom1 == -1 || atom2 == -1 || atom3 == -1 || atom4 == -1 || atom5 == -1) From 0b92bf2c0cd6485e8fc49ae0d21329fcf81547ab Mon Sep 17 00:00:00 2001 From: alphataubio Date: Fri, 6 Sep 2024 19:40:21 -0400 Subject: [PATCH 063/294] oops --- 
src/KOKKOS/fix_cmap_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 4ee00b263b..8aaea2bf73 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -195,7 +195,7 @@ void FixCMAPKokkos::pre_neighbor() for (m = 0; m < k_num_crossterm.h_view(i); m++) { atom1 = AtomKokkos::map_kokkos(k_crossterm_atom1.h_view(i,m),map_style,k_map_array,k_map_hash); - atom2 = AtomKokkos::map_kokkos(k_crossterm_atom2.h_view((i,m),map_style,k_map_array,k_map_hash); + atom2 = AtomKokkos::map_kokkos(k_crossterm_atom2.h_view(i,m),map_style,k_map_array,k_map_hash); atom3 = AtomKokkos::map_kokkos(k_crossterm_atom3.h_view(i,m),map_style,k_map_array,k_map_hash); atom4 = AtomKokkos::map_kokkos(k_crossterm_atom4.h_view(i,m),map_style,k_map_array,k_map_hash); atom5 = AtomKokkos::map_kokkos(k_crossterm_atom5.h_view(i,m),map_style,k_map_array,k_map_hash); From bce51ea2c93c864a20a9404bf5f83f85872c1fde Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sun, 8 Sep 2024 12:53:49 -0400 Subject: [PATCH 064/294] Update fix_nve_limit_kokkos.cpp --- src/KOKKOS/fix_nve_limit_kokkos.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/KOKKOS/fix_nve_limit_kokkos.cpp b/src/KOKKOS/fix_nve_limit_kokkos.cpp index a910510f9c..ed0bb44704 100644 --- a/src/KOKKOS/fix_nve_limit_kokkos.cpp +++ b/src/KOKKOS/fix_nve_limit_kokkos.cpp @@ -59,6 +59,7 @@ void FixNVELimitKokkos::initial_integrate(int /*vflag*/) auto d_mask = atomKK->k_mask.template view(); auto l_groupbit = groupbit; auto l_dtf = dtf; + auto l_dtv = dtv; auto l_vlimitsq = vlimitsq; int d_ncount; @@ -85,9 +86,9 @@ void FixNVELimitKokkos::initial_integrate(int /*vflag*/) d_v(i,2) *= scale; } - d_x(i,0) += dtv * d_v(i,0); - d_x(i,1) += dtv * d_v(i,1); - d_x(i,2) += dtv * d_v(i,2); + d_x(i,0) += l_dtv * d_v(i,0); + d_x(i,1) += l_dtv * d_v(i,1); + d_x(i,2) += l_dtv * d_v(i,2); } }, d_ncount); @@ -114,9 +115,9 
@@ void FixNVELimitKokkos::initial_integrate(int /*vflag*/) d_v(i,2) *= scale; } - d_x(i,0) += dtv * d_v(i,0); - d_x(i,1) += dtv * d_v(i,1); - d_x(i,2) += dtv * d_v(i,2); + d_x(i,0) += l_dtv * d_v(i,0); + d_x(i,1) += l_dtv * d_v(i,1); + d_x(i,2) += l_dtv * d_v(i,2); } }, d_ncount); } From b67b38ab683118df3b9e8f5926cd51e1958481e2 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sun, 8 Sep 2024 16:36:39 -0400 Subject: [PATCH 065/294] kokkos parallel_for --- src/KOKKOS/fix_cmap_kokkos.cpp | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 8aaea2bf73..e65a7ed1ea 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -98,7 +98,6 @@ FixCMAPKokkos::FixCMAPKokkos(LAMMPS *lmp, int narg, char **arg) : k_d1cmapgrid.h_view(i,j,k) = d1cmapgrid[i][j][k]; k_d2cmapgrid.h_view(i,j,k) = d2cmapgrid[i][j][k]; k_d12cmapgrid.h_view(i,j,k) = d12cmapgrid[i][j][k]; - } } } @@ -158,7 +157,6 @@ void FixCMAPKokkos::init() template void FixCMAPKokkos::pre_neighbor() { - int i,m,atom1,atom2,atom3,atom4,atom5; const int me = comm->me; const int nprocs = comm->nprocs; @@ -189,19 +187,19 @@ void FixCMAPKokkos::pre_neighbor() atomKK->k_sametag.sync(); d_sametag = atomKK->k_sametag.view(); - ncrosstermlist = 0; + Kokkos::parallel_for(nlocal, KOKKOS_LAMBDA(const int i) { - for (i = 0; i < nlocal; i++) { - for (m = 0; m < k_num_crossterm.h_view(i); m++) { + ncrosstermlist = 0; - atom1 = AtomKokkos::map_kokkos(k_crossterm_atom1.h_view(i,m),map_style,k_map_array,k_map_hash); - atom2 = AtomKokkos::map_kokkos(k_crossterm_atom2.h_view(i,m),map_style,k_map_array,k_map_hash); - atom3 = AtomKokkos::map_kokkos(k_crossterm_atom3.h_view(i,m),map_style,k_map_array,k_map_hash); - atom4 = AtomKokkos::map_kokkos(k_crossterm_atom4.h_view(i,m),map_style,k_map_array,k_map_hash); - atom5 = AtomKokkos::map_kokkos(k_crossterm_atom5.h_view(i,m),map_style,k_map_array,k_map_hash); + 
for (int m = 0; m < d_num_crossterm(i); m++) { - if (atom1 == -1 || atom2 == -1 || atom3 == -1 || - atom4 == -1 || atom5 == -1) + int atom1 = AtomKokkos::map_kokkos(d_crossterm_atom1(i,m),map_style,k_map_array,k_map_hash); + int atom2 = AtomKokkos::map_kokkos(d_crossterm_atom2(i,m),map_style,k_map_array,k_map_hash); + int atom3 = AtomKokkos::map_kokkos(d_crossterm_atom3(i,m),map_style,k_map_array,k_map_hash); + int atom4 = AtomKokkos::map_kokkos(d_crossterm_atom4(i,m),map_style,k_map_array,k_map_hash); + int atom5 = AtomKokkos::map_kokkos(d_crossterm_atom5(i,m),map_style,k_map_array,k_map_hash); + + if (atom1 == -1 || atom2 == -1 || atom3 == -1 || atom4 == -1 || atom5 == -1) error->one(FLERR,"CMAP atoms {} {} {} {} {} missing on " "proc {} at step {}", d_crossterm_atom1(i,m),d_crossterm_atom2(i,m), @@ -230,7 +228,8 @@ void FixCMAPKokkos::pre_neighbor() ncrosstermlist++; } } - } + }); + } From 29e0ec3809bfdf7f7f5e2bfda9795a36d559e398 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sun, 8 Sep 2024 16:46:16 -0400 Subject: [PATCH 066/294] revert to original cmap plus minimal changes for kk --- src/MOLECULE/fix_cmap.cpp | 40 +++++++++------------------------------ src/MOLECULE/fix_cmap.h | 12 ++++-------- 2 files changed, 13 insertions(+), 39 deletions(-) diff --git a/src/MOLECULE/fix_cmap.cpp b/src/MOLECULE/fix_cmap.cpp index 25b2cefe27..8a8e4b30aa 100644 --- a/src/MOLECULE/fix_cmap.cpp +++ b/src/MOLECULE/fix_cmap.cpp @@ -46,9 +46,6 @@ #include #include -// FIXME: remove after debugging done -#include - using namespace LAMMPS_NS; using namespace FixConst; using namespace MathConst; @@ -62,7 +59,6 @@ static constexpr double CMAPXMIN = -360.0; static constexpr double CMAPXMIN2 = -180.0; static constexpr double CMAPDX = 15.0; // 360/CMAPDIM - /* ---------------------------------------------------------------------- */ FixCMAP::FixCMAP(LAMMPS *lmp, int narg, char **arg) : @@ -75,8 +71,6 @@ FixCMAP::FixCMAP(LAMMPS *lmp, int narg, char **arg) : { if (narg != 4) 
error->all(FLERR,"Illegal fix cmap command"); - - std::cerr << "*** FixCMAP constructor\n"; restart_global = 1; restart_peratom = 1; energy_global_flag = energy_peratom_flag = 1; @@ -137,7 +131,6 @@ FixCMAP::~FixCMAP() if (copymode) return; // unregister callbacks to this fix from Atom class - atom->delete_callback(id,Atom::GROW); atom->delete_callback(id,Atom::RESTART); @@ -188,7 +181,7 @@ void FixCMAP::init() // pre-compute the derivatives of the maps - for (i = 0; i < CMAPMAX; i++) + for (i = 0; i < 6; i++) set_map_derivatives(cmapgrid[i],d1cmapgrid[i],d2cmapgrid[i],d12cmapgrid[i]); if (utils::strmatch(update->integrate_style,"^respa")) { @@ -251,7 +244,7 @@ void FixCMAP::pre_neighbor() if (maxcrossterm == 0) { if (nprocs == 1) maxcrossterm = ncmap; else maxcrossterm = static_cast (LB_FACTOR*ncmap/nprocs); - memory->create(crosstermlist,maxcrossterm,CMAPMAX,"cmap:crosstermlist"); + memory->create(crosstermlist,maxcrossterm,6,"cmap:crosstermlist"); } int nlocal = atom->nlocal; @@ -283,7 +276,7 @@ void FixCMAP::pre_neighbor() i <= atom4 && i <= atom5) { if (ncrosstermlist == maxcrossterm) { maxcrossterm += LISTDELTA; - memory->grow(crosstermlist,maxcrossterm,CMAPMAX,"cmap:crosstermlist"); + memory->grow(crosstermlist,maxcrossterm,6,"cmap:crosstermlist"); } crosstermlist[ncrosstermlist][0] = atom1; crosstermlist[ncrosstermlist][1] = atom2; @@ -312,7 +305,7 @@ void FixCMAP::pre_reverse(int eflag, int /*vflag*/) void FixCMAP::post_force(int vflag) { - int i1,i2,i3,i4,i5,type,nlist; + int n,i1,i2,i3,i4,i5,type,nlist; int li1, li2, mli1,mli2,mli11,mli21,t1,li3,li4,mli3,mli4,mli31,mli41; int list[5]; // vectors needed to calculate the cross-term dihedral angles @@ -323,7 +316,7 @@ void FixCMAP::post_force(int vflag) double dpr32r43,dpr45r43,r43,vb12x,vb12y,vb12z,vb54x,vb54y,vb54z; // cross-term dihedral angles double phi,psi,phi1,psi1; - double f1[3],f2[3],f3[3],f4[3],f5[3],vcmap[CMAPMAX]; + double f1[3],f2[3],f3[3],f4[3],f5[3],vcmap[6]; double 
gs[4],d1gs[4],d2gs[4],d12gs[4]; double engfraction; // vectors needed for the gradient/force calculation @@ -345,11 +338,11 @@ void FixCMAP::post_force(int vflag) double **f = atom->f; int nlocal = atom->nlocal; - //if( ncrosstermlist>0 ) ecmap = 0.0; + ecmap = 0.0; int eflag = eflag_caller; ev_init(eflag,vflag); - for (int n = 0; n < ncrosstermlist; n++) { + for (n = 0; n < ncrosstermlist; n++) { i1 = crosstermlist[n][0]; i2 = crosstermlist[n][1]; i3 = crosstermlist[n][2]; @@ -486,7 +479,6 @@ void FixCMAP::post_force(int vflag) bc_interpol(phi,psi,li3,li4,gs,d1gs,d2gs,d12gs); - // sum up cmap energy contributions engfraction = 0.2 * E; @@ -496,9 +488,6 @@ void FixCMAP::post_force(int vflag) if (i4 < nlocal) ecmap += engfraction; if (i5 < nlocal) ecmap += engfraction; - //std::cerr << fmt::format("*** i {} {} {} {} {} nlocal {} E {} ecmap {}\n", - //i1,i2,i3,i4,i5,nlocal,E,ecmap); - // calculate the derivatives dphi/dr_i dphidr1x = 1.0*r32/a1sq*a1x; @@ -607,13 +596,7 @@ void FixCMAP::post_force(int vflag) ((vb54y+vb43y+vb32y)*f5[2]); ev_tally(nlist,list,5.0,E,vcmap); } - - std::cerr << fmt::format("*** n {} ecmap {}\n",n,ecmap); - } - - std::cerr << fmt::format("*** post_force eflag {} eflag_caller {} evflag {} thermo_energy {} ncrosstermlist {} vflag {} ecmap {}\n",eflag,eflag_caller,evflag,thermo_energy,ncrosstermlist,vflag,ecmap); - } /* ---------------------------------------------------------------------- */ @@ -621,7 +604,6 @@ void FixCMAP::post_force(int vflag) void FixCMAP::post_force_respa(int vflag, int ilevel, int /*iloop*/) { if (ilevel == ilevel_respa) post_force(vflag); - std::cerr << fmt::format("*** post_force_respa ecmap {}\n",ecmap); } /* ---------------------------------------------------------------------- */ @@ -629,7 +611,6 @@ void FixCMAP::post_force_respa(int vflag, int ilevel, int /*iloop*/) void FixCMAP::min_post_force(int vflag) { post_force(vflag); - std::cerr << fmt::format("*** min_post_force vflag {} ecmap {}\n",vflag,ecmap); } /* 
---------------------------------------------------------------------- @@ -639,10 +620,7 @@ void FixCMAP::min_post_force(int vflag) double FixCMAP::compute_scalar() { double all; - - MPI_Allreduce(&ecmap,&all,1,MPI_DOUBLE,MPI_SUM,world); - utils::logmesg(lmp, "compute_scalar: ecmap {} all {}\n", ecmap, all); return all; } @@ -677,7 +655,7 @@ void FixCMAP::read_grid_map(char *cmapfile) } } - MPI_Bcast(&cmapgrid[0][0][0],CMAPMAX*CMAPDIM*CMAPDIM,MPI_DOUBLE,0,world); + MPI_Bcast(&cmapgrid[0][0][0],6*CMAPDIM*CMAPDIM,MPI_DOUBLE,0,world); } /* ---------------------------------------------------------------------- */ @@ -837,7 +815,7 @@ void FixCMAP::set_map_derivatives(double **map, double **d1yo, double **d2yo, double FixCMAP::dihedral_angle_atan2(double fx, double fy, double fz, double ax, double ay, double az, double bx, double by, double bz, - double absg) const + double absg) { // calculate the dihedral angle diff --git a/src/MOLECULE/fix_cmap.h b/src/MOLECULE/fix_cmap.h index 1d5d6a3d35..856dc06852 100644 --- a/src/MOLECULE/fix_cmap.h +++ b/src/MOLECULE/fix_cmap.h @@ -21,7 +21,6 @@ FixStyle(cmap,FixCMAP); #define LMP_FIX_CMAP_H #include "fix.h" - namespace LAMMPS_NS { class FixCMAP : public Fix { @@ -65,8 +64,6 @@ class FixCMAP : public Fix { double memory_usage() override; - double ecmap; - protected: int eflag_caller; int ctype, ilevel_respa; @@ -82,6 +79,9 @@ class FixCMAP : public Fix { tagint **crossterm_atom1, **crossterm_atom2, **crossterm_atom3; tagint **crossterm_atom4, **crossterm_atom5; + double E, dEdPhi, dEdPsi; + double ecmap; + double fcmap[4], cij[4][4]; double *g_axis; // CMAP grid points obtained from external file @@ -112,7 +112,7 @@ class FixCMAP : public Fix { // calculate dihedral angles double dihedral_angle_atan2(double, double, double, double, double, double, double, double, - double, double) const; + double, double); // calculate bicubic interpolation coefficient matrix c_ij @@ -121,10 +121,6 @@ class FixCMAP : public Fix { // perform 
bicubic interpolation at point of interest void bc_interpol(double, double, int, int, double *, double *, double *, double *); - - private: - double E, dEdPhi, dEdPsi, cij[4][4]; - }; } // namespace LAMMPS_NS From 3dcfb3f755b19cb40e97b087cc7c375c5c7bf073 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sun, 8 Sep 2024 16:54:44 -0400 Subject: [PATCH 067/294] replace error->all() with kokkos::abort() --- src/KOKKOS/fix_cmap_kokkos.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index e65a7ed1ea..29622b9c97 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -199,12 +199,18 @@ void FixCMAPKokkos::pre_neighbor() int atom4 = AtomKokkos::map_kokkos(d_crossterm_atom4(i,m),map_style,k_map_array,k_map_hash); int atom5 = AtomKokkos::map_kokkos(d_crossterm_atom5(i,m),map_style,k_map_array,k_map_hash); - if (atom1 == -1 || atom2 == -1 || atom3 == -1 || atom4 == -1 || atom5 == -1) - error->one(FLERR,"CMAP atoms {} {} {} {} {} missing on " + if (atom1 == -1 || atom2 == -1 || atom3 == -1 || atom4 == -1 || atom5 == -1) { + + auto error_msg = fmt::format("CMAP atoms {} {} {} {} {} missing on " "proc {} at step {}", d_crossterm_atom1(i,m),d_crossterm_atom2(i,m), d_crossterm_atom3(i,m),d_crossterm_atom4(i,m), d_crossterm_atom5(i,m),me,update->ntimestep); + + Kokkos::abort(error_msg.c_str()); + + } + atom1 = closest_image(i,atom1); atom2 = closest_image(i,atom2); atom3 = closest_image(i,atom3); From 9cfd809b6ee9af9ea52b922307f82f53df0714b1 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sun, 8 Sep 2024 17:03:40 -0400 Subject: [PATCH 068/294] feeble attempt --- src/KOKKOS/fix_cmap_kokkos.cpp | 45 ++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 29622b9c97..35a375e0aa 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ 
b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -187,9 +187,9 @@ void FixCMAPKokkos::pre_neighbor() atomKK->k_sametag.sync(); d_sametag = atomKK->k_sametag.view(); - Kokkos::parallel_for(nlocal, KOKKOS_LAMBDA(const int i) { + //ncrosstermlist = 0; - ncrosstermlist = 0; + Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncrosstermlist) { for (int m = 0; m < d_num_crossterm(i); m++) { @@ -201,14 +201,14 @@ void FixCMAPKokkos::pre_neighbor() if (atom1 == -1 || atom2 == -1 || atom3 == -1 || atom4 == -1 || atom5 == -1) { - auto error_msg = fmt::format("CMAP atoms {} {} {} {} {} missing on " - "proc {} at step {}", - d_crossterm_atom1(i,m),d_crossterm_atom2(i,m), - d_crossterm_atom3(i,m),d_crossterm_atom4(i,m), - d_crossterm_atom5(i,m),me,update->ntimestep); + //auto error_msg = fmt::format("CMAP atoms {} {} {} {} {} missing on proc {} at step {}", + // d_crossterm_atom1(i,m),d_crossterm_atom2(i,m),d_crossterm_atom3(i,m), + // d_crossterm_atom4(i,m),d_crossterm_atom5(i,m),me,update->ntimestep); + + //Kokkos::abort(error_msg.c_str()); + + Kokkos::abort("CMAP atoms missing on proc"); - Kokkos::abort(error_msg.c_str()); - } atom1 = closest_image(i,atom1); @@ -219,22 +219,25 @@ void FixCMAPKokkos::pre_neighbor() if (i <= atom1 && i <= atom2 && i <= atom3 && i <= atom4 && i <= atom5) { - if (ncrosstermlist == maxcrossterm) { - maxcrossterm += LISTDELTA; - memoryKK->grow_kokkos(k_crosstermlist,crosstermlist,maxcrossterm,CMAPMAX,"cmap:crosstermlist"); - d_crosstermlist = k_crosstermlist.template view(); + if (l_ncrosstermlist == maxcrossterm) { + //maxcrossterm += LISTDELTA; + //memoryKK->grow_kokkos(k_crosstermlist,crosstermlist,maxcrossterm,CMAPMAX,"cmap:crosstermlist"); + //d_crosstermlist = k_crosstermlist.template view(); + + Kokkos::abort("ncrosstermlist == maxcrossterm"); + } - d_crosstermlist(ncrosstermlist,0) = atom1; - d_crosstermlist(ncrosstermlist,1) = atom2; - d_crosstermlist(ncrosstermlist,2) = atom3; - d_crosstermlist(ncrosstermlist,3) = atom4; - 
d_crosstermlist(ncrosstermlist,4) = atom5; - d_crosstermlist(ncrosstermlist,5) = d_crossterm_type(i,m); - ncrosstermlist++; + d_crosstermlist(l_ncrosstermlist,0) = atom1; + d_crosstermlist(l_ncrosstermlist,1) = atom2; + d_crosstermlist(l_ncrosstermlist,2) = atom3; + d_crosstermlist(l_ncrosstermlist,3) = atom4; + d_crosstermlist(l_ncrosstermlist,4) = atom5; + d_crosstermlist(l_ncrosstermlist,5) = d_crossterm_type(i,m); + l_ncrosstermlist++; } } - }); + }, ncrosstermlist); } From 7006e19d596bffd01b6a3da2702a46bf0469b20c Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 10 Sep 2024 07:57:51 -0400 Subject: [PATCH 069/294] Update fix_cmap_kokkos.cpp --- src/KOKKOS/fix_cmap_kokkos.cpp | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 35a375e0aa..0c506d4875 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -191,7 +191,7 @@ void FixCMAPKokkos::pre_neighbor() Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncrosstermlist) { - for (int m = 0; m < d_num_crossterm(i); m++) { + for( int m = 0; m < d_num_crossterm(i); m++) { int atom1 = AtomKokkos::map_kokkos(d_crossterm_atom1(i,m),map_style,k_map_array,k_map_hash); int atom2 = AtomKokkos::map_kokkos(d_crossterm_atom2(i,m),map_style,k_map_array,k_map_hash); @@ -199,34 +199,21 @@ void FixCMAPKokkos::pre_neighbor() int atom4 = AtomKokkos::map_kokkos(d_crossterm_atom4(i,m),map_style,k_map_array,k_map_hash); int atom5 = AtomKokkos::map_kokkos(d_crossterm_atom5(i,m),map_style,k_map_array,k_map_hash); - if (atom1 == -1 || atom2 == -1 || atom3 == -1 || atom4 == -1 || atom5 == -1) { - - //auto error_msg = fmt::format("CMAP atoms {} {} {} {} {} missing on proc {} at step {}", - // d_crossterm_atom1(i,m),d_crossterm_atom2(i,m),d_crossterm_atom3(i,m), - // d_crossterm_atom4(i,m),d_crossterm_atom5(i,m),me,update->ntimestep); - - //Kokkos::abort(error_msg.c_str()); - + if( atom1 
== -1 || atom2 == -1 || atom3 == -1 || atom4 == -1 || atom5 == -1) Kokkos::abort("CMAP atoms missing on proc"); - } - atom1 = closest_image(i,atom1); atom2 = closest_image(i,atom2); atom3 = closest_image(i,atom3); atom4 = closest_image(i,atom4); atom5 = closest_image(i,atom5); - if (i <= atom1 && i <= atom2 && i <= atom3 && - i <= atom4 && i <= atom5) { - + if( i <= atom1 && i <= atom2 && i <= atom3 && i <= atom4 && i <= atom5) { if (l_ncrosstermlist == maxcrossterm) { //maxcrossterm += LISTDELTA; //memoryKK->grow_kokkos(k_crosstermlist,crosstermlist,maxcrossterm,CMAPMAX,"cmap:crosstermlist"); //d_crosstermlist = k_crosstermlist.template view(); - Kokkos::abort("ncrosstermlist == maxcrossterm"); - } d_crosstermlist(l_ncrosstermlist,0) = atom1; d_crosstermlist(l_ncrosstermlist,1) = atom2; @@ -239,6 +226,8 @@ void FixCMAPKokkos::pre_neighbor() } }, ncrosstermlist); + std::cerr << fmt::format("*** pre_neighbor ncrosstermlist {} ncmap {}\n",ncrosstermlist, ncmap); + } From 5e8ecf9cb4ebc63dcb5d8092b9c7cc940030bf80 Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Tue, 10 Sep 2024 14:45:16 +0100 Subject: [PATCH 070/294] Rename variables and function for min exponent --- src/REAXFF/fix_qtpie_reaxff.cpp | 14 +++++++------- src/REAXFF/fix_qtpie_reaxff.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp index 5fd60e51c5..06b5ff1660 100644 --- a/src/REAXFF/fix_qtpie_reaxff.cpp +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -240,9 +240,9 @@ void FixQtpieReaxFF::pertype_parameters(char *arg) // define a cutoff distance (in atomic units) beyond which overlap integrals are neglected // in calc_chi_eff() - const double expmin = find_min(gauss_exp,ntypes+1); + const double exp_min = find_min_exp(gauss_exp,ntypes+1); const int olap_cut = 10; // overlap integrals are neglected if less than pow(10,-olap_cut) - dist_cutoff = sqrt(2*olap_cut/expmin*log(10.0)); + dist_cutoff = 
sqrt(2*olap_cut/exp_min*log(10.0)); // read chi, eta and gamma @@ -1230,16 +1230,16 @@ void FixQtpieReaxFF::calc_chi_eff() /* ---------------------------------------------------------------------- */ -double FixQtpieReaxFF::find_min(const double *array, const int array_length) +double FixQtpieReaxFF::find_min_exp(const double *array, const int array_length) { // index of first gaussian orbital exponent is 1 - double smallest = array[1]; + double exp_min = array[1]; for (int i = 2; i < array_length; i++) { - if (array[i] < smallest) - smallest = array[i]; + if (array[i] < exp_min) + exp_min = array[i]; } - return smallest; + return exp_min; } /* ---------------------------------------------------------------------- */ diff --git a/src/REAXFF/fix_qtpie_reaxff.h b/src/REAXFF/fix_qtpie_reaxff.h index 8b98025a81..2a89e6f746 100644 --- a/src/REAXFF/fix_qtpie_reaxff.h +++ b/src/REAXFF/fix_qtpie_reaxff.h @@ -130,7 +130,7 @@ class FixQtpieReaxFF : public Fix { void vector_add(double *, double, double *, int); void calc_chi_eff(); - double find_min(const double*, const int); + double find_min_exp(const double*, const int); double distance(const double*, const double*); int matvecs_s, matvecs_t; // Iteration count for each system From 25f33e8721a801206db1bcc85b25ec6c1fbb0544 Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Tue, 10 Sep 2024 16:20:51 +0100 Subject: [PATCH 071/294] Add water examples featuring fix qtpie/reaxff --- examples/reaxff/water/gauss_exp.txt | 5 ++++ examples/reaxff/water/in.water.qtpie | 29 +++++++++++++++++++++ examples/reaxff/water/in.water.qtpie.field | 30 ++++++++++++++++++++++ 3 files changed, 64 insertions(+) create mode 100644 examples/reaxff/water/gauss_exp.txt create mode 100644 examples/reaxff/water/in.water.qtpie create mode 100644 examples/reaxff/water/in.water.qtpie.field diff --git a/examples/reaxff/water/gauss_exp.txt b/examples/reaxff/water/gauss_exp.txt new file mode 100644 index 0000000000..4210471e9f --- /dev/null +++ 
b/examples/reaxff/water/gauss_exp.txt @@ -0,0 +1,5 @@ +# Gaussian orbital exponents (required for fix qtpie/reaxff) taken from Table 2.2 +# of Chen, J. (2009). Theory and applications of fluctuating-charge models. +# The units of the exponents are 1 / (Bohr radius)^2 . +1 0.2240 # O +2 0.5434 # H diff --git a/examples/reaxff/water/in.water.qtpie b/examples/reaxff/water/in.water.qtpie new file mode 100644 index 0000000000..a8f8759444 --- /dev/null +++ b/examples/reaxff/water/in.water.qtpie @@ -0,0 +1,29 @@ +# QTPIE Water + +boundary p p p +units real +atom_style charge + +read_data data.water + +variable x index 1 +variable y index 1 +variable z index 1 + +replicate $x $y $z + +pair_style reaxff NULL safezone 3.0 mincap 150 +pair_coeff * * qeq_ff.water O H +neighbor 0.5 bin +neigh_modify every 1 delay 0 check yes + +velocity all create 300.0 4928459 rot yes dist gaussian + +fix 1 all qtpie/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt +fix 2 all nvt temp 300 300 50.0 + +timestep 0.5 +thermo 10 +thermo_style custom step temp press density vol + +run 20 diff --git a/examples/reaxff/water/in.water.qtpie.field b/examples/reaxff/water/in.water.qtpie.field new file mode 100644 index 0000000000..e5ac77484f --- /dev/null +++ b/examples/reaxff/water/in.water.qtpie.field @@ -0,0 +1,30 @@ +# QTPIE Water + +boundary p p p +units real +atom_style charge + +read_data data.water + +variable x index 1 +variable y index 1 +variable z index 1 + +replicate $x $y $z + +pair_style reaxff NULL safezone 3.0 mincap 150 +pair_coeff * * qeq_ff.water O H +neighbor 0.5 bin +neigh_modify every 1 delay 0 check yes + +velocity all create 300.0 4928459 rot yes dist gaussian + +fix 1 all qtpie/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt +fix 2 all nvt temp 300 300 50.0 +fix 3 all efield 0.0 0.0 0.05 + +timestep 0.5 +thermo 10 +thermo_style custom step temp press density vol + +run 20 From 6f2c4aaf0b80ef3db943933ddc29291ba3c4f1b5 Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Tue, 10 Sep 2024 
16:40:19 +0100 Subject: [PATCH 072/294] Remove unused code --- src/REAXFF/fix_qtpie_reaxff.cpp | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp index 06b5ff1660..5174cfc112 100644 --- a/src/REAXFF/fix_qtpie_reaxff.cpp +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -21,7 +21,6 @@ #include "fix_qtpie_reaxff.h" #include "atom.h" -#include "citeme.h" #include "comm.h" #include "domain.h" #include "error.h" @@ -53,19 +52,6 @@ static constexpr double SMALL = 1.0e-14; static constexpr double QSUMSMALL = 0.00001; static constexpr double ANGSTROM_TO_BOHRRADIUS = 1.8897261259; -static const char cite_fix_qtpie_reaxff[] = - "fix qtpie/reaxff command: doi\n\n" - "@article{,\n" - "title={},\n" - "author={},\n" - "journal={},\n" - "volume={},\n" - "number={},\n" - "pages={},\n" - "year={},\n" - "publisher={}\n" - "}\n\n"; - /* ---------------------------------------------------------------------- */ FixQtpieReaxFF::FixQtpieReaxFF(LAMMPS *lmp, int narg, char **arg) : @@ -173,8 +159,6 @@ FixQtpieReaxFF::~FixQtpieReaxFF() void FixQtpieReaxFF::post_constructor() { - if (lmp->citeme) lmp->citeme->add(cite_fix_qtpie_reaxff); - grow_arrays(atom->nmax); for (int i = 0; i < atom->nmax; i++) for (int j = 0; j < nprev; ++j) @@ -471,16 +455,6 @@ void FixQtpieReaxFF::init() if (efield->varflag == FixEfield::ATOM && efield->pstyle != FixEfield::ATOM) error->all(FLERR,"Atom-style external electric field requires atom-style " "potential variable when used with fix {}", style); - // if (((efield->xstyle != FixEfield::CONSTANT) && domain->xperiodic) || - // ((efield->ystyle != FixEfield::CONSTANT) && domain->yperiodic) || - // ((efield->zstyle != FixEfield::CONSTANT) && domain->zperiodic)) - // error->all(FLERR,"Must not have electric field component in direction of periodic " - // "boundary when using charge equilibration with ReaxFF."); - // if (((fabs(efield->ex) > SMALL) && domain->xperiodic) || - // 
((fabs(efield->ey) > SMALL) && domain->yperiodic) || - // ((fabs(efield->ez) > SMALL) && domain->zperiodic)) - // error->all(FLERR,"Must not have electric field component in direction of periodic " - // "boundary when using charge equilibration with ReaxFF."); } // we need a half neighbor list w/ Newton off From d67d23738667519acfaf6a28ddce5c8ed2dc19b5 Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Thu, 12 Sep 2024 19:41:12 +0100 Subject: [PATCH 073/294] Update author contributions --- src/REAXFF/fix_qtpie_reaxff.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp index 5174cfc112..548bce8cfb 100644 --- a/src/REAXFF/fix_qtpie_reaxff.cpp +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -14,8 +14,10 @@ /* ---------------------------------------------------------------------- Contributing authors: - Navraj S Lalli (Imperial College London) - Efstratios M Kritikos (California Institute of Technology) + Efstratios M Kritikos, California Institute of Technology + (Implemented original version in LAMMPS Aug 2019) + Navraj S Lalli, Imperial College London + (Reimplemented QTPIE as a new fix in LAMMPS Aug 2024 and extended functionality) ------------------------------------------------------------------------- */ #include "fix_qtpie_reaxff.h" From 62f82a7fe12cd7ae99bdf9e7eb40a4359a5a4af1 Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Fri, 13 Sep 2024 15:46:27 +0100 Subject: [PATCH 074/294] Remove additional fix name --- src/REAXFF/fix_qtpie_reaxff.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/REAXFF/fix_qtpie_reaxff.h b/src/REAXFF/fix_qtpie_reaxff.h index 2a89e6f746..2d82ad197c 100644 --- a/src/REAXFF/fix_qtpie_reaxff.h +++ b/src/REAXFF/fix_qtpie_reaxff.h @@ -13,7 +13,6 @@ #ifdef FIX_CLASS // clang-format off -FixStyle(qtpie/reax,FixQtpieReaxFF); FixStyle(qtpie/reaxff,FixQtpieReaxFF); // clang-format on #else From d56f43b4e616e0b06d64abb38fc9f5d2e9856b6b Mon Sep 17 00:00:00
2001 From: Navraj Lalli Date: Fri, 13 Sep 2024 15:50:44 +0100 Subject: [PATCH 075/294] Remove unnecessary tests --- src/REAXFF/fix_qtpie_reaxff.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp index 548bce8cfb..9701704972 100644 --- a/src/REAXFF/fix_qtpie_reaxff.cpp +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -1196,11 +1196,6 @@ void FixQtpieReaxFF::calc_chi_eff() } chi_eff[i] = sum_n / sum_d; - - if (fabs(sum_n) < SMALL && fabs(sum_d) < SMALL) - error->all(FLERR,"Unexpected value: fabs(sum_d) is {}", fabs(sum_d)); - if (fabs(sum_d) < 1.0) - error->all(FLERR,"Unexpected value: fabs(sum_d) is {}", fabs(sum_d)); } } From 8ec010f8ca695b2c7d8a650c982e2037ca8f849d Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Fri, 13 Sep 2024 15:54:12 +0100 Subject: [PATCH 076/294] Remove unused header file --- src/REAXFF/fix_qtpie_reaxff.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp index 9701704972..0b2ab30294 100644 --- a/src/REAXFF/fix_qtpie_reaxff.cpp +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -44,7 +44,6 @@ #include #include #include -#include using namespace LAMMPS_NS; using namespace FixConst; From bd07f1e8e04eac8d93e890bd0c1fc2471120f793 Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Fri, 13 Sep 2024 15:56:16 +0100 Subject: [PATCH 077/294] Change qeq to qtpie --- src/REAXFF/fix_qtpie_reaxff.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp index 0b2ab30294..c26c77c882 100644 --- a/src/REAXFF/fix_qtpie_reaxff.cpp +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -382,15 +382,15 @@ void FixQtpieReaxFF::allocate_matrix() } bigint m_cap_big = (bigint)MAX(m * safezone, mincap * REAX_MIN_NBRS); if (m_cap_big > MAXSMALLINT) - error->one(FLERR,"Too many neighbors in fix qeq/reaxff"); + error->one(FLERR,"Too many neighbors in fix {}",style); m_cap = 
m_cap_big; H.n = n_cap; H.m = m_cap; - memory->create(H.firstnbr,n_cap,"qeq:H.firstnbr"); - memory->create(H.numnbrs,n_cap,"qeq:H.numnbrs"); - memory->create(H.jlist,m_cap,"qeq:H.jlist"); - memory->create(H.val,m_cap,"qeq:H.val"); + memory->create(H.firstnbr,n_cap,"qtpie:H.firstnbr"); + memory->create(H.numnbrs,n_cap,"qtpie:H.numnbrs"); + memory->create(H.jlist,m_cap,"qtpie:H.jlist"); + memory->create(H.val,m_cap,"qtpie:H.val"); } /* ---------------------------------------------------------------------- */ From af6efcc5145d377aa964575b2be7a045a8c4749f Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Fri, 13 Sep 2024 16:43:13 +0100 Subject: [PATCH 078/294] Add fix qtpie/reaxff documentation --- doc/src/Commands_fix.rst | 1 + doc/src/fix.rst | 1 + doc/src/fix_qtpie_reaxff.rst | 178 ++++++++++++++++++++ doc/utils/sphinx-config/false_positives.txt | 4 + 4 files changed, 184 insertions(+) create mode 100644 doc/src/fix_qtpie_reaxff.rst diff --git a/doc/src/Commands_fix.rst b/doc/src/Commands_fix.rst index d9febcc289..6b75d6779c 100644 --- a/doc/src/Commands_fix.rst +++ b/doc/src/Commands_fix.rst @@ -186,6 +186,7 @@ OPT. * :doc:`qeq/slater ` * :doc:`qmmm ` * :doc:`qtb ` + * :doc:`qtpie/reaxff ` * :doc:`rattle ` * :doc:`reaxff/bonds (k) ` * :doc:`reaxff/species (k) ` diff --git a/doc/src/fix.rst b/doc/src/fix.rst index 4919c226fd..b17906d414 100644 --- a/doc/src/fix.rst +++ b/doc/src/fix.rst @@ -365,6 +365,7 @@ accelerated styles exist. 
* :doc:`qeq/slater <fix_qeq>` - charge equilibration via Slater method * :doc:`qmmm <fix_qmmm>` - functionality to enable a quantum mechanics/molecular mechanics coupling * :doc:`qtb <fix_qtb>` - implement quantum thermal bath scheme +* :doc:`qtpie/reaxff <fix_qtpie_reaxff>` - apply QTPIE charge equilibration * :doc:`rattle <fix_shake>` - RATTLE constraints on bonds and/or angles * :doc:`reaxff/bonds <fix_reaxff_bonds>` - write out ReaxFF bond information * :doc:`reaxff/species <fix_reaxff_species>` - write out ReaxFF molecule information diff --git a/doc/src/fix_qtpie_reaxff.rst b/doc/src/fix_qtpie_reaxff.rst new file mode 100644 index 0000000000..eb5bb4e982 --- /dev/null +++ b/doc/src/fix_qtpie_reaxff.rst @@ -0,0 +1,178 @@ +.. index:: fix qtpie/reaxff + +fix qtpie/reaxff command +======================== + +Syntax +"""""" + +.. code-block:: LAMMPS + + fix ID group-ID qtpie/reaxff Nevery cutlo cuthi tolerance params gfile args + +* ID, group-ID are documented in :doc:`fix <fix>` command +* qtpie/reaxff = style name of this fix command +* Nevery = perform QTPIE every this many steps +* cutlo,cuthi = lo and hi cutoff for Taper radius +* tolerance = precision to which charges will be equilibrated +* params = reaxff or a filename +* gfile = the name of a file containing Gaussian orbital exponents +* one or more keywords or keyword/value pairs may be appended + + .. parsed-literal:: + + keyword = *maxiter* + *maxiter* N = limit the number of iterations to *N* + +Examples +"""""""" + +.. code-block:: LAMMPS + + fix 1 all qtpie/reaxff 1 0.0 10.0 1.0e-6 reaxff exp.qtpie + fix 1 all qtpie/reaxff 1 0.0 10.0 1.0e-6 params.qtpie exp.qtpie maxiter 500 + +Description +""""""""""" + +The QTPIE charge equilibration method is an extension of the QEq charge +equilibration method.
With QTPIE, the partial charges on individual atoms +are computed by minimizing the electrostatic energy of the system in the +same way as the QEq method but where the Mulliken electronegativity, +:math:`\chi_i`, of each atom in the QEq charge equilibration scheme +:ref:`(Rappe and Goddard) <Rappe3>` is replaced with an effective +electronegativity given by :ref:`(Chen) <qtpie-Chen>` + +.. math:: + \chi_{\mathrm{eff},i} = \frac{\sum_{j=1}^{N} (\chi_i - \chi_j + \phi_j - \phi_i) S_{ij}} + {\sum_{m=1}^{N}S_{im}}, + +which acts to penalize long-range charge transfer seen with the QEq charge +equilibration scheme. In this equation, :math:`N` is the number of atoms in +the system, :math:`S_{ij}` is the overlap integral between atom :math:`i` +and atom :math:`j`, and :math:`\phi_i` and :math:`\phi_j` are the electric +potentials at the position of atom :math:`i` and :math:`j` due to +an external electric field, respectively. + +This fix is typically used in conjunction with the ReaxFF force +field model as implemented in the :doc:`pair_style reaxff <pair_reaxff>` +command, but it can be used with any potential in LAMMPS, so long as it +defines and uses charges on each atom. For more technical details about the +charge equilibration performed by `fix qtpie/reaxff`, which is the same as in +:doc:`fix qeq/reaxff <fix_qeq_reaxff>` except for the use of +:math:`\chi_{\mathrm{eff},i}`, please refer to :ref:`(Aktulga) `. +To be explicit, this fix replaces :math:`\chi_k` of eq. 3 in +:ref:`(Aktulga) ` with :math:`\chi_{\mathrm{eff},k}`. + +This fix requires the Mulliken electronegativity, :math:`\chi`, in eV, the +self-Coulomb potential, :math:`\eta`, in eV, and the shielded Coulomb +constant, :math:`\gamma`, in :math:`\AA^{-1}`. If the *params* setting above +is the word "reaxff", then these are extracted from the +:doc:`pair_style reaxff <pair_reaxff>` command and the ReaxFF force field +file it reads in.
If a file name is specified for *params*, then the +parameters are taken from the specified file and the file must contain +one line for each atom type. The latter form must be used when performing +QTPIE with a non-ReaxFF potential. Each line should be formatted as follows, +ensuring that the parameters are given in units of eV, eV, and :math:`\AA^{-1}`, +respectively: + +.. parsed-literal:: + + itype chi eta gamma + +where *itype* is the atom type from 1 to Ntypes. Note that eta is +defined here as twice the eta value in the ReaxFF file. + +The overlap integrals in the equation for :math:`\chi_{\mathrm{eff},i}` +are computed by using normalized 1s Gaussian type orbitals. The Gaussian +orbital exponents, :math:`\alpha`, that are needed to compute the overlap +integrals are taken from the file given by *gfile*. +This file must contain one line for each atom type and provide the Gaussian +orbital exponent for each atom type in units of inverse square Bohr radius. +Each line should be formatted as follows: + +.. parsed-literal:: + + itype alpha + +Empty lines or any text following the pound sign (#) are ignored. An example +*gfile* for a system with two atom types is + +.. parsed-literal:: + + # An example gfile. Exponents are taken from Table 2.2 of Chen, J. (2009). + # Theory and applications of fluctuating-charge models. + # The units of the exponents are 1 / (Bohr radius)^2 . + 1 0.2240 # O + 2 0.5434 # H + +The optional *maxiter* keyword allows changing the max number +of iterations in the linear solver. The default value is 200. + +.. note:: + + In order to solve the self-consistent equations for electronegativity + equalization, LAMMPS imposes the additional constraint that all the + charges in the fix group must add up to zero. The initial charge + assignments should also satisfy this constraint. LAMMPS will print a + warning if that is not the case. 
+ +Restart, fix_modify, output, run start/stop, minimize info +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +No information about this fix is written to :doc:`binary restart files +`. This fix computes a global scalar (the number of +iterations) for access by various :doc:`output commands `. +No parameter of this fix can be used with the *start/stop* keywords of +the :doc:`run ` command. + +This fix is invoked during :doc:`energy minimization `. + +Restrictions +"""""""""""" + +This fix is part of the REAXFF package. It is only enabled if +LAMMPS was built with that package. See the :doc:`Build package +` page for more info. + +This fix does not correctly handle interactions involving multiple +periodic images of the same atom. Hence, it should not be used for +periodic cell dimensions less than 10 Angstroms. + +This fix may be used in combination with :doc:`fix efield ` +and will apply the external electric field during charge equilibration, +but there may be only one fix efield instance used and the electric field +must be applied to all atoms in the system. Consequently, `fix efield` must +be used with *group-ID* all and must not be used with the keyword *region*. +Equal-style variables can be used for electric field vector +components without any further settings. Atom-style variables can be used +for spatially-varying electric field vector components, but the resulting +electric potential must be specified as an atom-style variable using +the *potential* keyword for `fix efield`. + +Related commands +"""""""""""""""" + +:doc:`pair_style reaxff `, :doc:`fix qeq/reaxff ` + +Default +""""""" + +maxiter 200 + +---------- + +.. _Rappe3: + +**(Rappe)** Rappe and Goddard III, Journal of Physical Chemistry, 95, +3358-3363 (1991). + +.. _qtpie-Chen: + +**(Chen)** Chen, Jiahao. Theory and applications of fluctuating-charge models. +University of Illinois at Urbana-Champaign, 2009. + +.. 
_qeq-Aktulga2: + +**(Aktulga)** Aktulga, Fogarty, Pandit, Grama, Parallel Computing, 38, +245-259 (2012). diff --git a/doc/utils/sphinx-config/false_positives.txt b/doc/utils/sphinx-config/false_positives.txt index cfbddbe5f6..65c1031fcf 100644 --- a/doc/utils/sphinx-config/false_positives.txt +++ b/doc/utils/sphinx-config/false_positives.txt @@ -1303,6 +1303,7 @@ gettimeofday geturl gewald Gezelter +gfile Gflop gfortran ghostneigh @@ -1709,6 +1710,7 @@ Jewett jgissing ji Jiang +Jiahao Jiao jik JIK @@ -2363,6 +2365,7 @@ mui Mukherjee Mulders Müller +Mulliken mult multi multibody @@ -3069,6 +3072,7 @@ qqr qqrd Qsb qtb +qtpie quadratically quadrupolar quadrupole From 3f232caf9b7628abd6feffa768a0c88abf53c1c8 Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Fri, 13 Sep 2024 17:13:59 +0100 Subject: [PATCH 079/294] Fix whitespace --- doc/src/fix_qtpie_reaxff.rst | 56 ++++++++++++++++----------------- src/REAXFF/fix_qtpie_reaxff.cpp | 2 +- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/doc/src/fix_qtpie_reaxff.rst b/doc/src/fix_qtpie_reaxff.rst index eb5bb4e982..5900c3c6e7 100644 --- a/doc/src/fix_qtpie_reaxff.rst +++ b/doc/src/fix_qtpie_reaxff.rst @@ -37,41 +37,41 @@ Description The QTPIE charge equilibration method is an extension of the QEq charge equilibration method. With QTPIE, the partial charges on individual atoms -are computed by minimizing the electrostatic energy of the system in the -same way as the QEq method but where the Mulliken electronegativity, -:math:`\chi_i`, of each atom in the QEq charge equilibration scheme -:ref:`(Rappe and Goddard) ` is replaced with an effective +are computed by minimizing the electrostatic energy of the system in the +same way as the QEq method but where the Mulliken electronegativity, +:math:`\chi_i`, of each atom in the QEq charge equilibration scheme +:ref:`(Rappe and Goddard) ` is replaced with an effective electronegativity given by :ref:`(Chen) ` .. 
math:: \chi_{\mathrm{eff},i} = \frac{\sum_{j=1}^{N} (\chi_i - \chi_j + \phi_j - \phi_i) S_{ij}} {\sum_{m=1}^{N}S_{im}}, -which acts to penalize long-range charge transfer seen with the QEq charge +which acts to penalize long-range charge transfer seen with the QEq charge equilibration scheme. In this equation, :math:`N` is the number of atoms in -the system, :math:`S_{ij}` is the overlap integral between atom :math:`i` -and atom :math:`j`, and :math:`\phi_i` and :math:`\phi_j` are the electric +the system, :math:`S_{ij}` is the overlap integral between atom :math:`i` +and atom :math:`j`, and :math:`\phi_i` and :math:`\phi_j` are the electric potentials at the position of atom :math:`i` and :math:`j` due to -an external electric field, respectively. +an external electric field, respectively. This fix is typically used in conjunction with the ReaxFF force field model as implemented in the :doc:`pair_style reaxff ` command, but it can be used with any potential in LAMMPS, so long as it -defines and uses charges on each atom. For more technical details about the +defines and uses charges on each atom. For more technical details about the charge equilibration performed by `fix qtpie/reaxff`, which is the same as in -:doc:`fix qeq/reaxff ` except for the use of +:doc:`fix qeq/reaxff ` except for the use of :math:`\chi_{\mathrm{eff},i}`, please refer to :ref:`(Aktulga) `. -To be explicit, this fix replaces :math:`\chi_k` of eq. 3 in +To be explicit, this fix replaces :math:`\chi_k` of eq. 3 in :ref:`(Aktulga) ` with :math:`\chi_{\mathrm{eff},k}`. This fix requires the Mulliken electronegativity, :math:`\chi`, in eV, the self-Coulomb potential, :math:`\eta`, in eV, and the shielded Coulomb -constant, :math:`\gamma`, in :math:`\AA^{-1}`. If the *params* setting above -is the word "reaxff", then these are extracted from the -:doc:`pair_style reaxff ` command and the ReaxFF force field -file it reads in. 
If a file name is specified for *params*, then the -parameters are taken from the specified file and the file must contain -one line for each atom type. The latter form must be used when performing +constant, :math:`\gamma`, in :math:`\AA^{-1}`. If the *params* setting above +is the word "reaxff", then these are extracted from the +:doc:`pair_style reaxff ` command and the ReaxFF force field +file it reads in. If a file name is specified for *params*, then the +parameters are taken from the specified file and the file must contain +one line for each atom type. The latter form must be used when performing QTPIE with a non-ReaxFF potential. Each line should be formatted as follows, ensuring that the parameters are given in units of eV, eV, and :math:`\AA^{-1}`, respectively: @@ -80,15 +80,15 @@ respectively: itype chi eta gamma -where *itype* is the atom type from 1 to Ntypes. Note that eta is +where *itype* is the atom type from 1 to Ntypes. Note that eta is defined here as twice the eta value in the ReaxFF file. The overlap integrals in the equation for :math:`\chi_{\mathrm{eff},i}` are computed by using normalized 1s Gaussian type orbitals. The Gaussian -orbital exponents, :math:`\alpha`, that are needed to compute the overlap -integrals are taken from the file given by *gfile*. +orbital exponents, :math:`\alpha`, that are needed to compute the overlap +integrals are taken from the file given by *gfile*. This file must contain one line for each atom type and provide the Gaussian -orbital exponent for each atom type in units of inverse square Bohr radius. +orbital exponent for each atom type in units of inverse square Bohr radius. Each line should be formatted as follows: .. parsed-literal:: @@ -100,7 +100,7 @@ Empty lines or any text following the pound sign (#) are ignored. An example .. parsed-literal:: - # An example gfile. Exponents are taken from Table 2.2 of Chen, J. (2009). + # An example gfile. Exponents are taken from Table 2.2 of Chen, J. (2009). 
# Theory and applications of fluctuating-charge models. # The units of the exponents are 1 / (Bohr radius)^2 . 1 0.2240 # O @@ -142,12 +142,12 @@ periodic cell dimensions less than 10 Angstroms. This fix may be used in combination with :doc:`fix efield ` and will apply the external electric field during charge equilibration, but there may be only one fix efield instance used and the electric field -must be applied to all atoms in the system. Consequently, `fix efield` must +must be applied to all atoms in the system. Consequently, `fix efield` must be used with *group-ID* all and must not be used with the keyword *region*. -Equal-style variables can be used for electric field vector -components without any further settings. Atom-style variables can be used -for spatially-varying electric field vector components, but the resulting -electric potential must be specified as an atom-style variable using +Equal-style variables can be used for electric field vector +components without any further settings. Atom-style variables can be used +for spatially-varying electric field vector components, but the resulting +electric potential must be specified as an atom-style variable using the *potential* keyword for `fix efield`. Related commands @@ -169,7 +169,7 @@ maxiter 200 .. _qtpie-Chen: -**(Chen)** Chen, Jiahao. Theory and applications of fluctuating-charge models. +**(Chen)** Chen, Jiahao. Theory and applications of fluctuating-charge models. University of Illinois at Urbana-Champaign, 2009. .. 
_qeq-Aktulga2: diff --git a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp index c26c77c882..b08c6808ac 100644 --- a/src/REAXFF/fix_qtpie_reaxff.cpp +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -14,7 +14,7 @@ /* ---------------------------------------------------------------------- Contributing authors: - Efstratios M Kritikos, California Institute of Technology + Efstratios M Kritikos, California Institute of Technology (Implemented original version in LAMMMPS Aug 2019) Navraj S Lalli, Imperial College London (Reimplemented QTPIE as a new fix in LAMMPS Aug 2024 and extended functionality) From 96c776c51f6547a66431f7986213dc457959b9fb Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Fri, 13 Sep 2024 18:08:14 +0100 Subject: [PATCH 080/294] Add log files for qtpie/reaxff examples --- ...log.29Aug24.reaxff.water-qtpie-field.g++.1 | 127 ++++++++++++++++++ ...log.29Aug24.reaxff.water-qtpie-field.g++.4 | 127 ++++++++++++++++++ .../log.29Aug24.reaxff.water-qtpie.g++.1 | 126 +++++++++++++++++ .../log.29Aug24.reaxff.water-qtpie.g++.4 | 126 +++++++++++++++++ 4 files changed, 506 insertions(+) create mode 100644 examples/reaxff/water/log.29Aug24.reaxff.water-qtpie-field.g++.1 create mode 100644 examples/reaxff/water/log.29Aug24.reaxff.water-qtpie-field.g++.4 create mode 100644 examples/reaxff/water/log.29Aug24.reaxff.water-qtpie.g++.1 create mode 100644 examples/reaxff/water/log.29Aug24.reaxff.water-qtpie.g++.4 diff --git a/examples/reaxff/water/log.29Aug24.reaxff.water-qtpie-field.g++.1 b/examples/reaxff/water/log.29Aug24.reaxff.water-qtpie-field.g++.1 new file mode 100644 index 0000000000..33221ff080 --- /dev/null +++ b/examples/reaxff/water/log.29Aug24.reaxff.water-qtpie-field.g++.1 @@ -0,0 +1,127 @@ +LAMMPS (29 Aug 2024 - Development - patch_29Aug2024-76-g3f232caf9b) + using 1 OpenMP thread(s) per MPI task +# QTPIE Water + +boundary p p p +units real +atom_style charge + +read_data data.water +Reading data file ... 
+ orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 3000 atoms + read_data CPU = 0.056 seconds + +variable x index 1 +variable y index 1 +variable z index 1 + +replicate $x $y $z +replicate 1 $y $z +replicate 1 1 $z +replicate 1 1 1 +Replication is creating a 1x1x1 = 1 times larger system... + orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046) + 1 by 1 by 1 MPI processor grid + 3000 atoms + replicate CPU = 0.001 seconds + +pair_style reaxff NULL safezone 3.0 mincap 150 +pair_coeff * * qeq_ff.water O H +WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:294) +neighbor 0.5 bin +neigh_modify every 1 delay 0 check yes + +velocity all create 300.0 4928459 rot yes dist gaussian + +fix 1 all qtpie/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt +fix 2 all nvt temp 300 300 50.0 +fix 3 all efield 0.0 0.0 0.05 + +timestep 0.5 +thermo 10 +thermo_style custom step temp press density vol + +run 20 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- Type Label Framework: https://doi.org/10.1021/acs.jpcb.3c08419 + +@Article{Gissinger24, + author = {Jacob R. Gissinger, Ilia Nikiforov, Yaser Afshar, Brendon Waters, Moon-ki Choi, Daniel S. Karls, Alexander Stukowski, Wonpil Im, Hendrik Heinz, Axel Kohlmeyer, and Ellad B. Tadmor}, + title = {Type Label Framework for Bonded Force Fields in LAMMPS}, + journal = {J. Phys. Chem. B}, + year = 2024, + volume = 128, + number = 13, + pages = {3282–-3297} +} + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. 
Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... + update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 10.5 + ghost atom cutoff = 10.5 + binsize = 5.25, bins = 6 6 6 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/ghost/newtoff + stencil: full/ghost/bin/3d + bin: standard + (2) fix qtpie/reaxff, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 539.2 | 539.2 | 539.2 Mbytes + Step Temp Press Density Volume + 0 300 10137.041 1 29915.273 + 10 296.09128 3564.7969 1 29915.273 + 20 293.04308 10299.201 1 29915.273 +Loop time of 10.7863 on 1 procs for 20 steps with 3000 atoms + +Performance: 0.080 ns/day, 299.620 hours/ns, 1.854 timesteps/s, 5.563 katom-step/s +100.0% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 4.7275 | 4.7275 | 4.7275 | 0.0 | 43.83 +Neigh | 0.17533 | 0.17533 | 0.17533 | 0.0 | 1.63 +Comm | 0.0017376 | 0.0017376 | 0.0017376 | 0.0 | 0.02 +Output | 8.2065e-05 | 8.2065e-05 | 8.2065e-05 | 0.0 | 0.00 +Modify | 5.8812 | 5.8812 | 5.8812 | 0.0 | 54.52 +Other | | 0.0005226 | | | 0.00 + +Nlocal: 3000 ave 3000 max 3000 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 11077 ave 11077 max 11077 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 971775 ave 971775 max 971775 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 971775 +Ave neighs/atom = 323.925 +Neighbor list builds = 2 
+Dangerous builds = 0 +Total wall time: 0:00:12 diff --git a/examples/reaxff/water/log.29Aug24.reaxff.water-qtpie-field.g++.4 b/examples/reaxff/water/log.29Aug24.reaxff.water-qtpie-field.g++.4 new file mode 100644 index 0000000000..07a348604e --- /dev/null +++ b/examples/reaxff/water/log.29Aug24.reaxff.water-qtpie-field.g++.4 @@ -0,0 +1,127 @@ +LAMMPS (29 Aug 2024 - Development - patch_29Aug2024-76-g3f232caf9b) + using 1 OpenMP thread(s) per MPI task +# QTPIE Water + +boundary p p p +units real +atom_style charge + +read_data data.water +Reading data file ... + orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046) + 1 by 2 by 2 MPI processor grid + reading atoms ... + 3000 atoms + read_data CPU = 0.053 seconds + +variable x index 1 +variable y index 1 +variable z index 1 + +replicate $x $y $z +replicate 1 $y $z +replicate 1 1 $z +replicate 1 1 1 +Replication is creating a 1x1x1 = 1 times larger system... + orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046) + 1 by 2 by 2 MPI processor grid + 3000 atoms + replicate CPU = 0.002 seconds + +pair_style reaxff NULL safezone 3.0 mincap 150 +pair_coeff * * qeq_ff.water O H +WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:294) +neighbor 0.5 bin +neigh_modify every 1 delay 0 check yes + +velocity all create 300.0 4928459 rot yes dist gaussian + +fix 1 all qtpie/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt +fix 2 all nvt temp 300 300 50.0 +fix 3 all efield 0.0 0.0 0.05 + +timestep 0.5 +thermo 10 +thermo_style custom step temp press density vol + +run 20 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- Type Label Framework: https://doi.org/10.1021/acs.jpcb.3c08419 + +@Article{Gissinger24, + author = {Jacob R. Gissinger, Ilia Nikiforov, Yaser Afshar, Brendon Waters, Moon-ki Choi, Daniel S. Karls, Alexander Stukowski, Wonpil Im, Hendrik Heinz, Axel Kohlmeyer, and Ellad B. 
Tadmor}, + title = {Type Label Framework for Bonded Force Fields in LAMMPS}, + journal = {J. Phys. Chem. B}, + year = 2024, + volume = 128, + number = 13, + pages = {3282–-3297} +} + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... + update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 10.5 + ghost atom cutoff = 10.5 + binsize = 5.25, bins = 6 6 6 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/ghost/newtoff + stencil: full/ghost/bin/3d + bin: standard + (2) fix qtpie/reaxff, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 260.5 | 262.2 | 263.6 Mbytes + Step Temp Press Density Volume + 0 300 10137.041 1 29915.273 + 10 296.09128 3564.7969 1 29915.273 + 20 293.04308 10299.201 1 29915.273 +Loop time of 3.14492 on 4 procs for 20 steps with 3000 atoms + +Performance: 0.275 ns/day, 87.359 hours/ns, 6.359 timesteps/s, 19.078 katom-step/s +99.6% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 1.6557 | 1.6847 | 1.7281 | 2.1 | 53.57 +Neigh | 0.086503 | 0.086968 | 0.087627 | 0.2 | 2.77 +Comm | 0.003309 | 0.046699 | 0.075729 | 12.4 | 1.48 +Output | 5.0156e-05 | 5.483e-05 | 6.8111e-05 | 0.0 | 0.00 +Modify | 1.3254 | 1.3261 | 1.3266 | 0.0 | 42.16 
+Other | | 0.0004552 | | | 0.01 + +Nlocal: 750 ave 760 max 735 min +Histogram: 1 0 0 0 1 0 0 0 0 2 +Nghost: 6230.5 ave 6253 max 6193 min +Histogram: 1 0 0 0 0 0 1 0 1 1 +Neighs: 276995 ave 280886 max 271360 min +Histogram: 1 0 0 0 1 0 0 0 1 1 + +Total # of neighbors = 1107981 +Ave neighs/atom = 369.327 +Neighbor list builds = 2 +Dangerous builds = 0 +Total wall time: 0:00:03 diff --git a/examples/reaxff/water/log.29Aug24.reaxff.water-qtpie.g++.1 b/examples/reaxff/water/log.29Aug24.reaxff.water-qtpie.g++.1 new file mode 100644 index 0000000000..1187a755ee --- /dev/null +++ b/examples/reaxff/water/log.29Aug24.reaxff.water-qtpie.g++.1 @@ -0,0 +1,126 @@ +LAMMPS (29 Aug 2024 - Development - patch_29Aug2024-76-g3f232caf9b) + using 1 OpenMP thread(s) per MPI task +# QTPIE Water + +boundary p p p +units real +atom_style charge + +read_data data.water +Reading data file ... + orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 3000 atoms + read_data CPU = 0.055 seconds + +variable x index 1 +variable y index 1 +variable z index 1 + +replicate $x $y $z +replicate 1 $y $z +replicate 1 1 $z +replicate 1 1 1 +Replication is creating a 1x1x1 = 1 times larger system... 
+ orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046) + 1 by 1 by 1 MPI processor grid + 3000 atoms + replicate CPU = 0.001 seconds + +pair_style reaxff NULL safezone 3.0 mincap 150 +pair_coeff * * qeq_ff.water O H +WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:294) +neighbor 0.5 bin +neigh_modify every 1 delay 0 check yes + +velocity all create 300.0 4928459 rot yes dist gaussian + +fix 1 all qtpie/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt +fix 2 all nvt temp 300 300 50.0 + +timestep 0.5 +thermo 10 +thermo_style custom step temp press density vol + +run 20 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- Type Label Framework: https://doi.org/10.1021/acs.jpcb.3c08419 + +@Article{Gissinger24, + author = {Jacob R. Gissinger, Ilia Nikiforov, Yaser Afshar, Brendon Waters, Moon-ki Choi, Daniel S. Karls, Alexander Stukowski, Wonpil Im, Hendrik Heinz, Axel Kohlmeyer, and Ellad B. Tadmor}, + title = {Type Label Framework for Bonded Force Fields in LAMMPS}, + journal = {J. Phys. Chem. B}, + year = 2024, + volume = 128, + number = 13, + pages = {3282–-3297} +} + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 10.5 + ghost atom cutoff = 10.5 + binsize = 5.25, bins = 6 6 6 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/ghost/newtoff + stencil: full/ghost/bin/3d + bin: standard + (2) fix qtpie/reaxff, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 539.2 | 539.2 | 539.2 Mbytes + Step Temp Press Density Volume + 0 300 10138.375 1 29915.273 + 10 295.97879 3575.2769 1 29915.273 + 20 292.76583 10309.128 1 29915.273 +Loop time of 10.8138 on 1 procs for 20 steps with 3000 atoms + +Performance: 0.080 ns/day, 300.383 hours/ns, 1.849 timesteps/s, 5.548 katom-step/s +99.9% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 4.7177 | 4.7177 | 4.7177 | 0.0 | 43.63 +Neigh | 0.17607 | 0.17607 | 0.17607 | 0.0 | 1.63 +Comm | 0.0017295 | 0.0017295 | 0.0017295 | 0.0 | 0.02 +Output | 8.5431e-05 | 8.5431e-05 | 8.5431e-05 | 0.0 | 0.00 +Modify | 5.9177 | 5.9177 | 5.9177 | 0.0 | 54.72 +Other | | 0.0004911 | | | 0.00 + +Nlocal: 3000 ave 3000 max 3000 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 11077 ave 11077 max 11077 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 971830 ave 971830 max 971830 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 971830 +Ave neighs/atom = 323.94333 +Neighbor list builds = 2 +Dangerous builds = 0 +Total wall time: 0:00:12 diff --git a/examples/reaxff/water/log.29Aug24.reaxff.water-qtpie.g++.4 b/examples/reaxff/water/log.29Aug24.reaxff.water-qtpie.g++.4 new file mode 100644 index 0000000000..372156b6a2 --- /dev/null +++ 
b/examples/reaxff/water/log.29Aug24.reaxff.water-qtpie.g++.4 @@ -0,0 +1,126 @@ +LAMMPS (29 Aug 2024 - Development - patch_29Aug2024-76-g3f232caf9b) + using 1 OpenMP thread(s) per MPI task +# QTPIE Water + +boundary p p p +units real +atom_style charge + +read_data data.water +Reading data file ... + orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046) + 1 by 2 by 2 MPI processor grid + reading atoms ... + 3000 atoms + read_data CPU = 0.053 seconds + +variable x index 1 +variable y index 1 +variable z index 1 + +replicate $x $y $z +replicate 1 $y $z +replicate 1 1 $z +replicate 1 1 1 +Replication is creating a 1x1x1 = 1 times larger system... + orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046) + 1 by 2 by 2 MPI processor grid + 3000 atoms + replicate CPU = 0.002 seconds + +pair_style reaxff NULL safezone 3.0 mincap 150 +pair_coeff * * qeq_ff.water O H +WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:294) +neighbor 0.5 bin +neigh_modify every 1 delay 0 check yes + +velocity all create 300.0 4928459 rot yes dist gaussian + +fix 1 all qtpie/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt +fix 2 all nvt temp 300 300 50.0 + +timestep 0.5 +thermo 10 +thermo_style custom step temp press density vol + +run 20 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- Type Label Framework: https://doi.org/10.1021/acs.jpcb.3c08419 + +@Article{Gissinger24, + author = {Jacob R. Gissinger, Ilia Nikiforov, Yaser Afshar, Brendon Waters, Moon-ki Choi, Daniel S. Karls, Alexander Stukowski, Wonpil Im, Hendrik Heinz, Axel Kohlmeyer, and Ellad B. Tadmor}, + title = {Type Label Framework for Bonded Force Fields in LAMMPS}, + journal = {J. Phys. Chem. B}, + year = 2024, + volume = 128, + number = 13, + pages = {3282–-3297} +} + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. 
Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... + update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 10.5 + ghost atom cutoff = 10.5 + binsize = 5.25, bins = 6 6 6 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/ghost/newtoff + stencil: full/ghost/bin/3d + bin: standard + (2) fix qtpie/reaxff, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 260.5 | 262.2 | 263.6 Mbytes + Step Temp Press Density Volume + 0 300 10138.375 1 29915.273 + 10 295.97879 3575.2769 1 29915.273 + 20 292.76583 10309.128 1 29915.273 +Loop time of 3.13598 on 4 procs for 20 steps with 3000 atoms + +Performance: 0.276 ns/day, 87.111 hours/ns, 6.378 timesteps/s, 19.133 katom-step/s +99.6% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 1.6622 | 1.695 | 1.7252 | 2.2 | 54.05 +Neigh | 0.086543 | 0.087117 | 0.087848 | 0.2 | 2.78 +Comm | 0.0048192 | 0.035002 | 0.067754 | 15.4 | 1.12 +Output | 4.8033e-05 | 5.3375e-05 | 6.6893e-05 | 0.0 | 0.00 +Modify | 1.3176 | 1.3183 | 1.3189 | 0.0 | 42.04 +Other | | 0.0004753 | | | 0.02 + +Nlocal: 750 ave 760 max 735 min +Histogram: 1 0 0 0 1 0 0 0 0 2 +Nghost: 6229.5 ave 6253 max 6191 min +Histogram: 1 0 0 0 0 0 1 0 1 1 +Neighs: 277011 ave 280900 max 271380 min +Histogram: 1 0 0 0 1 0 0 0 1 1 + +Total # of neighbors = 1108044 +Ave neighs/atom = 369.348 +Neighbor list 
builds = 2 +Dangerous builds = 0 +Total wall time: 0:00:03 From 861be216a14a1dbf50ddd83814c1abb6ada6676d Mon Sep 17 00:00:00 2001 From: alphataubio Date: Fri, 13 Sep 2024 14:38:12 -0400 Subject: [PATCH 081/294] parallel_scan --- src/KOKKOS/fix_cmap_kokkos.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 0c506d4875..5c3f2091af 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -165,8 +165,12 @@ void FixCMAPKokkos::pre_neighbor() // then list will grow by LISTDELTA chunks if (maxcrossterm == 0) { - if (nprocs == 1) maxcrossterm = ncmap; - else maxcrossterm = static_cast (LB_FACTOR*ncmap/nprocs); + + // on KOKKOS, allocate enough for all crossterms on each GPU to avoid grow operation in device code + //if (nprocs == 1) maxcrossterm = ncmap; + //else maxcrossterm = static_cast (LB_FACTOR*ncmap/nprocs); + maxcrossterm = ncmap; + memoryKK->create_kokkos(k_crosstermlist,crosstermlist,maxcrossterm,CMAPMAX,"cmap:crosstermlist"); d_crosstermlist = k_crosstermlist.template view(); } @@ -187,9 +191,7 @@ void FixCMAPKokkos::pre_neighbor() atomKK->k_sametag.sync(); d_sametag = atomKK->k_sametag.view(); - //ncrosstermlist = 0; - - Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncrosstermlist) { + Kokkos::parallel_scan(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncrosstermlist, bool is_final) { for( int m = 0; m < d_num_crossterm(i); m++) { @@ -209,18 +211,16 @@ void FixCMAPKokkos::pre_neighbor() atom5 = closest_image(i,atom5); if( i <= atom1 && i <= atom2 && i <= atom3 && i <= atom4 && i <= atom5) { - if (l_ncrosstermlist == maxcrossterm) { - //maxcrossterm += LISTDELTA; - //memoryKK->grow_kokkos(k_crosstermlist,crosstermlist,maxcrossterm,CMAPMAX,"cmap:crosstermlist"); - //d_crosstermlist = k_crosstermlist.template view(); - Kokkos::abort("ncrosstermlist == maxcrossterm"); - } + if (l_ncrosstermlist > maxcrossterm) 
Kokkos::abort("l_ncrosstermlist > maxcrossterm"); d_crosstermlist(l_ncrosstermlist,0) = atom1; d_crosstermlist(l_ncrosstermlist,1) = atom2; d_crosstermlist(l_ncrosstermlist,2) = atom3; d_crosstermlist(l_ncrosstermlist,3) = atom4; d_crosstermlist(l_ncrosstermlist,4) = atom5; d_crosstermlist(l_ncrosstermlist,5) = d_crossterm_type(i,m); + + Kokkos::printf(" *** l_ncrosstermlist %i d_crosstermlist %i %i %i %i %i %i\n", l_ncrosstermlist, d_crossterm_atom1(i,m), d_crossterm_atom2(i,m), d_crossterm_atom3(i,m), d_crossterm_atom4(i,m), d_crossterm_atom5(i,m), d_crosstermlist(l_ncrosstermlist,5)); + l_ncrosstermlist++; } } From 2556788ed2186d2e1cf9369014b91b4e9640b4d2 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 14 Sep 2024 15:02:55 -0400 Subject: [PATCH 082/294] thanks @crtrott --- src/KOKKOS/fix_cmap_kokkos.cpp | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 5c3f2091af..8c11bdb8f8 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -212,15 +212,14 @@ void FixCMAPKokkos::pre_neighbor() if( i <= atom1 && i <= atom2 && i <= atom3 && i <= atom4 && i <= atom5) { if (l_ncrosstermlist > maxcrossterm) Kokkos::abort("l_ncrosstermlist > maxcrossterm"); - d_crosstermlist(l_ncrosstermlist,0) = atom1; - d_crosstermlist(l_ncrosstermlist,1) = atom2; - d_crosstermlist(l_ncrosstermlist,2) = atom3; - d_crosstermlist(l_ncrosstermlist,3) = atom4; - d_crosstermlist(l_ncrosstermlist,4) = atom5; - d_crosstermlist(l_ncrosstermlist,5) = d_crossterm_type(i,m); - - Kokkos::printf(" *** l_ncrosstermlist %i d_crosstermlist %i %i %i %i %i %i\n", l_ncrosstermlist, d_crossterm_atom1(i,m), d_crossterm_atom2(i,m), d_crossterm_atom3(i,m), d_crossterm_atom4(i,m), d_crossterm_atom5(i,m), d_crosstermlist(l_ncrosstermlist,5)); - + if(is_final) { + d_crosstermlist(l_ncrosstermlist,0) = atom1; + d_crosstermlist(l_ncrosstermlist,1) = atom2; + 
d_crosstermlist(l_ncrosstermlist,2) = atom3; + d_crosstermlist(l_ncrosstermlist,3) = atom4; + d_crosstermlist(l_ncrosstermlist,4) = atom5; + d_crosstermlist(l_ncrosstermlist,5) = d_crossterm_type(i,m); + } l_ncrosstermlist++; } } From 8dfc6207c2236b17c6e2d0579fc2c25a378dec31 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 14 Sep 2024 16:04:15 -0400 Subject: [PATCH 083/294] fix warnings --- src/KOKKOS/fix_cmap_kokkos.cpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 8c11bdb8f8..e7b02ed68e 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -35,9 +35,6 @@ using namespace LAMMPS_NS; using namespace MathConst; -static constexpr int LISTDELTA = 10000; -static constexpr double LB_FACTOR = 1.5; - static constexpr int CMAPMAX = 6; // max # of CMAP terms stored by one atom static constexpr int CMAPDIM = 24; // grid map dimension is 24 x 24 static constexpr double CMAPXMIN2 = -180.0; @@ -157,18 +154,10 @@ void FixCMAPKokkos::init() template void FixCMAPKokkos::pre_neighbor() { - const int me = comm->me; - const int nprocs = comm->nprocs; - - // guesstimate initial length of local crossterm list - // if ncmap was not set (due to read_restart, no read_data), - // then list will grow by LISTDELTA chunks if (maxcrossterm == 0) { // on KOKKOS, allocate enough for all crossterms on each GPU to avoid grow operation in device code - //if (nprocs == 1) maxcrossterm = ncmap; - //else maxcrossterm = static_cast (LB_FACTOR*ncmap/nprocs); maxcrossterm = ncmap; memoryKK->create_kokkos(k_crosstermlist,crosstermlist,maxcrossterm,CMAPMAX,"cmap:crosstermlist"); From 63483b3fc212876ee4c6e67507acbe2b7c5e96ab Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 14 Sep 2024 16:09:56 -0400 Subject: [PATCH 084/294] KOKKOS_CLASS_LAMBDA --- src/KOKKOS/fix_cmap_kokkos.cpp | 8 ++++++-- src/KOKKOS/region_sphere_kokkos.cpp | 8 ++++++-- 2 files changed, 12 insertions(+), 4 
deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index e7b02ed68e..7c4bd6972b 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -13,7 +13,7 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing author: Mitch Murphy (alphataubio@gmail.com) + Contributing author: Mitch Murphy (alphataubio at gmail) ------------------------------------------------------------------------- */ #include "fix_cmap_kokkos.h" @@ -180,7 +180,11 @@ void FixCMAPKokkos::pre_neighbor() atomKK->k_sametag.sync(); d_sametag = atomKK->k_sametag.view(); - Kokkos::parallel_scan(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncrosstermlist, bool is_final) { + // FIXME: capture lambda reference to KOKKOS_INLINE_FUNCTION map_kokkos() + // workaround: KOKKOS_CLASS_LAMBDA instead of KOKKOS_LAMBDA + // https://github.com/kokkos/kokkos/issues/695 + + Kokkos::parallel_scan(nlocal, KOKKOS_CLASS_LAMBDA(const int i, int &l_ncrosstermlist, bool is_final) { for( int m = 0; m < d_num_crossterm(i); m++) { diff --git a/src/KOKKOS/region_sphere_kokkos.cpp b/src/KOKKOS/region_sphere_kokkos.cpp index 9b747b92df..13dd00957a 100644 --- a/src/KOKKOS/region_sphere_kokkos.cpp +++ b/src/KOKKOS/region_sphere_kokkos.cpp @@ -13,7 +13,7 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing author: Mitch Murphy (alphataubio at gmail.com) + Contributing author: Mitch Murphy (alphataubio at gmail) ------------------------------------------------------------------------- */ #include "region_sphere_kokkos.h" @@ -64,7 +64,11 @@ void RegSphereKokkos::match_all_kokkos(int groupbit_in, DAT::tdual_i copymode = 1; - Kokkos::parallel_for(atom->nlocal, KOKKOS_LAMBDA( const int &i ) { + // FIXME: capture lambda reference to KOKKOS_INLINE_FUNCTION 
match() + // workaround: KOKKOS_CLASS_LAMBDA instead of KOKKOS_LAMBDA + // https://github.com/kokkos/kokkos/issues/695 + + Kokkos::parallel_for(atom->nlocal, KOKKOS_CLASS_LAMBDA( const int &i ) { if (d_mask[i] & l_groupbit) { double x_tmp = d_x(i,0); double y_tmp = d_x(i,1); From 813fd880cd09f7bc3cb1d255bbc15dadb21267c9 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 14 Sep 2024 17:09:21 -0400 Subject: [PATCH 085/294] tagged operators --- src/KOKKOS/fix_cmap_kokkos.cpp | 80 +++++++++++++++++----------------- src/KOKKOS/fix_cmap_kokkos.h | 8 +++- 2 files changed, 48 insertions(+), 40 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 7c4bd6972b..96ac199567 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -180,48 +180,50 @@ void FixCMAPKokkos::pre_neighbor() atomKK->k_sametag.sync(); d_sametag = atomKK->k_sametag.view(); - // FIXME: capture lambda reference to KOKKOS_INLINE_FUNCTION map_kokkos() - // workaround: KOKKOS_CLASS_LAMBDA instead of KOKKOS_LAMBDA - // https://github.com/kokkos/kokkos/issues/695 - - Kokkos::parallel_scan(nlocal, KOKKOS_CLASS_LAMBDA(const int i, int &l_ncrosstermlist, bool is_final) { - - for( int m = 0; m < d_num_crossterm(i); m++) { - - int atom1 = AtomKokkos::map_kokkos(d_crossterm_atom1(i,m),map_style,k_map_array,k_map_hash); - int atom2 = AtomKokkos::map_kokkos(d_crossterm_atom2(i,m),map_style,k_map_array,k_map_hash); - int atom3 = AtomKokkos::map_kokkos(d_crossterm_atom3(i,m),map_style,k_map_array,k_map_hash); - int atom4 = AtomKokkos::map_kokkos(d_crossterm_atom4(i,m),map_style,k_map_array,k_map_hash); - int atom5 = AtomKokkos::map_kokkos(d_crossterm_atom5(i,m),map_style,k_map_array,k_map_hash); - - if( atom1 == -1 || atom2 == -1 || atom3 == -1 || atom4 == -1 || atom5 == -1) - Kokkos::abort("CMAP atoms missing on proc"); - - atom1 = closest_image(i,atom1); - atom2 = closest_image(i,atom2); - atom3 = closest_image(i,atom3); - atom4 = 
closest_image(i,atom4); - atom5 = closest_image(i,atom5); - - if( i <= atom1 && i <= atom2 && i <= atom3 && i <= atom4 && i <= atom5) { - if (l_ncrosstermlist > maxcrossterm) Kokkos::abort("l_ncrosstermlist > maxcrossterm"); - if(is_final) { - d_crosstermlist(l_ncrosstermlist,0) = atom1; - d_crosstermlist(l_ncrosstermlist,1) = atom2; - d_crosstermlist(l_ncrosstermlist,2) = atom3; - d_crosstermlist(l_ncrosstermlist,3) = atom4; - d_crosstermlist(l_ncrosstermlist,4) = atom5; - d_crosstermlist(l_ncrosstermlist,5) = d_crossterm_type(i,m); - } - l_ncrosstermlist++; - } - } - }, ncrosstermlist); + copymode = 1; + Kokkos::parallel_scan(Kokkos::RangePolicy(0,nlocal),*this,ncrosstermlist); + copymode = 0; std::cerr << fmt::format("*** pre_neighbor ncrosstermlist {} ncmap {}\n",ncrosstermlist, ncmap); } +template +KOKKOS_INLINE_FUNCTION +void FixCMAPKokkos::operator()(TagFixCmapPreNeighbor, const int i, int &l_ncrosstermlist, const bool is_final ) const +{ + + for( int m = 0; m < d_num_crossterm(i); m++) { + + int atom1 = AtomKokkos::map_kokkos(d_crossterm_atom1(i,m),map_style,k_map_array,k_map_hash); + int atom2 = AtomKokkos::map_kokkos(d_crossterm_atom2(i,m),map_style,k_map_array,k_map_hash); + int atom3 = AtomKokkos::map_kokkos(d_crossterm_atom3(i,m),map_style,k_map_array,k_map_hash); + int atom4 = AtomKokkos::map_kokkos(d_crossterm_atom4(i,m),map_style,k_map_array,k_map_hash); + int atom5 = AtomKokkos::map_kokkos(d_crossterm_atom5(i,m),map_style,k_map_array,k_map_hash); + + if( atom1 == -1 || atom2 == -1 || atom3 == -1 || atom4 == -1 || atom5 == -1) + Kokkos::abort("CMAP atoms missing on proc"); + + atom1 = closest_image(i,atom1); + atom2 = closest_image(i,atom2); + atom3 = closest_image(i,atom3); + atom4 = closest_image(i,atom4); + atom5 = closest_image(i,atom5); + + if( i <= atom1 && i <= atom2 && i <= atom3 && i <= atom4 && i <= atom5) { + if (l_ncrosstermlist > maxcrossterm) Kokkos::abort("l_ncrosstermlist > maxcrossterm"); + if(is_final) { + 
d_crosstermlist(l_ncrosstermlist,0) = atom1; + d_crosstermlist(l_ncrosstermlist,1) = atom2; + d_crosstermlist(l_ncrosstermlist,2) = atom3; + d_crosstermlist(l_ncrosstermlist,3) = atom4; + d_crosstermlist(l_ncrosstermlist,4) = atom5; + d_crosstermlist(l_ncrosstermlist,5) = d_crossterm_type(i,m); + } + l_ncrosstermlist++; + } + } +} /* ---------------------------------------------------------------------- compute CMAP terms as if newton_bond = OFF, even if actually ON @@ -241,7 +243,7 @@ void FixCMAPKokkos::post_force(int vflag) ev_init(eflag,vflag); copymode = 1; - Kokkos::parallel_for(ncrosstermlist, *this); + Kokkos::parallel_for(Kokkos::RangePolicy(0,ncrosstermlist), *this); copymode = 0; atomKK->modified(execution_space,F_MASK); @@ -254,7 +256,7 @@ void FixCMAPKokkos::post_force(int vflag) template KOKKOS_INLINE_FUNCTION -void FixCMAPKokkos::operator()(const int n) const +void FixCMAPKokkos::operator()(TagFixCmapPostForce, const int n) const { int i1,i2,i3,i4,i5,type; diff --git a/src/KOKKOS/fix_cmap_kokkos.h b/src/KOKKOS/fix_cmap_kokkos.h index fcf594bd31..84c447e0c8 100644 --- a/src/KOKKOS/fix_cmap_kokkos.h +++ b/src/KOKKOS/fix_cmap_kokkos.h @@ -28,6 +28,9 @@ FixStyle(cmap/kk/host,FixCMAPKokkos); namespace LAMMPS_NS { +struct TagFixCmapPreNeighbor{}; +struct TagFixCmapPostForce{}; + template class FixCMAPKokkos : public FixCMAP { typedef ArrayTypes AT; @@ -41,7 +44,10 @@ class FixCMAPKokkos : public FixCMAP { void post_force(int) override; KOKKOS_INLINE_FUNCTION - void operator()(const int) const; + void operator()(TagFixCmapPreNeighbor, const int, int&, const bool) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagFixCmapPostForce, const int) const; void grow_arrays(int) override; void copy_arrays(int, int, int) override; From f133b8a4a05126b783efb0afbdf159d47d3279ee Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 14 Sep 2024 17:14:57 -0400 Subject: [PATCH 086/294] cleanup --- src/KOKKOS/fix_cmap_kokkos.cpp | 6 ------ 1 file changed, 6 deletions(-) 
diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 96ac199567..492b77dd14 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -30,8 +30,6 @@ #include "update.h" #include "variable.h" -#include - using namespace LAMMPS_NS; using namespace MathConst; @@ -184,8 +182,6 @@ void FixCMAPKokkos::pre_neighbor() Kokkos::parallel_scan(Kokkos::RangePolicy(0,nlocal),*this,ncrosstermlist); copymode = 0; - std::cerr << fmt::format("*** pre_neighbor ncrosstermlist {} ncmap {}\n",ncrosstermlist, ncmap); - } template @@ -247,8 +243,6 @@ void FixCMAPKokkos::post_force(int vflag) copymode = 0; atomKK->modified(execution_space,F_MASK); - std::cerr << fmt::format("*** post_force ncrosstermlist {} vflag {} ecmap {}\n",ncrosstermlist,vflag,ecmap); - } From 02b6fa088d2ac1ccdcf636b70b16759f9153201b Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 14 Sep 2024 17:22:11 -0400 Subject: [PATCH 087/294] explicit warning(s) which fix sort_device=0 --- src/KOKKOS/atom_kokkos.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp index e2ae9ffb19..4e95127120 100644 --- a/src/KOKKOS/atom_kokkos.cpp +++ b/src/KOKKOS/atom_kokkos.cpp @@ -208,6 +208,10 @@ void AtomKokkos::sort() auto fix_iextra = modify->fix[atom->extra_grow[iextra]]; if (!fix_iextra->sort_device) { flag = 0; + + if (comm->me == 0) + error->warning(FLERR,"Fix {} {} not compatible with Kokkos sorting on device", fix_iextra->id, fix_iextra->style); + break; } } From 78b2b7a0244f35a3771c26f6bb5ed1235822cf7d Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 14 Sep 2024 17:29:56 -0400 Subject: [PATCH 088/294] explicit warning(s) exchange_comm_device=0 --- src/KOKKOS/atom_kokkos.cpp | 4 ++-- src/KOKKOS/comm_kokkos.cpp | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp index 4e95127120..c07746b823 100644 --- a/src/KOKKOS/atom_kokkos.cpp +++ 
b/src/KOKKOS/atom_kokkos.cpp @@ -210,8 +210,8 @@ void AtomKokkos::sort() flag = 0; if (comm->me == 0) - error->warning(FLERR,"Fix {} {} not compatible with Kokkos sorting on device", fix_iextra->id, fix_iextra->style); - + error->warning(FLERR,"Fix {} not compatible with Kokkos sorting on device", fix_iextra->style); + break; } } diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 8f821c3036..d8ccc34296 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -739,6 +739,10 @@ void CommKokkos::exchange() auto fix_iextra = modify->fix[atom->extra_grow[iextra]]; if (!fix_iextra->exchange_comm_device) { flag = 0; + + if (comm->me == 0) + error->warning(FLERR,"Fix {} not compatible with sending data in Kokkos communication", fix_iextra->style); + break; } } From 94eed6e10a4c565db9b16cd43686f1b17c599605 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 14 Sep 2024 18:02:33 -0400 Subject: [PATCH 089/294] sort on device --- src/KOKKOS/fix_cmap_kokkos.cpp | 54 ++++++++++++++++++++++++++-------- src/KOKKOS/fix_cmap_kokkos.h | 7 +++-- 2 files changed, 47 insertions(+), 14 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 492b77dd14..e8f6d7f443 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -46,6 +46,8 @@ FixCMAPKokkos::FixCMAPKokkos(LAMMPS *lmp, int narg, char **arg) : FixCMAP(lmp, narg, arg) { kokkosable = 1; + //exchange_comm_device = + sort_device = 1; atomKK = (AtomKokkos *)atom; execution_space = ExecutionSpaceFromDevice::space; @@ -109,6 +111,10 @@ FixCMAPKokkos::FixCMAPKokkos(LAMMPS *lmp, int narg, char **arg) : k_d2cmapgrid.template sync(); k_d12cmapgrid.template sync(); + // on KOKKOS, allocate enough for all crossterms on each GPU to avoid grow operation in device code + maxcrossterm = ncmap; + memoryKK->create_kokkos(d_crosstermlist,maxcrossterm,CMAPMAX,"cmap:crosstermlist"); + } /* 
---------------------------------------------------------------------- */ @@ -124,8 +130,6 @@ FixCMAPKokkos::~FixCMAPKokkos() memoryKK->destroy_kokkos(k_d2cmapgrid,d2cmapgrid); memoryKK->destroy_kokkos(k_d12cmapgrid,d12cmapgrid); - memoryKK->destroy_kokkos(k_crosstermlist,crosstermlist); - memoryKK->destroy_kokkos(k_num_crossterm,num_crossterm); memoryKK->destroy_kokkos(k_crossterm_type,crossterm_type); memoryKK->destroy_kokkos(k_crossterm_atom1,crossterm_atom1); @@ -134,6 +138,8 @@ FixCMAPKokkos::~FixCMAPKokkos() memoryKK->destroy_kokkos(k_crossterm_atom4,crossterm_atom4); memoryKK->destroy_kokkos(k_crossterm_atom5,crossterm_atom5); + memoryKK->destroy_kokkos(d_crosstermlist); + } /* ---------------------------------------------------------------------- */ @@ -153,15 +159,6 @@ template void FixCMAPKokkos::pre_neighbor() { - if (maxcrossterm == 0) { - - // on KOKKOS, allocate enough for all crossterms on each GPU to avoid grow operation in device code - maxcrossterm = ncmap; - - memoryKK->create_kokkos(k_crosstermlist,crosstermlist,maxcrossterm,CMAPMAX,"cmap:crosstermlist"); - d_crosstermlist = k_crosstermlist.template view(); - } - atomKK->sync(execution_space,X_MASK); d_x = atomKK->k_x.view(); int nlocal = atomKK->nlocal; @@ -232,7 +229,6 @@ void FixCMAPKokkos::post_force(int vflag) d_x = atomKK->k_x.template view(); d_f = atomKK->k_f.template view(); atomKK->sync(execution_space,X_MASK|F_MASK); - k_crosstermlist.template sync(); ecmap = 0.0; int eflag = eflag_caller; @@ -631,6 +627,40 @@ void FixCMAPKokkos::copy_arrays(int i, int j, int delflag) k_crossterm_atom5.template modify(); } +/* ---------------------------------------------------------------------- + sort local atom-based arrays +------------------------------------------------------------------------- */ + +template +void FixCMAPKokkos::sort_kokkos(Kokkos::BinSort &Sorter) +{ + // always sort on the device + + k_num_crossterm.sync_device(); + k_crossterm_type.sync_device(); + 
k_crossterm_atom1.sync_device(); + k_crossterm_atom2.sync_device(); + k_crossterm_atom3.sync_device(); + k_crossterm_atom4.sync_device(); + k_crossterm_atom5.sync_device(); + + Sorter.sort(LMPDeviceType(), k_num_crossterm.d_view); + Sorter.sort(LMPDeviceType(), k_crossterm_type.d_view); + Sorter.sort(LMPDeviceType(), k_crossterm_atom1.d_view); + Sorter.sort(LMPDeviceType(), k_crossterm_atom2.d_view); + Sorter.sort(LMPDeviceType(), k_crossterm_atom3.d_view); + Sorter.sort(LMPDeviceType(), k_crossterm_atom4.d_view); + Sorter.sort(LMPDeviceType(), k_crossterm_atom5.d_view); + + k_num_crossterm.modify_device(); + k_crossterm_type.modify_device(); + k_crossterm_atom1.modify_device(); + k_crossterm_atom2.modify_device(); + k_crossterm_atom3.modify_device(); + k_crossterm_atom4.modify_device(); + k_crossterm_atom5.modify_device(); +} + /* ---------------------------------------------------------------------- initialize one atom's array values, called when atom is created ------------------------------------------------------------------------- */ diff --git a/src/KOKKOS/fix_cmap_kokkos.h b/src/KOKKOS/fix_cmap_kokkos.h index 84c447e0c8..162df667b8 100644 --- a/src/KOKKOS/fix_cmap_kokkos.h +++ b/src/KOKKOS/fix_cmap_kokkos.h @@ -24,6 +24,8 @@ FixStyle(cmap/kk/host,FixCMAPKokkos); #define LMP_FIX_CMAP_KOKKOS_H #include "fix_cmap.h" + +#include "kokkos_base.h" #include "kokkos_type.h" namespace LAMMPS_NS { @@ -32,7 +34,7 @@ struct TagFixCmapPreNeighbor{}; struct TagFixCmapPostForce{}; template -class FixCMAPKokkos : public FixCMAP { +class FixCMAPKokkos : public FixCMAP, public KokkosBase { typedef ArrayTypes AT; public: @@ -51,6 +53,7 @@ class FixCMAPKokkos : public FixCMAP { void grow_arrays(int) override; void copy_arrays(int, int, int) override; + void sort_kokkos(Kokkos::BinSort &Sorter) override; void set_arrays(int) override; int pack_exchange(int, double *) override; int unpack_exchange(int, double *) override; @@ -68,7 +71,7 @@ class FixCMAPKokkos : public FixCMAP { 
DAT::tdual_int_1d k_num_crossterm; typename AT::t_int_1d d_num_crossterm; - DAT::tdual_int_2d k_crosstermlist, k_crossterm_type; + DAT::tdual_int_2d k_crossterm_type; typename AT::t_int_2d d_crosstermlist, d_crossterm_type; DAT::tdual_tagint_2d k_crossterm_atom1, k_crossterm_atom2, k_crossterm_atom3; From ae7fa643fdb35a6a9f6b607125a2e8f30687b23d Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 14 Sep 2024 18:08:57 -0400 Subject: [PATCH 090/294] oops --- src/KOKKOS/fix_cmap_kokkos.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index e8f6d7f443..87301365e4 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -111,10 +111,6 @@ FixCMAPKokkos::FixCMAPKokkos(LAMMPS *lmp, int narg, char **arg) : k_d2cmapgrid.template sync(); k_d12cmapgrid.template sync(); - // on KOKKOS, allocate enough for all crossterms on each GPU to avoid grow operation in device code - maxcrossterm = ncmap; - memoryKK->create_kokkos(d_crosstermlist,maxcrossterm,CMAPMAX,"cmap:crosstermlist"); - } /* ---------------------------------------------------------------------- */ @@ -149,6 +145,10 @@ void FixCMAPKokkos::init() { if (utils::strmatch(update->integrate_style,"^respa")) error->all(FLERR,"Cannot yet use respa with Kokkos"); + + // on KOKKOS, allocate enough for all crossterms on each GPU to avoid grow operation in device code + maxcrossterm = ncmap; + memoryKK->create_kokkos(d_crosstermlist,maxcrossterm,CMAPMAX,"cmap:crosstermlist"); } /* ---------------------------------------------------------------------- From 0921c8e08789c16a7853f22aeaaecdfffab17e5b Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 14 Sep 2024 19:51:43 -0400 Subject: [PATCH 091/294] pack/unpack on device --- src/KOKKOS/fix_cmap_kokkos.cpp | 147 ++++++++++++++++++++++++++++++++- src/KOKKOS/fix_cmap_kokkos.h | 10 +++ 2 files changed, 156 insertions(+), 1 deletion(-) diff --git 
a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 87301365e4..ddb2a89e92 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -46,8 +46,11 @@ FixCMAPKokkos::FixCMAPKokkos(LAMMPS *lmp, int narg, char **arg) : FixCMAP(lmp, narg, arg) { kokkosable = 1; - //exchange_comm_device = + + // FIXME: test/bugfix pack_exchange_kokkos() and unpack_exchange_kokkos() + //exchange_comm_device = sort_device = 1; sort_device = 1; + atomKK = (AtomKokkos *)atom; execution_space = ExecutionSpaceFromDevice::space; @@ -729,6 +732,148 @@ int FixCMAPKokkos::unpack_exchange(int nlocal, double *buf) return m; } +/* ---------------------------------------------------------------------- + (KOKKOS) pack values in local atom-based array for exchange +------------------------------------------------------------------------- */ + +/* +int FixCMAP::pack_exchange(int i, double *buf) +{ + int n = 0; + buf[n++] = ubuf(num_crossterm[i]).d; + for (int m = 0; m < num_crossterm[i]; m++) { + buf[n++] = ubuf(crossterm_type[i][m]).d; + buf[n++] = ubuf(crossterm_atom1[i][m]).d; + buf[n++] = ubuf(crossterm_atom2[i][m]).d; + buf[n++] = ubuf(crossterm_atom3[i][m]).d; + buf[n++] = ubuf(crossterm_atom4[i][m]).d; + buf[n++] = ubuf(crossterm_atom5[i][m]).d; + } + return n; +} +*/ + +template +int FixCMAPKokkos::pack_exchange_kokkos( + const int &nsend, DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d k_exchange_sendlist, DAT::tdual_int_1d k_copylist, + ExecutionSpace space) +{ + + k_buf.template sync(); + //k_copylist.template sync(); + k_exchange_sendlist.template sync(); + + auto d_buf = typename ArrayTypes::t_xfloat_1d_um( + k_buf.template view().data(), + k_buf.extent(0)*k_buf.extent(1)); + //d_copylist = k_copylist.view(); + auto d_exchange_sendlist = k_exchange_sendlist.view(); + //this->nsend = nsend; + + int n; + copymode = 1; + + Kokkos::parallel_scan(nsend, KOKKOS_LAMBDA(const int &mysend, int &offset, const bool &final) { + + const int i = 
d_exchange_sendlist(mysend); + + if (!final) offset += d_num_crossterm(i); + else { + int j = nsend + offset; + d_buf(j) = ubuf(num_crossterm[i]).d; + for (int m = 0; m < num_crossterm[i]; m++) { + d_buf(j++) = ubuf(d_crossterm_type(i,m)).d; + d_buf(j++) = ubuf(d_crossterm_atom1(i,m)).d; + d_buf(j++) = ubuf(d_crossterm_atom2(i,m)).d; + d_buf(j++) = ubuf(d_crossterm_atom3(i,m)).d; + d_buf(j++) = ubuf(d_crossterm_atom4(i,m)).d; + d_buf(j++) = ubuf(d_crossterm_atom5(i,m)).d; + } + } + },n); + + copymode = 0; + + k_buf.modify(); + if (space == Host) k_buf.sync(); + else k_buf.sync(); + + return n; +} + +/* ---------------------------------------------------------------------- + (KOKKOS) unpack values in local atom-based array from exchange +------------------------------------------------------------------------- */ + +/* +int FixCMAP::unpack_exchange(int nlocal, double *buf) +{ + int n = 0; + num_crossterm[nlocal] = (int) ubuf(buf[n++]).i; + for (int m = 0; m < num_crossterm[nlocal]; m++) { + crossterm_type[nlocal][m] = (int) ubuf(buf[n++]).i; + crossterm_atom1[nlocal][m] = (tagint) ubuf(buf[n++]).i; + crossterm_atom2[nlocal][m] = (tagint) ubuf(buf[n++]).i; + crossterm_atom3[nlocal][m] = (tagint) ubuf(buf[n++]).i; + crossterm_atom4[nlocal][m] = (tagint) ubuf(buf[n++]).i; + crossterm_atom5[nlocal][m] = (tagint) ubuf(buf[n++]).i; + } + return n; +} +*/ + +template +void FixCMAPKokkos::unpack_exchange_kokkos( + DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv, + int nrecv1, int nextrarecv1, + ExecutionSpace /*space*/) +{ + k_buf.template sync(); + k_indices.template sync(); + + auto d_buf = typename ArrayTypes::t_xfloat_1d_um( + k_buf.template view().data(), + k_buf.extent(0)*k_buf.extent(1)); + + auto d_indices = k_indices.view(); + + //this->nrecv1 = nrecv1; + //this->nextrarecv1 = nextrarecv1; + + k_num_crossterm.template sync(); + k_crossterm_type.template sync(); + k_crossterm_atom1.template sync(); + k_crossterm_atom2.template sync(); + 
k_crossterm_atom3.template sync(); + k_crossterm_atom4.template sync(); + k_crossterm_atom5.template sync(); + + copymode = 1; + + Kokkos::parallel_for(nrecv, KOKKOS_LAMBDA(const int &i) { + int index = d_indices(i); + d_num_crossterm(index) = (int) ubuf(d_buf[i]).i; + for (int m = 0; m < d_num_crossterm(index); m++) { + d_crossterm_type(index,m) = (int) ubuf(d_buf[i*m+1]).i; + d_crossterm_atom1(index,m) = (tagint) ubuf(d_buf[i*m+2]).i; + d_crossterm_atom2(index,m) = (tagint) ubuf(d_buf[i*m+3]).i; + d_crossterm_atom3(index,m) = (tagint) ubuf(d_buf[i*m+4]).i; + d_crossterm_atom4(index,m) = (tagint) ubuf(d_buf[i*m+5]).i; + d_crossterm_atom5(index,m) = (tagint) ubuf(d_buf[i*m+6]).i; + } + }); + + copymode = 0; + + k_num_crossterm.template modify(); + k_crossterm_type.template modify(); + k_crossterm_atom1.template modify(); + k_crossterm_atom2.template modify(); + k_crossterm_atom3.template modify(); + k_crossterm_atom4.template modify(); + k_crossterm_atom5.template modify(); +} /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/fix_cmap_kokkos.h b/src/KOKKOS/fix_cmap_kokkos.h index 162df667b8..8e2bf7e643 100644 --- a/src/KOKKOS/fix_cmap_kokkos.h +++ b/src/KOKKOS/fix_cmap_kokkos.h @@ -58,6 +58,16 @@ class FixCMAPKokkos : public FixCMAP, public KokkosBase { int pack_exchange(int, double *) override; int unpack_exchange(int, double *) override; + int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space) override; + + void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d &indices,int nrecv, + int nrecv1,int nrecv1extra, + ExecutionSpace space) override; + protected: typename AT::t_x_array d_x; typename AT::t_f_array d_f; From 0a9137272d865dfc5df55630e697d84ad7c1e1fa Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 14 Sep 2024 20:33:40 -0400 Subject: [PATCH 092/294] replace ubuf with static_cast 
--- src/KOKKOS/fix_cmap_kokkos.cpp | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index ddb2a89e92..843ee4bff5 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -781,14 +781,14 @@ int FixCMAPKokkos::pack_exchange_kokkos( if (!final) offset += d_num_crossterm(i); else { int j = nsend + offset; - d_buf(j) = ubuf(num_crossterm[i]).d; + d_buf(j) = static_cast (num_crossterm[i]); for (int m = 0; m < num_crossterm[i]; m++) { - d_buf(j++) = ubuf(d_crossterm_type(i,m)).d; - d_buf(j++) = ubuf(d_crossterm_atom1(i,m)).d; - d_buf(j++) = ubuf(d_crossterm_atom2(i,m)).d; - d_buf(j++) = ubuf(d_crossterm_atom3(i,m)).d; - d_buf(j++) = ubuf(d_crossterm_atom4(i,m)).d; - d_buf(j++) = ubuf(d_crossterm_atom5(i,m)).d; + d_buf(j++) = static_cast (d_crossterm_type(i,m)); + d_buf(j++) = static_cast (d_crossterm_atom1(i,m)); + d_buf(j++) = static_cast (d_crossterm_atom2(i,m)); + d_buf(j++) = static_cast (d_crossterm_atom3(i,m)); + d_buf(j++) = static_cast (d_crossterm_atom4(i,m)); + d_buf(j++) = static_cast (d_crossterm_atom5(i,m)); } } },n); @@ -853,14 +853,14 @@ void FixCMAPKokkos::unpack_exchange_kokkos( Kokkos::parallel_for(nrecv, KOKKOS_LAMBDA(const int &i) { int index = d_indices(i); - d_num_crossterm(index) = (int) ubuf(d_buf[i]).i; + d_num_crossterm(index) = static_cast (d_buf[i]); for (int m = 0; m < d_num_crossterm(index); m++) { - d_crossterm_type(index,m) = (int) ubuf(d_buf[i*m+1]).i; - d_crossterm_atom1(index,m) = (tagint) ubuf(d_buf[i*m+2]).i; - d_crossterm_atom2(index,m) = (tagint) ubuf(d_buf[i*m+3]).i; - d_crossterm_atom3(index,m) = (tagint) ubuf(d_buf[i*m+4]).i; - d_crossterm_atom4(index,m) = (tagint) ubuf(d_buf[i*m+5]).i; - d_crossterm_atom5(index,m) = (tagint) ubuf(d_buf[i*m+6]).i; + d_crossterm_type(index,m) = static_cast(d_buf[i*m+1]); + d_crossterm_atom1(index,m) = static_cast (d_buf[i*m+2]); + d_crossterm_atom2(index,m) = 
static_cast (d_buf[i*m+3]); + d_crossterm_atom3(index,m) = static_cast (d_buf[i*m+4]); + d_crossterm_atom4(index,m) = static_cast (d_buf[i*m+5]); + d_crossterm_atom5(index,m) = static_cast (d_buf[i*m+6]); } }); From 0db92ea135fb9d8a9098847a792868e58b227858 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 14 Sep 2024 20:48:20 -0400 Subject: [PATCH 093/294] auto variables for lambda capture --- src/KOKKOS/fix_cmap_kokkos.cpp | 48 ++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 843ee4bff5..20178edc1e 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -774,6 +774,14 @@ int FixCMAPKokkos::pack_exchange_kokkos( int n; copymode = 1; + auto l_num_crossterm = d_num_crossterm; + auto l_crossterm_type = d_crossterm_type; + auto l_crossterm_atom1 = d_crossterm_atom1; + auto l_crossterm_atom2 = d_crossterm_atom2; + auto l_crossterm_atom3 = d_crossterm_atom3; + auto l_crossterm_atom4 = d_crossterm_atom4; + auto l_crossterm_atom5 = d_crossterm_atom5; + Kokkos::parallel_scan(nsend, KOKKOS_LAMBDA(const int &mysend, int &offset, const bool &final) { const int i = d_exchange_sendlist(mysend); @@ -781,14 +789,14 @@ int FixCMAPKokkos::pack_exchange_kokkos( if (!final) offset += d_num_crossterm(i); else { int j = nsend + offset; - d_buf(j) = static_cast (num_crossterm[i]); - for (int m = 0; m < num_crossterm[i]; m++) { - d_buf(j++) = static_cast (d_crossterm_type(i,m)); - d_buf(j++) = static_cast (d_crossterm_atom1(i,m)); - d_buf(j++) = static_cast (d_crossterm_atom2(i,m)); - d_buf(j++) = static_cast (d_crossterm_atom3(i,m)); - d_buf(j++) = static_cast (d_crossterm_atom4(i,m)); - d_buf(j++) = static_cast (d_crossterm_atom5(i,m)); + d_buf(j) = static_cast (l_num_crossterm(i)); + for (int m = 0; m < l_num_crossterm(i); m++) { + d_buf(j++) = static_cast (l_crossterm_type(i,m)); + d_buf(j++) = static_cast (l_crossterm_atom1(i,m)); + 
d_buf(j++) = static_cast (l_crossterm_atom2(i,m)); + d_buf(j++) = static_cast (l_crossterm_atom3(i,m)); + d_buf(j++) = static_cast (l_crossterm_atom4(i,m)); + d_buf(j++) = static_cast (l_crossterm_atom5(i,m)); } } },n); @@ -851,16 +859,24 @@ void FixCMAPKokkos::unpack_exchange_kokkos( copymode = 1; + auto l_num_crossterm = d_num_crossterm; + auto l_crossterm_type = d_crossterm_type; + auto l_crossterm_atom1 = d_crossterm_atom1; + auto l_crossterm_atom2 = d_crossterm_atom2; + auto l_crossterm_atom3 = d_crossterm_atom3; + auto l_crossterm_atom4 = d_crossterm_atom4; + auto l_crossterm_atom5 = d_crossterm_atom5; + Kokkos::parallel_for(nrecv, KOKKOS_LAMBDA(const int &i) { int index = d_indices(i); - d_num_crossterm(index) = static_cast (d_buf[i]); - for (int m = 0; m < d_num_crossterm(index); m++) { - d_crossterm_type(index,m) = static_cast(d_buf[i*m+1]); - d_crossterm_atom1(index,m) = static_cast (d_buf[i*m+2]); - d_crossterm_atom2(index,m) = static_cast (d_buf[i*m+3]); - d_crossterm_atom3(index,m) = static_cast (d_buf[i*m+4]); - d_crossterm_atom4(index,m) = static_cast (d_buf[i*m+5]); - d_crossterm_atom5(index,m) = static_cast (d_buf[i*m+6]); + l_num_crossterm(index) = static_cast (d_buf(i)); + for (int m = 0; m < l_num_crossterm(index); m++) { + l_crossterm_type(index,m) = static_cast(d_buf(i*m+1)); + l_crossterm_atom1(index,m) = static_cast (d_buf(i*m+2)); + l_crossterm_atom2(index,m) = static_cast (d_buf(i*m+3)); + l_crossterm_atom3(index,m) = static_cast (d_buf(i*m+4)); + l_crossterm_atom4(index,m) = static_cast (d_buf(i*m+5)); + l_crossterm_atom5(index,m) = static_cast (d_buf(i*m+6)); } }); From 1f28ac4f54dda1a62f7dedc801e94a4958b77829 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 14 Sep 2024 20:50:30 -0400 Subject: [PATCH 094/294] oops --- src/KOKKOS/fix_cmap_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 20178edc1e..faf8614bda 100644 --- 
a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -786,7 +786,7 @@ int FixCMAPKokkos::pack_exchange_kokkos( const int i = d_exchange_sendlist(mysend); - if (!final) offset += d_num_crossterm(i); + if (!final) offset += l_num_crossterm(i); else { int j = nsend + offset; d_buf(j) = static_cast (l_num_crossterm(i)); From 4d55c53c109a5af4032a1001b87398e91c9c4e34 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 14 Sep 2024 21:01:06 -0400 Subject: [PATCH 095/294] update rst kk --- doc/src/fix_cmap.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/src/fix_cmap.rst b/doc/src/fix_cmap.rst index 316ad5d038..493bdb774f 100644 --- a/doc/src/fix_cmap.rst +++ b/doc/src/fix_cmap.rst @@ -3,6 +3,8 @@ fix cmap command ================ +Accelerator Variants: *cmap/kk* + Syntax """""" From 0db24828a5b18e5c874fba29bb5e0b545b556195 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sun, 15 Sep 2024 12:20:12 -0400 Subject: [PATCH 096/294] first draft delete_atoms/kk --- src/KOKKOS/delete_atoms_kokkos.cpp | 238 +++++++++++++++++++++++++++++ src/KOKKOS/delete_atoms_kokkos.h | 50 ++++++ src/command.h | 4 + src/delete_atoms.h | 2 +- 4 files changed, 293 insertions(+), 1 deletion(-) create mode 100644 src/KOKKOS/delete_atoms_kokkos.cpp create mode 100644 src/KOKKOS/delete_atoms_kokkos.h diff --git a/src/KOKKOS/delete_atoms_kokkos.cpp b/src/KOKKOS/delete_atoms_kokkos.cpp new file mode 100644 index 0000000000..f5b112f647 --- /dev/null +++ b/src/KOKKOS/delete_atoms_kokkos.cpp @@ -0,0 +1,238 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. 
This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Mitch Murphy, alphataubio at gmail +------------------------------------------------------------------------- */ + +#include "delete_atoms_kokkos.h" + +#include "angle.h" +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "comm.h" +#include "dihedral.h" +#include "domain.h" +#include "error.h" +#include "force.h" +#include "group.h" +#include "improper.h" +#include "kokkos.h" +#include "kspace.h" +#include "neighbor_kokkos.h" +#include "memory_kokkos.h" +#include "modify.h" +#include "neighbor.h" +#include "pair.h" +#include "timer.h" +#include "update.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +template +DeleteAtomsKokkos::DeleteAtomsKokkos(LAMMPS *lmp) : DeleteAtoms(lmp) +{ + atomKK = (AtomKokkos *) atom; +} + +/* ---------------------------------------------------------------------- */ + +template +void DeleteAtomsKokkos::command(int narg, char **arg) +{ + atomKK->sync(Host, X_MASK|RMASS_MASK|TYPE_MASK); + DeleteAtoms::command(narg, arg); +} + +/* ---------------------------------------------------------------------- + delete atoms so there are no pairs within cutoff + which atoms are deleted depends on ordering of atoms within proc + deletions can vary with processor count + no guarantee that minimium number of atoms will be deleted +------------------------------------------------------------------------- */ + +template +void DeleteAtomsKokkos::delete_overlap(int narg, char **arg) +{ + if (narg < 4) utils::missing_cmd_args(FLERR, "delete_atoms overlap", error); + + // read args + + const double cut = utils::numeric(FLERR, arg[1], false, lmp); + const double cutsq = cut * cut; + + 
int igroup1 = group->find(arg[2]); + if (igroup1 < 0) + error->all(FLERR, "Could not find delete_atoms overlap first group ID {}", arg[2]); + int igroup2 = group->find(arg[3]); + if (igroup2 < 0) + error->all(FLERR, "Could not find delete_atoms overlap second group ID {}", arg[3]); + options(narg - 4, &arg[4]); + + const int group1bit = group->bitmask[igroup1]; + const int group2bit = group->bitmask[igroup2]; + + if (comm->me == 0) utils::logmesg(lmp, "System init for delete_atoms/kk ...\n"); + + // request a full neighbor list for use by this command + + neighbor->add_request(this, "delete_atoms/kk", NeighConst::REQ_FULL); + + // init entire system since comm->borders and neighbor->build is done + // comm::init needs neighbor::init needs pair::init needs kspace::init, etc + + lmp->init(); + + // error check on cutoff + // if no pair style, neighbor list will be empty + + if (force->pair == nullptr) error->all(FLERR, "Delete_atoms requires a pair style be defined"); + if (cut > neighbor->cutneighmax) error->all(FLERR, "Delete_atoms cutoff > max neighbor cutoff"); + if (cut > neighbor->cutneighmin && comm->me == 0) + error->warning(FLERR, "Delete_atoms cutoff > minimum neighbor cutoff"); + + // setup domain, communication and neighboring + // acquire ghosts and build standard neighbor lists + + if (domain->triclinic) domain->x2lamda(atom->nlocal); + domain->pbc(); + domain->reset_box(); + comm->setup(); + if (neighbor->style) neighbor->setup_bins(); + comm->exchange(); + comm->borders(); + if (domain->triclinic) domain->lamda2x(atom->nlocal + atom->nghost); + neighbor->build(1); + + // build neighbor list this command needs based on the earlier request + + auto list = neighbor->find_list(this); + neighbor->build_one(list); + + auto inum = list->inum; + NeighListKokkos* k_list = static_cast*>(list); + auto d_numneigh = k_list->d_numneigh; + auto d_neighbors = k_list->d_neighbors; + auto d_ilist = k_list->d_ilist; + + // allocate and initialize deletion list + // must 
be after exchange potentially changes nlocal + + int nlocal = atom->nlocal; + memoryKK->create_kokkos(k_dlist, dlist, nlocal, "delete_atoms:dlist"); + for (int i = 0; i < nlocal; i++) dlist[i] = 0; + k_dlist.template sync(); + + + + // double loop over owned atoms and their full neighbor list + // at end of loop, there are no more overlaps + // only ever delete owned atom I in I loop iteration, never J even if owned + + auto d_x = atomKK->k_x.template view(); + auto d_tag = atomKK->k_tag.template view(); + auto d_mask = atomKK->k_mask.template view(); + + double *special_coul = force->special_coul; + double *special_lj = force->special_lj; + + int i, j, ii, jj, jnum; + + int *ilist, *jlist, *numneigh, **firstneigh; + double factor_lj, factor_coul; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + copymode = 1; + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + if (!(d_mask[i] & (group1bit | group2bit))) continue; + double xtmp = d_x(i,0); + double ytmp = d_x(i,1); + double ztmp = d_x(i,2); + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + if (!(d_mask[j] & (group1bit | group2bit))) continue; + + // if both weighting factors are 0, skip this pair + // could be 0 and still be in neigh list for long-range Coulombics + // want consistency with non-charged pairs which wouldn't be in list + + if (factor_lj == 0.0 && factor_coul == 0.0) continue; + + // only consider deletion if I,J distance < cutoff + // compute rsq identically on both I,J loop iterations + // ignoring possibility that I,J tags are equal + + double delx, dely, delz; + + if (d_tag(i) < d_tag(j)) { + delx = xtmp - d_x(j,0); + dely = ytmp - d_x(j,1); + delz = ztmp - d_x(j,2); + } else { + delx = d_x(j,0) - xtmp; + dely = d_x(j,1) - ytmp; + delz = d_x(j,2) - ztmp; + } + double rsq = delx * delx + dely * dely + delz * 
delz; + if (rsq >= cutsq) continue; + + // only consider deletion if I,J are in groups 1,2 respectively + // true whether J is owned or ghost atom + + if (!(d_mask[i] & group1bit)) continue; + if (!(d_mask[j] & group2bit)) continue; + + // J is owned atom: + // delete atom I if atom J has not already been deleted + // J is ghost atom: + // delete atom I if J,I is not a candidate deletion pair + // due to being in groups 1,2 respectively + // if they are candidate pair, then either: + // another proc owns J and could delete J + // J is a ghost of another of my owned atoms, and I could delete J + // test on tags of I,J ensures that only I or J is deleted + + if (j < nlocal) { + if (dlist[j]) continue; + } else if ((d_mask[i] & group2bit) && (d_mask[j] & group1bit)) { + if (d_tag(i) > d_tag(j)) continue; + } + + dlist[i] = 1; + break; + } + } + neighbor->init(); + k_dlist.template modify(); +} + +namespace LAMMPS_NS { +template class DeleteAtomsKokkos; +#ifdef LMP_KOKKOS_GPU +template class DeleteAtomsKokkos; +#endif +} diff --git a/src/KOKKOS/delete_atoms_kokkos.h b/src/KOKKOS/delete_atoms_kokkos.h new file mode 100644 index 0000000000..5c396ee934 --- /dev/null +++ b/src/KOKKOS/delete_atoms_kokkos.h @@ -0,0 +1,50 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef COMMAND_CLASS +// clang-format off +CommandStyle(delete_atoms/kk,DeleteAtomsKokkos<LMPDeviceType>); +CommandStyle(delete_atoms/kk/device,DeleteAtomsKokkos<LMPDeviceType>); +CommandStyle(delete_atoms/kk/host,DeleteAtomsKokkos<LMPHostType>); +// clang-format on +#else + +#ifndef LMP_DELETE_ATOMS_KOKKOS_H +#define LMP_DELETE_ATOMS_KOKKOS_H + +#include "delete_atoms.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +template <class DeviceType> +class DeleteAtomsKokkos : public DeleteAtoms { + public: + DeleteAtomsKokkos(class LAMMPS *); + + void command(int, char **) override; + + void delete_overlap(int, char **); + + //KOKKOS_INLINE_FUNCTION + //void operator()(const int &i) const; + + protected: + + DAT::tdual_int_1d k_dlist; + +}; +} // namespace LAMMPS_NS + +#endif //LMP_DELETE_ATOMS_KOKKOS_H +#endif diff --git a/src/command.h b/src/command.h index 1b5cea5ccb..37196a2927 100644 --- a/src/command.h +++ b/src/command.h @@ -22,6 +22,10 @@ class Command : protected Pointers { public: Command(class LAMMPS *lmp) : Pointers(lmp){}; virtual void command(int, char **) = 0; + + protected: + int copymode = 0; // if set, do not deallocate during destruction (default-initialized so destructor guards never read garbage) + // required when classes are used as functors by Kokkos }; } // namespace LAMMPS_NS diff --git a/src/delete_atoms.h b/src/delete_atoms.h index 0aef095327..a1825dab93 100644 --- a/src/delete_atoms.h +++ b/src/delete_atoms.h @@ -30,7 +30,7 @@ class DeleteAtoms : public Command { DeleteAtoms(class LAMMPS *); void command(int, char **) override; - private: + protected: int *dlist; int allflag, compress_flag, bond_flag, mol_flag; std::map<tagint, int> *hash; From f30842ca11f18d47ec1d399ef3a55321db73f384 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 25 Sep 2024 20:57:09 -0400 Subject: [PATCH 097/294] first draft --- src/KOKKOS/fix_tgnh_drude_kokkos.cpp | 2326 +++++++++++++++++++++++++ src/KOKKOS/fix_tgnh_drude_kokkos.h | 161 ++ src/KOKKOS/fix_tgnpt_drude_kokkos.cpp | 53 +
src/KOKKOS/fix_tgnpt_drude_kokkos.h | 37 + src/KOKKOS/fix_tgnvt_drude_kokkos.cpp | 44 + src/KOKKOS/fix_tgnvt_drude_kokkos.h | 37 + 6 files changed, 2658 insertions(+) create mode 100644 src/KOKKOS/fix_tgnh_drude_kokkos.cpp create mode 100644 src/KOKKOS/fix_tgnh_drude_kokkos.h create mode 100644 src/KOKKOS/fix_tgnpt_drude_kokkos.cpp create mode 100644 src/KOKKOS/fix_tgnpt_drude_kokkos.h create mode 100644 src/KOKKOS/fix_tgnvt_drude_kokkos.cpp create mode 100644 src/KOKKOS/fix_tgnvt_drude_kokkos.h diff --git a/src/KOKKOS/fix_tgnh_drude_kokkos.cpp b/src/KOKKOS/fix_tgnh_drude_kokkos.cpp new file mode 100644 index 0000000000..3b35b13929 --- /dev/null +++ b/src/KOKKOS/fix_tgnh_drude_kokkos.cpp @@ -0,0 +1,2326 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Mitch Murphy (alphataubio at gmail) +------------------------------------------------------------------------- */ + +#include "fix_tgnh_drude_kokkos.h" + +#include "atom.h" +#include "comm.h" +#include "compute.h" +#include "domain.h" +#include "error.h" +#include "fix_deform.h" +#include "fix_drude.h" +#include "force.h" +#include "irregular.h" +#include "kspace.h" +#include "memory.h" +#include "modify.h" +#include "neighbor.h" +#include "respa.h" +#include "update.h" + +#include +#include + +using namespace LAMMPS_NS; +using namespace FixConst; + +static constexpr double DELTAFLIP = 0.1; +static constexpr double TILTMAX = 1.5; + +enum{NOBIAS,BIAS}; +enum{NONE,XYZ,XY,YZ,XZ}; +enum{ISO,ANISO,TRICLINIC}; + +/* ---------------------------------------------------------------------- + NVT,NPH,NPT integrators for improved Nose-Hoover equations of motion + ---------------------------------------------------------------------- */ + +FixTGNHDrude::FixTGNHDrude(LAMMPS *lmp, int narg, char **arg) : + Fix(lmp, narg, arg), irregular(nullptr), id_temp(nullptr), id_press(nullptr), etamol(nullptr), + etamol_dot(nullptr), etamol_dotdot(nullptr), etamol_mass(nullptr), etaint(nullptr), + etaint_dot(nullptr), etaint_dotdot(nullptr), etaint_mass(nullptr), etadrude(nullptr), + etadrude_dot(nullptr), etadrude_dotdot(nullptr), etadrude_mass(nullptr), etap(nullptr), + etap_dot(nullptr), etap_dotdot(nullptr), etap_mass(nullptr) +{ + if (narg < 4) error->all(FLERR, "Illegal fix {} command", style); + + restart_global = 1; + dynamic_group_allow = 0; + time_integrate = 1; + scalar_flag = 1; + vector_flag = 1; + global_freq = 1; + extscalar = 1; + extvector = 0; + ecouple_flag = 1; + + // default values + + pcouple = NONE; + mtchain = mpchain = 3; + nc_tchain = nc_pchain = 1; + mtk_flag = 1; + deviatoric_flag = 0; + 
nreset_h0 = 0; + flipflag = 1; + + tcomputeflag = 0; + pcomputeflag = 0; + id_temp = nullptr; + id_press = nullptr; + + // turn on tilt factor scaling, whenever applicable + + dimension = domain->dimension; + + scaleyz = scalexz = scalexy = 0; + if (domain->yperiodic && domain->xy != 0.0) scalexy = 1; + if (domain->zperiodic && dimension == 3) { + if (domain->yz != 0.0) scaleyz = 1; + if (domain->xz != 0.0) scalexz = 1; + } + + // set fixed-point to default = center of cell + + fixedpoint[0] = 0.5*(domain->boxlo[0]+domain->boxhi[0]); + fixedpoint[1] = 0.5*(domain->boxlo[1]+domain->boxhi[1]); + fixedpoint[2] = 0.5*(domain->boxlo[2]+domain->boxhi[2]); + + tstat_flag = 0; + double t_period = 0.0, tdrude_period = 0.0; + + double p_period[6]; + for (int i = 0; i < 6; i++) { + p_start[i] = p_stop[i] = p_period[i] = p_target[i] = 0.0; + p_flag[i] = 0; + } + + // process keywords + + int iarg = 3; + + while (iarg < narg) { + if (strcmp(arg[iarg],"temp") == 0) { + if (iarg+6 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); + tstat_flag = 1; + t_start = utils::numeric(FLERR,arg[iarg+1],false,lmp); + t_target = t_start; + t_stop = utils::numeric(FLERR,arg[iarg+2],false,lmp); + t_period = utils::numeric(FLERR,arg[iarg+3],false,lmp); + if (t_start <= 0.0 || t_stop <= 0.0) + error->all(FLERR, + "Target temperature for fix nvt/npt/nph cannot be 0.0"); + tdrude_target = utils::numeric(FLERR,arg[iarg+4],false,lmp); + tdrude_period = utils::numeric(FLERR,arg[iarg+5],false,lmp); + iarg += 6; + + } else if (strcmp(arg[iarg],"iso") == 0) { + if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); + pcouple = XYZ; + p_start[0] = p_start[1] = p_start[2] = utils::numeric(FLERR,arg[iarg+1],false,lmp); + p_stop[0] = p_stop[1] = p_stop[2] = utils::numeric(FLERR,arg[iarg+2],false,lmp); + p_period[0] = p_period[1] = p_period[2] = utils::numeric(FLERR,arg[iarg+3],false,lmp); + p_flag[0] = p_flag[1] = p_flag[2] = 1; + if (dimension == 2) { + p_start[2] = p_stop[2] = 
p_period[2] = 0.0; + p_flag[2] = 0; + } + iarg += 4; + } else if (strcmp(arg[iarg],"aniso") == 0) { + if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); + pcouple = NONE; + p_start[0] = p_start[1] = p_start[2] = utils::numeric(FLERR,arg[iarg+1],false,lmp); + p_stop[0] = p_stop[1] = p_stop[2] = utils::numeric(FLERR,arg[iarg+2],false,lmp); + p_period[0] = p_period[1] = p_period[2] = utils::numeric(FLERR,arg[iarg+3],false,lmp); + p_flag[0] = p_flag[1] = p_flag[2] = 1; + if (dimension == 2) { + p_start[2] = p_stop[2] = p_period[2] = 0.0; + p_flag[2] = 0; + } + iarg += 4; + } else if (strcmp(arg[iarg],"tri") == 0) { + if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); + pcouple = NONE; + scalexy = scalexz = scaleyz = 0; + p_start[0] = p_start[1] = p_start[2] = utils::numeric(FLERR,arg[iarg+1],false,lmp); + p_stop[0] = p_stop[1] = p_stop[2] = utils::numeric(FLERR,arg[iarg+2],false,lmp); + p_period[0] = p_period[1] = p_period[2] = utils::numeric(FLERR,arg[iarg+3],false,lmp); + p_flag[0] = p_flag[1] = p_flag[2] = 1; + p_start[3] = p_start[4] = p_start[5] = 0.0; + p_stop[3] = p_stop[4] = p_stop[5] = 0.0; + p_period[3] = p_period[4] = p_period[5] = utils::numeric(FLERR,arg[iarg+3],false,lmp); + p_flag[3] = p_flag[4] = p_flag[5] = 1; + if (dimension == 2) { + p_start[2] = p_stop[2] = p_period[2] = 0.0; + p_flag[2] = 0; + p_start[3] = p_stop[3] = p_period[3] = 0.0; + p_flag[3] = 0; + p_start[4] = p_stop[4] = p_period[4] = 0.0; + p_flag[4] = 0; + } + iarg += 4; + } else if (strcmp(arg[iarg],"x") == 0) { + if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); + p_start[0] = utils::numeric(FLERR,arg[iarg+1],false,lmp); + p_stop[0] = utils::numeric(FLERR,arg[iarg+2],false,lmp); + p_period[0] = utils::numeric(FLERR,arg[iarg+3],false,lmp); + p_flag[0] = 1; + deviatoric_flag = 1; + iarg += 4; + } else if (strcmp(arg[iarg],"y") == 0) { + if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); + p_start[1] = 
utils::numeric(FLERR,arg[iarg+1],false,lmp); + p_stop[1] = utils::numeric(FLERR,arg[iarg+2],false,lmp); + p_period[1] = utils::numeric(FLERR,arg[iarg+3],false,lmp); + p_flag[1] = 1; + deviatoric_flag = 1; + iarg += 4; + } else if (strcmp(arg[iarg],"z") == 0) { + if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); + p_start[2] = utils::numeric(FLERR,arg[iarg+1],false,lmp); + p_stop[2] = utils::numeric(FLERR,arg[iarg+2],false,lmp); + p_period[2] = utils::numeric(FLERR,arg[iarg+3],false,lmp); + p_flag[2] = 1; + deviatoric_flag = 1; + iarg += 4; + if (dimension == 2) + error->all(FLERR,"Invalid fix nvt/npt/nph command for a 2d simulation"); + + } else if (strcmp(arg[iarg],"yz") == 0) { + if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); + p_start[3] = utils::numeric(FLERR,arg[iarg+1],false,lmp); + p_stop[3] = utils::numeric(FLERR,arg[iarg+2],false,lmp); + p_period[3] = utils::numeric(FLERR,arg[iarg+3],false,lmp); + p_flag[3] = 1; + deviatoric_flag = 1; + scaleyz = 0; + iarg += 4; + if (dimension == 2) + error->all(FLERR,"Invalid fix nvt/npt/nph command for a 2d simulation"); + } else if (strcmp(arg[iarg],"xz") == 0) { + if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); + p_start[4] = utils::numeric(FLERR,arg[iarg+1],false,lmp); + p_stop[4] = utils::numeric(FLERR,arg[iarg+2],false,lmp); + p_period[4] = utils::numeric(FLERR,arg[iarg+3],false,lmp); + p_flag[4] = 1; + deviatoric_flag = 1; + scalexz = 0; + iarg += 4; + if (dimension == 2) + error->all(FLERR,"Invalid fix nvt/npt/nph command for a 2d simulation"); + } else if (strcmp(arg[iarg],"xy") == 0) { + if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); + p_start[5] = utils::numeric(FLERR,arg[iarg+1],false,lmp); + p_stop[5] = utils::numeric(FLERR,arg[iarg+2],false,lmp); + p_period[5] = utils::numeric(FLERR,arg[iarg+3],false,lmp); + p_flag[5] = 1; + deviatoric_flag = 1; + scalexy = 0; + iarg += 4; + + } else if 
(strcmp(arg[iarg],"couple") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); + if (strcmp(arg[iarg+1],"xyz") == 0) pcouple = XYZ; + else if (strcmp(arg[iarg+1],"xy") == 0) pcouple = XY; + else if (strcmp(arg[iarg+1],"yz") == 0) pcouple = YZ; + else if (strcmp(arg[iarg+1],"xz") == 0) pcouple = XZ; + else if (strcmp(arg[iarg+1],"none") == 0) pcouple = NONE; + else error->all(FLERR,"Illegal fix nvt/npt/nph command"); + iarg += 2; + } else if (strcmp(arg[iarg],"tchain") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); + mtchain = utils::inumeric(FLERR,arg[iarg+1],false,lmp); + if (mtchain < 1) error->all(FLERR,"Illegal fix nvt/npt/nph command"); + iarg += 2; + } else if (strcmp(arg[iarg],"pchain") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); + mpchain = utils::inumeric(FLERR,arg[iarg+1],false,lmp); + if (mpchain < 0) error->all(FLERR,"Illegal fix nvt/npt/nph command"); + iarg += 2; + } else if (strcmp(arg[iarg],"mtk") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); + mtk_flag = utils::logical(FLERR, arg[iarg + 1], false, lmp); + iarg += 2; + } else if (strcmp(arg[iarg],"tloop") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); + nc_tchain = utils::inumeric(FLERR,arg[iarg+1],false,lmp); + if (nc_tchain < 0) error->all(FLERR,"Illegal fix nvt/npt/nph command"); + iarg += 2; + } else if (strcmp(arg[iarg],"ploop") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); + nc_pchain = utils::inumeric(FLERR,arg[iarg+1],false,lmp); + if (nc_pchain < 0) error->all(FLERR,"Illegal fix nvt/npt/nph command"); + iarg += 2; + } else if (strcmp(arg[iarg],"nreset") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); + nreset_h0 = utils::inumeric(FLERR,arg[iarg+1],false,lmp); + if (nreset_h0 < 0) error->all(FLERR,"Illegal fix nvt/npt/nph command"); + iarg += 2; + } 
else if (strcmp(arg[iarg],"scalexy") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); + scalexy = utils::logical(FLERR, arg[iarg + 1], false, lmp); + iarg += 2; + } else if (strcmp(arg[iarg],"scalexz") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); + scalexz = utils::logical(FLERR, arg[iarg + 1], false, lmp); + iarg += 2; + } else if (strcmp(arg[iarg],"scaleyz") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); + scaleyz = utils::logical(FLERR, arg[iarg + 1], false, lmp); + iarg += 2; + } else if (strcmp(arg[iarg],"flip") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); + flipflag = utils::logical(FLERR, arg[iarg + 1], false, lmp); + iarg += 2; + } else if (strcmp(arg[iarg],"fixedpoint") == 0) { + if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); + fixedpoint[0] = utils::numeric(FLERR,arg[iarg+1],false,lmp); + fixedpoint[1] = utils::numeric(FLERR,arg[iarg+2],false,lmp); + fixedpoint[2] = utils::numeric(FLERR,arg[iarg+3],false,lmp); + iarg += 4; + } else error->all(FLERR,"Illegal fix nvt/npt/nph command"); + } + + // error checks + + if (dimension == 2 && (p_flag[2] || p_flag[3] || p_flag[4])) + error->all(FLERR,"Invalid fix nvt/npt/nph command for a 2d simulation"); + if (dimension == 2 && (pcouple == YZ || pcouple == XZ)) + error->all(FLERR,"Invalid fix nvt/npt/nph command for a 2d simulation"); + if (dimension == 2 && (scalexz == 1 || scaleyz == 1 )) + error->all(FLERR,"Invalid fix nvt/npt/nph command for a 2d simulation"); + + if (pcouple == XYZ && (p_flag[0] == 0 || p_flag[1] == 0)) + error->all(FLERR,"Invalid fix nvt/npt/nph command pressure settings"); + if (pcouple == XYZ && dimension == 3 && p_flag[2] == 0) + error->all(FLERR,"Invalid fix nvt/npt/nph command pressure settings"); + if (pcouple == XY && (p_flag[0] == 0 || p_flag[1] == 0)) + error->all(FLERR,"Invalid fix nvt/npt/nph command pressure 
settings"); + if (pcouple == YZ && (p_flag[1] == 0 || p_flag[2] == 0)) + error->all(FLERR,"Invalid fix nvt/npt/nph command pressure settings"); + if (pcouple == XZ && (p_flag[0] == 0 || p_flag[2] == 0)) + error->all(FLERR,"Invalid fix nvt/npt/nph command pressure settings"); + + // require periodicity in tensile dimension + + if (p_flag[0] && domain->xperiodic == 0) + error->all(FLERR,"Cannot use fix nvt/npt/nph on a non-periodic dimension"); + if (p_flag[1] && domain->yperiodic == 0) + error->all(FLERR,"Cannot use fix nvt/npt/nph on a non-periodic dimension"); + if (p_flag[2] && domain->zperiodic == 0) + error->all(FLERR,"Cannot use fix nvt/npt/nph on a non-periodic dimension"); + + // require periodicity in 2nd dim of off-diagonal tilt component + + if (p_flag[3] && domain->zperiodic == 0) + error->all(FLERR, + "Cannot use fix nvt/npt/nph on a 2nd non-periodic dimension"); + if (p_flag[4] && domain->zperiodic == 0) + error->all(FLERR, + "Cannot use fix nvt/npt/nph on a 2nd non-periodic dimension"); + if (p_flag[5] && domain->yperiodic == 0) + error->all(FLERR, + "Cannot use fix nvt/npt/nph on a 2nd non-periodic dimension"); + + if (scaleyz == 1 && domain->zperiodic == 0) + error->all(FLERR,"Cannot use fix nvt/npt/nph " + "with yz scaling when z is non-periodic dimension"); + if (scalexz == 1 && domain->zperiodic == 0) + error->all(FLERR,"Cannot use fix nvt/npt/nph " + "with xz scaling when z is non-periodic dimension"); + if (scalexy == 1 && domain->yperiodic == 0) + error->all(FLERR,"Cannot use fix nvt/npt/nph " + "with xy scaling when y is non-periodic dimension"); + + if (p_flag[3] && scaleyz == 1) + error->all(FLERR,"Cannot use fix nvt/npt/nph with " + "both yz dynamics and yz scaling"); + if (p_flag[4] && scalexz == 1) + error->all(FLERR,"Cannot use fix nvt/npt/nph with " + "both xz dynamics and xz scaling"); + if (p_flag[5] && scalexy == 1) + error->all(FLERR,"Cannot use fix nvt/npt/nph with " + "both xy dynamics and xy scaling"); + + if (!domain->triclinic 
&& (p_flag[3] || p_flag[4] || p_flag[5])) + error->all(FLERR,"Can not specify Pxy/Pxz/Pyz in " + "fix nvt/npt/nph with non-triclinic box"); + + if (pcouple == XYZ && dimension == 3 && + (p_start[0] != p_start[1] || p_start[0] != p_start[2] || + p_stop[0] != p_stop[1] || p_stop[0] != p_stop[2] || + p_period[0] != p_period[1] || p_period[0] != p_period[2])) + error->all(FLERR,"Invalid fix nvt/npt/nph pressure settings"); + if (pcouple == XYZ && dimension == 2 && + (p_start[0] != p_start[1] || p_stop[0] != p_stop[1] || + p_period[0] != p_period[1])) + error->all(FLERR,"Invalid fix nvt/npt/nph pressure settings"); + if (pcouple == XY && + (p_start[0] != p_start[1] || p_stop[0] != p_stop[1] || + p_period[0] != p_period[1])) + error->all(FLERR,"Invalid fix nvt/npt/nph pressure settings"); + if (pcouple == YZ && + (p_start[1] != p_start[2] || p_stop[1] != p_stop[2] || + p_period[1] != p_period[2])) + error->all(FLERR,"Invalid fix nvt/npt/nph pressure settings"); + if (pcouple == XZ && + (p_start[0] != p_start[2] || p_stop[0] != p_stop[2] || + p_period[0] != p_period[2])) + error->all(FLERR,"Invalid fix nvt/npt/nph pressure settings"); + + if ((tstat_flag && (t_period <= 0.0 || tdrude_period <= 0.0)) || + (p_flag[0] && p_period[0] <= 0.0) || + (p_flag[1] && p_period[1] <= 0.0) || + (p_flag[2] && p_period[2] <= 0.0) || + (p_flag[3] && p_period[3] <= 0.0) || + (p_flag[4] && p_period[4] <= 0.0) || + (p_flag[5] && p_period[5] <= 0.0)) + error->all(FLERR,"Fix nvt/npt/nph damping parameters must be > 0.0"); + + // set pstat_flag and box change and restart_pbc variables + + pre_exchange_flag = 0; + pstat_flag = 0; + pstyle = ISO; + + for (int i = 0; i < 6; i++) + if (p_flag[i]) pstat_flag = 1; + + if (pstat_flag) { + if (p_flag[0]) box_change |= BOX_CHANGE_X; + if (p_flag[1]) box_change |= BOX_CHANGE_Y; + if (p_flag[2]) box_change |= BOX_CHANGE_Z; + if (p_flag[3]) box_change |= BOX_CHANGE_YZ; + if (p_flag[4]) box_change |= BOX_CHANGE_XZ; + if (p_flag[5]) box_change |= BOX_CHANGE_XY; + no_change_box = 1; + 
+ // pstyle = TRICLINIC if any off-diagonal term is controlled -> 6 dof + // else pstyle = ISO if XYZ coupling or XY coupling in 2d -> 1 dof + // else pstyle = ANISO -> 3 dof + + if (p_flag[3] || p_flag[4] || p_flag[5]) pstyle = TRICLINIC; + else if (pcouple == XYZ || (dimension == 2 && pcouple == XY)) pstyle = ISO; + else pstyle = ANISO; + + // pre_exchange only required if flips can occur due to shape changes + + if (flipflag && (p_flag[3] || p_flag[4] || p_flag[5])) + pre_exchange_flag = pre_exchange_migrate = 1; + if (flipflag && (domain->yz != 0.0 || domain->xz != 0.0 || domain->xy != 0.0)) + pre_exchange_flag = pre_exchange_migrate = 1; + } + + // convert input periods to frequencies + + t_freq = tdrude_freq = 0.0; + p_freq[0] = p_freq[1] = p_freq[2] = p_freq[3] = p_freq[4] = p_freq[5] = 0.0; + + if (tstat_flag) { + t_freq = 1.0 / t_period; + tdrude_freq = 1.0 / tdrude_period; + } + if (p_flag[0]) p_freq[0] = 1.0 / p_period[0]; + if (p_flag[1]) p_freq[1] = 1.0 / p_period[1]; + if (p_flag[2]) p_freq[2] = 1.0 / p_period[2]; + if (p_flag[3]) p_freq[3] = 1.0 / p_period[3]; + if (p_flag[4]) p_freq[4] = 1.0 / p_period[4]; + if (p_flag[5]) p_freq[5] = 1.0 / p_period[5]; + + // Nose/Hoover temp and pressure init + + size_vector = 3; + + if (tstat_flag) { + int ich; + + etaint = new double[mtchain]; + // add one extra dummy thermostat for eta_dot, set to zero + etaint_dot = new double[mtchain+1]; + etaint_dot[mtchain] = 0.0; + etaint_dotdot = new double[mtchain]; + for (ich = 0; ich < mtchain; ich++) { + etaint[ich] = etaint_dot[ich] = etaint_dotdot[ich] = 0.0; + } + etaint_mass = new double[mtchain]; + + etamol = new double[mtchain]; + // add one extra dummy thermostat for eta_dot, set to zero + etamol_dot = new double[mtchain+1]; + etamol_dot[mtchain] = 0.0; + etamol_dotdot = new double[mtchain]; + for (ich = 0; ich < mtchain; ich++) { + etamol[ich] = etamol_dot[ich] = etamol_dotdot[ich] = 0.0; + } + etamol_mass = new double[mtchain]; + + etadrude = new 
double[mtchain]; + // add one extra dummy thermostat for eta_dot, set to zero + etadrude_dot = new double[mtchain+1]; + etadrude_dot[mtchain] = 0.0; + etadrude_dotdot = new double[mtchain]; + for (ich = 0; ich < mtchain; ich++) { + etadrude[ich] = etadrude_dot[ich] = etadrude_dotdot[ich] = 0.0; + } + etadrude_mass = new double[mtchain]; + } + + if (pstat_flag) { + omega[0] = omega[1] = omega[2] = 0.0; + omega_dot[0] = omega_dot[1] = omega_dot[2] = 0.0; + omega_mass[0] = omega_mass[1] = omega_mass[2] = 0.0; + omega[3] = omega[4] = omega[5] = 0.0; + omega_dot[3] = omega_dot[4] = omega_dot[5] = 0.0; + omega_mass[3] = omega_mass[4] = omega_mass[5] = 0.0; + + if (mpchain) { + int ich; + etap = new double[mpchain]; + + // add one extra dummy thermostat, set to zero + + etap_dot = new double[mpchain+1]; + etap_dot[mpchain] = 0.0; + etap_dotdot = new double[mpchain]; + for (ich = 0; ich < mpchain; ich++) { + etap[ich] = etap_dot[ich] = + etap_dotdot[ich] = 0.0; + } + etap_mass = new double[mpchain]; + } + } + + if (pre_exchange_flag) irregular = new Irregular(lmp); + else irregular = nullptr; + + // initialize vol0,t0 to zero to signal uninitialized + // values then assigned in init(), if necessary + + vol0 = t0 = 0.0; + + // find fix drude + + auto fdrude = modify->get_fix_by_style("^drude$"); + if (fdrude.size() < 1) error->all(FLERR, "Fix {} requires fix drude", style); + fix_drude = dynamic_cast(fdrude[0]); + if (!fix_drude) error->all(FLERR, "Fix {} requires fix drude", style); + + // make sure ghost atoms have velocity + if (!comm->ghost_velocity) + error->all(FLERR,"Fix {} requires ghost velocities. 
Use comm_modify vel yes", style); +} + +/* ---------------------------------------------------------------------- */ + +FixTGNHDrude::~FixTGNHDrude() +{ + if (copymode) return; + + delete irregular; + + // delete temperature and pressure if fix created them + + if (tcomputeflag) modify->delete_compute(id_temp); + delete[] id_temp; + + if (tstat_flag) { + delete[] etaint; + delete[] etaint_dot; + delete[] etaint_dotdot; + delete[] etaint_mass; + delete[] etamol; + delete[] etamol_dot; + delete[] etamol_dotdot; + delete[] etamol_mass; + delete[] etadrude; + delete[] etadrude_dot; + delete[] etadrude_dotdot; + delete[] etadrude_mass; + } + + if (pstat_flag) { + if (pcomputeflag) modify->delete_compute(id_press); + delete[] id_press; + if (mpchain) { + delete[] etap; + delete[] etap_dot; + delete[] etap_dotdot; + delete[] etap_mass; + } + } +} + +/* ---------------------------------------------------------------------- */ + +int FixTGNHDrude::setmask() +{ + int mask = 0; + mask |= INITIAL_INTEGRATE; + mask |= FINAL_INTEGRATE; + mask |= INITIAL_INTEGRATE_RESPA; + mask |= PRE_FORCE_RESPA; + mask |= FINAL_INTEGRATE_RESPA; + if (pre_exchange_flag) mask |= PRE_EXCHANGE; + return mask; +} + +/* ---------------------------------------------------------------------- */ + +void FixTGNHDrude::init() +{ + // ensure no conflict with fix deform + + if (pstat_flag) + for (int i = 0; i < modify->nfix; i++) + if (strcmp(modify->fix[i]->style,"deform") == 0) { + int *dimflag = (dynamic_cast(modify->fix[i]))->dimflag; + if ((p_flag[0] && dimflag[0]) || (p_flag[1] && dimflag[1]) || + (p_flag[2] && dimflag[2]) || (p_flag[3] && dimflag[3]) || + (p_flag[4] && dimflag[4]) || (p_flag[5] && dimflag[5])) + error->all(FLERR,"Cannot use fix npt and fix deform on " + "same component of stress tensor"); + } + + // set temperature and pressure ptrs + + temperature = modify->get_compute_by_id(id_temp); + if (!temperature) error->all(FLERR,"Temperature ID for fix {} does not exist", style); + + if 
(temperature->tempbias) which = BIAS; + else which = NOBIAS; + + if (pstat_flag) { + pressure = modify->get_compute_by_id(id_press); + if (!pressure) error->all(FLERR,"Pressure ID for fix {} does not exist", style); + } + + // set timesteps and frequencies + + dtv = update->dt; + dtf = 0.5 * update->dt * force->ftm2v; + dthalf = 0.5 * update->dt; + dt4 = 0.25 * update->dt; + dt8 = 0.125 * update->dt; + dto = dthalf; + + p_freq_max = 0.0; + if (pstat_flag) { + p_freq_max = MAX(p_freq[0],p_freq[1]); + p_freq_max = MAX(p_freq_max,p_freq[2]); + if (pstyle == TRICLINIC) { + p_freq_max = MAX(p_freq_max,p_freq[3]); + p_freq_max = MAX(p_freq_max,p_freq[4]); + p_freq_max = MAX(p_freq_max,p_freq[5]); + } + } + + // tally the number of dimensions that are barostatted + // set initial volume and reference cell, if not already done + + if (pstat_flag) { + pdim = p_flag[0] + p_flag[1] + p_flag[2]; + if (vol0 == 0.0) { + if (dimension == 3) vol0 = domain->xprd * domain->yprd * domain->zprd; + else vol0 = domain->xprd * domain->yprd; + h0_inv[0] = domain->h_inv[0]; + h0_inv[1] = domain->h_inv[1]; + h0_inv[2] = domain->h_inv[2]; + h0_inv[3] = domain->h_inv[3]; + h0_inv[4] = domain->h_inv[4]; + h0_inv[5] = domain->h_inv[5]; + } + } + + boltz = force->boltz; + nktv2p = force->nktv2p; + + if (force->kspace) kspace_flag = 1; + else kspace_flag = 0; + + if (utils::strmatch(update->integrate_style,"^respa")) { + nlevels_respa = (dynamic_cast<Respa *>(update->integrate))->nlevels; + step_respa = (dynamic_cast<Respa *>(update->integrate))->step; + dto = 0.5*step_respa[0]; + } + + // detect if any rigid fixes exist so rigid bodies move when box is remapped + + rfix.clear(); + for (auto &ifix : modify->get_fix_list()) + if (ifix->rigid_flag) rfix.push_back(ifix); +} + +/* ---------------------------------------------------------------------- + compute T,P before integrator starts +------------------------------------------------------------------------- */ + +void FixTGNHDrude::setup_mol_mass_dof() { + 
double *mass = atom->mass; + int *mask = atom->mask; + tagint *molecule = atom->molecule; + int *type = atom->type; + int *drudetype = fix_drude->drudetype; + int n_drude, n_drude_tmp = 0; + tagint id_mol = 0, n_mol_in_group = 0; + + for (int i = 0; i < atom->nlocal; i++) { + // molecule id starts from 1. max(id_mol) equals to the number of molecules in the system + id_mol = std::max(id_mol, molecule[i]); + if (mask[i] & groupbit) { + if (drudetype[type[i]] == DRUDE_TYPE) + n_drude_tmp++; + } + } + MPI_Allreduce(&n_drude_tmp, &n_drude, 1, MPI_LMP_TAGINT, MPI_SUM, world); + MPI_Allreduce(&id_mol, &n_mol, 1, MPI_LMP_TAGINT, MPI_MAX, world); + + // use flag_mol to determine the number of molecules in the fix group + int *flag_mol = new int[n_mol + 1]; + int *flag_mol_tmp = new int[n_mol + 1]; + memset(flag_mol_tmp, 0, sizeof(int) * (n_mol + 1)); + for (int i = 0; i < atom->nlocal; i++) { + if (mask[i] & groupbit) { + flag_mol_tmp[molecule[i]] = 1; + } + } + MPI_Allreduce(flag_mol_tmp, flag_mol, n_mol + 1, MPI_INT, MPI_SUM, world); + for (int i = 1; i < n_mol + 1; i++) { + if (flag_mol[i]) + n_mol_in_group++; + } + delete[] flag_mol; + delete[] flag_mol_tmp; + + // length of v_mol set to n_mol+1, so that the subscript start from 1, we can call v_mol[n_mol] + memory->create(v_mol, n_mol + 1, 3, "fix_tgnh_drude::v_mol"); + memory->create(v_mol_tmp, n_mol + 1, 3, "fix_tgnh_drude::v_mol_tmp"); + memory->create(mass_mol, n_mol + 1, "fix_tgnh_drude::mass_mol"); + + auto mass_tmp = new double[n_mol + 1]; + memset(mass_tmp, 0, sizeof(double) * (n_mol + 1)); + for (int i = 0; i < atom->nlocal; i++) { + id_mol = molecule[i]; + mass_tmp[id_mol] += mass[type[i]]; + } + MPI_Allreduce(mass_tmp, mass_mol, n_mol + 1, MPI_DOUBLE, MPI_SUM, world); + delete[] mass_tmp; + + // DOFs + t_current = temperature->compute_scalar(); + tdof = temperature->dof; + // remove DOFs of COM translational motion based on the number of molecules in the group + dof_mol = 3.0 * n_mol_in_group - 3.0 * 
n_mol_in_group / n_mol; + dof_drude = 3.0 * n_drude; + dof_int = tdof - dof_mol - dof_drude; + + if (comm->me == 0) { + if (screen) { + fprintf(screen, "TGNHC thermostat for Drude model\n"); + fprintf(screen, " DOFs of molecules, atoms and dipoles: %.1f %.1f %.1f\n", + dof_mol, dof_int, dof_drude); + } + if (logfile) { + fprintf(logfile, "TGNHC thermostat for Drude model\n"); + fprintf(logfile, " DOFs of molecules, atoms and dipoles: %.1f %.1f %.1f\n", + dof_mol, dof_int, dof_drude); + } + } + if (dof_mol <=0 || dof_int <=0 || dof_drude <=0) + error->all(FLERR, "TGNHC thermostat requires DOFs of molecules, atoms and dipoles larger than 0"); +} + +void FixTGNHDrude::setup(int /*vflag*/) +{ + setup_mol_mass_dof(); + // t_target is needed by NVT and NPT in compute_scalar() + // If no thermostat or using fix nphug, + // t_target must be defined by other means. + + if (tstat_flag && strstr(style,"nphug") == nullptr) { + compute_temp_target(); + } else if (pstat_flag) { + + // t0 = reference temperature for masses + // cannot be done in init() b/c temperature cannot be called there + // is b/c Modify::init() inits computes after fixes due to dof dependence + // guesstimate a unit-dependent t0 if actual T = 0.0 + // if it was read in from a restart file, leave it be + + if (t0 == 0.0) { + t0 = temperature->compute_scalar(); + if (t0 == 0.0) { + if (strcmp(update->unit_style,"lj") == 0) t0 = 1.0; + else t0 = 300.0; + } + } + t_target = t0; + } + + if (pstat_flag) compute_press_target(); + + if (pstat_flag) { + if (pstyle == ISO) pressure->compute_scalar(); + else pressure->compute_vector(); + couple(); + pressure->addstep(update->ntimestep+1); + } + + // masses and initial forces on thermostat variables + + if (tstat_flag) { + etaint_mass[0] = ke2int_target / (t_freq * t_freq); + etamol_mass[0] = ke2mol_target / (t_freq * t_freq); + etadrude_mass[0] = ke2drude_target / (tdrude_freq * tdrude_freq); + for (int ich = 1; ich < mtchain; ich++) { + etaint_mass[ich] = boltz * 
t_target / (t_freq * t_freq); + etamol_mass[ich] = boltz * t_target / (t_freq * t_freq); + etadrude_mass[ich] = boltz * tdrude_target / (tdrude_freq * tdrude_freq); + + etaint_dotdot[ich] = (etaint_mass[ich - 1] * etaint_dot[ich - 1] * etaint_dot[ich - 1] - + boltz * t_target) / etaint_mass[ich]; + etamol_dotdot[ich] = (etamol_mass[ich - 1] * etamol_dot[ich - 1] * etamol_dot[ich - 1] - + boltz * t_target) / etamol_mass[ich]; + etadrude_dotdot[ich] = (etadrude_mass[ich - 1] * etadrude_dot[ich - 1] * etadrude_dot[ich - 1] - + boltz * tdrude_target) / etadrude_mass[ich]; + } + } + + // masses and initial forces on barostat variables + + if (pstat_flag) { + double kt = boltz * t_target; + double nkt = (atom->natoms + 1) * kt; + + for (int i = 0; i < 3; i++) + if (p_flag[i]) + omega_mass[i] = nkt/(p_freq[i]*p_freq[i]); + + if (pstyle == TRICLINIC) { + for (int i = 3; i < 6; i++) + if (p_flag[i]) omega_mass[i] = nkt/(p_freq[i]*p_freq[i]); + } + + // masses and initial forces on barostat thermostat variables + + if (mpchain) { + etap_mass[0] = boltz * t_target / (p_freq_max*p_freq_max); + for (int ich = 1; ich < mpchain; ich++) + etap_mass[ich] = boltz * t_target / (p_freq_max*p_freq_max); + for (int ich = 1; ich < mpchain; ich++) + etap_dotdot[ich] = + (etap_mass[ich-1]*etap_dot[ich-1]*etap_dot[ich-1] - + boltz * t_target) / etap_mass[ich]; + } + } +} + +/* ---------------------------------------------------------------------- + 1st half of Verlet update +------------------------------------------------------------------------- */ + +void FixTGNHDrude::initial_integrate(int /*vflag*/) +{ + // update eta_press_dot + + if (pstat_flag && mpchain) nhc_press_integrate(); + + // update eta_dot + + if (tstat_flag) { + compute_temp_target(); + nhc_temp_integrate(); + } + + // need to recompute pressure to account for change in KE + // t_current is up-to-date, but compute_temperature is not + // compute appropriately coupled elements of mvv_current + + if (pstat_flag) { + if 
(pstyle == ISO) { + temperature->compute_scalar(); + pressure->compute_scalar(); + } else { + temperature->compute_vector(); + pressure->compute_vector(); + } + couple(); + pressure->addstep(update->ntimestep+1); + } + + if (pstat_flag) { + compute_press_target(); + nh_omega_dot(); + nh_v_press(); + } + + nve_v(); + + // remap simulation box by 1/2 step + + if (pstat_flag) remap(); + + nve_x(); + + // remap simulation box by 1/2 step + // redo KSpace coeffs since volume has changed + + if (pstat_flag) { + remap(); + if (kspace_flag) force->kspace->setup(); + } +} + +/* ---------------------------------------------------------------------- + 2nd half of Verlet update +------------------------------------------------------------------------- */ + +void FixTGNHDrude::final_integrate() +{ + nve_v(); + + // re-compute temp before nh_v_press() + // only needed for temperature computes with BIAS on reneighboring steps: + // b/c some biases store per-atom values (e.g. temp/profile) + // per-atom values are invalid if reneigh/comm occurred + // since temp->compute() in initial_integrate() + + if (which == BIAS && neighbor->ago == 0) + t_current = temperature->compute_scalar(); + + if (pstat_flag) nh_v_press(); + + // compute new T,P after velocities rescaled by nh_v_press() + // compute appropriately coupled elements of mvv_current + + t_current = temperature->compute_scalar(); + tdof = temperature->dof; + + // need to recompute pressure to account for change in KE + // t_current is up-to-date, but compute_temperature is not + // compute appropriately coupled elements of mvv_current + + if (pstat_flag) { + if (pstyle == ISO) pressure->compute_scalar(); + else { + temperature->compute_vector(); + pressure->compute_vector(); + } + couple(); + pressure->addstep(update->ntimestep+1); + } + + if (pstat_flag) nh_omega_dot(); + + // update eta_dot + // update eta_press_dot + + if (tstat_flag) nhc_temp_integrate(); + if (pstat_flag && mpchain) nhc_press_integrate(); +} + +/* 
---------------------------------------------------------------------- */

// rRESPA version of the first half-step: the chain/barostat updates are
// applied only at the outermost level, positions are advanced only at the
// innermost level
void FixTGNHDrude::initial_integrate_respa(int /*vflag*/, int ilevel, int /*iloop*/)
{
  // set timesteps by level

  dtv = step_respa[ilevel];
  dtf = 0.5 * step_respa[ilevel] * force->ftm2v;
  dthalf = 0.5 * step_respa[ilevel];

  // outermost level - update eta_dot and omega_dot, apply to v
  // all other levels - NVE update of v
  // x,v updates only performed for atoms in group

  if (ilevel == nlevels_respa-1) {

    // update eta_press_dot

    if (pstat_flag && mpchain) nhc_press_integrate();

    // update eta_dot

    if (tstat_flag) {
      compute_temp_target();
      nhc_temp_integrate();
    }

    // recompute pressure to account for change in KE
    // t_current is up-to-date, but compute_temperature is not
    // compute appropriately coupled elements of mvv_current

    if (pstat_flag) {
      if (pstyle == ISO) {
        temperature->compute_scalar();
        pressure->compute_scalar();
      } else {
        temperature->compute_vector();
        pressure->compute_vector();
      }
      couple();
      pressure->addstep(update->ntimestep+1);
    }

    if (pstat_flag) {
      compute_press_target();
      nh_omega_dot();
      nh_v_press();
    }

    nve_v();

  } else nve_v();

  // innermost level - also update x only for atoms in group
  // if barostat, perform 1/2 step remap before and after

  if (ilevel == 0) {
    if (pstat_flag) remap();
    nve_x();
    if (pstat_flag) remap();
  }
}

/* ---------------------------------------------------------------------- */

// rRESPA hook called before the force computation of each level
void FixTGNHDrude::pre_force_respa(int /*vflag*/, int ilevel, int /*iloop*/)
{
  // if barostat, redo KSpace coeffs at outermost level,
  // since volume has changed

  if (ilevel == nlevels_respa-1 && kspace_flag && pstat_flag)
    force->kspace->setup();
}

/* ---------------------------------------------------------------------- */

// rRESPA version of the second half-step; dtv is not reset here, only the
// force and half-step factors
void FixTGNHDrude::final_integrate_respa(int ilevel, int /*iloop*/)
{
  // set timesteps by level

  dtf = 0.5 * step_respa[ilevel] * force->ftm2v;
  dthalf = 0.5 * step_respa[ilevel];

  // outermost level - update eta_dot and omega_dot, apply via final_integrate
  // all other levels - NVE update of v

  if (ilevel == nlevels_respa-1) final_integrate();
  else nve_v();
}

/* ---------------------------------------------------------------------- */

// fill p_current[] from the pressure compute, averaging the diagonal
// components selected by pcouple; aborts if any value is not finite
void FixTGNHDrude::couple()
{
  double *tensor = pressure->vector;

  if (pstyle == ISO)
    p_current[0] = p_current[1] = p_current[2] = pressure->scalar;
  else if (pcouple == XYZ) {
    double ave = 1.0/3.0 * (tensor[0] + tensor[1] + tensor[2]);
    p_current[0] = p_current[1] = p_current[2] = ave;
  } else if (pcouple == XY) {
    double ave = 0.5 * (tensor[0] + tensor[1]);
    p_current[0] = p_current[1] = ave;
    p_current[2] = tensor[2];
  } else if (pcouple == YZ) {
    double ave = 0.5 * (tensor[1] + tensor[2]);
    p_current[1] = p_current[2] = ave;
    p_current[0] = tensor[0];
  } else if (pcouple == XZ) {
    double ave = 0.5 * (tensor[0] + tensor[2]);
    p_current[0] = p_current[2] = ave;
    p_current[1] = tensor[1];
  } else {
    // no coupling: each diagonal component is used as computed
    p_current[0] = tensor[0];
    p_current[1] = tensor[1];
    p_current[2] = tensor[2];
  }

  if (!std::isfinite(p_current[0]) || !std::isfinite(p_current[1]) || !std::isfinite(p_current[2]))
    error->all(FLERR,"Non-numeric pressure - simulation unstable");

  // switch order from xy-xz-yz to Voigt

  if (pstyle == TRICLINIC) {
    p_current[3] = tensor[5];
    p_current[4] = tensor[4];
    p_current[5] = tensor[3];

    if (!std::isfinite(p_current[3]) || !std::isfinite(p_current[4]) || !std::isfinite(p_current[5]))
      error->all(FLERR,"Non-numeric pressure - simulation unstable");
  }
}

/* ----------------------------------------------------------------------
   change box size
   remap all atoms or dilate group atoms depending on allremap flag
   if rigid bodies exist, scale rigid body centers-of-mass
------------------------------------------------------------------------- */

void FixTGNHDrude::remap()
{
  double oldlo,oldhi;
  double expfac;

  int nlocal = atom->nlocal;
  double *h = domain->h;

  // omega is not used, except for book-keeping

  for (int i = 0; i < 6; i++) omega[i] += dto*omega_dot[i];

  // convert pertinent atoms and rigid bodies to lamda coords

  domain->x2lamda(nlocal);

  for (auto &ifix : rfix) ifix->deform(0);

  // reset global and local box to new size/shape

  // this operation corresponds to applying the
  // translate and scale operations
  // corresponding to the solution of the following ODE:
  //
  // h_dot = omega_dot * h
  //
  // where h_dot, omega_dot and h are all upper-triangular
  // 3x3 tensors. In Voigt notation, the elements of the
  // RHS product tensor are:
  // h_dot = [0*0, 1*1, 2*2, 1*3+3*2, 0*4+5*3+4*2, 0*5+5*1]
  //
  // Ordering of operations preserves time symmetry.

  double dto2 = dto/2.0;
  double dto4 = dto/4.0;
  double dto8 = dto/8.0;

  // off-diagonal components, first half
  // h[4] is updated twice with a quarter step, around the h[3] and h[5]
  // half-step updates

  if (pstyle == TRICLINIC) {

    if (p_flag[4]) {
      expfac = exp(dto8*omega_dot[0]);
      h[4] *= expfac;
      h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]);
      h[4] *= expfac;
    }

    if (p_flag[3]) {
      expfac = exp(dto4*omega_dot[1]);
      h[3] *= expfac;
      h[3] += dto2*(omega_dot[3]*h[2]);
      h[3] *= expfac;
    }

    if (p_flag[5]) {
      expfac = exp(dto4*omega_dot[0]);
      h[5] *= expfac;
      h[5] += dto2*(omega_dot[5]*h[1]);
      h[5] *= expfac;
    }

    if (p_flag[4]) {
      expfac = exp(dto8*omega_dot[0]);
      h[4] *= expfac;
      h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]);
      h[4] *= expfac;
    }
  }

  // scale diagonal components
  // scale tilt factors with cell, if set
  // each box face is scaled about fixedpoint[] in that dimension

  if (p_flag[0]) {
    oldlo = domain->boxlo[0];
    oldhi = domain->boxhi[0];
    expfac = exp(dto*omega_dot[0]);
    domain->boxlo[0] = (oldlo-fixedpoint[0])*expfac + fixedpoint[0];
    domain->boxhi[0] = (oldhi-fixedpoint[0])*expfac + fixedpoint[0];
  }

  if (p_flag[1]) {
    oldlo = domain->boxlo[1];
    oldhi = domain->boxhi[1];
    expfac = exp(dto*omega_dot[1]);
    domain->boxlo[1] = (oldlo-fixedpoint[1])*expfac + fixedpoint[1];
    domain->boxhi[1] = (oldhi-fixedpoint[1])*expfac + fixedpoint[1];
    if (scalexy) h[5] *= expfac;
  }

  if (p_flag[2]) {
    oldlo = domain->boxlo[2];
    oldhi = domain->boxhi[2];
    expfac = exp(dto*omega_dot[2]);
    domain->boxlo[2] = (oldlo-fixedpoint[2])*expfac + fixedpoint[2];
    domain->boxhi[2] = (oldhi-fixedpoint[2])*expfac + fixedpoint[2];
    if (scalexz) h[4] *= expfac;
    if (scaleyz) h[3] *= expfac;
  }

  // off-diagonal components, second half
  // same sequence of updates as the first half

  if (pstyle == TRICLINIC) {

    if (p_flag[4]) {
      expfac = exp(dto8*omega_dot[0]);
      h[4] *= expfac;
      h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]);
      h[4] *= expfac;
    }

    if (p_flag[3]) {
      expfac = exp(dto4*omega_dot[1]);
      h[3] *= expfac;
      h[3] += dto2*(omega_dot[3]*h[2]);
      h[3] *= expfac;
    }

    if (p_flag[5]) {
      expfac = exp(dto4*omega_dot[0]);
      h[5] *= expfac;
      h[5] += dto2*(omega_dot[5]*h[1]);
      h[5] *= expfac;
    }

    if (p_flag[4]) {
      expfac = exp(dto8*omega_dot[0]);
      h[4] *= expfac;
      h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]);
      h[4] *= expfac;
    }

  }

  // copy the updated tilt factors back into the domain

  domain->yz = h[3];
  domain->xz = h[4];
  domain->xy = h[5];

  // tilt factor to cell length ratio can not exceed TILTMAX in one step

  if (domain->yz < -TILTMAX*domain->yprd ||
      domain->yz > TILTMAX*domain->yprd ||
      domain->xz < -TILTMAX*domain->xprd ||
      domain->xz > TILTMAX*domain->xprd ||
      domain->xy < -TILTMAX*domain->xprd ||
      domain->xy > TILTMAX*domain->xprd)
    error->all(FLERR,"Fix npt/nph has tilted box too far in one step - "
               "periodic cell is too far from equilibrium state");

  domain->set_global_box();
  domain->set_local_box();

  // convert pertinent atoms and rigid bodies back to box coords

  domain->lamda2x(nlocal);

  for (auto &ifix : rfix) ifix->deform(1);
}

/* ----------------------------------------------------------------------
   pack entire state of Fix into one write
+------------------------------------------------------------------------- */ + +void FixTGNHDrude::write_restart(FILE *fp) +{ + int nsize = size_restart_global(); + + double *list; + memory->create(list,nsize,"nh:list"); + + pack_restart_data(list); + + if (comm->me == 0) { + int size = nsize * sizeof(double); + fwrite(&size,sizeof(int),1,fp); + fwrite(list,sizeof(double),nsize,fp); + } + + memory->destroy(list); +} + +/* ---------------------------------------------------------------------- + calculate the number of data to be packed +------------------------------------------------------------------------- */ + +int FixTGNHDrude::size_restart_global() +{ + int nsize = 2; + if (tstat_flag) nsize += 1 + 6*mtchain; + if (pstat_flag) { + nsize += 16 + 2*mpchain; + if (deviatoric_flag) nsize += 6; + } + + return nsize; +} + +/* ---------------------------------------------------------------------- + pack restart data +------------------------------------------------------------------------- */ + +int FixTGNHDrude::pack_restart_data(double *list) +{ + int n = 0; + + list[n++] = tstat_flag; + if (tstat_flag) { + list[n++] = mtchain; + for (int ich = 0; ich < mtchain; ich++) { + list[n++] = etamol[ich]; + list[n++] = etaint[ich]; + list[n++] = etadrude[ich]; + } + for (int ich = 0; ich < mtchain; ich++) { + list[n++] = etamol_dot[ich]; + list[n++] = etaint_dot[ich]; + list[n++] = etadrude_dot[ich]; + } + } + + list[n++] = pstat_flag; + if (pstat_flag) { + list[n++] = omega[0]; + list[n++] = omega[1]; + list[n++] = omega[2]; + list[n++] = omega[3]; + list[n++] = omega[4]; + list[n++] = omega[5]; + list[n++] = omega_dot[0]; + list[n++] = omega_dot[1]; + list[n++] = omega_dot[2]; + list[n++] = omega_dot[3]; + list[n++] = omega_dot[4]; + list[n++] = omega_dot[5]; + list[n++] = vol0; + list[n++] = t0; + list[n++] = mpchain; + if (mpchain) { + for (int ich = 0; ich < mpchain; ich++) + list[n++] = etap[ich]; + for (int ich = 0; ich < mpchain; ich++) + list[n++] = 
etap_dot[ich]; + } + + list[n++] = deviatoric_flag; + if (deviatoric_flag) { + list[n++] = h0_inv[0]; + list[n++] = h0_inv[1]; + list[n++] = h0_inv[2]; + list[n++] = h0_inv[3]; + list[n++] = h0_inv[4]; + list[n++] = h0_inv[5]; + } + } + + return n; +} + +/* ---------------------------------------------------------------------- + use state info from restart file to restart the Fix +------------------------------------------------------------------------- */ + +void FixTGNHDrude::restart(char *buf) +{ + int n = 0; + auto list = (double *) buf; + int flag = static_cast (list[n++]); + if (flag) { + int m = static_cast (list[n++]); + if (tstat_flag && m == mtchain) { + for (int ich = 0; ich < mtchain; ich++) { + etamol[ich] = list[n++]; + etaint[ich] = list[n++]; + etadrude[ich] = list[n++]; + } + for (int ich = 0; ich < mtchain; ich++) { + etamol_dot[ich] = list[n++]; + etaint_dot[ich] = list[n++]; + etadrude_dot[ich] = list[n++]; + } + } else n += 2*m; + } + flag = static_cast (list[n++]); + if (flag) { + omega[0] = list[n++]; + omega[1] = list[n++]; + omega[2] = list[n++]; + omega[3] = list[n++]; + omega[4] = list[n++]; + omega[5] = list[n++]; + omega_dot[0] = list[n++]; + omega_dot[1] = list[n++]; + omega_dot[2] = list[n++]; + omega_dot[3] = list[n++]; + omega_dot[4] = list[n++]; + omega_dot[5] = list[n++]; + vol0 = list[n++]; + t0 = list[n++]; + int m = static_cast (list[n++]); + if (pstat_flag && m == mpchain) { + for (int ich = 0; ich < mpchain; ich++) + etap[ich] = list[n++]; + for (int ich = 0; ich < mpchain; ich++) + etap_dot[ich] = list[n++]; + } else n+=2*m; + flag = static_cast (list[n++]); + if (flag) { + h0_inv[0] = list[n++]; + h0_inv[1] = list[n++]; + h0_inv[2] = list[n++]; + h0_inv[3] = list[n++]; + h0_inv[4] = list[n++]; + h0_inv[5] = list[n++]; + } + } +} + +/* ---------------------------------------------------------------------- */ + +int FixTGNHDrude::modify_param(int narg, char **arg) +{ + if (strcmp(arg[0],"temp") == 0) { + if (narg < 2) 
error->all(FLERR,"Illegal fix_modify command"); + if (tcomputeflag) { + modify->delete_compute(id_temp); + tcomputeflag = 0; + } + delete[] id_temp; + id_temp = utils::strdup(arg[1]); + + temperature = modify->get_compute_by_id(id_temp); + if (!temperature) error->all(FLERR,"Could not find fix_modify temperature ID {}", id_temp); + + if (temperature->tempflag == 0) + error->all(FLERR, "Fix_modify temperature ID {} does not compute temperature", id_temp); + if (temperature->igroup != 0 && comm->me == 0) + error->warning(FLERR,"Temperature for fix modify is not for group all"); + + // reset id_temp of pressure to new temperature ID + + if (pstat_flag) { + pressure = modify->get_compute_by_id(id_press); + if (!pressure) error->all(FLERR,"Pressure ID {} for fix modify does not exist", id_press); + pressure->reset_extra_compute_fix(id_temp); + } + + return 2; + + } else if (strcmp(arg[0],"press") == 0) { + if (narg < 2) error->all(FLERR,"Illegal fix_modify command"); + if (!pstat_flag) error->all(FLERR,"Illegal fix_modify command"); + if (pcomputeflag) { + modify->delete_compute(id_press); + pcomputeflag = 0; + } + delete[] id_press; + id_press = utils::strdup(arg[1]); + + pressure = modify->get_compute_by_id(id_press); + if (!pressure) error->all(FLERR,"Could not find fix_modify pressure ID {}", id_press); + + if (pressure->pressflag == 0) + error->all(FLERR,"Fix_modify pressure ID {} does not compute pressure", id_press); + return 2; + } + + return 0; +} + +/* ---------------------------------------------------------------------- */ + +double FixTGNHDrude::compute_scalar() +{ + int i; + double volume; + double energy; + double kt = boltz * t_target; + double kt_drude = boltz * tdrude_target; + double lkt_press = 0.0; + int ich; + if (dimension == 3) volume = domain->xprd * domain->yprd * domain->zprd; + else volume = domain->xprd * domain->yprd; + + energy = 0.0; + + // thermostat chain energy is equivalent to Eq. 
(2) in + // Martyna, Tuckerman, Tobias, Klein, Mol Phys, 87, 1117 + // Sum(0.5*p_eta_k^2/Q_k,k=1,M) + L*k*T*eta_1 + Sum(k*T*eta_k,k=2,M), + // where L = tdof + // M = mtchain + // p_eta_k = Q_k*eta_dot[k-1] + // Q_1 = L*k*T/t_freq^2 + // Q_k = k*T/t_freq^2, k > 1 + + if (tstat_flag) { + energy += ke2mol_target * etamol[0] + 0.5 * etamol_mass[0] * etamol_dot[0] * etamol_dot[0]; + energy += ke2int_target * etaint[0] + 0.5 * etaint_mass[0] * etaint_dot[0] * etaint_dot[0]; + energy += ke2drude_target * etadrude[0] + 0.5 * etadrude_mass[0] * etadrude_dot[0] * etadrude_dot[0]; + for (ich = 1; ich < mtchain; ich++) { + energy += kt * etamol[ich] + 0.5*etamol_mass[ich]*etamol_dot[ich]*etamol_dot[ich]; + energy += kt * etaint[ich] + 0.5*etaint_mass[ich]*etaint_dot[ich]*etaint_dot[ich]; + energy += kt_drude * etadrude[ich] + 0.5*etadrude_mass[ich]*etadrude_dot[ich]*etadrude_dot[ich]; + } + } + + // barostat energy is equivalent to Eq. (8) in + // Martyna, Tuckerman, Tobias, Klein, Mol Phys, 87, 1117 + // Sum(0.5*p_omega^2/W + P*V), + // where N = natoms + // p_omega = W*omega_dot + // W = N*k*T/p_freq^2 + // sum is over barostatted dimensions + + if (pstat_flag) { + for (i = 0; i < 3; i++) { + if (p_flag[i]) { + energy += 0.5*omega_dot[i]*omega_dot[i]*omega_mass[i] + + p_hydro*(volume-vol0) / (pdim*nktv2p); + lkt_press += kt; + } + } + + if (pstyle == TRICLINIC) { + for (i = 3; i < 6; i++) { + if (p_flag[i]) { + energy += 0.5*omega_dot[i]*omega_dot[i]*omega_mass[i]; + lkt_press += kt; + } + } + } + + // extra contributions from thermostat chain for barostat + + if (mpchain) { + energy += lkt_press * etap[0] + 0.5*etap_mass[0]*etap_dot[0]*etap_dot[0]; + for (ich = 1; ich < mpchain; ich++) + energy += kt * etap[ich] + + 0.5*etap_mass[ich]*etap_dot[ich]*etap_dot[ich]; + } + + // extra contribution from strain energy + + if (deviatoric_flag) energy += compute_strain_energy(); + } + + return energy; +} + +/* ---------------------------------------------------------------------- 
*/ + +double FixTGNHDrude::compute_vector(int n) +{ + if (!temp_computed_end_of_step) + compute_temp_mol_int_drude(true); + switch (n) { + case 0: + return t_mol; + case 1: + return t_int; + case 2: + return t_drude; + default: + return 0.0; + } +} + +/* ---------------------------------------------------------------------- */ + +void FixTGNHDrude::reset_target(double t_new) +{ + t_target = t_start = t_stop = t_new; +} + +/* ---------------------------------------------------------------------- */ + +void FixTGNHDrude::reset_dt() +{ + dtv = update->dt; + dtf = 0.5 * update->dt * force->ftm2v; + dthalf = 0.5 * update->dt; + dt4 = 0.25 * update->dt; + dt8 = 0.125 * update->dt; + dto = dthalf; + + // If using respa, then remap is performed in innermost level + + if (utils::strmatch(update->integrate_style,"^respa")) + dto = 0.5*step_respa[0]; +} + +void FixTGNHDrude::compute_temp_mol_int_drude(bool end_of_step) { + double **v = atom->v; + double *mass = atom->mass; + tagint *molecule = atom->molecule; + int *type = atom->type; + int *mask = atom->mask; + int *drudetype = fix_drude->drudetype; + tagint *drudeid = fix_drude->drudeid; + int imol, ci, di; + double mass_com, mass_reduced, mass_core, mass_drude; + double vint, vcom, vrel; + // use array instead of two numbers to save MPI_Allreduce() + double ke2_int_drude_tmp[2] = {0.0, 0.0}; + double ke2_int_drude[2]; + + memset(*v_mol_tmp, 0, sizeof(double) * (n_mol + 1) * 3); // the length of v_mol is n_mol+1 + + /** + * If there are velocity bias, need to remove them before calculate kinetic energies + */ + for (int i = 0; i < atom->nlocal; i++) { + if (mask[i] & groupbit) { + if (which == BIAS) + temperature->remove_bias(i, v[i]); + + imol = molecule[i]; + for (int k = 0; k < 3; k++) + v_mol_tmp[imol][k] += v[i][k] * mass[type[i]]; + + if (which == BIAS) + temperature->restore_bias(i, v[i]); + } + } + MPI_Allreduce(*v_mol_tmp, *v_mol, (n_mol + 1) * 3, MPI_DOUBLE, MPI_SUM, world); + + ke2mol = 0; + for (int i = 1; i < 
n_mol + 1; i++) { + for (int k = 0; k < 3; k++) { + v_mol[i][k] /= mass_mol[i]; + ke2mol += mass_mol[i] * (v_mol[i][k] * v_mol[i][k]); + } + } + ke2mol *= force->mvv2e; + t_mol = ke2mol / dof_mol / boltz; + + /** + * Have to call remove_bias at the innermost loop, because drude atom may be a ghost + */ + for (int i = 0; i < atom->nlocal; i++) { + if (mask[i] & groupbit) { + imol = molecule[i]; + if (drudetype[type[i]] == NOPOL_TYPE) { + if (which == BIAS) + temperature->remove_bias(i, v[i]); + for (int k = 0; k < 3; k++) { + vint = v[i][k] - v_mol[imol][k]; + ke2_int_drude_tmp[0] += mass[type[i]] * vint * vint; + } + if (which == BIAS) + temperature->restore_bias(i, v[i]); + } else if (drudetype[type[i]] == CORE_TYPE) { + /** + * have to use closet_image() + * even though all images have the same velocity and it's sort of read-only + * but the bias velocity may depends on it's position like in compute vis/pp + */ + ci = i; + di = domain->closest_image(i, atom->map(drudeid[i])); + if (which == BIAS) { + temperature->remove_bias(ci, v[ci]); + temperature->remove_bias(di, v[di]); + } + mass_core = mass[type[ci]]; + mass_drude = mass[type[di]]; + mass_com = mass_core + mass_drude; + mass_reduced = mass_core * mass_drude / mass_com; + for (int k = 0; k < 3; k++) { + vcom = (mass_core * v[ci][k] + mass_drude * v[di][k]) / mass_com; + vint = vcom - v_mol[imol][k]; + ke2_int_drude_tmp[0] += mass_com * vint * vint; + vrel = v[di][k] - v[ci][k]; + ke2_int_drude_tmp[1] += mass_reduced * vrel * vrel; + } + if (which == BIAS) { + temperature->restore_bias(ci, v[ci]); + temperature->restore_bias(di, v[di]); + } + } + } + } + MPI_Allreduce(ke2_int_drude_tmp, ke2_int_drude, 2, MPI_DOUBLE, MPI_SUM, world); + ke2int = ke2_int_drude[0] * force->mvv2e; + ke2drude = ke2_int_drude[1] * force->mvv2e; + t_int = ke2int / dof_int / boltz; + t_drude = ke2drude / dof_drude / boltz; + + temp_computed_end_of_step = end_of_step; +} + +/* 
----------------------------------------------------------------------
   perform half-step update of chain thermostat variables
------------------------------------------------------------------------- */

void FixTGNHDrude::nhc_temp_integrate()
{
  // refresh ke2mol, ke2int and ke2drude; not flagged as an end-of-step value
  compute_temp_mol_int_drude(false);

  // update masses of thermostat in case target temperature changes
  // (the Drude chain masses are not touched here)
  etamol_mass[0] = ke2mol_target / (t_freq*t_freq);
  etaint_mass[0] = ke2int_target / (t_freq*t_freq);
  for (int ich = 1; ich < mtchain; ich++) {
    etamol_mass[ich] = boltz * t_target / (t_freq*t_freq);
    etaint_mass[ich] = boltz * t_target / (t_freq*t_freq);
  }

  // thermostat for molecular COM
  factor_eta_mol = propagate(etamol, etamol_dot, etamol_dotdot, etamol_mass,
                             ke2mol, ke2mol_target, t_target);
  // thermostat for the atomic motion relative to the COM
  factor_eta_int = propagate(etaint, etaint_dot, etaint_dotdot, etaint_mass,
                             ke2int, ke2int_target, t_target);
  // thermostat for the Drude relative motion, with its own target temperature
  factor_eta_drude = propagate(etadrude, etadrude_dot, etadrude_dotdot, etadrude_mass,
                               ke2drude, ke2drude_target, tdrude_target);

  nh_v_temp();
}

// advance one thermostat chain by a half step in nc_tchain sub-steps
//   eta, eta_dot, eta_dotdot = chain positions, velocities, accelerations (updated in place)
//   eta_mass                 = chain masses
//   ke2, ke2_target          = current and target kinetic-energy terms driving eta_dotdot[0]
//   tt                       = target temperature used for chain elements > 0
// returns the accumulated scaling factor exp(-ncfac*dthalf*eta_dot[0]) over all sub-steps
// NOTE(review): eta_dot[ich + 1] is read for ich = mtchain-1, and eta_dot[1]
// even when mtchain is 1 - assumes the arrays hold mtchain+1 entries; confirm
// against the allocation in the constructor
double FixTGNHDrude::propagate(double *eta, double *eta_dot, double *eta_dotdot, const double *eta_mass,
                               const double &ke2, const double &ke2_target, const double &tt) const {
  int ich;
  double expfac;
  double ncfac = 1.0 / nc_tchain;
  double factor_eta = 1.0;

  eta_dotdot[0] = (ke2 - ke2_target) / eta_mass[0];
  for (int iloop = 0; iloop < nc_tchain; iloop++) {
    // chain velocities, from the end of the chain towards element 1
    for (ich = mtchain - 1; ich > 0; ich--) {
      expfac = exp(-ncfac * dt8 * eta_dot[ich + 1]);
      eta_dot[ich] *= expfac;
      eta_dot[ich] += eta_dotdot[ich] * ncfac * dt4;
      eta_dot[ich] *= expfac;
    }
    expfac = exp(-ncfac * dt8 * eta_dot[1]);
    eta_dot[0] *= expfac;
    eta_dot[0] += eta_dotdot[0] * ncfac * dt4;
    eta_dot[0] *= expfac;
    factor_eta *= exp(-ncfac * dthalf * eta_dot[0]);

    for (ich = 0; ich < mtchain; ich++)
      eta[ich] += ncfac * dthalf * eta_dot[ich];

    // ke2 is the value on entry, so it is rescaled by the accumulated factor
    eta_dotdot[0] = (ke2 * factor_eta * factor_eta - ke2_target) / eta_mass[0];
    // expfac still holds the value computed from eta_dot[1] above
    eta_dot[0] *= expfac;
    eta_dot[0] += eta_dotdot[0] * ncfac * dt4;
    eta_dot[0] *= expfac;
    // chain velocities again, now from element 1 towards the end
    for (ich = 1; ich < mtchain; ich++) {
      expfac = exp(-ncfac * dt8 * eta_dot[ich + 1]);
      eta_dot[ich] *= expfac;
      eta_dotdot[ich] = (eta_mass[ich - 1] * eta_dot[ich - 1] * eta_dot[ich - 1]
                         - boltz * tt) / eta_mass[ich];
      eta_dot[ich] += eta_dotdot[ich] * ncfac * dt4;
      eta_dot[ich] *= expfac;
    }
  }
  return factor_eta;
}

/* ----------------------------------------------------------------------
   perform half-step update of chain thermostat variables for barostat
   scale barostat velocities
------------------------------------------------------------------------- */

void FixTGNHDrude::nhc_press_integrate()
{
  int ich,i,pdof;
  double expfac,factor_etap,kecurrent;
  double kt = boltz * t_target;
  double lkt_press;

  // Update masses, to preserve initial freq, if t_target changed
  double nkt = (atom->natoms + 1) * kt;
  for (int i = 0; i < 3; i++)
    if (p_flag[i])
      omega_mass[i] = nkt / (p_freq[i] * p_freq[i]);
  if (pstyle == TRICLINIC) {
    for (int i = 3; i < 6; i++)
      if (p_flag[i]) omega_mass[i] = nkt / (p_freq[i] * p_freq[i]);
  }
  if (mpchain) {
    etap_mass[0] = kt / (p_freq_max * p_freq_max);
    for (int ich = 1; ich < mpchain; ich++)
      etap_mass[ich] = kt / (p_freq_max * p_freq_max);
    for (int ich = 1; ich < mpchain; ich++)
      etap_dotdot[ich] = (etap_mass[ich - 1] * etap_dot[ich - 1] * etap_dot[ich - 1]
                          - kt) / etap_mass[ich];
  }

  // kinetic energy of the barostat and number of barostatted components

  kecurrent = 0.0;
  pdof = 0;
  for (i = 0; i < 3; i++)
    if (p_flag[i]) {
      kecurrent += omega_mass[i]*omega_dot[i]*omega_dot[i];
      pdof++;
    }

  if (pstyle == TRICLINIC) {
    for (i = 3; i < 6; i++)
      if (p_flag[i]) {
        kecurrent += omega_mass[i]*omega_dot[i]*omega_dot[i];
        pdof++;
      }
  }

  if (pstyle == ISO) lkt_press = kt;
  else lkt_press = pdof * kt;
  etap_dotdot[0] = (kecurrent - lkt_press)/etap_mass[0];

  double ncfac = 1.0/nc_pchain;
  for (int iloop = 0; iloop < nc_pchain; iloop++) {

    // chain velocities, from the end of the chain towards element 1
    // NOTE(review): etap_dot[ich+1] is read for ich = mpchain-1 - assumes
    // the array holds mpchain+1 entries; confirm against the constructor

    for (ich = mpchain-1; ich > 0; ich--) {
      expfac = exp(-ncfac*dt8*etap_dot[ich+1]);
      etap_dot[ich] *= expfac;
      etap_dot[ich] += etap_dotdot[ich] * ncfac*dt4;
      etap_dot[ich] *= expfac;
    }

    expfac = exp(-ncfac*dt8*etap_dot[1]);
    etap_dot[0] *= expfac;
    etap_dot[0] += etap_dotdot[0] * ncfac*dt4;
    etap_dot[0] *= expfac;

    for (ich = 0; ich < mpchain; ich++)
      etap[ich] += ncfac*dthalf*etap_dot[ich];

    // scale the barostat velocities

    factor_etap = exp(-ncfac*dthalf*etap_dot[0]);
    for (i = 0; i < 3; i++)
      if (p_flag[i]) omega_dot[i] *= factor_etap;

    if (pstyle == TRICLINIC) {
      for (i = 3; i < 6; i++)
        if (p_flag[i]) omega_dot[i] *= factor_etap;
    }

    // barostat kinetic energy after scaling

    kecurrent = 0.0;
    for (i = 0; i < 3; i++)
      if (p_flag[i]) kecurrent += omega_mass[i]*omega_dot[i]*omega_dot[i];

    if (pstyle == TRICLINIC) {
      for (i = 3; i < 6; i++)
        if (p_flag[i]) kecurrent += omega_mass[i]*omega_dot[i]*omega_dot[i];
    }

    etap_dotdot[0] = (kecurrent - lkt_press)/etap_mass[0];

    // expfac still holds the value computed from etap_dot[1] above

    etap_dot[0] *= expfac;
    etap_dot[0] += etap_dotdot[0] * ncfac*dt4;
    etap_dot[0] *= expfac;

    for (ich = 1; ich < mpchain; ich++) {
      expfac = exp(-ncfac*dt8*etap_dot[ich+1]);
      etap_dot[ich] *= expfac;
      etap_dotdot[ich] =
        (etap_mass[ich-1]*etap_dot[ich-1]*etap_dot[ich-1] - kt) / etap_mass[ich];
      etap_dot[ich] += etap_dotdot[ich] * ncfac*dt4;
      etap_dot[ich] *= expfac;
    }
  }
}

/* ----------------------------------------------------------------------
   perform half-step barostat scaling of velocities
-----------------------------------------------------------------------*/

void FixTGNHDrude::nh_v_press()
{
  double factor[3];
  double **v = atom->v;
  int *mask = atom->mask;
  int nlocal = atom->nlocal;
  if (igroup == atom->firstgroup) nlocal = atom->nfirst;

  // each factor is applied twice, before and after the triclinic coupling term

  factor[0] = exp(-dt4*(omega_dot[0]+mtk_term2));
  factor[1] = exp(-dt4*(omega_dot[1]+mtk_term2));
  factor[2] = exp(-dt4*(omega_dot[2]+mtk_term2));

  if (which == NOBIAS) {
    for (int i = 0; i < nlocal; i++) {
      if (mask[i] & groupbit) {
        v[i][0] *= factor[0];
        v[i][1] *= factor[1];
        v[i][2] *= factor[2];
        if (pstyle == TRICLINIC) {
          v[i][0] += -dthalf*(v[i][1]*omega_dot[5] + v[i][2]*omega_dot[4]);
          v[i][1] += -dthalf*v[i][2]*omega_dot[3];
        }
        v[i][0] *= factor[0];
        v[i][1] *= factor[1];
        v[i][2] *= factor[2];
      }
    }
  } else if (which == BIAS) {
    // same update, with the velocity bias removed before and restored after
    for (int i = 0; i < nlocal; i++) {
      if (mask[i] & groupbit) {
        temperature->remove_bias(i,v[i]);
        v[i][0] *= factor[0];
        v[i][1] *= factor[1];
        v[i][2] *= factor[2];
        if (pstyle == TRICLINIC) {
          v[i][0] += -dthalf*(v[i][1]*omega_dot[5] + v[i][2]*omega_dot[4]);
          v[i][1] += -dthalf*v[i][2]*omega_dot[3];
        }
        v[i][0] *= factor[0];
        v[i][1] *= factor[1];
        v[i][2] *= factor[2];
        temperature->restore_bias(i,v[i]);
      }
    }
  }
}

/* ----------------------------------------------------------------------
   perform half-step update of velocities
-----------------------------------------------------------------------*/

void FixTGNHDrude::nve_v()
{
  double dtfm;
  double **v = atom->v;
  double **f = atom->f;
  double *rmass = atom->rmass;
  double *mass = atom->mass;
  int *type = atom->type;
  int *mask = atom->mask;
  int nlocal = atom->nlocal;
  if (igroup == atom->firstgroup) nlocal = atom->nfirst;

  // per-atom masses are used when present, per-type masses otherwise

  if (rmass) {
    for (int i = 0; i < nlocal; i++) {
      if (mask[i] & groupbit) {
        dtfm = dtf / rmass[i];
        v[i][0] += dtfm*f[i][0];
        v[i][1] += dtfm*f[i][1];
        v[i][2] += dtfm*f[i][2];
      }
    }
  } else {
    for (int i = 0; i < nlocal; i++) {
      if (mask[i] & groupbit) {
        dtfm = dtf / mass[type[i]];
        v[i][0] += dtfm*f[i][0];
        v[i][1] += dtfm*f[i][1];
        v[i][2] += dtfm*f[i][2];
      }
    }
  }
}

/* ----------------------------------------------------------------------
   perform full-step update of positions
-----------------------------------------------------------------------*/

void FixTGNHDrude::nve_x()
{
  double **x = atom->x;
  double **v = atom->v;
  int *mask = atom->mask;
  int nlocal = atom->nlocal;
  if (igroup == atom->firstgroup) nlocal = atom->nfirst;
+ + // x update by full step only for atoms in group + + for (int i = 0; i < nlocal; i++) { + if (mask[i] & groupbit) { + x[i][0] += dtv * v[i][0]; + x[i][1] += dtv * v[i][1]; + x[i][2] += dtv * v[i][2]; + } + } +} + +/* ---------------------------------------------------------------------- + perform half-step thermostat scaling of velocities +-----------------------------------------------------------------------*/ + +void FixTGNHDrude::nh_v_temp() +{ + double **v = atom->v; + double *mass = atom->mass; + int *mask = atom->mask; + int *type = atom->type; + tagint *molecule = atom->molecule; + int *drudetype = fix_drude->drudetype; + tagint *drudeid = fix_drude->drudeid; + + int imol, i, j, ci, di, itype; + double mass_com, mass_core, mass_drude; + double vint, vcom, vrel; + + /** + * If there are velocity bias, need to remove them before scale velocity + * Have to call remove_bias at the innermost loop, because drude atom may be a ghost + */ + for (i = 0; i < atom->nlocal; i++) { + if (mask[i] & groupbit) { + imol = molecule[i]; + itype = drudetype[type[i]]; + if (itype == NOPOL_TYPE) { + if (which == BIAS) + temperature->remove_bias(i, v[i]); + for (int k = 0; k < 3; k++) { + vint = v[i][k] - v_mol[imol][k]; + vint *= factor_eta_int; + v[i][k] = v_mol[imol][k] * factor_eta_mol + vint; + } + if (which == BIAS) + temperature->restore_bias(i, v[i]); + } else { + // have to use closest_image() because we are manipulating the velocity + j = domain->closest_image(i, atom->map(drudeid[i])); + if (itype == DRUDE_TYPE && j < atom->nlocal) continue; + if (itype == CORE_TYPE) { + ci = i; + di = j; + } else { + ci = j; + di = i; + } + if (which == BIAS) { + temperature->remove_bias(ci, v[ci]); + temperature->remove_bias(di, v[di]); + } + mass_core = mass[type[ci]]; + mass_drude = mass[type[di]]; + mass_com = mass_core + mass_drude; + for (int k = 0; k < 3; k++) { + vcom = (mass_core * v[ci][k] + mass_drude * v[di][k]) / mass_com; + vint = vcom - v_mol[imol][k]; + vrel = 
v[di][k] - v[ci][k]; + vint *= factor_eta_int; + vrel *= factor_eta_drude; + v[ci][k] = v_mol[imol][k] * factor_eta_mol + vint - vrel * mass_drude / mass_com; + v[di][k] = v_mol[imol][k] * factor_eta_mol + vint + vrel * mass_core / mass_com; + } + if (which == BIAS) { + temperature->restore_bias(ci, v[ci]); + temperature->restore_bias(di, v[di]); + } + } + } + } +} + +/* ---------------------------------------------------------------------- + compute sigma tensor + needed whenever p_target or h0_inv changes +-----------------------------------------------------------------------*/ + +void FixTGNHDrude::compute_sigma() +{ + // if nreset_h0 > 0, reset vol0 and h0_inv + // every nreset_h0 timesteps + + if (nreset_h0 > 0) { + const auto delta = update->ntimestep - update->beginstep; // auto keeps the step counters' own integer type (no narrowing to int) + if (delta % nreset_h0 == 0) { + if (dimension == 3) vol0 = domain->xprd * domain->yprd * domain->zprd; + else vol0 = domain->xprd * domain->yprd; + h0_inv[0] = domain->h_inv[0]; + h0_inv[1] = domain->h_inv[1]; + h0_inv[2] = domain->h_inv[2]; + h0_inv[3] = domain->h_inv[3]; + h0_inv[4] = domain->h_inv[4]; + h0_inv[5] = domain->h_inv[5]; + } + } + + // generate upper-triangular half of + // sigma = vol0*h0inv*(p_target-p_hydro)*h0inv^t + // units of sigma are PV/L^2 e.g.
atm.A + // + // [ 0 5 4 ] [ 0 5 4 ] [ 0 5 4 ] [ 0 - - ] + // [ 5 1 3 ] = [ - 1 3 ] [ 5 1 3 ] [ 5 1 - ] + // [ 4 3 2 ] [ - - 2 ] [ 4 3 2 ] [ 4 3 2 ] + + sigma[0] = + vol0*(h0_inv[0]*((p_target[0]-p_hydro)*h0_inv[0] + + p_target[5]*h0_inv[5]+p_target[4]*h0_inv[4]) + + h0_inv[5]*(p_target[5]*h0_inv[0] + + (p_target[1]-p_hydro)*h0_inv[5]+p_target[3]*h0_inv[4]) + + h0_inv[4]*(p_target[4]*h0_inv[0]+p_target[3]*h0_inv[5] + + (p_target[2]-p_hydro)*h0_inv[4])); + sigma[1] = + vol0*(h0_inv[1]*((p_target[1]-p_hydro)*h0_inv[1] + + p_target[3]*h0_inv[3]) + + h0_inv[3]*(p_target[3]*h0_inv[1] + + (p_target[2]-p_hydro)*h0_inv[3])); + sigma[2] = + vol0*(h0_inv[2]*((p_target[2]-p_hydro)*h0_inv[2])); + sigma[3] = + vol0*(h0_inv[1]*(p_target[3]*h0_inv[2]) + + h0_inv[3]*((p_target[2]-p_hydro)*h0_inv[2])); + sigma[4] = + vol0*(h0_inv[0]*(p_target[4]*h0_inv[2]) + + h0_inv[5]*(p_target[3]*h0_inv[2]) + + h0_inv[4]*((p_target[2]-p_hydro)*h0_inv[2])); + sigma[5] = + vol0*(h0_inv[0]*(p_target[5]*h0_inv[1]+p_target[4]*h0_inv[3]) + + h0_inv[5]*((p_target[1]-p_hydro)*h0_inv[1]+p_target[3]*h0_inv[3]) + + h0_inv[4]*(p_target[3]*h0_inv[1]+(p_target[2]-p_hydro)*h0_inv[3])); +} + +/* ---------------------------------------------------------------------- + compute strain energy +-----------------------------------------------------------------------*/ + +double FixTGNHDrude::compute_strain_energy() +{ + // compute strain energy = 0.5*Tr(sigma*h*h^t) in energy units + + double* h = domain->h; + double d0,d1,d2; + + d0 = + sigma[0]*(h[0]*h[0]+h[5]*h[5]+h[4]*h[4]) + + sigma[5]*( h[1]*h[5]+h[3]*h[4]) + + sigma[4]*( h[2]*h[4]); + d1 = + sigma[5]*( h[5]*h[1]+h[4]*h[3]) + + sigma[1]*( h[1]*h[1]+h[3]*h[3]) + + sigma[3]*( h[2]*h[3]); + d2 = + sigma[4]*( h[4]*h[2]) + + sigma[3]*( h[3]*h[2]) + + sigma[2]*( h[2]*h[2]); + + double energy = 0.5*(d0+d1+d2)/nktv2p; + return energy; +} + +/* ---------------------------------------------------------------------- + compute deviatoric barostat force = h*sigma*h^t
+-----------------------------------------------------------------------*/ + +void FixTGNHDrude::compute_deviatoric() +{ + // generate upper-triangular part of h*sigma*h^t + // units of fdev are PV, e.g. atm*A^3 + // [ 0 5 4 ] [ 0 5 4 ] [ 0 5 4 ] [ 0 - - ] + // [ 5 1 3 ] = [ - 1 3 ] [ 5 1 3 ] [ 5 1 - ] + // [ 4 3 2 ] [ - - 2 ] [ 4 3 2 ] [ 4 3 2 ] + + double* h = domain->h; + + fdev[0] = + h[0]*(sigma[0]*h[0]+sigma[5]*h[5]+sigma[4]*h[4]) + + h[5]*(sigma[5]*h[0]+sigma[1]*h[5]+sigma[3]*h[4]) + + h[4]*(sigma[4]*h[0]+sigma[3]*h[5]+sigma[2]*h[4]); + fdev[1] = + h[1]*( sigma[1]*h[1]+sigma[3]*h[3]) + + h[3]*( sigma[3]*h[1]+sigma[2]*h[3]); + fdev[2] = + h[2]*( sigma[2]*h[2]); + fdev[3] = + h[1]*( sigma[3]*h[2]) + + h[3]*( sigma[2]*h[2]); + fdev[4] = + h[0]*( sigma[4]*h[2]) + + h[5]*( sigma[3]*h[2]) + + h[4]*( sigma[2]*h[2]); + fdev[5] = + h[0]*( sigma[5]*h[1]+sigma[4]*h[3]) + + h[5]*( sigma[1]*h[1]+sigma[3]*h[3]) + + h[4]*( sigma[3]*h[1]+sigma[2]*h[3]); +} + +/* ---------------------------------------------------------------------- + compute target temperature and kinetic energy +-----------------------------------------------------------------------*/ + +void FixTGNHDrude::compute_temp_target() +{ + double delta = update->ntimestep - update->beginstep; + if (delta != 0.0) delta /= update->endstep - update->beginstep; + + t_target = t_start + delta * (t_stop-t_start); + ke2mol_target = dof_mol * boltz * t_target; + ke2int_target = dof_int * boltz * t_target; + ke2drude_target = dof_drude * boltz * tdrude_target; +} + +/* ---------------------------------------------------------------------- + compute hydrostatic target pressure +-----------------------------------------------------------------------*/ + +void FixTGNHDrude::compute_press_target() +{ + double delta = update->ntimestep - update->beginstep; + if (delta != 0.0) delta /= update->endstep - update->beginstep; + + p_hydro = 0.0; + for (int i = 0; i < 3; i++) + if (p_flag[i]) { + p_target[i] = p_start[i] + delta
* (p_stop[i]-p_start[i]); + p_hydro += p_target[i]; + } + if (pdim > 0) p_hydro /= pdim; + + if (pstyle == TRICLINIC) + for (int i = 3; i < 6; i++) + p_target[i] = p_start[i] + delta * (p_stop[i]-p_start[i]); + + // if deviatoric, recompute sigma each time p_target changes + + if (deviatoric_flag) compute_sigma(); +} + +/* ---------------------------------------------------------------------- + update omega_dot, omega +-----------------------------------------------------------------------*/ + +void FixTGNHDrude::nh_omega_dot() +{ + double f_omega,volume; + + if (dimension == 3) volume = domain->xprd*domain->yprd*domain->zprd; + else volume = domain->xprd*domain->yprd; + + if (deviatoric_flag) compute_deviatoric(); + + mtk_term1 = 0.0; + if (mtk_flag) { + if (pstyle == ISO) { + mtk_term1 = tdof * boltz * t_current; + mtk_term1 /= pdim * atom->natoms; + } else { + double *mvv_current = temperature->vector; + for (int i = 0; i < 3; i++) + if (p_flag[i]) + mtk_term1 += mvv_current[i]; + mtk_term1 /= pdim * atom->natoms; + } + } + + for (int i = 0; i < 3; i++) + if (p_flag[i]) { + f_omega = (p_current[i]-p_hydro)*volume / + (omega_mass[i] * nktv2p) + mtk_term1 / omega_mass[i]; + if (deviatoric_flag) f_omega -= fdev[i]/(omega_mass[i] * nktv2p); + omega_dot[i] += f_omega*dthalf; + } + + mtk_term2 = 0.0; + if (mtk_flag) { + for (int i = 0; i < 3; i++) + if (p_flag[i]) + mtk_term2 += omega_dot[i]; + if (pdim > 0) mtk_term2 /= pdim * atom->natoms; + } + + if (pstyle == TRICLINIC) { + for (int i = 3; i < 6; i++) { + if (p_flag[i]) { + f_omega = p_current[i]*volume/(omega_mass[i] * nktv2p); + if (deviatoric_flag) + f_omega -= fdev[i]/(omega_mass[i] * nktv2p); + omega_dot[i] += f_omega*dthalf; + } + } + } +} + +/* ---------------------------------------------------------------------- + if any tilt ratios exceed limits, set flip = 1 and compute new tilt values + do not flip in x or y if non-periodic (can tilt but not flip) + this is b/c the box length would be changed 
(dramatically) by flip + if yz tilt exceeded, adjust C vector by one B vector + if xz tilt exceeded, adjust C vector by one A vector + if xy tilt exceeded, adjust B vector by one A vector + check yz first since it may change xz, then xz check comes after + if any flip occurs, create new box in domain + image_flip() adjusts image flags due to box shape change induced by flip + remap() puts atoms outside the new box back into the new box + perform irregular on atoms in lamda coords to migrate atoms to new procs + important that image_flip comes before remap, since remap may change + image flags to new values, making eqs in doc of Domain:image_flip incorrect +------------------------------------------------------------------------- */ + +void FixTGNHDrude::pre_exchange() +{ + double xprd = domain->xprd; + double yprd = domain->yprd; + + // flip is only triggered when tilt exceeds 0.5 by DELTAFLIP + // this avoids immediate re-flipping due to tilt oscillations + + double xtiltmax = (0.5+DELTAFLIP)*xprd; + double ytiltmax = (0.5+DELTAFLIP)*yprd; + + int flipxy,flipxz,flipyz; + flipxy = flipxz = flipyz = 0; + + if (domain->yperiodic) { + if (domain->yz < -ytiltmax) { + domain->yz += yprd; + domain->xz += domain->xy; + flipyz = 1; + } else if (domain->yz >= ytiltmax) { + domain->yz -= yprd; + domain->xz -= domain->xy; + flipyz = -1; + } + } + + if (domain->xperiodic) { + if (domain->xz < -xtiltmax) { + domain->xz += xprd; + flipxz = 1; + } else if (domain->xz >= xtiltmax) { + domain->xz -= xprd; + flipxz = -1; + } + if (domain->xy < -xtiltmax) { + domain->xy += xprd; + flipxy = 1; + } else if (domain->xy >= xtiltmax) { + domain->xy -= xprd; + flipxy = -1; + } + } + + int flip = 0; + if (flipxy || flipxz || flipyz) flip = 1; + + if (flip) { + domain->set_global_box(); + domain->set_local_box(); + + domain->image_flip(flipxy,flipxz,flipyz); + + double **x = atom->x; + imageint *image = atom->image; + int nlocal = atom->nlocal; + for (int i = 0; i < nlocal; i++) 
domain->remap(x[i],image[i]); + + domain->x2lamda(atom->nlocal); + irregular->migrate_atoms(); + domain->lamda2x(atom->nlocal); + } +} + +/* ---------------------------------------------------------------------- + memory usage of Irregular +------------------------------------------------------------------------- */ + +double FixTGNHDrude::memory_usage() +{ + double bytes = 0.0; + if (irregular) bytes += irregular->memory_usage(); + return bytes; +} diff --git a/src/KOKKOS/fix_tgnh_drude_kokkos.h b/src/KOKKOS/fix_tgnh_drude_kokkos.h new file mode 100644 index 0000000000..f87642f188 --- /dev/null +++ b/src/KOKKOS/fix_tgnh_drude_kokkos.h @@ -0,0 +1,161 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifndef LMP_FIX_TGNH_DRUDE_KOKKOS_H +#define LMP_FIX_TGNH_DRUDE_KOKKOS_H + +#include "fix_tgnh_drude.h" + +namespace LAMMPS_NS { + +class FixTGNHDrudeKokkos : public FixTGNHDrude { + public: + FixTGNHDrudeKokkos(class LAMMPS *, int, char **); + ~FixTGNHDrudeKokkos() override; + int setmask() override; + void init() override; + void setup(int) override; + void initial_integrate(int) override; + void final_integrate() override; + void pre_force_respa(int, int, int) override; + void initial_integrate_respa(int, int, int) override; + void final_integrate_respa(int, int) override; + void pre_exchange() override; + double compute_scalar() override; + double compute_vector(int) override; + void write_restart(FILE *) override; + virtual int pack_restart_data(double *); // pack restart data + void restart(char *) override; + int modify_param(int, char **) override; + void reset_target(double) override; + void reset_dt() override; + double memory_usage() override; + + protected: + int dimension, which; + double dtv, dtf, dthalf, dt4, dt8, dto; + double boltz, nktv2p, tdof; + double vol0; // reference volume + double t0; // reference temperature + // used for barostat mass + double t_start, t_stop; + double t_current, t_target; + double t_freq; + + int tstat_flag; // 1 if control T + int pstat_flag; // 1 if control P + + int pstyle, pcouple; + int p_flag[6]; // 1 if control P on this dim, 0 if not + double p_start[6], p_stop[6]; + double p_freq[6], p_target[6]; + double omega[6], omega_dot[6]; + double omega_mass[6]; + double p_current[6]; + int kspace_flag; // 1 if KSpace invoked, 0 if not + std::vector rfix; // indices of rigid fixes; NOTE(review): template argument missing, confirm element type + class Irregular *irregular; // for migrating atoms after box flips + + int nlevels_respa; + double *step_respa; + + char *id_temp, *id_press; + class Compute *temperature, *pressure; + int tcomputeflag, pcomputeflag; // 1 = compute was created by fix + //
0 = created externally + + double *etamol; + double *etamol_dot; // chain thermostat for motion of whole molecules + double *etamol_dotdot; + double *etamol_mass; + + double *etaint; + double *etaint_dot; // chain thermostat for internal DOFs + double *etaint_dotdot; + double *etaint_mass; + + double *etadrude; + double *etadrude_dot; // chain thermostat for Drude relative motions + double *etadrude_dotdot; + double *etadrude_mass; + + double *etap; // chain thermostat for barostat + double *etap_dot; + double *etap_dotdot; + double *etap_mass; + + int mtchain; // length of chain + int mpchain; // length of chain + + int mtk_flag; // 0 if using Hoover barostat + int pdim; // number of barostatted dims + double p_freq_max; // maximum barostat frequency + + double p_hydro; // hydrostatic target pressure + + int nc_tchain, nc_pchain; + double sigma[6]; // scaled target stress + double fdev[6]; // deviatoric force on barostat + int deviatoric_flag; // 0 if target stress tensor is hydrostatic + double h0_inv[6]; // h_inv of reference (zero strain) box + int nreset_h0; // interval for resetting h0 + + double mtk_term1, mtk_term2; // Martyna-Tobias-Klein corrections + + int scaleyz; // 1 if yz scaled with lz + int scalexz; // 1 if xz scaled with lz + int scalexy; // 1 if xy scaled with ly + int flipflag; // 1 if box flips are invoked as needed + + int pre_exchange_flag; // set if pre_exchange needed for box flips + + double fixedpoint[3]; // location of dilation fixed-point + + void couple(); + virtual void remap(); + void nhc_temp_integrate(); + void nhc_press_integrate(); + + virtual void nve_x(); // may be overwritten by child classes + virtual void nve_v(); + virtual void nh_v_press(); + virtual void nh_v_temp(); + virtual void compute_temp_target(); + virtual int size_restart_global(); + + void compute_sigma(); + void compute_deviatoric(); + double compute_strain_energy(); + void compute_press_target(); + void nh_omega_dot(); + + class FixDrude *fix_drude; + int
n_mol; // number of molecules in the system + double *mass_mol; + double dof_mol, dof_int, dof_drude; // DOFs of different modes in the fix group + void setup_mol_mass_dof(); + double **v_mol, **v_mol_tmp; + void compute_temp_mol_int_drude(bool); // calculate the temperatures of three sets of DOFs + bool temp_computed_end_of_step = false; + double tdrude_target, tdrude_freq; + double t_mol, t_int, t_drude; + double ke2mol, ke2int, ke2drude; + double ke2mol_target, ke2int_target, ke2drude_target; + double factor_eta_mol, factor_eta_int, factor_eta_drude; + double propagate(double *, double *, double *, const double *, const double &, const double &, + const double &) const; +}; + +} // namespace LAMMPS_NS + +#endif diff --git a/src/KOKKOS/fix_tgnpt_drude_kokkos.cpp b/src/KOKKOS/fix_tgnpt_drude_kokkos.cpp new file mode 100644 index 0000000000..d014dd51ce --- /dev/null +++ b/src/KOKKOS/fix_tgnpt_drude_kokkos.cpp @@ -0,0 +1,53 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Mitch Murphy (alphataubio at gmail) +------------------------------------------------------------------------- */ + +#include "fix_tgnpt_drude_kokkos.h" + +#include "error.h" +#include "modify.h" + +using namespace LAMMPS_NS; +using namespace FixConst; + +/* ---------------------------------------------------------------------- */ + +FixTGNPTDrudeKokkos::FixTGNPTDrudeKokkos(LAMMPS *lmp, int narg, char **arg) : + FixTGNHDrudeKokkos(lmp, narg, arg) +{ + if (!tstat_flag) + error->all(FLERR,"Temperature control must be used with fix npt"); + if (!pstat_flag) + error->all(FLERR,"Pressure control must be used with fix npt"); + + // create a new compute temp style + // id = fix-ID + temp + // compute group = all since pressure is always global (group all) + // and thus its KE/temperature contribution should use group all + + id_temp = utils::strdup(std::string(id) + "_temp"); + modify->add_compute(fmt::format("{} all temp",id_temp)); + tcomputeflag = 1; + + // create a new compute pressure style + // id = fix-ID + press, compute group = all + // pass id_temp as 4th arg to pressure constructor + + id_press = utils::strdup(std::string(id) + "_press"); + modify->add_compute(fmt::format("{} all pressure {}",id_press, id_temp)); + pcomputeflag = 1; +} diff --git a/src/KOKKOS/fix_tgnpt_drude_kokkos.h b/src/KOKKOS/fix_tgnpt_drude_kokkos.h new file mode 100644 index 0000000000..7a57ddad14 --- /dev/null +++ b/src/KOKKOS/fix_tgnpt_drude_kokkos.h @@ -0,0 +1,37 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. 
Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS +// clang-format off +FixStyle(tgnpt/drude/kk,FixTGNPTDrudeKokkos); +FixStyle(tgnpt/drude/kk/device,FixTGNPTDrudeKokkos); +FixStyle(tgnpt/drude/kk/host,FixTGNPTDrudeKokkos); +// clang-format on +#else + +#ifndef LMP_FIX_TGNPT_DRUDE_KOKKOS_H +#define LMP_FIX_TGNPT_DRUDE_KOKKOS_H + +#include "fix_tgnh_drude_kokkos.h" + +namespace LAMMPS_NS { + +class FixTGNPTDrudeKokkos : public FixTGNHDrudeKokkos { + public: + FixTGNPTDrudeKokkos(class LAMMPS *, int, char **); +}; + +} // namespace LAMMPS_NS + +#endif +#endif diff --git a/src/KOKKOS/fix_tgnvt_drude_kokkos.cpp b/src/KOKKOS/fix_tgnvt_drude_kokkos.cpp new file mode 100644 index 0000000000..93a3e104fe --- /dev/null +++ b/src/KOKKOS/fix_tgnvt_drude_kokkos.cpp @@ -0,0 +1,44 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Mitch Murphy (alphataubio at gmail) +------------------------------------------------------------------------- */ + +#include "fix_tgnvt_drude_kokkos.h" + +#include "error.h" +#include "group.h" +#include "modify.h" + +using namespace LAMMPS_NS; +using namespace FixConst; + +/* ---------------------------------------------------------------------- */ + +FixTGNVTDrudeKokkos::FixTGNVTDrudeKokkos(LAMMPS *lmp, int narg, char **arg) : + FixTGNHDrudeKokkos(lmp, narg, arg) +{ + if (!tstat_flag) + error->all(FLERR,"Temperature control must be used with fix nvt"); + if (pstat_flag) + error->all(FLERR,"Pressure control can not be used with fix nvt"); + + // create a new compute temp style + // id = fix-ID + temp + + id_temp = utils::strdup(std::string(id) + "_temp"); + modify->add_compute(fmt::format("{} {} temp",id_temp,group->names[igroup])); + tcomputeflag = 1; +} diff --git a/src/KOKKOS/fix_tgnvt_drude_kokkos.h b/src/KOKKOS/fix_tgnvt_drude_kokkos.h new file mode 100644 index 0000000000..9830d3f5f0 --- /dev/null +++ b/src/KOKKOS/fix_tgnvt_drude_kokkos.h @@ -0,0 +1,37 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS +// clang-format off +FixStyle(tgnvt/drude/kk,FixTGNVTDrudeKokkos); +FixStyle(tgnvt/drude/kk/device,FixTGNVTDrudeKokkos); +FixStyle(tgnvt/drude/kk/host,FixTGNVTDrudeKokkos); +// clang-format on +#else + +#ifndef LMP_FIX_TGNVT_DRUDE_KOKKOS_H +#define LMP_FIX_TGNVT_DRUDE_KOKKOS_H + +#include "fix_tgnh_drude_kokkos.h" + +namespace LAMMPS_NS { + +class FixTGNVTDrudeKokkos : public FixTGNHDrudeKokkos { + public: + FixTGNVTDrudeKokkos(class LAMMPS *, int, char **); +}; + +} // namespace LAMMPS_NS + +#endif +#endif From d64be895e638ffb1ac6ec389e3c66ce87c68eae4 Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Thu, 26 Sep 2024 16:42:01 +0100 Subject: [PATCH 098/294] Allow for output of effective electronegativities --- doc/src/fix_qtpie_reaxff.rst | 3 ++- src/REAXFF/fix_qtpie_reaxff.cpp | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/doc/src/fix_qtpie_reaxff.rst b/doc/src/fix_qtpie_reaxff.rst index 5900c3c6e7..e0c2de1432 100644 --- a/doc/src/fix_qtpie_reaxff.rst +++ b/doc/src/fix_qtpie_reaxff.rst @@ -122,7 +122,8 @@ Restart, fix_modify, output, run start/stop, minimize info No information about this fix is written to :doc:`binary restart files `. This fix computes a global scalar (the number of -iterations) for access by various :doc:`output commands `. +iterations) and a per-atom vector (the effective electronegativity), which +can be accessed by various :doc:`output commands `. No parameter of this fix can be used with the *start/stop* keywords of the :doc:`run ` command. 
diff --git a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp index b08c6808ac..30a7b7f71f 100644 --- a/src/REAXFF/fix_qtpie_reaxff.cpp +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -58,8 +58,14 @@ static constexpr double ANGSTROM_TO_BOHRRADIUS = 1.8897261259; FixQtpieReaxFF::FixQtpieReaxFF(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg), matvecs(0), pertype_option(nullptr), gauss_file(nullptr) { + // this fix returns a global scalar (the number of iterations) scalar_flag = 1; extscalar = 0; + + // this fix returns a per-atom vector (the effective electronegativity) + peratom_flag = 1; + size_peratom_cols = 0; + imax = 200; maxwarn = 1; @@ -312,6 +318,7 @@ void FixQtpieReaxFF::allocate_storage() memory->create(Hdia_inv,nmax,"qtpie:Hdia_inv"); memory->create(b_s,nmax,"qtpie:b_s"); memory->create(chi_eff,nmax,"qtpie:chi_eff"); + vector_atom = chi_eff; memory->create(b_t,nmax,"qtpie:b_t"); memory->create(b_prc,nmax,"qtpie:b_prc"); memory->create(b_prm,nmax,"qtpie:b_prm"); From 5d0f1aeeafee9f831e4f67fba7ec978d960953d8 Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Thu, 26 Sep 2024 17:21:16 +0100 Subject: [PATCH 099/294] Expand documentation --- doc/src/fix_qtpie_reaxff.rst | 31 ++++++++++++++++----- doc/utils/sphinx-config/false_positives.txt | 4 +++ 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/doc/src/fix_qtpie_reaxff.rst b/doc/src/fix_qtpie_reaxff.rst index e0c2de1432..2d2b183491 100644 --- a/doc/src/fix_qtpie_reaxff.rst +++ b/doc/src/fix_qtpie_reaxff.rst @@ -38,21 +38,33 @@ Description The QTPIE charge equilibration method is an extension of the QEq charge equilibration method. 
With QTPIE, the partial charges on individual atoms are computed by minimizing the electrostatic energy of the system in the -same way as the QEq method but where the Mulliken electronegativity, +same way as the QEq method but where the absolute electronegativity, :math:`\chi_i`, of each atom in the QEq charge equilibration scheme :ref:`(Rappe and Goddard) ` is replaced with an effective electronegativity given by :ref:`(Chen) ` .. math:: - \chi_{\mathrm{eff},i} = \frac{\sum_{j=1}^{N} (\chi_i - \chi_j + \phi_j - \phi_i) S_{ij}} + \chi_{\mathrm{eff},i} = \frac{\sum_{j=1}^{N} (\chi_i - \chi_j) S_{ij}} {\sum_{m=1}^{N}S_{im}}, which acts to penalize long-range charge transfer seen with the QEq charge equilibration scheme. In this equation, :math:`N` is the number of atoms in -the system, :math:`S_{ij}` is the overlap integral between atom :math:`i` -and atom :math:`j`, and :math:`\phi_i` and :math:`\phi_j` are the electric -potentials at the position of atom :math:`i` and :math:`j` due to -an external electric field, respectively. +the system and :math:`S_{ij}` is the overlap integral between atom :math:`i` +and atom :math:`j`. + +The effect of an external electric field can be incorporated into the QTPIE +method by modifying the absolute or effective electronegativities of each +atom :ref:`(Chen) `. This fix models the effect of an external +electric field by using the effective electronegativity given in +:ref:`(Gergs) `: + +.. math:: + \chi_{\mathrm{eff},i} = \frac{\sum_{j=1}^{N} (\chi_i - \chi_j + \phi_j - \phi_i) S_{ij}} + {\sum_{m=1}^{N}S_{im}}, + +where :math:`\phi_i` and :math:`\phi_j` are the electric +potentials at the positions of atom :math:`i` and :math:`j` +due to the external electric field. 
This fix is typically used in conjunction with the ReaxFF force field model as implemented in the :doc:`pair_style reaxff ` @@ -64,7 +76,7 @@ charge equilibration performed by `fix qtpie/reaxff`, which is the same as in To be explicit, this fix replaces :math:`\chi_k` of eq. 3 in :ref:`(Aktulga) ` with :math:`\chi_{\mathrm{eff},k}`. -This fix requires the Mulliken electronegativity, :math:`\chi`, in eV, the +This fix requires the absolute electronegativity, :math:`\chi`, in eV, the self-Coulomb potential, :math:`\eta`, in eV, and the shielded Coulomb constant, :math:`\gamma`, in :math:`\AA^{-1}`. If the *params* setting above is the word "reaxff", then these are extracted from the @@ -173,6 +185,11 @@ maxiter 200 **(Chen)** Chen, Jiahao. Theory and applications of fluctuating-charge models. University of Illinois at Urbana-Champaign, 2009. +.. _Gergs: + +**(Gergs)** Gergs, Dirkmann and Mussenbrock. +Journal of Applied Physics 123.24 (2018). + .. _qeq-Aktulga2: **(Aktulga)** Aktulga, Fogarty, Pandit, Grama, Parallel Computing, 38, diff --git a/doc/utils/sphinx-config/false_positives.txt b/doc/utils/sphinx-config/false_positives.txt index 65c1031fcf..12e952d551 100644 --- a/doc/utils/sphinx-config/false_positives.txt +++ b/doc/utils/sphinx-config/false_positives.txt @@ -814,6 +814,7 @@ dipoleflag dir Direc dirname +Dirkmann discoverable discretization discretized @@ -975,6 +976,7 @@ elaplong elastance Electroneg electronegative +electronegativities electronegativity electroneutral electroneutrality @@ -1291,6 +1293,7 @@ Geocomputing georg Georg Geotechnica +Gergs germain Germann Germano @@ -2390,6 +2393,7 @@ Murdick Murtola Murty Muser +Mussenbrock mutexes Muto muVT From 350551ecac5f07aef7a08b3cb24e3c8c8e64edaf Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Thu, 26 Sep 2024 21:27:53 +0100 Subject: [PATCH 100/294] Fix whitespace --- doc/src/fix_qtpie_reaxff.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/fix_qtpie_reaxff.rst 
b/doc/src/fix_qtpie_reaxff.rst index 2d2b183491..0075c573e1 100644 --- a/doc/src/fix_qtpie_reaxff.rst +++ b/doc/src/fix_qtpie_reaxff.rst @@ -187,7 +187,7 @@ University of Illinois at Urbana-Champaign, 2009. .. _Gergs: -**(Gergs)** Gergs, Dirkmann and Mussenbrock. +**(Gergs)** Gergs, Dirkmann and Mussenbrock. Journal of Applied Physics 123.24 (2018). .. _qeq-Aktulga2: From d86de2862b66e2749b5c1592970ca60825e8db6d Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Mon, 30 Sep 2024 12:10:33 +0100 Subject: [PATCH 101/294] Make signs consistent with efield = -grad(phi) --- doc/src/fix_qtpie_reaxff.rst | 2 +- src/REAXFF/fix_qtpie_reaxff.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/src/fix_qtpie_reaxff.rst b/doc/src/fix_qtpie_reaxff.rst index 0075c573e1..b7faa772af 100644 --- a/doc/src/fix_qtpie_reaxff.rst +++ b/doc/src/fix_qtpie_reaxff.rst @@ -59,7 +59,7 @@ electric field by using the effective electronegativity given in :ref:`(Gergs) `: .. math:: - \chi_{\mathrm{eff},i} = \frac{\sum_{j=1}^{N} (\chi_i - \chi_j + \phi_j - \phi_i) S_{ij}} + \chi_{\mathrm{eff},i} = \frac{\sum_{j=1}^{N} (\chi_i - \chi_j + \phi_i - \phi_j) S_{ij}} {\sum_{m=1}^{N}S_{im}}, where :math:`\phi_i` and :math:`\phi_j` are the electric diff --git a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp index 30a7b7f71f..946457a4da 100644 --- a/src/REAXFF/fix_qtpie_reaxff.cpp +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -1166,7 +1166,7 @@ void FixQtpieReaxFF::calc_chi_eff() chia = chi[type[i]]; if (efield) { if (efield->varflag != FixEfield::ATOM) { - phia = factor*(x[i][0]*efield->ex + x[i][1]*efield->ey + x[i][2]*efield->ez); + phia = -factor*(x[i][0]*efield->ex + x[i][1]*efield->ey + x[i][2]*efield->ez); } else { // atom-style potential from FixEfield phia = efield->efield[i][3]; } @@ -1189,11 +1189,11 @@ void FixQtpieReaxFF::calc_chi_eff() if (efield) { if (efield->varflag != FixEfield::ATOM) { - phib = factor*(x[j][0]*efield->ex + x[j][1]*efield->ey + 
x[j][2]*efield->ez); + phib = -factor*(x[j][0]*efield->ex + x[j][1]*efield->ey + x[j][2]*efield->ez); } else { // atom-style potential from FixEfield phib = efield->efield[j][3]; } - sum_n += (chia - chib + phib - phia) * overlap; + sum_n += (chia - chib + phia - phib) * overlap; } else { sum_n += (chia - chib) * overlap; } From d5f630db6c034fa568bd0e35b4f2ba24e2fc6744 Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Mon, 30 Sep 2024 12:28:16 +0100 Subject: [PATCH 102/294] Fix sign used for atom-style potential A positive sign in front of the electric potential is consistent with E = -grad(electric potential). --- src/REAXFF/fix_qeq_reaxff.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/REAXFF/fix_qeq_reaxff.cpp b/src/REAXFF/fix_qeq_reaxff.cpp index adaf5be031..37e90f582e 100644 --- a/src/REAXFF/fix_qeq_reaxff.cpp +++ b/src/REAXFF/fix_qeq_reaxff.cpp @@ -1158,7 +1158,7 @@ void FixQEqReaxFF::get_chi_field() for (int i = 0; i < nlocal; i++) { if (mask[i] & efgroupbit) { if (region && !region->match(x[i][0],x[i][1],x[i][2])) continue; - chi_field[i] = -efield->efield[i][3]; + chi_field[i] = efield->efield[i][3]; } } } From 9f33efc1b9ac58e89c18cb23fef7fd5babf4c732 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 1 Oct 2024 02:12:49 -0400 Subject: [PATCH 103/294] resolve conflict --- unittest/force-styles/test_fix_timestep.cpp | 103 +++++++++++--------- 1 file changed, 57 insertions(+), 46 deletions(-) diff --git a/unittest/force-styles/test_fix_timestep.cpp b/unittest/force-styles/test_fix_timestep.cpp index 973b946295..7d12d0b20c 100644 --- a/unittest/force-styles/test_fix_timestep.cpp +++ b/unittest/force-styles/test_fix_timestep.cpp @@ -244,6 +244,20 @@ void generate_yaml_file(const char *outfile, const TestConfig &config) block += fmt::format("{:3} {:23.16e} {:23.16e} {:23.16e}\n", i, v[j][0], v[j][1], v[j][2]); } writer.emit_block("run_vel", block); + + // run_torque + + if (lmp->atom->torque_flag) { + block.clear(); + auto *t = 
lmp->atom->torque; + for (int i = 1; i <= natoms; ++i) { + const int j = lmp->atom->map(i); + block += + fmt::format("{:3} {:23.16e} {:23.16e} {:23.16e}\n", i, t[j][0], t[j][1], t[j][2]); + } + writer.emit_block("run_torque", block); + } + cleanup_lammps(lmp, config); } @@ -288,6 +302,9 @@ TEST(FixTimestep, plain) EXPECT_POSITIONS("run_pos (normal run, verlet)", lmp->atom, test_config.run_pos, epsilon); EXPECT_VELOCITIES("run_vel (normal run, verlet)", lmp->atom, test_config.run_vel, epsilon); + if (lmp->atom->torque_flag) + EXPECT_TORQUES("run_torques (normal run, verlet)", lmp->atom, test_config.run_torque, + epsilon); auto *ifix = lmp->modify->get_fix_by_id("test"); if (!ifix) { @@ -335,14 +352,12 @@ TEST(FixTimestep, plain) restart_lammps(lmp, test_config, false, false); if (!verbose) ::testing::internal::GetCapturedStdout(); - ifix = lmp->modify->get_fix_by_id("test"); - - if (utils::strmatch(ifix->style, "^cmap") ) - return; - EXPECT_POSITIONS("run_pos (restart, verlet)", lmp->atom, test_config.run_pos, epsilon); EXPECT_VELOCITIES("run_vel (restart, verlet)", lmp->atom, test_config.run_vel, epsilon); + if (lmp->atom->torque_flag) + EXPECT_TORQUES("run_torque (restart, verlet)", lmp->atom, test_config.run_torque, epsilon); + ifix = lmp->modify->get_fix_by_id("test"); if (!ifix) { FAIL() << "ERROR: no fix defined with fix ID 'test'\n"; } else { @@ -844,7 +859,7 @@ TEST(FixTimestep, kokkos_omp) if (!Info::has_accelerator_feature("KOKKOS", "api", "openmp")) GTEST_SKIP(); LAMMPS::argv args = {"FixTimestep", "-log", "none", "-echo", "screen", "-nocite", - "-k", "on", "t", "4", "-sf", "kk"}; + "-k", "on", "t", "4", "-sf", "kk"}; ::testing::internal::CaptureStdout(); LAMMPS *lmp = init_lammps(args, test_config); @@ -868,7 +883,7 @@ TEST(FixTimestep, kokkos_omp) ASSERT_EQ(lmp->atom->natoms, nlocal); // relax error a bit for KOKKOS package - double epsilon = 5.0 * test_config.epsilon; + double epsilon = 10.0 * test_config.epsilon; // relax test precision when using 
pppm and single precision FFTs #if defined(FFT_SINGLE) if (lmp->force->kspace && utils::strmatch(lmp->force->kspace_style, "^pppm")) epsilon *= 2.0e8; @@ -878,38 +893,42 @@ TEST(FixTimestep, kokkos_omp) EXPECT_POSITIONS("run_pos (normal run, verlet)", lmp->atom, test_config.run_pos, epsilon); EXPECT_VELOCITIES("run_vel (normal run, verlet)", lmp->atom, test_config.run_vel, epsilon); + if (lmp->atom->torque_flag) + EXPECT_TORQUES("run_torque (normal run, verlet)", lmp->atom, test_config.run_torque, + epsilon); - int ifix = lmp->modify->find_fix("test"); - if (ifix < 0) { + auto *ifix = lmp->modify->get_fix_by_id("test"); + + if (!ifix) { FAIL() << "ERROR: no fix defined with fix ID 'test'\n"; } else { - Fix *fix = lmp->modify->fix[ifix]; - if (fix->thermo_virial) { - EXPECT_STRESS("run_stress (normal run, verlet)", fix->virial, test_config.run_stress, + + if (ifix->thermo_virial) { + EXPECT_STRESS("run_stress (normal run, verlet)", ifix->virial, test_config.run_stress, epsilon); } stats.reset(); // global scalar - if (fix->scalar_flag) { - double value = fix->compute_scalar(); + if (ifix->scalar_flag) { + double value = ifix->compute_scalar(); EXPECT_FP_LE_WITH_EPS(test_config.global_scalar, value, epsilon); } // global vector - if (fix->vector_flag) { - int num = fix->size_vector; + if (ifix->vector_flag) { + int num = ifix->size_vector; EXPECT_EQ(num, test_config.global_vector.size()); for (int i = 0; i < num; ++i) - EXPECT_FP_LE_WITH_EPS(test_config.global_vector[i], fix->compute_vector(i), + EXPECT_FP_LE_WITH_EPS(test_config.global_vector[i], ifix->compute_vector(i), epsilon); } // check t_target for thermostats int dim = -1; - double *ptr = (double *)fix->extract("t_target", dim); + double *ptr = (double *)ifix->extract("t_target", dim); if ((ptr != nullptr) && (dim == 0)) { int ivar = lmp->input->variable->find("t_target"); if (ivar >= 0) { @@ -922,44 +941,39 @@ TEST(FixTimestep, kokkos_omp) std::cerr << "global_data, normal run, verlet: " << stats << 
std::endl; } - - // FIXME: remove after debugging - if (utils::strmatch(lmp->modify->fix[ifix]->style, "^cmap") ) - return; - if (!verbose) ::testing::internal::CaptureStdout(); restart_lammps(lmp, test_config, false, false); if (!verbose) ::testing::internal::GetCapturedStdout(); - EXPECT_POSITIONS("run_pos (restart, verlet)", lmp->atom, test_config.run_pos, epsilon); EXPECT_VELOCITIES("run_vel (restart, verlet)", lmp->atom, test_config.run_vel, epsilon); + if (lmp->atom->torque_flag) + EXPECT_TORQUES("run_torque (restart, verlet)", lmp->atom, test_config.run_torque, epsilon); - ifix = lmp->modify->find_fix("test"); - if (ifix < 0) { + ifix = lmp->modify->get_fix_by_id("test"); + if (!ifix) { FAIL() << "ERROR: no fix defined with fix ID 'test'\n"; } else { - Fix *fix = lmp->modify->fix[ifix]; - if (fix->thermo_virial) { - EXPECT_STRESS("run_stress (restart, verlet)", fix->virial, test_config.run_stress, + if (ifix->thermo_virial) { + EXPECT_STRESS("run_stress (restart, verlet)", ifix->virial, test_config.run_stress, epsilon); } stats.reset(); // global scalar - if (fix->scalar_flag) { - double value = fix->compute_scalar(); + if (ifix->scalar_flag) { + double value = ifix->compute_scalar(); EXPECT_FP_LE_WITH_EPS(test_config.global_scalar, value, epsilon); } // global vector - if (fix->vector_flag) { - int num = fix->size_vector; + if (ifix->vector_flag) { + int num = ifix->size_vector; EXPECT_EQ(num, test_config.global_vector.size()); for (int i = 0; i < num; ++i) - EXPECT_FP_LE_WITH_EPS(test_config.global_vector[i], fix->compute_vector(i), + EXPECT_FP_LE_WITH_EPS(test_config.global_vector[i], ifix->compute_vector(i), epsilon); } if (print_stats && stats.has_data()) @@ -974,31 +988,30 @@ TEST(FixTimestep, kokkos_omp) EXPECT_POSITIONS("run_pos (rmass, verlet)", lmp->atom, test_config.run_pos, epsilon); EXPECT_VELOCITIES("run_vel (rmass, verlet)", lmp->atom, test_config.run_vel, epsilon); - ifix = lmp->modify->find_fix("test"); - if (ifix < 0) { + ifix = 
lmp->modify->get_fix_by_id("test"); + if (!ifix) { FAIL() << "ERROR: no fix defined with fix ID 'test'\n"; } else { - Fix *fix = lmp->modify->fix[ifix]; - if (fix->thermo_virial) { - EXPECT_STRESS("run_stress (rmass, verlet)", fix->virial, test_config.run_stress, + if (ifix->thermo_virial) { + EXPECT_STRESS("run_stress (rmass, verlet)", ifix->virial, test_config.run_stress, epsilon); } stats.reset(); // global scalar - if (fix->scalar_flag) { - double value = fix->compute_scalar(); + if (ifix->scalar_flag) { + double value = ifix->compute_scalar(); EXPECT_FP_LE_WITH_EPS(test_config.global_scalar, value, epsilon); } // global vector - if (fix->vector_flag) { - int num = fix->size_vector; + if (ifix->vector_flag) { + int num = ifix->size_vector; EXPECT_EQ(num, test_config.global_vector.size()); for (int i = 0; i < num; ++i) - EXPECT_FP_LE_WITH_EPS(test_config.global_vector[i], fix->compute_vector(i), + EXPECT_FP_LE_WITH_EPS(test_config.global_vector[i], ifix->compute_vector(i), epsilon); } if (print_stats && stats.has_data()) @@ -1012,5 +1025,3 @@ TEST(FixTimestep, kokkos_omp) cleanup_lammps(lmp, test_config); if (!verbose) ::testing::internal::GetCapturedStdout(); }; - - From 9330ccf6f92340fea2de0bad1f22552f04b5e4d6 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 1 Oct 2024 02:30:01 -0400 Subject: [PATCH 104/294] punt to later --- src/KOKKOS/delete_atoms_kokkos.cpp | 238 ----------------------------- src/KOKKOS/delete_atoms_kokkos.h | 50 ------ 2 files changed, 288 deletions(-) delete mode 100644 src/KOKKOS/delete_atoms_kokkos.cpp delete mode 100644 src/KOKKOS/delete_atoms_kokkos.h diff --git a/src/KOKKOS/delete_atoms_kokkos.cpp b/src/KOKKOS/delete_atoms_kokkos.cpp deleted file mode 100644 index f5b112f647..0000000000 --- a/src/KOKKOS/delete_atoms_kokkos.cpp +++ /dev/null @@ -1,238 +0,0 @@ -// clang-format off -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - 
https://www.lammps.org/, Sandia National Laboratories - LAMMPS development team: developers@lammps.org - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author: Mitch Murphy, alphataubio at gmail -------------------------------------------------------------------------- */ - -#include "delete_atoms_kokkos.h" - -#include "angle.h" -#include "atom_kokkos.h" -#include "atom_masks.h" -#include "comm.h" -#include "dihedral.h" -#include "domain.h" -#include "error.h" -#include "force.h" -#include "group.h" -#include "improper.h" -#include "kokkos.h" -#include "kspace.h" -#include "neighbor_kokkos.h" -#include "memory_kokkos.h" -#include "modify.h" -#include "neighbor.h" -#include "pair.h" -#include "timer.h" -#include "update.h" - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -template -DeleteAtomsKokkos::DeleteAtomsKokkos(LAMMPS *lmp) : DeleteAtoms(lmp) -{ - atomKK = (AtomKokkos *) atom; -} - -/* ---------------------------------------------------------------------- */ - -template -void DeleteAtomsKokkos::command(int narg, char **arg) -{ - atomKK->sync(Host, X_MASK|RMASS_MASK|TYPE_MASK); - DeleteAtoms::command(narg, arg); -} - -/* ---------------------------------------------------------------------- - delete atoms so there are no pairs within cutoff - which atoms are deleted depends on ordering of atoms within proc - deletions can vary with processor count - no guarantee that minimium number of atoms will be deleted -------------------------------------------------------------------------- 
*/ - -template -void DeleteAtomsKokkos::delete_overlap(int narg, char **arg) -{ - if (narg < 4) utils::missing_cmd_args(FLERR, "delete_atoms overlap", error); - - // read args - - const double cut = utils::numeric(FLERR, arg[1], false, lmp); - const double cutsq = cut * cut; - - int igroup1 = group->find(arg[2]); - if (igroup1 < 0) - error->all(FLERR, "Could not find delete_atoms overlap first group ID {}", arg[2]); - int igroup2 = group->find(arg[3]); - if (igroup2 < 0) - error->all(FLERR, "Could not find delete_atoms overlap second group ID {}", arg[3]); - options(narg - 4, &arg[4]); - - const int group1bit = group->bitmask[igroup1]; - const int group2bit = group->bitmask[igroup2]; - - if (comm->me == 0) utils::logmesg(lmp, "System init for delete_atoms/kk ...\n"); - - // request a full neighbor list for use by this command - - neighbor->add_request(this, "delete_atoms/kk", NeighConst::REQ_FULL); - - // init entire system since comm->borders and neighbor->build is done - // comm::init needs neighbor::init needs pair::init needs kspace::init, etc - - lmp->init(); - - // error check on cutoff - // if no pair style, neighbor list will be empty - - if (force->pair == nullptr) error->all(FLERR, "Delete_atoms requires a pair style be defined"); - if (cut > neighbor->cutneighmax) error->all(FLERR, "Delete_atoms cutoff > max neighbor cutoff"); - if (cut > neighbor->cutneighmin && comm->me == 0) - error->warning(FLERR, "Delete_atoms cutoff > minimum neighbor cutoff"); - - // setup domain, communication and neighboring - // acquire ghosts and build standard neighbor lists - - if (domain->triclinic) domain->x2lamda(atom->nlocal); - domain->pbc(); - domain->reset_box(); - comm->setup(); - if (neighbor->style) neighbor->setup_bins(); - comm->exchange(); - comm->borders(); - if (domain->triclinic) domain->lamda2x(atom->nlocal + atom->nghost); - neighbor->build(1); - - // build neighbor list this command needs based on the earlier request - - auto list = 
neighbor->find_list(this); - neighbor->build_one(list); - - auto inum = list->inum; - NeighListKokkos* k_list = static_cast*>(list); - auto d_numneigh = k_list->d_numneigh; - auto d_neighbors = k_list->d_neighbors; - auto d_ilist = k_list->d_ilist; - - // allocate and initialize deletion list - // must be after exchange potentially changes nlocal - - int nlocal = atom->nlocal; - memoryKK->create_kokkos(k_dlist, dlist, nlocal, "delete_atoms:dlist"); - for (int i = 0; i < nlocal; i++) dlist[i] = 0; - k_dlist.template sync(); - - - - // double loop over owned atoms and their full neighbor list - // at end of loop, there are no more overlaps - // only ever delete owned atom I in I loop iteration, never J even if owned - - auto d_x = atomKK->k_x.template view(); - auto d_tag = atomKK->k_tag.template view(); - auto d_mask = atomKK->k_mask.template view(); - - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; - - int i, j, ii, jj, jnum; - - int *ilist, *jlist, *numneigh, **firstneigh; - double factor_lj, factor_coul; - - ilist = list->ilist; - numneigh = list->numneigh; - firstneigh = list->firstneigh; - - copymode = 1; - - for (ii = 0; ii < inum; ii++) { - i = ilist[ii]; - if (!(d_mask[i] & (group1bit | group2bit))) continue; - double xtmp = d_x(i,0); - double ytmp = d_x(i,1); - double ztmp = d_x(i,2); - jlist = firstneigh[i]; - jnum = numneigh[i]; - - for (jj = 0; jj < jnum; jj++) { - j = jlist[jj]; - factor_lj = special_lj[sbmask(j)]; - factor_coul = special_coul[sbmask(j)]; - j &= NEIGHMASK; - if (!(d_mask[j] & (group1bit | group2bit))) continue; - - // if both weighting factors are 0, skip this pair - // could be 0 and still be in neigh list for long-range Coulombics - // want consistency with non-charged pairs which wouldn't be in list - - if (factor_lj == 0.0 && factor_coul == 0.0) continue; - - // only consider deletion if I,J distance < cutoff - // compute rsq identically on both I,J loop iterations - // ignoring possibility 
that I,J tags are equal - - double delx, dely, delz; - - if (d_tag(i) < d_tag(j)) { - delx = xtmp - d_x(j,0); - dely = ytmp - d_x(j,1); - delz = ztmp - d_x(j,2); - } else { - delx = d_x(j,0) - xtmp; - dely = d_x(j,1) - ytmp; - delz = d_x(j,2) - ztmp; - } - double rsq = delx * delx + dely * dely + delz * delz; - if (rsq >= cutsq) continue; - - // only consider deletion if I,J are in groups 1,2 respectively - // true whether J is owned or ghost atom - - if (!(d_mask[i] & group1bit)) continue; - if (!(d_mask[j] & group2bit)) continue; - - // J is owned atom: - // delete atom I if atom J has not already been deleted - // J is ghost atom: - // delete atom I if J,I is not a candidate deletion pair - // due to being in groups 1,2 respectively - // if they are candidate pair, then either: - // another proc owns J and could delete J - // J is a ghost of another of my owned atoms, and I could delete J - // test on tags of I,J ensures that only I or J is deleted - - if (j < nlocal) { - if (dlist[j]) continue; - } else if ((d_mask[i] & group2bit) && (d_mask[j] & group1bit)) { - if (d_tag(i) > d_tag(j)) continue; - } - - dlist[i] = 1; - break; - } - } - neighbor->init(); - k_dlist.template modify(); -} - -namespace LAMMPS_NS { -template class DeleteAtomsKokkos; -#ifdef LMP_KOKKOS_GPU -template class DeleteAtomsKokkos; -#endif -} diff --git a/src/KOKKOS/delete_atoms_kokkos.h b/src/KOKKOS/delete_atoms_kokkos.h deleted file mode 100644 index 5c396ee934..0000000000 --- a/src/KOKKOS/delete_atoms_kokkos.h +++ /dev/null @@ -1,50 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - https://www.lammps.org/, Sandia National Laboratories - LAMMPS development team: developers@lammps.org - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. 
This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#ifdef COMMAND_CLASS -// clang-format off -CommandStyle(delete_atoms/kk,DeleteAtomsKokkos); -CommandStyle(delete_atoms/kk/device,DeleteAtomsKokkos); -CommandStyle(delete_atoms/kk/host,DeleteAtomsKokkos); -// clang-format on -#else - -#ifndef LMP_DELETE_ATOMS_KOKKOS_H -#define LMP_DELETE_ATOMS_KOKKOS_H - -#include "delete_atoms.h" -#include "kokkos_type.h" - -namespace LAMMPS_NS { - -template -class DeleteAtomsKokkos : public DeleteAtoms { - public: - DeleteAtomsKokkos(class LAMMPS *); - - void command(int, char **) override; - - void delete_overlap(int, char **); - - //KOKKOS_INLINE_FUNCTION - //void operator()(const int &i) const; - - protected: - - DAT::tdual_int_1d k_dlist; - -}; -} // namespace LAMMPS_NS - -#endif //LMP_DELETE_ATOMS_KOKKOS_H -#endif From e29b06095db1915383bd3be2d684f581b2bec0bd Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 1 Oct 2024 02:36:22 -0400 Subject: [PATCH 105/294] remove whitespace --- src/fix_wall_region.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fix_wall_region.cpp b/src/fix_wall_region.cpp index 7dac494160..542e1cc445 100644 --- a/src/fix_wall_region.cpp +++ b/src/fix_wall_region.cpp @@ -51,7 +51,7 @@ FixWallRegion::FixWallRegion(LAMMPS *lmp, int narg, char **arg) : respa_level_support = 1; ilevel_respa = 0; ewall = new double[4]; - + // parse args region = domain->get_region_by_id(arg[3]); From e32a2fa71a4e0187fd993f29c1301d190e33f340 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 1 Oct 2024 02:39:17 -0400 Subject: [PATCH 106/294] punt again --- .../force-styles/tests/fix-timestep-cmap.yaml | 80 ------------------- 1 file changed, 80 deletions(-) delete mode 100644 unittest/force-styles/tests/fix-timestep-cmap.yaml diff --git a/unittest/force-styles/tests/fix-timestep-cmap.yaml 
b/unittest/force-styles/tests/fix-timestep-cmap.yaml deleted file mode 100644 index 86e21a891d..0000000000 --- a/unittest/force-styles/tests/fix-timestep-cmap.yaml +++ /dev/null @@ -1,80 +0,0 @@ ---- -lammps_version: 27 Jun 2024 -tags: generated -date_generated: Thu Aug 1 00:19:04 2024 -epsilon: 2e-14 -skip_tests: -prerequisites: ! | - atom full - fix cmap -pre_commands: ! "" -post_commands: ! | - fix move all nve - fix test all cmap charmm36.cmap - fix_modify test energy yes -input_file: in.cmap -natoms: 29 -run_stress: ! |2- - 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -global_scalar: 0 -run_pos: ! |2 - 1 -2.7045576931365384e-01 2.4912153915127910e+00 -1.6695660174193144e-01 - 2 3.1006650885653392e-01 2.9612066384818774e+00 -8.5468359240877056e-01 - 3 -7.0398718670996596e-01 1.2305509689997693e+00 -6.2777234357568623e-01 - 4 -1.5814449350624078e+00 1.4843404724798535e+00 -1.2538273456433655e+00 - 5 -9.0783243756685006e-01 9.2526534460922938e-01 3.9949965943785426e-01 - 6 2.4859337464110062e-01 2.8395437677292801e-01 -1.2315849919995718e+00 - 7 3.4129121643837462e-01 -2.3102788529791828e-02 -2.5291407998329900e+00 - 8 1.1743406680717965e+00 -4.8860189094234913e-01 -6.3780684414657063e-01 - 9 1.3800528609303513e+00 -2.5274652114108015e-01 2.8354186861628400e-01 - 10 2.0510776838326117e+00 -1.4602212365720617e+00 -9.8289749648832170e-01 - 11 1.7878063062190042e+00 -1.9921840498876129e+00 -1.8890528117809133e+00 - 12 3.0062653102416346e+00 -4.9030348819064951e-01 -1.6234817573863822e+00 - 13 4.0515398561601499e+00 -8.9202280298994308e-01 -1.6400070473765287e+00 - 14 2.6066954671851068e+00 -4.1789389390575277e-01 -2.6634066414774398e+00 - 15 2.9697386898129197e+00 5.5405474601205984e-01 -1.2343532907729176e+00 - 16 2.6747029564056741e+00 -2.4124117273842192e+00 -2.3434860532736367e-02 - 17 2.2153579387356999e+00 -2.0897987524705992e+00 1.1963152377872239e+00 - 18 
2.1369285315978819e+00 3.0156108277459790e+00 -3.5183940657539963e+00 - 19 1.5355811460020863e+00 2.6255306350384799e+00 -4.2354168844939002e+00 - 20 2.7727385869610495e+00 3.6933911950960656e+00 -3.9313456335665453e+00 - 21 4.9040149976454908e+00 -4.0752342739930612e+00 -3.6210280393155685e+00 - 22 4.3584283483000474e+00 -4.2126170417598745e+00 -4.4609852540369923e+00 - 23 5.7439382608753773e+00 -3.5821957713386881e+00 -3.8766362488100117e+00 - 24 2.0689237180918769e+00 3.1513348704499196e+00 3.1550384095102570e+00 - 25 1.3045090135211659e+00 3.2665689836321810e+00 2.5111204914634193e+00 - 26 2.5809239161761726e+00 4.0117601377202847e+00 3.2212062405016724e+00 - 27 -1.9611007896081207e+00 -4.3563573211261462e+00 2.1098614022771494e+00 - 28 -2.7473545914982185e+00 -4.0200829741975630e+00 1.5830064034427631e+00 - 29 -1.3125994707851243e+00 -3.5962514442513154e+00 2.2746344518754498e+00 -run_vel: ! |2 - 1 8.1702074645354263e-03 1.6515202117650986e-02 4.7941469336088534e-03 - 2 5.4793033469769841e-03 5.1464824735920319e-03 -1.4591356769853548e-03 - 3 -8.2335058988087587e-03 -1.2926663429897282e-02 -4.0922324315820231e-03 - 4 -3.3968512986425896e-03 -5.9586658523124144e-03 -1.0756650770777734e-03 - 5 -1.1658826051209279e-02 -1.1193993209032561e-02 -2.8787337175040129e-03 - 6 -3.9380810946715861e-02 4.7658078376635958e-02 3.6984166084389578e-02 - 7 7.6859250359065487e-04 -1.0596844346008383e-02 -5.1474108818315739e-02 - 8 7.8658367472618524e-03 -3.3020166182538553e-03 3.4576616847351263e-02 - 9 1.5651759825792155e-03 3.7379367797178631e-03 1.5051270508251535e-02 - 10 2.9209115027410210e-02 -2.9079636336334622e-02 -1.4693353834959967e-02 - 11 -4.7791036632847351e-03 -3.7420517412193981e-03 -2.3314072395415053e-03 - 12 2.2371540751205394e-03 -5.1178724927781723e-04 -3.3452354485908379e-03 - 13 2.7521535591674470e-03 5.8111658360700744e-03 -8.0472550021755981e-04 - 14 3.5228591162840489e-03 -5.7968032496769481e-03 -3.9605392790085618e-03 - 15 -1.6484125488189587e-03 
-6.0254944267966555e-03 6.1452231213603593e-03 - 16 1.8681533496201778e-02 -1.3262182433081761e-02 -4.5636846184444781e-02 - 17 -1.2895956946875307e-02 9.7523042076136327e-03 3.7296862271497824e-02 - 18 -8.3844604949622544e-04 -1.0959331979630918e-03 -1.8883231141051712e-03 - 19 1.2400642737389216e-03 -2.5031680516680289e-03 7.2633208828032422e-03 - 20 3.5619032388416552e-03 4.6664333858458936e-03 4.9145981095079193e-03 - 21 -1.4645071159471662e-03 -2.7242209180838758e-04 7.1272665704736585e-04 - 22 -6.8856986268787035e-03 -4.2649670960839070e-03 5.6565289286038072e-04 - 23 6.0446701004681610e-03 -1.3999558207043038e-03 2.5817272842782119e-03 - 24 3.1797174259548137e-04 -9.9409313510120316e-04 1.4885702447403561e-04 - 25 1.1518919433985852e-04 -4.3777019831790272e-03 -8.8058800262529524e-04 - 26 2.0489472664324440e-03 2.7810807643201753e-03 4.3249553258435623e-03 - 27 4.8891848045180331e-04 -1.0464891567315256e-03 2.4353637884337831e-04 - 28 -6.2510920436768891e-03 1.4107986848621819e-03 -1.8406053609070112e-03 - 29 6.4221263686005782e-04 3.1280619277518889e-03 3.7257842641040153e-03 -... 
From 51e273affdad9d624efa41bc624f847e10b01755 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 1 Oct 2024 03:30:05 -0400 Subject: [PATCH 107/294] punt drude too --- src/KOKKOS/fix_tgnh_drude_kokkos.cpp | 2326 ------------------------- src/KOKKOS/fix_tgnh_drude_kokkos.h | 161 -- src/KOKKOS/fix_tgnpt_drude_kokkos.cpp | 53 - src/KOKKOS/fix_tgnpt_drude_kokkos.h | 37 - src/KOKKOS/fix_tgnvt_drude_kokkos.cpp | 44 - src/KOKKOS/fix_tgnvt_drude_kokkos.h | 37 - 6 files changed, 2658 deletions(-) delete mode 100644 src/KOKKOS/fix_tgnh_drude_kokkos.cpp delete mode 100644 src/KOKKOS/fix_tgnh_drude_kokkos.h delete mode 100644 src/KOKKOS/fix_tgnpt_drude_kokkos.cpp delete mode 100644 src/KOKKOS/fix_tgnpt_drude_kokkos.h delete mode 100644 src/KOKKOS/fix_tgnvt_drude_kokkos.cpp delete mode 100644 src/KOKKOS/fix_tgnvt_drude_kokkos.h diff --git a/src/KOKKOS/fix_tgnh_drude_kokkos.cpp b/src/KOKKOS/fix_tgnh_drude_kokkos.cpp deleted file mode 100644 index 3b35b13929..0000000000 --- a/src/KOKKOS/fix_tgnh_drude_kokkos.cpp +++ /dev/null @@ -1,2326 +0,0 @@ -// clang-format off -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - https://www.lammps.org/, Sandia National Laboratories - LAMMPS development team: developers@lammps.org - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author: Mitch Murphy (alphataubio at gmail) -------------------------------------------------------------------------- */ - -#include "fix_tgnh_drude_kokkos.h" - -#include "atom.h" -#include "comm.h" -#include "compute.h" -#include "domain.h" -#include "error.h" -#include "fix_deform.h" -#include "fix_drude.h" -#include "force.h" -#include "irregular.h" -#include "kspace.h" -#include "memory.h" -#include "modify.h" -#include "neighbor.h" -#include "respa.h" -#include "update.h" - -#include -#include - -using namespace LAMMPS_NS; -using namespace FixConst; - -static constexpr double DELTAFLIP = 0.1; -static constexpr double TILTMAX = 1.5; - -enum{NOBIAS,BIAS}; -enum{NONE,XYZ,XY,YZ,XZ}; -enum{ISO,ANISO,TRICLINIC}; - -/* ---------------------------------------------------------------------- - NVT,NPH,NPT integrators for improved Nose-Hoover equations of motion - ---------------------------------------------------------------------- */ - -FixTGNHDrude::FixTGNHDrude(LAMMPS *lmp, int narg, char **arg) : - Fix(lmp, narg, arg), irregular(nullptr), id_temp(nullptr), id_press(nullptr), etamol(nullptr), - etamol_dot(nullptr), etamol_dotdot(nullptr), etamol_mass(nullptr), etaint(nullptr), - etaint_dot(nullptr), etaint_dotdot(nullptr), etaint_mass(nullptr), etadrude(nullptr), - etadrude_dot(nullptr), etadrude_dotdot(nullptr), etadrude_mass(nullptr), etap(nullptr), - etap_dot(nullptr), etap_dotdot(nullptr), etap_mass(nullptr) -{ - if (narg < 4) error->all(FLERR, "Illegal fix {} command", style); - - restart_global = 1; - dynamic_group_allow = 0; - time_integrate = 1; - scalar_flag = 1; - vector_flag = 1; - global_freq = 1; - extscalar = 1; - extvector = 0; - ecouple_flag = 1; - - // default values - - pcouple = NONE; - mtchain = mpchain = 3; - nc_tchain = nc_pchain = 1; - mtk_flag = 1; - deviatoric_flag = 0; - 
nreset_h0 = 0; - flipflag = 1; - - tcomputeflag = 0; - pcomputeflag = 0; - id_temp = nullptr; - id_press = nullptr; - - // turn on tilt factor scaling, whenever applicable - - dimension = domain->dimension; - - scaleyz = scalexz = scalexy = 0; - if (domain->yperiodic && domain->xy != 0.0) scalexy = 1; - if (domain->zperiodic && dimension == 3) { - if (domain->yz != 0.0) scaleyz = 1; - if (domain->xz != 0.0) scalexz = 1; - } - - // set fixed-point to default = center of cell - - fixedpoint[0] = 0.5*(domain->boxlo[0]+domain->boxhi[0]); - fixedpoint[1] = 0.5*(domain->boxlo[1]+domain->boxhi[1]); - fixedpoint[2] = 0.5*(domain->boxlo[2]+domain->boxhi[2]); - - tstat_flag = 0; - double t_period = 0.0, tdrude_period = 0.0; - - double p_period[6]; - for (int i = 0; i < 6; i++) { - p_start[i] = p_stop[i] = p_period[i] = p_target[i] = 0.0; - p_flag[i] = 0; - } - - // process keywords - - int iarg = 3; - - while (iarg < narg) { - if (strcmp(arg[iarg],"temp") == 0) { - if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - tstat_flag = 1; - t_start = utils::numeric(FLERR,arg[iarg+1],false,lmp); - t_target = t_start; - t_stop = utils::numeric(FLERR,arg[iarg+2],false,lmp); - t_period = utils::numeric(FLERR,arg[iarg+3],false,lmp); - if (t_start <= 0.0 || t_stop <= 0.0) - error->all(FLERR, - "Target temperature for fix nvt/npt/nph cannot be 0.0"); - tdrude_target = utils::numeric(FLERR,arg[iarg+4],false,lmp); - tdrude_period = utils::numeric(FLERR,arg[iarg+5],false,lmp); - iarg += 6; - - } else if (strcmp(arg[iarg],"iso") == 0) { - if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - pcouple = XYZ; - p_start[0] = p_start[1] = p_start[2] = utils::numeric(FLERR,arg[iarg+1],false,lmp); - p_stop[0] = p_stop[1] = p_stop[2] = utils::numeric(FLERR,arg[iarg+2],false,lmp); - p_period[0] = p_period[1] = p_period[2] = utils::numeric(FLERR,arg[iarg+3],false,lmp); - p_flag[0] = p_flag[1] = p_flag[2] = 1; - if (dimension == 2) { - p_start[2] = p_stop[2] = 
p_period[2] = 0.0; - p_flag[2] = 0; - } - iarg += 4; - } else if (strcmp(arg[iarg],"aniso") == 0) { - if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - pcouple = NONE; - p_start[0] = p_start[1] = p_start[2] = utils::numeric(FLERR,arg[iarg+1],false,lmp); - p_stop[0] = p_stop[1] = p_stop[2] = utils::numeric(FLERR,arg[iarg+2],false,lmp); - p_period[0] = p_period[1] = p_period[2] = utils::numeric(FLERR,arg[iarg+3],false,lmp); - p_flag[0] = p_flag[1] = p_flag[2] = 1; - if (dimension == 2) { - p_start[2] = p_stop[2] = p_period[2] = 0.0; - p_flag[2] = 0; - } - iarg += 4; - } else if (strcmp(arg[iarg],"tri") == 0) { - if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - pcouple = NONE; - scalexy = scalexz = scaleyz = 0; - p_start[0] = p_start[1] = p_start[2] = utils::numeric(FLERR,arg[iarg+1],false,lmp); - p_stop[0] = p_stop[1] = p_stop[2] = utils::numeric(FLERR,arg[iarg+2],false,lmp); - p_period[0] = p_period[1] = p_period[2] = utils::numeric(FLERR,arg[iarg+3],false,lmp); - p_flag[0] = p_flag[1] = p_flag[2] = 1; - p_start[3] = p_start[4] = p_start[5] = 0.0; - p_stop[3] = p_stop[4] = p_stop[5] = 0.0; - p_period[3] = p_period[4] = p_period[5] = utils::numeric(FLERR,arg[iarg+3],false,lmp); - p_flag[3] = p_flag[4] = p_flag[5] = 1; - if (dimension == 2) { - p_start[2] = p_stop[2] = p_period[2] = 0.0; - p_flag[2] = 0; - p_start[3] = p_stop[3] = p_period[3] = 0.0; - p_flag[3] = 0; - p_start[4] = p_stop[4] = p_period[4] = 0.0; - p_flag[4] = 0; - } - iarg += 4; - } else if (strcmp(arg[iarg],"x") == 0) { - if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - p_start[0] = utils::numeric(FLERR,arg[iarg+1],false,lmp); - p_stop[0] = utils::numeric(FLERR,arg[iarg+2],false,lmp); - p_period[0] = utils::numeric(FLERR,arg[iarg+3],false,lmp); - p_flag[0] = 1; - deviatoric_flag = 1; - iarg += 4; - } else if (strcmp(arg[iarg],"y") == 0) { - if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - p_start[1] = 
utils::numeric(FLERR,arg[iarg+1],false,lmp); - p_stop[1] = utils::numeric(FLERR,arg[iarg+2],false,lmp); - p_period[1] = utils::numeric(FLERR,arg[iarg+3],false,lmp); - p_flag[1] = 1; - deviatoric_flag = 1; - iarg += 4; - } else if (strcmp(arg[iarg],"z") == 0) { - if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - p_start[2] = utils::numeric(FLERR,arg[iarg+1],false,lmp); - p_stop[2] = utils::numeric(FLERR,arg[iarg+2],false,lmp); - p_period[2] = utils::numeric(FLERR,arg[iarg+3],false,lmp); - p_flag[2] = 1; - deviatoric_flag = 1; - iarg += 4; - if (dimension == 2) - error->all(FLERR,"Invalid fix nvt/npt/nph command for a 2d simulation"); - - } else if (strcmp(arg[iarg],"yz") == 0) { - if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - p_start[3] = utils::numeric(FLERR,arg[iarg+1],false,lmp); - p_stop[3] = utils::numeric(FLERR,arg[iarg+2],false,lmp); - p_period[3] = utils::numeric(FLERR,arg[iarg+3],false,lmp); - p_flag[3] = 1; - deviatoric_flag = 1; - scaleyz = 0; - iarg += 4; - if (dimension == 2) - error->all(FLERR,"Invalid fix nvt/npt/nph command for a 2d simulation"); - } else if (strcmp(arg[iarg],"xz") == 0) { - if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - p_start[4] = utils::numeric(FLERR,arg[iarg+1],false,lmp); - p_stop[4] = utils::numeric(FLERR,arg[iarg+2],false,lmp); - p_period[4] = utils::numeric(FLERR,arg[iarg+3],false,lmp); - p_flag[4] = 1; - deviatoric_flag = 1; - scalexz = 0; - iarg += 4; - if (dimension == 2) - error->all(FLERR,"Invalid fix nvt/npt/nph command for a 2d simulation"); - } else if (strcmp(arg[iarg],"xy") == 0) { - if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - p_start[5] = utils::numeric(FLERR,arg[iarg+1],false,lmp); - p_stop[5] = utils::numeric(FLERR,arg[iarg+2],false,lmp); - p_period[5] = utils::numeric(FLERR,arg[iarg+3],false,lmp); - p_flag[5] = 1; - deviatoric_flag = 1; - scalexy = 0; - iarg += 4; - - } else if 
(strcmp(arg[iarg],"couple") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - if (strcmp(arg[iarg+1],"xyz") == 0) pcouple = XYZ; - else if (strcmp(arg[iarg+1],"xy") == 0) pcouple = XY; - else if (strcmp(arg[iarg+1],"yz") == 0) pcouple = YZ; - else if (strcmp(arg[iarg+1],"xz") == 0) pcouple = XZ; - else if (strcmp(arg[iarg+1],"none") == 0) pcouple = NONE; - else error->all(FLERR,"Illegal fix nvt/npt/nph command"); - iarg += 2; - } else if (strcmp(arg[iarg],"tchain") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - mtchain = utils::inumeric(FLERR,arg[iarg+1],false,lmp); - if (mtchain < 1) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - iarg += 2; - } else if (strcmp(arg[iarg],"pchain") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - mpchain = utils::inumeric(FLERR,arg[iarg+1],false,lmp); - if (mpchain < 0) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - iarg += 2; - } else if (strcmp(arg[iarg],"mtk") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - mtk_flag = utils::logical(FLERR, arg[iarg + 1], false, lmp); - iarg += 2; - } else if (strcmp(arg[iarg],"tloop") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - nc_tchain = utils::inumeric(FLERR,arg[iarg+1],false,lmp); - if (nc_tchain < 0) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - iarg += 2; - } else if (strcmp(arg[iarg],"ploop") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - nc_pchain = utils::inumeric(FLERR,arg[iarg+1],false,lmp); - if (nc_pchain < 0) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - iarg += 2; - } else if (strcmp(arg[iarg],"nreset") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - nreset_h0 = utils::inumeric(FLERR,arg[iarg+1],false,lmp); - if (nreset_h0 < 0) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - iarg += 2; - } 
else if (strcmp(arg[iarg],"scalexy") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - scalexy = utils::logical(FLERR, arg[iarg + 1], false, lmp); - iarg += 2; - } else if (strcmp(arg[iarg],"scalexz") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - scalexz = utils::logical(FLERR, arg[iarg + 1], false, lmp); - iarg += 2; - } else if (strcmp(arg[iarg],"scaleyz") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - scaleyz = utils::logical(FLERR, arg[iarg + 1], false, lmp); - iarg += 2; - } else if (strcmp(arg[iarg],"flip") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - flipflag = utils::logical(FLERR, arg[iarg + 1], false, lmp); - iarg += 2; - } else if (strcmp(arg[iarg],"fixedpoint") == 0) { - if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - fixedpoint[0] = utils::numeric(FLERR,arg[iarg+1],false,lmp); - fixedpoint[1] = utils::numeric(FLERR,arg[iarg+2],false,lmp); - fixedpoint[2] = utils::numeric(FLERR,arg[iarg+3],false,lmp); - iarg += 4; - } else error->all(FLERR,"Illegal fix nvt/npt/nph command"); - } - - // error checks - - if (dimension == 2 && (p_flag[2] || p_flag[3] || p_flag[4])) - error->all(FLERR,"Invalid fix nvt/npt/nph command for a 2d simulation"); - if (dimension == 2 && (pcouple == YZ || pcouple == XZ)) - error->all(FLERR,"Invalid fix nvt/npt/nph command for a 2d simulation"); - if (dimension == 2 && (scalexz == 1 || scaleyz == 1 )) - error->all(FLERR,"Invalid fix nvt/npt/nph command for a 2d simulation"); - - if (pcouple == XYZ && (p_flag[0] == 0 || p_flag[1] == 0)) - error->all(FLERR,"Invalid fix nvt/npt/nph command pressure settings"); - if (pcouple == XYZ && dimension == 3 && p_flag[2] == 0) - error->all(FLERR,"Invalid fix nvt/npt/nph command pressure settings"); - if (pcouple == XY && (p_flag[0] == 0 || p_flag[1] == 0)) - error->all(FLERR,"Invalid fix nvt/npt/nph command pressure 
settings"); - if (pcouple == YZ && (p_flag[1] == 0 || p_flag[2] == 0)) - error->all(FLERR,"Invalid fix nvt/npt/nph command pressure settings"); - if (pcouple == XZ && (p_flag[0] == 0 || p_flag[2] == 0)) - error->all(FLERR,"Invalid fix nvt/npt/nph command pressure settings"); - - // require periodicity in tensile dimension - - if (p_flag[0] && domain->xperiodic == 0) - error->all(FLERR,"Cannot use fix nvt/npt/nph on a non-periodic dimension"); - if (p_flag[1] && domain->yperiodic == 0) - error->all(FLERR,"Cannot use fix nvt/npt/nph on a non-periodic dimension"); - if (p_flag[2] && domain->zperiodic == 0) - error->all(FLERR,"Cannot use fix nvt/npt/nph on a non-periodic dimension"); - - // require periodicity in 2nd dim of off-diagonal tilt component - - if (p_flag[3] && domain->zperiodic == 0) - error->all(FLERR, - "Cannot use fix nvt/npt/nph on a 2nd non-periodic dimension"); - if (p_flag[4] && domain->zperiodic == 0) - error->all(FLERR, - "Cannot use fix nvt/npt/nph on a 2nd non-periodic dimension"); - if (p_flag[5] && domain->yperiodic == 0) - error->all(FLERR, - "Cannot use fix nvt/npt/nph on a 2nd non-periodic dimension"); - - if (scaleyz == 1 && domain->zperiodic == 0) - error->all(FLERR,"Cannot use fix nvt/npt/nph " - "with yz scaling when z is non-periodic dimension"); - if (scalexz == 1 && domain->zperiodic == 0) - error->all(FLERR,"Cannot use fix nvt/npt/nph " - "with xz scaling when z is non-periodic dimension"); - if (scalexy == 1 && domain->yperiodic == 0) - error->all(FLERR,"Cannot use fix nvt/npt/nph " - "with xy scaling when y is non-periodic dimension"); - - if (p_flag[3] && scaleyz == 1) - error->all(FLERR,"Cannot use fix nvt/npt/nph with " - "both yz dynamics and yz scaling"); - if (p_flag[4] && scalexz == 1) - error->all(FLERR,"Cannot use fix nvt/npt/nph with " - "both xz dynamics and xz scaling"); - if (p_flag[5] && scalexy == 1) - error->all(FLERR,"Cannot use fix nvt/npt/nph with " - "both xy dynamics and xy scaling"); - - if (!domain->triclinic 
&& (p_flag[3] || p_flag[4] || p_flag[5])) - error->all(FLERR,"Can not specify Pxy/Pxz/Pyz in " - "fix nvt/npt/nph with non-triclinic box"); - - if (pcouple == XYZ && dimension == 3 && - (p_start[0] != p_start[1] || p_start[0] != p_start[2] || - p_stop[0] != p_stop[1] || p_stop[0] != p_stop[2] || - p_period[0] != p_period[1] || p_period[0] != p_period[2])) - error->all(FLERR,"Invalid fix nvt/npt/nph pressure settings"); - if (pcouple == XYZ && dimension == 2 && - (p_start[0] != p_start[1] || p_stop[0] != p_stop[1] || - p_period[0] != p_period[1])) - error->all(FLERR,"Invalid fix nvt/npt/nph pressure settings"); - if (pcouple == XY && - (p_start[0] != p_start[1] || p_stop[0] != p_stop[1] || - p_period[0] != p_period[1])) - error->all(FLERR,"Invalid fix nvt/npt/nph pressure settings"); - if (pcouple == YZ && - (p_start[1] != p_start[2] || p_stop[1] != p_stop[2] || - p_period[1] != p_period[2])) - error->all(FLERR,"Invalid fix nvt/npt/nph pressure settings"); - if (pcouple == XZ && - (p_start[0] != p_start[2] || p_stop[0] != p_stop[2] || - p_period[0] != p_period[2])) - error->all(FLERR,"Invalid fix nvt/npt/nph pressure settings"); - - if ((tstat_flag && t_period <= 0.0) || - (p_flag[0] && p_period[0] <= 0.0) || - (p_flag[1] && p_period[1] <= 0.0) || - (p_flag[2] && p_period[2] <= 0.0) || - (p_flag[3] && p_period[3] <= 0.0) || - (p_flag[4] && p_period[4] <= 0.0) || - (p_flag[5] && p_period[5] <= 0.0)) - error->all(FLERR,"Fix nvt/npt/nph damping parameters must be > 0.0"); - - // set pstat_flag and box change and restart_pbc variables - - pre_exchange_flag = 0; - pstat_flag = 0; - pstyle = ISO; - - for (int i = 0; i < 6; i++) - if (p_flag[i]) pstat_flag = 1; - - if (pstat_flag) { - if (p_flag[0]) box_change |= BOX_CHANGE_X; - if (p_flag[1]) box_change |= BOX_CHANGE_Y; - if (p_flag[2]) box_change |= BOX_CHANGE_Z; - if (p_flag[3]) box_change |= BOX_CHANGE_YZ; - if (p_flag[4]) box_change |= BOX_CHANGE_XZ; - if (p_flag[5]) box_change |= BOX_CHANGE_XY; - no_change_box = 1; - 
- // pstyle = TRICLINIC if any off-diagonal term is controlled -> 6 dof - // else pstyle = ISO if XYZ coupling or XY coupling in 2d -> 1 dof - // else pstyle = ANISO -> 3 dof - - if (p_flag[3] || p_flag[4] || p_flag[5]) pstyle = TRICLINIC; - else if (pcouple == XYZ || (dimension == 2 && pcouple == XY)) pstyle = ISO; - else pstyle = ANISO; - - // pre_exchange only required if flips can occur due to shape changes - - if (flipflag && (p_flag[3] || p_flag[4] || p_flag[5])) - pre_exchange_flag = pre_exchange_migrate = 1; - if (flipflag && (domain->yz != 0.0 || domain->xz != 0.0 || domain->xy != 0.0)) - pre_exchange_flag = pre_exchange_migrate = 1; - } - - // convert input periods to frequencies - - t_freq = tdrude_freq = 0.0; - p_freq[0] = p_freq[1] = p_freq[2] = p_freq[3] = p_freq[4] = p_freq[5] = 0.0; - - if (tstat_flag) { - t_freq = 1.0 / t_period; - tdrude_freq = 1.0 / tdrude_period; - } - if (p_flag[0]) p_freq[0] = 1.0 / p_period[0]; - if (p_flag[1]) p_freq[1] = 1.0 / p_period[1]; - if (p_flag[2]) p_freq[2] = 1.0 / p_period[2]; - if (p_flag[3]) p_freq[3] = 1.0 / p_period[3]; - if (p_flag[4]) p_freq[4] = 1.0 / p_period[4]; - if (p_flag[5]) p_freq[5] = 1.0 / p_period[5]; - - // Nose/Hoover temp and pressure init - - size_vector = 3; - - if (tstat_flag) { - int ich; - - etaint = new double[mtchain]; - // add one extra dummy thermostat for eta_dot, set to zero - etaint_dot = new double[mtchain+1]; - etaint_dot[mtchain] = 0.0; - etaint_dotdot = new double[mtchain]; - for (ich = 0; ich < mtchain; ich++) { - etaint[ich] = etaint_dot[ich] = etaint_dotdot[ich] = 0.0; - } - etaint_mass = new double[mtchain]; - - etamol = new double[mtchain]; - // add one extra dummy thermostat for eta_dot, set to zero - etamol_dot = new double[mtchain+1]; - etamol_dot[mtchain] = 0.0; - etamol_dotdot = new double[mtchain]; - for (ich = 0; ich < mtchain; ich++) { - etamol[ich] = etamol_dot[ich] = etamol_dotdot[ich] = 0.0; - } - etamol_mass = new double[mtchain]; - - etadrude = new 
double[mtchain]; - // add one extra dummy thermostat for eta_dot, set to zero - etadrude_dot = new double[mtchain+1]; - etadrude_dot[mtchain] = 0.0; - etadrude_dotdot = new double[mtchain]; - for (ich = 0; ich < mtchain; ich++) { - etadrude[ich] = etadrude_dot[ich] = etadrude_dotdot[ich] = 0.0; - } - etadrude_mass = new double[mtchain]; - } - - if (pstat_flag) { - omega[0] = omega[1] = omega[2] = 0.0; - omega_dot[0] = omega_dot[1] = omega_dot[2] = 0.0; - omega_mass[0] = omega_mass[1] = omega_mass[2] = 0.0; - omega[3] = omega[4] = omega[5] = 0.0; - omega_dot[3] = omega_dot[4] = omega_dot[5] = 0.0; - omega_mass[3] = omega_mass[4] = omega_mass[5] = 0.0; - - if (mpchain) { - int ich; - etap = new double[mpchain]; - - // add one extra dummy thermostat, set to zero - - etap_dot = new double[mpchain+1]; - etap_dot[mpchain] = 0.0; - etap_dotdot = new double[mpchain]; - for (ich = 0; ich < mpchain; ich++) { - etap[ich] = etap_dot[ich] = - etap_dotdot[ich] = 0.0; - } - etap_mass = new double[mpchain]; - } - } - - if (pre_exchange_flag) irregular = new Irregular(lmp); - else irregular = nullptr; - - // initialize vol0,t0 to zero to signal uninitialized - // values then assigned in init(), if necessary - - vol0 = t0 = 0.0; - - // find fix drude - - auto fdrude = modify->get_fix_by_style("^drude$"); - if (fdrude.size() < 1) error->all(FLERR, "Fix {} requires fix drude", style); - fix_drude = dynamic_cast(fdrude[0]); - if (!fix_drude) error->all(FLERR, "Fix {} requires fix drude", style); - - // make sure ghost atoms have velocity - if (!comm->ghost_velocity) - error->all(FLERR,"Fix {} requires ghost velocities. 
Use comm_modify vel yes", style); -} - -/* ---------------------------------------------------------------------- */ - -FixTGNHDrude::~FixTGNHDrude() -{ - if (copymode) return; - - delete irregular; - - // delete temperature and pressure if fix created them - - if (tcomputeflag) modify->delete_compute(id_temp); - delete[] id_temp; - - if (tstat_flag) { - delete[] etaint; - delete[] etaint_dot; - delete[] etaint_dotdot; - delete[] etaint_mass; - delete[] etamol; - delete[] etamol_dot; - delete[] etamol_dotdot; - delete[] etamol_mass; - delete[] etadrude; - delete[] etadrude_dot; - delete[] etadrude_dotdot; - delete[] etadrude_mass; - } - - if (pstat_flag) { - if (pcomputeflag) modify->delete_compute(id_press); - delete[] id_press; - if (mpchain) { - delete[] etap; - delete[] etap_dot; - delete[] etap_dotdot; - delete[] etap_mass; - } - } -} - -/* ---------------------------------------------------------------------- */ - -int FixTGNHDrude::setmask() -{ - int mask = 0; - mask |= INITIAL_INTEGRATE; - mask |= FINAL_INTEGRATE; - mask |= INITIAL_INTEGRATE_RESPA; - mask |= PRE_FORCE_RESPA; - mask |= FINAL_INTEGRATE_RESPA; - if (pre_exchange_flag) mask |= PRE_EXCHANGE; - return mask; -} - -/* ---------------------------------------------------------------------- */ - -void FixTGNHDrude::init() -{ - // ensure no conflict with fix deform - - if (pstat_flag) - for (int i = 0; i < modify->nfix; i++) - if (strcmp(modify->fix[i]->style,"deform") == 0) { - int *dimflag = (dynamic_cast(modify->fix[i]))->dimflag; - if ((p_flag[0] && dimflag[0]) || (p_flag[1] && dimflag[1]) || - (p_flag[2] && dimflag[2]) || (p_flag[3] && dimflag[3]) || - (p_flag[4] && dimflag[4]) || (p_flag[5] && dimflag[5])) - error->all(FLERR,"Cannot use fix npt and fix deform on " - "same component of stress tensor"); - } - - // set temperature and pressure ptrs - - temperature = modify->get_compute_by_id(id_temp); - if (!temperature) error->all(FLERR,"Temperature ID for fix {} does not exist", style); - - if 
(temperature->tempbias) which = BIAS; - else which = NOBIAS; - - if (pstat_flag) { - pressure = modify->get_compute_by_id(id_press); - if (!pressure) error->all(FLERR,"Pressure ID for fix {} does not exist", id_press); - } - - // set timesteps and frequencies - - dtv = update->dt; - dtf = 0.5 * update->dt * force->ftm2v; - dthalf = 0.5 * update->dt; - dt4 = 0.25 * update->dt; - dt8 = 0.125 * update->dt; - dto = dthalf; - - p_freq_max = 0.0; - if (pstat_flag) { - p_freq_max = MAX(p_freq[0],p_freq[1]); - p_freq_max = MAX(p_freq_max,p_freq[2]); - if (pstyle == TRICLINIC) { - p_freq_max = MAX(p_freq_max,p_freq[3]); - p_freq_max = MAX(p_freq_max,p_freq[4]); - p_freq_max = MAX(p_freq_max,p_freq[5]); - } - } - - // tally the number of dimensions that are barostatted - // set initial volume and reference cell, if not already done - - if (pstat_flag) { - pdim = p_flag[0] + p_flag[1] + p_flag[2]; - if (vol0 == 0.0) { - if (dimension == 3) vol0 = domain->xprd * domain->yprd * domain->zprd; - else vol0 = domain->xprd * domain->yprd; - h0_inv[0] = domain->h_inv[0]; - h0_inv[1] = domain->h_inv[1]; - h0_inv[2] = domain->h_inv[2]; - h0_inv[3] = domain->h_inv[3]; - h0_inv[4] = domain->h_inv[4]; - h0_inv[5] = domain->h_inv[5]; - } - } - - boltz = force->boltz; - nktv2p = force->nktv2p; - - if (force->kspace) kspace_flag = 1; - else kspace_flag = 0; - - if (utils::strmatch(update->integrate_style,"^respa")) { - nlevels_respa = (dynamic_cast(update->integrate))->nlevels; - step_respa = (dynamic_cast(update->integrate))->step; - dto = 0.5*step_respa[0]; - } - - // detect if any rigid fixes exist so rigid bodies move when box is remapped - - rfix.clear(); - for (auto &ifix : modify->get_fix_list()) - if (ifix->rigid_flag) rfix.push_back(ifix); -} - -/* ---------------------------------------------------------------------- - compute T,P before integrator starts -------------------------------------------------------------------------- */ - -void FixTGNHDrude::setup_mol_mass_dof() { - 
double *mass = atom->mass; - int *mask = atom->mask; - tagint *molecule = atom->molecule; - int *type = atom->type; - int *drudetype = fix_drude->drudetype; - int n_drude, n_drude_tmp = 0; - tagint id_mol = 0, n_mol_in_group = 0; - - for (int i = 0; i < atom->nlocal; i++) { - // molecule id starts from 1. max(id_mol) equals to the number of molecules in the system - id_mol = std::max(id_mol, molecule[i]); - if (mask[i] & groupbit) { - if (drudetype[type[i]] == DRUDE_TYPE) - n_drude_tmp++; - } - } - MPI_Allreduce(&n_drude_tmp, &n_drude, 1, MPI_LMP_TAGINT, MPI_SUM, world); - MPI_Allreduce(&id_mol, &n_mol, 1, MPI_LMP_TAGINT, MPI_MAX, world); - - // use flag_mol to determine the number of molecules in the fix group - int *flag_mol = new int[n_mol + 1]; - int *flag_mol_tmp = new int[n_mol + 1]; - memset(flag_mol_tmp, 0, sizeof(int) * (n_mol + 1)); - for (int i = 0; i < atom->nlocal; i++) { - if (mask[i] & groupbit) { - flag_mol_tmp[molecule[i]] = 1; - } - } - MPI_Allreduce(flag_mol_tmp, flag_mol, n_mol + 1, MPI_INT, MPI_SUM, world); - for (int i = 1; i < n_mol + 1; i++) { - if (flag_mol[i]) - n_mol_in_group++; - } - delete[] flag_mol; - delete[] flag_mol_tmp; - - // length of v_mol set to n_mol+1, so that the subscript start from 1, we can call v_mol[n_mol] - memory->create(v_mol, n_mol + 1, 3, "fix_tgnh_drude::v_mol"); - memory->create(v_mol_tmp, n_mol + 1, 3, "fix_tgnh_drude::v_mol_tmp"); - memory->create(mass_mol, n_mol + 1, "fix_tgnh_drude::mass_mol"); - - auto mass_tmp = new double[n_mol + 1]; - memset(mass_tmp, 0, sizeof(double) * (n_mol + 1)); - for (int i = 0; i < atom->nlocal; i++) { - id_mol = molecule[i]; - mass_tmp[id_mol] += mass[type[i]]; - } - MPI_Allreduce(mass_tmp, mass_mol, n_mol + 1, MPI_DOUBLE, MPI_SUM, world); - delete[] mass_tmp; - - // DOFs - t_current = temperature->compute_scalar(); - tdof = temperature->dof; - // remove DOFs of COM translational motion based on the number of molecules in the group - dof_mol = 3.0 * n_mol_in_group - 3.0 * 
n_mol_in_group / n_mol; - dof_drude = 3.0 * n_drude; - dof_int = tdof - dof_mol - dof_drude; - - if (comm->me == 0) { - if (screen) { - fprintf(screen, "TGNHC thermostat for Drude model\n"); - fprintf(screen, " DOFs of molecules, atoms and dipoles: %.1f %.1f %.1f\n", - dof_mol, dof_int, dof_drude); - } - if (logfile) { - fprintf(logfile, "TGNHC thermostat for Drude model\n"); - fprintf(logfile, " DOFs of molecules, atoms and dipoles: %.1f %.1f %.1f\n", - dof_mol, dof_int, dof_drude); - } - } - if (dof_mol <=0 || dof_int <=0 || dof_drude <=0) - error->all(FLERR, "TGNHC thermostat requires DOFs of molecules, atoms and dipoles larger than 0"); -} - -void FixTGNHDrude::setup(int /*vflag*/) -{ - setup_mol_mass_dof(); - // t_target is needed by NVT and NPT in compute_scalar() - // If no thermostat or using fix nphug, - // t_target must be defined by other means. - - if (tstat_flag && strstr(style,"nphug") == nullptr) { - compute_temp_target(); - } else if (pstat_flag) { - - // t0 = reference temperature for masses - // cannot be done in init() b/c temperature cannot be called there - // is b/c Modify::init() inits computes after fixes due to dof dependence - // guesstimate a unit-dependent t0 if actual T = 0.0 - // if it was read in from a restart file, leave it be - - if (t0 == 0.0) { - t0 = temperature->compute_scalar(); - if (t0 == 0.0) { - if (strcmp(update->unit_style,"lj") == 0) t0 = 1.0; - else t0 = 300.0; - } - } - t_target = t0; - } - - if (pstat_flag) compute_press_target(); - - if (pstat_flag) { - if (pstyle == ISO) pressure->compute_scalar(); - else pressure->compute_vector(); - couple(); - pressure->addstep(update->ntimestep+1); - } - - // masses and initial forces on thermostat variables - - if (tstat_flag) { - etaint_mass[0] = ke2int_target / (t_freq * t_freq); - etamol_mass[0] = ke2mol_target / (t_freq * t_freq); - etadrude_mass[0] = ke2drude_target / (tdrude_freq * tdrude_freq); - for (int ich = 1; ich < mtchain; ich++) { - etaint_mass[ich] = boltz * 
t_target / (t_freq * t_freq); - etamol_mass[ich] = boltz * t_target / (t_freq * t_freq); - etadrude_mass[ich] = boltz * tdrude_target / (tdrude_freq * tdrude_freq); - - etaint_dotdot[ich] = (etaint_mass[ich - 1] * etaint_dot[ich - 1] * etaint_dot[ich - 1] - - boltz * t_target) / etaint_mass[ich]; - etamol_dotdot[ich] = (etamol_mass[ich - 1] * etamol_dot[ich - 1] * etamol_dot[ich - 1] - - boltz * t_target) / etamol_mass[ich]; - etadrude_dotdot[ich] = (etadrude_mass[ich - 1] * etadrude_dot[ich - 1] * etadrude_dot[ich - 1] - - boltz * tdrude_target) / etadrude_mass[ich]; - } - } - - // masses and initial forces on barostat variables - - if (pstat_flag) { - double kt = boltz * t_target; - double nkt = (atom->natoms + 1) * kt; - - for (int i = 0; i < 3; i++) - if (p_flag[i]) - omega_mass[i] = nkt/(p_freq[i]*p_freq[i]); - - if (pstyle == TRICLINIC) { - for (int i = 3; i < 6; i++) - if (p_flag[i]) omega_mass[i] = nkt/(p_freq[i]*p_freq[i]); - } - - // masses and initial forces on barostat thermostat variables - - if (mpchain) { - etap_mass[0] = boltz * t_target / (p_freq_max*p_freq_max); - for (int ich = 1; ich < mpchain; ich++) - etap_mass[ich] = boltz * t_target / (p_freq_max*p_freq_max); - for (int ich = 1; ich < mpchain; ich++) - etap_dotdot[ich] = - (etap_mass[ich-1]*etap_dot[ich-1]*etap_dot[ich-1] - - boltz * t_target) / etap_mass[ich]; - } - } -} - -/* ---------------------------------------------------------------------- - 1st half of Verlet update -------------------------------------------------------------------------- */ - -void FixTGNHDrude::initial_integrate(int /*vflag*/) -{ - // update eta_press_dot - - if (pstat_flag && mpchain) nhc_press_integrate(); - - // update eta_dot - - if (tstat_flag) { - compute_temp_target(); - nhc_temp_integrate(); - } - - // need to recompute pressure to account for change in KE - // t_current is up-to-date, but compute_temperature is not - // compute appropriately coupled elements of mvv_current - - if (pstat_flag) { - if 
(pstyle == ISO) { - temperature->compute_scalar(); - pressure->compute_scalar(); - } else { - temperature->compute_vector(); - pressure->compute_vector(); - } - couple(); - pressure->addstep(update->ntimestep+1); - } - - if (pstat_flag) { - compute_press_target(); - nh_omega_dot(); - nh_v_press(); - } - - nve_v(); - - // remap simulation box by 1/2 step - - if (pstat_flag) remap(); - - nve_x(); - - // remap simulation box by 1/2 step - // redo KSpace coeffs since volume has changed - - if (pstat_flag) { - remap(); - if (kspace_flag) force->kspace->setup(); - } -} - -/* ---------------------------------------------------------------------- - 2nd half of Verlet update -------------------------------------------------------------------------- */ - -void FixTGNHDrude::final_integrate() -{ - nve_v(); - - // re-compute temp before nh_v_press() - // only needed for temperature computes with BIAS on reneighboring steps: - // b/c some biases store per-atom values (e.g. temp/profile) - // per-atom values are invalid if reneigh/comm occurred - // since temp->compute() in initial_integrate() - - if (which == BIAS && neighbor->ago == 0) - t_current = temperature->compute_scalar(); - - if (pstat_flag) nh_v_press(); - - // compute new T,P after velocities rescaled by nh_v_press() - // compute appropriately coupled elements of mvv_current - - t_current = temperature->compute_scalar(); - tdof = temperature->dof; - - // need to recompute pressure to account for change in KE - // t_current is up-to-date, but compute_temperature is not - // compute appropriately coupled elements of mvv_current - - if (pstat_flag) { - if (pstyle == ISO) pressure->compute_scalar(); - else { - temperature->compute_vector(); - pressure->compute_vector(); - } - couple(); - pressure->addstep(update->ntimestep+1); - } - - if (pstat_flag) nh_omega_dot(); - - // update eta_dot - // update eta_press_dot - - if (tstat_flag) nhc_temp_integrate(); - if (pstat_flag && mpchain) nhc_press_integrate(); -} - -/* 
---------------------------------------------------------------------- */ - -void FixTGNHDrude::initial_integrate_respa(int /*vflag*/, int ilevel, int /*iloop*/) -{ - // set timesteps by level - - dtv = step_respa[ilevel]; - dtf = 0.5 * step_respa[ilevel] * force->ftm2v; - dthalf = 0.5 * step_respa[ilevel]; - - // outermost level - update eta_dot and omega_dot, apply to v - // all other levels - NVE update of v - // x,v updates only performed for atoms in group - - if (ilevel == nlevels_respa-1) { - - // update eta_press_dot - - if (pstat_flag && mpchain) nhc_press_integrate(); - - // update eta_dot - - if (tstat_flag) { - compute_temp_target(); - nhc_temp_integrate(); - } - - // recompute pressure to account for change in KE - // t_current is up-to-date, but compute_temperature is not - // compute appropriately coupled elements of mvv_current - - if (pstat_flag) { - if (pstyle == ISO) { - temperature->compute_scalar(); - pressure->compute_scalar(); - } else { - temperature->compute_vector(); - pressure->compute_vector(); - } - couple(); - pressure->addstep(update->ntimestep+1); - } - - if (pstat_flag) { - compute_press_target(); - nh_omega_dot(); - nh_v_press(); - } - - nve_v(); - - } else nve_v(); - - // innermost level - also update x only for atoms in group - // if barostat, perform 1/2 step remap before and after - - if (ilevel == 0) { - if (pstat_flag) remap(); - nve_x(); - if (pstat_flag) remap(); - } -} - -/* ---------------------------------------------------------------------- */ - -void FixTGNHDrude::pre_force_respa(int /*vflag*/, int ilevel, int /*iloop*/) -{ - // if barostat, redo KSpace coeffs at outermost level, - // since volume has changed - - if (ilevel == nlevels_respa-1 && kspace_flag && pstat_flag) - force->kspace->setup(); -} - -/* ---------------------------------------------------------------------- */ - -void FixTGNHDrude::final_integrate_respa(int ilevel, int /*iloop*/) -{ - // set timesteps by level - - dtf = 0.5 * step_respa[ilevel] * 
force->ftm2v; - dthalf = 0.5 * step_respa[ilevel]; - - // outermost level - update eta_dot and omega_dot, apply via final_integrate - // all other levels - NVE update of v - - if (ilevel == nlevels_respa-1) final_integrate(); - else nve_v(); -} - -/* ---------------------------------------------------------------------- */ - -void FixTGNHDrude::couple() -{ - double *tensor = pressure->vector; - - if (pstyle == ISO) - p_current[0] = p_current[1] = p_current[2] = pressure->scalar; - else if (pcouple == XYZ) { - double ave = 1.0/3.0 * (tensor[0] + tensor[1] + tensor[2]); - p_current[0] = p_current[1] = p_current[2] = ave; - } else if (pcouple == XY) { - double ave = 0.5 * (tensor[0] + tensor[1]); - p_current[0] = p_current[1] = ave; - p_current[2] = tensor[2]; - } else if (pcouple == YZ) { - double ave = 0.5 * (tensor[1] + tensor[2]); - p_current[1] = p_current[2] = ave; - p_current[0] = tensor[0]; - } else if (pcouple == XZ) { - double ave = 0.5 * (tensor[0] + tensor[2]); - p_current[0] = p_current[2] = ave; - p_current[1] = tensor[1]; - } else { - p_current[0] = tensor[0]; - p_current[1] = tensor[1]; - p_current[2] = tensor[2]; - } - - if (!std::isfinite(p_current[0]) || !std::isfinite(p_current[1]) || !std::isfinite(p_current[2])) - error->all(FLERR,"Non-numeric pressure - simulation unstable"); - - // switch order from xy-xz-yz to Voigt - - if (pstyle == TRICLINIC) { - p_current[3] = tensor[5]; - p_current[4] = tensor[4]; - p_current[5] = tensor[3]; - - if (!std::isfinite(p_current[3]) || !std::isfinite(p_current[4]) || !std::isfinite(p_current[5])) - error->all(FLERR,"Non-numeric pressure - simulation unstable"); - } -} - -/* ---------------------------------------------------------------------- - change box size - remap all atoms or dilate group atoms depending on allremap flag - if rigid bodies exist, scale rigid body centers-of-mass -------------------------------------------------------------------------- */ - -void FixTGNHDrude::remap() -{ - double 
oldlo,oldhi; - double expfac; - - int nlocal = atom->nlocal; - double *h = domain->h; - - // omega is not used, except for book-keeping - - for (int i = 0; i < 6; i++) omega[i] += dto*omega_dot[i]; - - // convert pertinent atoms and rigid bodies to lamda coords - - domain->x2lamda(nlocal); - - for (auto &ifix : rfix) ifix->deform(0); - - // reset global and local box to new size/shape - - // this operation corresponds to applying the - // translate and scale operations - // corresponding to the solution of the following ODE: - // - // h_dot = omega_dot * h - // - // where h_dot, omega_dot and h are all upper-triangular - // 3x3 tensors. In Voigt notation, the elements of the - // RHS product tensor are: - // h_dot = [0*0, 1*1, 2*2, 1*3+3*2, 0*4+5*3+4*2, 0*5+5*1] - // - // Ordering of operations preserves time symmetry. - - double dto2 = dto/2.0; - double dto4 = dto/4.0; - double dto8 = dto/8.0; - - // off-diagonal components, first half - - if (pstyle == TRICLINIC) { - - if (p_flag[4]) { - expfac = exp(dto8*omega_dot[0]); - h[4] *= expfac; - h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]); - h[4] *= expfac; - } - - if (p_flag[3]) { - expfac = exp(dto4*omega_dot[1]); - h[3] *= expfac; - h[3] += dto2*(omega_dot[3]*h[2]); - h[3] *= expfac; - } - - if (p_flag[5]) { - expfac = exp(dto4*omega_dot[0]); - h[5] *= expfac; - h[5] += dto2*(omega_dot[5]*h[1]); - h[5] *= expfac; - } - - if (p_flag[4]) { - expfac = exp(dto8*omega_dot[0]); - h[4] *= expfac; - h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]); - h[4] *= expfac; - } - } - - // scale diagonal components - // scale tilt factors with cell, if set - - if (p_flag[0]) { - oldlo = domain->boxlo[0]; - oldhi = domain->boxhi[0]; - expfac = exp(dto*omega_dot[0]); - domain->boxlo[0] = (oldlo-fixedpoint[0])*expfac + fixedpoint[0]; - domain->boxhi[0] = (oldhi-fixedpoint[0])*expfac + fixedpoint[0]; - } - - if (p_flag[1]) { - oldlo = domain->boxlo[1]; - oldhi = domain->boxhi[1]; - expfac = exp(dto*omega_dot[1]); - 
domain->boxlo[1] = (oldlo-fixedpoint[1])*expfac + fixedpoint[1]; - domain->boxhi[1] = (oldhi-fixedpoint[1])*expfac + fixedpoint[1]; - if (scalexy) h[5] *= expfac; - } - - if (p_flag[2]) { - oldlo = domain->boxlo[2]; - oldhi = domain->boxhi[2]; - expfac = exp(dto*omega_dot[2]); - domain->boxlo[2] = (oldlo-fixedpoint[2])*expfac + fixedpoint[2]; - domain->boxhi[2] = (oldhi-fixedpoint[2])*expfac + fixedpoint[2]; - if (scalexz) h[4] *= expfac; - if (scaleyz) h[3] *= expfac; - } - - // off-diagonal components, second half - - if (pstyle == TRICLINIC) { - - if (p_flag[4]) { - expfac = exp(dto8*omega_dot[0]); - h[4] *= expfac; - h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]); - h[4] *= expfac; - } - - if (p_flag[3]) { - expfac = exp(dto4*omega_dot[1]); - h[3] *= expfac; - h[3] += dto2*(omega_dot[3]*h[2]); - h[3] *= expfac; - } - - if (p_flag[5]) { - expfac = exp(dto4*omega_dot[0]); - h[5] *= expfac; - h[5] += dto2*(omega_dot[5]*h[1]); - h[5] *= expfac; - } - - if (p_flag[4]) { - expfac = exp(dto8*omega_dot[0]); - h[4] *= expfac; - h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]); - h[4] *= expfac; - } - - } - - domain->yz = h[3]; - domain->xz = h[4]; - domain->xy = h[5]; - - // tilt factor to cell length ratio can not exceed TILTMAX in one step - - if (domain->yz < -TILTMAX*domain->yprd || - domain->yz > TILTMAX*domain->yprd || - domain->xz < -TILTMAX*domain->xprd || - domain->xz > TILTMAX*domain->xprd || - domain->xy < -TILTMAX*domain->xprd || - domain->xy > TILTMAX*domain->xprd) - error->all(FLERR,"Fix npt/nph has tilted box too far in one step - " - "periodic cell is too far from equilibrium state"); - - domain->set_global_box(); - domain->set_local_box(); - - // convert pertinent atoms and rigid bodies back to box coords - - domain->lamda2x(nlocal); - - for (auto &ifix : rfix) ifix->deform(1); -} - -/* ---------------------------------------------------------------------- - pack entire state of Fix into one write 
-------------------------------------------------------------------------- */ - -void FixTGNHDrude::write_restart(FILE *fp) -{ - int nsize = size_restart_global(); - - double *list; - memory->create(list,nsize,"nh:list"); - - pack_restart_data(list); - - if (comm->me == 0) { - int size = nsize * sizeof(double); - fwrite(&size,sizeof(int),1,fp); - fwrite(list,sizeof(double),nsize,fp); - } - - memory->destroy(list); -} - -/* ---------------------------------------------------------------------- - calculate the number of data to be packed -------------------------------------------------------------------------- */ - -int FixTGNHDrude::size_restart_global() -{ - int nsize = 2; - if (tstat_flag) nsize += 1 + 6*mtchain; - if (pstat_flag) { - nsize += 16 + 2*mpchain; - if (deviatoric_flag) nsize += 6; - } - - return nsize; -} - -/* ---------------------------------------------------------------------- - pack restart data -------------------------------------------------------------------------- */ - -int FixTGNHDrude::pack_restart_data(double *list) -{ - int n = 0; - - list[n++] = tstat_flag; - if (tstat_flag) { - list[n++] = mtchain; - for (int ich = 0; ich < mtchain; ich++) { - list[n++] = etamol[ich]; - list[n++] = etaint[ich]; - list[n++] = etadrude[ich]; - } - for (int ich = 0; ich < mtchain; ich++) { - list[n++] = etamol_dot[ich]; - list[n++] = etaint_dot[ich]; - list[n++] = etadrude_dot[ich]; - } - } - - list[n++] = pstat_flag; - if (pstat_flag) { - list[n++] = omega[0]; - list[n++] = omega[1]; - list[n++] = omega[2]; - list[n++] = omega[3]; - list[n++] = omega[4]; - list[n++] = omega[5]; - list[n++] = omega_dot[0]; - list[n++] = omega_dot[1]; - list[n++] = omega_dot[2]; - list[n++] = omega_dot[3]; - list[n++] = omega_dot[4]; - list[n++] = omega_dot[5]; - list[n++] = vol0; - list[n++] = t0; - list[n++] = mpchain; - if (mpchain) { - for (int ich = 0; ich < mpchain; ich++) - list[n++] = etap[ich]; - for (int ich = 0; ich < mpchain; ich++) - list[n++] = 
etap_dot[ich]; - } - - list[n++] = deviatoric_flag; - if (deviatoric_flag) { - list[n++] = h0_inv[0]; - list[n++] = h0_inv[1]; - list[n++] = h0_inv[2]; - list[n++] = h0_inv[3]; - list[n++] = h0_inv[4]; - list[n++] = h0_inv[5]; - } - } - - return n; -} - -/* ---------------------------------------------------------------------- - use state info from restart file to restart the Fix -------------------------------------------------------------------------- */ - -void FixTGNHDrude::restart(char *buf) -{ - int n = 0; - auto list = (double *) buf; - int flag = static_cast (list[n++]); - if (flag) { - int m = static_cast (list[n++]); - if (tstat_flag && m == mtchain) { - for (int ich = 0; ich < mtchain; ich++) { - etamol[ich] = list[n++]; - etaint[ich] = list[n++]; - etadrude[ich] = list[n++]; - } - for (int ich = 0; ich < mtchain; ich++) { - etamol_dot[ich] = list[n++]; - etaint_dot[ich] = list[n++]; - etadrude_dot[ich] = list[n++]; - } - } else n += 2*m; - } - flag = static_cast (list[n++]); - if (flag) { - omega[0] = list[n++]; - omega[1] = list[n++]; - omega[2] = list[n++]; - omega[3] = list[n++]; - omega[4] = list[n++]; - omega[5] = list[n++]; - omega_dot[0] = list[n++]; - omega_dot[1] = list[n++]; - omega_dot[2] = list[n++]; - omega_dot[3] = list[n++]; - omega_dot[4] = list[n++]; - omega_dot[5] = list[n++]; - vol0 = list[n++]; - t0 = list[n++]; - int m = static_cast (list[n++]); - if (pstat_flag && m == mpchain) { - for (int ich = 0; ich < mpchain; ich++) - etap[ich] = list[n++]; - for (int ich = 0; ich < mpchain; ich++) - etap_dot[ich] = list[n++]; - } else n+=2*m; - flag = static_cast (list[n++]); - if (flag) { - h0_inv[0] = list[n++]; - h0_inv[1] = list[n++]; - h0_inv[2] = list[n++]; - h0_inv[3] = list[n++]; - h0_inv[4] = list[n++]; - h0_inv[5] = list[n++]; - } - } -} - -/* ---------------------------------------------------------------------- */ - -int FixTGNHDrude::modify_param(int narg, char **arg) -{ - if (strcmp(arg[0],"temp") == 0) { - if (narg < 2) 
error->all(FLERR,"Illegal fix_modify command"); - if (tcomputeflag) { - modify->delete_compute(id_temp); - tcomputeflag = 0; - } - delete[] id_temp; - id_temp = utils::strdup(arg[1]); - - temperature = modify->get_compute_by_id(id_temp); - if (!temperature) error->all(FLERR,"Could not find fix_modify temperature ID {}", id_temp); - - if (temperature->tempflag == 0) - error->all(FLERR, "Fix_modify temperature ID {} does not compute temperature", id_temp); - if (temperature->igroup != 0 && comm->me == 0) - error->warning(FLERR,"Temperature for fix modify is not for group all"); - - // reset id_temp of pressure to new temperature ID - - if (pstat_flag) { - pressure = modify->get_compute_by_id(id_press); - if (!pressure) error->all(FLERR,"Pressure ID {} for fix modify does not exist", id_press); - pressure->reset_extra_compute_fix(id_temp); - } - - return 2; - - } else if (strcmp(arg[0],"press") == 0) { - if (narg < 2) error->all(FLERR,"Illegal fix_modify command"); - if (!pstat_flag) error->all(FLERR,"Illegal fix_modify command"); - if (pcomputeflag) { - modify->delete_compute(id_press); - pcomputeflag = 0; - } - delete[] id_press; - id_press = utils::strdup(arg[1]); - - pressure = modify->get_compute_by_id(id_press); - if (!pressure) error->all(FLERR,"Could not find fix_modify pressure ID {}", id_press); - - if (pressure->pressflag == 0) - error->all(FLERR,"Fix_modify pressure ID {} does not compute pressure", id_press); - return 2; - } - - return 0; -} - -/* ---------------------------------------------------------------------- */ - -double FixTGNHDrude::compute_scalar() -{ - int i; - double volume; - double energy; - double kt = boltz * t_target; - double kt_drude = boltz * tdrude_target; - double lkt_press = 0.0; - int ich; - if (dimension == 3) volume = domain->xprd * domain->yprd * domain->zprd; - else volume = domain->xprd * domain->yprd; - - energy = 0.0; - - // thermostat chain energy is equivalent to Eq. 
(2) in - // Martyna, Tuckerman, Tobias, Klein, Mol Phys, 87, 1117 - // Sum(0.5*p_eta_k^2/Q_k,k=1,M) + L*k*T*eta_1 + Sum(k*T*eta_k,k=2,M), - // where L = tdof - // M = mtchain - // p_eta_k = Q_k*eta_dot[k-1] - // Q_1 = L*k*T/t_freq^2 - // Q_k = k*T/t_freq^2, k > 1 - - if (tstat_flag) { - energy += ke2mol_target * etamol[0] + 0.5 * etamol_mass[0] * etamol_dot[0] * etamol_dot[0]; - energy += ke2int_target * etaint[0] + 0.5 * etaint_mass[0] * etaint_dot[0] * etaint_dot[0]; - energy += ke2drude_target * etadrude[0] + 0.5 * etadrude_mass[0] * etadrude_dot[0] * etadrude_dot[0]; - for (ich = 1; ich < mtchain; ich++) { - energy += kt * etamol[ich] + 0.5*etamol_mass[ich]*etamol_dot[ich]*etamol_dot[ich]; - energy += kt * etaint[ich] + 0.5*etaint_mass[ich]*etaint_dot[ich]*etaint_dot[ich]; - energy += kt_drude * etadrude[ich] + 0.5*etadrude_mass[ich]*etadrude_dot[ich]*etadrude_dot[ich]; - } - } - - // barostat energy is equivalent to Eq. (8) in - // Martyna, Tuckerman, Tobias, Klein, Mol Phys, 87, 1117 - // Sum(0.5*p_omega^2/W + P*V), - // where N = natoms - // p_omega = W*omega_dot - // W = N*k*T/p_freq^2 - // sum is over barostatted dimensions - - if (pstat_flag) { - for (i = 0; i < 3; i++) { - if (p_flag[i]) { - energy += 0.5*omega_dot[i]*omega_dot[i]*omega_mass[i] + - p_hydro*(volume-vol0) / (pdim*nktv2p); - lkt_press += kt; - } - } - - if (pstyle == TRICLINIC) { - for (i = 3; i < 6; i++) { - if (p_flag[i]) { - energy += 0.5*omega_dot[i]*omega_dot[i]*omega_mass[i]; - lkt_press += kt; - } - } - } - - // extra contributions from thermostat chain for barostat - - if (mpchain) { - energy += lkt_press * etap[0] + 0.5*etap_mass[0]*etap_dot[0]*etap_dot[0]; - for (ich = 1; ich < mpchain; ich++) - energy += kt * etap[ich] + - 0.5*etap_mass[ich]*etap_dot[ich]*etap_dot[ich]; - } - - // extra contribution from strain energy - - if (deviatoric_flag) energy += compute_strain_energy(); - } - - return energy; -} - -/* ---------------------------------------------------------------------- 
*/ - -double FixTGNHDrude::compute_vector(int n) -{ - if (!temp_computed_end_of_step) - compute_temp_mol_int_drude(true); - switch (n) { - case 0: - return t_mol; - case 1: - return t_int; - case 2: - return t_drude; - default: - return 0.0; - } -} - -/* ---------------------------------------------------------------------- */ - -void FixTGNHDrude::reset_target(double t_new) -{ - t_target = t_start = t_stop = t_new; -} - -/* ---------------------------------------------------------------------- */ - -void FixTGNHDrude::reset_dt() -{ - dtv = update->dt; - dtf = 0.5 * update->dt * force->ftm2v; - dthalf = 0.5 * update->dt; - dt4 = 0.25 * update->dt; - dt8 = 0.125 * update->dt; - dto = dthalf; - - // If using respa, then remap is performed in innermost level - - if (utils::strmatch(update->integrate_style,"^respa")) - dto = 0.5*step_respa[0]; -} - -void FixTGNHDrude::compute_temp_mol_int_drude(bool end_of_step) { - double **v = atom->v; - double *mass = atom->mass; - tagint *molecule = atom->molecule; - int *type = atom->type; - int *mask = atom->mask; - int *drudetype = fix_drude->drudetype; - tagint *drudeid = fix_drude->drudeid; - int imol, ci, di; - double mass_com, mass_reduced, mass_core, mass_drude; - double vint, vcom, vrel; - // use array instead of two numbers to save MPI_Allreduce() - double ke2_int_drude_tmp[2] = {0.0, 0.0}; - double ke2_int_drude[2]; - - memset(*v_mol_tmp, 0, sizeof(double) * (n_mol + 1) * 3); // the length of v_mol is n_mol+1 - - /** - * If there are velocity bias, need to remove them before calculate kinetic energies - */ - for (int i = 0; i < atom->nlocal; i++) { - if (mask[i] & groupbit) { - if (which == BIAS) - temperature->remove_bias(i, v[i]); - - imol = molecule[i]; - for (int k = 0; k < 3; k++) - v_mol_tmp[imol][k] += v[i][k] * mass[type[i]]; - - if (which == BIAS) - temperature->restore_bias(i, v[i]); - } - } - MPI_Allreduce(*v_mol_tmp, *v_mol, (n_mol + 1) * 3, MPI_DOUBLE, MPI_SUM, world); - - ke2mol = 0; - for (int i = 1; i < 
n_mol + 1; i++) { - for (int k = 0; k < 3; k++) { - v_mol[i][k] /= mass_mol[i]; - ke2mol += mass_mol[i] * (v_mol[i][k] * v_mol[i][k]); - } - } - ke2mol *= force->mvv2e; - t_mol = ke2mol / dof_mol / boltz; - - /** - * Have to call remove_bias at the innermost loop, because drude atom may be a ghost - */ - for (int i = 0; i < atom->nlocal; i++) { - if (mask[i] & groupbit) { - imol = molecule[i]; - if (drudetype[type[i]] == NOPOL_TYPE) { - if (which == BIAS) - temperature->remove_bias(i, v[i]); - for (int k = 0; k < 3; k++) { - vint = v[i][k] - v_mol[imol][k]; - ke2_int_drude_tmp[0] += mass[type[i]] * vint * vint; - } - if (which == BIAS) - temperature->restore_bias(i, v[i]); - } else if (drudetype[type[i]] == CORE_TYPE) { - /** - * have to use closet_image() - * even though all images have the same velocity and it's sort of read-only - * but the bias velocity may depends on it's position like in compute vis/pp - */ - ci = i; - di = domain->closest_image(i, atom->map(drudeid[i])); - if (which == BIAS) { - temperature->remove_bias(ci, v[ci]); - temperature->remove_bias(di, v[di]); - } - mass_core = mass[type[ci]]; - mass_drude = mass[type[di]]; - mass_com = mass_core + mass_drude; - mass_reduced = mass_core * mass_drude / mass_com; - for (int k = 0; k < 3; k++) { - vcom = (mass_core * v[ci][k] + mass_drude * v[di][k]) / mass_com; - vint = vcom - v_mol[imol][k]; - ke2_int_drude_tmp[0] += mass_com * vint * vint; - vrel = v[di][k] - v[ci][k]; - ke2_int_drude_tmp[1] += mass_reduced * vrel * vrel; - } - if (which == BIAS) { - temperature->restore_bias(ci, v[ci]); - temperature->restore_bias(di, v[di]); - } - } - } - } - MPI_Allreduce(ke2_int_drude_tmp, ke2_int_drude, 2, MPI_DOUBLE, MPI_SUM, world); - ke2int = ke2_int_drude[0] * force->mvv2e; - ke2drude = ke2_int_drude[1] * force->mvv2e; - t_int = ke2int / dof_int / boltz; - t_drude = ke2drude / dof_drude / boltz; - - temp_computed_end_of_step = end_of_step; -} - -/* 
---------------------------------------------------------------------- - perform half-step update of chain thermostat variables -------------------------------------------------------------------------- */ - -void FixTGNHDrude::nhc_temp_integrate() -{ - compute_temp_mol_int_drude(false); - - // update masses of thermostat in case target temperature changes - etamol_mass[0] = ke2mol_target / (t_freq*t_freq); - etaint_mass[0] = ke2int_target / (t_freq*t_freq); - for (int ich = 1; ich < mtchain; ich++) { - etamol_mass[ich] = boltz * t_target / (t_freq*t_freq); - etaint_mass[ich] = boltz * t_target / (t_freq*t_freq); - } - - // thermostat for molecular COM - factor_eta_mol = propagate(etamol, etamol_dot, etamol_dotdot, etamol_mass, - ke2mol, ke2mol_target, t_target); - factor_eta_int = propagate(etaint, etaint_dot, etaint_dotdot, etaint_mass, - ke2int, ke2int_target, t_target); - factor_eta_drude = propagate(etadrude, etadrude_dot, etadrude_dotdot, etadrude_mass, - ke2drude, ke2drude_target, tdrude_target); - - nh_v_temp(); -} - -double FixTGNHDrude::propagate(double *eta, double *eta_dot, double *eta_dotdot, const double *eta_mass, - const double &ke2, const double &ke2_target, const double &tt) const { - int ich; - double expfac; - double ncfac = 1.0 / nc_tchain; - double factor_eta = 1.0; - - eta_dotdot[0] = (ke2 - ke2_target) / eta_mass[0]; - for (int iloop = 0; iloop < nc_tchain; iloop++) { - for (ich = mtchain - 1; ich > 0; ich--) { - expfac = exp(-ncfac * dt8 * eta_dot[ich + 1]); - eta_dot[ich] *= expfac; - eta_dot[ich] += eta_dotdot[ich] * ncfac * dt4; - eta_dot[ich] *= expfac; - } - expfac = exp(-ncfac * dt8 * eta_dot[1]); - eta_dot[0] *= expfac; - eta_dot[0] += eta_dotdot[0] * ncfac * dt4; - eta_dot[0] *= expfac; - factor_eta *= exp(-ncfac * dthalf * eta_dot[0]); - - for (ich = 0; ich < mtchain; ich++) - eta[ich] += ncfac * dthalf * eta_dot[ich]; - - eta_dotdot[0] = (ke2 * factor_eta * factor_eta - ke2_target) / eta_mass[0]; - eta_dot[0] *= expfac; - 
eta_dot[0] += eta_dotdot[0] * ncfac * dt4; - eta_dot[0] *= expfac; - for (ich = 1; ich < mtchain; ich++) { - expfac = exp(-ncfac * dt8 * eta_dot[ich + 1]); - eta_dot[ich] *= expfac; - eta_dotdot[ich] = (eta_mass[ich - 1] * eta_dot[ich - 1] * eta_dot[ich - 1] - - boltz * tt) / eta_mass[ich]; - eta_dot[ich] += eta_dotdot[ich] * ncfac * dt4; - eta_dot[ich] *= expfac; - } - } - return factor_eta; -} - -/* ---------------------------------------------------------------------- - perform half-step update of chain thermostat variables for barostat - scale barostat velocities -------------------------------------------------------------------------- */ - -void FixTGNHDrude::nhc_press_integrate() -{ - int ich,i,pdof; - double expfac,factor_etap,kecurrent; - double kt = boltz * t_target; - double lkt_press; - - // Update masses, to preserve initial freq, if t_target changed - double nkt = (atom->natoms + 1) * kt; - for (int i = 0; i < 3; i++) - if (p_flag[i]) - omega_mass[i] = nkt / (p_freq[i] * p_freq[i]); - if (pstyle == TRICLINIC) { - for (int i = 3; i < 6; i++) - if (p_flag[i]) omega_mass[i] = nkt / (p_freq[i] * p_freq[i]); - } - if (mpchain) { - etap_mass[0] = kt / (p_freq_max * p_freq_max); - for (int ich = 1; ich < mpchain; ich++) - etap_mass[ich] = kt / (p_freq_max * p_freq_max); - for (int ich = 1; ich < mpchain; ich++) - etap_dotdot[ich] = (etap_mass[ich - 1] * etap_dot[ich - 1] * etap_dot[ich - 1] - - kt) / etap_mass[ich]; - } - - kecurrent = 0.0; - pdof = 0; - for (i = 0; i < 3; i++) - if (p_flag[i]) { - kecurrent += omega_mass[i]*omega_dot[i]*omega_dot[i]; - pdof++; - } - - if (pstyle == TRICLINIC) { - for (i = 3; i < 6; i++) - if (p_flag[i]) { - kecurrent += omega_mass[i]*omega_dot[i]*omega_dot[i]; - pdof++; - } - } - - if (pstyle == ISO) lkt_press = kt; - else lkt_press = pdof * kt; - etap_dotdot[0] = (kecurrent - lkt_press)/etap_mass[0]; - - double ncfac = 1.0/nc_pchain; - for (int iloop = 0; iloop < nc_pchain; iloop++) { - - for (ich = mpchain-1; ich > 0; 
ich--) { - expfac = exp(-ncfac*dt8*etap_dot[ich+1]); - etap_dot[ich] *= expfac; - etap_dot[ich] += etap_dotdot[ich] * ncfac*dt4; - etap_dot[ich] *= expfac; - } - - expfac = exp(-ncfac*dt8*etap_dot[1]); - etap_dot[0] *= expfac; - etap_dot[0] += etap_dotdot[0] * ncfac*dt4; - etap_dot[0] *= expfac; - - for (ich = 0; ich < mpchain; ich++) - etap[ich] += ncfac*dthalf*etap_dot[ich]; - - factor_etap = exp(-ncfac*dthalf*etap_dot[0]); - for (i = 0; i < 3; i++) - if (p_flag[i]) omega_dot[i] *= factor_etap; - - if (pstyle == TRICLINIC) { - for (i = 3; i < 6; i++) - if (p_flag[i]) omega_dot[i] *= factor_etap; - } - - kecurrent = 0.0; - for (i = 0; i < 3; i++) - if (p_flag[i]) kecurrent += omega_mass[i]*omega_dot[i]*omega_dot[i]; - - if (pstyle == TRICLINIC) { - for (i = 3; i < 6; i++) - if (p_flag[i]) kecurrent += omega_mass[i]*omega_dot[i]*omega_dot[i]; - } - - etap_dotdot[0] = (kecurrent - lkt_press)/etap_mass[0]; - - etap_dot[0] *= expfac; - etap_dot[0] += etap_dotdot[0] * ncfac*dt4; - etap_dot[0] *= expfac; - - for (ich = 1; ich < mpchain; ich++) { - expfac = exp(-ncfac*dt8*etap_dot[ich+1]); - etap_dot[ich] *= expfac; - etap_dotdot[ich] = - (etap_mass[ich-1]*etap_dot[ich-1]*etap_dot[ich-1] - kt) / etap_mass[ich]; - etap_dot[ich] += etap_dotdot[ich] * ncfac*dt4; - etap_dot[ich] *= expfac; - } - } -} - -/* ---------------------------------------------------------------------- - perform half-step barostat scaling of velocities ------------------------------------------------------------------------*/ - -void FixTGNHDrude::nh_v_press() -{ - double factor[3]; - double **v = atom->v; - int *mask = atom->mask; - int nlocal = atom->nlocal; - if (igroup == atom->firstgroup) nlocal = atom->nfirst; - - factor[0] = exp(-dt4*(omega_dot[0]+mtk_term2)); - factor[1] = exp(-dt4*(omega_dot[1]+mtk_term2)); - factor[2] = exp(-dt4*(omega_dot[2]+mtk_term2)); - - if (which == NOBIAS) { - for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { - v[i][0] *= factor[0]; - v[i][1] *= 
factor[1]; - v[i][2] *= factor[2]; - if (pstyle == TRICLINIC) { - v[i][0] += -dthalf*(v[i][1]*omega_dot[5] + v[i][2]*omega_dot[4]); - v[i][1] += -dthalf*v[i][2]*omega_dot[3]; - } - v[i][0] *= factor[0]; - v[i][1] *= factor[1]; - v[i][2] *= factor[2]; - } - } - } else if (which == BIAS) { - for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { - temperature->remove_bias(i,v[i]); - v[i][0] *= factor[0]; - v[i][1] *= factor[1]; - v[i][2] *= factor[2]; - if (pstyle == TRICLINIC) { - v[i][0] += -dthalf*(v[i][1]*omega_dot[5] + v[i][2]*omega_dot[4]); - v[i][1] += -dthalf*v[i][2]*omega_dot[3]; - } - v[i][0] *= factor[0]; - v[i][1] *= factor[1]; - v[i][2] *= factor[2]; - temperature->restore_bias(i,v[i]); - } - } - } -} - -/* ---------------------------------------------------------------------- - perform half-step update of velocities ------------------------------------------------------------------------*/ - -void FixTGNHDrude::nve_v() -{ - double dtfm; - double **v = atom->v; - double **f = atom->f; - double *rmass = atom->rmass; - double *mass = atom->mass; - int *type = atom->type; - int *mask = atom->mask; - int nlocal = atom->nlocal; - if (igroup == atom->firstgroup) nlocal = atom->nfirst; - - if (rmass) { - for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { - dtfm = dtf / rmass[i]; - v[i][0] += dtfm*f[i][0]; - v[i][1] += dtfm*f[i][1]; - v[i][2] += dtfm*f[i][2]; - } - } - } else { - for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { - dtfm = dtf / mass[type[i]]; - v[i][0] += dtfm*f[i][0]; - v[i][1] += dtfm*f[i][1]; - v[i][2] += dtfm*f[i][2]; - } - } - } -} - -/* ---------------------------------------------------------------------- - perform full-step update of positions ------------------------------------------------------------------------*/ - -void FixTGNHDrude::nve_x() -{ - double **x = atom->x; - double **v = atom->v; - int *mask = atom->mask; - int nlocal = atom->nlocal; - if (igroup == atom->firstgroup) nlocal = atom->nfirst; 
- - // x update by full step only for atoms in group - - for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { - x[i][0] += dtv * v[i][0]; - x[i][1] += dtv * v[i][1]; - x[i][2] += dtv * v[i][2]; - } - } -} - -/* ---------------------------------------------------------------------- - perform half-step thermostat scaling of velocities ------------------------------------------------------------------------*/ - -void FixTGNHDrude::nh_v_temp() -{ - double **v = atom->v; - double *mass = atom->mass; - int *mask = atom->mask; - int *type = atom->type; - tagint *molecule = atom->molecule; - int *drudetype = fix_drude->drudetype; - tagint *drudeid = fix_drude->drudeid; - - int imol, i, j, ci, di, itype; - double mass_com, mass_core, mass_drude; - double vint, vcom, vrel; - - /** - * If there are velocity bias, need to remove them before scale velocity - * Have to call remove_bias at the innermost loop, because drude atom may be a ghost - */ - for (i = 0; i < atom->nlocal; i++) { - if (mask[i] & groupbit) { - imol = molecule[i]; - itype = drudetype[type[i]]; - if (itype == NOPOL_TYPE) { - if (which == BIAS) - temperature->remove_bias(i, v[i]); - for (int k = 0; k < 3; k++) { - vint = v[i][k] - v_mol[imol][k]; - vint *= factor_eta_int; - v[i][k] = v_mol[imol][k] * factor_eta_mol + vint; - } - if (which == BIAS) - temperature->restore_bias(i, v[i]); - } else { - // have to use closest_image() because we are manipulating the velocity - j = domain->closest_image(i, atom->map(drudeid[i])); - if (itype == DRUDE_TYPE && j < atom->nlocal) continue; - if (itype == CORE_TYPE) { - ci = i; - di = j; - } else { - ci = j; - di = i; - } - if (which == BIAS) { - temperature->remove_bias(ci, v[ci]); - temperature->remove_bias(di, v[di]); - } - mass_core = mass[type[ci]]; - mass_drude = mass[type[di]]; - mass_com = mass_core + mass_drude; - for (int k = 0; k < 3; k++) { - vcom = (mass_core * v[ci][k] + mass_drude * v[di][k]) / mass_com; - vint = vcom - v_mol[imol][k]; - vrel = 
v[di][k] - v[ci][k]; - vint *= factor_eta_int; - vrel *= factor_eta_drude; - v[ci][k] = v_mol[imol][k] * factor_eta_mol + vint - vrel * mass_drude / mass_com; - v[di][k] = v_mol[imol][k] * factor_eta_mol + vint + vrel * mass_core / mass_com; - } - if (which == BIAS) { - temperature->restore_bias(ci, v[ci]); - temperature->restore_bias(di, v[di]); - } - } - } - } -} - -/* ---------------------------------------------------------------------- - compute sigma tensor - needed whenever p_target or h0_inv changes ------------------------------------------------------------------------*/ - -void FixTGNHDrude::compute_sigma() -{ - // if nreset_h0 > 0, reset vol0 and h0_inv - // every nreset_h0 timesteps - - if (nreset_h0 > 0) { - int delta = update->ntimestep - update->beginstep; - if (delta % nreset_h0 == 0) { - if (dimension == 3) vol0 = domain->xprd * domain->yprd * domain->zprd; - else vol0 = domain->xprd * domain->yprd; - h0_inv[0] = domain->h_inv[0]; - h0_inv[1] = domain->h_inv[1]; - h0_inv[2] = domain->h_inv[2]; - h0_inv[3] = domain->h_inv[3]; - h0_inv[4] = domain->h_inv[4]; - h0_inv[5] = domain->h_inv[5]; - } - } - - // generate upper-triangular half of - // sigma = vol0*h0inv*(p_target-p_hydro)*h0inv^t - // units of sigma are are PV/L^2 e.g. 
atm.A - // - // [ 0 5 4 ] [ 0 5 4 ] [ 0 5 4 ] [ 0 - - ] - // [ 5 1 3 ] = [ - 1 3 ] [ 5 1 3 ] [ 5 1 - ] - // [ 4 3 2 ] [ - - 2 ] [ 4 3 2 ] [ 4 3 2 ] - - sigma[0] = - vol0*(h0_inv[0]*((p_target[0]-p_hydro)*h0_inv[0] + - p_target[5]*h0_inv[5]+p_target[4]*h0_inv[4]) + - h0_inv[5]*(p_target[5]*h0_inv[0] + - (p_target[1]-p_hydro)*h0_inv[5]+p_target[3]*h0_inv[4]) + - h0_inv[4]*(p_target[4]*h0_inv[0]+p_target[3]*h0_inv[5] + - (p_target[2]-p_hydro)*h0_inv[4])); - sigma[1] = - vol0*(h0_inv[1]*((p_target[1]-p_hydro)*h0_inv[1] + - p_target[3]*h0_inv[3]) + - h0_inv[3]*(p_target[3]*h0_inv[1] + - (p_target[2]-p_hydro)*h0_inv[3])); - sigma[2] = - vol0*(h0_inv[2]*((p_target[2]-p_hydro)*h0_inv[2])); - sigma[3] = - vol0*(h0_inv[1]*(p_target[3]*h0_inv[2]) + - h0_inv[3]*((p_target[2]-p_hydro)*h0_inv[2])); - sigma[4] = - vol0*(h0_inv[0]*(p_target[4]*h0_inv[2]) + - h0_inv[5]*(p_target[3]*h0_inv[2]) + - h0_inv[4]*((p_target[2]-p_hydro)*h0_inv[2])); - sigma[5] = - vol0*(h0_inv[0]*(p_target[5]*h0_inv[1]+p_target[4]*h0_inv[3]) + - h0_inv[5]*((p_target[1]-p_hydro)*h0_inv[1]+p_target[3]*h0_inv[3]) + - h0_inv[4]*(p_target[3]*h0_inv[1]+(p_target[2]-p_hydro)*h0_inv[3])); -} - -/* ---------------------------------------------------------------------- - compute strain energy ------------------------------------------------------------------------*/ - -double FixTGNHDrude::compute_strain_energy() -{ - // compute strain energy = 0.5*Tr(sigma*h*h^t) in energy units - - double* h = domain->h; - double d0,d1,d2; - - d0 = - sigma[0]*(h[0]*h[0]+h[5]*h[5]+h[4]*h[4]) + - sigma[5]*( h[1]*h[5]+h[3]*h[4]) + - sigma[4]*( h[2]*h[4]); - d1 = - sigma[5]*( h[5]*h[1]+h[4]*h[3]) + - sigma[1]*( h[1]*h[1]+h[3]*h[3]) + - sigma[3]*( h[2]*h[3]); - d2 = - sigma[4]*( h[4]*h[2]) + - sigma[3]*( h[3]*h[2]) + - sigma[2]*( h[2]*h[2]); - - double energy = 0.5*(d0+d1+d2)/nktv2p; - return energy; -} - -/* ---------------------------------------------------------------------- - compute deviatoric barostat force = h*sigma*h^t 
------------------------------------------------------------------------*/ - -void FixTGNHDrude::compute_deviatoric() -{ - // generate upper-triangular part of h*sigma*h^t - // units of fdev are are PV, e.g. atm*A^3 - // [ 0 5 4 ] [ 0 5 4 ] [ 0 5 4 ] [ 0 - - ] - // [ 5 1 3 ] = [ - 1 3 ] [ 5 1 3 ] [ 5 1 - ] - // [ 4 3 2 ] [ - - 2 ] [ 4 3 2 ] [ 4 3 2 ] - - double* h = domain->h; - - fdev[0] = - h[0]*(sigma[0]*h[0]+sigma[5]*h[5]+sigma[4]*h[4]) + - h[5]*(sigma[5]*h[0]+sigma[1]*h[5]+sigma[3]*h[4]) + - h[4]*(sigma[4]*h[0]+sigma[3]*h[5]+sigma[2]*h[4]); - fdev[1] = - h[1]*( sigma[1]*h[1]+sigma[3]*h[3]) + - h[3]*( sigma[3]*h[1]+sigma[2]*h[3]); - fdev[2] = - h[2]*( sigma[2]*h[2]); - fdev[3] = - h[1]*( sigma[3]*h[2]) + - h[3]*( sigma[2]*h[2]); - fdev[4] = - h[0]*( sigma[4]*h[2]) + - h[5]*( sigma[3]*h[2]) + - h[4]*( sigma[2]*h[2]); - fdev[5] = - h[0]*( sigma[5]*h[1]+sigma[4]*h[3]) + - h[5]*( sigma[1]*h[1]+sigma[3]*h[3]) + - h[4]*( sigma[3]*h[1]+sigma[2]*h[3]); -} - -/* ---------------------------------------------------------------------- - compute target temperature and kinetic energy ------------------------------------------------------------------------*/ - -void FixTGNHDrude::compute_temp_target() -{ - double delta = update->ntimestep - update->beginstep; - if (delta != 0.0) delta /= update->endstep - update->beginstep; - - t_target = t_start + delta * (t_stop-t_start); - ke2mol_target = dof_mol * boltz * t_target; - ke2int_target = dof_int * boltz * t_target; - ke2drude_target = dof_drude * boltz * tdrude_target; -} - -/* ---------------------------------------------------------------------- - compute hydrostatic target pressure ------------------------------------------------------------------------*/ - -void FixTGNHDrude::compute_press_target() -{ - double delta = update->ntimestep - update->beginstep; - if (delta != 0.0) delta /= update->endstep - update->beginstep; - - p_hydro = 0.0; - for (int i = 0; i < 3; i++) - if (p_flag[i]) { - p_target[i] = p_start[i] + delta 
* (p_stop[i]-p_start[i]); - p_hydro += p_target[i]; - } - if (pdim > 0) p_hydro /= pdim; - - if (pstyle == TRICLINIC) - for (int i = 3; i < 6; i++) - p_target[i] = p_start[i] + delta * (p_stop[i]-p_start[i]); - - // if deviatoric, recompute sigma each time p_target changes - - if (deviatoric_flag) compute_sigma(); -} - -/* ---------------------------------------------------------------------- - update omega_dot, omega ------------------------------------------------------------------------*/ - -void FixTGNHDrude::nh_omega_dot() -{ - double f_omega,volume; - - if (dimension == 3) volume = domain->xprd*domain->yprd*domain->zprd; - else volume = domain->xprd*domain->yprd; - - if (deviatoric_flag) compute_deviatoric(); - - mtk_term1 = 0.0; - if (mtk_flag) { - if (pstyle == ISO) { - mtk_term1 = tdof * boltz * t_current; - mtk_term1 /= pdim * atom->natoms; - } else { - double *mvv_current = temperature->vector; - for (int i = 0; i < 3; i++) - if (p_flag[i]) - mtk_term1 += mvv_current[i]; - mtk_term1 /= pdim * atom->natoms; - } - } - - for (int i = 0; i < 3; i++) - if (p_flag[i]) { - f_omega = (p_current[i]-p_hydro)*volume / - (omega_mass[i] * nktv2p) + mtk_term1 / omega_mass[i]; - if (deviatoric_flag) f_omega -= fdev[i]/(omega_mass[i] * nktv2p); - omega_dot[i] += f_omega*dthalf; - } - - mtk_term2 = 0.0; - if (mtk_flag) { - for (int i = 0; i < 3; i++) - if (p_flag[i]) - mtk_term2 += omega_dot[i]; - if (pdim > 0) mtk_term2 /= pdim * atom->natoms; - } - - if (pstyle == TRICLINIC) { - for (int i = 3; i < 6; i++) { - if (p_flag[i]) { - f_omega = p_current[i]*volume/(omega_mass[i] * nktv2p); - if (deviatoric_flag) - f_omega -= fdev[i]/(omega_mass[i] * nktv2p); - omega_dot[i] += f_omega*dthalf; - } - } - } -} - -/* ---------------------------------------------------------------------- - if any tilt ratios exceed limits, set flip = 1 and compute new tilt values - do not flip in x or y if non-periodic (can tilt but not flip) - this is b/c the box length would be changed 
(dramatically) by flip - if yz tilt exceeded, adjust C vector by one B vector - if xz tilt exceeded, adjust C vector by one A vector - if xy tilt exceeded, adjust B vector by one A vector - check yz first since it may change xz, then xz check comes after - if any flip occurs, create new box in domain - image_flip() adjusts image flags due to box shape change induced by flip - remap() puts atoms outside the new box back into the new box - perform irregular on atoms in lamda coords to migrate atoms to new procs - important that image_flip comes before remap, since remap may change - image flags to new values, making eqs in doc of Domain:image_flip incorrect -------------------------------------------------------------------------- */ - -void FixTGNHDrude::pre_exchange() -{ - double xprd = domain->xprd; - double yprd = domain->yprd; - - // flip is only triggered when tilt exceeds 0.5 by DELTAFLIP - // this avoids immediate re-flipping due to tilt oscillations - - double xtiltmax = (0.5+DELTAFLIP)*xprd; - double ytiltmax = (0.5+DELTAFLIP)*yprd; - - int flipxy,flipxz,flipyz; - flipxy = flipxz = flipyz = 0; - - if (domain->yperiodic) { - if (domain->yz < -ytiltmax) { - domain->yz += yprd; - domain->xz += domain->xy; - flipyz = 1; - } else if (domain->yz >= ytiltmax) { - domain->yz -= yprd; - domain->xz -= domain->xy; - flipyz = -1; - } - } - - if (domain->xperiodic) { - if (domain->xz < -xtiltmax) { - domain->xz += xprd; - flipxz = 1; - } else if (domain->xz >= xtiltmax) { - domain->xz -= xprd; - flipxz = -1; - } - if (domain->xy < -xtiltmax) { - domain->xy += xprd; - flipxy = 1; - } else if (domain->xy >= xtiltmax) { - domain->xy -= xprd; - flipxy = -1; - } - } - - int flip = 0; - if (flipxy || flipxz || flipyz) flip = 1; - - if (flip) { - domain->set_global_box(); - domain->set_local_box(); - - domain->image_flip(flipxy,flipxz,flipyz); - - double **x = atom->x; - imageint *image = atom->image; - int nlocal = atom->nlocal; - for (int i = 0; i < nlocal; i++) 
domain->remap(x[i],image[i]); - - domain->x2lamda(atom->nlocal); - irregular->migrate_atoms(); - domain->lamda2x(atom->nlocal); - } -} - -/* ---------------------------------------------------------------------- - memory usage of Irregular -------------------------------------------------------------------------- */ - -double FixTGNHDrude::memory_usage() -{ - double bytes = 0.0; - if (irregular) bytes += irregular->memory_usage(); - return bytes; -} diff --git a/src/KOKKOS/fix_tgnh_drude_kokkos.h b/src/KOKKOS/fix_tgnh_drude_kokkos.h deleted file mode 100644 index f87642f188..0000000000 --- a/src/KOKKOS/fix_tgnh_drude_kokkos.h +++ /dev/null @@ -1,161 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - https://www.lammps.org/, Sandia National Laboratories - LAMMPS development team: developers@lammps.org - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -#ifndef LMP_FIX_TGNH_DRUDE_KOKKOS_H -#define LMP_FIX_TGNH_DRUDE_KOKKOS_H - -#include "fix_tgnh_drude_kokkos.h" - -namespace LAMMPS_NS { - -class FixTGNHDrudeKokkos : public FixTGNHDrude { - public: - FixTGNHDrudeKokkos(class LAMMPS *, int, char **); - ~FixTGNHDrudeKokkos() override; - int setmask() override; - void init() override; - void setup(int) override; - void initial_integrate(int) override; - void final_integrate() override; - void pre_force_respa(int, int, int) override; - void initial_integrate_respa(int, int, int) override; - void final_integrate_respa(int, int) override; - void pre_exchange() override; - double compute_scalar() override; - double compute_vector(int) override; - void write_restart(FILE *) override; - virtual int pack_restart_data(double *); // pack restart data - void restart(char *) override; - int modify_param(int, char **) override; - void reset_target(double) override; - void reset_dt() override; - double memory_usage() override; - - protected: - int dimension, which; - double dtv, dtf, dthalf, dt4, dt8, dto; - double boltz, nktv2p, tdof; - double vol0; // reference volume - double t0; // reference temperature - // used for barostat mass - double t_start, t_stop; - double t_current, t_target; - double t_freq; - - int tstat_flag; // 1 if control T - int pstat_flag; // 1 if control P - - int pstyle, pcouple; - int p_flag[6]; // 1 if control P on this dim, 0 if not - double p_start[6], p_stop[6]; - double p_freq[6], p_target[6]; - double omega[6], omega_dot[6]; - double omega_mass[6]; - double p_current[6]; - int kspace_flag; // 1 if KSpace invoked, 0 if not - std::vector rfix; // indices of rigid fixes - class Irregular *irregular; // for migrating atoms after box flips - - int nlevels_respa; - double *step_respa; - - char *id_temp, *id_press; - class Compute *temperature, *pressure; - int tcomputeflag, pcomputeflag; // 1 = compute was created by fix - // 
0 = created externally - - double *etamol; - double *etamol_dot; // chain thermostat for motion of whole molecules - double *etamol_dotdot; - double *etamol_mass; - - double *etaint; - double *etaint_dot; // chain thermostat for internal DOFs - double *etaint_dotdot; - double *etaint_mass; - - double *etadrude; - double *etadrude_dot; // chain thermostat for Drude relative motions - double *etadrude_dotdot; - double *etadrude_mass; - - double *etap; // chain thermostat for barostat - double *etap_dot; - double *etap_dotdot; - double *etap_mass; - - int mtchain; // length of chain - int mpchain; // length of chain - - int mtk_flag; // 0 if using Hoover barostat - int pdim; // number of barostatted dims - double p_freq_max; // maximum barostat frequency - - double p_hydro; // hydrostatic target pressure - - int nc_tchain, nc_pchain; - double sigma[6]; // scaled target stress - double fdev[6]; // deviatoric force on barostat - int deviatoric_flag; // 0 if target stress tensor is hydrostatic - double h0_inv[6]; // h_inv of reference (zero strain) box - int nreset_h0; // interval for resetting h0 - - double mtk_term1, mtk_term2; // Martyna-Tobias-Klein corrections - - int scaleyz; // 1 if yz scaled with lz - int scalexz; // 1 if xz scaled with lz - int scalexy; // 1 if xy scaled with ly - int flipflag; // 1 if box flips are invoked as needed - - int pre_exchange_flag; // set if pre_exchange needed for box flips - - double fixedpoint[3]; // location of dilation fixed-point - - void couple(); - virtual void remap(); - void nhc_temp_integrate(); - void nhc_press_integrate(); - - virtual void nve_x(); // may be overwritten by child classes - virtual void nve_v(); - virtual void nh_v_press(); - virtual void nh_v_temp(); - virtual void compute_temp_target(); - virtual int size_restart_global(); - - void compute_sigma(); - void compute_deviatoric(); - double compute_strain_energy(); - void compute_press_target(); - void nh_omega_dot(); - - class FixDrude *fix_drude; - int 
n_mol; // number of molecules in the system - double *mass_mol; - double dof_mol, dof_int, dof_drude; // DOFs of different modes in the fix group - void setup_mol_mass_dof(); - double **v_mol, **v_mol_tmp; - void compute_temp_mol_int_drude(bool); // calculate the temperatures of three sets of DOFs - bool temp_computed_end_of_step = false; - double tdrude_target, tdrude_freq; - double t_mol, t_int, t_drude; - double ke2mol, ke2int, ke2drude; - double ke2mol_target, ke2int_target, ke2drude_target; - double factor_eta_mol, factor_eta_int, factor_eta_drude; - double propagate(double *, double *, double *, const double *, const double &, const double &, - const double &) const; -}; - -} // namespace LAMMPS_NS - -#endif diff --git a/src/KOKKOS/fix_tgnpt_drude_kokkos.cpp b/src/KOKKOS/fix_tgnpt_drude_kokkos.cpp deleted file mode 100644 index d014dd51ce..0000000000 --- a/src/KOKKOS/fix_tgnpt_drude_kokkos.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// clang-format off -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - https://www.lammps.org/, Sandia National Laboratories - LAMMPS development team: developers@lammps.org - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author: Mitch Murphy (alphataubio at gmail) -------------------------------------------------------------------------- */ - -#include "fix_tgnpt_drude_kokkos.h" - -#include "error.h" -#include "modify.h" - -using namespace LAMMPS_NS; -using namespace FixConst; - -/* ---------------------------------------------------------------------- */ - -FixTGNPTDrudeKokkos::FixTGNPTDrudeKokkos(LAMMPS *lmp, int narg, char **arg) : - FixTGNHDrudeKokkos(lmp, narg, arg) -{ - if (!tstat_flag) - error->all(FLERR,"Temperature control must be used with fix npt"); - if (!pstat_flag) - error->all(FLERR,"Pressure control must be used with fix npt"); - - // create a new compute temp style - // id = fix-ID + temp - // compute group = all since pressure is always global (group all) - // and thus its KE/temperature contribution should use group all - - id_temp = utils::strdup(std::string(id) + "_temp"); - modify->add_compute(fmt::format("{} all temp",id_temp)); - tcomputeflag = 1; - - // create a new compute pressure style - // id = fix-ID + press, compute group = all - // pass id_temp as 4th arg to pressure constructor - - id_press = utils::strdup(std::string(id) + "_press"); - modify->add_compute(fmt::format("{} all pressure {}",id_press, id_temp)); - pcomputeflag = 1; -} diff --git a/src/KOKKOS/fix_tgnpt_drude_kokkos.h b/src/KOKKOS/fix_tgnpt_drude_kokkos.h deleted file mode 100644 index 7a57ddad14..0000000000 --- a/src/KOKKOS/fix_tgnpt_drude_kokkos.h +++ /dev/null @@ -1,37 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - https://www.lammps.org/, Sandia National Laboratories - LAMMPS development team: developers@lammps.org - - Copyright (2003) Sandia Corporation. 
Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#ifdef FIX_CLASS -// clang-format off -FixStyle(tgnpt/drude/kk,FixTGNPTDrudeKokkos); -FixStyle(tgnpt/drude/kk/device,FixTGNPTDrudeKokkos); -FixStyle(tgnpt/drude/kk/host,FixTGNPTDrudeKokkos); -// clang-format on -#else - -#ifndef LMP_FIX_TGNPT_DRUDE_KOKKOS_H -#define LMP_FIX_TGNPT_DRUDE_KOKKOS_H - -#include "fix_tgnh_drude_kokkos.h" - -namespace LAMMPS_NS { - -class FixTGNPTDrudeKokkos : public FixTGNHDrudeKokkos { - public: - FixTGNPTDrudeKokkos(class LAMMPS *, int, char **); -}; - -} // namespace LAMMPS_NS - -#endif -#endif diff --git a/src/KOKKOS/fix_tgnvt_drude_kokkos.cpp b/src/KOKKOS/fix_tgnvt_drude_kokkos.cpp deleted file mode 100644 index 93a3e104fe..0000000000 --- a/src/KOKKOS/fix_tgnvt_drude_kokkos.cpp +++ /dev/null @@ -1,44 +0,0 @@ -// clang-format off -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - https://www.lammps.org/, Sandia National Laboratories - LAMMPS development team: developers@lammps.org - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author: Mitch Murphy (alphataubio at gmail) -------------------------------------------------------------------------- */ - -#include "fix_tgnvt_drude_kokkos.h" - -#include "error.h" -#include "group.h" -#include "modify.h" - -using namespace LAMMPS_NS; -using namespace FixConst; - -/* ---------------------------------------------------------------------- */ - -FixTGNVTDrudeKokkos::FixTGNVTDrudeKokkos(LAMMPS *lmp, int narg, char **arg) : - FixTGNHDrudeKokkos(lmp, narg, arg) -{ - if (!tstat_flag) - error->all(FLERR,"Temperature control must be used with fix nvt"); - if (pstat_flag) - error->all(FLERR,"Pressure control can not be used with fix nvt"); - - // create a new compute temp style - // id = fix-ID + temp - - id_temp = utils::strdup(std::string(id) + "_temp"); - modify->add_compute(fmt::format("{} {} temp",id_temp,group->names[igroup])); - tcomputeflag = 1; -} diff --git a/src/KOKKOS/fix_tgnvt_drude_kokkos.h b/src/KOKKOS/fix_tgnvt_drude_kokkos.h deleted file mode 100644 index 9830d3f5f0..0000000000 --- a/src/KOKKOS/fix_tgnvt_drude_kokkos.h +++ /dev/null @@ -1,37 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - https://www.lammps.org/, Sandia National Laboratories - LAMMPS development team: developers@lammps.org - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -#ifdef FIX_CLASS -// clang-format off -FixStyle(tgnvt/drude/kk,FixTGNVTDrudeKokkos); -FixStyle(tgnvt/drude/kk/device,FixTGNVTDrudeKokkos); -FixStyle(tgnvt/drude/kk/host,FixTGNVTDrudeKokkos); -// clang-format on -#else - -#ifndef LMP_FIX_TGNVT_DRUDE_KOKKOS_H -#define LMP_FIX_TGNVT_DRUDE_KOKKOS_H - -#include "fix_tgnh_drude_kokkos.h" - -namespace LAMMPS_NS { - -class FixTGNVTDrudeKokkos : public FixTGNHDrudeKokkos { - public: - FixTGNVTDrudeKokkos(class LAMMPS *, int, char **); -}; - -} // namespace LAMMPS_NS - -#endif -#endif From 384d8cf51f5a1cebbf1ca9ad0894cd4f9a7d5f75 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 1 Oct 2024 12:43:57 -0400 Subject: [PATCH 108/294] compute_vector() bugfix --- src/KOKKOS/fix_recenter_kokkos.cpp | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/KOKKOS/fix_recenter_kokkos.cpp b/src/KOKKOS/fix_recenter_kokkos.cpp index de8b2639be..ca2607466f 100644 --- a/src/KOKKOS/fix_recenter_kokkos.cpp +++ b/src/KOKKOS/fix_recenter_kokkos.cpp @@ -13,7 +13,7 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing author: Mitch Murphy (alphataubio@gmail.com) + Contributing author: Mitch Murphy (alphataubio at gmail) ------------------------------------------------------------------------- */ #include "fix_recenter_kokkos.h" @@ -103,23 +103,27 @@ void FixRecenterKokkos::initial_integrate(int /*vflag*/) // shift coords by difference between actual COM and requested COM - double shiftx = xflag ? (xtarget - xcm[0]) : 0.0; - double shifty = yflag ? (ytarget - xcm[1]) : 0.0; - double shiftz = zflag ? (ztarget - xcm[2]) : 0.0; - distance = sqrt(shiftx*shiftx + shifty*shifty + shiftz*shiftz); + shift[0] = xflag ? (xtarget - xcm[0]) : 0.0; + shift[1] = yflag ? (ytarget - xcm[1]) : 0.0; + shift[2] = zflag ? 
(ztarget - xcm[2]) : 0.0; + distance = sqrt(shift[0]*shift[0] + shift[1]*shift[1] + shift[2]*shift[2]); + auto d_x = atomKK->k_x.template view(); auto d_mask = atomKK->k_mask.template view(); auto l_group2bit = group2bit; + double l_shiftx = shift[0]; + double l_shifty = shift[1]; + double l_shiftz = shift[2]; copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal), KOKKOS_LAMBDA(const int i) { if (d_mask[i] & l_group2bit) { - d_x(i,0) += shiftx; - d_x(i,1) += shifty; - d_x(i,2) += shiftz; + d_x(i,0) += l_shiftx; + d_x(i,1) += l_shifty; + d_x(i,2) += l_shiftz; } }); From 8332d89a4ac973ab762d3c9de926c14a3e3a0a02 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 1 Oct 2024 12:46:33 -0400 Subject: [PATCH 109/294] regenerate fix recenter tests --- unittest/force-styles/tests/fix-timestep-recenter-coords.yaml | 4 ++-- unittest/force-styles/tests/fix-timestep-recenter-init.yaml | 4 ++-- unittest/force-styles/tests/fix-timestep-recenter-null.yaml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/unittest/force-styles/tests/fix-timestep-recenter-coords.yaml b/unittest/force-styles/tests/fix-timestep-recenter-coords.yaml index 982a3b18e0..042f91545f 100644 --- a/unittest/force-styles/tests/fix-timestep-recenter-coords.yaml +++ b/unittest/force-styles/tests/fix-timestep-recenter-coords.yaml @@ -1,7 +1,7 @@ --- -lammps_version: 27 Jun 2024 +lammps_version: 29 Aug 2024 tags: generated -date_generated: Tue Jul 30 04:23:44 2024 +date_generated: Tue Oct 1 12:45:25 2024 epsilon: 2e-13 skip_tests: prerequisites: ! 
| diff --git a/unittest/force-styles/tests/fix-timestep-recenter-init.yaml b/unittest/force-styles/tests/fix-timestep-recenter-init.yaml index efc67fd477..72aeaf94c5 100644 --- a/unittest/force-styles/tests/fix-timestep-recenter-init.yaml +++ b/unittest/force-styles/tests/fix-timestep-recenter-init.yaml @@ -1,7 +1,7 @@ --- -lammps_version: 27 Jun 2024 +lammps_version: 29 Aug 2024 tags: generated -date_generated: Tue Jul 30 04:14:01 2024 +date_generated: Tue Oct 1 12:45:46 2024 epsilon: 2e-13 skip_tests: prerequisites: ! | diff --git a/unittest/force-styles/tests/fix-timestep-recenter-null.yaml b/unittest/force-styles/tests/fix-timestep-recenter-null.yaml index bb09e1a1d4..f0db929f0c 100644 --- a/unittest/force-styles/tests/fix-timestep-recenter-null.yaml +++ b/unittest/force-styles/tests/fix-timestep-recenter-null.yaml @@ -1,7 +1,7 @@ --- -lammps_version: 27 Jun 2024 +lammps_version: 29 Aug 2024 tags: generated -date_generated: Tue Jul 30 04:25:52 2024 +date_generated: Tue Oct 1 12:45:37 2024 epsilon: 2e-13 skip_tests: prerequisites: ! 
| From e61d9f08eecec3838173285cc2a9e761e190bc56 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 1 Oct 2024 13:00:59 -0400 Subject: [PATCH 110/294] regenerate fix wall/region tests --- .../fix-timestep-wall_region_harmonic.yaml | 57 ++++++++++--------- .../fix-timestep-wall_region_lj1043.yaml | 4 +- .../tests/fix-timestep-wall_region_lj126.yaml | 4 +- .../tests/fix-timestep-wall_region_lj93.yaml | 4 +- .../tests/fix-timestep-wall_region_morse.yaml | 4 +- 5 files changed, 37 insertions(+), 36 deletions(-) diff --git a/unittest/force-styles/tests/fix-timestep-wall_region_harmonic.yaml b/unittest/force-styles/tests/fix-timestep-wall_region_harmonic.yaml index 5ae9df3a06..b175d40128 100644 --- a/unittest/force-styles/tests/fix-timestep-wall_region_harmonic.yaml +++ b/unittest/force-styles/tests/fix-timestep-wall_region_harmonic.yaml @@ -1,6 +1,7 @@ --- -lammps_version: 3 Aug 2022 -date_generated: Mon Aug 15 01:14:02 2022 +lammps_version: 29 Aug 2024 +tags: generated +date_generated: Tue Oct 1 12:59:45 2024 epsilon: 4e-14 skip_tests: prerequisites: ! | @@ -51,33 +52,33 @@ run_pos: ! |2 28 -2.7473562684513411e+00 -4.0200819932379330e+00 1.5830052163433954e+00 29 -1.3126000191359855e+00 -3.5962518039482929e+00 2.2746342468737835e+00 run_vel: ! 
|2 - 1 8.1705744183262832e-03 1.6516406176274298e-02 4.7902264318913212e-03 - 2 5.4501493445687828e-03 5.1791699408496412e-03 -1.4372931530376577e-03 - 3 -8.2298292722385591e-03 -1.2926551614621364e-02 -4.0984181178163734e-03 - 4 -3.7699042590093506e-03 -6.5722892098813894e-03 -1.1184640360133316e-03 - 5 -1.1021961004346575e-02 -9.8906780939336039e-03 -2.8410737829284390e-03 + 1 8.1705744183262832e-03 1.6516406176274298e-02 4.7902264318913203e-03 + 2 5.4501493445687828e-03 5.1791699408496447e-03 -1.4372931530376549e-03 + 3 -8.2298292722385574e-03 -1.2926551614621364e-02 -4.0984181178163699e-03 + 4 -3.7699042590093523e-03 -6.5722892098813894e-03 -1.1184640360133299e-03 + 5 -1.1021961004346582e-02 -9.8906780939336091e-03 -2.8410737829284408e-03 6 -3.9676663166400027e-02 4.6817061464710263e-02 3.7148491979476131e-02 - 7 9.1033953013898753e-04 -1.0128524411938794e-02 -5.1568251805019748e-02 - 8 7.9064712058855742e-03 -3.3507254552631585e-03 3.4557098492564643e-02 - 9 1.5644176117320938e-03 3.7365546102722208e-03 1.5047408822037651e-02 - 10 2.9201446820573192e-02 -2.9249578745486140e-02 -1.5018077424322544e-02 - 11 -4.7835961513517542e-03 -3.7481385134185211e-03 -2.3464104142290089e-03 - 12 2.2696451841920672e-03 -3.4774154398129641e-04 -3.0640770327796966e-03 - 13 2.7531740451953164e-03 5.8171061612840502e-03 -7.9467454022160669e-04 - 14 3.5246182371994205e-03 -5.7939995585585538e-03 -3.9478431172751361e-03 - 15 -1.8547943640122950e-03 -5.8554729942777778e-03 6.2938485140538675e-03 + 7 9.1033953013898742e-04 -1.0128524411938794e-02 -5.1568251805019748e-02 + 8 7.9064712058855742e-03 -3.3507254552631576e-03 3.4557098492564650e-02 + 9 1.5644176117320932e-03 3.7365546102722212e-03 1.5047408822037651e-02 + 10 2.9201446820573192e-02 -2.9249578745486147e-02 -1.5018077424322544e-02 + 11 -4.7835961513517542e-03 -3.7481385134185202e-03 -2.3464104142290089e-03 + 12 2.2696451841920694e-03 -3.4774154398129690e-04 -3.0640770327796979e-03 + 13 2.7531740451953164e-03 
5.8171061612840493e-03 -7.9467454022160377e-04 + 14 3.5246182371994183e-03 -5.7939995585585503e-03 -3.9478431172751344e-03 + 15 -1.8547943640122972e-03 -5.8554729942777778e-03 6.2938485140538692e-03 16 1.8681499973445252e-02 -1.3262466204585332e-02 -4.5638651457003250e-02 17 -1.2896269981100378e-02 9.7527665265956451e-03 3.7296535360836762e-02 - 18 -8.0065795274987550e-04 -8.6270473974390637e-04 -1.4483040536385791e-03 - 19 1.2452390067376827e-03 -2.5061097800836321e-03 7.2998639311871857e-03 - 20 3.5930058460518109e-03 3.6938852051849871e-03 3.2322738480194727e-03 - 21 -1.4689219756961610e-03 -2.7352107824530291e-04 7.0581625180892197e-04 - 22 -7.0694199165145105e-03 -4.2577148692717545e-03 2.8079117911323598e-04 - 23 6.0446963236685230e-03 -1.4000131545098772e-03 2.5819754799379716e-03 - 24 3.1926368451268083e-04 -9.9445664487428820e-04 1.4999960207062409e-04 - 25 1.3789752933078488e-04 -4.4335894831520756e-03 -8.1808138106080120e-04 - 26 2.0485904023409989e-03 2.7813358660936129e-03 4.3245726853349256e-03 - 27 4.5604120293369840e-04 -1.0305523026921111e-03 2.1188058381358413e-04 - 28 -6.2544520861855151e-03 1.4127711176146879e-03 -1.8429821884794260e-03 - 29 6.4110631534402174e-04 3.1273432719593824e-03 3.7253671105656736e-03 + 18 -8.0065795274987550e-04 -8.6270473974390605e-04 -1.4483040536385806e-03 + 19 1.2452390067376805e-03 -2.5061097800836356e-03 7.2998639311871892e-03 + 20 3.5930058460518109e-03 3.6938852051849871e-03 3.2322738480194770e-03 + 21 -1.4689219756961604e-03 -2.7352107824530231e-04 7.0581625180892046e-04 + 22 -7.0694199165145140e-03 -4.2577148692717554e-03 2.8079117911323815e-04 + 23 6.0446963236685256e-03 -1.4000131545098772e-03 2.5819754799379755e-03 + 24 3.1926368451268056e-04 -9.9445664487428712e-04 1.4999960207062358e-04 + 25 1.3789752933078488e-04 -4.4335894831520773e-03 -8.1808138106080109e-04 + 26 2.0485904023410002e-03 2.7813358660936120e-03 4.3245726853349290e-03 + 27 4.5604120293369819e-04 -1.0305523026921102e-03 
2.1188058381358391e-04 + 28 -6.2544520861855151e-03 1.4127711176146864e-03 -1.8429821884794260e-03 + 29 6.4110631534402261e-04 3.1273432719593807e-03 3.7253671105656745e-03 ... diff --git a/unittest/force-styles/tests/fix-timestep-wall_region_lj1043.yaml b/unittest/force-styles/tests/fix-timestep-wall_region_lj1043.yaml index 1664609b7a..df1e017a2b 100644 --- a/unittest/force-styles/tests/fix-timestep-wall_region_lj1043.yaml +++ b/unittest/force-styles/tests/fix-timestep-wall_region_lj1043.yaml @@ -1,7 +1,7 @@ --- -lammps_version: 27 Jun 2024 +lammps_version: 29 Aug 2024 tags: generated -date_generated: Mon Aug 5 06:12:03 2024 +date_generated: Tue Oct 1 13:00:12 2024 epsilon: 4e-14 skip_tests: prerequisites: ! | diff --git a/unittest/force-styles/tests/fix-timestep-wall_region_lj126.yaml b/unittest/force-styles/tests/fix-timestep-wall_region_lj126.yaml index 275fc6f721..298512aad8 100644 --- a/unittest/force-styles/tests/fix-timestep-wall_region_lj126.yaml +++ b/unittest/force-styles/tests/fix-timestep-wall_region_lj126.yaml @@ -1,7 +1,7 @@ --- -lammps_version: 27 Jun 2024 +lammps_version: 29 Aug 2024 tags: generated -date_generated: Mon Aug 5 06:12:32 2024 +date_generated: Tue Oct 1 12:59:56 2024 epsilon: 4e-14 skip_tests: prerequisites: ! | diff --git a/unittest/force-styles/tests/fix-timestep-wall_region_lj93.yaml b/unittest/force-styles/tests/fix-timestep-wall_region_lj93.yaml index d68a2f0e75..82e2ead2f7 100644 --- a/unittest/force-styles/tests/fix-timestep-wall_region_lj93.yaml +++ b/unittest/force-styles/tests/fix-timestep-wall_region_lj93.yaml @@ -1,7 +1,7 @@ --- -lammps_version: 27 Jun 2024 +lammps_version: 29 Aug 2024 tags: generated -date_generated: Mon Aug 5 05:42:49 2024 +date_generated: Tue Oct 1 13:00:05 2024 epsilon: 4e-14 skip_tests: prerequisites: ! 
| diff --git a/unittest/force-styles/tests/fix-timestep-wall_region_morse.yaml b/unittest/force-styles/tests/fix-timestep-wall_region_morse.yaml index 2798fba1ba..05ca96933c 100644 --- a/unittest/force-styles/tests/fix-timestep-wall_region_morse.yaml +++ b/unittest/force-styles/tests/fix-timestep-wall_region_morse.yaml @@ -1,7 +1,7 @@ --- -lammps_version: 27 Jun 2024 +lammps_version: 29 Aug 2024 tags: generated -date_generated: Mon Aug 5 06:13:11 2024 +date_generated: Tue Oct 1 13:00:18 2024 epsilon: 4e-14 skip_tests: prerequisites: ! | From a8acd9b5736cf4a7c6b7ebbf545a047293c40bf0 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 2 Oct 2024 13:41:41 -0400 Subject: [PATCH 111/294] Update fix_nve_limit.rst --- doc/src/fix_nve_limit.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/src/fix_nve_limit.rst b/doc/src/fix_nve_limit.rst index 23517aea40..35ed25084c 100644 --- a/doc/src/fix_nve_limit.rst +++ b/doc/src/fix_nve_limit.rst @@ -3,6 +3,8 @@ fix nve/limit command ===================== +Accelerator Variants: *nve/limit/kk* + Syntax """""" From 89346740e1a40753cc06de851904502389d4df9d Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 2 Oct 2024 13:42:13 -0400 Subject: [PATCH 112/294] Update fix_recenter.rst --- doc/src/fix_recenter.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/src/fix_recenter.rst b/doc/src/fix_recenter.rst index 9991904b37..d5841cb51b 100644 --- a/doc/src/fix_recenter.rst +++ b/doc/src/fix_recenter.rst @@ -3,6 +3,8 @@ fix recenter command ==================== +Accelerator Variants: *recenter/kk* + Syntax """""" From fff1456553e191ee64c4a46329e0e265f4164544 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 2 Oct 2024 13:43:23 -0400 Subject: [PATCH 113/294] Update fix_wall_region.rst --- doc/src/fix_wall_region.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/src/fix_wall_region.rst b/doc/src/fix_wall_region.rst index 466319c12e..89b2919f92 100644 --- a/doc/src/fix_wall_region.rst +++ 
b/doc/src/fix_wall_region.rst @@ -3,6 +3,8 @@ fix wall/region command ======================= +Accelerator Variants: *wall/region/kk* + Syntax """""" From 8a25871db5af7fe2a86dd6c161659ffe95c4033b Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 2 Oct 2024 13:45:59 -0400 Subject: [PATCH 114/294] Update region.rst --- doc/src/region.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/src/region.rst b/doc/src/region.rst index 079fff402f..94feee6ad4 100644 --- a/doc/src/region.rst +++ b/doc/src/region.rst @@ -3,6 +3,8 @@ region command ============== +Accelerator Variants: *block/kk*, *sphere/kk* + Syntax """""" From 26b83602be3efc5e9330d16ce26d8dcd59a2371a Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 2 Oct 2024 13:56:21 -0400 Subject: [PATCH 115/294] Update Commands_fix.rst --- doc/src/Commands_fix.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/src/Commands_fix.rst b/doc/src/Commands_fix.rst index d9febcc289..127ca97801 100644 --- a/doc/src/Commands_fix.rst +++ b/doc/src/Commands_fix.rst @@ -43,7 +43,7 @@ OPT. * :doc:`brownian/asphere ` * :doc:`brownian/sphere ` * :doc:`charge/regulation ` - * :doc:`cmap ` + * :doc:`cmap (k) ` * :doc:`colvars ` * :doc:`controller ` * :doc:`damping/cundall ` @@ -134,7 +134,7 @@ OPT. * :doc:`nve/dot ` * :doc:`nve/dotc/langevin ` * :doc:`nve/eff ` - * :doc:`nve/limit ` + * :doc:`nve/limit (k) ` * :doc:`nve/line ` * :doc:`nve/manifold/rattle ` * :doc:`nve/noforce ` @@ -189,7 +189,7 @@ OPT. * :doc:`rattle ` * :doc:`reaxff/bonds (k) ` * :doc:`reaxff/species (k) ` - * :doc:`recenter ` + * :doc:`recenter (k) ` * :doc:`restrain ` * :doc:`rheo ` * :doc:`rheo/oxidation ` @@ -267,7 +267,7 @@ OPT. 
* :doc:`wall/piston ` * :doc:`wall/reflect (k) ` * :doc:`wall/reflect/stochastic ` - * :doc:`wall/region ` + * :doc:`wall/region (k) ` * :doc:`wall/region/ees ` * :doc:`wall/srd ` * :doc:`wall/table ` From 26aadc25ad9edc3b4368968879b56bb66d866278 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 2 Oct 2024 14:31:54 -0400 Subject: [PATCH 116/294] Update fix_cmap.rst --- doc/src/fix_cmap.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/src/fix_cmap.rst b/doc/src/fix_cmap.rst index 493bdb774f..8d354cce76 100644 --- a/doc/src/fix_cmap.rst +++ b/doc/src/fix_cmap.rst @@ -1,4 +1,5 @@ .. index:: fix cmap +.. index:: fix cmap/kk fix cmap command ================ From cac22c216ab164af630fe7aafbc0bcfbaa41366b Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 2 Oct 2024 14:37:03 -0400 Subject: [PATCH 117/294] update kk index entries --- doc/src/fix_nve_limit.rst | 1 + doc/src/fix_recenter.rst | 1 + doc/src/fix_wall_region.rst | 1 + 3 files changed, 3 insertions(+) diff --git a/doc/src/fix_nve_limit.rst b/doc/src/fix_nve_limit.rst index 35ed25084c..e46c7e5ea2 100644 --- a/doc/src/fix_nve_limit.rst +++ b/doc/src/fix_nve_limit.rst @@ -1,4 +1,5 @@ .. index:: fix nve/limit +.. index:: fix nve/limit/kk fix nve/limit command ===================== diff --git a/doc/src/fix_recenter.rst b/doc/src/fix_recenter.rst index d5841cb51b..dbef13965e 100644 --- a/doc/src/fix_recenter.rst +++ b/doc/src/fix_recenter.rst @@ -1,4 +1,5 @@ .. index:: fix recenter +.. index:: fix recenter/kk fix recenter command ==================== diff --git a/doc/src/fix_wall_region.rst b/doc/src/fix_wall_region.rst index 89b2919f92..526de55202 100644 --- a/doc/src/fix_wall_region.rst +++ b/doc/src/fix_wall_region.rst @@ -1,4 +1,5 @@ .. index:: fix wall/region +.. 
index:: fix wall/region/kk fix wall/region command ======================= From 9c06e8265ff9158d65ef1b9787c47981dd13815e Mon Sep 17 00:00:00 2001 From: alphataubio <145145864+alphataubio@users.noreply.github.com> Date: Thu, 3 Oct 2024 09:48:12 -0400 Subject: [PATCH 118/294] Update src/create_atoms.cpp Co-authored-by: Axel Kohlmeyer --- src/create_atoms.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/create_atoms.cpp b/src/create_atoms.cpp index b8d42156d1..f7a1be44eb 100644 --- a/src/create_atoms.cpp +++ b/src/create_atoms.cpp @@ -303,7 +303,7 @@ void CreateAtoms::command(int narg, char **arg) if (onemol->xflag == 0) error->all(FLERR, "Create_atoms molecule must have coordinates"); if (onemol->typeflag == 0) error->all(FLERR, "Create_atoms molecule must have atom types"); if (ntype + onemol->ntypes <= 0 || ntype + onemol->ntypes > atom->ntypes) - error->all(FLERR, "Invalid atom type {} in create_atoms mol command (onemol->ntypes {} atom->ntypes {})", ntype, onemol->ntypes, atom->ntypes); + error->all(FLERR, "Invalid atom type {} in create_atoms mol command", ntype + onemol->ntypes); if (onemol->tag_require && !atom->tag_enable) error->all(FLERR, "Create_atoms molecule has atom IDs, but system does not"); if (atom->molecular == Atom::TEMPLATE && onemol != atom->avec->onemols[0]) From 29eafa999bee544974078f5e3c4cd31c15a50544 Mon Sep 17 00:00:00 2001 From: alphataubio <145145864+alphataubio@users.noreply.github.com> Date: Thu, 3 Oct 2024 09:49:56 -0400 Subject: [PATCH 119/294] Update src/region_sphere.h Co-authored-by: Axel Kohlmeyer --- src/region_sphere.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/region_sphere.h b/src/region_sphere.h index 1fdda7fd29..3053d98edb 100644 --- a/src/region_sphere.h +++ b/src/region_sphere.h @@ -36,7 +36,7 @@ class RegSphere : public Region { void set_velocity_shape() override; void velocity_contact_shape(double *, double *) override; - protected: // KOKKOS subclass needs protected 
not private + protected: double xc, yc, zc; double radius; int xstyle, xvar; From b5fdd8f0ae5adf278367c01857df0345661fd61c Mon Sep 17 00:00:00 2001 From: alphataubio Date: Thu, 3 Oct 2024 10:26:24 -0400 Subject: [PATCH 120/294] Revert "update atom_style template test for added charge" This reverts commit 3ea74b17252ef4a4e16d2ee6841c518e983e435a. --- unittest/formats/test_atom_styles.cpp | 431 +++++++++++++++++++++++++- 1 file changed, 429 insertions(+), 2 deletions(-) diff --git a/unittest/formats/test_atom_styles.cpp b/unittest/formats/test_atom_styles.cpp index 90c8b07b02..921d469e31 100644 --- a/unittest/formats/test_atom_styles.cpp +++ b/unittest/formats/test_atom_styles.cpp @@ -2743,7 +2743,6 @@ TEST_F(AtomStyleTest, template) expected.has_x = true; expected.has_v = true; expected.has_f = true; - expected.q_flag = 1; expected.molecule_flag = 1; expected.molindex_flag = 1; expected.molatom_flag = 1; @@ -3014,7 +3013,7 @@ TEST_F(AtomStyleTest, template) ASSERT_EQ(lmp->atom->nghost, 0); ASSERT_NE(lmp->atom->nmax, -1); ASSERT_EQ(lmp->atom->tag_enable, 1); - ASSERT_EQ(lmp->atom->q_flag, 1); + ASSERT_EQ(lmp->atom->q_flag, 0); ASSERT_EQ(lmp->atom->molecular, Atom::TEMPLATE); ASSERT_EQ(lmp->atom->ntypes, 4); ASSERT_EQ(lmp->atom->tag_consecutive(), 0); @@ -3118,6 +3117,434 @@ TEST_F(AtomStyleTest, template) ASSERT_EQ(molatom[GETIDX(16)], -1); } +TEST_F(AtomStyleTest, template_charge) +{ + if (!LAMMPS::is_installed_pkg("MOLECULE")) GTEST_SKIP(); + BEGIN_HIDE_OUTPUT(); + command("molecule twomols h2o.mol co2.mol offset 2 1 1 0 0"); + command("atom_style hybrid template twomols charge"); + command("newton on"); + END_HIDE_OUTPUT(); + + AtomState expected; + expected.atom_style = "hybrid"; + expected.molecular = Atom::TEMPLATE; + expected.nbondtypes = 2; + expected.nangletypes = 2; + expected.tag_enable = 1; + expected.has_type = true; + expected.has_mask = true; + expected.has_image = true; + expected.has_x = true; + expected.has_v = true; + expected.has_f = true; + 
expected.molecule_flag = 1; + expected.molindex_flag = 1; + expected.molatom_flag = 1; + expected.q_flag = 1; + expected.nmolecule = 2; + expected.map_style = 3; + + ASSERT_ATOM_STATE_EQ(lmp->atom, expected); + + auto *hybrid = dynamic_cast(lmp->atom->avec); + ASSERT_THAT(std::string(lmp->atom->atom_style), Eq("hybrid")); + ASSERT_EQ(hybrid->nstyles, 2); + ASSERT_THAT(std::string(hybrid->keywords[0]), Eq("template")); + ASSERT_THAT(std::string(hybrid->keywords[1]), Eq("charge")); + ASSERT_NE(hybrid->styles[0], nullptr); + ASSERT_NE(hybrid->styles[1], nullptr); + + BEGIN_HIDE_OUTPUT(); + command("create_box 4 box bond/types 2 angle/types 2 "); + command("create_atoms 0 single -2.0 2.0 0.1 mol twomols 65234"); + command("create_atoms 0 single -2.0 -2.0 -0.1 mol twomols 62346"); + command("create_atoms 0 single 2.0 2.0 -0.1 mol twomols 61354"); + command("create_atoms 3 single 2.0 -2.0 0.1"); + command("create_atoms 3 single 2.0 2.0 -2.1"); + command("create_atoms 4 single 2.0 -2.0 2.1"); + command("mass 1 16.0"); + command("mass 2 1.0"); + command("mass 3 12.0"); + command("mass 4 16.0"); + command("set atom 10 charge 0.7"); + command("set atom 11 charge -0.35"); + command("set atom 12 charge -0.35"); + command("bond_style zero"); + command("bond_coeff 1 1.0"); + command("bond_coeff 2 1.16"); + command("angle_style zero"); + command("angle_coeff * 109.0"); + command("pair_coeff * *"); + END_HIDE_OUTPUT(); + ASSERT_NE(lmp->atom->avec, nullptr); + hybrid = dynamic_cast(lmp->atom->avec); + ASSERT_THAT(std::string(lmp->atom->atom_style), Eq("hybrid")); + ASSERT_EQ(hybrid->nstyles, 2); + ASSERT_THAT(std::string(hybrid->keywords[0]), Eq("template")); + ASSERT_THAT(std::string(hybrid->keywords[1]), Eq("charge")); + ASSERT_NE(hybrid->styles[0], nullptr); + ASSERT_NE(hybrid->styles[1], nullptr); + + ASSERT_EQ(lmp->atom->natoms, 12); + ASSERT_EQ(lmp->atom->nbonds, 6); + ASSERT_EQ(lmp->atom->nbondtypes, 2); + ASSERT_EQ(lmp->atom->nangles, 3); + ASSERT_EQ(lmp->atom->nangletypes, 
2); + ASSERT_EQ(lmp->atom->nellipsoids, 0); + ASSERT_EQ(lmp->atom->nlocal, 12); + ASSERT_EQ(lmp->atom->nghost, 0); + ASSERT_NE(lmp->atom->nmax, -1); + ASSERT_EQ(lmp->atom->tag_enable, 1); + ASSERT_EQ(lmp->atom->molecular, Atom::TEMPLATE); + ASSERT_EQ(lmp->atom->q_flag, 1); + ASSERT_EQ(lmp->atom->ntypes, 4); + ASSERT_EQ(lmp->atom->nextra_grow, 0); + ASSERT_EQ(lmp->atom->nextra_restart, 0); + ASSERT_EQ(lmp->atom->nextra_border, 0); + ASSERT_EQ(lmp->atom->nextra_grow_max, 0); + ASSERT_EQ(lmp->atom->nextra_restart_max, 0); + ASSERT_EQ(lmp->atom->nextra_border_max, 0); + ASSERT_EQ(lmp->atom->nextra_store, 0); + ASSERT_EQ(lmp->atom->extra_grow, nullptr); + ASSERT_EQ(lmp->atom->extra_restart, nullptr); + ASSERT_EQ(lmp->atom->extra_border, nullptr); + ASSERT_EQ(lmp->atom->extra, nullptr); + + ASSERT_NE(lmp->atom->mass, nullptr); + ASSERT_NE(lmp->atom->mass_setflag, nullptr); + + BEGIN_HIDE_OUTPUT(); + command("write_data test_atom_styles.data"); + command("clear"); + command("units real"); + command("newton off on"); + command("molecule twomols h2o.mol co2.mol offset 2 1 1 0 0"); + command("atom_style hybrid template twomols charge"); + command("pair_style zero 4.0"); + command("bond_style zero"); + command("angle_style zero"); + command("atom_modify map array"); + command("read_data test_atom_styles.data"); + END_HIDE_OUTPUT(); + ASSERT_THAT(std::string(lmp->atom->atom_style), Eq("hybrid")); + ASSERT_NE(lmp->atom->avec, nullptr); + + ASSERT_EQ(lmp->atom->natoms, 12); + ASSERT_EQ(lmp->atom->nlocal, 12); + ASSERT_EQ(lmp->atom->nbonds, 6); + ASSERT_EQ(lmp->atom->nangles, 3); + ASSERT_EQ(lmp->atom->nbondtypes, 2); + ASSERT_EQ(lmp->atom->nangletypes, 2); + ASSERT_EQ(lmp->atom->nghost, 0); + ASSERT_NE(lmp->atom->nmax, -1); + ASSERT_EQ(lmp->atom->tag_enable, 1); + ASSERT_EQ(lmp->atom->molecular, Atom::TEMPLATE); + ASSERT_EQ(lmp->atom->ntypes, 4); + ASSERT_NE(lmp->atom->sametag, nullptr); + ASSERT_EQ(lmp->atom->tag_consecutive(), 1); + ASSERT_EQ(lmp->atom->map_style, 
Atom::MAP_ARRAY); + ASSERT_EQ(lmp->atom->map_user, 1); + ASSERT_EQ(lmp->atom->map_tag_max, 12); + + auto *molecule = lmp->atom->molecule; + auto *molindex = lmp->atom->molindex; + auto *molatom = lmp->atom->molatom; + + ASSERT_EQ(molecule[GETIDX(1)], 1); + ASSERT_EQ(molecule[GETIDX(2)], 1); + ASSERT_EQ(molecule[GETIDX(3)], 1); + ASSERT_EQ(molecule[GETIDX(4)], 2); + ASSERT_EQ(molecule[GETIDX(5)], 2); + ASSERT_EQ(molecule[GETIDX(6)], 2); + ASSERT_EQ(molecule[GETIDX(7)], 3); + ASSERT_EQ(molecule[GETIDX(8)], 3); + ASSERT_EQ(molecule[GETIDX(9)], 3); + ASSERT_EQ(molecule[GETIDX(10)], 0); + ASSERT_EQ(molecule[GETIDX(11)], 0); + ASSERT_EQ(molecule[GETIDX(12)], 0); + ASSERT_EQ(molindex[GETIDX(1)], 0); + ASSERT_EQ(molindex[GETIDX(2)], 0); + ASSERT_EQ(molindex[GETIDX(3)], 0); + ASSERT_EQ(molindex[GETIDX(4)], 0); + ASSERT_EQ(molindex[GETIDX(5)], 0); + ASSERT_EQ(molindex[GETIDX(6)], 0); + ASSERT_EQ(molindex[GETIDX(7)], 0); + ASSERT_EQ(molindex[GETIDX(8)], 0); + ASSERT_EQ(molindex[GETIDX(9)], 0); + ASSERT_EQ(molindex[GETIDX(10)], -1); + ASSERT_EQ(molindex[GETIDX(11)], -1); + ASSERT_EQ(molindex[GETIDX(12)], -1); + ASSERT_EQ(molatom[GETIDX(1)], 0); + ASSERT_EQ(molatom[GETIDX(2)], 1); + ASSERT_EQ(molatom[GETIDX(3)], 2); + ASSERT_EQ(molatom[GETIDX(4)], 0); + ASSERT_EQ(molatom[GETIDX(5)], 1); + ASSERT_EQ(molatom[GETIDX(6)], 2); + ASSERT_EQ(molatom[GETIDX(7)], 0); + ASSERT_EQ(molatom[GETIDX(8)], 1); + ASSERT_EQ(molatom[GETIDX(9)], 2); + ASSERT_EQ(molatom[GETIDX(10)], -1); + ASSERT_EQ(molatom[GETIDX(11)], -1); + ASSERT_EQ(molatom[GETIDX(12)], -1); + + BEGIN_HIDE_OUTPUT(); + command("clear"); + command("units real"); + command("molecule twomols h2o.mol co2.mol offset 2 1 1 0 0"); + command("atom_style hybrid template twomols charge"); + command("pair_style zero 4.0"); + command("bond_style zero"); + command("angle_style zero"); + command("atom_modify map array"); + command("read_data test_atom_styles.data"); + END_HIDE_OUTPUT(); + ASSERT_THAT(std::string(lmp->atom->atom_style), 
Eq("hybrid")); + ASSERT_NE(lmp->atom->avec, nullptr); + + ASSERT_EQ(lmp->atom->natoms, 12); + ASSERT_EQ(lmp->atom->nlocal, 12); + ASSERT_EQ(lmp->atom->nbonds, 6); + ASSERT_EQ(lmp->atom->nangles, 3); + ASSERT_EQ(lmp->atom->nbondtypes, 2); + ASSERT_EQ(lmp->atom->nangletypes, 2); + ASSERT_EQ(lmp->atom->nghost, 0); + ASSERT_NE(lmp->atom->nmax, -1); + ASSERT_EQ(lmp->atom->tag_enable, 1); + ASSERT_EQ(lmp->atom->molecular, Atom::TEMPLATE); + ASSERT_EQ(lmp->atom->ntypes, 4); + ASSERT_NE(lmp->atom->sametag, nullptr); + ASSERT_EQ(lmp->atom->tag_consecutive(), 1); + ASSERT_EQ(lmp->atom->map_style, Atom::MAP_ARRAY); + ASSERT_EQ(lmp->atom->map_user, 1); + ASSERT_EQ(lmp->atom->map_tag_max, 12); + + molecule = lmp->atom->molecule; + molindex = lmp->atom->molindex; + molatom = lmp->atom->molatom; + + ASSERT_EQ(molindex[GETIDX(1)], 0); + ASSERT_EQ(molindex[GETIDX(2)], 0); + ASSERT_EQ(molindex[GETIDX(3)], 0); + ASSERT_EQ(molindex[GETIDX(4)], 0); + ASSERT_EQ(molindex[GETIDX(5)], 0); + ASSERT_EQ(molindex[GETIDX(6)], 0); + ASSERT_EQ(molindex[GETIDX(7)], 0); + ASSERT_EQ(molindex[GETIDX(8)], 0); + ASSERT_EQ(molindex[GETIDX(9)], 0); + ASSERT_EQ(molindex[GETIDX(10)], -1); + ASSERT_EQ(molindex[GETIDX(11)], -1); + ASSERT_EQ(molindex[GETIDX(12)], -1); + ASSERT_EQ(molatom[GETIDX(1)], 0); + ASSERT_EQ(molatom[GETIDX(2)], 1); + ASSERT_EQ(molatom[GETIDX(3)], 2); + ASSERT_EQ(molatom[GETIDX(4)], 0); + ASSERT_EQ(molatom[GETIDX(5)], 1); + ASSERT_EQ(molatom[GETIDX(6)], 2); + ASSERT_EQ(molatom[GETIDX(7)], 0); + ASSERT_EQ(molatom[GETIDX(8)], 1); + ASSERT_EQ(molatom[GETIDX(9)], 2); + ASSERT_EQ(molatom[GETIDX(10)], -1); + ASSERT_EQ(molatom[GETIDX(11)], -1); + ASSERT_EQ(molatom[GETIDX(12)], -1); + + auto *x = lmp->atom->x; + auto *v = lmp->atom->v; + auto *type = lmp->atom->type; + auto *q = lmp->atom->q; + + EXPECT_NEAR(x[GETIDX(10)][0], 2.0, EPSILON); + EXPECT_NEAR(x[GETIDX(10)][1], -2.0, EPSILON); + EXPECT_NEAR(x[GETIDX(10)][2], 0.1, EPSILON); + EXPECT_NEAR(x[GETIDX(11)][0], 2.0, EPSILON); + 
EXPECT_NEAR(x[GETIDX(11)][1], 2.0, EPSILON); + EXPECT_NEAR(x[GETIDX(11)][2], -2.1, EPSILON); + EXPECT_NEAR(x[GETIDX(12)][0], 2.0, EPSILON); + EXPECT_NEAR(x[GETIDX(12)][1], -2.0, EPSILON); + EXPECT_NEAR(x[GETIDX(12)][2], 2.1, EPSILON); + EXPECT_NEAR(v[GETIDX(1)][0], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(1)][1], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(1)][2], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(2)][0], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(2)][1], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(2)][2], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(3)][0], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(3)][1], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(3)][2], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(4)][0], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(4)][1], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(4)][2], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(5)][0], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(5)][1], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(5)][2], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(6)][0], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(6)][1], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(6)][2], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(7)][0], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(7)][1], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(7)][2], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(8)][0], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(8)][1], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(8)][2], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(9)][0], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(9)][1], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(9)][2], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(10)][0], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(10)][1], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(10)][2], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(11)][0], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(11)][1], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(11)][2], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(12)][0], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(12)][1], 0.0, EPSILON); + EXPECT_NEAR(v[GETIDX(12)][2], 0.0, EPSILON); + EXPECT_NEAR(q[GETIDX(1)], -0.8472, EPSILON); + EXPECT_NEAR(q[GETIDX(2)], 0.4236, EPSILON); + 
EXPECT_NEAR(q[GETIDX(3)], 0.4236, EPSILON); + EXPECT_NEAR(q[GETIDX(4)], -0.8472, EPSILON); + EXPECT_NEAR(q[GETIDX(5)], 0.4236, EPSILON); + EXPECT_NEAR(q[GETIDX(6)], 0.4236, EPSILON); + EXPECT_NEAR(q[GETIDX(7)], -0.8472, EPSILON); + EXPECT_NEAR(q[GETIDX(8)], 0.4236, EPSILON); + EXPECT_NEAR(q[GETIDX(9)], 0.4236, EPSILON); + EXPECT_NEAR(q[GETIDX(10)], 0.7, EPSILON); + EXPECT_NEAR(q[GETIDX(11)], -0.35, EPSILON); + EXPECT_NEAR(q[GETIDX(12)], -0.35, EPSILON); + ASSERT_EQ(type[GETIDX(1)], 1); + ASSERT_EQ(type[GETIDX(2)], 2); + ASSERT_EQ(type[GETIDX(3)], 2); + ASSERT_EQ(type[GETIDX(4)], 1); + ASSERT_EQ(type[GETIDX(5)], 2); + ASSERT_EQ(type[GETIDX(6)], 2); + ASSERT_EQ(type[GETIDX(7)], 1); + ASSERT_EQ(type[GETIDX(8)], 2); + ASSERT_EQ(type[GETIDX(9)], 2); + ASSERT_EQ(type[GETIDX(10)], 3); + ASSERT_EQ(type[GETIDX(11)], 3); + ASSERT_EQ(type[GETIDX(12)], 4); + + BEGIN_HIDE_OUTPUT(); + command("group two id 7:10"); + command("delete_atoms group two compress no"); + command("write_restart test_atom_styles.restart"); + command("clear"); + command("molecule twomols h2o.mol co2.mol offset 2 1 1 0 0"); + command("read_restart test_atom_styles.restart"); + command("replicate 1 1 2 bbox"); + END_HIDE_OUTPUT(); + ASSERT_THAT(std::string(lmp->atom->atom_style), Eq("hybrid")); + ASSERT_NE(lmp->atom->avec, nullptr); + ASSERT_EQ(lmp->atom->natoms, 16); + + ASSERT_EQ(lmp->atom->nlocal, 16); + ASSERT_EQ(lmp->atom->nghost, 0); + ASSERT_NE(lmp->atom->nmax, -1); + ASSERT_EQ(lmp->atom->tag_enable, 1); + ASSERT_EQ(lmp->atom->molecular, Atom::TEMPLATE); + ASSERT_EQ(lmp->atom->ntypes, 4); + ASSERT_EQ(lmp->atom->nbonds, 8); + ASSERT_EQ(lmp->atom->nangles, 4); + ASSERT_EQ(lmp->atom->tag_consecutive(), 0); + ASSERT_EQ(lmp->atom->map_tag_max, 24); + + type = lmp->atom->type; + molecule = lmp->atom->molecule; + molindex = lmp->atom->molindex; + molatom = lmp->atom->molatom; + ASSERT_EQ(type[GETIDX(1)], 1); + ASSERT_EQ(type[GETIDX(2)], 2); + ASSERT_EQ(type[GETIDX(3)], 2); + ASSERT_EQ(type[GETIDX(4)], 1); + 
ASSERT_EQ(type[GETIDX(5)], 2); + ASSERT_EQ(type[GETIDX(6)], 2); + ASSERT_EQ(type[GETIDX(11)], 3); + ASSERT_EQ(type[GETIDX(12)], 4); + ASSERT_EQ(type[GETIDX(13)], 1); + ASSERT_EQ(type[GETIDX(14)], 2); + ASSERT_EQ(type[GETIDX(15)], 2); + ASSERT_EQ(type[GETIDX(16)], 1); + ASSERT_EQ(type[GETIDX(17)], 2); + ASSERT_EQ(type[GETIDX(18)], 2); + ASSERT_EQ(type[GETIDX(23)], 3); + ASSERT_EQ(type[GETIDX(24)], 4); + + ASSERT_EQ(molindex[GETIDX(1)], 0); + ASSERT_EQ(molindex[GETIDX(2)], 0); + ASSERT_EQ(molindex[GETIDX(3)], 0); + ASSERT_EQ(molindex[GETIDX(4)], 0); + ASSERT_EQ(molindex[GETIDX(5)], 0); + ASSERT_EQ(molindex[GETIDX(6)], 0); + ASSERT_EQ(molindex[GETIDX(11)], -1); + ASSERT_EQ(molindex[GETIDX(12)], -1); + ASSERT_EQ(molindex[GETIDX(13)], 0); + ASSERT_EQ(molindex[GETIDX(14)], 0); + ASSERT_EQ(molindex[GETIDX(15)], 0); + ASSERT_EQ(molindex[GETIDX(16)], 0); + ASSERT_EQ(molindex[GETIDX(17)], 0); + ASSERT_EQ(molindex[GETIDX(18)], 0); + ASSERT_EQ(molindex[GETIDX(23)], -1); + ASSERT_EQ(molindex[GETIDX(24)], -1); + ASSERT_EQ(molatom[GETIDX(1)], 0); + ASSERT_EQ(molatom[GETIDX(2)], 1); + ASSERT_EQ(molatom[GETIDX(3)], 2); + ASSERT_EQ(molatom[GETIDX(4)], 0); + ASSERT_EQ(molatom[GETIDX(5)], 1); + ASSERT_EQ(molatom[GETIDX(6)], 2); + ASSERT_EQ(molatom[GETIDX(11)], -1); + ASSERT_EQ(molatom[GETIDX(12)], -1); + ASSERT_EQ(molatom[GETIDX(13)], 0); + ASSERT_EQ(molatom[GETIDX(14)], 1); + ASSERT_EQ(molatom[GETIDX(15)], 2); + ASSERT_EQ(molatom[GETIDX(16)], 0); + ASSERT_EQ(molatom[GETIDX(17)], 1); + ASSERT_EQ(molatom[GETIDX(18)], 2); + ASSERT_EQ(molatom[GETIDX(23)], -1); + ASSERT_EQ(molatom[GETIDX(24)], -1); + + BEGIN_HIDE_OUTPUT(); + command("reset_atoms id"); + END_HIDE_OUTPUT(); + ASSERT_EQ(lmp->atom->tag_consecutive(), 1); + ASSERT_EQ(lmp->atom->map_tag_max, 16); + + type = lmp->atom->type; + molecule = lmp->atom->molecule; + molindex = lmp->atom->molindex; + molatom = lmp->atom->molatom; + ASSERT_EQ(type[GETIDX(1)], 1); + ASSERT_EQ(type[GETIDX(2)], 2); + ASSERT_EQ(type[GETIDX(3)], 2); + 
ASSERT_EQ(type[GETIDX(4)], 1); + ASSERT_EQ(type[GETIDX(5)], 2); + ASSERT_EQ(type[GETIDX(6)], 2); + ASSERT_EQ(type[GETIDX(7)], 4); + ASSERT_EQ(type[GETIDX(8)], 3); + ASSERT_EQ(type[GETIDX(9)], 1); + ASSERT_EQ(type[GETIDX(10)], 2); + ASSERT_EQ(type[GETIDX(11)], 2); + ASSERT_EQ(type[GETIDX(12)], 1); + ASSERT_EQ(type[GETIDX(13)], 2); + ASSERT_EQ(type[GETIDX(14)], 2); + ASSERT_EQ(type[GETIDX(15)], 4); + ASSERT_EQ(type[GETIDX(16)], 3); + ASSERT_EQ(molatom[GETIDX(1)], 0); + ASSERT_EQ(molatom[GETIDX(2)], 1); + ASSERT_EQ(molatom[GETIDX(3)], 2); + ASSERT_EQ(molatom[GETIDX(4)], 0); + ASSERT_EQ(molatom[GETIDX(5)], 1); + ASSERT_EQ(molatom[GETIDX(6)], 2); + ASSERT_EQ(molatom[GETIDX(7)], -1); + ASSERT_EQ(molatom[GETIDX(8)], -1); + ASSERT_EQ(molatom[GETIDX(9)], 0); + ASSERT_EQ(molatom[GETIDX(10)], 1); + ASSERT_EQ(molatom[GETIDX(11)], 2); + ASSERT_EQ(molatom[GETIDX(12)], 0); + ASSERT_EQ(molatom[GETIDX(13)], 1); + ASSERT_EQ(molatom[GETIDX(14)], 2); + ASSERT_EQ(molatom[GETIDX(15)], -1); + ASSERT_EQ(molatom[GETIDX(16)], -1); +} + TEST_F(AtomStyleTest, bond) { if (!LAMMPS::is_installed_pkg("MOLECULE")) GTEST_SKIP(); From 4b43f90bd6901ab2e82b8c2ba2c608db3def3b0b Mon Sep 17 00:00:00 2001 From: alphataubio Date: Thu, 3 Oct 2024 11:39:24 -0400 Subject: [PATCH 121/294] revert changes requested by @akohlmey --- src/REACTION/fix_bond_react.cpp | 2 -- src/REACTION/fix_bond_react.h | 6 +++++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/REACTION/fix_bond_react.cpp b/src/REACTION/fix_bond_react.cpp index 2b853f1f3a..532724f26a 100644 --- a/src/REACTION/fix_bond_react.cpp +++ b/src/REACTION/fix_bond_react.cpp @@ -611,8 +611,6 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) : FixBondReact::~FixBondReact() { - if (copymode) return; // needed for KOKKOS [alphataubio,2024/08] - for (int i = 0; i < narrhenius; i++) { delete rrhandom[i]; } diff --git a/src/REACTION/fix_bond_react.h b/src/REACTION/fix_bond_react.h index 1faf4ff266..c3a92d91a0 100644 --- 
a/src/REACTION/fix_bond_react.h +++ b/src/REACTION/fix_bond_react.h @@ -11,6 +11,10 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- + Contributing Author: Jacob Gissinger (jgissing@stevens.edu) +------------------------------------------------------------------------- */ + #ifdef FIX_CLASS // clang-format off FixStyle(bond/react,FixBondReact); @@ -51,7 +55,7 @@ class FixBondReact : public Fix { double compute_vector(int) override; double memory_usage() override; - protected: + private: int newton_bond; int nreacts; int *nevery; From 3c5b4ebccbfb1ae8b7ee0e3b1ced8edcff28bd62 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Thu, 3 Oct 2024 11:43:08 -0400 Subject: [PATCH 122/294] Delete charmm36.py --- tools/charmm36/charmm36.py | 201 ------------------------------------- 1 file changed, 201 deletions(-) delete mode 100644 tools/charmm36/charmm36.py diff --git a/tools/charmm36/charmm36.py b/tools/charmm36/charmm36.py deleted file mode 100644 index b003046c56..0000000000 --- a/tools/charmm36/charmm36.py +++ /dev/null @@ -1,201 +0,0 @@ - -# python3 charmm36.py | gzip -9 > charmm_c36_jul24.gz - -################################################################ -# FIXME: dihedral weighting factor -################################################################ - -import re - -mass = dict() - -# BONDS -# V(bond) = Kb(b - b0)**2 -# Kb: kcal/mole/A**2 -# b0: A -bond = dict() - -# ANGLES -# V(angle) = Ktheta(Theta - Theta0)**2 -# V(Urey-Bradley) = Kub(S - S0)**2 -# Ktheta: kcal/mole/rad**2 -# Theta0: degrees -# Kub: kcal/mole/A**2 (Urey-Bradley) -# S0: A -angle = dict() - -# DIHEDRALS -# V(dihedral) = Kchi(1 + cos(n(chi) - delta)) -# Kchi: kcal/mole -# n: multiplicity -# delta: degrees -dihedral = dict() - -# IMPROPER -# V(improper) = Kpsi(psi - psi0)**2 -# Kpsi: kcal/mole/rad**2 -# psi0: degrees -# note that the 
second column of numbers (0) is ignored -improper = dict() - -# NONBONDED nbxmod 5 atom cdiel fshift vatom vdistance vfswitch - -# cutnb 14.0 ctofnb 12.0 ctonnb 10.0 eps 1.0 e14fac 1.0 wmin 1.5 -# V(Lennard-Jones) = Eps,i,j[(Rmin,i,j/ri,j)**12 - 2(Rmin,i,j/ri,j)**6] -# epsilon: kcal/mole, Eps,i,j = sqrt(eps,i * eps,j) -# Rmin/2: A, Rmin,i,j = Rmin/2,i + Rmin/2,j -# atom ignored epsilon Rmin/2 ignored eps,1-4 Rmin/2,1-4 -pair = dict() - -prms = [ - "par_all36m_prot.prm", - "par_all36_na.prm", - #"par_all36_carb.prm", - "par_all36_lipid.prm", - "par_all36_cgenff.prm", - #"toppar_all36_moreions.str", - #"toppar/par_interface.prm", - "toppar_water_ions.str"] - -#prms = ["par_all36_lipid.prm"] - -for prm in prms: - - file = open(prm, "r") - - for line in file: - - match = re.search(r"^MASS\s+-1\s+(\w+)\s+(-?\d+.\d+).*", line) - if( match != None ): - mass.update( {match.group(1): match.group(2)} ) - - match = re.search(r"^(\w+)\s+(\w+)\s+(\d+.\d+)\s+(\d+.\d+)\s+.*", line) - if( match != None ): - bond.update( {"{}-{}".format(match.group(1),match.group(2)) : - "{} {}".format(match.group(3),match.group(4))} ) - - match = re.search(r"^(\w+)\s+(\w+)\s+(\w+)\s+(\d+.\d+)\s+(\d+.\d+)\s+(\d+.\d+)\s+(\d+.\d+).*", line) - if( match != None ): - angle.update( {"{}-{}-{}".format(match.group(1),match.group(2),match.group(3)) : - "{} {} {} {}".format(match.group(4),match.group(5),match.group(6),match.group(7))} ) - - match = re.search(r"^(\w+)\s+(\w+)\s+(\w+)\s+(\w+)\s+(-?\d+.\d+)\s+(\d+)\s+(\d+).*", line) - if( match != None ): - dihedral.update( {"{}-{}-{}-{}".format(match.group(1),match.group(2),match.group(3),match.group(4)) : - "{} {} {} 1.00".format(match.group(5),match.group(6),match.group(7))} ) - - match = re.search(r"^(\w+)\s+(\w+)\s+(\w+)\s+(\w+)\s+(\d+.\d+)\s+0\s+(\d+.\d+).*", line) - if( match != None ): - improper.update( {"{}-{}-{}-{}".format(match.group(1),match.group(2),match.group(3),match.group(4)) : - "{} {}".format(match.group(5),match.group(6))} ) - - match = 
re.search(r"^(\w+)\s+0.0+\s+-(\d+.\d+)\s+(\d+.\d+).*", line) - if( match != None ): - pair.update( {match.group(1) : - "{} {:.15} {} {:.15}".format(match.group(2),float(match.group(3))*1.7817974362806774,match.group(2),float(match.group(3))*1.7817974362806774)} ) - - match = re.search(r"^(\w+)\s+0.0+\s+-(\d+.\d+)\s+(\d+.\d+)\s+0.0\s+-(\d+.\d+)\s+(\d+.\d+).*", line) - if( match != None ): - pair.update( {match.group(1) : - "{} {:.15} {} {:.15}".format(match.group(2),float(match.group(3))*1.7817974362806774,match.group(4),float(match.group(5))*1.7817974362806774)} ) - - - file.close() - -#44 atoms -#11 atom types -#42 bonds -#15 bond types -#74 angles -#29 angle types -#100 dihedrals -#36 dihedral types -#44 impropers -#13 improper types - -# Header - -print( "LAMMPS CHARMM36 force field (toppar_c36_jul24.tgz) [https://mackerell.umaryland.edu/charmm_ff.shtml]\n" ) - -print( " ", len(mass), " atom types" ) -print( " ", len(bond), " bond types" ) -print( " ", len(angle), " angle types" ) -print( " ", len(dihedral), " dihedral types" ) -print( " ", len(improper), " improper types" ) - -# -------- Atom Type Labels -------- -print( "\nAtom Type Labels\n" ) -i=1 -for k in mass.keys(): - print(" ", i, k) - i+=1 - -# -------- Masses -------- -print( "\nMasses\n" ) -i=1 -for v in mass.values(): - print(" ", i, v) - i+=1 - -# -------- Bond Type Labels -------- -print( "\nBond Type Labels\n" ) -i=1 -for k in bond.keys(): - print(" ", i, k) - i+=1 - -# -------- Bond Coeffs -------- -print( "\nBond Coeffs # harmonic\n" ) -i=1 -for v in bond.values(): - print(" ", i, v) - i+=1 - -# -------- Angle Type Labels -------- -print( "\nAngle Type Labels\n" ) -i=1 -for k in angle.keys(): - print(" ", i, k) - i+=1 - -# -------- Angle Coeffs -------- -print( "\nAngle Coeffs # charmm\n" ) -i=1 -for v in angle.values(): - print(" ", i, v) - i+=1 - -# -------- Dihedral Type Labels -------- -print( "\nDihedral Type Labels\n" ) -i=1 -for k in dihedral.keys(): - print(" ", i, k) - i+=1 - -# 
-------- Dihedral Coeffs -------- -print( "\nDihedral Coeffs # charmmfsw\n" ) -i=1 -for v in dihedral.values(): - print(" ", i, v) - i+=1 - -# -------- Improper Type Labels -------- -print( "\nImproper Type Labels\n" ) -i=1 -for k in improper.keys(): - print(" ", i, k) - i+=1 - -# -------- Improper Coeffs -------- -print( "\nImproper Coeffs # harmonic\n" ) -i=1 -for v in improper.values(): - print(" ", i, v) - i+=1 - -# -------- Pair Coeffs -------- -print( "\nPair Coeffs\n" ) -i=1 -for k in mass.keys(): - print(" ", i, pair[k]) - i+=1 - From 5c8552f4cedb91479c8ba33975821b195734e2f1 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Thu, 3 Oct 2024 12:21:10 -0400 Subject: [PATCH 123/294] Update atom_vec_template.cpp --- src/MOLECULE/atom_vec_template.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/MOLECULE/atom_vec_template.cpp b/src/MOLECULE/atom_vec_template.cpp index 8c646b27c0..dc450a30ff 100644 --- a/src/MOLECULE/atom_vec_template.cpp +++ b/src/MOLECULE/atom_vec_template.cpp @@ -29,7 +29,6 @@ AtomVecTemplate::AtomVecTemplate(LAMMPS *lmp) : AtomVec(lmp) atom->molecule_flag = 1; atom->molindex_flag = 1; atom->molatom_flag = 1; - atom->q_flag = 1; // strings with peratom variables to include in each AtomVec method // strings cannot contain fields in corresponding AtomVec default strings From 9ad3e59c8f9299211aefd5cffc0c7cf0aa181b26 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Thu, 3 Oct 2024 12:38:32 -0400 Subject: [PATCH 124/294] Update test_atom_styles.cpp --- unittest/formats/test_atom_styles.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/unittest/formats/test_atom_styles.cpp b/unittest/formats/test_atom_styles.cpp index 921d469e31..293574c7e0 100644 --- a/unittest/formats/test_atom_styles.cpp +++ b/unittest/formats/test_atom_styles.cpp @@ -347,7 +347,6 @@ void ASSERT_ATOM_STATE_EQ(Atom *atom, const AtomState &expected) ASSERT_ARRAY_ALLOCATED(atom->x, expected.has_x); ASSERT_ARRAY_ALLOCATED(atom->v, expected.has_v); 
ASSERT_ARRAY_ALLOCATED(atom->f, expected.has_f); - ASSERT_ARRAY_ALLOCATED(atom->q, expected.q_flag); ASSERT_ARRAY_ALLOCATED(atom->mu, expected.mu_flag); ASSERT_ARRAY_ALLOCATED(atom->omega, expected.omega_flag); From 2c2ce1b3273a52cc44d585779afbc5ebeba69ee3 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Thu, 3 Oct 2024 13:15:13 -0400 Subject: [PATCH 125/294] Update create_atoms.cpp --- src/create_atoms.cpp | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/src/create_atoms.cpp b/src/create_atoms.cpp index f7a1be44eb..d6c402f721 100644 --- a/src/create_atoms.cpp +++ b/src/create_atoms.cpp @@ -303,7 +303,7 @@ void CreateAtoms::command(int narg, char **arg) if (onemol->xflag == 0) error->all(FLERR, "Create_atoms molecule must have coordinates"); if (onemol->typeflag == 0) error->all(FLERR, "Create_atoms molecule must have atom types"); if (ntype + onemol->ntypes <= 0 || ntype + onemol->ntypes > atom->ntypes) - error->all(FLERR, "Invalid atom type {} in create_atoms mol command", ntype + onemol->ntypes); + error->all(FLERR, "Invalid atom type in create_atoms mol command"); if (onemol->tag_require && !atom->tag_enable) error->all(FLERR, "Create_atoms molecule has atom IDs, but system does not"); if (atom->molecular == Atom::TEMPLATE && onemol != atom->avec->onemols[0]) @@ -319,7 +319,11 @@ void CreateAtoms::command(int narg, char **arg) memory->create(xmol, onemol->natoms, 3, "create_atoms:xmol"); } - if (style == MESH && scaleflag) error->all(FLERR, "Create_atoms mesh must use 'units box' option"); + if (style == MESH) { + if (mode == MOLECULE) + error->all(FLERR, "Create_atoms mesh is not compatible with the 'mol' option"); + if (scaleflag) error->all(FLERR, "Create_atoms mesh must use 'units box' option"); + } ranlatt = nullptr; if (subsetflag != NONE) ranlatt = new RanMars(lmp, subsetseed + comm->me); @@ -964,12 +968,7 @@ int CreateAtoms::add_bisection(const double vert[3][3], tagint molid) if ((center[0] >= sublo[0]) && 
(center[0] < subhi[0]) && (center[1] >= sublo[1]) && (center[1] < subhi[1]) && (center[2] >= sublo[2]) && (center[2] < subhi[2])) { - if (mode == ATOM) atom->avec->create_atom(ntype, center); - else { - get_xmol(center); - add_molecule(); - } - + atom->avec->create_atom(ntype, center); int idx = atom->nlocal - 1; if (atom->radius_flag) atom->radius[idx] = ravg * radscale; if (atom->molecule_flag) atom->molecule[idx] = molid; @@ -1051,12 +1050,7 @@ int CreateAtoms::add_quasirandom(const double vert[3][3], tagint molid) if ((point[0] >= sublo[0]) && (point[0] < subhi[0]) && (point[1] >= sublo[1]) && (point[1] < subhi[1]) && (point[2] >= sublo[2]) && (point[2] < subhi[2])) { - if (mode == ATOM) atom->avec->create_atom(ntype, point); - else { - get_xmol(point); - add_molecule(); - } - + atom->avec->create_atom(ntype, point); int idx = atom->nlocal - 1; if (atom->molecule_flag) atom->molecule[idx] = molid; if (atom->radius_flag) atom->radius[idx] = rad * radscale; From 81db0a4a8048377c054d295507e6b391cab55c91 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Thu, 3 Oct 2024 18:16:41 -0400 Subject: [PATCH 126/294] Update fix-timestep-wall_region_morse.yaml --- unittest/force-styles/tests/fix-timestep-wall_region_morse.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/unittest/force-styles/tests/fix-timestep-wall_region_morse.yaml b/unittest/force-styles/tests/fix-timestep-wall_region_morse.yaml index 05ca96933c..6bb6e8ce40 100644 --- a/unittest/force-styles/tests/fix-timestep-wall_region_morse.yaml +++ b/unittest/force-styles/tests/fix-timestep-wall_region_morse.yaml @@ -1,6 +1,5 @@ --- lammps_version: 29 Aug 2024 -tags: generated date_generated: Tue Oct 1 13:00:18 2024 epsilon: 4e-14 skip_tests: From c059961b8faf9f6898c9e97c0483f50836bbf295 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Thu, 3 Oct 2024 18:17:39 -0400 Subject: [PATCH 127/294] Update fix-timestep-wall_region_lj1043.yaml --- unittest/force-styles/tests/fix-timestep-wall_region_lj1043.yaml | 1 - 1 file 
changed, 1 deletion(-) diff --git a/unittest/force-styles/tests/fix-timestep-wall_region_lj1043.yaml b/unittest/force-styles/tests/fix-timestep-wall_region_lj1043.yaml index df1e017a2b..7b900ce6a3 100644 --- a/unittest/force-styles/tests/fix-timestep-wall_region_lj1043.yaml +++ b/unittest/force-styles/tests/fix-timestep-wall_region_lj1043.yaml @@ -1,6 +1,5 @@ --- lammps_version: 29 Aug 2024 -tags: generated date_generated: Tue Oct 1 13:00:12 2024 epsilon: 4e-14 skip_tests: From bf12caf37a6b6834802d97d95685a5b0547dfc1b Mon Sep 17 00:00:00 2001 From: alphataubio Date: Thu, 3 Oct 2024 18:18:02 -0400 Subject: [PATCH 128/294] Update fix-timestep-wall_region_lj126.yaml --- unittest/force-styles/tests/fix-timestep-wall_region_lj126.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/unittest/force-styles/tests/fix-timestep-wall_region_lj126.yaml b/unittest/force-styles/tests/fix-timestep-wall_region_lj126.yaml index 298512aad8..2dcb9eccbb 100644 --- a/unittest/force-styles/tests/fix-timestep-wall_region_lj126.yaml +++ b/unittest/force-styles/tests/fix-timestep-wall_region_lj126.yaml @@ -1,6 +1,5 @@ --- lammps_version: 29 Aug 2024 -tags: generated date_generated: Tue Oct 1 12:59:56 2024 epsilon: 4e-14 skip_tests: From 6f068fc067bfcb0c6e430ce603b688fc2b27f780 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Thu, 3 Oct 2024 18:19:53 -0400 Subject: [PATCH 129/294] Update fix-timestep-wall_region_harmonic.yaml --- .../force-styles/tests/fix-timestep-wall_region_harmonic.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/unittest/force-styles/tests/fix-timestep-wall_region_harmonic.yaml b/unittest/force-styles/tests/fix-timestep-wall_region_harmonic.yaml index b175d40128..46455f68a0 100644 --- a/unittest/force-styles/tests/fix-timestep-wall_region_harmonic.yaml +++ b/unittest/force-styles/tests/fix-timestep-wall_region_harmonic.yaml @@ -1,6 +1,5 @@ --- lammps_version: 29 Aug 2024 -tags: generated date_generated: Tue Oct 1 12:59:45 2024 epsilon: 4e-14 skip_tests: From 
86fc79d2def46db8b93f65169560f51d79407f00 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Thu, 3 Oct 2024 18:20:09 -0400 Subject: [PATCH 130/294] Update fix-timestep-wall_lj93_const.yaml --- unittest/force-styles/tests/fix-timestep-wall_lj93_const.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/unittest/force-styles/tests/fix-timestep-wall_lj93_const.yaml b/unittest/force-styles/tests/fix-timestep-wall_lj93_const.yaml index a5eff76773..5431a8e0a8 100644 --- a/unittest/force-styles/tests/fix-timestep-wall_lj93_const.yaml +++ b/unittest/force-styles/tests/fix-timestep-wall_lj93_const.yaml @@ -1,6 +1,5 @@ --- lammps_version: 27 Jun 2024 -tags: generated date_generated: Fri Aug 2 23:56:34 2024 epsilon: 2e-14 skip_tests: From 59db2e0af89e79412456b224be9fc8374dcdc7d3 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Thu, 3 Oct 2024 18:35:28 -0400 Subject: [PATCH 131/294] Update atom_vec_template.cpp --- src/MOLECULE/atom_vec_template.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/MOLECULE/atom_vec_template.cpp b/src/MOLECULE/atom_vec_template.cpp index dc450a30ff..489756bf4c 100644 --- a/src/MOLECULE/atom_vec_template.cpp +++ b/src/MOLECULE/atom_vec_template.cpp @@ -35,14 +35,14 @@ AtomVecTemplate::AtomVecTemplate(LAMMPS *lmp) : AtomVec(lmp) // order of fields in the string does not matter // except fields_data_atom and fields_data_vel which must match data file - fields_grow = {"q","molecule", "molindex", "molatom"}; - fields_copy = {"q","molecule", "molindex", "molatom"}; - fields_border = {"q","molecule", "molindex", "molatom"}; - fields_border_vel = {"q","molecule", "molindex", "molatom"}; - fields_exchange = {"q","molecule", "molindex", "molatom"}; - fields_restart = {"q","molecule", "molindex", "molatom"}; - fields_create = {"q","molecule", "molindex", "molatom"}; - fields_data_atom = {"id", "molecule", "type", "q", "x", "molindex", "molatom"}; + fields_grow = {"molecule", "molindex", "molatom"}; + fields_copy = 
{"molecule", "molindex", "molatom"}; + fields_border = {"molecule", "molindex", "molatom"}; + fields_border_vel = {"molecule", "molindex", "molatom"}; + fields_exchange = {"molecule", "molindex", "molatom"}; + fields_restart = {"molecule", "molindex", "molatom"}; + fields_create = {"molecule", "molindex", "molatom"}; + fields_data_atom = {"id", "molecule", "type", "x", "molindex", "molatom"}; fields_data_vel = {"id", "v"}; setup_fields(); From 1799f887d2b4968a63d56809ad44f5caf14f68c8 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Thu, 3 Oct 2024 18:36:39 -0400 Subject: [PATCH 132/294] Update test_atom_styles.cpp --- unittest/formats/test_atom_styles.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/unittest/formats/test_atom_styles.cpp b/unittest/formats/test_atom_styles.cpp index 293574c7e0..f41584876b 100644 --- a/unittest/formats/test_atom_styles.cpp +++ b/unittest/formats/test_atom_styles.cpp @@ -348,6 +348,7 @@ void ASSERT_ATOM_STATE_EQ(Atom *atom, const AtomState &expected) ASSERT_ARRAY_ALLOCATED(atom->v, expected.has_v); ASSERT_ARRAY_ALLOCATED(atom->f, expected.has_f); ASSERT_ARRAY_ALLOCATED(atom->mu, expected.mu_flag); + ASSERT_ARRAY_ALLOCATED(atom->q, expected.q_flag); ASSERT_ARRAY_ALLOCATED(atom->omega, expected.omega_flag); ASSERT_ARRAY_ALLOCATED(atom->angmom, expected.angmom_flag); From 70dad2848157e9d223676862a305e1baad56368f Mon Sep 17 00:00:00 2001 From: alphataubio Date: Thu, 3 Oct 2024 18:45:12 -0400 Subject: [PATCH 133/294] Update fix_recenter.cpp --- src/fix_recenter.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/fix_recenter.cpp b/src/fix_recenter.cpp index 76181c04d4..ab9fde7031 100644 --- a/src/fix_recenter.cpp +++ b/src/fix_recenter.cpp @@ -217,10 +217,7 @@ void FixRecenter::initial_integrate_respa(int vflag, int ilevel, int /*iloop*/) // outermost level - operate recenter // all other levels - nothing - //if (ilevel == nlevels_respa-1) initial_integrate(vflag); - // FIXME: why does always calling 
initial_integrate make respa tests - // pass, i dont know ! - initial_integrate(vflag); + if (ilevel == nlevels_respa-1) initial_integrate(vflag); } From 13ceebe82cf63316ef35f65a9ce1b2de0acc5f66 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Thu, 3 Oct 2024 18:45:52 -0400 Subject: [PATCH 134/294] fix recenter test skipped for respa --- unittest/force-styles/test_fix_timestep.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/unittest/force-styles/test_fix_timestep.cpp b/unittest/force-styles/test_fix_timestep.cpp index 7d12d0b20c..2b04ea81b5 100644 --- a/unittest/force-styles/test_fix_timestep.cpp +++ b/unittest/force-styles/test_fix_timestep.cpp @@ -430,7 +430,8 @@ TEST(FixTimestep, plain) // fix nve/limit cannot work with r-RESPA ifix = lmp->modify->get_fix_by_id("test"); if (ifix && !utils::strmatch(ifix->style, "^rigid") && - !utils::strmatch(ifix->style, "^nve/limit")) { + !utils::strmatch(ifix->style, "^nve/limit") && + !utils::strmatch(ifix->style, "^recenter")) { if (!verbose) ::testing::internal::CaptureStdout(); cleanup_lammps(lmp, test_config); if (!verbose) ::testing::internal::GetCapturedStdout(); From 39def85b48a15f79ba748d5848d2d36cb883aa18 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Fri, 4 Oct 2024 13:04:52 -0400 Subject: [PATCH 135/294] Update fix_cmap.rst --- doc/src/fix_cmap.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/src/fix_cmap.rst b/doc/src/fix_cmap.rst index 8d354cce76..ccca88232b 100644 --- a/doc/src/fix_cmap.rst +++ b/doc/src/fix_cmap.rst @@ -144,6 +144,12 @@ outermost level. MUST not disable the :doc:`fix_modify ` *energy* option for this fix. +---------- + +.. 
include:: accel_styles.rst + +---------- + Restrictions """""""""""" From d27026c364be9d2e4af89c9cc8906c864ff0bc2b Mon Sep 17 00:00:00 2001 From: alphataubio Date: Fri, 4 Oct 2024 13:05:26 -0400 Subject: [PATCH 136/294] Update fix_nve_limit.rst --- doc/src/fix_nve_limit.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/src/fix_nve_limit.rst b/doc/src/fix_nve_limit.rst index e46c7e5ea2..3533a63ebc 100644 --- a/doc/src/fix_nve_limit.rst +++ b/doc/src/fix_nve_limit.rst @@ -82,6 +82,12 @@ is "extensive". No parameter of this fix can be used with the *start/stop* keywords of the :doc:`run ` command. This fix is not invoked during :doc:`energy minimization `. +---------- + +.. include:: accel_styles.rst + +---------- + Restrictions """""""""""" none From 921d9df57446ae6355a0b62b0e77fc47d3ff05c8 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Fri, 4 Oct 2024 13:05:48 -0400 Subject: [PATCH 137/294] Update fix_recenter.rst --- doc/src/fix_recenter.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/src/fix_recenter.rst b/doc/src/fix_recenter.rst index dbef13965e..60a8a674d0 100644 --- a/doc/src/fix_recenter.rst +++ b/doc/src/fix_recenter.rst @@ -116,6 +116,12 @@ The scalar and vector values calculated by this fix are "extensive". No parameter of this fix can be used with the *start/stop* keywords of the :doc:`run ` command. This fix is not invoked during :doc:`energy minimization `. +---------- + +.. 
include:: accel_styles.rst + +---------- + Restrictions """""""""""" From 485cddf3037d043634bc2fb10da18d17d63c2270 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Fri, 4 Oct 2024 13:06:32 -0400 Subject: [PATCH 138/294] Update fix_wall_region.rst --- doc/src/fix_wall_region.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/src/fix_wall_region.rst b/doc/src/fix_wall_region.rst index 526de55202..67b9b9cdb2 100644 --- a/doc/src/fix_wall_region.rst +++ b/doc/src/fix_wall_region.rst @@ -237,6 +237,12 @@ invoked by the :doc:`minimize ` command. minimized), you MUST enable the :doc:`fix_modify ` *energy* option for this fix. +---------- + +.. include:: accel_styles.rst + +---------- + Restrictions """""""""""" none From 95c18c5373166b7341b841e45c6353d495a3401f Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 5 Oct 2024 11:44:57 -0400 Subject: [PATCH 139/294] Update fix_cmap_kokkos.cpp --- src/KOKKOS/fix_cmap_kokkos.cpp | 49 +++------------------------------- 1 file changed, 4 insertions(+), 45 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index faf8614bda..f6a869feff 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -47,9 +47,7 @@ FixCMAPKokkos::FixCMAPKokkos(LAMMPS *lmp, int narg, char **arg) : { kokkosable = 1; - // FIXME: test/bugfix pack_exchange_kokkos() and unpack_exchange_kokkos() - //exchange_comm_device = sort_device = 1; - sort_device = 1; + exchange_comm_device = sort_device = 1; atomKK = (AtomKokkos *)atom; execution_space = ExecutionSpaceFromDevice::space; @@ -350,8 +348,6 @@ void FixCMAPKokkos::operator()(TagFixCmapPostForce, const int n) con r43 = sqrt(vb43x*vb43x + vb43y*vb43y + vb43z*vb43z); a2sq = a2x*a2x + a2y*a2y + a2z*a2z; b2sq = b2x*b2x + b2y*b2y + b2z*b2z; - //if (a1sq<0.0001 || b1sq<0.0001 || a2sq<0.0001 || b2sq<0.0001) - // printf("a1sq b1sq a2sq b2sq: %f %f %f %f \n",a1sq,b1sq,a2sq,b2sq); if (a1sq<0.0001 || b1sq<0.0001 || a2sq<0.0001 || b2sq<0.0001) 
return; dpr21r32 = vb21x*vb32x + vb21y*vb32y + vb21z*vb32z; dpr34r32 = vb34x*vb32x + vb34y*vb32y + vb34z*vb32z; @@ -417,7 +413,7 @@ void FixCMAPKokkos::operator()(TagFixCmapPostForce, const int n) con double ecmapKK = 0.0; -// FIXME: needed for compute_scalar() + // needed for compute_scalar() double engfraction = 0.2 * E; if (i1 < nlocal) ecmapKK += engfraction; if (i2 < nlocal) ecmapKK += engfraction; @@ -425,9 +421,6 @@ void FixCMAPKokkos::operator()(TagFixCmapPostForce, const int n) con if (i4 < nlocal) ecmapKK += engfraction; if (i5 < nlocal) ecmapKK += engfraction; - //std::cerr << fmt::format("*** i {} {} {} {} {} nlocal {} E {} ecmapKK {}\n", - //i1,i2,i3,i4,i5,nlocal,E,ecmapKK); - // calculate the derivatives dphi/dr_i dphidr1x = 1.0*r32/a1sq*a1x; @@ -733,26 +726,9 @@ int FixCMAPKokkos::unpack_exchange(int nlocal, double *buf) } /* ---------------------------------------------------------------------- - (KOKKOS) pack values in local atom-based array for exchange + pack values in local atom-based array for exchange ------------------------------------------------------------------------- */ -/* -int FixCMAP::pack_exchange(int i, double *buf) -{ - int n = 0; - buf[n++] = ubuf(num_crossterm[i]).d; - for (int m = 0; m < num_crossterm[i]; m++) { - buf[n++] = ubuf(crossterm_type[i][m]).d; - buf[n++] = ubuf(crossterm_atom1[i][m]).d; - buf[n++] = ubuf(crossterm_atom2[i][m]).d; - buf[n++] = ubuf(crossterm_atom3[i][m]).d; - buf[n++] = ubuf(crossterm_atom4[i][m]).d; - buf[n++] = ubuf(crossterm_atom5[i][m]).d; - } - return n; -} -*/ - template int FixCMAPKokkos::pack_exchange_kokkos( const int &nsend, DAT::tdual_xfloat_2d &k_buf, @@ -811,26 +787,9 @@ int FixCMAPKokkos::pack_exchange_kokkos( } /* ---------------------------------------------------------------------- - (KOKKOS) unpack values in local atom-based array from exchange + unpack values in local atom-based array from exchange ------------------------------------------------------------------------- */ -/* -int 
FixCMAP::unpack_exchange(int nlocal, double *buf) -{ - int n = 0; - num_crossterm[nlocal] = (int) ubuf(buf[n++]).i; - for (int m = 0; m < num_crossterm[nlocal]; m++) { - crossterm_type[nlocal][m] = (int) ubuf(buf[n++]).i; - crossterm_atom1[nlocal][m] = (tagint) ubuf(buf[n++]).i; - crossterm_atom2[nlocal][m] = (tagint) ubuf(buf[n++]).i; - crossterm_atom3[nlocal][m] = (tagint) ubuf(buf[n++]).i; - crossterm_atom4[nlocal][m] = (tagint) ubuf(buf[n++]).i; - crossterm_atom5[nlocal][m] = (tagint) ubuf(buf[n++]).i; - } - return n; -} -*/ - template void FixCMAPKokkos::unpack_exchange_kokkos( DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv, From 03c3bfec3a59a5708403b564881495fa3c7f7fc6 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 5 Oct 2024 11:45:41 -0400 Subject: [PATCH 140/294] Update fix_nve_limit_kokkos.cpp --- src/KOKKOS/fix_nve_limit_kokkos.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/KOKKOS/fix_nve_limit_kokkos.cpp b/src/KOKKOS/fix_nve_limit_kokkos.cpp index ed0bb44704..942ee41f3a 100644 --- a/src/KOKKOS/fix_nve_limit_kokkos.cpp +++ b/src/KOKKOS/fix_nve_limit_kokkos.cpp @@ -13,7 +13,7 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing author: Mitch Murphy, alphataubio at gmail com + Contributing author: Mitch Murphy, alphataubio at gmail ------------------------------------------------------------------------- */ #include "fix_nve_limit_kokkos.h" @@ -33,10 +33,6 @@ FixNVELimitKokkos::FixNVELimitKokkos(LAMMPS *lmp, int narg, char **a FixNVELimit(lmp, narg, arg) { kokkosable = 1; - - //FIXME: unit test fails when i turn this on - //fuse_integrate_flag = 1; - execution_space = ExecutionSpaceFromDevice::space; atomKK = (AtomKokkos *) atom; datamask_read = EMPTY_MASK; From 81f96d3ea348af98ce08c738075220ccb2cffece Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 5 Oct 2024 11:48:18 -0400 
Subject: [PATCH 141/294] Update fix_recenter_kokkos.h --- src/KOKKOS/fix_recenter_kokkos.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/KOKKOS/fix_recenter_kokkos.h b/src/KOKKOS/fix_recenter_kokkos.h index 4e28c41d18..589da4b33f 100644 --- a/src/KOKKOS/fix_recenter_kokkos.h +++ b/src/KOKKOS/fix_recenter_kokkos.h @@ -32,12 +32,7 @@ template class FixRecenterKokkos : public FixRecenter { public: FixRecenterKokkos(class LAMMPS *, int, char **); - void initial_integrate(int) override; - - private: - //typename ArrayTypes::t_x_array d_x; - //typename ArrayTypes::t_int_1d d_mask; }; } // namespace LAMMPS_NS From 84b50bfd267ea06f86ddf754b4d67f59af62e263 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 5 Oct 2024 11:59:50 -0400 Subject: [PATCH 142/294] Update region_sphere_kokkos.cpp --- src/KOKKOS/region_sphere_kokkos.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/region_sphere_kokkos.cpp b/src/KOKKOS/region_sphere_kokkos.cpp index 13dd00957a..b442bb7ae5 100644 --- a/src/KOKKOS/region_sphere_kokkos.cpp +++ b/src/KOKKOS/region_sphere_kokkos.cpp @@ -64,8 +64,8 @@ void RegSphereKokkos::match_all_kokkos(int groupbit_in, DAT::tdual_i copymode = 1; - // FIXME: capture lambda reference to KOKKOS_INLINE_FUNCTION match() - // workaround: KOKKOS_CLASS_LAMBDA instead of KOKKOS_LAMBDA + // capture lambda reference to KOKKOS_INLINE_FUNCTION match() + // use KOKKOS_CLASS_LAMBDA instead of KOKKOS_LAMBDA // https://github.com/kokkos/kokkos/issues/695 Kokkos::parallel_for(atom->nlocal, KOKKOS_CLASS_LAMBDA( const int &i ) { From de8396ea556c486089ee9f0a6d70dd0b6946d93b Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 5 Oct 2024 12:01:34 -0400 Subject: [PATCH 143/294] Update fix_wall_region.h --- src/fix_wall_region.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fix_wall_region.h b/src/fix_wall_region.h index c95cc65fa9..fb07f00ee9 100644 --- a/src/fix_wall_region.h +++ b/src/fix_wall_region.h @@ -43,7 
+43,7 @@ class FixWallRegion : public Fix { double epsilon, sigma, cutoff; double alpha; int eflag; - double *ewall, ewall_all[4]; // need ewall double*, not double[] for kokkos dual view + double ewall[4], ewall_all[4]; int ilevel_respa; char *idregion; class Region *region; From 2d261388c840c81249b2895581b59560dc149b1b Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 5 Oct 2024 12:02:00 -0400 Subject: [PATCH 144/294] Update fix_wall_region.cpp --- src/fix_wall_region.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/fix_wall_region.cpp b/src/fix_wall_region.cpp index 542e1cc445..8c245a3ed6 100644 --- a/src/fix_wall_region.cpp +++ b/src/fix_wall_region.cpp @@ -50,7 +50,6 @@ FixWallRegion::FixWallRegion(LAMMPS *lmp, int narg, char **arg) : virial_global_flag = virial_peratom_flag = 1; respa_level_support = 1; ilevel_respa = 0; - ewall = new double[4]; // parse args @@ -103,7 +102,6 @@ FixWallRegion::~FixWallRegion() { if (copymode) return; delete[] idregion; - delete[] ewall; } /* ---------------------------------------------------------------------- */ From 58bea0691b0307140f1fdc80dc0704057d795a95 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 5 Oct 2024 13:55:33 -0400 Subject: [PATCH 145/294] oops --- src/KOKKOS/fix_wall_region_kokkos.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/fix_wall_region_kokkos.cpp b/src/KOKKOS/fix_wall_region_kokkos.cpp index 49fa49f7d8..ccb20b52d4 100644 --- a/src/KOKKOS/fix_wall_region_kokkos.cpp +++ b/src/KOKKOS/fix_wall_region_kokkos.cpp @@ -57,7 +57,7 @@ FixWallRegionKokkos::FixWallRegionKokkos(LAMMPS *lmp, int narg, char datamask_read = X_MASK | V_MASK | MASK_MASK; datamask_modify = F_MASK; - memoryKK->create_kokkos(k_ewall,ewall,4,"wall_region:ewall"); + memoryKK->create_kokkos(k_ewall,4,"wall_region:ewall"); d_ewall = k_ewall.template view(); } @@ -67,7 +67,7 @@ FixWallRegionKokkos::~FixWallRegionKokkos() if (copymode) return; memoryKK->destroy_kokkos(k_vatom,vatom); - 
memoryKK->destroy_kokkos(k_ewall,ewall); + memoryKK->destroy_kokkos(k_ewall); } /* ---------------------------------------------------------------------- */ From 1f61c427451c984f70ab8bf94a894743cad69f2f Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 5 Oct 2024 16:40:10 -0400 Subject: [PATCH 146/294] add GroupKokkos --- cmake/Modules/Packages/KOKKOS.cmake | 1 + src/KOKKOS/group_kokkos.cpp | 147 ++++++++++++++++++++++++++++ src/KOKKOS/group_kokkos.h | 32 ++++++ 3 files changed, 180 insertions(+) create mode 100644 src/KOKKOS/group_kokkos.cpp create mode 100644 src/KOKKOS/group_kokkos.h diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index adb3abab6b..ddd2daefcd 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -130,6 +130,7 @@ set(KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/atom_vec_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/comm_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/comm_tiled_kokkos.cpp + ${KOKKOS_PKG_SOURCES_DIR}/group_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/min_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/min_linesearch_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/neighbor_kokkos.cpp diff --git a/src/KOKKOS/group_kokkos.cpp b/src/KOKKOS/group_kokkos.cpp new file mode 100644 index 0000000000..3a3a7b8288 --- /dev/null +++ b/src/KOKKOS/group_kokkos.cpp @@ -0,0 +1,147 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Mitch Murphy (alphataubio at gmail) +------------------------------------------------------------------------- */ + +#include "group_kokkos.h" + +#include "atom_kokkos.h" +#include "domain_kokkos.h" +#include "kokkos_few.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +template +GroupKokkos::GroupKokkos(LAMMPS *lmp) : Group(lmp) +{ + atomKK = (AtomKokkos *)atom; +} + +// ---------------------------------------------------------------------- +// computations on a group of atoms +// ---------------------------------------------------------------------- + +/* ---------------------------------------------------------------------- + compute the total mass of group of atoms + use either per-type mass or per-atom rmass +------------------------------------------------------------------------- */ + +template +double GroupKokkos::mass(int igroup) +{ + int groupbit = bitmask[igroup]; + + auto d_mass = atomKK->k_mass.template view(); + auto d_rmass = atomKK->k_rmass.template view(); + auto d_mask = atomKK->k_mask.template view(); + auto d_type = atomKK->k_type.template view(); + + double one = 0.0; + + if (atomKK->rmass) { + + Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_one) { + if (d_mask(i) & groupbit) l_one += d_rmass(i); + }, one); + + } else { + + Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_one) { + if (d_mask(i) & groupbit) l_one += d_mass(d_type(i)); + }, one); + + } + + double all; + MPI_Allreduce(&one, &all, 1, MPI_DOUBLE, MPI_SUM, world); + return all; +} + + +/* ---------------------------------------------------------------------- + compute the center-of-mass coords of group of atoms + masstotal = total mass + return center-of-mass coords in cm[] + must unwrap 
atoms to compute center-of-mass correctly +------------------------------------------------------------------------- */ + +template +void GroupKokkos::xcm(int igroup, double masstotal, double *cm) +{ + int groupbit = bitmask[igroup]; + + auto d_x = atomKK->k_x.template view(); + auto d_mask = atomKK->k_mask.template view(); + auto d_type = atomKK->k_type.template view(); + auto d_image = atomKK->k_image.template view(); + auto d_mass = atomKK->k_mass.template view(); + auto d_rmass = atomKK->k_rmass.template view(); + + auto l_prd = domain->prd; + auto l_h = domain->h; + auto l_triclinic = domain->triclinic; + + double cmone[3]; + + if (atomKK->rmass) { + + Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_cmx, double &l_cmy, double &l_cmz) { + if (d_mask(i) & groupbit) { + double massone = d_rmass(i); + Few x_i; + x_i[0] = d_x(i,0); + x_i[1] = d_x(i,1); + x_i[2] = d_x(i,2); + auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); + l_cmx += unwrapKK[0] * massone; + l_cmy += unwrapKK[1] * massone; + l_cmz += unwrapKK[2] * massone; + } + }, cmone[0], cmone[1], cmone[2]); + + } else { + + Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_cmx, double &l_cmy, double &l_cmz) { + if (d_mask(i) & groupbit) { + double massone = d_mass(d_type(i)); + Few x_i; + x_i[0] = d_x(i,0); + x_i[1] = d_x(i,1); + x_i[2] = d_x(i,2); + auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); + l_cmx += unwrapKK[0] * massone; + l_cmy += unwrapKK[1] * massone; + l_cmz += unwrapKK[2] * massone; + } + }, cmone[0], cmone[1], cmone[2]); + + } + + MPI_Allreduce(cmone, cm, 3, MPI_DOUBLE, MPI_SUM, world); + if (masstotal > 0.0) { + cm[0] /= masstotal; + cm[1] /= masstotal; + cm[2] /= masstotal; + } +} + +namespace LAMMPS_NS { +template class GroupKokkos; +#ifdef LMP_KOKKOS_GPU +template class GroupKokkos; +#endif +} diff --git a/src/KOKKOS/group_kokkos.h b/src/KOKKOS/group_kokkos.h new file mode 100644 index 
0000000000..c8573b0d74 --- /dev/null +++ b/src/KOKKOS/group_kokkos.h @@ -0,0 +1,32 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifndef LMP_GROUP_KOKKOS_H +#define LMP_GROUP_KOKKOS_H + +#include "group.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +template +class GroupKokkos : public Group { + public: + GroupKokkos(class LAMMPS *); + double mass(int); // total mass of atoms in group + void xcm(int, double, double *); // center-of-mass coords of group +}; + +} // namespace LAMMPS_NS + +#endif From 0bc554a694b0ede59df6390aae84b2279ad5e0df Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 5 Oct 2024 16:40:32 -0400 Subject: [PATCH 147/294] remove FIXME --- src/KOKKOS/fix_recenter_kokkos.cpp | 19 +++++-------------- src/KOKKOS/fix_recenter_kokkos.h | 4 ++++ 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/src/KOKKOS/fix_recenter_kokkos.cpp b/src/KOKKOS/fix_recenter_kokkos.cpp index ca2607466f..4738f29be7 100644 --- a/src/KOKKOS/fix_recenter_kokkos.cpp +++ b/src/KOKKOS/fix_recenter_kokkos.cpp @@ -24,8 +24,7 @@ #include "modify.h" #include "update.h" #include "domain.h" -#include "group.h" -#include "kokkos_few.h" +#include "group_kokkos.h" using namespace LAMMPS_NS; @@ -39,6 +38,7 @@ FixRecenterKokkos::FixRecenterKokkos(LAMMPS *lmp, int narg, char **a { kokkosable = 1; atomKK = (AtomKokkos *)atom; + groupKK = (GroupKokkos *)group; execution_space = 
ExecutionSpaceFromDevice::space; datamask_read = X_MASK | MASK_MASK; @@ -88,18 +88,9 @@ void FixRecenterKokkos::initial_integrate(int /*vflag*/) // current COM - // FIXME: make Group kokkos-aware - //double xcm[3]; - //if (group->dynamic[igroup]) - // masstotal = group->mass(igroup); - - //group->xcm(igroup,masstotal,xcm); - - /* this is needed because Group is not Kokkos-aware ! */ - atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK); - Few tmpxcm; - group->xcm(igroup,masstotal,&tmpxcm[0]); - const Few xcm(tmpxcm); + double xcm[3]; + if (group->dynamic[igroup]) masstotal = groupKK->mass(igroup); + groupKK->xcm(igroup,masstotal,xcm); // shift coords by difference between actual COM and requested COM diff --git a/src/KOKKOS/fix_recenter_kokkos.h b/src/KOKKOS/fix_recenter_kokkos.h index 589da4b33f..36e154e05c 100644 --- a/src/KOKKOS/fix_recenter_kokkos.h +++ b/src/KOKKOS/fix_recenter_kokkos.h @@ -24,6 +24,8 @@ FixStyle(recenter/kk/host,FixRecenterKokkos); #define LMP_FIX_RECENTER_KOKKOS_H #include "fix_recenter.h" + +#include "group_kokkos.h" #include "kokkos_type.h" namespace LAMMPS_NS { @@ -33,6 +35,8 @@ class FixRecenterKokkos : public FixRecenter { public: FixRecenterKokkos(class LAMMPS *, int, char **); void initial_integrate(int) override; + private: + GroupKokkos *groupKK; }; } // namespace LAMMPS_NS From a8157fc68ee14d02fd90c49635d4480b5ea2656f Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 5 Oct 2024 16:41:02 -0400 Subject: [PATCH 148/294] relax epsilon for kokkos_omp to pass --- unittest/force-styles/tests/fix-timestep-recenter-init.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unittest/force-styles/tests/fix-timestep-recenter-init.yaml b/unittest/force-styles/tests/fix-timestep-recenter-init.yaml index 72aeaf94c5..326cdb4f3d 100644 --- a/unittest/force-styles/tests/fix-timestep-recenter-init.yaml +++ b/unittest/force-styles/tests/fix-timestep-recenter-init.yaml @@ -2,7 +2,7 @@ lammps_version: 29 Aug 2024 tags: generated 
date_generated: Tue Oct 1 12:45:46 2024 -epsilon: 2e-13 +epsilon: 1e-12 skip_tests: prerequisites: ! | atom full From af03836dfad83e1c10f0426e535591c700da60ce Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 5 Oct 2024 17:03:50 -0400 Subject: [PATCH 149/294] Update fix_cmap.cpp --- src/MOLECULE/fix_cmap.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/MOLECULE/fix_cmap.cpp b/src/MOLECULE/fix_cmap.cpp index 8a8e4b30aa..d9859326c9 100644 --- a/src/MOLECULE/fix_cmap.cpp +++ b/src/MOLECULE/fix_cmap.cpp @@ -415,8 +415,6 @@ void FixCMAP::post_force(int vflag) r43 = sqrt(vb43x*vb43x + vb43y*vb43y + vb43z*vb43z); a2sq = a2x*a2x + a2y*a2y + a2z*a2z; b2sq = b2x*b2x + b2y*b2y + b2z*b2z; - //if (a1sq<0.0001 || b1sq<0.0001 || a2sq<0.0001 || b2sq<0.0001) - // printf("a1sq b1sq a2sq b2sq: %f %f %f %f \n",a1sq,b1sq,a2sq,b2sq); if (a1sq<0.0001 || b1sq<0.0001 || a2sq<0.0001 || b2sq<0.0001) continue; dpr21r32 = vb21x*vb32x + vb21y*vb32y + vb21z*vb32z; dpr34r32 = vb34x*vb32x + vb34y*vb32y + vb34z*vb32z; From bcb500894c7744fe6341ba8f666f0cc56ef4af94 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 5 Oct 2024 18:49:39 -0400 Subject: [PATCH 150/294] compute_scalar() bugfix --- src/KOKKOS/fix_cmap_kokkos.cpp | 240 ++++++++++++++------------------- src/KOKKOS/fix_cmap_kokkos.h | 2 +- 2 files changed, 99 insertions(+), 143 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index f6a869feff..0cc88960e8 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -226,28 +226,24 @@ void FixCMAPKokkos::operator()(TagFixCmapPreNeighbor, const int i, i template void FixCMAPKokkos::post_force(int vflag) { - d_x = atomKK->k_x.template view(); d_f = atomKK->k_f.template view(); atomKK->sync(execution_space,X_MASK|F_MASK); - ecmap = 0.0; int eflag = eflag_caller; ev_init(eflag,vflag); copymode = 1; - Kokkos::parallel_for(Kokkos::RangePolicy(0,ncrosstermlist), *this); + 
Kokkos::parallel_reduce(Kokkos::RangePolicy(0,ncrosstermlist),*this,ecmap); copymode = 0; atomKK->modified(execution_space,F_MASK); - } - /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION -void FixCMAPKokkos::operator()(TagFixCmapPostForce, const int n) const +void FixCMAPKokkos::operator()(TagFixCmapPostForce, const int n, double &ecmapKK) const { int i1,i2,i3,i4,i5,type; @@ -385,162 +381,122 @@ void FixCMAPKokkos::operator()(TagFixCmapPostForce, const int n) con t1 = type-1; if (t1 < 0 || t1 > 5) Kokkos::abort("Invalid CMAP crossterm_type"); - // determine the values and derivatives for the grid square points + // determine the values and derivatives for the grid square points - gs[0] = d_cmapgrid(t1,mli3,mli4); - gs[1] = d_cmapgrid(t1,mli31,mli4); - gs[2] = d_cmapgrid(t1,mli31,mli41); - gs[3] = d_cmapgrid(t1,mli3,mli41); - d1gs[0] = d_d1cmapgrid(t1,mli1,mli2); - d1gs[1] = d_d1cmapgrid(t1,mli11,mli2); - d1gs[2] = d_d1cmapgrid(t1,mli11,mli21); - d1gs[3] = d_d1cmapgrid(t1,mli1,mli21); - d2gs[0] = d_d2cmapgrid(t1,mli1,mli2); - d2gs[1] = d_d2cmapgrid(t1,mli11,mli2); - d2gs[2] = d_d2cmapgrid(t1,mli11,mli21); - d2gs[3] = d_d2cmapgrid(t1,mli1,mli21); - d12gs[0] = d_d12cmapgrid(t1,mli1,mli2); - d12gs[1] = d_d12cmapgrid(t1,mli11,mli2); - d12gs[2] = d_d12cmapgrid(t1,mli11,mli21); - d12gs[3] = d_d12cmapgrid(t1,mli1,mli21); + gs[0] = d_cmapgrid(t1,mli3,mli4); + gs[1] = d_cmapgrid(t1,mli31,mli4); + gs[2] = d_cmapgrid(t1,mli31,mli41); + gs[3] = d_cmapgrid(t1,mli3,mli41); + d1gs[0] = d_d1cmapgrid(t1,mli1,mli2); + d1gs[1] = d_d1cmapgrid(t1,mli11,mli2); + d1gs[2] = d_d1cmapgrid(t1,mli11,mli21); + d1gs[3] = d_d1cmapgrid(t1,mli1,mli21); + d2gs[0] = d_d2cmapgrid(t1,mli1,mli2); + d2gs[1] = d_d2cmapgrid(t1,mli11,mli2); + d2gs[2] = d_d2cmapgrid(t1,mli11,mli21); + d2gs[3] = d_d2cmapgrid(t1,mli1,mli21); + d12gs[0] = d_d12cmapgrid(t1,mli1,mli2); + d12gs[1] = d_d12cmapgrid(t1,mli11,mli2); + d12gs[2] = d_d12cmapgrid(t1,mli11,mli21); 
+ d12gs[3] = d_d12cmapgrid(t1,mli1,mli21); - // calculate the cmap energy and the gradient (dE/dphi,dE/dpsi) + // calculate the cmap energy and the gradient (dE/dphi,dE/dpsi) - double E, dEdPhi, dEdPsi; - bc_interpol(phi,psi,li3,li4,gs,d1gs,d2gs,d12gs,E,dEdPhi,dEdPsi); + double E, dEdPhi, dEdPsi; + bc_interpol(phi,psi,li3,li4,gs,d1gs,d2gs,d12gs,E,dEdPhi,dEdPsi); - // sum up cmap energy contributions + // sum up cmap energy contributions - double ecmapKK = 0.0; + // needed for compute_scalar() + double engfraction = 0.2 * E; + if (i1 < nlocal) ecmapKK += engfraction; + if (i2 < nlocal) ecmapKK += engfraction; + if (i3 < nlocal) ecmapKK += engfraction; + if (i4 < nlocal) ecmapKK += engfraction; + if (i5 < nlocal) ecmapKK += engfraction; - // needed for compute_scalar() - double engfraction = 0.2 * E; - if (i1 < nlocal) ecmapKK += engfraction; - if (i2 < nlocal) ecmapKK += engfraction; - if (i3 < nlocal) ecmapKK += engfraction; - if (i4 < nlocal) ecmapKK += engfraction; - if (i5 < nlocal) ecmapKK += engfraction; + // calculate the derivatives dphi/dr_i - // calculate the derivatives dphi/dr_i + dphidr1x = 1.0*r32/a1sq*a1x; + dphidr1y = 1.0*r32/a1sq*a1y; + dphidr1z = 1.0*r32/a1sq*a1z; - dphidr1x = 1.0*r32/a1sq*a1x; - dphidr1y = 1.0*r32/a1sq*a1y; - dphidr1z = 1.0*r32/a1sq*a1z; + dphidr2x = -1.0*r32/a1sq*a1x - dpr21r32/a1sq/r32*a1x + dpr34r32/b1sq/r32*b1x; + dphidr2y = -1.0*r32/a1sq*a1y - dpr21r32/a1sq/r32*a1y + dpr34r32/b1sq/r32*b1y; + dphidr2z = -1.0*r32/a1sq*a1z - dpr21r32/a1sq/r32*a1z + dpr34r32/b1sq/r32*b1z; - dphidr2x = -1.0*r32/a1sq*a1x - dpr21r32/a1sq/r32*a1x + - dpr34r32/b1sq/r32*b1x; - dphidr2y = -1.0*r32/a1sq*a1y - dpr21r32/a1sq/r32*a1y + - dpr34r32/b1sq/r32*b1y; - dphidr2z = -1.0*r32/a1sq*a1z - dpr21r32/a1sq/r32*a1z + - dpr34r32/b1sq/r32*b1z; + dphidr3x = dpr34r32/b1sq/r32*b1x - dpr21r32/a1sq/r32*a1x - r32/b1sq*b1x; + dphidr3y = dpr34r32/b1sq/r32*b1y - dpr21r32/a1sq/r32*a1y - r32/b1sq*b1y; + dphidr3z = dpr34r32/b1sq/r32*b1z - dpr21r32/a1sq/r32*a1z - 
r32/b1sq*b1z; - dphidr3x = dpr34r32/b1sq/r32*b1x - dpr21r32/a1sq/r32*a1x - r32/b1sq*b1x; - dphidr3y = dpr34r32/b1sq/r32*b1y - dpr21r32/a1sq/r32*a1y - r32/b1sq*b1y; - dphidr3z = dpr34r32/b1sq/r32*b1z - dpr21r32/a1sq/r32*a1z - r32/b1sq*b1z; + dphidr4x = r32/b1sq*b1x; + dphidr4y = r32/b1sq*b1y; + dphidr4z = r32/b1sq*b1z; - dphidr4x = r32/b1sq*b1x; - dphidr4y = r32/b1sq*b1y; - dphidr4z = r32/b1sq*b1z; + // calculate the derivatives dpsi/dr_i - // calculate the derivatives dpsi/dr_i + dpsidr1x = 1.0*r43/a2sq*a2x; + dpsidr1y = 1.0*r43/a2sq*a2y; + dpsidr1z = 1.0*r43/a2sq*a2z; - dpsidr1x = 1.0*r43/a2sq*a2x; - dpsidr1y = 1.0*r43/a2sq*a2y; - dpsidr1z = 1.0*r43/a2sq*a2z; + dpsidr2x = r43/a2sq*a2x + dpr32r43/a2sq/r43*a2x - dpr45r43/b2sq/r43*b2x; + dpsidr2y = r43/a2sq*a2y + dpr32r43/a2sq/r43*a2y - dpr45r43/b2sq/r43*b2y; + dpsidr2z = r43/a2sq*a2z + dpr32r43/a2sq/r43*a2z - dpr45r43/b2sq/r43*b2z; - dpsidr2x = r43/a2sq*a2x + dpr32r43/a2sq/r43*a2x - dpr45r43/b2sq/r43*b2x; - dpsidr2y = r43/a2sq*a2y + dpr32r43/a2sq/r43*a2y - dpr45r43/b2sq/r43*b2y; - dpsidr2z = r43/a2sq*a2z + dpr32r43/a2sq/r43*a2z - dpr45r43/b2sq/r43*b2z; + dpsidr3x = dpr45r43/b2sq/r43*b2x - dpr32r43/a2sq/r43*a2x - r43/b2sq*b2x; + dpsidr3y = dpr45r43/b2sq/r43*b2y - dpr32r43/a2sq/r43*a2y - r43/b2sq*b2y; + dpsidr3z = dpr45r43/b2sq/r43*b2z - dpr32r43/a2sq/r43*a2z - r43/b2sq*b2z; - dpsidr3x = dpr45r43/b2sq/r43*b2x - dpr32r43/a2sq/r43*a2x - r43/b2sq*b2x; - dpsidr3y = dpr45r43/b2sq/r43*b2y - dpr32r43/a2sq/r43*a2y - r43/b2sq*b2y; - dpsidr3z = dpr45r43/b2sq/r43*b2z - dpr32r43/a2sq/r43*a2z - r43/b2sq*b2z; + dpsidr4x = r43/b2sq*b2x; + dpsidr4y = r43/b2sq*b2y; + dpsidr4z = r43/b2sq*b2z; - dpsidr4x = r43/b2sq*b2x; - dpsidr4y = r43/b2sq*b2y; - dpsidr4z = r43/b2sq*b2z; + // calculate forces on cross-term atoms: F = -(dE/dPhi)*(dPhi/dr) - // calculate forces on cross-term atoms: F = -(dE/dPhi)*(dPhi/dr) + f1[0] = dEdPhi*dphidr1x; + f1[1] = dEdPhi*dphidr1y; + f1[2] = dEdPhi*dphidr1z; + f2[0] = dEdPhi*dphidr2x + dEdPsi*dpsidr1x; + 
f2[1] = dEdPhi*dphidr2y + dEdPsi*dpsidr1y; + f2[2] = dEdPhi*dphidr2z + dEdPsi*dpsidr1z; + f3[0] = -dEdPhi*dphidr3x - dEdPsi*dpsidr2x; + f3[1] = -dEdPhi*dphidr3y - dEdPsi*dpsidr2y; + f3[2] = -dEdPhi*dphidr3z - dEdPsi*dpsidr2z; + f4[0] = -dEdPhi*dphidr4x - dEdPsi*dpsidr3x; + f4[1] = -dEdPhi*dphidr4y - dEdPsi*dpsidr3y; + f4[2] = -dEdPhi*dphidr4z - dEdPsi*dpsidr3z; + f5[0] = -dEdPsi*dpsidr4x; + f5[1] = -dEdPsi*dpsidr4y; + f5[2] = -dEdPsi*dpsidr4z; - f1[0] = dEdPhi*dphidr1x; - f1[1] = dEdPhi*dphidr1y; - f1[2] = dEdPhi*dphidr1z; - f2[0] = dEdPhi*dphidr2x + dEdPsi*dpsidr1x; - f2[1] = dEdPhi*dphidr2y + dEdPsi*dpsidr1y; - f2[2] = dEdPhi*dphidr2z + dEdPsi*dpsidr1z; - f3[0] = -dEdPhi*dphidr3x - dEdPsi*dpsidr2x; - f3[1] = -dEdPhi*dphidr3y - dEdPsi*dpsidr2y; - f3[2] = -dEdPhi*dphidr3z - dEdPsi*dpsidr2z; - f4[0] = -dEdPhi*dphidr4x - dEdPsi*dpsidr3x; - f4[1] = -dEdPhi*dphidr4y - dEdPsi*dpsidr3y; - f4[2] = -dEdPhi*dphidr4z - dEdPsi*dpsidr3z; - f5[0] = -dEdPsi*dpsidr4x; - f5[1] = -dEdPsi*dpsidr4y; - f5[2] = -dEdPsi*dpsidr4z; - - // apply force to each of the 5 atoms - - if (i1 < nlocal) { - d_f(i1,0) += f1[0]; - d_f(i1,1) += f1[1]; - d_f(i1,2) += f1[2]; - } - if (i2 < nlocal) { - d_f(i2,0) += f2[0]; - d_f(i2,1) += f2[1]; - d_f(i2,2) += f2[2]; - } - if (i3 < nlocal) { - d_f(i3,0) += f3[0]; - d_f(i3,1) += f3[1]; - d_f(i3,2) += f3[2]; - } - if (i4 < nlocal) { - d_f(i4,0) += f4[0]; - d_f(i4,1) += f4[1]; - d_f(i4,2) += f4[2]; - } - if (i5 < nlocal) { - d_f(i5,0) += f5[0]; - d_f(i5,1) += f5[1]; - d_f(i5,2) += f5[2]; - } - - // tally energy and/or virial - -/* - if (evflag) { - //std::cerr << "******** tally energy and/or virial\n"; - int nlist = 0; - int list[5]; - double vb54x = -1.0*vb45x; - double vb54y = -1.0*vb45y; - double vb54z = -1.0*vb45z; - double vcmap[CMAPMAX]; - - if (i1 < nlocal) list[nlist++] = i1; - if (i2 < nlocal) list[nlist++] = i2; - if (i3 < nlocal) list[nlist++] = i3; - if (i4 < nlocal) list[nlist++] = i4; - if (i5 < nlocal) list[nlist++] = i5; - vcmap[0] = 
(vb12x*f1[0])+(vb32x*f3[0])+((vb43x+vb32x)*f4[0])+ - ((vb54x+vb43x+vb32x)*f5[0]); - vcmap[1] = (vb12y*f1[1])+(vb32y*f3[1])+((vb43y+vb32y)*f4[1])+ - ((vb54y+vb43y+vb32y)*f5[1]); - vcmap[2] = (vb12z*f1[2])+(vb32z*f3[2])+((vb43z+vb32z)*f4[2])+ - ((vb54z+vb43z+vb32z)*f5[2]); - vcmap[3] = (vb12x*f1[1])+(vb32x*f3[1])+((vb43x+vb32x)*f4[1])+ - ((vb54x+vb43x+vb32x)*f5[1]); - vcmap[4] = (vb12x*f1[2])+(vb32x*f3[2])+((vb43x+vb32x)*f4[2])+ - ((vb54x+vb43x+vb32x)*f5[2]); - vcmap[5] = (vb12y*f1[2])+(vb32y*f3[2])+((vb43y+vb32y)*f4[2])+ - ((vb54y+vb43y+vb32y)*f5[2]); - ev_tally(nlist,list,5.0,E,vcmap); - //ev_tally(5,list,nlocal,newton_bond,E,vcmap); - } -*/ + // apply force to each of the 5 atoms + if (i1 < nlocal) { + d_f(i1,0) += f1[0]; + d_f(i1,1) += f1[1]; + d_f(i1,2) += f1[2]; } + if (i2 < nlocal) { + d_f(i2,0) += f2[0]; + d_f(i2,1) += f2[1]; + d_f(i2,2) += f2[2]; + } + if (i3 < nlocal) { + d_f(i3,0) += f3[0]; + d_f(i3,1) += f3[1]; + d_f(i3,2) += f3[2]; + } + if (i4 < nlocal) { + d_f(i4,0) += f4[0]; + d_f(i4,1) += f4[1]; + d_f(i4,2) += f4[2]; + } + if (i5 < nlocal) { + d_f(i5,0) += f5[0]; + d_f(i5,1) += f5[1]; + d_f(i5,2) += f5[2]; + } +} /* ---------------------------------------------------------------------- allocate atom-based array diff --git a/src/KOKKOS/fix_cmap_kokkos.h b/src/KOKKOS/fix_cmap_kokkos.h index 8e2bf7e643..2e1b00d923 100644 --- a/src/KOKKOS/fix_cmap_kokkos.h +++ b/src/KOKKOS/fix_cmap_kokkos.h @@ -49,7 +49,7 @@ class FixCMAPKokkos : public FixCMAP, public KokkosBase { void operator()(TagFixCmapPreNeighbor, const int, int&, const bool) const; KOKKOS_INLINE_FUNCTION - void operator()(TagFixCmapPostForce, const int) const; + void operator()(TagFixCmapPostForce, const int, double&) const; void grow_arrays(int) override; void copy_arrays(int, int, int) override; From 6295fa0b547390215ce71fe4878a48cda44dff9f Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 5 Oct 2024 23:07:50 -0400 Subject: [PATCH 151/294] whitespace --- src/KOKKOS/atom_kokkos.cpp | 2 -- 
src/KOKKOS/comm_kokkos.cpp | 2 -- src/fix_recenter.cpp | 1 - 3 files changed, 5 deletions(-) diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp index c07746b823..7529921058 100644 --- a/src/KOKKOS/atom_kokkos.cpp +++ b/src/KOKKOS/atom_kokkos.cpp @@ -208,10 +208,8 @@ void AtomKokkos::sort() auto fix_iextra = modify->fix[atom->extra_grow[iextra]]; if (!fix_iextra->sort_device) { flag = 0; - if (comm->me == 0) error->warning(FLERR,"Fix {} not compatible with Kokkos sorting on device", fix_iextra->style); - break; } } diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index d8ccc34296..4cebf34eb2 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -739,10 +739,8 @@ void CommKokkos::exchange() auto fix_iextra = modify->fix[atom->extra_grow[iextra]]; if (!fix_iextra->exchange_comm_device) { flag = 0; - if (comm->me == 0) error->warning(FLERR,"Fix {} not compatible with sending data in Kokkos communication", fix_iextra->style); - break; } } diff --git a/src/fix_recenter.cpp b/src/fix_recenter.cpp index ab9fde7031..4da8c4787b 100644 --- a/src/fix_recenter.cpp +++ b/src/fix_recenter.cpp @@ -218,7 +218,6 @@ void FixRecenter::initial_integrate_respa(int vflag, int ilevel, int /*iloop*/) // all other levels - nothing if (ilevel == nlevels_respa-1) initial_integrate(vflag); - } /* ---------------------------------------------------------------------- */ From 92deb5c8dc39e3bef822f081d5158e1f1ceb4a67 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 5 Oct 2024 23:15:50 -0400 Subject: [PATCH 152/294] revert change that breaks backward compatibility --- src/MOLECULE/atom_vec_template.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/MOLECULE/atom_vec_template.cpp b/src/MOLECULE/atom_vec_template.cpp index 489756bf4c..2678f44327 100644 --- a/src/MOLECULE/atom_vec_template.cpp +++ b/src/MOLECULE/atom_vec_template.cpp @@ -42,7 +42,7 @@ AtomVecTemplate::AtomVecTemplate(LAMMPS *lmp) : AtomVec(lmp) 
fields_exchange = {"molecule", "molindex", "molatom"}; fields_restart = {"molecule", "molindex", "molatom"}; fields_create = {"molecule", "molindex", "molatom"}; - fields_data_atom = {"id", "molecule", "type", "x", "molindex", "molatom"}; + fields_data_atom = {"id", "molecule", "molindex", "molatom", "type", "x"}; fields_data_vel = {"id", "v"}; setup_fields(); From af7f5a5b5eecf98aefc944039070c4f1188e602e Mon Sep 17 00:00:00 2001 From: alphataubio Date: Mon, 7 Oct 2024 15:03:13 -0400 Subject: [PATCH 153/294] port surface() to KOKKOS --- src/KOKKOS/region_block_kokkos.cpp | 200 ++++++++++++++++++++++++++++ src/KOKKOS/region_block_kokkos.h | 7 + src/KOKKOS/region_sphere_kokkos.cpp | 114 ++++++++++++++++ src/KOKKOS/region_sphere_kokkos.h | 7 + 4 files changed, 328 insertions(+) diff --git a/src/KOKKOS/region_block_kokkos.cpp b/src/KOKKOS/region_block_kokkos.cpp index f5e7499456..d91c86fb59 100644 --- a/src/KOKKOS/region_block_kokkos.cpp +++ b/src/KOKKOS/region_block_kokkos.cpp @@ -27,6 +27,206 @@ RegBlockKokkos::RegBlockKokkos(LAMMPS *lmp, int narg, char **arg) atomKK = (AtomKokkos*) atom; } +/* ---------------------------------------------------------------------- + generate list of contact points for interior or exterior regions + if region has variable shape, invoke shape_update() once per timestep + if region is dynamic: + before: inverse transform x,y,z (unmove, then unrotate) + after: forward transform contact point xs,yx,zs (rotate, then move), + then reset contact delx,dely,delz based on new contact point + no need to do this if no rotation since delxyz doesn't change + caller is responsible for wrapping this call with + modify->clearstep_compute() and modify->addstep_compute() if needed +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +int RegBlockKokkos::surface(double x, double y, double z, double cutoff) +{ + int ncontact; + double xs, ys, zs; + double xnear[3], xorig[3]; + + if (dynamic) { + 
xorig[0] = x; xorig[1] = y; xorig[2] = z; + inverse_transform(x, y, z); + } + + xnear[0] = x; xnear[1] = y; xnear[2] = z; + + if (!openflag) { + if (interior) + ncontact = surface_interior(xnear, cutoff); + else + ncontact = surface_exterior(xnear, cutoff); + } else { + // one of surface_int/ext() will return 0 + // so no need to worry about offset of contact indices + ncontact = surface_exterior(xnear, cutoff) + surface_interior(xnear, cutoff); + } + + if (rotateflag && ncontact) { + for (int i = 0; i < ncontact; i++) { + xs = xnear[0] - contact[i].delx; + ys = xnear[1] - contact[i].dely; + zs = xnear[2] - contact[i].delz; + forward_transform(xs, ys, zs); + contact[i].delx = xorig[0] - xs; + contact[i].dely = xorig[1] - ys; + contact[i].delz = xorig[2] - zs; + } + } + + return ncontact; +} + +/* ---------------------------------------------------------------------- + contact if 0 <= x < cutoff from one or more inner surfaces of block + can be one contact for each of 6 faces + no contact if outside (possible if called from union/intersect) + delxyz = vector from nearest point on block to x +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +int RegBlockKokkos::surface_interior(double *x, double cutoff) +{ + double delta; + + // x is exterior to block + + if (x[0] < xlo || x[0] > xhi || x[1] < ylo || x[1] > yhi || x[2] < zlo || x[2] > zhi) return 0; + + // x is interior to block or on its surface + + int n = 0; + + delta = x[0] - xlo; + if (delta < cutoff && !open_faces[0]) { + contact[n].r = delta; + contact[n].delx = delta; + contact[n].dely = contact[n].delz = 0.0; + contact[n].radius = 0; + contact[n].iwall = 0; + n++; + } + delta = xhi - x[0]; + if (delta < cutoff && !open_faces[1]) { + contact[n].r = delta; + contact[n].delx = -delta; + contact[n].dely = contact[n].delz = 0.0; + contact[n].radius = 0; + contact[n].iwall = 1; + n++; + } + + delta = x[1] - ylo; + if (delta < cutoff && !open_faces[2]) 
{ + contact[n].r = delta; + contact[n].dely = delta; + contact[n].delx = contact[n].delz = 0.0; + contact[n].radius = 0; + contact[n].iwall = 2; + n++; + } + delta = yhi - x[1]; + if (delta < cutoff && !open_faces[3]) { + contact[n].r = delta; + contact[n].dely = -delta; + contact[n].delx = contact[n].delz = 0.0; + contact[n].radius = 0; + contact[n].iwall = 3; + n++; + } + + delta = x[2] - zlo; + if (delta < cutoff && !open_faces[4]) { + contact[n].r = delta; + contact[n].delz = delta; + contact[n].delx = contact[n].dely = 0.0; + contact[n].radius = 0; + contact[n].iwall = 4; + n++; + } + delta = zhi - x[2]; + if (delta < cutoff && !open_faces[5]) { + contact[n].r = delta; + contact[n].delz = -delta; + contact[n].delx = contact[n].dely = 0.0; + contact[n].radius = 0; + contact[n].iwall = 5; + n++; + } + + return n; +} + +/* ---------------------------------------------------------------------- + one contact if 0 <= x < cutoff from outer surface of block + no contact if inside (possible if called from union/intersect) + delxyz = vector from nearest point on block to x +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +int RegBlockKokkos::surface_exterior(double *x, double cutoff) +{ + double xp, yp, zp; + double xc, yc, zc, dist, mindist; + + // x is far enough from block that there is no contact + // x is interior to block + + if (x[0] <= xlo - cutoff || x[0] >= xhi + cutoff || x[1] <= ylo - cutoff || + x[1] >= yhi + cutoff || x[2] <= zlo - cutoff || x[2] >= zhi + cutoff) + return 0; + if (x[0] > xlo && x[0] < xhi && x[1] > ylo && x[1] < yhi && x[2] > zlo && x[2] < zhi) return 0; + + // x is exterior to block or on its surface + // xp,yp,zp = point on surface of block that x is closest to + // could be edge or corner pt of block + // do not add contact point if r >= cutoff + + if (!openflag) { + if (x[0] < xlo) + xp = xlo; + else if (x[0] > xhi) + xp = xhi; + else + xp = x[0]; + if (x[1] < ylo) + yp = 
ylo; + else if (x[1] > yhi) + yp = yhi; + else + yp = x[1]; + if (x[2] < zlo) + zp = zlo; + else if (x[2] > zhi) + zp = zhi; + else + zp = x[2]; + } else { + mindist = BIG; + for (int i = 0; i < 6; i++) { + if (open_faces[i]) continue; + dist = find_closest_point(i, x, xc, yc, zc); + if (dist < mindist) { + xp = xc; + yp = yc; + zp = zc; + mindist = dist; + } + } + } + + add_contact(0, x, xp, yp, zp); + contact[0].iwall = 0; + if (contact[0].r < cutoff) return 1; + return 0; +} + + /* ---------------------------------------------------------------------- inside = 1 if x,y,z is inside or on surface inside = 0 if x,y,z is outside and not on surface diff --git a/src/KOKKOS/region_block_kokkos.h b/src/KOKKOS/region_block_kokkos.h index 017e4e5ee4..19ade67590 100644 --- a/src/KOKKOS/region_block_kokkos.h +++ b/src/KOKKOS/region_block_kokkos.h @@ -62,6 +62,13 @@ class RegBlockKokkos : public RegBlock, public KokkosBase { KOKKOS_INLINE_FUNCTION void rotate(double &, double &, double &, double) const; + KOKKOS_INLINE_FUNCTION + int surface(double, double, double, double); + KOKKOS_INLINE_FUNCTION + int surface_interior(double *, double) override; + KOKKOS_INLINE_FUNCTION + int surface_exterior(double *, double) override; + }; } diff --git a/src/KOKKOS/region_sphere_kokkos.cpp b/src/KOKKOS/region_sphere_kokkos.cpp index b442bb7ae5..73ec1f478c 100644 --- a/src/KOKKOS/region_sphere_kokkos.cpp +++ b/src/KOKKOS/region_sphere_kokkos.cpp @@ -32,6 +32,120 @@ RegSphereKokkos::RegSphereKokkos(LAMMPS *lmp, int narg, char **arg) atomKK = (AtomKokkos*) atom; } +/* ---------------------------------------------------------------------- + generate list of contact points for interior or exterior regions + if region has variable shape, invoke shape_update() once per timestep + if region is dynamic: + before: inverse transform x,y,z (unmove, then unrotate) + after: forward transform contact point xs,yx,zs (rotate, then move), + then reset contact delx,dely,delz based on new contact point + 
no need to do this if no rotation since delxyz doesn't change + caller is responsible for wrapping this call with + modify->clearstep_compute() and modify->addstep_compute() if needed +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +int RegSphereKokkos::surface(double x, double y, double z, double cutoff) +{ + int ncontact; + double xs, ys, zs; + double xnear[3], xorig[3]; + + if (dynamic) { + xorig[0] = x; xorig[1] = y; xorig[2] = z; + inverse_transform(x, y, z); + } + + xnear[0] = x; xnear[1] = y; xnear[2] = z; + + if (!openflag) { + if (interior) + ncontact = surface_interior(xnear, cutoff); + else + ncontact = surface_exterior(xnear, cutoff); + } else { + // one of surface_int/ext() will return 0 + // so no need to worry about offset of contact indices + ncontact = surface_exterior(xnear, cutoff) + surface_interior(xnear, cutoff); + } + + if (rotateflag && ncontact) { + for (int i = 0; i < ncontact; i++) { + xs = xnear[0] - contact[i].delx; + ys = xnear[1] - contact[i].dely; + zs = xnear[2] - contact[i].delz; + forward_transform(xs, ys, zs); + contact[i].delx = xorig[0] - xs; + contact[i].dely = xorig[1] - ys; + contact[i].delz = xorig[2] - zs; + } + } + + return ncontact; +} + +/* ---------------------------------------------------------------------- + one contact if 0 <= x < cutoff from inner surface of sphere + no contact if outside (possible if called from union/intersect) + delxyz = vector from nearest point on sphere to x + special case: no contact if x is at center of sphere +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +int RegSphereKokkos::surface_interior(double *x, double cutoff) +{ + double delx = x[0] - xc; + double dely = x[1] - yc; + double delz = x[2] - zc; + double r = sqrt(delx * delx + dely * dely + delz * delz); + if (r > radius || r == 0.0) return 0; + + double delta = radius - r; + if (delta < cutoff) { + 
contact[0].r = delta; + contact[0].delx = delx * (1.0 - radius / r); + contact[0].dely = dely * (1.0 - radius / r); + contact[0].delz = delz * (1.0 - radius / r); + contact[0].radius = -radius; + contact[0].iwall = 0; + contact[0].varflag = 1; + return 1; + } + return 0; +} + +/* ---------------------------------------------------------------------- + one contact if 0 <= x < cutoff from outer surface of sphere + no contact if inside (possible if called from union/intersect) + delxyz = vector from nearest point on sphere to x +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +int RegSphereKokkos::surface_exterior(double *x, double cutoff) +{ + double delx = x[0] - xc; + double dely = x[1] - yc; + double delz = x[2] - zc; + double r = sqrt(delx * delx + dely * dely + delz * delz); + if (r < radius) return 0; + + double delta = r - radius; + if (delta < cutoff) { + contact[0].r = delta; + contact[0].delx = delx * (1.0 - radius / r); + contact[0].dely = dely * (1.0 - radius / r); + contact[0].delz = delz * (1.0 - radius / r); + contact[0].radius = radius; + contact[0].iwall = 0; + contact[0].varflag = 1; + return 1; + } + return 0; +} + /* ---------------------------------------------------------------------- inside = 1 if x,y,z is inside or on surface inside = 0 if x,y,z is outside and not on surface diff --git a/src/KOKKOS/region_sphere_kokkos.h b/src/KOKKOS/region_sphere_kokkos.h index 579f339ca8..ce08d48548 100644 --- a/src/KOKKOS/region_sphere_kokkos.h +++ b/src/KOKKOS/region_sphere_kokkos.h @@ -55,6 +55,13 @@ class RegSphereKokkos : public RegSphere, public KokkosBase { KOKKOS_INLINE_FUNCTION void rotate(double &, double &, double &, double) const; + KOKKOS_INLINE_FUNCTION + int surface(double, double, double, double); + KOKKOS_INLINE_FUNCTION + int surface_interior(double *, double) override; + KOKKOS_INLINE_FUNCTION + int surface_exterior(double *, double) override; + }; } From 
ff66b42022b708d2aab9b3e75bd509ac08337ec1 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Mon, 7 Oct 2024 15:04:34 -0400 Subject: [PATCH 154/294] cleanup --- src/KOKKOS/fix_wall_region_kokkos.cpp | 9 ++++----- src/KOKKOS/fix_wall_region_kokkos.h | 3 --- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/KOKKOS/fix_wall_region_kokkos.cpp b/src/KOKKOS/fix_wall_region_kokkos.cpp index ccb20b52d4..7cd76a3233 100644 --- a/src/KOKKOS/fix_wall_region_kokkos.cpp +++ b/src/KOKKOS/fix_wall_region_kokkos.cpp @@ -13,7 +13,7 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing author: Mitch Murphy (alphataubio at gmail.com) + Contributing author: Mitch Murphy (alphataubio at gmail) ------------------------------------------------------------------------- */ #include "fix_wall_region_kokkos.h" @@ -57,8 +57,8 @@ FixWallRegionKokkos::FixWallRegionKokkos(LAMMPS *lmp, int narg, char datamask_read = X_MASK | V_MASK | MASK_MASK; datamask_modify = F_MASK; - memoryKK->create_kokkos(k_ewall,4,"wall_region:ewall"); - d_ewall = k_ewall.template view(); + //memoryKK->create_kokkos(k_ewall,4,"wall_region:ewall"); + //d_ewall = k_ewall.template view(); } template @@ -117,7 +117,6 @@ void FixWallRegionKokkos::post_force(int vflag) // eflag is used to track whether wall energies have been communicated. 
eflag = 0; - d_ewall(0)=d_ewall(1)=d_ewall(2)=d_ewall(3)=0.0; double result[10]; @@ -126,7 +125,7 @@ void FixWallRegionKokkos::post_force(int vflag) Kokkos::parallel_reduce(nlocal,functor,result); copymode = 0; - for( int i=0 ; i<4 ; i++ ) Kokkos::atomic_add(&(d_ewall[i]),result[i]); + for( int i=0 ; i<4 ; i++ ) ewall[i] = result[i]; if (vflag_global) { virial[0] += result[4]; diff --git a/src/KOKKOS/fix_wall_region_kokkos.h b/src/KOKKOS/fix_wall_region_kokkos.h index 63bf8db7e2..ca98b15d3d 100644 --- a/src/KOKKOS/fix_wall_region_kokkos.h +++ b/src/KOKKOS/fix_wall_region_kokkos.h @@ -55,9 +55,6 @@ class FixWallRegionKokkos : public FixWallRegion { DAT::tdual_virial_array k_vatom; typename AT::t_virial_array d_vatom; - typename AT::tdual_ffloat_1d k_ewall; - typename AT::t_ffloat_1d d_ewall; - KOKKOS_INLINE_FUNCTION double lj93(double, double&) const; From 177b04fb56c72bb927bed53227b6284b26681a8b Mon Sep 17 00:00:00 2001 From: alphataubio Date: Mon, 7 Oct 2024 15:06:01 -0400 Subject: [PATCH 155/294] Update fix_wall_region_kokkos.cpp --- src/KOKKOS/fix_wall_region_kokkos.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/KOKKOS/fix_wall_region_kokkos.cpp b/src/KOKKOS/fix_wall_region_kokkos.cpp index 7cd76a3233..8f488208b9 100644 --- a/src/KOKKOS/fix_wall_region_kokkos.cpp +++ b/src/KOKKOS/fix_wall_region_kokkos.cpp @@ -56,18 +56,13 @@ FixWallRegionKokkos::FixWallRegionKokkos(LAMMPS *lmp, int narg, char execution_space = ExecutionSpaceFromDevice::space; datamask_read = X_MASK | V_MASK | MASK_MASK; datamask_modify = F_MASK; - - //memoryKK->create_kokkos(k_ewall,4,"wall_region:ewall"); - //d_ewall = k_ewall.template view(); } template FixWallRegionKokkos::~FixWallRegionKokkos() { if (copymode) return; - memoryKK->destroy_kokkos(k_vatom,vatom); - memoryKK->destroy_kokkos(k_ewall); } /* ---------------------------------------------------------------------- */ @@ -136,9 +131,6 @@ void FixWallRegionKokkos::post_force(int vflag) virial[5] += result[9]; } - 
k_ewall.template modify(); - k_ewall.template sync(); - atomKK->modified(execution_space,F_MASK); if (vflag_atom) { From 0e1ee42b89c108c2cf822e8011917efbbffa4e7e Mon Sep 17 00:00:00 2001 From: alphataubio Date: Mon, 7 Oct 2024 15:12:10 -0400 Subject: [PATCH 156/294] Update region_block_kokkos.cpp --- src/KOKKOS/region_block_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/region_block_kokkos.cpp b/src/KOKKOS/region_block_kokkos.cpp index d91c86fb59..3c4c49e7f3 100644 --- a/src/KOKKOS/region_block_kokkos.cpp +++ b/src/KOKKOS/region_block_kokkos.cpp @@ -207,7 +207,7 @@ int RegBlockKokkos::surface_exterior(double *x, double cutoff) else zp = x[2]; } else { - mindist = BIG; + mindist = MAXDOUBLEINT; for (int i = 0; i < 6; i++) { if (open_faces[i]) continue; dist = find_closest_point(i, x, xc, yc, zc); From 4492ecaf394e3852c045c86c906bc187094f6cb1 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Mon, 7 Oct 2024 17:04:21 -0400 Subject: [PATCH 157/294] Update region_sphere.cpp --- src/region_sphere.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/region_sphere.cpp b/src/region_sphere.cpp index f449978938..ea6e39d894 100644 --- a/src/region_sphere.cpp +++ b/src/region_sphere.cpp @@ -101,6 +101,7 @@ RegSphere::RegSphere(LAMMPS *lmp, int narg, char **arg) : RegSphere::~RegSphere() { + if (copymode) return; delete[] xstr; delete[] ystr; delete[] zstr; From e44d366d92139a0f26794dc465f6f4c41c353595 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Mon, 7 Oct 2024 17:04:28 -0400 Subject: [PATCH 158/294] Update region.cpp --- src/region.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/region.cpp b/src/region.cpp index 0017186c5d..6bcbc4470a 100644 --- a/src/region.cpp +++ b/src/region.cpp @@ -50,10 +50,8 @@ Region::Region(LAMMPS *lmp, int /*narg*/, char **arg) : Region::~Region() { if (copymode) return; - delete[] id; delete[] style; - delete[] xstr; delete[] ystr; delete[] zstr; From 
74ac224dd938ca8c4984948c401f9981e59bcaef Mon Sep 17 00:00:00 2001 From: alphataubio Date: Mon, 7 Oct 2024 17:05:08 -0400 Subject: [PATCH 159/294] Create fix-timestep-wall_region_sphere.yaml --- .../fix-timestep-wall_region_sphere.yaml | 84 +++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 unittest/force-styles/tests/fix-timestep-wall_region_sphere.yaml diff --git a/unittest/force-styles/tests/fix-timestep-wall_region_sphere.yaml b/unittest/force-styles/tests/fix-timestep-wall_region_sphere.yaml new file mode 100644 index 0000000000..893537d02b --- /dev/null +++ b/unittest/force-styles/tests/fix-timestep-wall_region_sphere.yaml @@ -0,0 +1,84 @@ +--- +lammps_version: 29 Aug 2024 +tags: generated +date_generated: Mon Oct 7 17:02:09 2024 +epsilon: 4e-14 +skip_tests: +prerequisites: ! | + atom full + fix wall/region +pre_commands: ! | + boundary f f f +post_commands: ! | + fix move all nve + region 1 sphere 0 0 0 10 + fix test solute wall/region 1 lj93 1.0 1.0 2.5 + fix_modify test virial yes +input_file: in.fourmol +natoms: 29 +run_stress: ! |2- + 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +global_scalar: 0 +global_vector: ! |- + 3 0 0 0 +run_pos: ! 
|2 + 1 -2.7045559775384026e-01 2.4912159905679729e+00 -1.6695851791541885e-01 + 2 3.1004029573899528e-01 2.9612354631094391e+00 -8.5466363037021464e-01 + 3 -7.0398551400789466e-01 1.2305509955830618e+00 -6.2777526944456274e-01 + 4 -1.5818159336499285e+00 1.4837407818929933e+00 -1.2538710836062004e+00 + 5 -9.0719763672789266e-01 9.2652103885675297e-01 3.9954210488374786e-01 + 6 2.4831720524855988e-01 2.8313021497871271e-01 -1.2314233331711453e+00 + 7 3.4143527641386412e-01 -2.2646551041391422e-02 -2.5292291414903052e+00 + 8 1.1743552229100009e+00 -4.8863228565853944e-01 -6.3783432910825522e-01 + 9 1.3800524229500313e+00 -2.5274721030406683e-01 2.8353985887095157e-01 + 10 2.0510765220543883e+00 -1.4604063740302866e+00 -9.8323745081712954e-01 + 11 1.7878031944442556e+00 -1.9921863272948861e+00 -1.8890602447625777e+00 + 12 3.0063007039340053e+00 -4.9013350496963298e-01 -1.6231898107386231e+00 + 13 4.0515402959192999e+00 -8.9202011606653986e-01 -1.6400005529924957e+00 + 14 2.6066963345543819e+00 -4.1789253965514156e-01 -2.6634003608794394e+00 + 15 2.9695287185712913e+00 5.5422613165234036e-01 -1.2342022021790127e+00 + 16 2.6747029695228521e+00 -2.4124119054564295e+00 -2.3435746150616152e-02 + 17 2.2153577785283796e+00 -2.0897985186907717e+00 1.1963150794479436e+00 + 18 2.1369701704094664e+00 3.0158507413593139e+00 -3.5179348337135590e+00 + 19 1.5355837135395243e+00 2.6255292354730009e+00 -4.2353987771401354e+00 + 20 2.7727573003748263e+00 3.6923910441179069e+00 -3.9330842453167185e+00 + 21 4.9040128073837339e+00 -4.0752348170758461e+00 -3.6210314709795299e+00 + 22 4.3582355554510048e+00 -4.2126119427061379e+00 -4.4612844196307497e+00 + 23 5.7439382849366911e+00 -3.5821957939240279e+00 -3.8766361295959513e+00 + 24 2.0689243582454213e+00 3.1513346907303501e+00 3.1550389751128463e+00 + 25 1.3045351331414130e+00 3.2665125705869009e+00 2.5111855257365274e+00 + 26 2.5809237402714267e+00 4.0117602605512728e+00 3.2212060528800821e+00 + 27 -1.9611343130357228e+00 
-4.3563411931359752e+00 2.1098293115523705e+00 + 28 -2.7473562684513411e+00 -4.0200819932379330e+00 1.5830052163433954e+00 + 29 -1.3126000191359855e+00 -3.5962518039482929e+00 2.2746342468737835e+00 +run_vel: ! |2 + 1 8.1705744183262832e-03 1.6516406176274298e-02 4.7902264318913203e-03 + 2 5.4501493445687828e-03 5.1791699408496447e-03 -1.4372931530376549e-03 + 3 -8.2298292722385574e-03 -1.2926551614621364e-02 -4.0984181178163699e-03 + 4 -3.7699042590093523e-03 -6.5722892098813894e-03 -1.1184640360133299e-03 + 5 -1.1021961004346582e-02 -9.8906780939336091e-03 -2.8410737829284408e-03 + 6 -3.9676663166400027e-02 4.6817061464710263e-02 3.7148491979476131e-02 + 7 9.1033953013898742e-04 -1.0128524411938794e-02 -5.1568251805019748e-02 + 8 7.9064712058855742e-03 -3.3507254552631576e-03 3.4557098492564650e-02 + 9 1.5644176117320932e-03 3.7365546102722212e-03 1.5047408822037651e-02 + 10 2.9201446820573192e-02 -2.9249578745486147e-02 -1.5018077424322544e-02 + 11 -4.7835961513517542e-03 -3.7481385134185202e-03 -2.3464104142290089e-03 + 12 2.2696451841920694e-03 -3.4774154398129690e-04 -3.0640770327796979e-03 + 13 2.7531740451953164e-03 5.8171061612840493e-03 -7.9467454022160377e-04 + 14 3.5246182371994183e-03 -5.7939995585585503e-03 -3.9478431172751344e-03 + 15 -1.8547943640122972e-03 -5.8554729942777778e-03 6.2938485140538692e-03 + 16 1.8681499973445252e-02 -1.3262466204585332e-02 -4.5638651457003250e-02 + 17 -1.2896269981100378e-02 9.7527665265956451e-03 3.7296535360836762e-02 + 18 -8.0065795274987550e-04 -8.6270473974390605e-04 -1.4483040536385806e-03 + 19 1.2452390067376805e-03 -2.5061097800836356e-03 7.2998639311871892e-03 + 20 3.5930058460518109e-03 3.6938852051849871e-03 3.2322738480194770e-03 + 21 -1.4689219756961604e-03 -2.7352107824530231e-04 7.0581625180892046e-04 + 22 -7.0694199165145140e-03 -4.2577148692717554e-03 2.8079117911323815e-04 + 23 6.0446963236685256e-03 -1.4000131545098772e-03 2.5819754799379755e-03 + 24 3.1926368451268056e-04 -9.9445664487428712e-04 
1.4999960207062358e-04 + 25 1.3789752933078488e-04 -4.4335894831520773e-03 -8.1808138106080109e-04 + 26 2.0485904023410002e-03 2.7813358660936120e-03 4.3245726853349290e-03 + 27 4.5604120293369819e-04 -1.0305523026921102e-03 2.1188058381358391e-04 + 28 -6.2544520861855151e-03 1.4127711176146864e-03 -1.8429821884794260e-03 + 29 6.4110631534402261e-04 3.1273432719593807e-03 3.7253671105656745e-03 +... From b16ddfcdd4c89352e758887bc9133b4b265a4764 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Mon, 7 Oct 2024 17:05:33 -0400 Subject: [PATCH 160/294] add d_contact --- src/KOKKOS/region_block_kokkos.cpp | 91 +++++++++++++++++------------ src/KOKKOS/region_block_kokkos.h | 3 + src/KOKKOS/region_sphere_kokkos.cpp | 54 ++++++++++------- src/KOKKOS/region_sphere_kokkos.h | 3 + 4 files changed, 93 insertions(+), 58 deletions(-) diff --git a/src/KOKKOS/region_block_kokkos.cpp b/src/KOKKOS/region_block_kokkos.cpp index 3c4c49e7f3..91deeeac29 100644 --- a/src/KOKKOS/region_block_kokkos.cpp +++ b/src/KOKKOS/region_block_kokkos.cpp @@ -13,8 +13,10 @@ ------------------------------------------------------------------------- */ #include "region_block_kokkos.h" + #include "atom_kokkos.h" #include "atom_masks.h" +#include "memory_kokkos.h" using namespace LAMMPS_NS; @@ -25,6 +27,16 @@ RegBlockKokkos::RegBlockKokkos(LAMMPS *lmp, int narg, char **arg) : RegBlock(lmp, narg, arg) { atomKK = (AtomKokkos*) atom; + memoryKK->create_kokkos(d_contact,6,"region_block:d_contact"); +} + +/* ---------------------------------------------------------------------- */ + +template +RegBlockKokkos::~RegBlockKokkos() +{ + if (copymode) return; + memoryKK->destroy_kokkos(d_contact); } /* ---------------------------------------------------------------------- @@ -47,6 +59,8 @@ int RegBlockKokkos::surface(double x, double y, double z, double cut double xs, ys, zs; double xnear[3], xorig[3]; + utils::logmesg(lmp, " *** RegBlockKokkos::surface\n"); + if (dynamic) { xorig[0] = x; xorig[1] = y; xorig[2] = z; 
inverse_transform(x, y, z); @@ -67,13 +81,13 @@ int RegBlockKokkos::surface(double x, double y, double z, double cut if (rotateflag && ncontact) { for (int i = 0; i < ncontact; i++) { - xs = xnear[0] - contact[i].delx; - ys = xnear[1] - contact[i].dely; - zs = xnear[2] - contact[i].delz; + xs = xnear[0] - d_contact[i].delx; + ys = xnear[1] - d_contact[i].dely; + zs = xnear[2] - d_contact[i].delz; forward_transform(xs, ys, zs); - contact[i].delx = xorig[0] - xs; - contact[i].dely = xorig[1] - ys; - contact[i].delz = xorig[2] - zs; + d_contact[i].delx = xorig[0] - xs; + d_contact[i].dely = xorig[1] - ys; + d_contact[i].delz = xorig[2] - zs; } } @@ -103,58 +117,58 @@ int RegBlockKokkos::surface_interior(double *x, double cutoff) delta = x[0] - xlo; if (delta < cutoff && !open_faces[0]) { - contact[n].r = delta; - contact[n].delx = delta; - contact[n].dely = contact[n].delz = 0.0; - contact[n].radius = 0; - contact[n].iwall = 0; + d_contact[n].r = delta; + d_contact[n].delx = delta; + d_contact[n].dely = d_contact[n].delz = 0.0; + d_contact[n].radius = 0; + d_contact[n].iwall = 0; n++; } delta = xhi - x[0]; if (delta < cutoff && !open_faces[1]) { - contact[n].r = delta; - contact[n].delx = -delta; - contact[n].dely = contact[n].delz = 0.0; - contact[n].radius = 0; - contact[n].iwall = 1; + d_contact[n].r = delta; + d_contact[n].delx = -delta; + d_contact[n].dely = d_contact[n].delz = 0.0; + d_contact[n].radius = 0; + d_contact[n].iwall = 1; n++; } delta = x[1] - ylo; if (delta < cutoff && !open_faces[2]) { - contact[n].r = delta; - contact[n].dely = delta; - contact[n].delx = contact[n].delz = 0.0; - contact[n].radius = 0; - contact[n].iwall = 2; + d_contact[n].r = delta; + d_contact[n].dely = delta; + d_contact[n].delx = d_contact[n].delz = 0.0; + d_contact[n].radius = 0; + d_contact[n].iwall = 2; n++; } delta = yhi - x[1]; if (delta < cutoff && !open_faces[3]) { - contact[n].r = delta; - contact[n].dely = -delta; - contact[n].delx = contact[n].delz = 0.0; - 
contact[n].radius = 0; - contact[n].iwall = 3; + d_contact[n].r = delta; + d_contact[n].dely = -delta; + d_contact[n].delx = d_contact[n].delz = 0.0; + d_contact[n].radius = 0; + d_contact[n].iwall = 3; n++; } delta = x[2] - zlo; if (delta < cutoff && !open_faces[4]) { - contact[n].r = delta; - contact[n].delz = delta; - contact[n].delx = contact[n].dely = 0.0; - contact[n].radius = 0; - contact[n].iwall = 4; + d_contact[n].r = delta; + d_contact[n].delz = delta; + d_contact[n].delx = d_contact[n].dely = 0.0; + d_contact[n].radius = 0; + d_contact[n].iwall = 4; n++; } delta = zhi - x[2]; if (delta < cutoff && !open_faces[5]) { - contact[n].r = delta; - contact[n].delz = -delta; - contact[n].delx = contact[n].dely = 0.0; - contact[n].radius = 0; - contact[n].iwall = 5; + d_contact[n].r = delta; + d_contact[n].delz = -delta; + d_contact[n].delx = d_contact[n].dely = 0.0; + d_contact[n].radius = 0; + d_contact[n].iwall = 5; n++; } @@ -220,9 +234,10 @@ int RegBlockKokkos::surface_exterior(double *x, double cutoff) } } + // FIXME: write add_contact inline for KOKKOS add_contact(0, x, xp, yp, zp); - contact[0].iwall = 0; - if (contact[0].r < cutoff) return 1; + d_contact[0].iwall = 0; + if (d_contact[0].r < cutoff) return 1; return 0; } diff --git a/src/KOKKOS/region_block_kokkos.h b/src/KOKKOS/region_block_kokkos.h index 19ade67590..d311a10e8c 100644 --- a/src/KOKKOS/region_block_kokkos.h +++ b/src/KOKKOS/region_block_kokkos.h @@ -40,6 +40,7 @@ class RegBlockKokkos : public RegBlock, public KokkosBase { typedef ArrayTypes AT; RegBlockKokkos(class LAMMPS *, int, char **); + ~RegBlockKokkos() override; void match_all_kokkos(int, DAT::tdual_int_1d) override; @@ -53,6 +54,8 @@ class RegBlockKokkos : public RegBlock, public KokkosBase { typename AT::t_x_array_randomread x; typename AT::t_int_1d_randomread mask; + Kokkos::View d_contact; + KOKKOS_INLINE_FUNCTION int k_inside(double, double, double) const; KOKKOS_INLINE_FUNCTION diff --git a/src/KOKKOS/region_sphere_kokkos.cpp 
b/src/KOKKOS/region_sphere_kokkos.cpp index 73ec1f478c..23ab53c794 100644 --- a/src/KOKKOS/region_sphere_kokkos.cpp +++ b/src/KOKKOS/region_sphere_kokkos.cpp @@ -20,6 +20,7 @@ #include "atom_kokkos.h" #include "atom_masks.h" +#include "memory_kokkos.h" using namespace LAMMPS_NS; @@ -30,8 +31,19 @@ RegSphereKokkos::RegSphereKokkos(LAMMPS *lmp, int narg, char **arg) : RegSphere(lmp, narg, arg) { atomKK = (AtomKokkos*) atom; + memoryKK->create_kokkos(d_contact,1,"region_sphere:d_contact"); } +/* ---------------------------------------------------------------------- */ + +template +RegSphereKokkos::~RegSphereKokkos() +{ + if (copymode) return; + memoryKK->destroy_kokkos(d_contact); +} + + /* ---------------------------------------------------------------------- generate list of contact points for interior or exterior regions if region has variable shape, invoke shape_update() once per timestep @@ -52,6 +64,8 @@ int RegSphereKokkos::surface(double x, double y, double z, double cu double xs, ys, zs; double xnear[3], xorig[3]; + utils::logmesg(lmp, " *** RegSphereKokkos::surface\n"); + if (dynamic) { xorig[0] = x; xorig[1] = y; xorig[2] = z; inverse_transform(x, y, z); @@ -72,13 +86,13 @@ int RegSphereKokkos::surface(double x, double y, double z, double cu if (rotateflag && ncontact) { for (int i = 0; i < ncontact; i++) { - xs = xnear[0] - contact[i].delx; - ys = xnear[1] - contact[i].dely; - zs = xnear[2] - contact[i].delz; + xs = xnear[0] - d_contact[i].delx; + ys = xnear[1] - d_contact[i].dely; + zs = xnear[2] - d_contact[i].delz; forward_transform(xs, ys, zs); - contact[i].delx = xorig[0] - xs; - contact[i].dely = xorig[1] - ys; - contact[i].delz = xorig[2] - zs; + d_contact[i].delx = xorig[0] - xs; + d_contact[i].dely = xorig[1] - ys; + d_contact[i].delz = xorig[2] - zs; } } @@ -104,13 +118,13 @@ int RegSphereKokkos::surface_interior(double *x, double cutoff) double delta = radius - r; if (delta < cutoff) { - contact[0].r = delta; - contact[0].delx = delx * (1.0 - 
radius / r); - contact[0].dely = dely * (1.0 - radius / r); - contact[0].delz = delz * (1.0 - radius / r); - contact[0].radius = -radius; - contact[0].iwall = 0; - contact[0].varflag = 1; + d_contact[0].r = delta; + d_contact[0].delx = delx * (1.0 - radius / r); + d_contact[0].dely = dely * (1.0 - radius / r); + d_contact[0].delz = delz * (1.0 - radius / r); + d_contact[0].radius = -radius; + d_contact[0].iwall = 0; + d_contact[0].varflag = 1; return 1; } return 0; @@ -134,13 +148,13 @@ int RegSphereKokkos::surface_exterior(double *x, double cutoff) double delta = r - radius; if (delta < cutoff) { - contact[0].r = delta; - contact[0].delx = delx * (1.0 - radius / r); - contact[0].dely = dely * (1.0 - radius / r); - contact[0].delz = delz * (1.0 - radius / r); - contact[0].radius = radius; - contact[0].iwall = 0; - contact[0].varflag = 1; + d_contact[0].r = delta; + d_contact[0].delx = delx * (1.0 - radius / r); + d_contact[0].dely = dely * (1.0 - radius / r); + d_contact[0].delz = delz * (1.0 - radius / r); + d_contact[0].radius = radius; + d_contact[0].iwall = 0; + d_contact[0].varflag = 1; return 1; } return 0; diff --git a/src/KOKKOS/region_sphere_kokkos.h b/src/KOKKOS/region_sphere_kokkos.h index ce08d48548..b7e1e5dcf7 100644 --- a/src/KOKKOS/region_sphere_kokkos.h +++ b/src/KOKKOS/region_sphere_kokkos.h @@ -38,6 +38,7 @@ class RegSphereKokkos : public RegSphere, public KokkosBase { typedef ArrayTypes AT; RegSphereKokkos(class LAMMPS *, int, char **); + ~RegSphereKokkos() override; void match_all_kokkos(int, DAT::tdual_int_1d) override; @@ -46,6 +47,8 @@ class RegSphereKokkos : public RegSphere, public KokkosBase { private: + Kokkos::View d_contact; + KOKKOS_INLINE_FUNCTION int k_inside(double, double, double) const; KOKKOS_INLINE_FUNCTION From 941b3dd56646ce99c8e00d8604c490f175bc0a16 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Mon, 7 Oct 2024 17:15:09 -0400 Subject: [PATCH 161/294] add two examples for regression testing --- examples/wall/in.wall.block | 
40 +++++++++++++++++++++++ examples/wall/in.wall.sphere | 40 +++++++++++++++++++++++ examples/wall/tip3p.mol | 62 ++++++++++++++++++++++++++++++++++++ 3 files changed, 142 insertions(+) create mode 100644 examples/wall/in.wall.block create mode 100644 examples/wall/in.wall.sphere create mode 100644 examples/wall/tip3p.mol diff --git a/examples/wall/in.wall.block b/examples/wall/in.wall.block new file mode 100644 index 0000000000..7e18d9ab4c --- /dev/null +++ b/examples/wall/in.wall.block @@ -0,0 +1,40 @@ + +units real + +molecule water tip3p.mol +atom_style full + +variable radius equal 100.0 +region box block $(-v_radius) $(v_radius) $(-v_radius) $(v_radius) $(-v_radius) $(v_radius) +create_box 2 box bond/types 1 angle/types 1 & + extra/bond/per/atom 2 extra/angle/per/atom 1 extra/special/per/atom 2 + +mass 1 15.9994 +mass 2 1.008 + +bond_style zero +bond_coeff 1 0.9574 +angle_style zero +angle_coeff 1 104.52 + +region block1 block -80 80 -80 80 -80 80 +region block2 block -70 70 -70 70 -70 70 +create_atoms 0 random 5000 12345 block2 mol water 12345 overlap 2 + +thermo 1 +thermo_style custom step time spcpu temp press etotal pe + +fix wall all wall/region block1 harmonic 1000.0 0.0 2.5 +fix_modify wall energy yes + +pair_style lj/cut/coul/cut 8.0 +pair_coeff 1 1 0.1521 3.1507 +pair_coeff 2 2 0.0 1.0 +velocity all create 300.0 12345 +fix 1 all nvt temp 300 300 100.0 +fix 2 all shake 0.001 10 10000 b 1 a 1 + +dump 2 all movie 10 wall.block.mpg type type size 1500 1500 fsaa yes +dump_modify 2 pad 4 acolor * white/red/green/blue/aqua/magenta + +run 100 diff --git a/examples/wall/in.wall.sphere b/examples/wall/in.wall.sphere new file mode 100644 index 0000000000..5b412ad061 --- /dev/null +++ b/examples/wall/in.wall.sphere @@ -0,0 +1,40 @@ + +units real + +molecule water tip3p.mol +atom_style full + +variable radius equal 100.0 +region box block $(-v_radius) $(v_radius) $(-v_radius) $(v_radius) $(-v_radius) $(v_radius) +create_box 2 box bond/types 1 angle/types 1 & + 
extra/bond/per/atom 2 extra/angle/per/atom 1 extra/special/per/atom 2 + +mass 1 15.9994 +mass 2 1.008 + +bond_style zero +bond_coeff 1 0.9574 +angle_style zero +angle_coeff 1 104.52 + +region sphere1 sphere 0 0 0 $(v_radius-10) side in +region sphere2 sphere 0 0 0 $(v_radius-20) side in +create_atoms 0 random 1000 12345 sphere2 mol water 12345 overlap 1.33 + +thermo 1 +thermo_style custom step time spcpu temp press etotal pe + +fix wall all wall/region sphere1 harmonic 1000.0 0.0 2.5 +fix_modify wall energy yes + +pair_style lj/cut/coul/cut 8.0 +pair_coeff 1 1 0.1521 3.1507 +pair_coeff 2 2 0.0 1.0 +velocity all create 300.0 12345 +fix 1 all nvt temp 300 300 100.0 +fix 2 all shake 0.001 10 10000 b 1 a 1 + +dump 2 all movie 10 wall.sphere.mpg type type size 1500 1500 fsaa yes +dump_modify 2 pad 4 acolor * white/red/green/blue/aqua/magenta + +run 100 diff --git a/examples/wall/tip3p.mol b/examples/wall/tip3p.mol new file mode 100644 index 0000000000..fe8410632e --- /dev/null +++ b/examples/wall/tip3p.mol @@ -0,0 +1,62 @@ +# Water molecule. 
TIP3P geometry + +3 atoms +2 bonds +1 angles + +Coords + +1 0.00000 -0.06556 0.00000 +2 0.75695 0.52032 0.00000 +3 -0.75695 0.52032 0.00000 + +Types + +1 1 +2 2 +3 2 + +Charges + +1 -0.834 +2 0.417 +3 0.417 + +Bonds + +1 1 1 2 +2 1 1 3 + +Angles + +1 1 2 1 3 + +Shake Flags + +1 1 +2 1 +3 1 + +Shake Atoms + +1 1 2 3 +2 1 2 3 +3 1 2 3 + +Shake Bond Types + +1 1 1 1 +2 1 1 1 +3 1 1 1 + +Special Bond Counts + +1 2 0 0 +2 1 1 0 +3 1 1 0 + +Special Bonds + +1 2 3 +2 1 3 +3 1 2 From 5a7dd950fcd6f8140098a12b281d3e24857f67f4 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Mon, 7 Oct 2024 17:28:45 -0400 Subject: [PATCH 162/294] Update region_sphere_kokkos.cpp --- src/KOKKOS/region_sphere_kokkos.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/KOKKOS/region_sphere_kokkos.cpp b/src/KOKKOS/region_sphere_kokkos.cpp index 23ab53c794..22e9e36046 100644 --- a/src/KOKKOS/region_sphere_kokkos.cpp +++ b/src/KOKKOS/region_sphere_kokkos.cpp @@ -64,8 +64,6 @@ int RegSphereKokkos::surface(double x, double y, double z, double cu double xs, ys, zs; double xnear[3], xorig[3]; - utils::logmesg(lmp, " *** RegSphereKokkos::surface\n"); - if (dynamic) { xorig[0] = x; xorig[1] = y; xorig[2] = z; inverse_transform(x, y, z); From 9d2e892c0e4b2d221db43b152f47c4e360298933 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Mon, 7 Oct 2024 17:46:15 -0400 Subject: [PATCH 163/294] Update fix_wall_region_kokkos.cpp --- src/KOKKOS/fix_wall_region_kokkos.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/KOKKOS/fix_wall_region_kokkos.cpp b/src/KOKKOS/fix_wall_region_kokkos.cpp index 8f488208b9..c8de893150 100644 --- a/src/KOKKOS/fix_wall_region_kokkos.cpp +++ b/src/KOKKOS/fix_wall_region_kokkos.cpp @@ -164,29 +164,29 @@ void FixWallRegionKokkos::wall_particle(int i, value_type result) co int n = region->surface(d_x(i,0), d_x(i,1), d_x(i,2), cutoff); for ( int m = 0; m < n; m++) { - if (region->contact[m].r <= tooclose) + if (region->d_contact[m].r <= 
tooclose) Kokkos::abort("Particle outside surface of region used in fix wall/region"); else - rinv = 1.0 / region->contact[m].r; + rinv = 1.0 / region->d_contact[m].r; double fwallKK, engKK; if (style == LJ93) - engKK = lj93(region->contact[m].r,fwallKK); + engKK = lj93(region->d_contact[m].r,fwallKK); else if (style == LJ126) - engKK = lj126(region->contact[m].r,fwallKK); + engKK = lj126(region->d_contact[m].r,fwallKK); else if (style == LJ1043) - engKK = lj1043(region->contact[m].r,fwallKK); + engKK = lj1043(region->d_contact[m].r,fwallKK); else if (style == MORSE) - engKK = morse(region->contact[m].r,fwallKK); + engKK = morse(region->d_contact[m].r,fwallKK); else if (style == COLLOID) - engKK = colloid(region->contact[m].r,d_radius(i),fwallKK); + engKK = colloid(region->d_contact[m].r,d_radius(i),fwallKK); else - engKK = harmonic(region->contact[m].r,fwallKK); + engKK = harmonic(region->d_contact[m].r,fwallKK); - double delx = region->contact[m].delx; - double dely = region->contact[m].dely; - double delz = region->contact[m].delz; + double delx = region->d_contact[m].delx; + double dely = region->d_contact[m].dely; + double delz = region->d_contact[m].delz; double fx = fwall * delx * rinv; double fy = fwall * dely * rinv; double fz = fwall * delz * rinv; From e188b7d75fd961cd90fa814e743e964aa2c3844e Mon Sep 17 00:00:00 2001 From: alphataubio Date: Mon, 7 Oct 2024 17:56:59 -0400 Subject: [PATCH 164/294] remove "tags: generated" --- unittest/force-styles/tests/fix-timestep-efield_const.yaml | 1 - .../force-styles/tests/fix-timestep-efield_dipole_const.yaml | 1 - .../force-styles/tests/fix-timestep-efield_dipole_variable.yaml | 1 - unittest/force-styles/tests/fix-timestep-efield_region.yaml | 1 - unittest/force-styles/tests/fix-timestep-efield_variable.yaml | 1 - unittest/force-styles/tests/fix-timestep-gravity.yaml | 1 - unittest/force-styles/tests/fix-timestep-nph_sphere.yaml | 1 - unittest/force-styles/tests/fix-timestep-npt_sphere_aniso.yaml | 1 - 
unittest/force-styles/tests/fix-timestep-npt_sphere_iso.yaml | 1 - unittest/force-styles/tests/fix-timestep-npt_sphere_tri.yaml | 1 - unittest/force-styles/tests/fix-timestep-nve_sphere.yaml | 1 - unittest/force-styles/tests/fix-timestep-nve_sphere_dipole.yaml | 1 - .../force-styles/tests/fix-timestep-nve_sphere_dipole_dlm.yaml | 1 - unittest/force-styles/tests/fix-timestep-nvt_sphere.yaml | 1 - unittest/force-styles/tests/fix-timestep-recenter-coords.yaml | 1 - unittest/force-styles/tests/fix-timestep-recenter-init.yaml | 1 - unittest/force-styles/tests/fix-timestep-recenter-null.yaml | 1 - unittest/force-styles/tests/fix-timestep-wall_reflect.yaml | 1 - unittest/force-styles/tests/fix-timestep-wall_region_lj93.yaml | 1 - unittest/force-styles/tests/fix-timestep-wall_region_sphere.yaml | 1 - 20 files changed, 20 deletions(-) diff --git a/unittest/force-styles/tests/fix-timestep-efield_const.yaml b/unittest/force-styles/tests/fix-timestep-efield_const.yaml index 932f11179e..21bfc808da 100644 --- a/unittest/force-styles/tests/fix-timestep-efield_const.yaml +++ b/unittest/force-styles/tests/fix-timestep-efield_const.yaml @@ -1,6 +1,5 @@ --- lammps_version: 27 Jun 2024 -tags: generated date_generated: Tue Aug 6 02:04:43 2024 epsilon: 2e-13 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-efield_dipole_const.yaml b/unittest/force-styles/tests/fix-timestep-efield_dipole_const.yaml index d889968db6..0036e6372d 100644 --- a/unittest/force-styles/tests/fix-timestep-efield_dipole_const.yaml +++ b/unittest/force-styles/tests/fix-timestep-efield_dipole_const.yaml @@ -1,6 +1,5 @@ --- lammps_version: 27 Jun 2024 -tags: generated date_generated: Sun Aug 4 21:46:33 2024 epsilon: 2e-13 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-efield_dipole_variable.yaml b/unittest/force-styles/tests/fix-timestep-efield_dipole_variable.yaml index f306034640..a3e686f5cd 100644 --- a/unittest/force-styles/tests/fix-timestep-efield_dipole_variable.yaml +++ 
b/unittest/force-styles/tests/fix-timestep-efield_dipole_variable.yaml @@ -1,6 +1,5 @@ --- lammps_version: 27 Jun 2024 -tags: generated date_generated: Sun Aug 4 22:46:18 2024 epsilon: 2e-13 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-efield_region.yaml b/unittest/force-styles/tests/fix-timestep-efield_region.yaml index d0770a1ab8..a155343e6d 100644 --- a/unittest/force-styles/tests/fix-timestep-efield_region.yaml +++ b/unittest/force-styles/tests/fix-timestep-efield_region.yaml @@ -1,6 +1,5 @@ --- lammps_version: 27 Jun 2024 -tags: generated date_generated: Sat Aug 3 05:18:10 2024 epsilon: 2e-13 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-efield_variable.yaml b/unittest/force-styles/tests/fix-timestep-efield_variable.yaml index 7680373091..1ec1e4098a 100644 --- a/unittest/force-styles/tests/fix-timestep-efield_variable.yaml +++ b/unittest/force-styles/tests/fix-timestep-efield_variable.yaml @@ -1,6 +1,5 @@ --- lammps_version: 27 Jun 2024 -tags: generated date_generated: Sat Aug 3 05:18:19 2024 epsilon: 2e-13 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-gravity.yaml b/unittest/force-styles/tests/fix-timestep-gravity.yaml index 0fad7548ce..ce4801a418 100644 --- a/unittest/force-styles/tests/fix-timestep-gravity.yaml +++ b/unittest/force-styles/tests/fix-timestep-gravity.yaml @@ -1,6 +1,5 @@ --- lammps_version: 17 Apr 2024 -tags: generated date_generated: Fri Jun 7 18:18:25 2024 epsilon: 2e-14 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-nph_sphere.yaml b/unittest/force-styles/tests/fix-timestep-nph_sphere.yaml index 40a4fc1c6e..b95a0265de 100644 --- a/unittest/force-styles/tests/fix-timestep-nph_sphere.yaml +++ b/unittest/force-styles/tests/fix-timestep-nph_sphere.yaml @@ -1,6 +1,5 @@ --- lammps_version: 27 Jun 2024 -tags: generated date_generated: Sun Aug 4 23:15:54 2024 epsilon: 1e-12 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-npt_sphere_aniso.yaml 
b/unittest/force-styles/tests/fix-timestep-npt_sphere_aniso.yaml index 807fcdc46f..12022224df 100644 --- a/unittest/force-styles/tests/fix-timestep-npt_sphere_aniso.yaml +++ b/unittest/force-styles/tests/fix-timestep-npt_sphere_aniso.yaml @@ -1,6 +1,5 @@ --- lammps_version: 27 Jun 2024 -tags: generated date_generated: Sun Aug 4 23:03:14 2024 epsilon: 4e-13 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-npt_sphere_iso.yaml b/unittest/force-styles/tests/fix-timestep-npt_sphere_iso.yaml index f3ee607578..717ab63162 100644 --- a/unittest/force-styles/tests/fix-timestep-npt_sphere_iso.yaml +++ b/unittest/force-styles/tests/fix-timestep-npt_sphere_iso.yaml @@ -1,6 +1,5 @@ --- lammps_version: 27 Jun 2024 -tags: generated date_generated: Sun Aug 4 23:03:30 2024 epsilon: 1e-12 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-npt_sphere_tri.yaml b/unittest/force-styles/tests/fix-timestep-npt_sphere_tri.yaml index b31284da57..3724246956 100644 --- a/unittest/force-styles/tests/fix-timestep-npt_sphere_tri.yaml +++ b/unittest/force-styles/tests/fix-timestep-npt_sphere_tri.yaml @@ -1,6 +1,5 @@ --- lammps_version: 27 Jun 2024 -tags: generated date_generated: Sun Aug 4 23:05:01 2024 epsilon: 1e-12 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-nve_sphere.yaml b/unittest/force-styles/tests/fix-timestep-nve_sphere.yaml index 0eaa00db36..b794fb2416 100644 --- a/unittest/force-styles/tests/fix-timestep-nve_sphere.yaml +++ b/unittest/force-styles/tests/fix-timestep-nve_sphere.yaml @@ -1,6 +1,5 @@ --- lammps_version: 27 Jun 2024 -tags: generated date_generated: Sun Aug 4 23:05:44 2024 epsilon: 5e-14 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-nve_sphere_dipole.yaml b/unittest/force-styles/tests/fix-timestep-nve_sphere_dipole.yaml index 4ea520aba9..5d03cc7a05 100644 --- a/unittest/force-styles/tests/fix-timestep-nve_sphere_dipole.yaml +++ b/unittest/force-styles/tests/fix-timestep-nve_sphere_dipole.yaml @@ -1,6 
+1,5 @@ --- lammps_version: 27 Jun 2024 -tags: generated date_generated: Sun Aug 4 23:06:12 2024 epsilon: 5e-14 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-nve_sphere_dipole_dlm.yaml b/unittest/force-styles/tests/fix-timestep-nve_sphere_dipole_dlm.yaml index d95542744f..aa39b8f4c2 100644 --- a/unittest/force-styles/tests/fix-timestep-nve_sphere_dipole_dlm.yaml +++ b/unittest/force-styles/tests/fix-timestep-nve_sphere_dipole_dlm.yaml @@ -1,6 +1,5 @@ --- lammps_version: 27 Jun 2024 -tags: generated date_generated: Sun Aug 4 23:06:24 2024 epsilon: 1e-09 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-nvt_sphere.yaml b/unittest/force-styles/tests/fix-timestep-nvt_sphere.yaml index 41f021897a..ff8020cc27 100644 --- a/unittest/force-styles/tests/fix-timestep-nvt_sphere.yaml +++ b/unittest/force-styles/tests/fix-timestep-nvt_sphere.yaml @@ -1,6 +1,5 @@ --- lammps_version: 27 Jun 2024 -tags: generated date_generated: Sun Aug 4 23:11:49 2024 epsilon: 5e-14 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-recenter-coords.yaml b/unittest/force-styles/tests/fix-timestep-recenter-coords.yaml index 042f91545f..31c682fc07 100644 --- a/unittest/force-styles/tests/fix-timestep-recenter-coords.yaml +++ b/unittest/force-styles/tests/fix-timestep-recenter-coords.yaml @@ -1,6 +1,5 @@ --- lammps_version: 29 Aug 2024 -tags: generated date_generated: Tue Oct 1 12:45:25 2024 epsilon: 2e-13 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-recenter-init.yaml b/unittest/force-styles/tests/fix-timestep-recenter-init.yaml index 326cdb4f3d..ca539aa911 100644 --- a/unittest/force-styles/tests/fix-timestep-recenter-init.yaml +++ b/unittest/force-styles/tests/fix-timestep-recenter-init.yaml @@ -1,6 +1,5 @@ --- lammps_version: 29 Aug 2024 -tags: generated date_generated: Tue Oct 1 12:45:46 2024 epsilon: 1e-12 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-recenter-null.yaml 
b/unittest/force-styles/tests/fix-timestep-recenter-null.yaml index f0db929f0c..a860180cd7 100644 --- a/unittest/force-styles/tests/fix-timestep-recenter-null.yaml +++ b/unittest/force-styles/tests/fix-timestep-recenter-null.yaml @@ -1,6 +1,5 @@ --- lammps_version: 29 Aug 2024 -tags: generated date_generated: Tue Oct 1 12:45:37 2024 epsilon: 2e-13 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-wall_reflect.yaml b/unittest/force-styles/tests/fix-timestep-wall_reflect.yaml index ed9ac69350..56b9de4462 100644 --- a/unittest/force-styles/tests/fix-timestep-wall_reflect.yaml +++ b/unittest/force-styles/tests/fix-timestep-wall_reflect.yaml @@ -1,6 +1,5 @@ --- lammps_version: 17 Apr 2024 -tags: generated date_generated: Fri Jun 7 18:23:44 2024 epsilon: 4e-14 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-wall_region_lj93.yaml b/unittest/force-styles/tests/fix-timestep-wall_region_lj93.yaml index 82e2ead2f7..cc74769cb2 100644 --- a/unittest/force-styles/tests/fix-timestep-wall_region_lj93.yaml +++ b/unittest/force-styles/tests/fix-timestep-wall_region_lj93.yaml @@ -1,6 +1,5 @@ --- lammps_version: 29 Aug 2024 -tags: generated date_generated: Tue Oct 1 13:00:05 2024 epsilon: 4e-14 skip_tests: diff --git a/unittest/force-styles/tests/fix-timestep-wall_region_sphere.yaml b/unittest/force-styles/tests/fix-timestep-wall_region_sphere.yaml index 893537d02b..cf7348194e 100644 --- a/unittest/force-styles/tests/fix-timestep-wall_region_sphere.yaml +++ b/unittest/force-styles/tests/fix-timestep-wall_region_sphere.yaml @@ -1,6 +1,5 @@ --- lammps_version: 29 Aug 2024 -tags: generated date_generated: Mon Oct 7 17:02:09 2024 epsilon: 4e-14 skip_tests: From 4ed7528ddcea5bf55ba35291a6b4942d41cbfcbc Mon Sep 17 00:00:00 2001 From: alphataubio Date: Mon, 7 Oct 2024 17:57:32 -0400 Subject: [PATCH 165/294] add_contact() --- src/KOKKOS/region_block_kokkos.cpp | 20 +++++++++++++++++++- src/KOKKOS/region_block_kokkos.h | 8 +++++--- 
src/KOKKOS/region_sphere_kokkos.cpp | 22 ++++++++++++++++++++++ src/KOKKOS/region_sphere_kokkos.h | 8 +++++--- 4 files changed, 51 insertions(+), 7 deletions(-) diff --git a/src/KOKKOS/region_block_kokkos.cpp b/src/KOKKOS/region_block_kokkos.cpp index 91deeeac29..a9caad4f6c 100644 --- a/src/KOKKOS/region_block_kokkos.cpp +++ b/src/KOKKOS/region_block_kokkos.cpp @@ -234,13 +234,31 @@ int RegBlockKokkos::surface_exterior(double *x, double cutoff) } } - // FIXME: write add_contact inline for KOKKOS add_contact(0, x, xp, yp, zp); d_contact[0].iwall = 0; if (d_contact[0].r < cutoff) return 1; return 0; } +/* ---------------------------------------------------------------------- + add a single contact at Nth location in contact array + x = particle position + xp,yp,zp = region surface point +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void RegBlockKokkos::add_contact(int n, double *x, double xp, double yp, double zp) +{ + double delx = x[0] - xp; + double dely = x[1] - yp; + double delz = x[2] - zp; + d_contact[n].r = sqrt(delx * delx + dely * dely + delz * delz); + d_contact[n].radius = 0; + d_contact[n].delx = delx; + d_contact[n].dely = dely; + d_contact[n].delz = delz; +} /* ---------------------------------------------------------------------- inside = 1 if x,y,z is inside or on surface diff --git a/src/KOKKOS/region_block_kokkos.h b/src/KOKKOS/region_block_kokkos.h index d311a10e8c..2e22f9ca24 100644 --- a/src/KOKKOS/region_block_kokkos.h +++ b/src/KOKKOS/region_block_kokkos.h @@ -47,6 +47,8 @@ class RegBlockKokkos : public RegBlock, public KokkosBase { KOKKOS_INLINE_FUNCTION void operator()(TagRegBlockMatchAll, const int&) const; + Kokkos::View d_contact; + private: int groupbit; typename AT::t_int_1d d_match; @@ -54,8 +56,6 @@ class RegBlockKokkos : public RegBlock, public KokkosBase { typename AT::t_x_array_randomread x; typename AT::t_int_1d_randomread mask; - Kokkos::View d_contact; - 
KOKKOS_INLINE_FUNCTION int k_inside(double, double, double) const; KOKKOS_INLINE_FUNCTION @@ -66,7 +66,9 @@ class RegBlockKokkos : public RegBlock, public KokkosBase { void rotate(double &, double &, double &, double) const; KOKKOS_INLINE_FUNCTION - int surface(double, double, double, double); + void add_contact(int, double *, double, double, double) override; + KOKKOS_INLINE_FUNCTION + int surface(double, double, double, double) override; KOKKOS_INLINE_FUNCTION int surface_interior(double *, double) override; KOKKOS_INLINE_FUNCTION diff --git a/src/KOKKOS/region_sphere_kokkos.cpp b/src/KOKKOS/region_sphere_kokkos.cpp index 22e9e36046..ec9c84291f 100644 --- a/src/KOKKOS/region_sphere_kokkos.cpp +++ b/src/KOKKOS/region_sphere_kokkos.cpp @@ -64,6 +64,8 @@ int RegSphereKokkos::surface(double x, double y, double z, double cu double xs, ys, zs; double xnear[3], xorig[3]; + utils::logmesg(lmp, " *** RegSphereKokkos::surface\n"); + if (dynamic) { xorig[0] = x; xorig[1] = y; xorig[2] = z; inverse_transform(x, y, z); @@ -158,6 +160,26 @@ int RegSphereKokkos::surface_exterior(double *x, double cutoff) return 0; } +/* ---------------------------------------------------------------------- + add a single contact at Nth location in contact array + x = particle position + xp,yp,zp = region surface point +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void RegBlockKokkos::add_contact(int n, double *x, double xp, double yp, double zp) +{ + double delx = x[0] - xp; + double dely = x[1] - yp; + double delz = x[2] - zp; + d_contact[n].r = sqrt(delx * delx + dely * dely + delz * delz); + d_contact[n].radius = 0; + d_contact[n].delx = delx; + d_contact[n].dely = dely; + d_contact[n].delz = delz; +} + /* ---------------------------------------------------------------------- inside = 1 if x,y,z is inside or on surface inside = 0 if x,y,z is outside and not on surface diff --git a/src/KOKKOS/region_sphere_kokkos.h 
b/src/KOKKOS/region_sphere_kokkos.h index b7e1e5dcf7..49cd3e4461 100644 --- a/src/KOKKOS/region_sphere_kokkos.h +++ b/src/KOKKOS/region_sphere_kokkos.h @@ -45,10 +45,10 @@ class RegSphereKokkos : public RegSphere, public KokkosBase { //KOKKOS_INLINE_FUNCTION //void operator()(TagRegBlockMatchAll, const int&) const; - private: - Kokkos::View d_contact; + private: + KOKKOS_INLINE_FUNCTION int k_inside(double, double, double) const; KOKKOS_INLINE_FUNCTION @@ -59,7 +59,9 @@ class RegSphereKokkos : public RegSphere, public KokkosBase { void rotate(double &, double &, double &, double) const; KOKKOS_INLINE_FUNCTION - int surface(double, double, double, double); + void add_contact(int, double *, double, double, double) override; + KOKKOS_INLINE_FUNCTION + int surface(double, double, double, double) override; KOKKOS_INLINE_FUNCTION int surface_interior(double *, double) override; KOKKOS_INLINE_FUNCTION From a53840014cef7163bcd08d5034e679bbaee150e6 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Mon, 7 Oct 2024 18:17:14 -0400 Subject: [PATCH 166/294] Update in.wall.sphere --- examples/wall/in.wall.sphere | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/wall/in.wall.sphere b/examples/wall/in.wall.sphere index 5b412ad061..995a1b0f75 100644 --- a/examples/wall/in.wall.sphere +++ b/examples/wall/in.wall.sphere @@ -19,7 +19,7 @@ angle_coeff 1 104.52 region sphere1 sphere 0 0 0 $(v_radius-10) side in region sphere2 sphere 0 0 0 $(v_radius-20) side in -create_atoms 0 random 1000 12345 sphere2 mol water 12345 overlap 1.33 +create_atoms 0 random 5000 12345 sphere2 mol water 12345 overlap 2 thermo 1 thermo_style custom step time spcpu temp press etotal pe From d13fb58311106129ba03594ccde85b30370a4de0 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 8 Oct 2024 17:55:03 -0400 Subject: [PATCH 167/294] surface_kokkos() --- src/KOKKOS/region_block_kokkos.cpp | 2 +- src/KOKKOS/region_block_kokkos.h | 9 +++++---- src/KOKKOS/region_sphere_kokkos.cpp | 4 ++-- 
src/KOKKOS/region_sphere_kokkos.h | 11 ++++------- 4 files changed, 12 insertions(+), 14 deletions(-) diff --git a/src/KOKKOS/region_block_kokkos.cpp b/src/KOKKOS/region_block_kokkos.cpp index a9caad4f6c..3831b405a5 100644 --- a/src/KOKKOS/region_block_kokkos.cpp +++ b/src/KOKKOS/region_block_kokkos.cpp @@ -53,7 +53,7 @@ RegBlockKokkos::~RegBlockKokkos() template KOKKOS_INLINE_FUNCTION -int RegBlockKokkos::surface(double x, double y, double z, double cutoff) +int RegBlockKokkos::surface_kokkos(double x, double y, double z, double cutoff) { int ncontact; double xs, ys, zs; diff --git a/src/KOKKOS/region_block_kokkos.h b/src/KOKKOS/region_block_kokkos.h index 2e22f9ca24..e082a6a275 100644 --- a/src/KOKKOS/region_block_kokkos.h +++ b/src/KOKKOS/region_block_kokkos.h @@ -47,7 +47,10 @@ class RegBlockKokkos : public RegBlock, public KokkosBase { KOKKOS_INLINE_FUNCTION void operator()(TagRegBlockMatchAll, const int&) const; - Kokkos::View d_contact; + KOKKOS_INLINE_FUNCTION + int surface_kokkos(double, double, double, double); + + Kokkos::View d_contact; private: int groupbit; @@ -66,9 +69,7 @@ class RegBlockKokkos : public RegBlock, public KokkosBase { void rotate(double &, double &, double &, double) const; KOKKOS_INLINE_FUNCTION - void add_contact(int, double *, double, double, double) override; - KOKKOS_INLINE_FUNCTION - int surface(double, double, double, double) override; + void add_contact(int, double *, double, double, double); KOKKOS_INLINE_FUNCTION int surface_interior(double *, double) override; KOKKOS_INLINE_FUNCTION diff --git a/src/KOKKOS/region_sphere_kokkos.cpp b/src/KOKKOS/region_sphere_kokkos.cpp index ec9c84291f..e81a3aa57f 100644 --- a/src/KOKKOS/region_sphere_kokkos.cpp +++ b/src/KOKKOS/region_sphere_kokkos.cpp @@ -58,7 +58,7 @@ RegSphereKokkos::~RegSphereKokkos() template KOKKOS_INLINE_FUNCTION -int RegSphereKokkos::surface(double x, double y, double z, double cutoff) +int RegSphereKokkos::surface_kokkos(double x, double y, double z, double cutoff) 
{ int ncontact; double xs, ys, zs; @@ -168,7 +168,7 @@ int RegSphereKokkos::surface_exterior(double *x, double cutoff) template KOKKOS_INLINE_FUNCTION -void RegBlockKokkos::add_contact(int n, double *x, double xp, double yp, double zp) +void RegSphereKokkos::add_contact(int n, double *x, double xp, double yp, double zp) { double delx = x[0] - xp; double dely = x[1] - yp; diff --git a/src/KOKKOS/region_sphere_kokkos.h b/src/KOKKOS/region_sphere_kokkos.h index 49cd3e4461..02a9cdd145 100644 --- a/src/KOKKOS/region_sphere_kokkos.h +++ b/src/KOKKOS/region_sphere_kokkos.h @@ -39,13 +39,12 @@ class RegSphereKokkos : public RegSphere, public KokkosBase { RegSphereKokkos(class LAMMPS *, int, char **); ~RegSphereKokkos() override; - void match_all_kokkos(int, DAT::tdual_int_1d) override; - //KOKKOS_INLINE_FUNCTION - //void operator()(TagRegBlockMatchAll, const int&) const; + KOKKOS_INLINE_FUNCTION + int surface_kokkos(double, double, double, double); - Kokkos::View d_contact; + Kokkos::View d_contact; private: @@ -59,9 +58,7 @@ class RegSphereKokkos : public RegSphere, public KokkosBase { void rotate(double &, double &, double &, double) const; KOKKOS_INLINE_FUNCTION - void add_contact(int, double *, double, double, double) override; - KOKKOS_INLINE_FUNCTION - int surface(double, double, double, double) override; + void add_contact(int, double *, double, double, double); KOKKOS_INLINE_FUNCTION int surface_interior(double *, double) override; KOKKOS_INLINE_FUNCTION From 0f627059fc1092149ca1d55d578c269bc3a6a7cb Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 8 Oct 2024 18:14:06 -0400 Subject: [PATCH 168/294] dynamic_cast --- src/KOKKOS/fix_wall_region_kokkos.cpp | 49 +++++++++++++++++---------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/src/KOKKOS/fix_wall_region_kokkos.cpp b/src/KOKKOS/fix_wall_region_kokkos.cpp index c8de893150..eadc12109b 100644 --- a/src/KOKKOS/fix_wall_region_kokkos.cpp +++ b/src/KOKKOS/fix_wall_region_kokkos.cpp @@ -24,6 +24,8 
@@ #include "kokkos_base.h" #include "memory_kokkos.h" #include "region.h" +#include "region_block_kokkos.h" +#include "region_sphere_kokkos.h" using namespace LAMMPS_NS; @@ -161,32 +163,43 @@ void FixWallRegionKokkos::wall_particle(int i, value_type result) co else tooclose = 0.0; - int n = region->surface(d_x(i,0), d_x(i,1), d_x(i,2), cutoff); + int n; + + if(RegBlockKokkos *regionKK = dynamic_cast*>(region)) + n = regionKK->surface_kokkos(d_x(i,0), d_x(i,1), d_x(i,2), cutoff); + else if (RegSphereKokkos *regionKK = dynamic_cast*>(region)) + n = regionKK->surface_kokkos(d_x(i,0), d_x(i,1), d_x(i,2), cutoff); for ( int m = 0; m < n; m++) { - if (region->d_contact[m].r <= tooclose) + + double r, delx, dely, delz; + + if(RegBlockKokkos *regionKK = dynamic_cast*>(region)) { + r = regionKK->d_contact[m].r; + delx = regionKK->d_contact[m].delx; + dely = regionKK->d_contact[m].dely; + delz = regionKK->d_contact[m].delz; + } else if (RegSphereKokkos *regionKK = dynamic_cast*>(region)){ + r = regionKK->d_contact[m].r; + delx = regionKK->d_contact[m].delx; + dely = regionKK->d_contact[m].dely; + delz = regionKK->d_contact[m].delz; + } + + if (r <= tooclose) Kokkos::abort("Particle outside surface of region used in fix wall/region"); else - rinv = 1.0 / region->d_contact[m].r; + rinv = 1.0 / r; double fwallKK, engKK; - if (style == LJ93) - engKK = lj93(region->d_contact[m].r,fwallKK); - else if (style == LJ126) - engKK = lj126(region->d_contact[m].r,fwallKK); - else if (style == LJ1043) - engKK = lj1043(region->d_contact[m].r,fwallKK); - else if (style == MORSE) - engKK = morse(region->d_contact[m].r,fwallKK); - else if (style == COLLOID) - engKK = colloid(region->d_contact[m].r,d_radius(i),fwallKK); - else - engKK = harmonic(region->d_contact[m].r,fwallKK); + if (style == LJ93) engKK = lj93(r,fwallKK); + else if (style == LJ126) engKK = lj126(r,fwallKK); + else if (style == LJ1043) engKK = lj1043(r,fwallKK); + else if (style == MORSE) engKK = morse(r,fwallKK); + else if 
(style == COLLOID) engKK = colloid(r,d_radius(i),fwallKK); + else engKK = harmonic(r,fwallKK); - double delx = region->d_contact[m].delx; - double dely = region->d_contact[m].dely; - double delz = region->d_contact[m].delz; double fx = fwall * delx * rinv; double fy = fwall * dely * rinv; double fz = fwall * delz * rinv; From f7afc63b535e26d21c02a0353787f7d9318667b6 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 8 Oct 2024 18:18:03 -0400 Subject: [PATCH 169/294] Update region_block_kokkos.h --- src/KOKKOS/region_block_kokkos.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/region_block_kokkos.h b/src/KOKKOS/region_block_kokkos.h index e082a6a275..5ea406133b 100644 --- a/src/KOKKOS/region_block_kokkos.h +++ b/src/KOKKOS/region_block_kokkos.h @@ -71,9 +71,9 @@ class RegBlockKokkos : public RegBlock, public KokkosBase { KOKKOS_INLINE_FUNCTION void add_contact(int, double *, double, double, double); KOKKOS_INLINE_FUNCTION - int surface_interior(double *, double) override; + int surface_interior(double *, double); KOKKOS_INLINE_FUNCTION - int surface_exterior(double *, double) override; + int surface_exterior(double *, double); }; From 183c99acc8857790d6b3aa4e191049865016328a Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 8 Oct 2024 18:18:06 -0400 Subject: [PATCH 170/294] Update region_sphere_kokkos.h --- src/KOKKOS/region_sphere_kokkos.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/region_sphere_kokkos.h b/src/KOKKOS/region_sphere_kokkos.h index 02a9cdd145..1769df90f8 100644 --- a/src/KOKKOS/region_sphere_kokkos.h +++ b/src/KOKKOS/region_sphere_kokkos.h @@ -60,9 +60,9 @@ class RegSphereKokkos : public RegSphere, public KokkosBase { KOKKOS_INLINE_FUNCTION void add_contact(int, double *, double, double, double); KOKKOS_INLINE_FUNCTION - int surface_interior(double *, double) override; + int surface_interior(double *, double); KOKKOS_INLINE_FUNCTION - int surface_exterior(double *, double) 
override; + int surface_exterior(double *, double); }; From 64499bfcb2aae3af79e306f1bb7686c2377e7bc2 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 8 Oct 2024 18:20:58 -0400 Subject: [PATCH 171/294] _kokkos --- src/KOKKOS/region_block_kokkos.cpp | 10 +++++----- src/KOKKOS/region_block_kokkos.h | 4 ++-- src/KOKKOS/region_sphere_kokkos.cpp | 6 +++--- src/KOKKOS/region_sphere_kokkos.h | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/KOKKOS/region_block_kokkos.cpp b/src/KOKKOS/region_block_kokkos.cpp index 3831b405a5..285c1018b5 100644 --- a/src/KOKKOS/region_block_kokkos.cpp +++ b/src/KOKKOS/region_block_kokkos.cpp @@ -70,13 +70,13 @@ int RegBlockKokkos::surface_kokkos(double x, double y, double z, dou if (!openflag) { if (interior) - ncontact = surface_interior(xnear, cutoff); + ncontact = surface_interior_kokkos(xnear, cutoff); else - ncontact = surface_exterior(xnear, cutoff); + ncontact = surface_exterior_kokkos(xnear, cutoff); } else { // one of surface_int/ext() will return 0 // so no need to worry about offset of contact indices - ncontact = surface_exterior(xnear, cutoff) + surface_interior(xnear, cutoff); + ncontact = surface_exterior_kokkos(xnear, cutoff) + surface_interior_kokkos(xnear, cutoff); } if (rotateflag && ncontact) { @@ -103,7 +103,7 @@ int RegBlockKokkos::surface_kokkos(double x, double y, double z, dou template KOKKOS_INLINE_FUNCTION -int RegBlockKokkos::surface_interior(double *x, double cutoff) +int RegBlockKokkos::surface_interior_kokkos(double *x, double cutoff) { double delta; @@ -183,7 +183,7 @@ int RegBlockKokkos::surface_interior(double *x, double cutoff) template KOKKOS_INLINE_FUNCTION -int RegBlockKokkos::surface_exterior(double *x, double cutoff) +int RegBlockKokkos::surface_exterior_kokkos(double *x, double cutoff) { double xp, yp, zp; double xc, yc, zc, dist, mindist; diff --git a/src/KOKKOS/region_block_kokkos.h b/src/KOKKOS/region_block_kokkos.h index 5ea406133b..ff18a86ff8 100644 --- 
a/src/KOKKOS/region_block_kokkos.h +++ b/src/KOKKOS/region_block_kokkos.h @@ -71,9 +71,9 @@ class RegBlockKokkos : public RegBlock, public KokkosBase { KOKKOS_INLINE_FUNCTION void add_contact(int, double *, double, double, double); KOKKOS_INLINE_FUNCTION - int surface_interior(double *, double); + int surface_interior_kokkos(double *, double); KOKKOS_INLINE_FUNCTION - int surface_exterior(double *, double); + int surface_exterior_kokkos(double *, double); }; diff --git a/src/KOKKOS/region_sphere_kokkos.cpp b/src/KOKKOS/region_sphere_kokkos.cpp index e81a3aa57f..6c8677e340 100644 --- a/src/KOKKOS/region_sphere_kokkos.cpp +++ b/src/KOKKOS/region_sphere_kokkos.cpp @@ -75,13 +75,13 @@ int RegSphereKokkos::surface_kokkos(double x, double y, double z, do if (!openflag) { if (interior) - ncontact = surface_interior(xnear, cutoff); + ncontact = surface_interior_kokkos(xnear, cutoff); else - ncontact = surface_exterior(xnear, cutoff); + ncontact = surface_exterior_kokkos(xnear, cutoff); } else { // one of surface_int/ext() will return 0 // so no need to worry about offset of contact indices - ncontact = surface_exterior(xnear, cutoff) + surface_interior(xnear, cutoff); + ncontact = surface_exterior_kokkos(xnear, cutoff) + surface_interior_kokkos(xnear, cutoff); } if (rotateflag && ncontact) { diff --git a/src/KOKKOS/region_sphere_kokkos.h b/src/KOKKOS/region_sphere_kokkos.h index 1769df90f8..bb63ed68d4 100644 --- a/src/KOKKOS/region_sphere_kokkos.h +++ b/src/KOKKOS/region_sphere_kokkos.h @@ -60,9 +60,9 @@ class RegSphereKokkos : public RegSphere, public KokkosBase { KOKKOS_INLINE_FUNCTION void add_contact(int, double *, double, double, double); KOKKOS_INLINE_FUNCTION - int surface_interior(double *, double); + int surface_interior_kokkos(double *, double); KOKKOS_INLINE_FUNCTION - int surface_exterior(double *, double); + int surface_exterior_kokkos(double *, double); }; From 7d9bc3ea24beda68fa0fad36bfb9321a544d8c0e Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 8 
Oct 2024 18:21:42 -0400 Subject: [PATCH 172/294] oops --- src/KOKKOS/region_sphere_kokkos.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/region_sphere_kokkos.cpp b/src/KOKKOS/region_sphere_kokkos.cpp index 6c8677e340..72c18d3b14 100644 --- a/src/KOKKOS/region_sphere_kokkos.cpp +++ b/src/KOKKOS/region_sphere_kokkos.cpp @@ -108,7 +108,7 @@ int RegSphereKokkos::surface_kokkos(double x, double y, double z, do template KOKKOS_INLINE_FUNCTION -int RegSphereKokkos::surface_interior(double *x, double cutoff) +int RegSphereKokkos::surface_interior_kokkos(double *x, double cutoff) { double delx = x[0] - xc; double dely = x[1] - yc; @@ -138,7 +138,7 @@ int RegSphereKokkos::surface_interior(double *x, double cutoff) template KOKKOS_INLINE_FUNCTION -int RegSphereKokkos::surface_exterior(double *x, double cutoff) +int RegSphereKokkos::surface_exterior_kokkos(double *x, double cutoff) { double delx = x[0] - xc; double dely = x[1] - yc; From 1af3de1fad2d3092cf361037c21c914537ae65c1 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 8 Oct 2024 18:59:17 -0400 Subject: [PATCH 173/294] Update fix_recenter_kokkos.cpp --- src/KOKKOS/fix_recenter_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/fix_recenter_kokkos.cpp b/src/KOKKOS/fix_recenter_kokkos.cpp index 4738f29be7..bc5576d418 100644 --- a/src/KOKKOS/fix_recenter_kokkos.cpp +++ b/src/KOKKOS/fix_recenter_kokkos.cpp @@ -88,8 +88,8 @@ void FixRecenterKokkos::initial_integrate(int /*vflag*/) // current COM - double xcm[3]; if (group->dynamic[igroup]) masstotal = groupKK->mass(igroup); + double xcm[3]; groupKK->xcm(igroup,masstotal,xcm); // shift coords by difference between actual COM and requested COM From 80c8ecb7e70f01a775c89fa48025bd6f42219f2f Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 8 Oct 2024 19:01:36 -0400 Subject: [PATCH 174/294] Update fix_recenter_kokkos.cpp --- src/KOKKOS/fix_recenter_kokkos.cpp | 2 -- 1 file changed, 2 deletions(-) 
diff --git a/src/KOKKOS/fix_recenter_kokkos.cpp b/src/KOKKOS/fix_recenter_kokkos.cpp index bc5576d418..c3a840ff10 100644 --- a/src/KOKKOS/fix_recenter_kokkos.cpp +++ b/src/KOKKOS/fix_recenter_kokkos.cpp @@ -99,7 +99,6 @@ void FixRecenterKokkos::initial_integrate(int /*vflag*/) shift[2] = zflag ? (ztarget - xcm[2]) : 0.0; distance = sqrt(shift[0]*shift[0] + shift[1]*shift[1] + shift[2]*shift[2]); - auto d_x = atomKK->k_x.template view(); auto d_mask = atomKK->k_mask.template view(); auto l_group2bit = group2bit; @@ -119,7 +118,6 @@ void FixRecenterKokkos::initial_integrate(int /*vflag*/) }); copymode = 0; - atomKK->modified(execution_space,datamask_modify); } From bb5b8a1d344793128fa5b5ced7c87ef6090e8e5f Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 9 Oct 2024 13:50:17 -0400 Subject: [PATCH 175/294] functor templated on RegBlockKokkos and RegSphereKokkos --- src/KOKKOS/fix_wall_region_kokkos.cpp | 55 ++++++++++++--------------- src/KOKKOS/fix_wall_region_kokkos.h | 24 +++++++++--- 2 files changed, 43 insertions(+), 36 deletions(-) diff --git a/src/KOKKOS/fix_wall_region_kokkos.cpp b/src/KOKKOS/fix_wall_region_kokkos.cpp index eadc12109b..59d18c0d89 100644 --- a/src/KOKKOS/fix_wall_region_kokkos.cpp +++ b/src/KOKKOS/fix_wall_region_kokkos.cpp @@ -114,12 +114,21 @@ void FixWallRegionKokkos::post_force(int vflag) // eflag is used to track whether wall energies have been communicated. 
eflag = 0; - double result[10]; - copymode = 1; - FixWallRegionKokkosFunctor functor(this); - Kokkos::parallel_reduce(nlocal,functor,result); + + if(auto *regionKK = dynamic_cast*>(region)) { + + FixWallRegionKokkosFunctor> functor(this,regionKK); + Kokkos::parallel_reduce(nlocal,functor,result); + + } else if (auto *regionKK = dynamic_cast*>(region)){ + + FixWallRegionKokkosFunctor> functor(this,regionKK); + Kokkos::parallel_reduce(nlocal,functor,result); + + } + copymode = 0; for( int i=0 ; i<4 ; i++ ) ewall[i] = result[i]; @@ -139,7 +148,6 @@ void FixWallRegionKokkos::post_force(int vflag) k_vatom.template modify(); k_vatom.template sync(); } - } @@ -150,9 +158,10 @@ void FixWallRegionKokkos::post_force(int vflag) error if any particle is on or behind wall ------------------------------------------------------------------------- */ -template +template +template KOKKOS_INLINE_FUNCTION -void FixWallRegionKokkos::wall_particle(int i, value_type result) const { +void FixWallRegionKokkos::wall_particle(T regionKK, const int i, value_type result) const { if (d_mask(i) & groupbit) { if (!d_match[i]) Kokkos::abort("Particle outside surface of region used in fix wall/region"); @@ -163,28 +172,14 @@ void FixWallRegionKokkos::wall_particle(int i, value_type result) co else tooclose = 0.0; - int n; - - if(RegBlockKokkos *regionKK = dynamic_cast*>(region)) - n = regionKK->surface_kokkos(d_x(i,0), d_x(i,1), d_x(i,2), cutoff); - else if (RegSphereKokkos *regionKK = dynamic_cast*>(region)) - n = regionKK->surface_kokkos(d_x(i,0), d_x(i,1), d_x(i,2), cutoff); + int n = regionKK->surface_kokkos(d_x(i,0), d_x(i,1), d_x(i,2), cutoff); for ( int m = 0; m < n; m++) { - double r, delx, dely, delz; - - if(RegBlockKokkos *regionKK = dynamic_cast*>(region)) { - r = regionKK->d_contact[m].r; - delx = regionKK->d_contact[m].delx; - dely = regionKK->d_contact[m].dely; - delz = regionKK->d_contact[m].delz; - } else if (RegSphereKokkos *regionKK = dynamic_cast*>(region)){ - r = 
regionKK->d_contact[m].r; - delx = regionKK->d_contact[m].delx; - dely = regionKK->d_contact[m].dely; - delz = regionKK->d_contact[m].delz; - } + double r = regionKK->d_contact[m].r; + double delx = regionKK->d_contact[m].delx; + double dely = regionKK->d_contact[m].dely; + double delz = regionKK->d_contact[m].delz; if (r <= tooclose) Kokkos::abort("Particle outside surface of region used in fix wall/region"); @@ -200,16 +195,16 @@ void FixWallRegionKokkos::wall_particle(int i, value_type result) co else if (style == COLLOID) engKK = colloid(r,d_radius(i),fwallKK); else engKK = harmonic(r,fwallKK); - double fx = fwall * delx * rinv; - double fy = fwall * dely * rinv; - double fz = fwall * delz * rinv; + double fx = fwallKK * delx * rinv; + double fy = fwallKK * dely * rinv; + double fz = fwallKK * delz * rinv; d_f(i,0) += fx; d_f(i,1) += fy; d_f(i,2) += fz; result[1] -= fx; result[2] -= fy; result[3] -= fz; - result[0] += eng; + result[0] += engKK; if (evflag) { double v[6] = { fx * delx, diff --git a/src/KOKKOS/fix_wall_region_kokkos.h b/src/KOKKOS/fix_wall_region_kokkos.h index ca98b15d3d..c9cb374a18 100644 --- a/src/KOKKOS/fix_wall_region_kokkos.h +++ b/src/KOKKOS/fix_wall_region_kokkos.h @@ -24,10 +24,15 @@ FixStyle(wall/region/kk/host,FixWallRegionKokkos); #define LMP_FIX_WALL_REGION_KOKKOS_H #include "fix_wall_region.h" + #include "kokkos_type.h" +#include "region_block_kokkos.h" +#include "region_sphere_kokkos.h" namespace LAMMPS_NS { +//template +//struct TagFixWallRegionKokkos{}; template class FixWallRegionKokkos : public FixWallRegion { @@ -40,8 +45,15 @@ class FixWallRegionKokkos : public FixWallRegion { ~FixWallRegionKokkos() override; void post_force(int) override; + template KOKKOS_INLINE_FUNCTION - void wall_particle(int, value_type) const; + void wall_particle(T, const int, value_type) const; + + //template + //KOKKOS_INLINE_FUNCTION + //void operator()(TagFixWallRegionKokkos(const int&, double&, double&, double&, double&) const; + + //regionKK 
private: @@ -78,16 +90,16 @@ class FixWallRegionKokkos : public FixWallRegion { }; - -template +template struct FixWallRegionKokkosFunctor { typedef DeviceType device_type; typedef double value_type[]; const int value_count; FixWallRegionKokkos c; + T *regionKK; - FixWallRegionKokkosFunctor(FixWallRegionKokkos* c_ptr): - value_count(10), c(*c_ptr) {} + FixWallRegionKokkosFunctor(FixWallRegionKokkos* c_ptr, T *regionKK): + value_count(10), c(*c_ptr), regionKK(regionKK) {} KOKKOS_INLINE_FUNCTION void init(value_type result) const { @@ -96,7 +108,7 @@ struct FixWallRegionKokkosFunctor { KOKKOS_INLINE_FUNCTION void operator()(const int i, value_type result) const { - c.wall_particle(i,result); + c.wall_particle(regionKK,i,result); } }; From 5d90879f35436e4c2ceeafd842f88c43913d3d7d Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 9 Oct 2024 14:14:23 -0400 Subject: [PATCH 176/294] cleanup --- src/KOKKOS/fix_wall_region_kokkos.cpp | 4 ---- src/KOKKOS/fix_wall_region_kokkos.h | 10 ---------- src/KOKKOS/region_block_kokkos.cpp | 2 -- src/KOKKOS/region_sphere_kokkos.cpp | 2 -- 4 files changed, 18 deletions(-) diff --git a/src/KOKKOS/fix_wall_region_kokkos.cpp b/src/KOKKOS/fix_wall_region_kokkos.cpp index 59d18c0d89..0eb64ee3f0 100644 --- a/src/KOKKOS/fix_wall_region_kokkos.cpp +++ b/src/KOKKOS/fix_wall_region_kokkos.cpp @@ -118,15 +118,11 @@ void FixWallRegionKokkos::post_force(int vflag) copymode = 1; if(auto *regionKK = dynamic_cast*>(region)) { - FixWallRegionKokkosFunctor> functor(this,regionKK); Kokkos::parallel_reduce(nlocal,functor,result); - } else if (auto *regionKK = dynamic_cast*>(region)){ - FixWallRegionKokkosFunctor> functor(this,regionKK); Kokkos::parallel_reduce(nlocal,functor,result); - } copymode = 0; diff --git a/src/KOKKOS/fix_wall_region_kokkos.h b/src/KOKKOS/fix_wall_region_kokkos.h index c9cb374a18..220f9ad38b 100644 --- a/src/KOKKOS/fix_wall_region_kokkos.h +++ b/src/KOKKOS/fix_wall_region_kokkos.h @@ -31,9 +31,6 @@ 
FixStyle(wall/region/kk/host,FixWallRegionKokkos); namespace LAMMPS_NS { -//template -//struct TagFixWallRegionKokkos{}; - template class FixWallRegionKokkos : public FixWallRegion { public: @@ -49,12 +46,6 @@ class FixWallRegionKokkos : public FixWallRegion { KOKKOS_INLINE_FUNCTION void wall_particle(T, const int, value_type) const; - //template - //KOKKOS_INLINE_FUNCTION - //void operator()(TagFixWallRegionKokkos(const int&, double&, double&, double&, double&) const; - - //regionKK - private: typename AT::t_x_array d_x; @@ -110,7 +101,6 @@ struct FixWallRegionKokkosFunctor { void operator()(const int i, value_type result) const { c.wall_particle(regionKK,i,result); } - }; } diff --git a/src/KOKKOS/region_block_kokkos.cpp b/src/KOKKOS/region_block_kokkos.cpp index 285c1018b5..9c71f6d6e5 100644 --- a/src/KOKKOS/region_block_kokkos.cpp +++ b/src/KOKKOS/region_block_kokkos.cpp @@ -59,8 +59,6 @@ int RegBlockKokkos::surface_kokkos(double x, double y, double z, dou double xs, ys, zs; double xnear[3], xorig[3]; - utils::logmesg(lmp, " *** RegBlockKokkos::surface\n"); - if (dynamic) { xorig[0] = x; xorig[1] = y; xorig[2] = z; inverse_transform(x, y, z); diff --git a/src/KOKKOS/region_sphere_kokkos.cpp b/src/KOKKOS/region_sphere_kokkos.cpp index 72c18d3b14..4ddbd15aaa 100644 --- a/src/KOKKOS/region_sphere_kokkos.cpp +++ b/src/KOKKOS/region_sphere_kokkos.cpp @@ -64,8 +64,6 @@ int RegSphereKokkos::surface_kokkos(double x, double y, double z, do double xs, ys, zs; double xnear[3], xorig[3]; - utils::logmesg(lmp, " *** RegSphereKokkos::surface\n"); - if (dynamic) { xorig[0] = x; xorig[1] = y; xorig[2] = z; inverse_transform(x, y, z); From ae0627b2f15d642f59e8c128da9f5fb4185a6b0c Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 9 Oct 2024 14:43:18 -0400 Subject: [PATCH 177/294] remove warning --- src/KOKKOS/region_block_kokkos.cpp | 1 - src/KOKKOS/region_block_kokkos.h | 1 - src/KOKKOS/region_sphere_kokkos.cpp | 2 -- src/KOKKOS/region_sphere_kokkos.h | 1 - 4 files changed, 
5 deletions(-) diff --git a/src/KOKKOS/region_block_kokkos.cpp b/src/KOKKOS/region_block_kokkos.cpp index 9c71f6d6e5..b3cc64395a 100644 --- a/src/KOKKOS/region_block_kokkos.cpp +++ b/src/KOKKOS/region_block_kokkos.cpp @@ -52,7 +52,6 @@ RegBlockKokkos::~RegBlockKokkos() ------------------------------------------------------------------------- */ template -KOKKOS_INLINE_FUNCTION int RegBlockKokkos::surface_kokkos(double x, double y, double z, double cutoff) { int ncontact; diff --git a/src/KOKKOS/region_block_kokkos.h b/src/KOKKOS/region_block_kokkos.h index ff18a86ff8..9474413343 100644 --- a/src/KOKKOS/region_block_kokkos.h +++ b/src/KOKKOS/region_block_kokkos.h @@ -47,7 +47,6 @@ class RegBlockKokkos : public RegBlock, public KokkosBase { KOKKOS_INLINE_FUNCTION void operator()(TagRegBlockMatchAll, const int&) const; - KOKKOS_INLINE_FUNCTION int surface_kokkos(double, double, double, double); Kokkos::View d_contact; diff --git a/src/KOKKOS/region_sphere_kokkos.cpp b/src/KOKKOS/region_sphere_kokkos.cpp index 4ddbd15aaa..3eff14585f 100644 --- a/src/KOKKOS/region_sphere_kokkos.cpp +++ b/src/KOKKOS/region_sphere_kokkos.cpp @@ -43,7 +43,6 @@ RegSphereKokkos::~RegSphereKokkos() memoryKK->destroy_kokkos(d_contact); } - /* ---------------------------------------------------------------------- generate list of contact points for interior or exterior regions if region has variable shape, invoke shape_update() once per timestep @@ -57,7 +56,6 @@ RegSphereKokkos::~RegSphereKokkos() ------------------------------------------------------------------------- */ template -KOKKOS_INLINE_FUNCTION int RegSphereKokkos::surface_kokkos(double x, double y, double z, double cutoff) { int ncontact; diff --git a/src/KOKKOS/region_sphere_kokkos.h b/src/KOKKOS/region_sphere_kokkos.h index bb63ed68d4..c5c2a78912 100644 --- a/src/KOKKOS/region_sphere_kokkos.h +++ b/src/KOKKOS/region_sphere_kokkos.h @@ -41,7 +41,6 @@ class RegSphereKokkos : public RegSphere, public KokkosBase { ~RegSphereKokkos() 
override; void match_all_kokkos(int, DAT::tdual_int_1d) override; - KOKKOS_INLINE_FUNCTION int surface_kokkos(double, double, double, double); Kokkos::View d_contact; From 0007e568fcb8383747d966bd41c1eb036268bb3e Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 9 Oct 2024 14:46:49 -0400 Subject: [PATCH 178/294] KOKKOS_FUNCTION --- src/KOKKOS/region_block_kokkos.cpp | 1 + src/KOKKOS/region_block_kokkos.h | 1 + src/KOKKOS/region_sphere_kokkos.cpp | 1 + src/KOKKOS/region_sphere_kokkos.h | 1 + 4 files changed, 4 insertions(+) diff --git a/src/KOKKOS/region_block_kokkos.cpp b/src/KOKKOS/region_block_kokkos.cpp index b3cc64395a..2f2a0dc7e2 100644 --- a/src/KOKKOS/region_block_kokkos.cpp +++ b/src/KOKKOS/region_block_kokkos.cpp @@ -52,6 +52,7 @@ RegBlockKokkos::~RegBlockKokkos() ------------------------------------------------------------------------- */ template +KOKKOS_FUNCTION int RegBlockKokkos::surface_kokkos(double x, double y, double z, double cutoff) { int ncontact; diff --git a/src/KOKKOS/region_block_kokkos.h b/src/KOKKOS/region_block_kokkos.h index 9474413343..ba7d7025eb 100644 --- a/src/KOKKOS/region_block_kokkos.h +++ b/src/KOKKOS/region_block_kokkos.h @@ -47,6 +47,7 @@ class RegBlockKokkos : public RegBlock, public KokkosBase { KOKKOS_INLINE_FUNCTION void operator()(TagRegBlockMatchAll, const int&) const; + KOKKOS_FUNCTION int surface_kokkos(double, double, double, double); Kokkos::View d_contact; diff --git a/src/KOKKOS/region_sphere_kokkos.cpp b/src/KOKKOS/region_sphere_kokkos.cpp index 3eff14585f..8ef2d4a8b3 100644 --- a/src/KOKKOS/region_sphere_kokkos.cpp +++ b/src/KOKKOS/region_sphere_kokkos.cpp @@ -56,6 +56,7 @@ RegSphereKokkos::~RegSphereKokkos() ------------------------------------------------------------------------- */ template +KOKKOS_FUNCTION int RegSphereKokkos::surface_kokkos(double x, double y, double z, double cutoff) { int ncontact; diff --git a/src/KOKKOS/region_sphere_kokkos.h b/src/KOKKOS/region_sphere_kokkos.h index 
c5c2a78912..d11025da28 100644 --- a/src/KOKKOS/region_sphere_kokkos.h +++ b/src/KOKKOS/region_sphere_kokkos.h @@ -41,6 +41,7 @@ class RegSphereKokkos : public RegSphere, public KokkosBase { ~RegSphereKokkos() override; void match_all_kokkos(int, DAT::tdual_int_1d) override; + KOKKOS_FUNCTION int surface_kokkos(double, double, double, double); Kokkos::View d_contact; From 1427ca1ff405e9809f71e9b1ad79cf7b82424637 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 9 Oct 2024 15:04:07 -0400 Subject: [PATCH 179/294] forward_transform() and find_closest_point() --- src/KOKKOS/region_block_kokkos.cpp | 96 +++++++++++++++++++++++++++++ src/KOKKOS/region_block_kokkos.h | 5 ++ src/KOKKOS/region_sphere_kokkos.cpp | 17 +++++ src/KOKKOS/region_sphere_kokkos.h | 2 + 4 files changed, 120 insertions(+) diff --git a/src/KOKKOS/region_block_kokkos.cpp b/src/KOKKOS/region_block_kokkos.cpp index 2f2a0dc7e2..0ba172ef2e 100644 --- a/src/KOKKOS/region_block_kokkos.cpp +++ b/src/KOKKOS/region_block_kokkos.cpp @@ -324,6 +324,23 @@ int RegBlockKokkos::match(double x, double y, double z) const return !(k_inside(x,y,z) ^ interior); } +/* ---------------------------------------------------------------------- + transform a point x,y,z in region space to moved space + rotate first (around original P), then displace +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void RegBlockKokkos::forward_transform(double &x, double &y, double &z) const +{ + if (rotateflag) rotate(x, y, z, theta); + if (moveflag) { + x += dx; + y += dy; + z += dz; + } +} + /* ---------------------------------------------------------------------- transform a point x,y,z in moved space back to region space undisplace first, then unrotate (around original P) @@ -386,6 +403,85 @@ void RegBlockKokkos::rotate(double &x, double &y, double &z, double z = point[2] + c[2] + disp[2]; } +/*------------------------------------------------------------------------ + return 
distance to closest point on surface I of block region + store closest point in xc,yc,zc +--------------------------------------------------------------------------*/ + +template +KOKKOS_INLINE_FUNCTION +double RegBlockKokkos::find_closest_point(int i, double *x, double &xc, double &yc, double &zc) +{ + double dot, d2, d2min; + double xr[3], xproj[3], p[3]; + + xr[0] = x[0] - corners[i][0][0]; + xr[1] = x[1] - corners[i][0][1]; + xr[2] = x[2] - corners[i][0][2]; + dot = face[i][0] * xr[0] + face[i][1] * xr[1] + face[i][2] * xr[2]; + xproj[0] = xr[0] - dot * face[i][0]; + xproj[1] = xr[1] - dot * face[i][1]; + xproj[2] = xr[2] - dot * face[i][2]; + + d2min = MAXDOUBLEINT; + + // check if point projects inside of face + + if (inside_face(xproj, i)) { + d2 = d2min = dot * dot; + xc = xproj[0] + corners[i][0][0]; + yc = xproj[1] + corners[i][0][1]; + zc = xproj[2] + corners[i][0][2]; + + // check each edge + + } else { + point_on_line_segment(corners[i][0], corners[i][1], x, p); + d2 = (p[0] - x[0]) * (p[0] - x[0]) + (p[1] - x[1]) * (p[1] - x[1]) + + (p[2] - x[2]) * (p[2] - x[2]); + if (d2 < d2min) { + d2min = d2; + xc = p[0]; + yc = p[1]; + zc = p[2]; + } + + point_on_line_segment(corners[i][1], corners[i][2], x, p); + d2 = (p[0] - x[0]) * (p[0] - x[0]) + (p[1] - x[1]) * (p[1] - x[1]) + + (p[2] - x[2]) * (p[2] - x[2]); + if (d2 < d2min) { + d2min = d2; + xc = p[0]; + yc = p[1]; + zc = p[2]; + } + + point_on_line_segment(corners[i][2], corners[i][3], x, p); + d2 = (p[0] - x[0]) * (p[0] - x[0]) + (p[1] - x[1]) * (p[1] - x[1]) + + (p[2] - x[2]) * (p[2] - x[2]); + if (d2 < d2min) { + d2min = d2; + xc = p[0]; + yc = p[1]; + zc = p[2]; + } + + point_on_line_segment(corners[i][3], corners[i][0], x, p); + d2 = (p[0] - x[0]) * (p[0] - x[0]) + (p[1] - x[1]) * (p[1] - x[1]) + + (p[2] - x[2]) * (p[2] - x[2]); + if (d2 < d2min) { + d2min = d2; + xc = p[0]; + yc = p[1]; + zc = p[2]; + } + } + + return d2min; +} + + + namespace LAMMPS_NS { template class RegBlockKokkos; #ifdef 
LMP_KOKKOS_GPU diff --git a/src/KOKKOS/region_block_kokkos.h b/src/KOKKOS/region_block_kokkos.h index ba7d7025eb..fd4907e251 100644 --- a/src/KOKKOS/region_block_kokkos.h +++ b/src/KOKKOS/region_block_kokkos.h @@ -64,6 +64,8 @@ class RegBlockKokkos : public RegBlock, public KokkosBase { KOKKOS_INLINE_FUNCTION int match(double, double, double) const; KOKKOS_INLINE_FUNCTION + void forward_transform(double &, double &, double &) const; + KOKKOS_INLINE_FUNCTION void inverse_transform(double &, double &, double &) const; KOKKOS_INLINE_FUNCTION void rotate(double &, double &, double &, double) const; @@ -75,6 +77,9 @@ class RegBlockKokkos : public RegBlock, public KokkosBase { KOKKOS_INLINE_FUNCTION int surface_exterior_kokkos(double *, double); + KOKKOS_INLINE_FUNCTION + double find_closest_point(int, double*, double&, double&, double&); + }; } diff --git a/src/KOKKOS/region_sphere_kokkos.cpp b/src/KOKKOS/region_sphere_kokkos.cpp index 8ef2d4a8b3..e27bfa114b 100644 --- a/src/KOKKOS/region_sphere_kokkos.cpp +++ b/src/KOKKOS/region_sphere_kokkos.cpp @@ -247,6 +247,23 @@ int RegSphereKokkos::match(double x, double y, double z) const return !(k_inside(x,y,z) ^ interior); } +/* ---------------------------------------------------------------------- + transform a point x,y,z in region space to moved space + rotate first (around original P), then displace +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void RegSphereKokkos::forward_transform(double &x, double &y, double &z) const +{ + if (rotateflag) rotate(x, y, z, theta); + if (moveflag) { + x += dx; + y += dy; + z += dz; + } +} + /* ---------------------------------------------------------------------- transform a point x,y,z in moved space back to region space undisplace first, then unrotate (around original P) diff --git a/src/KOKKOS/region_sphere_kokkos.h b/src/KOKKOS/region_sphere_kokkos.h index d11025da28..db0d8b2ad3 100644 --- 
a/src/KOKKOS/region_sphere_kokkos.h +++ b/src/KOKKOS/region_sphere_kokkos.h @@ -53,6 +53,8 @@ class RegSphereKokkos : public RegSphere, public KokkosBase { KOKKOS_INLINE_FUNCTION int match(double, double, double) const; KOKKOS_INLINE_FUNCTION + void forward_transform(double &, double &, double &) const; + KOKKOS_INLINE_FUNCTION void inverse_transform(double &, double &, double &) const; KOKKOS_INLINE_FUNCTION void rotate(double &, double &, double &, double) const; From b4b49f5dec9c813dc37a624176436ad8a03eda16 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 9 Oct 2024 15:11:24 -0400 Subject: [PATCH 180/294] point_on_line_segment() --- src/KOKKOS/region_block_kokkos.cpp | 35 ++++++++++++++++++++++++++++++ src/KOKKOS/region_block_kokkos.h | 16 +++++++------- 2 files changed, 43 insertions(+), 8 deletions(-) diff --git a/src/KOKKOS/region_block_kokkos.cpp b/src/KOKKOS/region_block_kokkos.cpp index 0ba172ef2e..43fd7e7607 100644 --- a/src/KOKKOS/region_block_kokkos.cpp +++ b/src/KOKKOS/region_block_kokkos.cpp @@ -16,6 +16,7 @@ #include "atom_kokkos.h" #include "atom_masks.h" +#include "math_extra.h" #include "memory_kokkos.h" using namespace LAMMPS_NS; @@ -403,6 +404,40 @@ void RegBlockKokkos::rotate(double &x, double &y, double &z, double z = point[2] + c[2] + disp[2]; } +/* ---------------------------------------------------------------------- + find nearest point to C on line segment A,B and return it as D + project (C-A) onto (B-A) + t = length of that projection, normalized by length of (B-A) + t <= 0, C is closest to A + t >= 1, C is closest to B + else closest point is between A and B +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void RegBlockKokkos::point_on_line_segment(double *a, double *b, double *c, double *d) +{ + double ba[3], ca[3]; + + MathExtra::sub3(b, a, ba); + MathExtra::sub3(c, a, ca); + double t = MathExtra::dot3(ca, ba) / MathExtra::dot3(ba, ba); + if (t <= 0.0) { + d[0] = 
a[0]; + d[1] = a[1]; + d[2] = a[2]; + } else if (t >= 1.0) { + d[0] = b[0]; + d[1] = b[1]; + d[2] = b[2]; + } else { + d[0] = a[0] + t * ba[0]; + d[1] = a[1] + t * ba[1]; + d[2] = a[2] + t * ba[2]; + } +} + + /*------------------------------------------------------------------------ return distance to closest point on surface I of block region store closest point in xc,yc,zc diff --git a/src/KOKKOS/region_block_kokkos.h b/src/KOKKOS/region_block_kokkos.h index fd4907e251..8f967cbd7f 100644 --- a/src/KOKKOS/region_block_kokkos.h +++ b/src/KOKKOS/region_block_kokkos.h @@ -64,19 +64,19 @@ class RegBlockKokkos : public RegBlock, public KokkosBase { KOKKOS_INLINE_FUNCTION int match(double, double, double) const; KOKKOS_INLINE_FUNCTION - void forward_transform(double &, double &, double &) const; + void forward_transform(double&, double&, double&) const; KOKKOS_INLINE_FUNCTION - void inverse_transform(double &, double &, double &) const; + void inverse_transform(double&, double&, double&) const; KOKKOS_INLINE_FUNCTION - void rotate(double &, double &, double &, double) const; - + void rotate(double&, double&, double&, double) const; KOKKOS_INLINE_FUNCTION - void add_contact(int, double *, double, double, double); + void add_contact(int, double*, double, double, double); KOKKOS_INLINE_FUNCTION - int surface_interior_kokkos(double *, double); + int surface_interior_kokkos(double*, double); KOKKOS_INLINE_FUNCTION - int surface_exterior_kokkos(double *, double); - + int surface_exterior_kokkos(double*, double); + KOKKOS_INLINE_FUNCTION + void point_on_line_segment(double*, double*, double*, double*); KOKKOS_INLINE_FUNCTION double find_closest_point(int, double*, double&, double&, double&); From e4a92f96e159ea809f5219fa2be8497c4b139385 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 9 Oct 2024 15:15:23 -0400 Subject: [PATCH 181/294] match_kokkos() --- src/KOKKOS/region_block_kokkos.cpp | 4 ++-- src/KOKKOS/region_block_kokkos.h | 2 +- src/KOKKOS/region_sphere_kokkos.cpp 
| 4 ++-- src/KOKKOS/region_sphere_kokkos.h | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/KOKKOS/region_block_kokkos.cpp b/src/KOKKOS/region_block_kokkos.cpp index 43fd7e7607..ef0d5d27b2 100644 --- a/src/KOKKOS/region_block_kokkos.cpp +++ b/src/KOKKOS/region_block_kokkos.cpp @@ -300,7 +300,7 @@ void RegBlockKokkos::operator()(TagRegBlockMatchAll, const int &i) c double x_tmp = x(i,0); double y_tmp = x(i,1); double z_tmp = x(i,2); - d_match[i] = match(x_tmp,y_tmp,z_tmp); + d_match[i] = match_kokkos(x_tmp,y_tmp,z_tmp); } } @@ -318,7 +318,7 @@ void RegBlockKokkos::operator()(TagRegBlockMatchAll, const int &i) c template KOKKOS_INLINE_FUNCTION -int RegBlockKokkos::match(double x, double y, double z) const +int RegBlockKokkos::match_kokkos(double x, double y, double z) const { if (dynamic) inverse_transform(x,y,z); if (openflag) return 1; diff --git a/src/KOKKOS/region_block_kokkos.h b/src/KOKKOS/region_block_kokkos.h index 8f967cbd7f..417fb2f214 100644 --- a/src/KOKKOS/region_block_kokkos.h +++ b/src/KOKKOS/region_block_kokkos.h @@ -62,7 +62,7 @@ class RegBlockKokkos : public RegBlock, public KokkosBase { KOKKOS_INLINE_FUNCTION int k_inside(double, double, double) const; KOKKOS_INLINE_FUNCTION - int match(double, double, double) const; + int match_kokkos(double, double, double) const; KOKKOS_INLINE_FUNCTION void forward_transform(double&, double&, double&) const; KOKKOS_INLINE_FUNCTION diff --git a/src/KOKKOS/region_sphere_kokkos.cpp b/src/KOKKOS/region_sphere_kokkos.cpp index e27bfa114b..86d3181048 100644 --- a/src/KOKKOS/region_sphere_kokkos.cpp +++ b/src/KOKKOS/region_sphere_kokkos.cpp @@ -218,7 +218,7 @@ void RegSphereKokkos::match_all_kokkos(int groupbit_in, DAT::tdual_i double x_tmp = d_x(i,0); double y_tmp = d_x(i,1); double z_tmp = d_x(i,2); - d_match[i] = match(x_tmp,y_tmp,z_tmp); + d_match[i] = match_kokkos(x_tmp,y_tmp,z_tmp); }}); copymode = 0; @@ -240,7 +240,7 @@ void RegSphereKokkos::match_all_kokkos(int groupbit_in, DAT::tdual_i 
template KOKKOS_INLINE_FUNCTION -int RegSphereKokkos::match(double x, double y, double z) const +int RegSphereKokkos::match_kokkos(double x, double y, double z) const { if (dynamic) inverse_transform(x,y,z); if (openflag) return 1; diff --git a/src/KOKKOS/region_sphere_kokkos.h b/src/KOKKOS/region_sphere_kokkos.h index db0d8b2ad3..b573a59a83 100644 --- a/src/KOKKOS/region_sphere_kokkos.h +++ b/src/KOKKOS/region_sphere_kokkos.h @@ -51,7 +51,7 @@ class RegSphereKokkos : public RegSphere, public KokkosBase { KOKKOS_INLINE_FUNCTION int k_inside(double, double, double) const; KOKKOS_INLINE_FUNCTION - int match(double, double, double) const; + int match_kokkos(double, double, double) const; KOKKOS_INLINE_FUNCTION void forward_transform(double &, double &, double &) const; KOKKOS_INLINE_FUNCTION From 08b0e8eab9d1be460768fe8e6830a21333711d66 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 9 Oct 2024 15:19:05 -0400 Subject: [PATCH 182/294] inside_face() --- src/KOKKOS/region_block_kokkos.cpp | 19 +++++++++++++++++++ src/KOKKOS/region_block_kokkos.h | 2 ++ 2 files changed, 21 insertions(+) diff --git a/src/KOKKOS/region_block_kokkos.cpp b/src/KOKKOS/region_block_kokkos.cpp index ef0d5d27b2..043384953b 100644 --- a/src/KOKKOS/region_block_kokkos.cpp +++ b/src/KOKKOS/region_block_kokkos.cpp @@ -437,6 +437,25 @@ void RegBlockKokkos::point_on_line_segment(double *a, double *b, dou } } +/*------------------------------------------------------------------------ + determine if projected point is inside given face of the block +--------------------------------------------------------------------------*/ + +template +KOKKOS_INLINE_FUNCTION +double RegBlockKokkos::inside_face(double *xproj, int iface) +{ + if (iface < 2) { + if (xproj[1] > 0 && (xproj[1] < yhi - ylo) && xproj[2] > 0 && (xproj[2] < zhi - zlo)) return 1; + } else if (iface < 4) { + if (xproj[0] > 0 && (xproj[0] < (xhi - xlo)) && xproj[2] > 0 && (xproj[2] < (zhi - zlo))) + return 1; + } else { + if (xproj[0] > 0 && 
xproj[0] < (xhi - xlo) && xproj[1] > 0 && xproj[1] < (yhi - ylo)) return 1; + } + + return 0; +} /*------------------------------------------------------------------------ return distance to closest point on surface I of block region diff --git a/src/KOKKOS/region_block_kokkos.h b/src/KOKKOS/region_block_kokkos.h index 417fb2f214..7db9819095 100644 --- a/src/KOKKOS/region_block_kokkos.h +++ b/src/KOKKOS/region_block_kokkos.h @@ -79,6 +79,8 @@ class RegBlockKokkos : public RegBlock, public KokkosBase { void point_on_line_segment(double*, double*, double*, double*); KOKKOS_INLINE_FUNCTION double find_closest_point(int, double*, double&, double&, double&); + KOKKOS_INLINE_FUNCTION + double inside_face(double*, int); }; From e48d2f9a31eece59b9c63da1982fd2dd769c64fc Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 9 Oct 2024 15:26:34 -0400 Subject: [PATCH 183/294] Update fix_wall_region_kokkos.cpp --- src/KOKKOS/fix_wall_region_kokkos.cpp | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/src/KOKKOS/fix_wall_region_kokkos.cpp b/src/KOKKOS/fix_wall_region_kokkos.cpp index 0eb64ee3f0..95c9bd4f01 100644 --- a/src/KOKKOS/fix_wall_region_kokkos.cpp +++ b/src/KOKKOS/fix_wall_region_kokkos.cpp @@ -22,31 +22,17 @@ #include "atom_kokkos.h" #include "error.h" #include "kokkos_base.h" +#include "math_special_kokkos.h" #include "memory_kokkos.h" #include "region.h" #include "region_block_kokkos.h" #include "region_sphere_kokkos.h" using namespace LAMMPS_NS; +using namespace MathSpecialKokkos; enum { LJ93, LJ126, LJ1043, COLLOID, HARMONIC, MORSE }; -KOKKOS_INLINE_FUNCTION double powint(const double &x, const int n) -{ - double yy, ww; - - if (n == 0) return 1.0; - if (x == 0.0) return 0.0; - int nn = (n > 0) ? n : -n; - ww = x; - - for (yy = 1.0; nn != 0; nn >>= 1, ww *= ww) - if (nn & 1) yy *= ww; - - return (n > 0) ? 
yy : 1.0 / yy; -} - - /* ---------------------------------------------------------------------- */ template From 2d94187606d29646d5c2fe6e192bf738d451044b Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 9 Oct 2024 15:34:31 -0400 Subject: [PATCH 184/294] sub3() and dot3() --- src/KOKKOS/math_special_kokkos.h | 24 ++++++++++++++++++++++++ src/KOKKOS/region_block_kokkos.cpp | 9 +++++---- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/KOKKOS/math_special_kokkos.h b/src/KOKKOS/math_special_kokkos.h index d8af28d33a..12e04db1c0 100644 --- a/src/KOKKOS/math_special_kokkos.h +++ b/src/KOKKOS/math_special_kokkos.h @@ -248,6 +248,30 @@ namespace MathSpecialKokkos { return yy; } + + /* ---------------------------------------------------------------------- + ans = v1 - v2 + ------------------------------------------------------------------------- */ + + KOKKOS_INLINE_FUNCTION + static void sub3(const double *v1, const double *v2, double *ans) + { + ans[0] = v1[0] - v2[0]; + ans[1] = v1[1] - v2[1]; + ans[2] = v1[2] - v2[2]; + } + + /* ---------------------------------------------------------------------- + dot product of 2 vectors + ------------------------------------------------------------------------- */ + + KOKKOS_INLINE_FUNCTION + static double dot3(const double *v1, const double *v2) + { + return v1[0] * v2[0] + v1[1] * v2[1] + v1[2] * v2[2]; + } + + } // namespace MathSpecialKokkos } // namespace LAMMPS_NS diff --git a/src/KOKKOS/region_block_kokkos.cpp b/src/KOKKOS/region_block_kokkos.cpp index 043384953b..12b4f7b478 100644 --- a/src/KOKKOS/region_block_kokkos.cpp +++ b/src/KOKKOS/region_block_kokkos.cpp @@ -16,10 +16,11 @@ #include "atom_kokkos.h" #include "atom_masks.h" -#include "math_extra.h" +#include "math_special_kokkos.h" #include "memory_kokkos.h" using namespace LAMMPS_NS; +using namespace MathSpecialKokkos; /* ---------------------------------------------------------------------- */ @@ -419,9 +420,9 @@ void 
RegBlockKokkos::point_on_line_segment(double *a, double *b, dou { double ba[3], ca[3]; - MathExtra::sub3(b, a, ba); - MathExtra::sub3(c, a, ca); - double t = MathExtra::dot3(ca, ba) / MathExtra::dot3(ba, ba); + sub3(b, a, ba); + sub3(c, a, ca); + double t = dot3(ca, ba) / dot3(ba, ba); if (t <= 0.0) { d[0] = a[0]; d[1] = a[1]; From fea0271d5a1159e2fe49aa1f9ff93494821cd1f4 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 9 Oct 2024 15:36:30 -0400 Subject: [PATCH 185/294] Update region_block_kokkos.cpp --- src/KOKKOS/region_block_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/region_block_kokkos.cpp b/src/KOKKOS/region_block_kokkos.cpp index 12b4f7b478..e9c183afe2 100644 --- a/src/KOKKOS/region_block_kokkos.cpp +++ b/src/KOKKOS/region_block_kokkos.cpp @@ -465,7 +465,7 @@ double RegBlockKokkos::inside_face(double *xproj, int iface) template KOKKOS_INLINE_FUNCTION -double RegBlockKokkos::find_closest_point(int i, double *x, double &xc, double &yc, double &zc) +double RegBlockKokkos::find_closest_point(int i, double *x, double &xc, double &yc, double &zc) { double dot, d2, d2min; double xr[3], xproj[3], p[3]; From b22797b7527b0ba85544fc10fad0fc8d37bf8fc0 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 9 Oct 2024 16:13:48 -0400 Subject: [PATCH 186/294] refactor --- src/KOKKOS/fix_wall_region_kokkos.cpp | 14 +++-- src/KOKKOS/fix_wall_region_kokkos.h | 2 - src/KOKKOS/region_block_kokkos.cpp | 73 ++++++++------------------- src/KOKKOS/region_block_kokkos.h | 11 ++-- src/KOKKOS/region_sphere_kokkos.cpp | 73 ++++++++------------------- src/KOKKOS/region_sphere_kokkos.h | 9 ++-- 6 files changed, 55 insertions(+), 127 deletions(-) diff --git a/src/KOKKOS/fix_wall_region_kokkos.cpp b/src/KOKKOS/fix_wall_region_kokkos.cpp index 95c9bd4f01..8d0460aa31 100644 --- a/src/KOKKOS/fix_wall_region_kokkos.cpp +++ b/src/KOKKOS/fix_wall_region_kokkos.cpp @@ -33,6 +33,10 @@ using namespace MathSpecialKokkos; enum { LJ93, LJ126, LJ1043, 
COLLOID, HARMONIC, MORSE }; +// make sure surface_kokkos() functions get resolved by linker +//template class RegBlockKokkos; +//template class RegSphereKokkos; + /* ---------------------------------------------------------------------- */ template @@ -80,11 +84,6 @@ void FixWallRegionKokkos::post_force(int vflag) int nlocal = atomKK->nlocal; region->prematch(); - DAT::tdual_int_1d k_match = DAT::tdual_int_1d("wall_region:k_match",nlocal); - KokkosBase* regionKKBase = dynamic_cast(region); - regionKKBase->match_all_kokkos(groupbit,k_match); - k_match.template sync(); - d_match = k_match.template view(); // virial setup @@ -112,7 +111,6 @@ void FixWallRegionKokkos::post_force(int vflag) } copymode = 0; - for( int i=0 ; i<4 ; i++ ) ewall[i] = result[i]; if (vflag_global) { @@ -132,7 +130,6 @@ void FixWallRegionKokkos::post_force(int vflag) } } - /* ---------------------------------------------------------------------- interaction of all particles in group with a wall m = index of wall coeffs @@ -145,7 +142,8 @@ template KOKKOS_INLINE_FUNCTION void FixWallRegionKokkos::wall_particle(T regionKK, const int i, value_type result) const { if (d_mask(i) & groupbit) { - if (!d_match[i]) Kokkos::abort("Particle outside surface of region used in fix wall/region"); + + if (!regionKK->match_kokkos(d_x(i,0), d_x(i,1), d_x(i,2))) Kokkos::abort("Particle outside surface of region used in fix wall/region"); double rinv, tooclose; diff --git a/src/KOKKOS/fix_wall_region_kokkos.h b/src/KOKKOS/fix_wall_region_kokkos.h index 220f9ad38b..e959ffc42c 100644 --- a/src/KOKKOS/fix_wall_region_kokkos.h +++ b/src/KOKKOS/fix_wall_region_kokkos.h @@ -53,8 +53,6 @@ class FixWallRegionKokkos : public FixWallRegion { typename AT::t_float_1d d_radius; typename AT::t_int_1d d_mask; - typename AT::t_int_1d d_match; - DAT::tdual_virial_array k_vatom; typename AT::t_virial_array d_vatom; diff --git a/src/KOKKOS/region_block_kokkos.cpp b/src/KOKKOS/region_block_kokkos.cpp index e9c183afe2..6aa5856829 
100644 --- a/src/KOKKOS/region_block_kokkos.cpp +++ b/src/KOKKOS/region_block_kokkos.cpp @@ -41,6 +41,27 @@ RegBlockKokkos::~RegBlockKokkos() memoryKK->destroy_kokkos(d_contact); } +/* ---------------------------------------------------------------------- + determine if point x,y,z is a match to region volume + XOR computes 0 if 2 args are the same, 1 if different + note that k_inside() returns 1 for points on surface of region + thus point on surface of exterior region will not match + if region has variable shape, invoke shape_update() once per timestep + if region is dynamic, apply inverse transform to x,y,z + unmove first, then unrotate, so don't have to change rotation point + caller is responsible for wrapping this call with + modify->clearstep_compute() and modify->addstep_compute() if needed +------------------------------------------------------------------------- */ + +template +KOKKOS_FUNCTION +int RegBlockKokkos::match_kokkos(double x, double y, double z) const +{ + if (dynamic) inverse_transform(x,y,z); + if (openflag) return 1; + return !(k_inside(x,y,z) ^ interior); +} + /* ---------------------------------------------------------------------- generate list of contact points for interior or exterior regions if region has variable shape, invoke shape_update() once per timestep @@ -274,58 +295,6 @@ int RegBlockKokkos::k_inside(double x, double y, double z) const return 0; } -template -void RegBlockKokkos::match_all_kokkos(int groupbit_in, DAT::tdual_int_1d k_match_in) -{ - groupbit = groupbit_in; - d_match = k_match_in.template view(); - - auto execution_space = ExecutionSpaceFromDevice::space; - atomKK->sync(execution_space, X_MASK | MASK_MASK); - - x = atomKK->k_x.view(); - mask = atomKK->k_mask.view(); - int nlocal = atom->nlocal; - - copymode = 1; - Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); - copymode = 0; - - k_match_in.template modify(); -} - -template -KOKKOS_INLINE_FUNCTION -void RegBlockKokkos::operator()(TagRegBlockMatchAll, 
const int &i) const { - if (mask[i] & groupbit) { - double x_tmp = x(i,0); - double y_tmp = x(i,1); - double z_tmp = x(i,2); - d_match[i] = match_kokkos(x_tmp,y_tmp,z_tmp); - } -} - -/* ---------------------------------------------------------------------- - determine if point x,y,z is a match to region volume - XOR computes 0 if 2 args are the same, 1 if different - note that k_inside() returns 1 for points on surface of region - thus point on surface of exterior region will not match - if region has variable shape, invoke shape_update() once per timestep - if region is dynamic, apply inverse transform to x,y,z - unmove first, then unrotate, so don't have to change rotation point - caller is responsible for wrapping this call with - modify->clearstep_compute() and modify->addstep_compute() if needed -------------------------------------------------------------------------- */ - -template -KOKKOS_INLINE_FUNCTION -int RegBlockKokkos::match_kokkos(double x, double y, double z) const -{ - if (dynamic) inverse_transform(x,y,z); - if (openflag) return 1; - return !(k_inside(x,y,z) ^ interior); -} - /* ---------------------------------------------------------------------- transform a point x,y,z in region space to moved space rotate first (around original P), then displace diff --git a/src/KOKKOS/region_block_kokkos.h b/src/KOKKOS/region_block_kokkos.h index 7db9819095..4ca7a3fdae 100644 --- a/src/KOKKOS/region_block_kokkos.h +++ b/src/KOKKOS/region_block_kokkos.h @@ -24,7 +24,6 @@ RegionStyle(block/kk/host,RegBlockKokkos); #define LMP_REGION_BLOCK_KOKKOS_H #include "region_block.h" -#include "kokkos_base.h" #include "kokkos_type.h" namespace LAMMPS_NS { @@ -32,7 +31,7 @@ namespace LAMMPS_NS { struct TagRegBlockMatchAll{}; template -class RegBlockKokkos : public RegBlock, public KokkosBase { +class RegBlockKokkos : public RegBlock { friend class FixPour; public: @@ -42,10 +41,8 @@ class RegBlockKokkos : public RegBlock, public KokkosBase { RegBlockKokkos(class LAMMPS *, 
int, char **); ~RegBlockKokkos() override; - void match_all_kokkos(int, DAT::tdual_int_1d) override; - - KOKKOS_INLINE_FUNCTION - void operator()(TagRegBlockMatchAll, const int&) const; + KOKKOS_FUNCTION + int match_kokkos(double, double, double) const; KOKKOS_FUNCTION int surface_kokkos(double, double, double, double); @@ -62,8 +59,6 @@ class RegBlockKokkos : public RegBlock, public KokkosBase { KOKKOS_INLINE_FUNCTION int k_inside(double, double, double) const; KOKKOS_INLINE_FUNCTION - int match_kokkos(double, double, double) const; - KOKKOS_INLINE_FUNCTION void forward_transform(double&, double&, double&) const; KOKKOS_INLINE_FUNCTION void inverse_transform(double&, double&, double&) const; diff --git a/src/KOKKOS/region_sphere_kokkos.cpp b/src/KOKKOS/region_sphere_kokkos.cpp index 86d3181048..f14a0d9ed8 100644 --- a/src/KOKKOS/region_sphere_kokkos.cpp +++ b/src/KOKKOS/region_sphere_kokkos.cpp @@ -43,6 +43,27 @@ RegSphereKokkos::~RegSphereKokkos() memoryKK->destroy_kokkos(d_contact); } +/* ---------------------------------------------------------------------- + determine if point x,y,z is a match to region volume + XOR computes 0 if 2 args are the same, 1 if different + note that k_inside() returns 1 for points on surface of region + thus point on surface of exterior region will not match + if region has variable shape, invoke shape_update() once per timestep + if region is dynamic, apply inverse transform to x,y,z + unmove first, then unrotate, so don't have to change rotation point + caller is responsible for wrapping this call with + modify->clearstep_compute() and modify->addstep_compute() if needed +------------------------------------------------------------------------- */ + +template +KOKKOS_FUNCTION +int RegSphereKokkos::match_kokkos(double x, double y, double z) const +{ + if (dynamic) inverse_transform(x,y,z); + if (openflag) return 1; + return !(k_inside(x,y,z) ^ interior); +} + /* ---------------------------------------------------------------------- 
generate list of contact points for interior or exterior regions if region has variable shape, invoke shape_update() once per timestep @@ -195,58 +216,6 @@ int RegSphereKokkos::k_inside(double x, double y, double z) const return 0; } -template -void RegSphereKokkos::match_all_kokkos(int groupbit_in, DAT::tdual_int_1d k_match_in) -{ - - auto execution_space = ExecutionSpaceFromDevice::space; - atomKK->sync(execution_space, X_MASK | MASK_MASK); - - auto d_x = atomKK->k_x.template view(); - auto d_mask = atomKK->k_mask.template view(); - auto d_match = k_match_in.template view(); - auto l_groupbit = groupbit_in; - - copymode = 1; - - // capture lambda reference to KOKKOS_INLINE_FUNCTION match() - // use KOKKOS_CLASS_LAMBDA instead of KOKKOS_LAMBDA - // https://github.com/kokkos/kokkos/issues/695 - - Kokkos::parallel_for(atom->nlocal, KOKKOS_CLASS_LAMBDA( const int &i ) { - if (d_mask[i] & l_groupbit) { - double x_tmp = d_x(i,0); - double y_tmp = d_x(i,1); - double z_tmp = d_x(i,2); - d_match[i] = match_kokkos(x_tmp,y_tmp,z_tmp); - }}); - - copymode = 0; - - k_match_in.template modify(); -} - -/* ---------------------------------------------------------------------- - determine if point x,y,z is a match to region volume - XOR computes 0 if 2 args are the same, 1 if different - note that k_inside() returns 1 for points on surface of region - thus point on surface of exterior region will not match - if region has variable shape, invoke shape_update() once per timestep - if region is dynamic, apply inverse transform to x,y,z - unmove first, then unrotate, so don't have to change rotation point - caller is responsible for wrapping this call with - modify->clearstep_compute() and modify->addstep_compute() if needed -------------------------------------------------------------------------- */ - -template -KOKKOS_INLINE_FUNCTION -int RegSphereKokkos::match_kokkos(double x, double y, double z) const -{ - if (dynamic) inverse_transform(x,y,z); - if (openflag) return 1; - return 
!(k_inside(x,y,z) ^ interior); -} - /* ---------------------------------------------------------------------- transform a point x,y,z in region space to moved space rotate first (around original P), then displace diff --git a/src/KOKKOS/region_sphere_kokkos.h b/src/KOKKOS/region_sphere_kokkos.h index b573a59a83..61a09275f4 100644 --- a/src/KOKKOS/region_sphere_kokkos.h +++ b/src/KOKKOS/region_sphere_kokkos.h @@ -24,13 +24,12 @@ RegionStyle(sphere/kk/host,RegSphereKokkos); #define LMP_REGION_SPHERE_KOKKOS_H #include "region_sphere.h" -#include "kokkos_base.h" #include "kokkos_type.h" namespace LAMMPS_NS { template -class RegSphereKokkos : public RegSphere, public KokkosBase { +class RegSphereKokkos : public RegSphere { friend class FixPour; public: @@ -39,7 +38,9 @@ class RegSphereKokkos : public RegSphere, public KokkosBase { RegSphereKokkos(class LAMMPS *, int, char **); ~RegSphereKokkos() override; - void match_all_kokkos(int, DAT::tdual_int_1d) override; + + KOKKOS_FUNCTION + int match_kokkos(double, double, double) const; KOKKOS_FUNCTION int surface_kokkos(double, double, double, double); @@ -51,8 +52,6 @@ class RegSphereKokkos : public RegSphere, public KokkosBase { KOKKOS_INLINE_FUNCTION int k_inside(double, double, double) const; KOKKOS_INLINE_FUNCTION - int match_kokkos(double, double, double) const; - KOKKOS_INLINE_FUNCTION void forward_transform(double &, double &, double &) const; KOKKOS_INLINE_FUNCTION void inverse_transform(double &, double &, double &) const; From dd11385f6fdda206e7b85918a9bdc91a78571443 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 9 Oct 2024 16:45:50 -0400 Subject: [PATCH 187/294] all device function in headers to avoid device relocatable code with nvcc --- src/KOKKOS/region_block_kokkos.cpp | 466 +--------------------------- src/KOKKOS/region_block_kokkos.h | 366 +++++++++++++++++++++- src/KOKKOS/region_sphere_kokkos.cpp | 252 +-------------- src/KOKKOS/region_sphere_kokkos.h | 184 +++++++++-- 4 files changed, 530 
insertions(+), 738 deletions(-) diff --git a/src/KOKKOS/region_block_kokkos.cpp b/src/KOKKOS/region_block_kokkos.cpp index 6aa5856829..9ba607d4fe 100644 --- a/src/KOKKOS/region_block_kokkos.cpp +++ b/src/KOKKOS/region_block_kokkos.cpp @@ -16,7 +16,6 @@ #include "atom_kokkos.h" #include "atom_masks.h" -#include "math_special_kokkos.h" #include "memory_kokkos.h" using namespace LAMMPS_NS; @@ -41,470 +40,7 @@ RegBlockKokkos::~RegBlockKokkos() memoryKK->destroy_kokkos(d_contact); } -/* ---------------------------------------------------------------------- - determine if point x,y,z is a match to region volume - XOR computes 0 if 2 args are the same, 1 if different - note that k_inside() returns 1 for points on surface of region - thus point on surface of exterior region will not match - if region has variable shape, invoke shape_update() once per timestep - if region is dynamic, apply inverse transform to x,y,z - unmove first, then unrotate, so don't have to change rotation point - caller is responsible for wrapping this call with - modify->clearstep_compute() and modify->addstep_compute() if needed -------------------------------------------------------------------------- */ - -template -KOKKOS_FUNCTION -int RegBlockKokkos::match_kokkos(double x, double y, double z) const -{ - if (dynamic) inverse_transform(x,y,z); - if (openflag) return 1; - return !(k_inside(x,y,z) ^ interior); -} - -/* ---------------------------------------------------------------------- - generate list of contact points for interior or exterior regions - if region has variable shape, invoke shape_update() once per timestep - if region is dynamic: - before: inverse transform x,y,z (unmove, then unrotate) - after: forward transform contact point xs,yx,zs (rotate, then move), - then reset contact delx,dely,delz based on new contact point - no need to do this if no rotation since delxyz doesn't change - caller is responsible for wrapping this call with - modify->clearstep_compute() and 
modify->addstep_compute() if needed -------------------------------------------------------------------------- */ - -template -KOKKOS_FUNCTION -int RegBlockKokkos::surface_kokkos(double x, double y, double z, double cutoff) -{ - int ncontact; - double xs, ys, zs; - double xnear[3], xorig[3]; - - if (dynamic) { - xorig[0] = x; xorig[1] = y; xorig[2] = z; - inverse_transform(x, y, z); - } - - xnear[0] = x; xnear[1] = y; xnear[2] = z; - - if (!openflag) { - if (interior) - ncontact = surface_interior_kokkos(xnear, cutoff); - else - ncontact = surface_exterior_kokkos(xnear, cutoff); - } else { - // one of surface_int/ext() will return 0 - // so no need to worry about offset of contact indices - ncontact = surface_exterior_kokkos(xnear, cutoff) + surface_interior_kokkos(xnear, cutoff); - } - - if (rotateflag && ncontact) { - for (int i = 0; i < ncontact; i++) { - xs = xnear[0] - d_contact[i].delx; - ys = xnear[1] - d_contact[i].dely; - zs = xnear[2] - d_contact[i].delz; - forward_transform(xs, ys, zs); - d_contact[i].delx = xorig[0] - xs; - d_contact[i].dely = xorig[1] - ys; - d_contact[i].delz = xorig[2] - zs; - } - } - - return ncontact; -} - -/* ---------------------------------------------------------------------- - contact if 0 <= x < cutoff from one or more inner surfaces of block - can be one contact for each of 6 faces - no contact if outside (possible if called from union/intersect) - delxyz = vector from nearest point on block to x -------------------------------------------------------------------------- */ - -template -KOKKOS_INLINE_FUNCTION -int RegBlockKokkos::surface_interior_kokkos(double *x, double cutoff) -{ - double delta; - - // x is exterior to block - - if (x[0] < xlo || x[0] > xhi || x[1] < ylo || x[1] > yhi || x[2] < zlo || x[2] > zhi) return 0; - - // x is interior to block or on its surface - - int n = 0; - - delta = x[0] - xlo; - if (delta < cutoff && !open_faces[0]) { - d_contact[n].r = delta; - d_contact[n].delx = delta; - d_contact[n].dely 
= d_contact[n].delz = 0.0; - d_contact[n].radius = 0; - d_contact[n].iwall = 0; - n++; - } - delta = xhi - x[0]; - if (delta < cutoff && !open_faces[1]) { - d_contact[n].r = delta; - d_contact[n].delx = -delta; - d_contact[n].dely = d_contact[n].delz = 0.0; - d_contact[n].radius = 0; - d_contact[n].iwall = 1; - n++; - } - - delta = x[1] - ylo; - if (delta < cutoff && !open_faces[2]) { - d_contact[n].r = delta; - d_contact[n].dely = delta; - d_contact[n].delx = d_contact[n].delz = 0.0; - d_contact[n].radius = 0; - d_contact[n].iwall = 2; - n++; - } - delta = yhi - x[1]; - if (delta < cutoff && !open_faces[3]) { - d_contact[n].r = delta; - d_contact[n].dely = -delta; - d_contact[n].delx = d_contact[n].delz = 0.0; - d_contact[n].radius = 0; - d_contact[n].iwall = 3; - n++; - } - - delta = x[2] - zlo; - if (delta < cutoff && !open_faces[4]) { - d_contact[n].r = delta; - d_contact[n].delz = delta; - d_contact[n].delx = d_contact[n].dely = 0.0; - d_contact[n].radius = 0; - d_contact[n].iwall = 4; - n++; - } - delta = zhi - x[2]; - if (delta < cutoff && !open_faces[5]) { - d_contact[n].r = delta; - d_contact[n].delz = -delta; - d_contact[n].delx = d_contact[n].dely = 0.0; - d_contact[n].radius = 0; - d_contact[n].iwall = 5; - n++; - } - - return n; -} - -/* ---------------------------------------------------------------------- - one contact if 0 <= x < cutoff from outer surface of block - no contact if inside (possible if called from union/intersect) - delxyz = vector from nearest point on block to x -------------------------------------------------------------------------- */ - -template -KOKKOS_INLINE_FUNCTION -int RegBlockKokkos::surface_exterior_kokkos(double *x, double cutoff) -{ - double xp, yp, zp; - double xc, yc, zc, dist, mindist; - - // x is far enough from block that there is no contact - // x is interior to block - - if (x[0] <= xlo - cutoff || x[0] >= xhi + cutoff || x[1] <= ylo - cutoff || - x[1] >= yhi + cutoff || x[2] <= zlo - cutoff || x[2] >= zhi + 
cutoff) - return 0; - if (x[0] > xlo && x[0] < xhi && x[1] > ylo && x[1] < yhi && x[2] > zlo && x[2] < zhi) return 0; - - // x is exterior to block or on its surface - // xp,yp,zp = point on surface of block that x is closest to - // could be edge or corner pt of block - // do not add contact point if r >= cutoff - - if (!openflag) { - if (x[0] < xlo) - xp = xlo; - else if (x[0] > xhi) - xp = xhi; - else - xp = x[0]; - if (x[1] < ylo) - yp = ylo; - else if (x[1] > yhi) - yp = yhi; - else - yp = x[1]; - if (x[2] < zlo) - zp = zlo; - else if (x[2] > zhi) - zp = zhi; - else - zp = x[2]; - } else { - mindist = MAXDOUBLEINT; - for (int i = 0; i < 6; i++) { - if (open_faces[i]) continue; - dist = find_closest_point(i, x, xc, yc, zc); - if (dist < mindist) { - xp = xc; - yp = yc; - zp = zc; - mindist = dist; - } - } - } - - add_contact(0, x, xp, yp, zp); - d_contact[0].iwall = 0; - if (d_contact[0].r < cutoff) return 1; - return 0; -} - -/* ---------------------------------------------------------------------- - add a single contact at Nth location in contact array - x = particle position - xp,yp,zp = region surface point -------------------------------------------------------------------------- */ - -template -KOKKOS_INLINE_FUNCTION -void RegBlockKokkos::add_contact(int n, double *x, double xp, double yp, double zp) -{ - double delx = x[0] - xp; - double dely = x[1] - yp; - double delz = x[2] - zp; - d_contact[n].r = sqrt(delx * delx + dely * dely + delz * delz); - d_contact[n].radius = 0; - d_contact[n].delx = delx; - d_contact[n].dely = dely; - d_contact[n].delz = delz; -} - -/* ---------------------------------------------------------------------- - inside = 1 if x,y,z is inside or on surface - inside = 0 if x,y,z is outside and not on surface -------------------------------------------------------------------------- */ - -template -KOKKOS_INLINE_FUNCTION -int RegBlockKokkos::k_inside(double x, double y, double z) const -{ - if (x >= xlo && x <= xhi && y >= ylo && y 
<= yhi && z >= zlo && z <= zhi) - return 1; - return 0; -} - -/* ---------------------------------------------------------------------- - transform a point x,y,z in region space to moved space - rotate first (around original P), then displace -------------------------------------------------------------------------- */ - -template -KOKKOS_INLINE_FUNCTION -void RegBlockKokkos::forward_transform(double &x, double &y, double &z) const -{ - if (rotateflag) rotate(x, y, z, theta); - if (moveflag) { - x += dx; - y += dy; - z += dz; - } -} - -/* ---------------------------------------------------------------------- - transform a point x,y,z in moved space back to region space - undisplace first, then unrotate (around original P) -------------------------------------------------------------------------- */ - -template -KOKKOS_INLINE_FUNCTION -void RegBlockKokkos::inverse_transform(double &x, double &y, double &z) const -{ - if (moveflag) { - x -= dx; - y -= dy; - z -= dz; - } - if (rotateflag) rotate(x,y,z,-theta); -} - -/* ---------------------------------------------------------------------- - rotate x,y,z by angle via right-hand rule around point and runit normal - sign of angle determines whether rotating forward/backward in time - return updated x,y,z - R = vector axis of rotation - P = point = point to rotate around - R0 = runit = unit vector for R - X0 = x,y,z = initial coord of atom - D = X0 - P = vector from P to X0 - C = (D dot R0) R0 = projection of D onto R, i.e. Dparallel - A = D - C = vector from R line to X0, i.e. 
Dperp - B = R0 cross A = vector perp to A in plane of rotation, same len as A - A,B define plane of circular rotation around R line - new x,y,z = P + C + A cos(angle) + B sin(angle) -------------------------------------------------------------------------- */ - -template -KOKKOS_INLINE_FUNCTION -void RegBlockKokkos::rotate(double &x, double &y, double &z, double angle) const -{ - double a[3],b[3],c[3],d[3],disp[3]; - - double sine = sin(angle); - double cosine = cos(angle); - d[0] = x - point[0]; - d[1] = y - point[1]; - d[2] = z - point[2]; - double x0dotr = d[0]*runit[0] + d[1]*runit[1] + d[2]*runit[2]; - c[0] = x0dotr * runit[0]; - c[1] = x0dotr * runit[1]; - c[2] = x0dotr * runit[2]; - a[0] = d[0] - c[0]; - a[1] = d[1] - c[1]; - a[2] = d[2] - c[2]; - b[0] = runit[1]*a[2] - runit[2]*a[1]; - b[1] = runit[2]*a[0] - runit[0]*a[2]; - b[2] = runit[0]*a[1] - runit[1]*a[0]; - disp[0] = a[0]*cosine + b[0]*sine; - disp[1] = a[1]*cosine + b[1]*sine; - disp[2] = a[2]*cosine + b[2]*sine; - x = point[0] + c[0] + disp[0]; - y = point[1] + c[1] + disp[1]; - z = point[2] + c[2] + disp[2]; -} - -/* ---------------------------------------------------------------------- - find nearest point to C on line segment A,B and return it as D - project (C-A) onto (B-A) - t = length of that projection, normalized by length of (B-A) - t <= 0, C is closest to A - t >= 1, C is closest to B - else closest point is between A and B -------------------------------------------------------------------------- */ - -template -KOKKOS_INLINE_FUNCTION -void RegBlockKokkos::point_on_line_segment(double *a, double *b, double *c, double *d) -{ - double ba[3], ca[3]; - - sub3(b, a, ba); - sub3(c, a, ca); - double t = dot3(ca, ba) / dot3(ba, ba); - if (t <= 0.0) { - d[0] = a[0]; - d[1] = a[1]; - d[2] = a[2]; - } else if (t >= 1.0) { - d[0] = b[0]; - d[1] = b[1]; - d[2] = b[2]; - } else { - d[0] = a[0] + t * ba[0]; - d[1] = a[1] + t * ba[1]; - d[2] = a[2] + t * ba[2]; - } -} - 
-/*------------------------------------------------------------------------ - determine if projected point is inside given face of the block ---------------------------------------------------------------------------*/ - -template -KOKKOS_INLINE_FUNCTION -double RegBlockKokkos::inside_face(double *xproj, int iface) -{ - if (iface < 2) { - if (xproj[1] > 0 && (xproj[1] < yhi - ylo) && xproj[2] > 0 && (xproj[2] < zhi - zlo)) return 1; - } else if (iface < 4) { - if (xproj[0] > 0 && (xproj[0] < (xhi - xlo)) && xproj[2] > 0 && (xproj[2] < (zhi - zlo))) - return 1; - } else { - if (xproj[0] > 0 && xproj[0] < (xhi - xlo) && xproj[1] > 0 && xproj[1] < (yhi - ylo)) return 1; - } - - return 0; -} - -/*------------------------------------------------------------------------ - return distance to closest point on surface I of block region - store closest point in xc,yc,zc ---------------------------------------------------------------------------*/ - -template -KOKKOS_INLINE_FUNCTION -double RegBlockKokkos::find_closest_point(int i, double *x, double &xc, double &yc, double &zc) -{ - double dot, d2, d2min; - double xr[3], xproj[3], p[3]; - - xr[0] = x[0] - corners[i][0][0]; - xr[1] = x[1] - corners[i][0][1]; - xr[2] = x[2] - corners[i][0][2]; - dot = face[i][0] * xr[0] + face[i][1] * xr[1] + face[i][2] * xr[2]; - xproj[0] = xr[0] - dot * face[i][0]; - xproj[1] = xr[1] - dot * face[i][1]; - xproj[2] = xr[2] - dot * face[i][2]; - - d2min = MAXDOUBLEINT; - - // check if point projects inside of face - - if (inside_face(xproj, i)) { - d2 = d2min = dot * dot; - xc = xproj[0] + corners[i][0][0]; - yc = xproj[1] + corners[i][0][1]; - zc = xproj[2] + corners[i][0][2]; - - // check each edge - - } else { - point_on_line_segment(corners[i][0], corners[i][1], x, p); - d2 = (p[0] - x[0]) * (p[0] - x[0]) + (p[1] - x[1]) * (p[1] - x[1]) + - (p[2] - x[2]) * (p[2] - x[2]); - if (d2 < d2min) { - d2min = d2; - xc = p[0]; - yc = p[1]; - zc = p[2]; - } - - point_on_line_segment(corners[i][1], 
corners[i][2], x, p); - d2 = (p[0] - x[0]) * (p[0] - x[0]) + (p[1] - x[1]) * (p[1] - x[1]) + - (p[2] - x[2]) * (p[2] - x[2]); - if (d2 < d2min) { - d2min = d2; - xc = p[0]; - yc = p[1]; - zc = p[2]; - } - - point_on_line_segment(corners[i][2], corners[i][3], x, p); - d2 = (p[0] - x[0]) * (p[0] - x[0]) + (p[1] - x[1]) * (p[1] - x[1]) + - (p[2] - x[2]) * (p[2] - x[2]); - if (d2 < d2min) { - d2min = d2; - xc = p[0]; - yc = p[1]; - zc = p[2]; - } - - point_on_line_segment(corners[i][3], corners[i][0], x, p); - d2 = (p[0] - x[0]) * (p[0] - x[0]) + (p[1] - x[1]) * (p[1] - x[1]) + - (p[2] - x[2]) * (p[2] - x[2]); - if (d2 < d2min) { - d2min = d2; - xc = p[0]; - yc = p[1]; - zc = p[2]; - } - } - - return d2min; -} - - +/* ---------------------------------------------------------------------- */ namespace LAMMPS_NS { template class RegBlockKokkos; diff --git a/src/KOKKOS/region_block_kokkos.h b/src/KOKKOS/region_block_kokkos.h index 4ca7a3fdae..117326492f 100644 --- a/src/KOKKOS/region_block_kokkos.h +++ b/src/KOKKOS/region_block_kokkos.h @@ -25,9 +25,12 @@ RegionStyle(block/kk/host,RegBlockKokkos); #include "region_block.h" #include "kokkos_type.h" +#include "math_special_kokkos.h" namespace LAMMPS_NS { +using namespace MathSpecialKokkos; + struct TagRegBlockMatchAll{}; template @@ -41,12 +44,53 @@ class RegBlockKokkos : public RegBlock { RegBlockKokkos(class LAMMPS *, int, char **); ~RegBlockKokkos() override; - KOKKOS_FUNCTION - int match_kokkos(double, double, double) const; + KOKKOS_INLINE_FUNCTION + int match_kokkos(double x, double y, double z) const + { + if (dynamic) inverse_transform(x,y,z); + if (openflag) return 1; + return !(k_inside(x,y,z) ^ interior); + } - KOKKOS_FUNCTION - int surface_kokkos(double, double, double, double); + KOKKOS_INLINE_FUNCTION + int surface_kokkos(double x, double y, double z, double cutoff) +{ + int ncontact; + double xs, ys, zs; + double xnear[3], xorig[3]; + if (dynamic) { + xorig[0] = x; xorig[1] = y; xorig[2] = z; + 
inverse_transform(x, y, z); + } + + xnear[0] = x; xnear[1] = y; xnear[2] = z; + + if (!openflag) { + if (interior) + ncontact = surface_interior_kokkos(xnear, cutoff); + else + ncontact = surface_exterior_kokkos(xnear, cutoff); + } else { + // one of surface_int/ext() will return 0 + // so no need to worry about offset of contact indices + ncontact = surface_exterior_kokkos(xnear, cutoff) + surface_interior_kokkos(xnear, cutoff); + } + + if (rotateflag && ncontact) { + for (int i = 0; i < ncontact; i++) { + xs = xnear[0] - d_contact[i].delx; + ys = xnear[1] - d_contact[i].dely; + zs = xnear[2] - d_contact[i].delz; + forward_transform(xs, ys, zs); + d_contact[i].delx = xorig[0] - xs; + d_contact[i].dely = xorig[1] - ys; + d_contact[i].delz = xorig[2] - zs; + } + } + + return ncontact; +} Kokkos::View d_contact; private: @@ -56,6 +100,7 @@ class RegBlockKokkos : public RegBlock { typename AT::t_x_array_randomread x; typename AT::t_int_1d_randomread mask; +/* KOKKOS_INLINE_FUNCTION int k_inside(double, double, double) const; KOKKOS_INLINE_FUNCTION @@ -76,6 +121,319 @@ class RegBlockKokkos : public RegBlock { double find_closest_point(int, double*, double&, double&, double&); KOKKOS_INLINE_FUNCTION double inside_face(double*, int); +*/ + +KOKKOS_INLINE_FUNCTION +int surface_interior_kokkos(double *x, double cutoff) +{ + double delta; + + // x is exterior to block + + if (x[0] < xlo || x[0] > xhi || x[1] < ylo || x[1] > yhi || x[2] < zlo || x[2] > zhi) return 0; + + // x is interior to block or on its surface + + int n = 0; + + delta = x[0] - xlo; + if (delta < cutoff && !open_faces[0]) { + d_contact[n].r = delta; + d_contact[n].delx = delta; + d_contact[n].dely = d_contact[n].delz = 0.0; + d_contact[n].radius = 0; + d_contact[n].iwall = 0; + n++; + } + delta = xhi - x[0]; + if (delta < cutoff && !open_faces[1]) { + d_contact[n].r = delta; + d_contact[n].delx = -delta; + d_contact[n].dely = d_contact[n].delz = 0.0; + d_contact[n].radius = 0; + d_contact[n].iwall = 1; + 
n++; + } + + delta = x[1] - ylo; + if (delta < cutoff && !open_faces[2]) { + d_contact[n].r = delta; + d_contact[n].dely = delta; + d_contact[n].delx = d_contact[n].delz = 0.0; + d_contact[n].radius = 0; + d_contact[n].iwall = 2; + n++; + } + delta = yhi - x[1]; + if (delta < cutoff && !open_faces[3]) { + d_contact[n].r = delta; + d_contact[n].dely = -delta; + d_contact[n].delx = d_contact[n].delz = 0.0; + d_contact[n].radius = 0; + d_contact[n].iwall = 3; + n++; + } + + delta = x[2] - zlo; + if (delta < cutoff && !open_faces[4]) { + d_contact[n].r = delta; + d_contact[n].delz = delta; + d_contact[n].delx = d_contact[n].dely = 0.0; + d_contact[n].radius = 0; + d_contact[n].iwall = 4; + n++; + } + delta = zhi - x[2]; + if (delta < cutoff && !open_faces[5]) { + d_contact[n].r = delta; + d_contact[n].delz = -delta; + d_contact[n].delx = d_contact[n].dely = 0.0; + d_contact[n].radius = 0; + d_contact[n].iwall = 5; + n++; + } + + return n; +} + +KOKKOS_INLINE_FUNCTION +int surface_exterior_kokkos(double *x, double cutoff) +{ + double xp, yp, zp; + double xc, yc, zc, dist, mindist; + + // x is far enough from block that there is no contact + // x is interior to block + + if (x[0] <= xlo - cutoff || x[0] >= xhi + cutoff || x[1] <= ylo - cutoff || + x[1] >= yhi + cutoff || x[2] <= zlo - cutoff || x[2] >= zhi + cutoff) + return 0; + if (x[0] > xlo && x[0] < xhi && x[1] > ylo && x[1] < yhi && x[2] > zlo && x[2] < zhi) return 0; + + // x is exterior to block or on its surface + // xp,yp,zp = point on surface of block that x is closest to + // could be edge or corner pt of block + // do not add contact point if r >= cutoff + + if (!openflag) { + if (x[0] < xlo) + xp = xlo; + else if (x[0] > xhi) + xp = xhi; + else + xp = x[0]; + if (x[1] < ylo) + yp = ylo; + else if (x[1] > yhi) + yp = yhi; + else + yp = x[1]; + if (x[2] < zlo) + zp = zlo; + else if (x[2] > zhi) + zp = zhi; + else + zp = x[2]; + } else { + mindist = MAXDOUBLEINT; + for (int i = 0; i < 6; i++) { + if 
(open_faces[i]) continue; + dist = find_closest_point(i, x, xc, yc, zc); + if (dist < mindist) { + xp = xc; + yp = yc; + zp = zc; + mindist = dist; + } + } + } + + add_contact(0, x, xp, yp, zp); + d_contact[0].iwall = 0; + if (d_contact[0].r < cutoff) return 1; + return 0; +} + +KOKKOS_INLINE_FUNCTION +void add_contact(int n, double *x, double xp, double yp, double zp) +{ + double delx = x[0] - xp; + double dely = x[1] - yp; + double delz = x[2] - zp; + d_contact[n].r = sqrt(delx * delx + dely * dely + delz * delz); + d_contact[n].radius = 0; + d_contact[n].delx = delx; + d_contact[n].dely = dely; + d_contact[n].delz = delz; +} + +KOKKOS_INLINE_FUNCTION +int k_inside(double x, double y, double z) const +{ + if (x >= xlo && x <= xhi && y >= ylo && y <= yhi && z >= zlo && z <= zhi) + return 1; + return 0; +} + +KOKKOS_INLINE_FUNCTION +void forward_transform(double &x, double &y, double &z) const +{ + if (rotateflag) rotate(x, y, z, theta); + if (moveflag) { + x += dx; + y += dy; + z += dz; + } +} + +KOKKOS_INLINE_FUNCTION +void inverse_transform(double &x, double &y, double &z) const +{ + if (moveflag) { + x -= dx; + y -= dy; + z -= dz; + } + if (rotateflag) rotate(x,y,z,-theta); +} + +KOKKOS_INLINE_FUNCTION +void rotate(double &x, double &y, double &z, double angle) const +{ + double a[3],b[3],c[3],d[3],disp[3]; + + double sine = sin(angle); + double cosine = cos(angle); + d[0] = x - point[0]; + d[1] = y - point[1]; + d[2] = z - point[2]; + double x0dotr = d[0]*runit[0] + d[1]*runit[1] + d[2]*runit[2]; + c[0] = x0dotr * runit[0]; + c[1] = x0dotr * runit[1]; + c[2] = x0dotr * runit[2]; + a[0] = d[0] - c[0]; + a[1] = d[1] - c[1]; + a[2] = d[2] - c[2]; + b[0] = runit[1]*a[2] - runit[2]*a[1]; + b[1] = runit[2]*a[0] - runit[0]*a[2]; + b[2] = runit[0]*a[1] - runit[1]*a[0]; + disp[0] = a[0]*cosine + b[0]*sine; + disp[1] = a[1]*cosine + b[1]*sine; + disp[2] = a[2]*cosine + b[2]*sine; + x = point[0] + c[0] + disp[0]; + y = point[1] + c[1] + disp[1]; + z = point[2] + c[2] + 
disp[2]; +} + +KOKKOS_INLINE_FUNCTION +void point_on_line_segment(double *a, double *b, double *c, double *d) +{ + double ba[3], ca[3]; + + sub3(b, a, ba); + sub3(c, a, ca); + double t = dot3(ca, ba) / dot3(ba, ba); + if (t <= 0.0) { + d[0] = a[0]; + d[1] = a[1]; + d[2] = a[2]; + } else if (t >= 1.0) { + d[0] = b[0]; + d[1] = b[1]; + d[2] = b[2]; + } else { + d[0] = a[0] + t * ba[0]; + d[1] = a[1] + t * ba[1]; + d[2] = a[2] + t * ba[2]; + } +} + +KOKKOS_INLINE_FUNCTION +double inside_face(double *xproj, int iface) +{ + if (iface < 2) { + if (xproj[1] > 0 && (xproj[1] < yhi - ylo) && xproj[2] > 0 && (xproj[2] < zhi - zlo)) return 1; + } else if (iface < 4) { + if (xproj[0] > 0 && (xproj[0] < (xhi - xlo)) && xproj[2] > 0 && (xproj[2] < (zhi - zlo))) + return 1; + } else { + if (xproj[0] > 0 && xproj[0] < (xhi - xlo) && xproj[1] > 0 && xproj[1] < (yhi - ylo)) return 1; + } + + return 0; +} + +KOKKOS_INLINE_FUNCTION +double find_closest_point(int i, double *x, double &xc, double &yc, double &zc) +{ + double dot, d2, d2min; + double xr[3], xproj[3], p[3]; + + xr[0] = x[0] - corners[i][0][0]; + xr[1] = x[1] - corners[i][0][1]; + xr[2] = x[2] - corners[i][0][2]; + dot = face[i][0] * xr[0] + face[i][1] * xr[1] + face[i][2] * xr[2]; + xproj[0] = xr[0] - dot * face[i][0]; + xproj[1] = xr[1] - dot * face[i][1]; + xproj[2] = xr[2] - dot * face[i][2]; + + d2min = MAXDOUBLEINT; + + // check if point projects inside of face + + if (inside_face(xproj, i)) { + d2 = d2min = dot * dot; + xc = xproj[0] + corners[i][0][0]; + yc = xproj[1] + corners[i][0][1]; + zc = xproj[2] + corners[i][0][2]; + + // check each edge + + } else { + point_on_line_segment(corners[i][0], corners[i][1], x, p); + d2 = (p[0] - x[0]) * (p[0] - x[0]) + (p[1] - x[1]) * (p[1] - x[1]) + + (p[2] - x[2]) * (p[2] - x[2]); + if (d2 < d2min) { + d2min = d2; + xc = p[0]; + yc = p[1]; + zc = p[2]; + } + + point_on_line_segment(corners[i][1], corners[i][2], x, p); + d2 = (p[0] - x[0]) * (p[0] - x[0]) + (p[1] - x[1]) * 
(p[1] - x[1]) + + (p[2] - x[2]) * (p[2] - x[2]); + if (d2 < d2min) { + d2min = d2; + xc = p[0]; + yc = p[1]; + zc = p[2]; + } + + point_on_line_segment(corners[i][2], corners[i][3], x, p); + d2 = (p[0] - x[0]) * (p[0] - x[0]) + (p[1] - x[1]) * (p[1] - x[1]) + + (p[2] - x[2]) * (p[2] - x[2]); + if (d2 < d2min) { + d2min = d2; + xc = p[0]; + yc = p[1]; + zc = p[2]; + } + + point_on_line_segment(corners[i][3], corners[i][0], x, p); + d2 = (p[0] - x[0]) * (p[0] - x[0]) + (p[1] - x[1]) * (p[1] - x[1]) + + (p[2] - x[2]) * (p[2] - x[2]); + if (d2 < d2min) { + d2min = d2; + xc = p[0]; + yc = p[1]; + zc = p[2]; + } + } + + return d2min; +} + }; diff --git a/src/KOKKOS/region_sphere_kokkos.cpp b/src/KOKKOS/region_sphere_kokkos.cpp index f14a0d9ed8..477ca30149 100644 --- a/src/KOKKOS/region_sphere_kokkos.cpp +++ b/src/KOKKOS/region_sphere_kokkos.cpp @@ -43,257 +43,7 @@ RegSphereKokkos::~RegSphereKokkos() memoryKK->destroy_kokkos(d_contact); } -/* ---------------------------------------------------------------------- - determine if point x,y,z is a match to region volume - XOR computes 0 if 2 args are the same, 1 if different - note that k_inside() returns 1 for points on surface of region - thus point on surface of exterior region will not match - if region has variable shape, invoke shape_update() once per timestep - if region is dynamic, apply inverse transform to x,y,z - unmove first, then unrotate, so don't have to change rotation point - caller is responsible for wrapping this call with - modify->clearstep_compute() and modify->addstep_compute() if needed -------------------------------------------------------------------------- */ - -template -KOKKOS_FUNCTION -int RegSphereKokkos::match_kokkos(double x, double y, double z) const -{ - if (dynamic) inverse_transform(x,y,z); - if (openflag) return 1; - return !(k_inside(x,y,z) ^ interior); -} - -/* ---------------------------------------------------------------------- - generate list of contact points for interior or 
exterior regions - if region has variable shape, invoke shape_update() once per timestep - if region is dynamic: - before: inverse transform x,y,z (unmove, then unrotate) - after: forward transform contact point xs,yx,zs (rotate, then move), - then reset contact delx,dely,delz based on new contact point - no need to do this if no rotation since delxyz doesn't change - caller is responsible for wrapping this call with - modify->clearstep_compute() and modify->addstep_compute() if needed -------------------------------------------------------------------------- */ - -template -KOKKOS_FUNCTION -int RegSphereKokkos::surface_kokkos(double x, double y, double z, double cutoff) -{ - int ncontact; - double xs, ys, zs; - double xnear[3], xorig[3]; - - if (dynamic) { - xorig[0] = x; xorig[1] = y; xorig[2] = z; - inverse_transform(x, y, z); - } - - xnear[0] = x; xnear[1] = y; xnear[2] = z; - - if (!openflag) { - if (interior) - ncontact = surface_interior_kokkos(xnear, cutoff); - else - ncontact = surface_exterior_kokkos(xnear, cutoff); - } else { - // one of surface_int/ext() will return 0 - // so no need to worry about offset of contact indices - ncontact = surface_exterior_kokkos(xnear, cutoff) + surface_interior_kokkos(xnear, cutoff); - } - - if (rotateflag && ncontact) { - for (int i = 0; i < ncontact; i++) { - xs = xnear[0] - d_contact[i].delx; - ys = xnear[1] - d_contact[i].dely; - zs = xnear[2] - d_contact[i].delz; - forward_transform(xs, ys, zs); - d_contact[i].delx = xorig[0] - xs; - d_contact[i].dely = xorig[1] - ys; - d_contact[i].delz = xorig[2] - zs; - } - } - - return ncontact; -} - -/* ---------------------------------------------------------------------- - one contact if 0 <= x < cutoff from inner surface of sphere - no contact if outside (possible if called from union/intersect) - delxyz = vector from nearest point on sphere to x - special case: no contact if x is at center of sphere -------------------------------------------------------------------------- 
*/ - -template -KOKKOS_INLINE_FUNCTION -int RegSphereKokkos::surface_interior_kokkos(double *x, double cutoff) -{ - double delx = x[0] - xc; - double dely = x[1] - yc; - double delz = x[2] - zc; - double r = sqrt(delx * delx + dely * dely + delz * delz); - if (r > radius || r == 0.0) return 0; - - double delta = radius - r; - if (delta < cutoff) { - d_contact[0].r = delta; - d_contact[0].delx = delx * (1.0 - radius / r); - d_contact[0].dely = dely * (1.0 - radius / r); - d_contact[0].delz = delz * (1.0 - radius / r); - d_contact[0].radius = -radius; - d_contact[0].iwall = 0; - d_contact[0].varflag = 1; - return 1; - } - return 0; -} - -/* ---------------------------------------------------------------------- - one contact if 0 <= x < cutoff from outer surface of sphere - no contact if inside (possible if called from union/intersect) - delxyz = vector from nearest point on sphere to x -------------------------------------------------------------------------- */ - -template -KOKKOS_INLINE_FUNCTION -int RegSphereKokkos::surface_exterior_kokkos(double *x, double cutoff) -{ - double delx = x[0] - xc; - double dely = x[1] - yc; - double delz = x[2] - zc; - double r = sqrt(delx * delx + dely * dely + delz * delz); - if (r < radius) return 0; - - double delta = r - radius; - if (delta < cutoff) { - d_contact[0].r = delta; - d_contact[0].delx = delx * (1.0 - radius / r); - d_contact[0].dely = dely * (1.0 - radius / r); - d_contact[0].delz = delz * (1.0 - radius / r); - d_contact[0].radius = radius; - d_contact[0].iwall = 0; - d_contact[0].varflag = 1; - return 1; - } - return 0; -} - -/* ---------------------------------------------------------------------- - add a single contact at Nth location in contact array - x = particle position - xp,yp,zp = region surface point -------------------------------------------------------------------------- */ - -template -KOKKOS_INLINE_FUNCTION -void RegSphereKokkos::add_contact(int n, double *x, double xp, double yp, double zp) -{ - 
double delx = x[0] - xp; - double dely = x[1] - yp; - double delz = x[2] - zp; - d_contact[n].r = sqrt(delx * delx + dely * dely + delz * delz); - d_contact[n].radius = 0; - d_contact[n].delx = delx; - d_contact[n].dely = dely; - d_contact[n].delz = delz; -} - -/* ---------------------------------------------------------------------- - inside = 1 if x,y,z is inside or on surface - inside = 0 if x,y,z is outside and not on surface -------------------------------------------------------------------------- */ - -template -KOKKOS_INLINE_FUNCTION -int RegSphereKokkos::k_inside(double x, double y, double z) const -{ - const double delx = x - xc; - const double dely = y - yc; - const double delz = z - zc; - const double r = sqrt(delx * delx + dely * dely + delz * delz); - - if (r <= radius) return 1; - return 0; -} - -/* ---------------------------------------------------------------------- - transform a point x,y,z in region space to moved space - rotate first (around original P), then displace -------------------------------------------------------------------------- */ - -template -KOKKOS_INLINE_FUNCTION -void RegSphereKokkos::forward_transform(double &x, double &y, double &z) const -{ - if (rotateflag) rotate(x, y, z, theta); - if (moveflag) { - x += dx; - y += dy; - z += dz; - } -} - -/* ---------------------------------------------------------------------- - transform a point x,y,z in moved space back to region space - undisplace first, then unrotate (around original P) -------------------------------------------------------------------------- */ - -template -KOKKOS_INLINE_FUNCTION -void RegSphereKokkos::inverse_transform(double &x, double &y, double &z) const -{ - if (moveflag) { - x -= dx; - y -= dy; - z -= dz; - } - if (rotateflag) rotate(x,y,z,-theta); -} - -/* ---------------------------------------------------------------------- - rotate x,y,z by angle via right-hand rule around point and runit normal - sign of angle determines whether rotating 
forward/backward in time - return updated x,y,z - R = vector axis of rotation - P = point = point to rotate around - R0 = runit = unit vector for R - X0 = x,y,z = initial coord of atom - D = X0 - P = vector from P to X0 - C = (D dot R0) R0 = projection of D onto R, i.e. Dparallel - A = D - C = vector from R line to X0, i.e. Dperp - B = R0 cross A = vector perp to A in plane of rotation, same len as A - A,B define plane of circular rotation around R line - new x,y,z = P + C + A cos(angle) + B sin(angle) -------------------------------------------------------------------------- */ - -template -KOKKOS_INLINE_FUNCTION -void RegSphereKokkos::rotate(double &x, double &y, double &z, double angle) const -{ - double a[3],b[3],c[3],d[3],disp[3]; - - double sine = sin(angle); - double cosine = cos(angle); - d[0] = x - point[0]; - d[1] = y - point[1]; - d[2] = z - point[2]; - double x0dotr = d[0]*runit[0] + d[1]*runit[1] + d[2]*runit[2]; - c[0] = x0dotr * runit[0]; - c[1] = x0dotr * runit[1]; - c[2] = x0dotr * runit[2]; - a[0] = d[0] - c[0]; - a[1] = d[1] - c[1]; - a[2] = d[2] - c[2]; - b[0] = runit[1]*a[2] - runit[2]*a[1]; - b[1] = runit[2]*a[0] - runit[0]*a[2]; - b[2] = runit[0]*a[1] - runit[1]*a[0]; - disp[0] = a[0]*cosine + b[0]*sine; - disp[1] = a[1]*cosine + b[1]*sine; - disp[2] = a[2]*cosine + b[2]*sine; - x = point[0] + c[0] + disp[0]; - y = point[1] + c[1] + disp[1]; - z = point[2] + c[2] + disp[2]; -} +/* ---------------------------------------------------------------------- */ namespace LAMMPS_NS { template class RegSphereKokkos; diff --git a/src/KOKKOS/region_sphere_kokkos.h b/src/KOKKOS/region_sphere_kokkos.h index 61a09275f4..e56b1b714e 100644 --- a/src/KOKKOS/region_sphere_kokkos.h +++ b/src/KOKKOS/region_sphere_kokkos.h @@ -39,31 +39,179 @@ class RegSphereKokkos : public RegSphere { RegSphereKokkos(class LAMMPS *, int, char **); ~RegSphereKokkos() override; - KOKKOS_FUNCTION - int match_kokkos(double, double, double) const; + KOKKOS_INLINE_FUNCTION + int 
match_kokkos(double x, double y, double z) const + { + if (dynamic) inverse_transform(x,y,z); + if (openflag) return 1; + return !(k_inside(x,y,z) ^ interior); + } - KOKKOS_FUNCTION - int surface_kokkos(double, double, double, double); + KOKKOS_INLINE_FUNCTION + int surface_kokkos(double x, double y, double z, double cutoff) +{ + int ncontact; + double xs, ys, zs; + double xnear[3], xorig[3]; + + if (dynamic) { + xorig[0] = x; xorig[1] = y; xorig[2] = z; + inverse_transform(x, y, z); + } + + xnear[0] = x; xnear[1] = y; xnear[2] = z; + + if (!openflag) { + if (interior) + ncontact = surface_interior_kokkos(xnear, cutoff); + else + ncontact = surface_exterior_kokkos(xnear, cutoff); + } else { + // one of surface_int/ext() will return 0 + // so no need to worry about offset of contact indices + ncontact = surface_exterior_kokkos(xnear, cutoff) + surface_interior_kokkos(xnear, cutoff); + } + + if (rotateflag && ncontact) { + for (int i = 0; i < ncontact; i++) { + xs = xnear[0] - d_contact[i].delx; + ys = xnear[1] - d_contact[i].dely; + zs = xnear[2] - d_contact[i].delz; + forward_transform(xs, ys, zs); + d_contact[i].delx = xorig[0] - xs; + d_contact[i].dely = xorig[1] - ys; + d_contact[i].delz = xorig[2] - zs; + } + } + + return ncontact; +} Kokkos::View d_contact; private: - KOKKOS_INLINE_FUNCTION - int k_inside(double, double, double) const; - KOKKOS_INLINE_FUNCTION - void forward_transform(double &, double &, double &) const; - KOKKOS_INLINE_FUNCTION - void inverse_transform(double &, double &, double &) const; - KOKKOS_INLINE_FUNCTION - void rotate(double &, double &, double &, double) const; - KOKKOS_INLINE_FUNCTION - void add_contact(int, double *, double, double, double); - KOKKOS_INLINE_FUNCTION - int surface_interior_kokkos(double *, double); - KOKKOS_INLINE_FUNCTION - int surface_exterior_kokkos(double *, double); +KOKKOS_INLINE_FUNCTION +int surface_interior_kokkos(double *x, double cutoff) +{ + double delx = x[0] - xc; + double dely = x[1] - yc; + double 
delz = x[2] - zc; + double r = sqrt(delx * delx + dely * dely + delz * delz); + if (r > radius || r == 0.0) return 0; + + double delta = radius - r; + if (delta < cutoff) { + d_contact[0].r = delta; + d_contact[0].delx = delx * (1.0 - radius / r); + d_contact[0].dely = dely * (1.0 - radius / r); + d_contact[0].delz = delz * (1.0 - radius / r); + d_contact[0].radius = -radius; + d_contact[0].iwall = 0; + d_contact[0].varflag = 1; + return 1; + } + return 0; +} + +KOKKOS_INLINE_FUNCTION +int surface_exterior_kokkos(double *x, double cutoff) +{ + double delx = x[0] - xc; + double dely = x[1] - yc; + double delz = x[2] - zc; + double r = sqrt(delx * delx + dely * dely + delz * delz); + if (r < radius) return 0; + + double delta = r - radius; + if (delta < cutoff) { + d_contact[0].r = delta; + d_contact[0].delx = delx * (1.0 - radius / r); + d_contact[0].dely = dely * (1.0 - radius / r); + d_contact[0].delz = delz * (1.0 - radius / r); + d_contact[0].radius = radius; + d_contact[0].iwall = 0; + d_contact[0].varflag = 1; + return 1; + } + return 0; +} +KOKKOS_INLINE_FUNCTION +void add_contact(int n, double *x, double xp, double yp, double zp) +{ + double delx = x[0] - xp; + double dely = x[1] - yp; + double delz = x[2] - zp; + d_contact[n].r = sqrt(delx * delx + dely * dely + delz * delz); + d_contact[n].radius = 0; + d_contact[n].delx = delx; + d_contact[n].dely = dely; + d_contact[n].delz = delz; +} + +KOKKOS_INLINE_FUNCTION +int k_inside(double x, double y, double z) const +{ + const double delx = x - xc; + const double dely = y - yc; + const double delz = z - zc; + const double r = sqrt(delx * delx + dely * dely + delz * delz); + + if (r <= radius) return 1; + return 0; +} + +KOKKOS_INLINE_FUNCTION +void forward_transform(double &x, double &y, double &z) const +{ + if (rotateflag) rotate(x, y, z, theta); + if (moveflag) { + x += dx; + y += dy; + z += dz; + } +} + +KOKKOS_INLINE_FUNCTION +void inverse_transform(double &x, double &y, double &z) const +{ + if (moveflag) 
{ + x -= dx; + y -= dy; + z -= dz; + } + if (rotateflag) rotate(x,y,z,-theta); +} + +KOKKOS_INLINE_FUNCTION +void rotate(double &x, double &y, double &z, double angle) const +{ + double a[3],b[3],c[3],d[3],disp[3]; + + double sine = sin(angle); + double cosine = cos(angle); + d[0] = x - point[0]; + d[1] = y - point[1]; + d[2] = z - point[2]; + double x0dotr = d[0]*runit[0] + d[1]*runit[1] + d[2]*runit[2]; + c[0] = x0dotr * runit[0]; + c[1] = x0dotr * runit[1]; + c[2] = x0dotr * runit[2]; + a[0] = d[0] - c[0]; + a[1] = d[1] - c[1]; + a[2] = d[2] - c[2]; + b[0] = runit[1]*a[2] - runit[2]*a[1]; + b[1] = runit[2]*a[0] - runit[0]*a[2]; + b[2] = runit[0]*a[1] - runit[1]*a[0]; + disp[0] = a[0]*cosine + b[0]*sine; + disp[1] = a[1]*cosine + b[1]*sine; + disp[2] = a[2]*cosine + b[2]*sine; + x = point[0] + c[0] + disp[0]; + y = point[1] + c[1] + disp[1]; + z = point[2] + c[2] + disp[2]; +} + }; From 147cf7face21e41d231e85d381502fecbd12b0b6 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 9 Oct 2024 17:22:35 -0400 Subject: [PATCH 188/294] -diag_suppress (single dash) --- cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index afae18745f..02352230a6 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -141,7 +141,7 @@ endif() # silence nvcc warnings if((PKG_KOKKOS) AND (Kokkos_ENABLE_CUDA) AND NOT (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")) - set(CMAKE_TUNE_DEFAULT "${CMAKE_TUNE_DEFAULT} -Xcudafe --diag_suppress=unrecognized_pragma -Xcudafe --diag_suppress=128") + set(CMAKE_TUNE_DEFAULT "${CMAKE_TUNE_DEFAULT} -Xcudafe --diag_suppress=unrecognized_pragma -Xcudafe -diag_suppress=128") endif() # we require C++11 without extensions. 
Kokkos requires at least C++17 (currently) From 79c9d69931f9d24891d3ef404efaca7eecba6dda Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 9 Oct 2024 17:26:30 -0400 Subject: [PATCH 189/294] Update CMakeLists.txt --- cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 02352230a6..89087c2ea9 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -141,7 +141,7 @@ endif() # silence nvcc warnings if((PKG_KOKKOS) AND (Kokkos_ENABLE_CUDA) AND NOT (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")) - set(CMAKE_TUNE_DEFAULT "${CMAKE_TUNE_DEFAULT} -Xcudafe --diag_suppress=unrecognized_pragma -Xcudafe -diag_suppress=128") + set(CMAKE_TUNE_DEFAULT "${CMAKE_TUNE_DEFAULT} -Xcudafe --diag_suppress=unrecognized_pragma -Xcudafe -diag-suppress 128") endif() # we require C++11 without extensions. Kokkos requires at least C++17 (currently) From 3b60ae43b3f733909808e6f1a2067a4764d677ce Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 9 Oct 2024 17:44:49 -0400 Subject: [PATCH 190/294] fix nvcc option --- src/MAKE/MACHINES/Makefile.perlmutter_kokkos | 2 +- src/MAKE/MACHINES/Makefile.summit_kokkos | 2 +- src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/MAKE/MACHINES/Makefile.perlmutter_kokkos b/src/MAKE/MACHINES/Makefile.perlmutter_kokkos index 81164aa040..26df2dc17e 100644 --- a/src/MAKE/MACHINES/Makefile.perlmutter_kokkos +++ b/src/MAKE/MACHINES/Makefile.perlmutter_kokkos @@ -9,7 +9,7 @@ SHELL = /bin/sh KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd) CC = $(KOKKOS_ABSOLUTE_PATH)/bin/nvcc_wrapper -CCFLAGS = -g -O3 -DNDEBUG -Xcudafe --diag_suppress=unrecognized_pragma -Xcudafe --diag_suppress=128 +CCFLAGS = -g -O3 -DNDEBUG -Xcudafe --diag_suppress=unrecognized_pragma -Xcudafe -diag-suppress 128 SHFLAGS = -fPIC DEPFLAGS = -M diff --git a/src/MAKE/MACHINES/Makefile.summit_kokkos b/src/MAKE/MACHINES/Makefile.summit_kokkos index 
57c25702aa..2207b0ff56 100644 --- a/src/MAKE/MACHINES/Makefile.summit_kokkos +++ b/src/MAKE/MACHINES/Makefile.summit_kokkos @@ -9,7 +9,7 @@ SHELL = /bin/sh KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd) CC = $(KOKKOS_ABSOLUTE_PATH)/bin/nvcc_wrapper -CCFLAGS = -g -O3 -DNDEBUG -Xcudafe --diag_suppress=unrecognized_pragma -Xcudafe --diag_suppress=128 +CCFLAGS = -g -O3 -DNDEBUG -Xcudafe --diag_suppress=unrecognized_pragma -Xcudafe -diag-suppress 128 SHFLAGS = -fPIC DEPFLAGS = -M diff --git a/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi b/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi index fd173b5588..720ee5ce2e 100644 --- a/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi +++ b/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi @@ -10,7 +10,7 @@ KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd) export MPICH_CXX = $(KOKKOS_ABSOLUTE_PATH)/bin/nvcc_wrapper export OMPI_CXX = $(KOKKOS_ABSOLUTE_PATH)/bin/nvcc_wrapper CC = mpicxx -CCFLAGS = -g -O3 -DNDEBUG -Xcudafe --diag_suppress=unrecognized_pragma -Xcudafe --diag_suppress=128 +CCFLAGS = -g -O3 -DNDEBUG -Xcudafe --diag_suppress=unrecognized_pragma -Xcudafe -diag-suppress 128 SHFLAGS = -fPIC # uncomment when compiling with Intel 21.5 or older FMTFLAGS = # -std=c++11 From badb3eae5fc76abb26cb1d13c694025ba3e93a86 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 9 Oct 2024 18:06:24 -0400 Subject: [PATCH 191/294] add tests with region sphere --- ... 
=> fix-timestep-efield_region_block.yaml} | 0 .../fix-timestep-efield_region_sphere.yaml | 82 +++++++++++++++++++ ...> fix-timestep-setforce_region_block.yaml} | 0 .../fix-timestep-setforce_region_sphere.yaml | 78 ++++++++++++++++++ 4 files changed, 160 insertions(+) rename unittest/force-styles/tests/{fix-timestep-efield_region.yaml => fix-timestep-efield_region_block.yaml} (100%) create mode 100644 unittest/force-styles/tests/fix-timestep-efield_region_sphere.yaml rename unittest/force-styles/tests/{fix-timestep-setforce_region.yaml => fix-timestep-setforce_region_block.yaml} (100%) create mode 100644 unittest/force-styles/tests/fix-timestep-setforce_region_sphere.yaml diff --git a/unittest/force-styles/tests/fix-timestep-efield_region.yaml b/unittest/force-styles/tests/fix-timestep-efield_region_block.yaml similarity index 100% rename from unittest/force-styles/tests/fix-timestep-efield_region.yaml rename to unittest/force-styles/tests/fix-timestep-efield_region_block.yaml diff --git a/unittest/force-styles/tests/fix-timestep-efield_region_sphere.yaml b/unittest/force-styles/tests/fix-timestep-efield_region_sphere.yaml new file mode 100644 index 0000000000..ac3748ae11 --- /dev/null +++ b/unittest/force-styles/tests/fix-timestep-efield_region_sphere.yaml @@ -0,0 +1,82 @@ +--- +lammps_version: 29 Aug 2024 +date_generated: Wed Oct 9 18:02:13 2024 +epsilon: 2e-13 +skip_tests: +prerequisites: ! | + atom full + fix efield +pre_commands: ! "" +post_commands: ! | + region 1 sphere 0 0 0 1 + fix move all nve + fix test solute efield 0.1 0.1 0.1 region 1 + fix_modify test virial yes +input_file: in.fourmol +natoms: 29 +run_stress: ! |2- + 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +global_scalar: 0 +global_vector: ! |- + 3 0 0 0 +run_pos: ! 
|2 + 1 -2.7045559775384032e-01 2.4912159905679729e+00 -1.6695851791541888e-01 + 2 3.1004029573899528e-01 2.9612354631094391e+00 -8.5466363037021464e-01 + 3 -7.0398551400789477e-01 1.2305509955830618e+00 -6.2777526944456274e-01 + 4 -1.5818159336499285e+00 1.4837407818929933e+00 -1.2538710836062004e+00 + 5 -9.0719763672789266e-01 9.2652103885675297e-01 3.9954210488374786e-01 + 6 2.4831720524855985e-01 2.8313021497871271e-01 -1.2314233331711453e+00 + 7 3.4143527641386412e-01 -2.2646551041391422e-02 -2.5292291414903052e+00 + 8 1.1743552229100009e+00 -4.8863228565853950e-01 -6.3783432910825522e-01 + 9 1.3800524229500313e+00 -2.5274721030406683e-01 2.8353985887095157e-01 + 10 2.0510765220543883e+00 -1.4604063740302866e+00 -9.8323745081712954e-01 + 11 1.7878031944442556e+00 -1.9921863272948861e+00 -1.8890602447625777e+00 + 12 3.0063007039340053e+00 -4.9013350496963293e-01 -1.6231898107386229e+00 + 13 4.0515402959192999e+00 -8.9202011606653986e-01 -1.6400005529924957e+00 + 14 2.6066963345543819e+00 -4.1789253965514156e-01 -2.6634003608794394e+00 + 15 2.9695287185712913e+00 5.5422613165234036e-01 -1.2342022021790127e+00 + 16 2.6747029695228521e+00 -2.4124119054564295e+00 -2.3435746150616148e-02 + 17 2.2153577785283796e+00 -2.0897985186907717e+00 1.1963150794479436e+00 + 18 2.1369701704115704e+00 3.0158507413630606e+00 -3.5179348337215015e+00 + 19 1.5355837136087378e+00 2.6255292355375675e+00 -4.2353987779879052e+00 + 20 2.7727573005678776e+00 3.6923910449610169e+00 -3.9330842459133493e+00 + 21 4.9040128073204299e+00 -4.0752348172957946e+00 -3.6210314709891711e+00 + 22 4.3582355554440841e+00 -4.2126119427287048e+00 -4.4612844196314052e+00 + 23 5.7439382849307599e+00 -3.5821957939275029e+00 -3.8766361295935821e+00 + 24 2.0689243582422630e+00 3.1513346907271012e+00 3.1550389754828800e+00 + 25 1.3045351331492134e+00 3.2665125705842848e+00 2.5111855257433504e+00 + 26 2.5809237402711274e+00 4.0117602605482832e+00 3.2212060529089896e+00 + 27 -1.9611343130357228e+00 
-4.3563411931359752e+00 2.1098293115523705e+00 + 28 -2.7473562684513411e+00 -4.0200819932379330e+00 1.5830052163433954e+00 + 29 -1.3126000191359855e+00 -3.5962518039482929e+00 2.2746342468737835e+00 +run_vel: ! |2 + 1 8.1705744183262364e-03 1.6516406176274284e-02 4.7902264318912908e-03 + 2 5.4501493445687802e-03 5.1791699408496447e-03 -1.4372931530376594e-03 + 3 -8.2298292722385660e-03 -1.2926551614621379e-02 -4.0984181178163794e-03 + 4 -3.7699042590093549e-03 -6.5722892098813894e-03 -1.1184640360133316e-03 + 5 -1.1021961004346582e-02 -9.8906780939336109e-03 -2.8410737829284421e-03 + 6 -3.9676663166400034e-02 4.6817061464710256e-02 3.7148491979476124e-02 + 7 9.1033953013898601e-04 -1.0128524411938794e-02 -5.1568251805019748e-02 + 8 7.9064712058855690e-03 -3.3507254552631780e-03 3.4557098492564636e-02 + 9 1.5644176117320919e-03 3.7365546102722177e-03 1.5047408822037646e-02 + 10 2.9201446820573178e-02 -2.9249578745486140e-02 -1.5018077424322538e-02 + 11 -4.7835961513517560e-03 -3.7481385134185202e-03 -2.3464104142290089e-03 + 12 2.2696451841920581e-03 -3.4774154398129452e-04 -3.0640770327796884e-03 + 13 2.7531740451953108e-03 5.8171061612840597e-03 -7.9467454022159748e-04 + 14 3.5246182371994170e-03 -5.7939995585585503e-03 -3.9478431172751327e-03 + 15 -1.8547943640122978e-03 -5.8554729942777769e-03 6.2938485140538701e-03 + 16 1.8681499973445235e-02 -1.3262466204585334e-02 -4.5638651457003243e-02 + 17 -1.2896269981100382e-02 9.7527665265956451e-03 3.7296535360836762e-02 + 18 -8.0065794848261610e-04 -8.6270473212554308e-04 -1.4483040697508777e-03 + 19 1.2452390836182583e-03 -2.5061097118772749e-03 7.2998631009713062e-03 + 20 3.5930060229597072e-03 3.6938860309252974e-03 3.2322732687893115e-03 + 21 -1.4689220370766539e-03 -2.7352129761527648e-04 7.0581624215243120e-04 + 22 -7.0694199254630382e-03 -4.2577148924878598e-03 2.8079117614252034e-04 + 23 6.0446963117374939e-03 -1.4000131614795382e-03 2.5819754847014320e-03 + 24 3.1926367902287864e-04 -9.9445664749276113e-04 
1.4999996959365281e-04 + 25 1.3789754514814445e-04 -4.4335894884532700e-03 -8.1808136725080140e-04 + 26 2.0485904035217606e-03 2.7813358633835958e-03 4.3245727149206761e-03 + 27 4.5604120293369819e-04 -1.0305523026921102e-03 2.1188058381358391e-04 + 28 -6.2544520861855151e-03 1.4127711176146864e-03 -1.8429821884794260e-03 + 29 6.4110631534402261e-04 3.1273432719593807e-03 3.7253671105656745e-03 +... diff --git a/unittest/force-styles/tests/fix-timestep-setforce_region.yaml b/unittest/force-styles/tests/fix-timestep-setforce_region_block.yaml similarity index 100% rename from unittest/force-styles/tests/fix-timestep-setforce_region.yaml rename to unittest/force-styles/tests/fix-timestep-setforce_region_block.yaml diff --git a/unittest/force-styles/tests/fix-timestep-setforce_region_sphere.yaml b/unittest/force-styles/tests/fix-timestep-setforce_region_sphere.yaml new file mode 100644 index 0000000000..25990ef478 --- /dev/null +++ b/unittest/force-styles/tests/fix-timestep-setforce_region_sphere.yaml @@ -0,0 +1,78 @@ +--- +lammps_version: 29 Aug 2024 +date_generated: Wed Oct 9 17:59:56 2024 +epsilon: 5e-12 +skip_tests: +prerequisites: ! | + atom full + fix setforce +pre_commands: ! "" +post_commands: ! | + region 1 sphere 0 0 0 1 + fix move all nve + fix test solute setforce 0.0 0.0 0.0 region 1 +input_file: in.fourmol +natoms: 29 +global_vector: ! |- + 3 0 0 0 +run_pos: ! 
|2 + 1 -2.7045559775384032e-01 2.4912159905679729e+00 -1.6695851791541888e-01 + 2 3.1004029573899528e-01 2.9612354631094391e+00 -8.5466363037021464e-01 + 3 -7.0398551400789477e-01 1.2305509955830618e+00 -6.2777526944456274e-01 + 4 -1.5818159336499285e+00 1.4837407818929933e+00 -1.2538710836062004e+00 + 5 -9.0719763672789266e-01 9.2652103885675297e-01 3.9954210488374786e-01 + 6 2.4831720524855985e-01 2.8313021497871271e-01 -1.2314233331711453e+00 + 7 3.4143527641386412e-01 -2.2646551041391422e-02 -2.5292291414903052e+00 + 8 1.1743552229100009e+00 -4.8863228565853950e-01 -6.3783432910825522e-01 + 9 1.3800524229500313e+00 -2.5274721030406683e-01 2.8353985887095157e-01 + 10 2.0510765220543883e+00 -1.4604063740302866e+00 -9.8323745081712954e-01 + 11 1.7878031944442556e+00 -1.9921863272948861e+00 -1.8890602447625777e+00 + 12 3.0063007039340053e+00 -4.9013350496963293e-01 -1.6231898107386229e+00 + 13 4.0515402959192999e+00 -8.9202011606653986e-01 -1.6400005529924957e+00 + 14 2.6066963345543819e+00 -4.1789253965514156e-01 -2.6634003608794394e+00 + 15 2.9695287185712913e+00 5.5422613165234036e-01 -1.2342022021790127e+00 + 16 2.6747029695228521e+00 -2.4124119054564295e+00 -2.3435746150616148e-02 + 17 2.2153577785283796e+00 -2.0897985186907717e+00 1.1963150794479436e+00 + 18 2.1369701704115704e+00 3.0158507413630606e+00 -3.5179348337215015e+00 + 19 1.5355837136087378e+00 2.6255292355375675e+00 -4.2353987779879052e+00 + 20 2.7727573005678776e+00 3.6923910449610169e+00 -3.9330842459133493e+00 + 21 4.9040128073204299e+00 -4.0752348172957946e+00 -3.6210314709891711e+00 + 22 4.3582355554440841e+00 -4.2126119427287048e+00 -4.4612844196314052e+00 + 23 5.7439382849307599e+00 -3.5821957939275029e+00 -3.8766361295935821e+00 + 24 2.0689243582422630e+00 3.1513346907271012e+00 3.1550389754828800e+00 + 25 1.3045351331492134e+00 3.2665125705842848e+00 2.5111855257433504e+00 + 26 2.5809237402711274e+00 4.0117602605482832e+00 3.2212060529089896e+00 + 27 -1.9611343130357228e+00 
-4.3563411931359752e+00 2.1098293115523705e+00 + 28 -2.7473562684513411e+00 -4.0200819932379330e+00 1.5830052163433954e+00 + 29 -1.3126000191359855e+00 -3.5962518039482929e+00 2.2746342468737835e+00 +run_vel: ! |2 + 1 8.1705744183262364e-03 1.6516406176274284e-02 4.7902264318912908e-03 + 2 5.4501493445687802e-03 5.1791699408496447e-03 -1.4372931530376594e-03 + 3 -8.2298292722385660e-03 -1.2926551614621379e-02 -4.0984181178163794e-03 + 4 -3.7699042590093549e-03 -6.5722892098813894e-03 -1.1184640360133316e-03 + 5 -1.1021961004346582e-02 -9.8906780939336109e-03 -2.8410737829284421e-03 + 6 -3.9676663166400034e-02 4.6817061464710256e-02 3.7148491979476124e-02 + 7 9.1033953013898601e-04 -1.0128524411938794e-02 -5.1568251805019748e-02 + 8 7.9064712058855690e-03 -3.3507254552631780e-03 3.4557098492564636e-02 + 9 1.5644176117320919e-03 3.7365546102722177e-03 1.5047408822037646e-02 + 10 2.9201446820573178e-02 -2.9249578745486140e-02 -1.5018077424322538e-02 + 11 -4.7835961513517560e-03 -3.7481385134185202e-03 -2.3464104142290089e-03 + 12 2.2696451841920581e-03 -3.4774154398129452e-04 -3.0640770327796884e-03 + 13 2.7531740451953108e-03 5.8171061612840597e-03 -7.9467454022159748e-04 + 14 3.5246182371994170e-03 -5.7939995585585503e-03 -3.9478431172751327e-03 + 15 -1.8547943640122978e-03 -5.8554729942777769e-03 6.2938485140538701e-03 + 16 1.8681499973445235e-02 -1.3262466204585334e-02 -4.5638651457003243e-02 + 17 -1.2896269981100382e-02 9.7527665265956451e-03 3.7296535360836762e-02 + 18 -8.0065794848261610e-04 -8.6270473212554308e-04 -1.4483040697508777e-03 + 19 1.2452390836182583e-03 -2.5061097118772749e-03 7.2998631009713062e-03 + 20 3.5930060229597072e-03 3.6938860309252974e-03 3.2322732687893115e-03 + 21 -1.4689220370766539e-03 -2.7352129761527648e-04 7.0581624215243120e-04 + 22 -7.0694199254630382e-03 -4.2577148924878598e-03 2.8079117614252034e-04 + 23 6.0446963117374939e-03 -1.4000131614795382e-03 2.5819754847014320e-03 + 24 3.1926367902287864e-04 -9.9445664749276113e-04 
1.4999996959365281e-04 + 25 1.3789754514814445e-04 -4.4335894884532700e-03 -8.1808136725080140e-04 + 26 2.0485904035217606e-03 2.7813358633835958e-03 4.3245727149206761e-03 + 27 4.5604120293369819e-04 -1.0305523026921102e-03 2.1188058381358391e-04 + 28 -6.2544520861855151e-03 1.4127711176146864e-03 -1.8429821884794260e-03 + 29 6.4110631534402261e-04 3.1273432719593807e-03 3.7253671105656745e-03 +... From 315ac762cc6fd2afb2750051b00e9650282332c9 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 9 Oct 2024 18:32:32 -0400 Subject: [PATCH 192/294] Update fix_setforce_kokkos.cpp --- src/KOKKOS/fix_setforce_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/fix_setforce_kokkos.cpp b/src/KOKKOS/fix_setforce_kokkos.cpp index e8f376643f..8c69c33fcf 100644 --- a/src/KOKKOS/fix_setforce_kokkos.cpp +++ b/src/KOKKOS/fix_setforce_kokkos.cpp @@ -84,7 +84,7 @@ void FixSetForceKokkos::post_force(int /*vflag*/) // update region if necessary if (region) { - if (!utils::strmatch(region->style, "^block")) + if (!(utils::strmatch(region->style, "^block") || utils::strmatch(region->style, "^sphere"))) error->all(FLERR,"Cannot (yet) use {}-style region with fix setforce/kk",region->style); region->prematch(); DAT::tdual_int_1d k_match = DAT::tdual_int_1d("setforce:k_match",nlocal); From bc2267c9c88541cf49feecb8a6000ebfcee5c7d7 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 9 Oct 2024 18:32:38 -0400 Subject: [PATCH 193/294] Update fix_efield_kokkos.cpp --- src/KOKKOS/fix_efield_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/fix_efield_kokkos.cpp b/src/KOKKOS/fix_efield_kokkos.cpp index b08542fd17..45af29e36d 100644 --- a/src/KOKKOS/fix_efield_kokkos.cpp +++ b/src/KOKKOS/fix_efield_kokkos.cpp @@ -106,7 +106,7 @@ void FixEfieldKokkos::post_force(int vflag) // update region if necessary if (region) { - if (!utils::strmatch(region->style, "^block")) + if (!(utils::strmatch(region->style, "^block") || 
utils::strmatch(region->style, "^sphere"))) error->all(FLERR,"Cannot (yet) use {}-style region with fix efield/kk",region->style); region->prematch(); DAT::tdual_int_1d k_match = DAT::tdual_int_1d("efield:k_match",nlocal); From a5b76410e1b545c4eba5f5fd24e4d17bc32e3c62 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 9 Oct 2024 18:32:54 -0400 Subject: [PATCH 194/294] cleanup --- src/KOKKOS/fix_wall_region_kokkos.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/KOKKOS/fix_wall_region_kokkos.cpp b/src/KOKKOS/fix_wall_region_kokkos.cpp index 8d0460aa31..2eea884472 100644 --- a/src/KOKKOS/fix_wall_region_kokkos.cpp +++ b/src/KOKKOS/fix_wall_region_kokkos.cpp @@ -33,10 +33,6 @@ using namespace MathSpecialKokkos; enum { LJ93, LJ126, LJ1043, COLLOID, HARMONIC, MORSE }; -// make sure surface_kokkos() functions get resolved by linker -//template class RegBlockKokkos; -//template class RegSphereKokkos; - /* ---------------------------------------------------------------------- */ template From a8c9b57b627532fc37e85ff552b3c4e7b0506f7e Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 9 Oct 2024 18:34:17 -0400 Subject: [PATCH 195/294] partial revert of refactoring that broke efield and setforce --- src/KOKKOS/region_block_kokkos.cpp | 32 +++++++++++++++++++++++++++++ src/KOKKOS/region_block_kokkos.h | 32 ++++++++--------------------- src/KOKKOS/region_sphere_kokkos.cpp | 31 ++++++++++++++++++++++++++++ src/KOKKOS/region_sphere_kokkos.h | 7 +++++-- 4 files changed, 76 insertions(+), 26 deletions(-) diff --git a/src/KOKKOS/region_block_kokkos.cpp b/src/KOKKOS/region_block_kokkos.cpp index 9ba607d4fe..dcf6bd66e7 100644 --- a/src/KOKKOS/region_block_kokkos.cpp +++ b/src/KOKKOS/region_block_kokkos.cpp @@ -42,6 +42,38 @@ RegBlockKokkos::~RegBlockKokkos() /* ---------------------------------------------------------------------- */ +template +void RegBlockKokkos::match_all_kokkos(int groupbit_in, DAT::tdual_int_1d k_match_in) +{ + groupbit = groupbit_in; + d_match 
= k_match_in.template view(); + + auto execution_space = ExecutionSpaceFromDevice::space; + atomKK->sync(execution_space, X_MASK | MASK_MASK); + x = atomKK->k_x.view(); + mask = atomKK->k_mask.view(); + int nlocal = atom->nlocal; + + copymode = 1; + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); + copymode = 0; + k_match_in.template modify(); +} + +/* ---------------------------------------------------------------------- */ + + +template +KOKKOS_INLINE_FUNCTION +void RegBlockKokkos::operator()(TagRegBlockMatchAll, const int &i) const { + if (mask[i] & groupbit) { + double x_tmp = x(i,0); + double y_tmp = x(i,1); + double z_tmp = x(i,2); + d_match[i] = match_kokkos(x_tmp,y_tmp,z_tmp); + } +} + namespace LAMMPS_NS { template class RegBlockKokkos; #ifdef LMP_KOKKOS_GPU diff --git a/src/KOKKOS/region_block_kokkos.h b/src/KOKKOS/region_block_kokkos.h index 117326492f..60fe8c056b 100644 --- a/src/KOKKOS/region_block_kokkos.h +++ b/src/KOKKOS/region_block_kokkos.h @@ -24,6 +24,8 @@ RegionStyle(block/kk/host,RegBlockKokkos); #define LMP_REGION_BLOCK_KOKKOS_H #include "region_block.h" + +#include "kokkos_base.h" #include "kokkos_type.h" #include "math_special_kokkos.h" @@ -34,7 +36,7 @@ using namespace MathSpecialKokkos; struct TagRegBlockMatchAll{}; template -class RegBlockKokkos : public RegBlock { +class RegBlockKokkos : public RegBlock, public KokkosBase { friend class FixPour; public: @@ -44,6 +46,11 @@ class RegBlockKokkos : public RegBlock { RegBlockKokkos(class LAMMPS *, int, char **); ~RegBlockKokkos() override; + void match_all_kokkos(int, DAT::tdual_int_1d) override; + + KOKKOS_INLINE_FUNCTION + void operator()(TagRegBlockMatchAll, const int&) const; + KOKKOS_INLINE_FUNCTION int match_kokkos(double x, double y, double z) const { @@ -100,29 +107,6 @@ class RegBlockKokkos : public RegBlock { typename AT::t_x_array_randomread x; typename AT::t_int_1d_randomread mask; -/* - KOKKOS_INLINE_FUNCTION - int k_inside(double, double, double) const; - 
KOKKOS_INLINE_FUNCTION - void forward_transform(double&, double&, double&) const; - KOKKOS_INLINE_FUNCTION - void inverse_transform(double&, double&, double&) const; - KOKKOS_INLINE_FUNCTION - void rotate(double&, double&, double&, double) const; - KOKKOS_INLINE_FUNCTION - void add_contact(int, double*, double, double, double); - KOKKOS_INLINE_FUNCTION - int surface_interior_kokkos(double*, double); - KOKKOS_INLINE_FUNCTION - int surface_exterior_kokkos(double*, double); - KOKKOS_INLINE_FUNCTION - void point_on_line_segment(double*, double*, double*, double*); - KOKKOS_INLINE_FUNCTION - double find_closest_point(int, double*, double&, double&, double&); - KOKKOS_INLINE_FUNCTION - double inside_face(double*, int); -*/ - KOKKOS_INLINE_FUNCTION int surface_interior_kokkos(double *x, double cutoff) { diff --git a/src/KOKKOS/region_sphere_kokkos.cpp b/src/KOKKOS/region_sphere_kokkos.cpp index 477ca30149..a7167a0a10 100644 --- a/src/KOKKOS/region_sphere_kokkos.cpp +++ b/src/KOKKOS/region_sphere_kokkos.cpp @@ -45,6 +45,37 @@ RegSphereKokkos::~RegSphereKokkos() /* ---------------------------------------------------------------------- */ +template +void RegSphereKokkos::match_all_kokkos(int groupbit_in, DAT::tdual_int_1d k_match_in) +{ + auto execution_space = ExecutionSpaceFromDevice::space; + atomKK->sync(execution_space, X_MASK | MASK_MASK); + + auto d_x = atomKK->k_x.template view(); + auto d_mask = atomKK->k_mask.template view(); + auto d_match = k_match_in.template view(); + auto l_groupbit = groupbit_in; + + copymode = 1; + + // capture lambda reference to KOKKOS_INLINE_FUNCTION match() + // use KOKKOS_CLASS_LAMBDA instead of KOKKOS_LAMBDA + // https://github.com/kokkos/kokkos/issues/695 + + Kokkos::parallel_for(atom->nlocal, KOKKOS_CLASS_LAMBDA( const int &i ) { + if (d_mask[i] & l_groupbit) { + double x_tmp = d_x(i,0); + double y_tmp = d_x(i,1); + double z_tmp = d_x(i,2); + d_match[i] = match_kokkos(x_tmp,y_tmp,z_tmp); + }}); + + copymode = 0; + k_match_in.template 
modify(); +} + +/* ---------------------------------------------------------------------- */ + namespace LAMMPS_NS { template class RegSphereKokkos; #ifdef LMP_KOKKOS_GPU diff --git a/src/KOKKOS/region_sphere_kokkos.h b/src/KOKKOS/region_sphere_kokkos.h index e56b1b714e..04f85d7e81 100644 --- a/src/KOKKOS/region_sphere_kokkos.h +++ b/src/KOKKOS/region_sphere_kokkos.h @@ -24,12 +24,14 @@ RegionStyle(sphere/kk/host,RegSphereKokkos); #define LMP_REGION_SPHERE_KOKKOS_H #include "region_sphere.h" + +#include "kokkos_base.h" #include "kokkos_type.h" namespace LAMMPS_NS { template -class RegSphereKokkos : public RegSphere { +class RegSphereKokkos : public RegSphere, public KokkosBase { friend class FixPour; public: @@ -39,6 +41,8 @@ class RegSphereKokkos : public RegSphere { RegSphereKokkos(class LAMMPS *, int, char **); ~RegSphereKokkos() override; + void match_all_kokkos(int, DAT::tdual_int_1d) override; + KOKKOS_INLINE_FUNCTION int match_kokkos(double x, double y, double z) const { @@ -91,7 +95,6 @@ class RegSphereKokkos : public RegSphere { private: - KOKKOS_INLINE_FUNCTION int surface_interior_kokkos(double *x, double cutoff) { From 422ced4d822c820ef6316281dc5e15f56b0a099b Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 9 Oct 2024 20:55:00 -0400 Subject: [PATCH 196/294] silence compiler warnings --- src/KOKKOS/fix_cmap_kokkos.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 0cc88960e8..7bfc23e704 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -688,7 +688,7 @@ int FixCMAPKokkos::unpack_exchange(int nlocal, double *buf) template int FixCMAPKokkos::pack_exchange_kokkos( const int &nsend, DAT::tdual_xfloat_2d &k_buf, - DAT::tdual_int_1d k_exchange_sendlist, DAT::tdual_int_1d k_copylist, + DAT::tdual_int_1d k_exchange_sendlist, DAT::tdual_int_1d /*k_copylist*/, ExecutionSpace space) { @@ -749,7 +749,7 @@ int 
FixCMAPKokkos::pack_exchange_kokkos( template void FixCMAPKokkos::unpack_exchange_kokkos( DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv, - int nrecv1, int nextrarecv1, + int /*nrecv1*/, int /*nextrarecv1*/, ExecutionSpace /*space*/) { k_buf.template sync(); From bbafad145b9e3aa3ff8ec704aafd808b7c6fa7c5 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Thu, 10 Oct 2024 12:23:09 -0400 Subject: [PATCH 197/294] fixed identation --- src/KOKKOS/region_sphere_kokkos.h | 275 +++++++++++++++--------------- 1 file changed, 137 insertions(+), 138 deletions(-) diff --git a/src/KOKKOS/region_sphere_kokkos.h b/src/KOKKOS/region_sphere_kokkos.h index 04f85d7e81..bc9038be78 100644 --- a/src/KOKKOS/region_sphere_kokkos.h +++ b/src/KOKKOS/region_sphere_kokkos.h @@ -53,168 +53,167 @@ class RegSphereKokkos : public RegSphere, public KokkosBase { KOKKOS_INLINE_FUNCTION int surface_kokkos(double x, double y, double z, double cutoff) -{ - int ncontact; - double xs, ys, zs; - double xnear[3], xorig[3]; + { + int ncontact; + double xs, ys, zs; + double xnear[3], xorig[3]; - if (dynamic) { - xorig[0] = x; xorig[1] = y; xorig[2] = z; - inverse_transform(x, y, z); - } + if (dynamic) { + xorig[0] = x; xorig[1] = y; xorig[2] = z; + inverse_transform(x, y, z); + } - xnear[0] = x; xnear[1] = y; xnear[2] = z; + xnear[0] = x; xnear[1] = y; xnear[2] = z; - if (!openflag) { - if (interior) - ncontact = surface_interior_kokkos(xnear, cutoff); + if (!openflag) { + if (interior) ncontact = surface_interior_kokkos(xnear, cutoff); else ncontact = surface_exterior_kokkos(xnear, cutoff); - } else { - // one of surface_int/ext() will return 0 - // so no need to worry about offset of contact indices - ncontact = surface_exterior_kokkos(xnear, cutoff) + surface_interior_kokkos(xnear, cutoff); - } - - if (rotateflag && ncontact) { - for (int i = 0; i < ncontact; i++) { - xs = xnear[0] - d_contact[i].delx; - ys = xnear[1] - d_contact[i].dely; - zs = xnear[2] - d_contact[i].delz; - 
forward_transform(xs, ys, zs); - d_contact[i].delx = xorig[0] - xs; - d_contact[i].dely = xorig[1] - ys; - d_contact[i].delz = xorig[2] - zs; + } else { + // one of surface_int/ext() will return 0 + // so no need to worry about offset of contact indices + ncontact = surface_exterior_kokkos(xnear, cutoff) + surface_interior_kokkos(xnear, cutoff); } - } - return ncontact; -} + if (rotateflag && ncontact) { + for (int i = 0; i < ncontact; i++) { + xs = xnear[0] - d_contact[i].delx; + ys = xnear[1] - d_contact[i].dely; + zs = xnear[2] - d_contact[i].delz; + forward_transform(xs, ys, zs); + d_contact[i].delx = xorig[0] - xs; + d_contact[i].dely = xorig[1] - ys; + d_contact[i].delz = xorig[2] - zs; + } + } + + return ncontact; + } Kokkos::View d_contact; private: -KOKKOS_INLINE_FUNCTION -int surface_interior_kokkos(double *x, double cutoff) -{ - double delx = x[0] - xc; - double dely = x[1] - yc; - double delz = x[2] - zc; - double r = sqrt(delx * delx + dely * dely + delz * delz); - if (r > radius || r == 0.0) return 0; + KOKKOS_INLINE_FUNCTION + int surface_interior_kokkos(double *x, double cutoff) + { + double delx = x[0] - xc; + double dely = x[1] - yc; + double delz = x[2] - zc; + double r = sqrt(delx * delx + dely * dely + delz * delz); + if (r > radius || r == 0.0) return 0; - double delta = radius - r; - if (delta < cutoff) { - d_contact[0].r = delta; - d_contact[0].delx = delx * (1.0 - radius / r); - d_contact[0].dely = dely * (1.0 - radius / r); - d_contact[0].delz = delz * (1.0 - radius / r); - d_contact[0].radius = -radius; - d_contact[0].iwall = 0; - d_contact[0].varflag = 1; - return 1; + double delta = radius - r; + if (delta < cutoff) { + d_contact[0].r = delta; + d_contact[0].delx = delx * (1.0 - radius / r); + d_contact[0].dely = dely * (1.0 - radius / r); + d_contact[0].delz = delz * (1.0 - radius / r); + d_contact[0].radius = -radius; + d_contact[0].iwall = 0; + d_contact[0].varflag = 1; + return 1; + } + return 0; } - return 0; -} 
-KOKKOS_INLINE_FUNCTION -int surface_exterior_kokkos(double *x, double cutoff) -{ - double delx = x[0] - xc; - double dely = x[1] - yc; - double delz = x[2] - zc; - double r = sqrt(delx * delx + dely * dely + delz * delz); - if (r < radius) return 0; + KOKKOS_INLINE_FUNCTION + int surface_exterior_kokkos(double *x, double cutoff) + { + double delx = x[0] - xc; + double dely = x[1] - yc; + double delz = x[2] - zc; + double r = sqrt(delx * delx + dely * dely + delz * delz); + if (r < radius) return 0; - double delta = r - radius; - if (delta < cutoff) { - d_contact[0].r = delta; - d_contact[0].delx = delx * (1.0 - radius / r); - d_contact[0].dely = dely * (1.0 - radius / r); - d_contact[0].delz = delz * (1.0 - radius / r); - d_contact[0].radius = radius; - d_contact[0].iwall = 0; - d_contact[0].varflag = 1; - return 1; + double delta = r - radius; + if (delta < cutoff) { + d_contact[0].r = delta; + d_contact[0].delx = delx * (1.0 - radius / r); + d_contact[0].dely = dely * (1.0 - radius / r); + d_contact[0].delz = delz * (1.0 - radius / r); + d_contact[0].radius = radius; + d_contact[0].iwall = 0; + d_contact[0].varflag = 1; + return 1; + } + return 0; } - return 0; -} -KOKKOS_INLINE_FUNCTION -void add_contact(int n, double *x, double xp, double yp, double zp) -{ - double delx = x[0] - xp; - double dely = x[1] - yp; - double delz = x[2] - zp; - d_contact[n].r = sqrt(delx * delx + dely * dely + delz * delz); - d_contact[n].radius = 0; - d_contact[n].delx = delx; - d_contact[n].dely = dely; - d_contact[n].delz = delz; -} -KOKKOS_INLINE_FUNCTION -int k_inside(double x, double y, double z) const -{ - const double delx = x - xc; - const double dely = y - yc; - const double delz = z - zc; - const double r = sqrt(delx * delx + dely * dely + delz * delz); - - if (r <= radius) return 1; - return 0; -} - -KOKKOS_INLINE_FUNCTION -void forward_transform(double &x, double &y, double &z) const -{ - if (rotateflag) rotate(x, y, z, theta); - if (moveflag) { - x += dx; - y += dy; - z 
+= dz; + KOKKOS_INLINE_FUNCTION + void add_contact(int n, double *x, double xp, double yp, double zp) + { + double delx = x[0] - xp; + double dely = x[1] - yp; + double delz = x[2] - zp; + d_contact[n].r = sqrt(delx * delx + dely * dely + delz * delz); + d_contact[n].radius = 0; + d_contact[n].delx = delx; + d_contact[n].dely = dely; + d_contact[n].delz = delz; } -} -KOKKOS_INLINE_FUNCTION -void inverse_transform(double &x, double &y, double &z) const -{ - if (moveflag) { - x -= dx; - y -= dy; - z -= dz; + KOKKOS_INLINE_FUNCTION + int k_inside(double x, double y, double z) const + { + const double delx = x - xc; + const double dely = y - yc; + const double delz = z - zc; + const double r = sqrt(delx * delx + dely * dely + delz * delz); + + if (r <= radius) return 1; + return 0; } - if (rotateflag) rotate(x,y,z,-theta); -} -KOKKOS_INLINE_FUNCTION -void rotate(double &x, double &y, double &z, double angle) const -{ - double a[3],b[3],c[3],d[3],disp[3]; + KOKKOS_INLINE_FUNCTION + void forward_transform(double &x, double &y, double &z) const + { + if (rotateflag) rotate(x, y, z, theta); + if (moveflag) { + x += dx; + y += dy; + z += dz; + } + } - double sine = sin(angle); - double cosine = cos(angle); - d[0] = x - point[0]; - d[1] = y - point[1]; - d[2] = z - point[2]; - double x0dotr = d[0]*runit[0] + d[1]*runit[1] + d[2]*runit[2]; - c[0] = x0dotr * runit[0]; - c[1] = x0dotr * runit[1]; - c[2] = x0dotr * runit[2]; - a[0] = d[0] - c[0]; - a[1] = d[1] - c[1]; - a[2] = d[2] - c[2]; - b[0] = runit[1]*a[2] - runit[2]*a[1]; - b[1] = runit[2]*a[0] - runit[0]*a[2]; - b[2] = runit[0]*a[1] - runit[1]*a[0]; - disp[0] = a[0]*cosine + b[0]*sine; - disp[1] = a[1]*cosine + b[1]*sine; - disp[2] = a[2]*cosine + b[2]*sine; - x = point[0] + c[0] + disp[0]; - y = point[1] + c[1] + disp[1]; - z = point[2] + c[2] + disp[2]; -} + KOKKOS_INLINE_FUNCTION + void inverse_transform(double &x, double &y, double &z) const + { + if (moveflag) { + x -= dx; + y -= dy; + z -= dz; + } + if (rotateflag) 
rotate(x,y,z,-theta); + } + KOKKOS_INLINE_FUNCTION + void rotate(double &x, double &y, double &z, double angle) const + { + double a[3],b[3],c[3],d[3],disp[3]; + + double sine = sin(angle); + double cosine = cos(angle); + d[0] = x - point[0]; + d[1] = y - point[1]; + d[2] = z - point[2]; + double x0dotr = d[0]*runit[0] + d[1]*runit[1] + d[2]*runit[2]; + c[0] = x0dotr * runit[0]; + c[1] = x0dotr * runit[1]; + c[2] = x0dotr * runit[2]; + a[0] = d[0] - c[0]; + a[1] = d[1] - c[1]; + a[2] = d[2] - c[2]; + b[0] = runit[1]*a[2] - runit[2]*a[1]; + b[1] = runit[2]*a[0] - runit[0]*a[2]; + b[2] = runit[0]*a[1] - runit[1]*a[0]; + disp[0] = a[0]*cosine + b[0]*sine; + disp[1] = a[1]*cosine + b[1]*sine; + disp[2] = a[2]*cosine + b[2]*sine; + x = point[0] + c[0] + disp[0]; + y = point[1] + c[1] + disp[1]; + z = point[2] + c[2] + disp[2]; + } }; From 5c07882a187ede4f1145a818c4744c0735600d8e Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sun, 20 Oct 2024 18:30:19 -0400 Subject: [PATCH 198/294] fix warning(s) --- src/KOKKOS/region_block_kokkos.cpp | 14 +++++------ src/KOKKOS/region_block_kokkos.h | 5 ++-- src/KOKKOS/region_sphere_kokkos.cpp | 38 +++++++++++++++-------------- src/KOKKOS/region_sphere_kokkos.h | 9 +++++++ 4 files changed, 37 insertions(+), 29 deletions(-) diff --git a/src/KOKKOS/region_block_kokkos.cpp b/src/KOKKOS/region_block_kokkos.cpp index dcf6bd66e7..05ac8eea2a 100644 --- a/src/KOKKOS/region_block_kokkos.cpp +++ b/src/KOKKOS/region_block_kokkos.cpp @@ -47,11 +47,10 @@ void RegBlockKokkos::match_all_kokkos(int groupbit_in, DAT::tdual_in { groupbit = groupbit_in; d_match = k_match_in.template view(); - auto execution_space = ExecutionSpaceFromDevice::space; atomKK->sync(execution_space, X_MASK | MASK_MASK); - x = atomKK->k_x.view(); - mask = atomKK->k_mask.view(); + d_x = atomKK->k_x.view(); + d_mask = atomKK->k_mask.view(); int nlocal = atom->nlocal; copymode = 1; @@ -62,14 +61,13 @@ void RegBlockKokkos::match_all_kokkos(int groupbit_in, DAT::tdual_in /* 
---------------------------------------------------------------------- */ - template KOKKOS_INLINE_FUNCTION void RegBlockKokkos::operator()(TagRegBlockMatchAll, const int &i) const { - if (mask[i] & groupbit) { - double x_tmp = x(i,0); - double y_tmp = x(i,1); - double z_tmp = x(i,2); + if (d_mask[i] & groupbit) { + double x_tmp = d_x(i,0); + double y_tmp = d_x(i,1); + double z_tmp = d_x(i,2); d_match[i] = match_kokkos(x_tmp,y_tmp,z_tmp); } } diff --git a/src/KOKKOS/region_block_kokkos.h b/src/KOKKOS/region_block_kokkos.h index 60fe8c056b..d0dcc12b88 100644 --- a/src/KOKKOS/region_block_kokkos.h +++ b/src/KOKKOS/region_block_kokkos.h @@ -103,9 +103,8 @@ class RegBlockKokkos : public RegBlock, public KokkosBase { private: int groupbit; typename AT::t_int_1d d_match; - - typename AT::t_x_array_randomread x; - typename AT::t_int_1d_randomread mask; + typename AT::t_x_array_randomread d_x; + typename AT::t_int_1d_randomread d_mask; KOKKOS_INLINE_FUNCTION int surface_interior_kokkos(double *x, double cutoff) diff --git a/src/KOKKOS/region_sphere_kokkos.cpp b/src/KOKKOS/region_sphere_kokkos.cpp index a7167a0a10..b9a305d9fb 100644 --- a/src/KOKKOS/region_sphere_kokkos.cpp +++ b/src/KOKKOS/region_sphere_kokkos.cpp @@ -48,32 +48,34 @@ RegSphereKokkos::~RegSphereKokkos() template void RegSphereKokkos::match_all_kokkos(int groupbit_in, DAT::tdual_int_1d k_match_in) { + groupbit = groupbit_in; + d_match = k_match_in.template view(); auto execution_space = ExecutionSpaceFromDevice::space; atomKK->sync(execution_space, X_MASK | MASK_MASK); - - auto d_x = atomKK->k_x.template view(); - auto d_mask = atomKK->k_mask.template view(); - auto d_match = k_match_in.template view(); - auto l_groupbit = groupbit_in; + d_x = atomKK->k_x.view(); + d_mask = atomKK->k_mask.view(); + int nlocal = atom->nlocal; copymode = 1; - - // capture lambda reference to KOKKOS_INLINE_FUNCTION match() - // use KOKKOS_CLASS_LAMBDA instead of KOKKOS_LAMBDA - // https://github.com/kokkos/kokkos/issues/695 - - 
Kokkos::parallel_for(atom->nlocal, KOKKOS_CLASS_LAMBDA( const int &i ) { - if (d_mask[i] & l_groupbit) { - double x_tmp = d_x(i,0); - double y_tmp = d_x(i,1); - double z_tmp = d_x(i,2); - d_match[i] = match_kokkos(x_tmp,y_tmp,z_tmp); - }}); - + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); copymode = 0; k_match_in.template modify(); } +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void RegSphereKokkos::operator()(TagRegSphereMatchAll, const int &i) const { + if (d_mask[i] & groupbit) { + double x_tmp = d_x(i,0); + double y_tmp = d_x(i,1); + double z_tmp = d_x(i,2); + d_match[i] = match_kokkos(x_tmp,y_tmp,z_tmp); + } +} + + /* ---------------------------------------------------------------------- */ namespace LAMMPS_NS { diff --git a/src/KOKKOS/region_sphere_kokkos.h b/src/KOKKOS/region_sphere_kokkos.h index bc9038be78..08951138c3 100644 --- a/src/KOKKOS/region_sphere_kokkos.h +++ b/src/KOKKOS/region_sphere_kokkos.h @@ -30,6 +30,8 @@ RegionStyle(sphere/kk/host,RegSphereKokkos); namespace LAMMPS_NS { +struct TagRegSphereMatchAll{}; + template class RegSphereKokkos : public RegSphere, public KokkosBase { friend class FixPour; @@ -43,6 +45,9 @@ class RegSphereKokkos : public RegSphere, public KokkosBase { void match_all_kokkos(int, DAT::tdual_int_1d) override; + KOKKOS_INLINE_FUNCTION + void operator()(TagRegSphereMatchAll, const int&) const; + KOKKOS_INLINE_FUNCTION int match_kokkos(double x, double y, double z) const { @@ -93,6 +98,10 @@ class RegSphereKokkos : public RegSphere, public KokkosBase { Kokkos::View d_contact; private: + int groupbit; + typename AT::t_int_1d d_match; + typename AT::t_x_array_randomread d_x; + typename AT::t_int_1d_randomread d_mask; KOKKOS_INLINE_FUNCTION int surface_interior_kokkos(double *x, double cutoff) From 5bdd616bcd71f2a1618c003b1647569a9650c6d9 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 22 Oct 2024 12:42:49 -0400 Subject: [PATCH 199/294] 
cuda bugfix --- src/KOKKOS/group_kokkos.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/KOKKOS/group_kokkos.cpp b/src/KOKKOS/group_kokkos.cpp index 3a3a7b8288..01cf15f6c5 100644 --- a/src/KOKKOS/group_kokkos.cpp +++ b/src/KOKKOS/group_kokkos.cpp @@ -88,17 +88,16 @@ void GroupKokkos::xcm(int igroup, double masstotal, double *cm) auto d_mask = atomKK->k_mask.template view(); auto d_type = atomKK->k_type.template view(); auto d_image = atomKK->k_image.template view(); - auto d_mass = atomKK->k_mass.template view(); - auto d_rmass = atomKK->k_rmass.template view(); - - auto l_prd = domain->prd; - auto l_h = domain->h; + auto l_prd = Few(domain->prd); + auto l_h = Few(domain->h); auto l_triclinic = domain->triclinic; double cmone[3]; if (atomKK->rmass) { + auto d_rmass = atomKK->k_rmass.template view(); + Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_cmx, double &l_cmy, double &l_cmz) { if (d_mask(i) & groupbit) { double massone = d_rmass(i); @@ -115,6 +114,8 @@ void GroupKokkos::xcm(int igroup, double masstotal, double *cm) } else { + auto d_mass = atomKK->k_mass.template view(); + Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_cmx, double &l_cmy, double &l_cmz) { if (d_mask(i) & groupbit) { double massone = d_mass(d_type(i)); From 22a15c7cf87a2ae55a1fe49991162339d444c193 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 22 Oct 2024 14:15:02 -0400 Subject: [PATCH 200/294] cuda bugfix again --- src/KOKKOS/fix_cmap_kokkos.cpp | 257 ++++++++++++++------------------- src/KOKKOS/fix_cmap_kokkos.h | 3 + 2 files changed, 114 insertions(+), 146 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 7bfc23e704..e597c34334 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -234,6 +234,7 @@ void FixCMAPKokkos::post_force(int vflag) ev_init(eflag,vflag); copymode = 1; + nlocal = atomKK->nlocal; 
Kokkos::parallel_reduce(Kokkos::RangePolicy(0,ncrosstermlist),*this,ecmap); copymode = 0; atomKK->modified(execution_space,F_MASK); @@ -245,27 +246,6 @@ template KOKKOS_INLINE_FUNCTION void FixCMAPKokkos::operator()(TagFixCmapPostForce, const int n, double &ecmapKK) const { - - int i1,i2,i3,i4,i5,type; - int li1, li2, mli1,mli2,mli11,mli21,t1,li3,li4,mli3,mli4,mli31,mli41; - - // vectors needed to calculate the cross-term dihedral angles - double vb21x,vb21y,vb21z,vb32x,vb32y,vb32z,vb34x,vb34y,vb34z; - double vb23x,vb23y,vb23z; - double vb43x,vb43y,vb43z,vb45x,vb45y,vb45z,a1x,a1y,a1z,b1x,b1y,b1z; - double a2x,a2y,a2z,b2x,b2y,b2z,r32,a1sq,b1sq,a2sq,b2sq,dpr21r32,dpr34r32; - double dpr32r43,dpr45r43,r43,vb12x,vb12y,vb12z; - // cross-term dihedral angles - double phi,psi,phi1,psi1; - double f1[3],f2[3],f3[3],f4[3],f5[3]; - double gs[4],d1gs[4],d2gs[4],d12gs[4]; - - // vectors needed for the gradient/force calculation - double dphidr1x,dphidr1y,dphidr1z,dphidr2x,dphidr2y,dphidr2z; - double dphidr3x,dphidr3y,dphidr3z,dphidr4x,dphidr4y,dphidr4z; - double dpsidr1x,dpsidr1y,dpsidr1z,dpsidr2x,dpsidr2y,dpsidr2z; - double dpsidr3x,dpsidr3y,dpsidr3z,dpsidr4x,dpsidr4y,dpsidr4z; - // Definition of cross-term dihedrals // phi dihedral @@ -275,14 +255,12 @@ void FixCMAPKokkos::operator()(TagFixCmapPostForce, const int n, dou // |--------------------| // psi dihedral - int nlocal = atomKK->nlocal; - - i1 = d_crosstermlist(n,0); - i2 = d_crosstermlist(n,1); - i3 = d_crosstermlist(n,2); - i4 = d_crosstermlist(n,3); - i5 = d_crosstermlist(n,4); - type = d_crosstermlist(n,5); + int i1 = d_crosstermlist(n,0); + int i2 = d_crosstermlist(n,1); + int i3 = d_crosstermlist(n,2); + int i4 = d_crosstermlist(n,3); + int i5 = d_crosstermlist(n,4); + int type = d_crosstermlist(n,5); if (type == 0) return; // calculate bond vectors for both dihedrals @@ -290,99 +268,103 @@ void FixCMAPKokkos::operator()(TagFixCmapPostForce, const int n, dou // phi // vb21 = r2 - r1 - vb21x = d_x(i2,0) - d_x(i1,0); 
- vb21y = d_x(i2,1) - d_x(i1,1); - vb21z = d_x(i2,2) - d_x(i1,2); - vb12x = -1.0*vb21x; - vb12y = -1.0*vb21y; - vb12z = -1.0*vb21z; - vb32x = d_x(i3,0) - d_x(i2,0); - vb32y = d_x(i3,1) - d_x(i2,1); - vb32z = d_x(i3,2) - d_x(i2,2); - vb23x = -1.0*vb32x; - vb23y = -1.0*vb32y; - vb23z = -1.0*vb32z; + double vb21x = d_x(i2,0) - d_x(i1,0); + double vb21y = d_x(i2,1) - d_x(i1,1); + double vb21z = d_x(i2,2) - d_x(i1,2); + double vb12x = -1.0*vb21x; + double vb12y = -1.0*vb21y; + double vb12z = -1.0*vb21z; + double vb32x = d_x(i3,0) - d_x(i2,0); + double vb32y = d_x(i3,1) - d_x(i2,1); + double vb32z = d_x(i3,2) - d_x(i2,2); + double vb23x = -1.0*vb32x; + double vb23y = -1.0*vb32y; + double vb23z = -1.0*vb32z; - vb34x = d_x(i3,0) - d_x(i4,0); - vb34y = d_x(i3,1) - d_x(i4,1); - vb34z = d_x(i3,2) - d_x(i4,2); + double vb34x = d_x(i3,0) - d_x(i4,0); + double vb34y = d_x(i3,1) - d_x(i4,1); + double vb34z = d_x(i3,2) - d_x(i4,2); // psi // bond vectors same as for phi: vb32 - vb43x = -1.0*vb34x; - vb43y = -1.0*vb34y; - vb43z = -1.0*vb34z; + double vb43x = -1.0*vb34x; + double vb43y = -1.0*vb34y; + double vb43z = -1.0*vb34z; - vb45x = d_x(i4,0) - d_x(i5,0); - vb45y = d_x(i4,1) - d_x(i5,1); - vb45z = d_x(i4,2) - d_x(i5,2); + double vb45x = d_x(i4,0) - d_x(i5,0); + double vb45y = d_x(i4,1) - d_x(i5,1); + double vb45z = d_x(i4,2) - d_x(i5,2); // calculate normal vectors for planes that define the dihedral angles - a1x = vb12y*vb23z - vb12z*vb23y; - a1y = vb12z*vb23x - vb12x*vb23z; - a1z = vb12x*vb23y - vb12y*vb23x; + double a1x = vb12y*vb23z - vb12z*vb23y; + double a1y = vb12z*vb23x - vb12x*vb23z; + double a1z = vb12x*vb23y - vb12y*vb23x; - b1x = vb43y*vb23z - vb43z*vb23y; - b1y = vb43z*vb23x - vb43x*vb23z; - b1z = vb43x*vb23y - vb43y*vb23x; + double b1x = vb43y*vb23z - vb43z*vb23y; + double b1y = vb43z*vb23x - vb43x*vb23z; + double b1z = vb43x*vb23y - vb43y*vb23x; - a2x = vb23y*vb34z - vb23z*vb34y; - a2y = vb23z*vb34x - vb23x*vb34z; - a2z = vb23x*vb34y - vb23y*vb34x; + double a2x = 
vb23y*vb34z - vb23z*vb34y; + double a2y = vb23z*vb34x - vb23x*vb34z; + double a2z = vb23x*vb34y - vb23y*vb34x; - b2x = vb45y*vb43z - vb45z*vb43y; - b2y = vb45z*vb43x - vb45x*vb43z; - b2z = vb45x*vb43y - vb45y*vb43x; + double b2x = vb45y*vb43z - vb45z*vb43y; + double b2y = vb45z*vb43x - vb45x*vb43z; + double b2z = vb45x*vb43y - vb45y*vb43x; // calculate terms used later in calculations - r32 = sqrt(vb32x*vb32x + vb32y*vb32y + vb32z*vb32z); - a1sq = a1x*a1x + a1y*a1y + a1z*a1z; - b1sq = b1x*b1x + b1y*b1y + b1z*b1z; + double r32 = sqrt(vb32x*vb32x + vb32y*vb32y + vb32z*vb32z); + double a1sq = a1x*a1x + a1y*a1y + a1z*a1z; + double b1sq = b1x*b1x + b1y*b1y + b1z*b1z; - r43 = sqrt(vb43x*vb43x + vb43y*vb43y + vb43z*vb43z); - a2sq = a2x*a2x + a2y*a2y + a2z*a2z; - b2sq = b2x*b2x + b2y*b2y + b2z*b2z; + double r43 = sqrt(vb43x*vb43x + vb43y*vb43y + vb43z*vb43z); + double a2sq = a2x*a2x + a2y*a2y + a2z*a2z; + double b2sq = b2x*b2x + b2y*b2y + b2z*b2z; if (a1sq<0.0001 || b1sq<0.0001 || a2sq<0.0001 || b2sq<0.0001) return; - dpr21r32 = vb21x*vb32x + vb21y*vb32y + vb21z*vb32z; - dpr34r32 = vb34x*vb32x + vb34y*vb32y + vb34z*vb32z; - dpr32r43 = vb32x*vb43x + vb32y*vb43y + vb32z*vb43z; - dpr45r43 = vb45x*vb43x + vb45y*vb43y + vb45z*vb43z; + // vectors needed to calculate the cross-term dihedral angles + double dpr21r32 = vb21x*vb32x + vb21y*vb32y + vb21z*vb32z; + double dpr34r32 = vb34x*vb32x + vb34y*vb32y + vb34z*vb32z; + double dpr32r43 = vb32x*vb43x + vb32y*vb43y + vb32z*vb43z; + double dpr45r43 = vb45x*vb43x + vb45y*vb43y + vb45z*vb43z; + + // cross-term dihedral angles // calculate the backbone dihedral angles as VMD and GROMACS - phi = dihedral_angle_atan2(vb21x,vb21y,vb21z,a1x,a1y,a1z,b1x,b1y,b1z,r32); - psi = dihedral_angle_atan2(vb32x,vb32y,vb32z,a2x,a2y,a2z,b2x,b2y,b2z,r43); + double phi = dihedral_angle_atan2(vb21x,vb21y,vb21z,a1x,a1y,a1z,b1x,b1y,b1z,r32); + double psi = dihedral_angle_atan2(vb32x,vb32y,vb32z,a2x,a2y,a2z,b2x,b2y,b2z,r43); if (phi == 180.0) phi= -180.0; if 
(psi == 180.0) psi= -180.0; - phi1 = phi; + double phi1 = phi; if (phi1 < 0.0) phi1 += 360.0; - psi1 = psi; + double psi1 = psi; if (psi1 < 0.0) psi1 += 360.0; // find the neighbor grid point index - li1 = int(((phi1+CMAPXMIN2)/CMAPDX)+((CMAPDIM*1.0)/2.0)); - li2 = int(((psi1+CMAPXMIN2)/CMAPDX)+((CMAPDIM*1.0)/2.0)); - - li3 = int((phi-CMAPXMIN2)/CMAPDX); - li4 = int((psi-CMAPXMIN2)/CMAPDX); - mli3 = li3 % CMAPDIM; - mli4 = li4 % CMAPDIM; - mli31 = (li3+1) % CMAPDIM; - mli41 = (li4+1) %CMAPDIM; - mli1 = li1 % CMAPDIM; - mli2 = li2 % CMAPDIM; - mli11 = (li1+1) % CMAPDIM; - mli21 = (li2+1) %CMAPDIM; - t1 = type-1; + int li1 = int(((phi1+CMAPXMIN2)/CMAPDX)+((CMAPDIM*1.0)/2.0)); + int li2 = int(((psi1+CMAPXMIN2)/CMAPDX)+((CMAPDIM*1.0)/2.0)); + int li3 = int((phi-CMAPXMIN2)/CMAPDX); + int li4 = int((psi-CMAPXMIN2)/CMAPDX); + int mli3 = li3 % CMAPDIM; + int mli4 = li4 % CMAPDIM; + int mli31 = (li3+1) % CMAPDIM; + int mli41 = (li4+1) %CMAPDIM; + int mli1 = li1 % CMAPDIM; + int mli2 = li2 % CMAPDIM; + int mli11 = (li1+1) % CMAPDIM; + int mli21 = (li2+1) %CMAPDIM; + int t1 = type-1; if (t1 < 0 || t1 > 5) Kokkos::abort("Invalid CMAP crossterm_type"); // determine the values and derivatives for the grid square points + double gs[4],d1gs[4],d2gs[4],d12gs[4]; + gs[0] = d_cmapgrid(t1,mli3,mli4); gs[1] = d_cmapgrid(t1,mli31,mli4); gs[2] = d_cmapgrid(t1,mli31,mli41); @@ -417,84 +399,67 @@ void FixCMAPKokkos::operator()(TagFixCmapPostForce, const int n, dou // calculate the derivatives dphi/dr_i - dphidr1x = 1.0*r32/a1sq*a1x; - dphidr1y = 1.0*r32/a1sq*a1y; - dphidr1z = 1.0*r32/a1sq*a1z; + double dphidr1x = 1.0*r32/a1sq*a1x; + double dphidr1y = 1.0*r32/a1sq*a1y; + double dphidr1z = 1.0*r32/a1sq*a1z; - dphidr2x = -1.0*r32/a1sq*a1x - dpr21r32/a1sq/r32*a1x + dpr34r32/b1sq/r32*b1x; - dphidr2y = -1.0*r32/a1sq*a1y - dpr21r32/a1sq/r32*a1y + dpr34r32/b1sq/r32*b1y; - dphidr2z = -1.0*r32/a1sq*a1z - dpr21r32/a1sq/r32*a1z + dpr34r32/b1sq/r32*b1z; + double dphidr2x = -1.0*r32/a1sq*a1x - 
dpr21r32/a1sq/r32*a1x + dpr34r32/b1sq/r32*b1x; + double dphidr2y = -1.0*r32/a1sq*a1y - dpr21r32/a1sq/r32*a1y + dpr34r32/b1sq/r32*b1y; + double dphidr2z = -1.0*r32/a1sq*a1z - dpr21r32/a1sq/r32*a1z + dpr34r32/b1sq/r32*b1z; - dphidr3x = dpr34r32/b1sq/r32*b1x - dpr21r32/a1sq/r32*a1x - r32/b1sq*b1x; - dphidr3y = dpr34r32/b1sq/r32*b1y - dpr21r32/a1sq/r32*a1y - r32/b1sq*b1y; - dphidr3z = dpr34r32/b1sq/r32*b1z - dpr21r32/a1sq/r32*a1z - r32/b1sq*b1z; + double dphidr3x = dpr34r32/b1sq/r32*b1x - dpr21r32/a1sq/r32*a1x - r32/b1sq*b1x; + double dphidr3y = dpr34r32/b1sq/r32*b1y - dpr21r32/a1sq/r32*a1y - r32/b1sq*b1y; + double dphidr3z = dpr34r32/b1sq/r32*b1z - dpr21r32/a1sq/r32*a1z - r32/b1sq*b1z; - dphidr4x = r32/b1sq*b1x; - dphidr4y = r32/b1sq*b1y; - dphidr4z = r32/b1sq*b1z; + double dphidr4x = r32/b1sq*b1x; + double dphidr4y = r32/b1sq*b1y; + double dphidr4z = r32/b1sq*b1z; // calculate the derivatives dpsi/dr_i - dpsidr1x = 1.0*r43/a2sq*a2x; - dpsidr1y = 1.0*r43/a2sq*a2y; - dpsidr1z = 1.0*r43/a2sq*a2z; + double dpsidr1x = 1.0*r43/a2sq*a2x; + double dpsidr1y = 1.0*r43/a2sq*a2y; + double dpsidr1z = 1.0*r43/a2sq*a2z; - dpsidr2x = r43/a2sq*a2x + dpr32r43/a2sq/r43*a2x - dpr45r43/b2sq/r43*b2x; - dpsidr2y = r43/a2sq*a2y + dpr32r43/a2sq/r43*a2y - dpr45r43/b2sq/r43*b2y; - dpsidr2z = r43/a2sq*a2z + dpr32r43/a2sq/r43*a2z - dpr45r43/b2sq/r43*b2z; + double dpsidr2x = r43/a2sq*a2x + dpr32r43/a2sq/r43*a2x - dpr45r43/b2sq/r43*b2x; + double dpsidr2y = r43/a2sq*a2y + dpr32r43/a2sq/r43*a2y - dpr45r43/b2sq/r43*b2y; + double dpsidr2z = r43/a2sq*a2z + dpr32r43/a2sq/r43*a2z - dpr45r43/b2sq/r43*b2z; - dpsidr3x = dpr45r43/b2sq/r43*b2x - dpr32r43/a2sq/r43*a2x - r43/b2sq*b2x; - dpsidr3y = dpr45r43/b2sq/r43*b2y - dpr32r43/a2sq/r43*a2y - r43/b2sq*b2y; - dpsidr3z = dpr45r43/b2sq/r43*b2z - dpr32r43/a2sq/r43*a2z - r43/b2sq*b2z; + double dpsidr3x = dpr45r43/b2sq/r43*b2x - dpr32r43/a2sq/r43*a2x - r43/b2sq*b2x; + double dpsidr3y = dpr45r43/b2sq/r43*b2y - dpr32r43/a2sq/r43*a2y - r43/b2sq*b2y; + double dpsidr3z 
= dpr45r43/b2sq/r43*b2z - dpr32r43/a2sq/r43*a2z - r43/b2sq*b2z; - dpsidr4x = r43/b2sq*b2x; - dpsidr4y = r43/b2sq*b2y; - dpsidr4z = r43/b2sq*b2z; + double dpsidr4x = r43/b2sq*b2x; + double dpsidr4y = r43/b2sq*b2y; + double dpsidr4z = r43/b2sq*b2z; // calculate forces on cross-term atoms: F = -(dE/dPhi)*(dPhi/dr) - - f1[0] = dEdPhi*dphidr1x; - f1[1] = dEdPhi*dphidr1y; - f1[2] = dEdPhi*dphidr1z; - f2[0] = dEdPhi*dphidr2x + dEdPsi*dpsidr1x; - f2[1] = dEdPhi*dphidr2y + dEdPsi*dpsidr1y; - f2[2] = dEdPhi*dphidr2z + dEdPsi*dpsidr1z; - f3[0] = -dEdPhi*dphidr3x - dEdPsi*dpsidr2x; - f3[1] = -dEdPhi*dphidr3y - dEdPsi*dpsidr2y; - f3[2] = -dEdPhi*dphidr3z - dEdPsi*dpsidr2z; - f4[0] = -dEdPhi*dphidr4x - dEdPsi*dpsidr3x; - f4[1] = -dEdPhi*dphidr4y - dEdPsi*dpsidr3y; - f4[2] = -dEdPhi*dphidr4z - dEdPsi*dpsidr3z; - f5[0] = -dEdPsi*dpsidr4x; - f5[1] = -dEdPsi*dpsidr4y; - f5[2] = -dEdPsi*dpsidr4z; - // apply force to each of the 5 atoms if (i1 < nlocal) { - d_f(i1,0) += f1[0]; - d_f(i1,1) += f1[1]; - d_f(i1,2) += f1[2]; + d_f(i1,0) += dEdPhi*dphidr1x; + d_f(i1,1) += dEdPhi*dphidr1y; + d_f(i1,2) += dEdPhi*dphidr1z; } if (i2 < nlocal) { - d_f(i2,0) += f2[0]; - d_f(i2,1) += f2[1]; - d_f(i2,2) += f2[2]; + d_f(i2,0) += dEdPhi*dphidr2x + dEdPsi*dpsidr1x; + d_f(i2,1) += dEdPhi*dphidr2y + dEdPsi*dpsidr1y; + d_f(i2,2) += dEdPhi*dphidr2z + dEdPsi*dpsidr1z; } if (i3 < nlocal) { - d_f(i3,0) += f3[0]; - d_f(i3,1) += f3[1]; - d_f(i3,2) += f3[2]; + d_f(i3,0) += (-dEdPhi*dphidr3x - dEdPsi*dpsidr2x); + d_f(i3,1) += (-dEdPhi*dphidr3y - dEdPsi*dpsidr2y); + d_f(i3,2) += (-dEdPhi*dphidr3z - dEdPsi*dpsidr2z); } if (i4 < nlocal) { - d_f(i4,0) += f4[0]; - d_f(i4,1) += f4[1]; - d_f(i4,2) += f4[2]; + d_f(i4,0) += (-dEdPhi*dphidr4x - dEdPsi*dpsidr3x); + d_f(i4,1) += (-dEdPhi*dphidr4y - dEdPsi*dpsidr3y); + d_f(i4,2) += (-dEdPhi*dphidr4z - dEdPsi*dpsidr3z); } if (i5 < nlocal) { - d_f(i5,0) += f5[0]; - d_f(i5,1) += f5[1]; - d_f(i5,2) += f5[2]; + d_f(i5,0) -= dEdPsi*dpsidr4x; + d_f(i5,1) -= dEdPsi*dpsidr4y; + 
d_f(i5,2) -= dEdPsi*dpsidr4z; } } diff --git a/src/KOKKOS/fix_cmap_kokkos.h b/src/KOKKOS/fix_cmap_kokkos.h index 2e1b00d923..efa6a78c09 100644 --- a/src/KOKKOS/fix_cmap_kokkos.h +++ b/src/KOKKOS/fix_cmap_kokkos.h @@ -69,6 +69,9 @@ class FixCMAPKokkos : public FixCMAP, public KokkosBase { ExecutionSpace space) override; protected: + + int nlocal; + typename AT::t_x_array d_x; typename AT::t_f_array d_f; From 915f636d50170fd0dceb5d92a4103220fc777831 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 23 Oct 2024 10:28:14 -0600 Subject: [PATCH 201/294] Update GNU Make for new files --- src/KOKKOS/Install.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index ee2e8e61fe..ce4634ede9 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -131,6 +131,8 @@ action fft3d_kokkos.h fft3d.h action fftdata_kokkos.h fft3d.h action fix_acks2_reaxff_kokkos.cpp fix_acks2_reaxff.cpp action fix_acks2_reaxff_kokkos.h fix_acks2_reaxff.h +action fix_cmap_kokkos.cpp fix_cmap.cpp +action fix_cmap_kokkos.h fix_cmap.h action fix_deform_kokkos.cpp action fix_deform_kokkos.h action fix_dpd_energy_kokkos.cpp fix_dpd_energy.cpp @@ -163,6 +165,8 @@ action fix_npt_kokkos.cpp action fix_npt_kokkos.h action fix_nve_kokkos.cpp action fix_nve_kokkos.h +action fix_nve_limit_kokkos.cpp +action fix_nve_limit_kokkos.h action fix_nve_sphere_kokkos.cpp action fix_nve_sphere_kokkos.h action fix_nvt_kokkos.cpp @@ -179,6 +183,8 @@ action compute_reaxff_atom_kokkos.cpp compute_reaxff_atom.cpp action compute_reaxff_atom_kokkos.h compute_reaxff_atom.h action fix_reaxff_species_kokkos.cpp fix_reaxff_species.cpp action fix_reaxff_species_kokkos.h fix_reaxff_species.h +action fix_recenter_kokkos.cpp +action fix_recenter_kokkos.h action fix_rx_kokkos.cpp fix_rx.cpp action fix_rx_kokkos.h fix_rx.h action fix_setforce_kokkos.cpp @@ -205,8 +211,12 @@ action fix_wall_lj93_kokkos.cpp action fix_wall_lj93_kokkos.h action fix_wall_reflect_kokkos.cpp 
action fix_wall_reflect_kokkos.h +action fix_wall_region_kokkos.cpp +action fix_wall_region_kokkos.h action grid3d_kokkos.cpp fft3d.h action grid3d_kokkos.h fft3d.h +action group_kokkos.cpp +action group_kokkos.h action improper_class2_kokkos.cpp improper_class2.cpp action improper_class2_kokkos.h improper_class2.h action improper_harmonic_kokkos.cpp improper_harmonic.cpp @@ -409,6 +419,8 @@ action rand_pool_wrap_kokkos.cpp action rand_pool_wrap_kokkos.h action region_block_kokkos.cpp action region_block_kokkos.h +action region_sphere_kokkos.cpp +action region_sphere_kokkos.h action remap_kokkos.cpp remap.cpp action remap_kokkos.h remap.h action sna_kokkos_impl.h sna.cpp From 820fe1ee8a3c25b3d9ed54e0c54a840de3ff5f86 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 23 Oct 2024 12:18:20 -0600 Subject: [PATCH 202/294] Fix compile issue with gcc --- cmake/CMakeLists.txt | 2 +- src/MAKE/MACHINES/Makefile.perlmutter_kokkos | 2 +- src/MAKE/MACHINES/Makefile.summit_kokkos | 2 +- src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 3df1ac4927..8d57e237b3 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -141,7 +141,7 @@ endif() # silence nvcc warnings if((PKG_KOKKOS) AND (Kokkos_ENABLE_CUDA) AND NOT (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")) - set(CMAKE_TUNE_DEFAULT "${CMAKE_TUNE_DEFAULT} -Xcudafe --diag_suppress=unrecognized_pragma -Xcudafe -diag-suppress 128") + set(CMAKE_TUNE_DEFAULT "${CMAKE_TUNE_DEFAULT} -Xcudafe --diag_suppress=unrecognized_pragma -Xcudafe --diag_suppress=128") endif() # we require C++11 without extensions. 
Kokkos requires at least C++17 (currently) diff --git a/src/MAKE/MACHINES/Makefile.perlmutter_kokkos b/src/MAKE/MACHINES/Makefile.perlmutter_kokkos index 26df2dc17e..81164aa040 100644 --- a/src/MAKE/MACHINES/Makefile.perlmutter_kokkos +++ b/src/MAKE/MACHINES/Makefile.perlmutter_kokkos @@ -9,7 +9,7 @@ SHELL = /bin/sh KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd) CC = $(KOKKOS_ABSOLUTE_PATH)/bin/nvcc_wrapper -CCFLAGS = -g -O3 -DNDEBUG -Xcudafe --diag_suppress=unrecognized_pragma -Xcudafe -diag-suppress 128 +CCFLAGS = -g -O3 -DNDEBUG -Xcudafe --diag_suppress=unrecognized_pragma -Xcudafe --diag_suppress=128 SHFLAGS = -fPIC DEPFLAGS = -M diff --git a/src/MAKE/MACHINES/Makefile.summit_kokkos b/src/MAKE/MACHINES/Makefile.summit_kokkos index 2207b0ff56..57c25702aa 100644 --- a/src/MAKE/MACHINES/Makefile.summit_kokkos +++ b/src/MAKE/MACHINES/Makefile.summit_kokkos @@ -9,7 +9,7 @@ SHELL = /bin/sh KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd) CC = $(KOKKOS_ABSOLUTE_PATH)/bin/nvcc_wrapper -CCFLAGS = -g -O3 -DNDEBUG -Xcudafe --diag_suppress=unrecognized_pragma -Xcudafe -diag-suppress 128 +CCFLAGS = -g -O3 -DNDEBUG -Xcudafe --diag_suppress=unrecognized_pragma -Xcudafe --diag_suppress=128 SHFLAGS = -fPIC DEPFLAGS = -M diff --git a/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi b/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi index 720ee5ce2e..fd173b5588 100644 --- a/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi +++ b/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi @@ -10,7 +10,7 @@ KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd) export MPICH_CXX = $(KOKKOS_ABSOLUTE_PATH)/bin/nvcc_wrapper export OMPI_CXX = $(KOKKOS_ABSOLUTE_PATH)/bin/nvcc_wrapper CC = mpicxx -CCFLAGS = -g -O3 -DNDEBUG -Xcudafe --diag_suppress=unrecognized_pragma -Xcudafe -diag-suppress 128 +CCFLAGS = -g -O3 -DNDEBUG -Xcudafe --diag_suppress=unrecognized_pragma -Xcudafe --diag_suppress=128 SHFLAGS = -fPIC # uncomment when compiling with Intel 21.5 or older FMTFLAGS = # -std=c++11 From 
40e5d533454e059b63a82bbafecccca963f728c5 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 23 Oct 2024 15:53:56 -0600 Subject: [PATCH 203/294] Prevent deduplication of -Xcudafe flag --- cmake/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 8d57e237b3..ac31d6ebb8 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -141,7 +141,7 @@ endif() # silence nvcc warnings if((PKG_KOKKOS) AND (Kokkos_ENABLE_CUDA) AND NOT (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")) - set(CMAKE_TUNE_DEFAULT "${CMAKE_TUNE_DEFAULT} -Xcudafe --diag_suppress=unrecognized_pragma -Xcudafe --diag_suppress=128") + set(CMAKE_TUNE_DEFAULT "${CMAKE_TUNE_DEFAULT} -Xcudafe --diag_suppress=unrecognized_pragma SHELL:-Xcudafe --diag_suppress=128") endif() # we require C++11 without extensions. Kokkos requires at least C++17 (currently) @@ -591,7 +591,7 @@ separate_arguments(CMAKE_TUNE_FLAGS) foreach(_FLAG ${CMAKE_TUNE_FLAGS}) target_compile_options(lammps PRIVATE ${_FLAG}) # skip these flags when linking the main executable - if(NOT (("${_FLAG}" STREQUAL "-Xcudafe") OR (("${_FLAG}" STREQUAL "--diag_suppress=unrecognized_pragma")))) + if(NOT (("${_FLAG}" STREQUAL "-Xcudafe") OR ("${_FLAG}" STREQUAL "--diag_suppress=unrecognized_pragma") OR ("${_FLAG}" STREQUAL "--diag_suppress=128"))) target_compile_options(lmp PRIVATE ${_FLAG}) endif() endforeach() From a844959a07294b299fd98b4cec0950af69753907 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 23 Oct 2024 23:00:17 -0600 Subject: [PATCH 204/294] Combine into a single -Xcudafe flag to simplify everything --- cmake/CMakeLists.txt | 11 +++-------- src/MAKE/MACHINES/Makefile.perlmutter_kokkos | 2 +- src/MAKE/MACHINES/Makefile.summit_kokkos | 2 +- src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi | 2 +- 4 files changed, 6 insertions(+), 11 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 8d57e237b3..cf10e8b544 100644 --- a/cmake/CMakeLists.txt +++ 
b/cmake/CMakeLists.txt @@ -141,7 +141,7 @@ endif() # silence nvcc warnings if((PKG_KOKKOS) AND (Kokkos_ENABLE_CUDA) AND NOT (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")) - set(CMAKE_TUNE_DEFAULT "${CMAKE_TUNE_DEFAULT} -Xcudafe --diag_suppress=unrecognized_pragma -Xcudafe --diag_suppress=128") + set(CMAKE_TUNE_DEFAULT "${CMAKE_TUNE_DEFAULT}" "-Xcudafe --diag_suppress=unrecognized_pragma,--diag_suppress=128") endif() # we require C++11 without extensions. Kokkos requires at least C++17 (currently) @@ -588,13 +588,8 @@ endif() set(CMAKE_TUNE_FLAGS "${CMAKE_TUNE_DEFAULT}" CACHE STRING "Compiler and machine specific optimization flags (compilation only)") separate_arguments(CMAKE_TUNE_FLAGS) -foreach(_FLAG ${CMAKE_TUNE_FLAGS}) - target_compile_options(lammps PRIVATE ${_FLAG}) - # skip these flags when linking the main executable - if(NOT (("${_FLAG}" STREQUAL "-Xcudafe") OR (("${_FLAG}" STREQUAL "--diag_suppress=unrecognized_pragma")))) - target_compile_options(lmp PRIVATE ${_FLAG}) - endif() -endforeach() +target_compile_options(lammps PRIVATE ${CMAKE_TUNE_FLAGS}) +target_compile_options(lmp PRIVATE ${CMAKE_TUNE_FLAGS}) ######################################################################## # Basic system tests (standard libraries, headers, functions, types) # ######################################################################## diff --git a/src/MAKE/MACHINES/Makefile.perlmutter_kokkos b/src/MAKE/MACHINES/Makefile.perlmutter_kokkos index 81164aa040..decb26b5f6 100644 --- a/src/MAKE/MACHINES/Makefile.perlmutter_kokkos +++ b/src/MAKE/MACHINES/Makefile.perlmutter_kokkos @@ -9,7 +9,7 @@ SHELL = /bin/sh KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd) CC = $(KOKKOS_ABSOLUTE_PATH)/bin/nvcc_wrapper -CCFLAGS = -g -O3 -DNDEBUG -Xcudafe --diag_suppress=unrecognized_pragma -Xcudafe --diag_suppress=128 +CCFLAGS = -g -O3 -DNDEBUG -Xcudafe --diag_suppress=unrecognized_pragma,--diag_suppress=128 SHFLAGS = -fPIC DEPFLAGS = -M diff --git a/src/MAKE/MACHINES/Makefile.summit_kokkos 
b/src/MAKE/MACHINES/Makefile.summit_kokkos index 57c25702aa..ad91e7e203 100644 --- a/src/MAKE/MACHINES/Makefile.summit_kokkos +++ b/src/MAKE/MACHINES/Makefile.summit_kokkos @@ -9,7 +9,7 @@ SHELL = /bin/sh KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd) CC = $(KOKKOS_ABSOLUTE_PATH)/bin/nvcc_wrapper -CCFLAGS = -g -O3 -DNDEBUG -Xcudafe --diag_suppress=unrecognized_pragma -Xcudafe --diag_suppress=128 +CCFLAGS = -g -O3 -DNDEBUG -Xcudafe --diag_suppress=unrecognized_pragma,--diag_suppress=128 SHFLAGS = -fPIC DEPFLAGS = -M diff --git a/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi b/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi index fd173b5588..d4fafed2dc 100644 --- a/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi +++ b/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi @@ -10,7 +10,7 @@ KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd) export MPICH_CXX = $(KOKKOS_ABSOLUTE_PATH)/bin/nvcc_wrapper export OMPI_CXX = $(KOKKOS_ABSOLUTE_PATH)/bin/nvcc_wrapper CC = mpicxx -CCFLAGS = -g -O3 -DNDEBUG -Xcudafe --diag_suppress=unrecognized_pragma -Xcudafe --diag_suppress=128 +CCFLAGS = -g -O3 -DNDEBUG -Xcudafe --diag_suppress=unrecognized_pragma,--diag_suppress=128 SHFLAGS = -fPIC # uncomment when compiling with Intel 21.5 or older FMTFLAGS = # -std=c++11 From f18850397dd691c8222ac90cef2866c0bd7edb89 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 24 Oct 2024 09:43:36 -0600 Subject: [PATCH 205/294] whitespace --- src/KOKKOS/fix_cmap_kokkos.cpp | 17 +- src/KOKKOS/fix_nve_limit_kokkos.cpp | 2 - src/KOKKOS/fix_recenter_kokkos.cpp | 2 - src/KOKKOS/fix_wall_region_kokkos.cpp | 6 +- src/KOKKOS/group_kokkos.cpp | 1 - src/KOKKOS/group_kokkos.h | 2 +- src/KOKKOS/math_special_kokkos.h | 1 - src/KOKKOS/region_block_kokkos.h | 640 +++++++++++++------------- src/KOKKOS/region_sphere_kokkos.cpp | 1 - src/MOLECULE/fix_cmap.cpp | 1 + src/region.cpp | 1 + src/region_block.cpp | 1 + src/region_sphere.cpp | 1 + 13 files changed, 335 insertions(+), 341 deletions(-) diff --git 
a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index e597c34334..ece0ece1db 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -56,6 +56,7 @@ FixCMAPKokkos::FixCMAPKokkos(LAMMPS *lmp, int narg, char **arg) : datamask_modify = F_MASK; // allocate memory for CMAP data + memoryKK->create_kokkos(k_g_axis,g_axis,CMAPDIM,"cmap:g_axis"); memoryKK->create_kokkos(k_cmapgrid,cmapgrid,CMAPMAX,CMAPDIM,CMAPDIM,"cmap:grid"); memoryKK->create_kokkos(k_d1cmapgrid,d1cmapgrid,CMAPMAX,CMAPDIM,CMAPDIM,"cmap:d1grid"); @@ -69,6 +70,7 @@ FixCMAPKokkos::FixCMAPKokkos(LAMMPS *lmp, int narg, char **arg) : d_d12cmapgrid = k_d12cmapgrid.template view(); // read and setup CMAP data + read_grid_map(arg[3]); int i = 0; @@ -88,6 +90,7 @@ FixCMAPKokkos::FixCMAPKokkos(LAMMPS *lmp, int narg, char **arg) : for( int i=0 ; i::FixCMAPKokkos(LAMMPS *lmp, int narg, char **arg) : k_d1cmapgrid.template sync(); k_d2cmapgrid.template sync(); k_d12cmapgrid.template sync(); - } /* ---------------------------------------------------------------------- */ @@ -136,7 +138,6 @@ FixCMAPKokkos::~FixCMAPKokkos() memoryKK->destroy_kokkos(k_crossterm_atom5,crossterm_atom5); memoryKK->destroy_kokkos(d_crosstermlist); - } /* ---------------------------------------------------------------------- */ @@ -148,6 +149,7 @@ void FixCMAPKokkos::init() error->all(FLERR,"Cannot yet use respa with Kokkos"); // on KOKKOS, allocate enough for all crossterms on each GPU to avoid grow operation in device code + maxcrossterm = ncmap; memoryKK->create_kokkos(d_crosstermlist,maxcrossterm,CMAPMAX,"cmap:crosstermlist"); } @@ -159,7 +161,6 @@ void FixCMAPKokkos::init() template void FixCMAPKokkos::pre_neighbor() { - atomKK->sync(execution_space,X_MASK); d_x = atomKK->k_x.view(); int nlocal = atomKK->nlocal; @@ -179,14 +180,12 @@ void FixCMAPKokkos::pre_neighbor() copymode = 1; Kokkos::parallel_scan(Kokkos::RangePolicy(0,nlocal),*this,ncrosstermlist); copymode = 0; - } template 
KOKKOS_INLINE_FUNCTION void FixCMAPKokkos::operator()(TagFixCmapPreNeighbor, const int i, int &l_ncrosstermlist, const bool is_final ) const { - for( int m = 0; m < d_num_crossterm(i); m++) { int atom1 = AtomKokkos::map_kokkos(d_crossterm_atom1(i,m),map_style,k_map_array,k_map_hash); @@ -297,6 +296,7 @@ void FixCMAPKokkos::operator()(TagFixCmapPostForce, const int n, dou double vb45z = d_x(i4,2) - d_x(i5,2); // calculate normal vectors for planes that define the dihedral angles + double a1x = vb12y*vb23z - vb12z*vb23y; double a1y = vb12z*vb23x - vb12x*vb23z; double a1z = vb12x*vb23y - vb12y*vb23x; @@ -325,6 +325,7 @@ void FixCMAPKokkos::operator()(TagFixCmapPostForce, const int n, dou if (a1sq<0.0001 || b1sq<0.0001 || a2sq<0.0001 || b2sq<0.0001) return; // vectors needed to calculate the cross-term dihedral angles + double dpr21r32 = vb21x*vb32x + vb21y*vb32y + vb21z*vb32z; double dpr34r32 = vb34x*vb32x + vb34y*vb32y + vb34z*vb32z; double dpr32r43 = vb32x*vb43x + vb32y*vb43y + vb32z*vb43z; @@ -388,8 +389,8 @@ void FixCMAPKokkos::operator()(TagFixCmapPostForce, const int n, dou bc_interpol(phi,psi,li3,li4,gs,d1gs,d2gs,d12gs,E,dEdPhi,dEdPsi); // sum up cmap energy contributions - // needed for compute_scalar() + double engfraction = 0.2 * E; if (i1 < nlocal) ecmapKK += engfraction; if (i2 < nlocal) ecmapKK += engfraction; @@ -479,6 +480,7 @@ void FixCMAPKokkos::grow_arrays(int nmax) k_crossterm_atom5.template sync(); // force reallocation on host + k_num_crossterm.template modify(); k_crossterm_type.template modify(); k_crossterm_atom1.template modify(); @@ -877,8 +879,6 @@ void FixCMAPKokkos::bc_interpol(double x1, double x2, int low1, int dEdPsi *= (180.0/MY_PI/CMAPDX); } - - /* ---------------------------------------------------------------------- return local index of atom J or any of its images that is closest to atom I if J is not a valid index like -1, just return it @@ -917,7 +917,6 @@ int FixCMAPKokkos::closest_image(const int i, int j) const return closest; 
} - namespace LAMMPS_NS { template class FixCMAPKokkos; #ifdef LMP_KOKKOS_GPU diff --git a/src/KOKKOS/fix_nve_limit_kokkos.cpp b/src/KOKKOS/fix_nve_limit_kokkos.cpp index 942ee41f3a..de77427e49 100644 --- a/src/KOKKOS/fix_nve_limit_kokkos.cpp +++ b/src/KOKKOS/fix_nve_limit_kokkos.cpp @@ -120,7 +120,6 @@ void FixNVELimitKokkos::initial_integrate(int /*vflag*/) ncount += d_ncount; atomKK->modified(execution_space, X_MASK | V_MASK ); - } /* ---------------------------------------------------------------------- */ @@ -190,7 +189,6 @@ void FixNVELimitKokkos::final_integrate() ncount += d_ncount; atomKK->modified(execution_space, V_MASK ); - } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/fix_recenter_kokkos.cpp b/src/KOKKOS/fix_recenter_kokkos.cpp index c3a840ff10..607f5ce8d9 100644 --- a/src/KOKKOS/fix_recenter_kokkos.cpp +++ b/src/KOKKOS/fix_recenter_kokkos.cpp @@ -50,7 +50,6 @@ FixRecenterKokkos::FixRecenterKokkos(LAMMPS *lmp, int narg, char **a template void FixRecenterKokkos::initial_integrate(int /*vflag*/) { - atomKK->sync(execution_space,datamask_read); int nlocal = atomKK->nlocal; if (igroup == atomKK->firstgroup) nlocal = atomKK->nfirst; @@ -121,7 +120,6 @@ void FixRecenterKokkos::initial_integrate(int /*vflag*/) atomKK->modified(execution_space,datamask_modify); } - namespace LAMMPS_NS { template class FixRecenterKokkos; #ifdef LMP_KOKKOS_GPU diff --git a/src/KOKKOS/fix_wall_region_kokkos.cpp b/src/KOKKOS/fix_wall_region_kokkos.cpp index 2eea884472..ab6f7186a1 100644 --- a/src/KOKKOS/fix_wall_region_kokkos.cpp +++ b/src/KOKKOS/fix_wall_region_kokkos.cpp @@ -91,8 +91,8 @@ void FixWallRegionKokkos::post_force(int vflag) // initilize ewall after region->prematch(), // so a dynamic region can access last timestep values - // energy intialize. - // eflag is used to track whether wall energies have been communicated. 
+ // energy intialize + // eflag is used to track whether wall energies have been communicated eflag = 0; double result[10]; @@ -330,7 +330,6 @@ template KOKKOS_INLINE_FUNCTION void FixWallRegionKokkos::v_tally(value_type result, int i, double *v) const { - if (vflag_global) { result[4] += v[0]; result[5] += v[1]; @@ -348,7 +347,6 @@ void FixWallRegionKokkos::v_tally(value_type result, int i, double * Kokkos::atomic_add(&(d_vatom(i,4)),v[4]); Kokkos::atomic_add(&(d_vatom(i,5)),v[5]); } - } namespace LAMMPS_NS { diff --git a/src/KOKKOS/group_kokkos.cpp b/src/KOKKOS/group_kokkos.cpp index 01cf15f6c5..fb115eca0e 100644 --- a/src/KOKKOS/group_kokkos.cpp +++ b/src/KOKKOS/group_kokkos.cpp @@ -71,7 +71,6 @@ double GroupKokkos::mass(int igroup) return all; } - /* ---------------------------------------------------------------------- compute the center-of-mass coords of group of atoms masstotal = total mass diff --git a/src/KOKKOS/group_kokkos.h b/src/KOKKOS/group_kokkos.h index c8573b0d74..f62f192b84 100644 --- a/src/KOKKOS/group_kokkos.h +++ b/src/KOKKOS/group_kokkos.h @@ -23,7 +23,7 @@ template class GroupKokkos : public Group { public: GroupKokkos(class LAMMPS *); - double mass(int); // total mass of atoms in group + double mass(int); // total mass of atoms in group void xcm(int, double, double *); // center-of-mass coords of group }; diff --git a/src/KOKKOS/math_special_kokkos.h b/src/KOKKOS/math_special_kokkos.h index 12e04db1c0..1cc35e1969 100644 --- a/src/KOKKOS/math_special_kokkos.h +++ b/src/KOKKOS/math_special_kokkos.h @@ -271,7 +271,6 @@ namespace MathSpecialKokkos { return v1[0] * v2[0] + v1[1] * v2[1] + v1[2] * v2[2]; } - } // namespace MathSpecialKokkos } // namespace LAMMPS_NS diff --git a/src/KOKKOS/region_block_kokkos.h b/src/KOKKOS/region_block_kokkos.h index d0dcc12b88..052a6a4bcf 100644 --- a/src/KOKKOS/region_block_kokkos.h +++ b/src/KOKKOS/region_block_kokkos.h @@ -61,43 +61,44 @@ class RegBlockKokkos : public RegBlock, public KokkosBase { 
KOKKOS_INLINE_FUNCTION int surface_kokkos(double x, double y, double z, double cutoff) -{ - int ncontact; - double xs, ys, zs; - double xnear[3], xorig[3]; + { + int ncontact; + double xs, ys, zs; + double xnear[3], xorig[3]; - if (dynamic) { - xorig[0] = x; xorig[1] = y; xorig[2] = z; - inverse_transform(x, y, z); - } - - xnear[0] = x; xnear[1] = y; xnear[2] = z; - - if (!openflag) { - if (interior) - ncontact = surface_interior_kokkos(xnear, cutoff); - else - ncontact = surface_exterior_kokkos(xnear, cutoff); - } else { - // one of surface_int/ext() will return 0 - // so no need to worry about offset of contact indices - ncontact = surface_exterior_kokkos(xnear, cutoff) + surface_interior_kokkos(xnear, cutoff); - } - - if (rotateflag && ncontact) { - for (int i = 0; i < ncontact; i++) { - xs = xnear[0] - d_contact[i].delx; - ys = xnear[1] - d_contact[i].dely; - zs = xnear[2] - d_contact[i].delz; - forward_transform(xs, ys, zs); - d_contact[i].delx = xorig[0] - xs; - d_contact[i].dely = xorig[1] - ys; - d_contact[i].delz = xorig[2] - zs; + if (dynamic) { + xorig[0] = x; xorig[1] = y; xorig[2] = z; + inverse_transform(x, y, z); } + + xnear[0] = x; xnear[1] = y; xnear[2] = z; + + if (!openflag) { + if (interior) + ncontact = surface_interior_kokkos(xnear, cutoff); + else + ncontact = surface_exterior_kokkos(xnear, cutoff); + } else { + // one of surface_int/ext() will return 0 + // so no need to worry about offset of contact indices + ncontact = surface_exterior_kokkos(xnear, cutoff) + surface_interior_kokkos(xnear, cutoff); + } + + if (rotateflag && ncontact) { + for (int i = 0; i < ncontact; i++) { + xs = xnear[0] - d_contact[i].delx; + ys = xnear[1] - d_contact[i].dely; + zs = xnear[2] - d_contact[i].delz; + forward_transform(xs, ys, zs); + d_contact[i].delx = xorig[0] - xs; + d_contact[i].dely = xorig[1] - ys; + d_contact[i].delz = xorig[2] - zs; + } + } + + return ncontact; } - return ncontact; -} Kokkos::View d_contact; private: @@ -106,317 +107,316 @@ class 
RegBlockKokkos : public RegBlock, public KokkosBase { typename AT::t_x_array_randomread d_x; typename AT::t_int_1d_randomread d_mask; -KOKKOS_INLINE_FUNCTION -int surface_interior_kokkos(double *x, double cutoff) -{ - double delta; + KOKKOS_INLINE_FUNCTION + int surface_interior_kokkos(double *x, double cutoff) + { + double delta; - // x is exterior to block + // x is exterior to block - if (x[0] < xlo || x[0] > xhi || x[1] < ylo || x[1] > yhi || x[2] < zlo || x[2] > zhi) return 0; + if (x[0] < xlo || x[0] > xhi || x[1] < ylo || x[1] > yhi || x[2] < zlo || x[2] > zhi) return 0; - // x is interior to block or on its surface + // x is interior to block or on its surface - int n = 0; + int n = 0; - delta = x[0] - xlo; - if (delta < cutoff && !open_faces[0]) { - d_contact[n].r = delta; - d_contact[n].delx = delta; - d_contact[n].dely = d_contact[n].delz = 0.0; - d_contact[n].radius = 0; - d_contact[n].iwall = 0; - n++; - } - delta = xhi - x[0]; - if (delta < cutoff && !open_faces[1]) { - d_contact[n].r = delta; - d_contact[n].delx = -delta; - d_contact[n].dely = d_contact[n].delz = 0.0; - d_contact[n].radius = 0; - d_contact[n].iwall = 1; - n++; + delta = x[0] - xlo; + if (delta < cutoff && !open_faces[0]) { + d_contact[n].r = delta; + d_contact[n].delx = delta; + d_contact[n].dely = d_contact[n].delz = 0.0; + d_contact[n].radius = 0; + d_contact[n].iwall = 0; + n++; + } + delta = xhi - x[0]; + if (delta < cutoff && !open_faces[1]) { + d_contact[n].r = delta; + d_contact[n].delx = -delta; + d_contact[n].dely = d_contact[n].delz = 0.0; + d_contact[n].radius = 0; + d_contact[n].iwall = 1; + n++; + } + + delta = x[1] - ylo; + if (delta < cutoff && !open_faces[2]) { + d_contact[n].r = delta; + d_contact[n].dely = delta; + d_contact[n].delx = d_contact[n].delz = 0.0; + d_contact[n].radius = 0; + d_contact[n].iwall = 2; + n++; + } + delta = yhi - x[1]; + if (delta < cutoff && !open_faces[3]) { + d_contact[n].r = delta; + d_contact[n].dely = -delta; + d_contact[n].delx = 
d_contact[n].delz = 0.0; + d_contact[n].radius = 0; + d_contact[n].iwall = 3; + n++; + } + + delta = x[2] - zlo; + if (delta < cutoff && !open_faces[4]) { + d_contact[n].r = delta; + d_contact[n].delz = delta; + d_contact[n].delx = d_contact[n].dely = 0.0; + d_contact[n].radius = 0; + d_contact[n].iwall = 4; + n++; + } + delta = zhi - x[2]; + if (delta < cutoff && !open_faces[5]) { + d_contact[n].r = delta; + d_contact[n].delz = -delta; + d_contact[n].delx = d_contact[n].dely = 0.0; + d_contact[n].radius = 0; + d_contact[n].iwall = 5; + n++; + } + + return n; } - delta = x[1] - ylo; - if (delta < cutoff && !open_faces[2]) { - d_contact[n].r = delta; - d_contact[n].dely = delta; - d_contact[n].delx = d_contact[n].delz = 0.0; - d_contact[n].radius = 0; - d_contact[n].iwall = 2; - n++; - } - delta = yhi - x[1]; - if (delta < cutoff && !open_faces[3]) { - d_contact[n].r = delta; - d_contact[n].dely = -delta; - d_contact[n].delx = d_contact[n].delz = 0.0; - d_contact[n].radius = 0; - d_contact[n].iwall = 3; - n++; - } + KOKKOS_INLINE_FUNCTION + int surface_exterior_kokkos(double *x, double cutoff) + { + double xp, yp, zp; + double xc, yc, zc, dist, mindist; - delta = x[2] - zlo; - if (delta < cutoff && !open_faces[4]) { - d_contact[n].r = delta; - d_contact[n].delz = delta; - d_contact[n].delx = d_contact[n].dely = 0.0; - d_contact[n].radius = 0; - d_contact[n].iwall = 4; - n++; - } - delta = zhi - x[2]; - if (delta < cutoff && !open_faces[5]) { - d_contact[n].r = delta; - d_contact[n].delz = -delta; - d_contact[n].delx = d_contact[n].dely = 0.0; - d_contact[n].radius = 0; - d_contact[n].iwall = 5; - n++; - } + // x is far enough from block that there is no contact + // x is interior to block - return n; -} + if (x[0] <= xlo - cutoff || x[0] >= xhi + cutoff || x[1] <= ylo - cutoff || + x[1] >= yhi + cutoff || x[2] <= zlo - cutoff || x[2] >= zhi + cutoff) + return 0; + if (x[0] > xlo && x[0] < xhi && x[1] > ylo && x[1] < yhi && x[2] > zlo && x[2] < zhi) return 0; 
-KOKKOS_INLINE_FUNCTION -int surface_exterior_kokkos(double *x, double cutoff) -{ - double xp, yp, zp; - double xc, yc, zc, dist, mindist; + // x is exterior to block or on its surface + // xp,yp,zp = point on surface of block that x is closest to + // could be edge or corner pt of block + // do not add contact point if r >= cutoff - // x is far enough from block that there is no contact - // x is interior to block - - if (x[0] <= xlo - cutoff || x[0] >= xhi + cutoff || x[1] <= ylo - cutoff || - x[1] >= yhi + cutoff || x[2] <= zlo - cutoff || x[2] >= zhi + cutoff) - return 0; - if (x[0] > xlo && x[0] < xhi && x[1] > ylo && x[1] < yhi && x[2] > zlo && x[2] < zhi) return 0; - - // x is exterior to block or on its surface - // xp,yp,zp = point on surface of block that x is closest to - // could be edge or corner pt of block - // do not add contact point if r >= cutoff - - if (!openflag) { - if (x[0] < xlo) - xp = xlo; - else if (x[0] > xhi) - xp = xhi; - else - xp = x[0]; - if (x[1] < ylo) - yp = ylo; - else if (x[1] > yhi) - yp = yhi; - else - yp = x[1]; - if (x[2] < zlo) - zp = zlo; - else if (x[2] > zhi) - zp = zhi; - else - zp = x[2]; - } else { - mindist = MAXDOUBLEINT; - for (int i = 0; i < 6; i++) { - if (open_faces[i]) continue; - dist = find_closest_point(i, x, xc, yc, zc); - if (dist < mindist) { - xp = xc; - yp = yc; - zp = zc; - mindist = dist; + if (!openflag) { + if (x[0] < xlo) + xp = xlo; + else if (x[0] > xhi) + xp = xhi; + else + xp = x[0]; + if (x[1] < ylo) + yp = ylo; + else if (x[1] > yhi) + yp = yhi; + else + yp = x[1]; + if (x[2] < zlo) + zp = zlo; + else if (x[2] > zhi) + zp = zhi; + else + zp = x[2]; + } else { + mindist = MAXDOUBLEINT; + for (int i = 0; i < 6; i++) { + if (open_faces[i]) continue; + dist = find_closest_point(i, x, xc, yc, zc); + if (dist < mindist) { + xp = xc; + yp = yc; + zp = zc; + mindist = dist; + } } } + + add_contact(0, x, xp, yp, zp); + d_contact[0].iwall = 0; + if (d_contact[0].r < cutoff) return 1; + return 0; } - 
add_contact(0, x, xp, yp, zp); - d_contact[0].iwall = 0; - if (d_contact[0].r < cutoff) return 1; - return 0; -} - -KOKKOS_INLINE_FUNCTION -void add_contact(int n, double *x, double xp, double yp, double zp) -{ - double delx = x[0] - xp; - double dely = x[1] - yp; - double delz = x[2] - zp; - d_contact[n].r = sqrt(delx * delx + dely * dely + delz * delz); - d_contact[n].radius = 0; - d_contact[n].delx = delx; - d_contact[n].dely = dely; - d_contact[n].delz = delz; -} - -KOKKOS_INLINE_FUNCTION -int k_inside(double x, double y, double z) const -{ - if (x >= xlo && x <= xhi && y >= ylo && y <= yhi && z >= zlo && z <= zhi) - return 1; - return 0; -} - -KOKKOS_INLINE_FUNCTION -void forward_transform(double &x, double &y, double &z) const -{ - if (rotateflag) rotate(x, y, z, theta); - if (moveflag) { - x += dx; - y += dy; - z += dz; + KOKKOS_INLINE_FUNCTION + void add_contact(int n, double *x, double xp, double yp, double zp) + { + double delx = x[0] - xp; + double dely = x[1] - yp; + double delz = x[2] - zp; + d_contact[n].r = sqrt(delx * delx + dely * dely + delz * delz); + d_contact[n].radius = 0; + d_contact[n].delx = delx; + d_contact[n].dely = dely; + d_contact[n].delz = delz; } -} -KOKKOS_INLINE_FUNCTION -void inverse_transform(double &x, double &y, double &z) const -{ - if (moveflag) { - x -= dx; - y -= dy; - z -= dz; - } - if (rotateflag) rotate(x,y,z,-theta); -} - -KOKKOS_INLINE_FUNCTION -void rotate(double &x, double &y, double &z, double angle) const -{ - double a[3],b[3],c[3],d[3],disp[3]; - - double sine = sin(angle); - double cosine = cos(angle); - d[0] = x - point[0]; - d[1] = y - point[1]; - d[2] = z - point[2]; - double x0dotr = d[0]*runit[0] + d[1]*runit[1] + d[2]*runit[2]; - c[0] = x0dotr * runit[0]; - c[1] = x0dotr * runit[1]; - c[2] = x0dotr * runit[2]; - a[0] = d[0] - c[0]; - a[1] = d[1] - c[1]; - a[2] = d[2] - c[2]; - b[0] = runit[1]*a[2] - runit[2]*a[1]; - b[1] = runit[2]*a[0] - runit[0]*a[2]; - b[2] = runit[0]*a[1] - runit[1]*a[0]; - disp[0] = 
a[0]*cosine + b[0]*sine; - disp[1] = a[1]*cosine + b[1]*sine; - disp[2] = a[2]*cosine + b[2]*sine; - x = point[0] + c[0] + disp[0]; - y = point[1] + c[1] + disp[1]; - z = point[2] + c[2] + disp[2]; -} - -KOKKOS_INLINE_FUNCTION -void point_on_line_segment(double *a, double *b, double *c, double *d) -{ - double ba[3], ca[3]; - - sub3(b, a, ba); - sub3(c, a, ca); - double t = dot3(ca, ba) / dot3(ba, ba); - if (t <= 0.0) { - d[0] = a[0]; - d[1] = a[1]; - d[2] = a[2]; - } else if (t >= 1.0) { - d[0] = b[0]; - d[1] = b[1]; - d[2] = b[2]; - } else { - d[0] = a[0] + t * ba[0]; - d[1] = a[1] + t * ba[1]; - d[2] = a[2] + t * ba[2]; - } -} - -KOKKOS_INLINE_FUNCTION -double inside_face(double *xproj, int iface) -{ - if (iface < 2) { - if (xproj[1] > 0 && (xproj[1] < yhi - ylo) && xproj[2] > 0 && (xproj[2] < zhi - zlo)) return 1; - } else if (iface < 4) { - if (xproj[0] > 0 && (xproj[0] < (xhi - xlo)) && xproj[2] > 0 && (xproj[2] < (zhi - zlo))) + KOKKOS_INLINE_FUNCTION + int k_inside(double x, double y, double z) const + { + if (x >= xlo && x <= xhi && y >= ylo && y <= yhi && z >= zlo && z <= zhi) return 1; - } else { - if (xproj[0] > 0 && xproj[0] < (xhi - xlo) && xproj[1] > 0 && xproj[1] < (yhi - ylo)) return 1; + return 0; } - return 0; -} - -KOKKOS_INLINE_FUNCTION -double find_closest_point(int i, double *x, double &xc, double &yc, double &zc) -{ - double dot, d2, d2min; - double xr[3], xproj[3], p[3]; - - xr[0] = x[0] - corners[i][0][0]; - xr[1] = x[1] - corners[i][0][1]; - xr[2] = x[2] - corners[i][0][2]; - dot = face[i][0] * xr[0] + face[i][1] * xr[1] + face[i][2] * xr[2]; - xproj[0] = xr[0] - dot * face[i][0]; - xproj[1] = xr[1] - dot * face[i][1]; - xproj[2] = xr[2] - dot * face[i][2]; - - d2min = MAXDOUBLEINT; - - // check if point projects inside of face - - if (inside_face(xproj, i)) { - d2 = d2min = dot * dot; - xc = xproj[0] + corners[i][0][0]; - yc = xproj[1] + corners[i][0][1]; - zc = xproj[2] + corners[i][0][2]; - - // check each edge - - } else { - 
point_on_line_segment(corners[i][0], corners[i][1], x, p); - d2 = (p[0] - x[0]) * (p[0] - x[0]) + (p[1] - x[1]) * (p[1] - x[1]) + - (p[2] - x[2]) * (p[2] - x[2]); - if (d2 < d2min) { - d2min = d2; - xc = p[0]; - yc = p[1]; - zc = p[2]; - } - - point_on_line_segment(corners[i][1], corners[i][2], x, p); - d2 = (p[0] - x[0]) * (p[0] - x[0]) + (p[1] - x[1]) * (p[1] - x[1]) + - (p[2] - x[2]) * (p[2] - x[2]); - if (d2 < d2min) { - d2min = d2; - xc = p[0]; - yc = p[1]; - zc = p[2]; - } - - point_on_line_segment(corners[i][2], corners[i][3], x, p); - d2 = (p[0] - x[0]) * (p[0] - x[0]) + (p[1] - x[1]) * (p[1] - x[1]) + - (p[2] - x[2]) * (p[2] - x[2]); - if (d2 < d2min) { - d2min = d2; - xc = p[0]; - yc = p[1]; - zc = p[2]; - } - - point_on_line_segment(corners[i][3], corners[i][0], x, p); - d2 = (p[0] - x[0]) * (p[0] - x[0]) + (p[1] - x[1]) * (p[1] - x[1]) + - (p[2] - x[2]) * (p[2] - x[2]); - if (d2 < d2min) { - d2min = d2; - xc = p[0]; - yc = p[1]; - zc = p[2]; + KOKKOS_INLINE_FUNCTION + void forward_transform(double &x, double &y, double &z) const + { + if (rotateflag) rotate(x, y, z, theta); + if (moveflag) { + x += dx; + y += dy; + z += dz; } } - return d2min; -} + KOKKOS_INLINE_FUNCTION + void inverse_transform(double &x, double &y, double &z) const + { + if (moveflag) { + x -= dx; + y -= dy; + z -= dz; + } + if (rotateflag) rotate(x,y,z,-theta); + } + KOKKOS_INLINE_FUNCTION + void rotate(double &x, double &y, double &z, double angle) const + { + double a[3],b[3],c[3],d[3],disp[3]; + + double sine = sin(angle); + double cosine = cos(angle); + d[0] = x - point[0]; + d[1] = y - point[1]; + d[2] = z - point[2]; + double x0dotr = d[0]*runit[0] + d[1]*runit[1] + d[2]*runit[2]; + c[0] = x0dotr * runit[0]; + c[1] = x0dotr * runit[1]; + c[2] = x0dotr * runit[2]; + a[0] = d[0] - c[0]; + a[1] = d[1] - c[1]; + a[2] = d[2] - c[2]; + b[0] = runit[1]*a[2] - runit[2]*a[1]; + b[1] = runit[2]*a[0] - runit[0]*a[2]; + b[2] = runit[0]*a[1] - runit[1]*a[0]; + disp[0] = a[0]*cosine + 
b[0]*sine; + disp[1] = a[1]*cosine + b[1]*sine; + disp[2] = a[2]*cosine + b[2]*sine; + x = point[0] + c[0] + disp[0]; + y = point[1] + c[1] + disp[1]; + z = point[2] + c[2] + disp[2]; + } + + KOKKOS_INLINE_FUNCTION + void point_on_line_segment(double *a, double *b, double *c, double *d) + { + double ba[3], ca[3]; + + sub3(b, a, ba); + sub3(c, a, ca); + double t = dot3(ca, ba) / dot3(ba, ba); + if (t <= 0.0) { + d[0] = a[0]; + d[1] = a[1]; + d[2] = a[2]; + } else if (t >= 1.0) { + d[0] = b[0]; + d[1] = b[1]; + d[2] = b[2]; + } else { + d[0] = a[0] + t * ba[0]; + d[1] = a[1] + t * ba[1]; + d[2] = a[2] + t * ba[2]; + } + } + + KOKKOS_INLINE_FUNCTION + double inside_face(double *xproj, int iface) + { + if (iface < 2) { + if (xproj[1] > 0 && (xproj[1] < yhi - ylo) && xproj[2] > 0 && (xproj[2] < zhi - zlo)) return 1; + } else if (iface < 4) { + if (xproj[0] > 0 && (xproj[0] < (xhi - xlo)) && xproj[2] > 0 && (xproj[2] < (zhi - zlo))) + return 1; + } else { + if (xproj[0] > 0 && xproj[0] < (xhi - xlo) && xproj[1] > 0 && xproj[1] < (yhi - ylo)) return 1; + } + + return 0; + } + + KOKKOS_INLINE_FUNCTION + double find_closest_point(int i, double *x, double &xc, double &yc, double &zc) + { + double dot, d2, d2min; + double xr[3], xproj[3], p[3]; + + xr[0] = x[0] - corners[i][0][0]; + xr[1] = x[1] - corners[i][0][1]; + xr[2] = x[2] - corners[i][0][2]; + dot = face[i][0] * xr[0] + face[i][1] * xr[1] + face[i][2] * xr[2]; + xproj[0] = xr[0] - dot * face[i][0]; + xproj[1] = xr[1] - dot * face[i][1]; + xproj[2] = xr[2] - dot * face[i][2]; + + d2min = MAXDOUBLEINT; + + // check if point projects inside of face + + if (inside_face(xproj, i)) { + d2 = d2min = dot * dot; + xc = xproj[0] + corners[i][0][0]; + yc = xproj[1] + corners[i][0][1]; + zc = xproj[2] + corners[i][0][2]; + + // check each edge + + } else { + point_on_line_segment(corners[i][0], corners[i][1], x, p); + d2 = (p[0] - x[0]) * (p[0] - x[0]) + (p[1] - x[1]) * (p[1] - x[1]) + + (p[2] - x[2]) * (p[2] - x[2]); + if (d2 < 
d2min) { + d2min = d2; + xc = p[0]; + yc = p[1]; + zc = p[2]; + } + + point_on_line_segment(corners[i][1], corners[i][2], x, p); + d2 = (p[0] - x[0]) * (p[0] - x[0]) + (p[1] - x[1]) * (p[1] - x[1]) + + (p[2] - x[2]) * (p[2] - x[2]); + if (d2 < d2min) { + d2min = d2; + xc = p[0]; + yc = p[1]; + zc = p[2]; + } + + point_on_line_segment(corners[i][2], corners[i][3], x, p); + d2 = (p[0] - x[0]) * (p[0] - x[0]) + (p[1] - x[1]) * (p[1] - x[1]) + + (p[2] - x[2]) * (p[2] - x[2]); + if (d2 < d2min) { + d2min = d2; + xc = p[0]; + yc = p[1]; + zc = p[2]; + } + + point_on_line_segment(corners[i][3], corners[i][0], x, p); + d2 = (p[0] - x[0]) * (p[0] - x[0]) + (p[1] - x[1]) * (p[1] - x[1]) + + (p[2] - x[2]) * (p[2] - x[2]); + if (d2 < d2min) { + d2min = d2; + xc = p[0]; + yc = p[1]; + zc = p[2]; + } + } + + return d2min; + } }; diff --git a/src/KOKKOS/region_sphere_kokkos.cpp b/src/KOKKOS/region_sphere_kokkos.cpp index b9a305d9fb..07275ee69e 100644 --- a/src/KOKKOS/region_sphere_kokkos.cpp +++ b/src/KOKKOS/region_sphere_kokkos.cpp @@ -75,7 +75,6 @@ void RegSphereKokkos::operator()(TagRegSphereMatchAll, const int &i) } } - /* ---------------------------------------------------------------------- */ namespace LAMMPS_NS { diff --git a/src/MOLECULE/fix_cmap.cpp b/src/MOLECULE/fix_cmap.cpp index d9859326c9..02116965b5 100644 --- a/src/MOLECULE/fix_cmap.cpp +++ b/src/MOLECULE/fix_cmap.cpp @@ -131,6 +131,7 @@ FixCMAP::~FixCMAP() if (copymode) return; // unregister callbacks to this fix from Atom class + atom->delete_callback(id,Atom::GROW); atom->delete_callback(id,Atom::RESTART); diff --git a/src/region.cpp b/src/region.cpp index 6bcbc4470a..7399b14adb 100644 --- a/src/region.cpp +++ b/src/region.cpp @@ -50,6 +50,7 @@ Region::Region(LAMMPS *lmp, int /*narg*/, char **arg) : Region::~Region() { if (copymode) return; + delete[] id; delete[] style; delete[] xstr; diff --git a/src/region_block.cpp b/src/region_block.cpp index 9376016843..36c38f517c 100644 --- a/src/region_block.cpp +++ 
b/src/region_block.cpp @@ -262,6 +262,7 @@ RegBlock::RegBlock(LAMMPS *lmp, int narg, char **arg) : RegBlock::~RegBlock() { if (copymode) return; + delete[] xlostr; delete[] xhistr; delete[] ylostr; diff --git a/src/region_sphere.cpp b/src/region_sphere.cpp index ea6e39d894..ec472c031c 100644 --- a/src/region_sphere.cpp +++ b/src/region_sphere.cpp @@ -102,6 +102,7 @@ RegSphere::RegSphere(LAMMPS *lmp, int narg, char **arg) : RegSphere::~RegSphere() { if (copymode) return; + delete[] xstr; delete[] ystr; delete[] zstr; From 4a64b3d9ced759f7c8c189bab6bea3af6036443f Mon Sep 17 00:00:00 2001 From: alphataubio Date: Thu, 24 Oct 2024 12:04:41 -0400 Subject: [PATCH 206/294] add d_exchange_sendlist to pack_exchange_kokkos() --- src/KOKKOS/fix_cmap_kokkos.cpp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index e597c34334..5772dffdbc 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -658,15 +658,14 @@ int FixCMAPKokkos::pack_exchange_kokkos( { k_buf.template sync(); - //k_copylist.template sync(); + k_copylist.template sync(); k_exchange_sendlist.template sync(); auto d_buf = typename ArrayTypes::t_xfloat_1d_um( k_buf.template view().data(), k_buf.extent(0)*k_buf.extent(1)); - //d_copylist = k_copylist.view(); + d_copylist = k_copylist.view(); auto d_exchange_sendlist = k_exchange_sendlist.view(); - //this->nsend = nsend; int n; copymode = 1; @@ -695,6 +694,19 @@ int FixCMAPKokkos::pack_exchange_kokkos( d_buf(j++) = static_cast (l_crossterm_atom4(i,m)); d_buf(j++) = static_cast (l_crossterm_atom5(i,m)); } + + const int k = d_copylist(mysend); + if (k > -1) { + l_num_crossterm(i) = l_num_crossterm(k); + for (int m = 0; m < l_num_crossterm(k); m++) { + l_crossterm_type(i,m) = l_crossterm_type(k,m); + l_crossterm_atom1(i,m) = l_crossterm_atom1(k,m); + l_crossterm_atom2(i,m) = l_crossterm_atom2(k,m); + l_crossterm_atom3(i,m) = 
l_crossterm_atom3(k,m); + l_crossterm_atom4(i,m) = l_crossterm_atom4(k,m); + l_crossterm_atom5(i,m) = l_crossterm_atom5(k,m); + } + } } },n); From 00ff895c0d50ffdad56ccf8d14423df036fbdfd6 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Thu, 24 Oct 2024 12:11:12 -0400 Subject: [PATCH 207/294] Update fix_cmap_kokkos.cpp --- src/KOKKOS/fix_cmap_kokkos.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index d65a72e63f..c701ac1a91 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -655,7 +655,7 @@ int FixCMAPKokkos::unpack_exchange(int nlocal, double *buf) template int FixCMAPKokkos::pack_exchange_kokkos( const int &nsend, DAT::tdual_xfloat_2d &k_buf, - DAT::tdual_int_1d k_exchange_sendlist, DAT::tdual_int_1d /*k_copylist*/, + DAT::tdual_int_1d k_exchange_sendlist, DAT::tdual_int_1d k_copylist, ExecutionSpace space) { @@ -666,8 +666,8 @@ int FixCMAPKokkos::pack_exchange_kokkos( auto d_buf = typename ArrayTypes::t_xfloat_1d_um( k_buf.template view().data(), k_buf.extent(0)*k_buf.extent(1)); - d_copylist = k_copylist.view(); - auto d_exchange_sendlist = k_exchange_sendlist.view(); + d_copylist = k_copylist.template view(); + auto d_exchange_sendlist = k_exchange_sendlist.template view(); int n; copymode = 1; From ce830fcdbde61fef35e04cfc109f8df0c9485bb8 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Thu, 24 Oct 2024 12:20:02 -0400 Subject: [PATCH 208/294] Update fix_cmap_kokkos.cpp --- src/KOKKOS/fix_cmap_kokkos.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index c701ac1a91..13233b65a2 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -666,7 +666,7 @@ int FixCMAPKokkos::pack_exchange_kokkos( auto d_buf = typename ArrayTypes::t_xfloat_1d_um( k_buf.template view().data(), k_buf.extent(0)*k_buf.extent(1)); - d_copylist = 
k_copylist.template view(); + auto d_copylist = k_copylist.template view(); auto d_exchange_sendlist = k_exchange_sendlist.template view(); int n; @@ -718,6 +718,14 @@ int FixCMAPKokkos::pack_exchange_kokkos( if (space == Host) k_buf.sync(); else k_buf.sync(); + k_num_crossterm.template modify(); + k_crossterm_type.template modify(); + k_crossterm_atom1.template modify(); + k_crossterm_atom2.template modify(); + k_crossterm_atom3.template modify(); + k_crossterm_atom4.template modify(); + k_crossterm_atom5.template modify(); + return n; } From a5ab8be0a24a18fd3114d21c510f8629cb692f2a Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Fri, 25 Oct 2024 14:16:25 +0100 Subject: [PATCH 209/294] Clarify restriction on periodic cell dimensions --- doc/src/fix_acks2_reaxff.rst | 3 ++- doc/src/fix_qeq_reaxff.rst | 3 ++- doc/src/fix_qtpie_reaxff.rst | 6 ++++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/doc/src/fix_acks2_reaxff.rst b/doc/src/fix_acks2_reaxff.rst index ebb1b48051..566e17a330 100644 --- a/doc/src/fix_acks2_reaxff.rst +++ b/doc/src/fix_acks2_reaxff.rst @@ -111,7 +111,8 @@ LAMMPS was built with that package. See the :doc:`Build package This fix does not correctly handle interactions involving multiple periodic images of the same atom. Hence, it should not be used for -periodic cell dimensions less than :math:`10~\AA`. +periodic cell dimensions smaller than the non-bonded cutoff radius, +which is typically :math:`10~\AA` for ReaxFF simulations. This fix may be used in combination with :doc:`fix efield ` and will apply the external electric field during charge equilibration, diff --git a/doc/src/fix_qeq_reaxff.rst b/doc/src/fix_qeq_reaxff.rst index e90842ea6a..c449c8cda9 100644 --- a/doc/src/fix_qeq_reaxff.rst +++ b/doc/src/fix_qeq_reaxff.rst @@ -124,7 +124,8 @@ LAMMPS was built with that package. See the :doc:`Build package This fix does not correctly handle interactions involving multiple periodic images of the same atom. 
Hence, it should not be used for -periodic cell dimensions less than 10 Angstroms. +periodic cell dimensions smaller than the non-bonded cutoff radius, +which is typically :math:`10~\AA` for ReaxFF simulations. This fix may be used in combination with :doc:`fix efield ` and will apply the external electric field during charge equilibration, diff --git a/doc/src/fix_qtpie_reaxff.rst b/doc/src/fix_qtpie_reaxff.rst index b7faa772af..cf59e4701a 100644 --- a/doc/src/fix_qtpie_reaxff.rst +++ b/doc/src/fix_qtpie_reaxff.rst @@ -150,7 +150,8 @@ LAMMPS was built with that package. See the :doc:`Build package This fix does not correctly handle interactions involving multiple periodic images of the same atom. Hence, it should not be used for -periodic cell dimensions less than 10 Angstroms. +periodic cell dimensions smaller than the non-bonded cutoff radius, +which is typically :math:`10~\AA` for ReaxFF simulations. This fix may be used in combination with :doc:`fix efield ` and will apply the external electric field during charge equilibration, @@ -166,7 +167,8 @@ the *potential* keyword for `fix efield`. Related commands """""""""""""""" -:doc:`pair_style reaxff `, :doc:`fix qeq/reaxff ` +:doc:`pair_style reaxff `, :doc:`fix qeq/reaxff `, +:doc:`fix acks2/reaxff ` Default """"""" From e0c3022ec356cd81e15eabd1d569da5b8a6c97a5 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Fri, 25 Oct 2024 14:55:51 -0400 Subject: [PATCH 210/294] printf debugging --- src/KOKKOS/fix_cmap_kokkos.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 13233b65a2..74322115ff 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -695,6 +695,7 @@ int FixCMAPKokkos::pack_exchange_kokkos( d_buf(j++) = static_cast (l_crossterm_atom3(i,m)); d_buf(j++) = static_cast (l_crossterm_atom4(i,m)); d_buf(j++) = static_cast (l_crossterm_atom5(i,m)); + Kokkos::printf(" *** ok 1 ... 
i %i j %i m %i\n", i, j, m); } const int k = d_copylist(mysend); @@ -707,6 +708,7 @@ int FixCMAPKokkos::pack_exchange_kokkos( l_crossterm_atom3(i,m) = l_crossterm_atom3(k,m); l_crossterm_atom4(i,m) = l_crossterm_atom4(k,m); l_crossterm_atom5(i,m) = l_crossterm_atom5(k,m); + Kokkos::printf(" *** ok 2 ... i %i k %i m %i\n", i, k, m); } } } From e91b5dce787ec68fa5a6e89b6dbe53e6c1326e7d Mon Sep 17 00:00:00 2001 From: alphataubio Date: Fri, 25 Oct 2024 18:52:50 -0400 Subject: [PATCH 211/294] fix typos --- src/group.cpp | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/group.cpp b/src/group.cpp index 909e741c6b..136faa4059 100644 --- a/src/group.cpp +++ b/src/group.cpp @@ -1141,10 +1141,10 @@ void Group::xcm(int igroup, double masstotal, double *cm, Region *region) /* ---------------------------------------------------------------------- compute the center-of-mass velocity of group of atoms masstotal = total mass - return center-of-mass velocity in cm[] + return center-of-mass velocity in vcm[] ------------------------------------------------------------------------- */ -void Group::vcm(int igroup, double masstotal, double *cm) +void Group::vcm(int igroup, double masstotal, double *vcm) { int groupbit = bitmask[igroup]; @@ -1176,21 +1176,21 @@ void Group::vcm(int igroup, double masstotal, double *cm) } } - MPI_Allreduce(p, cm, 3, MPI_DOUBLE, MPI_SUM, world); + MPI_Allreduce(p, vcm, 3, MPI_DOUBLE, MPI_SUM, world); if (masstotal > 0.0) { - cm[0] /= masstotal; - cm[1] /= masstotal; - cm[2] /= masstotal; + vcm[0] /= masstotal; + vcm[1] /= masstotal; + vcm[2] /= masstotal; } } /* ---------------------------------------------------------------------- compute the center-of-mass velocity of group of atoms in region masstotal = total mass - return center-of-mass velocity in cm[] + return center-of-mass velocity in vcm[] ------------------------------------------------------------------------- */ -void Group::vcm(int igroup, 
double masstotal, double *cm, Region *region) +void Group::vcm(int igroup, double masstotal, double *vcm, Region *region) { int groupbit = bitmask[igroup]; region->prematch(); @@ -1224,11 +1224,11 @@ void Group::vcm(int igroup, double masstotal, double *cm, Region *region) } } - MPI_Allreduce(p, cm, 3, MPI_DOUBLE, MPI_SUM, world); + MPI_Allreduce(p, vcm, 3, MPI_DOUBLE, MPI_SUM, world); if (masstotal > 0.0) { - cm[0] /= masstotal; - cm[1] /= masstotal; - cm[2] /= masstotal; + vcm[0] /= masstotal; + vcm[1] /= masstotal; + vcm[2] /= masstotal; } } @@ -1236,7 +1236,7 @@ void Group::vcm(int igroup, double masstotal, double *cm, Region *region) compute the total force on group of atoms ------------------------------------------------------------------------- */ -void Group::fcm(int igroup, double *cm) +void Group::fcm(int igroup, double *fcm) { int groupbit = bitmask[igroup]; @@ -1254,14 +1254,14 @@ void Group::fcm(int igroup, double *cm) flocal[2] += f[i][2]; } - MPI_Allreduce(flocal, cm, 3, MPI_DOUBLE, MPI_SUM, world); + MPI_Allreduce(flocal, fcm, 3, MPI_DOUBLE, MPI_SUM, world); } /* ---------------------------------------------------------------------- compute the total force on group of atoms in region ------------------------------------------------------------------------- */ -void Group::fcm(int igroup, double *cm, Region *region) +void Group::fcm(int igroup, double *fcm, Region *region) { int groupbit = bitmask[igroup]; region->prematch(); @@ -1281,7 +1281,7 @@ void Group::fcm(int igroup, double *cm, Region *region) flocal[2] += f[i][2]; } - MPI_Allreduce(flocal, cm, 3, MPI_DOUBLE, MPI_SUM, world); + MPI_Allreduce(flocal, fcm, 3, MPI_DOUBLE, MPI_SUM, world); } /* ---------------------------------------------------------------------- From ea7fd079ce33bde4fb8de0a0c83c1cc6e6397c0c Mon Sep 17 00:00:00 2001 From: alphataubio Date: Fri, 25 Oct 2024 19:14:20 -0400 Subject: [PATCH 212/294] add vcm() and angmom() --- src/KOKKOS/group_kokkos.cpp | 127 
+++++++++++++++++++++++++++++++++++- src/KOKKOS/group_kokkos.h | 2 + 2 files changed, 126 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/group_kokkos.cpp b/src/KOKKOS/group_kokkos.cpp index fb115eca0e..6a64dd79ca 100644 --- a/src/KOKKOS/group_kokkos.cpp +++ b/src/KOKKOS/group_kokkos.cpp @@ -82,15 +82,12 @@ template void GroupKokkos::xcm(int igroup, double masstotal, double *cm) { int groupbit = bitmask[igroup]; - auto d_x = atomKK->k_x.template view(); auto d_mask = atomKK->k_mask.template view(); - auto d_type = atomKK->k_type.template view(); auto d_image = atomKK->k_image.template view(); auto l_prd = Few(domain->prd); auto l_h = Few(domain->h); auto l_triclinic = domain->triclinic; - double cmone[3]; if (atomKK->rmass) { @@ -114,6 +111,7 @@ void GroupKokkos::xcm(int igroup, double masstotal, double *cm) } else { auto d_mass = atomKK->k_mass.template view(); + auto d_type = atomKK->k_type.template view(); Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_cmx, double &l_cmy, double &l_cmz) { if (d_mask(i) & groupbit) { @@ -139,6 +137,129 @@ void GroupKokkos::xcm(int igroup, double masstotal, double *cm) } } +/* ---------------------------------------------------------------------- + compute the center-of-mass velocity of group of atoms + masstotal = total mass + return center-of-mass velocity in vcm[] +------------------------------------------------------------------------- */ + +template +void GroupKokkos::vcm(int igroup, double masstotal, double *vcm) +{ + int groupbit = bitmask[igroup]; + auto d_v = atomKK->k_v.template view(); + auto d_mask = atomKK->k_mask.template view(); + auto d_image = atomKK->k_image.template view(); + + double p[3], massone; + p[0] = p[1] = p[2] = 0.0; + + if (atomKK->rmass) { + + auto d_rmass = atomKK->k_rmass.template view(); + + Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) { + if (d_mask(i) & groupbit) { + double massone = d_rmass(i); + 
l_px += d_v(i,0) * massone; + l_py += d_v(i,1) * massone; + l_pz += d_v(i,2) * massone; + } + }, p[0], p[1], p[2]); + + } else { + + auto d_mass = atomKK->k_mass.template view(); + auto d_type = atomKK->k_type.template view(); + + Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) { + if (d_mask(i) & groupbit) { + double massone = d_mass(d_type(i)); + l_px += d_v(i,0) * massone; + l_py += d_v(i,1) * massone; + l_pz += d_v(i,2) * massone; + } + }, p[0], p[1], p[2]); + + } + + MPI_Allreduce(p, vcm, 3, MPI_DOUBLE, MPI_SUM, world); + if (masstotal > 0.0) { + vcm[0] /= masstotal; + vcm[1] /= masstotal; + vcm[2] /= masstotal; + } +} + + + +/* ---------------------------------------------------------------------- + compute the angular momentum L (lmom) of group + around center-of-mass cm + must unwrap atoms to compute L correctly +------------------------------------------------------------------------- */ + +template +void GroupKokkos::angmom(int igroup, double *cm, double *lmom) +{ + int groupbit = bitmask[igroup]; + auto d_x = atomKK->k_x.template view(); + auto d_v = atomKK->k_v.template view(); + auto d_mask = atomKK->k_mask.template view(); + auto d_image = atomKK->k_image.template view(); + auto l_prd = Few(domain->prd); + auto l_h = Few(domain->h); + auto l_triclinic = domain->triclinic; + + double p[3] = {0.0, 0.0, 0.0}; + + if (atomKK->rmass) { + + auto d_rmass = atomKK->k_rmass.template view(); + + Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) { + if (d_mask(i) & groupbit) { + double massone = d_rmass(i); + Few x_i; + x_i[0] = d_x(i,0); + x_i[1] = d_x(i,1); + x_i[2] = d_x(i,2); + auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); + double dx = unwrapKK[0] - cm[0]; + double dy = unwrapKK[1] - cm[1]; + double dz = unwrapKK[2] - cm[2]; + l_px += massone * (dy * d_v(i,2) - dz * d_v(i,1)); + l_py += massone * (dz * d_v(i,0) - 
dx * d_v(i,2)); + l_pz += massone * (dx * d_v(i,1) - dy * d_v(i,0)); + } + }, p[0], p[1], p[2]); + + } else { + + auto d_mass = atomKK->k_mass.template view(); + auto d_type = atomKK->k_type.template view(); + + Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) { + if (d_mask(i) & groupbit) { + double massone = d_mass(d_type(i)); + Few x_i; + x_i[0] = d_x(i,0); + x_i[1] = d_x(i,1); + x_i[2] = d_x(i,2); + auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); + double dx = unwrapKK[0] - cm[0]; + double dy = unwrapKK[1] - cm[1]; + double dz = unwrapKK[2] - cm[2]; + l_px += massone * (dy * d_v(i,2) - dz * d_v(i,1)); + l_py += massone * (dz * d_v(i,0) - dx * d_v(i,2)); + l_pz += massone * (dx * d_v(i,1) - dy * d_v(i,0)); + } + }, p[0], p[1], p[2]); + + } + MPI_Allreduce(p, lmom, 3, MPI_DOUBLE, MPI_SUM, world); +} + namespace LAMMPS_NS { template class GroupKokkos; #ifdef LMP_KOKKOS_GPU diff --git a/src/KOKKOS/group_kokkos.h b/src/KOKKOS/group_kokkos.h index f62f192b84..a51e339b50 100644 --- a/src/KOKKOS/group_kokkos.h +++ b/src/KOKKOS/group_kokkos.h @@ -25,6 +25,8 @@ class GroupKokkos : public Group { GroupKokkos(class LAMMPS *); double mass(int); // total mass of atoms in group void xcm(int, double, double *); // center-of-mass coords of group + void vcm(int, double, double *); // center-of-mass velocity of group + void angmom(int, double *, double *); // angular momentum of group }; } // namespace LAMMPS_NS From 71bbc52feb5d8cd2fa3cdc49b501c18176d5fc0f Mon Sep 17 00:00:00 2001 From: alphataubio Date: Fri, 25 Oct 2024 20:03:45 -0400 Subject: [PATCH 213/294] recommit changes from @stanmoore1 --- cmake/CMakeLists.txt | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index ac31d6ebb8..cf10e8b544 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -141,7 +141,7 @@ endif() # silence nvcc warnings if((PKG_KOKKOS) 
AND (Kokkos_ENABLE_CUDA) AND NOT (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")) - set(CMAKE_TUNE_DEFAULT "${CMAKE_TUNE_DEFAULT} -Xcudafe --diag_suppress=unrecognized_pragma SHELL:-Xcudafe --diag_suppress=128") + set(CMAKE_TUNE_DEFAULT "${CMAKE_TUNE_DEFAULT}" "-Xcudafe --diag_suppress=unrecognized_pragma,--diag_suppress=128") endif() # we require C++11 without extensions. Kokkos requires at least C++17 (currently) @@ -588,13 +588,8 @@ endif() set(CMAKE_TUNE_FLAGS "${CMAKE_TUNE_DEFAULT}" CACHE STRING "Compiler and machine specific optimization flags (compilation only)") separate_arguments(CMAKE_TUNE_FLAGS) -foreach(_FLAG ${CMAKE_TUNE_FLAGS}) - target_compile_options(lammps PRIVATE ${_FLAG}) - # skip these flags when linking the main executable - if(NOT (("${_FLAG}" STREQUAL "-Xcudafe") OR ("${_FLAG}" STREQUAL "--diag_suppress=unrecognized_pragma") OR ("${_FLAG}" STREQUAL "--diag_suppress=128"))) - target_compile_options(lmp PRIVATE ${_FLAG}) - endif() -endforeach() +target_compile_options(lammps PRIVATE ${CMAKE_TUNE_FLAGS}) +target_compile_options(lmp PRIVATE ${CMAKE_TUNE_FLAGS}) ######################################################################## # Basic system tests (standard libraries, headers, functions, types) # ######################################################################## From 12f0eff7a687fcde3530f906a4c4b973beb3358f Mon Sep 17 00:00:00 2001 From: alphataubio Date: Fri, 25 Oct 2024 20:06:13 -0400 Subject: [PATCH 214/294] make group kokkos aware --- src/KOKKOS/fix_momentum_kokkos.cpp | 42 +++++++++++------------------- src/KOKKOS/fix_momentum_kokkos.h | 4 +++ 2 files changed, 19 insertions(+), 27 deletions(-) diff --git a/src/KOKKOS/fix_momentum_kokkos.cpp b/src/KOKKOS/fix_momentum_kokkos.cpp index fa959cd582..b41a3530cb 100644 --- a/src/KOKKOS/fix_momentum_kokkos.cpp +++ b/src/KOKKOS/fix_momentum_kokkos.cpp @@ -24,7 +24,8 @@ using namespace LAMMPS_NS; using namespace FixConst; /* ---------------------------------------------------------------------- - 
Contributing author: Dan Ibanez (SNL) + Contributing authors: Dan Ibanez (SNL) + Mitch Murphy (alphataubio at gmail) ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */ @@ -35,6 +36,7 @@ FixMomentumKokkos::FixMomentumKokkos(LAMMPS *lmp, int narg, char **a { kokkosable = 1; atomKK = (AtomKokkos *) atom; + groupKK = (GroupKokkos *)group; execution_space = ExecutionSpaceFromDevice::space; datamask_read = EMPTY_MASK; datamask_modify = EMPTY_MASK; @@ -92,8 +94,7 @@ void FixMomentumKokkos::end_of_step() double ekin_old,ekin_new; ekin_old = ekin_new = 0.0; - if (dynamic) - masstotal = group->mass(igroup); // change once Group is ported to Kokkos + if (dynamic) masstotal = groupKK->mass(igroup); // do nothing if group is empty, i.e. mass is zero; @@ -107,12 +108,8 @@ void FixMomentumKokkos::end_of_step() auto groupbit2 = groupbit; if (linear) { - /* this is needed because Group is not Kokkos-aware ! 
*/ - atomKK->sync(ExecutionSpaceFromDevice::space, - V_MASK | MASK_MASK | TYPE_MASK | RMASS_MASK); - Few tmpvcm; - group->vcm(igroup,masstotal,&tmpvcm[0]); - const Few vcm(tmpvcm); + double vcm[3]; + groupKK->vcm(igroup,masstotal,vcm); // adjust velocities by vcm to zero linear momentum // only adjust a component if flag is set @@ -133,20 +130,11 @@ void FixMomentumKokkos::end_of_step() } if (angular) { - Few tmpxcm, tmpangmom, tmpomega; - double inertia[3][3]; - /* syncs for each Kokkos-unaware Group method */ - atomKK->sync(ExecutionSpaceFromDevice::space, - X_MASK | MASK_MASK | TYPE_MASK | IMAGE_MASK | RMASS_MASK); - group->xcm(igroup,masstotal,&tmpxcm[0]); - atomKK->sync(ExecutionSpaceFromDevice::space, - X_MASK | V_MASK | MASK_MASK | TYPE_MASK | IMAGE_MASK | RMASS_MASK); - group->angmom(igroup,&tmpxcm[0],&tmpangmom[0]); - atomKK->sync(ExecutionSpaceFromDevice::space, - X_MASK | MASK_MASK | TYPE_MASK | IMAGE_MASK | RMASS_MASK); - group->inertia(igroup,&tmpxcm[0],inertia); - group->omega(&tmpangmom[0],inertia,&tmpomega[0]); - const Few xcm(tmpxcm), angmom(tmpangmom), omega(tmpomega); + double xcm[3],angmom[3],omega[3],inertia[3][3]; + groupKK->xcm(igroup,masstotal,xcm); + groupKK->angmom(igroup,xcm,angmom); + groupKK->inertia(igroup,xcm,inertia); + group->omega(angmom,inertia,omega); // adjust velocities to zero omega // vnew_i = v_i - w x r_i @@ -167,10 +155,10 @@ void FixMomentumKokkos::end_of_step() x_i[0] = x(i,0); x_i[1] = x(i,1); x_i[2] = x(i,2); - auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,image(i)); - auto dx = unwrap[0] - xcm[0]; - auto dy = unwrap[1] - xcm[1]; - auto dz = unwrap[2] - xcm[2]; + auto unwrapKK = DomainKokkos::unmap(prd,h,triclinic,x_i,image(i)); + auto dx = unwrapKK[0] - xcm[0]; + auto dy = unwrapKK[1] - xcm[1]; + auto dz = unwrapKK[2] - xcm[2]; v(i,0) -= omega[1]*dz - omega[2]*dy; v(i,1) -= omega[2]*dx - omega[0]*dz; v(i,2) -= omega[0]*dy - omega[1]*dx; diff --git a/src/KOKKOS/fix_momentum_kokkos.h 
b/src/KOKKOS/fix_momentum_kokkos.h index 3bb46035fe..0ab91c423d 100644 --- a/src/KOKKOS/fix_momentum_kokkos.h +++ b/src/KOKKOS/fix_momentum_kokkos.h @@ -24,6 +24,8 @@ FixStyle(momentum/kk/host,FixMomentumKokkos); #define LMP_FIX_MOMENTUM_KOKKOS_H #include "fix_momentum.h" + +#include "group_kokkos.h" #include "kokkos_type.h" namespace LAMMPS_NS { @@ -35,6 +37,8 @@ class FixMomentumKokkos : public FixMomentum { FixMomentumKokkos(class LAMMPS *, int, char **); void end_of_step() override; + private: + GroupKokkos *groupKK; }; } From b725c01c0decbeade142ca738551f3d8073dfcfb Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Fri, 25 Oct 2024 23:22:25 -0500 Subject: [PATCH 215/294] split pair list into 2 separate lists and use 5 jobs for more even time balancing --- .github/workflows/kokkos-regression.yaml | 8 ++++--- tools/regression-tests/get_kokkos_input.py | 25 +++++++++++++++++----- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/.github/workflows/kokkos-regression.yaml b/.github/workflows/kokkos-regression.yaml index 0756b080b0..5cde7d67f2 100644 --- a/.github/workflows/kokkos-regression.yaml +++ b/.github/workflows/kokkos-regression.yaml @@ -17,9 +17,9 @@ jobs: env: CCACHE_DIR: ${{ github.workspace }}/.ccache strategy: - max-parallel: 4 + max-parallel: 5 matrix: - idx: [ 'pair', 'fix', 'compute', 'misc' ] + idx: [ 'pair-0', 'pair-1', 'fix', 'compute', 'misc' ] steps: - name: Checkout repository @@ -93,8 +93,10 @@ jobs: run: | source linuxenv/bin/activate python3 tools/regression-tests/get_kokkos_input.py \ - --examples-top-level=examples \ + --examples-top-level=examples --batch-size=100 \ --filter-out="balance;fire;gcmc;granregion;mdi;mliap;neb;pace;prd;pour;python;snap" + cat input-list-fix-1-kk.txt >> input-list-fix-0-kk.txt + mv input-list-fix-0-kk.txt input-list-fix-kk.txt python3 tools/regression-tests/run_tests.py \ --lmp-bin=build/lmp \ diff --git a/tools/regression-tests/get_kokkos_input.py b/tools/regression-tests/get_kokkos_input.py index 
c03a813456..ca4108a9ec 100644 --- a/tools/regression-tests/get_kokkos_input.py +++ b/tools/regression-tests/get_kokkos_input.py @@ -55,20 +55,35 @@ if __name__ == "__main__": parser = ArgumentParser() parser.add_argument("--examples-top-level", dest="example_toplevel", default="", help="Examples top-level") parser.add_argument("--filter-out", dest="filter_out", default="", help="Filter out input scripts that contain strings") + parser.add_argument("--batch-size", dest="batch_size", default=50, help="Batch size of scripts per input list") args = parser.parse_args() example_toplevel = args.example_toplevel filter_out = args.filter_out.split(";") - + batch_size = int(args.batch_size) + # print the list of the input scripts that has each feature to a separate file features = [ 'pair', 'fix', 'compute' ] for feature in features: input_list = [] generate_list(feature, example_toplevel, filter_out, input_list) - with open(f"input-list-{feature}-kk.txt", "w") as f: - for input in input_list: - if input != "": - f.write(f"{input}\n") + + num_batches = int((len(input_list) + batch_size - 1) / batch_size) + if num_batches < 2: + with open(f"input-list-{feature}-kk.txt", "w") as f: + for input in input_list: + if input != "": + f.write(f"{input}\n") + else: + for idx in range(num_batches): + with open(f"input-list-{feature}-{idx}-kk.txt", "w") as f: + start = idx * batch_size + for i in range(batch_size): + if start + i < len(input_list): + input = input_list[start + i] + if input != "": + f.write(f"{input}\n") + # combine the list of the input scripts that have these feature to a single file input-list-misc-kk.txt features = [ 'angle', 'bond', 'dihedral', 'improper', 'min' ] From 67d1c7d17d1dc5833edd487576776358f3c29fae Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Sat, 26 Oct 2024 09:01:39 -0500 Subject: [PATCH 216/294] set OMP_PROC_BIND=false and adjust timeout --- .github/workflows/kokkos-regression.yaml | 4 ++-- tools/regression-tests/config_kokkos_openmp.yaml | 2 +- 
2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/kokkos-regression.yaml b/.github/workflows/kokkos-regression.yaml index 5cde7d67f2..0f5f5f67c0 100644 --- a/.github/workflows/kokkos-regression.yaml +++ b/.github/workflows/kokkos-regression.yaml @@ -95,9 +95,9 @@ jobs: python3 tools/regression-tests/get_kokkos_input.py \ --examples-top-level=examples --batch-size=100 \ --filter-out="balance;fire;gcmc;granregion;mdi;mliap;neb;pace;prd;pour;python;snap" - cat input-list-fix-1-kk.txt >> input-list-fix-0-kk.txt - mv input-list-fix-0-kk.txt input-list-fix-kk.txt + cat input-list-fix-*-kk.txt > input-list-fix-kk.txt + export OMP_PROC_BIND=false python3 tools/regression-tests/run_tests.py \ --lmp-bin=build/lmp \ --config-file=tools/regression-tests/config_kokkos_openmp.yaml \ diff --git a/tools/regression-tests/config_kokkos_openmp.yaml b/tools/regression-tests/config_kokkos_openmp.yaml index 1979d54b6a..217888c762 100644 --- a/tools/regression-tests/config_kokkos_openmp.yaml +++ b/tools/regression-tests/config_kokkos_openmp.yaml @@ -21,6 +21,6 @@ abs: 1e-3 rel: 1e-7 - timeout: 120 + timeout: 180 nugget: 1.0 epsilon: 1e-16 From dd0dfd3c7fee003fb950f273b97b2eb73c9a0a95 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Sun, 27 Oct 2024 00:00:10 -0500 Subject: [PATCH 217/294] split randomly the pair input list by batch size --- .github/workflows/kokkos-regression.yaml | 5 +++-- tools/regression-tests/get_kokkos_input.py | 19 +++++++++++-------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/.github/workflows/kokkos-regression.yaml b/.github/workflows/kokkos-regression.yaml index 0f5f5f67c0..49a610fb31 100644 --- a/.github/workflows/kokkos-regression.yaml +++ b/.github/workflows/kokkos-regression.yaml @@ -83,6 +83,7 @@ jobs: -D PKG_REAXFF=on \ -D PKG_REPLICA=on \ -D PKG_SRD=on \ + -D PKG_SPH=on \ -D PKG_VORONOI=on \ -G Ninja cmake --build build @@ -93,8 +94,8 @@ jobs: run: | source linuxenv/bin/activate python3 
tools/regression-tests/get_kokkos_input.py \ - --examples-top-level=examples --batch-size=100 \ - --filter-out="balance;fire;gcmc;granregion;mdi;mliap;neb;pace;prd;pour;python;snap" + --examples-top-level=examples --batch-size=50 \ + --filter-out="balance;fire;gcmc;granregion;hyper;mc;mdi;mliap;neb;pace;prd;pour;python;rigid;snap;streitz;shear;ttm" cat input-list-fix-*-kk.txt > input-list-fix-kk.txt export OMP_PROC_BIND=false diff --git a/tools/regression-tests/get_kokkos_input.py b/tools/regression-tests/get_kokkos_input.py index ca4108a9ec..f94ca42c8e 100644 --- a/tools/regression-tests/get_kokkos_input.py +++ b/tools/regression-tests/get_kokkos_input.py @@ -7,6 +7,7 @@ # These 4 files will be read in by the regression tester run_tests.py from argparse import ArgumentParser +import random import subprocess import sys @@ -68,7 +69,7 @@ if __name__ == "__main__": input_list = [] generate_list(feature, example_toplevel, filter_out, input_list) - num_batches = int((len(input_list) + batch_size - 1) / batch_size) + num_batches = int(len(input_list) / batch_size) if num_batches < 2: with open(f"input-list-{feature}-kk.txt", "w") as f: for input in input_list: @@ -77,13 +78,15 @@ if __name__ == "__main__": else: for idx in range(num_batches): with open(f"input-list-{feature}-{idx}-kk.txt", "w") as f: - start = idx * batch_size - for i in range(batch_size): - if start + i < len(input_list): - input = input_list[start + i] - if input != "": - f.write(f"{input}\n") - + if len(input_list) > batch_size: + sampled = random.sample(input_list, batch_size) + else: + sampled = input_list + for input in sampled: + if input != "": + if input in input_list: + input_list.remove(input) + f.write(f"{input}\n") # combine the list of the input scripts that have these feature to a single file input-list-misc-kk.txt features = [ 'angle', 'bond', 'dihedral', 'improper', 'min' ] From b946e998b5daf6b4ccf87b224f492ed4f8aa107d Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Sun, 27 Oct 2024 
12:13:29 -0500 Subject: [PATCH 218/294] also split fix into 2 lists, using 6 jobs for the matrix strategy --- .github/workflows/kokkos-regression.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/kokkos-regression.yaml b/.github/workflows/kokkos-regression.yaml index 49a610fb31..bcb3899ad2 100644 --- a/.github/workflows/kokkos-regression.yaml +++ b/.github/workflows/kokkos-regression.yaml @@ -17,9 +17,9 @@ jobs: env: CCACHE_DIR: ${{ github.workspace }}/.ccache strategy: - max-parallel: 5 + max-parallel: 6 matrix: - idx: [ 'pair-0', 'pair-1', 'fix', 'compute', 'misc' ] + idx: [ 'pair-0', 'pair-1', 'fix-0', 'fix-1', 'compute', 'misc' ] steps: - name: Checkout repository @@ -96,7 +96,6 @@ jobs: python3 tools/regression-tests/get_kokkos_input.py \ --examples-top-level=examples --batch-size=50 \ --filter-out="balance;fire;gcmc;granregion;hyper;mc;mdi;mliap;neb;pace;prd;pour;python;rigid;snap;streitz;shear;ttm" - cat input-list-fix-*-kk.txt > input-list-fix-kk.txt export OMP_PROC_BIND=false python3 tools/regression-tests/run_tests.py \ From c00700dec75b7f91ceeacfcca86357789aae3a10 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Mon, 28 Oct 2024 15:30:14 -0400 Subject: [PATCH 219/294] cuda sync --- src/KOKKOS/fix_cmap_kokkos.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 74322115ff..b59dabe34b 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -663,6 +663,14 @@ int FixCMAPKokkos::pack_exchange_kokkos( k_copylist.template sync(); k_exchange_sendlist.template sync(); + k_num_crossterm.template sync(); + k_crossterm_type.template sync(); + k_crossterm_atom1.template sync(); + k_crossterm_atom2.template sync(); + k_crossterm_atom3.template sync(); + k_crossterm_atom4.template sync(); + k_crossterm_atom5.template sync(); + auto d_buf = typename ArrayTypes::t_xfloat_1d_um( k_buf.template 
view().data(), k_buf.extent(0)*k_buf.extent(1)); @@ -700,6 +708,9 @@ int FixCMAPKokkos::pack_exchange_kokkos( const int k = d_copylist(mysend); if (k > -1) { + + Kokkos::printf(" *** ok 2 ... i %i k %i\n", i, k); + l_num_crossterm(i) = l_num_crossterm(k); for (int m = 0; m < l_num_crossterm(k); m++) { l_crossterm_type(i,m) = l_crossterm_type(k,m); @@ -708,7 +719,6 @@ int FixCMAPKokkos::pack_exchange_kokkos( l_crossterm_atom3(i,m) = l_crossterm_atom3(k,m); l_crossterm_atom4(i,m) = l_crossterm_atom4(k,m); l_crossterm_atom5(i,m) = l_crossterm_atom5(k,m); - Kokkos::printf(" *** ok 2 ... i %i k %i m %i\n", i, k, m); } } } From e4a9b0632058f6e1b3d12984a351ff9f96f351b1 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Mon, 28 Oct 2024 17:47:57 -0400 Subject: [PATCH 220/294] Update fix_cmap_kokkos.cpp --- src/KOKKOS/fix_cmap_kokkos.cpp | 35 +++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index b59dabe34b..9f3fcf5a1a 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -46,12 +46,9 @@ FixCMAPKokkos::FixCMAPKokkos(LAMMPS *lmp, int narg, char **arg) : FixCMAP(lmp, narg, arg) { kokkosable = 1; - exchange_comm_device = sort_device = 1; - atomKK = (AtomKokkos *)atom; execution_space = ExecutionSpaceFromDevice::space; - datamask_read = X_MASK | F_MASK; datamask_modify = F_MASK; @@ -658,7 +655,6 @@ int FixCMAPKokkos::pack_exchange_kokkos( DAT::tdual_int_1d k_exchange_sendlist, DAT::tdual_int_1d k_copylist, ExecutionSpace space) { - k_buf.template sync(); k_copylist.template sync(); k_exchange_sendlist.template sync(); @@ -748,8 +744,7 @@ int FixCMAPKokkos::pack_exchange_kokkos( template void FixCMAPKokkos::unpack_exchange_kokkos( DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv, - int /*nrecv1*/, int /*nextrarecv1*/, - ExecutionSpace /*space*/) + int nrecv1, int nextrarecv1, ExecutionSpace /*space*/) { k_buf.template 
sync(); k_indices.template sync(); @@ -758,7 +753,7 @@ void FixCMAPKokkos::unpack_exchange_kokkos( k_buf.template view().data(), k_buf.extent(0)*k_buf.extent(1)); - auto d_indices = k_indices.view(); + auto d_indices = k_indices.template view(); //this->nrecv1 = nrecv1; //this->nextrarecv1 = nextrarecv1; @@ -783,14 +778,24 @@ void FixCMAPKokkos::unpack_exchange_kokkos( Kokkos::parallel_for(nrecv, KOKKOS_LAMBDA(const int &i) { int index = d_indices(i); - l_num_crossterm(index) = static_cast (d_buf(i)); - for (int m = 0; m < l_num_crossterm(index); m++) { - l_crossterm_type(index,m) = static_cast(d_buf(i*m+1)); - l_crossterm_atom1(index,m) = static_cast (d_buf(i*m+2)); - l_crossterm_atom2(index,m) = static_cast (d_buf(i*m+3)); - l_crossterm_atom3(index,m) = static_cast (d_buf(i*m+4)); - l_crossterm_atom4(index,m) = static_cast (d_buf(i*m+5)); - l_crossterm_atom5(index,m) = static_cast (d_buf(i*m+6)); + + if (index > -1) { + + // int m = d_buf[i]; + // if (i >= nrecv1) m = nextrarecv1 + d_buf[nextrarecv1 + i - nrecv1]; + + l_num_crossterm(index) = static_cast (d_buf(i)); + for (int m = 0; m < l_num_crossterm(index); m++) { + + Kokkos::printf(" *** unpack_exchange_kokkos() ... 
nrecv %i nrecv1 %i nextrarecv1 %i i %i index %i m %i l_num_crossterm(index) %i\n", nrecv, nrecv1, nextrarecv1, i, index, m, l_num_crossterm(index)); + + l_crossterm_type(index,m) = static_cast(d_buf(i+1)); + l_crossterm_atom1(index,m) = static_cast (d_buf(i+2)); + l_crossterm_atom2(index,m) = static_cast (d_buf(i+3)); + l_crossterm_atom3(index,m) = static_cast (d_buf(i+4)); + l_crossterm_atom4(index,m) = static_cast (d_buf(i+5)); + l_crossterm_atom5(index,m) = static_cast (d_buf(i+6)); + } } }); From 7ffe6aefce6e530e1e280e826a5cbfaf56e5b86e Mon Sep 17 00:00:00 2001 From: alphataubio Date: Mon, 28 Oct 2024 18:37:21 -0400 Subject: [PATCH 221/294] Update fix_cmap_kokkos.cpp --- src/KOKKOS/fix_cmap_kokkos.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 9f3fcf5a1a..b0fa1926d3 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -705,7 +705,7 @@ int FixCMAPKokkos::pack_exchange_kokkos( const int k = d_copylist(mysend); if (k > -1) { - Kokkos::printf(" *** ok 2 ... i %i k %i\n", i, k); + // Kokkos::printf(" *** ok 2 ... i %i k %i\n", i, k); l_num_crossterm(i) = l_num_crossterm(k); for (int m = 0; m < l_num_crossterm(k); m++) { @@ -720,6 +720,8 @@ int FixCMAPKokkos::pack_exchange_kokkos( } },n); + Kokkos::printf(" *** ok 3 ... n %i \n", n); + copymode = 0; k_buf.modify(); @@ -776,7 +778,7 @@ void FixCMAPKokkos::unpack_exchange_kokkos( auto l_crossterm_atom4 = d_crossterm_atom4; auto l_crossterm_atom5 = d_crossterm_atom5; - Kokkos::parallel_for(nrecv, KOKKOS_LAMBDA(const int &i) { + Kokkos::parallel_for(nextrarecv1, KOKKOS_LAMBDA(const int &i) { int index = d_indices(i); if (index > -1) { @@ -789,7 +791,7 @@ void FixCMAPKokkos::unpack_exchange_kokkos( Kokkos::printf(" *** unpack_exchange_kokkos() ... 
nrecv %i nrecv1 %i nextrarecv1 %i i %i index %i m %i l_num_crossterm(index) %i\n", nrecv, nrecv1, nextrarecv1, i, index, m, l_num_crossterm(index)); - l_crossterm_type(index,m) = static_cast(d_buf(i+1)); + l_crossterm_type(index,m) = static_cast (d_buf(i+1)); l_crossterm_atom1(index,m) = static_cast (d_buf(i+2)); l_crossterm_atom2(index,m) = static_cast (d_buf(i+3)); l_crossterm_atom3(index,m) = static_cast (d_buf(i+4)); From 97c6b063a7ea1f7fa9744f29dcaeb4e9bc4fd5b4 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Mon, 28 Oct 2024 18:41:04 -0400 Subject: [PATCH 222/294] Update fix_cmap_kokkos.cpp --- src/KOKKOS/fix_cmap_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index b0fa1926d3..786934c217 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -699,7 +699,7 @@ int FixCMAPKokkos::pack_exchange_kokkos( d_buf(j++) = static_cast (l_crossterm_atom3(i,m)); d_buf(j++) = static_cast (l_crossterm_atom4(i,m)); d_buf(j++) = static_cast (l_crossterm_atom5(i,m)); - Kokkos::printf(" *** ok 1 ... i %i j %i m %i\n", i, j, m); + Kokkos::printf(" *** ok 1 ... i %i j %i l_num_crossterm(i) %i m %i\n", i, j, l_num_crossterm(i), m); } const int k = d_copylist(mysend); From a6ad1a837c7a16e3341265925628fb69679e77b5 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Mon, 28 Oct 2024 18:55:59 -0400 Subject: [PATCH 223/294] Update fix_cmap_kokkos.cpp --- src/KOKKOS/fix_cmap_kokkos.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 786934c217..a63718aaa3 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -699,7 +699,7 @@ int FixCMAPKokkos::pack_exchange_kokkos( d_buf(j++) = static_cast (l_crossterm_atom3(i,m)); d_buf(j++) = static_cast (l_crossterm_atom4(i,m)); d_buf(j++) = static_cast (l_crossterm_atom5(i,m)); - Kokkos::printf(" *** ok 1 ... 
i %i j %i l_num_crossterm(i) %i m %i\n", i, j, l_num_crossterm(i), m); + Kokkos::printf(" *** ok 1 ... i %i j %i l_num_crossterm(i) %i m %i d_buf[] %f %f %f %f %f %f %f\n", i, j, l_num_crossterm(i), m, d_buf(j-7), d_buf(j-6), d_buf(j-5), d_buf(j-4), d_buf(j-3), d_buf(j-2), d_buf(j-1)); } const int k = d_copylist(mysend); @@ -787,9 +787,11 @@ void FixCMAPKokkos::unpack_exchange_kokkos( // if (i >= nrecv1) m = nextrarecv1 + d_buf[nextrarecv1 + i - nrecv1]; l_num_crossterm(index) = static_cast (d_buf(i)); + + Kokkos::printf(" *** unpack_exchange_kokkos() ... nrecv %i nrecv1 %i nextrarecv1 %i i %i index %i l_num_crossterm(index) %i d_buf[] %f %f %f %f %f %f %f\n", nrecv, nrecv1, nextrarecv1, i, index, l_num_crossterm(index), d_buf(i), d_buf(i+1), d_buf(i+2), d_buf(i+3), d_buf(i+4), d_buf(i+5), d_buf(i+6)); + for (int m = 0; m < l_num_crossterm(index); m++) { - Kokkos::printf(" *** unpack_exchange_kokkos() ... nrecv %i nrecv1 %i nextrarecv1 %i i %i index %i m %i l_num_crossterm(index) %i\n", nrecv, nrecv1, nextrarecv1, i, index, m, l_num_crossterm(index)); l_crossterm_type(index,m) = static_cast (d_buf(i+1)); l_crossterm_atom1(index,m) = static_cast (d_buf(i+2)); From 586f2b626d6184c7bb8745d6b07707cd11ac2b9d Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 29 Oct 2024 11:13:36 -0400 Subject: [PATCH 224/294] Update fix_cmap_kokkos.cpp --- src/KOKKOS/fix_cmap_kokkos.cpp | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index a63718aaa3..e811e23bf5 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -691,7 +691,7 @@ int FixCMAPKokkos::pack_exchange_kokkos( if (!final) offset += l_num_crossterm(i); else { int j = nsend + offset; - d_buf(j) = static_cast (l_num_crossterm(i)); + d_buf(j++) = static_cast (l_num_crossterm(i)); for (int m = 0; m < l_num_crossterm(i); m++) { d_buf(j++) = static_cast (l_crossterm_type(i,m)); d_buf(j++) = 
static_cast (l_crossterm_atom1(i,m)); @@ -757,9 +757,6 @@ void FixCMAPKokkos::unpack_exchange_kokkos( auto d_indices = k_indices.template view(); - //this->nrecv1 = nrecv1; - //this->nextrarecv1 = nextrarecv1; - k_num_crossterm.template sync(); k_crossterm_type.template sync(); k_crossterm_atom1.template sync(); @@ -786,19 +783,18 @@ void FixCMAPKokkos::unpack_exchange_kokkos( // int m = d_buf[i]; // if (i >= nrecv1) m = nextrarecv1 + d_buf[nextrarecv1 + i - nrecv1]; - l_num_crossterm(index) = static_cast (d_buf(i)); + int j = nrecv + i; + l_num_crossterm(index) = static_cast (d_buf(j)); - Kokkos::printf(" *** unpack_exchange_kokkos() ... nrecv %i nrecv1 %i nextrarecv1 %i i %i index %i l_num_crossterm(index) %i d_buf[] %f %f %f %f %f %f %f\n", nrecv, nrecv1, nextrarecv1, i, index, l_num_crossterm(index), d_buf(i), d_buf(i+1), d_buf(i+2), d_buf(i+3), d_buf(i+4), d_buf(i+5), d_buf(i+6)); + Kokkos::printf(" *** unpack_exchange_kokkos() ... nrecv %i nrecv1 %i nextrarecv1 %i i %i j %i index %i l_num_crossterm(index) %i d_buf[] %f %f %f %f %f %f %f\n", nrecv, nrecv1, nextrarecv1, i, j, index, l_num_crossterm(index), d_buf(j), d_buf(j+1), d_buf(j+2), d_buf(j+3), d_buf(j+4), d_buf(j+5), d_buf(j+6)); for (int m = 0; m < l_num_crossterm(index); m++) { - - - l_crossterm_type(index,m) = static_cast (d_buf(i+1)); - l_crossterm_atom1(index,m) = static_cast (d_buf(i+2)); - l_crossterm_atom2(index,m) = static_cast (d_buf(i+3)); - l_crossterm_atom3(index,m) = static_cast (d_buf(i+4)); - l_crossterm_atom4(index,m) = static_cast (d_buf(i+5)); - l_crossterm_atom5(index,m) = static_cast (d_buf(i+6)); + l_crossterm_type(index,m) = static_cast (d_buf(j+1)); + l_crossterm_atom1(index,m) = static_cast (d_buf(j+2)); + l_crossterm_atom2(index,m) = static_cast (d_buf(j+3)); + l_crossterm_atom3(index,m) = static_cast (d_buf(j+4)); + l_crossterm_atom4(index,m) = static_cast (d_buf(j+5)); + l_crossterm_atom5(index,m) = static_cast (d_buf(j+6)); } } }); From 
4941cf25c0160f0dce83e06a8e1290ce8e80301e Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 29 Oct 2024 12:59:09 -0400 Subject: [PATCH 225/294] typo --- src/KOKKOS/kokkos_type.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index ef50e739d9..a81e2ed0cf 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -166,7 +166,7 @@ template using KKScatterView = Kokkos::Experimental::ScatterView; -// set ExecutionSpace stuct with variable "space" +// set ExecutionSpace struct with variable "space" template struct ExecutionSpaceFromDevice; From 452ae4621d26454ac8000521c3405c9be167489c Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 29 Oct 2024 13:29:48 -0400 Subject: [PATCH 226/294] Update fix_cmap_kokkos.cpp --- src/KOKKOS/fix_cmap_kokkos.cpp | 79 ++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 36 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index e811e23bf5..55f4d35983 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -690,33 +690,38 @@ int FixCMAPKokkos::pack_exchange_kokkos( if (!final) offset += l_num_crossterm(i); else { - int j = nsend + offset; - d_buf(j++) = static_cast (l_num_crossterm(i)); - for (int m = 0; m < l_num_crossterm(i); m++) { - d_buf(j++) = static_cast (l_crossterm_type(i,m)); - d_buf(j++) = static_cast (l_crossterm_atom1(i,m)); - d_buf(j++) = static_cast (l_crossterm_atom2(i,m)); - d_buf(j++) = static_cast (l_crossterm_atom3(i,m)); - d_buf(j++) = static_cast (l_crossterm_atom4(i,m)); - d_buf(j++) = static_cast (l_crossterm_atom5(i,m)); - Kokkos::printf(" *** ok 1 ... 
i %i j %i l_num_crossterm(i) %i m %i d_buf[] %f %f %f %f %f %f %f\n", i, j, l_num_crossterm(i), m, d_buf(j-7), d_buf(j-6), d_buf(j-5), d_buf(j-4), d_buf(j-3), d_buf(j-2), d_buf(j-1)); + int m = nsend + offset; + d_buf(mysend) = m; + d_buf(m++) = static_cast (l_num_crossterm(i)); + for (int k = 0; k < l_num_crossterm(i); k++) { + d_buf(m++) = static_cast (l_crossterm_type(i,k)); + d_buf(m++) = static_cast (l_crossterm_atom1(i,k)); + d_buf(m++) = static_cast (l_crossterm_atom2(i,k)); + d_buf(m++) = static_cast (l_crossterm_atom3(i,k)); + d_buf(m++) = static_cast (l_crossterm_atom4(i,k)); + d_buf(m++) = static_cast (l_crossterm_atom5(i,k)); + Kokkos::printf(" *** ok 1 ... i %i m %i l_num_crossterm(i) %i k %i d_buf[] %f %f %f %f %f %f %f\n", i, m, l_num_crossterm(i), k, d_buf(m-7), d_buf(m-6), d_buf(m-5), d_buf(m-4), d_buf(m-3), d_buf(m-2), d_buf(m-1)); } - const int k = d_copylist(mysend); - if (k > -1) { + const int j = d_copylist(mysend); + if (j > -1) { // Kokkos::printf(" *** ok 2 ... i %i k %i\n", i, k); - l_num_crossterm(i) = l_num_crossterm(k); - for (int m = 0; m < l_num_crossterm(k); m++) { - l_crossterm_type(i,m) = l_crossterm_type(k,m); - l_crossterm_atom1(i,m) = l_crossterm_atom1(k,m); - l_crossterm_atom2(i,m) = l_crossterm_atom2(k,m); - l_crossterm_atom3(i,m) = l_crossterm_atom3(k,m); - l_crossterm_atom4(i,m) = l_crossterm_atom4(k,m); - l_crossterm_atom5(i,m) = l_crossterm_atom5(k,m); + l_num_crossterm(i) = l_num_crossterm(j); + for (int k = 0; k < l_num_crossterm(i); k++) { + l_crossterm_type(i,k) = l_crossterm_type(j,k); + l_crossterm_atom1(i,k) = l_crossterm_atom1(j,k); + l_crossterm_atom2(i,k) = l_crossterm_atom2(j,k); + l_crossterm_atom3(i,k) = l_crossterm_atom3(j,k); + l_crossterm_atom4(i,k) = l_crossterm_atom4(j,k); + l_crossterm_atom5(i,k) = l_crossterm_atom5(j,k); } } + + for( int k=0 ; k::pack_exchange_kokkos( copymode = 0; - k_buf.modify(); - if (space == Host) k_buf.sync(); - else k_buf.sync(); + k_buf.template modify(); + if (space == Host) 
k_buf.template sync(); + else k_buf.template sync(); k_num_crossterm.template modify(); k_crossterm_type.template modify(); @@ -775,26 +780,28 @@ void FixCMAPKokkos::unpack_exchange_kokkos( auto l_crossterm_atom4 = d_crossterm_atom4; auto l_crossterm_atom5 = d_crossterm_atom5; - Kokkos::parallel_for(nextrarecv1, KOKKOS_LAMBDA(const int &i) { + Kokkos::parallel_for(nrecv, KOKKOS_LAMBDA(const int &i) { int index = d_indices(i); if (index > -1) { - // int m = d_buf[i]; - // if (i >= nrecv1) m = nextrarecv1 + d_buf[nextrarecv1 + i - nrecv1]; + int m = d_buf(i); + // if (i >= nrecv1) m = nextrarecv1 + d_buf[nextrarecv1 + i - nrecv1]; - int j = nrecv + i; - l_num_crossterm(index) = static_cast (d_buf(j)); + for( int k=0 ; k (d_buf(m++)); - for (int m = 0; m < l_num_crossterm(index); m++) { - l_crossterm_type(index,m) = static_cast (d_buf(j+1)); - l_crossterm_atom1(index,m) = static_cast (d_buf(j+2)); - l_crossterm_atom2(index,m) = static_cast (d_buf(j+3)); - l_crossterm_atom3(index,m) = static_cast (d_buf(j+4)); - l_crossterm_atom4(index,m) = static_cast (d_buf(j+5)); - l_crossterm_atom5(index,m) = static_cast (d_buf(j+6)); + Kokkos::printf(" *** unpack_exchange_kokkos() ... 
nrecv %i nrecv1 %i nextrarecv1 %i i %i m %i index %i l_num_crossterm(index) %i \n", nrecv, nrecv1, nextrarecv1, i, m, index, l_num_crossterm(index)); + + for (int k = 0; k < l_num_crossterm(index); k++) { + l_crossterm_type(index,k) = static_cast (d_buf(m++)); + l_crossterm_atom1(index,k) = static_cast (d_buf(m++)); + l_crossterm_atom2(index,k) = static_cast (d_buf(m++)); + l_crossterm_atom3(index,k) = static_cast (d_buf(m++)); + l_crossterm_atom4(index,k) = static_cast (d_buf(m++)); + l_crossterm_atom5(index,k) = static_cast (d_buf(m++)); } } }); From c17e2cd30d1a4394c63746500c88f9652e1a2f71 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 29 Oct 2024 13:58:13 -0400 Subject: [PATCH 227/294] Update fix_cmap_kokkos.cpp --- src/KOKKOS/fix_cmap_kokkos.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 55f4d35983..5a11bcfad2 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -719,8 +719,8 @@ int FixCMAPKokkos::pack_exchange_kokkos( } } - for( int k=0 ; k::unpack_exchange_kokkos( int m = d_buf(i); // if (i >= nrecv1) m = nextrarecv1 + d_buf[nextrarecv1 + i - nrecv1]; - for( int k=0 ; k (d_buf(m++)); Kokkos::printf(" *** unpack_exchange_kokkos() ... nrecv %i nrecv1 %i nextrarecv1 %i i %i m %i index %i l_num_crossterm(index) %i \n", nrecv, nrecv1, nextrarecv1, i, m, index, l_num_crossterm(index)); + for( int k=0 ; k<100 ; k++ ) + Kokkos::printf(" *** unpack_exchange_kokkos() ... 
d_buf(%i) %f\n", k, d_buf(k)); + + for (int k = 0; k < l_num_crossterm(index); k++) { l_crossterm_type(index,k) = static_cast (d_buf(m++)); l_crossterm_atom1(index,k) = static_cast (d_buf(m++)); From ae4cb004022ee079837c62a008c6f8929feb708e Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 29 Oct 2024 14:09:11 -0400 Subject: [PATCH 228/294] Update fix_cmap_kokkos.cpp --- src/KOKKOS/fix_cmap_kokkos.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 5a11bcfad2..ec49caf077 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -783,6 +783,11 @@ void FixCMAPKokkos::unpack_exchange_kokkos( Kokkos::parallel_for(nrecv, KOKKOS_LAMBDA(const int &i) { int index = d_indices(i); + Kokkos::printf(" *** unpack_exchange_kokkos() ... nrecv %i nrecv1 %i nextrarecv1 %i i %i index %i\n", nrecv, nrecv1, nextrarecv1, i, index); + + for( int k=0 ; k<100 ; k++ ) + Kokkos::printf(" *** unpack_exchange_kokkos() ... i %i d_buf(%i) %f\n", i, k, d_buf(k)); + if (index > -1) { int m = d_buf(i); @@ -790,10 +795,8 @@ void FixCMAPKokkos::unpack_exchange_kokkos( l_num_crossterm(index) = static_cast (d_buf(m++)); - Kokkos::printf(" *** unpack_exchange_kokkos() ... nrecv %i nrecv1 %i nextrarecv1 %i i %i m %i index %i l_num_crossterm(index) %i \n", nrecv, nrecv1, nextrarecv1, i, m, index, l_num_crossterm(index)); + Kokkos::printf(" *** unpack_exchange_kokkos() ... m %i l_num_crossterm(index) %i \n", m, index, l_num_crossterm(index)); - for( int k=0 ; k<100 ; k++ ) - Kokkos::printf(" *** unpack_exchange_kokkos() ... 
d_buf(%i) %f\n", k, d_buf(k)); for (int k = 0; k < l_num_crossterm(index); k++) { From 08e460ece66d69dd662124768aa175365655b409 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 29 Oct 2024 14:09:48 -0400 Subject: [PATCH 229/294] Update fix_cmap_kokkos.cpp --- src/KOKKOS/fix_cmap_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index ec49caf077..ed0091bcde 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -795,7 +795,7 @@ void FixCMAPKokkos::unpack_exchange_kokkos( l_num_crossterm(index) = static_cast (d_buf(m++)); - Kokkos::printf(" *** unpack_exchange_kokkos() ... m %i l_num_crossterm(index) %i \n", m, index, l_num_crossterm(index)); + Kokkos::printf(" *** unpack_exchange_kokkos() ... m %i l_num_crossterm(index) %i \n", m, l_num_crossterm(index)); From 0e1137aee72d39f61ddad1bc7d5309ea293a0121 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Tue, 29 Oct 2024 18:34:30 -0400 Subject: [PATCH 230/294] add inertia() --- src/KOKKOS/group_kokkos.cpp | 106 +++++++++++++++++++++++++++++------- src/KOKKOS/group_kokkos.h | 1 + 2 files changed, 87 insertions(+), 20 deletions(-) diff --git a/src/KOKKOS/group_kokkos.cpp b/src/KOKKOS/group_kokkos.cpp index 6a64dd79ca..e1ed73f869 100644 --- a/src/KOKKOS/group_kokkos.cpp +++ b/src/KOKKOS/group_kokkos.cpp @@ -44,26 +44,20 @@ template double GroupKokkos::mass(int igroup) { int groupbit = bitmask[igroup]; - auto d_mass = atomKK->k_mass.template view(); auto d_rmass = atomKK->k_rmass.template view(); auto d_mask = atomKK->k_mask.template view(); auto d_type = atomKK->k_type.template view(); - double one = 0.0; if (atomKK->rmass) { - Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_one) { if (d_mask(i) & groupbit) l_one += d_rmass(i); }, one); - } else { - Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_one) { if (d_mask(i) & groupbit) l_one += 
d_mass(d_type(i)); }, one); - } double all; @@ -88,7 +82,7 @@ void GroupKokkos::xcm(int igroup, double masstotal, double *cm) auto l_prd = Few(domain->prd); auto l_h = Few(domain->h); auto l_triclinic = domain->triclinic; - double cmone[3]; + double cmone[3] = {0.0, 0.0, 0.0}; if (atomKK->rmass) { @@ -150,9 +144,7 @@ void GroupKokkos::vcm(int igroup, double masstotal, double *vcm) auto d_v = atomKK->k_v.template view(); auto d_mask = atomKK->k_mask.template view(); auto d_image = atomKK->k_image.template view(); - - double p[3], massone; - p[0] = p[1] = p[2] = 0.0; + double p[3] = {0.0, 0.0, 0.0}; if (atomKK->rmass) { @@ -191,8 +183,6 @@ void GroupKokkos::vcm(int igroup, double masstotal, double *vcm) } } - - /* ---------------------------------------------------------------------- compute the angular momentum L (lmom) of group around center-of-mass cm @@ -200,7 +190,7 @@ void GroupKokkos::vcm(int igroup, double masstotal, double *vcm) ------------------------------------------------------------------------- */ template -void GroupKokkos::angmom(int igroup, double *cm, double *lmom) +void GroupKokkos::angmom(int igroup, double *xcm, double *lmom) { int groupbit = bitmask[igroup]; auto d_x = atomKK->k_x.template view(); @@ -210,7 +200,6 @@ void GroupKokkos::angmom(int igroup, double *cm, double *lmom) auto l_prd = Few(domain->prd); auto l_h = Few(domain->h); auto l_triclinic = domain->triclinic; - double p[3] = {0.0, 0.0, 0.0}; if (atomKK->rmass) { @@ -225,9 +214,9 @@ void GroupKokkos::angmom(int igroup, double *cm, double *lmom) x_i[1] = d_x(i,1); x_i[2] = d_x(i,2); auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); - double dx = unwrapKK[0] - cm[0]; - double dy = unwrapKK[1] - cm[1]; - double dz = unwrapKK[2] - cm[2]; + double dx = unwrapKK[0] - xcm[0]; + double dy = unwrapKK[1] - xcm[1]; + double dz = unwrapKK[2] - xcm[2]; l_px += massone * (dy * d_v(i,2) - dz * d_v(i,1)); l_py += massone * (dz * d_v(i,0) - dx * d_v(i,2)); l_pz += massone 
* (dx * d_v(i,1) - dy * d_v(i,0)); @@ -247,9 +236,9 @@ void GroupKokkos::angmom(int igroup, double *cm, double *lmom) x_i[1] = d_x(i,1); x_i[2] = d_x(i,2); auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); - double dx = unwrapKK[0] - cm[0]; - double dy = unwrapKK[1] - cm[1]; - double dz = unwrapKK[2] - cm[2]; + double dx = unwrapKK[0] - xcm[0]; + double dy = unwrapKK[1] - xcm[1]; + double dz = unwrapKK[2] - xcm[2]; l_px += massone * (dy * d_v(i,2) - dz * d_v(i,1)); l_py += massone * (dz * d_v(i,0) - dx * d_v(i,2)); l_pz += massone * (dx * d_v(i,1) - dy * d_v(i,0)); @@ -260,6 +249,83 @@ void GroupKokkos::angmom(int igroup, double *cm, double *lmom) MPI_Allreduce(p, lmom, 3, MPI_DOUBLE, MPI_SUM, world); } +/* ---------------------------------------------------------------------- + compute moment of inertia tensor around center-of-mass xcm of group + must unwrap atoms to compute itensor correctly +------------------------------------------------------------------------- */ + +template +void GroupKokkos::inertia(int igroup, double *xcm, double itensor[3][3]) +{ + int groupbit = bitmask[igroup]; + auto d_x = atomKK->k_x.template view(); + auto d_mask = atomKK->k_mask.template view(); + auto d_image = atomKK->k_image.template view(); + auto l_prd = Few(domain->prd); + auto l_h = Few(domain->h); + auto l_triclinic = domain->triclinic; + + double ione[3][3]; + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) ione[i][j] = 0.0; + + if (atomKK->rmass) { + + auto d_rmass = atomKK->k_rmass.template view(); + + Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_i00, double &l_i11, double &l_i22, double &l_i01, double &l_i12, double &l_i02) { + if (d_mask(i) & groupbit) { + double massone = d_rmass(i); + Few x_i; + x_i[0] = d_x(i,0); + x_i[1] = d_x(i,1); + x_i[2] = d_x(i,2); + auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); + double dx = unwrapKK[0] - xcm[0]; + double dy = unwrapKK[1] - xcm[1]; + 
double dz = unwrapKK[2] - xcm[2]; + l_i00 += massone * (dy * dy + dz * dz); + l_i11 += massone * (dx * dx + dz * dz); + l_i22 += massone * (dx * dx + dy * dy); + l_i01 -= massone * dx * dy; + l_i12 -= massone * dy * dz; + l_i02 -= massone * dx * dz; + } + }, ione[0][0], ione[1][1], ione[2][2], ione[0][1], ione[1][2], ione[0][2]); + + } else { + + auto d_mass = atomKK->k_mass.template view(); + auto d_type = atomKK->k_type.template view(); + + Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_i00, double &l_i11, double &l_i22, double &l_i01, double &l_i12, double &l_i02) { + if (d_mask(i) & groupbit) { + double massone = d_mass(d_type(i)); + Few x_i; + x_i[0] = d_x(i,0); + x_i[1] = d_x(i,1); + x_i[2] = d_x(i,2); + auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); + double dx = unwrapKK[0] - xcm[0]; + double dy = unwrapKK[1] - xcm[1]; + double dz = unwrapKK[2] - xcm[2]; + l_i00 += massone * (dy * dy + dz * dz); + l_i11 += massone * (dx * dx + dz * dz); + l_i22 += massone * (dx * dx + dy * dy); + l_i01 -= massone * dx * dy; + l_i12 -= massone * dy * dz; + l_i02 -= massone * dx * dz; + } + }, ione[0][0], ione[1][1], ione[2][2], ione[0][1], ione[1][2], ione[0][2]); + + } + + ione[1][0] = ione[0][1]; + ione[2][1] = ione[1][2]; + ione[2][0] = ione[0][2]; + MPI_Allreduce(&ione[0][0], &itensor[0][0], 9, MPI_DOUBLE, MPI_SUM, world); +} + namespace LAMMPS_NS { template class GroupKokkos; #ifdef LMP_KOKKOS_GPU diff --git a/src/KOKKOS/group_kokkos.h b/src/KOKKOS/group_kokkos.h index a51e339b50..a38187db10 100644 --- a/src/KOKKOS/group_kokkos.h +++ b/src/KOKKOS/group_kokkos.h @@ -27,6 +27,7 @@ class GroupKokkos : public Group { void xcm(int, double, double *); // center-of-mass coords of group void vcm(int, double, double *); // center-of-mass velocity of group void angmom(int, double *, double *); // angular momentum of group + void inertia(int, double *, double[3][3]); // inertia tensor }; } // namespace LAMMPS_NS From 
acebce0431ba90b50f07254fb1e399b977ec393a Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Tue, 29 Oct 2024 23:07:35 -0500 Subject: [PATCH 231/294] turn off verbose, reduce timeout --- .github/workflows/kokkos-regression.yaml | 2 +- tools/regression-tests/config_kokkos_openmp.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/kokkos-regression.yaml b/.github/workflows/kokkos-regression.yaml index bcb3899ad2..b0cb75041c 100644 --- a/.github/workflows/kokkos-regression.yaml +++ b/.github/workflows/kokkos-regression.yaml @@ -105,7 +105,7 @@ jobs: --output-file=output-${{ matrix.idx }}.xml \ --progress-file=progress-${{ matrix.idx }}.yaml \ --log-file=run-${{ matrix.idx }}.log \ - --quick-max=100 --verbose + --quick-max=100 tar -cvf kokkos-regression-test-${{ matrix.idx }}.tar run-${{ matrix.idx }}.log progress-${{ matrix.idx }}.yaml output-${{ matrix.idx }}.xml diff --git a/tools/regression-tests/config_kokkos_openmp.yaml b/tools/regression-tests/config_kokkos_openmp.yaml index 217888c762..1979d54b6a 100644 --- a/tools/regression-tests/config_kokkos_openmp.yaml +++ b/tools/regression-tests/config_kokkos_openmp.yaml @@ -21,6 +21,6 @@ abs: 1e-3 rel: 1e-7 - timeout: 180 + timeout: 120 nugget: 1.0 epsilon: 1e-16 From 50dd95e6e1e8e3caa457d42ebe1c8e3e3f579853 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 30 Oct 2024 11:32:24 -0400 Subject: [PATCH 232/294] move d_ubuf to kokkos_type.h https://matsci.org/t/a-few-kokkos-development-questions/56598/8 (Q11) "More correct would be to use a union with ubuf as is done in the atom_vec styles, so that no precision is lost for converting a huge 64-bit integer to a double." 
--- src/KOKKOS/atom_vec_kokkos.h | 11 ----------- src/KOKKOS/kokkos_type.h | 10 ++++++++++ 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index 7030f706b8..d4dd68ce18 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -24,17 +24,6 @@ namespace LAMMPS_NS { -union d_ubuf { - double d; - int64_t i; - KOKKOS_INLINE_FUNCTION - d_ubuf(double arg) : d(arg) {} - KOKKOS_INLINE_FUNCTION - d_ubuf(int64_t arg) : i(arg) {} - KOKKOS_INLINE_FUNCTION - d_ubuf(int arg) : i(arg) {} -}; - class AtomVecKokkos : virtual public AtomVec { public: AtomVecKokkos(class LAMMPS *); diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index a81e2ed0cf..fd304efd1c 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -41,6 +41,16 @@ constexpr int HALF = 4; #define MAX_TYPES_STACKPARAMS 12 static constexpr LAMMPS_NS::bigint LMP_KOKKOS_AV_DELTA = 10; +namespace LAMMPS_NS { + union d_ubuf { + double d; + int64_t i; + KOKKOS_INLINE_FUNCTION d_ubuf(double arg) : d(arg) {} + KOKKOS_INLINE_FUNCTION d_ubuf(int64_t arg) : i(arg) {} + KOKKOS_INLINE_FUNCTION d_ubuf(int arg) : i(arg) {} + }; +} + namespace Kokkos { static auto NoInit = [](std::string const& label) { return Kokkos::view_alloc(Kokkos::WithoutInitializing, label); From 5efd8ba11ec0ec2f6dbdb050cbb26554aec1ddac Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Wed, 30 Oct 2024 11:27:15 -0500 Subject: [PATCH 233/294] run the Kokkos regression workflow after merge, not every push --- .github/workflows/kokkos-regression.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/kokkos-regression.yaml b/.github/workflows/kokkos-regression.yaml index b0cb75041c..a475c1eb7b 100644 --- a/.github/workflows/kokkos-regression.yaml +++ b/.github/workflows/kokkos-regression.yaml @@ -2,7 +2,7 @@ name: "Kokkos OpenMP Regression Test" on: - pull_request: + push: branches: - develop From 
41672f6ac9eb1f1b92e80132ddd6e76e7d0d0e8c Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 30 Oct 2024 20:44:25 -0400 Subject: [PATCH 234/294] this might work on cuda --- src/KOKKOS/fix_cmap_kokkos.cpp | 91 ++++++++++++++++++---------------- src/KOKKOS/fix_cmap_kokkos.h | 3 ++ 2 files changed, 50 insertions(+), 44 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index ed0091bcde..63accd7827 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -111,6 +111,10 @@ FixCMAPKokkos::FixCMAPKokkos(LAMMPS *lmp, int narg, char **arg) : k_d1cmapgrid.template sync(); k_d2cmapgrid.template sync(); k_d12cmapgrid.template sync(); + + d_count = typename AT::t_int_scalar("fix_cmap:count"); + h_count = Kokkos::create_mirror_view(d_count); + } /* ---------------------------------------------------------------------- */ @@ -673,8 +677,7 @@ int FixCMAPKokkos::pack_exchange_kokkos( auto d_copylist = k_copylist.template view(); auto d_exchange_sendlist = k_exchange_sendlist.template view(); - int n; - copymode = 1; + Kokkos::deep_copy(d_count,0); auto l_num_crossterm = d_num_crossterm; auto l_crossterm_type = d_crossterm_type; @@ -683,31 +686,42 @@ int FixCMAPKokkos::pack_exchange_kokkos( auto l_crossterm_atom3 = d_crossterm_atom3; auto l_crossterm_atom4 = d_crossterm_atom4; auto l_crossterm_atom5 = d_crossterm_atom5; + //auto l_nsend = nsend; + + copymode = 1; Kokkos::parallel_scan(nsend, KOKKOS_LAMBDA(const int &mysend, int &offset, const bool &final) { const int i = d_exchange_sendlist(mysend); - if (!final) offset += l_num_crossterm(i); + if (!final) offset += (1+l_num_crossterm(i)*6); else { + int m = nsend + offset; - d_buf(mysend) = m; - d_buf(m++) = static_cast (l_num_crossterm(i)); - for (int k = 0; k < l_num_crossterm(i); k++) { - d_buf(m++) = static_cast (l_crossterm_type(i,k)); - d_buf(m++) = static_cast (l_crossterm_atom1(i,k)); - d_buf(m++) = static_cast (l_crossterm_atom2(i,k)); - d_buf(m++) = 
static_cast (l_crossterm_atom3(i,k)); - d_buf(m++) = static_cast (l_crossterm_atom4(i,k)); - d_buf(m++) = static_cast (l_crossterm_atom5(i,k)); - Kokkos::printf(" *** ok 1 ... i %i m %i l_num_crossterm(i) %i k %i d_buf[] %f %f %f %f %f %f %f\n", i, m, l_num_crossterm(i), k, d_buf(m-7), d_buf(m-6), d_buf(m-5), d_buf(m-4), d_buf(m-3), d_buf(m-2), d_buf(m-1)); + d_buf(mysend) = d_ubuf(m).d; + d_buf(m++) = d_ubuf(l_num_crossterm(i)).d; + + if( l_num_crossterm(i) > 0 ) { + + for (int k = 0; k < l_num_crossterm(i); k++) { + + d_buf(m++) = d_ubuf(l_crossterm_type(i,k)).d; + d_buf(m++) = d_ubuf(l_crossterm_atom1(i,k)).d; + d_buf(m++) = d_ubuf(l_crossterm_atom2(i,k)).d; + d_buf(m++) = d_ubuf(l_crossterm_atom3(i,k)).d; + d_buf(m++) = d_ubuf(l_crossterm_atom4(i,k)).d; + d_buf(m++) = d_ubuf(l_crossterm_atom5(i,k)).d; + + Kokkos::printf(" *** pack_exchange_kokkos() ... mysend %i i %i l_nsend %i offset %i m %i l_num_crossterm(i) %i l_crossterm_type(i,k) %i k %i d_buf[] %i %i %i %i %i %i %i\n", mysend, i, nsend, offset, m, l_num_crossterm(i), l_crossterm_type(i,k), k, d_ubuf(d_buf(m-7)).i, d_ubuf(d_buf(m-6)).i, d_ubuf(d_buf(m-5)).i, d_ubuf(d_buf(m-4)).i, d_ubuf(d_buf(m-3)).i, d_ubuf(d_buf(m-2)).i, d_ubuf(d_buf(m-1)).i); + + } } + if (mysend == nsend-1) d_count() = m; + offset = m - nsend; + const int j = d_copylist(mysend); if (j > -1) { - - // Kokkos::printf(" *** ok 2 ... i %i k %i\n", i, k); - l_num_crossterm(i) = l_num_crossterm(j); for (int k = 0; k < l_num_crossterm(i); k++) { l_crossterm_type(i,k) = l_crossterm_type(j,k); @@ -718,14 +732,8 @@ int FixCMAPKokkos::pack_exchange_kokkos( l_crossterm_atom5(i,k) = l_crossterm_atom5(j,k); } } - - for( int k=0 ; k<100 ; k++ ) - Kokkos::printf(" *** pack_exchange_kokkos() ... mysend %i d_buf(%i) %f\n", mysend, k, d_buf(k)); - } - },n); - - Kokkos::printf(" *** ok 3 ... 
n %i \n", n); + }); copymode = 0; @@ -741,7 +749,8 @@ int FixCMAPKokkos::pack_exchange_kokkos( k_crossterm_atom4.template modify(); k_crossterm_atom5.template modify(); - return n; + Kokkos::deep_copy(h_count,d_count); + return h_count(); } /* ---------------------------------------------------------------------- @@ -770,8 +779,6 @@ void FixCMAPKokkos::unpack_exchange_kokkos( k_crossterm_atom4.template sync(); k_crossterm_atom5.template sync(); - copymode = 1; - auto l_num_crossterm = d_num_crossterm; auto l_crossterm_type = d_crossterm_type; auto l_crossterm_atom1 = d_crossterm_atom1; @@ -780,32 +787,28 @@ void FixCMAPKokkos::unpack_exchange_kokkos( auto l_crossterm_atom4 = d_crossterm_atom4; auto l_crossterm_atom5 = d_crossterm_atom5; + copymode = 1; + Kokkos::parallel_for(nrecv, KOKKOS_LAMBDA(const int &i) { int index = d_indices(i); - Kokkos::printf(" *** unpack_exchange_kokkos() ... nrecv %i nrecv1 %i nextrarecv1 %i i %i index %i\n", nrecv, nrecv1, nextrarecv1, i, index); - - for( int k=0 ; k<100 ; k++ ) - Kokkos::printf(" *** unpack_exchange_kokkos() ... i %i d_buf(%i) %f\n", i, k, d_buf(k)); - if (index > -1) { - int m = d_buf(i); - // if (i >= nrecv1) m = nextrarecv1 + d_buf[nextrarecv1 + i - nrecv1]; - - l_num_crossterm(index) = static_cast (d_buf(m++)); - - Kokkos::printf(" *** unpack_exchange_kokkos() ... 
m %i l_num_crossterm(index) %i \n", m, l_num_crossterm(index)); - + int m = d_ubuf(d_buf(i)).i; + if (i >= nrecv1) m = nextrarecv1 + d_ubuf(d_buf(nextrarecv1 + i - nrecv1)).i; + l_num_crossterm(index) = static_cast (d_ubuf(d_buf(m++)).i); for (int k = 0; k < l_num_crossterm(index); k++) { - l_crossterm_type(index,k) = static_cast (d_buf(m++)); - l_crossterm_atom1(index,k) = static_cast (d_buf(m++)); - l_crossterm_atom2(index,k) = static_cast (d_buf(m++)); - l_crossterm_atom3(index,k) = static_cast (d_buf(m++)); - l_crossterm_atom4(index,k) = static_cast (d_buf(m++)); - l_crossterm_atom5(index,k) = static_cast (d_buf(m++)); + l_crossterm_type(index,k) = static_cast (d_ubuf(d_buf(m++)).i); + l_crossterm_atom1(index,k) = static_cast (d_ubuf(d_buf(m++)).i); + l_crossterm_atom2(index,k) = static_cast (d_ubuf(d_buf(m++)).i); + l_crossterm_atom3(index,k) = static_cast (d_ubuf(d_buf(m++)).i); + l_crossterm_atom4(index,k) = static_cast (d_ubuf(d_buf(m++)).i); + l_crossterm_atom5(index,k) = static_cast (d_ubuf(d_buf(m++)).i); + + Kokkos::printf(" *** unpack_exchange_kokkos() ... 
m %i l_num_crossterm(index) %i l_crossterm_type %i l_crossterm_atom1-5 %i %i %i %i %i \n", m, l_num_crossterm(index), l_crossterm_type(index,k), l_crossterm_atom1(index,k), l_crossterm_atom2(index,k), l_crossterm_atom3(index,k), l_crossterm_atom4(index,k), l_crossterm_atom5(index,k)); + } } }); diff --git a/src/KOKKOS/fix_cmap_kokkos.h b/src/KOKKOS/fix_cmap_kokkos.h index efa6a78c09..745b2bcfe2 100644 --- a/src/KOKKOS/fix_cmap_kokkos.h +++ b/src/KOKKOS/fix_cmap_kokkos.h @@ -81,6 +81,9 @@ class FixCMAPKokkos : public FixCMAP, public KokkosBase { DAT::tdual_int_1d k_map_array; dual_hash_type k_map_hash; + typename AT::t_int_scalar d_count; + HAT::t_int_scalar h_count; + DAT::tdual_int_1d k_num_crossterm; typename AT::t_int_1d d_num_crossterm; From 84e234921ba396bbf3d1e8b6fdd94ca133524c6e Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 30 Oct 2024 21:36:18 -0400 Subject: [PATCH 235/294] fix cuda warning --- src/KOKKOS/fix_cmap_kokkos.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 63accd7827..069cacde5e 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -687,6 +687,7 @@ int FixCMAPKokkos::pack_exchange_kokkos( auto l_crossterm_atom4 = d_crossterm_atom4; auto l_crossterm_atom5 = d_crossterm_atom5; //auto l_nsend = nsend; + auto l_count = d_count; copymode = 1; @@ -717,7 +718,7 @@ int FixCMAPKokkos::pack_exchange_kokkos( } } - if (mysend == nsend-1) d_count() = m; + if (mysend == nsend-1) l_count() = m; offset = m - nsend; const int j = d_copylist(mysend); From 9aa9e7079a97d1caf75fbe48ae3ebda5313d91e2 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 30 Oct 2024 21:44:27 -0400 Subject: [PATCH 236/294] cleanup --- src/KOKKOS/fix_cmap_kokkos.cpp | 43 +++++++++++----------------------- 1 file changed, 14 insertions(+), 29 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 069cacde5e..29f6249d4c 
100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -686,7 +686,6 @@ int FixCMAPKokkos::pack_exchange_kokkos( auto l_crossterm_atom3 = d_crossterm_atom3; auto l_crossterm_atom4 = d_crossterm_atom4; auto l_crossterm_atom5 = d_crossterm_atom5; - //auto l_nsend = nsend; auto l_count = d_count; copymode = 1; @@ -702,20 +701,13 @@ int FixCMAPKokkos::pack_exchange_kokkos( d_buf(mysend) = d_ubuf(m).d; d_buf(m++) = d_ubuf(l_num_crossterm(i)).d; - if( l_num_crossterm(i) > 0 ) { - - for (int k = 0; k < l_num_crossterm(i); k++) { - - d_buf(m++) = d_ubuf(l_crossterm_type(i,k)).d; - d_buf(m++) = d_ubuf(l_crossterm_atom1(i,k)).d; - d_buf(m++) = d_ubuf(l_crossterm_atom2(i,k)).d; - d_buf(m++) = d_ubuf(l_crossterm_atom3(i,k)).d; - d_buf(m++) = d_ubuf(l_crossterm_atom4(i,k)).d; - d_buf(m++) = d_ubuf(l_crossterm_atom5(i,k)).d; - - Kokkos::printf(" *** pack_exchange_kokkos() ... mysend %i i %i l_nsend %i offset %i m %i l_num_crossterm(i) %i l_crossterm_type(i,k) %i k %i d_buf[] %i %i %i %i %i %i %i\n", mysend, i, nsend, offset, m, l_num_crossterm(i), l_crossterm_type(i,k), k, d_ubuf(d_buf(m-7)).i, d_ubuf(d_buf(m-6)).i, d_ubuf(d_buf(m-5)).i, d_ubuf(d_buf(m-4)).i, d_ubuf(d_buf(m-3)).i, d_ubuf(d_buf(m-2)).i, d_ubuf(d_buf(m-1)).i); - - } + for (int k = 0; k < l_num_crossterm(i); k++) { + d_buf(m++) = d_ubuf(l_crossterm_type(i,k)).d; + d_buf(m++) = d_ubuf(l_crossterm_atom1(i,k)).d; + d_buf(m++) = d_ubuf(l_crossterm_atom2(i,k)).d; + d_buf(m++) = d_ubuf(l_crossterm_atom3(i,k)).d; + d_buf(m++) = d_ubuf(l_crossterm_atom4(i,k)).d; + d_buf(m++) = d_ubuf(l_crossterm_atom5(i,k)).d; } if (mysend == nsend-1) l_count() = m; @@ -765,13 +757,7 @@ void FixCMAPKokkos::unpack_exchange_kokkos( { k_buf.template sync(); k_indices.template sync(); - - auto d_buf = typename ArrayTypes::t_xfloat_1d_um( - k_buf.template view().data(), - k_buf.extent(0)*k_buf.extent(1)); - - auto d_indices = k_indices.template view(); - + k_num_crossterm.template sync(); k_crossterm_type.template 
sync(); k_crossterm_atom1.template sync(); @@ -780,6 +766,12 @@ void FixCMAPKokkos::unpack_exchange_kokkos( k_crossterm_atom4.template sync(); k_crossterm_atom5.template sync(); + auto d_buf = typename ArrayTypes::t_xfloat_1d_um( + k_buf.template view().data(), + k_buf.extent(0)*k_buf.extent(1)); + + auto d_indices = k_indices.template view(); + auto l_num_crossterm = d_num_crossterm; auto l_crossterm_type = d_crossterm_type; auto l_crossterm_atom1 = d_crossterm_atom1; @@ -792,14 +784,10 @@ void FixCMAPKokkos::unpack_exchange_kokkos( Kokkos::parallel_for(nrecv, KOKKOS_LAMBDA(const int &i) { int index = d_indices(i); - if (index > -1) { - int m = d_ubuf(d_buf(i)).i; if (i >= nrecv1) m = nextrarecv1 + d_ubuf(d_buf(nextrarecv1 + i - nrecv1)).i; - l_num_crossterm(index) = static_cast (d_ubuf(d_buf(m++)).i); - for (int k = 0; k < l_num_crossterm(index); k++) { l_crossterm_type(index,k) = static_cast (d_ubuf(d_buf(m++)).i); l_crossterm_atom1(index,k) = static_cast (d_ubuf(d_buf(m++)).i); @@ -807,9 +795,6 @@ void FixCMAPKokkos::unpack_exchange_kokkos( l_crossterm_atom3(index,k) = static_cast (d_ubuf(d_buf(m++)).i); l_crossterm_atom4(index,k) = static_cast (d_ubuf(d_buf(m++)).i); l_crossterm_atom5(index,k) = static_cast (d_ubuf(d_buf(m++)).i); - - Kokkos::printf(" *** unpack_exchange_kokkos() ... 
m %i l_num_crossterm(index) %i l_crossterm_type %i l_crossterm_atom1-5 %i %i %i %i %i \n", m, l_num_crossterm(index), l_crossterm_type(index,k), l_crossterm_atom1(index,k), l_crossterm_atom2(index,k), l_crossterm_atom3(index,k), l_crossterm_atom4(index,k), l_crossterm_atom5(index,k)); - } } }); From 22d08cbac0be3dae1e4a5e235b1af794dcdf97df Mon Sep 17 00:00:00 2001 From: alphataubio Date: Wed, 30 Oct 2024 21:58:50 -0400 Subject: [PATCH 237/294] whitespace --- src/KOKKOS/fix_cmap_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 29f6249d4c..9fb96df6f9 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -757,7 +757,7 @@ void FixCMAPKokkos::unpack_exchange_kokkos( { k_buf.template sync(); k_indices.template sync(); - + k_num_crossterm.template sync(); k_crossterm_type.template sync(); k_crossterm_atom1.template sync(); From 7eccc56607ea96cde660efba38610d209db5ec79 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Fri, 1 Nov 2024 19:42:16 -0400 Subject: [PATCH 238/294] auto variables for lambda capture --- src/KOKKOS/group_kokkos.cpp | 40 +++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/src/KOKKOS/group_kokkos.cpp b/src/KOKKOS/group_kokkos.cpp index e1ed73f869..346a98d92d 100644 --- a/src/KOKKOS/group_kokkos.cpp +++ b/src/KOKKOS/group_kokkos.cpp @@ -73,7 +73,7 @@ double GroupKokkos::mass(int igroup) ------------------------------------------------------------------------- */ template -void GroupKokkos::xcm(int igroup, double masstotal, double *cm) +void GroupKokkos::xcm(int igroup, double masstotal, double *xcm) { int groupbit = bitmask[igroup]; auto d_x = atomKK->k_x.template view(); @@ -123,11 +123,11 @@ void GroupKokkos::xcm(int igroup, double masstotal, double *cm) } - MPI_Allreduce(cmone, cm, 3, MPI_DOUBLE, MPI_SUM, world); + MPI_Allreduce(cmone, xcm, 3, MPI_DOUBLE, MPI_SUM, world); if 
(masstotal > 0.0) { - cm[0] /= masstotal; - cm[1] /= masstotal; - cm[2] /= masstotal; + xcm[0] /= masstotal; + xcm[1] /= masstotal; + xcm[2] /= masstotal; } } @@ -200,6 +200,9 @@ void GroupKokkos::angmom(int igroup, double *xcm, double *lmom) auto l_prd = Few(domain->prd); auto l_h = Few(domain->h); auto l_triclinic = domain->triclinic; + auto l_xcm0 = xcm[0]; + auto l_xcm1 = xcm[1]; + auto l_xcm2 = xcm[2]; double p[3] = {0.0, 0.0, 0.0}; if (atomKK->rmass) { @@ -214,9 +217,9 @@ void GroupKokkos::angmom(int igroup, double *xcm, double *lmom) x_i[1] = d_x(i,1); x_i[2] = d_x(i,2); auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); - double dx = unwrapKK[0] - xcm[0]; - double dy = unwrapKK[1] - xcm[1]; - double dz = unwrapKK[2] - xcm[2]; + double dx = unwrapKK[0] - l_xcm0; + double dy = unwrapKK[1] - l_xcm1; + double dz = unwrapKK[2] - l_xcm2; l_px += massone * (dy * d_v(i,2) - dz * d_v(i,1)); l_py += massone * (dz * d_v(i,0) - dx * d_v(i,2)); l_pz += massone * (dx * d_v(i,1) - dy * d_v(i,0)); @@ -236,9 +239,9 @@ void GroupKokkos::angmom(int igroup, double *xcm, double *lmom) x_i[1] = d_x(i,1); x_i[2] = d_x(i,2); auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); - double dx = unwrapKK[0] - xcm[0]; - double dy = unwrapKK[1] - xcm[1]; - double dz = unwrapKK[2] - xcm[2]; + double dx = unwrapKK[0] - l_xcm0; + double dy = unwrapKK[1] - l_xcm1; + double dz = unwrapKK[2] - l_xcm2; l_px += massone * (dy * d_v(i,2) - dz * d_v(i,1)); l_py += massone * (dz * d_v(i,0) - dx * d_v(i,2)); l_pz += massone * (dx * d_v(i,1) - dy * d_v(i,0)); @@ -264,6 +267,9 @@ void GroupKokkos::inertia(int igroup, double *xcm, double itensor[3] auto l_prd = Few(domain->prd); auto l_h = Few(domain->h); auto l_triclinic = domain->triclinic; + auto l_xcm0 = xcm[0]; + auto l_xcm1 = xcm[1]; + auto l_xcm2 = xcm[2]; double ione[3][3]; for (int i = 0; i < 3; i++) @@ -281,9 +287,9 @@ void GroupKokkos::inertia(int igroup, double *xcm, double itensor[3] x_i[1] = 
d_x(i,1); x_i[2] = d_x(i,2); auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); - double dx = unwrapKK[0] - xcm[0]; - double dy = unwrapKK[1] - xcm[1]; - double dz = unwrapKK[2] - xcm[2]; + double dx = unwrapKK[0] - l_xcm0; + double dy = unwrapKK[1] - l_xcm1; + double dz = unwrapKK[2] - l_xcm2; l_i00 += massone * (dy * dy + dz * dz); l_i11 += massone * (dx * dx + dz * dz); l_i22 += massone * (dx * dx + dy * dy); @@ -306,9 +312,9 @@ void GroupKokkos::inertia(int igroup, double *xcm, double itensor[3] x_i[1] = d_x(i,1); x_i[2] = d_x(i,2); auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i)); - double dx = unwrapKK[0] - xcm[0]; - double dy = unwrapKK[1] - xcm[1]; - double dz = unwrapKK[2] - xcm[2]; + double dx = unwrapKK[0] - l_xcm0; + double dy = unwrapKK[1] - l_xcm1; + double dz = unwrapKK[2] - l_xcm2; l_i00 += massone * (dy * dy + dz * dz); l_i11 += massone * (dx * dx + dz * dz); l_i22 += massone * (dx * dx + dy * dy); From ecebf187915ed72aeacaec8fde325f6695857ddf Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 2 Nov 2024 13:39:23 -0400 Subject: [PATCH 239/294] wrap host functions with sync/modify for kokkos --- src/KOKKOS/fix_colvars_kokkos.cpp | 48 +++++++++++++++++++++++++++++++ src/KOKKOS/fix_colvars_kokkos.h | 42 +++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 src/KOKKOS/fix_colvars_kokkos.cpp create mode 100644 src/KOKKOS/fix_colvars_kokkos.h diff --git a/src/KOKKOS/fix_colvars_kokkos.cpp b/src/KOKKOS/fix_colvars_kokkos.cpp new file mode 100644 index 0000000000..5142f0621c --- /dev/null +++ b/src/KOKKOS/fix_colvars_kokkos.cpp @@ -0,0 +1,48 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. 
Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Mitch Murphy (alphataubio at gmail) +------------------------------------------------------------------------- */ + +#include "fix_colvars_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" + +using namespace LAMMPS_NS; + +FixColvarsKokkos::FixColvarsKokkos(LAMMPS *lmp, int narg, char **arg) : + FixColvars(lmp, narg, arg) +{ + atomKK = (AtomKokkos *)atom; +} + +/* ---------------------------------------------------------------------- */ + +void FixColvarsKokkos::post_force(int vflag) +{ + atomKK->sync(Host,X_MASK|F_MASK); + FixColvars::post_force(vflag); + atomKK->modified(Host,F_MASK); +} + +/* ---------------------------------------------------------------------- */ +void FixColvarsKokkos::end_of_step() +{ + if (store_forces) { + atomKK->sync(Host,F_MASK); + FixColvars::end_of_step(); + } +} diff --git a/src/KOKKOS/fix_colvars_kokkos.h b/src/KOKKOS/fix_colvars_kokkos.h new file mode 100644 index 0000000000..fe649c7350 --- /dev/null +++ b/src/KOKKOS/fix_colvars_kokkos.h @@ -0,0 +1,42 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. 
+ + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS +// clang-format off +FixStyle(colvars/kk,FixColvarsKokkos); +FixStyle(colvars/kk/device,FixColvarsKokkos); +FixStyle(colvars/kk/host,FixColvarsKokkos); +// clang-format on +#else + +#ifndef LMP_FIX_COLVARS_KOKKOS_H +#define LMP_FIX_COLVARS_KOKKOS_H + +#include "fix_colvars.h" + +namespace LAMMPS_NS { + +class FixColvarsKokkos : public FixColvars { + + public: + FixColvarsKokkos(class LAMMPS *, int, char **); + + void post_force(int) override; + void end_of_step() override; + +}; + +} // namespace LAMMPS_NS + +#endif +#endif From 03487707ae179f889ef3340c2952c40dc1fbadbe Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 2 Nov 2024 14:24:36 -0400 Subject: [PATCH 240/294] atomics --- src/KOKKOS/fix_cmap_kokkos.cpp | 55 ++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 15 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 9fb96df6f9..c88ae9c10f 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -439,29 +439,54 @@ void FixCMAPKokkos::operator()(TagFixCmapPostForce, const int n, dou // apply force to each of the 5 atoms if (i1 < nlocal) { - d_f(i1,0) += dEdPhi*dphidr1x; - d_f(i1,1) += dEdPhi*dphidr1y; - d_f(i1,2) += dEdPhi*dphidr1z; + //d_f(i1,0) += dEdPhi*dphidr1x; + //d_f(i1,1) += dEdPhi*dphidr1y; + //d_f(i1,2) += dEdPhi*dphidr1z; + + Kokkos::atomic_add(&d_f(i1,0), dEdPhi*dphidr1x); + Kokkos::atomic_add(&d_f(i1,1), dEdPhi*dphidr1y); + Kokkos::atomic_add(&d_f(i1,2), dEdPhi*dphidr1z); + } if (i2 < nlocal) { - d_f(i2,0) += dEdPhi*dphidr2x + dEdPsi*dpsidr1x; - d_f(i2,1) += dEdPhi*dphidr2y + dEdPsi*dpsidr1y; - d_f(i2,2) += dEdPhi*dphidr2z + dEdPsi*dpsidr1z; + //d_f(i2,0) += dEdPhi*dphidr2x + dEdPsi*dpsidr1x; + //d_f(i2,1) += dEdPhi*dphidr2y + dEdPsi*dpsidr1y; + //d_f(i2,2) += dEdPhi*dphidr2z + dEdPsi*dpsidr1z; + + 
Kokkos::atomic_add(&d_f(i2,0), dEdPhi*dphidr2x + dEdPsi*dpsidr1x); + Kokkos::atomic_add(&d_f(i2,1), dEdPhi*dphidr2y + dEdPsi*dpsidr1y); + Kokkos::atomic_add(&d_f(i2,2), dEdPhi*dphidr2z + dEdPsi*dpsidr1z); + } if (i3 < nlocal) { - d_f(i3,0) += (-dEdPhi*dphidr3x - dEdPsi*dpsidr2x); - d_f(i3,1) += (-dEdPhi*dphidr3y - dEdPsi*dpsidr2y); - d_f(i3,2) += (-dEdPhi*dphidr3z - dEdPsi*dpsidr2z); + //d_f(i3,0) += (-dEdPhi*dphidr3x - dEdPsi*dpsidr2x); + //d_f(i3,1) += (-dEdPhi*dphidr3y - dEdPsi*dpsidr2y); + //d_f(i3,2) += (-dEdPhi*dphidr3z - dEdPsi*dpsidr2z); + + Kokkos::atomic_add(&d_f(i3,0), -dEdPhi*dphidr3x - dEdPsi*dpsidr2x); + Kokkos::atomic_add(&d_f(i3,1), -dEdPhi*dphidr3y - dEdPsi*dpsidr2y); + Kokkos::atomic_add(&d_f(i3,2), -dEdPhi*dphidr3z - dEdPsi*dpsidr2z); + } if (i4 < nlocal) { - d_f(i4,0) += (-dEdPhi*dphidr4x - dEdPsi*dpsidr3x); - d_f(i4,1) += (-dEdPhi*dphidr4y - dEdPsi*dpsidr3y); - d_f(i4,2) += (-dEdPhi*dphidr4z - dEdPsi*dpsidr3z); + //d_f(i4,0) += (-dEdPhi*dphidr4x - dEdPsi*dpsidr3x); + //d_f(i4,1) += (-dEdPhi*dphidr4y - dEdPsi*dpsidr3y); + //d_f(i4,2) += (-dEdPhi*dphidr4z - dEdPsi*dpsidr3z); + + Kokkos::atomic_add(&d_f(i4,0), -dEdPhi*dphidr4x - dEdPsi*dpsidr3x); + Kokkos::atomic_add(&d_f(i4,1), -dEdPhi*dphidr4y - dEdPsi*dpsidr3y); + Kokkos::atomic_add(&d_f(i4,2), -dEdPhi*dphidr4z - dEdPsi*dpsidr3z); + } if (i5 < nlocal) { - d_f(i5,0) -= dEdPsi*dpsidr4x; - d_f(i5,1) -= dEdPsi*dpsidr4y; - d_f(i5,2) -= dEdPsi*dpsidr4z; + //d_f(i5,0) -= dEdPsi*dpsidr4x; + //d_f(i5,1) -= dEdPsi*dpsidr4y; + //d_f(i5,2) -= dEdPsi*dpsidr4z; + + Kokkos::atomic_add(&d_f(i5,0), -dEdPsi*dpsidr4x); + Kokkos::atomic_add(&d_f(i5,1), -dEdPsi*dpsidr4y); + Kokkos::atomic_add(&d_f(i5,2), -dEdPsi*dpsidr4z); + } } From 1f1491a71cdddf7195e3f2a7a51f6287a16fc757 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 2 Nov 2024 14:52:01 -0400 Subject: [PATCH 241/294] Update fix_colvars_kokkos.cpp --- src/KOKKOS/fix_colvars_kokkos.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/src/KOKKOS/fix_colvars_kokkos.cpp b/src/KOKKOS/fix_colvars_kokkos.cpp index 5142f0621c..483a718dbb 100644 --- a/src/KOKKOS/fix_colvars_kokkos.cpp +++ b/src/KOKKOS/fix_colvars_kokkos.cpp @@ -26,7 +26,10 @@ using namespace LAMMPS_NS; FixColvarsKokkos::FixColvarsKokkos(LAMMPS *lmp, int narg, char **arg) : FixColvars(lmp, narg, arg) { + kokkosable = 1; atomKK = (AtomKokkos *)atom; + datamask_read = X_MASK | F_MASK; + datamask_modify = F_MASK; } /* ---------------------------------------------------------------------- */ From 953762458c0397726695e0bc75ca1f2b0236bf27 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Sat, 2 Nov 2024 16:36:50 -0400 Subject: [PATCH 242/294] EMPTY_MASK --- src/KOKKOS/fix_cmap_kokkos.cpp | 29 ++--------------------------- src/KOKKOS/fix_colvars_kokkos.cpp | 4 ++-- 2 files changed, 4 insertions(+), 29 deletions(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index c88ae9c10f..7501338826 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -49,8 +49,8 @@ FixCMAPKokkos::FixCMAPKokkos(LAMMPS *lmp, int narg, char **arg) : exchange_comm_device = sort_device = 1; atomKK = (AtomKokkos *)atom; execution_space = ExecutionSpaceFromDevice::space; - datamask_read = X_MASK | F_MASK; - datamask_modify = F_MASK; + datamask_read = EMPTY_MASK; + datamask_modify = EMPTY_MASK; // allocate memory for CMAP data @@ -439,54 +439,29 @@ void FixCMAPKokkos::operator()(TagFixCmapPostForce, const int n, dou // apply force to each of the 5 atoms if (i1 < nlocal) { - //d_f(i1,0) += dEdPhi*dphidr1x; - //d_f(i1,1) += dEdPhi*dphidr1y; - //d_f(i1,2) += dEdPhi*dphidr1z; - Kokkos::atomic_add(&d_f(i1,0), dEdPhi*dphidr1x); Kokkos::atomic_add(&d_f(i1,1), dEdPhi*dphidr1y); Kokkos::atomic_add(&d_f(i1,2), dEdPhi*dphidr1z); - } if (i2 < nlocal) { - //d_f(i2,0) += dEdPhi*dphidr2x + dEdPsi*dpsidr1x; - //d_f(i2,1) += dEdPhi*dphidr2y + dEdPsi*dpsidr1y; - //d_f(i2,2) += dEdPhi*dphidr2z + dEdPsi*dpsidr1z; - 
Kokkos::atomic_add(&d_f(i2,0), dEdPhi*dphidr2x + dEdPsi*dpsidr1x); Kokkos::atomic_add(&d_f(i2,1), dEdPhi*dphidr2y + dEdPsi*dpsidr1y); Kokkos::atomic_add(&d_f(i2,2), dEdPhi*dphidr2z + dEdPsi*dpsidr1z); - } if (i3 < nlocal) { - //d_f(i3,0) += (-dEdPhi*dphidr3x - dEdPsi*dpsidr2x); - //d_f(i3,1) += (-dEdPhi*dphidr3y - dEdPsi*dpsidr2y); - //d_f(i3,2) += (-dEdPhi*dphidr3z - dEdPsi*dpsidr2z); - Kokkos::atomic_add(&d_f(i3,0), -dEdPhi*dphidr3x - dEdPsi*dpsidr2x); Kokkos::atomic_add(&d_f(i3,1), -dEdPhi*dphidr3y - dEdPsi*dpsidr2y); Kokkos::atomic_add(&d_f(i3,2), -dEdPhi*dphidr3z - dEdPsi*dpsidr2z); - } if (i4 < nlocal) { - //d_f(i4,0) += (-dEdPhi*dphidr4x - dEdPsi*dpsidr3x); - //d_f(i4,1) += (-dEdPhi*dphidr4y - dEdPsi*dpsidr3y); - //d_f(i4,2) += (-dEdPhi*dphidr4z - dEdPsi*dpsidr3z); - Kokkos::atomic_add(&d_f(i4,0), -dEdPhi*dphidr4x - dEdPsi*dpsidr3x); Kokkos::atomic_add(&d_f(i4,1), -dEdPhi*dphidr4y - dEdPsi*dpsidr3y); Kokkos::atomic_add(&d_f(i4,2), -dEdPhi*dphidr4z - dEdPsi*dpsidr3z); - } if (i5 < nlocal) { - //d_f(i5,0) -= dEdPsi*dpsidr4x; - //d_f(i5,1) -= dEdPsi*dpsidr4y; - //d_f(i5,2) -= dEdPsi*dpsidr4z; - Kokkos::atomic_add(&d_f(i5,0), -dEdPsi*dpsidr4x); Kokkos::atomic_add(&d_f(i5,1), -dEdPsi*dpsidr4y); Kokkos::atomic_add(&d_f(i5,2), -dEdPsi*dpsidr4z); - } } diff --git a/src/KOKKOS/fix_colvars_kokkos.cpp b/src/KOKKOS/fix_colvars_kokkos.cpp index 483a718dbb..2ab08bf587 100644 --- a/src/KOKKOS/fix_colvars_kokkos.cpp +++ b/src/KOKKOS/fix_colvars_kokkos.cpp @@ -28,8 +28,8 @@ FixColvarsKokkos::FixColvarsKokkos(LAMMPS *lmp, int narg, char **arg) : { kokkosable = 1; atomKK = (AtomKokkos *)atom; - datamask_read = X_MASK | F_MASK; - datamask_modify = F_MASK; + datamask_read = EMPTY_MASK; + datamask_modify = EMPTY_MASK; } /* ---------------------------------------------------------------------- */ From a832a4a1c93830d51f7e5e2a3f6beb3704cb3921 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Tue, 5 Nov 2024 16:38:06 -0600 Subject: [PATCH 243/294] fixed a bug with three_ilist when 
used with pair hybrid --- lib/gpu/lal_neighbor.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/gpu/lal_neighbor.cpp b/lib/gpu/lal_neighbor.cpp index 288415e0e7..aca9b1d141 100644 --- a/lib/gpu/lal_neighbor.cpp +++ b/lib/gpu/lal_neighbor.cpp @@ -365,7 +365,9 @@ void Neighbor::get_host(const int inum, int *ilist, int *numj, int i=ilist[ii]; three_ilist[i] = ii; } - three_ilist.update_device(inum,true); + // needs to transfer _max_atoms because three_ilist indexes all the atoms (local and ghost) + // not just inum (number of neighbor list items) + three_ilist.update_device(_max_atoms,true); } time_nbor.stop(); From b74096b2c4578979aed8ee37461031290d3d71d1 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 7 Nov 2024 09:35:49 -0800 Subject: [PATCH 244/294] Fix bug when a proc has no atoms/neighbors (existing issue) --- src/KOKKOS/pair_kokkos.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h index c4bd603041..399142dfaf 100644 --- a/src/KOKKOS/pair_kokkos.h +++ b/src/KOKKOS/pair_kokkos.h @@ -923,6 +923,8 @@ int GetMaxNeighs(NeighStyle* list) maxneigh = MAX(maxneigh,num_neighs); }, Kokkos::Max(maxneigh)); + if (maxneigh < 0) maxneigh = 0; + return maxneigh; } @@ -958,6 +960,7 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&P if (!vectorsize || lastcall < fpair->lmp->neighbor->lastcall) { lastcall = fpair->lmp->update->ntimestep; vectorsize = GetMaxNeighs(list); + if (vectorsize == 0) vectorsize = 1; vectorsize = MathSpecial::powint(2,(int(log2(vectorsize) + 0.5))); // round to nearest power of 2 #if defined(KOKKOS_ENABLE_HIP) From 1c48d201b41765a81a4820fbee38df09fd5b10ad Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Fri, 8 Nov 2024 10:07:16 +0000 Subject: [PATCH 245/294] Remove unused pack_flag = 5 options --- src/REAXFF/fix_qtpie_reaxff.cpp | 44 +++------------------------------ 1 file changed, 3 insertions(+), 41 deletions(-) diff --git 
a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp index 946457a4da..48c1109178 100644 --- a/src/REAXFF/fix_qtpie_reaxff.cpp +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -903,15 +903,6 @@ int FixQtpieReaxFF::pack_forward_comm(int n, int *list, double *buf, for (m = 0; m < n; m++) buf[m] = t[list[m]]; else if (pack_flag == 4) for (m = 0; m < n; m++) buf[m] = atom->q[list[m]]; - else if (pack_flag == 5) { - m = 0; - for (int i = 0; i < n; i++) { - int j = 2 * list[i]; - buf[m++] = d[j]; - buf[m++] = d[j+1]; - } - return m; - } return n; } @@ -929,15 +920,6 @@ void FixQtpieReaxFF::unpack_forward_comm(int n, int first, double *buf) for (m = 0, i = first; m < n; m++, i++) t[i] = buf[m]; else if (pack_flag == 4) for (m = 0, i = first; m < n; m++, i++) atom->q[i] = buf[m]; - else if (pack_flag == 5) { - int last = first + n; - m = 0; - for (i = first; i < last; i++) { - int j = 2 * i; - d[j] = buf[m++]; - d[j+1] = buf[m++]; - } - } } /* ---------------------------------------------------------------------- */ @@ -945,35 +927,15 @@ void FixQtpieReaxFF::unpack_forward_comm(int n, int first, double *buf) int FixQtpieReaxFF::pack_reverse_comm(int n, int first, double *buf) { int i, m; - if (pack_flag == 5) { - m = 0; - int last = first + n; - for (i = first; i < last; i++) { - int indxI = 2 * i; - buf[m++] = q[indxI]; - buf[m++] = q[indxI+1]; - } - return m; - } else { - for (m = 0, i = first; m < n; m++, i++) buf[m] = q[i]; - return n; - } + for (m = 0, i = first; m < n; m++, i++) buf[m] = q[i]; + return n; } /* ---------------------------------------------------------------------- */ void FixQtpieReaxFF::unpack_reverse_comm(int n, int *list, double *buf) { - if (pack_flag == 5) { - int m = 0; - for (int i = 0; i < n; i++) { - int indxI = 2 * list[i]; - q[indxI] += buf[m++]; - q[indxI+1] += buf[m++]; - } - } else { - for (int m = 0; m < n; m++) q[list[m]] += buf[m]; - } + for (int m = 0; m < n; m++) q[list[m]] += buf[m]; } /* 
---------------------------------------------------------------------- From 95899b53b86b68249dd26ee4c32b18bc0c8ab398 Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Fri, 8 Nov 2024 10:26:47 +0000 Subject: [PATCH 246/294] Add fix qtpie/reaxff to pair_style reaxff docs --- doc/src/pair_reaxff.rst | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/doc/src/pair_reaxff.rst b/doc/src/pair_reaxff.rst index 03d53d1ff4..84403c15d1 100644 --- a/doc/src/pair_reaxff.rst +++ b/doc/src/pair_reaxff.rst @@ -20,7 +20,7 @@ Syntax .. parsed-literal:: keyword = *checkqeq* or *lgvdw* or *safezone* or *mincap* or *minhbonds* or *tabulate* or *list/blocking* - *checkqeq* value = *yes* or *no* = whether or not to require qeq/reaxff or acks2/reaxff fix + *checkqeq* value = *yes* or *no* = whether or not to require one of fix qeq/reaxff, fix acks2/reaxff or fix qtpie/reaxff *enobonds* value = *yes* or *no* = whether or not to tally energy of atoms with no bonds *lgvdw* value = *yes* or *no* = whether or not to use a low gradient vdW correction *safezone* = factor used for array allocation @@ -120,20 +120,22 @@ up that process. The ReaxFF parameter files provided were created using a charge equilibration (QEq) model for handling the electrostatic interactions. -Therefore, by default, LAMMPS requires that either the -:doc:`fix qeq/reaxff <fix_qeq_reaxff>` or the -:doc:`fix qeq/shielded <fix_qeq>` or :doc:`fix acks2/reaxff <fix_acks2_reaxff>` -command be used with -*pair_style reaxff* when simulating a ReaxFF model, to equilibrate -the charges each timestep. +Therefore, by default, LAMMPS requires that +:doc:`fix qeq/reaxff <fix_qeq_reaxff>` or :doc:`fix qeq/shielded <fix_qeq>` +or :doc:`fix acks2/reaxff <fix_acks2_reaxff>` +or :doc:`fix qtpie/reaxff <fix_qtpie_reaxff>` +is used with *pair_style reaxff* when simulating a ReaxFF model, +to equilibrate the charges at each timestep. +See the :doc:`fix qeq/reaxff <fix_qeq_reaxff>` or :doc:`fix qeq/shielded <fix_qeq>` +or :doc:`fix acks2/reaxff <fix_acks2_reaxff>` +or :doc:`fix qtpie/reaxff <fix_qtpie_reaxff>` +command documentation for more details.
Using the keyword *checkqeq* with the value *no* turns off the check for the QEq fixes, allowing a simulation to be run without charge equilibration. In this case, the static charges you assign to each atom will be used for computing the electrostatic interactions in -the system. See the :doc:`fix qeq/reaxff <fix_qeq_reaxff>` or -:doc:`fix qeq/shielded <fix_qeq>` or :doc:`fix acks2/reaxff <fix_acks2_reaxff>` -command documentation for more details. +the system. Using the optional keyword *lgvdw* with the value *yes* turns on the low-gradient correction of ReaxFF for long-range London Dispersion, @@ -372,8 +374,8 @@ Related commands """""""""""""""" :doc:`pair_coeff <pair_coeff>`, :doc:`fix qeq/reaxff <fix_qeq_reaxff>`, -:doc:`fix acks2/reaxff <fix_acks2_reaxff>`, :doc:`fix reaxff/bonds <fix_reaxff_bonds>`, -:doc:`fix reaxff/species <fix_reaxff_species>`, +:doc:`fix acks2/reaxff <fix_acks2_reaxff>`, :doc:`fix qtpie/reaxff <fix_qtpie_reaxff>`, +:doc:`fix reaxff/bonds <fix_reaxff_bonds>`, :doc:`fix reaxff/species <fix_reaxff_species>`, :doc:`compute reaxff/atom <compute_reaxff_atom>` Default From e84c45c6e7d61a57bcdba143435fda23a0d6d67d Mon Sep 17 00:00:00 2001 From: Navraj Lalli Date: Fri, 8 Nov 2024 10:38:49 +0000 Subject: [PATCH 247/294] Fix whitespace --- doc/src/pair_reaxff.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/src/pair_reaxff.rst b/doc/src/pair_reaxff.rst index 84403c15d1..495572dc0e 100644 --- a/doc/src/pair_reaxff.rst +++ b/doc/src/pair_reaxff.rst @@ -124,9 +124,9 @@ Therefore, by default, LAMMPS requires that :doc:`fix qeq/reaxff <fix_qeq_reaxff>` or :doc:`fix qeq/shielded <fix_qeq>` or :doc:`fix acks2/reaxff <fix_acks2_reaxff>` or :doc:`fix qtpie/reaxff <fix_qtpie_reaxff>` -is used with *pair_style reaxff* when simulating a ReaxFF model, -to equilibrate the charges at each timestep. -See the :doc:`fix qeq/reaxff <fix_qeq_reaxff>` or :doc:`fix qeq/shielded <fix_qeq>` +is used with *pair_style reaxff* when simulating a ReaxFF model, +to equilibrate the charges at each timestep. +See the :doc:`fix qeq/reaxff <fix_qeq_reaxff>` or :doc:`fix qeq/shielded <fix_qeq>` or :doc:`fix acks2/reaxff <fix_acks2_reaxff>` or :doc:`fix qtpie/reaxff <fix_qtpie_reaxff>` command documentation for more details.
@@ -135,7 +135,7 @@ Using the keyword *checkqeq* with the value *no* turns off the check for the QEq fixes, allowing a simulation to be run without charge equilibration. In this case, the static charges you assign to each atom will be used for computing the electrostatic interactions in -the system. +the system. Using the optional keyword *lgvdw* with the value *yes* turns on the low-gradient correction of ReaxFF for long-range London Dispersion, From 0ad6babead44c48bfaeea89647495d8a95bdfe32 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 8 Nov 2024 08:42:47 -0800 Subject: [PATCH 248/294] Fix another (already existing) bug when a proc has no atoms --- src/KOKKOS/npair_kokkos.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index 4fec623c5d..fb3149ca4b 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -151,7 +151,11 @@ void NPairKokkos::build(NeighList *list_) if (GHOST) nall += atom->nghost; - if (nall == 0) return; + if (nall == 0) { + list->inum = 0; + list->gnum = 0; + return; + } list->grow(nall); From 339c9654ded5c0a5e29c850cb0ca2dd7ca86ec05 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 8 Nov 2024 14:42:59 -0800 Subject: [PATCH 249/294] Update GNU Make for new file --- src/Depend.sh | 4 ++++ src/KOKKOS/Install.sh | 2 ++ 2 files changed, 6 insertions(+) diff --git a/src/Depend.sh b/src/Depend.sh index 85542b21c0..9ddb29450d 100755 --- a/src/Depend.sh +++ b/src/Depend.sh @@ -68,6 +68,10 @@ if (test $1 = "COLLOID") then depend OPENMP fi +if (test $1 = "COLVARS") then + depend KOKKOS +fi + if (test $1 = "DIELECTRIC") then depend OPENMP fi diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index ce4634ede9..64ba0c6b03 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -133,6 +133,8 @@ action fix_acks2_reaxff_kokkos.cpp fix_acks2_reaxff.cpp action fix_acks2_reaxff_kokkos.h fix_acks2_reaxff.h action fix_cmap_kokkos.cpp 
fix_cmap.cpp action fix_cmap_kokkos.h fix_cmap.h +action fix_colvars_kokkos.cpp fix_colvars.cpp +action fix_colvars_kokkos.h fix_colvars.h action fix_deform_kokkos.cpp action fix_deform_kokkos.h action fix_dpd_energy_kokkos.cpp fix_dpd_energy.cpp From b830f7806e6c6070a50208dcd78e897ae2a9696f Mon Sep 17 00:00:00 2001 From: alphataubio Date: Fri, 8 Nov 2024 18:01:50 -0500 Subject: [PATCH 250/294] Update fix_colvars_kokkos.cpp --- src/KOKKOS/fix_colvars_kokkos.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/fix_colvars_kokkos.cpp b/src/KOKKOS/fix_colvars_kokkos.cpp index 2ab08bf587..faf738f9e7 100644 --- a/src/KOKKOS/fix_colvars_kokkos.cpp +++ b/src/KOKKOS/fix_colvars_kokkos.cpp @@ -36,7 +36,7 @@ FixColvarsKokkos::FixColvarsKokkos(LAMMPS *lmp, int narg, char **arg) : void FixColvarsKokkos::post_force(int vflag) { - atomKK->sync(Host,X_MASK|F_MASK); + atomKK->sync(Host,X_MASK|F_MASK|TAG_MASK|IMAGE_MASK); FixColvars::post_force(vflag); atomKK->modified(Host,F_MASK); } @@ -45,7 +45,7 @@ void FixColvarsKokkos::post_force(int vflag) void FixColvarsKokkos::end_of_step() { if (store_forces) { - atomKK->sync(Host,F_MASK); + atomKK->sync(Host,F_MASK|TAG_MASK); FixColvars::end_of_step(); } } From fb758d951f8571ee6e0ca9db4ca1905cf7ffb9cd Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 8 Nov 2024 20:56:28 -0500 Subject: [PATCH 251/294] synchronize neb/spin with bugfixes for neb --- src/SPIN/neb_spin.cpp | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/src/SPIN/neb_spin.cpp b/src/SPIN/neb_spin.cpp index 6d6ec1bbfd..14c47e9430 100644 --- a/src/SPIN/neb_spin.cpp +++ b/src/SPIN/neb_spin.cpp @@ -164,8 +164,10 @@ void NEBSpin::run() // create MPI communicator for root proc from each world int color; - if (me == 0) color = 0; - else color = 1; + if (me == 0) + color = 0; + else + color = MPI_UNDEFINED; MPI_Comm_split(uworld,color,0,&roots); // search for neb_spin fix, allocate it @@ -728,19 
+730,21 @@ void NEBSpin::print_status() local_norm_inf = MAX(temp_inf,local_norm_inf); } - double fmaxreplica; - MPI_Allreduce(&tnorm2,&fmaxreplica,1,MPI_DOUBLE,MPI_MAX,roots); + double fmaxreplica = 0.0; + double fmaxatom = 0.0; double fnorminf = 0.0; MPI_Allreduce(&local_norm_inf,&fnorminf,1,MPI_DOUBLE,MPI_MAX,world); - double fmaxatom; - MPI_Allreduce(&fnorminf,&fmaxatom,1,MPI_DOUBLE,MPI_MAX,roots); - if (verbose) { - freplica = new double[nreplica]; - MPI_Allgather(&tnorm2,1,MPI_DOUBLE,&freplica[0],1,MPI_DOUBLE,roots); - fmaxatomInRepl = new double[nreplica]; - MPI_Allgather(&fnorminf,1,MPI_DOUBLE,&fmaxatomInRepl[0],1,MPI_DOUBLE,roots); + if (me == 0) { + MPI_Allreduce(&tnorm2,&fmaxreplica,1,MPI_DOUBLE,MPI_MAX,roots); + MPI_Allreduce(&fnorminf,&fmaxatom,1,MPI_DOUBLE,MPI_MAX,roots); + if (verbose) { + freplica = new double[nreplica]; + MPI_Allgather(&tnorm2,1,MPI_DOUBLE,&freplica[0],1,MPI_DOUBLE,roots); + fmaxatomInRepl = new double[nreplica]; + MPI_Allgather(&fnorminf,1,MPI_DOUBLE,&fmaxatomInRepl[0],1,MPI_DOUBLE,roots); + } } double one[7]; @@ -828,5 +832,9 @@ void NEBSpin::print_status() fprintf(ulogfile,"\n"); fflush(ulogfile); } + if ((me == 0) && verbose) { + delete[] freplica; + delete[] fmaxatomInRepl; + } } } From 324e7952379d1554243b7762862b69435340302f Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 8 Nov 2024 20:56:52 -0500 Subject: [PATCH 252/294] do not free a NULL communicator --- src/REPLICA/neb.cpp | 2 +- src/SPIN/neb_spin.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/REPLICA/neb.cpp b/src/REPLICA/neb.cpp index 4023f2c0b3..b14748565b 100644 --- a/src/REPLICA/neb.cpp +++ b/src/REPLICA/neb.cpp @@ -95,7 +95,7 @@ NEB::NEB(LAMMPS *lmp, double etol_in, double ftol_in, int n1steps_in, int n2step NEB::~NEB() { - MPI_Comm_free(&roots); + if (roots != MPI_COMM_NULL) MPI_Comm_free(&roots); memory->destroy(all); delete[] rdist; if (fp) { diff --git a/src/SPIN/neb_spin.cpp b/src/SPIN/neb_spin.cpp index 
14c47e9430..b1b9dc077e 100644 --- a/src/SPIN/neb_spin.cpp +++ b/src/SPIN/neb_spin.cpp @@ -79,7 +79,7 @@ NEBSpin::NEBSpin(LAMMPS *lmp) : Command(lmp), fp(nullptr) { NEBSpin::~NEBSpin() { - MPI_Comm_free(&roots); + if (roots != MPI_COMM_NULL) MPI_Comm_free(&roots); memory->destroy(all); delete[] rdist; if (fp) { From a1a2a54f724c73cd32111efdf7f9f01429428d81 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 9 Nov 2024 04:04:52 -0500 Subject: [PATCH 253/294] update linalg for ML-QUIP --- lib/linalg/dbdsdc.cpp | 282 +++++++++++++ lib/linalg/dcombssq.cpp | 26 ++ lib/linalg/dgebak.cpp | 117 ++++++ lib/linalg/dgebal.cpp | 513 ++++++++++++++++++++++++ lib/linalg/dgehd2.cpp | 57 +++ lib/linalg/dgehrd.cpp | 144 +++++++ lib/linalg/dgesdd.cpp | 788 ++++++++++++++++++++++++++++++++++++ lib/linalg/dhseqr.cpp | 145 +++++++ lib/linalg/dlaexc.cpp | 214 ++++++++++ lib/linalg/dlahqr.cpp | 311 +++++++++++++++ lib/linalg/dlahr2.cpp | 121 ++++++ lib/linalg/dlaln2.cpp | 298 ++++++++++++++ lib/linalg/dlanv2.cpp | 106 +++++ lib/linalg/dlaqr0.cpp | 306 ++++++++++++++ lib/linalg/dlaqr1.cpp | 52 +++ lib/linalg/dlaqr2.cpp | 359 +++++++++++++++++ lib/linalg/dlaqr3.cpp | 375 ++++++++++++++++++ lib/linalg/dlaqr4.cpp | 298 ++++++++++++++ lib/linalg/dlaqr5.cpp | 521 ++++++++++++++++++++++++ lib/linalg/dlarfx.cpp | 552 ++++++++++++++++++++++++++ lib/linalg/dlasd0.cpp | 143 +++++++ lib/linalg/dlasd1.cpp | 96 +++++ lib/linalg/dlasd2.cpp | 282 +++++++++++++ lib/linalg/dlasd3.cpp | 218 ++++++++++ lib/linalg/dlasy2.cpp | 284 +++++++++++++ lib/linalg/dlasyf.cpp | 337 ++++++++++++++++ lib/linalg/dorghr.cpp | 94 +++++ lib/linalg/dormhr.cpp | 111 ++++++ lib/linalg/dsyconv.cpp | 199 ++++++++++ lib/linalg/dsyr.cpp | 167 ++++++++ lib/linalg/dsytf2.cpp | 246 ++++++++++++ lib/linalg/dsytrf.cpp | 123 ++++++ lib/linalg/dsytrs.cpp | 214 ++++++++++ lib/linalg/dsytrs2.cpp | 180 +++++++++ lib/linalg/dtrevc3.cpp | 858 ++++++++++++++++++++++++++++++++++++++++ lib/linalg/dtrexc.cpp | 217 ++++++++++ 
lib/linalg/dtrtrs.cpp | 65 +++ lib/linalg/izamax.cpp | 46 +++ lib/linalg/zcop.cpp | 43 ++ lib/linalg/zdotu.cpp | 55 +++ lib/linalg/zgetrf.cpp | 90 +++++ lib/linalg/zgetrf2.cpp | 117 ++++++ lib/linalg/zgetri.cpp | 132 +++++++ lib/linalg/zhegs2.cpp | 197 +++++++++ lib/linalg/zhegst.cpp | 195 +++++++++ lib/linalg/zhegv.cpp | 115 ++++++ lib/linalg/zhemm.cpp | 271 +++++++++++++ lib/linalg/zher.cpp | 187 +++++++++ lib/linalg/zherk.cpp | 325 +++++++++++++++ lib/linalg/zhetf2.cpp | 439 ++++++++++++++++++++ lib/linalg/zhetrf.cpp | 123 ++++++ lib/linalg/zhetri.cpp | 319 +++++++++++++++ lib/linalg/zlahef.cpp | 520 ++++++++++++++++++++++++ lib/linalg/zlaswp.cpp | 79 ++++ lib/linalg/zlasyf.cpp | 431 ++++++++++++++++++++ lib/linalg/zlauu2.cpp | 100 +++++ lib/linalg/zlauum.cpp | 103 +++++ lib/linalg/zpotrf.cpp | 115 ++++++ lib/linalg/zpotrf2.cpp | 89 +++++ lib/linalg/zpotri.cpp | 40 ++ lib/linalg/zsymv.cpp | 263 ++++++++++++ lib/linalg/zsyr.cpp | 141 +++++++ lib/linalg/zsytf2.cpp | 356 +++++++++++++++++ lib/linalg/zsytrf.cpp | 124 ++++++ lib/linalg/zsytri.cpp | 292 ++++++++++++++ lib/linalg/ztrsm.cpp | 443 +++++++++++++++++++++ lib/linalg/ztrsv.cpp | 330 ++++++++++++++++ lib/linalg/ztrti2.cpp | 88 +++++ lib/linalg/ztrtri.cpp | 112 ++++++ 69 files changed, 15699 insertions(+) create mode 100644 lib/linalg/dbdsdc.cpp create mode 100644 lib/linalg/dcombssq.cpp create mode 100644 lib/linalg/dgebak.cpp create mode 100644 lib/linalg/dgebal.cpp create mode 100644 lib/linalg/dgehd2.cpp create mode 100644 lib/linalg/dgehrd.cpp create mode 100644 lib/linalg/dgesdd.cpp create mode 100644 lib/linalg/dhseqr.cpp create mode 100644 lib/linalg/dlaexc.cpp create mode 100644 lib/linalg/dlahqr.cpp create mode 100644 lib/linalg/dlahr2.cpp create mode 100644 lib/linalg/dlaln2.cpp create mode 100644 lib/linalg/dlanv2.cpp create mode 100644 lib/linalg/dlaqr0.cpp create mode 100644 lib/linalg/dlaqr1.cpp create mode 100644 lib/linalg/dlaqr2.cpp create mode 100644 lib/linalg/dlaqr3.cpp create mode 100644 
lib/linalg/dlaqr4.cpp create mode 100644 lib/linalg/dlaqr5.cpp create mode 100644 lib/linalg/dlarfx.cpp create mode 100644 lib/linalg/dlasd0.cpp create mode 100644 lib/linalg/dlasd1.cpp create mode 100644 lib/linalg/dlasd2.cpp create mode 100644 lib/linalg/dlasd3.cpp create mode 100644 lib/linalg/dlasy2.cpp create mode 100644 lib/linalg/dlasyf.cpp create mode 100644 lib/linalg/dorghr.cpp create mode 100644 lib/linalg/dormhr.cpp create mode 100644 lib/linalg/dsyconv.cpp create mode 100644 lib/linalg/dsyr.cpp create mode 100644 lib/linalg/dsytf2.cpp create mode 100644 lib/linalg/dsytrf.cpp create mode 100644 lib/linalg/dsytrs.cpp create mode 100644 lib/linalg/dsytrs2.cpp create mode 100644 lib/linalg/dtrevc3.cpp create mode 100644 lib/linalg/dtrexc.cpp create mode 100644 lib/linalg/dtrtrs.cpp create mode 100644 lib/linalg/izamax.cpp create mode 100644 lib/linalg/zcop.cpp create mode 100644 lib/linalg/zdotu.cpp create mode 100644 lib/linalg/zgetrf.cpp create mode 100644 lib/linalg/zgetrf2.cpp create mode 100644 lib/linalg/zgetri.cpp create mode 100644 lib/linalg/zhegs2.cpp create mode 100644 lib/linalg/zhegst.cpp create mode 100644 lib/linalg/zhegv.cpp create mode 100644 lib/linalg/zhemm.cpp create mode 100644 lib/linalg/zher.cpp create mode 100644 lib/linalg/zherk.cpp create mode 100644 lib/linalg/zhetf2.cpp create mode 100644 lib/linalg/zhetrf.cpp create mode 100644 lib/linalg/zhetri.cpp create mode 100644 lib/linalg/zlahef.cpp create mode 100644 lib/linalg/zlaswp.cpp create mode 100644 lib/linalg/zlasyf.cpp create mode 100644 lib/linalg/zlauu2.cpp create mode 100644 lib/linalg/zlauum.cpp create mode 100644 lib/linalg/zpotrf.cpp create mode 100644 lib/linalg/zpotrf2.cpp create mode 100644 lib/linalg/zpotri.cpp create mode 100644 lib/linalg/zsymv.cpp create mode 100644 lib/linalg/zsyr.cpp create mode 100644 lib/linalg/zsytf2.cpp create mode 100644 lib/linalg/zsytrf.cpp create mode 100644 lib/linalg/zsytri.cpp create mode 100644 lib/linalg/ztrsm.cpp create mode 100644 
lib/linalg/ztrsv.cpp create mode 100644 lib/linalg/ztrti2.cpp create mode 100644 lib/linalg/ztrtri.cpp diff --git a/lib/linalg/dbdsdc.cpp b/lib/linalg/dbdsdc.cpp new file mode 100644 index 0000000000..7f362f3be9 --- /dev/null +++ b/lib/linalg/dbdsdc.cpp @@ -0,0 +1,282 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__9 = 9; +static integer c__0 = 0; +static doublereal c_b15 = 1.; +static integer c__1 = 1; +static doublereal c_b29 = 0.; +int dbdsdc_(char *uplo, char *compq, integer *n, doublereal *d__, doublereal *e, doublereal *u, + integer *ldu, doublereal *vt, integer *ldvt, doublereal *q, integer *iq, + doublereal *work, integer *iwork, integer *info, ftnlen uplo_len, ftnlen compq_len) +{ + integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; + doublereal d__1; + double d_lmp_sign(doublereal *, doublereal *), log(doublereal); + integer i__, j, k; + doublereal p, r__; + integer z__, ic, ii, kk; + doublereal cs; + integer is, iu; + doublereal sn; + integer nm1; + doublereal eps; + integer ivt, difl, difr, ierr, perm, mlvl, sqre; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int dlasr_(char *, char *, char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, ftnlen, ftnlen, ftnlen), + dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), + dswap_(integer *, doublereal *, integer *, doublereal *, integer *); + integer poles, iuplo, nsize, start; + extern int dlasd0_(integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *, integer *, doublereal *, integer *); + extern doublereal dlamch_(char *, ftnlen); + extern int dlasda_(integer *, integer *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer 
*, + integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, + doublereal *, integer *, integer *, ftnlen), + dlasdq_(char *, integer *, integer *, integer *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, ftnlen), + dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, + ftnlen), + dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + extern int xerbla_(char *, integer *, ftnlen); + integer givcol; + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *, ftnlen); + integer icompq; + doublereal orgnrm; + integer givnum, givptr, qstart, smlsiz, wstart, smlszp; + --d__; + --e; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --q; + --iq; + --work; + --iwork; + *info = 0; + iuplo = 0; + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + iuplo = 1; + } + if (lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + iuplo = 2; + } + if (lsame_(compq, (char *)"N", (ftnlen)1, (ftnlen)1)) { + icompq = 0; + } else if (lsame_(compq, (char *)"P", (ftnlen)1, (ftnlen)1)) { + icompq = 1; + } else if (lsame_(compq, (char *)"I", (ftnlen)1, (ftnlen)1)) { + icompq = 2; + } else { + icompq = -1; + } + if (iuplo == 0) { + *info = -1; + } else if (icompq < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ldu < 1 || icompq == 2 && *ldu < *n) { + *info = -7; + } else if (*ldvt < 1 || icompq == 2 && *ldvt < *n) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DBDSDC", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + smlsiz = ilaenv_(&c__9, (char *)"DBDSDC", (char *)" ", &c__0, &c__0, &c__0, &c__0, 
(ftnlen)6, (ftnlen)1); + if (*n == 1) { + if (icompq == 1) { + q[1] = d_lmp_sign(&c_b15, &d__[1]); + q[smlsiz * *n + 1] = 1.; + } else if (icompq == 2) { + u[u_dim1 + 1] = d_lmp_sign(&c_b15, &d__[1]); + vt[vt_dim1 + 1] = 1.; + } + d__[1] = abs(d__[1]); + return 0; + } + nm1 = *n - 1; + wstart = 1; + qstart = 3; + if (icompq == 1) { + dcopy_(n, &d__[1], &c__1, &q[1], &c__1); + i__1 = *n - 1; + dcopy_(&i__1, &e[1], &c__1, &q[*n + 1], &c__1); + } + if (iuplo == 2) { + qstart = 5; + if (icompq == 2) { + wstart = (*n << 1) - 1; + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (icompq == 1) { + q[i__ + (*n << 1)] = cs; + q[i__ + *n * 3] = sn; + } else if (icompq == 2) { + work[i__] = cs; + work[nm1 + i__] = -sn; + } + } + } + if (icompq == 0) { + dlasdq_((char *)"U", &c__0, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[vt_offset], ldvt, + &u[u_offset], ldu, &u[u_offset], ldu, &work[1], info, (ftnlen)1); + goto L40; + } + if (*n <= smlsiz) { + if (icompq == 2) { + dlaset_((char *)"A", n, n, &c_b29, &c_b15, &u[u_offset], ldu, (ftnlen)1); + dlaset_((char *)"A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt, (ftnlen)1); + dlasdq_((char *)"U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[u_offset], + ldu, &u[u_offset], ldu, &work[wstart], info, (ftnlen)1); + } else if (icompq == 1) { + iu = 1; + ivt = iu + *n; + dlaset_((char *)"A", n, n, &c_b29, &c_b15, &q[iu + (qstart - 1) * *n], n, (ftnlen)1); + dlaset_((char *)"A", n, n, &c_b29, &c_b15, &q[ivt + (qstart - 1) * *n], n, (ftnlen)1); + dlasdq_((char *)"U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &q[ivt + (qstart - 1) * *n], n, + &q[iu + (qstart - 1) * *n], n, &q[iu + (qstart - 1) * *n], n, &work[wstart], + info, (ftnlen)1); + } + goto L40; + } + if (icompq == 2) { + dlaset_((char *)"A", n, n, &c_b29, &c_b15, &u[u_offset], ldu, (ftnlen)1); + dlaset_((char *)"A", n, n, 
&c_b29, &c_b15, &vt[vt_offset], ldvt, (ftnlen)1); + } + orgnrm = dlanst_((char *)"M", n, &d__[1], &e[1], (ftnlen)1); + if (orgnrm == 0.) { + return 0; + } + dlascl_((char *)"G", &c__0, &c__0, &orgnrm, &c_b15, n, &c__1, &d__[1], n, &ierr, (ftnlen)1); + dlascl_((char *)"G", &c__0, &c__0, &orgnrm, &c_b15, &nm1, &c__1, &e[1], &nm1, &ierr, (ftnlen)1); + eps = dlamch_((char *)"Epsilon", (ftnlen)7) * .9; + mlvl = (integer)(log((doublereal)(*n) / (doublereal)(smlsiz + 1)) / log(2.)) + 1; + smlszp = smlsiz + 1; + if (icompq == 1) { + iu = 1; + ivt = smlsiz + 1; + difl = ivt + smlszp; + difr = difl + mlvl; + z__ = difr + (mlvl << 1); + ic = z__ + mlvl; + is = ic + 1; + poles = is + 1; + givnum = poles + (mlvl << 1); + k = 1; + givptr = 2; + perm = 3; + givcol = perm + mlvl; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) < eps) { + d__[i__] = d_lmp_sign(&eps, &d__[i__]); + } + } + start = 1; + sqre = 0; + i__1 = nm1; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) { + if (i__ < nm1) { + nsize = i__ - start + 1; + } else if ((d__1 = e[i__], abs(d__1)) >= eps) { + nsize = *n - start + 1; + } else { + nsize = i__ - start + 1; + if (icompq == 2) { + u[*n + *n * u_dim1] = d_lmp_sign(&c_b15, &d__[*n]); + vt[*n + *n * vt_dim1] = 1.; + } else if (icompq == 1) { + q[*n + (qstart - 1) * *n] = d_lmp_sign(&c_b15, &d__[*n]); + q[*n + (smlsiz + qstart - 1) * *n] = 1.; + } + d__[*n] = (d__1 = d__[*n], abs(d__1)); + } + if (icompq == 2) { + dlasd0_(&nsize, &sqre, &d__[start], &e[start], &u[start + start * u_dim1], ldu, + &vt[start + start * vt_dim1], ldvt, &smlsiz, &iwork[1], &work[wstart], + info); + } else { + dlasda_(&icompq, &smlsiz, &nsize, &sqre, &d__[start], &e[start], + &q[start + (iu + qstart - 2) * *n], n, &q[start + (ivt + qstart - 2) * *n], + &iq[start + k * *n], &q[start + (difl + qstart - 2) * *n], + &q[start + (difr + qstart - 2) * *n], &q[start + (z__ + qstart - 2) * *n], + &q[start + 
(poles + qstart - 2) * *n], &iq[start + givptr * *n], + &iq[start + givcol * *n], n, &iq[start + perm * *n], + &q[start + (givnum + qstart - 2) * *n], &q[start + (ic + qstart - 2) * *n], + &q[start + (is + qstart - 2) * *n], &work[wstart], &iwork[1], info); + } + if (*info != 0) { + return 0; + } + start = i__ + 1; + } + } + dlascl_((char *)"G", &c__0, &c__0, &c_b15, &orgnrm, n, &c__1, &d__[1], n, &ierr, (ftnlen)1); +L40: + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + kk = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] > p) { + kk = j; + p = d__[j]; + } + } + if (kk != i__) { + d__[kk] = d__[i__]; + d__[i__] = p; + if (icompq == 1) { + iq[i__] = kk; + } else if (icompq == 2) { + dswap_(n, &u[i__ * u_dim1 + 1], &c__1, &u[kk * u_dim1 + 1], &c__1); + dswap_(n, &vt[i__ + vt_dim1], ldvt, &vt[kk + vt_dim1], ldvt); + } + } else if (icompq == 1) { + iq[i__] = i__; + } + } + if (icompq == 1) { + if (iuplo == 1) { + iq[*n] = 1; + } else { + iq[*n] = 0; + } + } + if (iuplo == 2 && icompq == 2) { + dlasr_((char *)"L", (char *)"V", (char *)"B", n, n, &work[1], &work[*n], &u[u_offset], ldu, (ftnlen)1, (ftnlen)1, + (ftnlen)1); + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dcombssq.cpp b/lib/linalg/dcombssq.cpp new file mode 100644 index 0000000000..179be8ad9e --- /dev/null +++ b/lib/linalg/dcombssq.cpp @@ -0,0 +1,26 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int dcombssq_(doublereal *v1, doublereal *v2) +{ + doublereal d__1; + --v2; + --v1; + if (v1[1] >= v2[1]) { + if (v1[1] != 0.) 
{ + d__1 = v2[1] / v1[1]; + v1[2] += d__1 * d__1 * v2[2]; + } else { + v1[2] += v2[2]; + } + } else { + d__1 = v1[1] / v2[1]; + v1[2] = v2[2] + d__1 * d__1 * v1[2]; + v1[1] = v2[1]; + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dgebak.cpp b/lib/linalg/dgebak.cpp new file mode 100644 index 0000000000..ba0db07641 --- /dev/null +++ b/lib/linalg/dgebak.cpp @@ -0,0 +1,117 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int dgebak_(char *job, char *side, integer *n, integer *ilo, integer *ihi, doublereal *scale, + integer *m, doublereal *v, integer *ldv, integer *info, ftnlen job_len, ftnlen side_len) +{ + integer v_dim1, v_offset, i__1; + integer i__, k; + doublereal s; + integer ii; + extern int dscal_(integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int dswap_(integer *, doublereal *, integer *, doublereal *, integer *); + logical leftv; + extern int xerbla_(char *, integer *, ftnlen); + logical rightv; + --scale; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + rightv = lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1); + leftv = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1); + *info = 0; + if (!lsame_(job, (char *)"N", (ftnlen)1, (ftnlen)1) && !lsame_(job, (char *)"P", (ftnlen)1, (ftnlen)1) && + !lsame_(job, (char *)"S", (ftnlen)1, (ftnlen)1) && !lsame_(job, (char *)"B", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (!rightv && !leftv) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > max(1, *n)) { + *info = -4; + } else if (*ihi < min(*ilo, *n) || *ihi > *n) { + *info = -5; + } else if (*m < 0) { + *info = -7; + } else if (*ldv < max(1, *n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGEBAK", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + if (*m == 0) { + return 0; + } + if (lsame_(job, (char *)"N", (ftnlen)1, (ftnlen)1)) { + return 0; + } + 
if (*ilo == *ihi) { + goto L30; + } + if (lsame_(job, (char *)"S", (ftnlen)1, (ftnlen)1) || lsame_(job, (char *)"B", (ftnlen)1, (ftnlen)1)) { + if (rightv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = scale[i__]; + dscal_(m, &s, &v[i__ + v_dim1], ldv); + } + } + if (leftv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = 1. / scale[i__]; + dscal_(m, &s, &v[i__ + v_dim1], ldv); + } + } + } +L30: + if (lsame_(job, (char *)"P", (ftnlen)1, (ftnlen)1) || lsame_(job, (char *)"B", (ftnlen)1, (ftnlen)1)) { + if (rightv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L40; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = (integer)scale[i__]; + if (k == i__) { + goto L40; + } + dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); + L40:; + } + } + if (leftv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L50; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = (integer)scale[i__]; + if (k == i__) { + goto L50; + } + dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); + L50:; + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dgebal.cpp b/lib/linalg/dgebal.cpp new file mode 100644 index 0000000000..c5301edcdd --- /dev/null +++ b/lib/linalg/dgebal.cpp @@ -0,0 +1,513 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c__0 = 0; +static integer c_n1 = -1; +int dgebal_(char *job, integer *n, doublereal *a, integer *lda, integer *ilo, integer *ihi, + doublereal *scale, integer *info, ftnlen job_len) +{ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1, d__2; + doublereal c__, f, g; + integer i__, j, k, l, m; + doublereal r__, s, ca, ra; + integer ica, ira, iexc; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern int dscal_(integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char 
*, ftnlen, ftnlen); + extern int dswap_(integer *, doublereal *, integer *, doublereal *, integer *); + doublereal sfmin1, sfmin2, sfmax1, sfmax2; + extern doublereal dlamch_(char *, ftnlen); + extern integer idamax_(integer *, doublereal *, integer *); + extern logical disnan_(doublereal *); + extern int xerbla_(char *, integer *, ftnlen); + logical noconv; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --scale; + *info = 0; + if (!lsame_(job, (char *)"N", (ftnlen)1, (ftnlen)1) && !lsame_(job, (char *)"P", (ftnlen)1, (ftnlen)1) && + !lsame_(job, (char *)"S", (ftnlen)1, (ftnlen)1) && !lsame_(job, (char *)"B", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGEBAL", &i__1, (ftnlen)6); + return 0; + } + k = 1; + l = *n; + if (*n == 0) { + goto L210; + } + if (lsame_(job, (char *)"N", (ftnlen)1, (ftnlen)1)) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + scale[i__] = 1.; + } + goto L210; + } + if (lsame_(job, (char *)"S", (ftnlen)1, (ftnlen)1)) { + goto L120; + } + goto L50; +L20: + scale[m] = (doublereal)j; + if (j == m) { + goto L30; + } + dswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); + i__1 = *n - k + 1; + dswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda); +L30: + switch (iexc) { + case 1: + goto L40; + case 2: + goto L80; + } +L40: + if (l == 1) { + goto L210; + } + --l; +L50: + for (j = l; j >= 1; --j) { + i__1 = l; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ == j) { + goto L60; + } + if (a[j + i__ * a_dim1] != 0.) { + goto L70; + } + L60:; + } + m = l; + iexc = 1; + goto L20; + L70:; + } + goto L90; +L80: + ++k; +L90: + i__1 = l; + for (j = k; j <= i__1; ++j) { + i__2 = l; + for (i__ = k; i__ <= i__2; ++i__) { + if (i__ == j) { + goto L100; + } + if (a[i__ + j * a_dim1] != 0.) 
{ + goto L110; + } + L100:; + } + m = k; + iexc = 2; + goto L20; + L110:; + } +L120: + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + scale[i__] = 1.; + } + if (lsame_(job, (char *)"P", (ftnlen)1, (ftnlen)1)) { + goto L210; + } + sfmin1 = dlamch_((char *)"S", (ftnlen)1) / dlamch_((char *)"P", (ftnlen)1); + sfmax1 = 1. / sfmin1; + sfmin2 = sfmin1 * 2.; + sfmax2 = 1. / sfmin2; +L140: + noconv = FALSE_; + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + i__2 = l - k + 1; + c__ = dnrm2_(&i__2, &a[k + i__ * a_dim1], &c__1); + i__2 = l - k + 1; + r__ = dnrm2_(&i__2, &a[i__ + k * a_dim1], lda); + ica = idamax_(&l, &a[i__ * a_dim1 + 1], &c__1); + ca = (d__1 = a[ica + i__ * a_dim1], abs(d__1)); + i__2 = *n - k + 1; + ira = idamax_(&i__2, &a[i__ + k * a_dim1], lda); + ra = (d__1 = a[i__ + (ira + k - 1) * a_dim1], abs(d__1)); + if (c__ == 0. || r__ == 0.) { + goto L200; + } + g = r__ / 2.; + f = 1.; + s = c__ + r__; + L160: + d__1 = max(f, c__); + d__2 = min(r__, g); + if (c__ >= g || max(d__1, ca) >= sfmax2 || min(d__2, ra) <= sfmin2) { + goto L170; + } + d__1 = c__ + f + ca + r__ + g + ra; + if (disnan_(&d__1)) { + *info = -3; + i__2 = -(*info); + xerbla_((char *)"DGEBAL", &i__2, (ftnlen)6); + return 0; + } + f *= 2.; + c__ *= 2.; + ca *= 2.; + r__ /= 2.; + g /= 2.; + ra /= 2.; + goto L160; + L170: + g = c__ / 2.; + L180: + d__1 = min(f, c__), d__1 = min(d__1, g); + if (g < r__ || max(r__, ra) >= sfmax2 || min(d__1, ca) <= sfmin2) { + goto L190; + } + f /= 2.; + c__ /= 2.; + g /= 2.; + ca /= 2.; + r__ *= 2.; + ra *= 2.; + goto L180; + L190: + if (c__ + r__ >= s * .95) { + goto L200; + } + if (f < 1. && scale[i__] < 1.) { + if (f * scale[i__] <= sfmin1) { + goto L200; + } + } + if (f > 1. && scale[i__] > 1.) { + if (scale[i__] >= sfmax1 / f) { + goto L200; + } + } + g = 1. 
/ f; + scale[i__] *= f; + noconv = TRUE_; + i__2 = *n - k + 1; + dscal_(&i__2, &g, &a[i__ + k * a_dim1], lda); + dscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1); + L200:; + } + if (noconv) { + goto L140; + } +L210: + *ilo = k; + *ihi = l; + return 0; +} +int dgeev_(char *jobvl, char *jobvr, integer *n, doublereal *a, integer *lda, doublereal *wr, + doublereal *wi, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, + doublereal *work, integer *lwork, integer *info, ftnlen jobvl_len, ftnlen jobvr_len) +{ + integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, i__2, i__3; + doublereal d__1, d__2; + double sqrt(doublereal); + integer i__, k; + doublereal r__, cs, sn; + integer ihi; + doublereal scl; + integer ilo; + doublereal dum[1], eps; + integer lwork_trevc__, ibal; + char side[1]; + doublereal anrm; + integer ierr, itau; + extern int drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *); + integer iwrk, nout; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern int dscal_(integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern doublereal dlapy2_(doublereal *, doublereal *); + extern int dlabad_(doublereal *, doublereal *), + dgebak_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, integer *, ftnlen, ftnlen), + dgebal_(char *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, + integer *, ftnlen); + logical scalea; + extern doublereal dlamch_(char *, ftnlen); + doublereal cscale; + extern doublereal dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *, + ftnlen); + extern int dgehrd_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, + doublereal *, integer *, integer *, 
ftnlen); + extern integer idamax_(integer *, doublereal *, integer *); + extern int dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, + integer *, ftnlen), + dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), + xerbla_(char *, integer *, ftnlen); + logical select[1]; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + doublereal bignum; + extern int dorghr_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *), + dhseqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, + integer *, ftnlen, ftnlen); + integer minwrk, maxwrk; + logical wantvl; + doublereal smlnum; + integer hswork; + logical lquery, wantvr; + extern int dtrevc3_(char *, char *, logical *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *, integer *, doublereal *, + integer *, integer *, ftnlen, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --wr; + --wi; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + *info = 0; + lquery = *lwork == -1; + wantvl = lsame_(jobvl, (char *)"V", (ftnlen)1, (ftnlen)1); + wantvr = lsame_(jobvr, (char *)"V", (ftnlen)1, (ftnlen)1); + if (!wantvl && !lsame_(jobvl, (char *)"N", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (!wantvr && !lsame_(jobvr, (char *)"N", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } else if (*ldvl < 1 || wantvl && *ldvl < *n) { + *info = -9; + } else if (*ldvr < 1 || wantvr && *ldvr < *n) { + *info = -11; + } + if (*info == 0) { + if (*n == 0) { + minwrk = 1; + maxwrk = 1; + } else { + maxwrk = (*n << 1) + + *n * ilaenv_(&c__1, 
(char *)"DGEHRD", (char *)" ", n, &c__1, n, &c__0, (ftnlen)6, (ftnlen)1); + if (wantvl) { + minwrk = *n << 2; + i__1 = maxwrk, + i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, (char *)"DORGHR", (char *)" ", n, &c__1, n, &c_n1, + (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1, i__2); + dhseqr_((char *)"S", (char *)"V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[1], &vl[vl_offset], + ldvl, &work[1], &c_n1, info, (ftnlen)1, (ftnlen)1); + hswork = (integer)work[1]; + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1, i__2), i__2 = *n + hswork; + maxwrk = max(i__1, i__2); + dtrevc3_((char *)"L", (char *)"B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, + &vr[vr_offset], ldvr, n, &nout, &work[1], &c_n1, &ierr, (ftnlen)1, + (ftnlen)1); + lwork_trevc__ = (integer)work[1]; + i__1 = maxwrk, i__2 = *n + lwork_trevc__; + maxwrk = max(i__1, i__2); + i__1 = maxwrk, i__2 = *n << 2; + maxwrk = max(i__1, i__2); + } else if (wantvr) { + minwrk = *n << 2; + i__1 = maxwrk, + i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, (char *)"DORGHR", (char *)" ", n, &c__1, n, &c_n1, + (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1, i__2); + dhseqr_((char *)"S", (char *)"V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[1], &vr[vr_offset], + ldvr, &work[1], &c_n1, info, (ftnlen)1, (ftnlen)1); + hswork = (integer)work[1]; + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1, i__2), i__2 = *n + hswork; + maxwrk = max(i__1, i__2); + dtrevc3_((char *)"R", (char *)"B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, + &vr[vr_offset], ldvr, n, &nout, &work[1], &c_n1, &ierr, (ftnlen)1, + (ftnlen)1); + lwork_trevc__ = (integer)work[1]; + i__1 = maxwrk, i__2 = *n + lwork_trevc__; + maxwrk = max(i__1, i__2); + i__1 = maxwrk, i__2 = *n << 2; + maxwrk = max(i__1, i__2); + } else { + minwrk = *n * 3; + dhseqr_((char *)"E", (char *)"N", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[1], &vr[vr_offset], + ldvr, &work[1], &c_n1, info, (ftnlen)1, (ftnlen)1); + hswork = (integer)work[1]; + i__1 = maxwrk, i__2 = *n + 1, i__1 = 
max(i__1, i__2), i__2 = *n + hswork; + maxwrk = max(i__1, i__2); + } + maxwrk = max(maxwrk, minwrk); + } + work[1] = (doublereal)maxwrk; + if (*lwork < minwrk && !lquery) { + *info = -13; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGEEV ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + if (*n == 0) { + return 0; + } + eps = dlamch_((char *)"P", (ftnlen)1); + smlnum = dlamch_((char *)"S", (ftnlen)1); + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1. / smlnum; + anrm = dlange_((char *)"M", n, n, &a[a_offset], lda, dum, (ftnlen)1); + scalea = FALSE_; + if (anrm > 0. && anrm < smlnum) { + scalea = TRUE_; + cscale = smlnum; + } else if (anrm > bignum) { + scalea = TRUE_; + cscale = bignum; + } + if (scalea) { + dlascl_((char *)"G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, &ierr, (ftnlen)1); + } + ibal = 1; + dgebal_((char *)"B", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr, (ftnlen)1); + itau = ibal + *n; + iwrk = itau + *n; + i__1 = *lwork - iwrk + 1; + dgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, &ierr); + if (wantvl) { + *(unsigned char *)side = 'L'; + dlacpy_((char *)"L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl, (ftnlen)1); + i__1 = *lwork - iwrk + 1; + dorghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], &i__1, &ierr); + iwrk = itau; + i__1 = *lwork - iwrk + 1; + dhseqr_((char *)"S", (char *)"V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &vl[vl_offset], ldvl, + &work[iwrk], &i__1, info, (ftnlen)1, (ftnlen)1); + if (wantvr) { + *(unsigned char *)side = 'B'; + dlacpy_((char *)"F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, (ftnlen)1); + } + } else if (wantvr) { + *(unsigned char *)side = 'R'; + dlacpy_((char *)"L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr, (ftnlen)1); + i__1 = *lwork - iwrk + 1; + dorghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], &i__1, 
&ierr); + iwrk = itau; + i__1 = *lwork - iwrk + 1; + dhseqr_((char *)"S", (char *)"V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &vr[vr_offset], ldvr, + &work[iwrk], &i__1, info, (ftnlen)1, (ftnlen)1); + } else { + iwrk = itau; + i__1 = *lwork - iwrk + 1; + dhseqr_((char *)"E", (char *)"N", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &vr[vr_offset], ldvr, + &work[iwrk], &i__1, info, (ftnlen)1, (ftnlen)1); + } + if (*info != 0) { + goto L50; + } + if (wantvl || wantvr) { + i__1 = *lwork - iwrk + 1; + dtrevc3_(side, (char *)"B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, &vr[vr_offset], + ldvr, n, &nout, &work[iwrk], &i__1, &ierr, (ftnlen)1, (ftnlen)1); + } + if (wantvl) { + dgebak_((char *)"B", (char *)"L", n, &ilo, &ihi, &work[ibal], n, &vl[vl_offset], ldvl, &ierr, (ftnlen)1, + (ftnlen)1); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.) { + scl = 1. / dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + } else if (wi[i__] > 0.) { + d__1 = dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + d__2 = dnrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + scl = 1. / dlapy2_(&d__1, &d__2); + dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + dscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + d__1 = vl[k + i__ * vl_dim1]; + d__2 = vl[k + (i__ + 1) * vl_dim1]; + work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2; + } + k = idamax_(n, &work[iwrk], &c__1); + dlartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], &cs, &sn, &r__); + drot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) * vl_dim1 + 1], &c__1, &cs, + &sn); + vl[k + (i__ + 1) * vl_dim1] = 0.; + } + } + } + if (wantvr) { + dgebak_((char *)"B", (char *)"R", n, &ilo, &ihi, &work[ibal], n, &vr[vr_offset], ldvr, &ierr, (ftnlen)1, + (ftnlen)1); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.) { + scl = 1. 
/ dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + } else if (wi[i__] > 0.) { + d__1 = dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + d__2 = dnrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + scl = 1. / dlapy2_(&d__1, &d__2); + dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + dscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + d__1 = vr[k + i__ * vr_dim1]; + d__2 = vr[k + (i__ + 1) * vr_dim1]; + work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2; + } + k = idamax_(n, &work[iwrk], &c__1); + dlartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], &cs, &sn, &r__); + drot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) * vr_dim1 + 1], &c__1, &cs, + &sn); + vr[k + (i__ + 1) * vr_dim1] = 0.; + } + } + } +L50: + if (scalea) { + i__1 = *n - *info; + i__3 = *n - *info; + i__2 = max(i__3, 1); + dlascl_((char *)"G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info + 1], &i__2, &ierr, + (ftnlen)1); + i__1 = *n - *info; + i__3 = *n - *info; + i__2 = max(i__3, 1); + dlascl_((char *)"G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info + 1], &i__2, &ierr, + (ftnlen)1); + if (*info > 0) { + i__1 = ilo - 1; + dlascl_((char *)"G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1], n, &ierr, (ftnlen)1); + i__1 = ilo - 1; + dlascl_((char *)"G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1], n, &ierr, (ftnlen)1); + } + } + work[1] = (doublereal)maxwrk; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dgehd2.cpp b/lib/linalg/dgehd2.cpp new file mode 100644 index 0000000000..9eaa873bd3 --- /dev/null +++ b/lib/linalg/dgehd2.cpp @@ -0,0 +1,57 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +int dgehd2_(integer *n, integer *ilo, integer *ihi, doublereal *a, integer *lda, doublereal *tau, + doublereal *work, integer *info) +{ + integer a_dim1, a_offset, i__1, i__2, i__3; + integer i__; + doublereal aii; + extern int 
dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, ftnlen), + dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *), + xerbla_(char *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1, *n)) { + *info = -2; + } else if (*ihi < min(*ilo, *n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGEHD2", &i__1, (ftnlen)6); + return 0; + } + i__1 = *ihi - 1; + for (i__ = *ilo; i__ <= i__1; ++i__) { + i__2 = *ihi - i__; + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3, *n) + i__ * a_dim1], &c__1, + &tau[i__]); + aii = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.; + i__2 = *ihi - i__; + dlarf_((char *)"Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[i__], + &a[(i__ + 1) * a_dim1 + 1], lda, &work[1], (ftnlen)5); + i__2 = *ihi - i__; + i__3 = *n - i__; + dlarf_((char *)"Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[i__], + &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1], (ftnlen)4); + a[i__ + 1 + i__ * a_dim1] = aii; + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dgehrd.cpp b/lib/linalg/dgehrd.cpp new file mode 100644 index 0000000000..eb152b90ed --- /dev/null +++ b/lib/linalg/dgehrd.cpp @@ -0,0 +1,144 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; +static integer c__65 = 65; +static doublereal c_b25 = -1.; +static doublereal c_b26 = 1.; +int dgehrd_(integer *n, integer *ilo, integer *ihi, doublereal *a, integer *lda, doublereal *tau, + doublereal *work, integer *lwork, integer *info) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + integer i__, j, ib; + 
doublereal ei; + integer nb, nh, nx, iwt; + extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, + ftnlen, ftnlen); + integer nbmin, iinfo; + extern int dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen), + daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), + dgehd2_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *), + dlahr2_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *), + dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, ftnlen, ftnlen, ftnlen, ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + integer ldwork, lwkopt; + logical lquery; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + *info = 0; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1, *n)) { + *info = -2; + } else if (*ihi < min(*ilo, *n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } else if (*lwork < max(1, *n) && !lquery) { + *info = -8; + } + if (*info == 0) { + i__1 = 64, i__2 = ilaenv_(&c__1, (char *)"DGEHRD", (char *)" ", n, ilo, ihi, &c_n1, (ftnlen)6, (ftnlen)1); + nb = min(i__1, i__2); + lwkopt = *n * nb + 4160; + work[1] = (doublereal)lwkopt; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGEHRD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + tau[i__] = 0.; + } + i__1 = *n 
- 1; + for (i__ = max(1, *ihi); i__ <= i__1; ++i__) { + tau[i__] = 0.; + } + nh = *ihi - *ilo + 1; + if (nh <= 1) { + work[1] = 1.; + return 0; + } + i__1 = 64, i__2 = ilaenv_(&c__1, (char *)"DGEHRD", (char *)" ", n, ilo, ihi, &c_n1, (ftnlen)6, (ftnlen)1); + nb = min(i__1, i__2); + nbmin = 2; + if (nb > 1 && nb < nh) { + i__1 = nb, i__2 = ilaenv_(&c__3, (char *)"DGEHRD", (char *)" ", n, ilo, ihi, &c_n1, (ftnlen)6, (ftnlen)1); + nx = max(i__1, i__2); + if (nx < nh) { + if (*lwork < *n * nb + 4160) { + i__1 = 2, + i__2 = ilaenv_(&c__2, (char *)"DGEHRD", (char *)" ", n, ilo, ihi, &c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1, i__2); + if (*lwork >= *n * nbmin + 4160) { + nb = (*lwork - 4160) / *n; + } else { + nb = 1; + } + } + } + } + ldwork = *n; + if (nb < nbmin || nb >= nh) { + i__ = *ilo; + } else { + iwt = *n * nb + 1; + i__1 = *ihi - 1 - nx; + i__2 = nb; + for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + i__3 = nb, i__4 = *ihi - i__; + ib = min(i__3, i__4); + dlahr2_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], &work[iwt], &c__65, + &work[1], &ldwork); + ei = a[i__ + ib + (i__ + ib - 1) * a_dim1]; + a[i__ + ib + (i__ + ib - 1) * a_dim1] = 1.; + i__3 = *ihi - i__ - ib + 1; + dgemm_((char *)"No transpose", (char *)"Transpose", ihi, &i__3, &ib, &c_b25, &work[1], &ldwork, + &a[i__ + ib + i__ * a_dim1], lda, &c_b26, &a[(i__ + ib) * a_dim1 + 1], lda, + (ftnlen)12, (ftnlen)9); + a[i__ + ib + (i__ + ib - 1) * a_dim1] = ei; + i__3 = ib - 1; + dtrmm_((char *)"Right", (char *)"Lower", (char *)"Transpose", (char *)"Unit", &i__, &i__3, &c_b26, + &a[i__ + 1 + i__ * a_dim1], lda, &work[1], &ldwork, (ftnlen)5, (ftnlen)5, + (ftnlen)9, (ftnlen)4); + i__3 = ib - 2; + for (j = 0; j <= i__3; ++j) { + daxpy_(&i__, &c_b25, &work[ldwork * j + 1], &c__1, &a[(i__ + j + 1) * a_dim1 + 1], + &c__1); + } + i__3 = *ihi - i__; + i__4 = *n - i__ - ib + 1; + dlarfb_((char *)"Left", (char *)"Transpose", (char *)"Forward", (char *)"Columnwise", &i__3, &i__4, 
&ib, + &a[i__ + 1 + i__ * a_dim1], lda, &work[iwt], &c__65, + &a[i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], &ldwork, (ftnlen)4, (ftnlen)9, + (ftnlen)7, (ftnlen)10); + } + } + dgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo); + work[1] = (doublereal)lwkopt; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dgesdd.cpp b/lib/linalg/dgesdd.cpp new file mode 100644 index 0000000000..59dbee7210 --- /dev/null +++ b/lib/linalg/dgesdd.cpp @@ -0,0 +1,788 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c_n1 = -1; +static integer c__0 = 0; +static doublereal c_b63 = 0.; +static integer c__1 = 1; +static doublereal c_b84 = 1.; +/* dgesdd_: f2c-style routine that reports itself to xerbla_ as "DGESDD". jobz selects one of the modes "A", "S", "O" or "N" (tested with lsame_). A rejected argument sets info to minus its position and calls xerbla_; lwork == -1 is a size query that only fills the first element of work. Always returns 0. NOTE(review): presumably the LAPACK divide-and-conquer SVD driver; confirm against the LAPACK reference. */int dgesdd_(char *jobz, integer *m, integer *n, doublereal *a, integer *lda, doublereal *s, + doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, doublereal *work, + integer *lwork, integer *iwork, integer *info, ftnlen jobz_len) +{ + integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2, i__3; + double sqrt(doublereal); + integer lwork_dorglq_mn__, lwork_dorglq_nn__, lwork_dorgqr_mm__, lwork_dorgqr_mn__, i__, ie, + lwork_dorgbr_p_mm__, il, lwork_dorgbr_q_nn__, ir, iu, blk; + doublereal dum[1], eps; + integer ivt, iscl; + doublereal anrm; + integer idum[1], ierr, itau, lwork_dormbr_qln_mm__, lwork_dormbr_qln_mn__, + lwork_dormbr_qln_nn__, lwork_dormbr_prt_mm__, lwork_dormbr_prt_mn__, lwork_dormbr_prt_nn__; + extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, + ftnlen, ftnlen); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer chunk, minmn, wrkbl, itaup, itauq, mnthr; + logical wntqa; + integer nwork; + logical wntqn, wntqo, wntqs; + extern int dbdsdc_(char *, char *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, 
integer *, ftnlen, ftnlen), + dgebrd_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, integer *); + extern doublereal dlamch_(char *, ftnlen), + dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *, ftnlen); + integer bdspac; + extern int dgelqf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, + doublereal *, integer *, integer *, ftnlen), + dgeqrf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, integer *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen), + dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, + ftnlen), + xerbla_(char *, integer *, ftnlen), + dorgbr_(char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *, ftnlen); + doublereal bignum; + extern int dormbr_(char *, char *, char *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, + integer *, ftnlen, ftnlen, ftnlen), + dorglq_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *), + dorgqr_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *); + integer ldwrkl, ldwrkr, minwrk, ldwrku, maxwrk, ldwkvt; + doublereal smlnum; + logical wntqas, lquery; + integer lwork_dgebrd_mm__, lwork_dgebrd_mn__, lwork_dgebrd_nn__, lwork_dgelqf_mn__, + lwork_dgeqrf_mn__; + /* Shift every array pointer back so the code below can index 1-based; 2-D arrays are addressed column-major as a[i + j * a_dim1]. */a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --s; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --work; + --iwork; + *info = 0; + minmn = 
min(*m, *n); + wntqa = lsame_(jobz, (char *)"A", (ftnlen)1, (ftnlen)1); + wntqs = lsame_(jobz, (char *)"S", (ftnlen)1, (ftnlen)1); + wntqas = wntqa || wntqs; + wntqo = lsame_(jobz, (char *)"O", (ftnlen)1, (ftnlen)1); + wntqn = lsame_(jobz, (char *)"N", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + /* Argument checks: the first failing test stores the negated 1-based argument position in info. */if (!(wntqa || wntqs || wntqo || wntqn)) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1, *m)) { + *info = -5; + } else if (*ldu < 1 || wntqas && *ldu < *m || wntqo && *m < *n && *ldu < *m) { + *info = -8; + } else if (*ldvt < 1 || wntqa && *ldvt < *n || wntqs && *ldvt < minmn || + wntqo && *m >= *n && *ldvt < *n) { + *info = -10; + } + /* Workspace sizing: each helper is called with lwork = -1 (c_n1) and dum[0] is read back afterwards; the results are combined into minwrk and maxwrk per jobz mode and matrix shape. */if (*info == 0) { + minwrk = 1; + maxwrk = 1; + bdspac = 0; + mnthr = (integer)(minmn * 11. / 6.); + if (*m >= *n && minmn > 0) { + if (wntqn) { + bdspac = *n * 7; + } else { + bdspac = *n * 3 * *n + (*n << 2); + } + dgebrd_(m, n, dum, m, dum, dum, dum, dum, dum, &c_n1, &ierr); + lwork_dgebrd_mn__ = (integer)dum[0]; + dgebrd_(n, n, dum, n, dum, dum, dum, dum, dum, &c_n1, &ierr); + lwork_dgebrd_nn__ = (integer)dum[0]; + dgeqrf_(m, n, dum, m, dum, dum, &c_n1, &ierr); + lwork_dgeqrf_mn__ = (integer)dum[0]; + dorgbr_((char *)"Q", n, n, n, dum, n, dum, dum, &c_n1, &ierr, (ftnlen)1); + lwork_dorgbr_q_nn__ = (integer)dum[0]; + dorgqr_(m, m, n, dum, m, dum, dum, &c_n1, &ierr); + lwork_dorgqr_mm__ = (integer)dum[0]; + dorgqr_(m, n, n, dum, m, dum, dum, &c_n1, &ierr); + lwork_dorgqr_mn__ = (integer)dum[0]; + dormbr_((char *)"P", (char *)"R", (char *)"T", n, n, n, dum, n, dum, dum, n, dum, &c_n1, &ierr, (ftnlen)1, + (ftnlen)1, (ftnlen)1); + lwork_dormbr_prt_nn__ = (integer)dum[0]; + dormbr_((char *)"Q", (char *)"L", (char *)"N", n, n, n, dum, n, dum, dum, n, dum, &c_n1, &ierr, (ftnlen)1, + (ftnlen)1, (ftnlen)1); + lwork_dormbr_qln_nn__ = (integer)dum[0]; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, n, n, dum, m, dum, dum, m, dum, &c_n1, &ierr, (ftnlen)1, + (ftnlen)1, 
(ftnlen)1); + lwork_dormbr_qln_mn__ = (integer)dum[0]; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, m, n, dum, m, dum, dum, m, dum, &c_n1, &ierr, (ftnlen)1, + (ftnlen)1, (ftnlen)1); + lwork_dormbr_qln_mm__ = (integer)dum[0]; + if (*m >= mnthr) { + if (wntqn) { + wrkbl = *n + lwork_dgeqrf_mn__; + i__1 = wrkbl, i__2 = *n * 3 + lwork_dgebrd_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = bdspac + *n; + maxwrk = max(i__1, i__2); + minwrk = bdspac + *n; + } else if (wntqo) { + wrkbl = *n + lwork_dgeqrf_mn__; + i__1 = wrkbl, i__2 = *n + lwork_dorgqr_mn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dgebrd_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_qln_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + wrkbl = max(i__1, i__2); + maxwrk = wrkbl + (*n << 1) * *n; + minwrk = bdspac + (*n << 1) * *n + *n * 3; + } else if (wntqs) { + wrkbl = *n + lwork_dgeqrf_mn__; + i__1 = wrkbl, i__2 = *n + lwork_dorgqr_mn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dgebrd_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_qln_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + wrkbl = max(i__1, i__2); + maxwrk = wrkbl + *n * *n; + minwrk = bdspac + *n * *n + *n * 3; + } else if (wntqa) { + wrkbl = *n + lwork_dgeqrf_mn__; + i__1 = wrkbl, i__2 = *n + lwork_dorgqr_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dgebrd_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_qln_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + wrkbl = max(i__1, i__2); + maxwrk = wrkbl + *n * *n; + i__1 = *n * 3 + bdspac, i__2 = *n + 
*m; + minwrk = *n * *n + max(i__1, i__2); + } + } else { + wrkbl = *n * 3 + lwork_dgebrd_mn__; + if (wntqn) { + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + maxwrk = max(i__1, i__2); + minwrk = *n * 3 + max(*m, bdspac); + } else if (wntqo) { + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_qln_mn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + wrkbl = max(i__1, i__2); + maxwrk = wrkbl + *m * *n; + i__1 = *m, i__2 = *n * *n + bdspac; + minwrk = *n * 3 + max(i__1, i__2); + } else if (wntqs) { + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_qln_mn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + maxwrk = max(i__1, i__2); + minwrk = *n * 3 + max(*m, bdspac); + } else if (wntqa) { + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_qln_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + maxwrk = max(i__1, i__2); + minwrk = *n * 3 + max(*m, bdspac); + } + } + } else if (minmn > 0) { + if (wntqn) { + bdspac = *m * 7; + } else { + bdspac = *m * 3 * *m + (*m << 2); + } + dgebrd_(m, n, dum, m, dum, dum, dum, dum, dum, &c_n1, &ierr); + lwork_dgebrd_mn__ = (integer)dum[0]; + dgebrd_(m, m, &a[a_offset], m, &s[1], dum, dum, dum, dum, &c_n1, &ierr); + lwork_dgebrd_mm__ = (integer)dum[0]; + dgelqf_(m, n, &a[a_offset], m, dum, dum, &c_n1, &ierr); + lwork_dgelqf_mn__ = (integer)dum[0]; + dorglq_(n, n, m, dum, n, dum, dum, &c_n1, &ierr); + lwork_dorglq_nn__ = (integer)dum[0]; + dorglq_(m, n, m, &a[a_offset], m, dum, dum, &c_n1, &ierr); + lwork_dorglq_mn__ = (integer)dum[0]; + dorgbr_((char *)"P", m, m, m, &a[a_offset], n, dum, dum, &c_n1, &ierr, (ftnlen)1); + lwork_dorgbr_p_mm__ = (integer)dum[0]; + dormbr_((char *)"P", (char *)"R", (char *)"T", m, m, m, dum, m, dum, dum, m, dum, &c_n1, 
&ierr, (ftnlen)1, + (ftnlen)1, (ftnlen)1); + lwork_dormbr_prt_mm__ = (integer)dum[0]; + dormbr_((char *)"P", (char *)"R", (char *)"T", m, n, m, dum, m, dum, dum, m, dum, &c_n1, &ierr, (ftnlen)1, + (ftnlen)1, (ftnlen)1); + lwork_dormbr_prt_mn__ = (integer)dum[0]; + dormbr_((char *)"P", (char *)"R", (char *)"T", n, n, m, dum, n, dum, dum, n, dum, &c_n1, &ierr, (ftnlen)1, + (ftnlen)1, (ftnlen)1); + lwork_dormbr_prt_nn__ = (integer)dum[0]; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, m, m, dum, m, dum, dum, m, dum, &c_n1, &ierr, (ftnlen)1, + (ftnlen)1, (ftnlen)1); + lwork_dormbr_qln_mm__ = (integer)dum[0]; + if (*n >= mnthr) { + if (wntqn) { + wrkbl = *m + lwork_dgelqf_mn__; + i__1 = wrkbl, i__2 = *m * 3 + lwork_dgebrd_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = bdspac + *m; + maxwrk = max(i__1, i__2); + minwrk = bdspac + *m; + } else if (wntqo) { + wrkbl = *m + lwork_dgelqf_mn__; + i__1 = wrkbl, i__2 = *m + lwork_dorglq_mn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dgebrd_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + wrkbl = max(i__1, i__2); + maxwrk = wrkbl + (*m << 1) * *m; + minwrk = bdspac + (*m << 1) * *m + *m * 3; + } else if (wntqs) { + wrkbl = *m + lwork_dgelqf_mn__; + i__1 = wrkbl, i__2 = *m + lwork_dorglq_mn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dgebrd_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + wrkbl = max(i__1, i__2); + maxwrk = wrkbl + *m * *m; + minwrk = bdspac + *m * *m + *m * 3; + } else if (wntqa) { + wrkbl = *m + lwork_dgelqf_mn__; + i__1 = wrkbl, i__2 = *m + lwork_dorglq_nn__; + wrkbl = max(i__1, 
i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dgebrd_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + wrkbl = max(i__1, i__2); + maxwrk = wrkbl + *m * *m; + i__1 = *m * 3 + bdspac, i__2 = *m + *n; + minwrk = *m * *m + max(i__1, i__2); + } + } else { + wrkbl = *m * 3 + lwork_dgebrd_mn__; + if (wntqn) { + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + maxwrk = max(i__1, i__2); + minwrk = *m * 3 + max(*n, bdspac); + } else if (wntqo) { + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_mn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + wrkbl = max(i__1, i__2); + maxwrk = wrkbl + *m * *n; + i__1 = *n, i__2 = *m * *m + bdspac; + minwrk = *m * 3 + max(i__1, i__2); + } else if (wntqs) { + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_mn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + maxwrk = max(i__1, i__2); + minwrk = *m * 3 + max(*n, bdspac); + } else if (wntqa) { + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + maxwrk = max(i__1, i__2); + minwrk = *m * 3 + max(*n, bdspac); + } + } + } + /* Publish maxwrk in the first element of work; lwork below minwrk is an error (-12) unless this call is a size query. */maxwrk = max(maxwrk, minwrk); + work[1] = (doublereal)maxwrk; + if (*lwork < minwrk && !lquery) { + *info = -12; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGESDD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + if (*m == 0 || *n == 0) { + return 0; + } + /* Range guard: anrm comes from dlange_("M"); when it is nonzero but below smlnum, or above bignum, a is rescaled with dlascl_ and iscl = 1 marks that s must be scaled back before returning. */eps = dlamch_((char *)"P", (ftnlen)1); + smlnum = sqrt(dlamch_((char *)"S", (ftnlen)1)) / eps; + bignum = 1. 
/ smlnum; + anrm = dlange_((char *)"M", m, n, &a[a_offset], lda, dum, (ftnlen)1); + iscl = 0; + if (anrm > 0. && anrm < smlnum) { + iscl = 1; + dlascl_((char *)"G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, &ierr, (ftnlen)1); + } else if (anrm > bignum) { + iscl = 1; + dlascl_((char *)"G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, &ierr, (ftnlen)1); + } + /* m >= n: when m >= mnthr the branches call dgeqrf_ first and run dgebrd_ on an n-by-n matrix; otherwise dgebrd_ is applied to the m-by-n matrix directly. */if (*m >= *n) { + if (*m >= mnthr) { + if (wntqn) { + itau = 1; + nwork = itau + *n; + i__1 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, &ierr); + i__1 = *n - 1; + i__2 = *n - 1; + dlaset_((char *)"L", &i__1, &i__2, &c_b63, &c_b63, &a[a_dim1 + 2], lda, (ftnlen)1); + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + i__1 = *lwork - nwork + 1; + dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup], + &work[nwork], &i__1, &ierr); + nwork = ie + *n; + dbdsdc_((char *)"U", (char *)"N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, dum, idum, + &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + } else if (wntqo) { + ir = 1; + if (*lwork >= *lda * *n + *n * *n + *n * 3 + bdspac) { + ldwrkr = *lda; + } else { + ldwrkr = (*lwork - *n * *n - *n * 3 - bdspac) / *n; + } + itau = ir + ldwrkr * *n; + nwork = itau + *n; + i__1 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, &ierr); + dlacpy_((char *)"U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr, (ftnlen)1); + i__1 = *n - 1; + i__2 = *n - 1; + dlaset_((char *)"L", &i__1, &i__2, &c_b63, &c_b63, &work[ir + 1], &ldwrkr, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + i__1 = *lwork - nwork + 1; + dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[itauq], &work[itaup], + &work[nwork], &i__1, &ierr); + iu = nwork; + nwork = iu + *n * *n; + dbdsdc_((char *)"U", 
(char *)"I", n, &s[1], &work[ie], &work[iu], n, &vt[vt_offset], ldvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", n, n, n, &work[ir], &ldwrkr, &work[itauq], &work[iu], n, + &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", n, n, n, &work[ir], &ldwrkr, &work[itaup], &vt[vt_offset], + ldvt, &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__1 = *m; + i__2 = ldwrkr; + /* Process a in row chunks of at most ldwrkr rows: dgemm_ writes chunk * work[iu] into work[ir], then dlacpy_ copies it back over the same rows of a. */for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + i__3 = *m - i__ + 1; + chunk = min(i__3, ldwrkr); + dgemm_((char *)"N", (char *)"N", &chunk, n, n, &c_b84, &a[i__ + a_dim1], lda, &work[iu], n, + &c_b63, &work[ir], &ldwrkr, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + a_dim1], lda, (ftnlen)1); + } + } else if (wntqs) { + ir = 1; + ldwrkr = *n; + itau = ir + ldwrkr * *n; + nwork = itau + *n; + i__2 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__2, &ierr); + dlacpy_((char *)"U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr, (ftnlen)1); + i__2 = *n - 1; + i__1 = *n - 1; + dlaset_((char *)"L", &i__2, &i__1, &c_b63, &c_b63, &work[ir + 1], &ldwrkr, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + i__2 = *lwork - nwork + 1; + dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[itauq], &work[itaup], + &work[nwork], &i__2, &ierr); + dbdsdc_((char *)"U", (char *)"I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[vt_offset], ldvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", n, n, n, &work[ir], &ldwrkr, &work[itauq], &u[u_offset], ldu, + &work[nwork], &i__2, &ierr, 
(ftnlen)1, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", n, n, n, &work[ir], &ldwrkr, &work[itaup], &vt[vt_offset], + ldvt, &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr, (ftnlen)1); + dgemm_((char *)"N", (char *)"N", m, n, n, &c_b84, &a[a_offset], lda, &work[ir], &ldwrkr, &c_b63, + &u[u_offset], ldu, (ftnlen)1, (ftnlen)1); + } else if (wntqa) { + iu = 1; + ldwrku = *n; + itau = iu + ldwrku * *n; + nwork = itau + *n; + i__2 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__2, &ierr); + dlacpy_((char *)"L", m, n, &a[a_offset], lda, &u[u_offset], ldu, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork], &i__2, &ierr); + i__2 = *n - 1; + i__1 = *n - 1; + dlaset_((char *)"L", &i__2, &i__1, &c_b63, &c_b63, &a[a_dim1 + 2], lda, (ftnlen)1); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + i__2 = *lwork - nwork + 1; + dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup], + &work[nwork], &i__2, &ierr); + dbdsdc_((char *)"U", (char *)"I", n, &s[1], &work[ie], &work[iu], n, &vt[vt_offset], ldvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", n, n, n, &a[a_offset], lda, &work[itauq], &work[iu], &ldwrku, + &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", n, n, n, &a[a_offset], lda, &work[itaup], &vt[vt_offset], + ldvt, &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + dgemm_((char *)"N", (char *)"N", m, n, n, &c_b84, &u[u_offset], ldu, &work[iu], &ldwrku, &c_b63, + &a[a_offset], lda, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", m, n, &a[a_offset], lda, &u[u_offset], ldu, (ftnlen)1); + } + } else { + 
ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + i__2 = *lwork - nwork + 1; + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup], + &work[nwork], &i__2, &ierr); + if (wntqn) { + dbdsdc_((char *)"U", (char *)"N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, dum, idum, + &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + } else if (wntqo) { + iu = nwork; + if (*lwork >= *m * *n + *n * 3 + bdspac) { + ldwrku = *m; + nwork = iu + ldwrku * *n; + dlaset_((char *)"F", m, n, &c_b63, &c_b63, &work[iu], &ldwrku, (ftnlen)1); + ir = -1; + } else { + ldwrku = *n; + nwork = iu + ldwrku * *n; + ir = nwork; + ldwrkr = (*lwork - *n * *n - *n * 3) / *n; + } + nwork = iu + ldwrku * *n; + dbdsdc_((char *)"U", (char *)"I", n, &s[1], &work[ie], &work[iu], &ldwrku, &vt[vt_offset], ldvt, + dum, idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", n, n, n, &a[a_offset], lda, &work[itaup], &vt[vt_offset], + ldvt, &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + if (*lwork >= *m * *n + *n * 3 + bdspac) { + i__2 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, n, n, &a[a_offset], lda, &work[itauq], &work[iu], + &ldwrku, &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", m, n, &work[iu], &ldwrku, &a[a_offset], lda, (ftnlen)1); + } else { + i__2 = *lwork - nwork + 1; + dorgbr_((char *)"Q", m, n, n, &a[a_offset], lda, &work[itauq], &work[nwork], &i__2, + &ierr, (ftnlen)1); + i__2 = *m; + i__1 = ldwrkr; + for (i__ = 1; i__1 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += i__1) { + i__3 = *m - i__ + 1; + chunk = min(i__3, ldwrkr); + dgemm_((char *)"N", (char *)"N", &chunk, n, n, &c_b84, &a[i__ + a_dim1], lda, &work[iu], + &ldwrku, &c_b63, &work[ir], &ldwrkr, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + a_dim1], lda, + (ftnlen)1); + } + } + } else if (wntqs) { + dlaset_((char *)"F", m, n, &c_b63, &c_b63, &u[u_offset], ldu, (ftnlen)1); + dbdsdc_((char *)"U", (char *)"I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[vt_offset], ldvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, n, n, &a[a_offset], lda, &work[itauq], &u[u_offset], ldu, + &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", n, n, n, &a[a_offset], lda, &work[itaup], &vt[vt_offset], + ldvt, &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } else if (wntqa) { + dlaset_((char *)"F", m, m, &c_b63, &c_b63, &u[u_offset], ldu, (ftnlen)1); + dbdsdc_((char *)"U", (char *)"I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[vt_offset], ldvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + if (*m > *n) { + i__1 = *m - *n; + i__2 = *m - *n; + dlaset_((char *)"F", &i__1, &i__2, &c_b63, &c_b84, &u[*n + 1 + (*n + 1) * u_dim1], ldu, + (ftnlen)1); + } + i__1 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, m, n, &a[a_offset], lda, &work[itauq], &u[u_offset], ldu, + &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", n, n, m, &a[a_offset], lda, &work[itaup], &vt[vt_offset], + ldvt, &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } + } + } else { + /* m < n: same structure as the m >= n case, but using dgelqf_ and dorglq_ with n compared against mnthr. */if (*n >= mnthr) { + if (wntqn) { + itau = 1; + nwork = itau + *m; + i__1 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], 
lda, &work[itau], &work[nwork], &i__1, &ierr); + i__1 = *m - 1; + i__2 = *m - 1; + dlaset_((char *)"U", &i__1, &i__2, &c_b63, &c_b63, &a[(a_dim1 << 1) + 1], lda, (ftnlen)1); + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + i__1 = *lwork - nwork + 1; + dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup], + &work[nwork], &i__1, &ierr); + nwork = ie + *m; + dbdsdc_((char *)"U", (char *)"N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, dum, idum, + &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + } else if (wntqo) { + ivt = 1; + il = ivt + *m * *m; + if (*lwork >= *m * *n + *m * *m + *m * 3 + bdspac) { + ldwrkl = *m; + chunk = *n; + } else { + ldwrkl = *m; + chunk = (*lwork - *m * *m) / *m; + } + itau = il + ldwrkl * *m; + nwork = itau + *m; + i__1 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, &ierr); + dlacpy_((char *)"L", m, m, &a[a_offset], lda, &work[il], &ldwrkl, (ftnlen)1); + i__1 = *m - 1; + i__2 = *m - 1; + dlaset_((char *)"U", &i__1, &i__2, &c_b63, &c_b63, &work[il + ldwrkl], &ldwrkl, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + i__1 = *lwork - nwork + 1; + dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[itauq], &work[itaup], + &work[nwork], &i__1, &ierr); + dbdsdc_((char *)"U", (char *)"I", m, &s[1], &work[ie], &u[u_offset], ldu, &work[ivt], m, dum, idum, + &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, m, m, &work[il], &ldwrkl, &work[itauq], &u[u_offset], ldu, + &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", m, m, m, &work[il], &ldwrkl, &work[itaup], &work[ivt], m, + &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, 
(ftnlen)1); + i__1 = *n; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + i__3 = *n - i__ + 1; + blk = min(i__3, chunk); + dgemm_((char *)"N", (char *)"N", m, &blk, m, &c_b84, &work[ivt], m, &a[i__ * a_dim1 + 1], lda, + &c_b63, &work[il], &ldwrkl, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 + 1], lda, (ftnlen)1); + } + } else if (wntqs) { + il = 1; + ldwrkl = *m; + itau = il + ldwrkl * *m; + nwork = itau + *m; + i__2 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__2, &ierr); + dlacpy_((char *)"L", m, m, &a[a_offset], lda, &work[il], &ldwrkl, (ftnlen)1); + i__2 = *m - 1; + i__1 = *m - 1; + dlaset_((char *)"U", &i__2, &i__1, &c_b63, &c_b63, &work[il + ldwrkl], &ldwrkl, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + i__2 = *lwork - nwork + 1; + dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[itauq], &work[itaup], + &work[nwork], &i__2, &ierr); + dbdsdc_((char *)"U", (char *)"I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[vt_offset], ldvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, m, m, &work[il], &ldwrkl, &work[itauq], &u[u_offset], ldu, + &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", m, m, m, &work[il], &ldwrkl, &work[itaup], &vt[vt_offset], + ldvt, &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl, (ftnlen)1); + dgemm_((char *)"N", (char *)"N", m, n, m, &c_b84, &work[il], &ldwrkl, &a[a_offset], lda, &c_b63, + &vt[vt_offset], ldvt, (ftnlen)1, (ftnlen)1); + } else if (wntqa) { + ivt = 1; + ldwkvt = *m; + itau 
= ivt + ldwkvt * *m; + nwork = itau + *m; + i__2 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__2, &ierr); + dlacpy_((char *)"U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[nwork], &i__2, &ierr); + i__2 = *m - 1; + i__1 = *m - 1; + dlaset_((char *)"U", &i__2, &i__1, &c_b63, &c_b63, &a[(a_dim1 << 1) + 1], lda, (ftnlen)1); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + i__2 = *lwork - nwork + 1; + dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup], + &work[nwork], &i__2, &ierr); + dbdsdc_((char *)"U", (char *)"I", m, &s[1], &work[ie], &u[u_offset], ldu, &work[ivt], &ldwkvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, m, m, &a[a_offset], lda, &work[itauq], &u[u_offset], ldu, + &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", m, m, m, &a[a_offset], lda, &work[itaup], &work[ivt], + &ldwkvt, &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + dgemm_((char *)"N", (char *)"N", m, n, m, &c_b84, &work[ivt], &ldwkvt, &vt[vt_offset], ldvt, &c_b63, + &a[a_offset], lda, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt, (ftnlen)1); + } + } else { + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + i__2 = *lwork - nwork + 1; + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup], + &work[nwork], &i__2, &ierr); + if (wntqn) { + dbdsdc_((char *)"L", (char *)"N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, dum, idum, + &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + } else if (wntqo) { + ldwkvt = *m; + ivt = nwork; + if (*lwork >= *m * *n + *m * 3 + bdspac) { + dlaset_((char *)"F", 
m, n, &c_b63, &c_b63, &work[ivt], &ldwkvt, (ftnlen)1); + nwork = ivt + ldwkvt * *n; + il = -1; + } else { + nwork = ivt + ldwkvt * *m; + il = nwork; + chunk = (*lwork - *m * *m - *m * 3) / *m; + } + dbdsdc_((char *)"L", (char *)"I", m, &s[1], &work[ie], &u[u_offset], ldu, &work[ivt], &ldwkvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, m, n, &a[a_offset], lda, &work[itauq], &u[u_offset], ldu, + &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + if (*lwork >= *m * *n + *m * 3 + bdspac) { + i__2 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", m, n, m, &a[a_offset], lda, &work[itaup], &work[ivt], + &ldwkvt, &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda, (ftnlen)1); + } else { + i__2 = *lwork - nwork + 1; + dorgbr_((char *)"P", m, n, m, &a[a_offset], lda, &work[itaup], &work[nwork], &i__2, + &ierr, (ftnlen)1); + i__2 = *n; + i__1 = chunk; + for (i__ = 1; i__1 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += i__1) { + i__3 = *n - i__ + 1; + blk = min(i__3, chunk); + dgemm_((char *)"N", (char *)"N", m, &blk, m, &c_b84, &work[ivt], &ldwkvt, + &a[i__ * a_dim1 + 1], lda, &c_b63, &work[il], m, (ftnlen)1, + (ftnlen)1); + dlacpy_((char *)"F", m, &blk, &work[il], m, &a[i__ * a_dim1 + 1], lda, (ftnlen)1); + } + } + } else if (wntqs) { + dlaset_((char *)"F", m, n, &c_b63, &c_b63, &vt[vt_offset], ldvt, (ftnlen)1); + dbdsdc_((char *)"L", (char *)"I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[vt_offset], ldvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, m, n, &a[a_offset], lda, &work[itauq], &u[u_offset], ldu, + &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", m, n, m, &a[a_offset], lda, &work[itaup], &vt[vt_offset], + ldvt, &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } else if (wntqa) { + dlaset_((char *)"F", n, n, &c_b63, &c_b63, &vt[vt_offset], ldvt, (ftnlen)1); + dbdsdc_((char *)"L", (char *)"I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[vt_offset], ldvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + if (*n > *m) { + i__1 = *n - *m; + i__2 = *n - *m; + dlaset_((char *)"F", &i__1, &i__2, &c_b63, &c_b84, &vt[*m + 1 + (*m + 1) * vt_dim1], + ldvt, (ftnlen)1); + } + i__1 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, m, n, &a[a_offset], lda, &work[itauq], &u[u_offset], ldu, + &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", n, n, m, &a[a_offset], lda, &work[itaup], &vt[vt_offset], + ldvt, &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } + } + } + if (iscl == 1) { + if (anrm > bignum) { + dlascl_((char *)"G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &minmn, &ierr, + 
(ftnlen)1); + } + if (anrm < smlnum) { + dlascl_((char *)"G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &minmn, &ierr, + (ftnlen)1); + } + } + work[1] = (doublereal)maxwrk; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dhseqr.cpp b/lib/linalg/dhseqr.cpp new file mode 100644 index 0000000000..2ac0219858 --- /dev/null +++ b/lib/linalg/dhseqr.cpp @@ -0,0 +1,145 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublereal c_b11 = 0.; +static doublereal c_b12 = 1.; +static integer c__12 = 12; +static integer c__2 = 2; +static integer c__49 = 49; +/* dhseqr_: f2c-style routine that reports itself to xerbla_ as "DHSEQR". job must be "E" or "S" and compz must be "N", "I" or "V" (tested with lsame_); a rejected argument sets info to minus its position. lwork == -1 is a size query. The work is delegated to dlaqr0_ or dlahqr_. Always returns 0. NOTE(review): presumably the LAPACK Hessenberg eigenvalue driver; confirm against the LAPACK reference. */int dhseqr_(char *job, char *compz, integer *n, integer *ilo, integer *ihi, doublereal *h__, + integer *ldh, doublereal *wr, doublereal *wi, doublereal *z__, integer *ldz, + doublereal *work, integer *lwork, integer *info, ftnlen job_len, ftnlen compz_len) +{ + address a__1[2]; + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2[2], i__3; + doublereal d__1; + char ch__1[2]; + int s_lmp_cat(char *, char **, integer *, integer *, ftnlen); + integer i__; + doublereal hl[2401]; + integer kbot, nmin; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + logical initz; + doublereal workl[49]; + logical wantt, wantz; + extern int dlaqr0_(logical *, logical *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *), + dlahqr_(logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, + integer *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen), + dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, + ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + extern int xerbla_(char *, integer *, 
ftnlen); + logical lquery; + /* Shift the array pointers so the code below can index 1-based; h__ and z__ are addressed column-major as h__[i + j * h_dim1]. */h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + wantt = lsame_(job, (char *)"S", (ftnlen)1, (ftnlen)1); + initz = lsame_(compz, (char *)"I", (ftnlen)1, (ftnlen)1); + wantz = initz || lsame_(compz, (char *)"V", (ftnlen)1, (ftnlen)1); + work[1] = (doublereal)max(1, *n); + lquery = *lwork == -1; + *info = 0; + if (!lsame_(job, (char *)"E", (ftnlen)1, (ftnlen)1) && !wantt) { + *info = -1; + } else if (!lsame_(compz, (char *)"N", (ftnlen)1, (ftnlen)1) && !wantz) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > max(1, *n)) { + *info = -4; + } else if (*ihi < min(*ilo, *n) || *ihi > *n) { + *info = -5; + } else if (*ldh < max(1, *n)) { + *info = -7; + } else if (*ldz < 1 || wantz && *ldz < max(1, *n)) { + *info = -11; + } else if (*lwork < max(1, *n) && !lquery) { + *info = -13; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DHSEQR", &i__1, (ftnlen)6); + return 0; + } else if (*n == 0) { + return 0; + } else if (lquery) { + /* Size query: forward lwork (-1) to dlaqr0_, then make sure the first element of work is at least max(1, n). */dlaqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[1], ilo, ihi, + &z__[z_offset], ldz, &work[1], lwork, info); + d__1 = (doublereal)max(1, *n); + work[1] = max(d__1, work[1]); + return 0; + } else { + /* For indices below ilo and above ihi, copy the diagonal of h__ into wr and set wi to zero. */i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.; + } + i__1 = *n; + for (i__ = *ihi + 1; i__ <= i__1; ++i__) { + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.; + } + if (initz) { + dlaset_((char *)"A", n, n, &c_b11, &c_b12, &z__[z_offset], ldz, (ftnlen)1); + } + if (*ilo == *ihi) { + wr[*ilo] = h__[*ilo + *ilo * h_dim1]; + wi[*ilo] = 0.; + return 0; + } + i__2[0] = 1, a__1[0] = job; + i__2[1] = 1, a__1[1] = compz; + s_lmp_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2); + nmin = ilaenv_(&c__12, (char *)"DHSEQR", ch__1, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + nmin = max(11, nmin); + /* Dispatch on size: n > nmin goes straight to dlaqr0_; otherwise dlahqr_ runs first and dlaqr0_ is only called if dlahqr_ leaves info > 0. */if (*n > nmin) { 
+ dlaqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[1], ilo, ihi, + &z__[z_offset], ldz, &work[1], lwork, info); + } else { + dlahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[1], ilo, ihi, + &z__[z_offset], ldz, info); + if (*info > 0) { + kbot = *info; + if (*n >= 49) { + dlaqr0_(&wantt, &wantz, n, ilo, &kbot, &h__[h_offset], ldh, &wr[1], &wi[1], ilo, + ihi, &z__[z_offset], ldz, &work[1], lwork, info); + } else { + /* n < 49: copy h__ into the local buffer hl (2401 = 49 * 49, leading dimension 49), zero the columns past n, and rerun dlaqr0_ on it with the local workspace workl. */dlacpy_((char *)"A", n, n, &h__[h_offset], ldh, hl, &c__49, (ftnlen)1); + hl[*n + 1 + *n * 49 - 50] = 0.; + i__1 = 49 - *n; + dlaset_((char *)"A", &c__49, &i__1, &c_b11, &c_b11, &hl[(*n + 1) * 49 - 49], &c__49, + (ftnlen)1); + dlaqr0_(&wantt, &wantz, &c__49, ilo, &kbot, hl, &c__49, &wr[1], &wi[1], ilo, + ihi, &z__[z_offset], ldz, workl, &c__49, info); + if (wantt || *info != 0) { + dlacpy_((char *)"A", n, n, hl, &c__49, &h__[h_offset], ldh, (ftnlen)1); + } + } + } + } + /* dlaset_("L") with both fill values 0 (c_b11) on the (n-2)-by-(n-2) block that starts at row 3, column 1 of h__. */if ((wantt || *info != 0) && *n > 2) { + i__1 = *n - 2; + i__3 = *n - 2; + dlaset_((char *)"L", &i__1, &i__3, &c_b11, &c_b11, &h__[h_dim1 + 3], ldh, (ftnlen)1); + } + d__1 = (doublereal)max(1, *n); + work[1] = max(d__1, work[1]); + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlaexc.cpp b/lib/linalg/dlaexc.cpp new file mode 100644 index 0000000000..9d528080cf --- /dev/null +++ b/lib/linalg/dlaexc.cpp @@ -0,0 +1,214 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c__4 = 4; +static logical c_false = FALSE_; +static integer c_n1 = -1; +static integer c__2 = 2; +static integer c__3 = 3; +int dlaexc_(logical *wantq, integer *n, doublereal *t, integer *ldt, doublereal *q, integer *ldq, + integer *j1, integer *n1, integer *n2, doublereal *work, integer *info) +{ + integer q_dim1, q_offset, t_dim1, t_offset, i__1; + doublereal d__1, d__2, d__3; + doublereal d__[16]; + integer k; + doublereal u[3], x[4]; + integer j2, j3, j4; + doublereal u1[3], u2[3]; + integer nd; + 
doublereal cs, t11, t22, t33, sn, wi1, wi2, wr1, wr2, eps, tau, tau1, tau2; + integer ierr; + doublereal temp; + extern int drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *); + doublereal scale, dnorm, xnorm; + extern int dlanv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), + dlasy2_(logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + extern doublereal dlamch_(char *, ftnlen), + dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *, ftnlen); + extern int dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen), + dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), + dlarfx_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, ftnlen); + doublereal thresh, smlnum; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --work; + *info = 0; + if (*n == 0 || *n1 == 0 || *n2 == 0) { + return 0; + } + if (*j1 + *n1 > *n) { + return 0; + } + j2 = *j1 + 1; + j3 = *j1 + 2; + j4 = *j1 + 3; + if (*n1 == 1 && *n2 == 1) { + t11 = t[*j1 + *j1 * t_dim1]; + t22 = t[j2 + j2 * t_dim1]; + d__1 = t22 - t11; + dlartg_(&t[*j1 + j2 * t_dim1], &d__1, &cs, &sn, &temp); + if (j3 <= *n) { + i__1 = *n - *j1 - 1; + drot_(&i__1, &t[*j1 + j3 * t_dim1], ldt, &t[j2 + j3 * t_dim1], ldt, &cs, &sn); + } + i__1 = *j1 - 1; + drot_(&i__1, &t[*j1 * t_dim1 + 1], &c__1, &t[j2 * t_dim1 + 1], &c__1, &cs, &sn); + t[*j1 + *j1 * t_dim1] = t22; + t[j2 + j2 * t_dim1] = t11; + if (*wantq) { + drot_(n, &q[*j1 * q_dim1 + 1], &c__1, &q[j2 * q_dim1 + 1], 
&c__1, &cs, &sn); + } + } else { + nd = *n1 + *n2; + dlacpy_((char *)"Full", &nd, &nd, &t[*j1 + *j1 * t_dim1], ldt, d__, &c__4, (ftnlen)4); + dnorm = dlange_((char *)"Max", &nd, &nd, d__, &c__4, &work[1], (ftnlen)3); + eps = dlamch_((char *)"P", (ftnlen)1); + smlnum = dlamch_((char *)"S", (ftnlen)1) / eps; + d__1 = eps * 10. * dnorm; + thresh = max(d__1, smlnum); + dlasy2_(&c_false, &c_false, &c_n1, n1, n2, d__, &c__4, &d__[*n1 + 1 + (*n1 + 1 << 2) - 5], + &c__4, &d__[(*n1 + 1 << 2) - 4], &c__4, &scale, x, &c__2, &xnorm, &ierr); + k = *n1 + *n1 + *n2 - 3; + switch (k) { + case 1: + goto L10; + case 2: + goto L20; + case 3: + goto L30; + } + L10: + u[0] = scale; + u[1] = x[0]; + u[2] = x[2]; + dlarfg_(&c__3, &u[2], u, &c__1, &tau); + u[2] = 1.; + t11 = t[*j1 + *j1 * t_dim1]; + dlarfx_((char *)"L", &c__3, &c__3, u, &tau, d__, &c__4, &work[1], (ftnlen)1); + dlarfx_((char *)"R", &c__3, &c__3, u, &tau, d__, &c__4, &work[1], (ftnlen)1); + d__2 = abs(d__[2]), d__3 = abs(d__[6]), d__2 = max(d__2, d__3), + d__3 = (d__1 = d__[10] - t11, abs(d__1)); + if (max(d__2, d__3) > thresh) { + goto L50; + } + i__1 = *n - *j1 + 1; + dlarfx_((char *)"L", &c__3, &i__1, u, &tau, &t[*j1 + *j1 * t_dim1], ldt, &work[1], (ftnlen)1); + dlarfx_((char *)"R", &j2, &c__3, u, &tau, &t[*j1 * t_dim1 + 1], ldt, &work[1], (ftnlen)1); + t[j3 + *j1 * t_dim1] = 0.; + t[j3 + j2 * t_dim1] = 0.; + t[j3 + j3 * t_dim1] = t11; + if (*wantq) { + dlarfx_((char *)"R", n, &c__3, u, &tau, &q[*j1 * q_dim1 + 1], ldq, &work[1], (ftnlen)1); + } + goto L40; + L20: + u[0] = -x[0]; + u[1] = -x[1]; + u[2] = scale; + dlarfg_(&c__3, u, &u[1], &c__1, &tau); + u[0] = 1.; + t33 = t[j3 + j3 * t_dim1]; + dlarfx_((char *)"L", &c__3, &c__3, u, &tau, d__, &c__4, &work[1], (ftnlen)1); + dlarfx_((char *)"R", &c__3, &c__3, u, &tau, d__, &c__4, &work[1], (ftnlen)1); + d__2 = abs(d__[1]), d__3 = abs(d__[2]), d__2 = max(d__2, d__3), + d__3 = (d__1 = d__[0] - t33, abs(d__1)); + if (max(d__2, d__3) > thresh) { + goto L50; + } + dlarfx_((char 
*)"R", &j3, &c__3, u, &tau, &t[*j1 * t_dim1 + 1], ldt, &work[1], (ftnlen)1); + i__1 = *n - *j1; + dlarfx_((char *)"L", &c__3, &i__1, u, &tau, &t[*j1 + j2 * t_dim1], ldt, &work[1], (ftnlen)1); + t[*j1 + *j1 * t_dim1] = t33; + t[j2 + *j1 * t_dim1] = 0.; + t[j3 + *j1 * t_dim1] = 0.; + if (*wantq) { + dlarfx_((char *)"R", n, &c__3, u, &tau, &q[*j1 * q_dim1 + 1], ldq, &work[1], (ftnlen)1); + } + goto L40; + L30: + u1[0] = -x[0]; + u1[1] = -x[1]; + u1[2] = scale; + dlarfg_(&c__3, u1, &u1[1], &c__1, &tau1); + u1[0] = 1.; + temp = -tau1 * (x[2] + u1[1] * x[3]); + u2[0] = -temp * u1[1] - x[3]; + u2[1] = -temp * u1[2]; + u2[2] = scale; + dlarfg_(&c__3, u2, &u2[1], &c__1, &tau2); + u2[0] = 1.; + dlarfx_((char *)"L", &c__3, &c__4, u1, &tau1, d__, &c__4, &work[1], (ftnlen)1); + dlarfx_((char *)"R", &c__4, &c__3, u1, &tau1, d__, &c__4, &work[1], (ftnlen)1); + dlarfx_((char *)"L", &c__3, &c__4, u2, &tau2, &d__[1], &c__4, &work[1], (ftnlen)1); + dlarfx_((char *)"R", &c__4, &c__3, u2, &tau2, &d__[4], &c__4, &work[1], (ftnlen)1); + d__1 = abs(d__[2]), d__2 = abs(d__[6]), d__1 = max(d__1, d__2), d__2 = abs(d__[3]), + d__1 = max(d__1, d__2), d__2 = abs(d__[7]); + if (max(d__1, d__2) > thresh) { + goto L50; + } + i__1 = *n - *j1 + 1; + dlarfx_((char *)"L", &c__3, &i__1, u1, &tau1, &t[*j1 + *j1 * t_dim1], ldt, &work[1], (ftnlen)1); + dlarfx_((char *)"R", &j4, &c__3, u1, &tau1, &t[*j1 * t_dim1 + 1], ldt, &work[1], (ftnlen)1); + i__1 = *n - *j1 + 1; + dlarfx_((char *)"L", &c__3, &i__1, u2, &tau2, &t[j2 + *j1 * t_dim1], ldt, &work[1], (ftnlen)1); + dlarfx_((char *)"R", &j4, &c__3, u2, &tau2, &t[j2 * t_dim1 + 1], ldt, &work[1], (ftnlen)1); + t[j3 + *j1 * t_dim1] = 0.; + t[j3 + j2 * t_dim1] = 0.; + t[j4 + *j1 * t_dim1] = 0.; + t[j4 + j2 * t_dim1] = 0.; + if (*wantq) { + dlarfx_((char *)"R", n, &c__3, u1, &tau1, &q[*j1 * q_dim1 + 1], ldq, &work[1], (ftnlen)1); + dlarfx_((char *)"R", n, &c__3, u2, &tau2, &q[j2 * q_dim1 + 1], ldq, &work[1], (ftnlen)1); + } + L40: + if (*n2 == 2) { + 
dlanv2_(&t[*j1 + *j1 * t_dim1], &t[*j1 + j2 * t_dim1], &t[j2 + *j1 * t_dim1], + &t[j2 + j2 * t_dim1], &wr1, &wi1, &wr2, &wi2, &cs, &sn); + i__1 = *n - *j1 - 1; + drot_(&i__1, &t[*j1 + (*j1 + 2) * t_dim1], ldt, &t[j2 + (*j1 + 2) * t_dim1], ldt, &cs, + &sn); + i__1 = *j1 - 1; + drot_(&i__1, &t[*j1 * t_dim1 + 1], &c__1, &t[j2 * t_dim1 + 1], &c__1, &cs, &sn); + if (*wantq) { + drot_(n, &q[*j1 * q_dim1 + 1], &c__1, &q[j2 * q_dim1 + 1], &c__1, &cs, &sn); + } + } + if (*n1 == 2) { + j3 = *j1 + *n2; + j4 = j3 + 1; + dlanv2_(&t[j3 + j3 * t_dim1], &t[j3 + j4 * t_dim1], &t[j4 + j3 * t_dim1], + &t[j4 + j4 * t_dim1], &wr1, &wi1, &wr2, &wi2, &cs, &sn); + if (j3 + 2 <= *n) { + i__1 = *n - j3 - 1; + drot_(&i__1, &t[j3 + (j3 + 2) * t_dim1], ldt, &t[j4 + (j3 + 2) * t_dim1], ldt, &cs, + &sn); + } + i__1 = j3 - 1; + drot_(&i__1, &t[j3 * t_dim1 + 1], &c__1, &t[j4 * t_dim1 + 1], &c__1, &cs, &sn); + if (*wantq) { + drot_(n, &q[j3 * q_dim1 + 1], &c__1, &q[j4 * q_dim1 + 1], &c__1, &cs, &sn); + } + } + } + return 0; +L50: + *info = 1; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlahqr.cpp b/lib/linalg/dlahqr.cpp new file mode 100644 index 0000000000..c2f2775b9b --- /dev/null +++ b/lib/linalg/dlahqr.cpp @@ -0,0 +1,311 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +int dlahqr_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, doublereal *h__, + integer *ldh, doublereal *wr, doublereal *wi, integer *iloz, integer *ihiz, + doublereal *z__, integer *ldz, integer *info) +{ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4; + double sqrt(doublereal); + integer i__, j, k, l, m; + doublereal s, v[3]; + integer i1, i2; + doublereal t1, t2, t3, v2, v3, aa, ab, ba, bb, h11, h12, h21, h22, cs; + integer nh; + doublereal sn; + integer nr; + doublereal tr; + integer nz; + doublereal det, h21s; + integer its; + doublereal ulp, sum, tst, rt1i, rt2i, rt1r, 
rt2r; + extern int drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *), + dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); + integer itmax; + extern int dlanv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), + dlabad_(doublereal *, doublereal *); + extern doublereal dlamch_(char *, ftnlen); + extern int dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *); + doublereal safmin, safmax, rtdisc, smlnum; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + *info = 0; + if (*n == 0) { + return 0; + } + if (*ilo == *ihi) { + wr[*ilo] = h__[*ilo + *ilo * h_dim1]; + wi[*ilo] = 0.; + return 0; + } + i__1 = *ihi - 3; + for (j = *ilo; j <= i__1; ++j) { + h__[j + 2 + j * h_dim1] = 0.; + h__[j + 3 + j * h_dim1] = 0.; + } + if (*ilo <= *ihi - 2) { + h__[*ihi + (*ihi - 2) * h_dim1] = 0.; + } + nh = *ihi - *ilo + 1; + nz = *ihiz - *iloz + 1; + safmin = dlamch_((char *)"SAFE MINIMUM", (ftnlen)12); + safmax = 1. / safmin; + dlabad_(&safmin, &safmax); + ulp = dlamch_((char *)"PRECISION", (ftnlen)9); + smlnum = safmin * ((doublereal)nh / ulp); + if (*wantt) { + i1 = 1; + i2 = *n; + } + itmax = max(10, nh) * 30; + i__ = *ihi; +L20: + l = *ilo; + if (i__ < *ilo) { + goto L160; + } + i__1 = itmax; + for (its = 0; its <= i__1; ++its) { + i__2 = l + 1; + for (k = i__; k >= i__2; --k) { + if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= smlnum) { + goto L40; + } + tst = (d__1 = h__[k - 1 + (k - 1) * h_dim1], abs(d__1)) + + (d__2 = h__[k + k * h_dim1], abs(d__2)); + if (tst == 0.) 
{ + if (k - 2 >= *ilo) { + tst += (d__1 = h__[k - 1 + (k - 2) * h_dim1], abs(d__1)); + } + if (k + 1 <= *ihi) { + tst += (d__1 = h__[k + 1 + k * h_dim1], abs(d__1)); + } + } + if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= ulp * tst) { + d__3 = (d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)), + d__4 = (d__2 = h__[k - 1 + k * h_dim1], abs(d__2)); + ab = max(d__3, d__4); + d__3 = (d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)), + d__4 = (d__2 = h__[k - 1 + k * h_dim1], abs(d__2)); + ba = min(d__3, d__4); + d__3 = (d__1 = h__[k + k * h_dim1], abs(d__1)), + d__4 = (d__2 = h__[k - 1 + (k - 1) * h_dim1] - h__[k + k * h_dim1], abs(d__2)); + aa = max(d__3, d__4); + d__3 = (d__1 = h__[k + k * h_dim1], abs(d__1)), + d__4 = (d__2 = h__[k - 1 + (k - 1) * h_dim1] - h__[k + k * h_dim1], abs(d__2)); + bb = min(d__3, d__4); + s = aa + ab; + d__1 = smlnum, d__2 = ulp * (bb * (aa / s)); + if (ba * (ab / s) <= max(d__1, d__2)) { + goto L40; + } + } + } + L40: + l = k; + if (l > *ilo) { + h__[l + (l - 1) * h_dim1] = 0.; + } + if (l >= i__ - 1) { + goto L150; + } + if (!(*wantt)) { + i1 = l; + i2 = i__; + } + if (its == 10) { + s = (d__1 = h__[l + 1 + l * h_dim1], abs(d__1)) + + (d__2 = h__[l + 2 + (l + 1) * h_dim1], abs(d__2)); + h11 = s * .75 + h__[l + l * h_dim1]; + h12 = s * -.4375; + h21 = s; + h22 = h11; + } else if (its == 20) { + s = (d__1 = h__[i__ + (i__ - 1) * h_dim1], abs(d__1)) + + (d__2 = h__[i__ - 1 + (i__ - 2) * h_dim1], abs(d__2)); + h11 = s * .75 + h__[i__ + i__ * h_dim1]; + h12 = s * -.4375; + h21 = s; + h22 = h11; + } else { + h11 = h__[i__ - 1 + (i__ - 1) * h_dim1]; + h21 = h__[i__ + (i__ - 1) * h_dim1]; + h12 = h__[i__ - 1 + i__ * h_dim1]; + h22 = h__[i__ + i__ * h_dim1]; + } + s = abs(h11) + abs(h12) + abs(h21) + abs(h22); + if (s == 0.) 
{ + rt1r = 0.; + rt1i = 0.; + rt2r = 0.; + rt2i = 0.; + } else { + h11 /= s; + h21 /= s; + h12 /= s; + h22 /= s; + tr = (h11 + h22) / 2.; + det = (h11 - tr) * (h22 - tr) - h12 * h21; + rtdisc = sqrt((abs(det))); + if (det >= 0.) { + rt1r = tr * s; + rt2r = rt1r; + rt1i = rtdisc * s; + rt2i = -rt1i; + } else { + rt1r = tr + rtdisc; + rt2r = tr - rtdisc; + if ((d__1 = rt1r - h22, abs(d__1)) <= (d__2 = rt2r - h22, abs(d__2))) { + rt1r *= s; + rt2r = rt1r; + } else { + rt2r *= s; + rt1r = rt2r; + } + rt1i = 0.; + rt2i = 0.; + } + } + i__2 = l; + for (m = i__ - 2; m >= i__2; --m) { + h21s = h__[m + 1 + m * h_dim1]; + s = (d__1 = h__[m + m * h_dim1] - rt2r, abs(d__1)) + abs(rt2i) + abs(h21s); + h21s = h__[m + 1 + m * h_dim1] / s; + v[0] = h21s * h__[m + (m + 1) * h_dim1] + + (h__[m + m * h_dim1] - rt1r) * ((h__[m + m * h_dim1] - rt2r) / s) - + rt1i * (rt2i / s); + v[1] = h21s * (h__[m + m * h_dim1] + h__[m + 1 + (m + 1) * h_dim1] - rt1r - rt2r); + v[2] = h21s * h__[m + 2 + (m + 1) * h_dim1]; + s = abs(v[0]) + abs(v[1]) + abs(v[2]); + v[0] /= s; + v[1] /= s; + v[2] /= s; + if (m == l) { + goto L60; + } + if ((d__1 = h__[m + (m - 1) * h_dim1], abs(d__1)) * (abs(v[1]) + abs(v[2])) <= + ulp * abs(v[0]) * + ((d__2 = h__[m - 1 + (m - 1) * h_dim1], abs(d__2)) + + (d__3 = h__[m + m * h_dim1], abs(d__3)) + + (d__4 = h__[m + 1 + (m + 1) * h_dim1], abs(d__4)))) { + goto L60; + } + } + L60: + i__2 = i__ - 1; + for (k = m; k <= i__2; ++k) { + i__3 = 3, i__4 = i__ - k + 1; + nr = min(i__3, i__4); + if (k > m) { + dcopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); + } + dlarfg_(&nr, v, &v[1], &c__1, &t1); + if (k > m) { + h__[k + (k - 1) * h_dim1] = v[0]; + h__[k + 1 + (k - 1) * h_dim1] = 0.; + if (k < i__ - 1) { + h__[k + 2 + (k - 1) * h_dim1] = 0.; + } + } else if (m > l) { + h__[k + (k - 1) * h_dim1] *= 1. 
- t1; + } + v2 = v[1]; + t2 = t1 * v2; + if (nr == 3) { + v3 = v[2]; + t3 = t1 * v3; + i__3 = i2; + for (j = k; j <= i__3; ++j) { + sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1] + + v3 * h__[k + 2 + j * h_dim1]; + h__[k + j * h_dim1] -= sum * t1; + h__[k + 1 + j * h_dim1] -= sum * t2; + h__[k + 2 + j * h_dim1] -= sum * t3; + } + i__4 = k + 3; + i__3 = min(i__4, i__); + for (j = i1; j <= i__3; ++j) { + sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] + + v3 * h__[j + (k + 2) * h_dim1]; + h__[j + k * h_dim1] -= sum * t1; + h__[j + (k + 1) * h_dim1] -= sum * t2; + h__[j + (k + 2) * h_dim1] -= sum * t3; + } + if (*wantz) { + i__3 = *ihiz; + for (j = *iloz; j <= i__3; ++j) { + sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * z_dim1] + + v3 * z__[j + (k + 2) * z_dim1]; + z__[j + k * z_dim1] -= sum * t1; + z__[j + (k + 1) * z_dim1] -= sum * t2; + z__[j + (k + 2) * z_dim1] -= sum * t3; + } + } + } else if (nr == 2) { + i__3 = i2; + for (j = k; j <= i__3; ++j) { + sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1]; + h__[k + j * h_dim1] -= sum * t1; + h__[k + 1 + j * h_dim1] -= sum * t2; + } + i__3 = i__; + for (j = i1; j <= i__3; ++j) { + sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1]; + h__[j + k * h_dim1] -= sum * t1; + h__[j + (k + 1) * h_dim1] -= sum * t2; + } + if (*wantz) { + i__3 = *ihiz; + for (j = *iloz; j <= i__3; ++j) { + sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * z_dim1]; + z__[j + k * z_dim1] -= sum * t1; + z__[j + (k + 1) * z_dim1] -= sum * t2; + } + } + } + } + } + *info = i__; + return 0; +L150: + if (l == i__) { + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.; + } else if (l == i__ - 1) { + dlanv2_(&h__[i__ - 1 + (i__ - 1) * h_dim1], &h__[i__ - 1 + i__ * h_dim1], + &h__[i__ + (i__ - 1) * h_dim1], &h__[i__ + i__ * h_dim1], &wr[i__ - 1], + &wi[i__ - 1], &wr[i__], &wi[i__], &cs, &sn); + if (*wantt) { + if (i2 > i__) { + i__1 = i2 - i__; + drot_(&i__1, &h__[i__ - 1 + (i__ + 1) * h_dim1], ldh, + &h__[i__ + 
(i__ + 1) * h_dim1], ldh, &cs, &sn); + } + i__1 = i__ - i1 - 1; + drot_(&i__1, &h__[i1 + (i__ - 1) * h_dim1], &c__1, &h__[i1 + i__ * h_dim1], &c__1, &cs, + &sn); + } + if (*wantz) { + drot_(&nz, &z__[*iloz + (i__ - 1) * z_dim1], &c__1, &z__[*iloz + i__ * z_dim1], &c__1, + &cs, &sn); + } + } + i__ = l - 1; + goto L20; +L160: + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlahr2.cpp b/lib/linalg/dlahr2.cpp new file mode 100644 index 0000000000..36264e950f --- /dev/null +++ b/lib/linalg/dlahr2.cpp @@ -0,0 +1,121 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublereal c_b4 = -1.; +static doublereal c_b5 = 1.; +static integer c__1 = 1; +static doublereal c_b38 = 0.; +int dlahr2_(integer *n, integer *k, integer *nb, doublereal *a, integer *lda, doublereal *tau, + doublereal *t, integer *ldt, doublereal *y, integer *ldy) +{ + integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, i__3; + doublereal d__1; + integer i__; + doublereal ei; + extern int dscal_(integer *, doublereal *, doublereal *, integer *), + dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, + ftnlen), + dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, ftnlen), + dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), + dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), + daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), + dtrmv_(char *, char *, char *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen, ftnlen, ftnlen), + dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, 
doublereal *, integer *, + ftnlen); + --tau; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + if (*n <= 1) { + return 0; + } + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ > 1) { + i__2 = *n - *k; + i__3 = i__ - 1; + dgemv_((char *)"NO TRANSPOSE", &i__2, &i__3, &c_b4, &y[*k + 1 + y_dim1], ldy, + &a[*k + i__ - 1 + a_dim1], lda, &c_b5, &a[*k + 1 + i__ * a_dim1], &c__1, + (ftnlen)12); + i__2 = i__ - 1; + dcopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + dtrmv_((char *)"Lower", (char *)"Transpose", (char *)"UNIT", &i__2, &a[*k + 1 + a_dim1], lda, + &t[*nb * t_dim1 + 1], &c__1, (ftnlen)5, (ftnlen)9, (ftnlen)4); + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &a[*k + i__ + a_dim1], lda, + &a[*k + i__ + i__ * a_dim1], &c__1, &c_b5, &t[*nb * t_dim1 + 1], &c__1, + (ftnlen)9); + i__2 = i__ - 1; + dtrmv_((char *)"Upper", (char *)"Transpose", (char *)"NON-UNIT", &i__2, &t[t_offset], ldt, &t[*nb * t_dim1 + 1], + &c__1, (ftnlen)5, (ftnlen)9, (ftnlen)8); + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + dgemv_((char *)"NO TRANSPOSE", &i__2, &i__3, &c_b4, &a[*k + i__ + a_dim1], lda, + &t[*nb * t_dim1 + 1], &c__1, &c_b5, &a[*k + i__ + i__ * a_dim1], &c__1, + (ftnlen)12); + i__2 = i__ - 1; + dtrmv_((char *)"Lower", (char *)"NO TRANSPOSE", (char *)"UNIT", &i__2, &a[*k + 1 + a_dim1], lda, + &t[*nb * t_dim1 + 1], &c__1, (ftnlen)5, (ftnlen)12, (ftnlen)4); + i__2 = i__ - 1; + daxpy_(&i__2, &c_b4, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + i__ * a_dim1], &c__1); + a[*k + i__ - 1 + (i__ - 1) * a_dim1] = ei; + } + i__2 = *n - *k - i__ + 1; + i__3 = *k + i__ + 1; + dlarfg_(&i__2, &a[*k + i__ + i__ * a_dim1], &a[min(i__3, *n) + i__ * a_dim1], &c__1, + &tau[i__]); + ei = a[*k + i__ + i__ * a_dim1]; + a[*k + i__ + i__ * a_dim1] = 1.; + i__2 = *n - *k; + i__3 = 
*n - *k - i__ + 1; + dgemv_((char *)"NO TRANSPOSE", &i__2, &i__3, &c_b5, &a[*k + 1 + (i__ + 1) * a_dim1], lda, + &a[*k + i__ + i__ * a_dim1], &c__1, &c_b38, &y[*k + 1 + i__ * y_dim1], &c__1, + (ftnlen)12); + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &a[*k + i__ + a_dim1], lda, + &a[*k + i__ + i__ * a_dim1], &c__1, &c_b38, &t[i__ * t_dim1 + 1], &c__1, (ftnlen)9); + i__2 = *n - *k; + i__3 = i__ - 1; + dgemv_((char *)"NO TRANSPOSE", &i__2, &i__3, &c_b4, &y[*k + 1 + y_dim1], ldy, &t[i__ * t_dim1 + 1], + &c__1, &c_b5, &y[*k + 1 + i__ * y_dim1], &c__1, (ftnlen)12); + i__2 = *n - *k; + dscal_(&i__2, &tau[i__], &y[*k + 1 + i__ * y_dim1], &c__1); + i__2 = i__ - 1; + d__1 = -tau[i__]; + dscal_(&i__2, &d__1, &t[i__ * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + dtrmv_((char *)"Upper", (char *)"No Transpose", (char *)"NON-UNIT", &i__2, &t[t_offset], ldt, &t[i__ * t_dim1 + 1], + &c__1, (ftnlen)5, (ftnlen)12, (ftnlen)8); + t[i__ + i__ * t_dim1] = tau[i__]; + } + a[*k + *nb + *nb * a_dim1] = ei; + dlacpy_((char *)"ALL", k, nb, &a[(a_dim1 << 1) + 1], lda, &y[y_offset], ldy, (ftnlen)3); + dtrmm_((char *)"RIGHT", (char *)"Lower", (char *)"NO TRANSPOSE", (char *)"UNIT", k, nb, &c_b5, &a[*k + 1 + a_dim1], lda, + &y[y_offset], ldy, (ftnlen)5, (ftnlen)5, (ftnlen)12, (ftnlen)4); + if (*n > *k + *nb) { + i__1 = *n - *k - *nb; + dgemm_((char *)"NO TRANSPOSE", (char *)"NO TRANSPOSE", k, nb, &i__1, &c_b5, &a[(*nb + 2) * a_dim1 + 1], lda, + &a[*k + 1 + *nb + a_dim1], lda, &c_b5, &y[y_offset], ldy, (ftnlen)12, (ftnlen)12); + } + dtrmm_((char *)"RIGHT", (char *)"Upper", (char *)"NO TRANSPOSE", (char *)"NON-UNIT", k, nb, &c_b5, &t[t_offset], ldt, + &y[y_offset], ldy, (ftnlen)5, (ftnlen)5, (ftnlen)12, (ftnlen)8); + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlaln2.cpp b/lib/linalg/dlaln2.cpp new file mode 100644 index 0000000000..220eaae63a --- /dev/null +++ b/lib/linalg/dlaln2.cpp @@ -0,0 +1,298 @@ +#ifdef __cplusplus 
+extern "C" { +#endif +#include "lmp_f2c.h" +int dlaln2_(logical *ltrans, integer *na, integer *nw, doublereal *smin, doublereal *ca, + doublereal *a, integer *lda, doublereal *d1, doublereal *d2, doublereal *b, + integer *ldb, doublereal *wr, doublereal *wi, doublereal *x, integer *ldx, + doublereal *scale, doublereal *xnorm, integer *info) +{ + static logical zswap[4] = {FALSE_, FALSE_, TRUE_, TRUE_}; + static logical rswap[4] = {FALSE_, TRUE_, FALSE_, TRUE_}; + static integer ipivot[16] = {1, 2, 3, 4, 2, 1, 4, 3, 3, 4, 1, 2, 4, 3, 2, 1}; + integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + static doublereal equiv_0[4], equiv_1[4]; + integer j; +#define ci (equiv_0) +#define cr (equiv_1) + doublereal bi1, bi2, br1, br2, xi1, xi2, xr1, xr2, ci21, ci22, cr21, cr22, li21, csi, ui11, + lr21, ui12, ui22; +#define civ (equiv_0) + doublereal csr, ur11, ur12, ur22; +#define crv (equiv_1) + doublereal bbnd, cmax, ui11r, ui12s, temp, ur11r, ur12s, u22abs; + integer icmax; + doublereal bnorm, cnorm, smini; + extern doublereal dlamch_(char *, ftnlen); + extern int dladiv_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *); + doublereal bignum, smlnum; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + smlnum = 2. * dlamch_((char *)"Safe minimum", (ftnlen)12); + bignum = 1. / smlnum; + smini = max(*smin, smlnum); + *info = 0; + *scale = 1.; + if (*na == 1) { + if (*nw == 1) { + csr = *ca * a[a_dim1 + 1] - *wr * *d1; + cnorm = abs(csr); + if (cnorm < smini) { + csr = smini; + cnorm = smini; + *info = 1; + } + bnorm = (d__1 = b[b_dim1 + 1], abs(d__1)); + if (cnorm < 1. && bnorm > 1.) { + if (bnorm > bignum * cnorm) { + *scale = 1. 
/ bnorm; + } + } + x[x_dim1 + 1] = b[b_dim1 + 1] * *scale / csr; + *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)); + } else { + csr = *ca * a[a_dim1 + 1] - *wr * *d1; + csi = -(*wi) * *d1; + cnorm = abs(csr) + abs(csi); + if (cnorm < smini) { + csr = smini; + csi = 0.; + cnorm = smini; + *info = 1; + } + bnorm = (d__1 = b[b_dim1 + 1], abs(d__1)) + (d__2 = b[(b_dim1 << 1) + 1], abs(d__2)); + if (cnorm < 1. && bnorm > 1.) { + if (bnorm > bignum * cnorm) { + *scale = 1. / bnorm; + } + } + d__1 = *scale * b[b_dim1 + 1]; + d__2 = *scale * b[(b_dim1 << 1) + 1]; + dladiv_(&d__1, &d__2, &csr, &csi, &x[x_dim1 + 1], &x[(x_dim1 << 1) + 1]); + *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)) + (d__2 = x[(x_dim1 << 1) + 1], abs(d__2)); + } + } else { + cr[0] = *ca * a[a_dim1 + 1] - *wr * *d1; + cr[3] = *ca * a[(a_dim1 << 1) + 2] - *wr * *d2; + if (*ltrans) { + cr[2] = *ca * a[a_dim1 + 2]; + cr[1] = *ca * a[(a_dim1 << 1) + 1]; + } else { + cr[1] = *ca * a[a_dim1 + 2]; + cr[2] = *ca * a[(a_dim1 << 1) + 1]; + } + if (*nw == 1) { + cmax = 0.; + icmax = 0; + for (j = 1; j <= 4; ++j) { + if ((d__1 = crv[j - 1], abs(d__1)) > cmax) { + cmax = (d__1 = crv[j - 1], abs(d__1)); + icmax = j; + } + } + if (cmax < smini) { + d__3 = (d__1 = b[b_dim1 + 1], abs(d__1)), d__4 = (d__2 = b[b_dim1 + 2], abs(d__2)); + bnorm = max(d__3, d__4); + if (smini < 1. && bnorm > 1.) { + if (bnorm > bignum * smini) { + *scale = 1. / bnorm; + } + } + temp = *scale / smini; + x[x_dim1 + 1] = temp * b[b_dim1 + 1]; + x[x_dim1 + 2] = temp * b[b_dim1 + 2]; + *xnorm = temp * bnorm; + *info = 1; + return 0; + } + ur11 = crv[icmax - 1]; + cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; + ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; + cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; + ur11r = 1. 
/ ur11; + lr21 = ur11r * cr21; + ur22 = cr22 - ur12 * lr21; + if (abs(ur22) < smini) { + ur22 = smini; + *info = 1; + } + if (rswap[icmax - 1]) { + br1 = b[b_dim1 + 2]; + br2 = b[b_dim1 + 1]; + } else { + br1 = b[b_dim1 + 1]; + br2 = b[b_dim1 + 2]; + } + br2 -= lr21 * br1; + d__2 = (d__1 = br1 * (ur22 * ur11r), abs(d__1)), d__3 = abs(br2); + bbnd = max(d__2, d__3); + if (bbnd > 1. && abs(ur22) < 1.) { + if (bbnd >= bignum * abs(ur22)) { + *scale = 1. / bbnd; + } + } + xr2 = br2 * *scale / ur22; + xr1 = *scale * br1 * ur11r - xr2 * (ur11r * ur12); + if (zswap[icmax - 1]) { + x[x_dim1 + 1] = xr2; + x[x_dim1 + 2] = xr1; + } else { + x[x_dim1 + 1] = xr1; + x[x_dim1 + 2] = xr2; + } + d__1 = abs(xr1), d__2 = abs(xr2); + *xnorm = max(d__1, d__2); + if (*xnorm > 1. && cmax > 1.) { + if (*xnorm > bignum / cmax) { + temp = cmax / bignum; + x[x_dim1 + 1] = temp * x[x_dim1 + 1]; + x[x_dim1 + 2] = temp * x[x_dim1 + 2]; + *xnorm = temp * *xnorm; + *scale = temp * *scale; + } + } + } else { + ci[0] = -(*wi) * *d1; + ci[1] = 0.; + ci[2] = 0.; + ci[3] = -(*wi) * *d2; + cmax = 0.; + icmax = 0; + for (j = 1; j <= 4; ++j) { + if ((d__1 = crv[j - 1], abs(d__1)) + (d__2 = civ[j - 1], abs(d__2)) > cmax) { + cmax = (d__1 = crv[j - 1], abs(d__1)) + (d__2 = civ[j - 1], abs(d__2)); + icmax = j; + } + } + if (cmax < smini) { + d__5 = (d__1 = b[b_dim1 + 1], abs(d__1)) + (d__2 = b[(b_dim1 << 1) + 1], abs(d__2)), + d__6 = (d__3 = b[b_dim1 + 2], abs(d__3)) + (d__4 = b[(b_dim1 << 1) + 2], abs(d__4)); + bnorm = max(d__5, d__6); + if (smini < 1. && bnorm > 1.) { + if (bnorm > bignum * smini) { + *scale = 1. 
/ bnorm; + } + } + temp = *scale / smini; + x[x_dim1 + 1] = temp * b[b_dim1 + 1]; + x[x_dim1 + 2] = temp * b[b_dim1 + 2]; + x[(x_dim1 << 1) + 1] = temp * b[(b_dim1 << 1) + 1]; + x[(x_dim1 << 1) + 2] = temp * b[(b_dim1 << 1) + 2]; + *xnorm = temp * bnorm; + *info = 1; + return 0; + } + ur11 = crv[icmax - 1]; + ui11 = civ[icmax - 1]; + cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; + ci21 = civ[ipivot[(icmax << 2) - 3] - 1]; + ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; + ui12 = civ[ipivot[(icmax << 2) - 2] - 1]; + cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; + ci22 = civ[ipivot[(icmax << 2) - 1] - 1]; + if (icmax == 1 || icmax == 4) { + if (abs(ur11) > abs(ui11)) { + temp = ui11 / ur11; + d__1 = temp; + ur11r = 1. / (ur11 * (d__1 * d__1 + 1.)); + ui11r = -temp * ur11r; + } else { + temp = ur11 / ui11; + d__1 = temp; + ui11r = -1. / (ui11 * (d__1 * d__1 + 1.)); + ur11r = -temp * ui11r; + } + lr21 = cr21 * ur11r; + li21 = cr21 * ui11r; + ur12s = ur12 * ur11r; + ui12s = ur12 * ui11r; + ur22 = cr22 - ur12 * lr21; + ui22 = ci22 - ur12 * li21; + } else { + ur11r = 1. / ur11; + ui11r = 0.; + lr21 = cr21 * ur11r; + li21 = ci21 * ur11r; + ur12s = ur12 * ur11r; + ui12s = ui12 * ur11r; + ur22 = cr22 - ur12 * lr21 + ui12 * li21; + ui22 = -ur12 * li21 - ui12 * lr21; + } + u22abs = abs(ur22) + abs(ui22); + if (u22abs < smini) { + ur22 = smini; + ui22 = 0.; + *info = 1; + } + if (rswap[icmax - 1]) { + br2 = b[b_dim1 + 1]; + br1 = b[b_dim1 + 2]; + bi2 = b[(b_dim1 << 1) + 1]; + bi1 = b[(b_dim1 << 1) + 2]; + } else { + br1 = b[b_dim1 + 1]; + br2 = b[b_dim1 + 2]; + bi1 = b[(b_dim1 << 1) + 1]; + bi2 = b[(b_dim1 << 1) + 2]; + } + br2 = br2 - lr21 * br1 + li21 * bi1; + bi2 = bi2 - li21 * br1 - lr21 * bi1; + d__1 = (abs(br1) + abs(bi1)) * (u22abs * (abs(ur11r) + abs(ui11r))), + d__2 = abs(br2) + abs(bi2); + bbnd = max(d__1, d__2); + if (bbnd > 1. && u22abs < 1.) { + if (bbnd >= bignum * u22abs) { + *scale = 1. 
/ bbnd; + br1 = *scale * br1; + bi1 = *scale * bi1; + br2 = *scale * br2; + bi2 = *scale * bi2; + } + } + dladiv_(&br2, &bi2, &ur22, &ui22, &xr2, &xi2); + xr1 = ur11r * br1 - ui11r * bi1 - ur12s * xr2 + ui12s * xi2; + xi1 = ui11r * br1 + ur11r * bi1 - ui12s * xr2 - ur12s * xi2; + if (zswap[icmax - 1]) { + x[x_dim1 + 1] = xr2; + x[x_dim1 + 2] = xr1; + x[(x_dim1 << 1) + 1] = xi2; + x[(x_dim1 << 1) + 2] = xi1; + } else { + x[x_dim1 + 1] = xr1; + x[x_dim1 + 2] = xr2; + x[(x_dim1 << 1) + 1] = xi1; + x[(x_dim1 << 1) + 2] = xi2; + } + d__1 = abs(xr1) + abs(xi1), d__2 = abs(xr2) + abs(xi2); + *xnorm = max(d__1, d__2); + if (*xnorm > 1. && cmax > 1.) { + if (*xnorm > bignum / cmax) { + temp = cmax / bignum; + x[x_dim1 + 1] = temp * x[x_dim1 + 1]; + x[x_dim1 + 2] = temp * x[x_dim1 + 2]; + x[(x_dim1 << 1) + 1] = temp * x[(x_dim1 << 1) + 1]; + x[(x_dim1 << 1) + 2] = temp * x[(x_dim1 << 1) + 2]; + *xnorm = temp * *xnorm; + *scale = temp * *scale; + } + } + } + } + return 0; +} +#undef crv +#undef civ +#undef cr +#undef ci +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlanv2.cpp b/lib/linalg/dlanv2.cpp new file mode 100644 index 0000000000..29a511bf31 --- /dev/null +++ b/lib/linalg/dlanv2.cpp @@ -0,0 +1,106 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublereal c_b3 = 1.; /* dlanv2_: rewrites the scalars *a, *b, *c__, *d__ in place, stores the accompanying pair in *cs and *sn, and reports (*rt1r, *rt1i) and (*rt2r, *rt2i) computed from the final scalars; always returns 0. NOTE(review): symbol and file name suggest the f2c translation of LAPACK DLANV2 (standardized Schur form of a real 2-by-2 matrix, cs and sn being the rotation and rt1, rt2 the eigenvalues) - confirm against the LAPACK reference. */ +int dlanv2_(doublereal *a, doublereal *b, doublereal *c__, doublereal *d__, doublereal *rt1r, + doublereal *rt1i, doublereal *rt2r, doublereal *rt2i, doublereal *cs, doublereal *sn) +{ + doublereal d__1, d__2; + double d_lmp_sign(doublereal *, doublereal *), sqrt(doublereal); + doublereal p, z__, aa, bb, cc, dd, cs1, sn1, sab, sac, eps, tau, temp, scale, bcmax, bcmis, + sigma; + extern doublereal dlapy2_(doublereal *, doublereal *), dlamch_(char *, ftnlen); + eps = dlamch_((char *)"P", (ftnlen)1); /* eps is what dlamch_ returns for the P query; 4 * eps is the threshold tested against z__ further down */ + if (*c__ == 0.) { /* c already zero: a, b, c, d are kept and the pair is (1, 0) */ + *cs = 1.; + *sn = 0.; + } else if (*b == 0.) 
{ /* b is zero while c is not: a and d trade places, b receives minus c, and c is cleared */ + *cs = 0.; + *sn = 1.; + temp = *d__; + *d__ = *a; + *a = temp; + *b = -(*c__); + *c__ = 0.; + } else if (*a - *d__ == 0. && d_lmp_sign(&c_b3, b) != d_lmp_sign(&c_b3, c__)) { /* a equals d and the d_lmp_sign results for b and c differ: a, b, c, d stay as they are */ + *cs = 1.; + *sn = 0.; + } else { /* remaining case: work from p = (a - d) / 2 and the magnitudes of b and c */ + temp = *a - *d__; + p = temp * .5; + d__1 = abs(*b), d__2 = abs(*c__); + bcmax = max(d__1, d__2); + d__1 = abs(*b), d__2 = abs(*c__); + bcmis = min(d__1, d__2) * d_lmp_sign(&c_b3, b) * d_lmp_sign(&c_b3, c__); /* bcmax is the larger of abs(b) and abs(c); bcmis is the smaller one times the two d_lmp_sign factors */ + d__1 = abs(p); + scale = max(d__1, bcmax); + z__ = p / scale * p + bcmax / scale * bcmis; /* algebraically (p * p + bcmax * bcmis) / scale, with each division done before its multiplication (presumably to limit overflow - assumption) */ + if (z__ >= eps * 4.) { /* this branch finishes with c cleared to 0 */ + d__1 = sqrt(scale) * sqrt(z__); + z__ = p + d_lmp_sign(&d__1, &p); + *a = *d__ + z__; + *d__ -= bcmax / z__ * bcmis; + tau = dlapy2_(c__, &z__); + *cs = z__ / tau; + *sn = *c__ / tau; + *b -= *c__; + *c__ = 0.; + } else { /* z__ under the threshold: derive cs and sn from sigma = b + c and temp = a - d, apply them to a, b, c, d through aa, bb, cc, dd, then set a and d to their mean */ + sigma = *b + *c__; + tau = dlapy2_(&sigma, &temp); + *cs = sqrt((abs(sigma) / tau + 1.) * .5); + *sn = -(p / (tau * *cs)) * d_lmp_sign(&c_b3, &sigma); + aa = *a * *cs + *b * *sn; + bb = -(*a) * *sn + *b * *cs; + cc = *c__ * *cs + *d__ * *sn; + dd = -(*c__) * *sn + *d__ * *cs; + *a = aa * *cs + cc * *sn; + *b = bb * *cs + dd * *sn; + *c__ = -aa * *sn + cc * *cs; + *d__ = -bb * *sn + dd * *cs; + temp = (*a + *d__) * .5; + *a = temp; + *d__ = temp; + if (*c__ != 0.) { + if (*b != 0.) { + if (d_lmp_sign(&c_b3, b) == d_lmp_sign(&c_b3, c__)) { /* b and c both nonzero with equal d_lmp_sign results: c is cleared and the extra pair cs1, sn1 is combined into cs, sn */ + sab = sqrt((abs(*b))); + sac = sqrt((abs(*c__))); + d__1 = sab * sac; + p = d_lmp_sign(&d__1, c__); + tau = 1. / sqrt((d__1 = *b + *c__, abs(d__1))); + *a = temp + p; + *d__ = temp - p; + *b -= *c__; + *c__ = 0.; + cs1 = sab * tau; + sn1 = sac * tau; + temp = *cs * cs1 - *sn * sn1; + *sn = *cs * sn1 + *sn * cs1; + *cs = temp; + } + } else { /* c nonzero but b zero: b receives minus c, c is cleared, and (cs, sn) becomes (-sn, cs) */ + *b = -(*c__); + *c__ = 0.; + temp = *cs; + *cs = -(*sn); + *sn = temp; + } + } + } + } + *rt1r = *a; + *rt2r = *d__; /* rt1r and rt2r report the final a and d */ + if (*c__ == 0.) 
{ /* final c is zero: rt1i and rt2i are both 0 */ + *rt1i = 0.; + *rt2i = 0.; + } else { /* otherwise rt1i is sqrt(abs(b)) times sqrt(abs(c)) and rt2i is its negation */ + *rt1i = sqrt((abs(*b))) * sqrt((abs(*c__))); + *rt2i = -(*rt1i); + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlaqr0.cpp b/lib/linalg/dlaqr0.cpp new file mode 100644 index 0000000000..31a265c3e9 --- /dev/null +++ b/lib/linalg/dlaqr0.cpp @@ -0,0 +1,306 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__13 = 13; +static integer c__15 = 15; +static integer c_n1 = -1; +static integer c__12 = 12; +static integer c__14 = 14; +static integer c__16 = 16; +static logical c_false = FALSE_; +static integer c__1 = 1; +static integer c__3 = 3; /* dlaqr0_: from the visible body: *info starts at 0; *n equal to 0 stores 1 in work[1] and returns; *n up to 11 is handed to dlahqr_ (skipped when *lwork is -1); larger *n first queries dlaqr3_ for a workspace size, returns the estimate lwkopt in work[1] when *lwork is -1, and otherwise iterates at most itmax times calling dlaqr3_ and, conditionally, dlaqr5_; if the iterations run out *info is set to kbot. Always returns 0. NOTE(review): presumably the f2c translation of LAPACK DLAQR0 (eigenvalues of a Hessenberg matrix) - confirm against the LAPACK reference. */ +int dlaqr0_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, doublereal *h__, + integer *ldh, doublereal *wr, doublereal *wi, integer *iloz, integer *ihiz, + doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *info) +{ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4; + integer i__, k; + doublereal aa, bb, cc, dd; + integer ld; + doublereal cs; + integer nh, it, ks, kt; + doublereal sn; + integer ku, kv, ls, ns; + doublereal ss; + integer nw, inf, kdu, nho, nve, kwh, nsr, nwr, kwv, ndec, ndfl, kbot, nmin; + doublereal swap; + integer ktop; + doublereal zdum[1]; + integer kacc22, itmax, nsmax, nwmax, kwtop; + extern int dlanv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), + dlaqr3_(logical *, logical *, integer *, integer *, integer *, integer *, doublereal *, + integer *, integer *, integer *, doublereal *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *, doublereal *, integer *, doublereal *, integer *), + dlaqr4_(logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *, 
doublereal *, integer *, + doublereal *, integer *, integer *), + dlaqr5_(logical *, logical *, integer *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *, doublereal *, integer *); + integer nibble; + extern int dlahqr_(logical *, logical *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + char jbcmpz[2]; + integer nwupbd; + logical sorted; + integer lwkopt; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + *info = 0; + if (*n == 0) { + work[1] = 1.; + return 0; + } + if (*n <= 11) { + lwkopt = 1; + if (*lwork != -1) { + dlahqr_(wantt, wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[1], iloz, ihiz, + &z__[z_offset], ldz, info); + } + } else { + *info = 0; + if (*wantt) { + *(unsigned char *)jbcmpz = 'S'; + } else { + *(unsigned char *)jbcmpz = 'E'; + } + if (*wantz) { + *(unsigned char *)&jbcmpz[1] = 'V'; + } else { + *(unsigned char *)&jbcmpz[1] = 'N'; + } + nwr = ilaenv_(&c__13, (char *)"DLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + nwr = max(2, nwr); + i__1 = *ihi - *ilo + 1, i__2 = (*n - 1) / 3, i__1 = min(i__1, i__2); + nwr = min(i__1, nwr); + nsr = ilaenv_(&c__15, (char *)"DLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + i__1 = nsr, i__2 = (*n + 6) / 9, i__1 = min(i__1, i__2), i__2 = *ihi - *ilo; + nsr = min(i__1, i__2); + i__1 = 2, i__2 = nsr - nsr % 2; + nsr = max(i__1, i__2); + i__1 = nwr + 1; + dlaqr3_(wantt, wantz, 
n, ilo, ihi, &i__1, &h__[h_offset], ldh, iloz, ihiz, &z__[z_offset], + ldz, &ls, &ld, &wr[1], &wi[1], &h__[h_offset], ldh, n, &h__[h_offset], ldh, n, + &h__[h_offset], ldh, &work[1], &c_n1); + i__1 = nsr * 3 / 2, i__2 = (integer)work[1]; + lwkopt = max(i__1, i__2); + if (*lwork == -1) { + work[1] = (doublereal)lwkopt; + return 0; + } + nmin = ilaenv_(&c__12, (char *)"DLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + nmin = max(11, nmin); + nibble = ilaenv_(&c__14, (char *)"DLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + nibble = max(0, nibble); + kacc22 = ilaenv_(&c__16, (char *)"DLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + kacc22 = max(0, kacc22); + kacc22 = min(2, kacc22); + i__1 = (*n - 1) / 3, i__2 = *lwork / 2; + nwmax = min(i__1, i__2); + nw = nwmax; + i__1 = (*n + 6) / 9, i__2 = (*lwork << 1) / 3; + nsmax = min(i__1, i__2); + nsmax -= nsmax % 2; + ndfl = 1; + i__1 = 10, i__2 = *ihi - *ilo + 1; + itmax = max(i__1, i__2) * 30; + kbot = *ihi; + i__1 = itmax; + for (it = 1; it <= i__1; ++it) { + if (kbot < *ilo) { + goto L90; + } + i__2 = *ilo + 1; + for (k = kbot; k >= i__2; --k) { + if (h__[k + (k - 1) * h_dim1] == 0.) 
{ + goto L20; + } + } + k = *ilo; + L20: + ktop = k; + nh = kbot - ktop + 1; + nwupbd = min(nh, nwmax); + if (ndfl < 5) { + nw = min(nwupbd, nwr); + } else { + i__2 = nwupbd, i__3 = nw << 1; + nw = min(i__2, i__3); + } + if (nw < nwmax) { + if (nw >= nh - 1) { + nw = nh; + } else { + kwtop = kbot - nw + 1; + if ((d__1 = h__[kwtop + (kwtop - 1) * h_dim1], abs(d__1)) > + (d__2 = h__[kwtop - 1 + (kwtop - 2) * h_dim1], abs(d__2))) { + ++nw; + } + } + } + if (ndfl < 5) { + ndec = -1; + } else if (ndec >= 0 || nw >= nwupbd) { + ++ndec; + if (nw - ndec < 2) { + ndec = 0; + } + nw -= ndec; + } + kv = *n - nw + 1; + kt = nw + 1; + nho = *n - nw - 1 - kt + 1; + kwv = nw + 2; + nve = *n - nw - kwv + 1; + dlaqr3_(wantt, wantz, n, &ktop, &kbot, &nw, &h__[h_offset], ldh, iloz, ihiz, + &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], &h__[kv + h_dim1], ldh, &nho, + &h__[kv + kt * h_dim1], ldh, &nve, &h__[kwv + h_dim1], ldh, &work[1], lwork); + kbot -= ld; + ks = kbot - ls + 1; + if (ld == 0 || ld * 100 <= nw * nibble && kbot - ktop + 1 > min(nmin, nwmax)) { + i__4 = 2, i__5 = kbot - ktop; + i__2 = min(nsmax, nsr), i__3 = max(i__4, i__5); + ns = min(i__2, i__3); + ns -= ns % 2; + if (ndfl % 6 == 0) { + ks = kbot - ns + 1; + i__3 = ks + 1, i__4 = ktop + 2; + i__2 = max(i__3, i__4); + for (i__ = kbot; i__ >= i__2; i__ += -2) { + ss = (d__1 = h__[i__ + (i__ - 1) * h_dim1], abs(d__1)) + + (d__2 = h__[i__ - 1 + (i__ - 2) * h_dim1], abs(d__2)); + aa = ss * .75 + h__[i__ + i__ * h_dim1]; + bb = ss; + cc = ss * -.4375; + dd = aa; + dlanv2_(&aa, &bb, &cc, &dd, &wr[i__ - 1], &wi[i__ - 1], &wr[i__], &wi[i__], + &cs, &sn); + } + if (ks == ktop) { + wr[ks + 1] = h__[ks + 1 + (ks + 1) * h_dim1]; + wi[ks + 1] = 0.; + wr[ks] = wr[ks + 1]; + wi[ks] = wi[ks + 1]; + } + } else { + if (kbot - ks + 1 <= ns / 2) { + ks = kbot - ns + 1; + kt = *n - ns + 1; + dlacpy_((char *)"A", &ns, &ns, &h__[ks + ks * h_dim1], ldh, &h__[kt + h_dim1], ldh, + (ftnlen)1); + if (ns > nmin) { + dlaqr4_(&c_false, &c_false, 
&ns, &c__1, &ns, &h__[kt + h_dim1], ldh, + &wr[ks], &wi[ks], &c__1, &c__1, zdum, &c__1, &work[1], lwork, + &inf); + } else { + dlahqr_(&c_false, &c_false, &ns, &c__1, &ns, &h__[kt + h_dim1], ldh, + &wr[ks], &wi[ks], &c__1, &c__1, zdum, &c__1, &inf); + } + ks += inf; + if (ks >= kbot) { + aa = h__[kbot - 1 + (kbot - 1) * h_dim1]; + cc = h__[kbot + (kbot - 1) * h_dim1]; + bb = h__[kbot - 1 + kbot * h_dim1]; + dd = h__[kbot + kbot * h_dim1]; + dlanv2_(&aa, &bb, &cc, &dd, &wr[kbot - 1], &wi[kbot - 1], &wr[kbot], + &wi[kbot], &cs, &sn); + ks = kbot - 1; + } + } + if (kbot - ks + 1 > ns) { + sorted = FALSE_; + i__2 = ks + 1; + for (k = kbot; k >= i__2; --k) { + if (sorted) { + goto L60; + } + sorted = TRUE_; + i__3 = k - 1; + for (i__ = ks; i__ <= i__3; ++i__) { + if ((d__1 = wr[i__], abs(d__1)) + (d__2 = wi[i__], abs(d__2)) < + (d__3 = wr[i__ + 1], abs(d__3)) + + (d__4 = wi[i__ + 1], abs(d__4))) { + sorted = FALSE_; + swap = wr[i__]; + wr[i__] = wr[i__ + 1]; + wr[i__ + 1] = swap; + swap = wi[i__]; + wi[i__] = wi[i__ + 1]; + wi[i__ + 1] = swap; + } + } + } + L60:; + } + i__2 = ks + 2; + for (i__ = kbot; i__ >= i__2; i__ += -2) { + if (wi[i__] != -wi[i__ - 1]) { + swap = wr[i__]; + wr[i__] = wr[i__ - 1]; + wr[i__ - 1] = wr[i__ - 2]; + wr[i__ - 2] = swap; + swap = wi[i__]; + wi[i__] = wi[i__ - 1]; + wi[i__ - 1] = wi[i__ - 2]; + wi[i__ - 2] = swap; + } + } + } + if (kbot - ks + 1 == 2) { + if (wi[kbot] == 0.) 
{ + if ((d__1 = wr[kbot] - h__[kbot + kbot * h_dim1], abs(d__1)) < + (d__2 = wr[kbot - 1] - h__[kbot + kbot * h_dim1], abs(d__2))) { + wr[kbot - 1] = wr[kbot]; + } else { + wr[kbot] = wr[kbot - 1]; + } + } + } + i__2 = ns, i__3 = kbot - ks + 1; + ns = min(i__2, i__3); + ns -= ns % 2; /* keep ns even */ + ks = kbot - ns + 1; + kdu = ns * 3 - 3; + ku = *n - kdu + 1; + kwh = kdu + 1; + nho = *n - kdu - 3 - (kdu + 1) + 1; + kwv = kdu + 4; + nve = *n - kdu - kwv + 1; + dlaqr5_(wantt, wantz, &kacc22, n, &ktop, &kbot, &ns, &wr[ks], &wi[ks], + &h__[h_offset], ldh, iloz, ihiz, &z__[z_offset], ldz, &work[1], &c__3, + &h__[ku + h_dim1], ldh, &nve, &h__[kwv + h_dim1], ldh, &nho, + &h__[ku + kwh * h_dim1], ldh); + } + if (ld > 0) { /* ndfl restarts at 1 whenever ld is positive and otherwise counts up */ + ndfl = 1; + } else { + ++ndfl; + } + } + *info = kbot; /* reached only when the it loop ends without a jump to L90 */ + L90:; + } + work[1] = (doublereal)lwkopt; /* hand lwkopt back to the caller through work[1] */ + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlaqr1.cpp b/lib/linalg/dlaqr1.cpp new file mode 100644 index 0000000000..292dce0f45 --- /dev/null +++ b/lib/linalg/dlaqr1.cpp @@ -0,0 +1,52 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" /* dlaqr1_: for *n equal to 2 or 3, fills the first *n entries of v from the leading *n-by-*n entries of h__ (leading dimension *ldh) and the scalars *sr1, *si1, *sr2, *si2; for any other *n it returns at once without writing v. Always returns 0. NOTE(review): name suggests the f2c translation of LAPACK DLAQR1, documented there as a scaled first column of (H - s1 I)(H - s2 I) with s1 = sr1 + i si1 and s2 = sr2 + i si2 - confirm against the LAPACK reference. */ +int dlaqr1_(integer *n, doublereal *h__, integer *ldh, doublereal *sr1, doublereal *si1, + doublereal *sr2, doublereal *si2, doublereal *v) +{ + integer h_dim1, h_offset; + doublereal d__1, d__2, d__3; + doublereal s, h21s, h31s; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --v; /* after these shifts h__[i + j * h_dim1] addresses row i, column j counted from 1, and v[1] is the first entry the caller passed */ + if (*n != 2 && *n != 3) { /* unsupported size: leave v untouched */ + return 0; + } + if (*n == 2) { /* 2-by-2 case */ + s = (d__1 = h__[h_dim1 + 1] - *sr2, abs(d__1)) + abs(*si2) + + (d__2 = h__[h_dim1 + 2], abs(d__2)); /* s sums abs(h11 - sr2), abs(si2) and abs(h21); every quotient below is taken over s */ + if (s == 0.) 
{ /* s is zero: v is zeroed and the divisions by s are skipped */ + v[1] = 0.; + v[2] = 0.; + } else { + h21s = h__[h_dim1 + 2] / s; /* h21 over s */ + v[1] = h21s * h__[(h_dim1 << 1) + 1] + + (h__[h_dim1 + 1] - *sr1) * ((h__[h_dim1 + 1] - *sr2) / s) - *si1 * (*si2 / s); + v[2] = h21s * (h__[h_dim1 + 1] + h__[(h_dim1 << 1) + 2] - *sr1 - *sr2); + } + } else { /* 3-by-3 case: same scheme with abs(h31) added to s and a third entry of v */ + s = (d__1 = h__[h_dim1 + 1] - *sr2, abs(d__1)) + abs(*si2) + + (d__2 = h__[h_dim1 + 2], abs(d__2)) + (d__3 = h__[h_dim1 + 3], abs(d__3)); + if (s == 0.) { /* s is zero: all three entries of v are zeroed */ + v[1] = 0.; + v[2] = 0.; + v[3] = 0.; + } else { + h21s = h__[h_dim1 + 2] / s; + h31s = h__[h_dim1 + 3] / s; /* h21 and h31 over s */ + v[1] = (h__[h_dim1 + 1] - *sr1) * ((h__[h_dim1 + 1] - *sr2) / s) - *si1 * (*si2 / s) + + h__[(h_dim1 << 1) + 1] * h21s + h__[h_dim1 * 3 + 1] * h31s; + v[2] = h21s * (h__[h_dim1 + 1] + h__[(h_dim1 << 1) + 2] - *sr1 - *sr2) + + h__[h_dim1 * 3 + 2] * h31s; + v[3] = h31s * (h__[h_dim1 + 1] + h__[h_dim1 * 3 + 3] - *sr1 - *sr2) + + h21s * h__[(h_dim1 << 1) + 3]; + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlaqr2.cpp b/lib/linalg/dlaqr2.cpp new file mode 100644 index 0000000000..102433a90d --- /dev/null +++ b/lib/linalg/dlaqr2.cpp @@ -0,0 +1,359 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b12 = 0.; +static doublereal c_b13 = 1.; +static logical c_true = TRUE_; /* dlaqr2_: from the visible body: a call with *lwork equal to -1 only stores the workspace estimate lwkopt in work[1] and returns; otherwise the routine operates on the jw-by-jw window of h__ whose top-left entry is (kwtop, kwtop), with jw = min(*nw, *kbot - *ktop + 1) and kwtop = *kbot - jw + 1, and reports its counts through *ns and *nd. Always returns 0. NOTE(review): presumably the f2c translation of LAPACK DLAQR2 (aggressive early deflation) - confirm against the LAPACK reference. */ +int dlaqr2_(logical *wantt, logical *wantz, integer *n, integer *ktop, integer *kbot, integer *nw, + doublereal *h__, integer *ldh, integer *iloz, integer *ihiz, doublereal *z__, + integer *ldz, integer *ns, integer *nd, doublereal *sr, doublereal *si, doublereal *v, + integer *ldv, integer *nh, doublereal *t, integer *ldt, integer *nv, doublereal *wv, + integer *ldwv, doublereal *work, integer *lwork) +{ + integer h_dim1, h_offset, t_dim1, t_offset, v_dim1, v_offset, wv_dim1, wv_offset, z_dim1, + z_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + double sqrt(doublereal); + integer i__, j, k; + doublereal s, aa, 
bb, cc, dd, cs, sn; + integer jw; + doublereal evi, evk, foo; + integer kln; + doublereal tau, ulp; + integer lwk1, lwk2; + doublereal beta; + integer kend, kcol, info, ifst, ilst, ltop, krow; + extern int dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, ftnlen), + dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, + ftnlen); + logical bulge; + extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); + integer infqr, kwtop; + extern int dlanv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), + dlabad_(doublereal *, doublereal *); + extern doublereal dlamch_(char *, ftnlen); + extern int dgehrd_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *), + dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *), + dlahqr_(logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, + integer *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen); + doublereal safmin; + extern int dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, ftnlen); + doublereal safmax; + extern int dtrexc_(char *, integer *, doublereal *, integer *, doublereal *, integer *, + integer *, integer *, doublereal *, integer *, ftnlen), + dormhr_(char *, char *, integer *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, ftnlen, + ftnlen); + logical sorted; + doublereal smlnum; + integer lwkopt; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = 
*ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --sr; + --si; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + --work; + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1, i__2); + if (jw <= 2) { + lwkopt = 1; + } else { + i__1 = jw - 1; + dgehrd_(&jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &work[1], &c_n1, &info); + lwk1 = (integer)work[1]; + i__1 = jw - 1; + dormhr_((char *)"R", (char *)"N", &jw, &jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &v[v_offset], ldv, + &work[1], &c_n1, &info, (ftnlen)1, (ftnlen)1); + lwk2 = (integer)work[1]; + lwkopt = jw + max(lwk1, lwk2); + } + if (*lwork == -1) { + work[1] = (doublereal)lwkopt; + return 0; + } + *ns = 0; + *nd = 0; + work[1] = 1.; + if (*ktop > *kbot) { + return 0; + } + if (*nw < 1) { + return 0; + } + safmin = dlamch_((char *)"SAFE MINIMUM", (ftnlen)12); + safmax = 1. / safmin; + dlabad_(&safmin, &safmax); + ulp = dlamch_((char *)"PRECISION", (ftnlen)9); + smlnum = safmin * ((doublereal)(*n) / ulp); + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1, i__2); + kwtop = *kbot - jw + 1; + if (kwtop == *ktop) { + s = 0.; + } else { + s = h__[kwtop + (kwtop - 1) * h_dim1]; + } + if (*kbot == kwtop) { + sr[kwtop] = h__[kwtop + kwtop * h_dim1]; + si[kwtop] = 0.; + *ns = 1; + *nd = 0; + d__2 = smlnum, d__3 = ulp * (d__1 = h__[kwtop + kwtop * h_dim1], abs(d__1)); + if (abs(s) <= max(d__2, d__3)) { + *ns = 0; + *nd = 1; + if (kwtop > *ktop) { + h__[kwtop + (kwtop - 1) * h_dim1] = 0.; + } + } + work[1] = 1.; + return 0; + } + dlacpy_((char *)"U", &jw, &jw, &h__[kwtop + kwtop * h_dim1], ldh, &t[t_offset], ldt, (ftnlen)1); + i__1 = jw - 1; + i__2 = *ldh + 1; + i__3 = *ldt + 1; + dcopy_(&i__1, &h__[kwtop + 1 + kwtop * h_dim1], &i__2, &t[t_dim1 + 2], &i__3); + dlaset_((char *)"A", &jw, &jw, &c_b12, &c_b13, &v[v_offset], ldv, (ftnlen)1); + dlahqr_(&c_true, &c_true, &jw, &c__1, 
&jw, &t[t_offset], ldt, &sr[kwtop], &si[kwtop], &c__1, + &jw, &v[v_offset], ldv, &infqr); + i__1 = jw - 3; + for (j = 1; j <= i__1; ++j) { + t[j + 2 + j * t_dim1] = 0.; + t[j + 3 + j * t_dim1] = 0.; + } + if (jw > 2) { + t[jw + (jw - 2) * t_dim1] = 0.; + } + *ns = jw; + ilst = infqr + 1; +L20: + if (ilst <= *ns) { + if (*ns == 1) { + bulge = FALSE_; + } else { + bulge = t[*ns + (*ns - 1) * t_dim1] != 0.; + } + if (!bulge) { + foo = (d__1 = t[*ns + *ns * t_dim1], abs(d__1)); + if (foo == 0.) { + foo = abs(s); + } + d__2 = smlnum, d__3 = ulp * foo; + if ((d__1 = s * v[*ns * v_dim1 + 1], abs(d__1)) <= max(d__2, d__3)) { + --(*ns); + } else { + ifst = *ns; + dtrexc_((char *)"V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, &ilst, &work[1], + &info, (ftnlen)1); + ++ilst; + } + } else { + foo = (d__3 = t[*ns + *ns * t_dim1], abs(d__3)) + + sqrt((d__1 = t[*ns + (*ns - 1) * t_dim1], abs(d__1))) * + sqrt((d__2 = t[*ns - 1 + *ns * t_dim1], abs(d__2))); + if (foo == 0.) { + foo = abs(s); + } + d__3 = (d__1 = s * v[*ns * v_dim1 + 1], abs(d__1)), + d__4 = (d__2 = s * v[(*ns - 1) * v_dim1 + 1], abs(d__2)); + d__5 = smlnum, d__6 = ulp * foo; + if (max(d__3, d__4) <= max(d__5, d__6)) { + *ns += -2; + } else { + ifst = *ns; + dtrexc_((char *)"V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, &ilst, &work[1], + &info, (ftnlen)1); + ilst += 2; + } + } + goto L20; + } + if (*ns == 0) { + s = 0.; + } + if (*ns < jw) { + sorted = FALSE_; + i__ = *ns + 1; + L30: + if (sorted) { + goto L50; + } + sorted = TRUE_; + kend = i__ - 1; + i__ = infqr + 1; + if (i__ == *ns) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.) 
{ + k = i__ + 1; + } else { + k = i__ + 2; + } + L40: + if (k <= kend) { + if (k == i__ + 1) { + evi = (d__1 = t[i__ + i__ * t_dim1], abs(d__1)); + } else { + evi = (d__3 = t[i__ + i__ * t_dim1], abs(d__3)) + + sqrt((d__1 = t[i__ + 1 + i__ * t_dim1], abs(d__1))) * + sqrt((d__2 = t[i__ + (i__ + 1) * t_dim1], abs(d__2))); + } + if (k == kend) { + evk = (d__1 = t[k + k * t_dim1], abs(d__1)); + } else if (t[k + 1 + k * t_dim1] == 0.) { + evk = (d__1 = t[k + k * t_dim1], abs(d__1)); + } else { + evk = (d__3 = t[k + k * t_dim1], abs(d__3)) + + sqrt((d__1 = t[k + 1 + k * t_dim1], abs(d__1))) * + sqrt((d__2 = t[k + (k + 1) * t_dim1], abs(d__2))); + } + if (evi >= evk) { + i__ = k; + } else { + sorted = FALSE_; + ifst = i__; + ilst = k; + dtrexc_((char *)"V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, &ilst, &work[1], + &info, (ftnlen)1); + if (info == 0) { + i__ = ilst; + } else { + i__ = k; + } + } + if (i__ == kend) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.) { + k = i__ + 1; + } else { + k = i__ + 2; + } + goto L40; + } + goto L30; + L50:; + } + i__ = jw; +L60: + if (i__ >= infqr + 1) { + if (i__ == infqr + 1) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.; + --i__; + } else if (t[i__ + (i__ - 1) * t_dim1] == 0.) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.; + --i__; + } else { + aa = t[i__ - 1 + (i__ - 1) * t_dim1]; + cc = t[i__ + (i__ - 1) * t_dim1]; + bb = t[i__ - 1 + i__ * t_dim1]; + dd = t[i__ + i__ * t_dim1]; + dlanv2_(&aa, &bb, &cc, &dd, &sr[kwtop + i__ - 2], &si[kwtop + i__ - 2], + &sr[kwtop + i__ - 1], &si[kwtop + i__ - 1], &cs, &sn); + i__ += -2; + } + goto L60; + } + if (*ns < jw || s == 0.) { + if (*ns > 1 && s != 0.) 
{ + dcopy_(ns, &v[v_offset], ldv, &work[1], &c__1); + beta = work[1]; + dlarfg_(ns, &beta, &work[2], &c__1, &tau); + work[1] = 1.; + i__1 = jw - 2; + i__2 = jw - 2; + dlaset_((char *)"L", &i__1, &i__2, &c_b12, &c_b12, &t[t_dim1 + 3], ldt, (ftnlen)1); + dlarf_((char *)"L", ns, &jw, &work[1], &c__1, &tau, &t[t_offset], ldt, &work[jw + 1], + (ftnlen)1); + dlarf_((char *)"R", ns, ns, &work[1], &c__1, &tau, &t[t_offset], ldt, &work[jw + 1], (ftnlen)1); + dlarf_((char *)"R", &jw, ns, &work[1], &c__1, &tau, &v[v_offset], ldv, &work[jw + 1], + (ftnlen)1); + i__1 = *lwork - jw; + dgehrd_(&jw, &c__1, ns, &t[t_offset], ldt, &work[1], &work[jw + 1], &i__1, &info); + } + if (kwtop > 1) { + h__[kwtop + (kwtop - 1) * h_dim1] = s * v[v_dim1 + 1]; + } + dlacpy_((char *)"U", &jw, &jw, &t[t_offset], ldt, &h__[kwtop + kwtop * h_dim1], ldh, (ftnlen)1); + i__1 = jw - 1; + i__2 = *ldt + 1; + i__3 = *ldh + 1; + dcopy_(&i__1, &t[t_dim1 + 2], &i__2, &h__[kwtop + 1 + kwtop * h_dim1], &i__3); + if (*ns > 1 && s != 0.) { + i__1 = *lwork - jw; + dormhr_((char *)"R", (char *)"N", &jw, ns, &c__1, ns, &t[t_offset], ldt, &work[1], &v[v_offset], ldv, + &work[jw + 1], &i__1, &info, (ftnlen)1, (ftnlen)1); + } + if (*wantt) { + ltop = 1; + } else { + ltop = *ktop; + } + i__1 = kwtop - 1; + i__2 = *nv; + for (krow = ltop; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += i__2) { + i__3 = *nv, i__4 = kwtop - krow; + kln = min(i__3, i__4); + dgemm_((char *)"N", (char *)"N", &kln, &jw, &jw, &c_b13, &h__[krow + kwtop * h_dim1], ldh, &v[v_offset], + ldv, &c_b12, &wv[wv_offset], ldwv, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"A", &kln, &jw, &wv[wv_offset], ldwv, &h__[krow + kwtop * h_dim1], ldh, + (ftnlen)1); + } + if (*wantt) { + i__2 = *n; + i__1 = *nh; + for (kcol = *kbot + 1; i__1 < 0 ? 
kcol >= i__2 : kcol <= i__2; kcol += i__1) { + i__3 = *nh, i__4 = *n - kcol + 1; + kln = min(i__3, i__4); + dgemm_((char *)"C", (char *)"N", &jw, &kln, &jw, &c_b13, &v[v_offset], ldv, + &h__[kwtop + kcol * h_dim1], ldh, &c_b12, &t[t_offset], ldt, (ftnlen)1, + (ftnlen)1); + dlacpy_((char *)"A", &jw, &kln, &t[t_offset], ldt, &h__[kwtop + kcol * h_dim1], ldh, + (ftnlen)1); + } + } + if (*wantz) { + i__1 = *ihiz; + i__2 = *nv; + for (krow = *iloz; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += i__2) { + i__3 = *nv, i__4 = *ihiz - krow + 1; + kln = min(i__3, i__4); + dgemm_((char *)"N", (char *)"N", &kln, &jw, &jw, &c_b13, &z__[krow + kwtop * z_dim1], ldz, + &v[v_offset], ldv, &c_b12, &wv[wv_offset], ldwv, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"A", &kln, &jw, &wv[wv_offset], ldwv, &z__[krow + kwtop * z_dim1], ldz, + (ftnlen)1); + } + } + } + *nd = jw - *ns; + *ns -= infqr; + work[1] = (doublereal)lwkopt; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlaqr3.cpp b/lib/linalg/dlaqr3.cpp new file mode 100644 index 0000000000..5711a3e349 --- /dev/null +++ b/lib/linalg/dlaqr3.cpp @@ -0,0 +1,375 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c_n1 = -1; +static logical c_true = TRUE_; +static doublereal c_b17 = 0.; +static doublereal c_b18 = 1.; +static integer c__12 = 12; +int dlaqr3_(logical *wantt, logical *wantz, integer *n, integer *ktop, integer *kbot, integer *nw, + doublereal *h__, integer *ldh, integer *iloz, integer *ihiz, doublereal *z__, + integer *ldz, integer *ns, integer *nd, doublereal *sr, doublereal *si, doublereal *v, + integer *ldv, integer *nh, doublereal *t, integer *ldt, integer *nv, doublereal *wv, + integer *ldwv, doublereal *work, integer *lwork) +{ + integer h_dim1, h_offset, t_dim1, t_offset, v_dim1, v_offset, wv_dim1, wv_offset, z_dim1, + z_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + double sqrt(doublereal); + integer i__, 
j, k; + doublereal s, aa, bb, cc, dd, cs, sn; + integer jw; + doublereal evi, evk, foo; + integer kln; + doublereal tau, ulp; + integer lwk1, lwk2, lwk3; + doublereal beta; + integer kend, kcol, info, nmin, ifst, ilst, ltop, krow; + extern int dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, ftnlen), + dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, + ftnlen); + logical bulge; + extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); + integer infqr, kwtop; + extern int dlanv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), + dlaqr4_(logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, integer *), + dlabad_(doublereal *, doublereal *); + extern doublereal dlamch_(char *, ftnlen); + extern int dgehrd_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *), + dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *), + dlahqr_(logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, + integer *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen); + doublereal safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + doublereal safmax; + extern int dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, ftnlen), + dtrexc_(char *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, + integer *, 
doublereal *, integer *, ftnlen), + dormhr_(char *, char *, integer *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, ftnlen, + ftnlen); + logical sorted; + doublereal smlnum; + integer lwkopt; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --sr; + --si; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + --work; + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1, i__2); + if (jw <= 2) { + lwkopt = 1; + } else { + i__1 = jw - 1; + dgehrd_(&jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &work[1], &c_n1, &info); + lwk1 = (integer)work[1]; + i__1 = jw - 1; + dormhr_((char *)"R", (char *)"N", &jw, &jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &v[v_offset], ldv, + &work[1], &c_n1, &info, (ftnlen)1, (ftnlen)1); + lwk2 = (integer)work[1]; + dlaqr4_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[1], &si[1], &c__1, &jw, + &v[v_offset], ldv, &work[1], &c_n1, &infqr); + lwk3 = (integer)work[1]; + i__1 = jw + max(lwk1, lwk2); + lwkopt = max(i__1, lwk3); + } + if (*lwork == -1) { + work[1] = (doublereal)lwkopt; + return 0; + } + *ns = 0; + *nd = 0; + work[1] = 1.; + if (*ktop > *kbot) { + return 0; + } + if (*nw < 1) { + return 0; + } + safmin = dlamch_((char *)"SAFE MINIMUM", (ftnlen)12); + safmax = 1. 
/ safmin; + dlabad_(&safmin, &safmax); + ulp = dlamch_((char *)"PRECISION", (ftnlen)9); + smlnum = safmin * ((doublereal)(*n) / ulp); + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1, i__2); + kwtop = *kbot - jw + 1; + if (kwtop == *ktop) { + s = 0.; + } else { + s = h__[kwtop + (kwtop - 1) * h_dim1]; + } + if (*kbot == kwtop) { + sr[kwtop] = h__[kwtop + kwtop * h_dim1]; + si[kwtop] = 0.; + *ns = 1; + *nd = 0; + d__2 = smlnum, d__3 = ulp * (d__1 = h__[kwtop + kwtop * h_dim1], abs(d__1)); + if (abs(s) <= max(d__2, d__3)) { + *ns = 0; + *nd = 1; + if (kwtop > *ktop) { + h__[kwtop + (kwtop - 1) * h_dim1] = 0.; + } + } + work[1] = 1.; + return 0; + } + dlacpy_((char *)"U", &jw, &jw, &h__[kwtop + kwtop * h_dim1], ldh, &t[t_offset], ldt, (ftnlen)1); + i__1 = jw - 1; + i__2 = *ldh + 1; + i__3 = *ldt + 1; + dcopy_(&i__1, &h__[kwtop + 1 + kwtop * h_dim1], &i__2, &t[t_dim1 + 2], &i__3); + dlaset_((char *)"A", &jw, &jw, &c_b17, &c_b18, &v[v_offset], ldv, (ftnlen)1); + nmin = ilaenv_(&c__12, (char *)"DLAQR3", (char *)"SV", &jw, &c__1, &jw, lwork, (ftnlen)6, (ftnlen)2); + if (jw > nmin) { + dlaqr4_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[kwtop], &si[kwtop], &c__1, + &jw, &v[v_offset], ldv, &work[1], lwork, &infqr); + } else { + dlahqr_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[kwtop], &si[kwtop], &c__1, + &jw, &v[v_offset], ldv, &infqr); + } + i__1 = jw - 3; + for (j = 1; j <= i__1; ++j) { + t[j + 2 + j * t_dim1] = 0.; + t[j + 3 + j * t_dim1] = 0.; + } + if (jw > 2) { + t[jw + (jw - 2) * t_dim1] = 0.; + } + *ns = jw; + ilst = infqr + 1; +L20: + if (ilst <= *ns) { + if (*ns == 1) { + bulge = FALSE_; + } else { + bulge = t[*ns + (*ns - 1) * t_dim1] != 0.; + } + if (!bulge) { + foo = (d__1 = t[*ns + *ns * t_dim1], abs(d__1)); + if (foo == 0.) 
{ + foo = abs(s); + } + d__2 = smlnum, d__3 = ulp * foo; + if ((d__1 = s * v[*ns * v_dim1 + 1], abs(d__1)) <= max(d__2, d__3)) { + --(*ns); + } else { + ifst = *ns; + dtrexc_((char *)"V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, &ilst, &work[1], + &info, (ftnlen)1); + ++ilst; + } + } else { + foo = (d__3 = t[*ns + *ns * t_dim1], abs(d__3)) + + sqrt((d__1 = t[*ns + (*ns - 1) * t_dim1], abs(d__1))) * + sqrt((d__2 = t[*ns - 1 + *ns * t_dim1], abs(d__2))); + if (foo == 0.) { + foo = abs(s); + } + d__3 = (d__1 = s * v[*ns * v_dim1 + 1], abs(d__1)), + d__4 = (d__2 = s * v[(*ns - 1) * v_dim1 + 1], abs(d__2)); + d__5 = smlnum, d__6 = ulp * foo; + if (max(d__3, d__4) <= max(d__5, d__6)) { + *ns += -2; + } else { + ifst = *ns; + dtrexc_((char *)"V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, &ilst, &work[1], + &info, (ftnlen)1); + ilst += 2; + } + } + goto L20; + } + if (*ns == 0) { + s = 0.; + } + if (*ns < jw) { + sorted = FALSE_; + i__ = *ns + 1; + L30: + if (sorted) { + goto L50; + } + sorted = TRUE_; + kend = i__ - 1; + i__ = infqr + 1; + if (i__ == *ns) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.) { + k = i__ + 1; + } else { + k = i__ + 2; + } + L40: + if (k <= kend) { + if (k == i__ + 1) { + evi = (d__1 = t[i__ + i__ * t_dim1], abs(d__1)); + } else { + evi = (d__3 = t[i__ + i__ * t_dim1], abs(d__3)) + + sqrt((d__1 = t[i__ + 1 + i__ * t_dim1], abs(d__1))) * + sqrt((d__2 = t[i__ + (i__ + 1) * t_dim1], abs(d__2))); + } + if (k == kend) { + evk = (d__1 = t[k + k * t_dim1], abs(d__1)); + } else if (t[k + 1 + k * t_dim1] == 0.) 
{ + evk = (d__1 = t[k + k * t_dim1], abs(d__1)); + } else { + evk = (d__3 = t[k + k * t_dim1], abs(d__3)) + + sqrt((d__1 = t[k + 1 + k * t_dim1], abs(d__1))) * + sqrt((d__2 = t[k + (k + 1) * t_dim1], abs(d__2))); + } + if (evi >= evk) { + i__ = k; + } else { + sorted = FALSE_; + ifst = i__; + ilst = k; + dtrexc_((char *)"V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, &ilst, &work[1], + &info, (ftnlen)1); + if (info == 0) { + i__ = ilst; + } else { + i__ = k; + } + } + if (i__ == kend) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.) { + k = i__ + 1; + } else { + k = i__ + 2; + } + goto L40; + } + goto L30; + L50:; + } + i__ = jw; +L60: + if (i__ >= infqr + 1) { + if (i__ == infqr + 1) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.; + --i__; + } else if (t[i__ + (i__ - 1) * t_dim1] == 0.) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.; + --i__; + } else { + aa = t[i__ - 1 + (i__ - 1) * t_dim1]; + cc = t[i__ + (i__ - 1) * t_dim1]; + bb = t[i__ - 1 + i__ * t_dim1]; + dd = t[i__ + i__ * t_dim1]; + dlanv2_(&aa, &bb, &cc, &dd, &sr[kwtop + i__ - 2], &si[kwtop + i__ - 2], + &sr[kwtop + i__ - 1], &si[kwtop + i__ - 1], &cs, &sn); + i__ += -2; + } + goto L60; + } + if (*ns < jw || s == 0.) { + if (*ns > 1 && s != 0.) 
{ + dcopy_(ns, &v[v_offset], ldv, &work[1], &c__1); + beta = work[1]; + dlarfg_(ns, &beta, &work[2], &c__1, &tau); + work[1] = 1.; + i__1 = jw - 2; + i__2 = jw - 2; + dlaset_((char *)"L", &i__1, &i__2, &c_b17, &c_b17, &t[t_dim1 + 3], ldt, (ftnlen)1); + dlarf_((char *)"L", ns, &jw, &work[1], &c__1, &tau, &t[t_offset], ldt, &work[jw + 1], + (ftnlen)1); + dlarf_((char *)"R", ns, ns, &work[1], &c__1, &tau, &t[t_offset], ldt, &work[jw + 1], (ftnlen)1); + dlarf_((char *)"R", &jw, ns, &work[1], &c__1, &tau, &v[v_offset], ldv, &work[jw + 1], + (ftnlen)1); + i__1 = *lwork - jw; + dgehrd_(&jw, &c__1, ns, &t[t_offset], ldt, &work[1], &work[jw + 1], &i__1, &info); + } + if (kwtop > 1) { + h__[kwtop + (kwtop - 1) * h_dim1] = s * v[v_dim1 + 1]; + } + dlacpy_((char *)"U", &jw, &jw, &t[t_offset], ldt, &h__[kwtop + kwtop * h_dim1], ldh, (ftnlen)1); + i__1 = jw - 1; + i__2 = *ldt + 1; + i__3 = *ldh + 1; + dcopy_(&i__1, &t[t_dim1 + 2], &i__2, &h__[kwtop + 1 + kwtop * h_dim1], &i__3); + if (*ns > 1 && s != 0.) { + i__1 = *lwork - jw; + dormhr_((char *)"R", (char *)"N", &jw, ns, &c__1, ns, &t[t_offset], ldt, &work[1], &v[v_offset], ldv, + &work[jw + 1], &i__1, &info, (ftnlen)1, (ftnlen)1); + } + if (*wantt) { + ltop = 1; + } else { + ltop = *ktop; + } + i__1 = kwtop - 1; + i__2 = *nv; + for (krow = ltop; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += i__2) { + i__3 = *nv, i__4 = kwtop - krow; + kln = min(i__3, i__4); + dgemm_((char *)"N", (char *)"N", &kln, &jw, &jw, &c_b18, &h__[krow + kwtop * h_dim1], ldh, &v[v_offset], + ldv, &c_b17, &wv[wv_offset], ldwv, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"A", &kln, &jw, &wv[wv_offset], ldwv, &h__[krow + kwtop * h_dim1], ldh, + (ftnlen)1); + } + if (*wantt) { + i__2 = *n; + i__1 = *nh; + for (kcol = *kbot + 1; i__1 < 0 ? 
kcol >= i__2 : kcol <= i__2; kcol += i__1) { + i__3 = *nh, i__4 = *n - kcol + 1; + kln = min(i__3, i__4); + dgemm_((char *)"C", (char *)"N", &jw, &kln, &jw, &c_b18, &v[v_offset], ldv, + &h__[kwtop + kcol * h_dim1], ldh, &c_b17, &t[t_offset], ldt, (ftnlen)1, + (ftnlen)1); + dlacpy_((char *)"A", &jw, &kln, &t[t_offset], ldt, &h__[kwtop + kcol * h_dim1], ldh, + (ftnlen)1); + } + } + if (*wantz) { + i__1 = *ihiz; + i__2 = *nv; + for (krow = *iloz; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += i__2) { + i__3 = *nv, i__4 = *ihiz - krow + 1; + kln = min(i__3, i__4); + dgemm_((char *)"N", (char *)"N", &kln, &jw, &jw, &c_b18, &z__[krow + kwtop * z_dim1], ldz, + &v[v_offset], ldv, &c_b17, &wv[wv_offset], ldwv, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"A", &kln, &jw, &wv[wv_offset], ldwv, &z__[krow + kwtop * z_dim1], ldz, + (ftnlen)1); + } + } + } + *nd = jw - *ns; + *ns -= infqr; + work[1] = (doublereal)lwkopt; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlaqr4.cpp b/lib/linalg/dlaqr4.cpp new file mode 100644 index 0000000000..e32193ee2d --- /dev/null +++ b/lib/linalg/dlaqr4.cpp @@ -0,0 +1,298 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__13 = 13; +static integer c__15 = 15; +static integer c_n1 = -1; +static integer c__12 = 12; +static integer c__14 = 14; +static integer c__16 = 16; +static logical c_false = FALSE_; +static integer c__1 = 1; +static integer c__3 = 3; +int dlaqr4_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, doublereal *h__, + integer *ldh, doublereal *wr, doublereal *wi, integer *iloz, integer *ihiz, + doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *info) +{ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4; + integer i__, k; + doublereal aa, bb, cc, dd; + integer ld; + doublereal cs; + integer nh, it, ks, kt; + doublereal sn; + integer ku, kv, ls, ns; + doublereal ss; + 
integer nw, inf, kdu, nho, nve, kwh, nsr, nwr, kwv, ndec, ndfl, kbot, nmin; + doublereal swap; + integer ktop; + doublereal zdum[1]; + integer kacc22, itmax, nsmax, nwmax, kwtop; + extern int dlaqr2_(logical *, logical *, integer *, integer *, integer *, integer *, + doublereal *, integer *, integer *, integer *, doublereal *, integer *, + integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *), + dlanv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *), + dlaqr5_(logical *, logical *, integer *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *, doublereal *, integer *); + integer nibble; + extern int dlahqr_(logical *, logical *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + char jbcmpz[2]; + integer nwupbd; + logical sorted; + integer lwkopt; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + *info = 0; + if (*n == 0) { + work[1] = 1.; + return 0; + } + if (*n <= 11) { + lwkopt = 1; + if (*lwork != -1) { + dlahqr_(wantt, wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[1], iloz, ihiz, + &z__[z_offset], ldz, info); + } + } else { + *info = 0; + if (*wantt) { + *(unsigned char *)jbcmpz = 'S'; + } else { + *(unsigned char *)jbcmpz = 'E'; + } + if 
(*wantz) { + *(unsigned char *)&jbcmpz[1] = 'V'; + } else { + *(unsigned char *)&jbcmpz[1] = 'N'; + } + nwr = ilaenv_(&c__13, (char *)"DLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + nwr = max(2, nwr); + i__1 = *ihi - *ilo + 1, i__2 = (*n - 1) / 3, i__1 = min(i__1, i__2); + nwr = min(i__1, nwr); + nsr = ilaenv_(&c__15, (char *)"DLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + i__1 = nsr, i__2 = (*n + 6) / 9, i__1 = min(i__1, i__2), i__2 = *ihi - *ilo; + nsr = min(i__1, i__2); + i__1 = 2, i__2 = nsr - nsr % 2; + nsr = max(i__1, i__2); + i__1 = nwr + 1; + dlaqr2_(wantt, wantz, n, ilo, ihi, &i__1, &h__[h_offset], ldh, iloz, ihiz, &z__[z_offset], + ldz, &ls, &ld, &wr[1], &wi[1], &h__[h_offset], ldh, n, &h__[h_offset], ldh, n, + &h__[h_offset], ldh, &work[1], &c_n1); + i__1 = nsr * 3 / 2, i__2 = (integer)work[1]; + lwkopt = max(i__1, i__2); + if (*lwork == -1) { + work[1] = (doublereal)lwkopt; + return 0; + } + nmin = ilaenv_(&c__12, (char *)"DLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + nmin = max(11, nmin); + nibble = ilaenv_(&c__14, (char *)"DLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + nibble = max(0, nibble); + kacc22 = ilaenv_(&c__16, (char *)"DLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + kacc22 = max(0, kacc22); + kacc22 = min(2, kacc22); + i__1 = (*n - 1) / 3, i__2 = *lwork / 2; + nwmax = min(i__1, i__2); + nw = nwmax; + i__1 = (*n + 6) / 9, i__2 = (*lwork << 1) / 3; + nsmax = min(i__1, i__2); + nsmax -= nsmax % 2; + ndfl = 1; + i__1 = 10, i__2 = *ihi - *ilo + 1; + itmax = max(i__1, i__2) * 30; + kbot = *ihi; + i__1 = itmax; + for (it = 1; it <= i__1; ++it) { + if (kbot < *ilo) { + goto L90; + } + i__2 = *ilo + 1; + for (k = kbot; k >= i__2; --k) { + if (h__[k + (k - 1) * h_dim1] == 0.) 
{ + goto L20; + } + } + k = *ilo; + L20: + ktop = k; + nh = kbot - ktop + 1; + nwupbd = min(nh, nwmax); + if (ndfl < 5) { + nw = min(nwupbd, nwr); + } else { + i__2 = nwupbd, i__3 = nw << 1; + nw = min(i__2, i__3); + } + if (nw < nwmax) { + if (nw >= nh - 1) { + nw = nh; + } else { + kwtop = kbot - nw + 1; + if ((d__1 = h__[kwtop + (kwtop - 1) * h_dim1], abs(d__1)) > + (d__2 = h__[kwtop - 1 + (kwtop - 2) * h_dim1], abs(d__2))) { + ++nw; + } + } + } + if (ndfl < 5) { + ndec = -1; + } else if (ndec >= 0 || nw >= nwupbd) { + ++ndec; + if (nw - ndec < 2) { + ndec = 0; + } + nw -= ndec; + } + kv = *n - nw + 1; + kt = nw + 1; + nho = *n - nw - 1 - kt + 1; + kwv = nw + 2; + nve = *n - nw - kwv + 1; + dlaqr2_(wantt, wantz, n, &ktop, &kbot, &nw, &h__[h_offset], ldh, iloz, ihiz, + &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], &h__[kv + h_dim1], ldh, &nho, + &h__[kv + kt * h_dim1], ldh, &nve, &h__[kwv + h_dim1], ldh, &work[1], lwork); + kbot -= ld; + ks = kbot - ls + 1; + if (ld == 0 || ld * 100 <= nw * nibble && kbot - ktop + 1 > min(nmin, nwmax)) { + i__4 = 2, i__5 = kbot - ktop; + i__2 = min(nsmax, nsr), i__3 = max(i__4, i__5); + ns = min(i__2, i__3); + ns -= ns % 2; + if (ndfl % 6 == 0) { + ks = kbot - ns + 1; + i__3 = ks + 1, i__4 = ktop + 2; + i__2 = max(i__3, i__4); + for (i__ = kbot; i__ >= i__2; i__ += -2) { + ss = (d__1 = h__[i__ + (i__ - 1) * h_dim1], abs(d__1)) + + (d__2 = h__[i__ - 1 + (i__ - 2) * h_dim1], abs(d__2)); + aa = ss * .75 + h__[i__ + i__ * h_dim1]; + bb = ss; + cc = ss * -.4375; + dd = aa; + dlanv2_(&aa, &bb, &cc, &dd, &wr[i__ - 1], &wi[i__ - 1], &wr[i__], &wi[i__], + &cs, &sn); + } + if (ks == ktop) { + wr[ks + 1] = h__[ks + 1 + (ks + 1) * h_dim1]; + wi[ks + 1] = 0.; + wr[ks] = wr[ks + 1]; + wi[ks] = wi[ks + 1]; + } + } else { + if (kbot - ks + 1 <= ns / 2) { + ks = kbot - ns + 1; + kt = *n - ns + 1; + dlacpy_((char *)"A", &ns, &ns, &h__[ks + ks * h_dim1], ldh, &h__[kt + h_dim1], ldh, + (ftnlen)1); + dlahqr_(&c_false, &c_false, &ns, &c__1, &ns, 
&h__[kt + h_dim1], ldh, + &wr[ks], &wi[ks], &c__1, &c__1, zdum, &c__1, &inf); + ks += inf; + if (ks >= kbot) { + aa = h__[kbot - 1 + (kbot - 1) * h_dim1]; + cc = h__[kbot + (kbot - 1) * h_dim1]; + bb = h__[kbot - 1 + kbot * h_dim1]; + dd = h__[kbot + kbot * h_dim1]; + dlanv2_(&aa, &bb, &cc, &dd, &wr[kbot - 1], &wi[kbot - 1], &wr[kbot], + &wi[kbot], &cs, &sn); + ks = kbot - 1; + } + } + if (kbot - ks + 1 > ns) { + sorted = FALSE_; + i__2 = ks + 1; + for (k = kbot; k >= i__2; --k) { + if (sorted) { + goto L60; + } + sorted = TRUE_; + i__3 = k - 1; + for (i__ = ks; i__ <= i__3; ++i__) { + if ((d__1 = wr[i__], abs(d__1)) + (d__2 = wi[i__], abs(d__2)) < + (d__3 = wr[i__ + 1], abs(d__3)) + + (d__4 = wi[i__ + 1], abs(d__4))) { + sorted = FALSE_; + swap = wr[i__]; + wr[i__] = wr[i__ + 1]; + wr[i__ + 1] = swap; + swap = wi[i__]; + wi[i__] = wi[i__ + 1]; + wi[i__ + 1] = swap; + } + } + } + L60:; + } + i__2 = ks + 2; + for (i__ = kbot; i__ >= i__2; i__ += -2) { + if (wi[i__] != -wi[i__ - 1]) { + swap = wr[i__]; + wr[i__] = wr[i__ - 1]; + wr[i__ - 1] = wr[i__ - 2]; + wr[i__ - 2] = swap; + swap = wi[i__]; + wi[i__] = wi[i__ - 1]; + wi[i__ - 1] = wi[i__ - 2]; + wi[i__ - 2] = swap; + } + } + } + if (kbot - ks + 1 == 2) { + if (wi[kbot] == 0.) 
{ + if ((d__1 = wr[kbot] - h__[kbot + kbot * h_dim1], abs(d__1)) < + (d__2 = wr[kbot - 1] - h__[kbot + kbot * h_dim1], abs(d__2))) { + wr[kbot - 1] = wr[kbot]; + } else { + wr[kbot] = wr[kbot - 1]; + } + } + } + i__2 = ns, i__3 = kbot - ks + 1; + ns = min(i__2, i__3); + ns -= ns % 2; + ks = kbot - ns + 1; + kdu = ns * 3 - 3; + ku = *n - kdu + 1; + kwh = kdu + 1; + nho = *n - kdu - 3 - (kdu + 1) + 1; + kwv = kdu + 4; + nve = *n - kdu - kwv + 1; + dlaqr5_(wantt, wantz, &kacc22, n, &ktop, &kbot, &ns, &wr[ks], &wi[ks], + &h__[h_offset], ldh, iloz, ihiz, &z__[z_offset], ldz, &work[1], &c__3, + &h__[ku + h_dim1], ldh, &nve, &h__[kwv + h_dim1], ldh, &nho, + &h__[ku + kwh * h_dim1], ldh); + } + if (ld > 0) { + ndfl = 1; + } else { + ++ndfl; + } + } + *info = kbot; + L90:; + } + work[1] = (doublereal)lwkopt; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlaqr5.cpp b/lib/linalg/dlaqr5.cpp new file mode 100644 index 0000000000..1cd0ac9d88 --- /dev/null +++ b/lib/linalg/dlaqr5.cpp @@ -0,0 +1,521 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublereal c_b7 = 0.; +static doublereal c_b8 = 1.; +static integer c__3 = 3; +static integer c__1 = 1; +static integer c__2 = 2; +int dlaqr5_(logical *wantt, logical *wantz, integer *kacc22, integer *n, integer *ktop, + integer *kbot, integer *nshfts, doublereal *sr, doublereal *si, doublereal *h__, + integer *ldh, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, + doublereal *v, integer *ldv, doublereal *u, integer *ldu, integer *nv, doublereal *wv, + integer *ldwv, integer *nh, doublereal *wh, integer *ldwh) +{ + integer h_dim1, h_offset, u_dim1, u_offset, v_dim1, v_offset, wh_dim1, wh_offset, wv_dim1, + wv_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; + doublereal d__1, d__2, d__3, d__4, d__5; + integer i__, j, k, m, i2, j2, i4, j4, k1; + doublereal h11, h12, h21, h22; + integer m22, ns, nu; + doublereal vt[3], scl; + integer kdu, kms; + doublereal 
ulp; + integer knz, kzs; + doublereal tst1, tst2, beta; + logical blk22, bmp22; + integer mend, jcol, jlen, jbot, mbot; + doublereal swap; + integer jtop, jrow, mtop; + doublereal alpha; + logical accum; + extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, + ftnlen, ftnlen); + integer ndcol, incol, krcol, nbmps; + extern int dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen), + dlaqr1_(integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *), + dlabad_(doublereal *, doublereal *); + extern doublereal dlamch_(char *, ftnlen); + extern int dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen); + doublereal safmin; + extern int dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, ftnlen); + doublereal safmax, refsum; + integer mstart; + doublereal smlnum; + --sr; + --si; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + wh_dim1 = *ldwh; + wh_offset = 1 + wh_dim1; + wh -= wh_offset; + if (*nshfts < 2) { + return 0; + } + if (*ktop >= *kbot) { + return 0; + } + i__1 = *nshfts - 2; + for (i__ = 1; i__ <= i__1; i__ += 2) { + if (si[i__] != -si[i__ + 1]) { + swap = sr[i__]; + sr[i__] = sr[i__ + 1]; + sr[i__ + 1] = sr[i__ + 2]; + sr[i__ + 2] = swap; + swap = si[i__]; + si[i__] = si[i__ + 1]; + si[i__ + 1] = si[i__ + 2]; + si[i__ + 2] = swap; + } + } + ns = *nshfts - *nshfts % 2; + safmin = 
dlamch_((char *)"SAFE MINIMUM", (ftnlen)12); + safmax = 1. / safmin; + dlabad_(&safmin, &safmax); + ulp = dlamch_((char *)"PRECISION", (ftnlen)9); + smlnum = safmin * ((doublereal)(*n) / ulp); + accum = *kacc22 == 1 || *kacc22 == 2; + blk22 = ns > 2 && *kacc22 == 2; + if (*ktop + 2 <= *kbot) { + h__[*ktop + 2 + *ktop * h_dim1] = 0.; + } + nbmps = ns / 2; + kdu = nbmps * 6 - 3; + i__1 = *kbot - 2; + i__2 = nbmps * 3 - 2; + for (incol = (1 - nbmps) * 3 + *ktop - 1; i__2 < 0 ? incol >= i__1 : incol <= i__1; + incol += i__2) { + ndcol = incol + kdu; + if (accum) { + dlaset_((char *)"ALL", &kdu, &kdu, &c_b7, &c_b8, &u[u_offset], ldu, (ftnlen)3); + } + i__4 = incol + nbmps * 3 - 3, i__5 = *kbot - 2; + i__3 = min(i__4, i__5); + for (krcol = incol; krcol <= i__3; ++krcol) { + i__4 = 1, i__5 = (*ktop - 1 - krcol + 2) / 3 + 1; + mtop = max(i__4, i__5); + i__4 = nbmps, i__5 = (*kbot - krcol) / 3; + mbot = min(i__4, i__5); + m22 = mbot + 1; + bmp22 = mbot < nbmps && krcol + (m22 - 1) * 3 == *kbot - 2; + i__4 = mbot; + for (m = mtop; m <= i__4; ++m) { + k = krcol + (m - 1) * 3; + if (k == *ktop - 1) { + dlaqr1_(&c__3, &h__[*ktop + *ktop * h_dim1], ldh, &sr[(m << 1) - 1], + &si[(m << 1) - 1], &sr[m * 2], &si[m * 2], &v[m * v_dim1 + 1]); + alpha = v[m * v_dim1 + 1]; + dlarfg_(&c__3, &alpha, &v[m * v_dim1 + 2], &c__1, &v[m * v_dim1 + 1]); + } else { + beta = h__[k + 1 + k * h_dim1]; + v[m * v_dim1 + 2] = h__[k + 2 + k * h_dim1]; + v[m * v_dim1 + 3] = h__[k + 3 + k * h_dim1]; + dlarfg_(&c__3, &beta, &v[m * v_dim1 + 2], &c__1, &v[m * v_dim1 + 1]); + if (h__[k + 3 + k * h_dim1] != 0. || h__[k + 3 + (k + 1) * h_dim1] != 0. || + h__[k + 3 + (k + 2) * h_dim1] == 0.) 
{ + h__[k + 1 + k * h_dim1] = beta; + h__[k + 2 + k * h_dim1] = 0.; + h__[k + 3 + k * h_dim1] = 0.; + } else { + dlaqr1_(&c__3, &h__[k + 1 + (k + 1) * h_dim1], ldh, &sr[(m << 1) - 1], + &si[(m << 1) - 1], &sr[m * 2], &si[m * 2], vt); + alpha = vt[0]; + dlarfg_(&c__3, &alpha, &vt[1], &c__1, vt); + refsum = + vt[0] * (h__[k + 1 + k * h_dim1] + vt[1] * h__[k + 2 + k * h_dim1]); + if ((d__1 = h__[k + 2 + k * h_dim1] - refsum * vt[1], abs(d__1)) + + (d__2 = refsum * vt[2], abs(d__2)) > + ulp * ((d__3 = h__[k + k * h_dim1], abs(d__3)) + + (d__4 = h__[k + 1 + (k + 1) * h_dim1], abs(d__4)) + + (d__5 = h__[k + 2 + (k + 2) * h_dim1], abs(d__5)))) { + h__[k + 1 + k * h_dim1] = beta; + h__[k + 2 + k * h_dim1] = 0.; + h__[k + 3 + k * h_dim1] = 0.; + } else { + h__[k + 1 + k * h_dim1] -= refsum; + h__[k + 2 + k * h_dim1] = 0.; + h__[k + 3 + k * h_dim1] = 0.; + v[m * v_dim1 + 1] = vt[0]; + v[m * v_dim1 + 2] = vt[1]; + v[m * v_dim1 + 3] = vt[2]; + } + } + } + } + k = krcol + (m22 - 1) * 3; + if (bmp22) { + if (k == *ktop - 1) { + dlaqr1_(&c__2, &h__[k + 1 + (k + 1) * h_dim1], ldh, &sr[(m22 << 1) - 1], + &si[(m22 << 1) - 1], &sr[m22 * 2], &si[m22 * 2], &v[m22 * v_dim1 + 1]); + beta = v[m22 * v_dim1 + 1]; + dlarfg_(&c__2, &beta, &v[m22 * v_dim1 + 2], &c__1, &v[m22 * v_dim1 + 1]); + } else { + beta = h__[k + 1 + k * h_dim1]; + v[m22 * v_dim1 + 2] = h__[k + 2 + k * h_dim1]; + dlarfg_(&c__2, &beta, &v[m22 * v_dim1 + 2], &c__1, &v[m22 * v_dim1 + 1]); + h__[k + 1 + k * h_dim1] = beta; + h__[k + 2 + k * h_dim1] = 0.; + } + } + if (accum) { + jbot = min(ndcol, *kbot); + } else if (*wantt) { + jbot = *n; + } else { + jbot = *kbot; + } + i__4 = jbot; + for (j = max(*ktop, krcol); j <= i__4; ++j) { + i__5 = mbot, i__6 = (j - krcol + 2) / 3; + mend = min(i__5, i__6); + i__5 = mend; + for (m = mtop; m <= i__5; ++m) { + k = krcol + (m - 1) * 3; + refsum = v[m * v_dim1 + 1] * (h__[k + 1 + j * h_dim1] + + v[m * v_dim1 + 2] * h__[k + 2 + j * h_dim1] + + v[m * v_dim1 + 3] * h__[k + 3 + j * h_dim1]); 
+ h__[k + 1 + j * h_dim1] -= refsum; + h__[k + 2 + j * h_dim1] -= refsum * v[m * v_dim1 + 2]; + h__[k + 3 + j * h_dim1] -= refsum * v[m * v_dim1 + 3]; + } + } + if (bmp22) { + k = krcol + (m22 - 1) * 3; + i__4 = k + 1; + i__5 = jbot; + for (j = max(i__4, *ktop); j <= i__5; ++j) { + refsum = v[m22 * v_dim1 + 1] * (h__[k + 1 + j * h_dim1] + + v[m22 * v_dim1 + 2] * h__[k + 2 + j * h_dim1]); + h__[k + 1 + j * h_dim1] -= refsum; + h__[k + 2 + j * h_dim1] -= refsum * v[m22 * v_dim1 + 2]; + } + } + if (accum) { + jtop = max(*ktop, incol); + } else if (*wantt) { + jtop = 1; + } else { + jtop = *ktop; + } + i__5 = mbot; + for (m = mtop; m <= i__5; ++m) { + if (v[m * v_dim1 + 1] != 0.) { + k = krcol + (m - 1) * 3; + i__6 = *kbot, i__7 = k + 3; + i__4 = min(i__6, i__7); + for (j = jtop; j <= i__4; ++j) { + refsum = + v[m * v_dim1 + 1] * (h__[j + (k + 1) * h_dim1] + + v[m * v_dim1 + 2] * h__[j + (k + 2) * h_dim1] + + v[m * v_dim1 + 3] * h__[j + (k + 3) * h_dim1]); + h__[j + (k + 1) * h_dim1] -= refsum; + h__[j + (k + 2) * h_dim1] -= refsum * v[m * v_dim1 + 2]; + h__[j + (k + 3) * h_dim1] -= refsum * v[m * v_dim1 + 3]; + } + if (accum) { + kms = k - incol; + i__4 = 1, i__6 = *ktop - incol; + i__7 = kdu; + for (j = max(i__4, i__6); j <= i__7; ++j) { + refsum = + v[m * v_dim1 + 1] * (u[j + (kms + 1) * u_dim1] + + v[m * v_dim1 + 2] * u[j + (kms + 2) * u_dim1] + + v[m * v_dim1 + 3] * u[j + (kms + 3) * u_dim1]); + u[j + (kms + 1) * u_dim1] -= refsum; + u[j + (kms + 2) * u_dim1] -= refsum * v[m * v_dim1 + 2]; + u[j + (kms + 3) * u_dim1] -= refsum * v[m * v_dim1 + 3]; + } + } else if (*wantz) { + i__7 = *ihiz; + for (j = *iloz; j <= i__7; ++j) { + refsum = + v[m * v_dim1 + 1] * (z__[j + (k + 1) * z_dim1] + + v[m * v_dim1 + 2] * z__[j + (k + 2) * z_dim1] + + v[m * v_dim1 + 3] * z__[j + (k + 3) * z_dim1]); + z__[j + (k + 1) * z_dim1] -= refsum; + z__[j + (k + 2) * z_dim1] -= refsum * v[m * v_dim1 + 2]; + z__[j + (k + 3) * z_dim1] -= refsum * v[m * v_dim1 + 3]; + } + } + } + } + k = 
krcol + (m22 - 1) * 3; + if (bmp22) { + if (v[m22 * v_dim1 + 1] != 0.) { + i__7 = *kbot, i__4 = k + 3; + i__5 = min(i__7, i__4); + for (j = jtop; j <= i__5; ++j) { + refsum = + v[m22 * v_dim1 + 1] * (h__[j + (k + 1) * h_dim1] + + v[m22 * v_dim1 + 2] * h__[j + (k + 2) * h_dim1]); + h__[j + (k + 1) * h_dim1] -= refsum; + h__[j + (k + 2) * h_dim1] -= refsum * v[m22 * v_dim1 + 2]; + } + if (accum) { + kms = k - incol; + i__5 = 1, i__7 = *ktop - incol; + i__4 = kdu; + for (j = max(i__5, i__7); j <= i__4; ++j) { + refsum = v[m22 * v_dim1 + 1] * + (u[j + (kms + 1) * u_dim1] + + v[m22 * v_dim1 + 2] * u[j + (kms + 2) * u_dim1]); + u[j + (kms + 1) * u_dim1] -= refsum; + u[j + (kms + 2) * u_dim1] -= refsum * v[m22 * v_dim1 + 2]; + } + } else if (*wantz) { + i__4 = *ihiz; + for (j = *iloz; j <= i__4; ++j) { + refsum = v[m22 * v_dim1 + 1] * + (z__[j + (k + 1) * z_dim1] + + v[m22 * v_dim1 + 2] * z__[j + (k + 2) * z_dim1]); + z__[j + (k + 1) * z_dim1] -= refsum; + z__[j + (k + 2) * z_dim1] -= refsum * v[m22 * v_dim1 + 2]; + } + } + } + } + mstart = mtop; + if (krcol + (mstart - 1) * 3 < *ktop) { + ++mstart; + } + mend = mbot; + if (bmp22) { + ++mend; + } + if (krcol == *kbot - 2) { + ++mend; + } + i__4 = mend; + for (m = mstart; m <= i__4; ++m) { + i__5 = *kbot - 1, i__7 = krcol + (m - 1) * 3; + k = min(i__5, i__7); + if (h__[k + 1 + k * h_dim1] != 0.) { + tst1 = (d__1 = h__[k + k * h_dim1], abs(d__1)) + + (d__2 = h__[k + 1 + (k + 1) * h_dim1], abs(d__2)); + if (tst1 == 0.) 
{ + if (k >= *ktop + 1) { + tst1 += (d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)); + } + if (k >= *ktop + 2) { + tst1 += (d__1 = h__[k + (k - 2) * h_dim1], abs(d__1)); + } + if (k >= *ktop + 3) { + tst1 += (d__1 = h__[k + (k - 3) * h_dim1], abs(d__1)); + } + if (k <= *kbot - 2) { + tst1 += (d__1 = h__[k + 2 + (k + 1) * h_dim1], abs(d__1)); + } + if (k <= *kbot - 3) { + tst1 += (d__1 = h__[k + 3 + (k + 1) * h_dim1], abs(d__1)); + } + if (k <= *kbot - 4) { + tst1 += (d__1 = h__[k + 4 + (k + 1) * h_dim1], abs(d__1)); + } + } + d__2 = smlnum, d__3 = ulp * tst1; + if ((d__1 = h__[k + 1 + k * h_dim1], abs(d__1)) <= max(d__2, d__3)) { + d__3 = (d__1 = h__[k + 1 + k * h_dim1], abs(d__1)), + d__4 = (d__2 = h__[k + (k + 1) * h_dim1], abs(d__2)); + h12 = max(d__3, d__4); + d__3 = (d__1 = h__[k + 1 + k * h_dim1], abs(d__1)), + d__4 = (d__2 = h__[k + (k + 1) * h_dim1], abs(d__2)); + h21 = min(d__3, d__4); + d__3 = (d__1 = h__[k + 1 + (k + 1) * h_dim1], abs(d__1)), + d__4 = + (d__2 = h__[k + k * h_dim1] - h__[k + 1 + (k + 1) * h_dim1], abs(d__2)); + h11 = max(d__3, d__4); + d__3 = (d__1 = h__[k + 1 + (k + 1) * h_dim1], abs(d__1)), + d__4 = + (d__2 = h__[k + k * h_dim1] - h__[k + 1 + (k + 1) * h_dim1], abs(d__2)); + h22 = min(d__3, d__4); + scl = h11 + h12; + tst2 = h22 * (h11 / scl); + d__1 = smlnum, d__2 = ulp * tst2; + if (tst2 == 0. 
|| h21 * (h12 / scl) <= max(d__1, d__2)) { + h__[k + 1 + k * h_dim1] = 0.; + } + } + } + } + i__4 = nbmps, i__5 = (*kbot - krcol - 1) / 3; + mend = min(i__4, i__5); + i__4 = mend; + for (m = mtop; m <= i__4; ++m) { + k = krcol + (m - 1) * 3; + refsum = v[m * v_dim1 + 1] * v[m * v_dim1 + 3] * h__[k + 4 + (k + 3) * h_dim1]; + h__[k + 4 + (k + 1) * h_dim1] = -refsum; + h__[k + 4 + (k + 2) * h_dim1] = -refsum * v[m * v_dim1 + 2]; + h__[k + 4 + (k + 3) * h_dim1] -= refsum * v[m * v_dim1 + 3]; + } + } + if (accum) { + if (*wantt) { + jtop = 1; + jbot = *n; + } else { + jtop = *ktop; + jbot = *kbot; + } + if (!blk22 || incol < *ktop || ndcol > *kbot || ns <= 2) { + i__3 = 1, i__4 = *ktop - incol; + k1 = max(i__3, i__4); + i__3 = 0, i__4 = ndcol - *kbot; + nu = kdu - max(i__3, i__4) - k1 + 1; + i__3 = jbot; + i__4 = *nh; + for (jcol = min(ndcol, *kbot) + 1; i__4 < 0 ? jcol >= i__3 : jcol <= i__3; + jcol += i__4) { + i__5 = *nh, i__7 = jbot - jcol + 1; + jlen = min(i__5, i__7); + dgemm_((char *)"C", (char *)"N", &nu, &jlen, &nu, &c_b8, &u[k1 + k1 * u_dim1], ldu, + &h__[incol + k1 + jcol * h_dim1], ldh, &c_b7, &wh[wh_offset], ldwh, + (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"ALL", &nu, &jlen, &wh[wh_offset], ldwh, + &h__[incol + k1 + jcol * h_dim1], ldh, (ftnlen)3); + } + i__4 = max(*ktop, incol) - 1; + i__3 = *nv; + for (jrow = jtop; i__3 < 0 ? jrow >= i__4 : jrow <= i__4; jrow += i__3) { + i__5 = *nv, i__7 = max(*ktop, incol) - jrow; + jlen = min(i__5, i__7); + dgemm_((char *)"N", (char *)"N", &jlen, &nu, &nu, &c_b8, &h__[jrow + (incol + k1) * h_dim1], + ldh, &u[k1 + k1 * u_dim1], ldu, &c_b7, &wv[wv_offset], ldwv, (ftnlen)1, + (ftnlen)1); + dlacpy_((char *)"ALL", &jlen, &nu, &wv[wv_offset], ldwv, + &h__[jrow + (incol + k1) * h_dim1], ldh, (ftnlen)3); + } + if (*wantz) { + i__3 = *ihiz; + i__4 = *nv; + for (jrow = *iloz; i__4 < 0 ? 
jrow >= i__3 : jrow <= i__3; jrow += i__4) { + i__5 = *nv, i__7 = *ihiz - jrow + 1; + jlen = min(i__5, i__7); + dgemm_((char *)"N", (char *)"N", &jlen, &nu, &nu, &c_b8, &z__[jrow + (incol + k1) * z_dim1], + ldz, &u[k1 + k1 * u_dim1], ldu, &c_b7, &wv[wv_offset], ldwv, + (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"ALL", &jlen, &nu, &wv[wv_offset], ldwv, + &z__[jrow + (incol + k1) * z_dim1], ldz, (ftnlen)3); + } + } + } else { + i2 = (kdu + 1) / 2; + i4 = kdu; + j2 = i4 - i2; + j4 = kdu; + kzs = j4 - j2 - (ns + 1); + knz = ns + 1; + i__4 = jbot; + i__3 = *nh; + for (jcol = min(ndcol, *kbot) + 1; i__3 < 0 ? jcol >= i__4 : jcol <= i__4; + jcol += i__3) { + i__5 = *nh, i__7 = jbot - jcol + 1; + jlen = min(i__5, i__7); + dlacpy_((char *)"ALL", &knz, &jlen, &h__[incol + 1 + j2 + jcol * h_dim1], ldh, + &wh[kzs + 1 + wh_dim1], ldwh, (ftnlen)3); + dlaset_((char *)"ALL", &kzs, &jlen, &c_b7, &c_b7, &wh[wh_offset], ldwh, (ftnlen)3); + dtrmm_((char *)"L", (char *)"U", (char *)"C", (char *)"N", &knz, &jlen, &c_b8, &u[j2 + 1 + (kzs + 1) * u_dim1], + ldu, &wh[kzs + 1 + wh_dim1], ldwh, (ftnlen)1, (ftnlen)1, (ftnlen)1, + (ftnlen)1); + dgemm_((char *)"C", (char *)"N", &i2, &jlen, &j2, &c_b8, &u[u_offset], ldu, + &h__[incol + 1 + jcol * h_dim1], ldh, &c_b8, &wh[wh_offset], ldwh, + (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"ALL", &j2, &jlen, &h__[incol + 1 + jcol * h_dim1], ldh, + &wh[i2 + 1 + wh_dim1], ldwh, (ftnlen)3); + dtrmm_((char *)"L", (char *)"L", (char *)"C", (char *)"N", &j2, &jlen, &c_b8, &u[(i2 + 1) * u_dim1 + 1], ldu, + &wh[i2 + 1 + wh_dim1], ldwh, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__5 = i4 - i2; + i__7 = j4 - j2; + dgemm_((char *)"C", (char *)"N", &i__5, &jlen, &i__7, &c_b8, &u[j2 + 1 + (i2 + 1) * u_dim1], + ldu, &h__[incol + 1 + j2 + jcol * h_dim1], ldh, &c_b8, + &wh[i2 + 1 + wh_dim1], ldwh, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"ALL", &kdu, &jlen, &wh[wh_offset], ldwh, + &h__[incol + 1 + jcol * h_dim1], ldh, (ftnlen)3); + } + i__3 = max(incol, *ktop) - 1; + 
i__4 = *nv; + for (jrow = jtop; i__4 < 0 ? jrow >= i__3 : jrow <= i__3; jrow += i__4) { + i__5 = *nv, i__7 = max(incol, *ktop) - jrow; + jlen = min(i__5, i__7); + dlacpy_((char *)"ALL", &jlen, &knz, &h__[jrow + (incol + 1 + j2) * h_dim1], ldh, + &wv[(kzs + 1) * wv_dim1 + 1], ldwv, (ftnlen)3); + dlaset_((char *)"ALL", &jlen, &kzs, &c_b7, &c_b7, &wv[wv_offset], ldwv, (ftnlen)3); + dtrmm_((char *)"R", (char *)"U", (char *)"N", (char *)"N", &jlen, &knz, &c_b8, &u[j2 + 1 + (kzs + 1) * u_dim1], + ldu, &wv[(kzs + 1) * wv_dim1 + 1], ldwv, (ftnlen)1, (ftnlen)1, (ftnlen)1, + (ftnlen)1); + dgemm_((char *)"N", (char *)"N", &jlen, &i2, &j2, &c_b8, &h__[jrow + (incol + 1) * h_dim1], ldh, + &u[u_offset], ldu, &c_b8, &wv[wv_offset], ldwv, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"ALL", &jlen, &j2, &h__[jrow + (incol + 1) * h_dim1], ldh, + &wv[(i2 + 1) * wv_dim1 + 1], ldwv, (ftnlen)3); + i__5 = i4 - i2; + dtrmm_((char *)"R", (char *)"L", (char *)"N", (char *)"N", &jlen, &i__5, &c_b8, &u[(i2 + 1) * u_dim1 + 1], ldu, + &wv[(i2 + 1) * wv_dim1 + 1], ldwv, (ftnlen)1, (ftnlen)1, (ftnlen)1, + (ftnlen)1); + i__5 = i4 - i2; + i__7 = j4 - j2; + dgemm_((char *)"N", (char *)"N", &jlen, &i__5, &i__7, &c_b8, + &h__[jrow + (incol + 1 + j2) * h_dim1], ldh, + &u[j2 + 1 + (i2 + 1) * u_dim1], ldu, &c_b8, &wv[(i2 + 1) * wv_dim1 + 1], + ldwv, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"ALL", &jlen, &kdu, &wv[wv_offset], ldwv, + &h__[jrow + (incol + 1) * h_dim1], ldh, (ftnlen)3); + } + if (*wantz) { + i__4 = *ihiz; + i__3 = *nv; + for (jrow = *iloz; i__3 < 0 ? 
jrow >= i__4 : jrow <= i__4; jrow += i__3) { + i__5 = *nv, i__7 = *ihiz - jrow + 1; + jlen = min(i__5, i__7); + dlacpy_((char *)"ALL", &jlen, &knz, &z__[jrow + (incol + 1 + j2) * z_dim1], ldz, + &wv[(kzs + 1) * wv_dim1 + 1], ldwv, (ftnlen)3); + dlaset_((char *)"ALL", &jlen, &kzs, &c_b7, &c_b7, &wv[wv_offset], ldwv, (ftnlen)3); + dtrmm_((char *)"R", (char *)"U", (char *)"N", (char *)"N", &jlen, &knz, &c_b8, + &u[j2 + 1 + (kzs + 1) * u_dim1], ldu, &wv[(kzs + 1) * wv_dim1 + 1], + ldwv, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + dgemm_((char *)"N", (char *)"N", &jlen, &i2, &j2, &c_b8, &z__[jrow + (incol + 1) * z_dim1], + ldz, &u[u_offset], ldu, &c_b8, &wv[wv_offset], ldwv, (ftnlen)1, + (ftnlen)1); + dlacpy_((char *)"ALL", &jlen, &j2, &z__[jrow + (incol + 1) * z_dim1], ldz, + &wv[(i2 + 1) * wv_dim1 + 1], ldwv, (ftnlen)3); + i__5 = i4 - i2; + dtrmm_((char *)"R", (char *)"L", (char *)"N", (char *)"N", &jlen, &i__5, &c_b8, &u[(i2 + 1) * u_dim1 + 1], + ldu, &wv[(i2 + 1) * wv_dim1 + 1], ldwv, (ftnlen)1, (ftnlen)1, + (ftnlen)1, (ftnlen)1); + i__5 = i4 - i2; + i__7 = j4 - j2; + dgemm_((char *)"N", (char *)"N", &jlen, &i__5, &i__7, &c_b8, + &z__[jrow + (incol + 1 + j2) * z_dim1], ldz, + &u[j2 + 1 + (i2 + 1) * u_dim1], ldu, &c_b8, + &wv[(i2 + 1) * wv_dim1 + 1], ldwv, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"ALL", &jlen, &kdu, &wv[wv_offset], ldwv, + &z__[jrow + (incol + 1) * z_dim1], ldz, (ftnlen)3); + } + } + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlarfx.cpp b/lib/linalg/dlarfx.cpp new file mode 100644 index 0000000000..44d73f27a9 --- /dev/null +++ b/lib/linalg/dlarfx.cpp @@ -0,0 +1,552 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +int dlarfx_(char *side, integer *m, integer *n, doublereal *v, doublereal *tau, doublereal *c__, + integer *ldc, doublereal *work, ftnlen side_len) +{ + integer c_dim1, c_offset, i__1; + integer j; + doublereal t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, 
v4, v5, v6, v7, v8, v9, t10, v10, + sum; + extern int dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, ftnlen); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + if (*tau == 0.) { + return 0; + } + if (lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1)) { + switch (*m) { + case 1: + goto L10; + case 2: + goto L30; + case 3: + goto L50; + case 4: + goto L70; + case 5: + goto L90; + case 6: + goto L110; + case 7: + goto L130; + case 8: + goto L150; + case 9: + goto L170; + case 10: + goto L190; + } + dlarf_(side, m, n, &v[1], &c__1, tau, &c__[c_offset], ldc, &work[1], (ftnlen)1); + goto L410; + L10: + t1 = 1. - *tau * v[1] * v[1]; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + c__[j * c_dim1 + 1] = t1 * c__[j * c_dim1 + 1]; + } + goto L410; + L30: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + } + goto L410; + L50: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + } + goto L410; + L70: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3] + + v4 * c__[j * c_dim1 + 4]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + } + goto L410; + L90: + v1 = v[1]; + t1 = *tau 
* v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3] + + v4 * c__[j * c_dim1 + 4] + v5 * c__[j * c_dim1 + 5]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + } + goto L410; + L110: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3] + + v4 * c__[j * c_dim1 + 4] + v5 * c__[j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + } + goto L410; + L130: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3] + + v4 * c__[j * c_dim1 + 4] + v5 * c__[j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + + v7 * c__[j * c_dim1 + 7]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + } + goto L410; + L150: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 
= *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3] + + v4 * c__[j * c_dim1 + 4] + v5 * c__[j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + + v7 * c__[j * c_dim1 + 7] + v8 * c__[j * c_dim1 + 8]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + c__[j * c_dim1 + 8] -= sum * t8; + } + goto L410; + L170: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3] + + v4 * c__[j * c_dim1 + 4] + v5 * c__[j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + + v7 * c__[j * c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j * c_dim1 + 9]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + c__[j * c_dim1 + 8] -= sum * t8; + c__[j * c_dim1 + 9] -= sum * t9; + } + goto L410; + L190: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + v10 = v[10]; + t10 = *tau * v10; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 
v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3] + + v4 * c__[j * c_dim1 + 4] + v5 * c__[j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + + v7 * c__[j * c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j * c_dim1 + 9] + + v10 * c__[j * c_dim1 + 10]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + c__[j * c_dim1 + 8] -= sum * t8; + c__[j * c_dim1 + 9] -= sum * t9; + c__[j * c_dim1 + 10] -= sum * t10; + } + goto L410; + } else { + switch (*n) { + case 1: + goto L210; + case 2: + goto L230; + case 3: + goto L250; + case 4: + goto L270; + case 5: + goto L290; + case 6: + goto L310; + case 7: + goto L330; + case 8: + goto L350; + case 9: + goto L370; + case 10: + goto L390; + } + dlarf_(side, m, n, &v[1], &c__1, tau, &c__[c_offset], ldc, &work[1], (ftnlen)1); + goto L410; + L210: + t1 = 1. 
- *tau * v[1] * v[1]; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + c__[j + c_dim1] = t1 * c__[j + c_dim1]; + } + goto L410; + L230: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + } + goto L410; + L250: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + } + goto L410; + L270: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3] + + v4 * c__[j + (c_dim1 << 2)]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + } + goto L410; + L290: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3] + + v4 * c__[j + (c_dim1 << 2)] + v5 * c__[j + c_dim1 * 5]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + } + goto L410; + L310: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * 
c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3] + + v4 * c__[j + (c_dim1 << 2)] + v5 * c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + } + goto L410; + L330: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3] + + v4 * c__[j + (c_dim1 << 2)] + v5 * c__[j + c_dim1 * 5] + + v6 * c__[j + c_dim1 * 6] + v7 * c__[j + c_dim1 * 7]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + } + goto L410; + L350: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3] + + v4 * c__[j + (c_dim1 << 2)] + v5 * c__[j + c_dim1 * 5] + + v6 * c__[j + c_dim1 * 6] + v7 * c__[j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + c__[j + (c_dim1 << 3)] -= sum * t8; + } + goto L410; + L370: + v1 = v[1]; + t1 = *tau * v1; 
+ v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3] + + v4 * c__[j + (c_dim1 << 2)] + v5 * c__[j + c_dim1 * 5] + + v6 * c__[j + c_dim1 * 6] + v7 * c__[j + c_dim1 * 7] + + v8 * c__[j + (c_dim1 << 3)] + v9 * c__[j + c_dim1 * 9]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + c__[j + (c_dim1 << 3)] -= sum * t8; + c__[j + c_dim1 * 9] -= sum * t9; + } + goto L410; + L390: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + v10 = v[10]; + t10 = *tau * v10; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3] + + v4 * c__[j + (c_dim1 << 2)] + v5 * c__[j + c_dim1 * 5] + + v6 * c__[j + c_dim1 * 6] + v7 * c__[j + c_dim1 * 7] + + v8 * c__[j + (c_dim1 << 3)] + v9 * c__[j + c_dim1 * 9] + + v10 * c__[j + c_dim1 * 10]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + c__[j + (c_dim1 << 3)] -= sum * t8; + c__[j + c_dim1 * 9] -= sum * t9; + c__[j + c_dim1 * 10] -= sum * t10; + } + goto L410; + } +L410: + return 0; +} +#ifdef __cplusplus +} +#endif diff --git 
a/lib/linalg/dlasd0.cpp b/lib/linalg/dlasd0.cpp new file mode 100644 index 0000000000..006c379fa9 --- /dev/null +++ b/lib/linalg/dlasd0.cpp @@ -0,0 +1,143 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__0 = 0; +static integer c__2 = 2; +int dlasd0_(integer *n, integer *sqre, doublereal *d__, doublereal *e, doublereal *u, integer *ldu, + doublereal *vt, integer *ldvt, integer *smlsiz, integer *iwork, doublereal *work, + integer *info) +{ + integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; + integer pow_lmp_ii(integer *, integer *); + integer i__, j, m, i1, ic, lf, nd, ll, nl, nr, im1, ncc, nlf, nrf, iwk, lvl, ndb1, nlp1, nrp1; + doublereal beta; + integer idxq, nlvl; + doublereal alpha; + integer inode, ndiml, idxqc, ndimr, itemp, sqrei; + extern int dlasd1_(integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *, integer *, + doublereal *, integer *), + dlasdq_(char *, integer *, integer *, integer *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, ftnlen), + dlasdt_(integer *, integer *, integer *, integer *, integer *, integer *, integer *), + xerbla_(char *, integer *, ftnlen); + --d__; + --e; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --iwork; + --work; + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*sqre < 0 || *sqre > 1) { + *info = -2; + } + m = *n + *sqre; + if (*ldu < *n) { + *info = -6; + } else if (*ldvt < m) { + *info = -8; + } else if (*smlsiz < 3) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLASD0", &i__1, (ftnlen)6); + return 0; + } + if (*n <= *smlsiz) { + dlasdq_((char *)"U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[u_offset], ldu, + &u[u_offset], ldu, &work[1], info, 
(ftnlen)1); + return 0; + } + inode = 1; + ndiml = inode + *n; + ndimr = ndiml + *n; + idxq = ndimr + *n; + iwk = idxq + *n; + dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], smlsiz); + ndb1 = (nd + 1) / 2; + ncc = 0; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nlp1 = nl + 1; + nr = iwork[ndimr + i1]; + nrp1 = nr + 1; + nlf = ic - nl; + nrf = ic + 1; + sqrei = 1; + dlasdq_((char *)"U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], &vt[nlf + nlf * vt_dim1], + ldvt, &u[nlf + nlf * u_dim1], ldu, &u[nlf + nlf * u_dim1], ldu, &work[1], info, + (ftnlen)1); + if (*info != 0) { + return 0; + } + itemp = idxq + nlf - 2; + i__2 = nl; + for (j = 1; j <= i__2; ++j) { + iwork[itemp + j] = j; + } + if (i__ == nd) { + sqrei = *sqre; + } else { + sqrei = 1; + } + nrp1 = nr + sqrei; + dlasdq_((char *)"U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], &vt[nrf + nrf * vt_dim1], + ldvt, &u[nrf + nrf * u_dim1], ldu, &u[nrf + nrf * u_dim1], ldu, &work[1], info, + (ftnlen)1); + if (*info != 0) { + return 0; + } + itemp = idxq + ic; + i__2 = nr; + for (j = 1; j <= i__2; ++j) { + iwork[itemp + j - 1] = j; + } + } + for (lvl = nlvl; lvl >= 1; --lvl) { + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__1 = lvl - 1; + lf = pow_lmp_ii(&c__2, &i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; + for (i__ = lf; i__ <= i__1; ++i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + if (*sqre == 0 && i__ == ll) { + sqrei = *sqre; + } else { + sqrei = 1; + } + idxqc = idxq + nlf - 1; + alpha = d__[ic]; + beta = e[ic]; + dlasd1_(&nl, &nr, &sqrei, &d__[nlf], &alpha, &beta, &u[nlf + nlf * u_dim1], ldu, + &vt[nlf + nlf * vt_dim1], ldvt, &iwork[idxqc], &iwork[iwk], &work[1], info); + if (*info != 0) { + return 0; + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlasd1.cpp b/lib/linalg/dlasd1.cpp 
new file mode 100644 index 0000000000..e7b7fba747 --- /dev/null +++ b/lib/linalg/dlasd1.cpp @@ -0,0 +1,96 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__0 = 0; +static doublereal c_b7 = 1.; +static integer c__1 = 1; +static integer c_n1 = -1; +int dlasd1_(integer *nl, integer *nr, integer *sqre, doublereal *d__, doublereal *alpha, + doublereal *beta, doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, + integer *idxq, integer *iwork, doublereal *work, integer *info) +{ + integer u_dim1, u_offset, vt_dim1, vt_offset, i__1; + doublereal d__1, d__2; + integer i__, k, m, n, n1, n2, iq, iz, iu2, ldq, idx, ldu2, ivt2, idxc, idxp, ldvt2; + extern int dlasd2_(integer *, integer *, integer *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, + integer *, integer *, integer *, integer *, integer *), + dlasd3_(integer *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, + doublereal *, integer *, integer *, ftnlen), + dlamrg_(integer *, integer *, doublereal *, integer *, integer *, integer *); + integer isigma; + extern int xerbla_(char *, integer *, ftnlen); + doublereal orgnrm; + integer coltyp; + --d__; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --idxq; + --iwork; + --work; + *info = 0; + if (*nl < 1) { + *info = -1; + } else if (*nr < 1) { + *info = -2; + } else if (*sqre < 0 || *sqre > 1) { + *info = -3; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLASD1", &i__1, (ftnlen)6); + return 0; + } + n = *nl + *nr + 
1; + m = n + *sqre; + ldu2 = n; + ldvt2 = m; + iz = 1; + isigma = iz + m; + iu2 = isigma + n; + ivt2 = iu2 + ldu2 * n; + iq = ivt2 + ldvt2 * m; + idx = 1; + idxc = idx + n; + coltyp = idxc + n; + idxp = coltyp + n; + d__1 = abs(*alpha), d__2 = abs(*beta); + orgnrm = max(d__1, d__2); + d__[*nl + 1] = 0.; + i__1 = n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) > orgnrm) { + orgnrm = (d__1 = d__[i__], abs(d__1)); + } + } + dlascl_((char *)"G", &c__0, &c__0, &orgnrm, &c_b7, &n, &c__1, &d__[1], &n, info, (ftnlen)1); + *alpha /= orgnrm; + *beta /= orgnrm; + dlasd2_(nl, nr, sqre, &k, &d__[1], &work[iz], alpha, beta, &u[u_offset], ldu, &vt[vt_offset], + ldvt, &work[isigma], &work[iu2], &ldu2, &work[ivt2], &ldvt2, &iwork[idxp], &iwork[idx], + &iwork[idxc], &idxq[1], &iwork[coltyp], info); + ldq = k; + dlasd3_(nl, nr, sqre, &k, &d__[1], &work[iq], &ldq, &work[isigma], &u[u_offset], ldu, + &work[iu2], &ldu2, &vt[vt_offset], ldvt, &work[ivt2], &ldvt2, &iwork[idxc], + &iwork[coltyp], &work[iz], info); + if (*info != 0) { + return 0; + } + dlascl_((char *)"G", &c__0, &c__0, &c_b7, &orgnrm, &n, &c__1, &d__[1], &n, info, (ftnlen)1); + n1 = k; + n2 = n - k; + dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlasd2.cpp b/lib/linalg/dlasd2.cpp new file mode 100644 index 0000000000..36562850e0 --- /dev/null +++ b/lib/linalg/dlasd2.cpp @@ -0,0 +1,282 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static doublereal c_b30 = 0.; +int dlasd2_(integer *nl, integer *nr, integer *sqre, integer *k, doublereal *d__, doublereal *z__, + doublereal *alpha, doublereal *beta, doublereal *u, integer *ldu, doublereal *vt, + integer *ldvt, doublereal *dsigma, doublereal *u2, integer *ldu2, doublereal *vt2, + integer *ldvt2, integer *idxp, integer *idx, integer *idxc, integer *idxq, + integer *coltyp, integer *info) +{ + integer u_dim1, u_offset, u2_dim1, 
u2_offset, vt_dim1, vt_offset, vt2_dim1, vt2_offset, i__1; + doublereal d__1, d__2; + doublereal c__; + integer i__, j, m, n; + doublereal s; + integer k2; + doublereal z1; + integer ct, jp; + doublereal eps, tau, tol; + integer psm[4], nlp1, nlp2, idxi, idxj; + extern int drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *); + integer ctot[4], idxjp; + extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); + integer jprev; + extern doublereal dlapy2_(doublereal *, doublereal *), dlamch_(char *, ftnlen); + extern int dlamrg_(integer *, integer *, doublereal *, integer *, integer *, integer *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen), + dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, + ftnlen), + xerbla_(char *, integer *, ftnlen); + doublereal hlftol; + --d__; + --z__; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --dsigma; + u2_dim1 = *ldu2; + u2_offset = 1 + u2_dim1; + u2 -= u2_offset; + vt2_dim1 = *ldvt2; + vt2_offset = 1 + vt2_dim1; + vt2 -= vt2_offset; + --idxp; + --idx; + --idxc; + --idxq; + --coltyp; + *info = 0; + if (*nl < 1) { + *info = -1; + } else if (*nr < 1) { + *info = -2; + } else if (*sqre != 1 && *sqre != 0) { + *info = -3; + } + n = *nl + *nr + 1; + m = n + *sqre; + if (*ldu < n) { + *info = -10; + } else if (*ldvt < m) { + *info = -12; + } else if (*ldu2 < n) { + *info = -15; + } else if (*ldvt2 < m) { + *info = -17; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLASD2", &i__1, (ftnlen)6); + return 0; + } + nlp1 = *nl + 1; + nlp2 = *nl + 2; + z1 = *alpha * vt[nlp1 + nlp1 * vt_dim1]; + z__[1] = z1; + for (i__ = *nl; i__ >= 1; --i__) { + z__[i__ + 1] = *alpha * vt[i__ + nlp1 * vt_dim1]; + d__[i__ + 1] = d__[i__]; + idxq[i__ + 1] = idxq[i__] + 1; + } + i__1 = m; + for (i__ = nlp2; i__ 
<= i__1; ++i__) { + z__[i__] = *beta * vt[i__ + nlp2 * vt_dim1]; + } + i__1 = nlp1; + for (i__ = 2; i__ <= i__1; ++i__) { + coltyp[i__] = 1; + } + i__1 = n; + for (i__ = nlp2; i__ <= i__1; ++i__) { + coltyp[i__] = 2; + } + i__1 = n; + for (i__ = nlp2; i__ <= i__1; ++i__) { + idxq[i__] += nlp1; + } + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + dsigma[i__] = d__[idxq[i__]]; + u2[i__ + u2_dim1] = z__[idxq[i__]]; + idxc[i__] = coltyp[idxq[i__]]; + } + dlamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + idxi = idx[i__] + 1; + d__[i__] = dsigma[idxi]; + z__[i__] = u2[idxi + u2_dim1]; + coltyp[i__] = idxc[idxi]; + } + eps = dlamch_((char *)"Epsilon", (ftnlen)7); + d__1 = abs(*alpha), d__2 = abs(*beta); + tol = max(d__1, d__2); + d__2 = (d__1 = d__[n], abs(d__1)); + tol = eps * 8. * max(d__2, tol); + *k = 1; + k2 = n + 1; + i__1 = n; + for (j = 2; j <= i__1; ++j) { + if ((d__1 = z__[j], abs(d__1)) <= tol) { + --k2; + idxp[k2] = j; + coltyp[j] = 4; + if (j == n) { + goto L120; + } + } else { + jprev = j; + goto L90; + } + } +L90: + j = jprev; +L100: + ++j; + if (j > n) { + goto L110; + } + if ((d__1 = z__[j], abs(d__1)) <= tol) { + --k2; + idxp[k2] = j; + coltyp[j] = 4; + } else { + if ((d__1 = d__[j] - d__[jprev], abs(d__1)) <= tol) { + s = z__[jprev]; + c__ = z__[j]; + tau = dlapy2_(&c__, &s); + c__ /= tau; + s = -s / tau; + z__[j] = tau; + z__[jprev] = 0.; + idxjp = idxq[idx[jprev] + 1]; + idxj = idxq[idx[j] + 1]; + if (idxjp <= nlp1) { + --idxjp; + } + if (idxj <= nlp1) { + --idxj; + } + drot_(&n, &u[idxjp * u_dim1 + 1], &c__1, &u[idxj * u_dim1 + 1], &c__1, &c__, &s); + drot_(&m, &vt[idxjp + vt_dim1], ldvt, &vt[idxj + vt_dim1], ldvt, &c__, &s); + if (coltyp[j] != coltyp[jprev]) { + coltyp[j] = 3; + } + coltyp[jprev] = 4; + --k2; + idxp[k2] = jprev; + jprev = j; + } else { + ++(*k); + u2[*k + u2_dim1] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; + jprev = j; + } + } + goto L100; +L110: + ++(*k); + 
u2[*k + u2_dim1] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; +L120: + for (j = 1; j <= 4; ++j) { + ctot[j - 1] = 0; + } + i__1 = n; + for (j = 2; j <= i__1; ++j) { + ct = coltyp[j]; + ++ctot[ct - 1]; + } + psm[0] = 2; + psm[1] = ctot[0] + 2; + psm[2] = psm[1] + ctot[1]; + psm[3] = psm[2] + ctot[2]; + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + ct = coltyp[jp]; + idxc[psm[ct - 1]] = j; + ++psm[ct - 1]; + } + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + dsigma[j] = d__[jp]; + idxj = idxq[idx[idxp[idxc[j]]] + 1]; + if (idxj <= nlp1) { + --idxj; + } + dcopy_(&n, &u[idxj * u_dim1 + 1], &c__1, &u2[j * u2_dim1 + 1], &c__1); + dcopy_(&m, &vt[idxj + vt_dim1], ldvt, &vt2[j + vt2_dim1], ldvt2); + } + dsigma[1] = 0.; + hlftol = tol / 2.; + if (abs(dsigma[2]) <= hlftol) { + dsigma[2] = hlftol; + } + if (m > n) { + z__[1] = dlapy2_(&z1, &z__[m]); + if (z__[1] <= tol) { + c__ = 1.; + s = 0.; + z__[1] = tol; + } else { + c__ = z1 / z__[1]; + s = z__[m] / z__[1]; + } + } else { + if (abs(z1) <= tol) { + z__[1] = tol; + } else { + z__[1] = z1; + } + } + i__1 = *k - 1; + dcopy_(&i__1, &u2[u2_dim1 + 2], &c__1, &z__[2], &c__1); + dlaset_((char *)"A", &n, &c__1, &c_b30, &c_b30, &u2[u2_offset], ldu2, (ftnlen)1); + u2[nlp1 + u2_dim1] = 1.; + if (m > n) { + i__1 = nlp1; + for (i__ = 1; i__ <= i__1; ++i__) { + vt[m + i__ * vt_dim1] = -s * vt[nlp1 + i__ * vt_dim1]; + vt2[i__ * vt2_dim1 + 1] = c__ * vt[nlp1 + i__ * vt_dim1]; + } + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + vt2[i__ * vt2_dim1 + 1] = s * vt[m + i__ * vt_dim1]; + vt[m + i__ * vt_dim1] = c__ * vt[m + i__ * vt_dim1]; + } + } else { + dcopy_(&m, &vt[nlp1 + vt_dim1], ldvt, &vt2[vt2_dim1 + 1], ldvt2); + } + if (m > n) { + dcopy_(&m, &vt[m + vt_dim1], ldvt, &vt2[m + vt2_dim1], ldvt2); + } + if (n > *k) { + i__1 = n - *k; + dcopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); + i__1 = n - *k; + dlacpy_((char *)"A", &n, &i__1, &u2[(*k + 1) * u2_dim1 + 1], ldu2, &u[(*k + 1) 
* u_dim1 + 1], ldu, + (ftnlen)1); + i__1 = n - *k; + dlacpy_((char *)"A", &i__1, &m, &vt2[*k + 1 + vt2_dim1], ldvt2, &vt[*k + 1 + vt_dim1], ldvt, + (ftnlen)1); + } + for (j = 1; j <= 4; ++j) { + coltyp[j] = ctot[j - 1]; + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlasd3.cpp b/lib/linalg/dlasd3.cpp new file mode 100644 index 0000000000..745c613e08 --- /dev/null +++ b/lib/linalg/dlasd3.cpp @@ -0,0 +1,218 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c__0 = 0; +static doublereal c_b13 = 1.; +static doublereal c_b26 = 0.; +int dlasd3_(integer *nl, integer *nr, integer *sqre, integer *k, doublereal *d__, doublereal *q, + integer *ldq, doublereal *dsigma, doublereal *u, integer *ldu, doublereal *u2, + integer *ldu2, doublereal *vt, integer *ldvt, doublereal *vt2, integer *ldvt2, + integer *idxc, integer *ctot, doublereal *z__, integer *info) +{ + integer q_dim1, q_offset, u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset, vt2_dim1, + vt2_offset, i__1, i__2; + doublereal d__1, d__2; + double sqrt(doublereal), d_lmp_sign(doublereal *, doublereal *); + integer i__, j, m, n, jc; + doublereal rho; + integer nlp1, nlp2, nrp1; + doublereal temp; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, + ftnlen, ftnlen); + integer ctemp; + extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); + integer ktemp; + extern doublereal dlamc3_(doublereal *, doublereal *); + extern int dlasd4_(integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, + doublereal *, integer *, integer *, ftnlen), + dlacpy_(char *, integer *, integer *, 
doublereal *, integer *, doublereal *, integer *, + ftnlen), + xerbla_(char *, integer *, ftnlen); + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --dsigma; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + u2_dim1 = *ldu2; + u2_offset = 1 + u2_dim1; + u2 -= u2_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + vt2_dim1 = *ldvt2; + vt2_offset = 1 + vt2_dim1; + vt2 -= vt2_offset; + --idxc; + --ctot; + --z__; + *info = 0; + if (*nl < 1) { + *info = -1; + } else if (*nr < 1) { + *info = -2; + } else if (*sqre != 1 && *sqre != 0) { + *info = -3; + } + n = *nl + *nr + 1; + m = n + *sqre; + nlp1 = *nl + 1; + nlp2 = *nl + 2; + if (*k < 1 || *k > n) { + *info = -4; + } else if (*ldq < *k) { + *info = -7; + } else if (*ldu < n) { + *info = -10; + } else if (*ldu2 < n) { + *info = -12; + } else if (*ldvt < m) { + *info = -14; + } else if (*ldvt2 < m) { + *info = -16; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLASD3", &i__1, (ftnlen)6); + return 0; + } + if (*k == 1) { + d__[1] = abs(z__[1]); + dcopy_(&m, &vt2[vt2_dim1 + 1], ldvt2, &vt[vt_dim1 + 1], ldvt); + if (z__[1] > 0.) 
{ + dcopy_(&n, &u2[u2_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1); + } else { + i__1 = n; + for (i__ = 1; i__ <= i__1; ++i__) { + u[i__ + u_dim1] = -u2[i__ + u2_dim1]; + } + } + return 0; + } + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + dsigma[i__] = dlamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; + } + dcopy_(k, &z__[1], &c__1, &q[q_offset], &c__1); + rho = dnrm2_(k, &z__[1], &c__1); + dlascl_((char *)"G", &c__0, &c__0, &rho, &c_b13, k, &c__1, &z__[1], k, info, (ftnlen)1); + rho *= rho; + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dlasd4_(k, &j, &dsigma[1], &z__[1], &u[j * u_dim1 + 1], &rho, &d__[j], &vt[j * vt_dim1 + 1], + info); + if (*info != 0) { + return 0; + } + } + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + z__[i__] = u[i__ + *k * u_dim1] * vt[i__ + *k * vt_dim1]; + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[i__] - dsigma[j]) / + (dsigma[i__] + dsigma[j]); + } + i__2 = *k - 1; + for (j = i__; j <= i__2; ++j) { + z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / + (dsigma[i__] - dsigma[j + 1]) / (dsigma[i__] + dsigma[j + 1]); + } + d__2 = sqrt((d__1 = z__[i__], abs(d__1))); + z__[i__] = d_lmp_sign(&d__2, &q[i__ + q_dim1]); + } + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + vt[i__ * vt_dim1 + 1] = z__[1] / u[i__ * u_dim1 + 1] / vt[i__ * vt_dim1 + 1]; + u[i__ * u_dim1 + 1] = -1.; + i__2 = *k; + for (j = 2; j <= i__2; ++j) { + vt[j + i__ * vt_dim1] = z__[j] / u[j + i__ * u_dim1] / vt[j + i__ * vt_dim1]; + u[j + i__ * u_dim1] = dsigma[j] * vt[j + i__ * vt_dim1]; + } + temp = dnrm2_(k, &u[i__ * u_dim1 + 1], &c__1); + q[i__ * q_dim1 + 1] = u[i__ * u_dim1 + 1] / temp; + i__2 = *k; + for (j = 2; j <= i__2; ++j) { + jc = idxc[j]; + q[j + i__ * q_dim1] = u[jc + i__ * u_dim1] / temp; + } + } + if (*k == 2) { + dgemm_((char *)"N", (char *)"N", &n, k, k, &c_b13, &u2[u2_offset], ldu2, &q[q_offset], ldq, &c_b26, + &u[u_offset], ldu, (ftnlen)1, (ftnlen)1); + goto L100; + 
} + if (ctot[1] > 0) { + dgemm_((char *)"N", (char *)"N", nl, k, &ctot[1], &c_b13, &u2[(u2_dim1 << 1) + 1], ldu2, &q[q_dim1 + 2], + ldq, &c_b26, &u[u_dim1 + 1], ldu, (ftnlen)1, (ftnlen)1); + if (ctot[3] > 0) { + ktemp = ctot[1] + 2 + ctot[2]; + dgemm_((char *)"N", (char *)"N", nl, k, &ctot[3], &c_b13, &u2[ktemp * u2_dim1 + 1], ldu2, + &q[ktemp + q_dim1], ldq, &c_b13, &u[u_dim1 + 1], ldu, (ftnlen)1, (ftnlen)1); + } + } else if (ctot[3] > 0) { + ktemp = ctot[1] + 2 + ctot[2]; + dgemm_((char *)"N", (char *)"N", nl, k, &ctot[3], &c_b13, &u2[ktemp * u2_dim1 + 1], ldu2, + &q[ktemp + q_dim1], ldq, &c_b26, &u[u_dim1 + 1], ldu, (ftnlen)1, (ftnlen)1); + } else { + dlacpy_((char *)"F", nl, k, &u2[u2_offset], ldu2, &u[u_offset], ldu, (ftnlen)1); + } + dcopy_(k, &q[q_dim1 + 1], ldq, &u[nlp1 + u_dim1], ldu); + ktemp = ctot[1] + 2; + ctemp = ctot[2] + ctot[3]; + dgemm_((char *)"N", (char *)"N", nr, k, &ctemp, &c_b13, &u2[nlp2 + ktemp * u2_dim1], ldu2, &q[ktemp + q_dim1], + ldq, &c_b26, &u[nlp2 + u_dim1], ldu, (ftnlen)1, (ftnlen)1); +L100: + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = dnrm2_(k, &vt[i__ * vt_dim1 + 1], &c__1); + q[i__ + q_dim1] = vt[i__ * vt_dim1 + 1] / temp; + i__2 = *k; + for (j = 2; j <= i__2; ++j) { + jc = idxc[j]; + q[i__ + j * q_dim1] = vt[jc + i__ * vt_dim1] / temp; + } + } + if (*k == 2) { + dgemm_((char *)"N", (char *)"N", k, &m, k, &c_b13, &q[q_offset], ldq, &vt2[vt2_offset], ldvt2, &c_b26, + &vt[vt_offset], ldvt, (ftnlen)1, (ftnlen)1); + return 0; + } + ktemp = ctot[1] + 1; + dgemm_((char *)"N", (char *)"N", k, &nlp1, &ktemp, &c_b13, &q[q_dim1 + 1], ldq, &vt2[vt2_dim1 + 1], ldvt2, + &c_b26, &vt[vt_dim1 + 1], ldvt, (ftnlen)1, (ftnlen)1); + ktemp = ctot[1] + 2 + ctot[2]; + if (ktemp <= *ldvt2) { + dgemm_((char *)"N", (char *)"N", k, &nlp1, &ctot[3], &c_b13, &q[ktemp * q_dim1 + 1], ldq, + &vt2[ktemp + vt2_dim1], ldvt2, &c_b13, &vt[vt_dim1 + 1], ldvt, (ftnlen)1, (ftnlen)1); + } + ktemp = ctot[1] + 1; + nrp1 = *nr + *sqre; + if (ktemp > 1) { + 
i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + q[i__ + ktemp * q_dim1] = q[i__ + q_dim1]; + } + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + vt2[ktemp + i__ * vt2_dim1] = vt2[i__ * vt2_dim1 + 1]; + } + } + ctemp = ctot[2] + 1 + ctot[3]; + dgemm_((char *)"N", (char *)"N", k, &nrp1, &ctemp, &c_b13, &q[ktemp * q_dim1 + 1], ldq, + &vt2[ktemp + nlp2 * vt2_dim1], ldvt2, &c_b26, &vt[nlp2 * vt_dim1 + 1], ldvt, (ftnlen)1, + (ftnlen)1); + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlasy2.cpp b/lib/linalg/dlasy2.cpp new file mode 100644 index 0000000000..94e9ed0e7c --- /dev/null +++ b/lib/linalg/dlasy2.cpp @@ -0,0 +1,284 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__4 = 4; +static integer c__1 = 1; +static integer c__16 = 16; +static integer c__0 = 0; +int dlasy2_(logical *ltranl, logical *ltranr, integer *isgn, integer *n1, integer *n2, + doublereal *tl, integer *ldtl, doublereal *tr, integer *ldtr, doublereal *b, + integer *ldb, doublereal *scale, doublereal *x, integer *ldx, doublereal *xnorm, + integer *info) +{ + static integer locu12[4] = {3, 4, 1, 2}; + static integer locl21[4] = {2, 1, 4, 3}; + static integer locu22[4] = {4, 3, 2, 1}; + static logical xswpiv[4] = {FALSE_, FALSE_, TRUE_, TRUE_}; + static logical bswpiv[4] = {FALSE_, TRUE_, FALSE_, TRUE_}; + integer b_dim1, b_offset, tl_dim1, tl_offset, tr_dim1, tr_offset, x_dim1, x_offset; + doublereal d__1, d__2, d__3, d__4, d__5, d__6, d__7, d__8; + integer i__, j, k; + doublereal x2[2], l21, u11, u12; + integer ip, jp; + doublereal u22, t16[16], gam, bet, eps, sgn, tmp[4], tau1, btmp[4], smin; + integer ipiv; + doublereal temp; + integer jpiv[4]; + doublereal xmax; + integer ipsv, jpsv; + logical bswap; + extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), + dswap_(integer *, doublereal *, integer *, doublereal *, integer *); + logical xswap; + extern doublereal dlamch_(char *, ftnlen); + extern integer 
idamax_(integer *, doublereal *, integer *); + doublereal smlnum; + tl_dim1 = *ldtl; + tl_offset = 1 + tl_dim1; + tl -= tl_offset; + tr_dim1 = *ldtr; + tr_offset = 1 + tr_dim1; + tr -= tr_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + *info = 0; + if (*n1 == 0 || *n2 == 0) { + return 0; + } + eps = dlamch_((char *)"P", (ftnlen)1); + smlnum = dlamch_((char *)"S", (ftnlen)1) / eps; + sgn = (doublereal)(*isgn); + k = *n1 + *n1 + *n2 - 2; + switch (k) { + case 1: + goto L10; + case 2: + goto L20; + case 3: + goto L30; + case 4: + goto L50; + } +L10: + tau1 = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + bet = abs(tau1); + if (bet <= smlnum) { + tau1 = smlnum; + bet = smlnum; + *info = 1; + } + *scale = 1.; + gam = (d__1 = b[b_dim1 + 1], abs(d__1)); + if (smlnum * gam > bet) { + *scale = 1. / gam; + } + x[x_dim1 + 1] = b[b_dim1 + 1] * *scale / tau1; + *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)); + return 0; +L20: + d__7 = (d__1 = tl[tl_dim1 + 1], abs(d__1)), d__8 = (d__2 = tr[tr_dim1 + 1], abs(d__2)), + d__7 = max(d__7, d__8), d__8 = (d__3 = tr[(tr_dim1 << 1) + 1], abs(d__3)), + d__7 = max(d__7, d__8), d__8 = (d__4 = tr[tr_dim1 + 2], abs(d__4)), d__7 = max(d__7, d__8), + d__8 = (d__5 = tr[(tr_dim1 << 1) + 2], abs(d__5)); + d__6 = eps * max(d__7, d__8); + smin = max(d__6, smlnum); + tmp[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + tmp[3] = tl[tl_dim1 + 1] + sgn * tr[(tr_dim1 << 1) + 2]; + if (*ltranr) { + tmp[1] = sgn * tr[tr_dim1 + 2]; + tmp[2] = sgn * tr[(tr_dim1 << 1) + 1]; + } else { + tmp[1] = sgn * tr[(tr_dim1 << 1) + 1]; + tmp[2] = sgn * tr[tr_dim1 + 2]; + } + btmp[0] = b[b_dim1 + 1]; + btmp[1] = b[(b_dim1 << 1) + 1]; + goto L40; +L30: + d__7 = (d__1 = tr[tr_dim1 + 1], abs(d__1)), d__8 = (d__2 = tl[tl_dim1 + 1], abs(d__2)), + d__7 = max(d__7, d__8), d__8 = (d__3 = tl[(tl_dim1 << 1) + 1], abs(d__3)), + d__7 = max(d__7, d__8), d__8 = (d__4 = tl[tl_dim1 + 2], abs(d__4)), d__7 = max(d__7, 
d__8), + d__8 = (d__5 = tl[(tl_dim1 << 1) + 2], abs(d__5)); + d__6 = eps * max(d__7, d__8); + smin = max(d__6, smlnum); + tmp[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + tmp[3] = tl[(tl_dim1 << 1) + 2] + sgn * tr[tr_dim1 + 1]; + if (*ltranl) { + tmp[1] = tl[(tl_dim1 << 1) + 1]; + tmp[2] = tl[tl_dim1 + 2]; + } else { + tmp[1] = tl[tl_dim1 + 2]; + tmp[2] = tl[(tl_dim1 << 1) + 1]; + } + btmp[0] = b[b_dim1 + 1]; + btmp[1] = b[b_dim1 + 2]; +L40: + ipiv = idamax_(&c__4, tmp, &c__1); + u11 = tmp[ipiv - 1]; + if (abs(u11) <= smin) { + *info = 1; + u11 = smin; + } + u12 = tmp[locu12[ipiv - 1] - 1]; + l21 = tmp[locl21[ipiv - 1] - 1] / u11; + u22 = tmp[locu22[ipiv - 1] - 1] - u12 * l21; + xswap = xswpiv[ipiv - 1]; + bswap = bswpiv[ipiv - 1]; + if (abs(u22) <= smin) { + *info = 1; + u22 = smin; + } + if (bswap) { + temp = btmp[1]; + btmp[1] = btmp[0] - l21 * temp; + btmp[0] = temp; + } else { + btmp[1] -= l21 * btmp[0]; + } + *scale = 1.; + if (smlnum * 2. * abs(btmp[1]) > abs(u22) || smlnum * 2. 
* abs(btmp[0]) > abs(u11)) { + d__1 = abs(btmp[0]), d__2 = abs(btmp[1]); + *scale = .5 / max(d__1, d__2); + btmp[0] *= *scale; + btmp[1] *= *scale; + } + x2[1] = btmp[1] / u22; + x2[0] = btmp[0] / u11 - u12 / u11 * x2[1]; + if (xswap) { + temp = x2[1]; + x2[1] = x2[0]; + x2[0] = temp; + } + x[x_dim1 + 1] = x2[0]; + if (*n1 == 1) { + x[(x_dim1 << 1) + 1] = x2[1]; + *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)) + (d__2 = x[(x_dim1 << 1) + 1], abs(d__2)); + } else { + x[x_dim1 + 2] = x2[1]; + d__3 = (d__1 = x[x_dim1 + 1], abs(d__1)), d__4 = (d__2 = x[x_dim1 + 2], abs(d__2)); + *xnorm = max(d__3, d__4); + } + return 0; +L50: + d__5 = (d__1 = tr[tr_dim1 + 1], abs(d__1)), d__6 = (d__2 = tr[(tr_dim1 << 1) + 1], abs(d__2)), + d__5 = max(d__5, d__6), d__6 = (d__3 = tr[tr_dim1 + 2], abs(d__3)), d__5 = max(d__5, d__6), + d__6 = (d__4 = tr[(tr_dim1 << 1) + 2], abs(d__4)); + smin = max(d__5, d__6); + d__5 = smin, d__6 = (d__1 = tl[tl_dim1 + 1], abs(d__1)), d__5 = max(d__5, d__6), + d__6 = (d__2 = tl[(tl_dim1 << 1) + 1], abs(d__2)), d__5 = max(d__5, d__6), + d__6 = (d__3 = tl[tl_dim1 + 2], abs(d__3)), d__5 = max(d__5, d__6), + d__6 = (d__4 = tl[(tl_dim1 << 1) + 2], abs(d__4)); + smin = max(d__5, d__6); + d__1 = eps * smin; + smin = max(d__1, smlnum); + btmp[0] = 0.; + dcopy_(&c__16, btmp, &c__0, t16, &c__1); + t16[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + t16[5] = tl[(tl_dim1 << 1) + 2] + sgn * tr[tr_dim1 + 1]; + t16[10] = tl[tl_dim1 + 1] + sgn * tr[(tr_dim1 << 1) + 2]; + t16[15] = tl[(tl_dim1 << 1) + 2] + sgn * tr[(tr_dim1 << 1) + 2]; + if (*ltranl) { + t16[4] = tl[tl_dim1 + 2]; + t16[1] = tl[(tl_dim1 << 1) + 1]; + t16[14] = tl[tl_dim1 + 2]; + t16[11] = tl[(tl_dim1 << 1) + 1]; + } else { + t16[4] = tl[(tl_dim1 << 1) + 1]; + t16[1] = tl[tl_dim1 + 2]; + t16[14] = tl[(tl_dim1 << 1) + 1]; + t16[11] = tl[tl_dim1 + 2]; + } + if (*ltranr) { + t16[8] = sgn * tr[(tr_dim1 << 1) + 1]; + t16[13] = sgn * tr[(tr_dim1 << 1) + 1]; + t16[2] = sgn * tr[tr_dim1 + 2]; + t16[7] = sgn * 
tr[tr_dim1 + 2]; + } else { + t16[8] = sgn * tr[tr_dim1 + 2]; + t16[13] = sgn * tr[tr_dim1 + 2]; + t16[2] = sgn * tr[(tr_dim1 << 1) + 1]; + t16[7] = sgn * tr[(tr_dim1 << 1) + 1]; + } + btmp[0] = b[b_dim1 + 1]; + btmp[1] = b[b_dim1 + 2]; + btmp[2] = b[(b_dim1 << 1) + 1]; + btmp[3] = b[(b_dim1 << 1) + 2]; + for (i__ = 1; i__ <= 3; ++i__) { + xmax = 0.; + for (ip = i__; ip <= 4; ++ip) { + for (jp = i__; jp <= 4; ++jp) { + if ((d__1 = t16[ip + (jp << 2) - 5], abs(d__1)) >= xmax) { + xmax = (d__1 = t16[ip + (jp << 2) - 5], abs(d__1)); + ipsv = ip; + jpsv = jp; + } + } + } + if (ipsv != i__) { + dswap_(&c__4, &t16[ipsv - 1], &c__4, &t16[i__ - 1], &c__4); + temp = btmp[i__ - 1]; + btmp[i__ - 1] = btmp[ipsv - 1]; + btmp[ipsv - 1] = temp; + } + if (jpsv != i__) { + dswap_(&c__4, &t16[(jpsv << 2) - 4], &c__1, &t16[(i__ << 2) - 4], &c__1); + } + jpiv[i__ - 1] = jpsv; + if ((d__1 = t16[i__ + (i__ << 2) - 5], abs(d__1)) < smin) { + *info = 1; + t16[i__ + (i__ << 2) - 5] = smin; + } + for (j = i__ + 1; j <= 4; ++j) { + t16[j + (i__ << 2) - 5] /= t16[i__ + (i__ << 2) - 5]; + btmp[j - 1] -= t16[j + (i__ << 2) - 5] * btmp[i__ - 1]; + for (k = i__ + 1; k <= 4; ++k) { + t16[j + (k << 2) - 5] -= t16[j + (i__ << 2) - 5] * t16[i__ + (k << 2) - 5]; + } + } + } + if (abs(t16[15]) < smin) { + *info = 1; + t16[15] = smin; + } + *scale = 1.; + if (smlnum * 8. * abs(btmp[0]) > abs(t16[0]) || smlnum * 8. * abs(btmp[1]) > abs(t16[5]) || + smlnum * 8. * abs(btmp[2]) > abs(t16[10]) || smlnum * 8. * abs(btmp[3]) > abs(t16[15])) { + d__1 = abs(btmp[0]), d__2 = abs(btmp[1]), d__1 = max(d__1, d__2), d__2 = abs(btmp[2]), + d__1 = max(d__1, d__2), d__2 = abs(btmp[3]); + *scale = .125 / max(d__1, d__2); + btmp[0] *= *scale; + btmp[1] *= *scale; + btmp[2] *= *scale; + btmp[3] *= *scale; + } + for (i__ = 1; i__ <= 4; ++i__) { + k = 5 - i__; + temp = 1. 
/ t16[k + (k << 2) - 5]; + tmp[k - 1] = btmp[k - 1] * temp; + for (j = k + 1; j <= 4; ++j) { + tmp[k - 1] -= temp * t16[k + (j << 2) - 5] * tmp[j - 1]; + } + } + for (i__ = 1; i__ <= 3; ++i__) { + if (jpiv[4 - i__ - 1] != 4 - i__) { + temp = tmp[4 - i__ - 1]; + tmp[4 - i__ - 1] = tmp[jpiv[4 - i__ - 1] - 1]; + tmp[jpiv[4 - i__ - 1] - 1] = temp; + } + } + x[x_dim1 + 1] = tmp[0]; + x[x_dim1 + 2] = tmp[1]; + x[(x_dim1 << 1) + 1] = tmp[2]; + x[(x_dim1 << 1) + 2] = tmp[3]; + d__1 = abs(tmp[0]) + abs(tmp[2]), d__2 = abs(tmp[1]) + abs(tmp[3]); + *xnorm = max(d__1, d__2); + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlasyf.cpp b/lib/linalg/dlasyf.cpp new file mode 100644 index 0000000000..aaafd1a88f --- /dev/null +++ b/lib/linalg/dlasyf.cpp @@ -0,0 +1,337 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static doublereal c_b8 = -1.; +static doublereal c_b9 = 1.; +int dlasyf_(char *uplo, integer *n, integer *nb, integer *kb, doublereal *a, integer *lda, + integer *ipiv, doublereal *w, integer *ldw, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3; + double sqrt(doublereal); + integer j, k; + doublereal t, r1, d11, d21, d22; + integer jb, jj, kk, jp, kp, kw, kkw, imax, jmax; + doublereal alpha; + extern int dscal_(integer *, doublereal *, doublereal *, integer *), + dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, + ftnlen); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen), + dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), + dswap_(integer *, doublereal *, integer *, doublereal *, integer *); + integer kstep; + 
doublereal absakk; + extern integer idamax_(integer *, doublereal *, integer *); + doublereal colmax, rowmax; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + w_dim1 = *ldw; + w_offset = 1 + w_dim1; + w -= w_offset; + *info = 0; + alpha = (sqrt(17.) + 1.) / 8.; + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + k = *n; + L10: + kw = *nb + k - *n; + if (k <= *n - *nb + 1 && *nb < *n || k < 1) { + goto L30; + } + dcopy_(&k, &a[k * a_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1); + if (k < *n) { + i__1 = *n - k; + dgemv_((char *)"No transpose", &k, &i__1, &c_b8, &a[(k + 1) * a_dim1 + 1], lda, + &w[k + (kw + 1) * w_dim1], ldw, &c_b9, &w[kw * w_dim1 + 1], &c__1, (ftnlen)12); + } + kstep = 1; + absakk = (d__1 = w[k + kw * w_dim1], abs(d__1)); + if (k > 1) { + i__1 = k - 1; + imax = idamax_(&i__1, &w[kw * w_dim1 + 1], &c__1); + colmax = (d__1 = w[imax + kw * w_dim1], abs(d__1)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0.) { + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + dcopy_(&imax, &a[imax * a_dim1 + 1], &c__1, &w[(kw - 1) * w_dim1 + 1], &c__1); + i__1 = k - imax; + dcopy_(&i__1, &a[imax + (imax + 1) * a_dim1], lda, &w[imax + 1 + (kw - 1) * w_dim1], + &c__1); + if (k < *n) { + i__1 = *n - k; + dgemv_((char *)"No transpose", &k, &i__1, &c_b8, &a[(k + 1) * a_dim1 + 1], lda, + &w[imax + (kw + 1) * w_dim1], ldw, &c_b9, &w[(kw - 1) * w_dim1 + 1], + &c__1, (ftnlen)12); + } + i__1 = k - imax; + jmax = imax + idamax_(&i__1, &w[imax + 1 + (kw - 1) * w_dim1], &c__1); + rowmax = (d__1 = w[jmax + (kw - 1) * w_dim1], abs(d__1)); + if (imax > 1) { + i__1 = imax - 1; + jmax = idamax_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1); + d__2 = rowmax, d__3 = (d__1 = w[jmax + (kw - 1) * w_dim1], abs(d__1)); + rowmax = max(d__2, d__3); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else if ((d__1 = w[imax + (kw - 1) * w_dim1], abs(d__1)) >= alpha * rowmax) 
{ + kp = imax; + dcopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1); + } else { + kp = imax; + kstep = 2; + } + } + kk = k - kstep + 1; + kkw = *nb + kk - *n; + if (kp != kk) { + a[kp + kp * a_dim1] = a[kk + kk * a_dim1]; + i__1 = kk - 1 - kp; + dcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp + 1) * a_dim1], lda); + if (kp > 1) { + i__1 = kp - 1; + dcopy_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &c__1); + } + if (k < *n) { + i__1 = *n - k; + dswap_(&i__1, &a[kk + (k + 1) * a_dim1], lda, &a[kp + (k + 1) * a_dim1], lda); + } + i__1 = *n - kk + 1; + dswap_(&i__1, &w[kk + kkw * w_dim1], ldw, &w[kp + kkw * w_dim1], ldw); + } + if (kstep == 1) { + dcopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1); + r1 = 1. / a[k + k * a_dim1]; + i__1 = k - 1; + dscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1); + } else { + if (k > 2) { + d21 = w[k - 1 + kw * w_dim1]; + d11 = w[k + kw * w_dim1] / d21; + d22 = w[k - 1 + (kw - 1) * w_dim1] / d21; + t = 1. / (d11 * d22 - 1.); + d21 = t / d21; + i__1 = k - 2; + for (j = 1; j <= i__1; ++j) { + a[j + (k - 1) * a_dim1] = + d21 * (d11 * w[j + (kw - 1) * w_dim1] - w[j + kw * w_dim1]); + a[j + k * a_dim1] = + d21 * (d22 * w[j + kw * w_dim1] - w[j + (kw - 1) * w_dim1]); + } + } + a[k - 1 + (k - 1) * a_dim1] = w[k - 1 + (kw - 1) * w_dim1]; + a[k - 1 + k * a_dim1] = w[k - 1 + kw * w_dim1]; + a[k + k * a_dim1] = w[k + kw * w_dim1]; + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k - 1] = -kp; + } + k -= kstep; + goto L10; + L30: + i__1 = -(*nb); + for (j = (k - 1) / *nb * *nb + 1; i__1 < 0 ? 
j >= 1 : j <= 1; j += i__1) { + i__2 = *nb, i__3 = k - j + 1; + jb = min(i__2, i__3); + i__2 = j + jb - 1; + for (jj = j; jj <= i__2; ++jj) { + i__3 = jj - j + 1; + i__4 = *n - k; + dgemv_((char *)"No transpose", &i__3, &i__4, &c_b8, &a[j + (k + 1) * a_dim1], lda, + &w[jj + (kw + 1) * w_dim1], ldw, &c_b9, &a[j + jj * a_dim1], &c__1, + (ftnlen)12); + } + i__2 = j - 1; + i__3 = *n - k; + dgemm_((char *)"No transpose", (char *)"Transpose", &i__2, &jb, &i__3, &c_b8, &a[(k + 1) * a_dim1 + 1], + lda, &w[j + (kw + 1) * w_dim1], ldw, &c_b9, &a[j * a_dim1 + 1], lda, (ftnlen)12, + (ftnlen)9); + } + j = k + 1; + L60: + jj = j; + jp = ipiv[j]; + if (jp < 0) { + jp = -jp; + ++j; + } + ++j; + if (jp != jj && j <= *n) { + i__1 = *n - j + 1; + dswap_(&i__1, &a[jp + j * a_dim1], lda, &a[jj + j * a_dim1], lda); + } + if (j < *n) { + goto L60; + } + *kb = *n - k; + } else { + k = 1; + L70: + if (k >= *nb && *nb < *n || k > *n) { + goto L90; + } + i__1 = *n - k + 1; + dcopy_(&i__1, &a[k + k * a_dim1], &c__1, &w[k + k * w_dim1], &c__1); + i__1 = *n - k + 1; + i__2 = k - 1; + dgemv_((char *)"No transpose", &i__1, &i__2, &c_b8, &a[k + a_dim1], lda, &w[k + w_dim1], ldw, &c_b9, + &w[k + k * w_dim1], &c__1, (ftnlen)12); + kstep = 1; + absakk = (d__1 = w[k + k * w_dim1], abs(d__1)); + if (k < *n) { + i__1 = *n - k; + imax = k + idamax_(&i__1, &w[k + 1 + k * w_dim1], &c__1); + colmax = (d__1 = w[imax + k * w_dim1], abs(d__1)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0.) 
{ + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = imax - k; + dcopy_(&i__1, &a[imax + k * a_dim1], lda, &w[k + (k + 1) * w_dim1], &c__1); + i__1 = *n - imax + 1; + dcopy_(&i__1, &a[imax + imax * a_dim1], &c__1, &w[imax + (k + 1) * w_dim1], &c__1); + i__1 = *n - k + 1; + i__2 = k - 1; + dgemv_((char *)"No transpose", &i__1, &i__2, &c_b8, &a[k + a_dim1], lda, &w[imax + w_dim1], + ldw, &c_b9, &w[k + (k + 1) * w_dim1], &c__1, (ftnlen)12); + i__1 = imax - k; + jmax = k - 1 + idamax_(&i__1, &w[k + (k + 1) * w_dim1], &c__1); + rowmax = (d__1 = w[jmax + (k + 1) * w_dim1], abs(d__1)); + if (imax < *n) { + i__1 = *n - imax; + jmax = imax + idamax_(&i__1, &w[imax + 1 + (k + 1) * w_dim1], &c__1); + d__2 = rowmax, d__3 = (d__1 = w[jmax + (k + 1) * w_dim1], abs(d__1)); + rowmax = max(d__2, d__3); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else if ((d__1 = w[imax + (k + 1) * w_dim1], abs(d__1)) >= alpha * rowmax) { + kp = imax; + i__1 = *n - k + 1; + dcopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k * w_dim1], &c__1); + } else { + kp = imax; + kstep = 2; + } + } + kk = k + kstep - 1; + if (kp != kk) { + a[kp + kp * a_dim1] = a[kk + kk * a_dim1]; + i__1 = kp - kk - 1; + dcopy_(&i__1, &a[kk + 1 + kk * a_dim1], &c__1, &a[kp + (kk + 1) * a_dim1], lda); + if (kp < *n) { + i__1 = *n - kp; + dcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1 + kp * a_dim1], &c__1); + } + if (k > 1) { + i__1 = k - 1; + dswap_(&i__1, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda); + } + dswap_(&kk, &w[kk + w_dim1], ldw, &w[kp + w_dim1], ldw); + } + if (kstep == 1) { + i__1 = *n - k + 1; + dcopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], &c__1); + if (k < *n) { + r1 = 1. 
/ a[k + k * a_dim1]; + i__1 = *n - k; + dscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1); + } + } else { + if (k < *n - 1) { + d21 = w[k + 1 + k * w_dim1]; + d11 = w[k + 1 + (k + 1) * w_dim1] / d21; + d22 = w[k + k * w_dim1] / d21; + t = 1. / (d11 * d22 - 1.); + d21 = t / d21; + i__1 = *n; + for (j = k + 2; j <= i__1; ++j) { + a[j + k * a_dim1] = + d21 * (d11 * w[j + k * w_dim1] - w[j + (k + 1) * w_dim1]); + a[j + (k + 1) * a_dim1] = + d21 * (d22 * w[j + (k + 1) * w_dim1] - w[j + k * w_dim1]); + } + } + a[k + k * a_dim1] = w[k + k * w_dim1]; + a[k + 1 + k * a_dim1] = w[k + 1 + k * w_dim1]; + a[k + 1 + (k + 1) * a_dim1] = w[k + 1 + (k + 1) * w_dim1]; + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k + 1] = -kp; + } + k += kstep; + goto L70; + L90: + i__1 = *n; + i__2 = *nb; + for (j = k; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + i__3 = *nb, i__4 = *n - j + 1; + jb = min(i__3, i__4); + i__3 = j + jb - 1; + for (jj = j; jj <= i__3; ++jj) { + i__4 = j + jb - jj; + i__5 = k - 1; + dgemv_((char *)"No transpose", &i__4, &i__5, &c_b8, &a[jj + a_dim1], lda, &w[jj + w_dim1], + ldw, &c_b9, &a[jj + jj * a_dim1], &c__1, (ftnlen)12); + } + if (j + jb <= *n) { + i__3 = *n - j - jb + 1; + i__4 = k - 1; + dgemm_((char *)"No transpose", (char *)"Transpose", &i__3, &jb, &i__4, &c_b8, &a[j + jb + a_dim1], + lda, &w[j + w_dim1], ldw, &c_b9, &a[j + jb + j * a_dim1], lda, (ftnlen)12, + (ftnlen)9); + } + } + j = k - 1; + L120: + jj = j; + jp = ipiv[j]; + if (jp < 0) { + jp = -jp; + --j; + } + --j; + if (jp != jj && j >= 1) { + dswap_(&j, &a[jp + a_dim1], lda, &a[jj + a_dim1], lda); + } + if (j > 1) { + goto L120; + } + *kb = k - 1; + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dorghr.cpp b/lib/linalg/dorghr.cpp new file mode 100644 index 0000000000..80ffa7dbc5 --- /dev/null +++ b/lib/linalg/dorghr.cpp @@ -0,0 +1,94 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer 
c_n1 = -1; +int dorghr_(integer *n, integer *ilo, integer *ihi, doublereal *a, integer *lda, doublereal *tau, + doublereal *work, integer *lwork, integer *info) +{ + integer a_dim1, a_offset, i__1, i__2; + integer i__, j, nb, nh, iinfo; + extern int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + extern int dorgqr_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *); + integer lwkopt; + logical lquery; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + *info = 0; + nh = *ihi - *ilo; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1, *n)) { + *info = -2; + } else if (*ihi < min(*ilo, *n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } else if (*lwork < max(1, nh) && !lquery) { + *info = -8; + } + if (*info == 0) { + nb = ilaenv_(&c__1, (char *)"DORGQR", (char *)" ", &nh, &nh, &nh, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = max(1, nh) * nb; + work[1] = (doublereal)lwkopt; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DORGHR", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + if (*n == 0) { + work[1] = 1.; + return 0; + } + i__1 = *ilo + 1; + for (j = *ihi; j >= i__1; --j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; + } + i__2 = *ihi; + for (i__ = j + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1]; + } + i__2 = *n; + for (i__ = *ihi + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; + } + } + i__1 = *ilo; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; + } + a[j + j * a_dim1] = 1.; + } + i__1 = *n; + for (j = *ihi + 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; + } + a[j + j * 
a_dim1] = 1.; + } + if (nh > 0) { + dorgqr_(&nh, &nh, &nh, &a[*ilo + 1 + (*ilo + 1) * a_dim1], lda, &tau[*ilo], &work[1], lwork, + &iinfo); + } + work[1] = (doublereal)lwkopt; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dormhr.cpp b/lib/linalg/dormhr.cpp new file mode 100644 index 0000000000..9cb0cd6690 --- /dev/null +++ b/lib/linalg/dormhr.cpp @@ -0,0 +1,111 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +int dormhr_(char *side, char *trans, integer *m, integer *n, integer *ilo, integer *ihi, + doublereal *a, integer *lda, doublereal *tau, doublereal *c__, integer *ldc, + doublereal *work, integer *lwork, integer *info, ftnlen side_len, ftnlen trans_len) +{ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2; + char ch__1[2]; + int s_lmp_cat(char *, char **, integer *, integer *, ftnlen); + integer i1, i2, nb, mi, nh, ni, nq, nw; + logical left; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer iinfo; + extern int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + extern int dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, + ftnlen, ftnlen); + integer lwkopt; + logical lquery; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + *info = 0; + nh = *ihi - *ilo; + left = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (!left && !lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (!lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1) && + !lsame_(trans, (char *)"T", (ftnlen)1, 
(ftnlen)1)) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*ilo < 1 || *ilo > max(1, nq)) { + *info = -5; + } else if (*ihi < min(*ilo, nq) || *ihi > nq) { + *info = -6; + } else if (*lda < max(1, nq)) { + *info = -8; + } else if (*ldc < max(1, *m)) { + *info = -11; + } else if (*lwork < max(1, nw) && !lquery) { + *info = -13; + } + if (*info == 0) { + if (left) { + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_lmp_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, (char *)"DORMQR", ch__1, &nh, n, &nh, &c_n1, (ftnlen)6, (ftnlen)2); + } else { + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_lmp_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, (char *)"DORMQR", ch__1, m, &nh, &nh, &c_n1, (ftnlen)6, (ftnlen)2); + } + lwkopt = max(1, nw) * nb; + work[1] = (doublereal)lwkopt; + } + if (*info != 0) { + i__2 = -(*info); + xerbla_((char *)"DORMHR", &i__2, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + if (*m == 0 || *n == 0 || nh == 0) { + work[1] = 1.; + return 0; + } + if (left) { + mi = nh; + ni = *n; + i1 = *ilo + 1; + i2 = 1; + } else { + mi = *m; + ni = nh; + i1 = 1; + i2 = *ilo + 1; + } + dormqr_(side, trans, &mi, &ni, &nh, &a[*ilo + 1 + *ilo * a_dim1], lda, &tau[*ilo], + &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo, (ftnlen)1, (ftnlen)1); + work[1] = (doublereal)lwkopt; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dsyconv.cpp b/lib/linalg/dsyconv.cpp new file mode 100644 index 0000000000..9d4a2908ae --- /dev/null +++ b/lib/linalg/dsyconv.cpp @@ -0,0 +1,199 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int dsyconv_(char *uplo, char *way, integer *n, doublereal *a, integer *lda, integer *ipiv, + doublereal *e, integer *info, ftnlen uplo_len, ftnlen way_len) +{ + integer a_dim1, a_offset, i__1; + integer i__, j, ip; + doublereal temp; + extern logical lsame_(char *, char *, 
ftnlen, ftnlen); + logical upper; + extern int xerbla_(char *, integer *, ftnlen); + logical convert; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + --e; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + convert = lsame_(way, (char *)"C", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (!convert && !lsame_(way, (char *)"R", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DSYCONV", &i__1, (ftnlen)7); + return 0; + } + if (*n == 0) { + return 0; + } + if (upper) { + if (convert) { + i__ = *n; + e[1] = 0.; + while (i__ > 1) { + if (ipiv[i__] < 0) { + e[i__] = a[i__ - 1 + i__ * a_dim1]; + e[i__ - 1] = 0.; + a[i__ - 1 + i__ * a_dim1] = 0.; + --i__; + } else { + e[i__] = 0.; + } + --i__; + } + i__ = *n; + while (i__ >= 1) { + if (ipiv[i__] > 0) { + ip = ipiv[i__]; + if (i__ < *n) { + i__1 = *n; + for (j = i__ + 1; j <= i__1; ++j) { + temp = a[ip + j * a_dim1]; + a[ip + j * a_dim1] = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = temp; + } + } + } else { + ip = -ipiv[i__]; + if (i__ < *n) { + i__1 = *n; + for (j = i__ + 1; j <= i__1; ++j) { + temp = a[ip + j * a_dim1]; + a[ip + j * a_dim1] = a[i__ - 1 + j * a_dim1]; + a[i__ - 1 + j * a_dim1] = temp; + } + } + --i__; + } + --i__; + } + } else { + i__ = 1; + while (i__ <= *n) { + if (ipiv[i__] > 0) { + ip = ipiv[i__]; + if (i__ < *n) { + i__1 = *n; + for (j = i__ + 1; j <= i__1; ++j) { + temp = a[ip + j * a_dim1]; + a[ip + j * a_dim1] = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = temp; + } + } + } else { + ip = -ipiv[i__]; + ++i__; + if (i__ < *n) { + i__1 = *n; + for (j = i__ + 1; j <= i__1; ++j) { + temp = a[ip + j * a_dim1]; + a[ip + j * a_dim1] = a[i__ - 1 + j * a_dim1]; + a[i__ - 1 + j * a_dim1] = temp; + } + } + } + ++i__; + } + i__ = *n; + while (i__ > 1) { + if 
(ipiv[i__] < 0) { + a[i__ - 1 + i__ * a_dim1] = e[i__]; + --i__; + } + --i__; + } + } + } else { + if (convert) { + i__ = 1; + e[*n] = 0.; + while (i__ <= *n) { + if (i__ < *n && ipiv[i__] < 0) { + e[i__] = a[i__ + 1 + i__ * a_dim1]; + e[i__ + 1] = 0.; + a[i__ + 1 + i__ * a_dim1] = 0.; + ++i__; + } else { + e[i__] = 0.; + } + ++i__; + } + i__ = 1; + while (i__ <= *n) { + if (ipiv[i__] > 0) { + ip = ipiv[i__]; + if (i__ > 1) { + i__1 = i__ - 1; + for (j = 1; j <= i__1; ++j) { + temp = a[ip + j * a_dim1]; + a[ip + j * a_dim1] = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = temp; + } + } + } else { + ip = -ipiv[i__]; + if (i__ > 1) { + i__1 = i__ - 1; + for (j = 1; j <= i__1; ++j) { + temp = a[ip + j * a_dim1]; + a[ip + j * a_dim1] = a[i__ + 1 + j * a_dim1]; + a[i__ + 1 + j * a_dim1] = temp; + } + } + ++i__; + } + ++i__; + } + } else { + i__ = *n; + while (i__ >= 1) { + if (ipiv[i__] > 0) { + ip = ipiv[i__]; + if (i__ > 1) { + i__1 = i__ - 1; + for (j = 1; j <= i__1; ++j) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = a[ip + j * a_dim1]; + a[ip + j * a_dim1] = temp; + } + } + } else { + ip = -ipiv[i__]; + --i__; + if (i__ > 1) { + i__1 = i__ - 1; + for (j = 1; j <= i__1; ++j) { + temp = a[i__ + 1 + j * a_dim1]; + a[i__ + 1 + j * a_dim1] = a[ip + j * a_dim1]; + a[ip + j * a_dim1] = temp; + } + } + } + --i__; + } + i__ = 1; + while (i__ <= *n - 1) { + if (ipiv[i__] < 0) { + a[i__ + 1 + i__ * a_dim1] = e[i__]; + ++i__; + } + ++i__; + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dsyr.cpp b/lib/linalg/dsyr.cpp new file mode 100644 index 0000000000..6806baea29 --- /dev/null +++ b/lib/linalg/dsyr.cpp @@ -0,0 +1,167 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c_n1 = -1; +int dsyr_(char *uplo, integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *a, + integer *lda, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2; + integer i__, j, ix, jx, kx, info; + doublereal temp; 
+ extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int xerbla_(char *, integer *, ftnlen); + --x; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + info = 0; + if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*lda < max(1, *n)) { + info = 7; + } + if (info != 0) { + xerbla_((char *)"DSYR ", &info, (ftnlen)6); + return 0; + } + if (*n == 0 || *alpha == 0.) { + return 0; + } + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.) { + temp = *alpha * x[j]; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[i__] * temp; + } + } + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) { + temp = *alpha * x[jx]; + ix = kx; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[ix] * temp; + ix += *incx; + } + } + jx += *incx; + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.) { + temp = *alpha * x[j]; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[i__] * temp; + } + } + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) 
{ + temp = *alpha * x[jx]; + ix = jx; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[ix] * temp; + ix += *incx; + } + } + jx += *incx; + } + } + } + return 0; +} +int dsysv_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, + doublereal *b, integer *ldb, doublereal *work, integer *lwork, integer *info, + ftnlen uplo_len) +{ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int xerbla_(char *, integer *, ftnlen), + dsytrf_(char *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, + integer *, ftnlen); + integer lwkopt; + logical lquery; + extern int dsytrs_(char *, integer *, integer *, doublereal *, integer *, integer *, + doublereal *, integer *, integer *, ftnlen), + dsytrs2_(char *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --work; + *info = 0; + lquery = *lwork == -1; + if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } else if (*ldb < max(1, *n)) { + *info = -8; + } else if (*lwork < 1 && !lquery) { + *info = -10; + } + if (*info == 0) { + if (*n == 0) { + lwkopt = 1; + } else { + dsytrf_(uplo, n, &a[a_offset], lda, &ipiv[1], &work[1], &c_n1, info, (ftnlen)1); + lwkopt = (integer)work[1]; + } + work[1] = (doublereal)lwkopt; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DSYSV ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + dsytrf_(uplo, n, &a[a_offset], lda, &ipiv[1], &work[1], lwork, info, (ftnlen)1); + if (*info == 0) { + if (*lwork < *n) { + dsytrs_(uplo, n, nrhs, 
&a[a_offset], lda, &ipiv[1], &b[b_offset], ldb, info, (ftnlen)1); + } else { + dsytrs2_(uplo, n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset], ldb, &work[1], info, + (ftnlen)1); + } + } + work[1] = (doublereal)lwkopt; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dsytf2.cpp b/lib/linalg/dsytf2.cpp new file mode 100644 index 0000000000..8b48de1da4 --- /dev/null +++ b/lib/linalg/dsytf2.cpp @@ -0,0 +1,246 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +int dsytf2_(char *uplo, integer *n, doublereal *a, integer *lda, integer *ipiv, integer *info, + ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1, d__2, d__3; + double sqrt(doublereal); + integer i__, j, k; + doublereal t, r1, d11, d12, d21, d22; + integer kk, kp; + doublereal wk, wkm1, wkp1; + integer imax, jmax; + extern int dsyr_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, ftnlen); + doublereal alpha; + extern int dscal_(integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int dswap_(integer *, doublereal *, integer *, doublereal *, integer *); + integer kstep; + logical upper; + doublereal absakk; + extern integer idamax_(integer *, doublereal *, integer *); + extern logical disnan_(doublereal *); + extern int xerbla_(char *, integer *, ftnlen); + doublereal colmax, rowmax; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DSYTF2", &i__1, (ftnlen)6); + return 0; + } + alpha = (sqrt(17.) + 1.) 
/ 8.; + if (upper) { + k = *n; + L10: + if (k < 1) { + goto L70; + } + kstep = 1; + absakk = (d__1 = a[k + k * a_dim1], abs(d__1)); + if (k > 1) { + i__1 = k - 1; + imax = idamax_(&i__1, &a[k * a_dim1 + 1], &c__1); + colmax = (d__1 = a[imax + k * a_dim1], abs(d__1)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0. || disnan_(&absakk)) { + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = k - imax; + jmax = imax + idamax_(&i__1, &a[imax + (imax + 1) * a_dim1], lda); + rowmax = (d__1 = a[imax + jmax * a_dim1], abs(d__1)); + if (imax > 1) { + i__1 = imax - 1; + jmax = idamax_(&i__1, &a[imax * a_dim1 + 1], &c__1); + d__2 = rowmax, d__3 = (d__1 = a[jmax + imax * a_dim1], abs(d__1)); + rowmax = max(d__2, d__3); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else if ((d__1 = a[imax + imax * a_dim1], abs(d__1)) >= alpha * rowmax) { + kp = imax; + } else { + kp = imax; + kstep = 2; + } + } + kk = k - kstep + 1; + if (kp != kk) { + i__1 = kp - 1; + dswap_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &c__1); + i__1 = kk - kp - 1; + dswap_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp + 1) * a_dim1], lda); + t = a[kk + kk * a_dim1]; + a[kk + kk * a_dim1] = a[kp + kp * a_dim1]; + a[kp + kp * a_dim1] = t; + if (kstep == 2) { + t = a[k - 1 + k * a_dim1]; + a[k - 1 + k * a_dim1] = a[kp + k * a_dim1]; + a[kp + k * a_dim1] = t; + } + } + if (kstep == 1) { + r1 = 1. / a[k + k * a_dim1]; + i__1 = k - 1; + d__1 = -r1; + dsyr_(uplo, &i__1, &d__1, &a[k * a_dim1 + 1], &c__1, &a[a_offset], lda, (ftnlen)1); + i__1 = k - 1; + dscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1); + } else { + if (k > 2) { + d12 = a[k - 1 + k * a_dim1]; + d22 = a[k - 1 + (k - 1) * a_dim1] / d12; + d11 = a[k + k * a_dim1] / d12; + t = 1. 
/ (d11 * d22 - 1.); + d12 = t / d12; + for (j = k - 2; j >= 1; --j) { + wkm1 = d12 * (d11 * a[j + (k - 1) * a_dim1] - a[j + k * a_dim1]); + wk = d12 * (d22 * a[j + k * a_dim1] - a[j + (k - 1) * a_dim1]); + for (i__ = j; i__ >= 1; --i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] - a[i__ + k * a_dim1] * wk - + a[i__ + (k - 1) * a_dim1] * wkm1; + } + a[j + k * a_dim1] = wk; + a[j + (k - 1) * a_dim1] = wkm1; + } + } + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k - 1] = -kp; + } + k -= kstep; + goto L10; + } else { + k = 1; + L40: + if (k > *n) { + goto L70; + } + kstep = 1; + absakk = (d__1 = a[k + k * a_dim1], abs(d__1)); + if (k < *n) { + i__1 = *n - k; + imax = k + idamax_(&i__1, &a[k + 1 + k * a_dim1], &c__1); + colmax = (d__1 = a[imax + k * a_dim1], abs(d__1)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0. || disnan_(&absakk)) { + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = imax - k; + jmax = k - 1 + idamax_(&i__1, &a[imax + k * a_dim1], lda); + rowmax = (d__1 = a[imax + jmax * a_dim1], abs(d__1)); + if (imax < *n) { + i__1 = *n - imax; + jmax = imax + idamax_(&i__1, &a[imax + 1 + imax * a_dim1], &c__1); + d__2 = rowmax, d__3 = (d__1 = a[jmax + imax * a_dim1], abs(d__1)); + rowmax = max(d__2, d__3); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else if ((d__1 = a[imax + imax * a_dim1], abs(d__1)) >= alpha * rowmax) { + kp = imax; + } else { + kp = imax; + kstep = 2; + } + } + kk = k + kstep - 1; + if (kp != kk) { + if (kp < *n) { + i__1 = *n - kp; + dswap_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1 + kp * a_dim1], &c__1); + } + i__1 = kp - kk - 1; + dswap_(&i__1, &a[kk + 1 + kk * a_dim1], &c__1, &a[kp + (kk + 1) * a_dim1], lda); + t = a[kk + kk * a_dim1]; + a[kk + kk * a_dim1] = a[kp + kp * a_dim1]; + a[kp + kp * a_dim1] = t; + if (kstep == 2) { + t = a[k + 1 + k * a_dim1]; + a[k + 1 + k * a_dim1] = a[kp 
+ k * a_dim1]; + a[kp + k * a_dim1] = t; + } + } + if (kstep == 1) { + if (k < *n) { + d11 = 1. / a[k + k * a_dim1]; + i__1 = *n - k; + d__1 = -d11; + dsyr_(uplo, &i__1, &d__1, &a[k + 1 + k * a_dim1], &c__1, + &a[k + 1 + (k + 1) * a_dim1], lda, (ftnlen)1); + i__1 = *n - k; + dscal_(&i__1, &d11, &a[k + 1 + k * a_dim1], &c__1); + } + } else { + if (k < *n - 1) { + d21 = a[k + 1 + k * a_dim1]; + d11 = a[k + 1 + (k + 1) * a_dim1] / d21; + d22 = a[k + k * a_dim1] / d21; + t = 1. / (d11 * d22 - 1.); + d21 = t / d21; + i__1 = *n; + for (j = k + 2; j <= i__1; ++j) { + wk = d21 * (d11 * a[j + k * a_dim1] - a[j + (k + 1) * a_dim1]); + wkp1 = d21 * (d22 * a[j + (k + 1) * a_dim1] - a[j + k * a_dim1]); + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] - a[i__ + k * a_dim1] * wk - + a[i__ + (k + 1) * a_dim1] * wkp1; + } + a[j + k * a_dim1] = wk; + a[j + (k + 1) * a_dim1] = wkp1; + } + } + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k + 1] = -kp; + } + k += kstep; + goto L40; + } +L70: + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dsytrf.cpp b/lib/linalg/dsytrf.cpp new file mode 100644 index 0000000000..6bfc84ab87 --- /dev/null +++ b/lib/linalg/dsytrf.cpp @@ -0,0 +1,123 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +int dsytrf_(char *uplo, integer *n, doublereal *a, integer *lda, integer *ipiv, doublereal *work, + integer *lwork, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2; + integer j, k, kb, nb, iws; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nbmin, iinfo; + logical upper; + extern int dsytf2_(char *, integer *, doublereal *, integer *, integer *, integer *, ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + extern 
int dlasyf_(char *, integer *, integer *, integer *, doublereal *, integer *, integer *, + doublereal *, integer *, integer *, ftnlen); + integer ldwork, lwkopt; + logical lquery; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + --work; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } else if (*lwork < 1 && !lquery) { + *info = -7; + } + if (*info == 0) { + nb = ilaenv_(&c__1, (char *)"DSYTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = *n * nb; + work[1] = (doublereal)lwkopt; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DSYTRF", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + nbmin = 2; + ldwork = *n; + if (nb > 1 && nb < *n) { + iws = ldwork * nb; + if (*lwork < iws) { + i__1 = *lwork / ldwork; + nb = max(i__1, 1); + i__1 = 2, + i__2 = ilaenv_(&c__2, (char *)"DSYTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1, i__2); + } + } else { + iws = 1; + } + if (nb < nbmin) { + nb = *n; + } + if (upper) { + k = *n; + L10: + if (k < 1) { + goto L40; + } + if (k > nb) { + dlasyf_(uplo, &k, &nb, &kb, &a[a_offset], lda, &ipiv[1], &work[1], &ldwork, &iinfo, + (ftnlen)1); + } else { + dsytf2_(uplo, &k, &a[a_offset], lda, &ipiv[1], &iinfo, (ftnlen)1); + kb = k; + } + if (*info == 0 && iinfo > 0) { + *info = iinfo; + } + k -= kb; + goto L10; + } else { + k = 1; + L20: + if (k > *n) { + goto L40; + } + if (k <= *n - nb) { + i__1 = *n - k + 1; + dlasyf_(uplo, &i__1, &nb, &kb, &a[k + k * a_dim1], lda, &ipiv[k], &work[1], &ldwork, + &iinfo, (ftnlen)1); + } else { + i__1 = *n - k + 1; + dsytf2_(uplo, &i__1, &a[k + k * a_dim1], lda, &ipiv[k], &iinfo, (ftnlen)1); + kb = *n - k + 1; + } + if (*info == 0 && iinfo > 0) { + *info = iinfo + k - 1; + } + i__1 = k + 
kb - 1; + for (j = k; j <= i__1; ++j) { + if (ipiv[j] > 0) { + ipiv[j] = ipiv[j] + k - 1; + } else { + ipiv[j] = ipiv[j] - k + 1; + } + } + k += kb; + goto L20; + } +L40: + work[1] = (doublereal)lwkopt; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dsytrs.cpp b/lib/linalg/dsytrs.cpp new file mode 100644 index 0000000000..c9f849879b --- /dev/null +++ b/lib/linalg/dsytrs.cpp @@ -0,0 +1,214 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublereal c_b7 = -1.; +static integer c__1 = 1; +static doublereal c_b19 = 1.; +int dsytrs_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, + doublereal *b, integer *ldb, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + doublereal d__1; + integer j, k; + doublereal ak, bk; + integer kp; + doublereal akm1, bkm1; + extern int dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *); + doublereal akm1k; + extern int dscal_(integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + doublereal denom; + extern int dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen), + dswap_(integer *, doublereal *, integer *, doublereal *, integer *); + logical upper; + extern int xerbla_(char *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } else if (*ldb < max(1, *n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char 
*)"DSYTRS", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0 || *nrhs == 0) { + return 0; + } + if (upper) { + k = *n; + L10: + if (k < 1) { + goto L30; + } + if (ipiv[k] > 0) { + kp = ipiv[k]; + if (kp != k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + i__1 = k - 1; + dger_(&i__1, nrhs, &c_b7, &a[k * a_dim1 + 1], &c__1, &b[k + b_dim1], ldb, + &b[b_dim1 + 1], ldb); + d__1 = 1. / a[k + k * a_dim1]; + dscal_(nrhs, &d__1, &b[k + b_dim1], ldb); + --k; + } else { + kp = -ipiv[k]; + if (kp != k - 1) { + dswap_(nrhs, &b[k - 1 + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + i__1 = k - 2; + dger_(&i__1, nrhs, &c_b7, &a[k * a_dim1 + 1], &c__1, &b[k + b_dim1], ldb, + &b[b_dim1 + 1], ldb); + i__1 = k - 2; + dger_(&i__1, nrhs, &c_b7, &a[(k - 1) * a_dim1 + 1], &c__1, &b[k - 1 + b_dim1], ldb, + &b[b_dim1 + 1], ldb); + akm1k = a[k - 1 + k * a_dim1]; + akm1 = a[k - 1 + (k - 1) * a_dim1] / akm1k; + ak = a[k + k * a_dim1] / akm1k; + denom = akm1 * ak - 1.; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + bkm1 = b[k - 1 + j * b_dim1] / akm1k; + bk = b[k + j * b_dim1] / akm1k; + b[k - 1 + j * b_dim1] = (ak * bkm1 - bk) / denom; + b[k + j * b_dim1] = (akm1 * bk - bkm1) / denom; + } + k += -2; + } + goto L10; + L30: + k = 1; + L40: + if (k > *n) { + goto L50; + } + if (ipiv[k] > 0) { + i__1 = k - 1; + dgemv_((char *)"Transpose", &i__1, nrhs, &c_b7, &b[b_offset], ldb, &a[k * a_dim1 + 1], &c__1, + &c_b19, &b[k + b_dim1], ldb, (ftnlen)9); + kp = ipiv[k]; + if (kp != k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + ++k; + } else { + i__1 = k - 1; + dgemv_((char *)"Transpose", &i__1, nrhs, &c_b7, &b[b_offset], ldb, &a[k * a_dim1 + 1], &c__1, + &c_b19, &b[k + b_dim1], ldb, (ftnlen)9); + i__1 = k - 1; + dgemv_((char *)"Transpose", &i__1, nrhs, &c_b7, &b[b_offset], ldb, &a[(k + 1) * a_dim1 + 1], + &c__1, &c_b19, &b[k + 1 + b_dim1], ldb, (ftnlen)9); + kp = -ipiv[k]; + if (kp != k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + k += 2; + 
} + goto L40; + L50:; + } else { + k = 1; + L60: + if (k > *n) { + goto L80; + } + if (ipiv[k] > 0) { + kp = ipiv[k]; + if (kp != k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + if (k < *n) { + i__1 = *n - k; + dger_(&i__1, nrhs, &c_b7, &a[k + 1 + k * a_dim1], &c__1, &b[k + b_dim1], ldb, + &b[k + 1 + b_dim1], ldb); + } + d__1 = 1. / a[k + k * a_dim1]; + dscal_(nrhs, &d__1, &b[k + b_dim1], ldb); + ++k; + } else { + kp = -ipiv[k]; + if (kp != k + 1) { + dswap_(nrhs, &b[k + 1 + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + if (k < *n - 1) { + i__1 = *n - k - 1; + dger_(&i__1, nrhs, &c_b7, &a[k + 2 + k * a_dim1], &c__1, &b[k + b_dim1], ldb, + &b[k + 2 + b_dim1], ldb); + i__1 = *n - k - 1; + dger_(&i__1, nrhs, &c_b7, &a[k + 2 + (k + 1) * a_dim1], &c__1, &b[k + 1 + b_dim1], + ldb, &b[k + 2 + b_dim1], ldb); + } + akm1k = a[k + 1 + k * a_dim1]; + akm1 = a[k + k * a_dim1] / akm1k; + ak = a[k + 1 + (k + 1) * a_dim1] / akm1k; + denom = akm1 * ak - 1.; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + bkm1 = b[k + j * b_dim1] / akm1k; + bk = b[k + 1 + j * b_dim1] / akm1k; + b[k + j * b_dim1] = (ak * bkm1 - bk) / denom; + b[k + 1 + j * b_dim1] = (akm1 * bk - bkm1) / denom; + } + k += 2; + } + goto L60; + L80: + k = *n; + L90: + if (k < 1) { + goto L100; + } + if (ipiv[k] > 0) { + if (k < *n) { + i__1 = *n - k; + dgemv_((char *)"Transpose", &i__1, nrhs, &c_b7, &b[k + 1 + b_dim1], ldb, + &a[k + 1 + k * a_dim1], &c__1, &c_b19, &b[k + b_dim1], ldb, (ftnlen)9); + } + kp = ipiv[k]; + if (kp != k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + --k; + } else { + if (k < *n) { + i__1 = *n - k; + dgemv_((char *)"Transpose", &i__1, nrhs, &c_b7, &b[k + 1 + b_dim1], ldb, + &a[k + 1 + k * a_dim1], &c__1, &c_b19, &b[k + b_dim1], ldb, (ftnlen)9); + i__1 = *n - k; + dgemv_((char *)"Transpose", &i__1, nrhs, &c_b7, &b[k + 1 + b_dim1], ldb, + &a[k + 1 + (k - 1) * a_dim1], &c__1, &c_b19, &b[k - 1 + b_dim1], ldb, + (ftnlen)9); + } + kp = -ipiv[k]; + if (kp != 
k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + k += -2; + } + goto L90; + L100:; + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dsytrs2.cpp b/lib/linalg/dsytrs2.cpp new file mode 100644 index 0000000000..2d2bc90525 --- /dev/null +++ b/lib/linalg/dsytrs2.cpp @@ -0,0 +1,180 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublereal c_b10 = 1.; +int dsytrs2_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, + doublereal *b, integer *ldb, doublereal *work, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + doublereal d__1; + integer i__, j, k; + doublereal ak, bk; + integer kp; + doublereal akm1, bkm1, akm1k; + extern int dscal_(integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + doublereal denom; + integer iinfo; + extern int dswap_(integer *, doublereal *, integer *, doublereal *, integer *), + dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen); + logical upper; + extern int xerbla_(char *, integer *, ftnlen), + dsyconv_(char *, char *, integer *, doublereal *, integer *, integer *, doublereal *, + integer *, ftnlen, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --work; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } else if (*ldb < max(1, *n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DSYTRS2", &i__1, (ftnlen)7); + return 0; + } + if (*n == 0 || *nrhs == 0) { + return 0; + } + dsyconv_(uplo, (char 
*)"C", n, &a[a_offset], lda, &ipiv[1], &work[1], &iinfo, (ftnlen)1, (ftnlen)1); + if (upper) { + k = *n; + while (k >= 1) { + if (ipiv[k] > 0) { + kp = ipiv[k]; + if (kp != k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + --k; + } else { + kp = -ipiv[k]; + if (kp == -ipiv[k - 1]) { + dswap_(nrhs, &b[k - 1 + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + k += -2; + } + } + dtrsm_((char *)"L", (char *)"U", (char *)"N", (char *)"U", n, nrhs, &c_b10, &a[a_offset], lda, &b[b_offset], ldb, (ftnlen)1, + (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__ = *n; + while (i__ >= 1) { + if (ipiv[i__] > 0) { + d__1 = 1. / a[i__ + i__ * a_dim1]; + dscal_(nrhs, &d__1, &b[i__ + b_dim1], ldb); + } else if (i__ > 1) { + if (ipiv[i__ - 1] == ipiv[i__]) { + akm1k = work[i__]; + akm1 = a[i__ - 1 + (i__ - 1) * a_dim1] / akm1k; + ak = a[i__ + i__ * a_dim1] / akm1k; + denom = akm1 * ak - 1.; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + bkm1 = b[i__ - 1 + j * b_dim1] / akm1k; + bk = b[i__ + j * b_dim1] / akm1k; + b[i__ - 1 + j * b_dim1] = (ak * bkm1 - bk) / denom; + b[i__ + j * b_dim1] = (akm1 * bk - bkm1) / denom; + } + --i__; + } + } + --i__; + } + dtrsm_((char *)"L", (char *)"U", (char *)"T", (char *)"U", n, nrhs, &c_b10, &a[a_offset], lda, &b[b_offset], ldb, (ftnlen)1, + (ftnlen)1, (ftnlen)1, (ftnlen)1); + k = 1; + while (k <= *n) { + if (ipiv[k] > 0) { + kp = ipiv[k]; + if (kp != k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + ++k; + } else { + kp = -ipiv[k]; + if (k < *n && kp == -ipiv[k + 1]) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + k += 2; + } + } + } else { + k = 1; + while (k <= *n) { + if (ipiv[k] > 0) { + kp = ipiv[k]; + if (kp != k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + ++k; + } else { + kp = -ipiv[k + 1]; + if (kp == -ipiv[k]) { + dswap_(nrhs, &b[k + 1 + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + k += 2; + } + } + dtrsm_((char *)"L", (char *)"L", (char *)"N", (char *)"U", n, nrhs, &c_b10, 
&a[a_offset], lda, &b[b_offset], ldb, (ftnlen)1, + (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__ = 1; + while (i__ <= *n) { + if (ipiv[i__] > 0) { + d__1 = 1. / a[i__ + i__ * a_dim1]; + dscal_(nrhs, &d__1, &b[i__ + b_dim1], ldb); + } else { + akm1k = work[i__]; + akm1 = a[i__ + i__ * a_dim1] / akm1k; + ak = a[i__ + 1 + (i__ + 1) * a_dim1] / akm1k; + denom = akm1 * ak - 1.; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + bkm1 = b[i__ + j * b_dim1] / akm1k; + bk = b[i__ + 1 + j * b_dim1] / akm1k; + b[i__ + j * b_dim1] = (ak * bkm1 - bk) / denom; + b[i__ + 1 + j * b_dim1] = (akm1 * bk - bkm1) / denom; + } + ++i__; + } + ++i__; + } + dtrsm_((char *)"L", (char *)"L", (char *)"T", (char *)"U", n, nrhs, &c_b10, &a[a_offset], lda, &b[b_offset], ldb, (ftnlen)1, + (ftnlen)1, (ftnlen)1, (ftnlen)1); + k = *n; + while (k >= 1) { + if (ipiv[k] > 0) { + kp = ipiv[k]; + if (kp != k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + --k; + } else { + kp = -ipiv[k]; + if (k > 1 && kp == -ipiv[k - 1]) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + k += -2; + } + } + } + dsyconv_(uplo, (char *)"R", n, &a[a_offset], lda, &ipiv[1], &work[1], &iinfo, (ftnlen)1, (ftnlen)1); + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dtrevc3.cpp b/lib/linalg/dtrevc3.cpp new file mode 100644 index 0000000000..bd1a0a379e --- /dev/null +++ b/lib/linalg/dtrevc3.cpp @@ -0,0 +1,858 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +static doublereal c_b17 = 0.; +static logical c_false = FALSE_; +static doublereal c_b29 = 1.; +static logical c_true = TRUE_; +int dtrevc3_(char *side, char *howmny, logical *select, integer *n, doublereal *t, integer *ldt, + doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, integer *mm, integer *m, + doublereal *work, integer *lwork, integer *info, ftnlen side_len, ftnlen howmny_len) +{ + address a__1[2]; + integer 
t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1[2], i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4; + char ch__1[2]; + int s_lmp_cat(char *, char **, integer *, integer *, ftnlen); + double sqrt(doublereal); + integer i__, j, k; + doublereal x[4]; + integer j1, j2, iscomplex[128], nb, ii, ki, ip, is, iv; + doublereal wi, wr; + integer ki2; + doublereal rec, ulp, beta, emax; + logical pair; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, integer *); + logical allv; + integer ierr; + doublereal unfl, ovfl, smin; + logical over; + doublereal vmax; + integer jnxt; + extern int dscal_(integer *, doublereal *, doublereal *, integer *); + doublereal scale; + extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, + ftnlen, ftnlen); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen); + doublereal remax; + extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); + logical leftv, bothv; + extern int daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); + doublereal vcrit; + logical somev; + doublereal xnorm; + extern int dlaln2_(logical *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *), + dlabad_(doublereal *, doublereal *); + extern doublereal dlamch_(char *, ftnlen); + extern integer idamax_(integer *, doublereal *, integer *); + extern int dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char 
*, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + extern int dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, + integer *, ftnlen); + doublereal bignum; + logical rightv; + integer maxwrk; + doublereal smlnum; + logical lquery; + --select; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + bothv = lsame_(side, (char *)"B", (ftnlen)1, (ftnlen)1); + rightv = lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1) || bothv; + leftv = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1) || bothv; + allv = lsame_(howmny, (char *)"A", (ftnlen)1, (ftnlen)1); + over = lsame_(howmny, (char *)"B", (ftnlen)1, (ftnlen)1); + somev = lsame_(howmny, (char *)"S", (ftnlen)1, (ftnlen)1); + *info = 0; + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = howmny; + s_lmp_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, (char *)"DTREVC", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)2); + maxwrk = *n + (*n << 1) * nb; + work[1] = (doublereal)maxwrk; + lquery = *lwork == -1; + if (!rightv && !leftv) { + *info = -1; + } else if (!allv && !over && !somev) { + *info = -2; + } else if (*n < 0) { + *info = -4; + } else if (*ldt < max(1, *n)) { + *info = -6; + } else if (*ldvl < 1 || leftv && *ldvl < *n) { + *info = -8; + } else if (*ldvr < 1 || rightv && *ldvr < *n) { + *info = -10; + } else { + i__2 = 1, i__3 = *n * 3; + if (*lwork < max(i__2, i__3) && !lquery) { + *info = -14; + } else { + if (somev) { + *m = 0; + pair = FALSE_; + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + if (pair) { + pair = FALSE_; + select[j] = FALSE_; + } else { + if (j < *n) { + if (t[j + 1 + j * t_dim1] == 0.) 
{ + if (select[j]) { + ++(*m); + } + } else { + pair = TRUE_; + if (select[j] || select[j + 1]) { + select[j] = TRUE_; + *m += 2; + } + } + } else { + if (select[*n]) { + ++(*m); + } + } + } + } + } else { + *m = *n; + } + if (*mm < *m) { + *info = -11; + } + } + } + if (*info != 0) { + i__2 = -(*info); + xerbla_((char *)"DTREVC3", &i__2, (ftnlen)7); + return 0; + } else if (lquery) { + return 0; + } + if (*n == 0) { + return 0; + } + if (over && *lwork >= *n + (*n << 4)) { + nb = (*lwork - *n) / (*n << 1); + nb = min(nb, 128); + i__2 = (nb << 1) + 1; + dlaset_((char *)"F", n, &i__2, &c_b17, &c_b17, &work[1], n, (ftnlen)1); + } else { + nb = 1; + } + unfl = dlamch_((char *)"Safe minimum", (ftnlen)12); + ovfl = 1. / unfl; + dlabad_(&unfl, &ovfl); + ulp = dlamch_((char *)"Precision", (ftnlen)9); + smlnum = unfl * (*n / ulp); + bignum = (1. - ulp) / smlnum; + work[1] = 0.; + i__2 = *n; + for (j = 2; j <= i__2; ++j) { + work[j] = 0.; + i__3 = j - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + work[j] += (d__1 = t[i__ + j * t_dim1], abs(d__1)); + } + } + if (rightv) { + iv = 2; + if (nb > 2) { + iv = nb; + } + ip = 0; + is = *m; + for (ki = *n; ki >= 1; --ki) { + if (ip == -1) { + ip = 1; + goto L140; + } else if (ki == 1) { + ip = 0; + } else if (t[ki + (ki - 1) * t_dim1] == 0.) 
{ + ip = 0; + } else { + ip = -1; + } + if (somev) { + if (ip == 0) { + if (!select[ki]) { + goto L140; + } + } else { + if (!select[ki - 1]) { + goto L140; + } + } + } + wr = t[ki + ki * t_dim1]; + wi = 0.; + if (ip != 0) { + wi = sqrt((d__1 = t[ki + (ki - 1) * t_dim1], abs(d__1))) * + sqrt((d__2 = t[ki - 1 + ki * t_dim1], abs(d__2))); + } + d__1 = ulp * (abs(wr) + abs(wi)); + smin = max(d__1, smlnum); + if (ip == 0) { + work[ki + iv * *n] = 1.; + i__2 = ki - 1; + for (k = 1; k <= i__2; ++k) { + work[k + iv * *n] = -t[k + ki * t_dim1]; + } + jnxt = ki - 1; + for (j = ki - 1; j >= 1; --j) { + if (j > jnxt) { + goto L60; + } + j1 = j; + j2 = j; + jnxt = j - 1; + if (j > 1) { + if (t[j + (j - 1) * t_dim1] != 0.) { + j1 = j - 1; + jnxt = j - 2; + } + } + if (j1 == j2) { + dlaln2_(&c_false, &c__1, &c__1, &smin, &c_b29, &t[j + j * t_dim1], ldt, + &c_b29, &c_b29, &work[j + iv * *n], n, &wr, &c_b17, x, &c__2, + &scale, &xnorm, &ierr); + if (xnorm > 1.) { + if (work[j] > bignum / xnorm) { + x[0] /= xnorm; + scale /= xnorm; + } + } + if (scale != 1.) { + dscal_(&ki, &scale, &work[iv * *n + 1], &c__1); + } + work[j + iv * *n] = x[0]; + i__2 = j - 1; + d__1 = -x[0]; + daxpy_(&i__2, &d__1, &t[j * t_dim1 + 1], &c__1, &work[iv * *n + 1], &c__1); + } else { + dlaln2_(&c_false, &c__2, &c__1, &smin, &c_b29, &t[j - 1 + (j - 1) * t_dim1], + ldt, &c_b29, &c_b29, &work[j - 1 + iv * *n], n, &wr, &c_b17, x, + &c__2, &scale, &xnorm, &ierr); + if (xnorm > 1.) { + d__1 = work[j - 1], d__2 = work[j]; + beta = max(d__1, d__2); + if (beta > bignum / xnorm) { + x[0] /= xnorm; + x[1] /= xnorm; + scale /= xnorm; + } + } + if (scale != 1.) 
{ + dscal_(&ki, &scale, &work[iv * *n + 1], &c__1); + } + work[j - 1 + iv * *n] = x[0]; + work[j + iv * *n] = x[1]; + i__2 = j - 2; + d__1 = -x[0]; + daxpy_(&i__2, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, &work[iv * *n + 1], + &c__1); + i__2 = j - 2; + d__1 = -x[1]; + daxpy_(&i__2, &d__1, &t[j * t_dim1 + 1], &c__1, &work[iv * *n + 1], &c__1); + } + L60:; + } + if (!over) { + dcopy_(&ki, &work[iv * *n + 1], &c__1, &vr[is * vr_dim1 + 1], &c__1); + ii = idamax_(&ki, &vr[is * vr_dim1 + 1], &c__1); + remax = 1. / (d__1 = vr[ii + is * vr_dim1], abs(d__1)); + dscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + vr[k + is * vr_dim1] = 0.; + } + } else if (nb == 1) { + if (ki > 1) { + i__2 = ki - 1; + dgemv_((char *)"N", n, &i__2, &c_b29, &vr[vr_offset], ldvr, &work[iv * *n + 1], + &c__1, &work[ki + iv * *n], &vr[ki * vr_dim1 + 1], &c__1, (ftnlen)1); + } + ii = idamax_(n, &vr[ki * vr_dim1 + 1], &c__1); + remax = 1. / (d__1 = vr[ii + ki * vr_dim1], abs(d__1)); + dscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); + } else { + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + work[k + iv * *n] = 0.; + } + iscomplex[iv - 1] = ip; + } + } else { + if ((d__1 = t[ki - 1 + ki * t_dim1], abs(d__1)) >= + (d__2 = t[ki + (ki - 1) * t_dim1], abs(d__2))) { + work[ki - 1 + (iv - 1) * *n] = 1.; + work[ki + iv * *n] = wi / t[ki - 1 + ki * t_dim1]; + } else { + work[ki - 1 + (iv - 1) * *n] = -wi / t[ki + (ki - 1) * t_dim1]; + work[ki + iv * *n] = 1.; + } + work[ki + (iv - 1) * *n] = 0.; + work[ki - 1 + iv * *n] = 0.; + i__2 = ki - 2; + for (k = 1; k <= i__2; ++k) { + work[k + (iv - 1) * *n] = + -work[ki - 1 + (iv - 1) * *n] * t[k + (ki - 1) * t_dim1]; + work[k + iv * *n] = -work[ki + iv * *n] * t[k + ki * t_dim1]; + } + jnxt = ki - 2; + for (j = ki - 2; j >= 1; --j) { + if (j > jnxt) { + goto L90; + } + j1 = j; + j2 = j; + jnxt = j - 1; + if (j > 1) { + if (t[j + (j - 1) * t_dim1] != 0.) 
{ + j1 = j - 1; + jnxt = j - 2; + } + } + if (j1 == j2) { + dlaln2_(&c_false, &c__1, &c__2, &smin, &c_b29, &t[j + j * t_dim1], ldt, + &c_b29, &c_b29, &work[j + (iv - 1) * *n], n, &wr, &wi, x, &c__2, + &scale, &xnorm, &ierr); + if (xnorm > 1.) { + if (work[j] > bignum / xnorm) { + x[0] /= xnorm; + x[2] /= xnorm; + scale /= xnorm; + } + } + if (scale != 1.) { + dscal_(&ki, &scale, &work[(iv - 1) * *n + 1], &c__1); + dscal_(&ki, &scale, &work[iv * *n + 1], &c__1); + } + work[j + (iv - 1) * *n] = x[0]; + work[j + iv * *n] = x[2]; + i__2 = j - 1; + d__1 = -x[0]; + daxpy_(&i__2, &d__1, &t[j * t_dim1 + 1], &c__1, &work[(iv - 1) * *n + 1], + &c__1); + i__2 = j - 1; + d__1 = -x[2]; + daxpy_(&i__2, &d__1, &t[j * t_dim1 + 1], &c__1, &work[iv * *n + 1], &c__1); + } else { + dlaln2_(&c_false, &c__2, &c__2, &smin, &c_b29, &t[j - 1 + (j - 1) * t_dim1], + ldt, &c_b29, &c_b29, &work[j - 1 + (iv - 1) * *n], n, &wr, &wi, x, + &c__2, &scale, &xnorm, &ierr); + if (xnorm > 1.) { + d__1 = work[j - 1], d__2 = work[j]; + beta = max(d__1, d__2); + if (beta > bignum / xnorm) { + rec = 1. / xnorm; + x[0] *= rec; + x[2] *= rec; + x[1] *= rec; + x[3] *= rec; + scale *= rec; + } + } + if (scale != 1.) 
{ + dscal_(&ki, &scale, &work[(iv - 1) * *n + 1], &c__1); + dscal_(&ki, &scale, &work[iv * *n + 1], &c__1); + } + work[j - 1 + (iv - 1) * *n] = x[0]; + work[j + (iv - 1) * *n] = x[1]; + work[j - 1 + iv * *n] = x[2]; + work[j + iv * *n] = x[3]; + i__2 = j - 2; + d__1 = -x[0]; + daxpy_(&i__2, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, + &work[(iv - 1) * *n + 1], &c__1); + i__2 = j - 2; + d__1 = -x[1]; + daxpy_(&i__2, &d__1, &t[j * t_dim1 + 1], &c__1, &work[(iv - 1) * *n + 1], + &c__1); + i__2 = j - 2; + d__1 = -x[2]; + daxpy_(&i__2, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, &work[iv * *n + 1], + &c__1); + i__2 = j - 2; + d__1 = -x[3]; + daxpy_(&i__2, &d__1, &t[j * t_dim1 + 1], &c__1, &work[iv * *n + 1], &c__1); + } + L90:; + } + if (!over) { + dcopy_(&ki, &work[(iv - 1) * *n + 1], &c__1, &vr[(is - 1) * vr_dim1 + 1], + &c__1); + dcopy_(&ki, &work[iv * *n + 1], &c__1, &vr[is * vr_dim1 + 1], &c__1); + emax = 0.; + i__2 = ki; + for (k = 1; k <= i__2; ++k) { + d__3 = emax, d__4 = (d__1 = vr[k + (is - 1) * vr_dim1], abs(d__1)) + + (d__2 = vr[k + is * vr_dim1], abs(d__2)); + emax = max(d__3, d__4); + } + remax = 1. 
/ emax; + dscal_(&ki, &remax, &vr[(is - 1) * vr_dim1 + 1], &c__1); + dscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + vr[k + (is - 1) * vr_dim1] = 0.; + vr[k + is * vr_dim1] = 0.; + } + } else if (nb == 1) { + if (ki > 2) { + i__2 = ki - 2; + dgemv_((char *)"N", n, &i__2, &c_b29, &vr[vr_offset], ldvr, + &work[(iv - 1) * *n + 1], &c__1, &work[ki - 1 + (iv - 1) * *n], + &vr[(ki - 1) * vr_dim1 + 1], &c__1, (ftnlen)1); + i__2 = ki - 2; + dgemv_((char *)"N", n, &i__2, &c_b29, &vr[vr_offset], ldvr, &work[iv * *n + 1], + &c__1, &work[ki + iv * *n], &vr[ki * vr_dim1 + 1], &c__1, (ftnlen)1); + } else { + dscal_(n, &work[ki - 1 + (iv - 1) * *n], &vr[(ki - 1) * vr_dim1 + 1], + &c__1); + dscal_(n, &work[ki + iv * *n], &vr[ki * vr_dim1 + 1], &c__1); + } + emax = 0.; + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + d__3 = emax, d__4 = (d__1 = vr[k + (ki - 1) * vr_dim1], abs(d__1)) + + (d__2 = vr[k + ki * vr_dim1], abs(d__2)); + emax = max(d__3, d__4); + } + remax = 1. / emax; + dscal_(n, &remax, &vr[(ki - 1) * vr_dim1 + 1], &c__1); + dscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); + } else { + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + work[k + (iv - 1) * *n] = 0.; + work[k + iv * *n] = 0.; + } + iscomplex[iv - 2] = -ip; + iscomplex[iv - 1] = ip; + --iv; + } + } + if (nb > 1) { + if (ip == 0) { + ki2 = ki; + } else { + ki2 = ki - 1; + } + if (iv <= 2 || ki2 == 1) { + i__2 = nb - iv + 1; + i__3 = ki2 + nb - iv; + dgemm_((char *)"N", (char *)"N", n, &i__2, &i__3, &c_b29, &vr[vr_offset], ldvr, + &work[iv * *n + 1], n, &c_b17, &work[(nb + iv) * *n + 1], n, (ftnlen)1, + (ftnlen)1); + i__2 = nb; + for (k = iv; k <= i__2; ++k) { + if (iscomplex[k - 1] == 0) { + ii = idamax_(n, &work[(nb + k) * *n + 1], &c__1); + remax = 1. 
/ (d__1 = work[ii + (nb + k) * *n], abs(d__1)); + } else if (iscomplex[k - 1] == 1) { + emax = 0.; + i__3 = *n; + for (ii = 1; ii <= i__3; ++ii) { + d__3 = emax, + d__4 = (d__1 = work[ii + (nb + k) * *n], abs(d__1)) + + (d__2 = work[ii + (nb + k + 1) * *n], abs(d__2)); + emax = max(d__3, d__4); + } + remax = 1. / emax; + } + dscal_(n, &remax, &work[(nb + k) * *n + 1], &c__1); + } + i__2 = nb - iv + 1; + dlacpy_((char *)"F", n, &i__2, &work[(nb + iv) * *n + 1], n, &vr[ki2 * vr_dim1 + 1], + ldvr, (ftnlen)1); + iv = nb; + } else { + --iv; + } + } + --is; + if (ip != 0) { + --is; + } + L140:; + } + } + if (leftv) { + iv = 1; + ip = 0; + is = 1; + i__2 = *n; + for (ki = 1; ki <= i__2; ++ki) { + if (ip == 1) { + ip = -1; + goto L260; + } else if (ki == *n) { + ip = 0; + } else if (t[ki + 1 + ki * t_dim1] == 0.) { + ip = 0; + } else { + ip = 1; + } + if (somev) { + if (!select[ki]) { + goto L260; + } + } + wr = t[ki + ki * t_dim1]; + wi = 0.; + if (ip != 0) { + wi = sqrt((d__1 = t[ki + (ki + 1) * t_dim1], abs(d__1))) * + sqrt((d__2 = t[ki + 1 + ki * t_dim1], abs(d__2))); + } + d__1 = ulp * (abs(wr) + abs(wi)); + smin = max(d__1, smlnum); + if (ip == 0) { + work[ki + iv * *n] = 1.; + i__3 = *n; + for (k = ki + 1; k <= i__3; ++k) { + work[k + iv * *n] = -t[ki + k * t_dim1]; + } + vmax = 1.; + vcrit = bignum; + jnxt = ki + 1; + i__3 = *n; + for (j = ki + 1; j <= i__3; ++j) { + if (j < jnxt) { + goto L170; + } + j1 = j; + j2 = j; + jnxt = j + 1; + if (j < *n) { + if (t[j + 1 + j * t_dim1] != 0.) { + j2 = j + 1; + jnxt = j + 2; + } + } + if (j1 == j2) { + if (work[j] > vcrit) { + rec = 1. 
/ vmax; + i__4 = *n - ki + 1; + dscal_(&i__4, &rec, &work[ki + iv * *n], &c__1); + vmax = 1.; + vcrit = bignum; + } + i__4 = j - ki - 1; + work[j + iv * *n] -= ddot_(&i__4, &t[ki + 1 + j * t_dim1], &c__1, + &work[ki + 1 + iv * *n], &c__1); + dlaln2_(&c_false, &c__1, &c__1, &smin, &c_b29, &t[j + j * t_dim1], ldt, + &c_b29, &c_b29, &work[j + iv * *n], n, &wr, &c_b17, x, &c__2, + &scale, &xnorm, &ierr); + if (scale != 1.) { + i__4 = *n - ki + 1; + dscal_(&i__4, &scale, &work[ki + iv * *n], &c__1); + } + work[j + iv * *n] = x[0]; + d__2 = (d__1 = work[j + iv * *n], abs(d__1)); + vmax = max(d__2, vmax); + vcrit = bignum / vmax; + } else { + d__1 = work[j], d__2 = work[j + 1]; + beta = max(d__1, d__2); + if (beta > vcrit) { + rec = 1. / vmax; + i__4 = *n - ki + 1; + dscal_(&i__4, &rec, &work[ki + iv * *n], &c__1); + vmax = 1.; + vcrit = bignum; + } + i__4 = j - ki - 1; + work[j + iv * *n] -= ddot_(&i__4, &t[ki + 1 + j * t_dim1], &c__1, + &work[ki + 1 + iv * *n], &c__1); + i__4 = j - ki - 1; + work[j + 1 + iv * *n] -= ddot_(&i__4, &t[ki + 1 + (j + 1) * t_dim1], &c__1, + &work[ki + 1 + iv * *n], &c__1); + dlaln2_(&c_true, &c__2, &c__1, &smin, &c_b29, &t[j + j * t_dim1], ldt, + &c_b29, &c_b29, &work[j + iv * *n], n, &wr, &c_b17, x, &c__2, + &scale, &xnorm, &ierr); + if (scale != 1.) { + i__4 = *n - ki + 1; + dscal_(&i__4, &scale, &work[ki + iv * *n], &c__1); + } + work[j + iv * *n] = x[0]; + work[j + 1 + iv * *n] = x[1]; + d__3 = (d__1 = work[j + iv * *n], abs(d__1)), + d__4 = (d__2 = work[j + 1 + iv * *n], abs(d__2)), d__3 = max(d__3, d__4); + vmax = max(d__3, vmax); + vcrit = bignum / vmax; + } + L170:; + } + if (!over) { + i__3 = *n - ki + 1; + dcopy_(&i__3, &work[ki + iv * *n], &c__1, &vl[ki + is * vl_dim1], &c__1); + i__3 = *n - ki + 1; + ii = idamax_(&i__3, &vl[ki + is * vl_dim1], &c__1) + ki - 1; + remax = 1. 
/ (d__1 = vl[ii + is * vl_dim1], abs(d__1)); + i__3 = *n - ki + 1; + dscal_(&i__3, &remax, &vl[ki + is * vl_dim1], &c__1); + i__3 = ki - 1; + for (k = 1; k <= i__3; ++k) { + vl[k + is * vl_dim1] = 0.; + } + } else if (nb == 1) { + if (ki < *n) { + i__3 = *n - ki; + dgemv_((char *)"N", n, &i__3, &c_b29, &vl[(ki + 1) * vl_dim1 + 1], ldvl, + &work[ki + 1 + iv * *n], &c__1, &work[ki + iv * *n], + &vl[ki * vl_dim1 + 1], &c__1, (ftnlen)1); + } + ii = idamax_(n, &vl[ki * vl_dim1 + 1], &c__1); + remax = 1. / (d__1 = vl[ii + ki * vl_dim1], abs(d__1)); + dscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); + } else { + i__3 = ki - 1; + for (k = 1; k <= i__3; ++k) { + work[k + iv * *n] = 0.; + } + iscomplex[iv - 1] = ip; + } + } else { + if ((d__1 = t[ki + (ki + 1) * t_dim1], abs(d__1)) >= + (d__2 = t[ki + 1 + ki * t_dim1], abs(d__2))) { + work[ki + iv * *n] = wi / t[ki + (ki + 1) * t_dim1]; + work[ki + 1 + (iv + 1) * *n] = 1.; + } else { + work[ki + iv * *n] = 1.; + work[ki + 1 + (iv + 1) * *n] = -wi / t[ki + 1 + ki * t_dim1]; + } + work[ki + 1 + iv * *n] = 0.; + work[ki + (iv + 1) * *n] = 0.; + i__3 = *n; + for (k = ki + 2; k <= i__3; ++k) { + work[k + iv * *n] = -work[ki + iv * *n] * t[ki + k * t_dim1]; + work[k + (iv + 1) * *n] = + -work[ki + 1 + (iv + 1) * *n] * t[ki + 1 + k * t_dim1]; + } + vmax = 1.; + vcrit = bignum; + jnxt = ki + 2; + i__3 = *n; + for (j = ki + 2; j <= i__3; ++j) { + if (j < jnxt) { + goto L200; + } + j1 = j; + j2 = j; + jnxt = j + 1; + if (j < *n) { + if (t[j + 1 + j * t_dim1] != 0.) { + j2 = j + 1; + jnxt = j + 2; + } + } + if (j1 == j2) { + if (work[j] > vcrit) { + rec = 1. 
/ vmax; + i__4 = *n - ki + 1; + dscal_(&i__4, &rec, &work[ki + iv * *n], &c__1); + i__4 = *n - ki + 1; + dscal_(&i__4, &rec, &work[ki + (iv + 1) * *n], &c__1); + vmax = 1.; + vcrit = bignum; + } + i__4 = j - ki - 2; + work[j + iv * *n] -= ddot_(&i__4, &t[ki + 2 + j * t_dim1], &c__1, + &work[ki + 2 + iv * *n], &c__1); + i__4 = j - ki - 2; + work[j + (iv + 1) * *n] -= ddot_(&i__4, &t[ki + 2 + j * t_dim1], &c__1, + &work[ki + 2 + (iv + 1) * *n], &c__1); + d__1 = -wi; + dlaln2_(&c_false, &c__1, &c__2, &smin, &c_b29, &t[j + j * t_dim1], ldt, + &c_b29, &c_b29, &work[j + iv * *n], n, &wr, &d__1, x, &c__2, &scale, + &xnorm, &ierr); + if (scale != 1.) { + i__4 = *n - ki + 1; + dscal_(&i__4, &scale, &work[ki + iv * *n], &c__1); + i__4 = *n - ki + 1; + dscal_(&i__4, &scale, &work[ki + (iv + 1) * *n], &c__1); + } + work[j + iv * *n] = x[0]; + work[j + (iv + 1) * *n] = x[2]; + d__3 = (d__1 = work[j + iv * *n], abs(d__1)), + d__4 = (d__2 = work[j + (iv + 1) * *n], abs(d__2)), d__3 = max(d__3, d__4); + vmax = max(d__3, vmax); + vcrit = bignum / vmax; + } else { + d__1 = work[j], d__2 = work[j + 1]; + beta = max(d__1, d__2); + if (beta > vcrit) { + rec = 1. 
/ vmax; + i__4 = *n - ki + 1; + dscal_(&i__4, &rec, &work[ki + iv * *n], &c__1); + i__4 = *n - ki + 1; + dscal_(&i__4, &rec, &work[ki + (iv + 1) * *n], &c__1); + vmax = 1.; + vcrit = bignum; + } + i__4 = j - ki - 2; + work[j + iv * *n] -= ddot_(&i__4, &t[ki + 2 + j * t_dim1], &c__1, + &work[ki + 2 + iv * *n], &c__1); + i__4 = j - ki - 2; + work[j + (iv + 1) * *n] -= ddot_(&i__4, &t[ki + 2 + j * t_dim1], &c__1, + &work[ki + 2 + (iv + 1) * *n], &c__1); + i__4 = j - ki - 2; + work[j + 1 + iv * *n] -= ddot_(&i__4, &t[ki + 2 + (j + 1) * t_dim1], &c__1, + &work[ki + 2 + iv * *n], &c__1); + i__4 = j - ki - 2; + work[j + 1 + (iv + 1) * *n] -= + ddot_(&i__4, &t[ki + 2 + (j + 1) * t_dim1], &c__1, + &work[ki + 2 + (iv + 1) * *n], &c__1); + d__1 = -wi; + dlaln2_(&c_true, &c__2, &c__2, &smin, &c_b29, &t[j + j * t_dim1], ldt, + &c_b29, &c_b29, &work[j + iv * *n], n, &wr, &d__1, x, &c__2, &scale, + &xnorm, &ierr); + if (scale != 1.) { + i__4 = *n - ki + 1; + dscal_(&i__4, &scale, &work[ki + iv * *n], &c__1); + i__4 = *n - ki + 1; + dscal_(&i__4, &scale, &work[ki + (iv + 1) * *n], &c__1); + } + work[j + iv * *n] = x[0]; + work[j + (iv + 1) * *n] = x[2]; + work[j + 1 + iv * *n] = x[1]; + work[j + 1 + (iv + 1) * *n] = x[3]; + d__1 = abs(x[0]), d__2 = abs(x[2]), d__1 = max(d__1, d__2), + d__2 = abs(x[1]), d__1 = max(d__1, d__2), d__2 = abs(x[3]), + d__1 = max(d__1, d__2); + vmax = max(d__1, vmax); + vcrit = bignum / vmax; + } + L200:; + } + if (!over) { + i__3 = *n - ki + 1; + dcopy_(&i__3, &work[ki + iv * *n], &c__1, &vl[ki + is * vl_dim1], &c__1); + i__3 = *n - ki + 1; + dcopy_(&i__3, &work[ki + (iv + 1) * *n], &c__1, &vl[ki + (is + 1) * vl_dim1], + &c__1); + emax = 0.; + i__3 = *n; + for (k = ki; k <= i__3; ++k) { + d__3 = emax, d__4 = (d__1 = vl[k + is * vl_dim1], abs(d__1)) + + (d__2 = vl[k + (is + 1) * vl_dim1], abs(d__2)); + emax = max(d__3, d__4); + } + remax = 1. 
/ emax; + i__3 = *n - ki + 1; + dscal_(&i__3, &remax, &vl[ki + is * vl_dim1], &c__1); + i__3 = *n - ki + 1; + dscal_(&i__3, &remax, &vl[ki + (is + 1) * vl_dim1], &c__1); + i__3 = ki - 1; + for (k = 1; k <= i__3; ++k) { + vl[k + is * vl_dim1] = 0.; + vl[k + (is + 1) * vl_dim1] = 0.; + } + } else if (nb == 1) { + if (ki < *n - 1) { + i__3 = *n - ki - 1; + dgemv_((char *)"N", n, &i__3, &c_b29, &vl[(ki + 2) * vl_dim1 + 1], ldvl, + &work[ki + 2 + iv * *n], &c__1, &work[ki + iv * *n], + &vl[ki * vl_dim1 + 1], &c__1, (ftnlen)1); + i__3 = *n - ki - 1; + dgemv_((char *)"N", n, &i__3, &c_b29, &vl[(ki + 2) * vl_dim1 + 1], ldvl, + &work[ki + 2 + (iv + 1) * *n], &c__1, &work[ki + 1 + (iv + 1) * *n], + &vl[(ki + 1) * vl_dim1 + 1], &c__1, (ftnlen)1); + } else { + dscal_(n, &work[ki + iv * *n], &vl[ki * vl_dim1 + 1], &c__1); + dscal_(n, &work[ki + 1 + (iv + 1) * *n], &vl[(ki + 1) * vl_dim1 + 1], + &c__1); + } + emax = 0.; + i__3 = *n; + for (k = 1; k <= i__3; ++k) { + d__3 = emax, d__4 = (d__1 = vl[k + ki * vl_dim1], abs(d__1)) + + (d__2 = vl[k + (ki + 1) * vl_dim1], abs(d__2)); + emax = max(d__3, d__4); + } + remax = 1. / emax; + dscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); + dscal_(n, &remax, &vl[(ki + 1) * vl_dim1 + 1], &c__1); + } else { + i__3 = ki - 1; + for (k = 1; k <= i__3; ++k) { + work[k + iv * *n] = 0.; + work[k + (iv + 1) * *n] = 0.; + } + iscomplex[iv - 1] = ip; + iscomplex[iv] = -ip; + ++iv; + } + } + if (nb > 1) { + if (ip == 0) { + ki2 = ki; + } else { + ki2 = ki + 1; + } + if (iv >= nb - 1 || ki2 == *n) { + i__3 = *n - ki2 + iv; + dgemm_((char *)"N", (char *)"N", n, &iv, &i__3, &c_b29, &vl[(ki2 - iv + 1) * vl_dim1 + 1], ldvl, + &work[ki2 - iv + 1 + *n], n, &c_b17, &work[(nb + 1) * *n + 1], n, + (ftnlen)1, (ftnlen)1); + i__3 = iv; + for (k = 1; k <= i__3; ++k) { + if (iscomplex[k - 1] == 0) { + ii = idamax_(n, &work[(nb + k) * *n + 1], &c__1); + remax = 1. 
/ (d__1 = work[ii + (nb + k) * *n], abs(d__1)); + } else if (iscomplex[k - 1] == 1) { + emax = 0.; + i__4 = *n; + for (ii = 1; ii <= i__4; ++ii) { + d__3 = emax, + d__4 = (d__1 = work[ii + (nb + k) * *n], abs(d__1)) + + (d__2 = work[ii + (nb + k + 1) * *n], abs(d__2)); + emax = max(d__3, d__4); + } + remax = 1. / emax; + } + dscal_(n, &remax, &work[(nb + k) * *n + 1], &c__1); + } + dlacpy_((char *)"F", n, &iv, &work[(nb + 1) * *n + 1], n, + &vl[(ki2 - iv + 1) * vl_dim1 + 1], ldvl, (ftnlen)1); + iv = 1; + } else { + ++iv; + } + } + ++is; + if (ip != 0) { + ++is; + } + L260:; + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dtrexc.cpp b/lib/linalg/dtrexc.cpp new file mode 100644 index 0000000000..07568d6ed2 --- /dev/null +++ b/lib/linalg/dtrexc.cpp @@ -0,0 +1,217 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c__2 = 2; +int dtrexc_(char *compq, integer *n, doublereal *t, integer *ldt, doublereal *q, integer *ldq, + integer *ifst, integer *ilst, doublereal *work, integer *info, ftnlen compq_len) +{ + integer q_dim1, q_offset, t_dim1, t_offset, i__1; + integer nbf, nbl, here; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + logical wantq; + extern int dlaexc_(logical *, integer *, doublereal *, integer *, doublereal *, integer *, + integer *, integer *, integer *, doublereal *, integer *), + xerbla_(char *, integer *, ftnlen); + integer nbnext; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --work; + *info = 0; + wantq = lsame_(compq, (char *)"V", (ftnlen)1, (ftnlen)1); + if (!wantq && !lsame_(compq, (char *)"N", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldt < max(1, *n)) { + *info = -4; + } else if (*ldq < 1 || wantq && *ldq < max(1, *n)) { + *info = -6; + } else if ((*ifst < 1 || *ifst > *n) && *n > 0) { + *info = -7; + } else if ((*ilst < 1 || 
*ilst > *n) && *n > 0) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DTREXC", &i__1, (ftnlen)6); + return 0; + } + if (*n <= 1) { + return 0; + } + if (*ifst > 1) { + if (t[*ifst + (*ifst - 1) * t_dim1] != 0.) { + --(*ifst); + } + } + nbf = 1; + if (*ifst < *n) { + if (t[*ifst + 1 + *ifst * t_dim1] != 0.) { + nbf = 2; + } + } + if (*ilst > 1) { + if (t[*ilst + (*ilst - 1) * t_dim1] != 0.) { + --(*ilst); + } + } + nbl = 1; + if (*ilst < *n) { + if (t[*ilst + 1 + *ilst * t_dim1] != 0.) { + nbl = 2; + } + } + if (*ifst == *ilst) { + return 0; + } + if (*ifst < *ilst) { + if (nbf == 2 && nbl == 1) { + --(*ilst); + } + if (nbf == 1 && nbl == 2) { + ++(*ilst); + } + here = *ifst; + L10: + if (nbf == 1 || nbf == 2) { + nbnext = 1; + if (here + nbf + 1 <= *n) { + if (t[here + nbf + 1 + (here + nbf) * t_dim1] != 0.) { + nbnext = 2; + } + } + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &here, &nbf, &nbnext, &work[1], + info); + if (*info != 0) { + *ilst = here; + return 0; + } + here += nbnext; + if (nbf == 2) { + if (t[here + 1 + here * t_dim1] == 0.) { + nbf = 3; + } + } + } else { + nbnext = 1; + if (here + 3 <= *n) { + if (t[here + 3 + (here + 2) * t_dim1] != 0.) { + nbnext = 2; + } + } + i__1 = here + 1; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, &c__1, &nbnext, + &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + if (nbnext == 1) { + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &here, &c__1, &nbnext, + &work[1], info); + ++here; + } else { + if (t[here + 2 + (here + 1) * t_dim1] == 0.) 
{ + nbnext = 1; + } + if (nbnext == 2) { + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &here, &c__1, &nbnext, + &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + here += 2; + } else { + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &here, &c__1, &c__1, + &work[1], info); + i__1 = here + 1; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, &c__1, &c__1, + &work[1], info); + here += 2; + } + } + } + if (here < *ilst) { + goto L10; + } + } else { + here = *ifst; + L20: + if (nbf == 1 || nbf == 2) { + nbnext = 1; + if (here >= 3) { + if (t[here - 1 + (here - 2) * t_dim1] != 0.) { + nbnext = 2; + } + } + i__1 = here - nbnext; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, &nbnext, &nbf, &work[1], + info); + if (*info != 0) { + *ilst = here; + return 0; + } + here -= nbnext; + if (nbf == 2) { + if (t[here + 1 + here * t_dim1] == 0.) { + nbf = 3; + } + } + } else { + nbnext = 1; + if (here >= 3) { + if (t[here - 1 + (here - 2) * t_dim1] != 0.) { + nbnext = 2; + } + } + i__1 = here - nbnext; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, &nbnext, &c__1, + &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + if (nbnext == 1) { + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &here, &nbnext, &c__1, + &work[1], info); + --here; + } else { + if (t[here + (here - 1) * t_dim1] == 0.) 
{ + nbnext = 1; + } + if (nbnext == 2) { + i__1 = here - 1; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, &c__2, &c__1, + &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + here += -2; + } else { + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &here, &c__1, &c__1, + &work[1], info); + i__1 = here - 1; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, &c__1, &c__1, + &work[1], info); + here += -2; + } + } + } + if (here > *ilst) { + goto L20; + } + } + *ilst = here; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dtrtrs.cpp b/lib/linalg/dtrtrs.cpp new file mode 100644 index 0000000000..3ef3eac882 --- /dev/null +++ b/lib/linalg/dtrtrs.cpp @@ -0,0 +1,65 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublereal c_b12 = 1.; +int dtrtrs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, doublereal *a, + integer *lda, doublereal *b, integer *ldb, integer *info, ftnlen uplo_len, + ftnlen trans_len, ftnlen diag_len) +{ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen), + xerbla_(char *, integer *, ftnlen); + logical nounit; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + *info = 0; + nounit = lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1); + if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (!lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1) && + !lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1) && + !lsame_(trans, (char *)"C", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (!nounit && !lsame_(diag, (char *)"U", (ftnlen)1, (ftnlen)1)) { + *info = -3; + } else 
if (*n < 0) { + *info = -4; + } else if (*nrhs < 0) { + *info = -5; + } else if (*lda < max(1, *n)) { + *info = -7; + } else if (*ldb < max(1, *n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DTRTRS", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + if (nounit) { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + if (a[*info + *info * a_dim1] == 0.) { + return 0; + } + } + } + *info = 0; + dtrsm_((char *)"Left", uplo, trans, diag, n, nrhs, &c_b12, &a[a_offset], lda, &b[b_offset], ldb, + (ftnlen)4, (ftnlen)1, (ftnlen)1, (ftnlen)1); + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/izamax.cpp b/lib/linalg/izamax.cpp new file mode 100644 index 0000000000..1aebf6ac52 --- /dev/null +++ b/lib/linalg/izamax.cpp @@ -0,0 +1,46 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +integer izamax_(integer *n, doublecomplex *zx, integer *incx) +{ + integer ret_val, i__1; + integer i__, ix; + doublereal dmax__; + extern doublereal dcabs1_(doublecomplex *); + --zx; + ret_val = 0; + if (*n < 1 || *incx <= 0) { + return ret_val; + } + ret_val = 1; + if (*n == 1) { + return ret_val; + } + if (*incx == 1) { + dmax__ = dcabs1_(&zx[1]); + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if (dcabs1_(&zx[i__]) > dmax__) { + ret_val = i__; + dmax__ = dcabs1_(&zx[i__]); + } + } + } else { + ix = 1; + dmax__ = dcabs1_(&zx[1]); + ix += *incx; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if (dcabs1_(&zx[ix]) > dmax__) { + ret_val = i__; + dmax__ = dcabs1_(&zx[ix]); + } + ix += *incx; + } + } + return ret_val; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zcopy.cpp b/lib/linalg/zcopy.cpp new file mode 100644 index 0000000000..4ec6ae0b78 --- /dev/null +++ b/lib/linalg/zcopy.cpp @@ -0,0 +1,43 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int zcopy_(integer *n, doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy) +{ + integer i__1, i__2, i__3; 
+ integer i__, ix, iy; + --zy; + --zx; + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + zy[i__2].r = zx[i__3].r, zy[i__2].i = zx[i__3].i; + } + } else { + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = ix; + zy[i__2].r = zx[i__3].r, zy[i__2].i = zx[i__3].i; + ix += *incx; + iy += *incy; + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zdotu.cpp b/lib/linalg/zdotu.cpp new file mode 100644 index 0000000000..1b284d12c6 --- /dev/null +++ b/lib/linalg/zdotu.cpp @@ -0,0 +1,55 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +VOID zdotu_(doublecomplex *ret_val, integer *n, doublecomplex *zx, integer *incx, doublecomplex *zy, + integer *incy) +{ + integer i__1, i__2, i__3; + doublecomplex z__1, z__2; + integer i__, ix, iy; + doublecomplex ztemp; + --zy; + --zx; + ztemp.r = 0., ztemp.i = 0.; + ret_val->r = 0., ret_val->i = 0.; + if (*n <= 0) { + return; + } + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + z__2.r = zx[i__2].r * zy[i__3].r - zx[i__2].i * zy[i__3].i, + z__2.i = zx[i__2].r * zy[i__3].i + zx[i__2].i * zy[i__3].r; + z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i; + ztemp.r = z__1.r, ztemp.i = z__1.i; + } + } else { + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ix; + i__3 = iy; + z__2.r = zx[i__2].r * zy[i__3].r - zx[i__2].i * zy[i__3].i, + z__2.i = zx[i__2].r * zy[i__3].i + zx[i__2].i * zy[i__3].r; + z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i; + ztemp.r = z__1.r, ztemp.i = z__1.i; + ix += *incx; + iy += *incy; + } + } + ret_val->r = ztemp.r, 
ret_val->i = ztemp.i; + return; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zgetrf.cpp b/lib/linalg/zgetrf.cpp new file mode 100644 index 0000000000..5fb9182b87 --- /dev/null +++ b/lib/linalg/zgetrf.cpp @@ -0,0 +1,90 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +static integer c_n1 = -1; +int zgetrf_(integer *m, integer *n, doublecomplex *a, integer *lda, integer *ipiv, integer *info) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1; + integer i__, j, jb, nb, iinfo; + extern int zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, ftnlen, ftnlen), + ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + extern int zlaswp_(integer *, doublecomplex *, integer *, integer *, integer *, integer *, + integer *), + zgetrf2_(integer *, integer *, doublecomplex *, integer *, integer *, integer *); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZGETRF", &i__1, (ftnlen)6); + return 0; + } + if (*m == 0 || *n == 0) { + return 0; + } + nb = ilaenv_(&c__1, (char *)"ZGETRF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (nb <= 1 || nb >= min(*m, *n)) { + zgetrf2_(m, n, &a[a_offset], lda, &ipiv[1], info); + } else { + i__1 = min(*m, *n); + i__2 = nb; + for (j = 1; i__2 < 0 ? 
j >= i__1 : j <= i__1; j += i__2) { + i__3 = min(*m, *n) - j + 1; + jb = min(i__3, nb); + i__3 = *m - j + 1; + zgetrf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); + if (*info == 0 && iinfo > 0) { + *info = iinfo + j - 1; + } + i__4 = *m, i__5 = j + jb - 1; + i__3 = min(i__4, i__5); + for (i__ = j; i__ <= i__3; ++i__) { + ipiv[i__] = j - 1 + ipiv[i__]; + } + i__3 = j - 1; + i__4 = j + jb - 1; + zlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); + if (j + jb <= *n) { + i__3 = *n - j - jb + 1; + i__4 = j + jb - 1; + zlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, &ipiv[1], &c__1); + i__3 = *n - j - jb + 1; + ztrsm_((char *)"Left", (char *)"Lower", (char *)"No transpose", (char *)"Unit", &jb, &i__3, &c_b1, + &a[j + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda, (ftnlen)4, + (ftnlen)5, (ftnlen)12, (ftnlen)4); + if (j + jb <= *m) { + i__3 = *m - j - jb + 1; + i__4 = *n - j - jb + 1; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"No transpose", (char *)"No transpose", &i__3, &i__4, &jb, &z__1, + &a[j + jb + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda, &c_b1, + &a[j + jb + (j + jb) * a_dim1], lda, (ftnlen)12, (ftnlen)12); + } + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zgetrf2.cpp b/lib/linalg/zgetrf2.cpp new file mode 100644 index 0000000000..805b5810bc --- /dev/null +++ b/lib/linalg/zgetrf2.cpp @@ -0,0 +1,117 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +int zgetrf2_(integer *m, integer *n, doublecomplex *a, integer *lda, integer *ipiv, integer *info) +{ + integer a_dim1, a_offset, i__1, i__2; + doublecomplex z__1; + double z_lmp_abs(doublecomplex *); + void z_lmp_div(doublecomplex *, doublecomplex *, doublecomplex *); + integer i__, n1, n2; + doublecomplex temp; + integer iinfo; + doublereal sfmin; + extern int zscal_(integer *, doublecomplex *, doublecomplex *, integer *), + zgemm_(char *, char *, 
integer *, integer *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, + ftnlen, ftnlen), + ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen); + extern doublereal dlamch_(char *, ftnlen); + extern int xerbla_(char *, integer *, ftnlen); + extern integer izamax_(integer *, doublecomplex *, integer *); + extern int zlaswp_(integer *, doublecomplex *, integer *, integer *, integer *, integer *, + integer *); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZGETRF2", &i__1, (ftnlen)7); + return 0; + } + if (*m == 0 || *n == 0) { + return 0; + } + if (*m == 1) { + ipiv[1] = 1; + i__1 = a_dim1 + 1; + if (a[i__1].r == 0. && a[i__1].i == 0.) { + *info = 1; + } + } else if (*n == 1) { + sfmin = dlamch_((char *)"S", (ftnlen)1); + i__ = izamax_(m, &a[a_dim1 + 1], &c__1); + ipiv[1] = i__; + i__1 = i__ + a_dim1; + if (a[i__1].r != 0. || a[i__1].i != 0.) 
{ + if (i__ != 1) { + i__1 = a_dim1 + 1; + temp.r = a[i__1].r, temp.i = a[i__1].i; + i__1 = a_dim1 + 1; + i__2 = i__ + a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = i__ + a_dim1; + a[i__1].r = temp.r, a[i__1].i = temp.i; + } + if (z_lmp_abs(&a[a_dim1 + 1]) >= sfmin) { + i__1 = *m - 1; + z_lmp_div(&z__1, &c_b1, &a[a_dim1 + 1]); + zscal_(&i__1, &z__1, &a[a_dim1 + 2], &c__1); + } else { + i__1 = *m - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + 1 + a_dim1; + z_lmp_div(&z__1, &a[i__ + 1 + a_dim1], &a[a_dim1 + 1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + } + } + } else { + *info = 1; + } + } else { + n1 = min(*m, *n) / 2; + n2 = *n - n1; + zgetrf2_(m, &n1, &a[a_offset], lda, &ipiv[1], &iinfo); + if (*info == 0 && iinfo > 0) { + *info = iinfo; + } + zlaswp_(&n2, &a[(n1 + 1) * a_dim1 + 1], lda, &c__1, &n1, &ipiv[1], &c__1); + ztrsm_((char *)"L", (char *)"L", (char *)"N", (char *)"U", &n1, &n2, &c_b1, &a[a_offset], lda, &a[(n1 + 1) * a_dim1 + 1], + lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__1 = *m - n1; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"N", (char *)"N", &i__1, &n2, &n1, &z__1, &a[n1 + 1 + a_dim1], lda, + &a[(n1 + 1) * a_dim1 + 1], lda, &c_b1, &a[n1 + 1 + (n1 + 1) * a_dim1], lda, + (ftnlen)1, (ftnlen)1); + i__1 = *m - n1; + zgetrf2_(&i__1, &n2, &a[n1 + 1 + (n1 + 1) * a_dim1], lda, &ipiv[n1 + 1], &iinfo); + if (*info == 0 && iinfo > 0) { + *info = iinfo + n1; + } + i__1 = min(*m, *n); + for (i__ = n1 + 1; i__ <= i__1; ++i__) { + ipiv[i__] += n1; + } + i__1 = n1 + 1; + i__2 = min(*m, *n); + zlaswp_(&n1, &a[a_dim1 + 1], lda, &i__1, &i__2, &ipiv[1], &c__1); + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zgetri.cpp b/lib/linalg/zgetri.cpp new file mode 100644 index 0000000000..a61e931cb4 --- /dev/null +++ b/lib/linalg/zgetri.cpp @@ -0,0 +1,132 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b2 = {1., 0.}; +static integer c__1 = 1; +static integer c_n1 
= -1; +static integer c__2 = 2; +int zgetri_(integer *n, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *work, + integer *lwork, integer *info) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1; + integer i__, j, jb, nb, jj, jp, nn, iws, nbmin; + extern int zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, ftnlen, ftnlen), + zgemv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen), + zswap_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), + ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + integer ldwork, lwkopt; + logical lquery; + extern int ztrtri_(char *, char *, integer *, doublecomplex *, integer *, integer *, ftnlen, + ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + --work; + *info = 0; + nb = ilaenv_(&c__1, (char *)"ZGETRI", (char *)" ", n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = *n * nb; + work[1].r = (doublereal)lwkopt, work[1].i = 0.; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*lda < max(1, *n)) { + *info = -3; + } else if (*lwork < max(1, *n) && !lquery) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZGETRI", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + if (*n == 0) { + return 0; + } + ztrtri_((char *)"Upper", (char *)"Non-unit", n, &a[a_offset], lda, info, (ftnlen)5, (ftnlen)8); + if (*info > 0) { + return 0; + } + nbmin = 2; + ldwork = *n; + if (nb > 1 && nb 
< *n) { + i__1 = ldwork * nb; + iws = max(i__1, 1); + if (*lwork < iws) { + nb = *lwork / ldwork; + i__1 = 2, + i__2 = ilaenv_(&c__2, (char *)"ZGETRI", (char *)" ", n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1, i__2); + } + } else { + iws = *n; + } + if (nb < nbmin || nb >= *n) { + for (j = *n; j >= 1; --j) { + i__1 = *n; + for (i__ = j + 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__ + j * a_dim1; + work[i__2].r = a[i__3].r, work[i__2].i = a[i__3].i; + i__2 = i__ + j * a_dim1; + a[i__2].r = 0., a[i__2].i = 0.; + } + if (j < *n) { + i__1 = *n - j; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", n, &i__1, &z__1, &a[(j + 1) * a_dim1 + 1], lda, &work[j + 1], + &c__1, &c_b2, &a[j * a_dim1 + 1], &c__1, (ftnlen)12); + } + } + } else { + nn = (*n - 1) / nb * nb + 1; + i__1 = -nb; + for (j = nn; i__1 < 0 ? j >= 1 : j <= 1; j += i__1) { + i__2 = nb, i__3 = *n - j + 1; + jb = min(i__2, i__3); + i__2 = j + jb - 1; + for (jj = j; jj <= i__2; ++jj) { + i__3 = *n; + for (i__ = jj + 1; i__ <= i__3; ++i__) { + i__4 = i__ + (jj - j) * ldwork; + i__5 = i__ + jj * a_dim1; + work[i__4].r = a[i__5].r, work[i__4].i = a[i__5].i; + i__4 = i__ + jj * a_dim1; + a[i__4].r = 0., a[i__4].i = 0.; + } + } + if (j + jb <= *n) { + i__2 = *n - j - jb + 1; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"No transpose", (char *)"No transpose", n, &jb, &i__2, &z__1, + &a[(j + jb) * a_dim1 + 1], lda, &work[j + jb], &ldwork, &c_b2, + &a[j * a_dim1 + 1], lda, (ftnlen)12, (ftnlen)12); + } + ztrsm_((char *)"Right", (char *)"Lower", (char *)"No transpose", (char *)"Unit", n, &jb, &c_b2, &work[j], &ldwork, + &a[j * a_dim1 + 1], lda, (ftnlen)5, (ftnlen)5, (ftnlen)12, (ftnlen)4); + } + } + for (j = *n - 1; j >= 1; --j) { + jp = ipiv[j]; + if (jp != j) { + zswap_(n, &a[j * a_dim1 + 1], &c__1, &a[jp * a_dim1 + 1], &c__1); + } + } + work[1].r = (doublereal)iws, work[1].i = 0.; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zhegs2.cpp 
b/lib/linalg/zhegs2.cpp new file mode 100644 index 0000000000..685f548c61 --- /dev/null +++ b/lib/linalg/zhegs2.cpp @@ -0,0 +1,197 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +int zhegs2_(integer *itype, char *uplo, integer *n, doublecomplex *a, integer *lda, + doublecomplex *b, integer *ldb, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + doublereal d__1, d__2; + doublecomplex z__1; + integer k; + doublecomplex ct; + doublereal akk, bkk; + extern int zher2_(char *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + logical upper; + extern int zaxpy_(integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *), + ztrmv_(char *, char *, char *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *, ftnlen, ftnlen, ftnlen), + ztrsv_(char *, char *, char *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *, ftnlen, ftnlen, ftnlen), + xerbla_(char *, integer *, ftnlen), + zdscal_(integer *, doublereal *, doublecomplex *, integer *), + zlacgv_(integer *, doublecomplex *, integer *); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } else if (*ldb < max(1, *n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZHEGS2", &i__1, (ftnlen)6); + return 0; + } + if (*itype == 1) { + if (upper) { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + i__2 = k + k * a_dim1; + akk = 
a[i__2].r; + i__2 = k + k * b_dim1; + bkk = b[i__2].r; + d__1 = bkk; + akk /= d__1 * d__1; + i__2 = k + k * a_dim1; + a[i__2].r = akk, a[i__2].i = 0.; + if (k < *n) { + i__2 = *n - k; + d__1 = 1. / bkk; + zdscal_(&i__2, &d__1, &a[k + (k + 1) * a_dim1], lda); + d__1 = akk * -.5; + ct.r = d__1, ct.i = 0.; + i__2 = *n - k; + zlacgv_(&i__2, &a[k + (k + 1) * a_dim1], lda); + i__2 = *n - k; + zlacgv_(&i__2, &b[k + (k + 1) * b_dim1], ldb); + i__2 = *n - k; + zaxpy_(&i__2, &ct, &b[k + (k + 1) * b_dim1], ldb, &a[k + (k + 1) * a_dim1], + lda); + i__2 = *n - k; + z__1.r = -1., z__1.i = -0.; + zher2_(uplo, &i__2, &z__1, &a[k + (k + 1) * a_dim1], lda, + &b[k + (k + 1) * b_dim1], ldb, &a[k + 1 + (k + 1) * a_dim1], lda, + (ftnlen)1); + i__2 = *n - k; + zaxpy_(&i__2, &ct, &b[k + (k + 1) * b_dim1], ldb, &a[k + (k + 1) * a_dim1], + lda); + i__2 = *n - k; + zlacgv_(&i__2, &b[k + (k + 1) * b_dim1], ldb); + i__2 = *n - k; + ztrsv_(uplo, (char *)"Conjugate transpose", (char *)"Non-unit", &i__2, + &b[k + 1 + (k + 1) * b_dim1], ldb, &a[k + (k + 1) * a_dim1], lda, + (ftnlen)1, (ftnlen)19, (ftnlen)8); + i__2 = *n - k; + zlacgv_(&i__2, &a[k + (k + 1) * a_dim1], lda); + } + } + } else { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + i__2 = k + k * a_dim1; + akk = a[i__2].r; + i__2 = k + k * b_dim1; + bkk = b[i__2].r; + d__1 = bkk; + akk /= d__1 * d__1; + i__2 = k + k * a_dim1; + a[i__2].r = akk, a[i__2].i = 0.; + if (k < *n) { + i__2 = *n - k; + d__1 = 1. 
/ bkk; + zdscal_(&i__2, &d__1, &a[k + 1 + k * a_dim1], &c__1); + d__1 = akk * -.5; + ct.r = d__1, ct.i = 0.; + i__2 = *n - k; + zaxpy_(&i__2, &ct, &b[k + 1 + k * b_dim1], &c__1, &a[k + 1 + k * a_dim1], + &c__1); + i__2 = *n - k; + z__1.r = -1., z__1.i = -0.; + zher2_(uplo, &i__2, &z__1, &a[k + 1 + k * a_dim1], &c__1, + &b[k + 1 + k * b_dim1], &c__1, &a[k + 1 + (k + 1) * a_dim1], lda, + (ftnlen)1); + i__2 = *n - k; + zaxpy_(&i__2, &ct, &b[k + 1 + k * b_dim1], &c__1, &a[k + 1 + k * a_dim1], + &c__1); + i__2 = *n - k; + ztrsv_(uplo, (char *)"No transpose", (char *)"Non-unit", &i__2, &b[k + 1 + (k + 1) * b_dim1], + ldb, &a[k + 1 + k * a_dim1], &c__1, (ftnlen)1, (ftnlen)12, (ftnlen)8); + } + } + } + } else { + if (upper) { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + i__2 = k + k * a_dim1; + akk = a[i__2].r; + i__2 = k + k * b_dim1; + bkk = b[i__2].r; + i__2 = k - 1; + ztrmv_(uplo, (char *)"No transpose", (char *)"Non-unit", &i__2, &b[b_offset], ldb, + &a[k * a_dim1 + 1], &c__1, (ftnlen)1, (ftnlen)12, (ftnlen)8); + d__1 = akk * .5; + ct.r = d__1, ct.i = 0.; + i__2 = k - 1; + zaxpy_(&i__2, &ct, &b[k * b_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1); + i__2 = k - 1; + zher2_(uplo, &i__2, &c_b1, &a[k * a_dim1 + 1], &c__1, &b[k * b_dim1 + 1], &c__1, + &a[a_offset], lda, (ftnlen)1); + i__2 = k - 1; + zaxpy_(&i__2, &ct, &b[k * b_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1); + i__2 = k - 1; + zdscal_(&i__2, &bkk, &a[k * a_dim1 + 1], &c__1); + i__2 = k + k * a_dim1; + d__2 = bkk; + d__1 = akk * (d__2 * d__2); + a[i__2].r = d__1, a[i__2].i = 0.; + } + } else { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + i__2 = k + k * a_dim1; + akk = a[i__2].r; + i__2 = k + k * b_dim1; + bkk = b[i__2].r; + i__2 = k - 1; + zlacgv_(&i__2, &a[k + a_dim1], lda); + i__2 = k - 1; + ztrmv_(uplo, (char *)"Conjugate transpose", (char *)"Non-unit", &i__2, &b[b_offset], ldb, + &a[k + a_dim1], lda, (ftnlen)1, (ftnlen)19, (ftnlen)8); + d__1 = akk * .5; + ct.r = d__1, ct.i = 0.; + i__2 = k - 1; + 
zlacgv_(&i__2, &b[k + b_dim1], ldb); + i__2 = k - 1; + zaxpy_(&i__2, &ct, &b[k + b_dim1], ldb, &a[k + a_dim1], lda); + i__2 = k - 1; + zher2_(uplo, &i__2, &c_b1, &a[k + a_dim1], lda, &b[k + b_dim1], ldb, &a[a_offset], + lda, (ftnlen)1); + i__2 = k - 1; + zaxpy_(&i__2, &ct, &b[k + b_dim1], ldb, &a[k + a_dim1], lda); + i__2 = k - 1; + zlacgv_(&i__2, &b[k + b_dim1], ldb); + i__2 = k - 1; + zdscal_(&i__2, &bkk, &a[k + a_dim1], lda); + i__2 = k - 1; + zlacgv_(&i__2, &a[k + a_dim1], lda); + i__2 = k + k * a_dim1; + d__2 = bkk; + d__1 = akk * (d__2 * d__2); + a[i__2].r = d__1, a[i__2].i = 0.; + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zhegst.cpp b/lib/linalg/zhegst.cpp new file mode 100644 index 0000000000..8c9d9434cb --- /dev/null +++ b/lib/linalg/zhegst.cpp @@ -0,0 +1,195 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static doublecomplex c_b2 = {.5, 0.}; +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b18 = 1.; +int zhegst_(integer *itype, char *uplo, integer *n, doublecomplex *a, integer *lda, + doublecomplex *b, integer *ldb, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + doublecomplex z__1; + integer k, kb, nb; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int zhemm_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, ftnlen, ftnlen); + logical upper; + extern int ztrmm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, + ftnlen, ftnlen), + ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen), + zhegs2_(integer *, char *, integer *, doublecomplex *, integer 
*, doublecomplex *, + integer *, integer *, ftnlen), + zher2k_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, doublecomplex *, integer *, ftnlen, + ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } else if (*ldb < max(1, *n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZHEGST", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + nb = ilaenv_(&c__1, (char *)"ZHEGST", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (nb <= 1 || nb >= *n) { + zhegs2_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info, (ftnlen)1); + } else { + if (*itype == 1) { + if (upper) { + i__1 = *n; + i__2 = nb; + for (k = 1; i__2 < 0 ? 
k >= i__1 : k <= i__1; k += i__2) { + i__3 = *n - k + 1; + kb = min(i__3, nb); + zhegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + k * b_dim1], ldb, + info, (ftnlen)1); + if (k + kb <= *n) { + i__3 = *n - k - kb + 1; + ztrsm_((char *)"L", uplo, (char *)"C", (char *)"N", &kb, &i__3, &c_b1, &b[k + k * b_dim1], ldb, + &a[k + (k + kb) * a_dim1], lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, + (ftnlen)1); + i__3 = *n - k - kb + 1; + z__1.r = -.5, z__1.i = -0.; + zhemm_((char *)"L", uplo, &kb, &i__3, &z__1, &a[k + k * a_dim1], lda, + &b[k + (k + kb) * b_dim1], ldb, &c_b1, &a[k + (k + kb) * a_dim1], + lda, (ftnlen)1, (ftnlen)1); + i__3 = *n - k - kb + 1; + z__1.r = -1., z__1.i = -0.; + zher2k_(uplo, (char *)"C", &i__3, &kb, &z__1, &a[k + (k + kb) * a_dim1], lda, + &b[k + (k + kb) * b_dim1], ldb, &c_b18, + &a[k + kb + (k + kb) * a_dim1], lda, (ftnlen)1, (ftnlen)1); + i__3 = *n - k - kb + 1; + z__1.r = -.5, z__1.i = -0.; + zhemm_((char *)"L", uplo, &kb, &i__3, &z__1, &a[k + k * a_dim1], lda, + &b[k + (k + kb) * b_dim1], ldb, &c_b1, &a[k + (k + kb) * a_dim1], + lda, (ftnlen)1, (ftnlen)1); + i__3 = *n - k - kb + 1; + ztrsm_((char *)"R", uplo, (char *)"N", (char *)"N", &kb, &i__3, &c_b1, + &b[k + kb + (k + kb) * b_dim1], ldb, &a[k + (k + kb) * a_dim1], lda, + (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } + } + } else { + i__2 = *n; + i__1 = nb; + for (k = 1; i__1 < 0 ? 
k >= i__2 : k <= i__2; k += i__1) { + i__3 = *n - k + 1; + kb = min(i__3, nb); + zhegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + k * b_dim1], ldb, + info, (ftnlen)1); + if (k + kb <= *n) { + i__3 = *n - k - kb + 1; + ztrsm_((char *)"R", uplo, (char *)"C", (char *)"N", &i__3, &kb, &c_b1, &b[k + k * b_dim1], ldb, + &a[k + kb + k * a_dim1], lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, + (ftnlen)1); + i__3 = *n - k - kb + 1; + z__1.r = -.5, z__1.i = -0.; + zhemm_((char *)"R", uplo, &i__3, &kb, &z__1, &a[k + k * a_dim1], lda, + &b[k + kb + k * b_dim1], ldb, &c_b1, &a[k + kb + k * a_dim1], lda, + (ftnlen)1, (ftnlen)1); + i__3 = *n - k - kb + 1; + z__1.r = -1., z__1.i = -0.; + zher2k_(uplo, (char *)"N", &i__3, &kb, &z__1, &a[k + kb + k * a_dim1], lda, + &b[k + kb + k * b_dim1], ldb, &c_b18, + &a[k + kb + (k + kb) * a_dim1], lda, (ftnlen)1, (ftnlen)1); + i__3 = *n - k - kb + 1; + z__1.r = -.5, z__1.i = -0.; + zhemm_((char *)"R", uplo, &i__3, &kb, &z__1, &a[k + k * a_dim1], lda, + &b[k + kb + k * b_dim1], ldb, &c_b1, &a[k + kb + k * a_dim1], lda, + (ftnlen)1, (ftnlen)1); + i__3 = *n - k - kb + 1; + ztrsm_((char *)"L", uplo, (char *)"N", (char *)"N", &i__3, &kb, &c_b1, + &b[k + kb + (k + kb) * b_dim1], ldb, &a[k + kb + k * a_dim1], lda, + (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } + } + } + } else { + if (upper) { + i__1 = *n; + i__2 = nb; + for (k = 1; i__2 < 0 ? 
k >= i__1 : k <= i__1; k += i__2) { + i__3 = *n - k + 1; + kb = min(i__3, nb); + i__3 = k - 1; + ztrmm_((char *)"L", uplo, (char *)"N", (char *)"N", &i__3, &kb, &c_b1, &b[b_offset], ldb, + &a[k * a_dim1 + 1], lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__3 = k - 1; + zhemm_((char *)"R", uplo, &i__3, &kb, &c_b2, &a[k + k * a_dim1], lda, + &b[k * b_dim1 + 1], ldb, &c_b1, &a[k * a_dim1 + 1], lda, (ftnlen)1, + (ftnlen)1); + i__3 = k - 1; + zher2k_(uplo, (char *)"N", &i__3, &kb, &c_b1, &a[k * a_dim1 + 1], lda, + &b[k * b_dim1 + 1], ldb, &c_b18, &a[a_offset], lda, (ftnlen)1, + (ftnlen)1); + i__3 = k - 1; + zhemm_((char *)"R", uplo, &i__3, &kb, &c_b2, &a[k + k * a_dim1], lda, + &b[k * b_dim1 + 1], ldb, &c_b1, &a[k * a_dim1 + 1], lda, (ftnlen)1, + (ftnlen)1); + i__3 = k - 1; + ztrmm_((char *)"R", uplo, (char *)"C", (char *)"N", &i__3, &kb, &c_b1, &b[k + k * b_dim1], ldb, + &a[k * a_dim1 + 1], lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + zhegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + k * b_dim1], ldb, + info, (ftnlen)1); + } + } else { + i__2 = *n; + i__1 = nb; + for (k = 1; i__1 < 0 ? 
k >= i__2 : k <= i__2; k += i__1) { + i__3 = *n - k + 1; + kb = min(i__3, nb); + i__3 = k - 1; + ztrmm_((char *)"R", uplo, (char *)"N", (char *)"N", &kb, &i__3, &c_b1, &b[b_offset], ldb, + &a[k + a_dim1], lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__3 = k - 1; + zhemm_((char *)"L", uplo, &kb, &i__3, &c_b2, &a[k + k * a_dim1], lda, &b[k + b_dim1], + ldb, &c_b1, &a[k + a_dim1], lda, (ftnlen)1, (ftnlen)1); + i__3 = k - 1; + zher2k_(uplo, (char *)"C", &i__3, &kb, &c_b1, &a[k + a_dim1], lda, &b[k + b_dim1], ldb, + &c_b18, &a[a_offset], lda, (ftnlen)1, (ftnlen)1); + i__3 = k - 1; + zhemm_((char *)"L", uplo, &kb, &i__3, &c_b2, &a[k + k * a_dim1], lda, &b[k + b_dim1], + ldb, &c_b1, &a[k + a_dim1], lda, (ftnlen)1, (ftnlen)1); + i__3 = k - 1; + ztrmm_((char *)"L", uplo, (char *)"C", (char *)"N", &kb, &i__3, &c_b1, &b[k + k * b_dim1], ldb, + &a[k + a_dim1], lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + zhegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + k * b_dim1], ldb, + info, (ftnlen)1); + } + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zhegv.cpp b/lib/linalg/zhegv.cpp new file mode 100644 index 0000000000..9d85be5132 --- /dev/null +++ b/lib/linalg/zhegv.cpp @@ -0,0 +1,115 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +static integer c_n1 = -1; +int zhegv_(integer *itype, char *jobz, char *uplo, integer *n, doublecomplex *a, integer *lda, + doublecomplex *b, integer *ldb, doublereal *w, doublecomplex *work, integer *lwork, + doublereal *rwork, integer *info, ftnlen jobz_len, ftnlen uplo_len) +{ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + integer nb, neig; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int zheev_(char *, char *, integer *, doublecomplex *, integer *, doublereal *, + doublecomplex *, integer *, doublereal *, integer *, ftnlen, ftnlen); + char trans[1]; + logical upper, wantz; + 
extern int ztrmm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, + ftnlen, ftnlen), + ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + extern int zhegst_(integer *, char *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *, integer *, ftnlen); + integer lwkopt; + logical lquery; + extern int zpotrf_(char *, integer *, doublecomplex *, integer *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --w; + --work; + --rwork; + wantz = lsame_(jobz, (char *)"V", (ftnlen)1, (ftnlen)1); + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + *info = 0; + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (!(wantz || lsame_(jobz, (char *)"N", (ftnlen)1, (ftnlen)1))) { + *info = -2; + } else if (!(upper || lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*lda < max(1, *n)) { + *info = -6; + } else if (*ldb < max(1, *n)) { + *info = -8; + } + if (*info == 0) { + nb = ilaenv_(&c__1, (char *)"ZHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + i__1 = 1, i__2 = (nb + 1) * *n; + lwkopt = max(i__1, i__2); + work[1].r = (doublereal)lwkopt, work[1].i = 0.; + i__1 = 1, i__2 = (*n << 1) - 1; + if (*lwork < max(i__1, i__2) && !lquery) { + *info = -11; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZHEGV ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + if (*n == 0) { + return 0; + } + zpotrf_(uplo, n, &b[b_offset], ldb, info, (ftnlen)1); + if (*info != 0) 
{ + *info = *n + *info; + return 0; + } + zhegst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info, (ftnlen)1); + zheev_(jobz, uplo, n, &a[a_offset], lda, &w[1], &work[1], lwork, &rwork[1], info, (ftnlen)1, + (ftnlen)1); + if (wantz) { + neig = *n; + if (*info > 0) { + neig = *info - 1; + } + if (*itype == 1 || *itype == 2) { + if (upper) { + *(unsigned char *)trans = 'N'; + } else { + *(unsigned char *)trans = 'C'; + } + ztrsm_((char *)"Left", uplo, trans, (char *)"Non-unit", n, &neig, &c_b1, &b[b_offset], ldb, + &a[a_offset], lda, (ftnlen)4, (ftnlen)1, (ftnlen)1, (ftnlen)8); + } else if (*itype == 3) { + if (upper) { + *(unsigned char *)trans = 'C'; + } else { + *(unsigned char *)trans = 'N'; + } + ztrmm_((char *)"Left", uplo, trans, (char *)"Non-unit", n, &neig, &c_b1, &b[b_offset], ldb, + &a[a_offset], lda, (ftnlen)4, (ftnlen)1, (ftnlen)1, (ftnlen)8); + } + } + work[1].r = (doublereal)lwkopt, work[1].i = 0.; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zhemm.cpp b/lib/linalg/zhemm.cpp new file mode 100644 index 0000000000..3237e16c2c --- /dev/null +++ b/lib/linalg/zhemm.cpp @@ -0,0 +1,271 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int zhemm_(char *side, char *uplo, integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, + integer *lda, doublecomplex *b, integer *ldb, doublecomplex *beta, doublecomplex *c__, + integer *ldc, ftnlen side_len, ftnlen uplo_len) +{ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, + i__6; + doublereal d__1; + doublecomplex z__1, z__2, z__3, z__4, z__5; + void d_lmp_cnjg(doublecomplex *, doublecomplex *); + integer i__, j, k, info; + doublecomplex temp1, temp2; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nrowa; + logical upper; + extern int xerbla_(char *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = 
*ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + if (lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1)) { + nrowa = *m; + } else { + nrowa = *n; + } + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + info = 0; + if (!lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1) && !lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 2; + } else if (*m < 0) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1, nrowa)) { + info = 7; + } else if (*ldb < max(1, *m)) { + info = 9; + } else if (*ldc < max(1, *m)) { + info = 12; + } + if (info != 0) { + xerbla_((char *)"ZHEMM ", &info, (ftnlen)6); + return 0; + } + if (*m == 0 || *n == 0 || + alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && beta->i == 0.)) { + return 0; + } + if (alpha->r == 0. && alpha->i == 0.) { + if (beta->r == 0. && beta->i == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; + } + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, + z__1.i = beta->r * c__[i__4].i + beta->i * c__[i__4].r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } + } + return 0; + } + if (lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1)) { + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, + z__1.i = alpha->r * b[i__3].i + alpha->i * b[i__3].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + i__4 = k + j * c_dim1; + i__5 = k + j * c_dim1; + i__6 = k + i__ * a_dim1; + z__2.r = temp1.r * a[i__6].r - temp1.i * 
a[i__6].i, + z__2.i = temp1.r * a[i__6].i + temp1.i * a[i__6].r; + z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + z__2.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; + i__4 = k + j * b_dim1; + d_lmp_cnjg(&z__3, &a[k + i__ * a_dim1]); + z__2.r = b[i__4].r * z__3.r - b[i__4].i * z__3.i, + z__2.i = b[i__4].r * z__3.i + b[i__4].i * z__3.r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + } + if (beta->r == 0. && beta->i == 0.) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + i__ * a_dim1; + d__1 = a[i__4].r; + z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; + z__3.r = alpha->r * temp2.r - alpha->i * temp2.i, + z__3.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, + z__3.i = beta->r * c__[i__4].i + beta->i * c__[i__4].r; + i__5 = i__ + i__ * a_dim1; + d__1 = a[i__5].r; + z__4.r = d__1 * temp1.r, z__4.i = d__1 * temp1.i; + z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + z__4.i; + z__5.r = alpha->r * temp2.r - alpha->i * temp2.i, + z__5.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + i__2 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2].i, + z__1.i = alpha->r * b[i__2].i + alpha->i * b[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + i__3 = k + j * c_dim1; + i__4 = k + j * c_dim1; + i__5 = k + i__ * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; + z__1.r = c__[i__4].r + z__2.r, z__1.i = c__[i__4].i + z__2.i; + c__[i__3].r = z__1.r, 
c__[i__3].i = z__1.i; + i__3 = k + j * b_dim1; + d_lmp_cnjg(&z__3, &a[k + i__ * a_dim1]); + z__2.r = b[i__3].r * z__3.r - b[i__3].i * z__3.i, + z__2.i = b[i__3].r * z__3.i + b[i__3].i * z__3.r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + } + if (beta->r == 0. && beta->i == 0.) { + i__2 = i__ + j * c_dim1; + i__3 = i__ + i__ * a_dim1; + d__1 = a[i__3].r; + z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; + z__3.r = alpha->r * temp2.r - alpha->i * temp2.i, + z__3.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + } else { + i__2 = i__ + j * c_dim1; + i__3 = i__ + j * c_dim1; + z__3.r = beta->r * c__[i__3].r - beta->i * c__[i__3].i, + z__3.i = beta->r * c__[i__3].i + beta->i * c__[i__3].r; + i__4 = i__ + i__ * a_dim1; + d__1 = a[i__4].r; + z__4.r = d__1 * temp1.r, z__4.i = d__1 * temp1.i; + z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + z__4.i; + z__5.r = alpha->r * temp2.r - alpha->i * temp2.i, + z__5.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + } + } + } + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j + j * a_dim1; + d__1 = a[i__2].r; + z__1.r = d__1 * alpha->r, z__1.i = d__1 * alpha->i; + temp1.r = z__1.r, temp1.i = z__1.i; + if (beta->r == 0. && beta->i == 0.) 
{ + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * b_dim1; + z__1.r = temp1.r * b[i__4].r - temp1.i * b[i__4].i, + z__1.i = temp1.r * b[i__4].i + temp1.i * b[i__4].r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } else { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__2.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, + z__2.i = beta->r * c__[i__4].i + beta->i * c__[i__4].r; + i__5 = i__ + j * b_dim1; + z__3.r = temp1.r * b[i__5].r - temp1.i * b[i__5].i, + z__3.i = temp1.r * b[i__5].i + temp1.i * b[i__5].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } + i__2 = j - 1; + for (k = 1; k <= i__2; ++k) { + if (upper) { + i__3 = k + j * a_dim1; + z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, + z__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3].r; + temp1.r = z__1.r, temp1.i = z__1.i; + } else { + d_lmp_cnjg(&z__2, &a[j + k * a_dim1]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, + z__1.i = alpha->r * z__2.i + alpha->i * z__2.r; + temp1.r = z__1.r, temp1.i = z__1.i; + } + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + k * b_dim1; + z__2.r = temp1.r * b[i__6].r - temp1.i * b[i__6].i, + z__2.i = temp1.r * b[i__6].i + temp1.i * b[i__6].r; + z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + z__2.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; + } + } + i__2 = *n; + for (k = j + 1; k <= i__2; ++k) { + if (upper) { + d_lmp_cnjg(&z__2, &a[j + k * a_dim1]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, + z__1.i = alpha->r * z__2.i + alpha->i * z__2.r; + temp1.r = z__1.r, temp1.i = z__1.i; + } else { + i__3 = k + j * a_dim1; + z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, + z__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3].r; + temp1.r = z__1.r, temp1.i = z__1.i; + } + i__3 = *m; + for (i__ = 1; i__ <= 
i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + k * b_dim1; + z__2.r = temp1.r * b[i__6].r - temp1.i * b[i__6].i, + z__2.i = temp1.r * b[i__6].i + temp1.i * b[i__6].r; + z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + z__2.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; + } + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zher.cpp b/lib/linalg/zher.cpp new file mode 100644 index 0000000000..6514a72f65 --- /dev/null +++ b/lib/linalg/zher.cpp @@ -0,0 +1,187 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int zher_(char *uplo, integer *n, doublereal *alpha, doublecomplex *x, integer *incx, + doublecomplex *a, integer *lda, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1; + doublecomplex z__1, z__2; + void d_lmp_cnjg(doublecomplex *, doublecomplex *); + integer i__, j, ix, jx, kx, info; + doublecomplex temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int xerbla_(char *, integer *, ftnlen); + --x; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + info = 0; + if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*lda < max(1, *n)) { + info = 7; + } + if (info != 0) { + xerbla_((char *)"ZHER ", &info, (ftnlen)6); + return 0; + } + if (*n == 0 || *alpha == 0.) { + return 0; + } + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0. || x[i__2].i != 0.) 
{ + d_lmp_cnjg(&z__2, &x[j]); + z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, + z__2.i = x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + } + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = j; + z__1.r = x[i__4].r * temp.r - x[i__4].i * temp.i, + z__1.i = x[i__4].r * temp.i + x[i__4].i * temp.r; + d__1 = a[i__3].r + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + d_lmp_cnjg(&z__2, &x[jx]); + z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix = kx; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, + z__2.i = x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + ix += *incx; + } + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = jx; + z__1.r = x[i__4].r * temp.r - x[i__4].i * temp.i, + z__1.i = x[i__4].r * temp.i + x[i__4].i * temp.r; + d__1 = a[i__3].r + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } + jx += *incx; + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0. || x[i__2].i != 0.) 
{ + d_lmp_cnjg(&z__2, &x[j]); + z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = j; + z__1.r = temp.r * x[i__4].r - temp.i * x[i__4].i, + z__1.i = temp.r * x[i__4].i + temp.i * x[i__4].r; + d__1 = a[i__3].r + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, + z__2.i = x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + } + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + d_lmp_cnjg(&z__2, &x[jx]); + z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = jx; + z__1.r = temp.r * x[i__4].r - temp.i * x[i__4].i, + z__1.i = temp.r * x[i__4].i + temp.i * x[i__4].r; + d__1 = a[i__3].r + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + ix = jx; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, + z__2.i = x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + } + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } + jx += *incx; + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zherk.cpp b/lib/linalg/zherk.cpp new file mode 100644 index 0000000000..efae201bfa --- /dev/null +++ b/lib/linalg/zherk.cpp @@ -0,0 +1,325 
@@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int zherk_(char *uplo, char *trans, integer *n, integer *k, doublereal *alpha, doublecomplex *a, + integer *lda, doublereal *beta, doublecomplex *c__, integer *ldc, ftnlen uplo_len, + ftnlen trans_len) +{ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6; + doublereal d__1; + doublecomplex z__1, z__2, z__3; + void d_lmp_cnjg(doublecomplex *, doublecomplex *); + integer i__, j, l, info; + doublecomplex temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nrowa; + doublereal rtemp; + logical upper; + extern int xerbla_(char *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + info = 0; + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (!lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1) && + !lsame_(trans, (char *)"C", (ftnlen)1, (ftnlen)1)) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1, nrowa)) { + info = 7; + } else if (*ldc < max(1, *n)) { + info = 10; + } + if (info != 0) { + xerbla_((char *)"ZHERK ", &info, (ftnlen)6); + return 0; + } + if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { + return 0; + } + if (*alpha == 0.) { + if (upper) { + if (*beta == 0.) 
{ + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; + } + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } + } + } else { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; + } + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } + } + } + return 0; + } + if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) { + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; + } + } else if (*beta != 1.) 
{ + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * a_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0.) { + d_lmp_cnjg(&z__2, &a[j + l * a_dim1]); + z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + i__3 = j - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + z__2.i = temp.r * a[i__6].i + temp.i * a[i__6].r; + z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + z__2.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; + } + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + i__5 = i__ + l * a_dim1; + z__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__1.i = temp.r * a[i__5].i + temp.i * a[i__5].r; + d__1 = c__[i__4].r + z__1.r; + c__[i__3].r = d__1, c__[i__3].i = 0.; + } + } + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; + } + } else if (*beta != 1.) 
{ + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * a_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0.) { + d_lmp_cnjg(&z__2, &a[j + l * a_dim1]); + z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + i__5 = j + l * a_dim1; + z__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__1.i = temp.r * a[i__5].i + temp.i * a[i__5].r; + d__1 = c__[i__4].r + z__1.r; + c__[i__3].r = d__1, c__[i__3].i = 0.; + i__3 = *n; + for (i__ = j + 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + z__2.i = temp.r * a[i__6].i + temp.i * a[i__6].r; + z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + z__2.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; + } + } + } + } + } + } else { + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0., temp.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + d_lmp_cnjg(&z__3, &a[l + i__ * a_dim1]); + i__4 = l + j * a_dim1; + z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i, + z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (*beta == 0.) 
{ + i__3 = i__ + j * c_dim1; + z__1.r = *alpha * temp.r, z__1.i = *alpha * temp.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + z__2.r = *alpha * temp.r, z__2.i = *alpha * temp.i; + i__4 = i__ + j * c_dim1; + z__3.r = *beta * c__[i__4].r, z__3.i = *beta * c__[i__4].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } + rtemp = 0.; + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + d_lmp_cnjg(&z__3, &a[l + j * a_dim1]); + i__3 = l + j * a_dim1; + z__2.r = z__3.r * a[i__3].r - z__3.i * a[i__3].i, + z__2.i = z__3.r * a[i__3].i + z__3.i * a[i__3].r; + z__1.r = rtemp + z__2.r, z__1.i = z__2.i; + rtemp = z__1.r; + } + if (*beta == 0.) { + i__2 = j + j * c_dim1; + d__1 = *alpha * rtemp; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *alpha * rtemp + *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + rtemp = 0.; + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + d_lmp_cnjg(&z__3, &a[l + j * a_dim1]); + i__3 = l + j * a_dim1; + z__2.r = z__3.r * a[i__3].r - z__3.i * a[i__3].i, + z__2.i = z__3.r * a[i__3].i + z__3.i * a[i__3].r; + z__1.r = rtemp + z__2.r, z__1.i = z__2.i; + rtemp = z__1.r; + } + if (*beta == 0.) 
{ + i__2 = j + j * c_dim1; + d__1 = *alpha * rtemp; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *alpha * rtemp + *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + temp.r = 0., temp.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + d_lmp_cnjg(&z__3, &a[l + i__ * a_dim1]); + i__4 = l + j * a_dim1; + z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i, + z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (*beta == 0.) { + i__3 = i__ + j * c_dim1; + z__1.r = *alpha * temp.r, z__1.i = *alpha * temp.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + z__2.r = *alpha * temp.r, z__2.i = *alpha * temp.i; + i__4 = i__ + j * c_dim1; + z__3.r = *beta * c__[i__4].r, z__3.i = *beta * c__[i__4].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zhetf2.cpp b/lib/linalg/zhetf2.cpp new file mode 100644 index 0000000000..c960a63bc1 --- /dev/null +++ b/lib/linalg/zhetf2.cpp @@ -0,0 +1,439 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +int zhetf2_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, integer *info, + ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; + doublereal d__1, d__2, d__3, d__4; + doublecomplex z__1, z__2, z__3, z__4, z__5, z__6; + double sqrt(doublereal), d_lmp_imag(doublecomplex *); + void d_lmp_cnjg(doublecomplex *, doublecomplex *); + doublereal d__; + integer i__, j, k; + doublecomplex t; + doublereal r1, d11; + doublecomplex d12; + doublereal d22; + doublecomplex d21; + integer kk, kp; + doublecomplex wk; + doublereal tt; + doublecomplex wkm1, wkp1; + 
integer imax, jmax; + extern int zher_(char *, integer *, doublereal *, doublecomplex *, integer *, doublecomplex *, + integer *, ftnlen); + doublereal alpha; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer kstep; + logical upper; + extern int zswap_(integer *, doublecomplex *, integer *, doublecomplex *, integer *); + extern doublereal dlapy2_(doublereal *, doublereal *); + doublereal absakk; + extern logical disnan_(doublereal *); + extern int xerbla_(char *, integer *, ftnlen), + zdscal_(integer *, doublereal *, doublecomplex *, integer *); + doublereal colmax; + extern integer izamax_(integer *, doublecomplex *, integer *); + doublereal rowmax; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZHETF2", &i__1, (ftnlen)6); + return 0; + } + alpha = (sqrt(17.) + 1.) / 8.; + if (upper) { + k = *n; + L10: + if (k < 1) { + goto L90; + } + kstep = 1; + i__1 = k + k * a_dim1; + absakk = (d__1 = a[i__1].r, abs(d__1)); + if (k > 1) { + i__1 = k - 1; + imax = izamax_(&i__1, &a[k * a_dim1 + 1], &c__1); + i__1 = imax + k * a_dim1; + colmax = + (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&a[imax + k * a_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0. 
|| disnan_(&absakk)) { + if (*info == 0) { + *info = k; + } + kp = k; + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = k - imax; + jmax = imax + izamax_(&i__1, &a[imax + (imax + 1) * a_dim1], lda); + i__1 = imax + jmax * a_dim1; + rowmax = (d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[imax + jmax * a_dim1]), abs(d__2)); + if (imax > 1) { + i__1 = imax - 1; + jmax = izamax_(&i__1, &a[imax * a_dim1 + 1], &c__1); + i__1 = jmax + imax * a_dim1; + d__3 = rowmax, d__4 = (d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[jmax + imax * a_dim1]), abs(d__2)); + rowmax = max(d__3, d__4); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else { + i__1 = imax + imax * a_dim1; + if ((d__1 = a[i__1].r, abs(d__1)) >= alpha * rowmax) { + kp = imax; + } else { + kp = imax; + kstep = 2; + } + } + } + kk = k - kstep + 1; + if (kp != kk) { + i__1 = kp - 1; + zswap_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &c__1); + i__1 = kk - 1; + for (j = kp + 1; j <= i__1; ++j) { + d_lmp_cnjg(&z__1, &a[j + kk * a_dim1]); + t.r = z__1.r, t.i = z__1.i; + i__2 = j + kk * a_dim1; + d_lmp_cnjg(&z__1, &a[kp + j * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = kp + j * a_dim1; + a[i__2].r = t.r, a[i__2].i = t.i; + } + i__1 = kp + kk * a_dim1; + d_lmp_cnjg(&z__1, &a[kp + kk * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = kk + kk * a_dim1; + r1 = a[i__1].r; + i__1 = kk + kk * a_dim1; + i__2 = kp + kp * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = kp + kp * a_dim1; + a[i__1].r = r1, a[i__1].i = 0.; + if (kstep == 2) { + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = k - 1 + k * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = k - 1 + k * a_dim1; + i__2 = kp + k * a_dim1; + a[i__1].r = a[i__2].r, 
a[i__1].i = a[i__2].i; + i__1 = kp + k * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + } + } else { + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + if (kstep == 2) { + i__1 = k - 1 + (k - 1) * a_dim1; + i__2 = k - 1 + (k - 1) * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } + } + if (kstep == 1) { + i__1 = k + k * a_dim1; + r1 = 1. / a[i__1].r; + i__1 = k - 1; + d__1 = -r1; + zher_(uplo, &i__1, &d__1, &a[k * a_dim1 + 1], &c__1, &a[a_offset], lda, (ftnlen)1); + i__1 = k - 1; + zdscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1); + } else { + if (k > 2) { + i__1 = k - 1 + k * a_dim1; + d__1 = a[i__1].r; + d__2 = d_lmp_imag(&a[k - 1 + k * a_dim1]); + d__ = dlapy2_(&d__1, &d__2); + i__1 = k - 1 + (k - 1) * a_dim1; + d22 = a[i__1].r / d__; + i__1 = k + k * a_dim1; + d11 = a[i__1].r / d__; + tt = 1. / (d11 * d22 - 1.); + i__1 = k - 1 + k * a_dim1; + z__1.r = a[i__1].r / d__, z__1.i = a[i__1].i / d__; + d12.r = z__1.r, d12.i = z__1.i; + d__ = tt / d__; + for (j = k - 2; j >= 1; --j) { + i__1 = j + (k - 1) * a_dim1; + z__3.r = d11 * a[i__1].r, z__3.i = d11 * a[i__1].i; + d_lmp_cnjg(&z__5, &d12); + i__2 = j + k * a_dim1; + z__4.r = z__5.r * a[i__2].r - z__5.i * a[i__2].i, + z__4.i = z__5.r * a[i__2].i + z__5.i * a[i__2].r; + z__2.r = z__3.r - z__4.r, z__2.i = z__3.i - z__4.i; + z__1.r = d__ * z__2.r, z__1.i = d__ * z__2.i; + wkm1.r = z__1.r, wkm1.i = z__1.i; + i__1 = j + k * a_dim1; + z__3.r = d22 * a[i__1].r, z__3.i = d22 * a[i__1].i; + i__2 = j + (k - 1) * a_dim1; + z__4.r = d12.r * a[i__2].r - d12.i * a[i__2].i, + z__4.i = d12.r * a[i__2].i + d12.i * a[i__2].r; + z__2.r = z__3.r - z__4.r, z__2.i = z__3.i - z__4.i; + z__1.r = d__ * z__2.r, z__1.i = d__ * z__2.i; + wk.r = z__1.r, wk.i = z__1.i; + for (i__ = j; i__ >= 1; --i__) { + i__1 = i__ + j * a_dim1; + i__2 = i__ + j * a_dim1; + i__3 = i__ + k * a_dim1; + d_lmp_cnjg(&z__4, &wk); + z__3.r = a[i__3].r * z__4.r - a[i__3].i * z__4.i, + z__3.i = 
a[i__3].r * z__4.i + a[i__3].i * z__4.r; + z__2.r = a[i__2].r - z__3.r, z__2.i = a[i__2].i - z__3.i; + i__4 = i__ + (k - 1) * a_dim1; + d_lmp_cnjg(&z__6, &wkm1); + z__5.r = a[i__4].r * z__6.r - a[i__4].i * z__6.i, + z__5.i = a[i__4].r * z__6.i + a[i__4].i * z__6.r; + z__1.r = z__2.r - z__5.r, z__1.i = z__2.i - z__5.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + i__1 = j + k * a_dim1; + a[i__1].r = wk.r, a[i__1].i = wk.i; + i__1 = j + (k - 1) * a_dim1; + a[i__1].r = wkm1.r, a[i__1].i = wkm1.i; + i__1 = j + j * a_dim1; + i__2 = j + j * a_dim1; + d__1 = a[i__2].r; + z__1.r = d__1, z__1.i = 0.; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + } + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k - 1] = -kp; + } + k -= kstep; + goto L10; + } else { + k = 1; + L50: + if (k > *n) { + goto L90; + } + kstep = 1; + i__1 = k + k * a_dim1; + absakk = (d__1 = a[i__1].r, abs(d__1)); + if (k < *n) { + i__1 = *n - k; + imax = k + izamax_(&i__1, &a[k + 1 + k * a_dim1], &c__1); + i__1 = imax + k * a_dim1; + colmax = + (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&a[imax + k * a_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0. 
|| disnan_(&absakk)) { + if (*info == 0) { + *info = k; + } + kp = k; + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = imax - k; + jmax = k - 1 + izamax_(&i__1, &a[imax + k * a_dim1], lda); + i__1 = imax + jmax * a_dim1; + rowmax = (d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[imax + jmax * a_dim1]), abs(d__2)); + if (imax < *n) { + i__1 = *n - imax; + jmax = imax + izamax_(&i__1, &a[imax + 1 + imax * a_dim1], &c__1); + i__1 = jmax + imax * a_dim1; + d__3 = rowmax, d__4 = (d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[jmax + imax * a_dim1]), abs(d__2)); + rowmax = max(d__3, d__4); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else { + i__1 = imax + imax * a_dim1; + if ((d__1 = a[i__1].r, abs(d__1)) >= alpha * rowmax) { + kp = imax; + } else { + kp = imax; + kstep = 2; + } + } + } + kk = k + kstep - 1; + if (kp != kk) { + if (kp < *n) { + i__1 = *n - kp; + zswap_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1 + kp * a_dim1], &c__1); + } + i__1 = kp - 1; + for (j = kk + 1; j <= i__1; ++j) { + d_lmp_cnjg(&z__1, &a[j + kk * a_dim1]); + t.r = z__1.r, t.i = z__1.i; + i__2 = j + kk * a_dim1; + d_lmp_cnjg(&z__1, &a[kp + j * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = kp + j * a_dim1; + a[i__2].r = t.r, a[i__2].i = t.i; + } + i__1 = kp + kk * a_dim1; + d_lmp_cnjg(&z__1, &a[kp + kk * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = kk + kk * a_dim1; + r1 = a[i__1].r; + i__1 = kk + kk * a_dim1; + i__2 = kp + kp * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = kp + kp * a_dim1; + a[i__1].r = r1, a[i__1].i = 0.; + if (kstep == 2) { + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = k + 1 + k * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = k + 1 + k * a_dim1; + i__2 = kp + k * 
a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + k * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + } + } else { + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + if (kstep == 2) { + i__1 = k + 1 + (k + 1) * a_dim1; + i__2 = k + 1 + (k + 1) * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } + } + if (kstep == 1) { + if (k < *n) { + i__1 = k + k * a_dim1; + r1 = 1. / a[i__1].r; + i__1 = *n - k; + d__1 = -r1; + zher_(uplo, &i__1, &d__1, &a[k + 1 + k * a_dim1], &c__1, + &a[k + 1 + (k + 1) * a_dim1], lda, (ftnlen)1); + i__1 = *n - k; + zdscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1); + } + } else { + if (k < *n - 1) { + i__1 = k + 1 + k * a_dim1; + d__1 = a[i__1].r; + d__2 = d_lmp_imag(&a[k + 1 + k * a_dim1]); + d__ = dlapy2_(&d__1, &d__2); + i__1 = k + 1 + (k + 1) * a_dim1; + d11 = a[i__1].r / d__; + i__1 = k + k * a_dim1; + d22 = a[i__1].r / d__; + tt = 1. / (d11 * d22 - 1.); + i__1 = k + 1 + k * a_dim1; + z__1.r = a[i__1].r / d__, z__1.i = a[i__1].i / d__; + d21.r = z__1.r, d21.i = z__1.i; + d__ = tt / d__; + i__1 = *n; + for (j = k + 2; j <= i__1; ++j) { + i__2 = j + k * a_dim1; + z__3.r = d11 * a[i__2].r, z__3.i = d11 * a[i__2].i; + i__3 = j + (k + 1) * a_dim1; + z__4.r = d21.r * a[i__3].r - d21.i * a[i__3].i, + z__4.i = d21.r * a[i__3].i + d21.i * a[i__3].r; + z__2.r = z__3.r - z__4.r, z__2.i = z__3.i - z__4.i; + z__1.r = d__ * z__2.r, z__1.i = d__ * z__2.i; + wk.r = z__1.r, wk.i = z__1.i; + i__2 = j + (k + 1) * a_dim1; + z__3.r = d22 * a[i__2].r, z__3.i = d22 * a[i__2].i; + d_lmp_cnjg(&z__5, &d21); + i__3 = j + k * a_dim1; + z__4.r = z__5.r * a[i__3].r - z__5.i * a[i__3].i, + z__4.i = z__5.r * a[i__3].i + z__5.i * a[i__3].r; + z__2.r = z__3.r - z__4.r, z__2.i = z__3.i - z__4.i; + z__1.r = d__ * z__2.r, z__1.i = d__ * z__2.i; + wkp1.r = z__1.r, wkp1.i = z__1.i; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; 
+ i__5 = i__ + k * a_dim1; + d_lmp_cnjg(&z__4, &wk); + z__3.r = a[i__5].r * z__4.r - a[i__5].i * z__4.i, + z__3.i = a[i__5].r * z__4.i + a[i__5].i * z__4.r; + z__2.r = a[i__4].r - z__3.r, z__2.i = a[i__4].i - z__3.i; + i__6 = i__ + (k + 1) * a_dim1; + d_lmp_cnjg(&z__6, &wkp1); + z__5.r = a[i__6].r * z__6.r - a[i__6].i * z__6.i, + z__5.i = a[i__6].r * z__6.i + a[i__6].i * z__6.r; + z__1.r = z__2.r - z__5.r, z__1.i = z__2.i - z__5.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + } + i__2 = j + k * a_dim1; + a[i__2].r = wk.r, a[i__2].i = wk.i; + i__2 = j + (k + 1) * a_dim1; + a[i__2].r = wkp1.r, a[i__2].i = wkp1.i; + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + z__1.r = d__1, z__1.i = 0.; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + } + } + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k + 1] = -kp; + } + k += kstep; + goto L50; + } +L90: + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zhetrf.cpp b/lib/linalg/zhetrf.cpp new file mode 100644 index 0000000000..cb60ff4b7b --- /dev/null +++ b/lib/linalg/zhetrf.cpp @@ -0,0 +1,123 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +int zhetrf_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, + doublecomplex *work, integer *lwork, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2; + integer j, k, kb, nb, iws; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nbmin, iinfo; + logical upper; + extern int zhetf2_(char *, integer *, doublecomplex *, integer *, integer *, integer *, ftnlen), + zlahef_(char *, integer *, integer *, integer *, doublecomplex *, integer *, integer *, + doublecomplex *, integer *, integer *, ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + integer ldwork, 
lwkopt; + logical lquery; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + --work; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } else if (*lwork < 1 && !lquery) { + *info = -7; + } + if (*info == 0) { + nb = ilaenv_(&c__1, (char *)"ZHETRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = *n * nb; + work[1].r = (doublereal)lwkopt, work[1].i = 0.; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZHETRF", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + nbmin = 2; + ldwork = *n; + if (nb > 1 && nb < *n) { + iws = ldwork * nb; + if (*lwork < iws) { + i__1 = *lwork / ldwork; + nb = max(i__1, 1); + i__1 = 2, + i__2 = ilaenv_(&c__2, (char *)"ZHETRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1, i__2); + } + } else { + iws = 1; + } + if (nb < nbmin) { + nb = *n; + } + if (upper) { + k = *n; + L10: + if (k < 1) { + goto L40; + } + if (k > nb) { + zlahef_(uplo, &k, &nb, &kb, &a[a_offset], lda, &ipiv[1], &work[1], n, &iinfo, + (ftnlen)1); + } else { + zhetf2_(uplo, &k, &a[a_offset], lda, &ipiv[1], &iinfo, (ftnlen)1); + kb = k; + } + if (*info == 0 && iinfo > 0) { + *info = iinfo; + } + k -= kb; + goto L10; + } else { + k = 1; + L20: + if (k > *n) { + goto L40; + } + if (k <= *n - nb) { + i__1 = *n - k + 1; + zlahef_(uplo, &i__1, &nb, &kb, &a[k + k * a_dim1], lda, &ipiv[k], &work[1], n, &iinfo, + (ftnlen)1); + } else { + i__1 = *n - k + 1; + zhetf2_(uplo, &i__1, &a[k + k * a_dim1], lda, &ipiv[k], &iinfo, (ftnlen)1); + kb = *n - k + 1; + } + if (*info == 0 && iinfo > 0) { + *info = iinfo + k - 1; + } + i__1 = k + kb - 1; + for (j = k; j <= i__1; ++j) { + if (ipiv[j] > 0) { + ipiv[j] = ipiv[j] + k - 1; + } else { + ipiv[j] = ipiv[j] - k + 1; + } + } + k += kb; 
+ goto L20; + } +L40: + work[1].r = (doublereal)lwkopt, work[1].i = 0.; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zhetri.cpp b/lib/linalg/zhetri.cpp new file mode 100644 index 0000000000..020b4ce52b --- /dev/null +++ b/lib/linalg/zhetri.cpp @@ -0,0 +1,319 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b2 = {0., 0.}; +static integer c__1 = 1; +int zhetri_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, + doublecomplex *work, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + doublecomplex z__1, z__2; + double z_lmp_abs(doublecomplex *); + void d_lmp_cnjg(doublecomplex *, doublecomplex *); + doublereal d__; + integer j, k; + doublereal t, ak; + integer kp; + doublereal akp1; + doublecomplex temp, akkp1; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern VOID zdotc_(doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *); + integer kstep; + extern int zhemv_(char *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, + ftnlen); + logical upper; + extern int zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), + zswap_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), + xerbla_(char *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + --work; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZHETRI", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + if (upper) { + for (*info = *n; *info >= 1; --(*info)) { + i__1 = *info + *info * a_dim1; + if (ipiv[*info] > 0 
&& (a[i__1].r == 0. && a[i__1].i == 0.)) { + return 0; + } + } + } else { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + i__2 = *info + *info * a_dim1; + if (ipiv[*info] > 0 && (a[i__2].r == 0. && a[i__2].i == 0.)) { + return 0; + } + } + } + *info = 0; + if (upper) { + k = 1; + L30: + if (k > *n) { + goto L50; + } + if (ipiv[k] > 0) { + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = 1. / a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + if (k > 1) { + i__1 = k - 1; + zcopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &work[1], &c__1); + i__1 = k - 1; + z__1.r = -1., z__1.i = -0.; + zhemv_(uplo, &i__1, &z__1, &a[a_offset], lda, &work[1], &c__1, &c_b2, + &a[k * a_dim1 + 1], &c__1, (ftnlen)1); + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + i__3 = k - 1; + zdotc_(&z__2, &i__3, &work[1], &c__1, &a[k * a_dim1 + 1], &c__1); + d__1 = z__2.r; + z__1.r = a[i__2].r - d__1, z__1.i = a[i__2].i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + kstep = 1; + } else { + t = z_lmp_abs(&a[k + (k + 1) * a_dim1]); + i__1 = k + k * a_dim1; + ak = a[i__1].r / t; + i__1 = k + 1 + (k + 1) * a_dim1; + akp1 = a[i__1].r / t; + i__1 = k + (k + 1) * a_dim1; + z__1.r = a[i__1].r / t, z__1.i = a[i__1].i / t; + akkp1.r = z__1.r, akkp1.i = z__1.i; + d__ = t * (ak * akp1 - 1.); + i__1 = k + k * a_dim1; + d__1 = akp1 / d__; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = k + 1 + (k + 1) * a_dim1; + d__1 = ak / d__; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = k + (k + 1) * a_dim1; + z__2.r = -akkp1.r, z__2.i = -akkp1.i; + z__1.r = z__2.r / d__, z__1.i = z__2.i / d__; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + if (k > 1) { + i__1 = k - 1; + zcopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &work[1], &c__1); + i__1 = k - 1; + z__1.r = -1., z__1.i = -0.; + zhemv_(uplo, &i__1, &z__1, &a[a_offset], lda, &work[1], &c__1, &c_b2, + &a[k * a_dim1 + 1], &c__1, (ftnlen)1); + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + i__3 = k - 1; + zdotc_(&z__2, &i__3, &work[1], &c__1, &a[k * a_dim1 + 1], &c__1); 
+ d__1 = z__2.r; + z__1.r = a[i__2].r - d__1, z__1.i = a[i__2].i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + (k + 1) * a_dim1; + i__2 = k + (k + 1) * a_dim1; + i__3 = k - 1; + zdotc_(&z__2, &i__3, &a[k * a_dim1 + 1], &c__1, &a[(k + 1) * a_dim1 + 1], &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k - 1; + zcopy_(&i__1, &a[(k + 1) * a_dim1 + 1], &c__1, &work[1], &c__1); + i__1 = k - 1; + z__1.r = -1., z__1.i = -0.; + zhemv_(uplo, &i__1, &z__1, &a[a_offset], lda, &work[1], &c__1, &c_b2, + &a[(k + 1) * a_dim1 + 1], &c__1, (ftnlen)1); + i__1 = k + 1 + (k + 1) * a_dim1; + i__2 = k + 1 + (k + 1) * a_dim1; + i__3 = k - 1; + zdotc_(&z__2, &i__3, &work[1], &c__1, &a[(k + 1) * a_dim1 + 1], &c__1); + d__1 = z__2.r; + z__1.r = a[i__2].r - d__1, z__1.i = a[i__2].i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + kstep = 2; + } + kp = (i__1 = ipiv[k], abs(i__1)); + if (kp != k) { + i__1 = kp - 1; + zswap_(&i__1, &a[k * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &c__1); + i__1 = k - 1; + for (j = kp + 1; j <= i__1; ++j) { + d_lmp_cnjg(&z__1, &a[j + k * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + i__2 = j + k * a_dim1; + d_lmp_cnjg(&z__1, &a[kp + j * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = kp + j * a_dim1; + a[i__2].r = temp.r, a[i__2].i = temp.i; + } + i__1 = kp + k * a_dim1; + d_lmp_cnjg(&z__1, &a[kp + k * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + k * a_dim1; + temp.r = a[i__1].r, temp.i = a[i__1].i; + i__1 = k + k * a_dim1; + i__2 = kp + kp * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + kp * a_dim1; + a[i__1].r = temp.r, a[i__1].i = temp.i; + if (kstep == 2) { + i__1 = k + (k + 1) * a_dim1; + temp.r = a[i__1].r, temp.i = a[i__1].i; + i__1 = k + (k + 1) * a_dim1; + i__2 = kp + (k + 1) * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + (k + 1) * a_dim1; + a[i__1].r = temp.r, a[i__1].i = temp.i; + } + } + k 
+= kstep; + goto L30; + L50:; + } else { + k = *n; + L60: + if (k < 1) { + goto L80; + } + if (ipiv[k] > 0) { + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = 1. / a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + if (k < *n) { + i__1 = *n - k; + zcopy_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &work[1], &c__1); + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zhemv_(uplo, &i__1, &z__1, &a[k + 1 + (k + 1) * a_dim1], lda, &work[1], &c__1, + &c_b2, &a[k + 1 + k * a_dim1], &c__1, (ftnlen)1); + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + i__3 = *n - k; + zdotc_(&z__2, &i__3, &work[1], &c__1, &a[k + 1 + k * a_dim1], &c__1); + d__1 = z__2.r; + z__1.r = a[i__2].r - d__1, z__1.i = a[i__2].i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + kstep = 1; + } else { + t = z_lmp_abs(&a[k + (k - 1) * a_dim1]); + i__1 = k - 1 + (k - 1) * a_dim1; + ak = a[i__1].r / t; + i__1 = k + k * a_dim1; + akp1 = a[i__1].r / t; + i__1 = k + (k - 1) * a_dim1; + z__1.r = a[i__1].r / t, z__1.i = a[i__1].i / t; + akkp1.r = z__1.r, akkp1.i = z__1.i; + d__ = t * (ak * akp1 - 1.); + i__1 = k - 1 + (k - 1) * a_dim1; + d__1 = akp1 / d__; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = k + k * a_dim1; + d__1 = ak / d__; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = k + (k - 1) * a_dim1; + z__2.r = -akkp1.r, z__2.i = -akkp1.i; + z__1.r = z__2.r / d__, z__1.i = z__2.i / d__; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + if (k < *n) { + i__1 = *n - k; + zcopy_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &work[1], &c__1); + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zhemv_(uplo, &i__1, &z__1, &a[k + 1 + (k + 1) * a_dim1], lda, &work[1], &c__1, + &c_b2, &a[k + 1 + k * a_dim1], &c__1, (ftnlen)1); + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + i__3 = *n - k; + zdotc_(&z__2, &i__3, &work[1], &c__1, &a[k + 1 + k * a_dim1], &c__1); + d__1 = z__2.r; + z__1.r = a[i__2].r - d__1, z__1.i = a[i__2].i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + (k - 1) * a_dim1; + i__2 = k + (k - 1) * a_dim1; + i__3 
= *n - k; + zdotc_(&z__2, &i__3, &a[k + 1 + k * a_dim1], &c__1, &a[k + 1 + (k - 1) * a_dim1], + &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = *n - k; + zcopy_(&i__1, &a[k + 1 + (k - 1) * a_dim1], &c__1, &work[1], &c__1); + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zhemv_(uplo, &i__1, &z__1, &a[k + 1 + (k + 1) * a_dim1], lda, &work[1], &c__1, + &c_b2, &a[k + 1 + (k - 1) * a_dim1], &c__1, (ftnlen)1); + i__1 = k - 1 + (k - 1) * a_dim1; + i__2 = k - 1 + (k - 1) * a_dim1; + i__3 = *n - k; + zdotc_(&z__2, &i__3, &work[1], &c__1, &a[k + 1 + (k - 1) * a_dim1], &c__1); + d__1 = z__2.r; + z__1.r = a[i__2].r - d__1, z__1.i = a[i__2].i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + kstep = 2; + } + kp = (i__1 = ipiv[k], abs(i__1)); + if (kp != k) { + if (kp < *n) { + i__1 = *n - kp; + zswap_(&i__1, &a[kp + 1 + k * a_dim1], &c__1, &a[kp + 1 + kp * a_dim1], &c__1); + } + i__1 = kp - 1; + for (j = k + 1; j <= i__1; ++j) { + d_lmp_cnjg(&z__1, &a[j + k * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + i__2 = j + k * a_dim1; + d_lmp_cnjg(&z__1, &a[kp + j * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = kp + j * a_dim1; + a[i__2].r = temp.r, a[i__2].i = temp.i; + } + i__1 = kp + k * a_dim1; + d_lmp_cnjg(&z__1, &a[kp + k * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + k * a_dim1; + temp.r = a[i__1].r, temp.i = a[i__1].i; + i__1 = k + k * a_dim1; + i__2 = kp + kp * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + kp * a_dim1; + a[i__1].r = temp.r, a[i__1].i = temp.i; + if (kstep == 2) { + i__1 = k + (k - 1) * a_dim1; + temp.r = a[i__1].r, temp.i = a[i__1].i; + i__1 = k + (k - 1) * a_dim1; + i__2 = kp + (k - 1) * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + (k - 1) * a_dim1; + a[i__1].r = temp.r, a[i__1].i = temp.i; + } + } + k -= kstep; + goto L60; + L80:; + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git 
a/lib/linalg/zlahef.cpp b/lib/linalg/zlahef.cpp new file mode 100644 index 0000000000..9a18a455ea --- /dev/null +++ b/lib/linalg/zlahef.cpp @@ -0,0 +1,520 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +int zlahef_(char *uplo, integer *n, integer *nb, integer *kb, doublecomplex *a, integer *lda, + integer *ipiv, doublecomplex *w, integer *ldw, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4; + doublecomplex z__1, z__2, z__3, z__4; + double sqrt(doublereal), d_lmp_imag(doublecomplex *); + void d_lmp_cnjg(doublecomplex *, doublecomplex *), + z_lmp_div(doublecomplex *, doublecomplex *, doublecomplex *); + integer j, k; + doublereal t, r1; + doublecomplex d11, d21, d22; + integer jb, jj, kk, jp, kp, kw, kkw, imax, jmax; + doublereal alpha; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, ftnlen, ftnlen); + integer kstep; + extern int zgemv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, + ftnlen), + zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), + zswap_(integer *, doublecomplex *, integer *, doublecomplex *, integer *); + doublereal absakk; + extern int zdscal_(integer *, doublereal *, doublecomplex *, integer *); + doublereal colmax; + extern int zlacgv_(integer *, doublecomplex *, integer *); + extern integer izamax_(integer *, doublecomplex *, integer *); + doublereal rowmax; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + w_dim1 = *ldw; + w_offset = 1 + w_dim1; + w -= w_offset; + *info = 0; + alpha = (sqrt(17.) + 1.) 
/ 8.; + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + k = *n; + L10: + kw = *nb + k - *n; + if (k <= *n - *nb + 1 && *nb < *n || k < 1) { + goto L30; + } + kstep = 1; + i__1 = k - 1; + zcopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1); + i__1 = k + kw * w_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + w[i__1].r = d__1, w[i__1].i = 0.; + if (k < *n) { + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &k, &i__1, &z__1, &a[(k + 1) * a_dim1 + 1], lda, + &w[k + (kw + 1) * w_dim1], ldw, &c_b1, &w[kw * w_dim1 + 1], &c__1, (ftnlen)12); + i__1 = k + kw * w_dim1; + i__2 = k + kw * w_dim1; + d__1 = w[i__2].r; + w[i__1].r = d__1, w[i__1].i = 0.; + } + i__1 = k + kw * w_dim1; + absakk = (d__1 = w[i__1].r, abs(d__1)); + if (k > 1) { + i__1 = k - 1; + imax = izamax_(&i__1, &w[kw * w_dim1 + 1], &c__1); + i__1 = imax + kw * w_dim1; + colmax = + (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&w[imax + kw * w_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0.) 
{ + if (*info == 0) { + *info = k; + } + kp = k; + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = imax - 1; + zcopy_(&i__1, &a[imax * a_dim1 + 1], &c__1, &w[(kw - 1) * w_dim1 + 1], &c__1); + i__1 = imax + (kw - 1) * w_dim1; + i__2 = imax + imax * a_dim1; + d__1 = a[i__2].r; + w[i__1].r = d__1, w[i__1].i = 0.; + i__1 = k - imax; + zcopy_(&i__1, &a[imax + (imax + 1) * a_dim1], lda, &w[imax + 1 + (kw - 1) * w_dim1], + &c__1); + i__1 = k - imax; + zlacgv_(&i__1, &w[imax + 1 + (kw - 1) * w_dim1], &c__1); + if (k < *n) { + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &k, &i__1, &z__1, &a[(k + 1) * a_dim1 + 1], lda, + &w[imax + (kw + 1) * w_dim1], ldw, &c_b1, &w[(kw - 1) * w_dim1 + 1], + &c__1, (ftnlen)12); + i__1 = imax + (kw - 1) * w_dim1; + i__2 = imax + (kw - 1) * w_dim1; + d__1 = w[i__2].r; + w[i__1].r = d__1, w[i__1].i = 0.; + } + i__1 = k - imax; + jmax = imax + izamax_(&i__1, &w[imax + 1 + (kw - 1) * w_dim1], &c__1); + i__1 = jmax + (kw - 1) * w_dim1; + rowmax = (d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[jmax + (kw - 1) * w_dim1]), abs(d__2)); + if (imax > 1) { + i__1 = imax - 1; + jmax = izamax_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1); + i__1 = jmax + (kw - 1) * w_dim1; + d__3 = rowmax, d__4 = (d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[jmax + (kw - 1) * w_dim1]), abs(d__2)); + rowmax = max(d__3, d__4); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else { + i__1 = imax + (kw - 1) * w_dim1; + if ((d__1 = w[i__1].r, abs(d__1)) >= alpha * rowmax) { + kp = imax; + zcopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1); + } else { + kp = imax; + kstep = 2; + } + } + } + kk = k - kstep + 1; + kkw = *nb + kk - *n; + if (kp != kk) { + i__1 = kp + kp * a_dim1; + i__2 = kk + kk * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; 
+ i__1 = kk - 1 - kp; + zcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp + 1) * a_dim1], lda); + i__1 = kk - 1 - kp; + zlacgv_(&i__1, &a[kp + (kp + 1) * a_dim1], lda); + if (kp > 1) { + i__1 = kp - 1; + zcopy_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &c__1); + } + if (k < *n) { + i__1 = *n - k; + zswap_(&i__1, &a[kk + (k + 1) * a_dim1], lda, &a[kp + (k + 1) * a_dim1], lda); + } + i__1 = *n - kk + 1; + zswap_(&i__1, &w[kk + kkw * w_dim1], ldw, &w[kp + kkw * w_dim1], ldw); + } + if (kstep == 1) { + zcopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1); + if (k > 1) { + i__1 = k + k * a_dim1; + r1 = 1. / a[i__1].r; + i__1 = k - 1; + zdscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1); + i__1 = k - 1; + zlacgv_(&i__1, &w[kw * w_dim1 + 1], &c__1); + } + } else { + if (k > 2) { + i__1 = k - 1 + kw * w_dim1; + d21.r = w[i__1].r, d21.i = w[i__1].i; + d_lmp_cnjg(&z__2, &d21); + z_lmp_div(&z__1, &w[k + kw * w_dim1], &z__2); + d11.r = z__1.r, d11.i = z__1.i; + z_lmp_div(&z__1, &w[k - 1 + (kw - 1) * w_dim1], &d21); + d22.r = z__1.r, d22.i = z__1.i; + z__1.r = d11.r * d22.r - d11.i * d22.i, z__1.i = d11.r * d22.i + d11.i * d22.r; + t = 1. 
/ (z__1.r - 1.); + z__2.r = t, z__2.i = 0.; + z_lmp_div(&z__1, &z__2, &d21); + d21.r = z__1.r, d21.i = z__1.i; + i__1 = k - 2; + for (j = 1; j <= i__1; ++j) { + i__2 = j + (k - 1) * a_dim1; + i__3 = j + (kw - 1) * w_dim1; + z__3.r = d11.r * w[i__3].r - d11.i * w[i__3].i, + z__3.i = d11.r * w[i__3].i + d11.i * w[i__3].r; + i__4 = j + kw * w_dim1; + z__2.r = z__3.r - w[i__4].r, z__2.i = z__3.i - w[i__4].i; + z__1.r = d21.r * z__2.r - d21.i * z__2.i, + z__1.i = d21.r * z__2.i + d21.i * z__2.r; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = j + k * a_dim1; + d_lmp_cnjg(&z__2, &d21); + i__3 = j + kw * w_dim1; + z__4.r = d22.r * w[i__3].r - d22.i * w[i__3].i, + z__4.i = d22.r * w[i__3].i + d22.i * w[i__3].r; + i__4 = j + (kw - 1) * w_dim1; + z__3.r = z__4.r - w[i__4].r, z__3.i = z__4.i - w[i__4].i; + z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, + z__1.i = z__2.r * z__3.i + z__2.i * z__3.r; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + } + } + i__1 = k - 1 + (k - 1) * a_dim1; + i__2 = k - 1 + (kw - 1) * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = k - 1 + k * a_dim1; + i__2 = k - 1 + kw * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = k + k * a_dim1; + i__2 = k + kw * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = k - 1; + zlacgv_(&i__1, &w[kw * w_dim1 + 1], &c__1); + i__1 = k - 2; + zlacgv_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1); + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k - 1] = -kp; + } + k -= kstep; + goto L10; + L30: + i__1 = -(*nb); + for (j = (k - 1) / *nb * *nb + 1; i__1 < 0 ? 
j >= 1 : j <= 1; j += i__1) { + i__2 = *nb, i__3 = k - j + 1; + jb = min(i__2, i__3); + i__2 = j + jb - 1; + for (jj = j; jj <= i__2; ++jj) { + i__3 = jj + jj * a_dim1; + i__4 = jj + jj * a_dim1; + d__1 = a[i__4].r; + a[i__3].r = d__1, a[i__3].i = 0.; + i__3 = jj - j + 1; + i__4 = *n - k; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__3, &i__4, &z__1, &a[j + (k + 1) * a_dim1], lda, + &w[jj + (kw + 1) * w_dim1], ldw, &c_b1, &a[j + jj * a_dim1], &c__1, + (ftnlen)12); + i__3 = jj + jj * a_dim1; + i__4 = jj + jj * a_dim1; + d__1 = a[i__4].r; + a[i__3].r = d__1, a[i__3].i = 0.; + } + i__2 = j - 1; + i__3 = *n - k; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"No transpose", (char *)"Transpose", &i__2, &jb, &i__3, &z__1, &a[(k + 1) * a_dim1 + 1], + lda, &w[j + (kw + 1) * w_dim1], ldw, &c_b1, &a[j * a_dim1 + 1], lda, (ftnlen)12, + (ftnlen)9); + } + j = k + 1; + L60: + jj = j; + jp = ipiv[j]; + if (jp < 0) { + jp = -jp; + ++j; + } + ++j; + if (jp != jj && j <= *n) { + i__1 = *n - j + 1; + zswap_(&i__1, &a[jp + j * a_dim1], lda, &a[jj + j * a_dim1], lda); + } + if (j < *n) { + goto L60; + } + *kb = *n - k; + } else { + k = 1; + L70: + if (k >= *nb && *nb < *n || k > *n) { + goto L90; + } + kstep = 1; + i__1 = k + k * w_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + w[i__1].r = d__1, w[i__1].i = 0.; + if (k < *n) { + i__1 = *n - k; + zcopy_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &w[k + 1 + k * w_dim1], &c__1); + } + i__1 = *n - k + 1; + i__2 = k - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__1, &i__2, &z__1, &a[k + a_dim1], lda, &w[k + w_dim1], ldw, &c_b1, + &w[k + k * w_dim1], &c__1, (ftnlen)12); + i__1 = k + k * w_dim1; + i__2 = k + k * w_dim1; + d__1 = w[i__2].r; + w[i__1].r = d__1, w[i__1].i = 0.; + i__1 = k + k * w_dim1; + absakk = (d__1 = w[i__1].r, abs(d__1)); + if (k < *n) { + i__1 = *n - k; + imax = k + izamax_(&i__1, &w[k + 1 + k * w_dim1], &c__1); + i__1 = imax + k * w_dim1; + colmax = + (d__1 = w[i__1].r, 
abs(d__1)) + (d__2 = d_lmp_imag(&w[imax + k * w_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0.) { + if (*info == 0) { + *info = k; + } + kp = k; + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = imax - k; + zcopy_(&i__1, &a[imax + k * a_dim1], lda, &w[k + (k + 1) * w_dim1], &c__1); + i__1 = imax - k; + zlacgv_(&i__1, &w[k + (k + 1) * w_dim1], &c__1); + i__1 = imax + (k + 1) * w_dim1; + i__2 = imax + imax * a_dim1; + d__1 = a[i__2].r; + w[i__1].r = d__1, w[i__1].i = 0.; + if (imax < *n) { + i__1 = *n - imax; + zcopy_(&i__1, &a[imax + 1 + imax * a_dim1], &c__1, + &w[imax + 1 + (k + 1) * w_dim1], &c__1); + } + i__1 = *n - k + 1; + i__2 = k - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__1, &i__2, &z__1, &a[k + a_dim1], lda, &w[imax + w_dim1], + ldw, &c_b1, &w[k + (k + 1) * w_dim1], &c__1, (ftnlen)12); + i__1 = imax + (k + 1) * w_dim1; + i__2 = imax + (k + 1) * w_dim1; + d__1 = w[i__2].r; + w[i__1].r = d__1, w[i__1].i = 0.; + i__1 = imax - k; + jmax = k - 1 + izamax_(&i__1, &w[k + (k + 1) * w_dim1], &c__1); + i__1 = jmax + (k + 1) * w_dim1; + rowmax = (d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[jmax + (k + 1) * w_dim1]), abs(d__2)); + if (imax < *n) { + i__1 = *n - imax; + jmax = imax + izamax_(&i__1, &w[imax + 1 + (k + 1) * w_dim1], &c__1); + i__1 = jmax + (k + 1) * w_dim1; + d__3 = rowmax, d__4 = (d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[jmax + (k + 1) * w_dim1]), abs(d__2)); + rowmax = max(d__3, d__4); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else { + i__1 = imax + (k + 1) * w_dim1; + if ((d__1 = w[i__1].r, abs(d__1)) >= alpha * rowmax) { + kp = imax; + i__1 = *n - k + 1; + zcopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k * w_dim1], &c__1); + } else { + kp = imax; + kstep = 2; + } + } + } + kk = k + kstep - 1; + if 
(kp != kk) { + i__1 = kp + kp * a_dim1; + i__2 = kk + kk * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = kp - kk - 1; + zcopy_(&i__1, &a[kk + 1 + kk * a_dim1], &c__1, &a[kp + (kk + 1) * a_dim1], lda); + i__1 = kp - kk - 1; + zlacgv_(&i__1, &a[kp + (kk + 1) * a_dim1], lda); + if (kp < *n) { + i__1 = *n - kp; + zcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1 + kp * a_dim1], &c__1); + } + if (k > 1) { + i__1 = k - 1; + zswap_(&i__1, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda); + } + zswap_(&kk, &w[kk + w_dim1], ldw, &w[kp + w_dim1], ldw); + } + if (kstep == 1) { + i__1 = *n - k + 1; + zcopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], &c__1); + if (k < *n) { + i__1 = k + k * a_dim1; + r1 = 1. / a[i__1].r; + i__1 = *n - k; + zdscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1); + i__1 = *n - k; + zlacgv_(&i__1, &w[k + 1 + k * w_dim1], &c__1); + } + } else { + if (k < *n - 1) { + i__1 = k + 1 + k * w_dim1; + d21.r = w[i__1].r, d21.i = w[i__1].i; + z_lmp_div(&z__1, &w[k + 1 + (k + 1) * w_dim1], &d21); + d11.r = z__1.r, d11.i = z__1.i; + d_lmp_cnjg(&z__2, &d21); + z_lmp_div(&z__1, &w[k + k * w_dim1], &z__2); + d22.r = z__1.r, d22.i = z__1.i; + z__1.r = d11.r * d22.r - d11.i * d22.i, z__1.i = d11.r * d22.i + d11.i * d22.r; + t = 1. 
/ (z__1.r - 1.); + z__2.r = t, z__2.i = 0.; + z_lmp_div(&z__1, &z__2, &d21); + d21.r = z__1.r, d21.i = z__1.i; + i__1 = *n; + for (j = k + 2; j <= i__1; ++j) { + i__2 = j + k * a_dim1; + d_lmp_cnjg(&z__2, &d21); + i__3 = j + k * w_dim1; + z__4.r = d11.r * w[i__3].r - d11.i * w[i__3].i, + z__4.i = d11.r * w[i__3].i + d11.i * w[i__3].r; + i__4 = j + (k + 1) * w_dim1; + z__3.r = z__4.r - w[i__4].r, z__3.i = z__4.i - w[i__4].i; + z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, + z__1.i = z__2.r * z__3.i + z__2.i * z__3.r; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = j + (k + 1) * a_dim1; + i__3 = j + (k + 1) * w_dim1; + z__3.r = d22.r * w[i__3].r - d22.i * w[i__3].i, + z__3.i = d22.r * w[i__3].i + d22.i * w[i__3].r; + i__4 = j + k * w_dim1; + z__2.r = z__3.r - w[i__4].r, z__2.i = z__3.i - w[i__4].i; + z__1.r = d21.r * z__2.r - d21.i * z__2.i, + z__1.i = d21.r * z__2.i + d21.i * z__2.r; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + } + } + i__1 = k + k * a_dim1; + i__2 = k + k * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = k + 1 + k * a_dim1; + i__2 = k + 1 + k * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = k + 1 + (k + 1) * a_dim1; + i__2 = k + 1 + (k + 1) * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = *n - k; + zlacgv_(&i__1, &w[k + 1 + k * w_dim1], &c__1); + i__1 = *n - k - 1; + zlacgv_(&i__1, &w[k + 2 + (k + 1) * w_dim1], &c__1); + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k + 1] = -kp; + } + k += kstep; + goto L70; + L90: + i__1 = *n; + i__2 = *nb; + for (j = k; i__2 < 0 ? 
j >= i__1 : j <= i__1; j += i__2) { + i__3 = *nb, i__4 = *n - j + 1; + jb = min(i__3, i__4); + i__3 = j + jb - 1; + for (jj = j; jj <= i__3; ++jj) { + i__4 = jj + jj * a_dim1; + i__5 = jj + jj * a_dim1; + d__1 = a[i__5].r; + a[i__4].r = d__1, a[i__4].i = 0.; + i__4 = j + jb - jj; + i__5 = k - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__4, &i__5, &z__1, &a[jj + a_dim1], lda, &w[jj + w_dim1], + ldw, &c_b1, &a[jj + jj * a_dim1], &c__1, (ftnlen)12); + i__4 = jj + jj * a_dim1; + i__5 = jj + jj * a_dim1; + d__1 = a[i__5].r; + a[i__4].r = d__1, a[i__4].i = 0.; + } + if (j + jb <= *n) { + i__3 = *n - j - jb + 1; + i__4 = k - 1; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"No transpose", (char *)"Transpose", &i__3, &jb, &i__4, &z__1, &a[j + jb + a_dim1], + lda, &w[j + w_dim1], ldw, &c_b1, &a[j + jb + j * a_dim1], lda, (ftnlen)12, + (ftnlen)9); + } + } + j = k - 1; + L120: + jj = j; + jp = ipiv[j]; + if (jp < 0) { + jp = -jp; + --j; + } + --j; + if (jp != jj && j >= 1) { + zswap_(&j, &a[jp + a_dim1], lda, &a[jj + a_dim1], lda); + } + if (j > 1) { + goto L120; + } + *kb = k - 1; + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zlaswp.cpp b/lib/linalg/zlaswp.cpp new file mode 100644 index 0000000000..40e941ffa5 --- /dev/null +++ b/lib/linalg/zlaswp.cpp @@ -0,0 +1,79 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int zlaswp_(integer *n, doublecomplex *a, integer *lda, integer *k1, integer *k2, integer *ipiv, + integer *incx) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; + integer i__, j, k, i1, i2, n32, ip, ix, ix0, inc; + doublecomplex temp; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + if (*incx > 0) { + ix0 = *k1; + i1 = *k1; + i2 = *k2; + inc = 1; + } else if (*incx < 0) { + ix0 = *k1 + (*k1 - *k2) * *incx; + i1 = *k2; + i2 = *k1; + inc = -1; + } else { + return 0; + } + n32 = *n / 32 << 5; + if (n32 != 0) { + i__1 = n32; + for (j = 1; j <= i__1; j += 32) 
{ + ix = ix0; + i__2 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3) { + ip = ipiv[ix]; + if (ip != i__) { + i__4 = j + 31; + for (k = j; k <= i__4; ++k) { + i__5 = i__ + k * a_dim1; + temp.r = a[i__5].r, temp.i = a[i__5].i; + i__5 = i__ + k * a_dim1; + i__6 = ip + k * a_dim1; + a[i__5].r = a[i__6].r, a[i__5].i = a[i__6].i; + i__5 = ip + k * a_dim1; + a[i__5].r = temp.r, a[i__5].i = temp.i; + } + } + ix += *incx; + } + } + } + if (n32 != *n) { + ++n32; + ix = ix0; + i__1 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__3) { + ip = ipiv[ix]; + if (ip != i__) { + i__2 = *n; + for (k = n32; k <= i__2; ++k) { + i__4 = i__ + k * a_dim1; + temp.r = a[i__4].r, temp.i = a[i__4].i; + i__4 = i__ + k * a_dim1; + i__5 = ip + k * a_dim1; + a[i__4].r = a[i__5].r, a[i__4].i = a[i__5].i; + i__4 = ip + k * a_dim1; + a[i__4].r = temp.r, a[i__4].i = temp.i; + } + } + ix += *incx; + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zlasyf.cpp b/lib/linalg/zlasyf.cpp new file mode 100644 index 0000000000..2823d173de --- /dev/null +++ b/lib/linalg/zlasyf.cpp @@ -0,0 +1,431 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +int zlasyf_(char *uplo, integer *n, integer *nb, integer *kb, doublecomplex *a, integer *lda, + integer *ipiv, doublecomplex *w, integer *ldw, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4; + doublecomplex z__1, z__2, z__3; + double sqrt(doublereal), d_lmp_imag(doublecomplex *); + void z_lmp_div(doublecomplex *, doublecomplex *, doublecomplex *); + integer j, k; + doublecomplex t, r1, d11, d21, d22; + integer jb, jj, kk, jp, kp, kw, kkw, imax, jmax; + doublereal alpha; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int zscal_(integer *, doublecomplex *, doublecomplex *, 
integer *), + zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, + ftnlen, ftnlen); + integer kstep; + extern int zgemv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, + ftnlen), + zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), + zswap_(integer *, doublecomplex *, integer *, doublecomplex *, integer *); + doublereal absakk, colmax; + extern integer izamax_(integer *, doublecomplex *, integer *); + doublereal rowmax; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + w_dim1 = *ldw; + w_offset = 1 + w_dim1; + w -= w_offset; + *info = 0; + alpha = (sqrt(17.) + 1.) / 8.; + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + k = *n; + L10: + kw = *nb + k - *n; + if (k <= *n - *nb + 1 && *nb < *n || k < 1) { + goto L30; + } + zcopy_(&k, &a[k * a_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1); + if (k < *n) { + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &k, &i__1, &z__1, &a[(k + 1) * a_dim1 + 1], lda, + &w[k + (kw + 1) * w_dim1], ldw, &c_b1, &w[kw * w_dim1 + 1], &c__1, (ftnlen)12); + } + kstep = 1; + i__1 = k + kw * w_dim1; + absakk = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&w[k + kw * w_dim1]), abs(d__2)); + if (k > 1) { + i__1 = k - 1; + imax = izamax_(&i__1, &w[kw * w_dim1 + 1], &c__1); + i__1 = imax + kw * w_dim1; + colmax = + (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&w[imax + kw * w_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0.) 
{ + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + zcopy_(&imax, &a[imax * a_dim1 + 1], &c__1, &w[(kw - 1) * w_dim1 + 1], &c__1); + i__1 = k - imax; + zcopy_(&i__1, &a[imax + (imax + 1) * a_dim1], lda, &w[imax + 1 + (kw - 1) * w_dim1], + &c__1); + if (k < *n) { + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &k, &i__1, &z__1, &a[(k + 1) * a_dim1 + 1], lda, + &w[imax + (kw + 1) * w_dim1], ldw, &c_b1, &w[(kw - 1) * w_dim1 + 1], + &c__1, (ftnlen)12); + } + i__1 = k - imax; + jmax = imax + izamax_(&i__1, &w[imax + 1 + (kw - 1) * w_dim1], &c__1); + i__1 = jmax + (kw - 1) * w_dim1; + rowmax = (d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[jmax + (kw - 1) * w_dim1]), abs(d__2)); + if (imax > 1) { + i__1 = imax - 1; + jmax = izamax_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1); + i__1 = jmax + (kw - 1) * w_dim1; + d__3 = rowmax, d__4 = (d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[jmax + (kw - 1) * w_dim1]), abs(d__2)); + rowmax = max(d__3, d__4); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else { + i__1 = imax + (kw - 1) * w_dim1; + if ((d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[imax + (kw - 1) * w_dim1]), abs(d__2)) >= + alpha * rowmax) { + kp = imax; + zcopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1); + } else { + kp = imax; + kstep = 2; + } + } + } + kk = k - kstep + 1; + kkw = *nb + kk - *n; + if (kp != kk) { + i__1 = kp + kp * a_dim1; + i__2 = kk + kk * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kk - 1 - kp; + zcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp + 1) * a_dim1], lda); + if (kp > 1) { + i__1 = kp - 1; + zcopy_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &c__1); + } + if (k < *n) { + i__1 = *n - k; + zswap_(&i__1, &a[kk + (k + 1) * a_dim1], lda, &a[kp + (k + 1) * a_dim1], lda); + } + i__1 = *n - kk + 1; + zswap_(&i__1, &w[kk + kkw * 
w_dim1], ldw, &w[kp + kkw * w_dim1], ldw); + } + if (kstep == 1) { + zcopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1); + z_lmp_div(&z__1, &c_b1, &a[k + k * a_dim1]); + r1.r = z__1.r, r1.i = z__1.i; + i__1 = k - 1; + zscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1); + } else { + if (k > 2) { + i__1 = k - 1 + kw * w_dim1; + d21.r = w[i__1].r, d21.i = w[i__1].i; + z_lmp_div(&z__1, &w[k + kw * w_dim1], &d21); + d11.r = z__1.r, d11.i = z__1.i; + z_lmp_div(&z__1, &w[k - 1 + (kw - 1) * w_dim1], &d21); + d22.r = z__1.r, d22.i = z__1.i; + z__3.r = d11.r * d22.r - d11.i * d22.i, z__3.i = d11.r * d22.i + d11.i * d22.r; + z__2.r = z__3.r - 1., z__2.i = z__3.i - 0.; + z_lmp_div(&z__1, &c_b1, &z__2); + t.r = z__1.r, t.i = z__1.i; + z_lmp_div(&z__1, &t, &d21); + d21.r = z__1.r, d21.i = z__1.i; + i__1 = k - 2; + for (j = 1; j <= i__1; ++j) { + i__2 = j + (k - 1) * a_dim1; + i__3 = j + (kw - 1) * w_dim1; + z__3.r = d11.r * w[i__3].r - d11.i * w[i__3].i, + z__3.i = d11.r * w[i__3].i + d11.i * w[i__3].r; + i__4 = j + kw * w_dim1; + z__2.r = z__3.r - w[i__4].r, z__2.i = z__3.i - w[i__4].i; + z__1.r = d21.r * z__2.r - d21.i * z__2.i, + z__1.i = d21.r * z__2.i + d21.i * z__2.r; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = j + k * a_dim1; + i__3 = j + kw * w_dim1; + z__3.r = d22.r * w[i__3].r - d22.i * w[i__3].i, + z__3.i = d22.r * w[i__3].i + d22.i * w[i__3].r; + i__4 = j + (kw - 1) * w_dim1; + z__2.r = z__3.r - w[i__4].r, z__2.i = z__3.i - w[i__4].i; + z__1.r = d21.r * z__2.r - d21.i * z__2.i, + z__1.i = d21.r * z__2.i + d21.i * z__2.r; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + } + } + i__1 = k - 1 + (k - 1) * a_dim1; + i__2 = k - 1 + (kw - 1) * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = k - 1 + k * a_dim1; + i__2 = k - 1 + kw * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = k + k * a_dim1; + i__2 = k + kw * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + 
ipiv[k] = -kp; + ipiv[k - 1] = -kp; + } + k -= kstep; + goto L10; + L30: + i__1 = -(*nb); + for (j = (k - 1) / *nb * *nb + 1; i__1 < 0 ? j >= 1 : j <= 1; j += i__1) { + i__2 = *nb, i__3 = k - j + 1; + jb = min(i__2, i__3); + i__2 = j + jb - 1; + for (jj = j; jj <= i__2; ++jj) { + i__3 = jj - j + 1; + i__4 = *n - k; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__3, &i__4, &z__1, &a[j + (k + 1) * a_dim1], lda, + &w[jj + (kw + 1) * w_dim1], ldw, &c_b1, &a[j + jj * a_dim1], &c__1, + (ftnlen)12); + } + i__2 = j - 1; + i__3 = *n - k; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"No transpose", (char *)"Transpose", &i__2, &jb, &i__3, &z__1, &a[(k + 1) * a_dim1 + 1], + lda, &w[j + (kw + 1) * w_dim1], ldw, &c_b1, &a[j * a_dim1 + 1], lda, (ftnlen)12, + (ftnlen)9); + } + j = k + 1; + L60: + jj = j; + jp = ipiv[j]; + if (jp < 0) { + jp = -jp; + ++j; + } + ++j; + if (jp != jj && j <= *n) { + i__1 = *n - j + 1; + zswap_(&i__1, &a[jp + j * a_dim1], lda, &a[jj + j * a_dim1], lda); + } + if (j < *n) { + goto L60; + } + *kb = *n - k; + } else { + k = 1; + L70: + if (k >= *nb && *nb < *n || k > *n) { + goto L90; + } + i__1 = *n - k + 1; + zcopy_(&i__1, &a[k + k * a_dim1], &c__1, &w[k + k * w_dim1], &c__1); + i__1 = *n - k + 1; + i__2 = k - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__1, &i__2, &z__1, &a[k + a_dim1], lda, &w[k + w_dim1], ldw, &c_b1, + &w[k + k * w_dim1], &c__1, (ftnlen)12); + kstep = 1; + i__1 = k + k * w_dim1; + absakk = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&w[k + k * w_dim1]), abs(d__2)); + if (k < *n) { + i__1 = *n - k; + imax = k + izamax_(&i__1, &w[k + 1 + k * w_dim1], &c__1); + i__1 = imax + k * w_dim1; + colmax = + (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&w[imax + k * w_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0.) 
{ + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = imax - k; + zcopy_(&i__1, &a[imax + k * a_dim1], lda, &w[k + (k + 1) * w_dim1], &c__1); + i__1 = *n - imax + 1; + zcopy_(&i__1, &a[imax + imax * a_dim1], &c__1, &w[imax + (k + 1) * w_dim1], &c__1); + i__1 = *n - k + 1; + i__2 = k - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__1, &i__2, &z__1, &a[k + a_dim1], lda, &w[imax + w_dim1], + ldw, &c_b1, &w[k + (k + 1) * w_dim1], &c__1, (ftnlen)12); + i__1 = imax - k; + jmax = k - 1 + izamax_(&i__1, &w[k + (k + 1) * w_dim1], &c__1); + i__1 = jmax + (k + 1) * w_dim1; + rowmax = (d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[jmax + (k + 1) * w_dim1]), abs(d__2)); + if (imax < *n) { + i__1 = *n - imax; + jmax = imax + izamax_(&i__1, &w[imax + 1 + (k + 1) * w_dim1], &c__1); + i__1 = jmax + (k + 1) * w_dim1; + d__3 = rowmax, d__4 = (d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[jmax + (k + 1) * w_dim1]), abs(d__2)); + rowmax = max(d__3, d__4); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else { + i__1 = imax + (k + 1) * w_dim1; + if ((d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[imax + (k + 1) * w_dim1]), abs(d__2)) >= + alpha * rowmax) { + kp = imax; + i__1 = *n - k + 1; + zcopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k * w_dim1], &c__1); + } else { + kp = imax; + kstep = 2; + } + } + } + kk = k + kstep - 1; + if (kp != kk) { + i__1 = kp + kp * a_dim1; + i__2 = kk + kk * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp - kk - 1; + zcopy_(&i__1, &a[kk + 1 + kk * a_dim1], &c__1, &a[kp + (kk + 1) * a_dim1], lda); + if (kp < *n) { + i__1 = *n - kp; + zcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1 + kp * a_dim1], &c__1); + } + if (k > 1) { + i__1 = k - 1; + zswap_(&i__1, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda); + } + zswap_(&kk, &w[kk + w_dim1], ldw, &w[kp + w_dim1], ldw); + } + if (kstep == 
1) { + i__1 = *n - k + 1; + zcopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], &c__1); + if (k < *n) { + z_lmp_div(&z__1, &c_b1, &a[k + k * a_dim1]); + r1.r = z__1.r, r1.i = z__1.i; + i__1 = *n - k; + zscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1); + } + } else { + if (k < *n - 1) { + i__1 = k + 1 + k * w_dim1; + d21.r = w[i__1].r, d21.i = w[i__1].i; + z_lmp_div(&z__1, &w[k + 1 + (k + 1) * w_dim1], &d21); + d11.r = z__1.r, d11.i = z__1.i; + z_lmp_div(&z__1, &w[k + k * w_dim1], &d21); + d22.r = z__1.r, d22.i = z__1.i; + z__3.r = d11.r * d22.r - d11.i * d22.i, z__3.i = d11.r * d22.i + d11.i * d22.r; + z__2.r = z__3.r - 1., z__2.i = z__3.i - 0.; + z_lmp_div(&z__1, &c_b1, &z__2); + t.r = z__1.r, t.i = z__1.i; + z_lmp_div(&z__1, &t, &d21); + d21.r = z__1.r, d21.i = z__1.i; + i__1 = *n; + for (j = k + 2; j <= i__1; ++j) { + i__2 = j + k * a_dim1; + i__3 = j + k * w_dim1; + z__3.r = d11.r * w[i__3].r - d11.i * w[i__3].i, + z__3.i = d11.r * w[i__3].i + d11.i * w[i__3].r; + i__4 = j + (k + 1) * w_dim1; + z__2.r = z__3.r - w[i__4].r, z__2.i = z__3.i - w[i__4].i; + z__1.r = d21.r * z__2.r - d21.i * z__2.i, + z__1.i = d21.r * z__2.i + d21.i * z__2.r; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = j + (k + 1) * a_dim1; + i__3 = j + (k + 1) * w_dim1; + z__3.r = d22.r * w[i__3].r - d22.i * w[i__3].i, + z__3.i = d22.r * w[i__3].i + d22.i * w[i__3].r; + i__4 = j + k * w_dim1; + z__2.r = z__3.r - w[i__4].r, z__2.i = z__3.i - w[i__4].i; + z__1.r = d21.r * z__2.r - d21.i * z__2.i, + z__1.i = d21.r * z__2.i + d21.i * z__2.r; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + } + } + i__1 = k + k * a_dim1; + i__2 = k + k * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = k + 1 + k * a_dim1; + i__2 = k + 1 + k * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = k + 1 + (k + 1) * a_dim1; + i__2 = k + 1 + (k + 1) * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k 
+ 1] = -kp; + } + k += kstep; + goto L70; + L90: + i__1 = *n; + i__2 = *nb; + for (j = k; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + i__3 = *nb, i__4 = *n - j + 1; + jb = min(i__3, i__4); + i__3 = j + jb - 1; + for (jj = j; jj <= i__3; ++jj) { + i__4 = j + jb - jj; + i__5 = k - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__4, &i__5, &z__1, &a[jj + a_dim1], lda, &w[jj + w_dim1], + ldw, &c_b1, &a[jj + jj * a_dim1], &c__1, (ftnlen)12); + } + if (j + jb <= *n) { + i__3 = *n - j - jb + 1; + i__4 = k - 1; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"No transpose", (char *)"Transpose", &i__3, &jb, &i__4, &z__1, &a[j + jb + a_dim1], + lda, &w[j + w_dim1], ldw, &c_b1, &a[j + jb + j * a_dim1], lda, (ftnlen)12, + (ftnlen)9); + } + } + j = k - 1; + L120: + jj = j; + jp = ipiv[j]; + if (jp < 0) { + jp = -jp; + --j; + } + --j; + if (jp != jj && j >= 1) { + zswap_(&j, &a[jp + a_dim1], lda, &a[jj + a_dim1], lda); + } + if (j > 1) { + goto L120; + } + *kb = k - 1; + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zlauu2.cpp b/lib/linalg/zlauu2.cpp new file mode 100644 index 0000000000..2e92542d49 --- /dev/null +++ b/lib/linalg/zlauu2.cpp @@ -0,0 +1,100 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +int zlauu2_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + doublecomplex z__1; + integer i__; + doublereal aii; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern VOID zdotc_(doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *); + extern int zgemv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, + ftnlen); + logical upper; + extern int xerbla_(char *, integer *, ftnlen), + zdscal_(integer *, 
doublereal *, doublecomplex *, integer *), + zlacgv_(integer *, doublecomplex *, integer *); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZLAUU2", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + if (upper) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + aii = a[i__2].r; + if (i__ < *n) { + i__2 = i__ + i__ * a_dim1; + i__3 = *n - i__; + zdotc_(&z__1, &i__3, &a[i__ + (i__ + 1) * a_dim1], lda, + &a[i__ + (i__ + 1) * a_dim1], lda); + d__1 = aii * aii + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + i__2 = *n - i__; + zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ - 1; + i__3 = *n - i__; + z__1.r = aii, z__1.i = 0.; + zgemv_((char *)"No transpose", &i__2, &i__3, &c_b1, &a[(i__ + 1) * a_dim1 + 1], lda, + &a[i__ + (i__ + 1) * a_dim1], lda, &z__1, &a[i__ * a_dim1 + 1], &c__1, + (ftnlen)12); + i__2 = *n - i__; + zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + } else { + zdscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1); + } + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + aii = a[i__2].r; + if (i__ < *n) { + i__2 = i__ + i__ * a_dim1; + i__3 = *n - i__; + zdotc_(&z__1, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &a[i__ + 1 + i__ * a_dim1], + &c__1); + d__1 = aii * aii + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + i__2 = i__ - 1; + zlacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = *n - i__; + i__3 = i__ - 1; + z__1.r = aii, z__1.i = 0.; + zgemv_((char *)"Conjugate transpose", &i__2, &i__3, &c_b1, &a[i__ + 1 + a_dim1], lda, + &a[i__ + 1 + i__ * a_dim1], &c__1, &z__1, &a[i__ + a_dim1], lda, (ftnlen)19); + i__2 = i__ - 1; + zlacgv_(&i__2, &a[i__ 
+ a_dim1], lda); + } else { + zdscal_(&i__, &aii, &a[i__ + a_dim1], lda); + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zlauum.cpp b/lib/linalg/zlauum.cpp new file mode 100644 index 0000000000..e61268ec3d --- /dev/null +++ b/lib/linalg/zlauum.cpp @@ -0,0 +1,103 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b21 = 1.; +int zlauum_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + integer i__, ib, nb; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, ftnlen, ftnlen), + zherk_(char *, char *, integer *, integer *, doublereal *, doublecomplex *, integer *, + doublereal *, doublecomplex *, integer *, ftnlen, ftnlen); + logical upper; + extern int ztrmm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, + ftnlen, ftnlen), + zlauu2_(char *, integer *, doublecomplex *, integer *, integer *, ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZLAUUM", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + nb = ilaenv_(&c__1, (char *)"ZLAUUM", uplo, n, 
&c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (nb <= 1 || nb >= *n) { + zlauu2_(uplo, n, &a[a_offset], lda, info, (ftnlen)1); + } else { + if (upper) { + i__1 = *n; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3, i__4); + i__3 = i__ - 1; + ztrmm_((char *)"Right", (char *)"Upper", (char *)"Conjugate transpose", (char *)"Non-unit", &i__3, &ib, &c_b1, + &a[i__ + i__ * a_dim1], lda, &a[i__ * a_dim1 + 1], lda, (ftnlen)5, (ftnlen)5, + (ftnlen)19, (ftnlen)8); + zlauu2_((char *)"Upper", &ib, &a[i__ + i__ * a_dim1], lda, info, (ftnlen)5); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + zgemm_((char *)"No transpose", (char *)"Conjugate transpose", &i__3, &ib, &i__4, &c_b1, + &a[(i__ + ib) * a_dim1 + 1], lda, &a[i__ + (i__ + ib) * a_dim1], lda, + &c_b1, &a[i__ * a_dim1 + 1], lda, (ftnlen)12, (ftnlen)19); + i__3 = *n - i__ - ib + 1; + zherk_((char *)"Upper", (char *)"No transpose", &ib, &i__3, &c_b21, + &a[i__ + (i__ + ib) * a_dim1], lda, &c_b21, &a[i__ + i__ * a_dim1], lda, + (ftnlen)5, (ftnlen)12); + } + } + } else { + i__2 = *n; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += i__1) { + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3, i__4); + i__3 = i__ - 1; + ztrmm_((char *)"Left", (char *)"Lower", (char *)"Conjugate transpose", (char *)"Non-unit", &ib, &i__3, &c_b1, + &a[i__ + i__ * a_dim1], lda, &a[i__ + a_dim1], lda, (ftnlen)4, (ftnlen)5, + (ftnlen)19, (ftnlen)8); + zlauu2_((char *)"Lower", &ib, &a[i__ + i__ * a_dim1], lda, info, (ftnlen)5); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + zgemm_((char *)"Conjugate transpose", (char *)"No transpose", &ib, &i__3, &i__4, &c_b1, + &a[i__ + ib + i__ * a_dim1], lda, &a[i__ + ib + a_dim1], lda, &c_b1, + &a[i__ + a_dim1], lda, (ftnlen)19, (ftnlen)12); + i__3 = *n - i__ - ib + 1; + zherk_((char *)"Lower", (char *)"Conjugate transpose", &ib, &i__3, &c_b21, + &a[i__ + ib + i__ * a_dim1], lda, &c_b21, &a[i__ + i__ * a_dim1], lda, + (ftnlen)5, (ftnlen)19); + } + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zpotrf.cpp b/lib/linalg/zpotrf.cpp new file mode 100644 index 0000000000..5679af9ca9 --- /dev/null +++ b/lib/linalg/zpotrf.cpp @@ -0,0 +1,115 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b14 = -1.; +static doublereal c_b15 = 1.; +int zpotrf_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + doublecomplex z__1; + integer j, jb, nb; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, ftnlen, ftnlen), + zherk_(char *, char *, integer *, integer *, doublereal *, doublecomplex *, integer *, + doublereal *, doublecomplex *, integer *, ftnlen, ftnlen); + logical upper; + extern int 
ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, + ftnlen, ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + extern int zpotrf2_(char *, integer *, doublecomplex *, integer *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZPOTRF", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + nb = ilaenv_(&c__1, (char *)"ZPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (nb <= 1 || nb >= *n) { + zpotrf2_(uplo, n, &a[a_offset], lda, info, (ftnlen)1); + } else { + if (upper) { + i__1 = *n; + i__2 = nb; + for (j = 1; i__2 < 0 ? 
j >= i__1 : j <= i__1; j += i__2) { + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3, i__4); + i__3 = j - 1; + zherk_((char *)"Upper", (char *)"Conjugate transpose", &jb, &i__3, &c_b14, &a[j * a_dim1 + 1], lda, + &c_b15, &a[j + j * a_dim1], lda, (ftnlen)5, (ftnlen)19); + zpotrf2_((char *)"Upper", &jb, &a[j + j * a_dim1], lda, info, (ftnlen)5); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + i__3 = *n - j - jb + 1; + i__4 = j - 1; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"Conjugate transpose", (char *)"No transpose", &jb, &i__3, &i__4, &z__1, + &a[j * a_dim1 + 1], lda, &a[(j + jb) * a_dim1 + 1], lda, &c_b1, + &a[j + (j + jb) * a_dim1], lda, (ftnlen)19, (ftnlen)12); + i__3 = *n - j - jb + 1; + ztrsm_((char *)"Left", (char *)"Upper", (char *)"Conjugate transpose", (char *)"Non-unit", &jb, &i__3, &c_b1, + &a[j + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda, (ftnlen)4, + (ftnlen)5, (ftnlen)19, (ftnlen)8); + } + } + } else { + i__2 = *n; + i__1 = nb; + for (j = 1; i__1 < 0 ? 
j >= i__2 : j <= i__2; j += i__1) { + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3, i__4); + i__3 = j - 1; + zherk_((char *)"Lower", (char *)"No transpose", &jb, &i__3, &c_b14, &a[j + a_dim1], lda, &c_b15, + &a[j + j * a_dim1], lda, (ftnlen)5, (ftnlen)12); + zpotrf2_((char *)"Lower", &jb, &a[j + j * a_dim1], lda, info, (ftnlen)5); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + i__3 = *n - j - jb + 1; + i__4 = j - 1; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"No transpose", (char *)"Conjugate transpose", &i__3, &jb, &i__4, &z__1, + &a[j + jb + a_dim1], lda, &a[j + a_dim1], lda, &c_b1, + &a[j + jb + j * a_dim1], lda, (ftnlen)12, (ftnlen)19); + i__3 = *n - j - jb + 1; + ztrsm_((char *)"Right", (char *)"Lower", (char *)"Conjugate transpose", (char *)"Non-unit", &i__3, &jb, &c_b1, + &a[j + j * a_dim1], lda, &a[j + jb + j * a_dim1], lda, (ftnlen)5, + (ftnlen)5, (ftnlen)19, (ftnlen)8); + } + } + } + } + goto L40; +L30: + *info = *info + j - 1; +L40: + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zpotrf2.cpp b/lib/linalg/zpotrf2.cpp new file mode 100644 index 0000000000..262ea15497 --- /dev/null +++ b/lib/linalg/zpotrf2.cpp @@ -0,0 +1,89 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static doublereal c_b11 = -1.; +static doublereal c_b12 = 1.; +int zpotrf2_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1; + doublereal d__1; + double sqrt(doublereal); + integer n1, n2; + doublereal ajj; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer iinfo; + extern int zherk_(char *, char *, integer *, integer *, doublereal *, doublecomplex *, + integer *, doublereal *, doublecomplex *, integer *, ftnlen, ftnlen); + logical upper; + extern int ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, + 
ftnlen, ftnlen); + extern logical disnan_(doublereal *); + extern int xerbla_(char *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZPOTRF2", &i__1, (ftnlen)7); + return 0; + } + if (*n == 0) { + return 0; + } + if (*n == 1) { + i__1 = a_dim1 + 1; + ajj = a[i__1].r; + if (ajj <= 0. || disnan_(&ajj)) { + *info = 1; + return 0; + } + i__1 = a_dim1 + 1; + d__1 = sqrt(ajj); + a[i__1].r = d__1, a[i__1].i = 0.; + } else { + n1 = *n / 2; + n2 = *n - n1; + zpotrf2_(uplo, &n1, &a[a_dim1 + 1], lda, &iinfo, (ftnlen)1); + if (iinfo != 0) { + *info = iinfo; + return 0; + } + if (upper) { + ztrsm_((char *)"L", (char *)"U", (char *)"C", (char *)"N", &n1, &n2, &c_b1, &a[a_dim1 + 1], lda, + &a[(n1 + 1) * a_dim1 + 1], lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + zherk_(uplo, (char *)"C", &n2, &n1, &c_b11, &a[(n1 + 1) * a_dim1 + 1], lda, &c_b12, + &a[n1 + 1 + (n1 + 1) * a_dim1], lda, (ftnlen)1, (ftnlen)1); + zpotrf2_(uplo, &n2, &a[n1 + 1 + (n1 + 1) * a_dim1], lda, &iinfo, (ftnlen)1); + if (iinfo != 0) { + *info = iinfo + n1; + return 0; + } + } else { + ztrsm_((char *)"R", (char *)"L", (char *)"C", (char *)"N", &n2, &n1, &c_b1, &a[a_dim1 + 1], lda, &a[n1 + 1 + a_dim1], + lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + zherk_(uplo, (char *)"N", &n2, &n1, &c_b11, &a[n1 + 1 + a_dim1], lda, &c_b12, + &a[n1 + 1 + (n1 + 1) * a_dim1], lda, (ftnlen)1, (ftnlen)1); + zpotrf2_(uplo, &n2, &a[n1 + 1 + (n1 + 1) * a_dim1], lda, &iinfo, (ftnlen)1); + if (iinfo != 0) { + *info = iinfo + n1; + return 0; + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zpotri.cpp b/lib/linalg/zpotri.cpp new file mode 100644 index 
0000000000..a13f6fde5c --- /dev/null +++ b/lib/linalg/zpotri.cpp @@ -0,0 +1,40 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int zpotri_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int xerbla_(char *, integer *, ftnlen), + zlauum_(char *, integer *, doublecomplex *, integer *, integer *, ftnlen), + ztrtri_(char *, char *, integer *, doublecomplex *, integer *, integer *, ftnlen, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + *info = 0; + if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZPOTRI", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + ztrtri_(uplo, (char *)"Non-unit", n, &a[a_offset], lda, info, (ftnlen)1, (ftnlen)8); + if (*info > 0) { + return 0; + } + zlauum_(uplo, n, &a[a_offset], lda, info, (ftnlen)1); + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zsymv.cpp b/lib/linalg/zsymv.cpp new file mode 100644 index 0000000000..73e956493b --- /dev/null +++ b/lib/linalg/zsymv.cpp @@ -0,0 +1,263 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int zsymv_(char *uplo, integer *n, doublecomplex *alpha, doublecomplex *a, integer *lda, + doublecomplex *x, integer *incx, doublecomplex *beta, doublecomplex *y, integer *incy, + ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2, z__3, z__4; + integer i__, j, ix, iy, jx, jy, kx, ky, info; + doublecomplex temp1, temp2; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int xerbla_(char *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + info = 
0; + if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*lda < max(1, *n)) { + info = 5; + } else if (*incx == 0) { + info = 7; + } else if (*incy == 0) { + info = 10; + } + if (info != 0) { + xerbla_((char *)"ZSYMV ", &info, (ftnlen)6); + return 0; + } + if (*n == 0 || alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && beta->i == 0.)) { + return 0; + } + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + if (beta->r != 1. || beta->i != 0.) { + if (*incy == 1) { + if (beta->r == 0. && beta->i == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + y[i__2].r = 0., y[i__2].i = 0.; + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + z__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + } + } + } else { + iy = ky; + if (beta->r == 0. && beta->i == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + y[i__2].r = 0., y[i__2].i = 0.; + iy += *incy; + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = iy; + z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + z__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + iy += *incy; + } + } + } + } + if (alpha->r == 0. && alpha->i == 0.) 
{ + return 0; + } + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + i__3 = i__ + j * a_dim1; + i__4 = i__; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4].i, + z__2.i = a[i__3].r * x[i__4].i + a[i__3].i * x[i__4].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + } + i__2 = j; + i__3 = j; + i__4 = j + j * a_dim1; + z__3.r = temp1.r * a[i__4].r - temp1.i * a[i__4].i, + z__3.i = temp1.r * a[i__4].i + temp1.i * a[i__4].r; + z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i; + z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, + z__4.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + ix = kx; + iy = ky; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = iy; + i__4 = iy; + i__5 = i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + i__3 = i__ + j * a_dim1; + i__4 = ix; + z__2.r 
= a[i__3].r * x[i__4].r - a[i__3].i * x[i__4].i, + z__2.i = a[i__3].r * x[i__4].i + a[i__3].i * x[i__4].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + ix += *incx; + iy += *incy; + } + i__2 = jy; + i__3 = jy; + i__4 = j + j * a_dim1; + z__3.r = temp1.r * a[i__4].r - temp1.i * a[i__4].i, + z__3.i = temp1.r * a[i__4].i + temp1.i * a[i__4].r; + z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i; + z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, + z__4.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + jx += *incx; + jy += *incy; + } + } + } else { + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__2 = j; + i__3 = j; + i__4 = j + j * a_dim1; + z__2.r = temp1.r * a[i__4].r - temp1.i * a[i__4].i, + z__2.i = temp1.r * a[i__4].i + temp1.i * a[i__4].r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + i__3 = i__ + j * a_dim1; + i__4 = i__; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4].i, + z__2.i = a[i__3].r * x[i__4].i + a[i__3].i * x[i__4].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + } + i__2 = j; + i__3 = j; + z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, + z__2.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + 
y[i__2].r = z__1.r, y[i__2].i = z__1.i; + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__2 = jy; + i__3 = jy; + i__4 = j + j * a_dim1; + z__2.r = temp1.r * a[i__4].r - temp1.i * a[i__4].i, + z__2.i = temp1.r * a[i__4].i + temp1.i * a[i__4].r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + ix = jx; + iy = jy; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + iy += *incy; + i__3 = iy; + i__4 = iy; + i__5 = i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + i__3 = i__ + j * a_dim1; + i__4 = ix; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4].i, + z__2.i = a[i__3].r * x[i__4].i + a[i__3].i * x[i__4].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + } + i__2 = jy; + i__3 = jy; + z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, + z__2.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + jx += *incx; + jy += *incy; + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zsyr.cpp b/lib/linalg/zsyr.cpp new file mode 100644 index 0000000000..5e79f28d94 --- /dev/null +++ b/lib/linalg/zsyr.cpp @@ -0,0 +1,141 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int zsyr_(char *uplo, integer *n, doublecomplex *alpha, doublecomplex *x, integer *incx, + doublecomplex *a, integer *lda, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2; + integer i__, j, ix, 
jx, kx, info; + doublecomplex temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int xerbla_(char *, integer *, ftnlen); + --x; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + info = 0; + if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*lda < max(1, *n)) { + info = 7; + } + if (info != 0) { + xerbla_((char *)"ZSYR ", &info, (ftnlen)6); + return 0; + } + if (*n == 0 || alpha->r == 0. && alpha->i == 0.) { + return 0; + } + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + i__2 = j; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, + z__2.i = x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + } + } + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0. || x[i__2].i != 0.) 
{ + i__2 = jx; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + ix = kx; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, + z__2.i = x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + ix += *incx; + } + } + jx += *incx; + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + i__2 = j; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, + z__2.i = x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + } + } + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0. || x[i__2].i != 0.) 
{ + i__2 = jx; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + ix = jx; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, + z__2.i = x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + ix += *incx; + } + } + jx += *incx; + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zsytf2.cpp b/lib/linalg/zsytf2.cpp new file mode 100644 index 0000000000..bce7b51f1d --- /dev/null +++ b/lib/linalg/zsytf2.cpp @@ -0,0 +1,356 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +int zsytf2_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, integer *info, + ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; + doublereal d__1, d__2, d__3, d__4; + doublecomplex z__1, z__2, z__3, z__4; + double sqrt(doublereal), d_lmp_imag(doublecomplex *); + void z_lmp_div(doublecomplex *, doublecomplex *, doublecomplex *); + integer i__, j, k; + doublecomplex t, r1, d11, d12, d21, d22; + integer kk, kp; + doublecomplex wk, wkm1, wkp1; + integer imax, jmax; + extern int zsyr_(char *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, ftnlen); + doublereal alpha; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int zscal_(integer *, doublecomplex *, doublecomplex *, integer *); + integer kstep; + logical upper; + extern int zswap_(integer *, doublecomplex *, integer *, doublecomplex *, integer *); + doublereal absakk; + extern logical disnan_(doublereal *); + extern int xerbla_(char *, integer *, ftnlen); + doublereal colmax; + extern integer izamax_(integer *, doublecomplex *, 
integer *); + doublereal rowmax; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZSYTF2", &i__1, (ftnlen)6); + return 0; + } + alpha = (sqrt(17.) + 1.) / 8.; + if (upper) { + k = *n; + L10: + if (k < 1) { + goto L70; + } + kstep = 1; + i__1 = k + k * a_dim1; + absakk = (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&a[k + k * a_dim1]), abs(d__2)); + if (k > 1) { + i__1 = k - 1; + imax = izamax_(&i__1, &a[k * a_dim1 + 1], &c__1); + i__1 = imax + k * a_dim1; + colmax = + (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&a[imax + k * a_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0. || disnan_(&absakk)) { + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = k - imax; + jmax = imax + izamax_(&i__1, &a[imax + (imax + 1) * a_dim1], lda); + i__1 = imax + jmax * a_dim1; + rowmax = (d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[imax + jmax * a_dim1]), abs(d__2)); + if (imax > 1) { + i__1 = imax - 1; + jmax = izamax_(&i__1, &a[imax * a_dim1 + 1], &c__1); + i__1 = jmax + imax * a_dim1; + d__3 = rowmax, d__4 = (d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[jmax + imax * a_dim1]), abs(d__2)); + rowmax = max(d__3, d__4); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else { + i__1 = imax + imax * a_dim1; + if ((d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[imax + imax * a_dim1]), abs(d__2)) >= + alpha * rowmax) { + kp = imax; + } else { + kp = imax; + kstep = 2; + } + } + } + kk = k - kstep + 1; + if (kp != kk) { + i__1 = kp - 1; + zswap_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &c__1); + 
i__1 = kk - kp - 1; + zswap_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp + 1) * a_dim1], lda); + i__1 = kk + kk * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = kk + kk * a_dim1; + i__2 = kp + kp * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + kp * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + if (kstep == 2) { + i__1 = k - 1 + k * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = k - 1 + k * a_dim1; + i__2 = kp + k * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + k * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + } + } + if (kstep == 1) { + z_lmp_div(&z__1, &c_b1, &a[k + k * a_dim1]); + r1.r = z__1.r, r1.i = z__1.i; + i__1 = k - 1; + z__1.r = -r1.r, z__1.i = -r1.i; + zsyr_(uplo, &i__1, &z__1, &a[k * a_dim1 + 1], &c__1, &a[a_offset], lda, (ftnlen)1); + i__1 = k - 1; + zscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1); + } else { + if (k > 2) { + i__1 = k - 1 + k * a_dim1; + d12.r = a[i__1].r, d12.i = a[i__1].i; + z_lmp_div(&z__1, &a[k - 1 + (k - 1) * a_dim1], &d12); + d22.r = z__1.r, d22.i = z__1.i; + z_lmp_div(&z__1, &a[k + k * a_dim1], &d12); + d11.r = z__1.r, d11.i = z__1.i; + z__3.r = d11.r * d22.r - d11.i * d22.i, z__3.i = d11.r * d22.i + d11.i * d22.r; + z__2.r = z__3.r - 1., z__2.i = z__3.i - 0.; + z_lmp_div(&z__1, &c_b1, &z__2); + t.r = z__1.r, t.i = z__1.i; + z_lmp_div(&z__1, &t, &d12); + d12.r = z__1.r, d12.i = z__1.i; + for (j = k - 2; j >= 1; --j) { + i__1 = j + (k - 1) * a_dim1; + z__3.r = d11.r * a[i__1].r - d11.i * a[i__1].i, + z__3.i = d11.r * a[i__1].i + d11.i * a[i__1].r; + i__2 = j + k * a_dim1; + z__2.r = z__3.r - a[i__2].r, z__2.i = z__3.i - a[i__2].i; + z__1.r = d12.r * z__2.r - d12.i * z__2.i, + z__1.i = d12.r * z__2.i + d12.i * z__2.r; + wkm1.r = z__1.r, wkm1.i = z__1.i; + i__1 = j + k * a_dim1; + z__3.r = d22.r * a[i__1].r - d22.i * a[i__1].i, + z__3.i = d22.r * a[i__1].i + d22.i * a[i__1].r; + i__2 = j + (k - 1) * a_dim1; + z__2.r = z__3.r - a[i__2].r, z__2.i = z__3.i - a[i__2].i; 
+ z__1.r = d12.r * z__2.r - d12.i * z__2.i, + z__1.i = d12.r * z__2.i + d12.i * z__2.r; + wk.r = z__1.r, wk.i = z__1.i; + for (i__ = j; i__ >= 1; --i__) { + i__1 = i__ + j * a_dim1; + i__2 = i__ + j * a_dim1; + i__3 = i__ + k * a_dim1; + z__3.r = a[i__3].r * wk.r - a[i__3].i * wk.i, + z__3.i = a[i__3].r * wk.i + a[i__3].i * wk.r; + z__2.r = a[i__2].r - z__3.r, z__2.i = a[i__2].i - z__3.i; + i__4 = i__ + (k - 1) * a_dim1; + z__4.r = a[i__4].r * wkm1.r - a[i__4].i * wkm1.i, + z__4.i = a[i__4].r * wkm1.i + a[i__4].i * wkm1.r; + z__1.r = z__2.r - z__4.r, z__1.i = z__2.i - z__4.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + i__1 = j + k * a_dim1; + a[i__1].r = wk.r, a[i__1].i = wk.i; + i__1 = j + (k - 1) * a_dim1; + a[i__1].r = wkm1.r, a[i__1].i = wkm1.i; + } + } + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k - 1] = -kp; + } + k -= kstep; + goto L10; + } else { + k = 1; + L40: + if (k > *n) { + goto L70; + } + kstep = 1; + i__1 = k + k * a_dim1; + absakk = (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&a[k + k * a_dim1]), abs(d__2)); + if (k < *n) { + i__1 = *n - k; + imax = k + izamax_(&i__1, &a[k + 1 + k * a_dim1], &c__1); + i__1 = imax + k * a_dim1; + colmax = + (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&a[imax + k * a_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0. 
|| disnan_(&absakk)) { + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = imax - k; + jmax = k - 1 + izamax_(&i__1, &a[imax + k * a_dim1], lda); + i__1 = imax + jmax * a_dim1; + rowmax = (d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[imax + jmax * a_dim1]), abs(d__2)); + if (imax < *n) { + i__1 = *n - imax; + jmax = imax + izamax_(&i__1, &a[imax + 1 + imax * a_dim1], &c__1); + i__1 = jmax + imax * a_dim1; + d__3 = rowmax, d__4 = (d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[jmax + imax * a_dim1]), abs(d__2)); + rowmax = max(d__3, d__4); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else { + i__1 = imax + imax * a_dim1; + if ((d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[imax + imax * a_dim1]), abs(d__2)) >= + alpha * rowmax) { + kp = imax; + } else { + kp = imax; + kstep = 2; + } + } + } + kk = k + kstep - 1; + if (kp != kk) { + if (kp < *n) { + i__1 = *n - kp; + zswap_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1 + kp * a_dim1], &c__1); + } + i__1 = kp - kk - 1; + zswap_(&i__1, &a[kk + 1 + kk * a_dim1], &c__1, &a[kp + (kk + 1) * a_dim1], lda); + i__1 = kk + kk * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = kk + kk * a_dim1; + i__2 = kp + kp * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + kp * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + if (kstep == 2) { + i__1 = k + 1 + k * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = k + 1 + k * a_dim1; + i__2 = kp + k * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + k * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + } + } + if (kstep == 1) { + if (k < *n) { + z_lmp_div(&z__1, &c_b1, &a[k + k * a_dim1]); + r1.r = z__1.r, r1.i = z__1.i; + i__1 = *n - k; + z__1.r = -r1.r, z__1.i = -r1.i; + zsyr_(uplo, &i__1, &z__1, &a[k + 1 + k * a_dim1], &c__1, + &a[k + 1 + (k + 1) * a_dim1], lda, (ftnlen)1); + i__1 = *n - k; + zscal_(&i__1, &r1, &a[k + 1 
+ k * a_dim1], &c__1); + } + } else { + if (k < *n - 1) { + i__1 = k + 1 + k * a_dim1; + d21.r = a[i__1].r, d21.i = a[i__1].i; + z_lmp_div(&z__1, &a[k + 1 + (k + 1) * a_dim1], &d21); + d11.r = z__1.r, d11.i = z__1.i; + z_lmp_div(&z__1, &a[k + k * a_dim1], &d21); + d22.r = z__1.r, d22.i = z__1.i; + z__3.r = d11.r * d22.r - d11.i * d22.i, z__3.i = d11.r * d22.i + d11.i * d22.r; + z__2.r = z__3.r - 1., z__2.i = z__3.i - 0.; + z_lmp_div(&z__1, &c_b1, &z__2); + t.r = z__1.r, t.i = z__1.i; + z_lmp_div(&z__1, &t, &d21); + d21.r = z__1.r, d21.i = z__1.i; + i__1 = *n; + for (j = k + 2; j <= i__1; ++j) { + i__2 = j + k * a_dim1; + z__3.r = d11.r * a[i__2].r - d11.i * a[i__2].i, + z__3.i = d11.r * a[i__2].i + d11.i * a[i__2].r; + i__3 = j + (k + 1) * a_dim1; + z__2.r = z__3.r - a[i__3].r, z__2.i = z__3.i - a[i__3].i; + z__1.r = d21.r * z__2.r - d21.i * z__2.i, + z__1.i = d21.r * z__2.i + d21.i * z__2.r; + wk.r = z__1.r, wk.i = z__1.i; + i__2 = j + (k + 1) * a_dim1; + z__3.r = d22.r * a[i__2].r - d22.i * a[i__2].i, + z__3.i = d22.r * a[i__2].i + d22.i * a[i__2].r; + i__3 = j + k * a_dim1; + z__2.r = z__3.r - a[i__3].r, z__2.i = z__3.i - a[i__3].i; + z__1.r = d21.r * z__2.r - d21.i * z__2.i, + z__1.i = d21.r * z__2.i + d21.i * z__2.r; + wkp1.r = z__1.r, wkp1.i = z__1.i; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__ + k * a_dim1; + z__3.r = a[i__5].r * wk.r - a[i__5].i * wk.i, + z__3.i = a[i__5].r * wk.i + a[i__5].i * wk.r; + z__2.r = a[i__4].r - z__3.r, z__2.i = a[i__4].i - z__3.i; + i__6 = i__ + (k + 1) * a_dim1; + z__4.r = a[i__6].r * wkp1.r - a[i__6].i * wkp1.i, + z__4.i = a[i__6].r * wkp1.i + a[i__6].i * wkp1.r; + z__1.r = z__2.r - z__4.r, z__1.i = z__2.i - z__4.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + } + i__2 = j + k * a_dim1; + a[i__2].r = wk.r, a[i__2].i = wk.i; + i__2 = j + (k + 1) * a_dim1; + a[i__2].r = wkp1.r, a[i__2].i = wkp1.i; + } + } + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else 
{ + ipiv[k] = -kp; + ipiv[k + 1] = -kp; + } + k += kstep; + goto L40; + } +L70: + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zsytrf.cpp b/lib/linalg/zsytrf.cpp new file mode 100644 index 0000000000..178193fbaa --- /dev/null +++ b/lib/linalg/zsytrf.cpp @@ -0,0 +1,124 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +int zsytrf_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, + doublecomplex *work, integer *lwork, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2; + integer j, k, kb, nb, iws; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nbmin, iinfo; + logical upper; + extern int zsytf2_(char *, integer *, doublecomplex *, integer *, integer *, integer *, ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + integer ldwork; + extern int zlasyf_(char *, integer *, integer *, integer *, doublecomplex *, integer *, + integer *, doublecomplex *, integer *, integer *, ftnlen); + integer lwkopt; + logical lquery; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + --work; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } else if (*lwork < 1 && !lquery) { + *info = -7; + } + if (*info == 0) { + nb = ilaenv_(&c__1, (char *)"ZSYTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = *n * nb; + work[1].r = (doublereal)lwkopt, work[1].i = 0.; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZSYTRF", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + nbmin = 2; + ldwork = *n; + if (nb > 1 && nb < *n) { + 
iws = ldwork * nb; + if (*lwork < iws) { + i__1 = *lwork / ldwork; + nb = max(i__1, 1); + i__1 = 2, + i__2 = ilaenv_(&c__2, (char *)"ZSYTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1, i__2); + } + } else { + iws = 1; + } + if (nb < nbmin) { + nb = *n; + } + if (upper) { + k = *n; + L10: + if (k < 1) { + goto L40; + } + if (k > nb) { + zlasyf_(uplo, &k, &nb, &kb, &a[a_offset], lda, &ipiv[1], &work[1], n, &iinfo, + (ftnlen)1); + } else { + zsytf2_(uplo, &k, &a[a_offset], lda, &ipiv[1], &iinfo, (ftnlen)1); + kb = k; + } + if (*info == 0 && iinfo > 0) { + *info = iinfo; + } + k -= kb; + goto L10; + } else { + k = 1; + L20: + if (k > *n) { + goto L40; + } + if (k <= *n - nb) { + i__1 = *n - k + 1; + zlasyf_(uplo, &i__1, &nb, &kb, &a[k + k * a_dim1], lda, &ipiv[k], &work[1], n, &iinfo, + (ftnlen)1); + } else { + i__1 = *n - k + 1; + zsytf2_(uplo, &i__1, &a[k + k * a_dim1], lda, &ipiv[k], &iinfo, (ftnlen)1); + kb = *n - k + 1; + } + if (*info == 0 && iinfo > 0) { + *info = iinfo + k - 1; + } + i__1 = k + kb - 1; + for (j = k; j <= i__1; ++j) { + if (ipiv[j] > 0) { + ipiv[j] = ipiv[j] + k - 1; + } else { + ipiv[j] = ipiv[j] - k + 1; + } + } + k += kb; + goto L20; + } +L40: + work[1].r = (doublereal)lwkopt, work[1].i = 0.; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zsytri.cpp b/lib/linalg/zsytri.cpp new file mode 100644 index 0000000000..3f7d4dea0f --- /dev/null +++ b/lib/linalg/zsytri.cpp @@ -0,0 +1,292 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static doublecomplex c_b2 = {0., 0.}; +static integer c__1 = 1; +int zsytri_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, + doublecomplex *work, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublecomplex z__1, z__2, z__3; + void z_lmp_div(doublecomplex *, doublecomplex *, doublecomplex *); + doublecomplex d__; + integer k; + doublecomplex t, ak; + 
integer kp; + doublecomplex akp1, temp, akkp1; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer kstep; + logical upper; + extern int zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, integer *); + extern VOID zdotu_(doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *); + extern int zswap_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), + zsymv_(char *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, ftnlen), + xerbla_(char *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + --work; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZSYTRI", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + if (upper) { + for (*info = *n; *info >= 1; --(*info)) { + i__1 = *info + *info * a_dim1; + if (ipiv[*info] > 0 && (a[i__1].r == 0. && a[i__1].i == 0.)) { + return 0; + } + } + } else { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + i__2 = *info + *info * a_dim1; + if (ipiv[*info] > 0 && (a[i__2].r == 0. 
&& a[i__2].i == 0.)) { + return 0; + } + } + } + *info = 0; + if (upper) { + k = 1; + L30: + if (k > *n) { + goto L40; + } + if (ipiv[k] > 0) { + i__1 = k + k * a_dim1; + z_lmp_div(&z__1, &c_b1, &a[k + k * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + if (k > 1) { + i__1 = k - 1; + zcopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &work[1], &c__1); + i__1 = k - 1; + z__1.r = -1., z__1.i = -0.; + zsymv_(uplo, &i__1, &z__1, &a[a_offset], lda, &work[1], &c__1, &c_b2, + &a[k * a_dim1 + 1], &c__1, (ftnlen)1); + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + i__3 = k - 1; + zdotu_(&z__2, &i__3, &work[1], &c__1, &a[k * a_dim1 + 1], &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + kstep = 1; + } else { + i__1 = k + (k + 1) * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + z_lmp_div(&z__1, &a[k + k * a_dim1], &t); + ak.r = z__1.r, ak.i = z__1.i; + z_lmp_div(&z__1, &a[k + 1 + (k + 1) * a_dim1], &t); + akp1.r = z__1.r, akp1.i = z__1.i; + z_lmp_div(&z__1, &a[k + (k + 1) * a_dim1], &t); + akkp1.r = z__1.r, akkp1.i = z__1.i; + z__3.r = ak.r * akp1.r - ak.i * akp1.i, z__3.i = ak.r * akp1.i + ak.i * akp1.r; + z__2.r = z__3.r - 1., z__2.i = z__3.i - 0.; + z__1.r = t.r * z__2.r - t.i * z__2.i, z__1.i = t.r * z__2.i + t.i * z__2.r; + d__.r = z__1.r, d__.i = z__1.i; + i__1 = k + k * a_dim1; + z_lmp_div(&z__1, &akp1, &d__); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + 1 + (k + 1) * a_dim1; + z_lmp_div(&z__1, &ak, &d__); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + (k + 1) * a_dim1; + z__2.r = -akkp1.r, z__2.i = -akkp1.i; + z_lmp_div(&z__1, &z__2, &d__); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + if (k > 1) { + i__1 = k - 1; + zcopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &work[1], &c__1); + i__1 = k - 1; + z__1.r = -1., z__1.i = -0.; + zsymv_(uplo, &i__1, &z__1, &a[a_offset], lda, &work[1], &c__1, &c_b2, + &a[k * a_dim1 + 1], &c__1, (ftnlen)1); + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + i__3 = k 
- 1; + zdotu_(&z__2, &i__3, &work[1], &c__1, &a[k * a_dim1 + 1], &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + (k + 1) * a_dim1; + i__2 = k + (k + 1) * a_dim1; + i__3 = k - 1; + zdotu_(&z__2, &i__3, &a[k * a_dim1 + 1], &c__1, &a[(k + 1) * a_dim1 + 1], &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k - 1; + zcopy_(&i__1, &a[(k + 1) * a_dim1 + 1], &c__1, &work[1], &c__1); + i__1 = k - 1; + z__1.r = -1., z__1.i = -0.; + zsymv_(uplo, &i__1, &z__1, &a[a_offset], lda, &work[1], &c__1, &c_b2, + &a[(k + 1) * a_dim1 + 1], &c__1, (ftnlen)1); + i__1 = k + 1 + (k + 1) * a_dim1; + i__2 = k + 1 + (k + 1) * a_dim1; + i__3 = k - 1; + zdotu_(&z__2, &i__3, &work[1], &c__1, &a[(k + 1) * a_dim1 + 1], &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + kstep = 2; + } + kp = (i__1 = ipiv[k], abs(i__1)); + if (kp != k) { + i__1 = kp - 1; + zswap_(&i__1, &a[k * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &c__1); + i__1 = k - kp - 1; + zswap_(&i__1, &a[kp + 1 + k * a_dim1], &c__1, &a[kp + (kp + 1) * a_dim1], lda); + i__1 = k + k * a_dim1; + temp.r = a[i__1].r, temp.i = a[i__1].i; + i__1 = k + k * a_dim1; + i__2 = kp + kp * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + kp * a_dim1; + a[i__1].r = temp.r, a[i__1].i = temp.i; + if (kstep == 2) { + i__1 = k + (k + 1) * a_dim1; + temp.r = a[i__1].r, temp.i = a[i__1].i; + i__1 = k + (k + 1) * a_dim1; + i__2 = kp + (k + 1) * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + (k + 1) * a_dim1; + a[i__1].r = temp.r, a[i__1].i = temp.i; + } + } + k += kstep; + goto L30; + L40:; + } else { + k = *n; + L50: + if (k < 1) { + goto L60; + } + if (ipiv[k] > 0) { + i__1 = k + k * a_dim1; + z_lmp_div(&z__1, &c_b1, &a[k + k * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + if (k < *n) { + i__1 = *n - k; + 
zcopy_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &work[1], &c__1); + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zsymv_(uplo, &i__1, &z__1, &a[k + 1 + (k + 1) * a_dim1], lda, &work[1], &c__1, + &c_b2, &a[k + 1 + k * a_dim1], &c__1, (ftnlen)1); + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + i__3 = *n - k; + zdotu_(&z__2, &i__3, &work[1], &c__1, &a[k + 1 + k * a_dim1], &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + kstep = 1; + } else { + i__1 = k + (k - 1) * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + z_lmp_div(&z__1, &a[k - 1 + (k - 1) * a_dim1], &t); + ak.r = z__1.r, ak.i = z__1.i; + z_lmp_div(&z__1, &a[k + k * a_dim1], &t); + akp1.r = z__1.r, akp1.i = z__1.i; + z_lmp_div(&z__1, &a[k + (k - 1) * a_dim1], &t); + akkp1.r = z__1.r, akkp1.i = z__1.i; + z__3.r = ak.r * akp1.r - ak.i * akp1.i, z__3.i = ak.r * akp1.i + ak.i * akp1.r; + z__2.r = z__3.r - 1., z__2.i = z__3.i - 0.; + z__1.r = t.r * z__2.r - t.i * z__2.i, z__1.i = t.r * z__2.i + t.i * z__2.r; + d__.r = z__1.r, d__.i = z__1.i; + i__1 = k - 1 + (k - 1) * a_dim1; + z_lmp_div(&z__1, &akp1, &d__); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + k * a_dim1; + z_lmp_div(&z__1, &ak, &d__); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + (k - 1) * a_dim1; + z__2.r = -akkp1.r, z__2.i = -akkp1.i; + z_lmp_div(&z__1, &z__2, &d__); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + if (k < *n) { + i__1 = *n - k; + zcopy_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &work[1], &c__1); + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zsymv_(uplo, &i__1, &z__1, &a[k + 1 + (k + 1) * a_dim1], lda, &work[1], &c__1, + &c_b2, &a[k + 1 + k * a_dim1], &c__1, (ftnlen)1); + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + i__3 = *n - k; + zdotu_(&z__2, &i__3, &work[1], &c__1, &a[k + 1 + k * a_dim1], &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + (k - 1) * a_dim1; + i__2 = k + (k - 1) * 
a_dim1; + i__3 = *n - k; + zdotu_(&z__2, &i__3, &a[k + 1 + k * a_dim1], &c__1, &a[k + 1 + (k - 1) * a_dim1], + &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = *n - k; + zcopy_(&i__1, &a[k + 1 + (k - 1) * a_dim1], &c__1, &work[1], &c__1); + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zsymv_(uplo, &i__1, &z__1, &a[k + 1 + (k + 1) * a_dim1], lda, &work[1], &c__1, + &c_b2, &a[k + 1 + (k - 1) * a_dim1], &c__1, (ftnlen)1); + i__1 = k - 1 + (k - 1) * a_dim1; + i__2 = k - 1 + (k - 1) * a_dim1; + i__3 = *n - k; + zdotu_(&z__2, &i__3, &work[1], &c__1, &a[k + 1 + (k - 1) * a_dim1], &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + kstep = 2; + } + kp = (i__1 = ipiv[k], abs(i__1)); + if (kp != k) { + if (kp < *n) { + i__1 = *n - kp; + zswap_(&i__1, &a[kp + 1 + k * a_dim1], &c__1, &a[kp + 1 + kp * a_dim1], &c__1); + } + i__1 = kp - k - 1; + zswap_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &a[kp + (k + 1) * a_dim1], lda); + i__1 = k + k * a_dim1; + temp.r = a[i__1].r, temp.i = a[i__1].i; + i__1 = k + k * a_dim1; + i__2 = kp + kp * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + kp * a_dim1; + a[i__1].r = temp.r, a[i__1].i = temp.i; + if (kstep == 2) { + i__1 = k + (k - 1) * a_dim1; + temp.r = a[i__1].r, temp.i = a[i__1].i; + i__1 = k + (k - 1) * a_dim1; + i__2 = kp + (k - 1) * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + (k - 1) * a_dim1; + a[i__1].r = temp.r, a[i__1].i = temp.i; + } + } + k -= kstep; + goto L50; + L60:; + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/ztrsm.cpp b/lib/linalg/ztrsm.cpp new file mode 100644 index 0000000000..160b65974a --- /dev/null +++ b/lib/linalg/ztrsm.cpp @@ -0,0 +1,443 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +int ztrsm_(char *side, char *uplo, char *transa, char *diag, 
integer *m, integer *n, + doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + ftnlen side_len, ftnlen uplo_len, ftnlen transa_len, ftnlen diag_len) +{ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; + doublecomplex z__1, z__2, z__3; + void z_lmp_div(doublecomplex *, doublecomplex *, doublecomplex *), + d_lmp_cnjg(doublecomplex *, doublecomplex *); + integer i__, j, k, info; + doublecomplex temp; + logical lside; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nrowa; + logical upper; + extern int xerbla_(char *, integer *, ftnlen); + logical noconj, nounit; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + lside = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + noconj = lsame_(transa, (char *)"T", (ftnlen)1, (ftnlen)1); + nounit = lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1); + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + info = 0; + if (!lside && !lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 2; + } else if (!lsame_(transa, (char *)"N", (ftnlen)1, (ftnlen)1) && + !lsame_(transa, (char *)"T", (ftnlen)1, (ftnlen)1) && + !lsame_(transa, (char *)"C", (ftnlen)1, (ftnlen)1)) { + info = 3; + } else if (!lsame_(diag, (char *)"U", (ftnlen)1, (ftnlen)1) && + !lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1)) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1, nrowa)) { + info = 9; + } else if (*ldb < max(1, *m)) { + info = 11; + } + if (info != 0) { + xerbla_((char *)"ZTRSM ", &info, (ftnlen)6); + return 0; + } + if (*m == 0 || *n == 0) { + return 0; + } + if (alpha->r == 0. && alpha->i == 0.) 
{ + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + b[i__3].r = 0., b[i__3].i = 0.; + } + } + return 0; + } + if (lside) { + if (lsame_(transa, (char *)"N", (ftnlen)1, (ftnlen)1)) { + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (alpha->r != 1. || alpha->i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4].i, + z__1.i = alpha->r * b[i__4].i + alpha->i * b[i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + } + for (k = *m; k >= 1; --k) { + i__2 = k + j * b_dim1; + if (b[i__2].r != 0. || b[i__2].i != 0.) { + if (nounit) { + i__2 = k + j * b_dim1; + z_lmp_div(&z__1, &b[k + j * b_dim1], &a[k + k * a_dim1]); + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + } + i__2 = k - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = k + j * b_dim1; + i__6 = i__ + k * a_dim1; + z__2.r = b[i__5].r * a[i__6].r - b[i__5].i * a[i__6].i, + z__2.i = b[i__5].r * a[i__6].i + b[i__5].i * a[i__6].r; + z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4].i - z__2.i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + } + } + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (alpha->r != 1. || alpha->i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4].i, + z__1.i = alpha->r * b[i__4].i + alpha->i * b[i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + } + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + i__3 = k + j * b_dim1; + if (b[i__3].r != 0. || b[i__3].i != 0.) 
{ + if (nounit) { + i__3 = k + j * b_dim1; + z_lmp_div(&z__1, &b[k + j * b_dim1], &a[k + k * a_dim1]); + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + i__3 = *m; + for (i__ = k + 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = k + j * b_dim1; + i__7 = i__ + k * a_dim1; + z__2.r = b[i__6].r * a[i__7].r - b[i__6].i * a[i__7].i, + z__2.i = b[i__6].r * a[i__7].i + b[i__6].i * a[i__7].r; + z__1.r = b[i__5].r - z__2.r, z__1.i = b[i__5].i - z__2.i; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; + } + } + } + } + } + } else { + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, + z__1.i = alpha->r * b[i__3].i + alpha->i * b[i__3].r; + temp.r = z__1.r, temp.i = z__1.i; + if (noconj) { + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + i__4 = k + i__ * a_dim1; + i__5 = k + j * b_dim1; + z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5].i, + z__2.i = a[i__4].r * b[i__5].i + a[i__4].i * b[i__5].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (nounit) { + z_lmp_div(&z__1, &temp, &a[i__ + i__ * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + d_lmp_cnjg(&z__3, &a[k + i__ * a_dim1]); + i__4 = k + j * b_dim1; + z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i, + z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (nounit) { + d_lmp_cnjg(&z__2, &a[i__ + i__ * a_dim1]); + z_lmp_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__3 = i__ + j * b_dim1; + b[i__3].r = temp.r, b[i__3].i = temp.i; + } + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + i__2 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2].i, + 
z__1.i = alpha->r * b[i__2].i + alpha->i * b[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + if (noconj) { + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + i__3 = k + i__ * a_dim1; + i__4 = k + j * b_dim1; + z__2.r = a[i__3].r * b[i__4].r - a[i__3].i * b[i__4].i, + z__2.i = a[i__3].r * b[i__4].i + a[i__3].i * b[i__4].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (nounit) { + z_lmp_div(&z__1, &temp, &a[i__ + i__ * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + d_lmp_cnjg(&z__3, &a[k + i__ * a_dim1]); + i__3 = k + j * b_dim1; + z__2.r = z__3.r * b[i__3].r - z__3.i * b[i__3].i, + z__2.i = z__3.r * b[i__3].i + z__3.i * b[i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (nounit) { + d_lmp_cnjg(&z__2, &a[i__ + i__ * a_dim1]); + z_lmp_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__2 = i__ + j * b_dim1; + b[i__2].r = temp.r, b[i__2].i = temp.i; + } + } + } + } + } else { + if (lsame_(transa, (char *)"N", (ftnlen)1, (ftnlen)1)) { + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (alpha->r != 1. || alpha->i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4].i, + z__1.i = alpha->r * b[i__4].i + alpha->i * b[i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + } + i__2 = j - 1; + for (k = 1; k <= i__2; ++k) { + i__3 = k + j * a_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0.) 
{ + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = k + j * a_dim1; + i__7 = i__ + k * b_dim1; + z__2.r = a[i__6].r * b[i__7].r - a[i__6].i * b[i__7].i, + z__2.i = a[i__6].r * b[i__7].i + a[i__6].i * b[i__7].r; + z__1.r = b[i__5].r - z__2.r, z__1.i = b[i__5].i - z__2.i; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; + } + } + } + if (nounit) { + z_lmp_div(&z__1, &c_b1, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, + z__1.i = temp.r * b[i__4].i + temp.i * b[i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + } + } + } else { + for (j = *n; j >= 1; --j) { + if (alpha->r != 1. || alpha->i != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, + z__1.i = alpha->r * b[i__3].i + alpha->i * b[i__3].r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + } + } + i__1 = *n; + for (k = j + 1; k <= i__1; ++k) { + i__2 = k + j * a_dim1; + if (a[i__2].r != 0. || a[i__2].i != 0.) 
{ + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = k + j * a_dim1; + i__6 = i__ + k * b_dim1; + z__2.r = a[i__5].r * b[i__6].r - a[i__5].i * b[i__6].i, + z__2.i = a[i__5].r * b[i__6].i + a[i__5].i * b[i__6].r; + z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4].i - z__2.i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + } + } + if (nounit) { + z_lmp_div(&z__1, &c_b1, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, + z__1.i = temp.r * b[i__3].i + temp.i * b[i__3].r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + } + } + } + } + } else { + if (upper) { + for (k = *n; k >= 1; --k) { + if (nounit) { + if (noconj) { + z_lmp_div(&z__1, &c_b1, &a[k + k * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } else { + d_lmp_cnjg(&z__2, &a[k + k * a_dim1]); + z_lmp_div(&z__1, &c_b1, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + k * b_dim1; + i__3 = i__ + k * b_dim1; + z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, + z__1.i = temp.r * b[i__3].i + temp.i * b[i__3].r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + } + } + i__1 = k - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = j + k * a_dim1; + if (a[i__2].r != 0. || a[i__2].i != 0.) { + if (noconj) { + i__2 = j + k * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + } else { + d_lmp_cnjg(&z__1, &a[j + k * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = i__ + k * b_dim1; + z__2.r = temp.r * b[i__5].r - temp.i * b[i__5].i, + z__2.i = temp.r * b[i__5].i + temp.i * b[i__5].r; + z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4].i - z__2.i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + } + } + if (alpha->r != 1. || alpha->i != 0.) 
{ + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + k * b_dim1; + i__3 = i__ + k * b_dim1; + z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, + z__1.i = alpha->r * b[i__3].i + alpha->i * b[i__3].r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + } + } + } + } else { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + if (nounit) { + if (noconj) { + z_lmp_div(&z__1, &c_b1, &a[k + k * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } else { + d_lmp_cnjg(&z__2, &a[k + k * a_dim1]); + z_lmp_div(&z__1, &c_b1, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + k * b_dim1; + i__4 = i__ + k * b_dim1; + z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, + z__1.i = temp.r * b[i__4].i + temp.i * b[i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + } + i__2 = *n; + for (j = k + 1; j <= i__2; ++j) { + i__3 = j + k * a_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0.) { + if (noconj) { + i__3 = j + k * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + } else { + d_lmp_cnjg(&z__1, &a[j + k * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = i__ + k * b_dim1; + z__2.r = temp.r * b[i__6].r - temp.i * b[i__6].i, + z__2.i = temp.r * b[i__6].i + temp.i * b[i__6].r; + z__1.r = b[i__5].r - z__2.r, z__1.i = b[i__5].i - z__2.i; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; + } + } + } + if (alpha->r != 1. || alpha->i != 0.) 
{ + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + k * b_dim1; + i__4 = i__ + k * b_dim1; + z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4].i, + z__1.i = alpha->r * b[i__4].i + alpha->i * b[i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + } + } + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/ztrsv.cpp b/lib/linalg/ztrsv.cpp new file mode 100644 index 0000000000..324416d9e3 --- /dev/null +++ b/lib/linalg/ztrsv.cpp @@ -0,0 +1,330 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int ztrsv_(char *uplo, char *trans, char *diag, integer *n, doublecomplex *a, integer *lda, + doublecomplex *x, integer *incx, ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2, z__3; + void z_lmp_div(doublecomplex *, doublecomplex *, doublecomplex *), + d_lmp_cnjg(doublecomplex *, doublecomplex *); + integer i__, j, ix, jx, kx, info; + doublecomplex temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int xerbla_(char *, integer *, ftnlen); + logical noconj, nounit; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + info = 0; + if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (!lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1) && + !lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1) && + !lsame_(trans, (char *)"C", (ftnlen)1, (ftnlen)1)) { + info = 2; + } else if (!lsame_(diag, (char *)"U", (ftnlen)1, (ftnlen)1) && + !lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1)) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1, *n)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } + if (info != 0) { + xerbla_((char *)"ZTRSV ", &info, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + noconj = lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1); + nounit = lsame_(diag, (char *)"N", 
(ftnlen)1, (ftnlen)1); + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) { + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + if (x[i__1].r != 0. || x[i__1].i != 0.) { + if (nounit) { + i__1 = j; + z_lmp_div(&z__1, &x[j], &a[j + j * a_dim1]); + x[i__1].r = z__1.r, x[i__1].i = z__1.i; + } + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + for (i__ = j - 1; i__ >= 1; --i__) { + i__1 = i__; + i__2 = i__; + i__3 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, + z__2.i = temp.r * a[i__3].i + temp.i * a[i__3].r; + z__1.r = x[i__2].r - z__2.r, z__1.i = x[i__2].i - z__2.i; + x[i__1].r = z__1.r, x[i__1].i = z__1.i; + } + } + } + } else { + jx = kx + (*n - 1) * *incx; + for (j = *n; j >= 1; --j) { + i__1 = jx; + if (x[i__1].r != 0. || x[i__1].i != 0.) { + if (nounit) { + i__1 = jx; + z_lmp_div(&z__1, &x[jx], &a[j + j * a_dim1]); + x[i__1].r = z__1.r, x[i__1].i = z__1.i; + } + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + ix = jx; + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + i__1 = ix; + i__2 = ix; + i__3 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, + z__2.i = temp.r * a[i__3].i + temp.i * a[i__3].r; + z__1.r = x[i__2].r - z__2.r, z__1.i = x[i__2].i - z__2.i; + x[i__1].r = z__1.r, x[i__1].i = z__1.i; + } + } + jx -= *incx; + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0. || x[i__2].i != 0.) 
{ + if (nounit) { + i__2 = j; + z_lmp_div(&z__1, &x[j], &a[j + j * a_dim1]); + x[i__2].r = z__1.r, x[i__2].i = z__1.i; + } + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__2.i = temp.r * a[i__5].i + temp.i * a[i__5].r; + z__1.r = x[i__4].r - z__2.r, z__1.i = x[i__4].i - z__2.i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + } + } + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + if (nounit) { + i__2 = jx; + z_lmp_div(&z__1, &x[jx], &a[j + j * a_dim1]); + x[i__2].r = z__1.r, x[i__2].i = z__1.i; + } + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + ix = jx; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + i__3 = ix; + i__4 = ix; + i__5 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__2.i = temp.r * a[i__5].i + temp.i * a[i__5].r; + z__1.r = x[i__4].r - z__2.r, z__1.i = x[i__4].i - z__2.i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + } + } + jx += *incx; + } + } + } + } else { + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + if (noconj) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4].i, + z__2.i = a[i__3].r * x[i__4].i + a[i__3].i * x[i__4].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (nounit) { + z_lmp_div(&z__1, &temp, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + d_lmp_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = i__; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, + z__2.i = z__3.r * x[i__3].i + 
z__3.i * x[i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (nounit) { + d_lmp_cnjg(&z__2, &a[j + j * a_dim1]); + z_lmp_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__2 = j; + x[i__2].r = temp.r, x[i__2].i = temp.i; + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + ix = kx; + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + if (noconj) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = ix; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4].i, + z__2.i = a[i__3].r * x[i__4].i + a[i__3].i * x[i__4].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix += *incx; + } + if (nounit) { + z_lmp_div(&z__1, &temp, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + d_lmp_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = ix; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, + z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix += *incx; + } + if (nounit) { + d_lmp_cnjg(&z__2, &a[j + j * a_dim1]); + z_lmp_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__2 = jx; + x[i__2].r = temp.r, x[i__2].i = temp.i; + jx += *incx; + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + if (noconj) { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + i__2 = i__ + j * a_dim1; + i__3 = i__; + z__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[i__3].i, + z__2.i = a[i__2].r * x[i__3].i + a[i__2].i * x[i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (nounit) { + z_lmp_div(&z__1, &temp, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__1 = j + 1; + for 
(i__ = *n; i__ >= i__1; --i__) { + d_lmp_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__2 = i__; + z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, + z__2.i = z__3.r * x[i__2].i + z__3.i * x[i__2].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (nounit) { + d_lmp_cnjg(&z__2, &a[j + j * a_dim1]); + z_lmp_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__1 = j; + x[i__1].r = temp.r, x[i__1].i = temp.i; + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + ix = kx; + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + if (noconj) { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + i__2 = i__ + j * a_dim1; + i__3 = ix; + z__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[i__3].i, + z__2.i = a[i__2].r * x[i__3].i + a[i__2].i * x[i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix -= *incx; + } + if (nounit) { + z_lmp_div(&z__1, &temp, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + d_lmp_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__2 = ix; + z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, + z__2.i = z__3.r * x[i__2].i + z__3.i * x[i__2].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix -= *incx; + } + if (nounit) { + d_lmp_cnjg(&z__2, &a[j + j * a_dim1]); + z_lmp_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__1 = jx; + x[i__1].r = temp.r, x[i__1].i = temp.i; + jx -= *incx; + } + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/ztrti2.cpp b/lib/linalg/ztrti2.cpp new file mode 100644 index 0000000000..00cb4154b3 --- /dev/null +++ b/lib/linalg/ztrti2.cpp @@ -0,0 +1,88 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +int ztrti2_(char *uplo, char *diag, 
integer *n, doublecomplex *a, integer *lda, integer *info, + ftnlen uplo_len, ftnlen diag_len) +{ + integer a_dim1, a_offset, i__1, i__2; + doublecomplex z__1; + void z_lmp_div(doublecomplex *, doublecomplex *, doublecomplex *); + integer j; + doublecomplex ajj; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int zscal_(integer *, doublecomplex *, doublecomplex *, integer *); + logical upper; + extern int ztrmv_(char *, char *, char *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, ftnlen, ftnlen, ftnlen), + xerbla_(char *, integer *, ftnlen); + logical nounit; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + nounit = lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (!nounit && !lsame_(diag, (char *)"U", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZTRTI2", &i__1, (ftnlen)6); + return 0; + } + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (nounit) { + i__2 = j + j * a_dim1; + z_lmp_div(&z__1, &c_b1, &a[j + j * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = j + j * a_dim1; + z__1.r = -a[i__2].r, z__1.i = -a[i__2].i; + ajj.r = z__1.r, ajj.i = z__1.i; + } else { + z__1.r = -1., z__1.i = -0.; + ajj.r = z__1.r, ajj.i = z__1.i; + } + i__2 = j - 1; + ztrmv_((char *)"Upper", (char *)"No transpose", diag, &i__2, &a[a_offset], lda, &a[j * a_dim1 + 1], + &c__1, (ftnlen)5, (ftnlen)12, (ftnlen)1); + i__2 = j - 1; + zscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1); + } + } else { + for (j = *n; j >= 1; --j) { + if (nounit) { + i__1 = j + j * a_dim1; + z_lmp_div(&z__1, &c_b1, &a[j + j * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = j + j * a_dim1; + z__1.r = -a[i__1].r, z__1.i = 
-a[i__1].i; + ajj.r = z__1.r, ajj.i = z__1.i; + } else { + z__1.r = -1., z__1.i = -0.; + ajj.r = z__1.r, ajj.i = z__1.i; + } + if (j < *n) { + i__1 = *n - j; + ztrmv_((char *)"Lower", (char *)"No transpose", diag, &i__1, &a[j + 1 + (j + 1) * a_dim1], lda, + &a[j + 1 + j * a_dim1], &c__1, (ftnlen)5, (ftnlen)12, (ftnlen)1); + i__1 = *n - j; + zscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1); + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/ztrtri.cpp b/lib/linalg/ztrtri.cpp new file mode 100644 index 0000000000..771d54adb7 --- /dev/null +++ b/lib/linalg/ztrtri.cpp @@ -0,0 +1,112 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +int ztrtri_(char *uplo, char *diag, integer *n, doublecomplex *a, integer *lda, integer *info, + ftnlen uplo_len, ftnlen diag_len) +{ + address a__1[2]; + integer a_dim1, a_offset, i__1, i__2, i__3[2], i__4, i__5; + doublecomplex z__1; + char ch__1[2]; + int s_lmp_cat(char *, char **, integer *, integer *, ftnlen); + integer j, jb, nb, nn; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + logical upper; + extern int ztrmm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, + ftnlen, ftnlen), + ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen), + ztrti2_(char *, char *, integer *, doublecomplex *, integer *, integer *, ftnlen, ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + logical nounit; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + nounit = lsame_(diag, 
(char *)"N", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (!nounit && !lsame_(diag, (char *)"U", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZTRTRI", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + if (nounit) { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + i__2 = *info + *info * a_dim1; + if (a[i__2].r == 0. && a[i__2].i == 0.) { + return 0; + } + } + *info = 0; + } + i__3[0] = 1, a__1[0] = uplo; + i__3[1] = 1, a__1[1] = diag; + s_lmp_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, (char *)"ZTRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)2); + if (nb <= 1 || nb >= *n) { + ztrti2_(uplo, diag, n, &a[a_offset], lda, info, (ftnlen)1, (ftnlen)1); + } else { + if (upper) { + i__1 = *n; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + i__4 = nb, i__5 = *n - j + 1; + jb = min(i__4, i__5); + i__4 = j - 1; + ztrmm_((char *)"Left", (char *)"Upper", (char *)"No transpose", diag, &i__4, &jb, &c_b1, &a[a_offset], lda, + &a[j * a_dim1 + 1], lda, (ftnlen)4, (ftnlen)5, (ftnlen)12, (ftnlen)1); + i__4 = j - 1; + z__1.r = -1., z__1.i = -0.; + ztrsm_((char *)"Right", (char *)"Upper", (char *)"No transpose", diag, &i__4, &jb, &z__1, + &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1], lda, (ftnlen)5, (ftnlen)5, + (ftnlen)12, (ftnlen)1); + ztrti2_((char *)"Upper", diag, &jb, &a[j + j * a_dim1], lda, info, (ftnlen)5, (ftnlen)1); + } + } else { + nn = (*n - 1) / nb * nb + 1; + i__2 = -nb; + for (j = nn; i__2 < 0 ? 
j >= 1 : j <= 1; j += i__2) { + i__1 = nb, i__4 = *n - j + 1; + jb = min(i__1, i__4); + if (j + jb <= *n) { + i__1 = *n - j - jb + 1; + ztrmm_((char *)"Left", (char *)"Lower", (char *)"No transpose", diag, &i__1, &jb, &c_b1, + &a[j + jb + (j + jb) * a_dim1], lda, &a[j + jb + j * a_dim1], lda, + (ftnlen)4, (ftnlen)5, (ftnlen)12, (ftnlen)1); + i__1 = *n - j - jb + 1; + z__1.r = -1., z__1.i = -0.; + ztrsm_((char *)"Right", (char *)"Lower", (char *)"No transpose", diag, &i__1, &jb, &z__1, + &a[j + j * a_dim1], lda, &a[j + jb + j * a_dim1], lda, (ftnlen)5, + (ftnlen)5, (ftnlen)12, (ftnlen)1); + } + ztrti2_((char *)"Lower", diag, &jb, &a[j + j * a_dim1], lda, info, (ftnlen)5, (ftnlen)1); + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif From 1b2781db9b30f2c62b87b86d131ab4f3b32ab190 Mon Sep 17 00:00:00 2001 From: alphataubio Date: Mon, 11 Nov 2024 14:57:45 -0500 Subject: [PATCH 254/294] add syncs --- src/KOKKOS/group_kokkos.cpp | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/group_kokkos.cpp b/src/KOKKOS/group_kokkos.cpp index 346a98d92d..b3a95d216a 100644 --- a/src/KOKKOS/group_kokkos.cpp +++ b/src/KOKKOS/group_kokkos.cpp @@ -44,20 +44,28 @@ template double GroupKokkos::mass(int igroup) { int groupbit = bitmask[igroup]; - auto d_mass = atomKK->k_mass.template view(); - auto d_rmass = atomKK->k_rmass.template view(); auto d_mask = atomKK->k_mask.template view(); - auto d_type = atomKK->k_type.template view(); double one = 0.0; if (atomKK->rmass) { + + auto d_rmass = atomKK->k_rmass.template view(); + atomKK->sync(ExecutionSpaceFromDevice::space,MASK_MASK|RMASS_MASK); + Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_one) { if (d_mask(i) & groupbit) l_one += d_rmass(i); }, one); + } else { + + auto d_mass = atomKK->k_mass.template view(); + auto d_type = atomKK->k_type.template view(); + atomKK->sync(ExecutionSpaceFromDevice::space,MASK_MASK|MASS_MASK|TYPE_MASK); + 
Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_one) { if (d_mask(i) & groupbit) l_one += d_mass(d_type(i)); }, one); + } double all; @@ -87,6 +95,7 @@ void GroupKokkos::xcm(int igroup, double masstotal, double *xcm) if (atomKK->rmass) { auto d_rmass = atomKK->k_rmass.template view(); + atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK); Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_cmx, double &l_cmy, double &l_cmz) { if (d_mask(i) & groupbit) { @@ -106,6 +115,7 @@ void GroupKokkos::xcm(int igroup, double masstotal, double *xcm) auto d_mass = atomKK->k_mass.template view(); auto d_type = atomKK->k_type.template view(); + atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK|MASK_MASK|IMAGE_MASK|MASS_MASK|TYPE_MASK); Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_cmx, double &l_cmy, double &l_cmz) { if (d_mask(i) & groupbit) { @@ -149,6 +159,7 @@ void GroupKokkos::vcm(int igroup, double masstotal, double *vcm) if (atomKK->rmass) { auto d_rmass = atomKK->k_rmass.template view(); + atomKK->sync(ExecutionSpaceFromDevice::space,V_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK); Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) { if (d_mask(i) & groupbit) { @@ -163,6 +174,7 @@ void GroupKokkos::vcm(int igroup, double masstotal, double *vcm) auto d_mass = atomKK->k_mass.template view(); auto d_type = atomKK->k_type.template view(); + atomKK->sync(ExecutionSpaceFromDevice::space,V_MASK|MASK_MASK|IMAGE_MASK|MASS_MASK|TYPE_MASK); Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) { if (d_mask(i) & groupbit) { @@ -208,6 +220,7 @@ void GroupKokkos::angmom(int igroup, double *xcm, double *lmom) if (atomKK->rmass) { auto d_rmass = atomKK->k_rmass.template view(); + atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK); 
Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) { if (d_mask(i) & groupbit) { @@ -230,6 +243,7 @@ void GroupKokkos::angmom(int igroup, double *xcm, double *lmom) auto d_mass = atomKK->k_mass.template view(); auto d_type = atomKK->k_type.template view(); + atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK|MASS_MASK|TYPE_MASK); Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) { if (d_mask(i) & groupbit) { @@ -278,6 +292,7 @@ void GroupKokkos::inertia(int igroup, double *xcm, double itensor[3] if (atomKK->rmass) { auto d_rmass = atomKK->k_rmass.template view(); + atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK); Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_i00, double &l_i11, double &l_i22, double &l_i01, double &l_i12, double &l_i02) { if (d_mask(i) & groupbit) { @@ -303,6 +318,7 @@ void GroupKokkos::inertia(int igroup, double *xcm, double itensor[3] auto d_mass = atomKK->k_mass.template view(); auto d_type = atomKK->k_type.template view(); + atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK|MASK_MASK|IMAGE_MASK|MASS_MASK|TYPE_MASK); Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_i00, double &l_i11, double &l_i22, double &l_i01, double &l_i12, double &l_i02) { if (d_mask(i) & groupbit) { From 8809ed067fc45ae15e6cdc05eb241fdd7b00221a Mon Sep 17 00:00:00 2001 From: alphataubio Date: Mon, 11 Nov 2024 15:11:02 -0500 Subject: [PATCH 255/294] oops --- src/KOKKOS/group_kokkos.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/KOKKOS/group_kokkos.cpp b/src/KOKKOS/group_kokkos.cpp index b3a95d216a..3a760c19c3 100644 --- a/src/KOKKOS/group_kokkos.cpp +++ b/src/KOKKOS/group_kokkos.cpp @@ -18,6 +18,7 @@ #include "group_kokkos.h" #include "atom_kokkos.h" +#include "atom_masks.h" #include 
"domain_kokkos.h" #include "kokkos_few.h" @@ -60,7 +61,8 @@ double GroupKokkos::mass(int igroup) auto d_mass = atomKK->k_mass.template view(); auto d_type = atomKK->k_type.template view(); - atomKK->sync(ExecutionSpaceFromDevice::space,MASK_MASK|MASS_MASK|TYPE_MASK); + atomKK->sync(ExecutionSpaceFromDevice::space,MASK_MASK|TYPE_MASK); + atomKK->k_mass.template sync(); Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_one) { if (d_mask(i) & groupbit) l_one += d_mass(d_type(i)); @@ -115,7 +117,8 @@ void GroupKokkos::xcm(int igroup, double masstotal, double *xcm) auto d_mass = atomKK->k_mass.template view(); auto d_type = atomKK->k_type.template view(); - atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK|MASK_MASK|IMAGE_MASK|MASS_MASK|TYPE_MASK); + atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK); + atomKK->k_mass.template sync(); Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_cmx, double &l_cmy, double &l_cmz) { if (d_mask(i) & groupbit) { @@ -174,7 +177,8 @@ void GroupKokkos::vcm(int igroup, double masstotal, double *vcm) auto d_mass = atomKK->k_mass.template view(); auto d_type = atomKK->k_type.template view(); - atomKK->sync(ExecutionSpaceFromDevice::space,V_MASK|MASK_MASK|IMAGE_MASK|MASS_MASK|TYPE_MASK); + atomKK->sync(ExecutionSpaceFromDevice::space,V_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK); + atomKK->k_mass.template sync(); Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) { if (d_mask(i) & groupbit) { @@ -243,7 +247,8 @@ void GroupKokkos::angmom(int igroup, double *xcm, double *lmom) auto d_mass = atomKK->k_mass.template view(); auto d_type = atomKK->k_type.template view(); - atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK|MASS_MASK|TYPE_MASK); + atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK); + atomKK->k_mass.template sync(); 
Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) { if (d_mask(i) & groupbit) { @@ -318,7 +323,8 @@ void GroupKokkos::inertia(int igroup, double *xcm, double itensor[3] auto d_mass = atomKK->k_mass.template view(); auto d_type = atomKK->k_type.template view(); - atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK|MASK_MASK|IMAGE_MASK|MASS_MASK|TYPE_MASK); + atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK); + atomKK->k_mass.template sync(); Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_i00, double &l_i11, double &l_i22, double &l_i01, double &l_i12, double &l_i02) { if (d_mask(i) & groupbit) { From ecfda6042dbb89aff3b4ca9e7e2e4c440854f0eb Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 11 Nov 2024 18:52:24 -0500 Subject: [PATCH 256/294] correct error message --- src/MANYBODY/pair_sw_angle_table.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/MANYBODY/pair_sw_angle_table.cpp b/src/MANYBODY/pair_sw_angle_table.cpp index 12592f4af6..6e691afeb6 100644 --- a/src/MANYBODY/pair_sw_angle_table.cpp +++ b/src/MANYBODY/pair_sw_angle_table.cpp @@ -724,7 +724,7 @@ double PairSWAngleTable::splint(double *xa, double *ya, double *y2a, int n, doub void PairSWAngleTable::uf_lookup(ParamTable *pm, double x, double &u, double &f) { - if (!std::isfinite(x)) { error->one(FLERR, "Illegal angle in angle style table"); } + if (!std::isfinite(x)) error->one(FLERR, "Illegal angle in pair style sw/angle/table"); double fraction,a,b; From fc805f470c4acc90abeb48ee4db2d1a45ca8e8ea Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 12 Nov 2024 08:42:38 -0800 Subject: [PATCH 257/294] Small change to simplify code --- src/KOKKOS/group_kokkos.cpp | 21 +++++++++++---------- src/KOKKOS/group_kokkos.h | 3 +++ 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/KOKKOS/group_kokkos.cpp b/src/KOKKOS/group_kokkos.cpp index 
3a760c19c3..b2de2e6a64 100644 --- a/src/KOKKOS/group_kokkos.cpp +++ b/src/KOKKOS/group_kokkos.cpp @@ -30,6 +30,7 @@ template GroupKokkos::GroupKokkos(LAMMPS *lmp) : Group(lmp) { atomKK = (AtomKokkos *)atom; + execution_space = ExecutionSpaceFromDevice::space; } // ---------------------------------------------------------------------- @@ -51,7 +52,7 @@ double GroupKokkos::mass(int igroup) if (atomKK->rmass) { auto d_rmass = atomKK->k_rmass.template view(); - atomKK->sync(ExecutionSpaceFromDevice::space,MASK_MASK|RMASS_MASK); + atomKK->sync(execution_space,MASK_MASK|RMASS_MASK); Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_one) { if (d_mask(i) & groupbit) l_one += d_rmass(i); @@ -61,7 +62,7 @@ double GroupKokkos::mass(int igroup) auto d_mass = atomKK->k_mass.template view(); auto d_type = atomKK->k_type.template view(); - atomKK->sync(ExecutionSpaceFromDevice::space,MASK_MASK|TYPE_MASK); + atomKK->sync(execution_space,MASK_MASK|TYPE_MASK); atomKK->k_mass.template sync(); Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_one) { @@ -97,7 +98,7 @@ void GroupKokkos::xcm(int igroup, double masstotal, double *xcm) if (atomKK->rmass) { auto d_rmass = atomKK->k_rmass.template view(); - atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK); + atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK); Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_cmx, double &l_cmy, double &l_cmz) { if (d_mask(i) & groupbit) { @@ -117,7 +118,7 @@ void GroupKokkos::xcm(int igroup, double masstotal, double *xcm) auto d_mass = atomKK->k_mass.template view(); auto d_type = atomKK->k_type.template view(); - atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK); + atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK); atomKK->k_mass.template sync(); Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_cmx, 
double &l_cmy, double &l_cmz) { @@ -162,7 +163,7 @@ void GroupKokkos::vcm(int igroup, double masstotal, double *vcm) if (atomKK->rmass) { auto d_rmass = atomKK->k_rmass.template view(); - atomKK->sync(ExecutionSpaceFromDevice::space,V_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK); + atomKK->sync(execution_space,V_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK); Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) { if (d_mask(i) & groupbit) { @@ -177,7 +178,7 @@ void GroupKokkos::vcm(int igroup, double masstotal, double *vcm) auto d_mass = atomKK->k_mass.template view(); auto d_type = atomKK->k_type.template view(); - atomKK->sync(ExecutionSpaceFromDevice::space,V_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK); + atomKK->sync(execution_space,V_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK); atomKK->k_mass.template sync(); Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) { @@ -224,7 +225,7 @@ void GroupKokkos::angmom(int igroup, double *xcm, double *lmom) if (atomKK->rmass) { auto d_rmass = atomKK->k_rmass.template view(); - atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK); + atomKK->sync(execution_space,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK); Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) { if (d_mask(i) & groupbit) { @@ -247,7 +248,7 @@ void GroupKokkos::angmom(int igroup, double *xcm, double *lmom) auto d_mass = atomKK->k_mass.template view(); auto d_type = atomKK->k_type.template view(); - atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK); + atomKK->sync(execution_space,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK); atomKK->k_mass.template sync(); Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) { @@ -297,7 +298,7 @@ void GroupKokkos::inertia(int igroup, double *xcm, double 
itensor[3] if (atomKK->rmass) { auto d_rmass = atomKK->k_rmass.template view(); - atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK); + atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK); Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_i00, double &l_i11, double &l_i22, double &l_i01, double &l_i12, double &l_i02) { if (d_mask(i) & groupbit) { @@ -323,7 +324,7 @@ void GroupKokkos::inertia(int igroup, double *xcm, double itensor[3] auto d_mass = atomKK->k_mass.template view(); auto d_type = atomKK->k_type.template view(); - atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK); + atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK); atomKK->k_mass.template sync(); Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_i00, double &l_i11, double &l_i22, double &l_i01, double &l_i12, double &l_i02) { diff --git a/src/KOKKOS/group_kokkos.h b/src/KOKKOS/group_kokkos.h index a38187db10..f23023b17c 100644 --- a/src/KOKKOS/group_kokkos.h +++ b/src/KOKKOS/group_kokkos.h @@ -28,6 +28,9 @@ class GroupKokkos : public Group { void vcm(int, double, double *); // center-of-mass velocity of group void angmom(int, double *, double *); // angular momentum of group void inertia(int, double *, double[3][3]); // inertia tensor + + private: + ExecutionSpace execution_space; }; } // namespace LAMMPS_NS From 5673375d2153c85e24ebfc371c0c6a603cfa15e3 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 12 Nov 2024 12:32:37 -0700 Subject: [PATCH 258/294] Add more related commands to docs --- doc/src/fix_acks2_reaxff.rst | 3 ++- doc/src/fix_qeq_reaxff.rst | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/src/fix_acks2_reaxff.rst b/doc/src/fix_acks2_reaxff.rst index 566e17a330..79a9cf8ea6 100644 --- a/doc/src/fix_acks2_reaxff.rst +++ b/doc/src/fix_acks2_reaxff.rst @@ -123,7 +123,8 @@ components in non-periodic directions. 
Related commands """""""""""""""" -:doc:`pair_style reaxff `, :doc:`fix qeq/reaxff ` +:doc:`pair_style reaxff `, :doc:`fix qeq/reaxff `, +:doc:`fix qtpi/reaxff ` Default """"""" diff --git a/doc/src/fix_qeq_reaxff.rst b/doc/src/fix_qeq_reaxff.rst index c449c8cda9..e1a09c4fc3 100644 --- a/doc/src/fix_qeq_reaxff.rst +++ b/doc/src/fix_qeq_reaxff.rst @@ -139,7 +139,8 @@ as an atom-style variable using the *potential* keyword for `fix efield`. Related commands """""""""""""""" -:doc:`pair_style reaxff `, :doc:`fix qeq/shielded ` +:doc:`pair_style reaxff `, :doc:`fix qeq/shielded `, +:doc:`fix acks2/reaxff `, :doc:`fix qtpie/reaxff ` Default """"""" From 1b65fb5a1af8ad6458bf683799020c15b434e595 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 12 Nov 2024 15:05:18 -0500 Subject: [PATCH 259/294] avoid memory leak --- src/MC/fix_charge_regulation.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/MC/fix_charge_regulation.cpp b/src/MC/fix_charge_regulation.cpp index ddf14f6804..f5be7a041b 100644 --- a/src/MC/fix_charge_regulation.cpp +++ b/src/MC/fix_charge_regulation.cpp @@ -174,9 +174,9 @@ FixChargeRegulation::~FixChargeRegulation() { neighbor->exclusion_group_group_delete(exclusion_group, igroupall); } - if (groupstrings) { + if (ngroups > 0) { for (int i = 0; i < ngroups; ++i) delete[] groupstrings[i]; - memory->destroy(groupstrings); + memory->sfree(groupstrings); } } From 0f2fd8a882365b7dd85e4f73de1ed72d6a52a4aa Mon Sep 17 00:00:00 2001 From: jtclemm Date: Wed, 13 Nov 2024 08:51:57 -0700 Subject: [PATCH 260/294] Pesky backticks --- doc/documentation_conventions.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/documentation_conventions.md b/doc/documentation_conventions.md index c4757b85e9..80e1798116 100644 --- a/doc/documentation_conventions.md +++ b/doc/documentation_conventions.md @@ -10,7 +10,7 @@ Last change: 2022-12-30 In fall 2019, the LAMMPS documentation file format has changed from a home grown 
markup designed to generate HTML format files only, to -[reStructuredText](https://docutils.sourceforge.io/rst.html>. For a +[reStructuredText](https://docutils.sourceforge.io/rst.html>). For a transition period all files in the old .txt format were transparently converted to .rst and then processed. The `txt2rst tool` is still included in the distribution to obtain an initial .rst file for legacy @@ -64,7 +64,7 @@ Groups of shell commands or LAMMPS input script or C/C++/Python source code should be typeset into a `.. code-block::` section. A syntax highlighting extension for LAMMPS input scripts is provided, so `LAMMPS` can be used to indicate the language in the code block in addition to -`bash`, `c`, `c++`, `console`, `csh`, `diff', `fortran`, `json`, `make`, +`bash`, `c`, `c++`, `console`, `csh`, `diff`, `fortran`, `json`, `make`, `perl`, `powershell`, `python`, `sh`, or `tcl`, `text`, or `yaml`. When no syntax style is indicated, no syntax highlighting is performed. When typesetting commands executed on the shell, please do not prefix @@ -84,7 +84,7 @@ block can be used, followed by multiple `.. tab::` blocks, one for each alternative. This is only used for HTML output. For other outputs, the `.. tabs::` directive is transparently removed and the individual `.. tab::` blocks will be replaced with an -`.. admonition::`` block. Thus in PDF and ePUB output those will +`.. admonition::` block. Thus in PDF and ePUB output those will be realized as sequential and plain notes. Special remarks can be highlighted with a `.. 
note::` block and From 0463434ff9c58b3ca5509c652f16fe1e2b075d57 Mon Sep 17 00:00:00 2001 From: jtclemm Date: Wed, 13 Nov 2024 08:55:25 -0700 Subject: [PATCH 261/294] adding hyperlink --- doc/documentation_conventions.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/documentation_conventions.md b/doc/documentation_conventions.md index 80e1798116..b91bfda5ac 100644 --- a/doc/documentation_conventions.md +++ b/doc/documentation_conventions.md @@ -45,8 +45,8 @@ what kind of information and sections are needed. ## Formatting conventions -For headlines we try to follow the conventions posted here: -https://documentation-style-guide-sphinx.readthedocs.io/en/latest/style-guide.html#headings +For headlines we try to follow the conventions posted [here] +(https://documentation-style-guide-sphinx.readthedocs.io/en/latest/style-guide.html#headings). It seems to be sufficient to have this consistent only within any single file and it is not (yet) enforced strictly, but making this globally consistent makes it easier to move sections around. From 8dfb63206e4ff24720ece514270a27065bcda323 Mon Sep 17 00:00:00 2001 From: jtclemm Date: Wed, 13 Nov 2024 08:56:23 -0700 Subject: [PATCH 262/294] extra space --- doc/documentation_conventions.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/documentation_conventions.md b/doc/documentation_conventions.md index b91bfda5ac..e02b0c77a3 100644 --- a/doc/documentation_conventions.md +++ b/doc/documentation_conventions.md @@ -45,8 +45,7 @@ what kind of information and sections are needed. ## Formatting conventions -For headlines we try to follow the conventions posted [here] -(https://documentation-style-guide-sphinx.readthedocs.io/en/latest/style-guide.html#headings). +For headlines we try to follow the conventions posted [here](https://documentation-style-guide-sphinx.readthedocs.io/en/latest/style-guide.html#headings). 
It seems to be sufficient to have this consistent only within any single file and it is not (yet) enforced strictly, but making this globally consistent makes it easier to move sections around. From e192c4d5836dacf45a510131645bcd534e390ecd Mon Sep 17 00:00:00 2001 From: jtclemm Date: Wed, 13 Nov 2024 09:11:22 -0700 Subject: [PATCH 263/294] Minor edits to workflow doc --- doc/github-development-workflow.md | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/doc/github-development-workflow.md b/doc/github-development-workflow.md index e16ae82764..69bf00b707 100644 --- a/doc/github-development-workflow.md +++ b/doc/github-development-workflow.md @@ -6,7 +6,9 @@ choices the LAMMPS developers have agreed on. Git and GitHub provide the tools, but do not set policies, so it is up to the developers to come to an agreement as to how to define and interpret policies. This document is likely to change as our experiences and needs change, and we try to -adapt it accordingly. Last change 2023-02-10. +adapt it accordingly. + +Last change: 2023-02-10 ## Table of Contents @@ -72,7 +74,7 @@ be assigned to signal urgency to merge this pull request quickly. People can be assigned to review a pull request in two ways: * They can be assigned manually to review a pull request - by the submitter or a LAMMPS developer + by the submitter or a LAMMPS developer. * They can be automatically assigned, because a developer's GitHub handle matches a file pattern in the `.github/CODEOWNERS` file, which associates developers with the code they contributed and @@ -86,9 +88,9 @@ required before merging, in addition to passing all automated compilation and unit tests. Merging counts as implicit approval, so does submission of a pull request (by a LAMMPS developer). So the person doing the merge may not also submit an approving review. The GitHub -feature, that reviews from code owners are "hard" reviews (i.e. 
they -must all approve before merging is allowed), is currently disabled. -It is in the discretion of the merge maintainer to assess when a +feature that reviews from code owners are "hard" reviews (i.e. they +must all approve before merging is allowed) is currently disabled. +It is at the discretion of the merge maintainer to assess when a sufficient degree of approval has been reached, especially from external collaborators. Reviews may be (automatically) dismissed, when the reviewed code has been changed. Review may be requested a second time. @@ -147,7 +149,8 @@ only contain bug fixes, feature additions to peripheral functionality, and documentation updates. In between stable releases, bug fixes and infrastructure updates are back-ported from the "develop" branch to the "maintenance" branch and occasionally merged into "stable" and published -as update releases. +as update releases. Further explanation of LAMMPS versions can be found +[in the documentation](https://docs.lammps.org/Manual_version.html). 
## Project Management From 4dbb5ade76032bc75882ae307c176463ab6f32fd Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 14 Nov 2024 07:59:17 -0500 Subject: [PATCH 264/294] avoid crashes with 180 degree angles --- src/MANYBODY/pair_sw_angle_table.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/MANYBODY/pair_sw_angle_table.cpp b/src/MANYBODY/pair_sw_angle_table.cpp index 6e691afeb6..9bd0316cf5 100644 --- a/src/MANYBODY/pair_sw_angle_table.cpp +++ b/src/MANYBODY/pair_sw_angle_table.cpp @@ -402,14 +402,18 @@ void PairSWAngleTable::threebody_table(Param *paramij, Param *paramik, ParamTabl rinv12 = 1.0/(r1*r2); cs = (delr1[0]*delr2[0] + delr1[1]*delr2[1] + delr1[2]*delr2[2]) * rinv12; - - var = acos(cs); + cs = MAX(-1.0,MIN(cs,1.0)); // look up energy (f(theta), ftheta) and force (df(theta)/dtheta, fprimetheta) at // angle theta (var) in angle table belonging to parameter set paramijk + + var = acos(cs); uf_lookup(table_paramijk, var, ftheta, fprimetheta); - acosprime = 1.0 / (sqrt(1 - cs*cs ) ); + if ((cs*cs - 1.0) != 0.0) + acosprime = 1.0 / (sqrt(1 - cs*cs ) ); + else + acosprime = 0.0; facradtable = facexp*ftheta; frad1table = facradtable*gsrainvsq1; From a06275d18ed854f8be29590e8a21b06dafac3bc5 Mon Sep 17 00:00:00 2001 From: jtclemm Date: Wed, 13 Nov 2024 08:51:57 -0700 Subject: [PATCH 265/294] Pesky backticks --- doc/documentation_conventions.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/documentation_conventions.md b/doc/documentation_conventions.md index c4757b85e9..80e1798116 100644 --- a/doc/documentation_conventions.md +++ b/doc/documentation_conventions.md @@ -10,7 +10,7 @@ Last change: 2022-12-30 In fall 2019, the LAMMPS documentation file format has changed from a home grown markup designed to generate HTML format files only, to -[reStructuredText](https://docutils.sourceforge.io/rst.html>. For a +[reStructuredText](https://docutils.sourceforge.io/rst.html). 
For a transition period all files in the old .txt format were transparently converted to .rst and then processed. The `txt2rst tool` is still included in the distribution to obtain an initial .rst file for legacy @@ -64,7 +64,7 @@ Groups of shell commands or LAMMPS input script or C/C++/Python source code should be typeset into a `.. code-block::` section. A syntax highlighting extension for LAMMPS input scripts is provided, so `LAMMPS` can be used to indicate the language in the code block in addition to -`bash`, `c`, `c++`, `console`, `csh`, `diff', `fortran`, `json`, `make`, +`bash`, `c`, `c++`, `console`, `csh`, `diff`, `fortran`, `json`, `make`, `perl`, `powershell`, `python`, `sh`, or `tcl`, `text`, or `yaml`. When no syntax style is indicated, no syntax highlighting is performed. When typesetting commands executed on the shell, please do not prefix @@ -84,7 +84,7 @@ block can be used, followed by multiple `.. tab::` blocks, one for each alternative. This is only used for HTML output. For other outputs, the `.. tabs::` directive is transparently removed and the individual `.. tab::` blocks will be replaced with an -`.. admonition::`` block. Thus in PDF and ePUB output those will +`.. admonition::` block. Thus in PDF and ePUB output those will be realized as sequential and plain notes. Special remarks can be highlighted with a `.. note::` block and From d8bd1ae3b8100754efe7ce84089706c30a4f5978 Mon Sep 17 00:00:00 2001 From: jtclemm Date: Wed, 13 Nov 2024 08:55:25 -0700 Subject: [PATCH 266/294] adding hyperlink --- doc/documentation_conventions.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/documentation_conventions.md b/doc/documentation_conventions.md index 80e1798116..b91bfda5ac 100644 --- a/doc/documentation_conventions.md +++ b/doc/documentation_conventions.md @@ -45,8 +45,8 @@ what kind of information and sections are needed. 
## Formatting conventions -For headlines we try to follow the conventions posted here: -https://documentation-style-guide-sphinx.readthedocs.io/en/latest/style-guide.html#headings +For headlines we try to follow the conventions posted [here] +(https://documentation-style-guide-sphinx.readthedocs.io/en/latest/style-guide.html#headings). It seems to be sufficient to have this consistent only within any single file and it is not (yet) enforced strictly, but making this globally consistent makes it easier to move sections around. From c06cab269650760cd8e5c412848da314856ed7aa Mon Sep 17 00:00:00 2001 From: jtclemm Date: Wed, 13 Nov 2024 08:56:23 -0700 Subject: [PATCH 267/294] extra space --- doc/documentation_conventions.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/documentation_conventions.md b/doc/documentation_conventions.md index b91bfda5ac..e02b0c77a3 100644 --- a/doc/documentation_conventions.md +++ b/doc/documentation_conventions.md @@ -45,8 +45,7 @@ what kind of information and sections are needed. ## Formatting conventions -For headlines we try to follow the conventions posted [here] -(https://documentation-style-guide-sphinx.readthedocs.io/en/latest/style-guide.html#headings). +For headlines we try to follow the conventions posted [here](https://documentation-style-guide-sphinx.readthedocs.io/en/latest/style-guide.html#headings). It seems to be sufficient to have this consistent only within any single file and it is not (yet) enforced strictly, but making this globally consistent makes it easier to move sections around. 
From 9c290a0facbe82e624cb7f9c875d0f98f9b43f99 Mon Sep 17 00:00:00 2001 From: jtclemm Date: Wed, 13 Nov 2024 09:11:22 -0700 Subject: [PATCH 268/294] Minor edits to workflow doc --- doc/github-development-workflow.md | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/doc/github-development-workflow.md b/doc/github-development-workflow.md index e16ae82764..69bf00b707 100644 --- a/doc/github-development-workflow.md +++ b/doc/github-development-workflow.md @@ -6,7 +6,9 @@ choices the LAMMPS developers have agreed on. Git and GitHub provide the tools, but do not set policies, so it is up to the developers to come to an agreement as to how to define and interpret policies. This document is likely to change as our experiences and needs change, and we try to -adapt it accordingly. Last change 2023-02-10. +adapt it accordingly. + +Last change: 2023-02-10 ## Table of Contents @@ -72,7 +74,7 @@ be assigned to signal urgency to merge this pull request quickly. People can be assigned to review a pull request in two ways: * They can be assigned manually to review a pull request - by the submitter or a LAMMPS developer + by the submitter or a LAMMPS developer. * They can be automatically assigned, because a developer's GitHub handle matches a file pattern in the `.github/CODEOWNERS` file, which associates developers with the code they contributed and @@ -86,9 +88,9 @@ required before merging, in addition to passing all automated compilation and unit tests. Merging counts as implicit approval, so does submission of a pull request (by a LAMMPS developer). So the person doing the merge may not also submit an approving review. The GitHub -feature, that reviews from code owners are "hard" reviews (i.e. they -must all approve before merging is allowed), is currently disabled. -It is in the discretion of the merge maintainer to assess when a +feature that reviews from code owners are "hard" reviews (i.e. 
they +must all approve before merging is allowed) is currently disabled. +It is at the discretion of the merge maintainer to assess when a sufficient degree of approval has been reached, especially from external collaborators. Reviews may be (automatically) dismissed, when the reviewed code has been changed. Review may be requested a second time. @@ -147,7 +149,8 @@ only contain bug fixes, feature additions to peripheral functionality, and documentation updates. In between stable releases, bug fixes and infrastructure updates are back-ported from the "develop" branch to the "maintenance" branch and occasionally merged into "stable" and published -as update releases. +as update releases. Further explanation of LAMMPS versions can be found +[in the documentation](https://docs.lammps.org/Manual_version.html). ## Project Management From e3e502f0950d4863912946abd995d6329c5974d1 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 14 Nov 2024 15:26:46 -0500 Subject: [PATCH 269/294] add note that pair_coeff settings used for fix adapt must be set explicitly --- doc/src/fix_adapt.rst | 8 ++++++++ doc/src/fix_adapt_fep.rst | 22 ++++++++++++++++------ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/doc/src/fix_adapt.rst b/doc/src/fix_adapt.rst index 1b5282f741..1ddf80cbdb 100644 --- a/doc/src/fix_adapt.rst +++ b/doc/src/fix_adapt.rst @@ -119,6 +119,14 @@ style supports it. Note that the :doc:`pair_style ` and to specify these parameters initially; the fix adapt command simply overrides the parameters. +.. note:: + + Pair_coeff settings must be made **explicitly** in order for fix + adapt to be able to change them. Settings inferred from mixing + are not suitable. If necessary all mixed settings can be output + to a file using the :doc:`write_coeff command ` and + then the desired mixed pair_coeff settings copied from that file. + The *pstyle* argument is the name of the pair style. 
If :doc:`pair_style hybrid or hybrid/overlay ` is used, *pstyle* should be a sub-style name. If there are multiple diff --git a/doc/src/fix_adapt_fep.rst b/doc/src/fix_adapt_fep.rst index 3bcdfc5035..981a1c5298 100644 --- a/doc/src/fix_adapt_fep.rst +++ b/doc/src/fix_adapt_fep.rst @@ -116,12 +116,22 @@ style supports it. Note that the :doc:`pair_style ` and to specify these parameters initially; the fix adapt command simply overrides the parameters. -The *pstyle* argument is the name of the pair style. If :doc:`pair_style hybrid or hybrid/overlay ` is used, *pstyle* should be -a sub-style name. For example, *pstyle* could be specified as "soft" -or "lubricate". The *pparam* argument is the name of the parameter to -change. This is the current list of pair styles and parameters that -can be varied by this fix. See the doc pages for individual pair -styles and their energy formulas for the meaning of these parameters: +.. note:: + + Pair_coeff settings must be made **explicitly** in order for fix + adapt/fep to be able to change them. Settings inferred from mixing + are not suitable. If necessary all mixed settings can be output + to a file using the :doc:`write_coeff command ` and + then the desired mixed pair_coeff settings copied from that file. + +The *pstyle* argument is the name of the pair style. If +:doc:`pair_style hybrid or hybrid/overlay ` is used, +*pstyle* should be a sub-style name. For example, *pstyle* could be +specified as "soft" or "lubricate". The *pparam* argument is the name +of the parameter to change. This is the current list of pair styles and +parameters that can be varied by this fix. 
See the doc pages for +individual pair styles and their energy formulas for the meaning of +these parameters: +------------------------------------------------------------------------------+-------------------------+------------+ | :doc:`born ` | a,b,c | type pairs | From 0f9f80ad453b47fb3e72fcda7868874f62a8cf52 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 14 Nov 2024 15:30:09 -0500 Subject: [PATCH 270/294] update for fix colvars/kk --- doc/src/Commands_fix.rst | 2 +- doc/src/fix_colvars.rst | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/doc/src/Commands_fix.rst b/doc/src/Commands_fix.rst index 04d1a9969a..58fda862c6 100644 --- a/doc/src/Commands_fix.rst +++ b/doc/src/Commands_fix.rst @@ -44,7 +44,7 @@ OPT. * :doc:`brownian/sphere ` * :doc:`charge/regulation ` * :doc:`cmap (k) ` - * :doc:`colvars ` + * :doc:`colvars (k) ` * :doc:`controller ` * :doc:`damping/cundall ` * :doc:`deform (k) ` diff --git a/doc/src/fix_colvars.rst b/doc/src/fix_colvars.rst index 4730e29c67..785893077c 100644 --- a/doc/src/fix_colvars.rst +++ b/doc/src/fix_colvars.rst @@ -1,8 +1,11 @@ .. index:: fix colvars +.. index:: fix colvars/kk fix colvars command =================== +Accelerator Variants: *colvars/kk* + Syntax """""" @@ -118,6 +121,11 @@ thermostat target temperature. The *seed* keyword contains the seed for the random number generator that will be used in the colvars module. +---------- + +.. 
include:: accel_styles.rst + +---------- Restarting """""""""" From 74fea8ae08eba6946f58675fb0444d8efc3bb0d2 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 15 Nov 2024 06:24:29 -0500 Subject: [PATCH 271/294] must initialize copymode in base class --- src/command.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/command.h b/src/command.h index 222ed5e34f..0122727463 100644 --- a/src/command.h +++ b/src/command.h @@ -20,7 +20,7 @@ namespace LAMMPS_NS { class Command : protected Pointers { public: - Command(class LAMMPS *lmp) : Pointers(lmp) {}; + Command(class LAMMPS *lmp) : Pointers(lmp), copymode(0) {}; virtual void command(int, char **) = 0; protected: From 0f70c528a1a43534650846582b3ca8a50d2dd9ef Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 15 Nov 2024 10:42:21 -0500 Subject: [PATCH 272/294] avoid access to uninitialized per-atom data to make valgrind happy --- src/REAXFF/fix_reaxff_species.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/REAXFF/fix_reaxff_species.cpp b/src/REAXFF/fix_reaxff_species.cpp index 0183d2670b..0ded498c31 100644 --- a/src/REAXFF/fix_reaxff_species.cpp +++ b/src/REAXFF/fix_reaxff_species.cpp @@ -143,9 +143,10 @@ FixReaxFFSpecies::FixReaxFFSpecies(LAMMPS *lmp, int narg, char **arg) : x0 = nullptr; clusterID = nullptr; - int ntmp = 1; + int ntmp = atom->nmax; memory->create(x0, ntmp, "reaxff/species:x0"); memory->create(clusterID, ntmp, "reaxff/species:clusterID"); + memset(clusterID, 0, sizeof(double) * ntmp); vector_atom = clusterID; nmax = 0; @@ -441,6 +442,7 @@ void FixReaxFFSpecies::Output_ReaxFF_Bonds(bigint ntimestep, FILE * /*fp*/) memory->destroy(clusterID); memory->create(x0, nmax, "reaxff/species:x0"); memory->create(clusterID, nmax, "reaxff/species:clusterID"); + memset(clusterID, 0, sizeof(double) * nmax); vector_atom = clusterID; } From 6796dc34ad3bd805c907feb61ea47a50c3eb0f3c Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 15 Nov 2024 10:42:30 
-0500 Subject: [PATCH 273/294] clang-format --- src/REAXFF/fix_reaxff_species.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/REAXFF/fix_reaxff_species.cpp b/src/REAXFF/fix_reaxff_species.cpp index 0ded498c31..915b2b0dd0 100644 --- a/src/REAXFF/fix_reaxff_species.cpp +++ b/src/REAXFF/fix_reaxff_species.cpp @@ -194,8 +194,7 @@ FixReaxFFSpecies::FixReaxFFSpecies(LAMMPS *lmp, int narg, char **arg) : if (iarg + ntypes + 1 > narg) utils::missing_cmd_args(FLERR, "fix reaxff/species element", error); - for (int i = 0; i < ntypes; i++) - eletype[i] = arg[iarg + 1 + i]; + for (int i = 0; i < ntypes; i++) eletype[i] = arg[iarg + 1 + i]; GetUniqueElements(); iarg += ntypes + 1; @@ -349,8 +348,7 @@ void FixReaxFFSpecies::setup(int /*vflag*/) ntotal = static_cast(atom->natoms); if (!eleflag) { - for (int i = 0; i < ntypes; i++) - eletype[i] = reaxff->eletype[i+1]; + for (int i = 0; i < ntypes; i++) eletype[i] = reaxff->eletype[i + 1]; GetUniqueElements(); } memory->destroy(Name); From 38d53877c5aa6a47313012723f54b022ab0cfcd8 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 15 Nov 2024 11:01:19 -0500 Subject: [PATCH 274/294] move citeme call(s) to places where labelmaps are used --- src/label_map.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/label_map.cpp b/src/label_map.cpp index c4865d5ace..9934868c49 100644 --- a/src/label_map.cpp +++ b/src/label_map.cpp @@ -43,8 +43,6 @@ LabelMap::LabelMap(LAMMPS *_lmp, int _natomtypes, int _nbondtypes, int _nanglety Pointers(_lmp), natomtypes(_natomtypes), nbondtypes(_nbondtypes), nangletypes(_nangletypes), ndihedraltypes(_ndihedraltypes), nimpropertypes(_nimpropertypes) { - if (lmp->citeme) lmp->citeme->add(cite_type_label_framework); - lmap2lmap.atom = lmap2lmap.bond = lmap2lmap.angle = lmap2lmap.dihedral = lmap2lmap.improper = nullptr; reset_type_labels(); @@ -112,6 +110,8 @@ void LabelMap::modify_lmap(int narg, char **arg) if ((narg < 1) || ((narg > 2) && ((narg % 
2) == 0))) error->all(FLERR, "Incorrect number of arguments for labelmap command"); + if (lmp->citeme) lmp->citeme->add(cite_type_label_framework); + int ntypes; std::vector *labels; std::unordered_map *labels_map; @@ -238,6 +238,8 @@ int LabelMap::find_or_create(const std::string &mylabel, std::vectorsecond; + if (lmp->citeme) lmp->citeme->add(cite_type_label_framework); + // if no match found, create new label at next available index // label map assumed to be intialized with numeric index // user labels are assumed to be alphanumeric (not a number) From 9a096f295f902e9abf9c3868b392bddb9b556993 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 15 Nov 2024 11:06:38 -0500 Subject: [PATCH 275/294] update/correct code owners list --- .github/CODEOWNERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 1a7a35e61f..86418574ce 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -101,7 +101,8 @@ src/group.* @sjplimp src/improper.* @sjplimp src/info.* @akohlmey src/kspace.* @sjplimp -src/lmptyp.h @sjplimp +src/lmptype.h @sjplimp +src/label_map.* @jrgissing @akohlmey src/library.* @sjplimp @akohlmey src/main.cpp @sjplimp src/min_*.* @sjplimp From cf5c42889d64c3bfb13d8a61e8e834eb10dbc16c Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 15 Nov 2024 13:01:23 -0500 Subject: [PATCH 276/294] avoid dereferencing reaxff object when no reaxff pair style in use --- src/REAXFF/fix_qeq_reaxff.cpp | 2 +- src/REAXFF/fix_qtpie_reaxff.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/REAXFF/fix_qeq_reaxff.cpp b/src/REAXFF/fix_qeq_reaxff.cpp index 067272f0f7..dc8fbd5afd 100644 --- a/src/REAXFF/fix_qeq_reaxff.cpp +++ b/src/REAXFF/fix_qeq_reaxff.cpp @@ -344,7 +344,7 @@ void FixQEqReaxFF::allocate_matrix() int mincap; double safezone; - if (reaxflag) { + if (reaxflag && reaxff) { mincap = reaxff->api->system->mincap; safezone = reaxff->api->system->safezone; } else { diff --git 
a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp index 48c1109178..5acdea0482 100644 --- a/src/REAXFF/fix_qtpie_reaxff.cpp +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -370,7 +370,7 @@ void FixQtpieReaxFF::allocate_matrix() int mincap; double safezone; - if (reaxflag) { + if (reaxflag && reaxff) { mincap = reaxff->api->system->mincap; safezone = reaxff->api->system->safezone; } else { From 70f41341c91cc5b1eabd391090ab374162b79140 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 15 Nov 2024 13:26:34 -0500 Subject: [PATCH 277/294] add unit test for qtpie/reaxff --- .../tests/atomic-pair-reaxff-qtpie.yaml | 177 ++++++++++++++++++ unittest/force-styles/tests/gauss_exp.txt | 6 + 2 files changed, 183 insertions(+) create mode 100644 unittest/force-styles/tests/atomic-pair-reaxff-qtpie.yaml create mode 100644 unittest/force-styles/tests/gauss_exp.txt diff --git a/unittest/force-styles/tests/atomic-pair-reaxff-qtpie.yaml b/unittest/force-styles/tests/atomic-pair-reaxff-qtpie.yaml new file mode 100644 index 0000000000..00524a037d --- /dev/null +++ b/unittest/force-styles/tests/atomic-pair-reaxff-qtpie.yaml @@ -0,0 +1,177 @@ +--- +lammps_version: 29 Aug 2024 +tags: slow, unstable +date_generated: Fri Nov 15 13:22:57 2024 +epsilon: 2e-10 +skip_tests: kokkos_omp +prerequisites: ! | + pair reaxff + fix qtpie/reaxff +pre_commands: ! | + echo screen + variable newton_pair delete + variable newton_pair index on + atom_modify map array + units real + atom_style charge + lattice diamond 3.77 + region box block 0 2 0 2 0 2 + create_box 3 box + create_atoms 1 box + displace_atoms all random 0.1 0.1 0.1 623426 + mass 1 1.0 + mass 2 12.0 + mass 3 16.0 + set type 1 type/fraction 2 0.5 998877 + set type 2 type/fraction 3 0.5 887766 + set type 1 charge 0.00 + set type 2 charge 0.01 + set type 3 charge -0.01 + velocity all create 100 4534624 loop geom +post_commands: ! 
| + fix qeq all qtpie/reaxff 1 0.0 8.0 1.0e-20 reaxff ${input_dir}/gauss_exp.txt +input_file: in.empty +pair_style: reaxff NULL checkqeq yes +pair_coeff: ! | + * * ffield.reax.mattsson H C O +extract: ! "" +natoms: 64 +init_vdwl: -3296.3503506624793 +init_coul: -268.63677950571287 +init_stress: ! |- + -1.0115766891336298e+03 -1.2088131753999489e+03 -8.2791874767348656e+02 -2.3899453277851464e+02 1.9901049958702231e+02 -6.5090424115686358e+02 +init_forces: ! |2 + 1 -8.6436073169042203e+01 -2.4400814471028966e+01 1.0905778351740135e+02 + 2 -1.0912360851161381e+02 -1.7785390915789219e+02 -2.2174396944101937e+02 + 3 -1.7205686042941795e+02 1.8323463801374010e+02 1.3330073949860362e+01 + 4 3.5074883698050790e+01 -5.4658573523678996e+01 8.8946983653047340e+01 + 5 1.8037666038924445e+02 1.5258130212658664e+01 -8.1473951707266664e+01 + 6 1.3687016587487102e+02 -2.9993359047535205e+02 3.1610843762057812e+01 + 7 -5.3040478511967407e+01 -1.2906564054460426e+02 -1.6364626011721845e+02 + 8 -1.5098093043741406e+02 4.1651970160590970e+01 1.5340103366817354e+02 + 9 1.7340129928959293e+01 8.8054766076132950e+01 2.4310262794648661e+01 + 10 8.3732473664111822e+01 1.4163802420547773e+02 1.2086953212785974e+02 + 11 -4.2433035390388874e+01 6.5812517236648006e+01 1.3461956725415703e+02 + 12 -9.6266601388748128e+01 -2.5379001755851835e+01 7.7659919394842500e+00 + 13 -6.5393078499150931e+01 -4.9454699468880946e+01 -3.8254305288102898e+01 + 14 8.2607951316806449e+01 -6.8984439086417567e+01 -9.2468559306204710e+01 + 15 -6.6187434150450684e+01 2.1289685674401704e+02 7.8580288164394176e+01 + 16 1.8897543273751987e+02 5.3397684583320228e+01 6.3460608305143431e+01 + 17 1.4554759813765031e+02 -3.9370375612854211e+01 -9.7056561289015221e+01 + 18 -4.5104710861585204e+01 -1.8896520586433442e+02 1.1164278206176432e+02 + 19 2.6896526778434691e+02 3.6948523216182610e+02 -3.3986956263391414e+02 + 20 -7.5462858727084978e+01 -8.6024949892680127e+01 -4.6529428267542672e-01 + 21 -7.3095936427312608e+01 
-5.7463824581552551e+01 -1.1787940398446622e+02 + 22 1.5517223233172072e+02 -1.2975611741340879e+02 8.0541352960578152e+01 + 23 7.9113780255105098e+01 -1.3159747874504722e+01 -2.5876270623789040e+01 + 24 -2.0721357932150298e+02 2.1426998940883044e+02 -1.2404916276617425e+02 + 25 -1.1375172605965993e+02 1.9145650691697844e+02 -8.3221527665980318e+01 + 26 2.8613004810395404e+02 -2.1814761019305757e+02 2.3221022419460809e+02 + 27 -6.4957092731555079e+01 6.8730894090822545e+01 1.7879679090575814e+02 + 28 -3.5591147454501368e+00 3.8919120211497962e+01 3.2899891202019738e+01 + 29 -7.1006654872719238e+01 3.4395396834237154e+01 2.5490290273424105e+01 + 30 -1.7028325267142560e+02 -1.9865767607180553e+02 -1.1525322636144483e+02 + 31 -1.4030343203666592e+02 1.6505252880438636e+02 -1.2536824728321348e+02 + 32 2.8083940532792852e+01 -5.9613080258602423e+01 -1.7589243121123940e+02 + 33 -6.0786787938223988e+01 -6.2914083425882403e+01 6.1724187337270543e+01 + 34 -2.2381506031982109e+01 1.0258481649700494e+02 -7.2581426601092289e+01 + 35 2.6826897000164860e+02 -2.1830766483611634e+02 1.2929910897210587e+02 + 36 1.0617788606545010e+02 1.8844480945948908e+02 -1.9952637621677195e+02 + 37 -1.8500771626166176e+02 1.2691494490100189e+02 -6.1799194093629900e+01 + 38 -2.9568079732966703e+02 1.0466330586524869e+02 1.5539102098367138e+02 + 39 9.0313772894092821e+01 -5.7763893527493209e+01 2.4405485803219346e+01 + 40 2.0368385039374967e+01 -1.0212243725288241e+02 5.8977256992383225e+01 + 41 -5.4472403223120203e+01 1.3664060494004097e+01 -7.0498722233072925e+01 + 42 -1.0110613172512772e+02 3.7733470557342038e+01 -7.1384152705391784e+01 + 43 2.2496360806160274e+02 9.5374846176895645e+01 1.2314600313131321e+02 + 44 8.9930414141415653e+01 -9.8548000406723986e+01 -8.6016793720029909e+01 + 45 1.8042451487789609e+01 1.2291681595021672e+02 4.8750513798444771e+01 + 46 1.4360528923605511e+01 -2.8393705665522205e+01 2.5000665776800748e+01 + 47 3.3430180733495789e+01 -1.9134003719608592e+02 
-6.9378872162649785e+01 + 48 6.5977481507414737e+01 -2.0569144157798024e+02 -2.3305633430859448e+01 + 49 -3.6805309882726834e+02 -2.5080697208449138e+01 1.0725330582435194e+02 + 50 3.3568380074196661e+01 -7.2887959381521910e+01 8.4247363622347109e+01 + 51 1.3937571489940009e+02 -1.0241922204671263e+02 1.5837793886668237e+02 + 52 -1.1015736921987484e+02 1.2143381508599626e+02 -1.3301954780997897e+02 + 53 -5.5053511032747757e+01 3.3318719859853354e+02 -5.4695756604566270e-02 + 54 -9.3809048702850468e+00 6.3003589530582012e+01 1.8485428176102951e+02 + 55 1.1268442794862931e+02 -1.7912573126673601e+02 7.6216834182693233e+01 + 56 -4.5797371929680743e+01 -1.3685285921363297e+02 4.1670118112644616e+01 + 57 9.5486141883804535e+01 7.3675172824019725e+01 -6.4017779318177091e+01 + 58 -2.6940387352055815e+01 -9.1882719416077734e+00 -1.6496876672222488e+01 + 59 -6.1110604213739784e+01 1.0339564891274583e+02 8.0387279300896552e+01 + 60 -1.3438307290047304e+01 6.2821560489016619e+01 -1.4960910695536089e+02 + 61 -3.7436807177941901e+01 -1.0060440409572418e+02 -7.2129567761936670e+01 + 62 1.5913069360497735e+01 -7.4587444709681003e+00 2.7411930168532034e+01 + 63 1.3132402121938716e+02 -1.2633666867998599e+02 -5.9237822651056007e+01 + 64 1.0528416436964088e+02 1.1185676743727093e+02 7.6927706681792429e+01 +run_vdwl: -3296.3468820327244 +run_coul: -268.63661874354375 +run_stress: ! |- + -1.0114879837664473e+03 -1.2087430287015966e+03 -8.2783958944769802e+02 -2.3908224870200783e+02 1.9895929948999779e+02 -6.5093393277361292e+02 +run_forces: ! 
|2 + 1 -8.6437645087040067e+01 -2.4400562180957351e+01 1.0906066254553539e+02 + 2 -1.0912275665241053e+02 -1.7786252590616218e+02 -2.2173930761917526e+02 + 3 -1.7205763157578019e+02 1.8323288100722453e+02 1.3329429976402201e+01 + 4 3.5076762638460409e+01 -5.4659870228622246e+01 8.8949923949362500e+01 + 5 1.8037851727311079e+02 1.5258819157296621e+01 -8.1471824505046186e+01 + 6 1.3693758232722558e+02 -2.9996076554788232e+02 3.1544103577304767e+01 + 7 -5.3040940882157734e+01 -1.2906590024224255e+02 -1.6365072194268137e+02 + 8 -1.5098199162393547e+02 4.1652246872814757e+01 1.5340185354669362e+02 + 9 1.7338937131206805e+01 8.8052623671376480e+01 2.4311464733168261e+01 + 10 8.3731216255426517e+01 1.4163544349001512e+02 1.2086773194932107e+02 + 11 -4.2431645167783358e+01 6.5809284879610303e+01 1.3461058585127651e+02 + 12 -9.6268502974108401e+01 -2.5380508031933715e+01 7.7613919613213387e+00 + 13 -6.5394480799184038e+01 -4.9454690134569510e+01 -3.8251921533789378e+01 + 14 8.2607648143581144e+01 -6.8981518310093449e+01 -9.2465919648906919e+01 + 15 -6.6184911564685194e+01 2.1289827790699042e+02 7.8579764996083128e+01 + 16 1.8897212100631526e+02 5.3401557991805042e+01 6.3454265397142500e+01 + 17 1.4554879673647156e+02 -3.9369818695332363e+01 -9.7059433050641857e+01 + 18 -4.5104364652465691e+01 -1.8896630466961875e+02 1.1164283921546559e+02 + 19 2.6896544439509694e+02 3.6948380976016932e+02 -3.3986849873937234e+02 + 20 -7.5474652635907873e+01 -8.6015213272245504e+01 -4.8259676480768809e-01 + 21 -7.3095940937548960e+01 -5.7464788467137694e+01 -1.1787855066724634e+02 + 22 1.5517351794732136e+02 -1.2975689546114967e+02 8.0540859481818742e+01 + 23 7.9115865455975012e+01 -1.3150472802357561e+01 -2.5876503956346053e+01 + 24 -2.0722276003223385e+02 2.1428270990874043e+02 -1.2405357611708547e+02 + 25 -1.1375429959489580e+02 1.9146061264686494e+02 -8.3222256641294663e+01 + 26 2.8613970510624404e+02 -2.1815485596507278e+02 2.3221441960858138e+02 + 27 -6.4953545994003377e+01 
6.8732504333245046e+01 1.7879777515705371e+02 + 28 -3.5609706858523174e+00 3.8914845051174211e+01 3.2895014258375397e+01 + 29 -7.1006962970615220e+01 3.4395946962585107e+01 2.5490302918559674e+01 + 30 -1.7028397298399719e+02 -1.9865780309887722e+02 -1.1525330878428163e+02 + 31 -1.4030595583104076e+02 1.6505149109610096e+02 -1.2536961295119134e+02 + 32 2.8084746879715730e+01 -5.9606705649399501e+01 -1.7588207877218977e+02 + 33 -6.0786242960574022e+01 -6.2913885176828828e+01 6.1724433850103161e+01 + 34 -2.2381168039316268e+01 1.0258398798459802e+02 -7.2580420302975753e+01 + 35 2.6830990081622787e+02 -2.1835204404249015e+02 1.2931018843147504e+02 + 36 1.0617963217845744e+02 1.8843796909876622e+02 -1.9952176446060335e+02 + 37 -1.8503402227685811e+02 1.2695504061699209e+02 -6.1823411355500355e+01 + 38 -2.9568399770704622e+02 1.0466662636959578e+02 1.5539009610875451e+02 + 39 9.0314570392701199e+01 -5.7764115394488343e+01 2.4405095735494033e+01 + 40 2.0369156227530599e+01 -1.0212187254286960e+02 5.8977622919450276e+01 + 41 -5.4470510869266015e+01 1.3662227244573693e+01 -7.0490380604491506e+01 + 42 -1.0110530047858049e+02 3.7731202989968210e+01 -7.1386374496910719e+01 + 43 2.2495960799582232e+02 9.5372714343303912e+01 1.2314241456430595e+02 + 44 8.9934882887196963e+01 -9.8543527246862695e+01 -8.6022172212180365e+01 + 45 1.8033520031139599e+01 1.2290938260195539e+02 4.8756688104804162e+01 + 46 1.4361839506594013e+01 -2.8393917262125949e+01 2.5000422631182474e+01 + 47 3.3429877782459641e+01 -1.9133540583883391e+02 -6.9377262749149708e+01 + 48 6.5990004078562464e+01 -2.0569338794984392e+02 -2.3314637198125183e+01 + 49 -3.6809322631363733e+02 -2.5025981981696823e+01 1.0734144239898833e+02 + 50 3.3569219922950602e+01 -7.2886698417202680e+01 8.4245785013977056e+01 + 51 1.3937840161131851e+02 -1.0242257034381630e+02 1.5838167570232804e+02 + 52 -1.1015632519571759e+02 1.2143397586854937e+02 -1.3302347496060023e+02 + 53 -5.5089426238213143e+01 3.3316281975316286e+02 
-7.4271240172002706e-02 + 54 -9.3687742955109137e+00 6.2990741970756417e+01 1.8487238090605700e+02 + 55 1.1268678311310606e+02 -1.7912913164827566e+02 7.6217416717889748e+01 + 56 -4.5806154616486943e+01 -1.3684453580988600e+02 4.1676759373489766e+01 + 57 9.5485704227965911e+01 7.3672454441368231e+01 -6.4019152364471736e+01 + 58 -2.6940596532815384e+01 -9.1863734979434319e+00 -1.6497051289566816e+01 + 59 -6.1118963218972965e+01 1.0340128135495597e+02 8.0392569432165885e+01 + 60 -1.3439582130914836e+01 6.2820376074347955e+01 -1.4960914576393716e+02 + 61 -3.7435810911347573e+01 -1.0060262070691427e+02 -7.2129466739698870e+01 + 62 1.5913491338811767e+01 -7.4593609075659995e+00 2.7412825126279046e+01 + 63 1.3132146995609114e+02 -1.2633718643910591e+02 -5.9233334916534574e+01 + 64 1.0528511106781713e+02 1.1185795842149707e+02 7.6928226208764087e+01 +... diff --git a/unittest/force-styles/tests/gauss_exp.txt b/unittest/force-styles/tests/gauss_exp.txt new file mode 100644 index 0000000000..30c637d124 --- /dev/null +++ b/unittest/force-styles/tests/gauss_exp.txt @@ -0,0 +1,6 @@ +# Gaussian orbital exponents (required for fix qtpie/reaxff) taken from Table 2.2 +# of Chen, J. (2009). Theory and applications of fluctuating-charge models. +# The units of the exponents are 1 / (Bohr radius)^2 . 
+1 0.5434 # H +2 0.2069 # C +3 0.2240 # O From 55f53f06baa29d5596f3d69a1f4723506ecabc31 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 15 Nov 2024 13:27:11 -0500 Subject: [PATCH 278/294] use utils::open_potential for gaussian exponent file --- src/REAXFF/fix_qtpie_reaxff.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp index 5acdea0482..c9b924a685 100644 --- a/src/REAXFF/fix_qtpie_reaxff.cpp +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -199,7 +199,9 @@ void FixQtpieReaxFF::pertype_parameters(char *arg) if (comm->me == 0) { gauss_exp[0] = 0.0; try { - TextFileReader reader(gauss_file,"qtpie/reaxff gaussian exponents"); + FILE *fp = utils::open_potential(gauss_file, lmp, nullptr); + if (!fp) throw TokenizerException("Fix qtpie/reaxff: coul not open gauss file", gauss_file); + TextFileReader reader(fp,"qtpie/reaxff gaussian exponents"); reader.ignore_comments = true; for (int i = 1; i <= ntypes; i++) { const char *line = reader.next_line(); From ae1c5651ef2ea853c6308568fc433d79d378050a Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 15 Nov 2024 13:27:28 -0500 Subject: [PATCH 279/294] accept qtpie/reaxff also for reaxff/omp --- src/OPENMP/pair_reaxff_omp.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/OPENMP/pair_reaxff_omp.cpp b/src/OPENMP/pair_reaxff_omp.cpp index 96e6389870..85369cc7bf 100644 --- a/src/OPENMP/pair_reaxff_omp.cpp +++ b/src/OPENMP/pair_reaxff_omp.cpp @@ -106,7 +106,9 @@ void PairReaxFFOMP::init_style() auto acks2_fixes = modify->get_fix_by_style("^acks2/reax"); int have_qeq = modify->get_fix_by_style("^qeq/reax").size() - + modify->get_fix_by_style("^qeq/shielded").size() + acks2_fixes.size(); + + modify->get_fix_by_style("^qeq/shielded").size() + acks2_fixes.size() + + modify->get_fix_by_style("^qtpie/reax").size(); + if (qeqflag && (have_qeq != 1)) error->all(FLERR,"Pair style reaxff/omp requires use of exactly one of 
the " From 4dd1448dd033ba040abd38a38f61d3a61c5c6a34 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 15 Nov 2024 14:12:19 -0500 Subject: [PATCH 280/294] support providing spring constant as equal or atom style variable --- doc/src/fix_spring_self.rst | 19 +++- src/fix_spring_self.cpp | 100 ++++++++++++++---- src/fix_spring_self.h | 5 +- .../tests/fix-timestep-spring_self_atom.yaml | 77 ++++++++++++++ .../tests/fix-timestep-spring_self_equal.yaml | 77 ++++++++++++++ 5 files changed, 256 insertions(+), 22 deletions(-) create mode 100644 unittest/force-styles/tests/fix-timestep-spring_self_atom.yaml create mode 100644 unittest/force-styles/tests/fix-timestep-spring_self_equal.yaml diff --git a/doc/src/fix_spring_self.rst b/doc/src/fix_spring_self.rst index 4453fd61c5..f8354b17d7 100644 --- a/doc/src/fix_spring_self.rst +++ b/doc/src/fix_spring_self.rst @@ -13,7 +13,7 @@ Syntax * ID, group-ID are documented in :doc:`fix ` command * spring/self = style name of this fix command -* K = spring constant (force/distance units) +* K = spring constant (force/distance units), can be a variable (see below) * dir = xyz, xy, xz, yz, x, y, or z (optional, default: xyz) Examples @@ -22,6 +22,7 @@ Examples .. code-block:: LAMMPS fix tether boundary-atoms spring/self 10.0 + fix var all spring/self v_kvar fix zrest move spring/self 10.0 z Description @@ -42,6 +43,22 @@ directions, but it can be limited to the xy-, xz-, yz-plane and the x-, y-, or z-direction, thus restraining the atoms to a line or a plane, respectively. +The force constant *k* can be specified as an equal-style or atom-style +:doc:`variable `. If the value is a variable, it should be specified +as v_name, where name is the variable name. In this case, the variable +will be evaluated each time step, and its value(s) will be used as +force constant for the spring force. 
+ +Equal-style variables can specify formulas with various mathematical +functions and include :doc:`thermo_style ` command +keywords for the simulation box parameters, time step, and elapsed time. +Thus, it is easy to specify a time-dependent force field. + +Atom-style variables can specify the same formulas as equal-style +variables but can also include per-atom values, such as atom +coordinates. Thus, it is easy to specify a spatially-dependent force +field with optional time-dependence as well. + Restart, fix_modify, output, run start/stop, minimize info """"""""""""""""""""""""""""""""""""""""""""""""""""""""""" diff --git a/src/fix_spring_self.cpp b/src/fix_spring_self.cpp index df00a2ba8c..bc59ff9987 100644 --- a/src/fix_spring_self.cpp +++ b/src/fix_spring_self.cpp @@ -21,20 +21,23 @@ #include "atom.h" #include "domain.h" #include "error.h" +#include "input.h" #include "memory.h" #include "respa.h" #include "update.h" +#include "variable.h" #include using namespace LAMMPS_NS; using namespace FixConst; +enum { NONE, CONSTANT, EQUAL, ATOM }; + /* ---------------------------------------------------------------------- */ FixSpringSelf::FixSpringSelf(LAMMPS *lmp, int narg, char **arg) : - Fix(lmp, narg, arg), - xoriginal(nullptr) + Fix(lmp, narg, arg), xoriginal(nullptr), kstr(nullptr) { if ((narg < 4) || (narg > 5)) error->all(FLERR,"Illegal fix spring/self command"); @@ -46,8 +49,14 @@ FixSpringSelf::FixSpringSelf(LAMMPS *lmp, int narg, char **arg) : energy_global_flag = 1; respa_level_support = 1; - k = utils::numeric(FLERR,arg[3],false,lmp); - if (k <= 0.0) error->all(FLERR,"Illegal fix spring/self command"); + if (utils::strmatch(arg[3], "^v_")) { + kstr = utils::strdup(arg[3] + 2); + kstyle = NONE; + } else { + k = utils::numeric(FLERR,arg[3],false,lmp); + kstyle = CONSTANT; + if (k <= 0.0) error->all(FLERR,"Illegal force constatnt for fix spring/self command"); + } xflag = yflag = zflag = 1; @@ -123,6 +132,25 @@ int FixSpringSelf::setmask() void 
FixSpringSelf::init() { + // check variable + + if (kstr) { + kvar = input->variable->find(kstr); + if (kvar < 0) error->all(FLERR, "Variable {} for fix spring/self does not exist", kstr); + if (input->variable->equalstyle(kvar)) + kstyle = EQUAL; + else if (input->variable->atomstyle(kvar)) + kstyle = ATOM; + else + error->all(FLERR, "Variable {} for fix spring/self is invalid style", kstr); + } + + + if ((kstyle == ATOM) && (atom->nmax > maxatom)) { + maxatom = atom->nmax; + memory->destroy(kval); + memory->create(kval, maxatom, "sprint/self:kval"); + } if (utils::strmatch(update->integrate_style,"^respa")) { ilevel_respa = (dynamic_cast(update->integrate))->nlevels-1; if (respa_level >= 0) ilevel_respa = MIN(respa_level,ilevel_respa); @@ -162,24 +190,55 @@ void FixSpringSelf::post_force(int /*vflag*/) double dx,dy,dz; double unwrap[3]; + // reallocate kval array if necessary + + if ((kstyle == ATOM) && (atom->nmax > maxatom)) { + maxatom = atom->nmax; + memory->destroy(kval); + memory->create(kval, maxatom, "sprint/self:kval"); + } + espring = 0.0; - for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit) { - domain->unmap(x[i],image[i],unwrap); - dx = unwrap[0] - xoriginal[i][0]; - dy = unwrap[1] - xoriginal[i][1]; - dz = unwrap[2] - xoriginal[i][2]; - if (!xflag) dx = 0.0; - if (!yflag) dy = 0.0; - if (!zflag) dz = 0.0; - f[i][0] -= k*dx; - f[i][1] -= k*dy; - f[i][2] -= k*dz; - espring += k * (dx*dx + dy*dy + dz*dz); - } + if ((kstyle == CONSTANT) || (kstyle == EQUAL)) { + // update k if equal style variable + if (kstyle == EQUAL) k = input->variable->compute_equal(kvar); + + for (int i = 0; i < nlocal; i++) + if (mask[i] & groupbit) { + domain->unmap(x[i],image[i],unwrap); + dx = unwrap[0] - xoriginal[i][0]; + dy = unwrap[1] - xoriginal[i][1]; + dz = unwrap[2] - xoriginal[i][2]; + if (!xflag) dx = 0.0; + if (!yflag) dy = 0.0; + if (!zflag) dz = 0.0; + f[i][0] -= k*dx; + f[i][1] -= k*dy; + f[i][2] -= k*dz; + espring += k * (dx*dx + dy*dy + dz*dz); + } + 
espring *= 0.5; + } else { + // update kval for kstyle == ATOM + input->variable->compute_atom(kvar, igroup, kval, 1, 0); + for (int i = 0; i < nlocal; i++) + if (mask[i] & groupbit) { + domain->unmap(x[i],image[i],unwrap); + dx = unwrap[0] - xoriginal[i][0]; + dy = unwrap[1] - xoriginal[i][1]; + dz = unwrap[2] - xoriginal[i][2]; + if (!xflag) dx = 0.0; + if (!yflag) dy = 0.0; + if (!zflag) dz = 0.0; + f[i][0] -= kval[i]*dx; + f[i][1] -= kval[i]*dy; + f[i][2] -= kval[i]*dz; + espring += kval[i] * (dx*dx + dy*dy + dz*dz); + } - espring *= 0.5; + espring *= 0.5; + } } /* ---------------------------------------------------------------------- */ @@ -213,7 +272,8 @@ double FixSpringSelf::compute_scalar() double FixSpringSelf::memory_usage() { - double bytes = (double)atom->nmax*3 * sizeof(double); + double bytes = (double)atom->nmax*4 * sizeof(double); + if (kstyle == ATOM) bytes += (double)atom->nmax * sizeof(double); return bytes; } diff --git a/src/fix_spring_self.h b/src/fix_spring_self.h index f13f2be918..e3220b8cfd 100644 --- a/src/fix_spring_self.h +++ b/src/fix_spring_self.h @@ -50,8 +50,11 @@ class FixSpringSelf : public Fix { protected: double k, espring; double **xoriginal; // original coords of atoms + char *kstr; // name of variable for K + double *kval; // per-atom variable values for K + int kvar, kstyle; int xflag, yflag, zflag; - int ilevel_respa; + int ilevel_respa, maxatom; }; } // namespace LAMMPS_NS diff --git a/unittest/force-styles/tests/fix-timestep-spring_self_atom.yaml b/unittest/force-styles/tests/fix-timestep-spring_self_atom.yaml new file mode 100644 index 0000000000..e8b35e6455 --- /dev/null +++ b/unittest/force-styles/tests/fix-timestep-spring_self_atom.yaml @@ -0,0 +1,77 @@ +--- +lammps_version: 17 Feb 2022 +date_generated: Fri Mar 18 22:18:01 2022 +epsilon: 5e-14 +skip_tests: kokkos_omp +prerequisites: ! | + atom full + fix spring/self +pre_commands: ! | + variable kvar atom 10.0 +post_commands: ! 
| + fix move all nve + fix test solute spring/self v_kvar xyz +input_file: in.fourmol +natoms: 29 +global_scalar: 0.12623705370750438 +run_pos: ! |2 + 1 -2.7045669792379945e-01 2.4912140072031601e+00 -1.6695897908630153e-01 + 2 3.1003572362014503e-01 2.9612290242130319e+00 -8.5466689099875615e-01 + 3 -7.0398410505917419e-01 1.2305522448803927e+00 -6.2777452858703953e-01 + 4 -1.5818137373512378e+00 1.4837442978868092e+00 -1.2538665277734968e+00 + 5 -9.0719266809604937e-01 9.2652365891304722e-01 3.9954299416556610e-01 + 6 2.4832271436421161e-01 2.8312320769893828e-01 -1.2314303818391423e+00 + 7 3.4143518373063453e-01 -2.2645187306940717e-02 -2.5292229406165907e+00 + 8 1.1743540824610306e+00 -4.8863162985819381e-01 -6.3783828665914544e-01 + 9 1.3800508883119953e+00 -2.5274565574966718e-01 2.8353436538531862e-01 + 10 2.0510726036138696e+00 -1.4604027090169940e+00 -9.8323554549077119e-01 + 11 1.7878052123899795e+00 -1.9921835931655048e+00 -1.8890566136917575e+00 + 12 3.0063000763288268e+00 -4.9013323763786637e-01 -1.6231890083142151e+00 + 13 4.0515352007695906e+00 -8.9202569828585798e-01 -1.6399995011867139e+00 + 14 2.6066952690925067e+00 -4.1788633645045253e-01 -2.6633949696742012e+00 + 15 2.9695337662435719e+00 5.5423141568538492e-01 -1.2342076641871542e+00 + 16 2.6747001492056977e+00 -2.4124097322472577e+00 -2.3429048072365732e-02 + 17 2.2153591049025310e+00 -2.0897997214660862e+00 1.1963106355359285e+00 + 18 2.1369701704115056e+00 3.0158507413628213e+00 -3.5179348337213843e+00 + 19 1.5355837136087338e+00 2.6255292355375399e+00 -4.2353987779878857e+00 + 20 2.7727573005678758e+00 3.6923910449610102e+00 -3.9330842459133470e+00 + 21 4.9040128073205524e+00 -4.0752348172959030e+00 -3.6210314709893159e+00 + 22 4.3582355554440877e+00 -4.2126119427287101e+00 -4.4612844196314150e+00 + 23 5.7439382849307670e+00 -3.5821957939275060e+00 -3.8766361295935892e+00 + 24 2.0689243582422914e+00 3.1513346907271247e+00 3.1550389754829422e+00 + 25 1.3045351331492516e+00 
3.2665125705842941e+00 2.5111855257434352e+00 + 26 2.5809237402711318e+00 4.0117602605482858e+00 3.2212060529089945e+00 + 27 -1.9611343130357277e+00 -4.3563411931359841e+00 2.1098293115523528e+00 + 28 -2.7473562684513411e+00 -4.0200819932379330e+00 1.5830052163433952e+00 + 29 -1.3126000191359812e+00 -3.5962518039482934e+00 2.2746342468737817e+00 +run_vel: ! |2 + 1 8.1685220941861477e-03 1.6512578512542727e-02 4.7892799147935001e-03 + 2 5.4427456394786321e-03 5.1693257879352533e-03 -1.4414043022813649e-03 + 3 -8.2272458036248362e-03 -1.2923813188884230e-02 -4.0970749471144546e-03 + 4 -3.7660861920462349e-03 -6.5659911420830365e-03 -1.1120922532834726e-03 + 5 -1.1012635909013241e-02 -9.8847866026321157e-03 -2.8391869073674538e-03 + 6 -3.9665990411620729e-02 4.6803722380071487e-02 3.7135522426802389e-02 + 7 9.1016763589152859e-04 -1.0126055720737583e-02 -5.1556610019025714e-02 + 8 7.9043585267658430e-03 -3.3496064544244345e-03 3.4549326598010660e-02 + 9 1.5620907286754389e-03 3.7378245105921431e-03 1.5036774253075934e-02 + 10 2.9193799040059056e-02 -2.9242248165535563e-02 -1.5014281912567770e-02 + 11 -4.7797459644718264e-03 -3.7436196398511232e-03 -2.3410499103477603e-03 + 12 2.2686069875175316e-03 -3.4732729502899497e-04 -3.0627334265471650e-03 + 13 2.7456854188010020e-03 5.8081889921879817e-03 -7.9308949311655092e-04 + 14 3.5223319737918667e-03 -5.7842699330258648e-03 -3.9396805101296825e-03 + 15 -1.8475459117759364e-03 -5.8469790281561471e-03 6.2849983323582511e-03 + 16 1.8676069228413028e-02 -1.3258381729410438e-02 -4.5625616778429308e-02 + 17 -1.2893668780819389e-02 9.7505325833410258e-03 3.7288200735675299e-02 + 18 -8.0065794869105819e-04 -8.6270473288011819e-04 -1.4483040693746142e-03 + 19 1.2452390836051499e-03 -2.5061097119616180e-03 7.2998631010316650e-03 + 20 3.5930060229538143e-03 3.6938860309035470e-03 3.2322732687958995e-03 + 21 -1.4689220366910704e-03 -2.7352129796142532e-04 7.0581624168175334e-04 + 22 -7.0694199254520765e-03 -4.2577148925037030e-03 
2.8079117611209205e-04 + 23 6.0446963117617505e-03 -1.4000131614895336e-03 2.5819754846773601e-03 + 24 3.1926367911308686e-04 -9.9445664741642462e-04 1.4999996978363057e-04 + 25 1.3789754526895179e-04 -4.4335894884219599e-03 -8.1808136698604454e-04 + 26 2.0485904035342870e-03 2.7813358633902757e-03 4.3245727149365584e-03 + 27 4.5604120291626942e-04 -1.0305523027244966e-03 2.1188058375789067e-04 + 28 -6.2544520861839200e-03 1.4127711176141612e-03 -1.8429821884806304e-03 + 29 6.4110631535703737e-04 3.1273432719578029e-03 3.7253671105604122e-03 +... diff --git a/unittest/force-styles/tests/fix-timestep-spring_self_equal.yaml b/unittest/force-styles/tests/fix-timestep-spring_self_equal.yaml new file mode 100644 index 0000000000..382cc2cc73 --- /dev/null +++ b/unittest/force-styles/tests/fix-timestep-spring_self_equal.yaml @@ -0,0 +1,77 @@ +--- +lammps_version: 17 Feb 2022 +date_generated: Fri Mar 18 22:18:01 2022 +epsilon: 5e-14 +skip_tests: kokkos_omp +prerequisites: ! | + atom full + fix spring/self +pre_commands: ! | + variable kvar equal 10.0 +post_commands: ! | + fix move all nve + fix test solute spring/self v_kvar xyz +input_file: in.fourmol +natoms: 29 +global_scalar: 0.12623705370750438 +run_pos: ! 
|2 + 1 -2.7045669792379945e-01 2.4912140072031601e+00 -1.6695897908630153e-01 + 2 3.1003572362014503e-01 2.9612290242130319e+00 -8.5466689099875615e-01 + 3 -7.0398410505917419e-01 1.2305522448803927e+00 -6.2777452858703953e-01 + 4 -1.5818137373512378e+00 1.4837442978868092e+00 -1.2538665277734968e+00 + 5 -9.0719266809604937e-01 9.2652365891304722e-01 3.9954299416556610e-01 + 6 2.4832271436421161e-01 2.8312320769893828e-01 -1.2314303818391423e+00 + 7 3.4143518373063453e-01 -2.2645187306940717e-02 -2.5292229406165907e+00 + 8 1.1743540824610306e+00 -4.8863162985819381e-01 -6.3783828665914544e-01 + 9 1.3800508883119953e+00 -2.5274565574966718e-01 2.8353436538531862e-01 + 10 2.0510726036138696e+00 -1.4604027090169940e+00 -9.8323554549077119e-01 + 11 1.7878052123899795e+00 -1.9921835931655048e+00 -1.8890566136917575e+00 + 12 3.0063000763288268e+00 -4.9013323763786637e-01 -1.6231890083142151e+00 + 13 4.0515352007695906e+00 -8.9202569828585798e-01 -1.6399995011867139e+00 + 14 2.6066952690925067e+00 -4.1788633645045253e-01 -2.6633949696742012e+00 + 15 2.9695337662435719e+00 5.5423141568538492e-01 -1.2342076641871542e+00 + 16 2.6747001492056977e+00 -2.4124097322472577e+00 -2.3429048072365732e-02 + 17 2.2153591049025310e+00 -2.0897997214660862e+00 1.1963106355359285e+00 + 18 2.1369701704115056e+00 3.0158507413628213e+00 -3.5179348337213843e+00 + 19 1.5355837136087338e+00 2.6255292355375399e+00 -4.2353987779878857e+00 + 20 2.7727573005678758e+00 3.6923910449610102e+00 -3.9330842459133470e+00 + 21 4.9040128073205524e+00 -4.0752348172959030e+00 -3.6210314709893159e+00 + 22 4.3582355554440877e+00 -4.2126119427287101e+00 -4.4612844196314150e+00 + 23 5.7439382849307670e+00 -3.5821957939275060e+00 -3.8766361295935892e+00 + 24 2.0689243582422914e+00 3.1513346907271247e+00 3.1550389754829422e+00 + 25 1.3045351331492516e+00 3.2665125705842941e+00 2.5111855257434352e+00 + 26 2.5809237402711318e+00 4.0117602605482858e+00 3.2212060529089945e+00 + 27 -1.9611343130357277e+00 
-4.3563411931359841e+00 2.1098293115523528e+00 + 28 -2.7473562684513411e+00 -4.0200819932379330e+00 1.5830052163433952e+00 + 29 -1.3126000191359812e+00 -3.5962518039482934e+00 2.2746342468737817e+00 +run_vel: ! |2 + 1 8.1685220941861477e-03 1.6512578512542727e-02 4.7892799147935001e-03 + 2 5.4427456394786321e-03 5.1693257879352533e-03 -1.4414043022813649e-03 + 3 -8.2272458036248362e-03 -1.2923813188884230e-02 -4.0970749471144546e-03 + 4 -3.7660861920462349e-03 -6.5659911420830365e-03 -1.1120922532834726e-03 + 5 -1.1012635909013241e-02 -9.8847866026321157e-03 -2.8391869073674538e-03 + 6 -3.9665990411620729e-02 4.6803722380071487e-02 3.7135522426802389e-02 + 7 9.1016763589152859e-04 -1.0126055720737583e-02 -5.1556610019025714e-02 + 8 7.9043585267658430e-03 -3.3496064544244345e-03 3.4549326598010660e-02 + 9 1.5620907286754389e-03 3.7378245105921431e-03 1.5036774253075934e-02 + 10 2.9193799040059056e-02 -2.9242248165535563e-02 -1.5014281912567770e-02 + 11 -4.7797459644718264e-03 -3.7436196398511232e-03 -2.3410499103477603e-03 + 12 2.2686069875175316e-03 -3.4732729502899497e-04 -3.0627334265471650e-03 + 13 2.7456854188010020e-03 5.8081889921879817e-03 -7.9308949311655092e-04 + 14 3.5223319737918667e-03 -5.7842699330258648e-03 -3.9396805101296825e-03 + 15 -1.8475459117759364e-03 -5.8469790281561471e-03 6.2849983323582511e-03 + 16 1.8676069228413028e-02 -1.3258381729410438e-02 -4.5625616778429308e-02 + 17 -1.2893668780819389e-02 9.7505325833410258e-03 3.7288200735675299e-02 + 18 -8.0065794869105819e-04 -8.6270473288011819e-04 -1.4483040693746142e-03 + 19 1.2452390836051499e-03 -2.5061097119616180e-03 7.2998631010316650e-03 + 20 3.5930060229538143e-03 3.6938860309035470e-03 3.2322732687958995e-03 + 21 -1.4689220366910704e-03 -2.7352129796142532e-04 7.0581624168175334e-04 + 22 -7.0694199254520765e-03 -4.2577148925037030e-03 2.8079117611209205e-04 + 23 6.0446963117617505e-03 -1.4000131614895336e-03 2.5819754846773601e-03 + 24 3.1926367911308686e-04 -9.9445664741642462e-04 
1.4999996978363057e-04 + 25 1.3789754526895179e-04 -4.4335894884219599e-03 -8.1808136698604454e-04 + 26 2.0485904035342870e-03 2.7813358633902757e-03 4.3245727149365584e-03 + 27 4.5604120291626942e-04 -1.0305523027244966e-03 2.1188058375789067e-04 + 28 -6.2544520861839200e-03 1.4127711176141612e-03 -1.8429821884806304e-03 + 29 6.4110631535703737e-04 3.1273432719578029e-03 3.7253671105604122e-03 +... From 72b649ec2bd7e45c58fbec275713063f40baaa23 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 15 Nov 2024 14:14:53 -0500 Subject: [PATCH 281/294] whitespace --- src/fix_spring_self.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fix_spring_self.cpp b/src/fix_spring_self.cpp index bc59ff9987..302dc2d952 100644 --- a/src/fix_spring_self.cpp +++ b/src/fix_spring_self.cpp @@ -144,7 +144,7 @@ void FixSpringSelf::init() else error->all(FLERR, "Variable {} for fix spring/self is invalid style", kstr); } - + if ((kstyle == ATOM) && (atom->nmax > maxatom)) { maxatom = atom->nmax; @@ -203,7 +203,7 @@ void FixSpringSelf::post_force(int /*vflag*/) if ((kstyle == CONSTANT) || (kstyle == EQUAL)) { // update k if equal style variable if (kstyle == EQUAL) k = input->variable->compute_equal(kvar); - + for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) { domain->unmap(x[i],image[i],unwrap); From bca2c4d2456ea2b0796225863799ada0e0d7af3a Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 15 Nov 2024 14:26:10 -0500 Subject: [PATCH 282/294] add missing initializers --- src/fix_spring_self.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/fix_spring_self.cpp b/src/fix_spring_self.cpp index 302dc2d952..797255c381 100644 --- a/src/fix_spring_self.cpp +++ b/src/fix_spring_self.cpp @@ -37,7 +37,7 @@ enum { NONE, CONSTANT, EQUAL, ATOM }; /* ---------------------------------------------------------------------- */ FixSpringSelf::FixSpringSelf(LAMMPS *lmp, int narg, char **arg) : - Fix(lmp, narg, arg), xoriginal(nullptr), 
kstr(nullptr) + Fix(lmp, narg, arg), xoriginal(nullptr), kstr(nullptr), kval(nullptr) { if ((narg < 4) || (narg > 5)) error->all(FLERR,"Illegal fix spring/self command"); @@ -48,6 +48,8 @@ FixSpringSelf::FixSpringSelf(LAMMPS *lmp, int narg, char **arg) : extscalar = 1; energy_global_flag = 1; respa_level_support = 1; + maxatom = 0; + kvar = -1; if (utils::strmatch(arg[3], "^v_")) { kstr = utils::strdup(arg[3] + 2); From ec129f167e2d24ebfbce6d4ef153a114a76f3d84 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 15 Nov 2024 14:33:25 -0500 Subject: [PATCH 283/294] error out in fix spring/self/kk with variable spring constant --- src/KOKKOS/fix_spring_self_kokkos.cpp | 3 +++ src/fix_spring_self.cpp | 2 -- src/fix_spring_self.h | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/fix_spring_self_kokkos.cpp b/src/KOKKOS/fix_spring_self_kokkos.cpp index 9ba796b1ab..1b6d45ead7 100644 --- a/src/KOKKOS/fix_spring_self_kokkos.cpp +++ b/src/KOKKOS/fix_spring_self_kokkos.cpp @@ -79,6 +79,9 @@ void FixSpringSelfKokkos::init() { FixSpringSelf::init(); + if (kstyle != CONSTANT) + error->all(FLERR, "Fix spring/self/kk does not support variable spring constants (yet)"); + if (utils::strmatch(update->integrate_style,"^respa")) error->all(FLERR,"Cannot (yet) use respa with Kokkos"); } diff --git a/src/fix_spring_self.cpp b/src/fix_spring_self.cpp index 797255c381..f8720a9217 100644 --- a/src/fix_spring_self.cpp +++ b/src/fix_spring_self.cpp @@ -32,8 +32,6 @@ using namespace LAMMPS_NS; using namespace FixConst; -enum { NONE, CONSTANT, EQUAL, ATOM }; - /* ---------------------------------------------------------------------- */ FixSpringSelf::FixSpringSelf(LAMMPS *lmp, int narg, char **arg) : diff --git a/src/fix_spring_self.h b/src/fix_spring_self.h index e3220b8cfd..9699ca33f3 100644 --- a/src/fix_spring_self.h +++ b/src/fix_spring_self.h @@ -26,6 +26,7 @@ namespace LAMMPS_NS { class FixSpringSelf : public Fix { public: + enum { NONE, CONSTANT, EQUAL, 
ATOM }; FixSpringSelf(class LAMMPS *, int, char **); ~FixSpringSelf() override; int setmask() override; From 35f1fb9d6ddaff50a0d7a1e76783f56f77bc43c2 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 15 Nov 2024 15:12:56 -0500 Subject: [PATCH 284/294] check equal style variable evaluation for bad values --- src/fix_spring_self.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/fix_spring_self.cpp b/src/fix_spring_self.cpp index f8720a9217..c9a9f7d0f7 100644 --- a/src/fix_spring_self.cpp +++ b/src/fix_spring_self.cpp @@ -55,7 +55,7 @@ FixSpringSelf::FixSpringSelf(LAMMPS *lmp, int narg, char **arg) : } else { k = utils::numeric(FLERR,arg[3],false,lmp); kstyle = CONSTANT; - if (k <= 0.0) error->all(FLERR,"Illegal force constatnt for fix spring/self command"); + if (k <= 0.0) error->all(FLERR,"Illegal force constant for fix spring/self command"); } xflag = yflag = zflag = 1; @@ -202,8 +202,11 @@ void FixSpringSelf::post_force(int /*vflag*/) if ((kstyle == CONSTANT) || (kstyle == EQUAL)) { // update k if equal style variable - if (kstyle == EQUAL) k = input->variable->compute_equal(kvar); - + if (kstyle == EQUAL) { + k = input->variable->compute_equal(kvar); + if (k < 0.0) + error->all(FLERR,"Evaluation of {} gave bad value {} for fix spring/self", kstr, k); + } for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) { domain->unmap(x[i],image[i],unwrap); From b3f88da9390f5755b660bf73ada4e6ce7fad8155 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 15 Nov 2024 15:22:19 -0500 Subject: [PATCH 285/294] document KOKKOS restriction of fix spring/self --- doc/src/fix_spring_self.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/src/fix_spring_self.rst b/doc/src/fix_spring_self.rst index f8354b17d7..a78e7575f8 100644 --- a/doc/src/fix_spring_self.rst +++ b/doc/src/fix_spring_self.rst @@ -106,7 +106,9 @@ invoked by the :doc:`minimize ` command. 
Restrictions """""""""""" - none + +The KOKKOS version, *fix spring/self/kk*, may only be used with a constant +value of K, not a variable. Related commands """""""""""""""" From f7b87a21c79e9c25bf4ce51daedd0fb3fc182f28 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 16 Nov 2024 11:14:18 -0500 Subject: [PATCH 286/294] apply special treatment for fix colvars/kk as suggested by @stanmoore1 --- doc/src/Commands_fix.rst | 2 +- doc/src/fix_colvars.rst | 7 ++++++- doc/utils/check-styles.py | 9 +++++---- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/doc/src/Commands_fix.rst b/doc/src/Commands_fix.rst index 58fda862c6..04d1a9969a 100644 --- a/doc/src/Commands_fix.rst +++ b/doc/src/Commands_fix.rst @@ -44,7 +44,7 @@ OPT. * :doc:`brownian/sphere ` * :doc:`charge/regulation ` * :doc:`cmap (k) ` - * :doc:`colvars (k) ` + * :doc:`colvars ` * :doc:`controller ` * :doc:`damping/cundall ` * :doc:`deform (k) ` diff --git a/doc/src/fix_colvars.rst b/doc/src/fix_colvars.rst index 785893077c..21a9d06714 100644 --- a/doc/src/fix_colvars.rst +++ b/doc/src/fix_colvars.rst @@ -123,7 +123,12 @@ that will be used in the colvars module. ---------- -.. include:: accel_styles.rst +.. note:: + + Fix colvars/kk is not really ported to KOKKOS, since the colvars + library has not been ported to KOKKOS. It merely has some + optimizations to reduce the data transfers between host and device + for KOKKOS with GPUs. ---------- diff --git a/doc/utils/check-styles.py b/doc/utils/check-styles.py index 89c8920760..6e4d133cd4 100644 --- a/doc/utils/check-styles.py +++ b/doc/utils/check-styles.py @@ -73,8 +73,6 @@ omp = re.compile("(.+)/omp\\s*$") opt = re.compile("(.+)/opt\\s*$") removed = re.compile("(.*)Deprecated$") -accel_pattern = re.compile(r"^.. 
include::\s+accel_styles.rst$") - def require_accel_include(path): found = False needs = False @@ -94,6 +92,7 @@ def require_accel_include(path): if kokkos.match(line): needs = True if intel.match(line): needs = True if opt.match(line): needs = True + if path == "src/fix_colvars.rst": needs = False m = cmd_pattern.match(line) if m: if gpu.match(line): needs = True @@ -167,7 +166,9 @@ def check_style(filename, dirname, pattern, styles, name, suffix=False, skip=set # known undocumented aliases we need to skip if c in skip: continue s = c - if suffix: s = add_suffix(styles, c) + if suffix: + s = add_suffix(styles, c) + if s == 'colvars (k)' : continue if not s in matches: if not styles[c]['removed']: print(f"{name} style entry {s} is missing or incomplete in {filename}") @@ -300,7 +301,7 @@ for command_type, entries in index.items(): print("Total number of style index entries:", total_index) skip_angle = ('sdk') -skip_fix = ('python', 'NEIGH_HISTORY/omp','acks2/reax','qeq/reax','reax/c/bonds','reax/c/species', 'pimd') +skip_fix = ('python', 'NEIGH_HISTORY/omp','acks2/reax','qeq/reax','reax/c/bonds','reax/c/species', 'pimd', 'colvars/kk') skip_pair = ('meam/c','lj/sf','reax/c','lj/sdk','lj/sdk/coul/long','lj/sdk/coul/msm') skip_compute = ('pressure/cylinder') From 07a8806fd0c677404008caca19905213a28e5db8 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 17 Nov 2024 12:14:07 -0500 Subject: [PATCH 287/294] fix typo --- src/REAXFF/fix_qtpie_reaxff.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/REAXFF/fix_qtpie_reaxff.cpp b/src/REAXFF/fix_qtpie_reaxff.cpp index c9b924a685..0279032c7f 100644 --- a/src/REAXFF/fix_qtpie_reaxff.cpp +++ b/src/REAXFF/fix_qtpie_reaxff.cpp @@ -200,7 +200,7 @@ void FixQtpieReaxFF::pertype_parameters(char *arg) gauss_exp[0] = 0.0; try { FILE *fp = utils::open_potential(gauss_file, lmp, nullptr); - if (!fp) throw TokenizerException("Fix qtpie/reaxff: coul not open gauss file", gauss_file); + if (!fp) throw 
TokenizerException("Fix qtpie/reaxff: could not open gauss file", gauss_file); TextFileReader reader(fp,"qtpie/reaxff gaussian exponents"); reader.ignore_comments = true; for (int i = 1; i <= ntypes; i++) { From a635f20b1b7ea4cce4e93f4583a9d99cb5c2872b Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 17 Nov 2024 12:14:14 -0500 Subject: [PATCH 288/294] whitespace --- src/REAXFF/fix_qtpie_reaxff.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/REAXFF/fix_qtpie_reaxff.h b/src/REAXFF/fix_qtpie_reaxff.h index 2d82ad197c..2f86e27a7a 100644 --- a/src/REAXFF/fix_qtpie_reaxff.h +++ b/src/REAXFF/fix_qtpie_reaxff.h @@ -81,16 +81,16 @@ class FixQtpieReaxFF : public Fix { double *Hdia_inv; double *b_s, *b_t; double *b_prc, *b_prm; - double *chi_eff; // array of effective electronegativities + double *chi_eff; // array of effective electronegativities //CG storage double *p, *q, *r, *d; int imax, maxwarn; char *pertype_option; // argument to determine how per-type info is obtained - char *gauss_file; // input file for gaussian orbital exponents - double *gauss_exp; // array of gaussian orbital exponents for each atom type - double dist_cutoff; // separation distance beyond which to neglect overlap integrals + char *gauss_file; // input file for gaussian orbital exponents + double *gauss_exp; // array of gaussian orbital exponents for each atom type + double dist_cutoff; // separation distance beyond which to neglect overlap integrals void pertype_parameters(char *); void init_shielding(); From 18329ac86adbaf54d4a50c67d5c02c8f31b0a173 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 17 Nov 2024 12:14:28 -0500 Subject: [PATCH 289/294] avoid memory leaks --- src/fix_spring_self.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/fix_spring_self.cpp b/src/fix_spring_self.cpp index c9a9f7d0f7..d0c7ed8f94 100644 --- a/src/fix_spring_self.cpp +++ b/src/fix_spring_self.cpp @@ -112,8 +112,10 @@ 
FixSpringSelf::~FixSpringSelf() atom->delete_callback(id,Atom::GROW); atom->delete_callback(id,Atom::RESTART); - // delete locally stored array + // delete locally stored arrays + delete[] kstr; + memory->destroy(kval); memory->destroy(xoriginal); } From 2ac0d065a7cdfc996fb014a92a9f4b8f8cf61ec7 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 25 Oct 2024 21:51:26 -0400 Subject: [PATCH 290/294] step version strings for next release --- doc/lammps.1 | 4 ++-- doc/src/fix_qeq.rst | 2 +- doc/src/pair_coul.rst | 2 +- src/atom.cpp | 2 +- src/library.cpp | 2 +- src/version.h | 3 +-- 6 files changed, 7 insertions(+), 8 deletions(-) diff --git a/doc/lammps.1 b/doc/lammps.1 index 75581bd008..d08ac71fd4 100644 --- a/doc/lammps.1 +++ b/doc/lammps.1 @@ -1,7 +1,7 @@ -.TH LAMMPS "1" "29 August 2024" "2024-08-29" +.TH LAMMPS "1" "29 October 2024" "2024-10-29" .SH NAME .B LAMMPS -\- Molecular Dynamics Simulator. Version 29 August 2024 +\- Molecular Dynamics Simulator. Version 29 October 2024 .SH SYNOPSIS .B lmp diff --git a/doc/src/fix_qeq.rst b/doc/src/fix_qeq.rst index fd317666d0..9929dd5796 100644 --- a/doc/src/fix_qeq.rst +++ b/doc/src/fix_qeq.rst @@ -190,7 +190,7 @@ on atoms via the matrix inversion method. A tolerance of 1.0e-6 is usually a good number. Keyword *alpha* can be used to change the Slater type orbital exponent. -.. versionadded:: TBD +.. versionadded:: 29Oct2024 The *qeq/ctip* style describes partial charges on atoms in the same way as style *qeq/shielded* but also enables the definition of charge diff --git a/doc/src/pair_coul.rst b/doc/src/pair_coul.rst index 17e9358754..643a644e80 100644 --- a/doc/src/pair_coul.rst +++ b/doc/src/pair_coul.rst @@ -180,7 +180,7 @@ coulomb styles in :doc:`hybrid pair styles `. ---------- -.. versionadded:: TBD +.. versionadded:: 29Oct2024 Style *coul/ctip* computes the Coulomb interactions as described in :ref:`Plummer `. 
It uses the the damped shifted model as in diff --git a/src/atom.cpp b/src/atom.cpp index e0fceffe9c..c726b1500c 100644 --- a/src/atom.cpp +++ b/src/atom.cpp @@ -3276,7 +3276,7 @@ int Atom::extract_datatype(const char *name) * \verbatim embed:rst -.. versionadded:: TBD +.. versionadded:: 29Oct2024 \endverbatim * diff --git a/src/library.cpp b/src/library.cpp index 0065f10454..fd27ada6ac 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -2159,7 +2159,7 @@ int lammps_extract_atom_datatype(void *handle, const char *name) * \verbatim embed:rst -.. versionadded:: TBD +.. versionadded:: 29Oct2024 This function returns an integer with the size of the per-atom property with the specified name. This allows to accurately determine diff --git a/src/version.h b/src/version.h index 9c382b3768..467a516f3e 100644 --- a/src/version.h +++ b/src/version.h @@ -1,2 +1 @@ -#define LAMMPS_VERSION "29 Aug 2024" -#define LAMMPS_UPDATE "Development" +#define LAMMPS_VERSION "29 Oct 2024" From c926bfd15692b168fa6c82b17de667231c2dd483 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 25 Oct 2024 22:35:41 -0400 Subject: [PATCH 291/294] start documenting individual steps for a LAMMPS release --- .github/release_steps.md | 58 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 .github/release_steps.md diff --git a/.github/release_steps.md b/.github/release_steps.md new file mode 100644 index 0000000000..bcd29fd818 --- /dev/null +++ b/.github/release_steps.md @@ -0,0 +1,58 @@ +# LAMMPS Release Steps + +The following notes chronicle the current steps for preparing and publishing LAMMPS releases. +For definition of what LAMMPS versions and the different kinds of releases mean, please +refer to [the corresponding section in the LAMMPS manual](https://docs.lammps.org/Manual_version.html). 
+ +## LAMMPS Feature Release + +A LAMMPS feature release is currently prepared after about 500 to 750 commits to the +'develop' branch or after a period of four weeks up to two months. + +### Preparing a 'next\_release' branch + +Create a 'next\_release' branch off 'develop' and make the following changes: +- set the LAMMPS\_VERSION define to the planned release date in src/version.h in the format "D Mmm YYYY" or "DD Mmm YYYY" +- remove the LAMMPS\_UPDATE define in src/version.h +- update the release date in doc/lammps.1 +- update all TBD arguments for ..versionadded::, ..versionchanged::, and ..deprecated:: to the + planned release date in the format "DMmmYYYY" or "DDMmmYYYY" + +Submit this pull request, rebase if needed. This is the last pull request merged for the release +and should not contain any other changes. (Exceptions: this document, last-minute trivial(!) changes). +This PR shall not be merged before **all** pending tests have completed and cleared. If needed, a +bugfix pull request should be created and merged to clear all tests. + +## LAMMPS Stable Release + +A LAMMPS stable release is prepared about once per year in the months July, August, or September. +One (or two, if needed) feature releases before the stable release shall contain only bug fixes +or minor feature updates in optional packages. Also substantial changes to the core of the code +shall be applied rather toward the beginning of a development cycle between two stable releases +than toward the end. The intention is to stabilize significant changes to the core and have +outside users and developers try them out during the development cycle; the sooner the changes +are included, the better the chances of spotting peripheral bugs and issues. + +### Prerequisites + +Before making a stable release all remaining backported bugfixes shall be released as a (final) +stable update release (see below). 
+ +A LAMMPS stable release process starts like a feature release (see above), only that this feature +release is called a "Stable Release Candidate" and no assets are uploaded to GitHub. + +### Synchronize 'maintenance' branch with 'release' + +The state of the 'release' branch is then transferred to the 'maintenance' branch (which will +have diverged significantly from 'release' due to the selectively backported bug fixes). + +### Fast-forward merge of 'maintenance' into 'stable' and apply tag + +At this point it should be possible to do a fast-forward merge of 'maintenance' to 'stable' +and then apply the stable\_DMmmYYYY tag. + +### Push branches and tags + + + +## LAMMPS Stable Update Release From c50a8d83d1e5411023d0536fe71367a9e31f9a24 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 19 Nov 2024 02:10:49 -0500 Subject: [PATCH 292/294] New release date 19 November 2024 --- doc/lammps.1 | 4 ++-- doc/src/fix_qeq.rst | 2 +- doc/src/fix_qtpie_reaxff.rst | 2 ++ doc/src/pair_coul.rst | 2 +- src/atom.cpp | 2 +- src/library.cpp | 2 +- src/version.h | 2 +- 7 files changed, 9 insertions(+), 7 deletions(-) diff --git a/doc/lammps.1 b/doc/lammps.1 index d08ac71fd4..cb52813a4d 100644 --- a/doc/lammps.1 +++ b/doc/lammps.1 @@ -1,7 +1,7 @@ -.TH LAMMPS "1" "29 October 2024" "2024-10-29" +.TH LAMMPS "1" "19 November 2024" "2024-11-19" .SH NAME .B LAMMPS -\- Molecular Dynamics Simulator. Version 29 October 2024 +\- Molecular Dynamics Simulator. Version 19 November 2024 .SH SYNOPSIS .B lmp diff --git a/doc/src/fix_qeq.rst b/doc/src/fix_qeq.rst index 9929dd5796..06a1f98375 100644 --- a/doc/src/fix_qeq.rst +++ b/doc/src/fix_qeq.rst @@ -190,7 +190,7 @@ on atoms via the matrix inversion method. A tolerance of 1.0e-6 is usually a good number. Keyword *alpha* can be used to change the Slater type orbital exponent. -.. versionadded:: 29Oct2024 +.. 
versionadded:: 19Nov2024 The *qeq/ctip* style describes partial charges on atoms in the same way as style *qeq/shielded* but also enables the definition of charge diff --git a/doc/src/fix_qtpie_reaxff.rst b/doc/src/fix_qtpie_reaxff.rst index cf59e4701a..e96cbec459 100644 --- a/doc/src/fix_qtpie_reaxff.rst +++ b/doc/src/fix_qtpie_reaxff.rst @@ -35,6 +35,8 @@ Examples Description """"""""""" +.. versionadded:: 19Nov2024 + The QTPIE charge equilibration method is an extension of the QEq charge equilibration method. With QTPIE, the partial charges on individual atoms are computed by minimizing the electrostatic energy of the system in the diff --git a/doc/src/pair_coul.rst b/doc/src/pair_coul.rst index 643a644e80..77c0e0b18b 100644 --- a/doc/src/pair_coul.rst +++ b/doc/src/pair_coul.rst @@ -180,7 +180,7 @@ coulomb styles in :doc:`hybrid pair styles `. ---------- -.. versionadded:: 29Oct2024 +.. versionadded:: 19Nov2024 Style *coul/ctip* computes the Coulomb interactions as described in :ref:`Plummer `. It uses the the damped shifted model as in diff --git a/src/atom.cpp b/src/atom.cpp index c726b1500c..04a21b9c52 100644 --- a/src/atom.cpp +++ b/src/atom.cpp @@ -3276,7 +3276,7 @@ int Atom::extract_datatype(const char *name) * \verbatim embed:rst -.. versionadded:: 29Oct2024 +.. versionadded:: 19Nov2024 \endverbatim * diff --git a/src/library.cpp b/src/library.cpp index fd27ada6ac..9876d363e5 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -2159,7 +2159,7 @@ int lammps_extract_atom_datatype(void *handle, const char *name) * \verbatim embed:rst -.. versionadded:: 29Oct2024 +.. versionadded:: 19Nov2024 This function returns an integer with the size of the per-atom property with the specified name. 
This allows to accurately determine diff --git a/src/version.h b/src/version.h index 467a516f3e..88a65b1657 100644 --- a/src/version.h +++ b/src/version.h @@ -1 +1 @@ -#define LAMMPS_VERSION "29 Oct 2024" +#define LAMMPS_VERSION "19 Nov 2024" From e200d557ec94cc698d6d95d0290aff6d6202420c Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 19 Nov 2024 13:16:22 -0500 Subject: [PATCH 293/294] replace static with const for compatibility with SYCL --- src/KOKKOS/fix_cmap_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/fix_cmap_kokkos.cpp b/src/KOKKOS/fix_cmap_kokkos.cpp index 7501338826..dd92afe9cc 100644 --- a/src/KOKKOS/fix_cmap_kokkos.cpp +++ b/src/KOKKOS/fix_cmap_kokkos.cpp @@ -850,7 +850,7 @@ void FixCMAPKokkos::bc_interpol(double x1, double x2, int low1, int // calculate the bicubic interpolation coefficients c_ij - static int wt[16][16] = + const int wt[16][16] = { {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0}, {-3, 0, 0, 3, 0, 0, 0, 0,-2, 0, 0,-1, 0, 0, 0, 0}, From 53b2930ef41a6c0ad2c2b3d6553ebda457cd59cc Mon Sep 17 00:00:00 2001 From: lichanghao <542103276@qq.com> Date: Tue, 19 Nov 2024 23:50:50 -0500 Subject: [PATCH 294/294] modified dump_image.cpp, line 1100, fixed an indexing bug causing rendering error --- src/dump_image.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dump_image.cpp b/src/dump_image.cpp index f8403e7c71..9610ef4d9a 100644 --- a/src/dump_image.cpp +++ b/src/dump_image.cpp @@ -1097,7 +1097,7 @@ void DumpImage::create_image() color = colortype[itype]; } - ibonus = body[i]; + ibonus = body[j]; n = bptr->image(ibonus,bodyflag1,bodyflag2,bodyvec,bodyarray); for (k = 0; k < n; k++) { if (bodyvec[k] == SPHERE)