Merge branch 'develop' into collected-small-changes
This commit is contained in:
@ -449,6 +449,8 @@ class PyLammps(object):
|
||||
:type ptr: pointer
|
||||
:param comm: MPI communicator (as provided by `mpi4py <mpi4py_docs_>`_). ``None`` means use ``MPI_COMM_WORLD`` implicitly.
|
||||
:type comm: MPI_Comm
|
||||
:param verbose: print all LAMMPS output to stdout
|
||||
:type verbose: bool
|
||||
|
||||
:ivar lmp: instance of original LAMMPS Python interface
|
||||
:vartype lmp: :py:class:`lammps`
|
||||
@ -457,8 +459,9 @@ class PyLammps(object):
|
||||
:vartype run: list
|
||||
"""
|
||||
|
||||
def __init__(self, name="", cmdargs=None, ptr=None, comm=None):
|
||||
def __init__(self, name="", cmdargs=None, ptr=None, comm=None, verbose=False):
|
||||
self.has_echo = False
|
||||
self.verbose = verbose
|
||||
|
||||
if cmdargs:
|
||||
if '-echo' in cmdargs:
|
||||
@ -869,8 +872,8 @@ class PyLammps(object):
|
||||
if comm:
|
||||
output = self.lmp.comm.bcast(output, root=0)
|
||||
|
||||
if 'verbose' in kwargs and kwargs['verbose']:
|
||||
print(output)
|
||||
if self.verbose or ('verbose' in kwargs and kwargs['verbose']):
|
||||
print(output, end = '')
|
||||
|
||||
lines = output.splitlines()
|
||||
|
||||
|
||||
@ -20,6 +20,8 @@
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
typedef MemoryKokkos MemKK;
|
||||
|
||||
class MemoryKokkos : public Memory {
|
||||
public:
|
||||
MemoryKokkos(class LAMMPS *lmp) : Memory(lmp) {}
|
||||
@ -278,46 +280,11 @@ void destroy_kokkos(TYPE data, typename TYPE::value_type** &array)
|
||||
deallocate first to reduce memory use
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template <typename TYPE>
|
||||
void realloc_kokkos(TYPE &data, const char *name, int n1)
|
||||
template <typename TYPE, typename... Indices>
|
||||
static void realloc_kokkos(TYPE &data, const char *name, Indices... ns)
|
||||
{
|
||||
data = TYPE();
|
||||
data = TYPE(Kokkos::NoInit(std::string(name)),n1);
|
||||
}
|
||||
|
||||
template <typename TYPE>
|
||||
void realloc_kokkos(TYPE &data, const char *name, int n1, int n2)
|
||||
{
|
||||
data = TYPE();
|
||||
data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2);
|
||||
}
|
||||
|
||||
template <typename TYPE>
|
||||
void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3)
|
||||
{
|
||||
data = TYPE();
|
||||
data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3);
|
||||
}
|
||||
|
||||
template <typename TYPE>
|
||||
void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4)
|
||||
{
|
||||
data = TYPE();
|
||||
data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3,n4);
|
||||
}
|
||||
|
||||
template <typename TYPE>
|
||||
void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4, int n5)
|
||||
{
|
||||
data = TYPE();
|
||||
data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3,n4,n5);
|
||||
}
|
||||
|
||||
template <typename TYPE>
|
||||
void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4, int n5, int n6)
|
||||
{
|
||||
data = TYPE();
|
||||
data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3,n4,n5,n6);
|
||||
data = TYPE(Kokkos::NoInit(std::string(name)), ns...);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -325,7 +292,7 @@ void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template <typename TYPE>
|
||||
double memory_usage(TYPE &data)
|
||||
static double memory_usage(TYPE &data)
|
||||
{
|
||||
return data.span() * sizeof(typename TYPE::value_type);
|
||||
}
|
||||
|
||||
@ -104,55 +104,55 @@ void PairPACEKokkos<DeviceType>::grow(int natom, int maxneigh)
|
||||
|
||||
if ((int)A.extent(0) < natom) {
|
||||
|
||||
memoryKK->realloc_kokkos(A, "pace:A", natom, nelements, nradmax + 1, (lmax + 1) * (lmax + 1));
|
||||
memoryKK->realloc_kokkos(A_rank1, "pace:A_rank1", natom, nelements, nradbase);
|
||||
MemKK::realloc_kokkos(A, "pace:A", natom, nelements, nradmax + 1, (lmax + 1) * (lmax + 1));
|
||||
MemKK::realloc_kokkos(A_rank1, "pace:A_rank1", natom, nelements, nradbase);
|
||||
|
||||
memoryKK->realloc_kokkos(A_list, "pace:A_list", natom, idx_rho_max, basis_set->rankmax);
|
||||
MemKK::realloc_kokkos(A_list, "pace:A_list", natom, idx_rho_max, basis_set->rankmax);
|
||||
//size is +1 of max to avoid out-of-boundary array access in double-triangular scheme
|
||||
memoryKK->realloc_kokkos(A_forward_prod, "pace:A_forward_prod", natom, idx_rho_max, basis_set->rankmax + 1);
|
||||
MemKK::realloc_kokkos(A_forward_prod, "pace:A_forward_prod", natom, idx_rho_max, basis_set->rankmax + 1);
|
||||
|
||||
memoryKK->realloc_kokkos(e_atom, "pace:e_atom", natom);
|
||||
memoryKK->realloc_kokkos(rhos, "pace:rhos", natom, basis_set->ndensitymax + 1); // +1 density for core repulsion
|
||||
memoryKK->realloc_kokkos(dF_drho, "pace:dF_drho", natom, basis_set->ndensitymax + 1); // +1 density for core repulsion
|
||||
MemKK::realloc_kokkos(e_atom, "pace:e_atom", natom);
|
||||
MemKK::realloc_kokkos(rhos, "pace:rhos", natom, basis_set->ndensitymax + 1); // +1 density for core repulsion
|
||||
MemKK::realloc_kokkos(dF_drho, "pace:dF_drho", natom, basis_set->ndensitymax + 1); // +1 density for core repulsion
|
||||
|
||||
memoryKK->realloc_kokkos(weights, "pace:weights", natom, nelements, nradmax + 1, (lmax + 1) * (lmax + 1));
|
||||
memoryKK->realloc_kokkos(weights_rank1, "pace:weights_rank1", natom, nelements, nradbase);
|
||||
MemKK::realloc_kokkos(weights, "pace:weights", natom, nelements, nradmax + 1, (lmax + 1) * (lmax + 1));
|
||||
MemKK::realloc_kokkos(weights_rank1, "pace:weights_rank1", natom, nelements, nradbase);
|
||||
|
||||
// hard-core repulsion
|
||||
memoryKK->realloc_kokkos(rho_core, "pace:rho_core", natom);
|
||||
memoryKK->realloc_kokkos(dF_drho_core, "pace:dF_drho_core", natom);
|
||||
memoryKK->realloc_kokkos(dB_flatten, "pace:dB_flatten", natom, idx_rho_max, basis_set->rankmax);
|
||||
MemKK::realloc_kokkos(rho_core, "pace:rho_core", natom);
|
||||
MemKK::realloc_kokkos(dF_drho_core, "pace:dF_drho_core", natom);
|
||||
MemKK::realloc_kokkos(dB_flatten, "pace:dB_flatten", natom, idx_rho_max, basis_set->rankmax);
|
||||
}
|
||||
|
||||
if (((int)ylm.extent(0) < natom) || ((int)ylm.extent(1) < maxneigh)) {
|
||||
|
||||
// radial functions
|
||||
memoryKK->realloc_kokkos(fr, "pace:fr", natom, maxneigh, nradmax, lmax + 1);
|
||||
memoryKK->realloc_kokkos(dfr, "pace:dfr", natom, maxneigh, nradmax, lmax + 1);
|
||||
memoryKK->realloc_kokkos(gr, "pace:gr", natom, maxneigh, nradbase);
|
||||
memoryKK->realloc_kokkos(dgr, "pace:dgr", natom, maxneigh, nradbase);
|
||||
MemKK::realloc_kokkos(fr, "pace:fr", natom, maxneigh, nradmax, lmax + 1);
|
||||
MemKK::realloc_kokkos(dfr, "pace:dfr", natom, maxneigh, nradmax, lmax + 1);
|
||||
MemKK::realloc_kokkos(gr, "pace:gr", natom, maxneigh, nradbase);
|
||||
MemKK::realloc_kokkos(dgr, "pace:dgr", natom, maxneigh, nradbase);
|
||||
const int max_num_functions = MAX(nradbase, nradmax*(lmax + 1));
|
||||
memoryKK->realloc_kokkos(d_values, "pace:d_values", natom, maxneigh, max_num_functions);
|
||||
memoryKK->realloc_kokkos(d_derivatives, "pace:d_derivatives", natom, maxneigh, max_num_functions);
|
||||
MemKK::realloc_kokkos(d_values, "pace:d_values", natom, maxneigh, max_num_functions);
|
||||
MemKK::realloc_kokkos(d_derivatives, "pace:d_derivatives", natom, maxneigh, max_num_functions);
|
||||
|
||||
// hard-core repulsion
|
||||
memoryKK->realloc_kokkos(cr, "pace:cr", natom, maxneigh);
|
||||
memoryKK->realloc_kokkos(dcr, "pace:dcr", natom, maxneigh);
|
||||
MemKK::realloc_kokkos(cr, "pace:cr", natom, maxneigh);
|
||||
MemKK::realloc_kokkos(dcr, "pace:dcr", natom, maxneigh);
|
||||
|
||||
// spherical harmonics
|
||||
memoryKK->realloc_kokkos(plm, "pace:plm", natom, maxneigh, (lmax + 1) * (lmax + 1));
|
||||
memoryKK->realloc_kokkos(dplm, "pace:dplm", natom, maxneigh, (lmax + 1) * (lmax + 1));
|
||||
memoryKK->realloc_kokkos(ylm, "pace:ylm", natom, maxneigh, (lmax + 1) * (lmax + 1));
|
||||
memoryKK->realloc_kokkos(dylm, "pace:dylm", natom, maxneigh, (lmax + 1) * (lmax + 1));
|
||||
MemKK::realloc_kokkos(plm, "pace:plm", natom, maxneigh, (lmax + 1) * (lmax + 1));
|
||||
MemKK::realloc_kokkos(dplm, "pace:dplm", natom, maxneigh, (lmax + 1) * (lmax + 1));
|
||||
MemKK::realloc_kokkos(ylm, "pace:ylm", natom, maxneigh, (lmax + 1) * (lmax + 1));
|
||||
MemKK::realloc_kokkos(dylm, "pace:dylm", natom, maxneigh, (lmax + 1) * (lmax + 1));
|
||||
|
||||
// short neigh list
|
||||
memoryKK->realloc_kokkos(d_ncount, "pace:ncount", natom);
|
||||
memoryKK->realloc_kokkos(d_mu, "pace:mu", natom, maxneigh);
|
||||
memoryKK->realloc_kokkos(d_rhats, "pace:rhats", natom, maxneigh);
|
||||
memoryKK->realloc_kokkos(d_rnorms, "pace:rnorms", natom, maxneigh);
|
||||
memoryKK->realloc_kokkos(d_nearest, "pace:nearest", natom, maxneigh);
|
||||
MemKK::realloc_kokkos(d_ncount, "pace:ncount", natom);
|
||||
MemKK::realloc_kokkos(d_mu, "pace:mu", natom, maxneigh);
|
||||
MemKK::realloc_kokkos(d_rhats, "pace:rhats", natom, maxneigh);
|
||||
MemKK::realloc_kokkos(d_rnorms, "pace:rnorms", natom, maxneigh);
|
||||
MemKK::realloc_kokkos(d_nearest, "pace:nearest", natom, maxneigh);
|
||||
|
||||
memoryKK->realloc_kokkos(f_ij, "pace:f_ij", natom, maxneigh);
|
||||
MemKK::realloc_kokkos(f_ij, "pace:f_ij", natom, maxneigh);
|
||||
}
|
||||
}
|
||||
|
||||
@ -163,11 +163,11 @@ void PairPACEKokkos<DeviceType>::copy_pertype()
|
||||
{
|
||||
auto basis_set = aceimpl->basis_set;
|
||||
|
||||
memoryKK->realloc_kokkos(d_rho_core_cutoff, "pace:rho_core_cutoff", nelements);
|
||||
memoryKK->realloc_kokkos(d_drho_core_cutoff, "pace:drho_core_cutoff", nelements);
|
||||
memoryKK->realloc_kokkos(d_E0vals, "pace:E0vals", nelements);
|
||||
memoryKK->realloc_kokkos(d_ndensity, "pace:ndensity", nelements);
|
||||
memoryKK->realloc_kokkos(d_npoti, "pace:npoti", nelements);
|
||||
MemKK::realloc_kokkos(d_rho_core_cutoff, "pace:rho_core_cutoff", nelements);
|
||||
MemKK::realloc_kokkos(d_drho_core_cutoff, "pace:drho_core_cutoff", nelements);
|
||||
MemKK::realloc_kokkos(d_E0vals, "pace:E0vals", nelements);
|
||||
MemKK::realloc_kokkos(d_ndensity, "pace:ndensity", nelements);
|
||||
MemKK::realloc_kokkos(d_npoti, "pace:npoti", nelements);
|
||||
|
||||
auto h_rho_core_cutoff = Kokkos::create_mirror_view(d_rho_core_cutoff);
|
||||
auto h_drho_core_cutoff = Kokkos::create_mirror_view(d_drho_core_cutoff);
|
||||
@ -196,8 +196,8 @@ void PairPACEKokkos<DeviceType>::copy_pertype()
|
||||
Kokkos::deep_copy(d_ndensity, h_ndensity);
|
||||
Kokkos::deep_copy(d_npoti, h_npoti);
|
||||
|
||||
memoryKK->realloc_kokkos(d_wpre, "pace:wpre", nelements, basis_set->ndensitymax);
|
||||
memoryKK->realloc_kokkos(d_mexp, "pace:mexp", nelements, basis_set->ndensitymax);
|
||||
MemKK::realloc_kokkos(d_wpre, "pace:wpre", nelements, basis_set->ndensitymax);
|
||||
MemKK::realloc_kokkos(d_mexp, "pace:mexp", nelements, basis_set->ndensitymax);
|
||||
|
||||
auto h_wpre = Kokkos::create_mirror_view(d_wpre);
|
||||
auto h_mexp = Kokkos::create_mirror_view(d_mexp);
|
||||
@ -266,7 +266,7 @@ void PairPACEKokkos<DeviceType>::copy_tilde()
|
||||
idx_rho_max = 0;
|
||||
int total_basis_size_max = 0;
|
||||
|
||||
memoryKK->realloc_kokkos(d_idx_rho_count, "pace:idx_rho_count", nelements);
|
||||
MemKK::realloc_kokkos(d_idx_rho_count, "pace:idx_rho_count", nelements);
|
||||
auto h_idx_rho_count = Kokkos::create_mirror_view(d_idx_rho_count);
|
||||
|
||||
for (int n = 0; n < nelements; n++) {
|
||||
@ -295,14 +295,14 @@ void PairPACEKokkos<DeviceType>::copy_tilde()
|
||||
|
||||
Kokkos::deep_copy(d_idx_rho_count, h_idx_rho_count);
|
||||
|
||||
memoryKK->realloc_kokkos(d_rank, "pace:rank", nelements, total_basis_size_max);
|
||||
memoryKK->realloc_kokkos(d_num_ms_combs, "pace:num_ms_combs", nelements, total_basis_size_max);
|
||||
memoryKK->realloc_kokkos(d_offsets, "pace:offsets", nelements, idx_rho_max);
|
||||
memoryKK->realloc_kokkos(d_mus, "pace:mus", nelements, total_basis_size_max, basis_set->rankmax);
|
||||
memoryKK->realloc_kokkos(d_ns, "pace:ns", nelements, total_basis_size_max, basis_set->rankmax);
|
||||
memoryKK->realloc_kokkos(d_ls, "pace:ls", nelements, total_basis_size_max, basis_set->rankmax);
|
||||
memoryKK->realloc_kokkos(d_ms_combs, "pace:ms_combs", nelements, idx_rho_max, basis_set->rankmax);
|
||||
memoryKK->realloc_kokkos(d_ctildes, "pace:ctildes", nelements, idx_rho_max, basis_set->ndensitymax);
|
||||
MemKK::realloc_kokkos(d_rank, "pace:rank", nelements, total_basis_size_max);
|
||||
MemKK::realloc_kokkos(d_num_ms_combs, "pace:num_ms_combs", nelements, total_basis_size_max);
|
||||
MemKK::realloc_kokkos(d_offsets, "pace:offsets", nelements, idx_rho_max);
|
||||
MemKK::realloc_kokkos(d_mus, "pace:mus", nelements, total_basis_size_max, basis_set->rankmax);
|
||||
MemKK::realloc_kokkos(d_ns, "pace:ns", nelements, total_basis_size_max, basis_set->rankmax);
|
||||
MemKK::realloc_kokkos(d_ls, "pace:ls", nelements, total_basis_size_max, basis_set->rankmax);
|
||||
MemKK::realloc_kokkos(d_ms_combs, "pace:ms_combs", nelements, idx_rho_max, basis_set->rankmax);
|
||||
MemKK::realloc_kokkos(d_ctildes, "pace:ctildes", nelements, idx_rho_max, basis_set->ndensitymax);
|
||||
|
||||
auto h_rank = Kokkos::create_mirror_view(d_rank);
|
||||
auto h_num_ms_combs = Kokkos::create_mirror_view(d_num_ms_combs);
|
||||
@ -418,10 +418,10 @@ void PairPACEKokkos<DeviceType>::init_style()
|
||||
|
||||
// spherical harmonics
|
||||
|
||||
memoryKK->realloc_kokkos(alm, "pace:alm", (lmax + 1) * (lmax + 1));
|
||||
memoryKK->realloc_kokkos(blm, "pace:blm", (lmax + 1) * (lmax + 1));
|
||||
memoryKK->realloc_kokkos(cl, "pace:cl", lmax + 1);
|
||||
memoryKK->realloc_kokkos(dl, "pace:dl", lmax + 1);
|
||||
MemKK::realloc_kokkos(alm, "pace:alm", (lmax + 1) * (lmax + 1));
|
||||
MemKK::realloc_kokkos(blm, "pace:blm", (lmax + 1) * (lmax + 1));
|
||||
MemKK::realloc_kokkos(cl, "pace:cl", lmax + 1);
|
||||
MemKK::realloc_kokkos(dl, "pace:dl", lmax + 1);
|
||||
|
||||
pre_compute_harmonics(lmax);
|
||||
copy_pertype();
|
||||
@ -474,12 +474,12 @@ void PairPACEKokkos<DeviceType>::allocate()
|
||||
PairPACE::allocate();
|
||||
|
||||
int n = atom->ntypes + 1;
|
||||
memoryKK->realloc_kokkos(d_map, "pace:map", n);
|
||||
MemKK::realloc_kokkos(d_map, "pace:map", n);
|
||||
|
||||
memoryKK->realloc_kokkos(k_cutsq, "pace:cutsq", n, n);
|
||||
MemKK::realloc_kokkos(k_cutsq, "pace:cutsq", n, n);
|
||||
d_cutsq = k_cutsq.template view<DeviceType>();
|
||||
|
||||
memoryKK->realloc_kokkos(k_scale, "pace:scale", n, n);
|
||||
MemKK::realloc_kokkos(k_scale, "pace:scale", n, n);
|
||||
d_scale = k_scale.template view<DeviceType>();
|
||||
}
|
||||
|
||||
@ -1651,56 +1651,56 @@ double PairPACEKokkos<DeviceType>::memory_usage()
|
||||
{
|
||||
double bytes = 0;
|
||||
|
||||
bytes += memoryKK->memory_usage(A);
|
||||
bytes += memoryKK->memory_usage(A_rank1);
|
||||
bytes += memoryKK->memory_usage(A_list);
|
||||
bytes += memoryKK->memory_usage(A_forward_prod);
|
||||
bytes += memoryKK->memory_usage(e_atom);
|
||||
bytes += memoryKK->memory_usage(rhos);
|
||||
bytes += memoryKK->memory_usage(dF_drho);
|
||||
bytes += memoryKK->memory_usage(weights);
|
||||
bytes += memoryKK->memory_usage(weights_rank1);
|
||||
bytes += memoryKK->memory_usage(rho_core);
|
||||
bytes += memoryKK->memory_usage(dF_drho_core);
|
||||
bytes += memoryKK->memory_usage(dB_flatten);
|
||||
bytes += memoryKK->memory_usage(fr);
|
||||
bytes += memoryKK->memory_usage(dfr);
|
||||
bytes += memoryKK->memory_usage(gr);
|
||||
bytes += memoryKK->memory_usage(dgr);
|
||||
bytes += memoryKK->memory_usage(d_values);
|
||||
bytes += memoryKK->memory_usage(d_derivatives);
|
||||
bytes += memoryKK->memory_usage(cr);
|
||||
bytes += memoryKK->memory_usage(dcr);
|
||||
bytes += memoryKK->memory_usage(plm);
|
||||
bytes += memoryKK->memory_usage(dplm);
|
||||
bytes += memoryKK->memory_usage(ylm);
|
||||
bytes += memoryKK->memory_usage(dylm);
|
||||
bytes += memoryKK->memory_usage(d_ncount);
|
||||
bytes += memoryKK->memory_usage(d_mu);
|
||||
bytes += memoryKK->memory_usage(d_rhats);
|
||||
bytes += memoryKK->memory_usage(d_rnorms);
|
||||
bytes += memoryKK->memory_usage(d_nearest);
|
||||
bytes += memoryKK->memory_usage(f_ij);
|
||||
bytes += memoryKK->memory_usage(d_rho_core_cutoff);
|
||||
bytes += memoryKK->memory_usage(d_drho_core_cutoff);
|
||||
bytes += memoryKK->memory_usage(d_E0vals);
|
||||
bytes += memoryKK->memory_usage(d_ndensity);
|
||||
bytes += memoryKK->memory_usage(d_npoti);
|
||||
bytes += memoryKK->memory_usage(d_wpre);
|
||||
bytes += memoryKK->memory_usage(d_mexp);
|
||||
bytes += memoryKK->memory_usage(d_idx_rho_count);
|
||||
bytes += memoryKK->memory_usage(d_rank);
|
||||
bytes += memoryKK->memory_usage(d_num_ms_combs);
|
||||
bytes += memoryKK->memory_usage(d_offsets);
|
||||
bytes += memoryKK->memory_usage(d_mus);
|
||||
bytes += memoryKK->memory_usage(d_ns);
|
||||
bytes += memoryKK->memory_usage(d_ls);
|
||||
bytes += memoryKK->memory_usage(d_ms_combs);
|
||||
bytes += memoryKK->memory_usage(d_ctildes);
|
||||
bytes += memoryKK->memory_usage(alm);
|
||||
bytes += memoryKK->memory_usage(blm);
|
||||
bytes += memoryKK->memory_usage(cl);
|
||||
bytes += memoryKK->memory_usage(dl);
|
||||
bytes += MemKK::memory_usage(A);
|
||||
bytes += MemKK::memory_usage(A_rank1);
|
||||
bytes += MemKK::memory_usage(A_list);
|
||||
bytes += MemKK::memory_usage(A_forward_prod);
|
||||
bytes += MemKK::memory_usage(e_atom);
|
||||
bytes += MemKK::memory_usage(rhos);
|
||||
bytes += MemKK::memory_usage(dF_drho);
|
||||
bytes += MemKK::memory_usage(weights);
|
||||
bytes += MemKK::memory_usage(weights_rank1);
|
||||
bytes += MemKK::memory_usage(rho_core);
|
||||
bytes += MemKK::memory_usage(dF_drho_core);
|
||||
bytes += MemKK::memory_usage(dB_flatten);
|
||||
bytes += MemKK::memory_usage(fr);
|
||||
bytes += MemKK::memory_usage(dfr);
|
||||
bytes += MemKK::memory_usage(gr);
|
||||
bytes += MemKK::memory_usage(dgr);
|
||||
bytes += MemKK::memory_usage(d_values);
|
||||
bytes += MemKK::memory_usage(d_derivatives);
|
||||
bytes += MemKK::memory_usage(cr);
|
||||
bytes += MemKK::memory_usage(dcr);
|
||||
bytes += MemKK::memory_usage(plm);
|
||||
bytes += MemKK::memory_usage(dplm);
|
||||
bytes += MemKK::memory_usage(ylm);
|
||||
bytes += MemKK::memory_usage(dylm);
|
||||
bytes += MemKK::memory_usage(d_ncount);
|
||||
bytes += MemKK::memory_usage(d_mu);
|
||||
bytes += MemKK::memory_usage(d_rhats);
|
||||
bytes += MemKK::memory_usage(d_rnorms);
|
||||
bytes += MemKK::memory_usage(d_nearest);
|
||||
bytes += MemKK::memory_usage(f_ij);
|
||||
bytes += MemKK::memory_usage(d_rho_core_cutoff);
|
||||
bytes += MemKK::memory_usage(d_drho_core_cutoff);
|
||||
bytes += MemKK::memory_usage(d_E0vals);
|
||||
bytes += MemKK::memory_usage(d_ndensity);
|
||||
bytes += MemKK::memory_usage(d_npoti);
|
||||
bytes += MemKK::memory_usage(d_wpre);
|
||||
bytes += MemKK::memory_usage(d_mexp);
|
||||
bytes += MemKK::memory_usage(d_idx_rho_count);
|
||||
bytes += MemKK::memory_usage(d_rank);
|
||||
bytes += MemKK::memory_usage(d_num_ms_combs);
|
||||
bytes += MemKK::memory_usage(d_offsets);
|
||||
bytes += MemKK::memory_usage(d_mus);
|
||||
bytes += MemKK::memory_usage(d_ns);
|
||||
bytes += MemKK::memory_usage(d_ls);
|
||||
bytes += MemKK::memory_usage(d_ms_combs);
|
||||
bytes += MemKK::memory_usage(d_ctildes);
|
||||
bytes += MemKK::memory_usage(alm);
|
||||
bytes += MemKK::memory_usage(blm);
|
||||
bytes += MemKK::memory_usage(cl);
|
||||
bytes += MemKK::memory_usage(dl);
|
||||
|
||||
if (k_splines_gk.h_view.data()) {
|
||||
for (int i = 0; i < nelements; i++) {
|
||||
|
||||
@ -36,6 +36,7 @@
|
||||
#include "kokkos.h"
|
||||
#include "math_const.h"
|
||||
#include "math_special.h"
|
||||
#include "memory_kokkos.h"
|
||||
#include "neigh_request.h"
|
||||
#include "neighbor.h"
|
||||
|
||||
@ -78,8 +79,8 @@ PairReaxFFKokkos<DeviceType>::PairReaxFFKokkos(LAMMPS *lmp) : PairReaxFF(lmp)
|
||||
k_error_flag = DAT::tdual_int_scalar("pair:error_flag");
|
||||
k_nbuf_local = DAT::tdual_int_scalar("pair:nbuf_local");
|
||||
|
||||
d_torsion_pack = t_reax_int4_2d("reaxff:torsion_pack",1,2);
|
||||
d_angular_pack = t_reax_int4_2d("reaxff:angular_pack",1,2);
|
||||
MemKK::realloc_kokkos(d_torsion_pack,"reaxff:torsion_pack",1,2);
|
||||
MemKK::realloc_kokkos(d_angular_pack,"reaxff:angular_pack",1,2);
|
||||
|
||||
k_count_angular_torsion = DAT::tdual_int_1d("PairReaxFF::count_angular_torsion",2);
|
||||
d_count_angular_torsion = k_count_angular_torsion.template view<DeviceType>();
|
||||
@ -883,17 +884,10 @@ void PairReaxFFKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairReaxBuildListsFull>(0,ignum),*this);
|
||||
|
||||
// allocate duplicated memory
|
||||
if (need_dup) {
|
||||
if (need_dup)
|
||||
dup_CdDelta = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_CdDelta);
|
||||
//dup_Cdbo = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_Cdbo);
|
||||
//dup_Cdbopi = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_Cdbopi);
|
||||
//dup_Cdbopi2 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_Cdbopi2);
|
||||
} else {
|
||||
else
|
||||
ndup_CdDelta = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_CdDelta);
|
||||
//ndup_Cdbo = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_Cdbo);
|
||||
//ndup_Cdbopi = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_Cdbopi);
|
||||
//ndup_Cdbopi2 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_Cdbopi2);
|
||||
}
|
||||
|
||||
// reduction over duplicated memory
|
||||
if (need_dup)
|
||||
@ -959,9 +953,9 @@ void PairReaxFFKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
count_torsion = h_count_angular_torsion(1);
|
||||
|
||||
if (count_angular > (int)d_angular_pack.extent(0))
|
||||
d_angular_pack = t_reax_int4_2d("reaxff:angular_pack",(int)(count_angular * 1.1),2);
|
||||
MemKK::realloc_kokkos(d_angular_pack,"reaxff:angular_pack",(int)(count_angular * 1.1),2);
|
||||
if (count_torsion > (int)d_torsion_pack.extent(0))
|
||||
d_torsion_pack = t_reax_int4_2d("reaxff:torsion_pack",(int)(count_torsion * 1.1),2);
|
||||
MemKK::realloc_kokkos(d_torsion_pack,"reaxff:torsion_pack",(int)(count_torsion * 1.1),2);
|
||||
|
||||
// need to zero to re-count
|
||||
h_count_angular_torsion(0) = 0;
|
||||
@ -1033,26 +1027,12 @@ void PairReaxFFKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
if (need_dup) {
|
||||
Kokkos::Experimental::contribute(d_dDeltap_self, dup_dDeltap_self); // needed in ComputeBond2
|
||||
Kokkos::Experimental::contribute(d_CdDelta, dup_CdDelta); // needed in ComputeBond2
|
||||
|
||||
//Kokkos::Experimental::contribute(d_Cdbo, dup_Cdbo); // needed in UpdateBond, but also used in UpdateBond
|
||||
//Kokkos::Experimental::contribute(d_Cdbopi, dup_Cdbopi); // needed in UpdateBond, but also used in UpdateBond
|
||||
//Kokkos::Experimental::contribute(d_Cdbopi2, dup_Cdbopi2); // needed in UpdateBond, but also used in UpdateBond
|
||||
//dup_Cdbo.reset_except(d_Cdbo);
|
||||
//dup_Cdbopi.reset_except(d_Cdbopi);
|
||||
//dup_Cdbopi2.reset_except(d_Cdbopi2);
|
||||
}
|
||||
|
||||
// Bond force
|
||||
if (neighflag == HALF) {
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairReaxUpdateBond<HALF>>(0,ignum),*this);
|
||||
|
||||
// reduction over duplicated memory
|
||||
//if (need_dup) {
|
||||
// Kokkos::Experimental::contribute(d_Cdbo, dup_Cdbo); // needed in ComputeBond2
|
||||
// Kokkos::Experimental::contribute(d_Cdbopi, dup_Cdbopi); // needed in ComputeBond2
|
||||
// Kokkos::Experimental::contribute(d_Cdbopi2, dup_Cdbopi2); // needed in ComputeBond2
|
||||
//}
|
||||
|
||||
if (vflag_either)
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairReaxComputeBond2<HALF,1>>(0,ignum),*this,ev);
|
||||
else
|
||||
@ -1062,13 +1042,6 @@ void PairReaxFFKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
} else { //if (neighflag == HALFTHREAD) {
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairReaxUpdateBond<HALFTHREAD>>(0,ignum),*this);
|
||||
|
||||
// reduction over duplicated memory
|
||||
//if (need_dup) {
|
||||
// Kokkos::Experimental::contribute(d_Cdbo, dup_Cdbo); // needed in ComputeBond2
|
||||
// Kokkos::Experimental::contribute(d_Cdbopi, dup_Cdbopi); // needed in ComputeBond2
|
||||
// Kokkos::Experimental::contribute(d_Cdbopi2, dup_Cdbopi2); // needed in ComputeBond2
|
||||
//}
|
||||
|
||||
if (vflag_either)
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairReaxComputeBond2<HALFTHREAD,1>>(0,ignum),*this,ev);
|
||||
else
|
||||
@ -1116,7 +1089,7 @@ void PairReaxFFKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
|
||||
copymode = 0;
|
||||
|
||||
// free duplicated memory
|
||||
// free scatterview memory
|
||||
if (need_dup) {
|
||||
dup_f = decltype(dup_f)();
|
||||
dup_eatom = decltype(dup_eatom)();
|
||||
@ -1124,9 +1097,13 @@ void PairReaxFFKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
dup_dDeltap_self = decltype(dup_dDeltap_self)();
|
||||
dup_total_bo = decltype(dup_total_bo)();
|
||||
dup_CdDelta = decltype(dup_CdDelta)();
|
||||
//dup_Cdbo = decltype(dup_Cdbo)();
|
||||
//dup_Cdbopi = decltype(dup_Cdbopi)();
|
||||
//dup_Cdbopi2 = decltype(dup_Cdbopi2)();
|
||||
} else {
|
||||
ndup_f = decltype(ndup_f)();
|
||||
ndup_eatom = decltype(ndup_eatom)();
|
||||
ndup_vatom = decltype(ndup_vatom)();
|
||||
ndup_dDeltap_self = decltype(ndup_dDeltap_self)();
|
||||
ndup_total_bo = decltype(ndup_total_bo)();
|
||||
ndup_CdDelta = decltype(ndup_CdDelta)();
|
||||
}
|
||||
|
||||
d_neighbors = typename AT::t_neighbors_2d();
|
||||
@ -1512,131 +1489,74 @@ void PairReaxFFKokkos<DeviceType>::operator()(TagPairReaxComputeTabulatedLJCoulo
|
||||
template<class DeviceType>
|
||||
void PairReaxFFKokkos<DeviceType>::allocate_array()
|
||||
{
|
||||
// deallocate first to reduce memory overhead
|
||||
|
||||
deallocate_array();
|
||||
// free scatterview memory
|
||||
if (need_dup) {
|
||||
dup_dDeltap_self = decltype(dup_dDeltap_self)();
|
||||
dup_total_bo = decltype(dup_total_bo)();
|
||||
dup_CdDelta = decltype(dup_CdDelta)();
|
||||
} else {
|
||||
ndup_dDeltap_self = decltype(ndup_dDeltap_self)();
|
||||
ndup_total_bo = decltype(ndup_total_bo)();
|
||||
ndup_CdDelta = decltype(ndup_CdDelta)();
|
||||
}
|
||||
|
||||
if (cut_hbsq > 0.0) {
|
||||
d_hb_first = typename AT::t_int_1d("reaxff/kk:hb_first",nmax);
|
||||
d_hb_num = typename AT::t_int_1d("reaxff/kk:hb_num",nmax);
|
||||
d_hb_list = typename AT::t_int_1d("reaxff/kk:hb_list",nmax*maxhb);
|
||||
MemKK::realloc_kokkos(d_hb_first,"reaxff/kk:hb_first",nmax);
|
||||
MemKK::realloc_kokkos(d_hb_num,"reaxff/kk:hb_num",nmax);
|
||||
MemKK::realloc_kokkos(d_hb_list,"reaxff/kk:hb_list",nmax*maxhb);
|
||||
}
|
||||
d_bo_first = typename AT::t_int_1d("reaxff/kk:bo_first",nmax);
|
||||
d_bo_num = typename AT::t_int_1d("reaxff/kk:bo_num",nmax);
|
||||
d_bo_list = typename AT::t_int_1d("reaxff/kk:bo_list",nmax*maxbo);
|
||||
MemKK::realloc_kokkos(d_bo_first,"reaxff/kk:bo_first",nmax);
|
||||
MemKK::realloc_kokkos(d_bo_num,"reaxff/kk:bo_num",nmax);
|
||||
MemKK::realloc_kokkos(d_bo_list,"reaxff/kk:bo_list",nmax*maxbo);
|
||||
|
||||
d_BO = typename AT::t_ffloat_2d_dl("reaxff/kk:BO",nmax,maxbo);
|
||||
d_BO_s = typename AT::t_ffloat_2d_dl("reaxff/kk:BO",nmax,maxbo);
|
||||
d_BO_pi = typename AT::t_ffloat_2d_dl("reaxff/kk:BO_pi",nmax,maxbo);
|
||||
d_BO_pi2 = typename AT::t_ffloat_2d_dl("reaxff/kk:BO_pi2",nmax,maxbo);
|
||||
MemKK::realloc_kokkos(d_BO,"reaxff/kk:BO",nmax,maxbo);
|
||||
MemKK::realloc_kokkos(d_BO_s,"reaxff/kk:BO",nmax,maxbo);
|
||||
MemKK::realloc_kokkos(d_BO_pi,"reaxff/kk:BO_pi",nmax,maxbo);
|
||||
MemKK::realloc_kokkos(d_BO_pi2,"reaxff/kk:BO_pi2",nmax,maxbo);
|
||||
|
||||
d_dln_BOp_pi = typename AT::t_ffloat_2d_dl("reaxff/kk:d_dln_BOp_pi",nmax,maxbo);
|
||||
d_dln_BOp_pi2 = typename AT::t_ffloat_2d_dl("reaxff/kk:d_dln_BOp_pi2",nmax,maxbo);
|
||||
MemKK::realloc_kokkos(d_dln_BOp_pi,"reaxff/kk:d_dln_BOp_pi",nmax,maxbo);
|
||||
MemKK::realloc_kokkos(d_dln_BOp_pi2,"reaxff/kk:d_dln_BOp_pi2",nmax,maxbo);
|
||||
|
||||
d_C1dbo = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C1dbo",nmax,maxbo);
|
||||
d_C2dbo = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C2dbo",nmax,maxbo);
|
||||
d_C3dbo = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C3dbo",nmax,maxbo);
|
||||
MemKK::realloc_kokkos(d_C1dbo,"reaxff/kk:d_C1dbo",nmax,maxbo);
|
||||
MemKK::realloc_kokkos(d_C2dbo,"reaxff/kk:d_C2dbo",nmax,maxbo);
|
||||
MemKK::realloc_kokkos(d_C3dbo,"reaxff/kk:d_C3dbo",nmax,maxbo);
|
||||
|
||||
d_C1dbopi = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C1dbopi",nmax,maxbo);
|
||||
d_C2dbopi = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C2dbopi",nmax,maxbo);
|
||||
d_C3dbopi = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C3dbopi",nmax,maxbo);
|
||||
d_C4dbopi = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C4dbopi",nmax,maxbo);
|
||||
MemKK::realloc_kokkos(d_C1dbopi,"reaxff/kk:d_C1dbopi",nmax,maxbo);
|
||||
MemKK::realloc_kokkos(d_C2dbopi,"reaxff/kk:d_C2dbopi",nmax,maxbo);
|
||||
MemKK::realloc_kokkos(d_C3dbopi,"reaxff/kk:d_C3dbopi",nmax,maxbo);
|
||||
MemKK::realloc_kokkos(d_C4dbopi,"reaxff/kk:d_C4dbopi",nmax,maxbo);
|
||||
|
||||
d_C1dbopi2 = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C1dbopi2",nmax,maxbo);
|
||||
d_C2dbopi2 = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C2dbopi2",nmax,maxbo);
|
||||
d_C3dbopi2 = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C3dbopi2",nmax,maxbo);
|
||||
d_C4dbopi2 = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C4dbopi2",nmax,maxbo);
|
||||
MemKK::realloc_kokkos(d_C1dbopi2,"reaxff/kk:d_C1dbopi2",nmax,maxbo);
|
||||
MemKK::realloc_kokkos(d_C2dbopi2,"reaxff/kk:d_C2dbopi2",nmax,maxbo);
|
||||
MemKK::realloc_kokkos(d_C3dbopi2,"reaxff/kk:d_C3dbopi2",nmax,maxbo);
|
||||
MemKK::realloc_kokkos(d_C4dbopi2,"reaxff/kk:d_C4dbopi2",nmax,maxbo);
|
||||
|
||||
d_dBOp = typename AT::t_ffloat_2d_dl("reaxff/kk:dBOp",nmax,maxbo);
|
||||
MemKK::realloc_kokkos(d_dBOp,"reaxff/kk:dBOp",nmax,maxbo);
|
||||
|
||||
d_dDeltap_self = typename AT::t_ffloat_2d_dl("reaxff/kk:dDeltap_self",nmax,3);
|
||||
d_Deltap_boc = typename AT::t_ffloat_1d("reaxff/kk:Deltap_boc",nmax);
|
||||
d_Deltap = typename AT::t_ffloat_1d("reaxff/kk:Deltap",nmax);
|
||||
d_total_bo = typename AT::t_ffloat_1d("reaxff/kk:total_bo",nmax);
|
||||
MemKK::realloc_kokkos(d_dDeltap_self,"reaxff/kk:dDeltap_self",nmax,3);
|
||||
MemKK::realloc_kokkos(d_Deltap_boc,"reaxff/kk:Deltap_boc",nmax);
|
||||
MemKK::realloc_kokkos(d_Deltap,"reaxff/kk:Deltap",nmax);
|
||||
MemKK::realloc_kokkos(d_total_bo,"reaxff/kk:total_bo",nmax);
|
||||
|
||||
d_Cdbo = typename AT::t_ffloat_2d_dl("reaxff/kk:Cdbo",nmax,3*maxbo);
|
||||
d_Cdbopi = typename AT::t_ffloat_2d_dl("reaxff/kk:Cdbopi",nmax,3*maxbo);
|
||||
d_Cdbopi2 = typename AT::t_ffloat_2d_dl("reaxff/kk:Cdbopi2",nmax,3*maxbo);
|
||||
MemKK::realloc_kokkos(d_Cdbo,"reaxff/kk:Cdbo",nmax,3*maxbo);
|
||||
MemKK::realloc_kokkos(d_Cdbopi,"reaxff/kk:Cdbopi",nmax,3*maxbo);
|
||||
MemKK::realloc_kokkos(d_Cdbopi2,"reaxff/kk:Cdbopi2",nmax,3*maxbo);
|
||||
|
||||
d_Delta = typename AT::t_ffloat_1d("reaxff/kk:Delta",nmax);
|
||||
d_Delta_boc = typename AT::t_ffloat_1d("reaxff/kk:Delta_boc",nmax);
|
||||
d_dDelta_lp = typename AT::t_ffloat_1d("reaxff/kk:dDelta_lp",nmax);
|
||||
d_Delta_lp = typename AT::t_ffloat_1d("reaxff/kk:Delta_lp",nmax);
|
||||
d_Delta_lp_temp = typename AT::t_ffloat_1d("reaxff/kk:Delta_lp_temp",nmax);
|
||||
d_CdDelta = typename AT::t_ffloat_1d("reaxff/kk:CdDelta",nmax);
|
||||
d_sum_ovun = typename AT::t_ffloat_2d_dl("reaxff/kk:sum_ovun",nmax,3);
|
||||
MemKK::realloc_kokkos(d_Delta,"reaxff/kk:Delta",nmax);
|
||||
MemKK::realloc_kokkos(d_Delta_boc,"reaxff/kk:Delta_boc",nmax);
|
||||
MemKK::realloc_kokkos(d_dDelta_lp,"reaxff/kk:dDelta_lp",nmax);
|
||||
MemKK::realloc_kokkos(d_Delta_lp,"reaxff/kk:Delta_lp",nmax);
|
||||
MemKK::realloc_kokkos(d_Delta_lp_temp,"reaxff/kk:Delta_lp_temp",nmax);
|
||||
MemKK::realloc_kokkos(d_CdDelta,"reaxff/kk:CdDelta",nmax);
|
||||
MemKK::realloc_kokkos(d_sum_ovun,"reaxff/kk:sum_ovun",nmax,3);
|
||||
|
||||
// FixReaxFFBonds
|
||||
d_abo = typename AT::t_ffloat_2d("reaxff/kk:abo",nmax,maxbo);
|
||||
d_neighid = typename AT::t_tagint_2d("reaxff/kk:neighid",nmax,maxbo);
|
||||
d_numneigh_bonds = typename AT::t_int_1d("reaxff/kk:numneigh_bonds",nmax);
|
||||
MemKK::realloc_kokkos(d_abo,"reaxff/kk:abo",nmax,maxbo);
|
||||
MemKK::realloc_kokkos(d_neighid,"reaxff/kk:neighid",nmax,maxbo);
|
||||
MemKK::realloc_kokkos(d_numneigh_bonds,"reaxff/kk:numneigh_bonds",nmax);
|
||||
|
||||
// ComputeAngular intermediates
|
||||
d_angular_intermediates = typename AT::t_ffloat_2d("reaxff/kk:angular_intermediates",nmax,4);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairReaxFFKokkos<DeviceType>::deallocate_array()
|
||||
{
|
||||
if (cut_hbsq > 0.0) {
|
||||
d_hb_first = typename AT::t_int_1d();
|
||||
d_hb_num = typename AT::t_int_1d();
|
||||
d_hb_list = typename AT::t_int_1d();
|
||||
}
|
||||
d_bo_first = typename AT::t_int_1d();
|
||||
d_bo_num = typename AT::t_int_1d();
|
||||
d_bo_list = typename AT::t_int_1d();
|
||||
|
||||
d_BO = typename AT::t_ffloat_2d_dl();
|
||||
d_BO_s = typename AT::t_ffloat_2d_dl();
|
||||
d_BO_pi = typename AT::t_ffloat_2d_dl();
|
||||
d_BO_pi2 = typename AT::t_ffloat_2d_dl();
|
||||
|
||||
d_dln_BOp_pi = typename AT::t_ffloat_2d_dl();
|
||||
d_dln_BOp_pi2 = typename AT::t_ffloat_2d_dl();
|
||||
|
||||
d_C1dbo = typename AT::t_ffloat_2d_dl();
|
||||
d_C2dbo = typename AT::t_ffloat_2d_dl();
|
||||
d_C3dbo = typename AT::t_ffloat_2d_dl();
|
||||
|
||||
d_C1dbopi = typename AT::t_ffloat_2d_dl();
|
||||
d_C2dbopi = typename AT::t_ffloat_2d_dl();
|
||||
d_C3dbopi = typename AT::t_ffloat_2d_dl();
|
||||
d_C4dbopi = typename AT::t_ffloat_2d_dl();
|
||||
|
||||
d_C1dbopi2 = typename AT::t_ffloat_2d_dl();
|
||||
d_C2dbopi2 = typename AT::t_ffloat_2d_dl();
|
||||
d_C3dbopi2 = typename AT::t_ffloat_2d_dl();
|
||||
d_C4dbopi2 = typename AT::t_ffloat_2d_dl();
|
||||
|
||||
d_dBOp = typename AT::t_ffloat_2d_dl();
|
||||
|
||||
d_dDeltap_self = typename AT::t_ffloat_2d_dl();
|
||||
d_Deltap_boc = typename AT::t_ffloat_1d();
|
||||
d_Deltap = typename AT::t_ffloat_1d();
|
||||
d_total_bo = typename AT::t_ffloat_1d();
|
||||
|
||||
d_Cdbo = typename AT::t_ffloat_2d_dl();
|
||||
d_Cdbopi = typename AT::t_ffloat_2d_dl();
|
||||
d_Cdbopi2 = typename AT::t_ffloat_2d_dl();
|
||||
|
||||
d_Delta = typename AT::t_ffloat_1d();
|
||||
d_Delta_boc = typename AT::t_ffloat_1d();
|
||||
d_dDelta_lp = typename AT::t_ffloat_1d();
|
||||
d_Delta_lp = typename AT::t_ffloat_1d();
|
||||
d_Delta_lp_temp = typename AT::t_ffloat_1d();
|
||||
d_CdDelta = typename AT::t_ffloat_1d();
|
||||
d_sum_ovun = typename AT::t_ffloat_2d_dl();
|
||||
|
||||
// FixReaxFFBonds
|
||||
d_abo = typename AT::t_ffloat_2d();
|
||||
d_neighid = typename AT::t_tagint_2d();
|
||||
d_numneigh_bonds = typename AT::t_int_1d();
|
||||
|
||||
// ComputeAngular intermediates
|
||||
d_angular_intermediates = typename AT::t_ffloat_2d();
|
||||
MemKK::realloc_kokkos(d_angular_intermediates,"reaxff/kk:angular_intermediates",nmax,4);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -3549,9 +3469,6 @@ void PairReaxFFKokkos<DeviceType>::operator()(TagPairReaxUpdateBond<NEIGHFLAG>,
|
||||
Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,KKDeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value>> a_Cdbo = d_Cdbo;
|
||||
Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,KKDeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value>> a_Cdbopi = d_Cdbopi;
|
||||
Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,KKDeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value>> a_Cdbopi2 = d_Cdbopi2;
|
||||
//auto a_Cdbo = dup_Cdbo.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
|
||||
//auto a_Cdbopi = dup_Cdbopi.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
|
||||
//auto a_Cdbopi2 = dup_Cdbopi2.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
|
||||
|
||||
const int i = d_ilist[ii];
|
||||
const tagint itag = tag(i);
|
||||
|
||||
@ -384,7 +384,6 @@ class PairReaxFFKokkos : public PairReaxFF {
|
||||
protected:
|
||||
void allocate();
|
||||
void allocate_array();
|
||||
void deallocate_array();
|
||||
void setup();
|
||||
void init_md();
|
||||
int Init_Lookup_Tables();
|
||||
@ -436,6 +435,8 @@ class PairReaxFFKokkos : public PairReaxFF {
|
||||
typename AT::t_ffloat_2d_dl d_C1dbopi2, d_C2dbopi2, d_C3dbopi2, d_C4dbopi2;
|
||||
typename AT::t_ffloat_2d_dl d_Cdbo, d_Cdbopi, d_Cdbopi2, d_dDeltap_self;
|
||||
|
||||
int need_dup;
|
||||
|
||||
using KKDeviceType = typename KKDevice<DeviceType>::value;
|
||||
|
||||
template<typename DataType, typename Layout>
|
||||
@ -444,27 +445,19 @@ class PairReaxFFKokkos : public PairReaxFF {
|
||||
template<typename DataType, typename Layout>
|
||||
using NonDupScatterView = KKScatterView<DataType, Layout, KKDeviceType, KKScatterSum, KKScatterNonDuplicated>;
|
||||
|
||||
DupScatterView<F_FLOAT*, typename DAT::t_float_1d::array_layout> dup_total_bo;
|
||||
DupScatterView<F_FLOAT*, typename DAT::t_float_1d::array_layout> dup_CdDelta;
|
||||
DupScatterView<E_FLOAT*, typename DAT::t_efloat_1d::array_layout> dup_eatom;
|
||||
DupScatterView<F_FLOAT*[3], typename DAT::t_f_array::array_layout> dup_f;
|
||||
DupScatterView<E_FLOAT*, typename DAT::t_efloat_1d::array_layout> dup_eatom;
|
||||
DupScatterView<F_FLOAT*[6], typename DAT::t_virial_array::array_layout> dup_vatom;
|
||||
DupScatterView<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout> dup_dDeltap_self;
|
||||
DupScatterView<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout> dup_Cdbo;
|
||||
DupScatterView<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout> dup_Cdbopi;
|
||||
DupScatterView<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout> dup_Cdbopi2;
|
||||
DupScatterView<F_FLOAT*, typename DAT::t_float_1d::array_layout> dup_total_bo;
|
||||
DupScatterView<F_FLOAT*, typename DAT::t_float_1d::array_layout> dup_CdDelta;
|
||||
|
||||
NonDupScatterView<F_FLOAT*, typename DAT::t_float_1d::array_layout> ndup_total_bo;
|
||||
NonDupScatterView<F_FLOAT*, typename DAT::t_float_1d::array_layout> ndup_CdDelta;
|
||||
NonDupScatterView<E_FLOAT*, typename DAT::t_efloat_1d::array_layout> ndup_eatom;
|
||||
NonDupScatterView<F_FLOAT*[3], typename DAT::t_f_array::array_layout> ndup_f;
|
||||
NonDupScatterView<E_FLOAT*, typename DAT::t_efloat_1d::array_layout> ndup_eatom;
|
||||
NonDupScatterView<F_FLOAT*[6], typename DAT::t_virial_array::array_layout> ndup_vatom;
|
||||
NonDupScatterView<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout> ndup_dDeltap_self;
|
||||
NonDupScatterView<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout> ndup_Cdbo;
|
||||
NonDupScatterView<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout> ndup_Cdbopi;
|
||||
NonDupScatterView<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout> ndup_Cdbopi2;
|
||||
|
||||
int need_dup;
|
||||
NonDupScatterView<F_FLOAT*, typename DAT::t_float_1d::array_layout> ndup_total_bo;
|
||||
NonDupScatterView<F_FLOAT*, typename DAT::t_float_1d::array_layout> ndup_CdDelta;
|
||||
|
||||
typedef Kokkos::DualView<F_FLOAT**[7],typename DeviceType::array_layout,DeviceType> tdual_ffloat_2d_n7;
|
||||
typedef typename tdual_ffloat_2d_n7::t_dev_const_randomread t_ffloat_2d_n7_randomread;
|
||||
|
||||
@ -238,44 +238,14 @@ class PairSNAPKokkos : public PairSNAP {
|
||||
typename AT::t_efloat_1d d_eatom;
|
||||
typename AT::t_virial_array d_vatom;
|
||||
|
||||
typedef Kokkos::View<F_FLOAT**> t_bvec;
|
||||
t_bvec bvec;
|
||||
typedef Kokkos::View<F_FLOAT***> t_dbvec;
|
||||
t_dbvec dbvec;
|
||||
SNAKokkos<DeviceType, real_type, vector_length> snaKK;
|
||||
|
||||
int inum,max_neighs,chunk_size,chunk_offset;
|
||||
int host_flag;
|
||||
int host_flag,neighflag;
|
||||
|
||||
int eflag,vflag;
|
||||
|
||||
void allocate() override;
|
||||
//void read_files(char *, char *);
|
||||
/*template<class DeviceType>
|
||||
inline int equal(double* x,double* y);
|
||||
template<class DeviceType>
|
||||
inline double dist2(double* x,double* y);
|
||||
double extra_cutoff();
|
||||
void load_balance();
|
||||
void set_sna_to_shared(int snaid,int i);
|
||||
void build_per_atom_arrays();*/
|
||||
|
||||
int neighflag;
|
||||
|
||||
Kokkos::View<T_INT*, DeviceType> ilistmast;
|
||||
Kokkos::View<T_INT*, DeviceType> ghostilist;
|
||||
Kokkos::View<T_INT*, DeviceType> ghostnumneigh;
|
||||
Kokkos::View<T_INT*, DeviceType> ghostneighs;
|
||||
Kokkos::View<T_INT*, DeviceType> ghostfirstneigh;
|
||||
|
||||
Kokkos::View<T_INT**, Kokkos::LayoutRight, DeviceType> i_pairs;
|
||||
Kokkos::View<T_INT***, Kokkos::LayoutRight, DeviceType> i_rij;
|
||||
Kokkos::View<T_INT**, Kokkos::LayoutRight, DeviceType> i_inside;
|
||||
Kokkos::View<F_FLOAT**, Kokkos::LayoutRight, DeviceType> i_wj;
|
||||
Kokkos::View<F_FLOAT***, Kokkos::LayoutRight, DeviceType>i_rcutij;
|
||||
Kokkos::View<T_INT*, DeviceType> i_ninside;
|
||||
Kokkos::View<F_FLOAT****, Kokkos::LayoutRight, DeviceType> i_uarraytot_r, i_uarraytot_i;
|
||||
Kokkos::View<F_FLOAT******, Kokkos::LayoutRight, DeviceType> i_zarray_r, i_zarray_i;
|
||||
|
||||
Kokkos::View<real_type*, DeviceType> d_radelem; // element radii
|
||||
Kokkos::View<real_type*, DeviceType> d_wjelem; // elements weights
|
||||
@ -286,7 +256,6 @@ class PairSNAPKokkos : public PairSNAP {
|
||||
Kokkos::View<T_INT*, DeviceType> d_ninside; // ninside for all atoms in list
|
||||
Kokkos::View<real_type**, DeviceType> d_beta; // betas for all atoms in list
|
||||
Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> d_beta_pack; // betas for all atoms in list, GPU
|
||||
Kokkos::View<real_type**, DeviceType> d_bispectrum; // bispectrum components for all atoms in list
|
||||
|
||||
typedef Kokkos::DualView<F_FLOAT**, DeviceType> tdual_fparams;
|
||||
tdual_fparams k_cutsq;
|
||||
|
||||
@ -206,10 +206,10 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::compute(int eflag_in,
|
||||
|
||||
if (beta_max < inum) {
|
||||
beta_max = inum;
|
||||
d_beta = Kokkos::View<real_type**, DeviceType>("PairSNAPKokkos:beta",ncoeff,inum);
|
||||
MemKK::realloc_kokkos(d_beta,"PairSNAPKokkos:beta",ncoeff,inum);
|
||||
if (!host_flag)
|
||||
d_beta_pack = Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType>("PairSNAPKokkos:beta_pack",vector_length,ncoeff,(inum + vector_length - 1) / vector_length);
|
||||
d_ninside = Kokkos::View<int*, DeviceType>("PairSNAPKokkos:ninside",inum);
|
||||
MemKK::realloc_kokkos(d_beta_pack,"PairSNAPKokkos:beta_pack",vector_length,ncoeff,(inum + vector_length - 1) / vector_length);
|
||||
MemKK::realloc_kokkos(d_ninside,"PairSNAPKokkos:ninside",inum);
|
||||
}
|
||||
|
||||
chunk_size = MIN(chunksize,inum); // "chunksize" variable is set by user
|
||||
@ -545,7 +545,7 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::allocate()
|
||||
PairSNAP::allocate();
|
||||
|
||||
int n = atom->ntypes;
|
||||
d_map = Kokkos::View<T_INT*, DeviceType>("PairSNAPKokkos::map",n+1);
|
||||
MemKK::realloc_kokkos(d_map,"PairSNAPKokkos::map",n+1);
|
||||
}
|
||||
|
||||
|
||||
@ -574,11 +574,11 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::coeff(int narg, char
|
||||
|
||||
// Set up element lists
|
||||
|
||||
d_radelem = Kokkos::View<real_type*, DeviceType>("pair:radelem",nelements);
|
||||
d_wjelem = Kokkos::View<real_type*, DeviceType>("pair:wjelem",nelements);
|
||||
d_coeffelem = Kokkos::View<real_type**, Kokkos::LayoutRight, DeviceType>("pair:coeffelem",nelements,ncoeffall);
|
||||
d_sinnerelem = Kokkos::View<real_type*, DeviceType>("pair:sinnerelem",nelements);
|
||||
d_dinnerelem = Kokkos::View<real_type*, DeviceType>("pair:dinnerelem",nelements);
|
||||
MemKK::realloc_kokkos(d_radelem,"pair:radelem",nelements);
|
||||
MemKK::realloc_kokkos(d_wjelem,"pair:wjelem",nelements);
|
||||
MemKK::realloc_kokkos(d_coeffelem,"pair:coeffelem",nelements,ncoeffall);
|
||||
MemKK::realloc_kokkos(d_sinnerelem,"pair:sinnerelem",nelements);
|
||||
MemKK::realloc_kokkos(d_dinnerelem,"pair:dinnerelem",nelements);
|
||||
|
||||
auto h_radelem = Kokkos::create_mirror_view(d_radelem);
|
||||
auto h_wjelem = Kokkos::create_mirror_view(d_wjelem);
|
||||
@ -1411,12 +1411,16 @@ template<class DeviceType, typename real_type, int vector_length>
|
||||
double PairSNAPKokkos<DeviceType, real_type, vector_length>::memory_usage()
|
||||
{
|
||||
double bytes = Pair::memory_usage();
|
||||
int n = atom->ntypes+1;
|
||||
bytes += n*n*sizeof(int);
|
||||
bytes += n*n*sizeof(real_type);
|
||||
bytes += (2*ncoeffall)*sizeof(real_type);
|
||||
bytes += (ncoeff*3)*sizeof(real_type);
|
||||
bytes += snaKK.memory_usage();
|
||||
bytes += MemKK::memory_usage(d_beta);
|
||||
if (!host_flag)
|
||||
bytes += MemKK::memory_usage(d_beta_pack);
|
||||
bytes += MemKK::memory_usage(d_ninside);
|
||||
bytes += MemKK::memory_usage(d_map);
|
||||
bytes += MemKK::memory_usage(d_radelem);
|
||||
bytes += MemKK::memory_usage(d_wjelem);
|
||||
bytes += MemKK::memory_usage(d_coeffelem);
|
||||
bytes += MemKK::memory_usage(d_sinnerelem);
|
||||
bytes += MemKK::memory_usage(d_dinnerelem);
|
||||
return bytes;
|
||||
}
|
||||
|
||||
|
||||
@ -17,6 +17,7 @@
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "sna_kokkos.h"
|
||||
#include "memory_kokkos.h"
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
@ -62,12 +63,12 @@ SNAKokkos<DeviceType, real_type, vector_length>::SNAKokkos(real_type rfac0_in,
|
||||
build_indexlist();
|
||||
|
||||
int jdimpq = twojmax + 2;
|
||||
rootpqarray = t_sna_2d("SNAKokkos::rootpqarray",jdimpq,jdimpq);
|
||||
MemKK::realloc_kokkos(rootpqarray,"SNAKokkos::rootpqarray",jdimpq,jdimpq);
|
||||
|
||||
cglist = t_sna_1d("SNAKokkos::cglist",idxcg_max);
|
||||
MemKK::realloc_kokkos(cglist,"SNAKokkos::cglist",idxcg_max);
|
||||
|
||||
if (bzero_flag) {
|
||||
bzero = Kokkos::View<real_type*, Kokkos::LayoutRight, DeviceType>("sna:bzero",twojmax+1);
|
||||
MemKK::realloc_kokkos(bzero,"sna:bzero",twojmax+1);
|
||||
auto h_bzero = Kokkos::create_mirror_view(bzero);
|
||||
|
||||
double www = wself*wself*wself;
|
||||
@ -95,7 +96,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::build_indexlist()
|
||||
// index list for cglist
|
||||
|
||||
int jdim = twojmax + 1;
|
||||
idxcg_block = Kokkos::View<int***, DeviceType>(Kokkos::NoInit("SNAKokkos::idxcg_block"),jdim,jdim,jdim);
|
||||
MemKK::realloc_kokkos(idxcg_block,"SNAKokkos::idxcg_block",jdim,jdim,jdim);
|
||||
auto h_idxcg_block = Kokkos::create_mirror_view(idxcg_block);
|
||||
|
||||
int idxcg_count = 0;
|
||||
@ -113,7 +114,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::build_indexlist()
|
||||
// index list for uarray
|
||||
// need to include both halves
|
||||
|
||||
idxu_block = Kokkos::View<int*, DeviceType>(Kokkos::NoInit("SNAKokkos::idxu_block"),jdim);
|
||||
MemKK::realloc_kokkos(idxu_block,"SNAKokkos::idxu_block",jdim);
|
||||
auto h_idxu_block = Kokkos::create_mirror_view(idxu_block);
|
||||
|
||||
int idxu_count = 0;
|
||||
@ -128,7 +129,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::build_indexlist()
|
||||
Kokkos::deep_copy(idxu_block,h_idxu_block);
|
||||
|
||||
// index list for half uarray
|
||||
idxu_half_block = Kokkos::View<int*, DeviceType>(Kokkos::NoInit("SNAKokkos::idxu_half_block"),jdim);
|
||||
MemKK::realloc_kokkos(idxu_half_block,"SNAKokkos::idxu_half_block",jdim);
|
||||
auto h_idxu_half_block = Kokkos::create_mirror_view(idxu_half_block);
|
||||
|
||||
int idxu_half_count = 0;
|
||||
@ -142,7 +143,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::build_indexlist()
|
||||
Kokkos::deep_copy(idxu_half_block, h_idxu_half_block);
|
||||
|
||||
// mapping between full and half indexing, encoding flipping
|
||||
idxu_full_half = Kokkos::View<FullHalfMapper*, DeviceType>(Kokkos::NoInit("SNAKokkos::idxu_full_half"),idxu_max);
|
||||
MemKK::realloc_kokkos(idxu_full_half,"SNAKokkos::idxu_full_half",idxu_max);
|
||||
auto h_idxu_full_half = Kokkos::create_mirror_view(idxu_full_half);
|
||||
|
||||
idxu_count = 0;
|
||||
@ -169,7 +170,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::build_indexlist()
|
||||
// index list for "cache" uarray
|
||||
// this is the GPU scratch memory requirements
|
||||
// applied the CPU structures
|
||||
idxu_cache_block = Kokkos::View<int*, DeviceType>(Kokkos::NoInit("SNAKokkos::idxu_cache_block"),jdim);
|
||||
MemKK::realloc_kokkos(idxu_cache_block,"SNAKokkos::idxu_cache_block",jdim);
|
||||
auto h_idxu_cache_block = Kokkos::create_mirror_view(idxu_cache_block);
|
||||
|
||||
int idxu_cache_count = 0;
|
||||
@ -191,7 +192,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::build_indexlist()
|
||||
if (j >= j1) idxb_count++;
|
||||
|
||||
idxb_max = idxb_count;
|
||||
idxb = Kokkos::View<int*[3], DeviceType>(Kokkos::NoInit("SNAKokkos::idxb"),idxb_max);
|
||||
MemKK::realloc_kokkos(idxb,"SNAKokkos::idxb",idxb_max);
|
||||
auto h_idxb = Kokkos::create_mirror_view(idxb);
|
||||
|
||||
idxb_count = 0;
|
||||
@ -208,7 +209,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::build_indexlist()
|
||||
|
||||
// reverse index list for beta and b
|
||||
|
||||
idxb_block = Kokkos::View<int***, DeviceType>(Kokkos::NoInit("SNAKokkos::idxb_block"),jdim,jdim,jdim);
|
||||
MemKK::realloc_kokkos(idxb_block,"SNAKokkos::idxb_block",jdim,jdim,jdim);
|
||||
auto h_idxb_block = Kokkos::create_mirror_view(idxb_block);
|
||||
|
||||
idxb_count = 0;
|
||||
@ -234,10 +235,10 @@ void SNAKokkos<DeviceType, real_type, vector_length>::build_indexlist()
|
||||
idxz_count++;
|
||||
|
||||
idxz_max = idxz_count;
|
||||
idxz = Kokkos::View<int*[10], DeviceType>(Kokkos::NoInit("SNAKokkos::idxz"),idxz_max);
|
||||
MemKK::realloc_kokkos(idxz,"SNAKokkos::idxz",idxz_max);
|
||||
auto h_idxz = Kokkos::create_mirror_view(idxz);
|
||||
|
||||
idxz_block = Kokkos::View<int***, DeviceType>(Kokkos::NoInit("SNAKokkos::idxz_block"), jdim,jdim,jdim);
|
||||
MemKK::realloc_kokkos(idxz_block,"SNAKokkos::idxz_block", jdim,jdim,jdim);
|
||||
auto h_idxz_block = Kokkos::create_mirror_view(idxz_block);
|
||||
|
||||
idxz_count = 0;
|
||||
@ -294,59 +295,59 @@ void SNAKokkos<DeviceType, real_type, vector_length>::grow_rij(int newnatom, int
|
||||
natom = newnatom;
|
||||
nmax = newnmax;
|
||||
|
||||
rij = t_sna_3d(Kokkos::NoInit("sna:rij"),natom,nmax,3);
|
||||
wj = t_sna_2d(Kokkos::NoInit("sna:wj"),natom,nmax);
|
||||
rcutij = t_sna_2d(Kokkos::NoInit("sna:rcutij"),natom,nmax);
|
||||
sinnerij = t_sna_2d(Kokkos::NoInit("sna:sinnerij"),natom,nmax);
|
||||
dinnerij = t_sna_2d(Kokkos::NoInit("sna:dinnerij"),natom,nmax);
|
||||
inside = t_sna_2i(Kokkos::NoInit("sna:inside"),natom,nmax);
|
||||
element = t_sna_2i(Kokkos::NoInit("sna:element"),natom,nmax);
|
||||
dedr = t_sna_3d(Kokkos::NoInit("sna:dedr"),natom,nmax,3);
|
||||
MemKK::realloc_kokkos(rij,"sna:rij",natom,nmax,3);
|
||||
MemKK::realloc_kokkos(wj,"sna:wj",natom,nmax);
|
||||
MemKK::realloc_kokkos(rcutij,"sna:rcutij",natom,nmax);
|
||||
MemKK::realloc_kokkos(sinnerij,"sna:sinnerij",natom,nmax);
|
||||
MemKK::realloc_kokkos(dinnerij,"sna:dinnerij",natom,nmax);
|
||||
MemKK::realloc_kokkos(inside,"sna:inside",natom,nmax);
|
||||
MemKK::realloc_kokkos(element,"sna:element",natom,nmax);
|
||||
MemKK::realloc_kokkos(dedr,"sna:dedr",natom,nmax,3);
|
||||
|
||||
#ifdef LMP_KOKKOS_GPU
|
||||
if (!host_flag) {
|
||||
const int natom_div = (natom + vector_length - 1) / vector_length;
|
||||
|
||||
a_pack = t_sna_3c_ll(Kokkos::NoInit("sna:a_pack"),vector_length,nmax,natom_div);
|
||||
b_pack = t_sna_3c_ll(Kokkos::NoInit("sna:b_pack"),vector_length,nmax,natom_div);
|
||||
da_pack = t_sna_4c_ll(Kokkos::NoInit("sna:da_pack"),vector_length,nmax,natom_div,3);
|
||||
db_pack = t_sna_4c_ll(Kokkos::NoInit("sna:db_pack"),vector_length,nmax,natom_div,3);
|
||||
sfac_pack = t_sna_4d_ll(Kokkos::NoInit("sna:sfac_pack"),vector_length,nmax,natom_div,4);
|
||||
ulisttot = t_sna_3c_ll(Kokkos::NoInit("sna:ulisttot"),1,1,1); // dummy allocation
|
||||
ulisttot_full = t_sna_3c_ll(Kokkos::NoInit("sna:ulisttot"),1,1,1);
|
||||
ulisttot_re_pack = t_sna_4d_ll(Kokkos::NoInit("sna:ulisttot_re_pack"),vector_length,idxu_half_max,nelements,natom_div);
|
||||
ulisttot_im_pack = t_sna_4d_ll(Kokkos::NoInit("sna:ulisttot_im_pack"),vector_length,idxu_half_max,nelements,natom_div);
|
||||
ulisttot_pack = t_sna_4c_ll(Kokkos::NoInit("sna:ulisttot_pack"),vector_length,idxu_max,nelements,natom_div);
|
||||
ulist = t_sna_3c_ll(Kokkos::NoInit("sna:ulist"),1,1,1);
|
||||
zlist = t_sna_3c_ll(Kokkos::NoInit("sna:zlist"),1,1,1);
|
||||
zlist_pack = t_sna_4c_ll(Kokkos::NoInit("sna:zlist_pack"),vector_length,idxz_max,ndoubles,natom_div);
|
||||
blist = t_sna_3d(Kokkos::NoInit("sna:blist"),natom,ntriples,idxb_max);
|
||||
blist_pack = t_sna_4d_ll(Kokkos::NoInit("sna:blist_pack"),vector_length,idxb_max,ntriples,natom_div);
|
||||
ylist = t_sna_3c_ll(Kokkos::NoInit("sna:ylist"),1,1,1);
|
||||
ylist_pack_re = t_sna_4d_ll(Kokkos::NoInit("sna:ylist_pack_re"),vector_length,idxu_half_max,nelements,natom_div);
|
||||
ylist_pack_im = t_sna_4d_ll(Kokkos::NoInit("sna:ylist_pack_im"),vector_length,idxu_half_max,nelements,natom_div);
|
||||
dulist = t_sna_4c3_ll(Kokkos::NoInit("sna:dulist"),1,1,1);
|
||||
MemKK::realloc_kokkos(a_pack,"sna:a_pack",vector_length,nmax,natom_div);
|
||||
MemKK::realloc_kokkos(b_pack,"sna:b_pack",vector_length,nmax,natom_div);
|
||||
MemKK::realloc_kokkos(da_pack,"sna:da_pack",vector_length,nmax,natom_div,3);
|
||||
MemKK::realloc_kokkos(db_pack,"sna:db_pack",vector_length,nmax,natom_div,3);
|
||||
MemKK::realloc_kokkos(sfac_pack,"sna:sfac_pack",vector_length,nmax,natom_div,4);
|
||||
MemKK::realloc_kokkos(ulisttot,"sna:ulisttot",1,1,1); // dummy allocation
|
||||
MemKK::realloc_kokkos(ulisttot_full,"sna:ulisttot",1,1,1);
|
||||
MemKK::realloc_kokkos(ulisttot_re_pack,"sna:ulisttot_re_pack",vector_length,idxu_half_max,nelements,natom_div);
|
||||
MemKK::realloc_kokkos(ulisttot_im_pack,"sna:ulisttot_im_pack",vector_length,idxu_half_max,nelements,natom_div);
|
||||
MemKK::realloc_kokkos(ulisttot_pack,"sna:ulisttot_pack",vector_length,idxu_max,nelements,natom_div);
|
||||
MemKK::realloc_kokkos(ulist,"sna:ulist",1,1,1);
|
||||
MemKK::realloc_kokkos(zlist,"sna:zlist",1,1,1);
|
||||
MemKK::realloc_kokkos(zlist_pack,"sna:zlist_pack",vector_length,idxz_max,ndoubles,natom_div);
|
||||
MemKK::realloc_kokkos(blist,"sna:blist",natom,ntriples,idxb_max);
|
||||
MemKK::realloc_kokkos(blist_pack,"sna:blist_pack",vector_length,idxb_max,ntriples,natom_div);
|
||||
MemKK::realloc_kokkos(ylist,"sna:ylist",1,1,1);
|
||||
MemKK::realloc_kokkos(ylist_pack_re,"sna:ylist_pack_re",vector_length,idxu_half_max,nelements,natom_div);
|
||||
MemKK::realloc_kokkos(ylist_pack_im,"sna:ylist_pack_im",vector_length,idxu_half_max,nelements,natom_div);
|
||||
MemKK::realloc_kokkos(dulist,"sna:dulist",1,1,1);
|
||||
} else {
|
||||
#endif
|
||||
a_pack = t_sna_3c_ll(Kokkos::NoInit("sna:a_pack"),1,1,1);
|
||||
b_pack = t_sna_3c_ll(Kokkos::NoInit("sna:b_pack"),1,1,1);
|
||||
da_pack = t_sna_4c_ll(Kokkos::NoInit("sna:da_pack"),1,1,1,1);
|
||||
db_pack = t_sna_4c_ll(Kokkos::NoInit("sna:db_pack"),1,1,1,1);
|
||||
sfac_pack = t_sna_4d_ll(Kokkos::NoInit("sna:sfac_pack"),1,1,1,1);
|
||||
ulisttot = t_sna_3c_ll(Kokkos::NoInit("sna:ulisttot"),idxu_half_max,nelements,natom);
|
||||
ulisttot_full = t_sna_3c_ll(Kokkos::NoInit("sna:ulisttot_full"),idxu_max,nelements,natom);
|
||||
ulisttot_re_pack = t_sna_4d_ll(Kokkos::NoInit("sna:ulisttot_re"),1,1,1,1);
|
||||
ulisttot_im_pack = t_sna_4d_ll(Kokkos::NoInit("sna:ulisttot_im"),1,1,1,1);
|
||||
ulisttot_pack = t_sna_4c_ll(Kokkos::NoInit("sna:ulisttot_pack"),1,1,1,1);
|
||||
ulist = t_sna_3c_ll(Kokkos::NoInit("sna:ulist"),idxu_cache_max,natom,nmax);
|
||||
zlist = t_sna_3c_ll(Kokkos::NoInit("sna:zlist"),idxz_max,ndoubles,natom);
|
||||
zlist_pack = t_sna_4c_ll(Kokkos::NoInit("sna:zlist_pack"),1,1,1,1);
|
||||
blist = t_sna_3d(Kokkos::NoInit("sna:blist"),natom,ntriples,idxb_max);
|
||||
blist_pack = t_sna_4d_ll(Kokkos::NoInit("sna:blist_pack"),1,1,1,1);
|
||||
ylist = t_sna_3c_ll(Kokkos::NoInit("sna:ylist"),idxu_half_max,nelements,natom);
|
||||
ylist_pack_re = t_sna_4d_ll(Kokkos::NoInit("sna:ylist_pack_re"),1,1,1,1);
|
||||
ylist_pack_im = t_sna_4d_ll(Kokkos::NoInit("sna:ylist_pack_im"),1,1,1,1);
|
||||
dulist = t_sna_4c3_ll(Kokkos::NoInit("sna:dulist"),idxu_cache_max,natom,nmax);
|
||||
MemKK::realloc_kokkos(a_pack,"sna:a_pack",1,1,1);
|
||||
MemKK::realloc_kokkos(b_pack,"sna:b_pack",1,1,1);
|
||||
MemKK::realloc_kokkos(da_pack,"sna:da_pack",1,1,1,1);
|
||||
MemKK::realloc_kokkos(db_pack,"sna:db_pack",1,1,1,1);
|
||||
MemKK::realloc_kokkos(sfac_pack,"sna:sfac_pack",1,1,1,1);
|
||||
MemKK::realloc_kokkos(ulisttot,"sna:ulisttot",idxu_half_max,nelements,natom);
|
||||
MemKK::realloc_kokkos(ulisttot_full,"sna:ulisttot_full",idxu_max,nelements,natom);
|
||||
MemKK::realloc_kokkos(ulisttot_re_pack,"sna:ulisttot_re",1,1,1,1);
|
||||
MemKK::realloc_kokkos(ulisttot_im_pack,"sna:ulisttot_im",1,1,1,1);
|
||||
MemKK::realloc_kokkos(ulisttot_pack,"sna:ulisttot_pack",1,1,1,1);
|
||||
MemKK::realloc_kokkos(ulist,"sna:ulist",idxu_cache_max,natom,nmax);
|
||||
MemKK::realloc_kokkos(zlist,"sna:zlist",idxz_max,ndoubles,natom);
|
||||
MemKK::realloc_kokkos(zlist_pack,"sna:zlist_pack",1,1,1,1);
|
||||
MemKK::realloc_kokkos(blist,"sna:blist",natom,ntriples,idxb_max);
|
||||
MemKK::realloc_kokkos(blist_pack,"sna:blist_pack",1,1,1,1);
|
||||
MemKK::realloc_kokkos(ylist,"sna:ylist",idxu_half_max,nelements,natom);
|
||||
MemKK::realloc_kokkos(ylist_pack_re,"sna:ylist_pack_re",1,1,1,1);
|
||||
MemKK::realloc_kokkos(ylist_pack_im,"sna:ylist_pack_im",1,1,1,1);
|
||||
MemKK::realloc_kokkos(dulist,"sna:dulist",idxu_cache_max,natom,nmax);
|
||||
|
||||
#ifdef LMP_KOKKOS_GPU
|
||||
}
|
||||
@ -2356,74 +2357,68 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_s_dsfac(const real
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
double SNAKokkos<DeviceType, real_type, vector_length>::memory_usage()
|
||||
{
|
||||
int jdimpq = twojmax + 2;
|
||||
int jdim = twojmax + 1;
|
||||
double bytes;
|
||||
double bytes = 0;
|
||||
|
||||
bytes = 0;
|
||||
|
||||
bytes += jdimpq*jdimpq * sizeof(real_type); // pqarray
|
||||
bytes += idxcg_max * sizeof(real_type); // cglist
|
||||
bytes += MemKK::memory_usage(rootpqarray);
|
||||
bytes += MemKK::memory_usage(cglist);
|
||||
|
||||
#ifdef LMP_KOKKOS_GPU
|
||||
if (!host_flag) {
|
||||
|
||||
auto natom_pad = (natom+vector_length-1)/vector_length;
|
||||
|
||||
bytes += natom_pad * nmax * sizeof(real_type) * 2; // a_pack
|
||||
bytes += natom_pad * nmax * sizeof(real_type) * 2; // b_pack
|
||||
bytes += natom_pad * nmax * 3 * sizeof(real_type) * 2; // da_pack
|
||||
bytes += natom_pad * nmax * 3 * sizeof(real_type) * 2; // db_pack
|
||||
bytes += natom_pad * nmax * 4 * sizeof(real_type); // sfac_pack
|
||||
bytes += MemKK::memory_usage(a_pack);
|
||||
bytes += MemKK::memory_usage(b_pack);
|
||||
bytes += MemKK::memory_usage(da_pack);
|
||||
bytes += MemKK::memory_usage(db_pack);
|
||||
bytes += MemKK::memory_usage(sfac_pack);
|
||||
|
||||
|
||||
bytes += natom_pad * idxu_half_max * nelements * sizeof(real_type); // ulisttot_re_pack
|
||||
bytes += natom_pad * idxu_half_max * nelements * sizeof(real_type); // ulisttot_im_pack
|
||||
bytes += natom_pad * idxu_max * nelements * sizeof(real_type) * 2; // ulisttot_pack
|
||||
bytes += MemKK::memory_usage(ulisttot_re_pack);
|
||||
bytes += MemKK::memory_usage(ulisttot_im_pack);
|
||||
bytes += MemKK::memory_usage(ulisttot_pack);
|
||||
|
||||
bytes += natom_pad * idxz_max * ndoubles * sizeof(real_type) * 2; // zlist_pack
|
||||
bytes += natom_pad * idxb_max * ntriples * sizeof(real_type); // blist_pack
|
||||
bytes += MemKK::memory_usage(zlist_pack);
|
||||
bytes += MemKK::memory_usage(blist_pack);
|
||||
|
||||
bytes += natom_pad * idxu_half_max * nelements * sizeof(real_type); // ylist_pack_re
|
||||
bytes += natom_pad * idxu_half_max * nelements * sizeof(real_type); // ylist_pack_im
|
||||
bytes += MemKK::memory_usage(ylist_pack_re);
|
||||
bytes += MemKK::memory_usage(ylist_pack_im);
|
||||
} else {
|
||||
#endif
|
||||
|
||||
bytes += natom * nmax * idxu_cache_max * sizeof(real_type) * 2; // ulist
|
||||
bytes += natom * idxu_half_max * nelements * sizeof(real_type) * 2; // ulisttot
|
||||
bytes += natom * idxu_max * nelements * sizeof(real_type) * 2; // ulisttot_full
|
||||
bytes += MemKK::memory_usage(ulist);
|
||||
bytes += MemKK::memory_usage(ulisttot);
|
||||
bytes += MemKK::memory_usage(ulisttot_full);
|
||||
|
||||
bytes += natom * idxz_max * ndoubles * sizeof(real_type) * 2; // zlist
|
||||
bytes += natom * idxb_max * ntriples * sizeof(real_type); // blist
|
||||
bytes += MemKK::memory_usage(zlist);
|
||||
bytes += MemKK::memory_usage(blist);
|
||||
|
||||
bytes += natom * idxu_half_max * nelements * sizeof(real_type) * 2; // ylist
|
||||
bytes += MemKK::memory_usage(ylist);
|
||||
|
||||
bytes += natom * nmax * idxu_cache_max * 3 * sizeof(real_type) * 2; // dulist
|
||||
bytes += MemKK::memory_usage(dulist);
|
||||
#ifdef LMP_KOKKOS_GPU
|
||||
}
|
||||
#endif
|
||||
|
||||
bytes += natom * nmax * 3 * sizeof(real_type); // dedr
|
||||
bytes += MemKK::memory_usage(dedr);
|
||||
|
||||
bytes += jdim * jdim * jdim * sizeof(int); // idxcg_block
|
||||
bytes += jdim * sizeof(int); // idxu_block
|
||||
bytes += jdim * sizeof(int); // idxu_half_block
|
||||
bytes += idxu_max * sizeof(FullHalfMapper); // idxu_full_half
|
||||
bytes += jdim * sizeof(int); // idxu_cache_block
|
||||
bytes += jdim * jdim * jdim * sizeof(int); // idxz_block
|
||||
bytes += jdim * jdim * jdim * sizeof(int); // idxb_block
|
||||
bytes += MemKK::memory_usage(idxcg_block);
|
||||
bytes += MemKK::memory_usage(idxu_block);
|
||||
bytes += MemKK::memory_usage(idxu_half_block);
|
||||
bytes += MemKK::memory_usage(idxu_full_half);
|
||||
bytes += MemKK::memory_usage(idxu_cache_block);
|
||||
bytes += MemKK::memory_usage(idxz_block);
|
||||
bytes += MemKK::memory_usage(idxb_block);
|
||||
|
||||
bytes += idxz_max * 10 * sizeof(int); // idxz
|
||||
bytes += idxb_max * 3 * sizeof(int); // idxb
|
||||
bytes += MemKK::memory_usage(idxz);
|
||||
bytes += MemKK::memory_usage(idxb);
|
||||
|
||||
bytes += jdim * sizeof(real_type); // bzero
|
||||
bytes += MemKK::memory_usage(bzero);
|
||||
|
||||
bytes += natom * nmax * 3 * sizeof(real_type); // rij
|
||||
bytes += natom * nmax * sizeof(real_type); // inside
|
||||
bytes += natom * nmax * sizeof(real_type); // wj
|
||||
bytes += natom * nmax * sizeof(real_type); // rcutij
|
||||
bytes += natom * nmax * sizeof(real_type); // sinnerij
|
||||
bytes += natom * nmax * sizeof(real_type); // dinnerij
|
||||
bytes += MemKK::memory_usage(rij);
|
||||
bytes += MemKK::memory_usage(inside);
|
||||
bytes += MemKK::memory_usage(wj);
|
||||
bytes += MemKK::memory_usage(rcutij);
|
||||
bytes += MemKK::memory_usage(sinnerij);
|
||||
bytes += MemKK::memory_usage(dinnerij);
|
||||
|
||||
return bytes;
|
||||
}
|
||||
|
||||
@ -320,7 +320,7 @@ class PPPMDisp : public KSpace {
|
||||
void compute_drho1d(const FFT_SCALAR &, const FFT_SCALAR &, const FFT_SCALAR &, int,
|
||||
FFT_SCALAR **, FFT_SCALAR **);
|
||||
void compute_rho_coeff(FFT_SCALAR **, FFT_SCALAR **, int);
|
||||
void slabcorr(int);
|
||||
virtual void slabcorr(int);
|
||||
|
||||
// grid communication
|
||||
|
||||
|
||||
@ -486,6 +486,85 @@ void PPPMDispTIP4P::fieldforce_c_peratom()
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Fix handling of TIP4P dipole compared to PPPMDisp::slabcorr
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#define SMALL 0.00001
|
||||
|
||||
void PPPMDispTIP4P::slabcorr(int /*eflag*/)
|
||||
{
|
||||
// compute local contribution to global dipole moment
|
||||
|
||||
double *q = atom->q;
|
||||
double **x = atom->x;
|
||||
double zprd = domain->zprd;
|
||||
int nlocal = atom->nlocal;
|
||||
int *type = atom->type;
|
||||
double *xi, xM[3]; int iH1, iH2; //for TIP4P virtual site
|
||||
|
||||
// sum local contributions to get global dipole moment
|
||||
double dipole = 0.0;
|
||||
for (int i = 0; i < nlocal; i++) {
|
||||
if (type[i] == typeO) {
|
||||
find_M(i,iH1,iH2,xM);
|
||||
xi = xM;
|
||||
} else xi = x[i];
|
||||
dipole += q[i]*xi[2];
|
||||
}
|
||||
|
||||
double dipole_all;
|
||||
MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
|
||||
// need to make non-neutral systems and/or
|
||||
// per-atom energy translationally invariant
|
||||
|
||||
double dipole_r2 = 0.0;
|
||||
if (eflag_atom || fabs(qsum) > SMALL) {
|
||||
for (int i = 0; i < nlocal; i++)
|
||||
dipole_r2 += q[i]*x[i][2]*x[i][2];
|
||||
|
||||
// sum local contributions
|
||||
|
||||
double tmp;
|
||||
MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
dipole_r2 = tmp;
|
||||
}
|
||||
|
||||
// compute corrections
|
||||
|
||||
const double e_slabcorr = MY_2PI*(dipole_all*dipole_all -
|
||||
qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume;
|
||||
const double qscale = force->qqrd2e * scale;
|
||||
|
||||
if (eflag_global) energy_1 += qscale * e_slabcorr;
|
||||
|
||||
// per-atom energy
|
||||
|
||||
if (eflag_atom) {
|
||||
double efact = qscale * MY_2PI/volume;
|
||||
for (int i = 0; i < nlocal; i++)
|
||||
eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 +
|
||||
qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0);
|
||||
}
|
||||
|
||||
// add on force corrections
|
||||
|
||||
double ffact = qscale * (-4.0*MY_PI/volume);
|
||||
double **f = atom->f;
|
||||
|
||||
for (int i = 0; i < nlocal; i++) {
|
||||
double fzi_corr = ffact * q[i]*(dipole_all - qsum*x[i][2]);
|
||||
if (type[i] == typeO) {
|
||||
find_M(i,iH1,iH2,xM);
|
||||
f[i][2] += fzi_corr*(1 - alpha);
|
||||
f[iH1][2] += 0.5*alpha*fzi_corr;
|
||||
f[iH2][2] += 0.5*alpha*fzi_corr;
|
||||
}
|
||||
else f[i][2] += fzi_corr;
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
find 2 H atoms bonded to O atom i
|
||||
compute position xM of fictitious charge site for O atom
|
||||
|
||||
@ -37,6 +37,7 @@ class PPPMDispTIP4P : public PPPMDisp {
|
||||
void fieldforce_c_ik() override;
|
||||
void fieldforce_c_ad() override;
|
||||
void fieldforce_c_peratom() override;
|
||||
void slabcorr(int) override;
|
||||
|
||||
private:
|
||||
void find_M(int, int &, int &, double *);
|
||||
|
||||
@ -477,6 +477,86 @@ void PPPMTIP4P::fieldforce_peratom()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Fix handling of TIP4P dipole compared to PPPMDisp::slabcorr
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#define SMALL 0.00001
|
||||
|
||||
void PPPMTIP4P::slabcorr()
|
||||
{
|
||||
// compute local contribution to global dipole moment
|
||||
|
||||
double *q = atom->q;
|
||||
double **x = atom->x;
|
||||
double zprd = domain->zprd;
|
||||
int nlocal = atom->nlocal;
|
||||
int *type = atom->type;
|
||||
double *xi, xM[3]; int iH1, iH2; //for TIP4P virtual site
|
||||
|
||||
// sum local contributions to get global dipole moment
|
||||
double dipole = 0.0;
|
||||
for (int i = 0; i < nlocal; i++) {
|
||||
if (type[i] == typeO) {
|
||||
find_M(i,iH1,iH2,xM);
|
||||
xi = xM;
|
||||
} else xi = x[i];
|
||||
dipole += q[i]*xi[2];
|
||||
}
|
||||
|
||||
double dipole_all;
|
||||
MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
|
||||
// need to make non-neutral systems and/or
|
||||
// per-atom energy translationally invariant
|
||||
|
||||
double dipole_r2 = 0.0;
|
||||
if (eflag_atom || fabs(qsum) > SMALL) {
|
||||
for (int i = 0; i < nlocal; i++)
|
||||
dipole_r2 += q[i]*x[i][2]*x[i][2];
|
||||
|
||||
// sum local contributions
|
||||
|
||||
double tmp;
|
||||
MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
dipole_r2 = tmp;
|
||||
}
|
||||
|
||||
// compute corrections
|
||||
|
||||
const double e_slabcorr = MY_2PI*(dipole_all*dipole_all -
|
||||
qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume;
|
||||
const double qscale = force->qqrd2e * scale;
|
||||
|
||||
if (eflag_global) energy_1 += qscale * e_slabcorr;
|
||||
|
||||
// per-atom energy
|
||||
|
||||
if (eflag_atom) {
|
||||
double efact = qscale * MY_2PI/volume;
|
||||
for (int i = 0; i < nlocal; i++)
|
||||
eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 +
|
||||
qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0);
|
||||
}
|
||||
|
||||
// add on force corrections
|
||||
|
||||
double ffact = qscale * (-4.0*MY_PI/volume);
|
||||
double **f = atom->f;
|
||||
|
||||
for (int i = 0; i < nlocal; i++) {
|
||||
double fzi_corr = ffact * q[i]*(dipole_all - qsum*x[i][2]);
|
||||
if (type[i] == typeO) {
|
||||
find_M(i,iH1,iH2,xM);
|
||||
f[i][2] += fzi_corr*(1 - alpha);
|
||||
f[iH1][2] += 0.5*alpha*fzi_corr;
|
||||
f[iH2][2] += 0.5*alpha*fzi_corr;
|
||||
}
|
||||
else f[i][2] += fzi_corr;
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
find 2 H atoms bonded to O atom i
|
||||
compute position xM of fictitious charge site for O atom
|
||||
|
||||
@ -35,6 +35,7 @@ class PPPMTIP4P : public PPPM {
|
||||
void fieldforce_ik() override;
|
||||
void fieldforce_ad() override;
|
||||
void fieldforce_peratom() override;
|
||||
void slabcorr() override;
|
||||
|
||||
private:
|
||||
void find_M(int, int &, int &, double *);
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
---
|
||||
lammps_version: 10 Feb 2021
|
||||
lammps_version: 4 May 2022
|
||||
tags: slow
|
||||
date_generated: Fri Feb 26 23:09:34 2021
|
||||
date_generated: Mon May 23 20:12:38 2022
|
||||
epsilon: 5e-13
|
||||
prerequisites: ! |
|
||||
atom full
|
||||
@ -27,67 +27,67 @@ init_coul: 0
|
||||
init_stress: ! |2-
|
||||
0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
|
||||
init_forces: ! |2
|
||||
1 -5.1875059319876482e-01 5.1660852898514097e-02 -5.7568746684221916e-01
|
||||
2 2.1689992534273889e-01 -2.6058788219741147e-01 4.1798750467672274e-01
|
||||
3 -3.2123603964927645e-02 -8.8740124548059782e-03 -1.3836110655816162e-02
|
||||
4 1.5801256389492813e-01 2.1126659375654909e-02 7.8030101326860571e-02
|
||||
5 1.4630529362834138e-01 7.0541031372980123e-02 1.0367534136208822e-01
|
||||
6 4.5271336509733173e-01 4.3434073283904712e-01 1.9672609182540585e-01
|
||||
7 -2.5202832073509374e-01 -4.0972860647817611e-01 -5.1025448731009770e-01
|
||||
8 -2.7733728801934087e-03 -6.6285305944413486e-01 -4.2259182161309783e-01
|
||||
9 8.7284510947996261e-02 3.4644067741232221e-01 5.2649415280028289e-01
|
||||
10 -7.0358238250045405e-02 1.2324438068757468e-01 9.7430764543568638e-02
|
||||
11 -1.0611397012429427e-01 1.6778028608628817e-01 1.0243492986761592e-01
|
||||
12 4.7522165036946828e-01 -4.7569230861934814e-01 -3.7620104501978141e-01
|
||||
13 -1.5084555220571103e-01 1.3477824777242514e-01 1.5530580320476042e-01
|
||||
14 -1.7331931611645846e-01 1.4489217533438961e-01 1.5187450400216165e-01
|
||||
15 -1.4457383479163205e-01 1.0191957555196750e-01 1.2647045003724980e-01
|
||||
16 -4.6380086833284712e-01 5.1725747136642608e-01 1.3944164623658954e+00
|
||||
17 2.5830539911922701e-01 -4.4001348824516739e-01 -1.6368568925682263e+00
|
||||
18 7.1711828509507158e-01 1.5135594083235546e+00 -3.0000511375094279e+00
|
||||
19 -2.6526242862377652e-01 -7.5615232036394919e-01 1.6174668711138471e+00
|
||||
20 -3.6412116833354524e-01 -6.8003985707517800e-01 1.5219834320490606e+00
|
||||
21 2.1913962201245335e-01 2.8436767546323427e-01 -1.5830999741451532e+00
|
||||
22 -1.1342236771135741e-01 -2.1501547367331550e-02 8.1828468024256407e-01
|
||||
23 -1.3904196539005109e-01 -1.8302644136991242e-01 7.6537752144201421e-01
|
||||
24 9.9781913572244155e-02 9.7885535038983940e-01 -1.2668572125502537e+00
|
||||
25 1.3435697022842072e-01 -3.4498868790162351e-01 6.9488200140438949e-01
|
||||
26 -2.4062268840602163e-01 -6.1062269338471520e-01 5.8807795407966368e-01
|
||||
27 -6.0379577180174060e-01 9.0922264678215248e-01 -1.6721200413729438e+00
|
||||
28 4.2997465966368542e-01 -5.0965848067708741e-01 9.0674794598209418e-01
|
||||
29 2.4583990189455251e-01 -4.3624778607752923e-01 7.9388967726077175e-01
|
||||
1 -5.1875059319876493e-01 5.1660852898514124e-02 -5.3139324142026889e-01
|
||||
2 2.1689992534273903e-01 -2.6058788219741147e-01 3.8877216450479823e-01
|
||||
3 -3.2123603964927645e-02 -8.8740124548059712e-03 -1.1951249999562964e-02
|
||||
4 1.5801256389492810e-01 2.1126659375654853e-02 6.9548228373721183e-02
|
||||
5 1.4630529362834138e-01 7.0541031372980081e-02 9.5193468408948823e-02
|
||||
6 4.5271336509733173e-01 4.3434073283904689e-01 1.4866214509094927e-01
|
||||
7 -2.5202832073509379e-01 -4.0972860647817605e-01 -4.6219054057564113e-01
|
||||
8 -2.7733728801934924e-03 -6.6285305944413497e-01 -3.7829759619114767e-01
|
||||
9 8.7284510947996316e-02 3.4644067741232221e-01 4.9727881262835838e-01
|
||||
10 -7.0358238250045405e-02 1.2324438068757469e-01 9.0833752246682456e-02
|
||||
11 -1.0611397012429427e-01 1.6778028608628820e-01 9.3953056914476557e-02
|
||||
12 4.7522165036946817e-01 -4.7569230861934830e-01 -3.5075542616036326e-01
|
||||
13 -1.5084555220571100e-01 1.3477824777242520e-01 1.4682393025162105e-01
|
||||
14 -1.7331931611645843e-01 1.4489217533438964e-01 1.4339263104902228e-01
|
||||
15 -1.4457383479163199e-01 1.0191957555196751e-01 1.1798857708411041e-01
|
||||
16 -4.6380086833284706e-01 5.1725747136642608e-01 1.3463525156314389e+00
|
||||
17 2.5830539911922706e-01 -4.4001348824516717e-01 -1.5887929458337697e+00
|
||||
18 7.1711828509507103e-01 1.5135594083235548e+00 -2.9202084401105424e+00
|
||||
19 -2.6526242862377630e-01 -7.5615232036394919e-01 1.5775455224144039e+00
|
||||
20 -3.6412116833354502e-01 -6.8003985707517844e-01 1.4820620833496179e+00
|
||||
21 2.1913962201245299e-01 2.8436767546323433e-01 -1.3892564908575578e+00
|
||||
22 -1.1342236771135721e-01 -2.1501547367331557e-02 7.2136293859876610e-01
|
||||
23 -1.3904196539005081e-01 -1.8302644136991245e-01 6.6845577979821647e-01
|
||||
24 9.9781913572244044e-02 9.7885535038983906e-01 -1.0730137292626583e+00
|
||||
25 1.3435697022842086e-01 -3.4498868790162335e-01 5.9796025976059175e-01
|
||||
26 -2.4062268840602169e-01 -6.1062269338471475e-01 4.9115621243586605e-01
|
||||
27 -6.0379577180174060e-01 9.0922264678215248e-01 -1.4782765580853483e+00
|
||||
28 4.2997465966368531e-01 -5.0965848067708730e-01 8.0982620433829633e-01
|
||||
29 2.4583990189455257e-01 -4.3624778607752923e-01 6.9696793561697401e-01
|
||||
run_vdwl: 0
|
||||
run_coul: 0
|
||||
run_stress: ! |2-
|
||||
0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
|
||||
run_forces: ! |2
|
||||
1 -5.1741021308022928e-01 5.2164463970109785e-02 -5.7035795586691229e-01
|
||||
2 2.1552845355081662e-01 -2.6145885390966006e-01 4.1455932829319120e-01
|
||||
3 -3.2103136898117973e-02 -8.8506517835662529e-03 -1.3601661681107857e-02
|
||||
4 1.5813149106431929e-01 2.0998590621170252e-02 7.7085061898295948e-02
|
||||
5 1.4611782148177443e-01 7.0495101424978557e-02 1.0262211863021993e-01
|
||||
6 4.5237682723361744e-01 4.3384953789975256e-01 1.9026464391810138e-01
|
||||
7 -2.5220238048340610e-01 -4.1025291093818367e-01 -5.0478860778438306e-01
|
||||
8 -1.8080584227147985e-03 -6.6262062885438788e-01 -4.1641859631215317e-01
|
||||
9 8.6463014670901714e-02 3.4599477171923032e-01 5.2262452042463015e-01
|
||||
10 -7.0489736534953079e-02 1.2334020200473024e-01 9.6566992716111827e-02
|
||||
11 -1.0626441695286094e-01 1.6812163421601856e-01 1.0149591994495212e-01
|
||||
12 4.7577991930135149e-01 -4.7567916757872036e-01 -3.7271573985070183e-01
|
||||
13 -1.5102820761813157e-01 1.3482444566331483e-01 1.5423678933941967e-01
|
||||
14 -1.7348673211954843e-01 1.4502141845434144e-01 1.5087187633492033e-01
|
||||
15 -1.4462875081155377e-01 1.0177069356565076e-01 1.2517182091704590e-01
|
||||
16 -4.6460162929294996e-01 5.1779385845248271e-01 1.3893654572043623e+00
|
||||
17 2.5891442131187525e-01 -4.3950548671559936e-01 -1.6317550757697421e+00
|
||||
18 7.2168431617151652e-01 1.5193451885286362e+00 -2.9893884981487782e+00
|
||||
19 -2.6652798165736163e-01 -7.5811338734451528e-01 1.6128149463359120e+00
|
||||
20 -3.6685702835539502e-01 -6.8322050954334113e-01 1.5165955023073157e+00
|
||||
21 2.1951624771560643e-01 2.7908195130062186e-01 -1.5761953363577819e+00
|
||||
22 -1.1336951380799791e-01 -1.8808477419676436e-02 8.1504600690354212e-01
|
||||
23 -1.3925079650238253e-01 -1.8099002366078934e-01 7.6197026590425576e-01
|
||||
24 1.0040861588721736e-01 9.7605298874640822e-01 -1.2616161159601158e+00
|
||||
25 1.3326916759186308e-01 -3.4383632694371846e-01 6.9190323110048380e-01
|
||||
26 -2.4021815268192853e-01 -6.0890409178886584e-01 5.8556538656853796e-01
|
||||
27 -6.0409904237121848e-01 9.0912495784502378e-01 -1.6654694276801689e+00
|
||||
28 4.3013936095125799e-01 -5.0934391123833689e-01 9.0325113244229294e-01
|
||||
29 2.4601612065863304e-01 -4.3639537669310857e-01 7.9029601422825357e-01
|
||||
1 -5.1741014216891323e-01 5.2164635296042393e-02 -5.2623836978475858e-01
|
||||
2 2.1552846412199758e-01 -2.6145894539516673e-01 3.8545898349593888e-01
|
||||
3 -3.2103138095670579e-02 -8.8506538753520981e-03 -1.1724235859914865e-02
|
||||
4 1.5813145650026225e-01 2.0998621233542170e-02 6.8636593918036282e-02
|
||||
5 1.4611784388196503e-01 7.0495096056163456e-02 9.4173752375773245e-02
|
||||
6 4.5237719221599776e-01 4.3384962277065459e-01 1.4239025998317195e-01
|
||||
7 -2.5220268493187059e-01 -4.1025301686518484e-01 -4.5691379591079689e-01
|
||||
8 -1.8087303830687639e-03 -6.6262053829478962e-01 -3.7229928601228018e-01
|
||||
9 8.6463301781221830e-02 3.4599465684255803e-01 4.9352454715915695e-01
|
||||
10 -7.0489594513774040e-02 1.2334016665695119e-01 8.9996005394677253e-02
|
||||
11 -1.0626421813039562e-01 1.6812158323291176e-01 9.3047478449378010e-02
|
||||
12 4.7577947092697664e-01 -4.7567891996498934e-01 -3.4737046113988318e-01
|
||||
13 -1.5102808666764547e-01 1.3482433032858601e-01 1.4578835246021055e-01
|
||||
14 -1.7348659325588509e-01 1.4502136217343140e-01 1.4242345028236647e-01
|
||||
15 -1.4462864359019598e-01 1.0177061950990737e-01 1.1672337003180658e-01
|
||||
16 -4.6460067645864556e-01 5.1779370227303589e-01 1.3414908637759566e+00
|
||||
17 2.5891386126470772e-01 -4.3950547582143235e-01 -1.5838807201814791e+00
|
||||
18 7.2168370323432252e-01 1.5193447526996697e+00 -2.9098598385970016e+00
|
||||
19 -2.6652787885830853e-01 -7.5811315791679745e-01 1.5730504781450976e+00
|
||||
20 -3.6685665590502275e-01 -6.8322021768929220e-01 1.4768311717846097e+00
|
||||
21 2.1951586762660091e-01 2.7908220235205461e-01 -1.3830824926773011e+00
|
||||
22 -1.1336938042579009e-01 -1.8808601624035179e-02 7.1848935671661196e-01
|
||||
23 -1.3925069633951992e-01 -1.8099007738613887e-01 6.6541382825713724e-01
|
||||
24 1.0040887040966230e-01 9.7605345725512060e-01 -1.0685029118802341e+00
|
||||
25 1.3326911366323715e-01 -3.4383656089990644e-01 5.9534683106490160e-01
|
||||
26 -2.4021835151581941e-01 -6.0890428532016694e-01 4.8900886112265285e-01
|
||||
27 -6.0409926106451306e-01 9.0912468504667421e-01 -1.4723570251596647e+00
|
||||
28 4.3013929998720690e-01 -5.0934380660262080e-01 8.0669499930969779e-01
|
||||
29 2.4601628669087905e-01 -4.3639523607143099e-01 6.9373995347613393e-01
|
||||
...
|
||||
|
||||
Reference in New Issue
Block a user