Merge branch 'develop' into mliappy_unified

This commit is contained in:
Steven Ray Anaya
2022-05-20 11:26:54 -06:00
3263 changed files with 172807 additions and 112423 deletions

View File

@ -14,14 +14,27 @@
################################################################################
# LAMMPS output formats
# Written by Richard Berger <richard.berger@temple.edu>
# and Axel Kohlmeyer <akohlmey@gmail.com>
################################################################################
import re
has_yaml = False
try:
import yaml
has_yaml = True
try:
from yaml import CSafeLoader as Loader
except ImportError:
from yaml import SafeLoader as Loader
except ImportError:
# ignore here, raise an exception when trying to parse yaml instead
pass
class LogFile:
"""Reads LAMMPS log files and extracts the thermo information
It supports both the default thermo output style (including custom) and multi.
It supports the line, multi, and yaml thermo output styles.
:param filename: path to log file
:type filename: str
@ -33,11 +46,13 @@ class LogFile:
STYLE_DEFAULT = 0
STYLE_MULTI = 1
STYLE_YAML = 2
def __init__(self, filename):
alpha = re.compile(r'[a-df-zA-DF-Z]') # except e or E for floating-point numbers
kvpairs = re.compile(r'([a-zA-Z_0-9]+)\s+=\s*([0-9\.eE\-]+)')
style = LogFile.STYLE_DEFAULT
yamllog = ""
self.runs = []
self.errors = []
with open(filename, 'rt') as f:
@ -46,14 +61,35 @@ class LogFile:
for line in f:
if "ERROR" in line or "exited on signal" in line:
self.errors.append(line)
elif line.startswith('Step '):
elif re.match(r'^ *Step ', line):
in_thermo = True
in_data_section = True
keys = line.split()
current_run = {}
for k in keys:
current_run[k] = []
elif line.startswith('---------------- Step'):
elif re.match(r'^(keywords:.*$|data:$|---$| - \[.*\]$)', line):
if not has_yaml:
raise Exception('Cannot process YAML format logs without the PyYAML Python module')
style = LogFile.STYLE_YAML
yamllog += line;
current_run = {}
elif re.match(r'^\.\.\.$', line):
thermo = yaml.load(yamllog, Loader=Loader)
for k in thermo['keywords']:
current_run[k] = []
for step in thermo['data']:
icol = 0
for k in thermo['keywords']:
current_run[k].append(step[icol])
icol += 1
self.runs.append(current_run)
yamllog = ""
elif re.match(r'^------* Step ', line):
if not in_thermo:
current_run = {'Step': [], 'CPU': []}
in_thermo = True
@ -64,28 +100,29 @@ class LogFile:
cpu = float(str_cpu.split('=')[1].split()[0])
current_run["Step"].append(step)
current_run["CPU"].append(cpu)
elif line.startswith('Loop time of'):
in_thermo = False
self.runs.append(current_run)
if style != LogFile.STYLE_YAML:
self.runs.append(current_run)
elif in_thermo and in_data_section:
if style == LogFile.STYLE_DEFAULT:
if alpha.search(line):
continue
for k, v in zip(keys, map(float, line.split())):
current_run[k].append(v)
elif style == LogFile.STYLE_MULTI:
if '=' not in line:
in_data_section = False
continue
for k,v in kvpairs.findall(line):
if k not in current_run:
current_run[k] = [float(v)]
else:
current_run[k].append(float(v))
class AvgChunkFile:
"""Reads files generated by fix ave/chunk

View File

@ -5,7 +5,14 @@
import sysconfig
import ctypes

# Load the running interpreter's libpython so embedded LAMMPS/MLIAP code can
# call back into this Python runtime, then verify the interpreter is live.
library = sysconfig.get_config_vars('INSTSONAME')[0]
try:
    pylib = ctypes.CDLL(library)
except OSError:
    # Some builds report a static archive (".a") as INSTSONAME, which CDLL
    # cannot load; retry with the corresponding shared-library name.
    # (Original code tested the unbound `pylib` and discarded the renamed
    # string, so the fallback could never work.)
    if library.endswith(".a"):
        library = library[:-2] + ".so"
        pylib = ctypes.CDLL(library)
    else:
        # Bare raise preserves the original traceback.
        raise
if not pylib.Py_IsInitialized():
    raise RuntimeError("This interpreter is not compatible with python-based mliap for LAMMPS.")
del sysconfig, ctypes, library, pylib

View File

@ -19,10 +19,75 @@ import numpy as np
import torch
def calc_n_params(model):
    """
    Returns the total number of trainable parameters in a network model.

    Parameters:
        model (torch.nn.Module): Network model that maps descriptors to a per atom attribute

    Returns:
        n_params (int): Number of NN model parameters
    """
    return sum(p.nelement() for p in model.parameters())
class TorchWrapper(torch.nn.Module):
def __init__(self, model,n_descriptors,n_elements,n_params=None,device=None,dtype=torch.float64):
"""
A class to wrap Modules to ensure lammps mliap compatability.
...
Attributes
----------
model : torch.nn.Module
Network model that maps descriptors to a per atom attribute
device : torch.nn.Module (None)
Accelerator device
dtype : torch.dtype (torch.float64)
Dtype to use on device
n_params : torch.nn.Module (None)
Number of NN model parameters
n_descriptors : int
Max number of per atom descriptors
n_elements : int
Max number of elements
Methods
-------
forward(descriptors, elems):
Feeds descriptors to network model to produce per atom energies and forces.
"""
def __init__(self, model, n_descriptors, n_elements, n_params=None, device=None, dtype=torch.float64):
"""
Constructs all the necessary attributes for the network module.
Parameters
----------
model : torch.nn.Module
Network model that maps descriptors to a per atom attribute
n_descriptors : int
Max number of per atom descriptors
n_elements : int
Max number of elements
n_params : torch.nn.Module (None)
Number of NN model parameters
device : torch.nn.Module (None)
Accelerator device
dtype : torch.dtype (torch.float64)
Dtype to use on device
"""
super().__init__()
self.model = model
@ -40,26 +105,222 @@ class TorchWrapper(torch.nn.Module):
self.n_descriptors = n_descriptors
self.n_elements = n_elements
def forward(self, elems, bispectrum, beta, energy):
def forward(self, elems, descriptors, beta, energy):
"""
Takes element types and descriptors calculated via lammps and
calculates the per atom energies and forces.
bispectrum = torch.from_numpy(bispectrum).to(dtype=self.dtype, device=self.device).requires_grad_(True)
Parameters
----------
elems : numpy.array
Per atom element types
descriptors : numpy.array
Per atom descriptors
beta : numpy.array
Expired beta array to be filled with new betas
energy : numpy.array
Expired per atom energy array to be filled with new per atom energy
(Note: This is a pointer to the lammps per atom energies)
Returns
-------
None
"""
descriptors = torch.from_numpy(descriptors).to(dtype=self.dtype, device=self.device).requires_grad_(True)
elems = torch.from_numpy(elems).to(dtype=torch.long, device=self.device) - 1
with torch.autograd.enable_grad():
energy_nn = self.model(bispectrum, elems)
energy_nn = self.model(descriptors, elems)
if energy_nn.ndim > 1:
energy_nn = energy_nn.flatten()
beta_nn = torch.autograd.grad(energy_nn.sum(), bispectrum)[0]
beta_nn = torch.autograd.grad(energy_nn.sum(), descriptors)[0]
beta[:] = beta_nn.detach().cpu().numpy().astype(np.float64)
energy[:] = energy_nn.detach().cpu().numpy().astype(np.float64)
class IgnoreElems(torch.nn.Module):
    """
    A class to represent a NN model agnostic of element typing.

    ...

    Attributes
    ----------
    subnet : torch.nn.Module
        Network model that maps descriptors to a per atom attribute

    Methods
    -------
    forward(descriptors, elems):
        Feeds descriptors to network model
    """

    def __init__(self, subnet):
        """
        Constructs all the necessary attributes for the network module.

        Parameters
        ----------
        subnet : torch.nn.Module
            Network model that maps descriptors to a per atom attribute
        """
        super().__init__()
        self.subnet = subnet

    def forward(self, descriptors, elems):
        """
        Feeds descriptors to network model.  The element types are accepted
        only for interface compatibility and are deliberately ignored.

        Parameters
        ----------
        descriptors : torch.tensor
            Per atom descriptors
        elems : torch.tensor
            Per atom element types (unused)

        Returns
        -------
        self.subnet(descriptors) : torch.tensor
            Per atom attribute computed by the network model
        """
        return self.subnet(descriptors)
class UnpackElems(torch.nn.Module):
    """
    A class to represent a NN model pseudo-agnostic of element typing for
    systems with multiple element typings.

    ...

    Attributes
    ----------
    subnet : torch.nn.Module
        Network model that maps descriptors to a per atom attribute
    n_types : int
        Number of atom types used in training the NN model.

    Methods
    -------
    forward(descriptors, elems):
        Feeds descriptors to network model after adding zeros into
        descriptor columns relating to different atom types
    """

    def __init__(self, subnet, n_types):
        """
        Constructs all the necessary attributes for the network module.

        Parameters
        ----------
        subnet : torch.nn.Module
            Network model that maps descriptors to a per atom attribute.
        n_types : int
            Number of atom types used in training the NN model.
        """
        super().__init__()
        self.subnet = subnet
        self.n_types = n_types

    def forward(self, descriptors, elems):
        """
        Feeds descriptors to network model after adding zeros into
        descriptor columns relating to different atom types

        Parameters
        ----------
        descriptors : torch.tensor
            Per atom descriptors
        elems : torch.tensor
            Per atom element types

        Returns
        -------
        torch.tensor
            Per atom attribute computed by the network model
        """
        n_atoms = elems.shape[0]
        n_desc = descriptors.shape[1]
        # Scatter each atom's descriptors into the slot for its element type,
        # leaving the other element slots zero-filled.
        expanded = torch.zeros(n_atoms, self.n_types, n_desc, dtype=torch.float64)
        for atom, etype in enumerate(elems):
            expanded[atom, etype, :] = descriptors[atom]
        return self.subnet(expanded.reshape(n_atoms, -1), elems)
class ElemwiseModels(torch.nn.Module):
    """
    A class to represent a NN model dependent on element typing.

    ...

    Attributes
    ----------
    subnets : list of torch.nn.Modules
        Per element type network models that maps per element type
        descriptors to a per atom attribute.
    n_types : int
        Number of atom types used in training the NN model.

    Methods
    -------
    forward(descriptors, elems):
        Feeds each atom's descriptors to the subnet for its element type.
    """

    def __init__(self, subnets, n_types):
        """
        Constructs all the necessary attributes for the network module.

        Parameters
        ----------
        subnets : list of torch.nn.Modules
            Per element type network models that maps per element
            type descriptors to a per atom attribute.
        n_types : int
            Number of atom types used in training the NN model.
        """
        super().__init__()
        self.subnets = subnets
        self.n_types = n_types

    def forward(self, descriptors, elems):
        """
        Feeds each atom's descriptors to the subnet indexed by that atom's
        element type and gathers the results into one per-atom tensor.

        Parameters
        ----------
        descriptors : torch.tensor
            Per atom descriptors
        elems : torch.tensor
            Per atom element types (indices into ``self.subnets``)

        Returns
        -------
        per_atom_attributes : torch.tensor
            Per atom attribute computed by the network models
        """
        # Fixes vs. previous revision: ``elems.size[0]`` raised TypeError
        # (Tensor.size is a method), and the accumulator name was misspelled
        # (``per_atom_attribute``) causing a NameError.  Allocate with the
        # descriptor dtype so subnet outputs are not downcast to float32.
        per_atom_attributes = torch.zeros(elems.shape[0], dtype=descriptors.dtype)
        given_elems, elem_indices = torch.unique(elems, return_inverse=True)
        for i, elem in enumerate(given_elems):
            mask = elem_indices == i
            per_atom_attributes[mask] = self.subnets[elem](descriptors[mask])
        return per_atom_attributes