From 34a509322930d4db0fab3415affd3a8905231468 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Thu, 10 Nov 2022 16:03:06 -0500
Subject: [PATCH] refactor handling of the python source command. document it
 and more limits.

---
 doc/src/python.rst                      | 236 +++++++++++++-----------
 src/PYTHON/python_impl.cpp              |  18 +-
 unittest/python/test_python_package.cpp |   4 +-
 3 files changed, 141 insertions(+), 117 deletions(-)

diff --git a/doc/src/python.rst b/doc/src/python.rst
index aad2f636d3..d777719181 100644
--- a/doc/src/python.rst
+++ b/doc/src/python.rst
@@ -8,14 +8,24 @@ Syntax
 
 .. parsed-literal::
 
-   python func keyword args ...
+   python function keyword args ...
 
-* func = name of Python function
-* one or more keyword/args pairs must be appended
+* function = *source* or name of Python function
+
+  if function is *source*:
 
   .. parsed-literal::
 
-     keyword = *invoke* or *input* or *return* or *format* or *length* or *file* or *here* or *exists* or *source*
+     keyword = *inline* or name of a Python file
+         inline = one or more lines of Python code which will be executed immediately
+                  must be a single argument, typically enclosed in triple quotes
+         Python file = name of a file with Python code which will be executed immediately
+
+* if function is the name of a Python function, one or more keyword/args pairs must be appended
+
+  .. parsed-literal::
+
+     keyword = *invoke* or *input* or *return* or *format* or *length* or *file* or *here* or *exists*
        *invoke* arg = none = invoke the previously defined Python function
        *input* args = N i1 i2 ... iN
          N = # of inputs to function
@@ -38,10 +48,6 @@ Syntax
          inline = one or more lines of Python code which defines func
                   must be a single argument, typically enclosed between triple quotes
        *exists* arg = none = Python code has been loaded by previous python command
-       *source* arg = *filename* or *inline*
-         filename = file of Python code which will be executed immediately
-         inline = one or more lines of Python code which will be executed immediately
-                  must be a single argument, typically enclosed between triple quotes
 
 Examples
 """"""""
@@ -70,80 +76,89 @@ Examples
        lmp.command("pair_style lj/cut ${cut}")   # LAMMPS commands
        lmp.command("pair_coeff * * 1.0 1.0")
        lmp.command("run 100")
-    """
+   """
+
+   python source funcdef.py
+
+   python source inline "from lammps import lammps"
+
 
 Description
 """""""""""
 
 Define a Python function or execute a previously defined function or
-execute some arbitrary python code.
-Arguments, including LAMMPS variables, can be passed to the function
-from the LAMMPS input script and a value returned by the Python
-function to a LAMMPS variable.  The Python code for the function can
-be included directly in the input script or in a separate Python file.
-The function can be standard Python code or it can make "callbacks" to
-LAMMPS through its library interface to query or set internal values
-within LAMMPS.  This is a powerful mechanism for performing complex
-operations in a LAMMPS input script that are not possible with the
-simple input script and variable syntax which LAMMPS defines.  Thus
-your input script can operate more like a true programming language.
+execute some arbitrary python code.  Arguments, including LAMMPS
+variables, can be passed to the function from the LAMMPS input script
+and a value returned by the Python function to a LAMMPS variable.  The
+Python code for the function can be included directly in the input
+script or in a separate Python file.  The function can be standard
+Python code or it can make "callbacks" to LAMMPS through its library
+interface to query or set internal values within LAMMPS.  This is a
+powerful mechanism for performing complex operations in a LAMMPS input
+script that are not possible with the simple input script and variable
+syntax which LAMMPS defines.  Thus your input script can operate more
+like a true programming language.
 
 Use of this command requires building LAMMPS with the PYTHON package
 which links to the Python library so that the Python interpreter is
 embedded in LAMMPS.  More details about this process are given below.
 
-There are two ways to invoke a Python function once it has been
-defined.  One is using the *invoke* keyword.  The other is to assign
-the function to a :doc:`python-style variable <variable>` defined in
-your input script.  Whenever the variable is evaluated, it will
-execute the Python function to assign a value to the variable.  Note
-that variables can be evaluated in many different ways within LAMMPS.
-They can be substituted for directly in an input script.  Or they can
-be passed to various commands as arguments, so that the variable is
-evaluated during a simulation run.
+There are two ways to invoke a Python function once it has been defined.
+One is using the *invoke* keyword.  The other is to assign the function
+to a :doc:`python-style variable <variable>` defined in your input
+script.  Whenever the variable is evaluated, it will execute the Python
+function to assign a value to the variable.  Note that variables can be
+evaluated in many different ways within LAMMPS.  They can be substituted
+with their result directly in an input script, or they can be passed to
+various commands as arguments, so that the variable is evaluated during
+a simulation run.
 
-A broader overview of how Python can be used with LAMMPS is given on
-the :doc:`Python <Python_head>` doc page.  There is an examples/python
-directory which illustrates use of the python command.
+A broader overview of how Python can be used with LAMMPS is given in the
+:doc:`Use Python with LAMMPS <Python_head>` section of the
+documentation.  There is an ``examples/python`` directory which
+illustrates use of the python command.
 
 ----------
 
-The *func* setting specifies the name of the Python function.  The
-code for the function is defined using the *file* or *here* keywords
-as explained below. In case of the *source* keyword, the name of
-the function is ignored.
+The first argument of the *python* command is either the *source*
+keyword or the name of a Python function.
+
+If the *source* keyword is used, no other keywords can be used.  The
+argument can be either a filename or the keyword *inline* followed by a
+string with python commands, either on a single line enclosed in quotes,
+or as multiple lines enclosed in triple quotes. These python commands
+will be passed to the python interpreter and executed immediately
+without registering a python function for future execution.
+
+In all other cases, the first argument is the name of a Python function
+that will be registered with LAMMPS for future execution.  The function
+may already be defined (see *exists* keyword) or must be defined using
+the *file* or *here* keywords as explained below.
 
 If the *invoke* keyword is used, no other keywords can be used, and a
-previous python command must have defined the Python function
+previous python command must have registered the Python function
 referenced by this command.  This invokes the Python function with the
-previously defined arguments and return value processed as explained
-below.  You can invoke the function as many times as you wish in your
-input script.
-
-If the *source* keyword is used, no other keywords can be used.
-The argument can be a filename or a string with python commands,
-either on a single line enclosed in quotes, or as multiple lines
-enclosed in triple quotes. These python commands will be passed
-to the python interpreter and executed immediately without registering
-a python function for future execution.
+previously defined arguments and the return value is processed as
+explained below.  You can invoke the function as many times as you wish
+in your input script.
 
 The *input* keyword defines how many arguments *N* the Python function
-expects.  If it takes no arguments, then the *input* keyword should
-not be used.  Each argument can be specified directly as a value,
-e.g. 6 or 3.14159 or abc (a string of characters).  The type of each
-argument is specified by the *format* keyword as explained below, so
-that Python will know how to interpret the value.  If the word SELF is
-used for an argument it has a special meaning.  A pointer is passed to
-the Python function which it converts into a reference to LAMMPS
-itself.  This enables the function to call back to LAMMPS through its
-library interface as explained below.  This allows the Python function
-to query or set values internal to LAMMPS which can affect the
-subsequent execution of the input script.  A LAMMPS variable can also
-be used as an argument, specified as v_name, where "name" is the name
-of the variable.  Any style of LAMMPS variable can be used, as defined
-by the :doc:`variable <variable>` command.  Each time the Python
-function is invoked, the LAMMPS variable is evaluated and its value is
-passed to the Python function.
+expects.  If it takes no arguments, then the *input* keyword should not
+be used.  Each argument can be specified directly as a value, e.g. 6 or
+3.14159 or abc (a string of characters).  The type of each argument is
+specified by the *format* keyword as explained below, so that Python
+will know how to interpret the value.  If the word SELF is used for an
+argument it has a special meaning.  A pointer is passed to the Python
+function which it converts into a reference to LAMMPS itself.  This
+enables the function to call back to LAMMPS through its library
+interface as explained below.  This allows the Python function to query
+or set values internal to LAMMPS which can affect the subsequent
+execution of the input script.  A LAMMPS variable can also be used as an
+argument, specified as v_name, where "name" is the name of the variable.
+Any style of LAMMPS variable can be used, as defined by the
+:doc:`variable <variable>` command.  Each time the Python function is
+invoked, the LAMMPS variable is evaluated and its value is passed to the
+Python function.
 
 The *return* keyword is only needed if the Python function returns a
 value.  The specified *varReturn* must be of the form v_name, where
@@ -165,19 +180,19 @@ The two commands can appear in either order in the input script so
 long as both are specified before the Python function is invoked for
 the first time.
 
-The *format* keyword must be used if the *input* or *return* keyword
-is used.  It defines an *fstring* with M characters, where M = sum of
+The *format* keyword must be used if the *input* or *return* keyword is
+used.  It defines an *fstring* with M characters, where M = sum of
 number of inputs and outputs.  The order of characters corresponds to
 the N inputs, followed by the return value (if it exists).  Each
 character must be one of the following: "i" for integer, "f" for
-floating point, "s" for string, or "p" for SELF.  Each character
-defines the type of the corresponding input or output value of the
-Python function and affects the type conversion that is performed
-internally as data is passed back and forth between LAMMPS and Python.
-Note that it is permissible to use a :doc:`python-style variable <variable>` in a LAMMPS command that allows for an
-equal-style variable as an argument, but only if the output of the
-Python function is flagged as a numeric value ("i" or "f") via the
-*format* keyword.
+floating point, "s" for string, or "p" for SELF.  Each character defines
+the type of the corresponding input or output value of the Python
+function and affects the type conversion that is performed internally as
+data is passed back and forth between LAMMPS and Python.  Note that it
+is permissible to use a :doc:`python-style variable <variable>` in a
+LAMMPS command that allows for an equal-style variable as an argument,
+but only if the output of the Python function is flagged as a numeric
+value ("i" or "f") via the *format* keyword.
 
 If the *return* keyword is used and the *format* keyword specifies the
 output as a string, then the default maximum length of that string is
@@ -192,12 +207,12 @@ truncated.
 
 Either the *file*, *here*, or *exists* keyword must be used, but only
 one of them.  These keywords specify what Python code to load into the
-Python interpreter.  The *file* keyword gives the name of a file,
-which should end with a ".py" suffix, which contains Python code.  The
-code will be immediately loaded into and run in the "main" module of
-the Python interpreter.  Note that Python code which contains a
-function definition does not "execute" the function when it is run; it
-simply defines the function so that it can be invoked later.
+Python interpreter.  The *file* keyword gives the name of a file
+containing Python code, which should end with a ".py" suffix.  The code
+will be immediately loaded into and run in the "main" module of the
+Python interpreter.  Note that Python code which contains a function
+definition does not "execute" the function when it is run; it simply
+defines the function so that it can be invoked later.
 
 The *here* keyword does the same thing, except that the Python code
 follows as a single argument to the *here* keyword.  This can be done
@@ -208,14 +223,15 @@ proper indentation, blank lines, and comments, as desired.  See the
 how triple quotes can be used as part of input script syntax.
 
 The *exists* keyword takes no argument.  It means that Python code
-containing the required Python function defined by the *func* setting,
-is assumed to have been previously loaded by another python command.
+containing the required Python function with the given name has already
+been executed, for example by a *python source* command or as part of a
+file that was loaded previously with the *file* keyword.
 
-Note that the Python code that is loaded and run must contain a
-function with the specified *func* name.  To operate properly when
-later invoked, the function code must match the *input* and
-*return* and *format* keywords specified by the python command.
-Otherwise Python will generate an error.
+Note that the Python code that is loaded and run must contain a function
+with the specified function name.  To operate properly when later
+invoked, the function code must match the *input* and *return* and
+*format* keywords specified by the python command.  Otherwise Python
+will generate an error.
 
 ----------
 
@@ -225,19 +241,18 @@ LAMMPS.
 Whether you load Python code from a file or directly from your input
 script, via the *file* and *here* keywords, the code can be identical.
 It must be indented properly as Python requires.  It can contain
-comments or blank lines.  If the code is in your input script, it
-cannot however contain triple-quoted Python strings, since that will
-conflict with the triple-quote parsing that the LAMMPS input script
-performs.
+comments or blank lines.  If the code is in your input script, it cannot
+however contain triple-quoted Python strings, since that will conflict
+with the triple-quote parsing that the LAMMPS input script performs.
 
 All the Python code you specify via one or more python commands is
 loaded into the Python "main" module, i.e. __main__.  The code can
 define global variables or statements that are outside of function
 definitions.  It can contain multiple functions, only one of which
 matches the *func* setting in the python command.  This means you can
-use the *file* keyword once to load several functions, and the
-*exists* keyword thereafter in subsequent python commands to access
-the other functions previously loaded.
+use the *file* keyword once to load several functions, and the *exists*
+keyword thereafter in subsequent python commands to access the other
+functions previously loaded.
 
 A Python function you define (or more generally, the code you load)
 can import other Python modules or classes, it can make calls to other
@@ -495,24 +510,35 @@ Restrictions
 """"""""""""
 
 This command is part of the PYTHON package.  It is only enabled if
-LAMMPS was built with that package.  See the :doc:`Build package <Build_package>` page for more info.
+LAMMPS was built with that package.  See the :doc:`Build package
+<Build_package>` page for more info.
 
-Building LAMMPS with the PYTHON package will link LAMMPS with the
-Python library on your system.  Settings to enable this are in the
+Building LAMMPS with the PYTHON package will link LAMMPS with the Python
+library on your system.  Settings to enable this are in the
 lib/python/Makefile.lammps file.  See the lib/python/README file for
 information on those settings.
 
-If you use Python code which calls back to LAMMPS, via the SELF input argument
-explained above, there is an extra step required when building LAMMPS.  LAMMPS
-must also be built as a shared library and your Python function must be able to
-load the :doc:`"lammps" Python module <Python_module>` that wraps the LAMMPS
-library interface.  These are the same steps required to use Python by itself
-to wrap LAMMPS.  Details on these steps are explained on the :doc:`Python
-<Python_head>` doc page.  Note that it is important that the stand-alone LAMMPS
-executable and the LAMMPS shared library be consistent (built from the same
-source code files) in order for this to work.  If the two have been built at
+If you use Python code which calls back to LAMMPS, via the SELF input
+argument explained above, there is an extra step required when building
+LAMMPS.  LAMMPS must also be built as a shared library and your Python
+function must be able to load the :doc:`"lammps" Python module
+<Python_module>` that wraps the LAMMPS library interface.  These are the
+same steps required to use Python by itself to wrap LAMMPS.  Details on
+these steps are explained on the :doc:`Python <Python_head>` doc page.
+Note that it is important that the stand-alone LAMMPS executable and the
+LAMMPS shared library be consistent (built from the same source code
+files) in order for this to work.  If the two have been built at
 different times using different source files, problems may occur.
 
+Another limitation of calling back to Python from the LAMMPS module
+using the *python* command in a LAMMPS input is that both the Python
+interpreter and LAMMPS must be linked to the same Python runtime as a
+shared library.  If the Python interpreter is linked to Python
+statically (which seems to happen with Conda), then loading the shared
+LAMMPS library will create a second Python "main" module that hides the
+one from the Python interpreter, and all previously defined functions
+and global variables will become invisible.
+
 Related commands
 """"""""""""""""
 
diff --git a/src/PYTHON/python_impl.cpp b/src/PYTHON/python_impl.cpp
index 92668674d1..f7c9684615 100644
--- a/src/PYTHON/python_impl.cpp
+++ b/src/PYTHON/python_impl.cpp
@@ -130,19 +130,17 @@ void PythonImpl::command(int narg, char **arg)
     return;
   }
 
-  // if source is only keyword, execute the python code
+  // if source is the first argument, execute the python code given inline or in a file
 
-  if (narg == 3 && strcmp(arg[1], "source") == 0) {
-    int err;
+  if ((narg > 1) && (strcmp(arg[0], "source") == 0)) {
+    int err = -1;
 
-    FILE *fp = fopen(arg[2], "r");
-    if (fp == nullptr)
+    if ((narg > 2) && (strcmp(arg[1], "inline") == 0)) {
       err = execute_string(arg[2]);
-    else
-      err = execute_file(arg[2]);
-
-    if (fp) fclose(fp);
-    if (err) error->all(FLERR, "Could not process Python source command");
+    } else {
+      if (platform::file_is_readable(arg[1])) err = execute_file(arg[1]);
+    }
+    if (err) error->warning(FLERR, "Could not process Python source command. Error code: {}", err);
 
     return;
   }
diff --git a/unittest/python/test_python_package.cpp b/unittest/python/test_python_package.cpp
index 4e5aa53b0c..8b4f998bfc 100644
--- a/unittest/python/test_python_package.cpp
+++ b/unittest/python/test_python_package.cpp
@@ -276,7 +276,7 @@ TEST_F(PythonPackageTest, RunSource)
 {
     // execute python script from file
     auto output = CAPTURE_OUTPUT([&] {
-        command("python xyz source ${input_dir}/run.py");
+        command("python source ${input_dir}/run.py");
     });
 
     ASSERT_THAT(output, HasSubstr(LOREM_IPSUM));
@@ -286,7 +286,7 @@ TEST_F(PythonPackageTest, RunSourceInline)
 {
     // execute inline python script
     auto output = CAPTURE_OUTPUT([&] {
-        command("python xyz source \"\"\"\n"
+        command("python source inline \"\"\"\n"
                 "from __future__ import print_function\n"
                 "print(2+2)\n"
                 "\"\"\"");