Merge pull request #4352 from ndtrung81/regression-test-update
Add a KOKKOS workflow for GitHub actions and minor updates for the regression tester
This commit is contained in:
124
.github/workflows/kokkos-regression.yaml
vendored
Normal file
124
.github/workflows/kokkos-regression.yaml
vendored
Normal file
@ -0,0 +1,124 @@
|
||||
# GitHub action to build LAMMPS on Linux and run selected regression tests
|
||||
name: "Kokkos OpenMP Regression Test"
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- develop
|
||||
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: Build LAMMPS with Kokkos OpenMP
|
||||
# restrict to official LAMMPS repository
|
||||
if: ${{ github.repository == 'lammps/lammps' }}
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
CCACHE_DIR: ${{ github.workspace }}/.ccache
|
||||
strategy:
|
||||
max-parallel: 4
|
||||
matrix:
|
||||
idx: [ 'pair', 'fix', 'compute', 'misc' ]
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 2
|
||||
show-progress: false
|
||||
|
||||
- name: Install extra packages
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y ccache ninja-build libeigen3-dev \
|
||||
libcurl4-openssl-dev python3-dev \
|
||||
mpi-default-bin mpi-default-dev
|
||||
|
||||
- name: Create Build Environment
|
||||
run: mkdir build
|
||||
|
||||
- name: Set up ccache
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ${{ env.CCACHE_DIR }}
|
||||
key: linux-kokkos-ccache-${{ github.sha }}
|
||||
restore-keys: linux-kokkos-ccache-
|
||||
|
||||
- name: Building LAMMPS via CMake
|
||||
shell: bash
|
||||
run: |
|
||||
ccache -z
|
||||
python3 -m venv linuxenv
|
||||
source linuxenv/bin/activate
|
||||
python3 -m pip install --upgrade pip
|
||||
python3 -m pip install numpy pyyaml junit_xml
|
||||
cmake -S cmake -B build \
|
||||
-C cmake/presets/gcc.cmake \
|
||||
-C cmake/presets/basic.cmake \
|
||||
-C cmake/presets/kokkos-openmp.cmake \
|
||||
-D CMAKE_CXX_COMPILER_LAUNCHER=ccache \
|
||||
-D CMAKE_C_COMPILER_LAUNCHER=ccache \
|
||||
-D BUILD_SHARED_LIBS=off \
|
||||
-D DOWNLOAD_POTENTIALS=off \
|
||||
-D PKG_AMOEBA=on \
|
||||
-D PKG_ASPHERE=on \
|
||||
-D PKG_BROWNIAN=on \
|
||||
-D PKG_CLASS2=on \
|
||||
-D PKG_COLLOID=on \
|
||||
-D PKG_CORESHELL=on \
|
||||
-D PKG_DIPOLE=on \
|
||||
-D PKG_DPD-BASIC=on \
|
||||
-D PKG_EXTRA-COMPUTE=on \
|
||||
-D PKG_EXTRA-FIX=on \
|
||||
-D PKG_EXTRA-MOLECULE=on \
|
||||
-D PKG_EXTRA-PAIR=on \
|
||||
-D PKG_GRANULAR=on \
|
||||
-D PKG_LEPTON=on \
|
||||
-D PKG_MC=on \
|
||||
-D PKG_MEAM=on \
|
||||
-D PKG_POEMS=on \
|
||||
-D PKG_PYTHON=on \
|
||||
-D PKG_QEQ=on \
|
||||
-D PKG_REAXFF=on \
|
||||
-D PKG_REPLICA=on \
|
||||
-D PKG_SRD=on \
|
||||
-D PKG_VORONOI=on \
|
||||
-G Ninja
|
||||
cmake --build build
|
||||
ccache -s
|
||||
|
||||
- name: Run Regression Tests for Selected Examples
|
||||
shell: bash
|
||||
run: |
|
||||
source linuxenv/bin/activate
|
||||
python3 tools/regression-tests/get_kokkos_input.py \
|
||||
--examples-top-level=examples \
|
||||
--filter-out="balance;fire;gcmc;granregion;mdi;mliap;neb;pace;prd;pour;python;snap"
|
||||
|
||||
python3 tools/regression-tests/run_tests.py \
|
||||
--lmp-bin=build/lmp \
|
||||
--config-file=tools/regression-tests/config_kokkos_openmp.yaml \
|
||||
--list-input=input-list-${{ matrix.idx }}-kk.txt \
|
||||
--output-file=output-${{ matrix.idx }}.xml \
|
||||
--progress-file=progress-${{ matrix.idx }}.yaml \
|
||||
--log-file=run-${{ matrix.idx }}.log \
|
||||
--quick-max=100 --verbose
|
||||
|
||||
tar -cvf kokkos-regression-test-${{ matrix.idx }}.tar run-${{ matrix.idx }}.log progress-${{ matrix.idx }}.yaml output-${{ matrix.idx }}.xml
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: kokkos-regression-test-artifact-${{ matrix.idx }}
|
||||
path: kokkos-regression-test-${{ matrix.idx }}.tar
|
||||
|
||||
merge:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
steps:
|
||||
- name: Merge Artifacts
|
||||
uses: actions/upload-artifact/merge@v4
|
||||
with:
|
||||
name: merged-kokkos-regresssion-artifact
|
||||
pattern: kokkos-regression-test-artifact-*
|
||||
102
examples/threebody/log.08Oct24.mos2.sw.mod.g++.4
Normal file
102
examples/threebody/log.08Oct24.mos2.sw.mod.g++.4
Normal file
@ -0,0 +1,102 @@
|
||||
LAMMPS (29 Aug 2024 - Development - patch_29Aug2024-512-g13c57ab9b5)
|
||||
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# monolayer MoS2
|
||||
units metal
|
||||
boundary p p f
|
||||
processors * * 1
|
||||
|
||||
atom_style atomic
|
||||
read_data single_layer_MoS2.data
|
||||
Reading data file ...
|
||||
triclinic box = (0 0 -100) to (51.15232 44.299209 100) with tilt (25.57616 0 0)
|
||||
WARNING: Triclinic box skew is large. LAMMPS will run inefficiently. (src/domain.cpp:221)
|
||||
2 by 2 by 1 MPI processor grid
|
||||
reading atoms ...
|
||||
768 atoms
|
||||
read_data CPU = 0.003 seconds
|
||||
|
||||
mass * 32.065 # mass of sulphur atom , uint: a.u.=1.66X10^(-27)kg
|
||||
mass 1 95.94 # mass of molebdenum atom , uint: a.u.=1.66X10^(-27)kg
|
||||
|
||||
########################## Define potentials ################################
|
||||
pair_style sw/mod maxdelcs 0.25 0.35
|
||||
pair_coeff * * tmd.sw.mod Mo S S
|
||||
Reading sw potential file tmd.sw.mod with DATE: 2018-03-26
|
||||
#########################################################################
|
||||
|
||||
### Simulation settings ####
|
||||
timestep 0.001
|
||||
velocity all create 300.0 12345
|
||||
|
||||
############################
|
||||
|
||||
# Output
|
||||
thermo 500
|
||||
thermo_style custom step etotal pe ke temp
|
||||
thermo_modify lost warn
|
||||
|
||||
###### Run molecular dynamics ######
|
||||
fix thermostat all nve
|
||||
run 5000
|
||||
|
||||
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
|
||||
|
||||
Your simulation uses code contributions which should be cited:
|
||||
- Type Label Framework: https://doi.org/10.1021/acs.jpcb.3c08419
|
||||
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
|
||||
|
||||
Neighbor list info ...
|
||||
update: every = 1 steps, delay = 0 steps, check = yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 5.158796
|
||||
ghost atom cutoff = 5.158796
|
||||
binsize = 2.579398, bins = 30 18 78
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair sw/mod, perpetual
|
||||
attributes: full, newton on
|
||||
pair build: full/bin/atomonly
|
||||
stencil: full/bin/3d
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 3.165 | 3.165 | 3.165 Mbytes
|
||||
Step TotEng PotEng KinEng Temp
|
||||
0 -899.28605 -929.02881 29.742759 300
|
||||
500 -899.28626 -922.45519 23.168929 233.69313
|
||||
1000 -899.29247 -925.86547 26.573002 268.02828
|
||||
1500 -899.27957 -916.95478 17.675214 178.28084
|
||||
2000 -899.28171 -918.38728 19.105573 192.70814
|
||||
2500 -899.28732 -922.50423 23.21691 234.17709
|
||||
3000 -899.28195 -918.74112 19.459174 196.27473
|
||||
3500 -899.27944 -918.03105 18.751604 189.13784
|
||||
4000 -899.28397 -920.50737 21.223397 214.06955
|
||||
4500 -899.28386 -919.79154 20.507685 206.85053
|
||||
5000 -899.28077 -918.78947 19.508698 196.77425
|
||||
Loop time of 0.595509 on 4 procs for 5000 steps with 768 atoms
|
||||
|
||||
Performance: 725.430 ns/day, 0.033 hours/ns, 8396.182 timesteps/s, 6.448 Matom-step/s
|
||||
99.9% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.4603 | 0.49732 | 0.54269 | 4.2 | 83.51
|
||||
Neigh | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Comm | 0.03293 | 0.078347 | 0.11558 | 10.6 | 13.16
|
||||
Output | 0.00010079 | 0.00010935 | 0.00012827 | 0.0 | 0.02
|
||||
Modify | 0.0073413 | 0.0082665 | 0.0091767 | 0.7 | 1.39
|
||||
Other | | 0.01146 | | | 1.92
|
||||
|
||||
Nlocal: 192 ave 194 max 190 min
|
||||
Histogram: 1 0 0 0 0 2 0 0 0 1
|
||||
Nghost: 194 ave 196 max 192 min
|
||||
Histogram: 1 0 0 0 0 2 0 0 0 1
|
||||
Neighs: 0 ave 0 max 0 min
|
||||
Histogram: 4 0 0 0 0 0 0 0 0 0
|
||||
FullNghs: 5120 ave 5170 max 5070 min
|
||||
Histogram: 1 0 0 0 0 2 0 0 0 1
|
||||
|
||||
Total # of neighbors = 20480
|
||||
Ave neighs/atom = 26.666667
|
||||
Neighbor list builds = 0
|
||||
Dangerous builds = 0
|
||||
Total wall time: 0:00:00
|
||||
@ -106,7 +106,7 @@ An example of the test configuration `config.yaml` is given as below.
|
||||
|
||||
---
|
||||
lmp_binary: ""
|
||||
nprocs: "4"
|
||||
nprocs: ""
|
||||
args: "-cite none"
|
||||
mpiexec: "mpirun"
|
||||
mpiexec_numproc_flag: "-np"
|
||||
@ -135,7 +135,8 @@ An example of the test configuration `config.yaml` is given as below.
|
||||
abs: 1e-2
|
||||
rel: 1e-4
|
||||
skip:
|
||||
[ in.displ,
|
||||
[
|
||||
in.displ,
|
||||
in.displ2,
|
||||
in.*_imd*,
|
||||
]
|
||||
@ -144,6 +145,13 @@ An example of the test configuration `config.yaml` is given as below.
|
||||
epsilon: 1e-16
|
||||
timeout: 180
|
||||
|
||||
Note that if nprocs is left empty "", as in the above example, the test for a given input script will use the maximum number of procs among the log files.
|
||||
For instance, for the input script examples/melt/in.melt, there are 2 log files with 1 and 4 procs. The test for in.melt will then run with 4 procs and the output is compared against the 4-proc log file. This is the typical configuration.
|
||||
|
||||
If nprocs is specified explicitly, for example, nprocs: "2", this value of nprocs will be used for ALL the input scripts (except for the valgrind test).
|
||||
In this case, the reference log file is again the one with the maximum number of procs, that is, 4. One example of this configuration is for KOKKOS tests where
|
||||
the runs are often `mpirun -np 2 lmp -in in.melt -k on g 2` (with the CUDA backend) or `mpirun -np 2 lmp -in in.melt -k on t 2` (with the OpenMP backend).
|
||||
|
||||
An example of the list of example subfolders in a text file `list_subfolders1.txt`
|
||||
|
||||
/home/codes/lammps/examples/melt 1
|
||||
|
||||
@ -1,33 +1,26 @@
|
||||
---
|
||||
lmp_binary: ""
|
||||
nprocs: "4"
|
||||
args: "-cite none -k on g 1 -sf kk -pk kokkos newton on neigh half"
|
||||
nprocs: "2"
|
||||
args: "-cite none -k on g 2 -sf kk -pk kokkos newton on neigh half"
|
||||
mpiexec: "mpirun"
|
||||
mpiexec_numproc_flag: "-np"
|
||||
mpiexec_numproc_flag: "--host localhost:2 -np"
|
||||
tolerance:
|
||||
PotEng:
|
||||
abs: 1e-4
|
||||
rel: 1e-7
|
||||
rel: 1e-6
|
||||
TotEng:
|
||||
abs: 1e-4
|
||||
rel: 1e-7
|
||||
rel: 1e-6
|
||||
Press:
|
||||
abs: 1e-4
|
||||
rel: 1e-7
|
||||
rel: 1e-6
|
||||
Temp:
|
||||
abs: 1e-4
|
||||
rel: 1e-7
|
||||
rel: 1e-6
|
||||
E_vdwl:
|
||||
abs: 1e-3
|
||||
rel: 1e-7
|
||||
overrides:
|
||||
in.rigid.tnr:
|
||||
Temp:
|
||||
abs: 1e-3
|
||||
rel: 1e-5
|
||||
Press:
|
||||
abs: 1e-2
|
||||
rel: 1e-4
|
||||
rel: 1e-6
|
||||
|
||||
timeout: 120
|
||||
nugget: 1.0
|
||||
epsilon: 1e-16
|
||||
|
||||
26
tools/regression-tests/config_kokkos_openmp.yaml
Normal file
26
tools/regression-tests/config_kokkos_openmp.yaml
Normal file
@ -0,0 +1,26 @@
|
||||
---
|
||||
lmp_binary: ""
|
||||
nprocs: "2"
|
||||
args: "-cite none -k on t 2 -sf kk -pk kokkos newton on neigh half"
|
||||
mpiexec: "mpirun"
|
||||
mpiexec_numproc_flag: "--host localhost:2 -np"
|
||||
tolerance:
|
||||
PotEng:
|
||||
abs: 1e-4
|
||||
rel: 1e-6
|
||||
TotEng:
|
||||
abs: 1e-4
|
||||
rel: 1e-6
|
||||
Press:
|
||||
abs: 1e-4
|
||||
rel: 1e-6
|
||||
Temp:
|
||||
abs: 1e-4
|
||||
rel: 1e-6
|
||||
E_vdwl:
|
||||
abs: 1e-3
|
||||
rel: 1e-7
|
||||
|
||||
timeout: 120
|
||||
nugget: 1.0
|
||||
epsilon: 1e-16
|
||||
83
tools/regression-tests/get_kokkos_input.py
Normal file
83
tools/regression-tests/get_kokkos_input.py
Normal file
@ -0,0 +1,83 @@
|
||||
# This script looks for input scripts under examples/ that have pair/fix/compute styles with KOKKOS support
|
||||
# and print out separate sets of input lists into 4 files:
|
||||
# input-list-pair-kk.txt
|
||||
# input-list-fix-kk.txt
|
||||
# input-list-compute-kk.txt
|
||||
# input-list-misc-kk.txt
|
||||
# These 4 files will be read in by the regression tester run_tests.py
|
||||
|
||||
from argparse import ArgumentParser
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
# in_style = fix, pair, compute, angle, bond, dihedral, improper, min
|
||||
def generate_list(in_style, example_toplevel, filter_out, output_list):
    """Append to *output_list* the input scripts under *example_toplevel* that
    use a style of kind *in_style* with KOKKOS support.

    Args:
        in_style:         style kind to search for: "pair", "fix", "compute",
                          "angle", "bond", "dihedral", "improper" or "min"
        example_toplevel: path to the examples/ top-level folder
        filter_out:       list of substrings; any input script whose path
                          contains one of them is excluded (empty strings are
                          ignored, so an empty --filter-out excludes nothing)
        output_list:      list that matching input script paths are appended to
                          (mutated in place, duplicates are skipped)
    """
    # find all the styles of this kind that have a KOKKOS version, i.e.
    # source files named "{in_style}_{name}_kokkos.cpp" under src/KOKKOS
    # (escape the dot so grep matches a literal ".cpp")
    cmd_str = f"ls {example_toplevel}/../src/KOKKOS | grep {in_style} | grep '\\.cpp'"
    p = subprocess.run(cmd_str, shell=True, text=True, capture_output=True)

    style_names = []
    for fname in p.stdout.split('\n'):
        if fname != "":
            # turn "{in_style}_{name}_kokkos.cpp" into "{name}", restoring
            # the slashes that style names use in input scripts
            fname = fname.replace(f"{in_style}_", "")
            fname = fname.replace("_kokkos.cpp", "")
            style_names.append(fname.replace("_", "/"))

    for style in style_names:
        if in_style in ("pair", "angle", "bond", "dihedral", "improper"):
            # look for a line such as "pair_style [name]" in the in.* scripts
            cmd_str = f"grep -rl '{in_style}_style.*{style}' {example_toplevel}/*/in.* "
        else:
            # look for a line such as "fix ... [name]" (or "compute ... [name]")
            cmd_str = f"grep -rl '{in_style}.*{style}' {example_toplevel}/*/in.* "

        p = subprocess.run(cmd_str, shell=True, text=True, capture_output=True)
        matches = ' '.join(p.stdout.split('\n')).split()
        for script in matches:
            # NOTE: skip empty filter patterns — "".split(";") yields [""],
            # and an empty substring would otherwise match (and therefore
            # filter out) every single input script
            if any(pat and pat in script for pat in filter_out):
                continue
            if script not in output_list:
                output_list.append(script)
|
||||
|
||||
if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--examples-top-level", dest="example_toplevel", default="", help="Examples top-level")
    parser.add_argument("--filter-out", dest="filter_out", default="", help="Filter out input scripts that contain strings")

    args = parser.parse_args()
    example_toplevel = args.example_toplevel
    # drop empty entries: "".split(";") yields [""], and an empty filter
    # substring would otherwise match (and exclude) every input script
    filter_out = [s for s in args.filter_out.split(";") if s]

    def _write_list(filename, input_list):
        # write one input script path per line, skipping empty entries
        with open(filename, "w") as f:
            for script in input_list:
                if script != "":
                    f.write(f"{script}\n")

    # print the list of the input scripts that have each feature to a separate file
    for feature in ('pair', 'fix', 'compute'):
        input_list = []
        generate_list(feature, example_toplevel, filter_out, input_list)
        _write_list(f"input-list-{feature}-kk.txt", input_list)

    # combine the input scripts that have these features into a single file
    input_list = []
    for feature in ('angle', 'bond', 'dihedral', 'improper', 'min'):
        generate_list(feature, example_toplevel, filter_out, input_list)
    _write_list("input-list-misc-kk.txt", input_list)
||||
@ -147,10 +147,13 @@ class TestResult:
|
||||
def iterate(lmp_binary, input_folder, input_list, config, results, progress_file, failure_file, walltime_ref=1, verbose=False, last_progress=None, output_buf=None):
|
||||
|
||||
num_tests = len(input_list)
|
||||
|
||||
num_skipped = 0
|
||||
num_error = 0
|
||||
num_timeout = 0
|
||||
num_failed = 0
|
||||
num_completed = 0
|
||||
num_passed = 0
|
||||
num_skipped = 0
|
||||
num_error = 0
|
||||
num_memleak = 0
|
||||
test_id = 0
|
||||
|
||||
@ -187,7 +190,7 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
|
||||
msg = " + " + input + f" ({test_id+1}/{num_tests}): skipped as specified in {configFileName}"
|
||||
print(msg)
|
||||
logger.info(msg)
|
||||
progress.write(f"{input}: {{ folder: {input_folder}, status: \"skipped\", walltime: {walltime} }}\n")
|
||||
progress.write(f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': 'skipped', 'walltime': '{walltime}' }} }}\n")
|
||||
progress.close()
|
||||
num_skipped = num_skipped + 1
|
||||
test_id = test_id + 1
|
||||
@ -205,7 +208,7 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
|
||||
msg = " + " + input + f" ({test_id+1}/{num_tests}): skipped as specified in {configFileName}"
|
||||
print(msg)
|
||||
logger.info(msg)
|
||||
progress.write(f"{input}: {{ folder: {input_folder}, status: \"skipped\", walltime: {walltime} }}\n")
|
||||
progress.write(f"{{ '{input}': {{ 'folder': {input_folder}, 'status': 'skipped', 'walltime': '{walltime}' }} }}\n")
|
||||
progress.close()
|
||||
num_skipped = num_skipped + 1
|
||||
test_id = test_id + 1
|
||||
@ -300,12 +303,15 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
|
||||
|
||||
saved_nprocs = config['nprocs']
|
||||
|
||||
# if the maximum number of procs is different from the value in the configuration file
|
||||
# then override the setting for this particular input script
|
||||
if max_np != int(config['nprocs']):
|
||||
# if the nprocs value in the configuration file is empty then use max_np for this particular input script
|
||||
if config['nprocs'] == "":
|
||||
config['nprocs'] = str(max_np)
|
||||
else:
|
||||
# otherwise use the nprocs value in the configuration file (4 for most examples)
|
||||
logger.info(f" Using {config['nprocs']} nprocs for {input_test} as enforced in the config file.")
|
||||
logger.info(f" WARNING: The maximum number of procs found from the reference log files is {max_np}.")
|
||||
|
||||
# store the value of nprocs
|
||||
# store the value of nprocs to name the generated log file
|
||||
nprocs = int(config['nprocs'])
|
||||
|
||||
# if valgrind is used for mem check, the run command will be
|
||||
@ -322,7 +328,12 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
|
||||
result = TestResult(name=input, output="", time="", status="passed")
|
||||
|
||||
# run the LAMMPS binary with the input script
|
||||
cmd_str, output, error, returncode, logfilename = execute(lmp_binary, config, input_test)
|
||||
status = execute(lmp_binary, config, input_test)
|
||||
cmd_str = status['cmd_str']
|
||||
output = status['stdout']
|
||||
error = status['stderr']
|
||||
returncode = status['returncode']
|
||||
logfilename = status['logfilename']
|
||||
|
||||
# restore the nprocs value in the configuration
|
||||
config['nprocs'] = saved_nprocs
|
||||
@ -352,7 +363,7 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
|
||||
results.append(result)
|
||||
print(f"{result.status}")
|
||||
|
||||
msg = f"{input}: {{ folder: {input_folder}, status: \"{result.status}\", walltime: {walltime} }}\n"
|
||||
msg = f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': '{result.status}', 'walltime': '{walltime}' }} }}\n"
|
||||
progress.write(msg)
|
||||
progress.close()
|
||||
failure.write(msg)
|
||||
@ -369,7 +380,7 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
|
||||
logger.info(f" {output}")
|
||||
logger.info(f" Error:\n{error}")
|
||||
|
||||
msg = f"{input}: {{ folder: {input_folder}, status: \"failed, no log file generated\", walltime: {walltime} }}\n"
|
||||
msg = f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': 'failed, no log file generated', 'walltime': '{walltime}' }} }}\n"
|
||||
progress.write(msg)
|
||||
progress.close()
|
||||
failure.write(msg)
|
||||
@ -399,7 +410,7 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
|
||||
result.status = msg
|
||||
results.append(result)
|
||||
|
||||
msg = f"{input}: {{ folder: {input_folder}, status: \"{msg}\", walltime: {walltime} }}\n"
|
||||
msg = f"{{ '{input}': {{ 'folder': '{input_folder}', status: \"{msg}\", 'walltime': '{walltime}' }} }}\n"
|
||||
progress.write(msg)
|
||||
progress.close()
|
||||
failure.write(msg)
|
||||
@ -418,11 +429,15 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
|
||||
logger.info(f"\n Output:\n{output}")
|
||||
logger.info(f"\n Error:\n{error}")
|
||||
|
||||
msg = f"{input}: {{ folder: {input_folder}, status: \"failed, no Total wall time in the output, {error}\", walltime: {walltime} }}\n"
|
||||
msg = f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': 'failed, no Total wall time in the output, {error}', 'walltime': '{walltime}' }} }}\n"
|
||||
progress.write(msg)
|
||||
progress.close()
|
||||
failure.write(msg)
|
||||
|
||||
returncode = int(returncode)
|
||||
if returncode == -1:
|
||||
num_timeout = num_timeout + 1
|
||||
|
||||
num_error = num_error + 1
|
||||
test_id = test_id + 1
|
||||
continue
|
||||
@ -449,7 +464,7 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
|
||||
logger.info(f"\n Output:\n{output}")
|
||||
logger.info(f"\n Error:\n{error}")
|
||||
|
||||
msg = f"{input}: {{ folder: {input_folder}, status: \"completed, but no Step nor Loop in the output.\", walltime: {walltime}, walltime_norm: {walltime_norm} }}\n"
|
||||
msg = f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': 'completed, but no Step nor Loop in the output.', 'walltime': '{walltime}', 'walltime_norm': '{walltime_norm}' }} }}\n"
|
||||
progress.write(msg)
|
||||
progress.close()
|
||||
failure.write(msg)
|
||||
@ -477,7 +492,7 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
|
||||
|
||||
result.status = msg + f", error parsing {logfilename} into YAML"
|
||||
results.append(result)
|
||||
progress.write(f"{input}: {{ folder: {input_folder}, status: \"{result.status}\", walltime: {walltime}, walltime_norm: {walltime_norm} }}\n")
|
||||
progress.write(f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': '{result.status}', 'walltime': '{walltime}', 'walltime_norm': '{walltime_norm}' }} }}\n")
|
||||
progress.close()
|
||||
|
||||
if verbose == True:
|
||||
@ -495,14 +510,14 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
|
||||
if thermo_ref:
|
||||
num_runs_ref = len(thermo_ref)
|
||||
else:
|
||||
# thhe thermo_ref dictionary is empty
|
||||
# the thermo_ref dictionary is empty
|
||||
logger.info(f" failed, error parsing the reference log file {thermo_ref_file}.")
|
||||
result.status = "skipped numerical checks due to parsing the reference log file"
|
||||
results.append(result)
|
||||
progress.write(f"{input}: {{ folder: {input_folder}, status: \"completed, numerical checks skipped, unsupported log file format\", walltime: {walltime}, walltime_norm: {walltime_norm} }}\n")
|
||||
progress.write(f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': 'completed, numerical checks skipped, unsupported log file format', 'walltime': '{walltime}', 'walltime_norm': '{walltime_norm}' }} }}\n")
|
||||
progress.close()
|
||||
num_completed = num_completed + 1
|
||||
num_error = num_error + 1
|
||||
num_failed = num_failed + 1
|
||||
test_id = test_id + 1
|
||||
continue
|
||||
else:
|
||||
@ -521,12 +536,12 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
|
||||
result.status = "skipped due to missing the reference log file"
|
||||
results.append(result)
|
||||
|
||||
msg = f"{input}: {{ folder: {input_folder}, status: \"completed, numerical checks skipped due to missing the reference log file\", walltime: {walltime}, walltime_norm: {walltime_norm} }}\n"
|
||||
msg = f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': 'completed, numerical checks skipped due to missing the reference log file', 'walltime': '{walltime}', 'walltime_norm': '{walltime_norm}' }} }}\n"
|
||||
progress.write(msg)
|
||||
progress.close()
|
||||
failure.write(msg)
|
||||
num_completed = num_completed + 1
|
||||
num_error = num_error + 1
|
||||
num_failed = num_failed + 1
|
||||
test_id = test_id + 1
|
||||
continue
|
||||
|
||||
@ -539,9 +554,10 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
|
||||
" Check README in the folder, possibly due to using mpirun with partitions or parsing the wrong reference log file.")
|
||||
result.status = "failed, incomplete runs"
|
||||
results.append(result)
|
||||
progress.write(f"{input}: {{ folder: {input_folder}, status: \"{result.status}\", walltime: {walltime}, walltime_norm: {walltime_norm} }}\n")
|
||||
progress.write(f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': '{result.status}', 'walltime': '{walltime}', 'walltime_norm': '{walltime_norm}' }} }}\n")
|
||||
progress.close()
|
||||
num_error = num_error + 1
|
||||
num_completed = num_completed + 1
|
||||
num_failed = num_failed + 1
|
||||
test_id = test_id + 1
|
||||
continue
|
||||
|
||||
@ -555,9 +571,10 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
|
||||
logger.info(f" Check both log files for more details.")
|
||||
result.status = "failed, mismatched columns in the log files"
|
||||
results.append(result)
|
||||
progress.write(f"{input}: {{ folder: {input_folder}, status: \"{result.status}\", walltime: {walltime}, walltime_norm: {walltime_norm} }}\n")
|
||||
progress.write(f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': '{result.status}', 'walltime': '{walltime}', 'walltime_norm': '{walltime_norm}' }} }}\n")
|
||||
progress.close()
|
||||
num_error = num_error + 1
|
||||
num_completed = num_completed + 1
|
||||
num_failed = num_failed + 1
|
||||
test_id = test_id + 1
|
||||
continue
|
||||
|
||||
@ -581,6 +598,7 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
|
||||
failed_rel_output = []
|
||||
num_checks = 0
|
||||
mismatched_columns = False
|
||||
mismatched_num_steps = False
|
||||
|
||||
for irun in range(num_runs):
|
||||
num_fields = len(thermo[irun]['keywords'])
|
||||
@ -593,6 +611,13 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
|
||||
|
||||
# get the total number of the thermo output lines
|
||||
nthermo_steps = len(thermo[irun]['data'])
|
||||
nthermo_steps_ref = len(thermo_ref[irun]['data'])
|
||||
|
||||
if nthermo_steps_ref != nthermo_steps:
|
||||
logger.info(f" failed: Number of thermo steps in {logfilename} ({nthermo_steps})")
|
||||
logger.info(f" is different from that in the reference log ({nthermo_steps_ref}) in run {irun}.")
|
||||
mismatched_num_steps = True
|
||||
continue
|
||||
|
||||
# get the output at the last timestep
|
||||
thermo_step = nthermo_steps - 1
|
||||
@ -644,19 +669,41 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
|
||||
print(f" {thermo[irun]['keywords'][i].ljust(width)} {str(val).rjust(20)} {str(ref).rjust(20)} {abs_diff_check.rjust(20)} {rel_diff_check.rjust(20)}")
|
||||
|
||||
# after all runs completed, or are interrupted in one of the runs (mismatched_columns = True)
|
||||
|
||||
if mismatched_columns == True:
|
||||
msg = f" mismatched log files after the first run. Check both log files for more details."
|
||||
msg = f" mismatched columns in the log files after the first run. Check both log files for more details."
|
||||
print(msg)
|
||||
logger.info(msg)
|
||||
result.status = "thermo checks failed due to mismatched log files after the first run"
|
||||
results.append(result)
|
||||
progress.write(f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': '{result.status}', 'walltime': '{walltime}', 'walltime_norm': '{walltime_norm}' }} }}\n")
|
||||
progress.close()
|
||||
num_completed = num_completed + 1
|
||||
num_failed = num_failed + 1
|
||||
test_id = test_id + 1
|
||||
continue
|
||||
|
||||
# some runs that involve the minimize command that leads to different number of steps vs the reference log file
|
||||
if mismatched_num_steps == True:
|
||||
msg = f" mismatched num steps in the log files. Check both log files for more details."
|
||||
print(msg)
|
||||
logger.info(msg)
|
||||
result.status = "thermo checks failed due to mismatched log files "
|
||||
results.append(result)
|
||||
progress.write(f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': '{result.status}', 'walltime': '{walltime}', 'walltime_norm': '{walltime_norm}' }} }}\n")
|
||||
progress.close()
|
||||
num_completed = num_completed + 1
|
||||
num_failed = num_failed + 1
|
||||
test_id = test_id + 1
|
||||
continue
|
||||
|
||||
result.status = ""
|
||||
if num_abs_failed > 0:
|
||||
msg = f" {num_abs_failed} abs diff checks failed."
|
||||
print(msg)
|
||||
logger.info(msg)
|
||||
#result.status = f"abs_diff_failed: {num_abs_failed}, "
|
||||
for out in failed_abs_output:
|
||||
logger.info(f" - {out}")
|
||||
|
||||
if verbose == True:
|
||||
for out in failed_abs_output:
|
||||
print(f" - {out}")
|
||||
@ -665,7 +712,9 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
|
||||
msg = f" {num_rel_failed} rel diff checks failed."
|
||||
print(msg)
|
||||
logger.info(msg)
|
||||
#result.status += f"rel_diff_failed: {num_rel_failed}"
|
||||
for out in failed_rel_output:
|
||||
logger.info(f" - {out}")
|
||||
|
||||
if verbose == True:
|
||||
for out in failed_rel_output:
|
||||
print(f" - {out}")
|
||||
@ -674,12 +723,14 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
|
||||
msg = f" all {num_checks} checks passed."
|
||||
print(msg)
|
||||
logger.info(msg)
|
||||
#result.status = f"all {num_checks} checks passed."
|
||||
|
||||
result.status = f" 'status': 'passed', 'abs_diff_failed': '{num_abs_failed}', 'rel_diff_failed': '{num_rel_failed}' "
|
||||
|
||||
num_passed = num_passed + 1
|
||||
else:
|
||||
num_error = num_error + 1
|
||||
result.status = f" 'status': 'failed', 'abs_diff_failed': '{num_abs_failed}', 'rel_diff_failed': '{num_rel_failed}' "
|
||||
num_failed = num_failed + 1
|
||||
|
||||
result.status = f"abs_diff_failed: {num_abs_failed}, rel_diff_failed: {num_rel_failed}"
|
||||
results.append(result)
|
||||
|
||||
# check if memleak detects from valgrind run (need to replace "mpirun" -> valgrind --leak-check=yes mpirun")
|
||||
@ -691,12 +742,12 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
|
||||
msg += ", memory leaks detected"
|
||||
num_memleak = num_memleak + 1
|
||||
|
||||
progress.write(f"{input}: {{ folder: {input_folder}, status: \"{msg}\", failed_checks: {{ {result.status} }}, walltime: {walltime}, walltime_norm: {walltime_norm} }}\n")
|
||||
progress.write(f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': '{msg}', 'failed_checks': {{ {result.status} }}, 'walltime': '{walltime}', 'walltime_norm': '{walltime_norm}' }} }}\n")
|
||||
progress.close()
|
||||
|
||||
# write to failure if there is any numerical failed check
|
||||
if num_abs_failed > 0 or num_rel_failed > 0:
|
||||
failure.write(f"{input}: {{ folder: {input_folder}, status: \"{msg}\", failed_checks: {{ {result.status} }}, walltime: {walltime}, walltime_norm: {walltime_norm} }}\n")
|
||||
failure.write(f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': '{msg}', 'failed_checks': '{{ {result.status} }}, 'walltime': '{walltime}', 'walltime_norm': '{walltime_norm}' }} }}\n")
|
||||
|
||||
# count the number of completed runs
|
||||
num_completed = num_completed + 1
|
||||
@ -709,6 +760,8 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
|
||||
'num_passed': num_passed,
|
||||
'num_skipped': num_skipped,
|
||||
'num_error': num_error,
|
||||
'num_timeout': num_timeout,
|
||||
'num_failed': num_failed,
|
||||
'num_memleak': num_memleak,
|
||||
}
|
||||
return stat
|
||||
@ -886,7 +939,14 @@ def execute(lmp_binary, config, input_file_name, generate_ref=False):
|
||||
|
||||
try:
|
||||
p = subprocess.run(cmd_str, shell=True, text=True, capture_output=True, timeout=timeout)
|
||||
return cmd_str, p.stdout, p.stderr, p.returncode, logfilename
|
||||
status = {
|
||||
'cmd_str': cmd_str,
|
||||
'stdout': p.stdout,
|
||||
'stderr': p.stderr,
|
||||
'returncode': p.returncode,
|
||||
'logfilename': logfilename,
|
||||
}
|
||||
return status
|
||||
|
||||
except subprocess.TimeoutExpired:
|
||||
msg = f" Timeout for: {cmd_str} ({timeout}s expired)"
|
||||
@ -894,7 +954,14 @@ def execute(lmp_binary, config, input_file_name, generate_ref=False):
|
||||
print(msg)
|
||||
|
||||
error_str = f"timeout ({timeout}s expired)"
|
||||
return cmd_str, "", error_str, -1, logfilename
|
||||
status = {
|
||||
'cmd_str': cmd_str,
|
||||
'stdout': "",
|
||||
'stderr': error_str,
|
||||
'returncode': -1,
|
||||
'logfilename': logfilename,
|
||||
}
|
||||
return status
|
||||
|
||||
'''
|
||||
get the reference walltime by running the lmp_binary with config with an input script in the bench/ folder
|
||||
@ -932,6 +999,7 @@ def get_reference_walltime(lmp_binary, config):
|
||||
logger.info(msg)
|
||||
print(msg)
|
||||
|
||||
looptime = 1.0
|
||||
for line in output.split('\n'):
|
||||
if "Total wall time" in line:
|
||||
walltime_str = line.split('time:')[1]
|
||||
@ -940,6 +1008,14 @@ def get_reference_walltime(lmp_binary, config):
|
||||
minutes = float(hms[1])
|
||||
seconds = float(hms[2])
|
||||
walltime = hours * 3600.0 + minutes * 60.0 + seconds
|
||||
if "Loop time" in line:
|
||||
looptime_str = line.split(' ')[3]
|
||||
seconds = float(looptime_str)
|
||||
looptime = seconds
|
||||
|
||||
# there is case where total walltime with in.lj is reported as zero seconds, then use loop time
|
||||
if float(walltime) < float(config['epsilon']):
|
||||
walltime = looptime
|
||||
|
||||
logger.info(f" Reference walltime, sec = {walltime}")
|
||||
|
||||
@ -1104,7 +1180,7 @@ if __name__ == "__main__":
|
||||
help="Determine which test inputs have commands changed between a branch and the head")
|
||||
parser.add_argument("--quick-branch", dest="quick_branch", default=quick_branch,
|
||||
help="Branch to which compare the current head to for changed styles")
|
||||
parser.add_argument("--quick-max", dest="quick_max", default=50,
|
||||
parser.add_argument("--quick-max", dest="quick_max", default=0,
|
||||
help="Maximum number of inputs to randomly select")
|
||||
parser.add_argument("--quick-reference", dest="quick_reference", default=quick_reference,
|
||||
help="Reference YAML file with progress data from full regression test run")
|
||||
@ -1351,6 +1427,13 @@ if __name__ == "__main__":
|
||||
example_inputs.append(input)
|
||||
num_inputscripts += 1
|
||||
|
||||
# allow to select randomly some input scripts at this point if quick_max is set
|
||||
if quick_max > 0 and len(example_inputs) > quick_max:
|
||||
example_inputs = random.sample(example_inputs, quick_max)
|
||||
msg = "\nTesting " + str(quick_max) + " randomly selected inputs"
|
||||
print(msg)
|
||||
logger.info(msg)
|
||||
|
||||
example_subfolders = folder_list
|
||||
msg = f"\nThere are {num_inputscripts} input scripts listed in {list_input}."
|
||||
print(msg)
|
||||
@ -1437,6 +1520,8 @@ if __name__ == "__main__":
|
||||
passed_tests = 0
|
||||
skipped_tests = 0
|
||||
error_tests = 0
|
||||
timeout_tests = 0
|
||||
failed_tests = 0
|
||||
memleak_tests = 0
|
||||
|
||||
# default setting is to use inplace_input
|
||||
@ -1456,6 +1541,9 @@ if __name__ == "__main__":
|
||||
|
||||
for directory in example_subfolders:
|
||||
|
||||
if os.path.exists(directory) is False:
|
||||
continue
|
||||
|
||||
# change to the directory where the input script and data files are located
|
||||
print("-"*80)
|
||||
print("Entering " + directory)
|
||||
@ -1490,6 +1578,8 @@ if __name__ == "__main__":
|
||||
skipped_tests += stat['num_skipped']
|
||||
passed_tests += stat['num_passed']
|
||||
error_tests += stat['num_error']
|
||||
timeout_tests += stat['num_timeout']
|
||||
failed_tests += stat['num_failed']
|
||||
memleak_tests += stat['num_memleak']
|
||||
|
||||
# append the results to the all_results list
|
||||
@ -1509,26 +1599,34 @@ if __name__ == "__main__":
|
||||
skipped_tests = stat['num_skipped']
|
||||
passed_tests = stat['num_passed']
|
||||
error_tests = stat['num_error']
|
||||
timeout_tests += stat['num_timeout']
|
||||
failed_tests = stat['num_failed']
|
||||
memleak_tests = stat['num_memleak']
|
||||
|
||||
all_results.extend(results)
|
||||
|
||||
# print out summary
|
||||
# print out summary:
|
||||
# error_tests = number of runs that errored out
|
||||
# failed_tests = number of runs that failed the numerical checks, including missing the reference log files, different num runs and num steps in a run
|
||||
# completed_tests = number of runs that reached the end (Total wall time printed out) = failed_sests + passed_tests
|
||||
|
||||
msg = "\nSummary:\n"
|
||||
msg += f" Total number of input scripts: {total_tests}\n"
|
||||
msg += f" - Skipped : {skipped_tests}\n"
|
||||
msg += f" - Failed : {error_tests}\n"
|
||||
msg += f" - Error : {error_tests}\n"
|
||||
msg += f" - timeout : {timeout_tests}\n"
|
||||
msg += f" - Completed: {completed_tests}\n"
|
||||
msg += f" - failed : {failed_tests}\n"
|
||||
|
||||
# print notice to GitHub
|
||||
if 'GITHUB_STEP_SUMMARY' in os.environ:
|
||||
with open(os.environ.get('GITHUB_STEP_SUMMARY'), 'w') as f:
|
||||
print(f"Skipped: {skipped_tests} Failed: {error_tests} Completed: {completed_tests}", file=f)
|
||||
print(f"Skipped: {skipped_tests} Error: {error_tests} Timeout: {timeout_tests} Failed: {failed_tests} Completed: {completed_tests}", file=f)
|
||||
|
||||
if memleak_tests < completed_tests and 'valgrind' in config['mpiexec']:
|
||||
msg += f" - memory leak detected : {memleak_tests}\n"
|
||||
msg += f" - memory leak detected : {memleak_tests}\n"
|
||||
if passed_tests <= completed_tests:
|
||||
msg += f" - numerical tests passed: {passed_tests}\n"
|
||||
msg += f" - numerical tests passed: {passed_tests}\n"
|
||||
msg += "\nOutput:\n"
|
||||
msg += f" - List of failed inputs : {failure_file}\n"
|
||||
msg += f" - Status of the tested inputs : {progress_file}\n"
|
||||
@ -1544,12 +1642,10 @@ if __name__ == "__main__":
|
||||
for result in all_results:
|
||||
#print(f"{result.name}: {result.status}")
|
||||
case = TestCase(name=result.name, classname=result.name)
|
||||
if result.status == "failed":
|
||||
case.add_failure_info(message="Actual values did not match expected ones.")
|
||||
if result.status == "skipped":
|
||||
if "passed" not in result.status:
|
||||
case.add_failure_info(message=result.status)
|
||||
if "skipped" in result.status:
|
||||
case.add_skipped_info(message="Test was skipped.")
|
||||
if result.status == "error":
|
||||
case.add_skipped_info(message="Test run had errors.")
|
||||
test_cases.append(case)
|
||||
|
||||
current_timestamp = datetime.datetime.now()
|
||||
|
||||
Reference in New Issue
Block a user