From c533ca0af12b868477ed8c83718c1fdf6a239354 Mon Sep 17 00:00:00 2001
From: Trung Nguyen <ndactrung@gmail.com>
Date: Mon, 14 Oct 2024 16:05:21 -0500
Subject: [PATCH] handle the cases where the number of thermo steps does not
 match the reference log files

---
 .github/workflows/kokkos-regression.yaml      |  2 +-
 tools/regression-tests/config_kokkos.yaml     | 23 ++++----------
 .../config_kokkos_openmp.yaml                 |  6 ----
 tools/regression-tests/run_tests.py           | 30 +++++++++++++++++--
 4 files changed, 34 insertions(+), 27 deletions(-)

diff --git a/.github/workflows/kokkos-regression.yaml b/.github/workflows/kokkos-regression.yaml
index 4a202b664f..1670fc4e8c 100644
--- a/.github/workflows/kokkos-regression.yaml
+++ b/.github/workflows/kokkos-regression.yaml
@@ -19,7 +19,7 @@ jobs:
     strategy:
       max-parallel: 4
       matrix:
-        idx: [ 'pair', 'fix', 'compute', 'misc' ]
+        idx: [ 'pair', 'compute', 'misc' ]
 
     steps:
     - name: Checkout repository
diff --git a/tools/regression-tests/config_kokkos.yaml b/tools/regression-tests/config_kokkos.yaml
index bfb67793da..455d1ad0dd 100644
--- a/tools/regression-tests/config_kokkos.yaml
+++ b/tools/regression-tests/config_kokkos.yaml
@@ -7,32 +7,19 @@
   tolerance:
     PotEng:
       abs: 1e-4
-      rel: 1e-7
+      rel: 1e-6
     TotEng:
       abs: 1e-4
-      rel: 1e-7
+      rel: 1e-6
     Press:
       abs: 1e-4
-      rel: 1e-7
+      rel: 1e-6
     Temp:
       abs: 1e-4
-      rel: 1e-7
+      rel: 1e-6
     E_vdwl:
       abs: 1e-3
-      rel: 1e-7
-  overrides:
-    in.rigid.tnr:
-      Temp:
-        abs: 1e-3
-        rel: 1e-5
-      Press:
-        abs: 1e-2
-        rel: 1e-4
-  skip:
-    [
-      in.min.box,
-      in.icos,
-    ]
+      rel: 1e-6
 
   timeout: 120
   nugget: 1.0
diff --git a/tools/regression-tests/config_kokkos_openmp.yaml b/tools/regression-tests/config_kokkos_openmp.yaml
index 8df487b1bb..1979d54b6a 100644
--- a/tools/regression-tests/config_kokkos_openmp.yaml
+++ b/tools/regression-tests/config_kokkos_openmp.yaml
@@ -21,12 +21,6 @@
       abs: 1e-3
       rel: 1e-7
 
-  skip:
-    [
-      in.min.box,
-      in.icos,
-    ]
-
   timeout: 120
   nugget: 1.0
   epsilon: 1e-16
diff --git a/tools/regression-tests/run_tests.py b/tools/regression-tests/run_tests.py
index 5be3b6cd2f..cb418c98da 100755
--- a/tools/regression-tests/run_tests.py
+++ b/tools/regression-tests/run_tests.py
@@ -584,6 +584,7 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
         failed_rel_output = []
         num_checks = 0
         mismatched_columns = False
+        mismatched_num_steps = False
 
         for irun in range(num_runs):
             num_fields = len(thermo[irun]['keywords'])
@@ -596,6 +597,13 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
 
             # get the total number of the thermo output lines
             nthermo_steps = len(thermo[irun]['data'])
+            nthermo_steps_ref = len(thermo_ref[irun]['data'])
+
+            if nthermo_steps_ref != nthermo_steps:
+                logger.info(f"     failed: Number of thermo steps in {logfilename} ({nthermo_steps})")
+                logger.info(f"     is different from that in the reference log ({nthermo_steps_ref}) in run {irun}.")
+                mismatched_num_steps = True   
+                continue
 
             # get the output at the last timestep
             thermo_step = nthermo_steps - 1
@@ -647,12 +655,30 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
                     print(f"        {thermo[irun]['keywords'][i].ljust(width)} {str(val).rjust(20)} {str(ref).rjust(20)} {abs_diff_check.rjust(20)} {rel_diff_check.rjust(20)}")
 
         # after all runs completed, or are interrupted in one of the runs (mismatched_columns = True)
-
         if mismatched_columns == True:
-            msg = f"     mismatched log files after the first run. Check both log files for more details."
+            msg = f"     mismatched columns in the log files after the first run. Check both log files for more details."
             print(msg)
             logger.info(msg)
             result.status = "thermo checks failed due to mismatched log files after the first run"
+            results.append(result)
+            progress.write(f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': '{result.status}', 'walltime': '{walltime}', 'walltime_norm': '{walltime_norm}' }} }}\n")
+            progress.close()
+            num_error = num_error + 1
+            test_id = test_id + 1
+            continue
+
+        # some runs involving the minimize command may produce a different number of steps than the reference log file
+        if mismatched_num_steps == True:
+            msg = f"     mismatched num steps in the log files. Check both log files for more details."
+            print(msg)
+            logger.info(msg)
+            result.status = "thermo checks failed due to mismatched log files "
+            results.append(result)
+            progress.write(f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': '{result.status}', 'walltime': '{walltime}', 'walltime_norm': '{walltime_norm}' }} }}\n")
+            progress.close()
+            num_error = num_error + 1
+            test_id = test_id + 1
+            continue
 
         result.status = ""
         if num_abs_failed > 0: