Cleaned up and added check for "Total wall time" in the output
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 '''
-UPDATE: July 26, 2024:
+UPDATE: August 13, 2024:
 Launching the LAMMPS binary under testing using a configuration defined in a yaml file (e.g. config.yaml).
 Comparing the output thermo with that in the existing log file (with the same nprocs)
 + data in the log files are extracted and converted into yaml data structure
@@ -19,7 +19,7 @@ With the current features, users can:
 
 Limitations:
 - input scripts use thermo style multi (e.g., examples/peptide) do not work with the expected thermo output format
-- input scripts that require partition runs (e.g. examples/neb) need a separate config file, e.g. "args: --partition 3x1"
+- input scripts that require partition runs (e.g. examples/neb) need a separate config file, e.g. args: "--partition 3x1"
 - testing accelerator packages (GPU, INTEL, KOKKOS, OPENMP) need separate config files, "args: -sf omp -pk omp 4"
 
 TODO:
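The partition and accelerator limitations above come down to passing extra launch flags through the config file's "args" entry. A minimal sketch of how such an entry could be consumed, assuming purely illustrative key names (lmp_binary, mpiexec, args) rather than the script's actual config.yaml schema:

    import shlex
    import yaml

    # hypothetical config text; only the "args" entry mirrors the examples above
    config_text = '''
    lmp_binary: ./lmp
    mpiexec: mpirun -np 4
    args: "-sf omp -pk omp 4"
    '''

    config = yaml.safe_load(config_text)
    # the extra arguments are appended to the launch command as-is
    cmd = f"{config['mpiexec']} {config['lmp_binary']} -in in.lj {config['args']}"
    print(shlex.split(cmd))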
@@ -60,16 +60,15 @@ Example usage:
 of run_tests.py simultaneously.
 '''
 
-import os
+from argparse import ArgumentParser
 import datetime
 import fnmatch
+import logging
+import os
 import re
 import subprocess
-from argparse import ArgumentParser
-
-from multiprocessing import Pool
-
-import logging
+#from multiprocessing import Pool
+
 # need "pip install numpy pyyaml"
 import numpy as np
 import yaml
@@ -82,6 +81,9 @@ try:
 except ImportError:
     from yaml import SafeLoader as Loader
 
+'''
+    data structure to store the test result
+'''
 class TestResult:
     def __init__(self, name, output=None, time=None, checks=0, status=None):
         self.name = name
@@ -105,22 +107,6 @@ class TestResult:
     stat : a dictionary that lists the number of passed, skipped, failed tests
     progress_file: yaml file that stores the tested input script and status
     last_progress: the dictionary that shows the status of the last tests
-
-    NOTE:
-    To map a function to individual workers:
-
-        def func(input1, input2, output_buf):
-            # do smth
-            return result
-
-        # args is a list of num_workers tuples, each tuple contains the arguments passed to the function executed by a worker
-        args = []
-        for i in range(num_workers):
-            args.append((input1, input2, output_buf))
-
-        with Pool(num_workers) as pool:
-            results = pool.starmap(func, args)
-
 '''
 def iterate(lmp_binary, input_folder, input_list, config, results, progress_file, last_progress=None, output_buf=None):
 
@@ -284,11 +270,13 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
 
         # check if the output contains ERROR
         if "ERROR" in output:
-            cmd_str = "grep ERROR log.lammps"
-            p = subprocess.run(cmd_str, shell=True, text=True, capture_output=True)
-            error_line = p.stdout.split('\n')[0]
+            error_line = ""
+            for line in output:
+                if "ERROR" in line:
+                    error_line = line
+                    break
             logger.info(f" The run terminated with {input_test} gives the following output:")
             logger.info(f" {error_line}")
             if "Unrecognized" in output:
                 result.status = "error, unrecognized command, package not installed"
             elif "Unknown" in output:
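The replacement above drops the grep subprocess and finds the first ERROR line directly in the captured output. A standalone sketch of that scan, assuming output holds the captured stdout as a single string, in which case it has to be split into lines first (iterating the string itself would yield individual characters):

    def first_error_line(output):
        # return the first line that mentions ERROR, or "" if none is found
        for line in output.splitlines():
            if "ERROR" in line:
                return line
        return ""

    # made-up sample output
    sample = "Setting up run ...\nERROR: Unrecognized fix style (src/modify.cpp)\nLast command: fix 1 all foo"
    print(first_error_line(sample))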
@@ -309,6 +297,17 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
             continue
 
         # if there is no ERROR in the output, then there is something irregular in the run
+        if "Total wall time" not in output:
+            logger.info(f" ERROR: no Total wall time in the output.\n")
+            logger.info(f"\n{input_test}:")
+            logger.info(f"\n Output:\n{output}")
+            logger.info(f"\n Error:\n{error}")
+            progress.write(f"{input}: {{ folder: {input_folder}, status: \"error, no Total wall time in the output.\" }}\n")
+            progress.close()
+            num_error = num_error + 1
+            test_id = test_id + 1
+            continue
+
         if "Step" not in output or "Loop" not in output:
             logger.info(f" ERROR: no Step nor Loop in the output.\n")
             logger.info(f"\n{input_test}:")
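This block is the check named in the commit title: a LAMMPS run that finishes normally prints a closing "Total wall time: H:MM:SS" line, so its absence is treated as an irregular termination even when no explicit ERROR appears. A minimal sketch of the same test, with illustrative names:

    def run_completed(output):
        # a successful LAMMPS run ends its screen/log output with "Total wall time: H:MM:SS"
        return "Total wall time" in output

    good = "... timing breakdown ...\nTotal wall time: 0:00:03\n"
    bad = "... run was killed before the final summary ...\n"
    print(run_completed(good), run_completed(bad))   # True False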
@@ -332,11 +331,11 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
             test_id = test_id + 1
             continue
         else:
-            # save a copy of the log file
+            # save a copy of the log file for further inspection
            cmd_str = f"cp log.lammps log.{basename}.{nprocs}"
            p = subprocess.run(cmd_str, shell=True, text=True, capture_output=True)
 
-            # process thermo output in log.lammps from the run
+            # parse thermo output in log.lammps from the run
            thermo = extract_data_to_yaml("log.lammps")
            num_runs = len(thermo)
 
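extract_data_to_yaml() turns the thermo tables in log.lammps into one entry per run, each holding the column keywords and the numeric rows; later checks index thermo[0]['keywords']. A much-simplified sketch of that structure, assuming the default one-line thermo style where a header row starting with "Step" is followed by numeric rows until a "Loop time of" line (the real parser handles more formats):

    def parse_thermo_runs(log_text):
        # collect one dict per run: thermo column keywords plus the numeric rows
        runs, current = [], None
        for line in log_text.splitlines():
            cols = line.split()
            if cols and cols[0] == "Step":
                current = {"keywords": cols, "data": []}
            elif current is not None:
                if line.startswith("Loop time of"):
                    runs.append(current)
                    current = None
                elif cols:
                    try:
                        current["data"].append([float(c) for c in cols])
                    except ValueError:
                        pass  # non-numeric line inside the block, e.g. a warning
        return runs

    log = "Step Temp E_pair\n0 1.44 -6.77\n50 0.70 -5.68\nLoop time of 0.2 on 4 procs\n"
    print(parse_thermo_runs(log)[0]["keywords"])   # ['Step', 'Temp', 'E_pair']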
@@ -365,10 +364,12 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
         # At this point, the run completed without trivial errors, proceed with numerical checks
         # check if there is a reference log file for this input
         if logfile_exist:
+            # parse the thermo output in reference log file
             thermo_ref = extract_data_to_yaml(thermo_ref_file)
             if thermo_ref:
                 num_runs_ref = len(thermo_ref)
             else:
+                # dictionary is empty
                 logger.info(f" ERROR: Error parsing the reference log file {thermo_ref_file}.")
                 result.status = "skipped numerical checks due to parsing the reference log file"
                 results.append(result)
@@ -381,13 +382,14 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
             msg = f" Cannot find the reference log file for {input_test} with the expected format log.[date].{basename}.*.[nprocs]"
             logger.info(msg)
             print(msg)
-            # try to read in the thermo yaml output from the working directory
+            # attempt to read in the thermo yaml output from the working directory (the following section will be deprecated)
             thermo_ref_file = 'thermo.' + input + '.yaml'
             file_exist = os.path.isfile(thermo_ref_file)
             if file_exist == True:
                 thermo_ref = extract_thermo(thermo_ref_file)
                 num_runs_ref = len(thermo_ref)
             else:
+                # mostly will come to here if the reference log file does not exist
                 logger.info(f" {thermo_ref_file} also does not exist in the working directory.")
                 result.status = "skipped due to missing the reference log file"
                 results.append(result)
@@ -400,6 +402,7 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
         logger.info(f" Comparing thermo output from log.lammps against the reference log file {thermo_ref_file}")
 
         # check if the number of runs matches with that in the reference log file
+        # maybe due to some changes to the input where the ref log file is not updated yet
         if num_runs != num_runs_ref:
             logger.info(f" ERROR: Number of runs in log.lammps ({num_runs}) is different from that in the reference log ({num_runs_ref})."
                 " Check README in the folder, possibly due to using mpirun with partitions or parsing the wrong reference log file.")
@@ -411,7 +414,9 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
             test_id = test_id + 1
             continue
 
-        # check if the number of fields match with that in the reference log file in the first run for early exit
+        # check if the number of fields match with that in the reference log file in the first run
+        # due to some changes to the input where the ref log file is not updated yet
+        # for early exit
         num_fields = len(thermo[0]['keywords'])
         num_fields_ref = len(thermo_ref[0]['keywords'])
         if num_fields != num_fields_ref:
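The comments added above spell out why run and field counts are compared before any numerical check: a reference log that was never regenerated after an input change makes a column-by-column comparison meaningless, so the test exits early. A small sketch of that early exit, using the same thermo[0]['keywords'] layout as in the parsing sketch above:

    def fields_match(thermo, thermo_ref):
        # early exit: differing thermo columns usually mean a stale reference log
        return len(thermo[0]['keywords']) == len(thermo_ref[0]['keywords'])

    new_run = [{'keywords': ['Step', 'Temp', 'E_pair', 'Press']}]
    reference = [{'keywords': ['Step', 'Temp', 'E_pair']}]
    print(fields_match(new_run, reference))   # False -> skip numerical checks for this input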
@@ -543,7 +548,7 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file
         # check if memleak detects from valgrind run (need to replace "mpirun" -> valgrind --leak-check=yes mpirun")
         msg = "completed"
         if 'valgrind' in config['mpiexec']:
-            if "All heap blocks were free" in error:
+            if "All heap blocks were freed" in error:
                 msg += ", no memory leak"
             else:
                 msg += ", memory leaks detected"
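The corrected string matches valgrind's full wording: when config['mpiexec'] wraps the launcher in valgrind --leak-check=yes, a leak-free run reports "All heap blocks were freed -- no leaks are possible" on stderr. A short sketch of the same classification, with abbreviated sample text:

    def leak_status(stderr_text):
        # valgrind's memcheck prints this summary line when every allocation was freed
        if "All heap blocks were freed" in stderr_text:
            return "completed, no memory leak"
        return "completed, memory leaks detected"

    sample = "==1234== HEAP SUMMARY:\n==1234== All heap blocks were freed -- no leaks are possible\n"
    print(leak_status(sample))   # completed, no memory leak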
@@ -705,6 +710,21 @@ def execute(lmp_binary, config, input_file_name, generate_ref_yaml=False):
 
 '''
     split a list into a list of N sublists
+
+    NOTE:
+    To map a function to individual workers with multiprocessing.Pool:
+
+        def func(input1, input2, output_buf):
+            # do smth
+            return result
+
+        # args is a list of num_workers tuples, each tuple contains the arguments passed to the function executed by a worker
+        args = []
+        for i in range(num_workers):
+            args.append((input1, input2, output_buf))
+
+        with Pool(num_workers) as pool:
+            results = pool.starmap(func, args)
 '''
 def divide_into_N(original_list, N):
     size = np.ceil(len(original_list) / N)
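The docstring relocated above divide_into_N describes the Pool.starmap pattern used to spread the example folders over several workers. A self-contained, runnable version of that pattern with placeholder work instead of launching LAMMPS; this divide_into_N is a simplified stand-in, not the script's exact implementation:

    from multiprocessing import Pool

    import numpy as np

    def divide_into_N(original_list, N):
        # split a list into N roughly equal sublists (simplified stand-in)
        size = int(np.ceil(len(original_list) / N))
        return [original_list[i:i + size] for i in range(0, len(original_list), size)]

    def run_sublist(worker_id, inputs):
        # placeholder for iterate(): just report what this worker would test
        return [f"worker {worker_id}: {name}" for name in inputs]

    if __name__ == "__main__":
        inputs = [f"in.test{i}" for i in range(10)]
        num_workers = 3
        args = list(enumerate(divide_into_N(inputs, num_workers)))
        with Pool(num_workers) as pool:
            results = pool.starmap(run_sublist, args)
        print(results)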
@@ -1002,7 +1022,6 @@ if __name__ == "__main__":
     with Pool(num_workers) as pool:
         results = pool.starmap(func, args)
     '''
 
-
     for directory in example_subfolders:
 