diff --git a/.github/workflows/kokkos-regression.yaml b/.github/workflows/kokkos-regression.yaml
new file mode 100644
index 0000000000..0756b080b0
--- /dev/null
+++ b/.github/workflows/kokkos-regression.yaml
@@ -0,0 +1,124 @@
+# GitHub action to build LAMMPS on Linux and run selected regression tests
+name: "Kokkos OpenMP Regression Test"
+
+on:
+  pull_request:
+    branches:
+      - develop
+
+  workflow_dispatch:
+
+jobs:
+  build:
+    name: Build LAMMPS with Kokkos OpenMP
+    # restrict to official LAMMPS repository
+    if: ${{ github.repository == 'lammps/lammps' }}
+    runs-on: ubuntu-latest
+    env:
+      CCACHE_DIR: ${{ github.workspace }}/.ccache
+    strategy:
+      max-parallel: 4
+      matrix:
+        idx: [ 'pair', 'fix', 'compute', 'misc' ]
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 2
+          show-progress: false
+
+      - name: Install extra packages
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y ccache ninja-build libeigen3-dev \
+                                  libcurl4-openssl-dev python3-dev \
+                                  mpi-default-bin mpi-default-dev
+
+      - name: Create Build Environment
+        run: mkdir build
+
+      - name: Set up ccache
+        uses: actions/cache@v4
+        with:
+          path: ${{ env.CCACHE_DIR }}
+          key: linux-kokkos-ccache-${{ github.sha }}
+          restore-keys: linux-kokkos-ccache-
+
+      - name: Build LAMMPS via CMake
+        shell: bash
+        run: |
+          ccache -z
+          python3 -m venv linuxenv
+          source linuxenv/bin/activate
+          python3 -m pip install --upgrade pip
+          python3 -m pip install numpy pyyaml junit_xml
+          cmake -S cmake -B build \
+                -C cmake/presets/gcc.cmake \
+                -C cmake/presets/basic.cmake \
+                -C cmake/presets/kokkos-openmp.cmake \
+                -D CMAKE_CXX_COMPILER_LAUNCHER=ccache \
+                -D CMAKE_C_COMPILER_LAUNCHER=ccache \
+                -D BUILD_SHARED_LIBS=off \
+                -D DOWNLOAD_POTENTIALS=off \
+                -D PKG_AMOEBA=on \
+                -D PKG_ASPHERE=on \
+                -D PKG_BROWNIAN=on \
+                -D PKG_CLASS2=on \
+                -D PKG_COLLOID=on \
+                -D PKG_CORESHELL=on \
+                -D PKG_DIPOLE=on \
+                -D PKG_DPD-BASIC=on \
+                -D PKG_EXTRA-COMPUTE=on \
+                -D PKG_EXTRA-FIX=on \
+                -D PKG_EXTRA-MOLECULE=on \
+                -D PKG_EXTRA-PAIR=on \
+                -D PKG_GRANULAR=on \
+                -D PKG_LEPTON=on \
+                -D PKG_MC=on \
+                -D PKG_MEAM=on \
+                -D PKG_POEMS=on \
+                -D PKG_PYTHON=on \
+                -D PKG_QEQ=on \
+                -D PKG_REAXFF=on \
+                -D PKG_REPLICA=on \
+                -D PKG_SRD=on \
+                -D PKG_VORONOI=on \
+                -G Ninja
+          cmake --build build
+          ccache -s
+
+      - name: Run Regression Tests for Selected Examples
+        shell: bash
+        run: |
+          source linuxenv/bin/activate
+          python3 tools/regression-tests/get_kokkos_input.py \
+                  --examples-top-level=examples \
+                  --filter-out="balance;fire;gcmc;granregion;mdi;mliap;neb;pace;prd;pour;python;snap"
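+
+          # run_tests.py compares the thermo output of each run against the
+          # reference log files bundled with the examples; --quick-max=100
+          # randomly samples at most 100 input scripts per matrix shard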
+          python3 tools/regression-tests/run_tests.py \
+                  --lmp-bin=build/lmp \
+                  --config-file=tools/regression-tests/config_kokkos_openmp.yaml \
+                  --list-input=input-list-${{ matrix.idx }}-kk.txt \
+                  --output-file=output-${{ matrix.idx }}.xml \
+                  --progress-file=progress-${{ matrix.idx }}.yaml \
+                  --log-file=run-${{ matrix.idx }}.log \
+                  --quick-max=100 --verbose
+
+          tar -cvf kokkos-regression-test-${{ matrix.idx }}.tar run-${{ matrix.idx }}.log progress-${{ matrix.idx }}.yaml output-${{ matrix.idx }}.xml
+
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: kokkos-regression-test-artifact-${{ matrix.idx }}
+          path: kokkos-regression-test-${{ matrix.idx }}.tar
+
+  merge:
+    runs-on: ubuntu-latest
+    needs: build
+    steps:
+      - name: Merge Artifacts
+        uses: actions/upload-artifact/merge@v4
+        with:
+          name: merged-kokkos-regression-artifact
+          pattern: kokkos-regression-test-artifact-*
diff --git a/examples/threebody/log.08Oct24.mos2.sw.mod.g++.4 b/examples/threebody/log.08Oct24.mos2.sw.mod.g++.4
new file mode 100644
index 0000000000..f0941f2532
--- /dev/null
+++ b/examples/threebody/log.08Oct24.mos2.sw.mod.g++.4
@@ -0,0 +1,102 @@
+LAMMPS (29 Aug 2024 - Development - patch_29Aug2024-512-g13c57ab9b5)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
+  using 1 OpenMP thread(s) per MPI task
+# monolayer MoS2
+units           metal
+boundary        p p f
+processors      * * 1
+
+atom_style      atomic
+read_data       single_layer_MoS2.data
+Reading data file ...
+  triclinic box = (0 0 -100) to (51.15232 44.299209 100) with tilt (25.57616 0 0)
+WARNING: Triclinic box skew is large. LAMMPS will run inefficiently. (src/domain.cpp:221)
+  2 by 2 by 1 MPI processor grid
+  reading atoms ...
+  768 atoms
+  read_data CPU = 0.003 seconds
+
+mass * 32.065   # mass of sulphur atom , uint: a.u.=1.66X10^(-27)kg
+mass 1 95.94    # mass of molebdenum atom , uint: a.u.=1.66X10^(-27)kg
+
+########################## Define potentials ################################
+pair_style      sw/mod maxdelcs 0.25 0.35
+pair_coeff      * * tmd.sw.mod Mo S S
+Reading sw potential file tmd.sw.mod with DATE: 2018-03-26
+#########################################################################
+
+### Simulation settings ####
+timestep        0.001
+velocity        all create 300.0 12345
+
+############################
+
+# Output
+thermo          500
+thermo_style    custom step etotal pe ke temp
+thermo_modify   lost warn
+
+###### Run molecular dynamics ######
+fix             thermostat all nve
+run             5000
+
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+Your simulation uses code contributions which should be cited:
+- Type Label Framework: https://doi.org/10.1021/acs.jpcb.3c08419
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+Neighbor list info ...
+  update: every = 1 steps, delay = 0 steps, check = yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 5.158796
+  ghost atom cutoff = 5.158796
+  binsize = 2.579398, bins = 30 18 78
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair sw/mod, perpetual
+      attributes: full, newton on
+      pair build: full/bin/atomonly
+      stencil: full/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.165 | 3.165 | 3.165 Mbytes
+   Step         TotEng         PotEng         KinEng          Temp
+      0     -899.28605     -929.02881      29.742759      300
+    500     -899.28626     -922.45519      23.168929      233.69313
+   1000     -899.29247     -925.86547      26.573002      268.02828
+   1500     -899.27957     -916.95478      17.675214      178.28084
+   2000     -899.28171     -918.38728      19.105573      192.70814
+   2500     -899.28732     -922.50423      23.21691       234.17709
+   3000     -899.28195     -918.74112      19.459174      196.27473
+   3500     -899.27944     -918.03105      18.751604      189.13784
+   4000     -899.28397     -920.50737      21.223397      214.06955
+   4500     -899.28386     -919.79154      20.507685      206.85053
+   5000     -899.28077     -918.78947      19.508698      196.77425
+Loop time of 0.595509 on 4 procs for 5000 steps with 768 atoms
+
+Performance: 725.430 ns/day, 0.033 hours/ns, 8396.182 timesteps/s, 6.448 Matom-step/s
+99.9% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.4603     | 0.49732    | 0.54269    |   4.2 | 83.51
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.03293    | 0.078347   | 0.11558    |  10.6 | 13.16
+Output  | 0.00010079 | 0.00010935 | 0.00012827 |   0.0 |  0.02
+Modify  | 0.0073413  | 0.0082665  | 0.0091767  |   0.7 |  1.39
+Other   |            | 0.01146    |            |       |  1.92
+
+Nlocal:            192 ave         194 max         190 min
+Histogram: 1 0 0 0 0 2 0 0 0 1
+Nghost:            194 ave         196 max         192 min
+Histogram: 1 0 0 0 0 2 0 0 0 1
+Neighs:              0 ave           0 max           0 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+FullNghs:         5120 ave        5170 max        5070 min
+Histogram: 1 0 0 0 0 2 0 0 0 1
+
+Total # of neighbors = 20480
+Ave neighs/atom = 26.666667
+Neighbor list builds = 0
+Dangerous builds = 0
+Total wall time: 0:00:00
diff --git a/tools/regression-tests/README b/tools/regression-tests/README
index 1342e50310..b698be6669 100644
--- a/tools/regression-tests/README
+++ b/tools/regression-tests/README
@@ -106,7 +106,7 @@ An example of the test configuration `config.yaml` is given as below.
 
   ---
     lmp_binary: ""
-    nprocs: "4"
+    nprocs: ""
     args: "-cite none"
     mpiexec: "mpirun"
     mpiexec_numproc_flag: "-np"
@@ -135,7 +135,8 @@ An example of the test configuration `config.yaml` is given as below.
       abs: 1e-2
       rel: 1e-4
     skip:
-      [ in.displ,
+      [
+        in.displ,
         in.displ2,
         in.*_imd*,
       ]
@@ -144,6 +145,13 @@ An example of the test configuration `config.yaml` is given as below.
     epsilon: 1e-16
     timeout: 180
 
+Note that if nprocs is left empty (""), as in the above example, the test for a given input script will use the maximum number of procs found among its reference log files.
+For instance, the input script examples/melt/in.melt has 2 log files, with 1 and 4 procs. The test for in.melt will then run with 4 procs and the output is compared against the 4-proc log file. This is the typical configuration.
+
+If nprocs is specified explicitly, for example nprocs: "2", this value will be used for ALL the input scripts (except for the valgrind test).
+In this case, the reference log file is again the one with the maximum number of procs, that is, 4. One example of this configuration is the KOKKOS tests, where
+the runs are often `mpirun -np 2 lmp -in in.melt -k on g 2` (with the CUDA backend) or `mpirun -np 2 lmp -in in.melt -k on t 2` (with the OpenMP backend).
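+
+For illustration, the two modes would look like this in the configuration file:
+
+    nprocs: ""     # use the maximum number of procs found among the reference log files
+
+versus
+
+    nprocs: "2"    # force 2 procs for every input script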
+
 An example of the list of example subfolders in a text file `list_subfolders1.txt`
 
   /home/codes/lammps/examples/melt 1
diff --git a/tools/regression-tests/config_kokkos.yaml b/tools/regression-tests/config_kokkos.yaml
index 8c94e04071..455d1ad0dd 100644
--- a/tools/regression-tests/config_kokkos.yaml
+++ b/tools/regression-tests/config_kokkos.yaml
@@ -1,33 +1,26 @@
 ---
   lmp_binary: ""
-  nprocs: "4"
-  args: "-cite none -k on g 1 -sf kk -pk kokkos newton on neigh half"
+  nprocs: "2"
+  args: "-cite none -k on g 2 -sf kk -pk kokkos newton on neigh half"
   mpiexec: "mpirun"
-  mpiexec_numproc_flag: "-np"
+  mpiexec_numproc_flag: "--host localhost:2 -np"
   tolerance:
     PotEng:
       abs: 1e-4
-      rel: 1e-7
+      rel: 1e-6
    TotEng:
      abs: 1e-4
-      rel: 1e-7
+      rel: 1e-6
    Press:
      abs: 1e-4
-      rel: 1e-7
+      rel: 1e-6
    Temp:
      abs: 1e-4
-      rel: 1e-7
+      rel: 1e-6
    E_vdwl:
      abs: 1e-3
-      rel: 1e-7
-  overrides:
-    in.rigid.tnr:
-      Temp:
-        abs: 1e-3
-        rel: 1e-5
-      Press:
-        abs: 1e-2
-        rel: 1e-4
+      rel: 1e-6
+
  timeout: 120
  nugget: 1.0
  epsilon: 1e-16
diff --git a/tools/regression-tests/config_kokkos_openmp.yaml b/tools/regression-tests/config_kokkos_openmp.yaml
new file mode 100644
index 0000000000..1979d54b6a
--- /dev/null
+++ b/tools/regression-tests/config_kokkos_openmp.yaml
@@ -0,0 +1,26 @@
+---
+  lmp_binary: ""
+  nprocs: "2"
+  args: "-cite none -k on t 2 -sf kk -pk kokkos newton on neigh half"
+  mpiexec: "mpirun"
+  mpiexec_numproc_flag: "--host localhost:2 -np"
+  tolerance:
+    PotEng:
+      abs: 1e-4
+      rel: 1e-6
+    TotEng:
+      abs: 1e-4
+      rel: 1e-6
+    Press:
+      abs: 1e-4
+      rel: 1e-6
+    Temp:
+      abs: 1e-4
+      rel: 1e-6
+    E_vdwl:
+      abs: 1e-3
+      rel: 1e-7
+
+  timeout: 120
+  nugget: 1.0
+  epsilon: 1e-16
diff --git a/tools/regression-tests/get_kokkos_input.py b/tools/regression-tests/get_kokkos_input.py
new file mode 100644
index 0000000000..c03a813456
--- /dev/null
+++ b/tools/regression-tests/get_kokkos_input.py
@@ -0,0 +1,83 @@
+# This script looks for input scripts under examples/ that use pair/fix/compute styles with KOKKOS support
+# and prints separate input lists into 4 files:
+#   input-list-pair-kk.txt
+#   input-list-fix-kk.txt
+#   input-list-compute-kk.txt
+#   input-list-misc-kk.txt
+# These 4 files will be read in by the regression tester run_tests.py
+
+from argparse import ArgumentParser
+import subprocess
+import sys
+
+# in_style = fix, pair, compute, angle, bond, dihedral, improper, min
+def generate_list(in_style, example_toplevel, filter_out, output_list):
+
+    # find all the styles of the given kind that have a kokkos suffix
+    cmd_str = f"ls {example_toplevel}/../src/KOKKOS | grep {in_style} | grep .cpp"
+    p = subprocess.run(cmd_str, shell=True, text=True, capture_output=True)
+    kokkos_styles = p.stdout.split('\n')
+    style_names = []
+    for style in kokkos_styles:
+        if style != "":
+            # convert "{in_style}_[name]_kokkos.cpp" into "[name]"
+            style = style.replace(f"{in_style}_","")
+            style = style.replace("_kokkos.cpp","")
+            style = style.replace("_","/")
+            style_names.append(style)
+
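+    # e.g. pair_lj_cut_kokkos.cpp under src/KOKKOS becomes the style name "lj/cut",
+    # which is then searched for in the example input scripts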
[name]" (or "compute ... [name]") + cmd_str = f"grep -rl '{in_style}.*{style}' {example_toplevel}/*/in.* " + + p = subprocess.run(cmd_str, shell=True, text=True, capture_output=True) + input_list = p.stdout.split('\n') + input_list = ' '.join(input_list).split() + for input in input_list: + if input != "": + skip = False + for filter in filter_out: + if filter in input: + skip = True + break + if skip == True: + continue + else: + if input not in output_list: + output_list.append(input) + + +if __name__ == "__main__": + parser = ArgumentParser() + parser.add_argument("--examples-top-level", dest="example_toplevel", default="", help="Examples top-level") + parser.add_argument("--filter-out", dest="filter_out", default="", help="Filter out input scripts that contain strings") + + args = parser.parse_args() + example_toplevel = args.example_toplevel + filter_out = args.filter_out.split(";") + + # print the list of the input scripts that has each feature to a separate file + features = [ 'pair', 'fix', 'compute' ] + for feature in features: + input_list = [] + generate_list(feature, example_toplevel, filter_out, input_list) + with open(f"input-list-{feature}-kk.txt", "w") as f: + for input in input_list: + if input != "": + f.write(f"{input}\n") + + # combine the list of the input scripts that have these feature to a single file input-list-misc-kk.txt + features = [ 'angle', 'bond', 'dihedral', 'improper', 'min' ] + input_list = [] + for feature in features: + generate_list(feature, example_toplevel, filter_out, input_list) + + with open(f"input-list-misc-kk.txt", "w") as f: + for input in input_list: + if input != "": + f.write(f"{input}\n") + diff --git a/tools/regression-tests/run_tests.py b/tools/regression-tests/run_tests.py index 32b89bd27c..577ca211c0 100755 --- a/tools/regression-tests/run_tests.py +++ b/tools/regression-tests/run_tests.py @@ -147,10 +147,13 @@ class TestResult: def iterate(lmp_binary, input_folder, input_list, config, results, progress_file, failure_file, walltime_ref=1, verbose=False, last_progress=None, output_buf=None): num_tests = len(input_list) + + num_skipped = 0 + num_error = 0 + num_timeout = 0 + num_failed = 0 num_completed = 0 num_passed = 0 - num_skipped = 0 - num_error = 0 num_memleak = 0 test_id = 0 @@ -187,7 +190,7 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file msg = " + " + input + f" ({test_id+1}/{num_tests}): skipped as specified in {configFileName}" print(msg) logger.info(msg) - progress.write(f"{input}: {{ folder: {input_folder}, status: \"skipped\", walltime: {walltime} }}\n") + progress.write(f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': 'skipped', 'walltime': '{walltime}' }} }}\n") progress.close() num_skipped = num_skipped + 1 test_id = test_id + 1 @@ -205,7 +208,7 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file msg = " + " + input + f" ({test_id+1}/{num_tests}): skipped as specified in {configFileName}" print(msg) logger.info(msg) - progress.write(f"{input}: {{ folder: {input_folder}, status: \"skipped\", walltime: {walltime} }}\n") + progress.write(f"{{ '{input}': {{ 'folder': {input_folder}, 'status': 'skipped', 'walltime': '{walltime}' }} }}\n") progress.close() num_skipped = num_skipped + 1 test_id = test_id + 1 @@ -300,12 +303,15 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file saved_nprocs = config['nprocs'] - # if the maximum number of procs is different from the value in the configuration file - # then override the 
             progress.close()
             num_skipped = num_skipped + 1
             test_id = test_id + 1
@@ -300,12 +303,15 @@
 
         saved_nprocs = config['nprocs']
 
-        # if the maximum number of procs is different from the value in the configuration file
-        # then override the setting for this particular input script
-        if max_np != int(config['nprocs']):
+        # if the nprocs value in the configuration file is empty then use max_np for this particular input script
+        if config['nprocs'] == "":
             config['nprocs'] = str(max_np)
+        else:
+            # otherwise keep the nprocs value enforced by the configuration file
+            logger.info(f"    Using {config['nprocs']} nprocs for {input_test} as enforced in the config file.")
+            logger.info(f"    WARNING: The maximum number of procs found from the reference log files is {max_np}.")
 
-        # store the value of nprocs
+        # store the value of nprocs to name the generated log file
         nprocs = int(config['nprocs'])
 
         # if valgrind is used for mem check, the run command will be
@@ -322,7 +328,12 @@
         result = TestResult(name=input, output="", time="", status="passed")
 
         # run the LAMMPS binary with the input script
-        cmd_str, output, error, returncode, logfilename = execute(lmp_binary, config, input_test)
+        status = execute(lmp_binary, config, input_test)
+        cmd_str = status['cmd_str']
+        output = status['stdout']
+        error = status['stderr']
+        returncode = status['returncode']
+        logfilename = status['logfilename']
 
         # restore the nprocs value in the configuration
         config['nprocs'] = saved_nprocs
@@ -352,7 +363,7 @@
             results.append(result)
             print(f"{result.status}")
 
-            msg = f"{input}: {{ folder: {input_folder}, status: \"{result.status}\", walltime: {walltime} }}\n"
+            msg = f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': '{result.status}', 'walltime': '{walltime}' }} }}\n"
             progress.write(msg)
             progress.close()
             failure.write(msg)
@@ -369,7 +380,7 @@
             logger.info(f"    {output}")
             logger.info(f"    Error:\n{error}")
 
-            msg = f"{input}: {{ folder: {input_folder}, status: \"failed, no log file generated\", walltime: {walltime} }}\n"
+            msg = f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': 'failed, no log file generated', 'walltime': '{walltime}' }} }}\n"
             progress.write(msg)
             progress.close()
             failure.write(msg)
@@ -399,7 +410,7 @@
             result.status = msg
             results.append(result)
 
-            msg = f"{input}: {{ folder: {input_folder}, status: \"{msg}\", walltime: {walltime} }}\n"
+            msg = f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': '{msg}', 'walltime': '{walltime}' }} }}\n"
             progress.write(msg)
             progress.close()
             failure.write(msg)
@@ -418,11 +429,15 @@
             logger.info(f"\n    Output:\n{output}")
             logger.info(f"\n    Error:\n{error}")
 
-            msg = f"{input}: {{ folder: {input_folder}, status: \"failed, no Total wall time in the output, {error}\", walltime: {walltime} }}\n"
+            msg = f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': 'failed, no Total wall time in the output, {error}', 'walltime': '{walltime}' }} }}\n"
             progress.write(msg)
             progress.close()
             failure.write(msg)
 
+            returncode = int(returncode)
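+            # execute() reports a run killed by the subprocess timeout (subprocess.TimeoutExpired) with returncode -1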
output.\", walltime: {walltime}, walltime_norm: {walltime_norm} }}\n" + msg = f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': 'completed, but no Step nor Loop in the output.', 'walltime': '{walltime}', 'walltime_norm': '{walltime_norm}' }} }}\n" progress.write(msg) progress.close() failure.write(msg) @@ -477,7 +492,7 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file result.status = msg + f", error parsing {logfilename} into YAML" results.append(result) - progress.write(f"{input}: {{ folder: {input_folder}, status: \"{result.status}\", walltime: {walltime}, walltime_norm: {walltime_norm} }}\n") + progress.write(f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': '{result.status}', 'walltime': '{walltime}', 'walltime_norm': '{walltime_norm}' }} }}\n") progress.close() if verbose == True: @@ -495,14 +510,14 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file if thermo_ref: num_runs_ref = len(thermo_ref) else: - # thhe thermo_ref dictionary is empty + # the thermo_ref dictionary is empty logger.info(f" failed, error parsing the reference log file {thermo_ref_file}.") result.status = "skipped numerical checks due to parsing the reference log file" results.append(result) - progress.write(f"{input}: {{ folder: {input_folder}, status: \"completed, numerical checks skipped, unsupported log file format\", walltime: {walltime}, walltime_norm: {walltime_norm} }}\n") + progress.write(f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': 'completed, numerical checks skipped, unsupported log file format', 'walltime': '{walltime}', 'walltime_norm': '{walltime_norm}' }} }}\n") progress.close() num_completed = num_completed + 1 - num_error = num_error + 1 + num_failed = num_failed + 1 test_id = test_id + 1 continue else: @@ -521,12 +536,12 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file result.status = "skipped due to missing the reference log file" results.append(result) - msg = f"{input}: {{ folder: {input_folder}, status: \"completed, numerical checks skipped due to missing the reference log file\", walltime: {walltime}, walltime_norm: {walltime_norm} }}\n" + msg = f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': 'completed, numerical checks skipped due to missing the reference log file', 'walltime': '{walltime}', 'walltime_norm': '{walltime_norm}' }} }}\n" progress.write(msg) progress.close() failure.write(msg) num_completed = num_completed + 1 - num_error = num_error + 1 + num_failed = num_failed + 1 test_id = test_id + 1 continue @@ -539,9 +554,10 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file " Check README in the folder, possibly due to using mpirun with partitions or parsing the wrong reference log file.") result.status = "failed, incomplete runs" results.append(result) - progress.write(f"{input}: {{ folder: {input_folder}, status: \"{result.status}\", walltime: {walltime}, walltime_norm: {walltime_norm} }}\n") + progress.write(f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': '{result.status}', 'walltime': '{walltime}', 'walltime_norm': '{walltime_norm}' }} }}\n") progress.close() - num_error = num_error + 1 + num_completed = num_completed + 1 + num_failed = num_failed + 1 test_id = test_id + 1 continue @@ -555,9 +571,10 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file logger.info(f" Check both log files for more details.") result.status = "failed, mismatched columns in the log files" 
             results.append(result)
-            progress.write(f"{input}: {{ folder: {input_folder}, status: \"{result.status}\", walltime: {walltime}, walltime_norm: {walltime_norm} }}\n")
+            progress.write(f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': '{result.status}', 'walltime': '{walltime}', 'walltime_norm': '{walltime_norm}' }} }}\n")
             progress.close()
-            num_error = num_error + 1
+            num_completed = num_completed + 1
+            num_failed = num_failed + 1
             test_id = test_id + 1
             continue
@@ -581,6 +598,7 @@
         failed_rel_output = []
         num_checks = 0
         mismatched_columns = False
+        mismatched_num_steps = False
 
         for irun in range(num_runs):
             num_fields = len(thermo[irun]['keywords'])
@@ -593,6 +611,13 @@
             # get the total number of the thermo output lines
             nthermo_steps = len(thermo[irun]['data'])
+            nthermo_steps_ref = len(thermo_ref[irun]['data'])
+
+            if nthermo_steps_ref != nthermo_steps:
+                logger.info(f"    failed: Number of thermo steps in {logfilename} ({nthermo_steps})")
+                logger.info(f"    is different from that in the reference log ({nthermo_steps_ref}) in run {irun}.")
+                mismatched_num_steps = True
+                continue
 
             # get the output at the last timestep
             thermo_step = nthermo_steps - 1
@@ -644,19 +669,41 @@
                     print(f"        {thermo[irun]['keywords'][i].ljust(width)} {str(val).rjust(20)} {str(ref).rjust(20)} {abs_diff_check.rjust(20)} {rel_diff_check.rjust(20)}")
 
         # after all runs completed, or are interrupted in one of the runs (mismatched_columns = True)
         if mismatched_columns == True:
-            msg = f"    mismatched log files after the first run. Check both log files for more details."
+            msg = f"    mismatched columns in the log files after the first run. Check both log files for more details."
             print(msg)
             logger.info(msg)
             result.status = "thermo checks failed due to mismatched log files after the first run"
+            results.append(result)
+            progress.write(f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': '{result.status}', 'walltime': '{walltime}', 'walltime_norm': '{walltime_norm}' }} }}\n")
+            progress.close()
+            num_completed = num_completed + 1
+            num_failed = num_failed + 1
+            test_id = test_id + 1
+            continue
+
+        # some runs involve the minimize command, which can lead to a different number of steps than in the reference log file
+        if mismatched_num_steps == True:
+            msg = f"    mismatched number of steps in the log files. Check both log files for more details."
+            print(msg)
+            logger.info(msg)
+            result.status = "thermo checks failed due to mismatched number of steps in the log files"
+            results.append(result)
+            progress.write(f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': '{result.status}', 'walltime': '{walltime}', 'walltime_norm': '{walltime_norm}' }} }}\n")
+            progress.close()
+            num_completed = num_completed + 1
+            num_failed = num_failed + 1
+            test_id = test_id + 1
+            continue
 
         result.status = ""
         if num_abs_failed > 0:
             msg = f"    {num_abs_failed} abs diff checks failed."
             print(msg)
             logger.info(msg)
-            #result.status = f"abs_diff_failed: {num_abs_failed}, "
+            for out in failed_abs_output:
+                logger.info(f"      - {out}")
+
             if verbose == True:
                 for out in failed_abs_output:
                     print(f"      - {out}")
@@ -665,7 +712,9 @@
             msg = f"    {num_rel_failed} rel diff checks failed."
             print(msg)
             logger.info(msg)
-            #result.status += f"rel_diff_failed: {num_rel_failed}"
+            for out in failed_rel_output:
+                logger.info(f"      - {out}")
+
             if verbose == True:
                 for out in failed_rel_output:
                     print(f"      - {out}")
@@ -674,12 +723,14 @@
             msg = f"    all {num_checks} checks passed."
             print(msg)
             logger.info(msg)
-            #result.status = f"all {num_checks} checks passed."
+
+            result.status = f" 'status': 'passed', 'abs_diff_failed': '{num_abs_failed}', 'rel_diff_failed': '{num_rel_failed}' "
+            num_passed = num_passed + 1
         else:
-            num_error = num_error + 1
+            result.status = f" 'status': 'failed', 'abs_diff_failed': '{num_abs_failed}', 'rel_diff_failed': '{num_rel_failed}' "
+            num_failed = num_failed + 1
 
-        result.status = f"abs_diff_failed: {num_abs_failed}, rel_diff_failed: {num_rel_failed}"
         results.append(result)
 
         # check if memleak detects from valgrind run (need to replace "mpirun" -> valgrind --leak-check=yes mpirun")
@@ -691,12 +742,12 @@
                 msg += ", memory leaks detected"
                 num_memleak = num_memleak + 1
 
-        progress.write(f"{input}: {{ folder: {input_folder}, status: \"{msg}\", failed_checks: {{ {result.status} }}, walltime: {walltime}, walltime_norm: {walltime_norm} }}\n")
+        progress.write(f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': '{msg}', 'failed_checks': {{ {result.status} }}, 'walltime': '{walltime}', 'walltime_norm': '{walltime_norm}' }} }}\n")
         progress.close()
 
         # write to failure if there is any numerical failed check
         if num_abs_failed > 0 or num_rel_failed > 0:
-            failure.write(f"{input}: {{ folder: {input_folder}, status: \"{msg}\", failed_checks: {{ {result.status} }}, walltime: {walltime}, walltime_norm: {walltime_norm} }}\n")
+            failure.write(f"{{ '{input}': {{ 'folder': '{input_folder}', 'status': '{msg}', 'failed_checks': {{ {result.status} }}, 'walltime': '{walltime}', 'walltime_norm': '{walltime_norm}' }} }}\n")
 
         # count the number of completed runs
         num_completed = num_completed + 1
@@ -709,6 +760,8 @@
         'num_passed': num_passed,
         'num_skipped': num_skipped,
         'num_error': num_error,
+        'num_timeout': num_timeout,
+        'num_failed': num_failed,
         'num_memleak': num_memleak,
     }
     return stat
@@ -886,7 +939,14 @@
     try:
         p = subprocess.run(cmd_str, shell=True, text=True, capture_output=True, timeout=timeout)
-        return cmd_str, p.stdout, p.stderr, p.returncode, logfilename
+        status = {
+            'cmd_str': cmd_str,
+            'stdout': p.stdout,
+            'stderr': p.stderr,
+            'returncode': p.returncode,
+            'logfilename': logfilename,
+        }
+        return status
 
     except subprocess.TimeoutExpired:
         msg = f"    Timeout for: {cmd_str} ({timeout}s expired)"
@@ -894,7 +954,14 @@
         print(msg)
 
         error_str = f"timeout ({timeout}s expired)"
-        return cmd_str, "", error_str, -1, logfilename
+        status = {
+            'cmd_str': cmd_str,
+            'stdout': "",
+            'stderr': error_str,
+            'returncode': -1,
+            'logfilename': logfilename,
+        }
+        return status
 
 '''
     get the reference walltime by running the lmp_binary with config with an input script in the bench/ folder
 '''
@@ -932,6 +999,7 @@
     logger.info(msg)
     print(msg)
 
+    looptime = 1.0
     for line in output.split('\n'):
         if "Total wall time" in line:
             walltime_str = line.split('time:')[1]
@@ -940,6 +1008,14 @@ def get_reference_walltime(lmp_binary, config):
             minutes = float(hms[1])
             seconds = float(hms[2])
             walltime = hours * 3600.0 + minutes * 60.0 + seconds
+        if "Loop time" in line:
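+            # e.g. from "Loop time of 0.595509 on 4 procs for 5000 steps", split(' ')[3] gives "0.595509"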
+            looptime_str = line.split(' ')[3]
+            seconds = float(looptime_str)
+            looptime = seconds
+
+    # in some cases (e.g. with in.lj) the total wall time is reported as zero seconds; then use the loop time instead
+    if float(walltime) < float(config['epsilon']):
+        walltime = looptime
 
     logger.info(f"    Reference walltime, sec = {walltime}")
@@ -1104,7 +1180,7 @@
                         help="Determine which test inputs have commands changed between a branch and the head")
     parser.add_argument("--quick-branch", dest="quick_branch", default=quick_branch,
                         help="Branch to which compare the current head to for changed styles")
-    parser.add_argument("--quick-max", dest="quick_max", default=50,
+    parser.add_argument("--quick-max", dest="quick_max", default=0,
                         help="Maximum number of inputs to randomly select")
     parser.add_argument("--quick-reference", dest="quick_reference", default=quick_reference,
                         help="Reference YAML file with progress data from full regression test run")
@@ -1351,6 +1427,13 @@
                 example_inputs.append(input)
                 num_inputscripts += 1
 
+        # allow selecting some input scripts randomly at this point if quick_max is set
+        if quick_max > 0 and len(example_inputs) > quick_max:
+            example_inputs = random.sample(example_inputs, quick_max)
+            msg = "\nTesting " + str(quick_max) + " randomly selected inputs"
+            print(msg)
+            logger.info(msg)
+
         example_subfolders = folder_list
         msg = f"\nThere are {num_inputscripts} input scripts listed in {list_input}."
         print(msg)
@@ -1437,6 +1520,8 @@
     passed_tests = 0
     skipped_tests = 0
     error_tests = 0
+    timeout_tests = 0
+    failed_tests = 0
     memleak_tests = 0
 
     # default setting is to use inplace_input
@@ -1456,6 +1541,9 @@
 
         for directory in example_subfolders:
 
+            if not os.path.exists(directory):
+                continue
+
             # change to the directory where the input script and data files are located
             print("-"*80)
             print("Entering " + directory)
@@ -1490,6 +1578,8 @@
             skipped_tests += stat['num_skipped']
             passed_tests += stat['num_passed']
             error_tests += stat['num_error']
+            timeout_tests += stat['num_timeout']
+            failed_tests += stat['num_failed']
             memleak_tests += stat['num_memleak']
 
             # append the results to the all_results list
@@ -1509,26 +1599,34 @@
         skipped_tests = stat['num_skipped']
         passed_tests = stat['num_passed']
         error_tests = stat['num_error']
+        timeout_tests = stat['num_timeout']
+        failed_tests = stat['num_failed']
         memleak_tests = stat['num_memleak']
 
         all_results.extend(results)
 
-    # print out summary
+    # print out summary:
+    #   error_tests     = number of runs that errored out
+    #   failed_tests    = number of runs that failed the numerical checks, including those with a missing
+    #                     reference log file, or a mismatched number of runs or steps within a run
+    #   completed_tests = number of runs that reached the end (Total wall time printed out)
+    #                   = failed_tests + passed_tests
+
     msg = "\nSummary:\n"
     msg += f"  Total number of input scripts: {total_tests}\n"
     msg += f"  - Skipped  : {skipped_tests}\n"
-    msg += f"  - Failed   : {error_tests}\n"
+    msg += f"  - Error    : {error_tests}\n"
+    msg += f"  - Timeout  : {timeout_tests}\n"
     msg += f"  - Completed: {completed_tests}\n"
+    msg += f"  - Failed   : {failed_tests}\n"
 
     # print notice to GitHub
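+    # (GITHUB_STEP_SUMMARY points to a file provided by the GitHub Actions runner;
+    #  anything written to it appears on the workflow run's summary page)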
     if 'GITHUB_STEP_SUMMARY' in os.environ:
         with open(os.environ.get('GITHUB_STEP_SUMMARY'), 'w') as f:
-            print(f"Skipped: {skipped_tests} Failed: {error_tests} Completed: {completed_tests}", file=f)
+            print(f"Skipped: {skipped_tests} Error: {error_tests} Timeout: {timeout_tests} Failed: {failed_tests} Completed: {completed_tests}", file=f)
 
     if memleak_tests < completed_tests and 'valgrind' in config['mpiexec']:
-        msg += f" - memory leak detected : {memleak_tests}\n"
+        msg += f"  - memory leak detected  : {memleak_tests}\n"
     if passed_tests <= completed_tests:
-        msg += f" - numerical tests passed: {passed_tests}\n"
+        msg += f"  - numerical tests passed: {passed_tests}\n"
     msg += "\nOutput:\n"
     msg += f"  - List of failed inputs       : {failure_file}\n"
     msg += f"  - Status of the tested inputs : {progress_file}\n"
@@ -1544,12 +1642,10 @@
     for result in all_results:
         #print(f"{result.name}: {result.status}")
         case = TestCase(name=result.name, classname=result.name)
-        if result.status == "failed":
-            case.add_failure_info(message="Actual values did not match expected ones.")
-        if result.status == "skipped":
+        if "passed" not in result.status:
+            case.add_failure_info(message=result.status)
+        if "skipped" in result.status:
             case.add_skipped_info(message="Test was skipped.")
-        if result.status == "error":
-            case.add_skipped_info(message="Test run had errors.")
         test_cases.append(case)
 
     current_timestamp = datetime.datetime.now()