Updated the regression tester run_tests.py to handle a list of input scripts

2024-08-25 16:02:16 -05:00
parent 149ae74631
commit 04400e10a8
4 changed files with 119 additions and 66 deletions
--- a/.github/workflows/full-regression.yml
+++ b/.github/workflows/full-regression.yml
@ -67,7 +67,7 @@ jobs:
        cmake --build build
        ccache -s
-    - name: Analyze top-level examples folder, split into 8 seperate subfolder lists
+    - name: Analyze top-level examples folder, split into 8 separate lists of input scripts
      shell: bash
      run: |
        source linuxenv/bin/activate
@ -86,10 +86,10 @@ jobs:
    strategy:
      max-parallel: 2
      matrix:
-        idx: [ 0, 1, 2, 3, 4, 5, 6, 7 ]
+        idx: [ 0, 1 ]
    steps:
-    - name: Run regression tests with 8 workers each processing a list of subfolders
+    - name: Run regression tests
      run: |
        source linuxenv/bin/activate
        python3 tools/regression-tests/run_tests.py \
--- a/.github/workflows/quick-regression.yml
+++ b/.github/workflows/quick-regression.yml
@ -78,7 +78,7 @@ jobs:
        python3 tools/regression-tests/run_tests.py \
               --lmp-bin=build/lmp \
               --config-file=tools/regression-tests/config_serial.yaml \
-               --list-input=folder_list.txt
+               --list-input=input_list.txt
        tar -cvf quick-regression-test.tar run.log progress.yaml
    - name: Upload artifacts
--- a/tools/regression-tests/get-quick-list.py
+++ b/tools/regression-tests/get-quick-list.py
@ -259,28 +259,12 @@ if __name__ == "__main__":
    if regex:
        inputs = get_examples_using_styles(regex, os.path.join(LAMMPS_DIR,'examples'))
        # TODO: modify the regression tester tool to process the raw list of input scripts
        folder_list = []
        print("Suggested inputs for testing:")
        # input_list.txt is used for the regression tester tool
        with open('input_list.txt', 'w') as f:
            for inp in inputs:
                print(inp)
-
+                f.write(inp + '\n')
            # get the folder that contains the input script
            full_path = str(inp)
            folder = full_path.rsplit('/', 1)[0]
            # add unique folders in the list
            if folder not in folder_list:
                folder_list.append(folder)
        # input_list.txt is used for the regression tester tool
        # that lists the individual subfolders and the number of input scripts therein
        with open('folder_list.txt', 'w') as f:
            for folder in folder_list:
                cmd_str = f"ls {folder}/in.* | wc -l"
                p = subprocess.run(cmd_str, shell=True, text=True, capture_output=True)
                num_input = p.stdout.split('\n')[0]
                f.write(folder + ' ' + num_input + '\n')
    print("Found changes to the following styles:")
    print("Commands: ", styles['command'])
--- a/tools/regression-tests/run_tests.py
+++ b/tools/regression-tests/run_tests.py
@ -9,6 +9,7 @@ UPDATE: August 13, 2024:
 With the current features, users can:
    + specify which LAMMPS binary version to test (e.g., the version from a commit, or those from `lammps-testing`)
    + specify the examples subfolders (thus the reference log files) separately (e.g. from other LAMMPS versions or commits)
    + specify the list of examples input scripts to test
    + specify tolerances for individual quantities for any input script to override the global values
    + launch tests with `mpirun` with all supported command line features (multiple procs, multiple partitions, and suffixes)
    + skip certain input files (whose names match specified patterns) if not interested, or packages not installed, or no reference log file exists
@ -43,21 +44,33 @@ Example usage:
           python3 run_tests.py --lmp-bin=/path/to/lmp_binary --config-file=/path/to/config/file/config.yaml \
                --example-folders="/path/to/examples/folder1;/path/to/examples/folder2"
-       The example folders can also be loaded from a text file list_subfolders1.txt:
+       The example subfolders can also be loaded from a text file list_subfolders1.txt:
           python3 run_tests.py --lmp-bin=/path/to/lmp_binary --config-file=/path/to/config/file/config.yaml \
-                --list-input=list_subfolders1.txt --output-file=output1.txt --progress-file=progress1.yaml \
+                --list-subfolders=list_subfolders1.txt --output-file=output1.txt --progress-file=progress1.yaml \
                --log-file=run1.log
-    4) Test a LAMMPS binary with the whole top-level /examples folder in a LAMMPS source tree
+    4) Specify a list of example input scripts
            python3 run_tests.py --lmp-bin=/path/to/lmp_binary --config-file=/path/to/config/file/config.yaml \
                --list-input=input-list-1.txt --output-file=output1.txt --progress-file=progress1.yaml \
                --log-file=run1.log
       The example subfolders can also be loaded from a text file list_subfolders1.txt:
           python3 run_tests.py --lmp-bin=/path/to/lmp_binary --config-file=/path/to/config/file/config.yaml \
                --list-subfolders=list_subfolders1.txt --output-file=output1.txt --progress-file=progress1.yaml \
                --log-file=run1.log
    5) Test a LAMMPS binary with the whole top-level /examples folder in a LAMMPS source tree
           python3 run_tests.py --lmp-bin=/path/to/lmp_binary --examples-top-level=/path/to/lammps/examples
-    5) Analyze the LAMMPS binary annd whole top-level /examples folder in a LAMMPS source tree 
+    6) Analyze the LAMMPS binary and the whole top-level /examples folder in a LAMMPS source tree
       and generate separate input lists for 8 workers:
           python3 run_tests.py --lmp-bin=/path/to/lmp_binary --examples-top-level=/path/to/lammps/examples \
                --analyze --num-workers=8
-       This is used for splitting the subfolders into separate input lists and launching different instances
+       The output of this run is 8 files folder-list-[0-7].txt that lists the subfolders 
-       of run_tests.py simultaneously.
+       and 8 files input-list-[0-7].txt that lists the input scripts under the top-level example folders.
       With these lists, one can launch multiple instances of run_tests.py simultaneously
       each with a list of example subfolders (Case 3), or with a list of input scripts (Case 4).
 '''
 from argparse import ArgumentParser
@ -825,6 +838,7 @@ if __name__ == "__main__":
    lmp_binary = ""
    configFileName = "config.yaml"
    example_subfolders = []
    example_inputs = []
    example_toplevel = ""
    genref = False
    verbose = False
@ -832,6 +846,7 @@ if __name__ == "__main__":
    progress_file = "progress.yaml"
    log_file = "run.log"
    list_input = ""
    list_subfolders = ""
    analyze = False
    # distribute the total number of input scripts over the workers
@ -844,7 +859,8 @@ if __name__ == "__main__":
                        help="Configuration YAML file")
    parser.add_argument("--examples-top-level", dest="example_toplevel", default="", help="Examples top-level")
    parser.add_argument("--example-folders", dest="example_folders", default="", help="Example subfolders")
-    parser.add_argument("--list-input", dest="list_input", default="", help="File that lists the subfolders")
+    parser.add_argument("--list-input", dest="list_input", default="", help="File that lists the input scripts")
    parser.add_argument("--list-subfolders", dest="list_subfolders", default="", help="File that lists the subfolders")
    parser.add_argument("--num-workers", dest="num_workers", default=1, help="Number of workers")
    parser.add_argument("--gen-ref",dest="genref", action='store_true', default=False,
                        help="Generating reference data")
@ -866,6 +882,7 @@ if __name__ == "__main__":
    if int(args.num_workers) > 0:
        num_workers = int(args.num_workers)
    list_input = args.list_input
    list_subfolders = args.list_subfolders
    # example_toplevel is where all the examples subfolders reside
    if args.example_toplevel != "":
@ -884,33 +901,6 @@ if __name__ == "__main__":
    logger = logging.getLogger(__name__)
    logging.basicConfig(filename=log_file, level=logging.INFO, filemode="w")
    # read in the configuration of the tests
    with open(configFileName, 'r') as f:
        config = yaml.load(f, Loader=Loader)
        absolute_path = os.path.abspath(configFileName)
        print(f"\nRegression tests with the settings defined in the configuration file:\n  {absolute_path}")
        f.close()
    # check if lmp_binary is specified in the config yaml
    if lmp_binary == "":
        if config['lmp_binary'] == "":
            print("Needs a valid LAMMPS binary")
            quit()
        else:
            lmp_binary = os.path.abspath(config['lmp_binary'])
    # print out the binary info
    packages, operating_system, GitInfo, compile_flags = get_lammps_build_configuration(lmp_binary)
    print("\nLAMMPS build info:")
    print(f"  - {operating_system}")
    print(f"  - {GitInfo}")
    print(f"  - Active compile flags: {compile_flags}")
    print(f"  - List of {len(packages)} installed packages:")
    all_pkgs = ""
    for p in packages:
        all_pkgs += p + " "
    print(all_pkgs)
    if len(example_subfolders) > 0:
        print("\nExample folders to test:")
        print(*example_subfolders, sep='\n')
@ -926,7 +916,7 @@ if __name__ == "__main__":
    # then use the path from --examples-top-level, or from the subfolder/input-script list read from a text file
    if len(example_subfolders) == 0:
-        # need top level specified
+        # if the top level is specified
        if len(example_toplevel) != 0:
            # getting the list of all the input files because there are subfolders (e.g. PACKAGES) under the top level
            cmd_str = f"find {example_toplevel} -name \"in.*\" "
@ -953,7 +943,7 @@ if __name__ == "__main__":
            # write each chunk to a file
            idx = 0
            for list_input in sublists:
-                filename = f"input-list-{idx}.txt"
+                filename = f"folder-list-{idx}.txt"
                with open(filename, "w") as f:
                    for folder in list_input:
                        # count the number of input scripts in each folder
@ -967,14 +957,28 @@ if __name__ == "__main__":
            # working on all the folders for now
            example_subfolders = folder_list
-        # if a list of subfolders are provided from a text file (list_input from the command-line argument)
+            # divide the list of input scripts into num_workers chunks
-        elif len(list_input) != 0:
+            sublists = divide_into_N(input_list, num_workers)
            # write each chunk to a file
            idx = 0
            for list_input in sublists:
                filename = f"input-list-{idx}.txt"
                with open(filename, "w") as f:
                    for inp in list_input:
                        f.write(inp + '\n')
                    f.close()
                idx = idx + 1
        # if a list of subfolders is provided from a text file (list_subfolders from the command-line argument)
        elif len(list_subfolders) != 0:
            num_inputscripts = 0
-            with open(list_input, "r") as f:
+            with open(list_subfolders, "r") as f:
                all_subfolders = f.read().splitlines()
                f.close()
                for line in all_subfolders:
                    if len(line) > 0:
                        # skip comment lines (those starting with '#')
                        if line[0] == '#':
                            continue
                        folder = line.split()[0]
@ -983,6 +987,33 @@ if __name__ == "__main__":
            msg = f"\nThere are {len(example_subfolders)} folders with {num_inputscripts} input scripts in total listed in {list_input}."
            print(msg)
            logger.info(msg)
        # if a list of input scripts is provided from a text file (list_input from the command-line argument)
        elif len(list_input) != 0:
            num_inputscripts = 0
            folder_list = []
            with open(list_input, "r") as f:
                all_inputs = f.read().splitlines()
                f.close()
                for line in all_inputs:
                    if len(line) > 0:
                        # skip comment lines (those starting with '#')
                        if line[0] == '#':
                            continue
                        input = line.split()[0]
                        folder = input.rsplit('/', 1)[0]
                        # unique folders in the list
                        if folder not in folder_list:
                            folder_list.append(folder)
                        example_inputs.append(input)
                        num_inputscripts += 1
            example_subfolders = folder_list
            msg = f"\nThere are {num_inputscripts} input scripts listed in {list_input}."
            print(msg)
            logger.info(msg)
        else:
            inplace_input = False
@ -990,6 +1021,33 @@ if __name__ == "__main__":
    if analyze == True:
        quit()
    # read in the configuration of the tests
    with open(configFileName, 'r') as f:
        config = yaml.load(f, Loader=Loader)
        absolute_path = os.path.abspath(configFileName)
        print(f"\nRegression test configuration file:\n  {absolute_path}")
        f.close()
    # check if lmp_binary is specified in the config yaml
    if lmp_binary == "":
        if config['lmp_binary'] == "":
            print("Needs a valid LAMMPS binary")
            quit()
        else:
            lmp_binary = os.path.abspath(config['lmp_binary'])
    # print out the binary info
    packages, operating_system, GitInfo, compile_flags = get_lammps_build_configuration(lmp_binary)
    print("\nLAMMPS build info:")
    print(f"  - {operating_system}")
    print(f"  - {GitInfo}")
    print(f"  - Active compile flags: {compile_flags}")
    print(f"  - List of {len(packages)} installed packages:")
    all_pkgs = ""
    for p in packages:
        all_pkgs += p + " "
    print(all_pkgs)
    all_results = []
    # save current working dir
@ -1044,10 +1102,21 @@ if __name__ == "__main__":
            cmd_str = "ls in.*"
            p = subprocess.run(cmd_str, shell=True, text=True, capture_output=True)
-            input_list = p.stdout.split('\n')
+            all_input_list = p.stdout.split('\n')
-            input_list.remove('')
+            all_input_list.remove('')
-            print(f"{len(input_list)} input script(s): {input_list}")
+            # if the list of example input scripts is provided
            #   if an input script is not in the list, then remove it from input_list
            input_list = []
            if len(example_inputs) > 0:
                for inp in all_input_list:
                    full_path = directory + "/" + inp
                    if full_path in example_inputs:
                        input_list.append(inp)
            else:
                input_list = all_input_list
            print(f"{len(input_list)} input script(s) to be tested: {input_list}")
            total_tests += len(input_list)
            # iterate through the input scripts