intermediate process (more tests need to be added): using input_directory only for simplification

yucongalicechen · yucongalicechen · commit ead58300b385 · 2024-05-10T00:29:44.000-04:00
diff --git a/src/diffpy/labpdfproc/labpdfprocapp.py b/src/diffpy/labpdfproc/labpdfprocapp.py
@@ -1,6 +1,5 @@
 import sys
 from argparse import ArgumentParser
-from pathlib import Path
 
 from diffpy.labpdfproc.functions import apply_corr, compute_cve
 from diffpy.labpdfproc.tools import (
@@ -20,8 +19,11 @@ def get_args(override_cli_inputs=None):
     p.add_argument(
         "input",
         nargs="+",
-        help="The filename or directory of the datafile to load. Required. "
-        "Supports either a single input file, a directory, a file containing a list of files, or multiple files. ",
+        help="The filename(s) or folder(s) of the datafile(s) to load.  Required. "
+        "Supports multiple arguments of input file or directory. "
+        "The file can be either a data file or a file containing a list of files. "
+        "If a directory is provided, we will load all data files in it. "
+        "For example, file.xy, data/file.xy, file_list.txt, ./data/file.xy, ./data are all valid inputs. ",
     )
     p.add_argument(
         "-a",
@@ -92,8 +94,7 @@ def main():
     args.wavelength = set_wavelength(args)
     args = load_user_metadata(args)
 
-    for input_file in args.input_file:
-        filepath = Path(args.input_file)
+    for filepath in args.input_directory:
         outfilestem = filepath.stem + "_corrected"
         corrfilestem = filepath.stem + "_cve"
         outfile = args.output_directory / (outfilestem + ".chi")
diff --git a/src/diffpy/labpdfproc/tests/test_tools.py b/src/diffpy/labpdfproc/tests/test_tools.py
@@ -19,86 +19,80 @@
 # This test covers existing single input file, directory, a file list, and multiple files
 # We store absolute path into input_directory and file names into input_file
 params_input = [
-    (["good_data.chi"], [".", "good_data.chi"]),  # single good file, same directory
-    (["input_dir/good_data.chi"], ["input_dir", "good_data.chi"]),  # single good file, input directory
+    (["good_data.chi"], ["good_data.chi"]),  # single good file, same directory
+    (["input_dir/good_data.chi"], ["input_dir/good_data.chi"]),  # single good file, input directory
     (  # glob current directory
         ["."],
-        [
-            ".",
-            ["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl"],
-        ],
+        ["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl"],
     ),
     (  # glob input directory
         ["./input_dir"],
         [
-            "input_dir",
-            ["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl"],
+            "input_dir/good_data.chi",
+            "input_dir/good_data.xy",
+            "input_dir/good_data.txt",
+            "input_dir/unreadable_file.txt",
+            "input_dir/binary.pkl",
         ],
     ),
     (  # list of files provided (we skip if encountering invalid files)
         ["good_data.chi", "good_data.xy", "unreadable_file.txt", "missing_file.txt"],
-        [
-            ".",
-            ["good_data.chi", "good_data.xy", "unreadable_file.txt"],
-        ],
+        ["good_data.chi", "good_data.xy", "unreadable_file.txt"],
     ),
     (  # list of files provided (with invalid files and files in different directories)
-        ["input_dir/good_data.chi", "good_data.xy", "missing_file.txt"],
-        [
-            ".",
-            ["input_dir/good_data.chi", "good_data.xy"],
-        ],
+        ["input_dir/good_data.chi", "good_data.chi", "missing_file.txt"],
+        ["input_dir/good_data.chi", "good_data.chi"],
     ),
     (  # file_list.txt list of files provided
         ["file_list_dir/file_list.txt"],
-        [".", ["good_data.chi", "good_data.xy", "good_data.txt"]],
+        ["good_data.chi", "good_data.xy", "good_data.txt"],
     ),
     (  # file_list_example2.txt list of files provided in different directories
         ["file_list_dir/file_list_example2.txt"],
-        [".", ["input_dir/good_data.chi", "good_data.xy", "input_dir/good_data.txt"]],
+        ["input_dir/good_data.chi", "good_data.xy", "input_dir/good_data.txt"],
     ),
 ]
 
 
 @pytest.mark.parametrize("inputs, expected", params_input)
 def test_set_input_files(inputs, expected, user_filesystem):
-    expected_input_directory = Path(user_filesystem) / expected[0]
-    expected_input_files = expected[1]
+    expected_input_directory = []
+    for expected_path in expected:
+        expected_input_directory.append(Path(user_filesystem) / expected_path)
 
     cli_inputs = ["2.5"] + inputs
     actual_args = get_args(cli_inputs)
     actual_args = set_input_files(actual_args)
-    assert actual_args.input_directory == expected_input_directory
-    assert set(actual_args.input_file) == set(expected_input_files)
+    assert set(actual_args.input_directory) == set(expected_input_directory)
 
 
 # This test is for existing single input file or directory absolute path not in cwd
 # Here we are in user_filesystem/input_dir, testing for a file or directory in user_filesystem
 params_input_not_cwd = [
-    (["good_data.chi"], [".", "good_data.chi"]),
-    (["."], [".", ["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl"]]),
+    (["good_data.chi"], ["good_data.chi"]),
+    (["."], ["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl"]),
 ]
 
 
 @pytest.mark.parametrize("inputs, expected", params_input_not_cwd)
 def test_set_input_files_not_cwd(inputs, expected, user_filesystem):
-    expected_input_directory = Path(user_filesystem) / expected[0]
-    expected_input_files = expected[1]
+    expected_input_directory = []
+    for expected_path in expected:
+        expected_input_directory.append(Path(user_filesystem) / expected_path)
     actual_input = [str(Path(user_filesystem) / inputs[0])]
     os.chdir("input_dir")
 
     cli_inputs = ["2.5"] + actual_input
     actual_args = get_args(cli_inputs)
     actual_args = set_input_files(actual_args)
-    assert actual_args.input_directory == expected_input_directory
-    assert set(actual_args.input_file) == set(expected_input_files)
+    assert set(actual_args.input_directory) == set(expected_input_directory)
 
 
 # This test covers non-existing single input file or directory, in this case we raise an error with message
 params_input_bad = [
-    (["non_existing_file.xy"], "Please specify valid input file or directory."),
-    (["./input_dir/non_existing_file.xy"], "Please specify valid input file or directory."),
-    (["./non_existing_dir"], "Please specify valid input file or directory."),
+    (["non_existing_file.xy"], "Please specify at least one valid input file or directory."),
+    (["./input_dir/non_existing_file.xy"], "Please specify at least one valid input file or directory."),
+    (["./non_existing_dir"], "Please specify at least one valid input file or directory."),
 ]
 
 
diff --git a/src/diffpy/labpdfproc/tools.py b/src/diffpy/labpdfproc/tools.py
@@ -16,59 +16,52 @@ def set_input_files(args):
         the arguments from the parser
 
     It is implemented as the following:
-    If user input multiple files, we store their common directory as input directory and all of their names.
+    For each input, we try to read it as a file or a directory.
     If input is a file, we first try to read it as a file list and store all listed file names.
     If the first filename is invalid, then we proceed to treat it as a data file.
     Otherwise if we have a directory, glob all files within it.
-    If there are any invalid filenames (for the cases of multiple files, file list, or directory), we skip them.
+    If any file does not exist, we raise a ValueError telling which file(s) do not exist.
+    If all inputs are invalid, we raise a ValueError asking for at least one valid file or directory.
 
     Returns
     -------
     args argparse.Namespace
 
     """
 
-    if len(args.input) > 1:
-        input_paths = []
-        input_paths_parent = []
-        for input in args.input:
-            if Path(input).is_file():
-                input_paths.append(Path(input).resolve())
-                input_paths_parent.append(Path(input).resolve().parent)
-        input_dir = Path(os.path.commonprefix([str(path) for path in input_paths_parent]))
-        input_file_name = [str(path.relative_to(input_dir)) for path in input_paths]
-        setattr(args, "input_directory", input_dir)
-        setattr(args, "input_file", input_file_name)
-        return args
-
-    if not Path(args.input[0]).exists():
-        raise ValueError("Please specify valid input file or directory.")
-
-    if not Path(args.input[0]).is_dir():
-        input_paths = []
-        input_paths_parent = []
-        with open(args.input[0], "r") as f:
-            lines = [line.strip() for line in f]
-            if not os.path.isfile(lines[0]):
-                input_dir = Path.cwd() / Path(args.input[0]).parent
-                input_file_name = Path(args.input[0]).name
-            else:
-                for line in lines:
-                    if not os.path.isfile(line):
-                        continue
-                    else:
-                        input_paths.append(Path(line).resolve())
-                        input_paths_parent.append(Path(line).resolve().parent)
-                input_dir = Path(os.path.commonprefix([str(path) for path in input_paths_parent]))
-                input_file_name = [str(path.relative_to(input_dir)) for path in input_paths]
-
-    else:
-        input_dir = Path(args.input[0]).resolve()
-        input_files = [file for file in glob.glob(str(input_dir) + "/*", recursive=True) if os.path.isfile(file)]
-        input_file_name = [os.path.basename(input_file_path) for input_file_path in input_files]
-
-    setattr(args, "input_directory", input_dir)
-    setattr(args, "input_file", input_file_name)
+    input_paths = []
+    for input in args.input:
+        try:
+            if Path(input).exists():
+                if not Path(input).is_dir():
+                    with open(args.input[0], "r") as f:
+                        lines = [line.strip() for line in f]
+                        if not os.path.isfile(lines[0]):
+                            input_paths.append(Path(input).resolve())
+                        else:
+                            for line in lines:
+                                try:
+                                    if os.path.isfile(line):
+                                        input_paths.append(Path(line).resolve())
+                                except Exception as e:
+                                    raise ValueError(f"{line} does not exist. {e}.")
+
+                else:
+                    input_dir = Path(input).resolve()
+                    input_files = [
+                        Path(file).resolve()
+                        for file in glob.glob(str(input_dir) + "/*", recursive=True)
+                        if os.path.isfile(file)
+                    ]
+                    input_paths.extend(input_files)
+
+        except Exception as e:
+            raise ValueError(f"{input} does not exist. {e}.")
+
+    if len(input_paths) == 0:
+        raise ValueError("Please specify at least one valid input file or directory.")
+
+    setattr(args, "input_directory", input_paths)
     return args