-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmake_directory.py
executable file
·76 lines (63 loc) · 2.51 KB
/
make_directory.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/env python3
import json
from argparse import ArgumentParser
from os import fspath, walk
from pathlib import Path
from subprocess import check_call
import pandas as pd
def find_files(directory, patterns):
    """Return the first file under *directory* that matches any pattern.

    The tree is traversed with ``os.walk``; every file path is tested against
    each entry of *patterns* using ``Path.match``. The search stops at the
    first hit, so which file wins when several match depends on the order in
    which ``os.walk`` yields directories and file names.

    Args:
        directory: Root of the tree to search.
        patterns: Iterable of glob-style patterns understood by ``Path.match``.

    Returns:
        The ``Path`` of the first matching file, or ``None`` if nothing matches.
    """
    for root, _subdirs, names in walk(directory):
        base = Path(root)
        for name in names:
            candidate = base / name
            if any(candidate.match(pattern) for pattern in patterns):
                return candidate
    return None
def find_file_pairs(directory):
    """Look up the filtered and unfiltered h5ad files under *directory*.

    Two independent searches are run with ``find_files``: first for
    ``secondary_analysis.h5ad`` (the "filtered" file), then for ``expr.h5ad``
    (the "unfiltered" file).

    Returns:
        A ``(filtered_file, unfiltered_file)`` tuple; each element is whatever
        ``find_files`` returned for that name (``None`` when it was not found).
    """
    return (
        find_files(directory, ["secondary_analysis.h5ad"]),
        find_files(directory, ["expr.h5ad"]),
    )
def get_input_directory(data_directory, uuid):
    """Resolve the directory that holds the data for *uuid*.

    ``<data_directory>/public/<uuid>`` is preferred. If it does not exist,
    each entry of ``<data_directory>/consortium`` is checked for a child
    named *uuid*, and the first one that exists is used.

    Args:
        data_directory: ``Path`` of the data root.
        uuid: Name of the dataset directory to look for.

    Returns:
        The matching ``Path``, or ``None`` when no candidate exists.
    """
    public_candidate = data_directory / "public" / uuid
    if public_candidate.exists():
        return public_candidate

    consortium_root = data_directory / "consortium"
    if not consortium_root.exists():
        return None

    # Lazily probe <consortium>/<entry>/<uuid> and stop at the first hit.
    candidates = (entry / uuid for entry in consortium_root.iterdir())
    return next((path for path in candidates if path.exists()), None)
def main(data_directory: Path, uuids_file: Path, tissue: str):
    """Copy each listed dataset's h5ad files into ``<tissue>_h5ads/<uuid>/``.

    The ``uuid`` column of the tab-separated *uuids_file* is read (missing
    values are dropped). For every UUID, an output directory is created under
    ``<tissue>_h5ads`` relative to the current working directory, the dataset's
    input directory is resolved with ``get_input_directory``, and whichever of
    the files reported by ``find_file_pairs`` exist are copied with ``cp``.

    UUIDs whose input directory cannot be found, or whose directory contains
    neither expected file, are reported on stdout and skipped.

    Args:
        data_directory: Root directory passed to ``get_input_directory``.
        uuids_file: Tab-separated file containing a ``uuid`` column.
        tissue: Prefix of the output directory name (``<tissue>_h5ads``).

    Raises:
        subprocess.CalledProcessError: If a ``cp`` invocation exits non-zero.
    """
    uuids = pd.read_csv(uuids_file, sep="\t")["uuid"].dropna()

    # Create the h5ads directory if it doesn't exist.
    h5ads_base_directory = Path(f"{tissue}_h5ads")
    h5ads_base_directory.mkdir(exist_ok=True)

    for uuid in uuids:
        # Create the UUID-specific directory.
        h5ads_directory = h5ads_base_directory / uuid
        h5ads_directory.mkdir(parents=True, exist_ok=True)

        input_directory = get_input_directory(data_directory, uuid)
        if input_directory is None:
            # Without this guard the lookup below would walk ``None`` and
            # abort the whole run with a TypeError.
            print("No input directory for: ", uuid)
            continue

        input_files = find_file_pairs(input_directory)
        if input_files == (None, None):
            print("No input files in: ", input_directory)
            continue

        print("Input directory:", input_directory)
        print("Input files:", input_files)
        for input_file in input_files:
            if input_file is None:
                # Only one file of the pair was found; copy what exists.
                continue
            # An argument list (no shell) keeps paths containing quotes or
            # other shell metacharacters from breaking the command.
            check_call(
                [
                    "cp",
                    fspath(input_file),
                    fspath(h5ads_directory / input_file.name),
                ]
            )
if __name__ == "__main__":
    # Command-line entry point: three required positional arguments, in the
    # same order that main() expects them.
    parser = ArgumentParser()
    for arg_name, arg_type in (
        ("data_directory", Path),
        ("uuids_file", Path),
        ("tissue", str),
    ):
        parser.add_argument(arg_name, type=arg_type)
    cli_args = parser.parse_args()
    main(cli_args.data_directory, cli_args.uuids_file, cli_args.tissue)