forked from nadegeguiglielmoni/genome_assembly_tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
render.py
156 lines (117 loc) · 4.09 KB
/
render.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
"""Dynamically renders the readme from its jinja2 template and csv of assemblers"""
import os
import csv
from dataclasses import dataclass
import logging
import tempfile
import time
from typing import Iterable, List, Optional, Type, TypeVar
import git
from jinja2 import Environment, FileSystemLoader
# Configure the root logger once at import time: every message is prefixed
# with a timestamp and anything at INFO level or above is emitted.
logging.basicConfig(format="%(asctime)s - %(message)s", level=logging.INFO)
def get_last_commit_date(url: str) -> str:
    """Return the year and month of the latest commit of a remote repository.

    The remote is cloned without checking out a working tree into a
    temporary directory, which is deleted before returning.

    Parameters
    ----------
    url: str
        URL of the git repository.

    Returns
    -------
    str:
        The authored date of the commit at HEAD in YYYY-MM format, with
        a zero-padded month (e.g. "2020-03").

    Raises
    ------
    git.GitCommandError
        Not handled here; errors raised by the clone propagate to the
        caller.
    """
    with tempfile.TemporaryDirectory() as repo_dir:
        cloned = git.Repo.clone_from(url, repo_dir, no_checkout=True)
        try:
            auth_date = cloned.head.commit.authored_datetime
        finally:
            # Release the handles held by the Repo object before the
            # temporary directory is removed.
            cloned.close()
    # Pad the month so the result really is YYYY-MM and sorts as text.
    return f"{auth_date.year}-{auth_date.month:02d}"
@dataclass
class Software:
    """Base record holding the fields shared by every listed software.

    When an instance is created with a non-empty ``link``, the remote
    repository is queried and ``last_update`` is overwritten with the
    date of its latest commit.
    """

    name: str
    link: Optional[str]
    publication: Optional[str]
    last_update: Optional[str]

    def __post_init__(self):
        """Refresh ``last_update`` from the remote when a link is set."""
        if not self.link:
            return
        self.set_last_commit_date()
        # Short pause so that consecutive lookups don't spam the git server
        time.sleep(0.1)

    def set_last_commit_date(self):
        """Query the remote repo and store the date of its latest commit.

        ``last_update`` is left as it is when the git command fails.
        """
        try:
            latest = get_last_commit_date(self.link)
        except git.GitCommandError:
            # The link is not a git repo: keep the current value
            return
        self.last_update = latest
@dataclass
class Assembler(Software):
    """Software whose purpose is to assemble a genome.

    Extends the fields of ``Software`` with ``technology``.
    """

    technology: str
@dataclass
class Processor(Software):
    """Software performing a pre- or post-processing task
    on the data of a genome assembly.

    Extends the fields of ``Software`` with ``task`` and ``reads``.
    """

    task: str
    reads: str
# Type variable constrained to the software classes: it ties the class
# given to load_softwares to the element type of the list it returns.
S = TypeVar("S", Software, Assembler, Processor)
def load_softwares(path: str, soft_type: Type[S]) -> List[S]:
    """Load a bunch of softwares from CSV file.

    Every data row is expanded into keyword arguments of `soft_type`, and
    a progress message is logged after each software is built.

    Parameters
    ----------
    path: str
        Path to CSV file containing the list of softwares. The file
        needs a header line and a "name" column.
    soft_type: type Software, Assembler or Processor
        The class to use to represent softwares. This
        affects the fields available.

    Returns
    -------
    list:
        One `soft_type` instance per data row, in file order.
    """
    # Read all rows up front: the total then counts softwares only (not
    # the header line) and the file is closed before instantiation starts.
    with open(path, "r", newline="") as csvfile:
        rows = list(csv.DictReader(csvfile))
    n_softs = len(rows)
    softs = []
    # Count from 1 so that the last message reads "(n / n)"
    for idx, row in enumerate(rows, start=1):
        # csv fields expand to dataclass attrs
        softs.append(soft_type(**row))
        logging.info(
            f'({idx} / {n_softs}) {soft_type.__name__} Done: {row["name"]}'
        )
    return softs
def fmt_processors(procs: Iterable[Processor]) -> List[Processor]:
    """Format a list of processors so that:
    + They are sorted by task, reads
    + Reads field is wrapped in double underscores, which Markdown
      renders as strong emphasis
    + Only the first of each read type per task has a value

    The processors given as input are left untouched: the returned list
    holds shallow copies.

    Parameters
    ----------
    procs: iterable of Processor
        The processors to format.

    Returns
    -------
    list:
        Formatted copies of the processors, sorted by (task, reads).
    """
    import copy

    # copy.copy does not call __init__, so copying a dataclass instance
    # does not run its __post_init__ again.
    fmt_procs = [
        copy.copy(pr) for pr in sorted(procs, key=lambda x: (x.task, x.reads))
    ]
    previous = None
    for pr in fmt_procs:
        category = (pr.task, pr.reads)
        if category == previous:
            # Same task and read type as the processor above: blank it
            pr.reads = ""
            continue
        # First processor in category: keep its value and add emphasis
        previous = category
        if pr.reads:
            pr.reads = f"__{pr.reads}__"
    return fmt_procs
# Templates are looked up relative to the current working directory.
env = Environment(
    loader=FileSystemLoader("."),
    autoescape=False,
)

# Each section below loads its list of softwares, renders the matching
# template and prints the result: stdout as a whole composes the readme.

### HEADER ###
with open("templates/header.md") as header:
    print(header.read())

### ASSEMBLERS ###
assemblers = load_softwares("data/assemblers.csv", Assembler)
template = env.get_template("templates/assemblers.j2")
print(template.render(assemblers=assemblers))

### PRE/POST PROCESSORS ###
procs = load_softwares("data/processors.csv", Processor)
template = env.get_template("templates/processors.j2")
# Processors are sorted and tweaked first for fancy md formatting
fmt_procs = fmt_processors(procs)
print(template.render(processors=fmt_procs))