-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfiles.py
120 lines (108 loc) · 3.61 KB
/
files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
# Python standard library
from __future__ import print_function
import sys
# Local imports
from utils import (
Colors,
err,
fatal
)
def clean(s, remove=('"', "'")):
    """Cleans a string to remove any defined leading or trailing characters.
    All characters listed in 'remove' are stripped together, so mixed or
    nested runs such as '"value"' wrapped in another pair of quotes are fully
    removed regardless of the order the characters are listed in.
    @param s <str>:
        String to clean.
    @param remove list[<str>]:
        Characters to remove from beginning or end of string 's'. Any
        iterable of strings is accepted; an empty iterable leaves 's'
        unchanged.
    @return s <str>:
        Cleaned string
    """
    # A single strip() over the union of characters avoids the
    # order-dependence of stripping one character at a time, where an outer
    # quote could shield an inner quote that was already processed.
    return s.strip(''.join(remove))
def contrasts(file, groups, delim='\t'):
    """Reads and parses the group comparison file, contrasts.tsv, into a
    nested list. This file acts as a config file to setup contrasts between
    two groups, where groups of samples are defined in the groups.tsv file.
    This information is used in differential analysis, like differential
    gene expression, etc.
    @Example: contrasts.tsv
    G2  G1
    G4  G3
    G5  G1
    >> contrasts = contrasts('contrasts.tsv', groups = ['G1', 'G2', 'G3', 'G4', 'G5'])
    >> contrasts
    [
      ["G2", "G1"],
      ["G4", "G3"],
      ["G5", "G1"]
    ]
    Blank (whitespace-only) lines and lines where either of the first two
    fields is empty are skipped silently. Lines with fewer than two
    delimited fields are skipped with a warning written via err(). Duplicate
    comparisons are only reported once, in order of first appearance.
    @param file <str>:
        Path to contrasts TSV file.
    @param groups list[<str>]:
        List of groups defined in the groups file, enforces groups exist.
    @param delim <str>:
        Field delimiter used to split each line, defaults to a tab.
    @return comparisons <list[list[str, str]]>:
        Nested list containing comparisons of interest. If any group is not
        in 'groups', the offending names are reported through err() and
        fatal() before the return statement is reached.
    """
    c = Colors()
    errors = []
    comparisons = []
    with open(file) as fh:
        for line_number, line in enumerate(fh, start=1):
            if not line.strip():
                # Blank line: skip quietly. Without this guard a blank line
                # yields a single empty field and is misreported below as
                # "missing at least one group".
                continue
            linelist = [clean(field.strip()) for field in line.split(delim)]
            try:
                g1 = linelist[0]
                g2 = linelist[1]
            except IndexError:
                # Missing a group, need two groups to tango
                # This can happen if the file is NOT a TSV file,
                # and it is separated by white spaces, :(
                err(
                    '{}{}Warning: {} is missing at least one group on line {}: {}{}'.format(
                        c.bg_yellow,
                        c.black,
                        file,
                        line_number,
                        line.strip(),
                        c.end
                    )
                )
                err('{}{}\t  └── Skipping over line, check if line is tab seperated... {}'.format(
                    c.bg_yellow,
                    c.black,
                    c.end)
                )
                continue
            if not g1 or not g2:
                # One of the two fields is empty after cleaning, skip line
                continue
            # Check to see if groups were defined already,
            # avoids user errors and spelling errors
            for g in (g1, g2):
                if g not in groups and g not in errors:
                    # Collect each undefined group once, report them at end
                    errors.append(g)
            # Add comparison to list of comparisons
            if [g1, g2] not in comparisons:
                comparisons.append([g1, g2])
    if errors:
        # One of the groups is not defined in groups file
        err('{}{}Error: the following group(s) in "{}" are not defined in --groups file! {}'.format(
            c.bg_red,
            c.white,
            file,
            c.end)
        )
        fatal('{}{}\t  └── {} {}'.format(
            c.bg_red,
            c.white,
            ','.join(errors),
            c.end)
        )
    return comparisons
if __name__ == '__main__':
    # Manual smoke test of the contrasts TSV parser using a small,
    # hard-coded set of group definitions.
    # NOTE(review): the contrasts file path is read from sys.argv[2] (the
    # second command-line argument) -- confirm this matches the intended
    # invocation.
    known_groups = {"T1": ["A", "B"], "T2": ["C", "D"]}
    contrasts_path = sys.argv[2]
    parsed = contrasts(contrasts_path, groups=known_groups.keys())
    print(parsed)