-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy path3_LMM.py
executable file
·130 lines (103 loc) · 4.48 KB
/
3_LMM.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/usr/bin/env python
# Author: Giulia Muzio
'''
Script for performing the maximum restricted likelihood
to calculate the likelihood ratio test for the neighborhoods
by employing the FaST-LMM-set implemetation obtainable at
https://github.com/fastlmm/FaST-LMM/
This script can be used on both the original (i.e., non
permuted scenario) and the multiple permuted scenarios.
This script can easily be run in parallel for different
permutations as j is a command line argument.
'''
import os
import sys
import argparse
import numpy as np
from utils import *
sys.path.insert(0, 'LMM/')
from LMM.association.networkGWAS_snpSet import networkGWAS_snpSet
def main(args):
# Input files
genotype = args.genotype # bed bim fam file
phenotype = args.phenotype
if(not os.path.exists(args.odir)):
os.makedirs(args.odir)
if(not args.j):
snp_set_list = args.nbs
output = '{}/{}'.format(args.odir, args.ofile)
# running fastlmm snp-set
observed_statistics, null_distribution = \
networkGWAS_snpSet(test_snps = genotype, set_list = snp_set_list,
phenotype = phenotype, test_type = "lrt",
covariate = args.covariate, kernel = args.kernel,
standardize_data = True)
save_file(output, observed_statistics)
else:
if(len(args.j) > 2): raise NameError('Wrong permutation indexes!')
for j in range(args.j[0], args.j[-1] + 1):
snp_set_list = args.nbs + str(j) + '.txt'
output = '{}/{}{}.pkl'.format(args.odir, args.ofile, j)
# running fastlmm snp-set
observed_statistics, null_distribution = \
networkGWAS_snpSet(test_snps = genotype, set_list = snp_set_list,
phenotype = phenotype, test_type = "lrt",
covariate = args.covariate, kernel = args.kernel,
standardize_data = True)
save_file(output, observed_statistics)
def parse_arguments():
'''
Definition of the command line arguments
Input
-----------
Output
-----------
args.genotype: path to the bed/bim/fam genotype
args.phenotype: path to the phenotype (in plink format)
args.nbs: path to the file where the sets to tests, e.g.,
the neighborhoods, are defined
args.covariate: path to the (optional) file where the covariates
are saved
args.kernel: string, either "lin" or "poly", that defines which
kernel to use for measuring the similarities between
the SNPs in the test-set, i.e., the neighborhood
args.j: permutation number. Optional. If present, then the
likelihood ratio for the neighborhoods is obtained
on the j-th permuted setting. Otherwise, on the non
permuted setting (which should be done once)
args.odir: output folder. For non-permuted setting, it could
be: "results/llr/"
For the permuted settings, it could be:
"results/llr/permuted/"
args.file: name of the output file. For example, "llr.pkl" in
case of the non-permuted setting, and a base-name "llr_"
in case of the permuted settings, since the script will
add the permutation index and the file format.
'''
parser = argparse.ArgumentParser()
parser.add_argument('--genotype', required = False, default = 'data/genotype')
parser.add_argument('--phenotype', required = False, default = 'data/y_50.pheno')
parser.add_argument('--nbs', required = True, help = 'neighborhoods file. Under\
the current folder structure, it would be "results/settings/neighborhoods.txt"\
in case of the non-permuted scenario, and \
"results/settings/permutations/neighborhoods/nbs_" otherwise.')
parser.add_argument('--covariate', required = False)
parser.add_argument('--kernel', required = True, choices = ['lin', 'poly'],
help = 'type of the kernel for modeling \
the similarities between the SNPs in the\
neighborhoods.')
parser.add_argument('--j', nargs='+', type=int, required = False,
help = 'permutation index. If not given,\
the analysis is performed on the non-permuted setting.')
parser.add_argument('--odir', required = True,
help = 'folder where to save the results. For example,\
"results/llr/"" in case of the non-permuted scenario,\
and "results/llr/permuted/" otherwise.')
parser.add_argument('--ofile', required = True,
help = 'filename for the results. "llr.pkl" in case\
of the non-permuted scenario, and "llr_" otherwise.')
args = parser.parse_args()
return args
if __name__ == '__main__':
arguments = parse_arguments()
main(arguments)