-
Notifications
You must be signed in to change notification settings - Fork 0
/
diann_fdr_control.R
135 lines (117 loc) · 7.46 KB
/
diann_fdr_control.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
library(iq)
library(stringr)
library(matrixStats)
generate_new_reports = function(path){
globalPrecursorFDR <- 0.01
globalProteinFDR <- 0.01
runSpecificPrecursorFDR <- 0.01
runSpecificProteinFDR <- 1.1
out_path <- str_c(dirname(path), "/precursor_maxlfq.tsv")
if (file.exists(out_path) == FALSE) {
print(out_path)
print(str_c("global precursor FDR = ", globalPrecursorFDR))
print(str_c("global protein FDR = ", globalProteinFDR))
print(str_c("run specific precursor FDR = ", runSpecificPrecursorFDR))
print(str_c("run specific protein FDR = ", runSpecificProteinFDR))
df <- fast_read(path,
sample_id = "Run",
primary_id = "Precursor.Id",
secondary_id = "Precursor.Id",
intensity_col = "Precursor.Normalised",
annotation_col = NULL,
filter_string_equal = NULL,
filter_string_not_equal = NULL,
filter_double_less = c("Global.Q.Value" = globalPrecursorFDR, "Global.PG.Q.Value" = globalProteinFDR, "Q.Value" = runSpecificPrecursorFDR, "PG.Q.Value" = runSpecificProteinFDR),
filter_double_greater = NULL,
intensity_col_sep = NULL,
intensity_col_id = NULL,
na_string = "0")
df_norm <- fast_preprocess(df$quant_table, median_normalization = FALSE, log2_intensity_cutoff = 0, pdf_out = NULL) # Do not enable median normalization because the tools have their own normalization algorithm.
df_maxlfq <- fast_MaxLFQ(df_norm, row_names = df$protein[, 1], col_names = df$sample) # MaxLFQ requires a precursor having non-zero intensities in at least two runs, which makes the "quantified" entries slightly fewer than those from the DIA-NN main report.
maxlfq <- df_maxlfq$estimate
maxlfq[maxlfq <= 0] <- NA
write.table(2^maxlfq, out_path, quote = FALSE, sep = "\t", col.names = NA)
}
out_path <- str_c(dirname(path), "/modified_sequence_maxlfq.tsv")
if (file.exists(out_path) == FALSE) {
print(out_path)
print(str_c("global precursor FDR = ", globalPrecursorFDR))
print(str_c("global protein FDR = ", globalProteinFDR))
print(str_c("run specific precursor FDR = ", runSpecificPrecursorFDR))
print(str_c("run specific protein FDR = ", runSpecificProteinFDR))
df <- fast_read(path,
sample_id = "Run",
primary_id = "Modified.Sequence",
secondary_id = "Precursor.Id",
intensity_col = "Precursor.Normalised",
annotation_col = NULL,
filter_string_equal = NULL,
filter_string_not_equal = NULL,
filter_double_less = c("Global.Q.Value" = globalPrecursorFDR, "Global.PG.Q.Value" = globalProteinFDR, "Q.Value" = runSpecificPrecursorFDR, "PG.Q.Value" = runSpecificProteinFDR),
filter_double_greater = NULL,
intensity_col_sep = NULL,
intensity_col_id = NULL,
na_string = "0")
df_norm <- fast_preprocess(df$quant_table, median_normalization = FALSE, log2_intensity_cutoff = 0, pdf_out = NULL) # Do not enable median normalization because the tools have their own normalization algorithm.
df_maxlfq <- fast_MaxLFQ(df_norm, row_names = df$protein[, 1], col_names = df$sample) # MaxLFQ requires a precursor having non-zero intensities in at least two runs, which makes the "quantified" entries slightly fewer than those from the DIA-NN main report.
maxlfq <- df_maxlfq$estimate
maxlfq[maxlfq <= 0] <- NA
write.table(2^maxlfq, out_path, quote = FALSE, sep = "\t", col.names = NA)
}
out_path <- str_c(dirname(path), "/protein_maxlfq.tsv")
if (file.exists(out_path) == FALSE) {
print(out_path)
print(str_c("global precursor FDR = ", globalPrecursorFDR))
print(str_c("global protein FDR = ", globalProteinFDR))
print(str_c("run specific precursor FDR = ", runSpecificPrecursorFDR))
print(str_c("run specific protein FDR = ", runSpecificProteinFDR))
df <- fast_read(path,
sample_id = "Run",
primary_id = "Protein.Group",
secondary_id = "Precursor.Id",
intensity_col = "Precursor.Normalised",
annotation_col = NULL,
filter_string_equal = NULL,
filter_string_not_equal = NULL,
filter_double_less = c("Global.Q.Value" = globalPrecursorFDR, "Global.PG.Q.Value" = globalProteinFDR, "Q.Value" = runSpecificPrecursorFDR, "PG.Q.Value" = runSpecificProteinFDR),
filter_double_greater = NULL,
intensity_col_sep = NULL,
intensity_col_id = NULL,
na_string = "0")
df_norm <- fast_preprocess(df$quant_table, median_normalization = FALSE, pdf_out = NULL) # Do not enable median normalization because the tools have their own normalization algorithm.
df_maxlfq <- fast_MaxLFQ(df_norm, row_names = df$protein[, 1], col_names = df$sample) # MaxLFQ requires a precursor having non-zero intensities in at least two runs, which makes the "quantified" entries slightly fewer than those from the DIA-NN main report.
maxlfq <- df_maxlfq$estimate
maxlfq[maxlfq <= 0] <- NA
write.table(2^maxlfq, out_path, quote = FALSE, sep = "\t", col.names = NA)
}
}
### CSF
generate_new_reports("./CSFData/DDA_lib_try_result/diann-output/report.tsv")
generate_new_reports("./CSFData/DDA_lib_semi_result/diann-output/report.tsv")
generate_new_reports("./CSFData/DDA_DIA_lib_semi_result/diann-output/report.tsv")
generate_new_reports("./CSFData/DDA_DIA_lib_try_result/diann-output/report.tsv")
generate_new_reports("./CSFData/DIA_lib_semi_result/diann-output/report.tsv")
generate_new_reports("./CSFData/DIA_lib_try_result/diann-output/report.tsv")
generate_new_reports("./CSFData/DIA_NN_LF_result/report.tsv")
### Plasma
generate_new_reports("./plasmaData/DIA_lib_semi_result/diann-output/report.tsv")
generate_new_reports("./plasmaData/DIA_lib_try_result/diann-output/report.tsv")
generate_new_reports("./plasmaData/DIA_NN_LF_result/report.tsv")
### HLA
generate_new_reports("./HLAData/DIA_lib_result/diann-output/report.tsv")
### PhosphoData
generate_new_reports("./phosphoData/7min_result/diann-output/report.tsv")
generate_new_reports("./phosphoData/10min_result/diann-output/report.tsv")
generate_new_reports("./phosphoData/15min_result/diann-output/report.tsv")
generate_new_reports("./phosphoData/21min_result/diann-output/report.tsv")
generate_new_reports("./phosphoData/30min_result/diann-output/report.tsv")
generate_new_reports("./phosphoData/60min_result/diann-output/report.tsv")
### Low input
generate_new_reports("./lowInputData/Tonsil_lib_result/diann-output/report.tsv")
generate_new_reports("./lowInputData/Direct_lib_result/diann-output/report.tsv")
generate_new_reports("./lowInputData/DIANN_results_Fig4_original_study/report.tsv")
### Breast cancer
generate_new_reports("./revisionData/TNBCData/14_MainSearch_4223_DIA-NN_1.8.1_library_free/14_MainSearch_4223_DIA-NN_1.8.1_library_free/report.tsv")
generate_new_reports("./revisionData/TNBCData/diaTracer_hybrid_result_Frag22_methylthiolation/diann-output/report.tsv")
generate_new_reports("./revisionData/TNBCData/diaTracer_result_Frag22_methylthiolation/diann-output/report.tsv")
generate_new_reports("./revisionData/TNBCData/12_MainSearch_4223_DIA-NN_1.8.1_library_based/12_MainSearch_4223_DIA-NN_1.8.1_newSN16lib/report.tsv")