Skip to content

Commit

Permalink
Supplementary notes
Browse files Browse the repository at this point in the history
  • Loading branch information
jinghuazhao committed Sep 30, 2022
1 parent a3f66a4 commit 5d89a8b
Show file tree
Hide file tree
Showing 5 changed files with 155 additions and 7 deletions.
13 changes: 13 additions & 0 deletions rsid/OpenGWAS.sh
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,19 @@ do
done
cd -

# The lastest with FinnGen

export EFO_UPDATE=${INF}/OpenGWAS/efo-update.txt
for f in $(sed '1d' ${EFO_UPDATE} | grep -v -e finn -e ebi-a-GCST90014325 -e ebi-a-GCST90013534 | awk '{print $NF}')
do
export vcf=https://gwas.mrcieu.ac.uk/files/${f}/${f}.vcf.gz
if [ ! -f ${vcf} ]; then
wget ${vcf}
wget ${vcf}.tbi
ls ${f}.vcf.gz.*
fi
done

Rscript -e "
library(TwoSampleMR)
Expand Down
2 changes: 1 addition & 1 deletion rsid/TNFB.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ id <- "ieu-b-18"
res <- mr(d, method_list = c("mr_egger_regression", "mr_ivw"))
p1 <- mr_scatter_plot2(res, d)
res_single <- mr_singlesnp(d)
mr_forest_plot2(res_single)
print(mr_forest_plot2(res_single))
# with(d, mr_steiger(p_exp=pval.exposure,p_out=pval.outcome,n_exp=samplesize.exposure,n_out=samplesize.outcome,r_xxo=1,r_yyo=1,r_exp=0,r_out=0.8))
lor <- with(d,beta.outcome)
af <- with(d,eaf.outcome)
Expand Down
46 changes: 46 additions & 0 deletions rsid/efo-update.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
suppressMessages(library(dplyr))
suppressMessages(library(openxlsx))
# Input
options(width=200)
HPC_WORK <- Sys.getenv("HPC_WORK")
INF <- Sys.getenv("INF")
rt <- file.path(INF,"mr","gsmr")
prot <- "TNFB"
xlsx <- "https://jhz22.user.srcf.net/INF/latest/GSMR_datasets.xlsx"
efo <- read.xlsx(xlsx,sheet=1,startRow=2,colNames=TRUE,skipEmptyRows=TRUE)
efo_left <- efo[1:5] %>%
rename(Source1=Source) %>%
mutate(N.cases=as.numeric(N.cases),N.controls=as.numeric(N.controls),Total.N=as.numeric(Total.N))
efo_right <- efo[6:10] %>% rename(Source2=Source) %>% mutate(N.cases=as.numeric(N.cases),N.controls=as.numeric(N.controls),Total.N=as.numeric(Total.N))
efo_old <- filter(cbind(efo_left,efo_right[5]),!grepl("http",Source2)) %>%
select(-Source2) %>% rename(Source=Source1)
efo_new <- filter(efo_right,grepl("http",Source2)) %>%
rename(Source=Source2)
efo_update <- bind_rows(efo_old,efo_new) %>%
filter(!is.na(Disease))
missed <- with(efo_update, grepl("Crohn",Disease))
efo_update[missed,c("N.cases","N.controls","Source")] <- c("12194","28072","https://gwas.mrcieu.ac.uk/datasets/ebi-a-GCST004132/")
efo_update <- mutate(efo_update,opengwasid=unlist(lapply(strsplit(Source,"/"),"[",5)))
write.table(efo_update,file="efo_update.txt",quote=FALSE,row.names=FALSE,sep="\t")
# Output
xlsx <- file.path("efo-update.xlsx")
wb <- createWorkbook(xlsx)
hs <- createStyle(textDecoration="BOLD", fontColour="#FFFFFF", fontSize=12, fontName="Arial Narrow", fgFill="#4F80BD")
for (sheet in c("efo_update"))
{
addWorksheet(wb,sheet,zoom=150)
writeData(wb,sheet,sheet,xy=c(1,1),headerStyle=createStyle(textDecoration="BOLD",
fontColour="#FFFFFF", fontSize=14, fontName="Arial Narrow", fgFill="#4F80BD"))
body <- get(sheet)
writeDataTable(wb, sheet, body, xy=c(1,2), headerStyle=hs, firstColumn=TRUE, tableStyle="TableStyleMedium2")
freezePane(wb, sheet, firstCol=TRUE, firstActiveRow=3)
width_vec <- apply(body, 2, function(x) max(nchar(as.character(x))+2, na.rm=TRUE))
# width_vec_header <- nchar(colnames(body))+2
setColWidths(wb, sheet, cols = 1:ncol(body), widths = width_vec)
writeData(wb, sheet, tail(body,1), xy=c(1, nrow(body)+2), colNames=FALSE, borders="rows", borderStyle="thick")
}
saveWorkbook(wb, file=xlsx, overwrite=TRUE)

library(TwoSampleMR)
library(ggplot2)
library(stringr)
24 changes: 18 additions & 6 deletions rsid/mr.sb
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ function gsmr()
gcta-1.9 --mbfile ${INF}/mr/gsmr/ref/gsmr_${prot} \
--gsmr-file ${INF}/mr/gsmr/prot/gsmr_${prot} ${INF}/mr/gsmr/trait/gsmr_${trait}-${prot} \
--gsmr-direction 0 \
--clump-r2 0.1 --gwas-thresh 5e-8 --diff-freq 0.4 --heidi-thresh 0.05 --gsmr-snp-min 10 --effect-plot \
--clump-r2 0.1 --gwas-thresh 5e-8 --diff-freq 0.4 --heidi-thresh 0.05 --gsmr-snp-min 3 --effect-plot \
--out ${INF}/mr/gsmr/out/${trait}-${prot}
if [ ! -f ${INF}/mr/gsmr/out/${trait}-${prot}.eff_plot.gz ]; then continue; fi
R --no-save -q <<\ \ \ \ \ \ END
Expand Down Expand Up @@ -137,8 +137,11 @@ function gsmr3()
# revised list without ukb, bbj, fenn AND trait based on extract_outcome_data but now VCF
{
if [ ! -d ${INF}/mr/gsmr/out ]; then mkdir -p ${INF}/mr/gsmr/out; fi
cat <(awk -vFS="\t" 'NR>1 && $4 !~ /ukb/ {print $4}' ${INF}/rsid/efo.txt) \
<(awk -vFS="\t" '/ebi|ieu|finn/{print $4}' ${INF}/OpenGWAS/ukb-replacement.txt) | \
# cat <(awk -vFS="\t" 'NR>1 && $4 !~ /ukb/ {print $4}' ${INF}/rsid/efo.txt) \
# <(awk -vFS="\t" '/ebi|ieu|finn/{print $4}' ${INF}/OpenGWAS/ukb-replacement.txt) | \
export EFO_UPDATE=${INF}/OpenGWAS/efo-update.txt
cat <(sed '1d' ${EFO_UPDATE} | grep -v -e finn -e ebi-a-GCST90014325 -e ebi-a-GCST90013534 | awk -vFS="\t" '{print $6,2/(1/$2+1/$3)}') \
<(sed '1d' ${EFO_UPDATE} | grep -e finn -e ebi-a-GCST90014325 -e ebi-a-GCST90013534 | awk -vFS="\t" '{print $6,2/(1/$2+1/$3)}') | \
while read efo
do
export trait=${efo}
Expand Down Expand Up @@ -186,7 +189,9 @@ function mr()
INF <- Sys.getenv("INF")
prot <- Sys.getenv("prot")
rt <- Sys.getenv("rt")
ids <- scan(file.path(INF,"TNFB","efo"),"")
# ids <- scan(file.path(INF,"TNFB","efo"),"")
suppressMessages(library(dplyr))
ids <- read.delim(file.path(INF,"OpenGWAS","efo-update.txt")) %>% pull(opengwasid)
rsids <- read.table(file.path(rt,"prot",paste0(prot,"-rsid.txt")), col.names=c("snpid","SNP"))
pkgs <- c("TwoSampleMR","dplyr","ggplot2","openxlsx","stringr")
invisible(suppressMessages(lapply(pkgs, require, character.only = TRUE)))
Expand Down Expand Up @@ -227,6 +232,11 @@ function mr()
mutate(prot=prot,disease=gsub("[ |]* id:[a-z]*-[a-z]-[A-Z]*[0-9]*|","",outcome)) %>%
select(prot,id.outcome,disease,method,Q,Q_df,Q_pval,-id.exposure,-exposure)
write.table(mr_heterogeneity_results,file=file.path(rt,"mr",paste0(prot,".het")),row.names=FALSE,quote=FALSE,sep="\t")
mr_single_results <- mr_singlesnp(dat) %>%
mutate(prot=prot,disease=gsub("[ |]* id:[a-z]*-[a-z]-[A-Z]*[0-9]*|","",outcome)) %>%
select(prot,id.outcome,b,se,pval,nsnp,disease)
fp <- mr_forestplot(mr_single_results)
ggsave(fp,filename=file.path(rt,"mr",paste0(prot,".pdf")),device="pdf")
'
}
Expand All @@ -240,7 +250,9 @@ function gsmr_trait()
INF <- Sys.getenv("INF")
prot <- Sys.getenv("prot")
rt <- Sys.getenv("rt")
ids <- scan(file.path(INF,"TNFB","efo"),"")
# ids <- scan(file.path(INF,"TNFB","efo"),"")
suppressMessages(library(dplyr))
ids <- read.delim(file.path(INF,"OpenGWAS","efo-update.txt")) %>% pull(opengwasid)
pkgs <- c("TwoSampleMR","dplyr","ggplot2","openxlsx","stringr")
invisible(suppressMessages(lapply(pkgs, require, character.only = TRUE)))
exposure_dat <- read.table(file.path(rt,"prot",paste0(prot,".gz")), header=TRUE) %>%
Expand All @@ -267,7 +279,7 @@ function gsmr_trait()
}
## EFO traits
# bfile
# mr
mr
# gsmr2
gsmr_trait
gsmr3
Expand Down
77 changes: 77 additions & 0 deletions rsid/mr.sh
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,83 @@ function ref_prot_outcome_gsmr()
# sed 's/.vcf.gz:/\t/;s/TotalControls=/\t/;s/,TotalCases=/\t/;s/,StudyType/\t/' ieu.sample | cut -f1,3,4 > ${INF}/OpenGWAS/ieu.N
# zgrep -h SAMPLE ${INF}/OpenGWAS/${efo}.vcf.gz | cut -f1,7,8 > ${INF}/mr/gsmr/trait/${efo}.sample
#!/usr/bin/bash
#SBATCH --account CARDIO-SL0-CPU
#SBATCH --partition cardio
#SBATCH --qos=cardio
#SBATCH --mem=28800
##SBATCH --account=PETERS-SL3-CPU
##SBATCH --partition=cclake-himem
#SBATCH --job-name=_lz
##SBATCH --mem=6840
#SBATCH --output=/rds/project/jmmh2/rds-jmmh2-projects/olink_proteomics/scallop/INF/TNFB/slurm/_lz_%A_%a.o
#SBATCH --error=/rds/project/jmmh2/rds-jmmh2-projects/olink_proteomics/scallop/INF/TNFB/slurm/_lz_%A_%a.e
#SBATCH --time=12:00:00
function efo_update()
# too slow to use and should siwtch to gsmr_trait in mr.sb
{
export EFO_UPDATE=${INF}/OpenGWAS/efo-update.txt
sed '1d' ${EFO_UPDATE} | grep -v -e finn -e ebi-a-GCST90014325 -e ebi-a-GCST90013534 | awk -vFS="\t" '{print $6,2/(1/$2+1/$3)}' | \
while read efo N
do
export efo=${efo}
export N=${N}
awk '$21=="cis" {print $3}' ${INF}/work/INF1.METAL | sort | uniq | grep -w -f - ${INF}/work/INF1.merge.genes | \
awk -vM=1e6 "{print \$1, \$2, \$3\":\"\$4-M\"-\"\$5+M}" | \
parallel -C' ' -j15 --env INF --env efo --env N --env suffix '
(
echo -e "SNP A1 A2 freq b se p N"
bcftools query -f "%CHROM %POS %ID %ALT %REF [%AF] [%ES] [%SE] [%LP] [$N] \n" -r {3} ${INF}/OpenGWAS/${efo}.vcf.gz | \
awk "{
if(\$4<\$5) snpid=\"chr\"\$1\":\"\$2\"_\"\$4\"_\"\$5;
else snpid=\"chr\"\$1\":\"\$2\"_\"\$5\"_\"\$4
\$9=10^-\$9
print snpid, \$1, \$2, \$4, \$5, \$6, \$7, \$8, \$9, \$10
}" | sort -k2,2n -k3,3n -k8,8g| cut -d" " -f2,3 --complement | awk "a[\$1]++==0" | awk -vFS="\t" -vN=${N} "{if(\$8=\"\") \$8=N;print}"
) | \
gzip -f> ${INF}/mr/gsmr/trait/${efo}-{2}.gz
'
done
sed '1d' ${EFO_UPDATE} | grep -e finn -e ebi-a-GCST90014325 -e ebi-a-GCST90013534 | awk -vFS="\t" '{print $6,2/(1/$2+1/$3)}' | \
while read efo N
do
export efo=${efo}
export N=${N}
awk '$21=="cis" {print $3}' ${INF}/work/INF1.METAL | sort | uniq | grep -w -f - ${INF}/work/INF1.merge.genes | \
awk -vM=1e6 "{print \$1, \$2, \$3\":\"\$4-M\"-\"\$5+M}" | \
parallel -C' ' -j15 --env INF --env efo --env N --env suffix '
export uniprot={1}
export prot={2}
export id={3}
echo {1} {2} {3}
Rscript -e "
require(TwoSampleMR)
require(dplyr)
efo <- Sys.getenv(\"efo\")
id <- Sys.getenv(\"id\")
n <- Sys.getenv(\"N\")
od <- extract_outcome_data(id,efo) %>%
distinct() %>%
mutate(snpid=gap::chr_pos_a1_a2(chr,pos,effect_allele.outcome,other_allele.outcome),
effect_allele.outcome=toupper(effect_allele.outcome),
other_allele.outcome=toupper(other_allele.outcome)) %>%
select(snpid,effect_allele.outcome,other_allele.outcome,eaf.outcome,beta.outcome,se.outcome,pval.outcome,samplesize.outcome) %>%
setNames(c(\"SNP\",\"A1\",\"A2\",\"freq\",\"b\",\"se\",\"p\",\"N\")) %>%
group_by(SNP) %>%
slice(which.min(p)) %>%
data.frame()
od[is.na(od\$N),\"N\"] <- n
write.table(od,quote=FALSE,row.names=FALSE)
" | \
gzip -f> ${INF}/mr/gsmr/trait/${efo}-{2}.gz
'
done
}
function gsmr_mr_heatmap()
{
#!/usr/bin/bash
Expand Down

0 comments on commit 5d89a8b

Please sign in to comment.