Skip to content

Commit

Permalink
upload 10/04/2022
Browse files Browse the repository at this point in the history
  • Loading branch information
francoiskroll authored Apr 10, 2022
1 parent 9e191e1 commit b58b00f
Show file tree
Hide file tree
Showing 25 changed files with 6,417 additions and 1 deletion.
111 changes: 111 additions & 0 deletions GCplot/GCplotter.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# takes a sequence, plots GC%

library(here)
library(data.table)
library(ggplot2)

# GC percent --------------------------------------------------------------
# rolling sum in window

# Computes the rolling GC fraction along a nucleotide sequence.
#
# sequence: nucleotide sequence as a single character string; matching is
#   case-insensitive. Only the first element of `sequence` is used.
# window: width of the rolling window, in bases.
#
# Returns a numeric vector with one value per base: the fraction of G/C among
# the `window` bases ending at that position (right-aligned window).
# Positions without a full window are NA. Bases other than A, C, G, T
# (e.g. N) are scored NA, so any window containing one is NA as well.
GCpercent <- function(sequence, window) {

  seqsplit <- strsplit(toupper(sequence), split='')[[1]]

  # score each base: 1 for G/C, 0 for A/T, NA for anything else
  gc1 <- rep(NA_real_, length(seqsplit))
  gc1[seqsplit %in% c('A', 'T')] <- 0
  gc1[seqsplit %in% c('C', 'G')] <- 1

  # number of G/C in the window ending at each position
  gcroll <- frollsum(gc1, window,
                     fill=NA, align='right')

  gcpe <- gcroll/window

  return(gcpe)

}

# GC plot -----------------------------------------------------------------

# Line plot of GC fraction along a sequence.
#
# GCpercent: vector of GC fractions, one value per position
#   (e.g. the output of the GCpercent() function).
#
# Returns a ggplot object: position on x, GC fraction on y, with the y axis
# limited to 0-1 and axis titles, axis text and most grid lines removed.
GCplot <- function(GCpercent) {

  # seq_along() gives zero positions for an empty input
  # (1:length(x) would give c(1, 0))
  gcdf <- data.frame(pos=seq_along(GCpercent), gcp=GCpercent)

  gcgg <- ggplot(gcdf, aes(x=pos, y=gcp)) +
    geom_line(colour='#5d5e5d', size=0.5) +
    coord_cartesian(ylim=c(0,1)) +
    theme_minimal() +
    theme(
      axis.title.x=element_blank(),
      axis.title.y=element_blank(),
      axis.text.x=element_blank(),
      axis.text.y=element_blank(),
      panel.grid.minor.x=element_blank(),
      panel.grid.minor.y=element_blank(),
      panel.grid.major.x=element_blank()
    )

  return(gcgg)

}


# GC plot – barplot -------------------------------------------------------

# Bar plot of GC fraction along a sequence (one bar per position).
#
# GCpercent: vector of GC fractions, one value per position
#   (e.g. the output of the GCpercent() function).
#
# Returns a ggplot object: position on x, GC fraction on y, with the y axis
# limited to 0-1 and axis titles, axis text and most grid lines removed.
GCplotbar <- function(GCpercent) {

  # seq_along() gives zero positions for an empty input
  # (1:length(x) would give c(1, 0))
  gcdf <- data.frame(pos=seq_along(GCpercent), gcp=GCpercent)

  gcgg <- ggplot(gcdf, aes(x=pos, y=gcp)) +
    geom_col(colour='#5d5e5d', size=0.5) +
    coord_cartesian(ylim=c(0,1)) +
    theme_minimal() +
    theme(
      axis.title.x=element_blank(),
      axis.title.y=element_blank(),
      axis.text.x=element_blank(),
      axis.text.y=element_blank(),
      panel.grid.minor.x=element_blank(),
      panel.grid.minor.y=element_blank(),
      panel.grid.major.x=element_blank()
    )

  return(gcgg)

}


# to use ------------------------------------------------------------------

# Reads the nucleotide sequence of a FASTA file as one string.
# Header lines (starting with '>') are dropped and all remaining lines are
# concatenated, so a sequence wrapped over several lines is read correctly.
# If the file holds several records, their sequences are concatenated.
readFastaSeq <- function(path) {
  fastalines <- trimws(readLines(path, warn=FALSE))
  seqlines <- fastalines[!startsWith(fastalines, '>')]
  return(paste(seqlines, collapse=''))
}

# window1
# 1- import sequence in fasta
fasta <- here('GCplot', 'prnp_window1.fa')
seq <- readFastaSeq(fasta)

# 2- run GCpercent & GCplotbar, with desired window
gcp1 <- GCplotbar(GCpercent(seq, 100))

ggsave(here('GCplot', 'gc_window1.pdf'), plot=gcp1, width=65, height=18, units='mm')

# window2
# 1- import sequence in fasta
fasta <- here('GCplot', 'prnp_window2.fa')
seq <- readFastaSeq(fasta)

# 2- run GCpercent & GCplotbar, with desired window
gcp2 <- GCplotbar(GCpercent(seq, 100))

ggsave(here('GCplot', 'gc_window2.pdf'), plot=gcp2, width=52, height=18, units='mm')

# window3
# 1- import sequence in fasta
fasta <- here('GCplot', 'prnp_window3.fa')
seq <- readFastaSeq(fasta)

# 2- run GCpercent & GCplotbar, with desired window
gcp3 <- GCplotbar(GCpercent(seq, 100))

ggsave(here('GCplot', 'gc_window3.pdf'), plot=gcp3, width=32, height=18, units='mm')
Binary file added GCplot/gc_window1.pdf
Binary file not shown.
Binary file added GCplot/gc_window2.pdf
Binary file not shown.
Binary file added GCplot/gc_window3.pdf
Binary file not shown.
2 changes: 2 additions & 0 deletions GCplot/prnp_window1.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
> prnp_window1
TTCTACGCCTCAAAATTTAAGAGTTTATGTGAAAATTCATAAATATTAATCTCAATCCAGGTTAAGCAAAATTTTTTGCTCTCCTCTTTAGAAATTTCTGGTTGCCAAAGTTCCAGAAATTGCTTCCTCATTCCTGAGCCTTTCATTTTCTCGATTTCTCCATTATGTAACGGGGAGCTGGAGCTTTGGGCCGAATTTCCAATTAAAGATGATTTTTACAGTCAATGAGCCACGTCAGGGAGCGATGGCACCCGCAGGCGGTATCAACTGATGCAAGTGTTCAAGCGAATCTCAACTCGTTTTTTCCGGTGACTCATTCCCGGCCCTGCTTGGCAGCGCTGCACCCTTTAACTTAAACCTCGGCCGGCCGCCCGCCGGGGGCACAGAGTGTGCGCCGGGCCGCGCGGCAATTGGTCCCCGCGCCGACCTCCGCCCGCGAGCGCCGCCGCTTCCCTTCCCCGCCCCGCGTCCCTCCCCCTCGGCCCCGCGCGTCGCCTGTCCTCCGAGCCAGTCGCTGACAGCCGCGGCGCCGCGAGCTTCTCCTCTCCTCACGACCGAGGCAGGTAAACGCCCGGGGTGGGAGGAACGCGGGCGGGGGCAGGGGAGCCGCGGGGGCCGAGTGAGGACCCCGGGCCTCGGGTCCCAGGCGCAAGGGTGCCCGGCCGGGCGGGGTCGGGACCCCAGTGAGGAGGGGCCGGGGGCTGCCCCGCGGGCGCGTGACGCGTCTCGGGCCTGCCCGGCTGCGCTGGTCTCCGCTCGGGTGAGGCGGCTTGGCTTCGCTTTTCAGGTTAGGAAAGCTCCCTTTACTGCGCGTTGGGGGGCTGGGGGAGCTGGCGGAGCCCCGTTAGGGAGGTCGGTGGCGCCGGGGTGTCTCAGCGCCCCCTGCACCCCGCGCGGGTCCGGCCCAGCGGGCGATCGCTGGCGCCCAGGGAACTCCGGGAGGGCCGCCAGCGGGCTCCGCAGGGCGCGGGGCGGGGAGGGGCGCCTGGGGGCCGCGGGGCTCGCGCTCCCCGCCCGTTGGCCGCCCCTCGGAGGCCGAGATCGGGGCCCAGAACGCCCCTTGGCAAGGCCTGGCGCTTCCGCGATGCCCAGAGGGTGCTTGGGGGGATGGAGAGAGGGGCGCCCGCCGGGGGAGTTCCGGGAGCCTCGGTGCCTCCCGCCGCAGCTGCAGCGTTCCTCCCGGGAGGCGGCCCAGCCCTTCATCCTCGCCGCCTGAGCTTCTCCGAGGGGGGCTGCAGCCTTGCGGCCGTTGCCACCGCCTGGAGAAGCGGCCCACGCGGACTGACGGGCGGGGGCGGGGCCTCGGGCCTCGGCGGGGGCGGGGTCCGGGGAGGCCCCACCCTCTGTTCTCCAGGGGCGGGGAGAGAGGAGCTGCAGGTCTGCGGCCTGGCCCCAGGTGCGATGGCGGACCCCAGCTTGGCCAGTCACATTCCTCCCAGTCCCCCTGGAGGGAGAACGCTGGCCATGGGGGGCTCCAAGGAACAACCAGCCTCGGATGACGACCCTTGGGTCACCGGTCTCCCCACCTGTGCGGCAGGCGCCTTCACGTTTCATTATTAAACAATGGGGAGAAATCCATGTTTACTGTCCTTTTTAAGGAATTTTTTGCTCTTCTCTTTGAGGTGGCTGTAGGAAATAGATTTTTTTTTTAACCTCGCAATTCCACCACGGTCACATCCATCCTCGCCATCGCAGAGCCACAGCTCTCCGTTTTTGTTTCCTAGCCTCCAGATTCTCACACAACACAGTGCAGTTTCACTGCTGTAATGATGAGGATCTTCATGGCCGCGTTATTTTCTTGTTCTGAGAGCATCACGGTTTAATTAGCAGTTCCCCATATGATTTGAAGTGTTTCCCGTTTCCTTAGGGAAAACTCCTGGTAGAATAGGATTAAGGATTTTTACAAATATAATTATCAAAAACATAGGAACAGGGAATTGGATAAATATGTTAAACTTCTGGAAAAATCAACAACGCTCTTAGATTTGTAGAAGAAAGGA
AAAAATCACCAGTGGAAAGGAGCAATTTTACTTACACAAACACAGAGAAGGTCTTACAGTGAAAAAAAGCTAACCAGTAAGGGGAAAAGCAGGCAGAGGGGTAGGATGTGATTTGTATGTTATTTATATCTAACACAAGTCTTCCAC
2 changes: 2 additions & 0 deletions GCplot/prnp_window2.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
> prnp_window2
GTTAAATAATGAACTAAAAGTCATTCATCAAGTCCATAACTTAGGGTCACATTTGTCCTTGGAGCAGGAGAAAGAGTTGTGTTCACCCTTTTCTTACTTTTGCTTTTGTCCTAAGTGCTTCAGAGAAGTACAGGGTGGCAACAGTGTTTCTACTGAGCAGCTGATACCATTGCTATGCACTCATTCATTATGCAGGAAACATTTAGTAATTTCAACATAAATATGGGACTCTGACGTTCTCCTCTTCATTTTGCAGAGCAGTCATTATGGCGAACCTTGGCTGCTGGATGCTGGTTCTCTTTGTGGCCACATGGAGTGACCTGGGCCTCTGCAAGAAGCGCCCGAAGCCTGGAGGATGGAACACTGGGGGCAGCCGATACCCGGGGCAGGGCAGCCCTGGAGGCAACCGCTACCCACCTCAGGGCGGTGGTGGCTGGGGGCAGCCTCATGGTGGTGGCTGGGGGCAGCCTCATGGTGGTGGCTGGGGGCAGCCCCATGGTGGTGGCTGGGGACAGCCTCATGGTGGTGGCTGGGGTCAAGGAGGTGGCACCCACAGTCAGTGGAACAAGCCGAGTAAGCCAAAAACCAACATGAAGCACATGGCTGGTGCTGCAGCAGCTGGGGCAGTGGTGGGGGGCCTTGGCGGCTACATGCTGGGAAGTGCCATGAGCAGGCCCATCATACATTTCGGCAGTGACTATGAGGACCGTTACTATCGTGAAAACATGCACCGTTACCCCAACCAAGTGTACTACAGGCCCATGGATGAGTACAGCAACCAGAACAACTTTGTGCACGACTGCGTCAATATCACAATCAAGCAGCACACGGTCACCACAACCACCAAGGGGGAGAACTTCACCGAGACCGACGTTAAGATGATGGAGCGCGTGGTTGAGCAGATGTGTATCACCCAGTACGAGAGGGAATCTCAGGCCTATTACCAGAGAGGATCGAGCATGGTCCTCTTCTCCTCTCCACCTGTGATCCTCCTGATCTCTTTCCTCATCTTCCTGATAGTGGGATGAGGAAGGTCTTCCTGTTTTCACCATCTTTCTAATCTTTTTCCAGCTTGAGGGAGGCGGTATCCACCTGCAGCCCTTTTAGTGGTGGTGTCTCACTCTTTCTTCTCTCTTTGTCCCGGATAGGCTAATCAATACCCTTGGCACTGATGGGCACTGGAAAACATAGAGTAGACCTGAGATGCTGGTCAAGCCCCCTTTGATTGAGTTCATCATGAGCCGTTGCTAATGCCAGGCCAGTAAAAGTATAACAGCAAATAA
2 changes: 2 additions & 0 deletions GCplot/prnp_window3.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
> prnp_window3
CTAGACACTGAAGGCAAATCTCCTTTGTCCATTTACCTGGAAACCAGAATGATTTTGACATACAGGAGAGCTGCAGTTGTGAAAGCACCATCATCATAGAGGATGATGTAATTAAAAAATGGTCAGTGTGCAAAGAAAAGAACTGCTTGCATTTCTTTATTTCTGTCTCATAATTGTCAAAAACCAGAATTAGGTCAAGTTCATAGTTTCTGTAATTGGCTTTTGAATCAAAGAATAGGGAGACAATCTAAAAAATATCTTAGGTTGGAGATGACAGAAATATGATTGATTTGAAGTGGAAAAAGAAATTCTGTTAATGTTAATTAAAGTAAAATTATTCCCTGAATTGTTTGATATTGTCACCTAGCAGATATGTATTACTTTTCTGCAATGTTATTATTGGCTTGCACTTTGTGAGTATTCTATGTAAAAATATATATGTATATAAAATATATATTGCATAGGACAGACTTAGGAGTTTTGTTTAGAGCAGTTAACATCTGAAGTGTCTAATGCATTAACTTTTGTAAGGTACTGAATACTTAATATGTGGGAAACCCTTTTGCGTGGTCCTTAGGCTTACAATGTGCACTGAATCGTTTCATGTAAGAATCCAAAGTGGACACCATTAACAGGTCTTTGAAATATGCATGTACTTTATATTTTCTATATTTGTAACTTTGCATGTTCTTGTTTTGTTATATAAAAAAATTGTAAATGTTTAATATCTGACTGAAATTAAACGAGCGAAGATGAGCACCACC
Binary file added OPR_litsurvey/OPRLitCatalog.xlsx
Binary file not shown.
Binary file added OPR_litsurvey/OPRLitCatalogSeq.xlsx
Binary file not shown.
Binary file added OPR_litsurvey/eachOPR.xlsx
Binary file not shown.
63 changes: 63 additions & 0 deletions OPR_litsurvey/seqFromOPRpattern.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# generate sequence from OPR pattern

library(openxlsx)
library(here)
library(dplyr)
library(tidyr)


# import OPR patterns -----------------------------------------------------

oprs <- read.xlsx(here('OPR_litsurvey', 'OPRLitCatalog.xlsx'))

# pattern and allele are each expected to be unique in the catalog.
# Warn explicitly instead of relying on a printed count being read,
# so the check also works when the script is sourced.

# any duplicates in pattern?
ndup_pattern <- sum(duplicated(oprs$pattern))
if (ndup_pattern > 0) {
  warning(ndup_pattern, ' duplicated value(s) in oprs$pattern', call.=FALSE)
}

# any duplicates in allele?
ndup_allele <- sum(duplicated(oprs$allele))
if (ndup_allele > 0) {
  warning(ndup_allele, ' duplicated value(s) in oprs$allele', call.=FALSE)
}


# import reference for each OPR -------------------------------------------

oref <- read.xlsx(here('OPR_litsurvey', 'eachOPR.xlsx'))



# function to generate full sequence from a pattern -----------------------

# Builds the full nucleotide sequence for one OPR pattern.
#
# pattern: single string of repeat names separated by '/'. Each name is
#   looked up in the Rname column of the global data frame `oref` and
#   replaced by the corresponding entry of its sequence column.
#
# Returns the concatenated sequence as one string. If any repeat name has no
# (non-NA) sequence in `oref`, a warning is raised and NA is returned, rather
# than pasting a placeholder such as 'character(0)' into the sequence.
pattern2Seq <- function(pattern) {

  rnames <- strsplit(pattern, '/')[[1]]

  # match() returns NA for names absent from oref (first match if duplicated)
  oseqs <- as.character(oref[['sequence']][match(rnames, oref[['Rname']])])

  if (anyNA(oseqs)) {
    warning('pattern "', pattern, '": no sequence in oref for repeat(s) ',
            paste(unique(rnames[is.na(oseqs)]), collapse=', '),
            '; returning NA', call.=FALSE)
    return(NA_character_)
  }

  fullseq <- paste(oseqs, collapse='')

  return(fullseq)

}


# apply the function ------------------------------------------------------

# vapply() guarantees exactly one string per pattern
# (sapply() could silently return a list instead)
oprs$fullsequence <- vapply(oprs$pattern, pattern2Seq, character(1),
                            USE.NAMES=FALSE)


# add OPRD1_Lee2016 sequence manually -------------------------------------
# allele OPRD1_Lee2016 is a deletion that overlaps two OPRs, add sequence manually

oprs[which(oprs$allele=='OPRD1_Lee2016'), 'fullsequence'] <-
  'CCTCAGGGCGGTGGTGGCTGGGGGCAGCCTCATGGTGGTGGCTGGGGGCAGCCTCATGGTGGTGGCTGGGGGCAGCCCCATGGTGGTGGCTGGGGTCAA'

# check that all the lengths make sense
# rows where the comparison is NA (missing sequence or missing OPR_length)
# are reported too, as they could not be verified
lengthok <- nchar(oprs$fullsequence) == oprs$OPR_length
badrows <- which(is.na(lengthok) | !lengthok)
if (length(badrows) > 0) {
  warning('fullsequence length does not match OPR_length for allele(s): ',
          paste(oprs$allele[badrows], collapse=', '), call.=FALSE)
}


# write the file ----------------------------------------------------------

# will need to copy the $pattern column from original file to keep the colouring
write.xlsx(oprs, here('OPR_litsurvey', 'OPRLitCatalogSeq.xlsx'))
Loading

0 comments on commit b58b00f

Please sign in to comment.