Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions bio/reference/ensembl-annotation/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ output:
- Ensembl GTF or GFF3 annotation file
params:
- url: URL from which to download the annotation (optional; defaults to ``ftp://ftp.ensembl.org/pub``)
- branch: branch of the FTP server to download the annotation from, if required (optional; e.g. "plants")
- collection: collection directory on the FTP server that contains the species, if required (optional; e.g. "bacteria_0_collection")
19 changes: 16 additions & 3 deletions bio/reference/ensembl-annotation/test/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ rule get_annotation:
release="105",
build="GRCh37",
flavor="", # optional, e.g. chr_patch_hapl_scaff, see Ensembl FTP.
# branch="plants", # optional: specify branch
log:
"logs/get_annotation.log",
cache: "omit-software" # save space and time with between workflow caching (see docs)
Expand All @@ -22,11 +21,25 @@ rule get_annotation_gz:
release="105",
build="GRCh37",
flavor="", # optional, e.g. chr_patch_hapl_scaff, see Ensembl FTP.
# branch="plants", # optional: specify branch
log:
"logs/get_annotation.log",
cache: "omit-software" # save space and time with between workflow caching (see docs)
wrapper:
"master/bio/reference/ensembl-annotation"


# Download the GTF annotation of a genome that is not on the main Ensembl
# server: it is addressed through a custom url, a branch ("bacteria") and a
# collection sub-directory.
rule get_off_branch_annotation:
    output:
        "refs/off_branch_annotation.gtf",
    params:
        species="bacillus_subtilis_subsp_subtilis_str_168_gca_000009045",
        release="59",  # note latest release varies with url
        build="ASM904v1",
        branch="bacteria",  # optional for off branch genomes
        # No trailing slash here: the wrapper inserts the "/" between url and
        # branch itself, so "pub/" would yield "pub//bacteria/...".
        url="ftp://ftp.ensemblgenomes.org/pub",  # optional set ftp server source
        collection="bacteria_0_collection",  # optional set collection source for genome
    log:
        "logs/get_annotation.log",
    cache: "omit-software"  # save space and time with between workflow caching (see docs)
    wrapper:
        "master/bio/reference/ensembl-annotation"
5 changes: 4 additions & 1 deletion bio/reference/ensembl-annotation/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@
elif snakemake.params.get("branch"):
branch = snakemake.params.branch + "/"

collection = ""
if snakemake.params.get("collection"):
collection = snakemake.params.collection + "/"

flavor = snakemake.params.get("flavor", "")
if flavor:
Expand All @@ -49,7 +52,7 @@


url = snakemake.params.get("url", "ftp://ftp.ensembl.org/pub")
url = f"{url}/{branch}release-{release}/{out_fmt}/{species}/{species.capitalize()}.{build}.{gtf_release}.{flavor}{suffix}"
url = f"{url}/{branch}release-{release}/{out_fmt}/{collection}{species}/{species.capitalize()}.{build}.{gtf_release}.{flavor}{suffix}"


try:
Expand Down
2 changes: 2 additions & 0 deletions bio/reference/ensembl-sequence/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ output:
- fasta file
params:
- url: URL from which to download the sequence (optional; defaults to ``ftp://ftp.ensembl.org/pub``)
- branch: branch of the FTP server to download the sequence from, if required (optional; e.g. "plants")
- collection: collection directory on the FTP server that contains the species, if required (optional; e.g. "bacteria_0_collection")
22 changes: 18 additions & 4 deletions bio/reference/ensembl-sequence/test/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,8 @@ rule get_single_chromosome:
build="R64-1-1",
release="101",
chromosome=["II"], # optional: restrict to one or multiple chromosomes, for multiple see below
# branch="plants", # optional: specify branch
log:
"logs/get_genome.log",
params:
url="http://ftp.ensembl.org/pub",
cache: "omit-software" # save space and time with between workflow caching (see docs)
wrapper:
"master/bio/reference/ensembl-sequence"
Expand All @@ -40,7 +37,24 @@ rule get_multiple_chromosome:
build="R64-1-1",
release="101",
chromosome=["I", "II"], # optional: restrict to one or multiple chromosomes
# branch="plants", # optional: specify branch
log:
"logs/get_genome.log",
cache: "omit-software" # save space and time with between workflow caching (see docs)
wrapper:
"master/bio/reference/ensembl-sequence"


rule get_off_branch_genome:
output:
"refs/off_branch_genome.fasta",
params:
species="bacillus_subtilis_subsp_subtilis_str_168_gca_000009045",
datatype="dna",
build="ASM904v1",
release="59", # note latest release varies with url
branch="bacteria", # optional for off branch genomes
url="ftp://ftp.ensemblgenomes.org/pub/", # optional set ftp server source
collection="bacteria_0_collection", # optional set collection source for genome
log:
"logs/get_genome.log",
cache: "omit-software" # save space and time with between workflow caching (see docs)
Expand Down
17 changes: 13 additions & 4 deletions bio/reference/ensembl-sequence/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,20 @@
elif snakemake.params.get("branch"):
branch = snakemake.params.branch + "/"

collection = ""
if snakemake.params.get("collection"):
collection = snakemake.params.collection + "/"

log = snakemake.log_fmt_shell(stdout=False, stderr=True)

spec = ("{build}" if int(release) > 75 else "{build}.{release}").format(
build=build, release=release
)
if branch == "" or branch == "grch37/":
spec = ("{build}" if int(release) > 75 else "{build}.{release}").format(
build=build, release=release
)
else:
spec = ("{build}" if int(release) > 30 else "{build}.{release}").format(
build=build, release=release
)

suffixes = ""
datatype = snakemake.params.get("datatype", "")
Expand Down Expand Up @@ -52,7 +61,7 @@

url = snakemake.params.get("url", "ftp://ftp.ensembl.org/pub")
spec = spec.format(build=build, release=release)
url_prefix = f"{url}/{branch}release-{release}/fasta/{species}/{datatype}/{species.capitalize()}.{spec}"
url_prefix = f"{url}/{branch}release-{release}/fasta/{collection}{species}/{datatype}/{species.capitalize()}.{spec}"

success = False
for suffix in suffixes:
Expand Down
16 changes: 16 additions & 0 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5564,6 +5564,14 @@ def test_ensembl_sequence_chromosomes():
)


@skip_if_not_modified
def test_ensembl_sequence_off_branch():
    """Run the ensembl-sequence wrapper test for the off-branch genome target."""
    target = "refs/off_branch_genome.fasta"
    command = ["snakemake", "--cores", "1", target, "--use-conda", "-F"]
    run("bio/reference/ensembl-sequence", command)


@skip_if_not_modified
def test_ensembl_sequence_chromosome_old_release():
run(
Expand Down Expand Up @@ -5597,6 +5605,14 @@ def test_ensembl_annotation_gtf_gz():
)


@skip_if_not_modified
def test_ensembl_off_branch_annotation_gtf():
    """Run the ensembl-annotation wrapper test for the off-branch GTF target."""
    target = "refs/off_branch_annotation.gtf"
    command = ["snakemake", "--cores", "1", target, "--use-conda", "-F"]
    run("bio/reference/ensembl-annotation", command)


@skip_if_not_modified
def test_ensembl_variation():
run(
Expand Down