# samtools.yaml
name: samtools
description: Run samtools on one or more files
# Define the resources needed for this pipeline.
resources:
  zones:
  - us-central1-a
  - us-central1-b
  - us-central1-c
  - us-central1-f
  - us-east1-b
  - us-east1-c
  - us-east1-d

  # Create a data disk that is attached to the VM and destroyed when the
  # pipeline terminates.
  disks:
  - name: datadisk
    autoDelete: True

    # Within the Docker container, specify a mount point for the disk.
    mountPoint: /mnt/data

# Specify the Docker image to use along with the command
docker:
  imageName: gcr.io/YOUR-PROJECT-ID/samtools

  # The Pipelines API will create the input directory when localizing files,
  # but does not create the output directory.
  cmd: >
    mkdir /mnt/data/output &&
    find /mnt/data/input &&
    for file in $(/bin/ls /mnt/data/input); do
      samtools index /mnt/data/input/${file} /mnt/data/output/${file}.bai;
    done
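
# Note: cmd uses a YAML folded scalar (">"), so the lines above are delivered
# to the shell as a single command string. Expanded for readability, it is
# equivalent to this sketch:
#
#   mkdir /mnt/data/output
#   find /mnt/data/input
#   for file in $(/bin/ls /mnt/data/input); do
#     samtools index /mnt/data/input/${file} /mnt/data/output/${file}.bai
#   done
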
# The Pipelines API currently supports GCS paths, along with patterns (globs),
# but it doesn't directly support a list of files being passed as a single input
# parameter ("gs://bucket/foo.bam gs://bucket/bar.bam").
inputParameters:
- name: inputPath
  description: Cloud Storage path or pattern to input file(s)
  localCopy:
    path: input/
    disk: datadisk
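
# For example, if inputPath is set at run time to "gs://YOUR-BUCKET/*.bam"
# (a hypothetical bucket path), each matching file is copied onto datadisk
# under /mnt/data/input/ before cmd runs.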

# By specifying an outputParameter, we instruct the Pipelines API to
# copy /mnt/data/output/* to the Cloud Storage location specified in
# the pipelineArgs when the pipeline is run.
outputParameters:
- name: outputPath
  description: Cloud Storage path to which to copy the samtools output
  localCopy:
    path: output/*
    disk: datadisk
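
# A sketch of how this pipeline might be launched (flags as in the v1alpha2
# Pipelines API gcloud surface; the bucket paths below are hypothetical):
#
#   gcloud alpha genomics pipelines run \
#     --pipeline-file samtools.yaml \
#     --inputs inputPath=gs://YOUR-BUCKET/input/*.bam \
#     --outputs outputPath=gs://YOUR-BUCKET/output/ \
#     --logging gs://YOUR-BUCKET/logs/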