diff --git a/docs/usecases/bids-fmriprep-workflow-NP-reproduce.sh b/docs/usecases/bids-fmriprep-workflow-NP-reproduce.sh new file mode 100755 index 000000000..b205bb583 --- /dev/null +++ b/docs/usecases/bids-fmriprep-workflow-NP-reproduce.sh @@ -0,0 +1,77 @@ +#!/bin/bash + +set -eu + +export PS1="$ " +export PS4="> " + +set -x +setup=${1:-pip} # installation recipe to use; one of the case labels below +cd "$(mktemp -d ${TMPDIR:-/tmp}/rm-XXXXXXX)" # work in a fresh temporary directory + +mkdir HOME +cp ~/.gitconfig HOME/ # needed by datalad et al +cp ~/.freesurfer-license HOME/ 2>&1 || echo "No FreeSurfer license copied" + +export HOME=$PWD/HOME # everything below runs with the scratch HOME prepared above + +trap "echo Finished for setup=$setup under PWD=`pwd`" SIGINT SIGHUP SIGABRT EXIT # double-quoted: $setup and the directory are expanded here, when the trap is set + +py=3 +d=venv$py; +( +virtualenv --python=python$py --system-site-packages $d +) 2>&1 | tee venv-setup.log + +source "$d/bin/activate" # should be outside of () to take effect + +( +case "$setup" in + kyle1) + # Kyle's setup from https://github.com/ReproNim/reproman/issues/511#issuecomment-632776223 + pip install git+http://github.com/datalad/datalad@53765be03838ee8b07d4b44a2a27bbbe259fe160 + # This one seems to be for older datalad + pip install git+http://github.com/ReproNim/reproman@a9c9842302cad707bbdaf56fa4050fe0136ffe23 + # with unbuffered io: + #pip install git+http://github.com/ReproNim/reproman@4f05f3aa96c7ab550aa218d5de705ea3cfe5f600 + ;; + kyle1-ps4) + # Like above but for reproman have #513 merged for PS4 details + pip install git+http://github.com/datalad/datalad@53765be03838ee8b07d4b44a2a27bbbe259fe160 + pip install git+http://github.com/ReproNim/reproman@setup-kyle1-ps4 + ;; + debug1) # the "default" (NOTE: without an argument, setup is "pip") + # Current master of datalad + pip install git+http://github.com/datalad/datalad@0.13.0rc1-109-g7f24491b2 + # ReproMan PR https://github.com/ReproNim/reproman/pull/506 with support of datalad master + pip install git+http://github.com/kyleam/niceman@v0.2.1-80-g45baab0 + ;; + reproman-master) + pip install 'git+http://github.com/ReproNim/reproman.git#egg=reproman[datalad]' + ;; + pip) # should be our target -- install via pip 
everything and it must be working + # until we release reproman with [datalad] - do manually + pip install 'datalad>=0.12.7' reproman;; + *) + echo "Unknown setup $setup" >&2 + exit 1 + ;; +esac + +# in either of the cases default datalad-container should be ok +pip install datalad-container + +# Actual script to run from the current state of the PR +# https://github.com/ReproNim/reproman/pull/438 +wget https://raw.githubusercontent.com/ReproNim/reproman/b70144e993660c271831e4ea8d2f4bb436bb7eeb/docs/usecases/bids-fmriprep-workflow-NP.sh + +# Ensure that we have local resource for default execution +) 2>&1 | tee install.log + +( + reproman create -t shell local +) 2>&1 | tee configure.log + +( + BIDS_APPS=mriqc FS_LICENSE=bogus RM_ORC=datalad-pair bash ./bids-fmriprep-workflow-NP.sh output +) 2>&1 | tee run.log diff --git a/docs/usecases/bids-fmriprep-workflow-NP.sh b/docs/usecases/bids-fmriprep-workflow-NP.sh new file mode 100755 index 000000000..3f0b4a26c --- /dev/null +++ b/docs/usecases/bids-fmriprep-workflow-NP.sh @@ -0,0 +1,292 @@ +#!/bin/bash +#emacs: -*- mode: shell-script; c-basic-offset: 4; tab-width: 4; indent-tabs-mode: nil -*- +#ex: set sts=4 ts=4 sw=4 et: +# +# This script is intended to demonstrate a sample workflow on a BIDS +# dataset using mriqc, fmriprep, and custom analysis pipeline, mimicking the +# steps presented in an fmriprep paper currently under review but using +# DataLad, ReproNim/containers, and ReproMan. 
+# +# COPYRIGHT: Yaroslav Halchenko 2019 +# +# LICENSE: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# Description +# +# Environment variables +# - RUNNER - datalad or reproman (default: reproman) +# - Options to reproman run invocation +# - RM_ORC - orchestrator to use (default: datalad-pair-run) +# - RM_RESOURCE - resource to use (default: local) +# - RM_SUB - submitter to use (default: local) +# - BIDS_APPS - if set -- comma-separated list of apps to consider (out of +# mriqc and fmriprep ATM) +# - FS_LICENSE - filename or content of the license for freesurfer +# - CONTAINERS_REPO - an alternative (could be local) location for containers +# repository. 
+# Make sure that you have got the images for specific versions we freeze to below: +# datalad get images/bids/bids-mriqc--0.15.0.sing images/bids/bids-fmriprep--1.4.1.sing +# +# - INPUT_DATASET_REPO - an alternative (could be local) location for input +# BIDS dataset +# +# Note that if FS_LICENSE does not point to a file and is not empty, it is +# assumed to contain the license content. If you are interested in running +# only MRIQC, just set it to some bogus value. +# So to run only mriqc if you don't have freesurfer license, do +# BIDS_APPS=mriqc FS_LICENSE=bogus ... +# +# Sample invocations +# - Pointing to the existing local clones of input repositories for faster +# "get" +# RUNNER=datalad \ +# FS_LICENSE=~/.freesurfer-license \ +# CONTAINERS_REPO=~/proj/repronim/containers \ +# INPUT_DATASET_REPO=$PWD/bids-fmriprep-workflow-NP/ds000003-demo \ +# ./bids-fmriprep-workflow-NP.sh bids-fmriprep-workflow-NP/out2 +# + +set -eu +export PS4='ex:$? > ' +set -x + +# $STUDY is a variable used in a paper this workflow mimics +STUDY="$1" # first (required) positional argument + +# Which runner - reproman or datalad +: "${RUNNER:=reproman}" + +# Define common parameters for the reproman run + +# ReproMan orchestrator to be used - determines how data/results would be +# transferred and execution protocoled +# Use reproman run --list orchestrators to get an updated list +: "${RM_ORC:=datalad-pair-run}" # ,plain,datalad-pair,datalad-local-run + +# Which batch processing system supported by ReproMan will be used +# Use reproman run --list submitters to get an updated list +# RM_SUB=condor,pbs,local + +# Which resource to use +# It would require (if was not done before) to configure +# a resource where execution will happen. For now will just use smaug below. 
+# TODO: provide pointers to doc ( ;-) ) + +# On discovery resource use PBS, and +# Necessary modules to be loaded in that session: +# - singularity/2.4.2 +# Necessary installations/upgrades to be done (TODO: contact John) +# - datalad (0.11.6, TODO: release first) +# - datalad-container + +: "${RM_RESOURCE:=local}" +: "${RM_SUB:=local}" + +# TODO: at reproman level allow to specify ORC and SUB for a resource, so there would +# be no need to specify for each invocation. Could be a new (meta) resource such as +# "smaug-condor" which would link smaug physical resource with those parameters +# TODO: point to the issue in ReproMan + + +unknown_runner () { # report the unsupported $RUNNER on stderr and exit 1 + echo "ERROR: Unknown runner $RUNNER. Known reproman and datalad" >&2 + exit 1 +} + +# Common invocation of ReproMan: "reproman run --follow" with -r/--sub/--orc taken from RM_RESOURCE/RM_SUB/RM_ORC; all arguments are passed through +# TODO: just make it configurable per project/env? +reproman_run () { + reproman run --follow -r "${RM_RESOURCE}" --sub "${RM_SUB}" --orc "${RM_ORC}" "$@" +} + + +# TODO: see where such functionality could be provided within reproman, so could +# be easily reused +get_participant_ids () { + # Goes through the provided paths and then the current directory; for the first participants.tsv found, + # prints the first column of rows starting with "sub-" (with "sub-" removed), comma-separated + for p in "$@" .; do + f="$p/participants.tsv" + if [ -e "$f" ]; then + awk -F'\t' '/^sub-/{print $1}' "$f" \ + | sed 's/sub-//' \ + | tr '\n' ',' \ + | sed -e 's/,$//g' + break + fi + done +} + +function run_bids_app() { + app="$1"; shift # BIDS app name, e.g. mriqc or fmriprep + do_group="$1"; shift # 1|yes or 0|no: whether the group-level run is requested as well + app_args=( "$@" -w work ) # remaining arguments go to the app, followed by "-w work" + + if [ -n "${BIDS_APPS:=}" ] && ! echo "$BIDS_APPS" | grep -q "\<$app\>" ; then + echo "I: skipping $app since BIDS_APPS=$BIDS_APPS" + return + fi + outds=data/$app + container=containers/bids-$app + app_runner_args=( --input containers/licenses --output "$outds" ) + + mkdir -p work + grep -e '^work$' .gitignore \ + || { echo "work" >> .gitignore; datalad save -m "Ignore work directory"; } + + # set -x + # Create target output dataset + # TODO: per app specific configuration? 
some might have too heavy xml etc + # files + [ -e "$outds" ] || datalad create -d . -c text2git "$outds" + + case "$RUNNER" in + reproman) + # Serial run + # reproman_run --jp container=containers/bids-mriqc "${RUNNER_ARGS[@]}" "${MRIQC_ARGS[@]}" + # Parallel requires two runs -- parallel across participants: + reproman_run --jp "container=$container" "${app_runner_args[@]}" \ + --input "data/bids/sub-{p[pl]}" \ + --bp "pl=$(get_participant_ids data/bids)" \ + data/bids '{outputs}' participant --participant_label '{p[pl]}' "${app_args[@]}" + case "$do_group" in + 1|yes) + # serial for the group + reproman_run --jp "container=$container" "${app_runner_args[@]}" \ + --input "data/bids" \ + '{inputs}' '{outputs}' group "${app_args[@]}" + ;; + 0|no) + ;; + *) + echo "Unknown value APP_GROUP=$do_group" >&2 + exit 1 + ;; + esac + ;; + datalad) + # Note: this is not in effect! TODO + case "$do_group" in + 1|yes) app_args=( group "${app_args[@]}" ) ;; + 0|no) ;; + *) exit 1 ;; + esac + datalad containers-run -n "$container" "${app_runner_args[@]}" \ + '{inputs}' '{outputs}' participant "${app_args[@]}" + ;; + *) unknown_runner;; + esac + # set +x +} + +# +# Check asap for licenses since fmriprep needs one for FreeSurfer +# + +if [ -z "${FS_LICENSE:-}" ]; then + if [ -e "${FREESURFER_HOME:-/XXXX}/.license" ]; then + FS_LICENSE="${FREESURFER_HOME}/.license" + else + cat >&2 <| "$CONTAINERS_FS_LICENSE" +fi +datalad save -d . 
-m "Added licenses/freesurfer (needed for fmriprep)" containers/licenses/ +( cd containers; git annex metadata licenses/freesurfer -s distribution-restrictions=sensitive; ) + + +# pin (possibly downgrade) container versions to match the ones used in the "paper" +containers/scripts/freeze_versions --save-dataset=^ \ + poldracklab-ds003-example=0.0.3 \ + bids-mriqc=0.15.0 \ + bids-fmriprep=1.4.1 + +# +# Install dataset to be analyzed (no data - analysis might run in the cloud or on HPC) +# +# In the original paper the name of the dataset was used as is, and placed at the +# top level. Here, to make this demo easier to apply to other studies, +# and also check on other datasets, we install input dataset under a generic +# "data/bids" path. "data/" will also collect all other derivatives etc +mkdir data + +# For now we will work with minimized version with only 2 subjects +# datalad install -d . -s ///openneuro/ds000003 data/bids +datalad install -d . -s "${INPUT_DATASET_REPO:-https://github.com/ReproNim/ds000003-demo}" data/bids + +# +# Execution. +# +# That is where access to the powerful resource (HPC) etc would be useful. +# Each of those containerized apps might need custom options to be added. +# +# + +# datalad save -d . -m "Due to https://github.com/datalad/datalad/issues/3591" data/mriqc + + +run_bids_app mriqc yes +# note: not using $CONTAINERS_FS_LICENSE just to make things a bit more explicit +run_bids_app fmriprep no --fs-license-file=containers/licenses/freesurfer + +# 3. poldracklab-ds003-example -- analysis + +# X. Later? 
visualization etc - used nilearn + + +exit 0 # done for now; the reproman run below is never reached + + +reproman run --follow -r "${RM_RESOURCE}" --sub "${RM_SUB}" --orc "${RM_ORC}" \ + --bp 'thing=thing-*' \ + --input '{p[thing]}' \ + sh -c 'cat {p[thing]} {p[thing]} >doubled-{p[thing]}' + + diff --git a/docs/usecases/simple_kwyk.sh b/docs/usecases/simple_kwyk.sh new file mode 100755 index 000000000..c62bd402b --- /dev/null +++ b/docs/usecases/simple_kwyk.sh @@ -0,0 +1,42 @@ +#!/bin/bash +#emacs: -*- mode: shell-script; c-basic-offset: 4; tab-width: 4; indent-tabs-mode: t -*- +#ex: set sts=4 ts=4 sw=4 noet: +# Minimal demo: in a new temporary DataLad dataset, install ///repronim/containers and ds000003-demo, then run the neuronets-kwyk container on the sub-02 T1w image +# +# COPYRIGHT: Yaroslav Halchenko 2019 +# +# LICENSE: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# + +set -eu + +cd $(mktemp -d --tmpdir=. ds-XXXX) +pwd +datalad create . +datalad install -d . ///repronim/containers +datalad install -d . 
-s https://github.com/ReproNim/ds000003-demo data/bids + +mkdir data/kwyked +datalad containers-run \ + --input data/bids/sub-02/anat/sub-02_T1w.nii.gz \ + --output data/kwyked/sub-02_T1w \ + -n containers/neuronets-kwyk \ + '{inputs}' '{outputs}' diff --git a/reproman/support/jobs/job_templates/submission/local.template b/reproman/support/jobs/job_templates/submission/local.template index 029567103..f389978fe 100755 --- a/reproman/support/jobs/job_templates/submission/local.template +++ b/reproman/support/jobs/job_templates/submission/local.template @@ -10,7 +10,8 @@ then else if test -z $(which parallel) then - echo "parallel (moreutils) is required to concurrent jobs locally" >&2 + echo "parallel is required to concurrent jobs locally" >&2 + echo "install parallel on Debian from moreutils or on macOS using MacPorts or Homebrew" >&2 exit 1 fi