# Patch summary (text before the first "diff --git" header is skipped by patch tools):
#   .gitignore               - ignore Nextflow run artefacts (work/, .nextflow/, .nextflow.log*).
#   bin/python_script.py     - accept an extra positional 'variable' argument and write
#                              one TSV row with INPUT / VARIABLE / OUTPUT columns.
#   nf_workflow.nf           - Main takes a single input_map (foo, bar, publish_dir) and
#                              publishes via the workflow-level publish:/output blocks.
#   nf_workflow_importer.nf  - imports Main as Module1, adds processExtra, concatenates
#                              both result channels and collects them into one TSV.
#
# Review changes relative to the submitted patch:
#   * python_script.py: build the frame directly from the row instead of concatenating
#     onto an empty DataFrame (hunk header is now -5,13 +5,13).
#   * Both process scripts: quote $variable so values containing spaces stay one argument.
#   * Importer: copy-and-override the map with `input_map + [...]` instead of reflection.
#   * Comment wording: "publishDir is the workflow" -> "publishDir in the workflow".
#
# NOTE(review): the submitted text had lost its line breaks and indentation. Indentation
# and the position of blank context lines are inferred so that each hunk matches its
# @@ line counts - confirm against the working tree before applying. The index hashes
# are the original ones and no longer describe the post-review blobs.
diff --git a/.gitignore b/.gitignore
index ebc3ff2..2670630 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
 *.ipynb_checkpoints*
 *.env
-*credentials
\ No newline at end of file
+*credentials
+work/
+.nextflow/
+.nextflow.log*
\ No newline at end of file
diff --git a/bin/python_script.py b/bin/python_script.py
index 0e5c075..0beb9dd 100644
--- a/bin/python_script.py
+++ b/bin/python_script.py
@@ -5,13 +5,13 @@
 def main():
     parser = argparse.ArgumentParser(description='Test write out a file.')
     parser.add_argument('input_filename')
+    parser.add_argument('variable')
     parser.add_argument('output_filename')
 
     args = parser.parse_args()
 
-    df = pd.DataFrame()
-    df["OUTPUT"] = "1"
-    df["OUTPUT2"] = "2"
+    row = {"INPUT": args.input_filename, "VARIABLE": args.variable, "OUTPUT": args.output_filename}
+    df = pd.DataFrame([row])
 
     # saving file
     df.to_csv(args.output_filename, sep="\t", index=False)
diff --git a/nf_workflow.nf b/nf_workflow.nf
index dee894f..f55bccf 100644
--- a/nf_workflow.nf
+++ b/nf_workflow.nf
@@ -1,44 +1,58 @@
 #!/usr/bin/env nextflow
+// tested on N E X T F L O W ~ version 24.04.4
 nextflow.enable.dsl=2
+nextflow.preview.output = true
 
-params.input_spectra = "README.md"
+params.foo = "Hello"
+params.bar = "README.md"
 
 //This publish dir is mostly useful when we want to import modules in other workflows, keep it here usually don't change it
-params.publishdir = "$baseDir/nf_output"
-TOOL_FOLDER = "$baseDir/bin"
+params.publish_dir = "./nf_output"
+TOOL_FOLDER = "$moduleDir/bin"
 
 
 process processDataPython {
-    publishDir "$params.publishdir", mode: 'copy'
+    // publishDir "$params.publish_dir", mode: 'copy' // it is better to use the publishDir in the workflow for better hierarchy management as the publishDir in the workflow can be variable
 
     conda "$TOOL_FOLDER/conda_env.yml"
 
     input:
     file input
+    val variable
 
     output:
     file 'python_output.tsv'
 
     """
-    python $TOOL_FOLDER/python_script.py $input python_output.tsv
+    python $TOOL_FOLDER/python_script.py $input '$variable' python_output.tsv
     """
 }
 
 // TODO: This main will define the workflow that can then be imported, not really used right now, but can be
 workflow Main{
     take:
-    input_spectra
-    publishDir
+    input_map
 
     main:
-    data_ch = Channel.fromPath(params.input_spectra)
+    data_ch = Channel.fromPath(input_map.bar)
+    var_ch = Channel.of(input_map.foo)
 
     // Outputting Python
-    processDataPython(data_ch)
+    results = processDataPython(data_ch, var_ch)
+    publish:
+    results >> input_map.publish_dir
+
+    emit:
+    results
 }
 
 workflow {
-    _ = Main(params.input_spectra, params.publishDir)
-
+    input_map = [foo: params.foo, bar: params.bar, publish_dir: params.publish_dir]
+    _ = Main(input_map)
     // Alternatively we can put everyhthing in the main from the above right here
 }
+
+output{
+    mode 'copy'
+    // ignoreErrors true
+}
\ No newline at end of file
diff --git a/nf_workflow_importer.nf b/nf_workflow_importer.nf
index 69cb9eb..0d631f1 100644
--- a/nf_workflow_importer.nf
+++ b/nf_workflow_importer.nf
@@ -1,23 +1,67 @@
 #!/usr/bin/env nextflow
 nextflow.enable.dsl=2
+nextflow.preview.output = true
 
 include {Main as Module1} from './nf_workflow.nf'
 
-params.input_spectra = "README.md"
+params.foo = "Hola"
+params.bar = "README.md"
 
 //This publish dir is mostly useful when we want to import modules in other workflows, keep it here usually don't change it
-params.publishDir = "$baseDir/nf_output"
-TOOL_FOLDER = "$baseDir/bin"
+params.publish_dir = "./nf_output"
+TOOL_FOLDER = "$moduleDir/bin"
+
+
+process processExtra {
+    // publishDir "$params.publish_dir", mode: 'copy' // it is better to use the publishDir in the workflow for better hierarchy management as the publishDir in the workflow can be variable
+
+    conda "$TOOL_FOLDER/conda_env.yml"
+
+    input:
+    file input
+    val variable
+
+    output:
+    file 'outer/python_output_*.tsv'
+
+    """
+    mkdir outer
+    python $TOOL_FOLDER/python_script.py $input '$variable' 'outer/python_output_${variable}.tsv'
+    """
+}
 
 workflow Main{
     take:
-    a
-    b
+    input_map
 
     main:
-    Module1(a, b)
+    // Map + Map yields a new map, so the caller's input_map is left untouched
+    input_map_copy_adjusted = input_map + [publish_dir: input_map.publish_dir + "/module1"]
+    res_module1 = Module1(input_map_copy_adjusted)
+
+    data_ch = Channel.fromPath(input_map.bar).collect()
+    var_ch = Channel.of("test1", "test2")
+    results_extra = processExtra(data_ch, var_ch)
+
+
+    combined_results = res_module1.concat(results_extra)
+
+    results = combined_results.collectFile(name: "${input_map.publish_dir}/python_output2.tsv", newLine: false, keepHeader: true, skip: 1)
+
+    publish:
+    results_extra >> input_map.publish_dir
+
+    emit:
+    results
 }
+
 
 workflow {
-    _ = Main(params.input_spectra, params.publishDir)
+    input_map = [foo: params.foo, bar:params.bar, publish_dir: params.publish_dir]
+    _ = Main(input_map)
+}
+
+output{
+    mode 'copy'
+    // ignoreErrors true
 }
\ No newline at end of file