Wang-Bioinformatics-Lab · mwang87 · Oct 17, 2024 · Oct 17, 2024
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,6 @@
 *.ipynb_checkpoints*
 *.env
-*credentials
+*credentials
+work/
+.nextflow/
+.nextflow.log*
diff --git a/bin/python_script.py b/bin/python_script.py
@@ -5,13 +5,14 @@
 def main():
     parser = argparse.ArgumentParser(description='Test write out a file.')
     parser.add_argument('input_filename')
+    parser.add_argument('variable')
     parser.add_argument('output_filename')
 
     args = parser.parse_args()
 
     df = pd.DataFrame()
-    df["OUTPUT"] = "1"
-    df["OUTPUT2"] = "2"
+    row = {"INPUT": args.input_filename, "VARIABLE": args.variable, "OUTPUT": args.output_filename}
+    df = pd.concat([df, pd.DataFrame([row])])
 
     # saving file
     df.to_csv(args.output_filename, sep="\t", index=False)

diff --git a/nf_workflow.nf b/nf_workflow.nf
@@ -1,44 +1,58 @@
 #!/usr/bin/env nextflow
+// Tested on Nextflow version 24.04.4
 nextflow.enable.dsl=2
+nextflow.preview.output = true
 
-params.input_spectra = "README.md"
+params.foo = "Hello"
+params.bar = "README.md"
 
 //This publish dir is mostly  useful when we want to import modules in other workflows, keep it here usually don't change it
-params.publishdir = "$baseDir/nf_output"
-TOOL_FOLDER = "$baseDir/bin"
+params.publish_dir = "./nf_output"
+TOOL_FOLDER = "$moduleDir/bin"
 
 process processDataPython {
-    publishDir "$params.publishdir", mode: 'copy'
+    // publishDir "$params.publish_dir", mode: 'copy' // Publishing is better declared in the workflow's publish: section, which gives better control over the output hierarchy because the publish directory can vary per workflow.
 
     conda "$TOOL_FOLDER/conda_env.yml"
 
     input:
     file input 
+    val variable
 
     output:
     file 'python_output.tsv'
 
     """
-    python $TOOL_FOLDER/python_script.py $input python_output.tsv
+    python $TOOL_FOLDER/python_script.py $input $variable python_output.tsv
     """
 }
 
 // TODO: This main will define the workflow that can then be imported, not really used right now, but can be
 workflow Main{
     take:
-    input_spectra
-    publishDir
+    input_map
 
     main:
-    data_ch = Channel.fromPath(params.input_spectra)
+    data_ch = Channel.fromPath(input_map.bar)
+    var_ch = Channel.of(input_map.foo)
 
     // Outputting Python
-    processDataPython(data_ch)
+    results = processDataPython(data_ch, var_ch)
 
+    publish:
+    results >> input_map.publish_dir
+
+    emit:
+    results
 }
 
 workflow {
-    _ = Main(params.input_spectra, params.publishDir)
-
+    input_map = [foo: params.foo, bar: params.bar, publish_dir: params.publish_dir]
+    _ = Main(input_map)
     // Alternatively we can put everyhthing in the main from the above right here
 }
+
+output{
+    mode 'copy'
+    // ignoreErrors true
+}
diff --git a/nf_workflow_importer.nf b/nf_workflow_importer.nf
@@ -1,23 +1,67 @@
 #!/usr/bin/env nextflow
 nextflow.enable.dsl=2
+nextflow.preview.output = true
 
 include {Main as Module1} from './nf_workflow.nf'
 
-params.input_spectra = "README.md"
+params.foo = "Hola"
+params.bar = "README.md"
 
 //This publish dir is mostly  useful when we want to import modules in other workflows, keep it here usually don't change it
-params.publishDir = "$baseDir/nf_output"
-TOOL_FOLDER = "$baseDir/bin"
+params.publish_dir = "./nf_output"
+TOOL_FOLDER = "$moduleDir/bin"
+
+
+process processExtra {
+    // publishDir "$params.publish_dir", mode: 'copy' // Publishing is better declared in the workflow's publish: section, which gives better control over the output hierarchy because the publish directory can vary per workflow.
+
+    conda "$TOOL_FOLDER/conda_env.yml"
+
+    input:
+    file input 
+    val variable
+
+    output:
+    file 'outer/python_output_*.tsv'
+
+    """
+    mkdir outer
+    python $TOOL_FOLDER/python_script.py $input $variable 'outer/python_output_${variable}.tsv'
+    """
+}
 
 workflow Main{
     take:
-    a
-    b
+    input_map
 
     main:
-    Module1(a, b)
+    input_map_copy_adjusted = input_map.getClass().newInstance(input_map)
+    input_map_copy_adjusted.publish_dir = input_map.publish_dir + "/module1"
+    res_module1 = Module1(input_map_copy_adjusted)
+
+    data_ch = Channel.fromPath(input_map.bar).collect()
+    var_ch = Channel.of("test1", "test2")
+    results_extra = processExtra(data_ch, var_ch)
+
+
+    combined_results = res_module1.concat(results_extra)
+
+    results = combined_results.collectFile(name: "${input_map.publish_dir}/python_output2.tsv", newLine: false, keepHeader: true, skip: 1)
+
+    publish:
+    results_extra >> input_map.publish_dir
+
+    emit:
+    results
 }
+
 workflow {
-    _ = Main(params.input_spectra, params.publishDir)
+    input_map = [foo: params.foo, bar:params.bar, publish_dir: params.publish_dir]
+    _ = Main(input_map)
 
+}
+
+output{
+    mode 'copy'
+    // ignoreErrors true
 }