diff --git a/docs/process.md b/docs/process.md index db7a594aca..592ce14064 100644 --- a/docs/process.md +++ b/docs/process.md @@ -471,22 +471,46 @@ workflow { } ``` -The `stageAs` option allows you to control how the file should be named in the task work directory. You can provide a specific name or a pattern as described in the [Multiple input files](#multiple-input-files) section: +Available options: -```groovy -process foo { +`arity` +: :::{versionadded} 23.09.0-edge + ::: +: Specify the number of expected files. Can be a number or a range: + + ```groovy input: - path x, stageAs: 'data.txt' + path('one.txt', arity: '1') // exactly one file is expected + path('pair_*.txt', arity: '2') // exactly two files are expected + path('many_*.txt', arity: '1..*') // one or more files are expected + path('optional.txt', arity: '0..1') // zero or one file is expected + ``` - """ - your_command --in data.txt - """ -} + When a task is created, Nextflow will check whether the received files for each path input match the declared arity, and fail if they do not. -workflow { - foo('/some/data/file.txt') -} -``` + An input is *nullable* if the arity is exactly `0..1`. Nullable inputs can accept "null" files from nullable `path` outputs. + + You can also set `arity: true` to infer the arity from the file pattern: `1..*` if the pattern is a glob, `1` otherwise. + +`stageAs` +: Specify how the file should be named in the task work directory: + + ```groovy + process foo { + input: + path x, stageAs: 'data.txt' + + """ + your_command --in data.txt + """ + } + + workflow { + foo('/some/data/file.txt') + } + ``` + + Can be a name or a pattern as described in the [Multiple input files](#multiple-input-files) section. ### Multiple input files @@ -912,6 +936,25 @@ In the above example, the `randomNum` process creates a file named `result.txt` Available options: +`arity` +: :::{versionadded} 23.09.0-edge + ::: +: Specify the number of expected files. Can be a number or a range: + + ```groovy + output: + path('one.txt', arity: '1') // exactly one file is expected + path('pair_*.txt', arity: '2') // exactly two files are expected + path('many_*.txt', arity: '1..*') // one or more files are expected + path('optional.txt', arity: '0..1') // zero or one file is expected + ``` + + When a task completes, Nextflow will check whether the produced files for each path output match the declared arity, and fail if they do not. If the arity is *single* (i.e. either `1` or `0..1`), a single file will be emitted. Otherwise, a list will always be emitted, even if only one file is produced. + + An output is *nullable* if the arity is exactly `0..1`. Whereas optional outputs emit nothing if the output file does not exist, nullable outputs emit a "null" file that can only be accepted by a nullable `path` input. + + You can also set `arity: true` to infer the arity from the file pattern: `1..*` if the pattern is a glob, `1` otherwise. + `followLinks` : When `true` target files are return in place of any matching symlink (default: `true`) diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy index 07fe7a7ef6..8c468898e4 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy @@ -103,6 +103,7 @@ import nextflow.util.CacheHelper import nextflow.util.Escape import nextflow.util.LockManager import nextflow.util.LoggerHelper +import nextflow.util.NullPath import nextflow.util.TestOnly import org.codehaus.groovy.control.CompilerConfiguration import org.codehaus.groovy.control.customizers.ASTTransformationCustomizer @@ -1567,6 +1568,10 @@ class TaskProcessor { def path = param.glob ? splitter.strip(filePattern) : filePattern def file = workDir.resolve(path) def exists = param.followLinks ? file.exists() : file.exists(LinkOption.NOFOLLOW_LINKS) + if( !exists && param.isNullable() ) { + file = new NullPath(path) + exists = true + } if( exists ) result = [file] else @@ -1576,7 +1581,7 @@ class TaskProcessor { if( result ) allFiles.addAll(result) - else if( !param.optional ) { + else if( !param.optional && (!param.arity || param.arity.min > 0) ) { def msg = "Missing output file(s) `$filePattern` expected by process `${safeTaskName(task)}`" if( inputsRemovedFlag ) msg += " (note: input files are not included in the default matching set)" @@ -1584,7 +1589,10 @@ class TaskProcessor { } } - task.setOutput( param, allFiles.size()==1 ? allFiles[0] : allFiles ) + if( !param.isValidArity(allFiles) ) + throw new IllegalArgumentException("Incorrect number of output files for process `${safeTaskName(task)}` -- expected ${param.arity}, found ${allFiles.size()}") + + task.setOutput( param, allFiles.size()==1 && param.isSingle() ? allFiles[0] : allFiles ) } @@ -1782,7 +1790,10 @@ class TaskProcessor { return new FileHolder(source, result) } - protected Path normalizeToPath( obj ) { + protected Path normalizeToPath( obj, boolean nullable=false ) { + if( obj instanceof NullPath && !nullable ) + throw new ProcessUnrecoverableException("Path value cannot be null") + if( obj instanceof Path ) return obj @@ -1805,7 +1816,7 @@ class TaskProcessor { throw new ProcessUnrecoverableException("Not a valid path value: '$str'") } - protected List normalizeInputToFiles( Object obj, int count, boolean coerceToPath, FilePorter.Batch batch ) { + protected List normalizeInputToFiles( Object obj, int count, boolean coerceToPath, boolean nullable, FilePorter.Batch batch ) { Collection allItems = obj instanceof Collection ? obj : [obj] def len = allItems.size() @@ -1815,7 +1826,7 @@ class TaskProcessor { for( def item : allItems ) { if( item instanceof Path || coerceToPath ) { - def path = normalizeToPath(item) + def path = normalizeToPath(item, nullable) def target = executor.isForeignFile(path) ? batch.addToForeign(path) : path def holder = new FileHolder(target) files << holder @@ -1828,10 +1839,10 @@ class TaskProcessor { return files } - protected singleItemOrList( List items, ScriptType type ) { + protected singleItemOrList( List items, boolean single, ScriptType type ) { assert items != null - if( items.size() == 1 ) { + if( items.size() == 1 && single ) { return makePath(items[0],type) } @@ -2028,10 +2039,17 @@ class TaskProcessor { for( Map.Entry entry : secondPass.entrySet() ) { final param = entry.getKey() final val = entry.getValue() - final fileParam = param as FileInParam - final normalized = normalizeInputToFiles(val, count, fileParam.isPathQualifier(), batch) - final resolved = expandWildcards( fileParam.getFilePattern(ctx), normalized ) - ctx.put( param.name, singleItemOrList(resolved, task.type) ) + final normalized = normalizeInputToFiles(val, count, param.isPathQualifier(), param.isNullable(), batch) + final resolved = expandWildcards( param.getFilePattern(ctx), normalized ) + + if( !param.isValidArity(resolved) ) { + final msg = param.isNullable() + ? "expected a nullable file (0..1) but a list was provided" + : "expected ${param.arity}, found ${resolved.size()}" + throw new IllegalArgumentException("Incorrect number of input files for process `${safeTaskName(task)}` -- ${msg}") + } + + ctx.put( param.name, singleItemOrList(resolved, param.isSingle(), task.type) ) count += resolved.size() for( FileHolder item : resolved ) { Integer num = allNames.getOrCreate(item.stageName, 0) +1 diff --git a/modules/nextflow/src/main/groovy/nextflow/script/params/ArityParam.groovy b/modules/nextflow/src/main/groovy/nextflow/script/params/ArityParam.groovy new file mode 100644 index 0000000000..0b61faebc4 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/script/params/ArityParam.groovy @@ -0,0 +1,110 @@ +/* + * Copyright 2023, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.script.params + +import groovy.transform.CompileStatic +import groovy.transform.EqualsAndHashCode + +/** + * Implements an arity option for process inputs and outputs. + * + * @author Ben Sherman + */ +@CompileStatic +trait ArityParam { + + Range arity + + Range getArity() { arity } + + def setArity(String value) { + if( value.isInteger() ) { + def n = value.toInteger() + this.arity = new Range(n, n) + return this + } + + def tokens = value.tokenize('..') + if( tokens.size() == 2 ) { + def min = tokens[0] + def max = tokens[1] + if( min.isInteger() && (max == '*' || max.isInteger()) ) { + this.arity = new Range( + min.toInteger(), + max == '*' ? Integer.MAX_VALUE : max.toInteger() + ) + return this + } + } + + throw new IllegalArgumentException("Path arity should be a number (e.g. '1') or a range (e.g. '1..*')") + } + + /** + * Determine whether a null file is allowed. + */ + boolean isNullable() { + return arity && arity.min == 0 && arity.max == 1 + } + + /** + * Determine whether a single output file should be unwrapped. + */ + boolean isSingle() { + return !arity || arity.max == 1 + } + + /** + * Determine whether a collection of files has valid arity. + * + * If the param is nullable, there should be exactly one file (either + * a real file or a null file) + * + * @param files + */ + boolean isValidArity(Collection files) { + if( !arity ) + return true + + return isNullable() + ? files.size() == 1 + : arity.contains(files.size()) + } + + @EqualsAndHashCode + static class Range { + int min + int max + + Range(int min, int max) { + this.min = min + this.max = max + } + + boolean contains(int value) { + min <= value && value <= max + } + + @Override + String toString() { + min == max + ? min.toString() + : "${min}..${max == Integer.MAX_VALUE ? '*' : max}".toString() + } + } + +} diff --git a/modules/nextflow/src/main/groovy/nextflow/script/params/FileInParam.groovy b/modules/nextflow/src/main/groovy/nextflow/script/params/FileInParam.groovy index b52f8dad39..ae0ac4da5f 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/params/FileInParam.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/params/FileInParam.groovy @@ -28,7 +28,7 @@ import nextflow.script.TokenVar */ @Slf4j @InheritConstructors -class FileInParam extends BaseInParam implements PathQualifier { +class FileInParam extends BaseInParam implements ArityParam, PathQualifier { protected filePattern @@ -153,4 +153,14 @@ class FileInParam extends BaseInParam implements PathQualifier { return this } + def setArity(boolean value) { + if( !value ) + return + + final str = filePattern?.contains('*') || filePattern?.contains('?') + ? '1..*' + : '1' + setArity(str) + } + } diff --git a/modules/nextflow/src/main/groovy/nextflow/script/params/FileOutParam.groovy b/modules/nextflow/src/main/groovy/nextflow/script/params/FileOutParam.groovy index e305c46c3e..085e662449 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/params/FileOutParam.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/params/FileOutParam.groovy @@ -33,7 +33,7 @@ import nextflow.util.BlankSeparatedList */ @Slf4j @InheritConstructors -class FileOutParam extends BaseOutParam implements OutParam, OptionalParam, PathQualifier { +class FileOutParam extends BaseOutParam implements OutParam, ArityParam, OptionalParam, PathQualifier { /** * ONLY FOR TESTING DO NOT USE @@ -276,4 +276,14 @@ class FileOutParam extends BaseOutParam implements OutParam, OptionalParam, Path (FileOutParam)super.setOptions(opts) } + def setArity(boolean value) { + if( !value ) + return + + final str = filePattern?.contains('*') || filePattern?.contains('?') + ? '1..*' + : '1' + setArity(str) + } + } diff --git a/modules/nextflow/src/main/groovy/nextflow/util/NullPath.groovy b/modules/nextflow/src/main/groovy/nextflow/util/NullPath.groovy new file mode 100644 index 0000000000..9ef1ab23c1 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/util/NullPath.groovy @@ -0,0 +1,41 @@ +/* + * Copyright 2013-2023, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.util + +import java.nio.file.Path +import java.nio.file.Paths + +import groovy.transform.EqualsAndHashCode +import groovy.transform.PackageScope +import groovy.util.logging.Slf4j + +/** + * + * @author Ben Sherman + */ +@EqualsAndHashCode +@Slf4j +class NullPath implements Path { + + @PackageScope + @Delegate + Path delegate + + NullPath(String path) { + delegate = Paths.get(path) + } +} \ No newline at end of file diff --git a/modules/nextflow/src/test/groovy/nextflow/processor/TaskProcessorTest.groovy b/modules/nextflow/src/test/groovy/nextflow/processor/TaskProcessorTest.groovy index ab2f3bc511..71bafeb50d 100644 --- a/modules/nextflow/src/test/groovy/nextflow/processor/TaskProcessorTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/processor/TaskProcessorTest.groovy @@ -185,13 +185,19 @@ class TaskProcessorTest extends Specification { when: def list = [ FileHolder.get(path1, 'x_file_1') ] - def result = processor.singleItemOrList(list, ScriptType.SCRIPTLET) + def result = processor.singleItemOrList(list, true, ScriptType.SCRIPTLET) then: result.toString() == 'x_file_1' + when: + list = [ FileHolder.get(path1, 'x_file_1') ] + result = processor.singleItemOrList(list, false, ScriptType.SCRIPTLET) + then: + result*.toString() == ['x_file_1'] + when: list = [ FileHolder.get(path1, 'x_file_1'), FileHolder.get(path2, 'x_file_2'), FileHolder.get(path3, 'x_file_3') ] - result = processor.singleItemOrList(list, ScriptType.SCRIPTLET) + result = processor.singleItemOrList(list, false, ScriptType.SCRIPTLET) then: result*.toString() == [ 'x_file_1', 'x_file_2', 'x_file_3'] @@ -823,7 +829,7 @@ class TaskProcessorTest extends Specification { def proc = new TaskProcessor(); proc.executor = executor when: - def result = proc.normalizeInputToFiles(PATH.toString(), 0, true, batch) + def result = proc.normalizeInputToFiles(PATH.toString(), 0, true, false, batch) then: 1 * executor.isForeignFile(PATH) >> false 0 * batch.addToForeign(PATH) >> null diff --git a/modules/nextflow/src/test/groovy/nextflow/script/params/ArityParamTest.groovy b/modules/nextflow/src/test/groovy/nextflow/script/params/ArityParamTest.groovy new file mode 100644 index 0000000000..70cbca64fe --- /dev/null +++ b/modules/nextflow/src/test/groovy/nextflow/script/params/ArityParamTest.groovy @@ -0,0 +1,67 @@ +/* + * Copyright 2023, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.script.params + +import spock.lang.Specification +import spock.lang.Unroll +/** + * + * @author Ben Sherman + */ +class ArityParamTest extends Specification { + + static class DefaultArityParam implements ArityParam { + DefaultArityParam() {} + } + + @Unroll + def testArity () { + + when: + def param = new DefaultArityParam() + param.setArity(VALUE) + then: + param.arity.min == MIN + param.arity.max == MAX + param.isNullable() == NULLABLE + param.isSingle() == SINGLE + + where: + VALUE | NULLABLE | SINGLE | MIN | MAX + '1' | false | true | 1 | 1 + '0..1' | true | true | 0 | 1 + '1..*' | false | false | 1 | Integer.MAX_VALUE + '0..*' | false | false | 0 | Integer.MAX_VALUE + } + + @Unroll + def testArityRange () { + + when: + def range = new ArityParam.Range(MIN, MAX) + then: + range.contains(2) == TWO + range.toString() == STRING + + where: + MIN | MAX | TWO | STRING + 1 | 1 | false | '1' + 0 | 1 | false | '0..1' + 1 | Integer.MAX_VALUE | true | '1..*' + } + +} diff --git a/modules/nextflow/src/test/groovy/nextflow/script/params/ParamsInTest.groovy b/modules/nextflow/src/test/groovy/nextflow/script/params/ParamsInTest.groovy index 60dba152bc..8f8a033a92 100644 --- a/modules/nextflow/src/test/groovy/nextflow/script/params/ParamsInTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/script/params/ParamsInTest.groovy @@ -707,12 +707,12 @@ class ParamsInTest extends Dsl2Spec { process hola { input: - path x - path f1 - path '*.fa' - path 'file.txt' - path f2, name: '*.fa' - path f3, stageAs: '*.txt' + path x, arity: '1' + path f1, arity: '1..2' + path '*.fa', arity: '1..*' + path 'file.txt', arity: true + path f2, name: '*.fa', arity: true + path f3, stageAs: '*.txt', arity: true return '' } @@ -738,34 +738,40 @@ class ParamsInTest extends Dsl2Spec { in0.inChannel.val == FILE in0.index == 0 in0.isPathQualifier() + in0.arity == new ArityParam.Range(1, 1) in1.name == 'f1' in1.filePattern == '*' in1.inChannel.val == FILE in1.index == 1 in1.isPathQualifier() + in1.arity == new ArityParam.Range(1, 2) in2.name == '*.fa' in2.filePattern == '*.fa' in2.inChannel.val == FILE in2.index == 2 in2.isPathQualifier() + in2.arity == new ArityParam.Range(1, Integer.MAX_VALUE) in3.name == 'file.txt' in3.filePattern == 'file.txt' in3.inChannel.val == FILE in3.index == 3 in3.isPathQualifier() + in3.arity == new ArityParam.Range(1, 1) in4.name == 'f2' in4.filePattern == '*.fa' in4.index == 4 in4.isPathQualifier() + in4.arity == new ArityParam.Range(1, Integer.MAX_VALUE) in5.name == 'f3' in5.filePattern == '*.txt' in5.index == 5 in5.isPathQualifier() + in5.arity == new ArityParam.Range(1, Integer.MAX_VALUE) } def 'test input paths with gstring'() { diff --git a/modules/nextflow/src/test/groovy/nextflow/script/params/ParamsOutTest.groovy b/modules/nextflow/src/test/groovy/nextflow/script/params/ParamsOutTest.groovy index 1c46c898ba..d19820f4f6 100644 --- a/modules/nextflow/src/test/groovy/nextflow/script/params/ParamsOutTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/script/params/ParamsOutTest.groovy @@ -931,7 +931,8 @@ class ParamsOutTest extends Dsl2Spec { separatorChar: '#', glob: false, optional: false, - includeInputs: false + includeInputs: false, + arity: '1' path y, maxDepth:5, @@ -941,7 +942,8 @@ class ParamsOutTest extends Dsl2Spec { separatorChar: ':', glob: true, optional: true, - includeInputs: true + includeInputs: true, + arity: '0..*' return '' } @@ -963,6 +965,7 @@ class ParamsOutTest extends Dsl2Spec { !out0.getGlob() !out0.getOptional() !out0.getIncludeInputs() + out0.getArity() == new ArityParam.Range(1, 1) and: out1.getMaxDepth() == 5 @@ -973,6 +976,7 @@ class ParamsOutTest extends Dsl2Spec { out1.getGlob() out1.getOptional() out1.getIncludeInputs() + out1.getArity() == new ArityParam.Range(0, Integer.MAX_VALUE) } def 'should set file options' () { diff --git a/tests/checks/process-arity-fails.nf/.checks b/tests/checks/process-arity-fails.nf/.checks new file mode 100644 index 0000000000..346e4a2ea0 --- /dev/null +++ b/tests/checks/process-arity-fails.nf/.checks @@ -0,0 +1,18 @@ +set +e + +# +# run normal mode +# +echo '' +$NXF_RUN +[[ $? == 0 ]] && exit 1 + + +# +# RESUME mode +# +echo '' +$NXF_RUN -resume +[[ $? == 0 ]] && exit 1 + +exit 0 \ No newline at end of file diff --git a/tests/checks/process-arity.nf/.checks b/tests/checks/process-arity.nf/.checks new file mode 100644 index 0000000000..66e9a3e265 --- /dev/null +++ b/tests/checks/process-arity.nf/.checks @@ -0,0 +1,16 @@ +set +e + +# +# run normal mode +# +echo '' +$NXF_RUN +[[ $? == 0 ]] || false + + +# +# RESUME mode +# +echo '' +$NXF_RUN -resume +[[ $? == 0 ]] || false \ No newline at end of file diff --git a/tests/process-arity-fails.nf b/tests/process-arity-fails.nf new file mode 100644 index 0000000000..9f2c62cc01 --- /dev/null +++ b/tests/process-arity-fails.nf @@ -0,0 +1,19 @@ +#!/usr/bin/env nextflow + +process foo { + output: + path('output.txt', arity: '0..1') + script: + true +} + +process bar { + input: + path(file) + script: + true +} + +workflow { + foo | bar +} \ No newline at end of file diff --git a/tests/process-arity.nf b/tests/process-arity.nf new file mode 100644 index 0000000000..9c2eca7030 --- /dev/null +++ b/tests/process-arity.nf @@ -0,0 +1,37 @@ +#!/usr/bin/env nextflow + +process foo { + output: + path('one.txt', arity: '1') + path('pair_*.txt', arity: '2') + path('many_*.txt', arity: '1..*') + path('nullable.txt', arity: '0..1') + script: + """ + echo 'one' > one.txt + echo 'pair_1' > pair_1.txt + echo 'pair_2' > pair_2.txt + echo 'many_1' > many_1.txt + echo 'many_2' > many_2.txt + echo 'many_3' > many_3.txt + """ +} + +process bar { + input: + path('one.txt', arity: '1') + path('pair_*.txt', arity: '2') + path('many_*.txt', arity: '1..*') + path(x, arity: '0..1') + script: + """ + cat one.txt + cat pair_*.txt + cat many_*.txt + [[ -f ${x} ]] && cat ${x} || true + """ +} + +workflow { + foo | bar +} \ No newline at end of file