From 47d01688e074f56e7492c06de0d303259df95797 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Mon, 27 Mar 2023 14:01:09 -0500 Subject: [PATCH 01/20] Add initial task graph and metadata json file Signed-off-by: Ben Sherman --- docs/config.rst | 1 + .../src/main/groovy/nextflow/Session.groovy | 9 +++ .../groovy/nextflow/dag/ConcreteDAG.groovy | 76 +++++++++++++++++++ .../nextflow/dag/CytoscapeHtmlRenderer.groovy | 2 +- .../nextflow/dag/CytoscapeJsRenderer.groovy | 2 +- .../src/main/groovy/nextflow/dag/DAG.groovy | 2 +- .../groovy/nextflow/dag/DagRenderer.groovy | 15 +++- .../groovy/nextflow/dag/DotRenderer.groovy | 2 +- .../groovy/nextflow/dag/GexfRenderer.groovy | 2 +- .../nextflow/dag/GraphVizRenderer.groovy | 2 +- .../nextflow/dag/MermaidRenderer.groovy | 29 +++++-- .../groovy/nextflow/dag/NodeMarker.groovy | 18 ++++- .../nextflow/processor/TaskHandler.groovy | 29 +++++++ .../nextflow/processor/TaskProcessor.groovy | 10 ++- .../groovy/nextflow/processor/TaskRun.groovy | 1 + .../trace/DefaultObserverFactory.groovy | 1 + .../nextflow/trace/GraphObserver.groovy | 50 +++++------- .../nextflow/dag/DotRendererTest.groovy | 2 +- .../nextflow/dag/GexfRendererTest.groovy | 2 +- .../nextflow/dag/MermaidRendererTest.groovy | 2 +- .../nextflow/processor/TaskHandlerTest.groovy | 26 +++++++ .../main/nextflow/extension/FilesEx.groovy | 8 ++ 22 files changed, 239 insertions(+), 52 deletions(-) create mode 100644 modules/nextflow/src/main/groovy/nextflow/dag/ConcreteDAG.groovy diff --git a/docs/config.rst b/docs/config.rst index 3be4e6b518..744b814da4 100644 --- a/docs/config.rst +++ b/docs/config.rst @@ -254,6 +254,7 @@ Name Description enabled When ``true`` turns on the generation of the DAG file (default: ``false``). file Graph file name (default: ``dag-.dot``). overwrite When ``true`` overwrites any existing DAG file with the same name. +type Can be ``abstract`` to render the abstract DAG or ``concrete`` to render the concrete (task) DAG (default: ``abstract``). ================== ================ The above options can be used by prefixing them with the ``dag`` scope or surrounding them by curly diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index 9fc4248502..55ef2cf6cc 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -40,6 +40,7 @@ import nextflow.conda.CondaConfig import nextflow.config.Manifest import nextflow.container.ContainerConfig import nextflow.dag.DAG +import nextflow.dag.ConcreteDAG import nextflow.exception.AbortOperationException import nextflow.exception.AbortSignalException import nextflow.exception.IllegalConfigException @@ -193,6 +194,8 @@ class Session implements ISession { private DAG dag + private ConcreteDAG concreteDag + private CacheDB cache private Barrier processesBarrier = new Barrier() @@ -345,6 +348,7 @@ class Session implements ISession { // -- DAG object this.dag = new DAG() + this.concreteDag = new ConcreteDAG() // -- init work dir this.workDir = ((config.workDir ?: 'work') as Path).complete() @@ -800,6 +804,8 @@ class Session implements ISession { DAG getDag() { this.dag } + ConcreteDAG getConcreteDAG() { this.concreteDag } + ExecutorService getExecService() { execService } /** @@ -1008,6 +1014,9 @@ class Session implements ISession { final trace = handler.safeTraceRecord() cache.putTaskAsync(handler, trace) + // save the task meta file to the task directory + handler.writeMetaFile() + // notify the event to the observers for( int i=0; i + */ +@Slf4j +class ConcreteDAG { + + Map nodes = new HashMap<>(100) + + /** + * Create a new node for a task + * + * @param task + * @param hash + */ + synchronized void addTaskNode( TaskRun task, String hash ) { + final label = "[${hash.substring(0,2)}/${hash.substring(2,8)}] ${task.name}" + final preds = task.getInputFilesMap().values() + .collect { p -> getPredecessorHash(p) } + .findAll { h -> h != null } + + nodes[hash] = new Node( + index: nodes.size(), + label: label, + predecessors: preds + ) + } + + static public String getPredecessorHash(Path path) { + final pattern = Pattern.compile('.*/([a-z0-9]{2}/[a-z0-9]{30})') + final matcher = pattern.matcher(path.toString()) + + matcher.find() ? matcher.group(1).replace('/', '') : null + } + + @MapConstructor + @ToString(includeNames = true, includes = 'label', includePackage=false) + protected class Node { + + int index + + String label + + List predecessors + + String getSlug() { "t${index}" } + + } + +} diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/CytoscapeHtmlRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/CytoscapeHtmlRenderer.groovy index 1d0bbe117f..7cd207807b 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/CytoscapeHtmlRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/CytoscapeHtmlRenderer.groovy @@ -29,7 +29,7 @@ import java.nio.file.Path class CytoscapeHtmlRenderer implements DagRenderer { @Override - void renderDocument(DAG dag, Path file) { + void renderAbstractGraph(DAG dag, Path file) { String tmplPage = readTemplate() String network = CytoscapeJsRenderer.renderNetwork(dag) file.text = tmplPage.replaceAll(~/\/\* REPLACE_WITH_NETWORK_DATA \*\//, network) diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/CytoscapeJsRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/CytoscapeJsRenderer.groovy index d6eff29552..1486f4648b 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/CytoscapeJsRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/CytoscapeJsRenderer.groovy @@ -29,7 +29,7 @@ import java.nio.file.Path class CytoscapeJsRenderer implements DagRenderer { @Override - void renderDocument(DAG dag, Path file) { + void renderAbstractGraph(DAG dag, Path file) { file.text = renderNetwork(dag) } diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/DAG.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/DAG.groovy index 808bc86660..34e703c0e3 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/DAG.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/DAG.groovy @@ -41,7 +41,7 @@ import nextflow.script.params.OutputsList import nextflow.script.params.TupleInParam import nextflow.script.params.TupleOutParam /** - * Model a direct acyclic graph of the pipeline execution. + * Model the abstract graph of a pipeline execution. * * @author Paolo Di Tommaso */ diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/DagRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/DagRenderer.groovy index 4b7e487937..f526d5f765 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/DagRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/DagRenderer.groovy @@ -24,10 +24,19 @@ import java.nio.file.Path * @author Paolo Di Tommaso * @author Mike Smoot */ -interface DagRenderer { +trait DagRenderer { /** - * Render the dag to the specified file. + * Render an abstract (process/operator) DAG. */ - void renderDocument(DAG dag, Path file); + void renderAbstractGraph(DAG dag, Path file) { + throw new UnsupportedOperationException("Abstract graph rendering is not supported for this file format") + } + + /** + * Render a concrete (task) DAG. + */ + void renderConcreteGraph(ConcreteDAG dag, Path file) { + throw new UnsupportedOperationException("Concrete graph rendering is not supported for this file format") + } } diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/DotRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/DotRenderer.groovy index 65184f62c2..817d47a3d2 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/DotRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/DotRenderer.groovy @@ -46,7 +46,7 @@ class DotRenderer implements DagRenderer { static String normalise(String str) { str.replaceAll(/[^0-9_A-Za-z]/,'') } @Override - void renderDocument(DAG dag, Path file) { + void renderAbstractGraph(DAG dag, Path file) { file.text = renderNetwork(dag) } diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/GexfRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/GexfRenderer.groovy index c2bac51110..43aaba6e9c 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/GexfRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/GexfRenderer.groovy @@ -41,7 +41,7 @@ class GexfRenderer implements DagRenderer { } @Override - void renderDocument(DAG dag, Path file) { + void renderAbstractGraph(DAG dag, Path file) { final Charset charset = Charset.defaultCharset() Writer bw = Files.newBufferedWriter(file, charset) final XMLOutputFactory xof = XMLOutputFactory.newFactory() diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/GraphVizRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/GraphVizRenderer.groovy index 5d119adbdc..7ce79b839c 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/GraphVizRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/GraphVizRenderer.groovy @@ -42,7 +42,7 @@ class GraphvizRenderer implements DagRenderer { * See http://www.graphviz.org for more info. */ @Override - void renderDocument(DAG dag, Path target) { + void renderAbstractGraph(DAG dag, Path target) { def result = Files.createTempFile('nxf-',".$format") def temp = Files.createTempFile('nxf-','.dot') // save the DAG as `dot` to a temp file diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy index 4b3ec56c2a..6e55674054 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy @@ -27,11 +27,7 @@ import java.nio.file.Path class MermaidRenderer implements DagRenderer { @Override - void renderDocument(DAG dag, Path file) { - file.text = renderNetwork(dag) - } - - String renderNetwork(DAG dag) { + void renderAbstractGraph(DAG dag, Path file) { def lines = [] lines << "flowchart TD" @@ -45,7 +41,7 @@ class MermaidRenderer implements DagRenderer { lines << "" - return lines.join('\n') + file.text = lines.join('\n') } private String renderVertex(DAG.Vertex vertex) { @@ -76,4 +72,25 @@ class MermaidRenderer implements DagRenderer { return "${edge.from.name} -->${label} ${edge.to.name}" } + + @Override + void renderConcreteGraph(ConcreteDAG graph, Path file) { + def lines = [] + lines << "flowchart TD" + + graph.nodes.values().each { node -> + lines << " ${node.getSlug()}[\"${node.label}\"]" + + node.predecessors.each { key -> + final pred = graph.nodes[key] + + if( pred ) + lines << " ${pred.getSlug()} --> ${node.getSlug()}" + } + } + + lines << "" + + file.text = lines.join('\n') + } } diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/NodeMarker.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/NodeMarker.groovy index 4e5dc60c1c..d9f8f14990 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/NodeMarker.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/NodeMarker.groovy @@ -22,6 +22,7 @@ import groovyx.gpars.dataflow.operator.DataflowProcessor import nextflow.Global import nextflow.Session import nextflow.processor.TaskProcessor +import nextflow.processor.TaskRun import nextflow.script.params.InputsList import nextflow.script.params.OutputsList /** @@ -41,7 +42,7 @@ class NodeMarker { static private Session getSession() { Global.session as Session } /** - * Creates a new vertex in the DAG representing a computing `process` + * Creates a vertex in the abstract DAG representing a computing `process` * * @param label The label associated to the process * @param inputs The list of inputs entering in the process @@ -53,7 +54,7 @@ class NodeMarker { } /** - * Creates a new DAG vertex representing a dataflow operator + * Creates a vertex in the abstract DAG representing a dataflow operator * * @param label The operator label * @param inputs The operator input(s). It can be either a single channel or a list of channels. @@ -67,7 +68,7 @@ class NodeMarker { } /** - * Creates a vertex in the DAG representing a dataflow channel source. + * Creates a vertex in the abstract DAG representing a dataflow channel source. * * @param label The node description * @param source Either a dataflow channel or a list of channel. @@ -88,4 +89,15 @@ class NodeMarker { session.dag.addDataflowBroadcastPair(readChannel, broadcastChannel) } + /** + * Creates a vertex in the concrete DAG representing a task + * + * @param task + * @param hash + */ + static void addTaskNode( TaskRun task, String hash ) { + if( session?.concreteDAG && !session.aborted ) + session.concreteDAG.addTaskNode( task, hash ) + } + } diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskHandler.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskHandler.groovy index e14b612566..61efe65c18 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskHandler.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskHandler.groovy @@ -21,7 +21,11 @@ import static nextflow.processor.TaskStatus.* import java.nio.file.NoSuchFileException +import groovy.json.JsonBuilder import groovy.util.logging.Slf4j +import nextflow.dag.ConcreteDAG +import nextflow.extension.FilesEx +import nextflow.script.params.FileOutParam import nextflow.trace.TraceRecord /** * Actions to handle the underlying job running the user task. @@ -214,6 +218,31 @@ abstract class TaskHandler { return record } + void writeMetaFile() { + final record = [ + hash: task.hash.toString(), + inputs: task.getInputFilesMap().collect { name, path -> + [ + name: name, + path: path.toString(), + predecessor: ConcreteDAG.getPredecessorHash(path) + ] + }, + outputs: task.getOutputsByType(FileOutParam).values().flatten().collect { path -> + final file = path.toFile() + + [ + name: path.name, + path: path.toString(), + size: file.size(), + checksum: file.isFile() ? FilesEx.getChecksum(path) : null + ] + } + ] + + task.workDir.resolve(TaskRun.CMD_META).text = new JsonBuilder(record).toString() + } + /** * Determine if a process can be forked i.e. can launch * a parallel task execution. This is only enforced when diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy index 2b547f27c0..05e4643928 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy @@ -524,7 +524,7 @@ class TaskProcessor { def invoke = new InvokeTaskAdapter(this, opInputs.size()) session.allOperators << (operator = new DataflowOperator(group, params, invoke)) - // notify the creation of a new vertex the execution DAG + // notify the creation of a new process in the abstract DAG NodeMarker.addProcessNode(this, config.getInputs(), config.getOutputs()) // fix issue #41 @@ -756,8 +756,11 @@ class TaskProcessor { log.trace "[${safeTaskName(task)}] Cacheable folder=${resumeDir?.toUriString()} -- exists=$exists; try=$tries; shouldTryCache=$shouldTryCache; entry=$entry" def cached = shouldTryCache && exists && checkCachedOutput(task.clone(), resumeDir, hash, entry) - if( cached ) + if( cached ) { + // add cached task to the task graph + NodeMarker.addTaskNode(task, hash.toString()) break + } } catch (Throwable t) { log.warn1("[${safeTaskName(task)}] Unable to resume cached task -- See log file for details", causedBy: t) @@ -780,6 +783,9 @@ class TaskProcessor { lock.release() } + // add submitted task to the task graph + NodeMarker.addTaskNode(task, hash.toString()) + // submit task for execution submitTask( task, hash, workDir ) break diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy index af98f7f372..dd0942fd0e 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy @@ -543,6 +543,7 @@ class TaskRun implements Cloneable { static final public String CMD_RUN = '.command.run' static final public String CMD_TRACE = '.command.trace' static final public String CMD_ENV = '.command.env' + static final public String CMD_META = '.command.meta.json' String toString( ) { diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy index 42325c723a..832e73ede2 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy @@ -93,6 +93,7 @@ class DefaultObserverFactory implements TraceObserverFactory { if( !fileName ) fileName = GraphObserver.DEF_FILE_NAME def traceFile = (fileName as Path).complete() def observer = new GraphObserver(traceFile) + config.navigate('dag.type') { observer.dagType = it ?: 'abstract' } config.navigate('dag.overwrite') { observer.overwrite = it } result << observer } diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy index 2b10d3d244..07e2c01b12 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy @@ -24,6 +24,7 @@ import groovy.util.logging.Slf4j import nextflow.Session import nextflow.dag.CytoscapeHtmlRenderer import nextflow.dag.DAG +import nextflow.dag.ConcreteDAG import nextflow.dag.DagRenderer import nextflow.dag.DotRenderer import nextflow.dag.GexfRenderer @@ -46,7 +47,11 @@ class GraphObserver implements TraceObserver { private Path file - private DAG dag + private String type + + private DAG abstractDag + + private ConcreteDAG concreteDag private String name @@ -67,7 +72,8 @@ class GraphObserver implements TraceObserver { @Override void onFlowCreate(Session session) { - this.dag = session.dag + this.abstractDag = session.dag + this.concreteDag = session.concreteDag // check file existance final attrs = FileHelper.readAttributes(file) if( attrs ) { @@ -80,14 +86,22 @@ class GraphObserver implements TraceObserver { @Override void onFlowComplete() { - // -- normalise the DAG - dag.normalize() - // -- render it to a file - createRender().renderDocument(dag,file) + if( type == 'abstract' ) { + // -- normalise the DAG + abstractDag.normalize() + // -- render it to a file + createRenderer().renderAbstractGraph(abstractDag,file) + } + else if( type == 'concrete' ) { + createRenderer().renderConcreteGraph(concreteDag,file) + } + else { + log.warn("Invalid DAG type '${type}', should be 'abstract' or 'concrete'") + } } @PackageScope - DagRenderer createRender() { + DagRenderer createRenderer() { if( format == 'dot' ) new DotRenderer(name) @@ -104,28 +118,6 @@ class GraphObserver implements TraceObserver { new GraphvizRenderer(name, format) } - - @Override - void onProcessCreate(TaskProcessor process) { - - } - - - @Override - void onProcessSubmit(TaskHandler handler, TraceRecord trace) { - - } - - @Override - void onProcessStart(TaskHandler handler, TraceRecord trace) { - - } - - @Override - void onProcessComplete(TaskHandler handler, TraceRecord trace) { - - } - @Override boolean enableMetrics() { return false diff --git a/modules/nextflow/src/test/groovy/nextflow/dag/DotRendererTest.groovy b/modules/nextflow/src/test/groovy/nextflow/dag/DotRendererTest.groovy index 499a68105a..5fbab45413 100644 --- a/modules/nextflow/src/test/groovy/nextflow/dag/DotRendererTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/dag/DotRendererTest.groovy @@ -55,7 +55,7 @@ class DotRendererTest extends Specification { dag.normalize() when: - new DotRenderer('TheGraph').renderDocument(dag, file) + new DotRenderer('TheGraph').renderAbstractGraph(dag, file) then: file.text == ''' diff --git a/modules/nextflow/src/test/groovy/nextflow/dag/GexfRendererTest.groovy b/modules/nextflow/src/test/groovy/nextflow/dag/GexfRendererTest.groovy index f112732b2e..6d4a9e3539 100644 --- a/modules/nextflow/src/test/groovy/nextflow/dag/GexfRendererTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/dag/GexfRendererTest.groovy @@ -47,7 +47,7 @@ class GexfRendererTest extends Specification { dag.normalize() when: - new GexfRenderer('TheGraph').renderDocument(dag, file.toPath()) + new GexfRenderer('TheGraph').renderAbstractGraph(dag, file.toPath()) then: def graph = new XmlSlurper().parse(file); assert graph.name() == 'gexf' diff --git a/modules/nextflow/src/test/groovy/nextflow/dag/MermaidRendererTest.groovy b/modules/nextflow/src/test/groovy/nextflow/dag/MermaidRendererTest.groovy index 43255e4c88..0b46bd3f84 100644 --- a/modules/nextflow/src/test/groovy/nextflow/dag/MermaidRendererTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/dag/MermaidRendererTest.groovy @@ -47,7 +47,7 @@ class MermaidRendererTest extends Specification { dag.normalize() when: - new MermaidRenderer().renderDocument(dag, file) + new MermaidRenderer().renderAbstractGraph(dag, file) then: file.text == ''' diff --git a/modules/nextflow/src/test/groovy/nextflow/processor/TaskHandlerTest.groovy b/modules/nextflow/src/test/groovy/nextflow/processor/TaskHandlerTest.groovy index de052de06a..559be4ef2c 100644 --- a/modules/nextflow/src/test/groovy/nextflow/processor/TaskHandlerTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/processor/TaskHandlerTest.groovy @@ -17,10 +17,13 @@ package nextflow.processor +import java.nio.file.Paths import java.util.concurrent.atomic.LongAdder +import com.google.common.hash.HashCode import nextflow.Session import nextflow.executor.Executor +import nextflow.script.params.FileOutParam import nextflow.util.Duration import nextflow.util.MemoryUnit import spock.lang.Specification @@ -136,6 +139,29 @@ class TaskHandlerTest extends Specification { } + def 'should write meta file' () { + + given: + def folder = File.createTempDir() + def outputFile = new File(folder, 'bar.txt') ; outputFile.text = 'bar' + def task = Mock(TaskRun) { + hash >> HashCode.fromString("aabbccddeeff00112233445566778899") + workDir >> folder.toPath() + getInputFilesMap() >> [ 'foo.txt': Paths.get('/tmp/00/112233445566778899aabbccddeeff/foo.txt') ] + getOutputsByType(FileOutParam) >> [ 'bar.txt': outputFile.toPath() ] + } + def handler = [:] as TaskHandler + handler.task = task + + when: + handler.writeMetaFile() + then: + task.workDir.resolve(TaskRun.CMD_META).text == """{"hash":"aabbccddeeff00112233445566778899","inputs":[{"name":"foo.txt","path":"/tmp/00/112233445566778899aabbccddeeff/foo.txt","predecessor":"00112233445566778899aabbccddeeff"}],"outputs":[{"name":"bar.txt","path":"${folder}/bar.txt","size":3,"checksum":"37b51d194a7513e45b56f6524f2d51f2"}]}""" + + cleanup: + folder.delete() + } + LongAdder _adder(Integer x) { if( x != null ) { def adder = new LongAdder() diff --git a/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy b/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy index 94447e677f..9d79cd0a1b 100644 --- a/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy +++ b/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy @@ -35,6 +35,7 @@ import java.nio.file.attribute.FileAttribute import java.nio.file.attribute.FileTime import java.nio.file.attribute.PosixFilePermission import java.nio.file.attribute.PosixFilePermissions +import java.security.MessageDigest import groovy.transform.CompileStatic import groovy.transform.PackageScope @@ -1600,4 +1601,11 @@ class FilesEx { static String getScheme(Path path) { path.getFileSystem().provider().getScheme() } + + static String getChecksum(Path path) { + final data = Files.readAllBytes(path) + final hash = MessageDigest.getInstance("MD5").digest(data) + + new BigInteger(1, hash).toString(16) + } } From ae6702734b61615922f3956124a9f091ec7c20f5 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Tue, 28 Mar 2023 14:09:43 -0500 Subject: [PATCH 02/20] Add task inputs and outputs to conrete DAG Signed-off-by: Ben Sherman --- .../groovy/nextflow/dag/ConcreteDAG.groovy | 59 +++++++++---- .../nextflow/dag/MermaidRenderer.groovy | 38 ++++++-- .../groovy/nextflow/dag/NodeMarker.groovy | 12 --- .../nextflow/processor/TaskProcessor.groovy | 8 +- .../trace/DefaultObserverFactory.groovy | 2 +- .../nextflow/trace/GraphObserver.groovy | 16 ++++ .../nextflow/dag/ConcreteDAGTest.groovy | 86 +++++++++++++++++++ .../nextflow/dag/MermaidRendererTest.groovy | 68 ++++++++++++++- .../nextflow/processor/TaskHandlerTest.groovy | 5 +- 9 files changed, 246 insertions(+), 48 deletions(-) create mode 100644 modules/nextflow/src/test/groovy/nextflow/dag/ConcreteDAGTest.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/ConcreteDAG.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/ConcreteDAG.groovy index 735a7cf76b..5768916d16 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/ConcreteDAG.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/ConcreteDAG.groovy @@ -23,6 +23,7 @@ import groovy.transform.MapConstructor import groovy.transform.ToString import groovy.util.logging.Slf4j import nextflow.processor.TaskRun +import nextflow.script.params.FileOutParam /** * Model the conrete (task) graph of a pipeline execution. * @@ -31,46 +32,74 @@ import nextflow.processor.TaskRun @Slf4j class ConcreteDAG { - Map nodes = new HashMap<>(100) + Map nodes = new HashMap<>(100) /** - * Create a new node for a task + * Add a task to the graph * * @param task - * @param hash */ - synchronized void addTaskNode( TaskRun task, String hash ) { + synchronized void addTask( TaskRun task ) { + final hash = task.hash.toString() final label = "[${hash.substring(0,2)}/${hash.substring(2,8)}] ${task.name}" - final preds = task.getInputFilesMap().values() - .collect { p -> getPredecessorHash(p) } - .findAll { h -> h != null } + final inputs = task.getInputFilesMap() + .collect { name, path -> + new Input(name: name, path: path, predecessor: getPredecessorHash(path)) + } - nodes[hash] = new Node( + nodes[hash] = new Task( index: nodes.size(), label: label, - predecessors: preds + inputs: inputs ) } static public String getPredecessorHash(Path path) { - final pattern = Pattern.compile('.*/([a-z0-9]{2}/[a-z0-9]{30})') + final pattern = Pattern.compile('.*/([0-9a-f]{2}/[0-9a-f]{30})') final matcher = pattern.matcher(path.toString()) matcher.find() ? matcher.group(1).replace('/', '') : null } + /** + * Add a task's outputs to the graph + * + * @param task + */ + synchronized void addTaskOutputs( TaskRun task ) { + final hash = task.hash.toString() + final outputs = task.getOutputsByType(FileOutParam) + .values() + .flatten() + .collect { path -> + new Output(name: path.name, path: path) + } + + nodes[hash].outputs = outputs + } + @MapConstructor @ToString(includeNames = true, includes = 'label', includePackage=false) - protected class Node { - + static protected class Task { int index - String label - - List predecessors + List inputs + List outputs String getSlug() { "t${index}" } + } + @MapConstructor + static protected class Input { + String name + Path path + String predecessor + } + + @MapConstructor + static protected class Output { + String name + Path path } } diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy index 6e55674054..706a93f5d9 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy @@ -75,17 +75,41 @@ class MermaidRenderer implements DagRenderer { @Override void renderConcreteGraph(ConcreteDAG graph, Path file) { + def renderedOutputs = [] as Set + def numInputs = 0 + def numOutputs = 0 + def lines = [] lines << "flowchart TD" - graph.nodes.values().each { node -> - lines << " ${node.getSlug()}[\"${node.label}\"]" - - node.predecessors.each { key -> - final pred = graph.nodes[key] + // render tasks and task inputs + graph.nodes.values().each { task -> + // render task node + lines << " ${task.getSlug()}[\"${task.label}\"]" + + task.inputs.each { input -> + // render task input from predecessor + if( input.predecessor != null ) { + final pred = graph.nodes[input.predecessor] + lines << " ${pred.getSlug()} -->|${input.name}| ${task.getSlug()}" + renderedOutputs << input.path + } + + // render task input from source node + else { + numInputs += 1 + lines << " i${numInputs}(( )) -->|${input.name}| ${task.getSlug()}" + } + } + } - if( pred ) - lines << " ${pred.getSlug()} --> ${node.getSlug()}" + // render task outputs with sink nodes + graph.nodes.values().each { task -> + task.outputs.each { output -> + if( output.path !in renderedOutputs ) { + numOutputs += 1 + lines << " ${task.getSlug()} -->|${output.name}| o${numOutputs}(( ))" + } } } diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/NodeMarker.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/NodeMarker.groovy index d9f8f14990..0d543130e9 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/NodeMarker.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/NodeMarker.groovy @@ -22,7 +22,6 @@ import groovyx.gpars.dataflow.operator.DataflowProcessor import nextflow.Global import nextflow.Session import nextflow.processor.TaskProcessor -import nextflow.processor.TaskRun import nextflow.script.params.InputsList import nextflow.script.params.OutputsList /** @@ -89,15 +88,4 @@ class NodeMarker { session.dag.addDataflowBroadcastPair(readChannel, broadcastChannel) } - /** - * Creates a vertex in the concrete DAG representing a task - * - * @param task - * @param hash - */ - static void addTaskNode( TaskRun task, String hash ) { - if( session?.concreteDAG && !session.aborted ) - session.concreteDAG.addTaskNode( task, hash ) - } - } diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy index 05e4643928..0032b8e9ae 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy @@ -756,11 +756,8 @@ class TaskProcessor { log.trace "[${safeTaskName(task)}] Cacheable folder=${resumeDir?.toUriString()} -- exists=$exists; try=$tries; shouldTryCache=$shouldTryCache; entry=$entry" def cached = shouldTryCache && exists && checkCachedOutput(task.clone(), resumeDir, hash, entry) - if( cached ) { - // add cached task to the task graph - NodeMarker.addTaskNode(task, hash.toString()) + if( cached ) break - } } catch (Throwable t) { log.warn1("[${safeTaskName(task)}] Unable to resume cached task -- See log file for details", causedBy: t) @@ -783,9 +780,6 @@ class TaskProcessor { lock.release() } - // add submitted task to the task graph - NodeMarker.addTaskNode(task, hash.toString()) - // submit task for execution submitTask( task, hash, workDir ) break diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy index 832e73ede2..72a8b31d5f 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy @@ -93,7 +93,7 @@ class DefaultObserverFactory implements TraceObserverFactory { if( !fileName ) fileName = GraphObserver.DEF_FILE_NAME def traceFile = (fileName as Path).complete() def observer = new GraphObserver(traceFile) - config.navigate('dag.type') { observer.dagType = it ?: 'abstract' } + config.navigate('dag.type') { observer.type = it ?: 'abstract' } config.navigate('dag.overwrite') { observer.overwrite = it } result << observer } diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy index 07e2c01b12..2be9444056 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy @@ -84,6 +84,22 @@ class GraphObserver implements TraceObserver { } } + @Override + void onProcessSubmit(TaskHandler handler, TraceRecord trace) { + concreteDag.addTask( handler.task ) + } + + @Override + void onProcessComplete(TaskHandler handler, TraceRecord trace) { + concreteDag.addTaskOutputs( handler.task ) + } + + @Override + void onProcessCached(TaskHandler handler, TraceRecord trace) { + concreteDag.addTask( handler.task ) + concreteDag.addTaskOutputs( handler.task ) + } + @Override void onFlowComplete() { if( type == 'abstract' ) { diff --git a/modules/nextflow/src/test/groovy/nextflow/dag/ConcreteDAGTest.groovy b/modules/nextflow/src/test/groovy/nextflow/dag/ConcreteDAGTest.groovy new file mode 100644 index 0000000000..a666372169 --- /dev/null +++ b/modules/nextflow/src/test/groovy/nextflow/dag/ConcreteDAGTest.groovy @@ -0,0 +1,86 @@ +/* + * Copyright 2013-2023, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.dag + +import java.nio.file.Paths + +import com.google.common.hash.HashCode +import nextflow.processor.TaskRun +import spock.lang.Specification +/** + * + * @author Ben Sherman + */ +class ConcreteDAGTest extends Specification { + + def 'should add task nodes and outputs' () { + + given: + def task1 = Mock(TaskRun) { + getHash() >> HashCode.fromString('00112233445566778899aabbccddeeff') + getName() >> 'foo' + getInputFilesMap() >> [ + 'data.txt': Paths.get('/inputs/data.txt') + ] + getOutputsByType(_) >> [ + 'data.foo': Paths.get('/work/00/112233445566778899aabbccddeeff/data.foo') + ] + } + def task2 = Mock(TaskRun) { + getHash() >> HashCode.fromString('aabbccddeeff00112233445566778899') + getName() >> 'bar' + getInputFilesMap() >> [ + 'data.foo': Paths.get('/work/00/112233445566778899aabbccddeeff/data.foo') + ] + getOutputsByType(_) >> [ + 'data.bar': Paths.get('/work/aa/bbccddeeff00112233445566778899/data.bar') + ] + } + def dag = new ConcreteDAG() + + when: + dag.addTask( task1 ) + dag.addTask( task2 ) + def node1 = dag.nodes['00112233445566778899aabbccddeeff'] + def node2 = dag.nodes['aabbccddeeff00112233445566778899'] + then: + node1.index == 0 + node1.label == '[00/112233] foo' + node1.inputs.size() == 1 + node1.inputs[0].name == 'data.txt' + node1.inputs[0].path == Paths.get('/inputs/data.txt') + node1.inputs[0].predecessor == null + node2.index == 1 + node2.label == '[aa/bbccdd] bar' + node2.inputs.size() == 1 + node2.inputs[0].name == 'data.foo' + node2.inputs[0].path == Paths.get('/work/00/112233445566778899aabbccddeeff/data.foo') + node2.inputs[0].predecessor == '00112233445566778899aabbccddeeff' + + when: + dag.addTaskOutputs( task1 ) + dag.addTaskOutputs( task2 ) + then: + node1.outputs.size() == 1 + node1.outputs[0].name == 'data.foo' + node1.outputs[0].path == Paths.get('/work/00/112233445566778899aabbccddeeff/data.foo') + node2.outputs.size() == 1 + node2.outputs[0].name == 'data.bar' + node2.outputs[0].path == Paths.get('/work/aa/bbccddeeff00112233445566778899/data.bar') + } + +} diff --git a/modules/nextflow/src/test/groovy/nextflow/dag/MermaidRendererTest.groovy b/modules/nextflow/src/test/groovy/nextflow/dag/MermaidRendererTest.groovy index 0b46bd3f84..95d408fdc3 100644 --- a/modules/nextflow/src/test/groovy/nextflow/dag/MermaidRendererTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/dag/MermaidRendererTest.groovy @@ -16,12 +16,13 @@ */ package nextflow.dag + import java.nio.file.Files +import java.nio.file.Paths import groovyx.gpars.dataflow.DataflowQueue -import spock.lang.Specification - import nextflow.Session +import spock.lang.Specification /** * @@ -33,7 +34,7 @@ class MermaidRendererTest extends Specification { new Session() } - def 'should render a graph using the `mmd` format' () { + def 'should render an abstract graph using the `mmd` format' () { given: def file = Files.createTempFile('test', null) def ch1 = new DataflowQueue() @@ -65,4 +66,65 @@ class MermaidRendererTest extends Specification { cleanup: file.delete() } + + def 'should render a concrete graph using the `mmd` format' () { + given: + def file = Files.createTempFile('test', null) + + def dag = Mock(ConcreteDAG) { + nodes >> [ + '012345': new ConcreteDAG.Task( + index: 1, + label: 'foo', + inputs: [ + new ConcreteDAG.Input( + name: 'data.txt', + path: Paths.get('/inputs/data.txt'), + predecessor: null + ) + ], + outputs: [ + new ConcreteDAG.Output( + name: 'data.foo', + path: Paths.get('/work/012345/data.foo'), + ) + ] + ), + 'abcdef': new ConcreteDAG.Task( + index: 2, + label: 'bar', + inputs: [ + new ConcreteDAG.Input( + name: 'data.foo', + path: Paths.get('/work/012345/data.foo'), + predecessor: '012345' + ) + ], + outputs: [ + new ConcreteDAG.Output( + name: 'data.bar', + path: Paths.get('/work/abcdef/data.bar'), + ) + ] + ) + ] + } + + when: + new MermaidRenderer().renderConcreteGraph(dag, file) + then: + file.text == + ''' + flowchart TD + t1["foo"] + i1(( )) -->|data.txt| t1 + t2["bar"] + t1 -->|data.foo| t2 + t2 -->|data.bar| o1(( )) + ''' + .stripIndent().leftTrim() + + cleanup: + file.delete() + } } diff --git a/modules/nextflow/src/test/groovy/nextflow/processor/TaskHandlerTest.groovy b/modules/nextflow/src/test/groovy/nextflow/processor/TaskHandlerTest.groovy index 559be4ef2c..cc73df6c00 100644 --- a/modules/nextflow/src/test/groovy/nextflow/processor/TaskHandlerTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/processor/TaskHandlerTest.groovy @@ -23,7 +23,6 @@ import java.util.concurrent.atomic.LongAdder import com.google.common.hash.HashCode import nextflow.Session import nextflow.executor.Executor -import nextflow.script.params.FileOutParam import nextflow.util.Duration import nextflow.util.MemoryUnit import spock.lang.Specification @@ -145,10 +144,10 @@ class TaskHandlerTest extends Specification { def folder = File.createTempDir() def outputFile = new File(folder, 'bar.txt') ; outputFile.text = 'bar' def task = Mock(TaskRun) { - hash >> HashCode.fromString("aabbccddeeff00112233445566778899") + hash >> HashCode.fromString('aabbccddeeff00112233445566778899') workDir >> folder.toPath() getInputFilesMap() >> [ 'foo.txt': Paths.get('/tmp/00/112233445566778899aabbccddeeff/foo.txt') ] - getOutputsByType(FileOutParam) >> [ 'bar.txt': outputFile.toPath() ] + getOutputsByType(_) >> [ 'bar.txt': outputFile.toPath() ] } def handler = [:] as TaskHandler handler.task = task From 8f95cd6466e5d79aadc68e7ff18f17aaa530978b Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Tue, 28 Mar 2023 14:33:44 -0500 Subject: [PATCH 03/20] Fix failing tests Signed-off-by: Ben Sherman --- .../nextflow/trace/GraphObserver.groovy | 2 +- .../nextflow/trace/GraphObserverTest.groovy | 34 +++++++++---------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy index 2be9444056..3826e0b6b5 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy @@ -47,7 +47,7 @@ class GraphObserver implements TraceObserver { private Path file - private String type + private String type = 'abstract' private DAG abstractDag diff --git a/modules/nextflow/src/test/groovy/nextflow/trace/GraphObserverTest.groovy b/modules/nextflow/src/test/groovy/nextflow/trace/GraphObserverTest.groovy index bc1efe8214..baca8d5270 100644 --- a/modules/nextflow/src/test/groovy/nextflow/trace/GraphObserverTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/trace/GraphObserverTest.groovy @@ -36,7 +36,7 @@ import test.TestHelper */ class GraphObserverTest extends Specification { - DAG test_dag + DAG dag def setup() { new Session() @@ -46,28 +46,28 @@ class GraphObserverTest extends Specification { def ch2 = new DataflowQueue() def ch3 = new DataflowQueue() - test_dag = new DAG() + dag = new DAG() - test_dag.addVertex( + dag.addVertex( DAG.Type.ORIGIN, 'Source', null, [ new DAG.ChannelHandler(channel: src, label: 'src') ] ) - test_dag.addVertex( + dag.addVertex( DAG.Type.PROCESS, 'Process 1', [ new DAG.ChannelHandler(channel: src, label: 'Source') ], [ new DAG.ChannelHandler(channel: ch1, label: 'Channel 1') ] ) - test_dag.addVertex( + dag.addVertex( DAG.Type.OPERATOR, 'Filter', [ new DAG.ChannelHandler(channel: ch1, label: 'Channel 1') ], [ new DAG.ChannelHandler(channel: ch2, label: 'Channel 2') ] ) - test_dag.addVertex( + dag.addVertex( DAG.Type.PROCESS, 'Process 2', [ new DAG.ChannelHandler(channel: ch2, label: 'Channel 2') ], @@ -79,7 +79,7 @@ class GraphObserverTest extends Specification { given: def file = Files.createTempFile('nxf_','.dot') def gr = new GraphObserver(file) - gr.dag = test_dag + gr.abstractDag = dag when: gr.onFlowComplete() @@ -106,7 +106,7 @@ class GraphObserverTest extends Specification { given: def file = Files.createTempFile('nxf-','.html') def gr = new GraphObserver(file) - gr.dag = test_dag + gr.abstractDag = dag when: gr.onFlowComplete() @@ -134,7 +134,7 @@ class GraphObserverTest extends Specification { given: def file = Files.createTempFile('nxf-','.svg') def gr = new GraphObserver(file) - gr.dag = test_dag + gr.abstractDag = dag when: gr.onFlowComplete() @@ -152,7 +152,7 @@ class GraphObserverTest extends Specification { given: def file = Files.createTempFile('nxf-','.png') def gr = new GraphObserver(file) - gr.dag = test_dag + gr.abstractDag = dag when: gr.onFlowComplete() @@ -169,7 +169,7 @@ class GraphObserverTest extends Specification { given: def file = Files.createTempFile('nxf-','.pdf') def gr = new GraphObserver(file) - gr.dag = test_dag + gr.abstractDag = dag when: gr.onFlowComplete() @@ -186,7 +186,7 @@ class GraphObserverTest extends Specification { def folder = Files.createTempDirectory('test') def file = folder.resolve('nope') def gr = new GraphObserver(file) - gr.dag = test_dag + gr.abstractDag = dag when: gr.onFlowComplete() @@ -217,34 +217,34 @@ class GraphObserverTest extends Specification { then: observer.name == 'hello-world' observer.format == 'dot' - observer.createRender() instanceof DotRenderer + observer.createRenderer() instanceof DotRenderer when: observer = new GraphObserver(Paths.get('/path/to/TheGraph.html')) then: observer.name == 'TheGraph' observer.format == 'html' - observer.createRender() instanceof CytoscapeHtmlRenderer + observer.createRenderer() instanceof CytoscapeHtmlRenderer when: observer = new GraphObserver(Paths.get('/path/to/TheGraph.mmd')) then: observer.name == 'TheGraph' observer.format == 'mmd' - observer.createRender() instanceof MermaidRenderer + observer.createRenderer() instanceof MermaidRenderer when: observer = new GraphObserver(Paths.get('/path/to/TheGraph.SVG')) then: observer.name == 'TheGraph' observer.format == 'svg' - observer.createRender() instanceof GraphvizRenderer + observer.createRenderer() instanceof GraphvizRenderer when: observer = new GraphObserver(Paths.get('/path/to/anonymous')) then: observer.name == 'anonymous' observer.format == 'dot' - observer.createRender() instanceof DotRenderer + observer.createRenderer() instanceof DotRenderer } } From 9f11e4b193b9187a928992416115df1bbd6b4a61 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 29 Mar 2023 11:18:40 -0500 Subject: [PATCH 04/20] Use path-based APIs to get file metadata Signed-off-by: Ben Sherman --- .../nextflow/processor/TaskHandler.groovy | 7 ++--- .../main/nextflow/extension/FilesEx.groovy | 17 +++++++++-- .../main/nextflow/file/ETagAwareFile.groovy | 29 +++++++++++++++++++ .../src/main/com/upplication/s3fs/S3Path.java | 11 ++++++- .../cloud/azure/nio/AzFileAttributes.groovy | 8 +++++ .../nextflow/cloud/azure/nio/AzPath.groovy | 8 ++++- 6 files changed, 71 insertions(+), 9 deletions(-) create mode 100644 modules/nf-commons/src/main/nextflow/file/ETagAwareFile.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskHandler.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskHandler.groovy index 61efe65c18..44212e8cd8 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskHandler.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskHandler.groovy @@ -19,6 +19,7 @@ package nextflow.processor import static nextflow.processor.TaskStatus.* +import java.nio.file.Files import java.nio.file.NoSuchFileException import groovy.json.JsonBuilder @@ -229,13 +230,11 @@ abstract class TaskHandler { ] }, outputs: task.getOutputsByType(FileOutParam).values().flatten().collect { path -> - final file = path.toFile() - [ name: path.name, path: path.toString(), - size: file.size(), - checksum: file.isFile() ? FilesEx.getChecksum(path) : null + size: Files.size(path), + checksum: FilesEx.getChecksum(path) ] } ] diff --git a/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy b/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy index 9d79cd0a1b..1bac3c19db 100644 --- a/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy +++ b/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy @@ -35,6 +35,7 @@ import java.nio.file.attribute.FileAttribute import java.nio.file.attribute.FileTime import java.nio.file.attribute.PosixFilePermission import java.nio.file.attribute.PosixFilePermissions +import java.security.DigestInputStream import java.security.MessageDigest import groovy.transform.CompileStatic @@ -42,6 +43,7 @@ import groovy.transform.PackageScope import groovy.transform.stc.ClosureParams import groovy.transform.stc.FromString import groovy.util.logging.Slf4j +import nextflow.file.ETagAwareFile import nextflow.file.FileHelper import nextflow.file.FileSystemPathFactory import nextflow.io.ByteBufferBackedInputStream @@ -1603,9 +1605,18 @@ class FilesEx { } static String getChecksum(Path path) { - final data = Files.readAllBytes(path) - final hash = MessageDigest.getInstance("MD5").digest(data) + if( Files.isDirectory(path) ) + return null + + if( path instanceof ETagAwareFile ) + return ((ETagAwareFile)path).getETag() + + final md = MessageDigest.getInstance('MD5') + final is = Files.newInputStream(path) + final dis = new DigestInputStream(is, md) + + while( dis.read() != -1 ) {} - new BigInteger(1, hash).toString(16) + new BigInteger(1, md.digest()).toString(16) } } diff --git a/modules/nf-commons/src/main/nextflow/file/ETagAwareFile.groovy b/modules/nf-commons/src/main/nextflow/file/ETagAwareFile.groovy new file mode 100644 index 0000000000..f1c40073b3 --- /dev/null +++ b/modules/nf-commons/src/main/nextflow/file/ETagAwareFile.groovy @@ -0,0 +1,29 @@ +/* + * Copyright 2013-2023, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.file + +/** + * Defines the interface for files that have an ETag + * + * @author Ben Sherman + */ +interface ETagAwareFile { + + String getETag() + +} diff --git a/plugins/nf-amazon/src/main/com/upplication/s3fs/S3Path.java b/plugins/nf-amazon/src/main/com/upplication/s3fs/S3Path.java index dfd12cea08..9e9688945e 100644 --- a/plugins/nf-amazon/src/main/com/upplication/s3fs/S3Path.java +++ b/plugins/nf-amazon/src/main/com/upplication/s3fs/S3Path.java @@ -66,13 +66,14 @@ import com.google.common.base.Splitter; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; +import nextflow.file.ETagAwareFile; import nextflow.file.TagAwareFile; import static com.google.common.collect.Iterables.concat; import static com.google.common.collect.Iterables.filter; import static com.google.common.collect.Iterables.transform; import static java.lang.String.format; -public class S3Path implements Path, TagAwareFile { +public class S3Path implements Path, ETagAwareFile, TagAwareFile { public static final String PATH_SEPARATOR = "/"; /** @@ -571,6 +572,14 @@ public String getContentType() { return contentType; } + @Override + public String getETag() { + return fileSystem + .getClient() + .getObjectMetadata(getBucket(), getKey()) + .getETag(); + } + // ~ helpers methods private static Function strip(final String ... strs) { diff --git a/plugins/nf-azure/src/main/nextflow/cloud/azure/nio/AzFileAttributes.groovy b/plugins/nf-azure/src/main/nextflow/cloud/azure/nio/AzFileAttributes.groovy index a955888d70..ba07cda22f 100644 --- a/plugins/nf-azure/src/main/nextflow/cloud/azure/nio/AzFileAttributes.groovy +++ b/plugins/nf-azure/src/main/nextflow/cloud/azure/nio/AzFileAttributes.groovy @@ -46,6 +46,8 @@ class AzFileAttributes implements BasicFileAttributes { private String objectId + private String etag + static AzFileAttributes root() { new AzFileAttributes(size: 0, objectId: '/', directory: true) } @@ -60,6 +62,7 @@ class AzFileAttributes implements BasicFileAttributes { updateTime = time(props.getLastModified()) directory = client.blobName.endsWith('/') size = props.getBlobSize() + etag = props.getETag() } AzFileAttributes(String containerName, BlobItem item) { @@ -69,6 +72,7 @@ class AzFileAttributes implements BasicFileAttributes { creationTime = time(item.properties.getCreationTime()) updateTime = time(item.properties.getLastModified()) size = item.properties.getContentLength() + etag = item.properties.getETag() } } @@ -144,6 +148,10 @@ class AzFileAttributes implements BasicFileAttributes { return objectId } + String getETag() { + return etag + } + @Override boolean equals( Object obj ) { if( this.class != obj?.class ) return false diff --git a/plugins/nf-azure/src/main/nextflow/cloud/azure/nio/AzPath.groovy b/plugins/nf-azure/src/main/nextflow/cloud/azure/nio/AzPath.groovy index 2f654b4ad8..3bdd222f6b 100644 --- a/plugins/nf-azure/src/main/nextflow/cloud/azure/nio/AzPath.groovy +++ b/plugins/nf-azure/src/main/nextflow/cloud/azure/nio/AzPath.groovy @@ -29,6 +29,7 @@ import com.azure.storage.blob.models.BlobItem import groovy.transform.CompileStatic import groovy.transform.EqualsAndHashCode import groovy.transform.PackageScope +import nextflow.file.ETagAwareFile /** * Implements Azure path object @@ -37,7 +38,7 @@ import groovy.transform.PackageScope */ @CompileStatic @EqualsAndHashCode(includes = 'fs,path,directory', includeFields = true) -class AzPath implements Path { +class AzPath implements Path, ETagAwareFile { private AzFileSystem fs @@ -305,6 +306,11 @@ class AzPath implements Path { return this.toString() <=> other.toString() } + @Override + String getETag() { + return attributes.getETag() + } + String getContainerName() { if( path.isAbsolute() ) { path.nameCount==0 ? '/' : path.getName(0) From 84568928fc483ec4c1c55ae415b20f006a52f4d0 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Thu, 30 Mar 2023 09:57:20 -0500 Subject: [PATCH 05/20] Use buffer to compute checksum Signed-off-by: Ben Sherman --- .../src/main/nextflow/extension/FilesEx.groovy | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy b/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy index df77ee2023..026d0dbc92 100644 --- a/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy +++ b/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy @@ -34,7 +34,6 @@ import java.nio.file.attribute.FileAttribute import java.nio.file.attribute.FileTime import java.nio.file.attribute.PosixFilePermission import java.nio.file.attribute.PosixFilePermissions -import java.security.DigestInputStream import java.security.MessageDigest import groovy.transform.CompileStatic @@ -1610,11 +1609,13 @@ class FilesEx { if( path instanceof ETagAwareFile ) return ((ETagAwareFile)path).getETag() - final md = MessageDigest.getInstance('MD5') final is = Files.newInputStream(path) - final dis = new DigestInputStream(is, md) + final md = MessageDigest.getInstance('MD5') + final buf = new byte[16 << 10] - while( dis.read() != -1 ) {} + int len + while( (len=is.read(buf)) != -1 ) + md.update(buf, 0, len) new BigInteger(1, md.digest()).toString(16) } From e81e584c27172f76dd65d8ed3b189e4b3e61ed9f Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 26 Apr 2023 11:54:54 -0500 Subject: [PATCH 06/20] Replace synchronized with lock Signed-off-by: Ben Sherman --- docs/config.md | 3 +- .../groovy/nextflow/dag/ConcreteDAG.groovy | 38 ++++++++++++++----- .../groovy/nextflow/dag/DagRenderer.groovy | 2 +- 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/docs/config.md b/docs/config.md index fec1b46930..a6bccaca22 100644 --- a/docs/config.md +++ b/docs/config.md @@ -484,8 +484,7 @@ The following settings are available: : When `true` overwrites any existing DAG file with the same name. `dag.type` -: Can be `abstract` to render the abstract DAG or `concrete` to render the concrete (task) DAG (default: `abstract`). - +: Can be `abstract` to render the abstract (process) DAG or `concrete` to render the concrete (task) DAG (default: `abstract`). Read the {ref}`dag-visualisation` page to learn more about the execution graph that can be generated by Nextflow. diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/ConcreteDAG.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/ConcreteDAG.groovy index 5768916d16..c580f62147 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/ConcreteDAG.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/ConcreteDAG.groovy @@ -18,6 +18,8 @@ package nextflow.dag import java.nio.file.Path import java.util.regex.Pattern +import java.util.concurrent.locks.Lock +import java.util.concurrent.locks.ReentrantLock import groovy.transform.MapConstructor import groovy.transform.ToString @@ -32,14 +34,20 @@ import nextflow.script.params.FileOutParam @Slf4j class ConcreteDAG { - Map nodes = new HashMap<>(100) + private Lock sync = new ReentrantLock() + + private Map nodes = new HashMap<>(100) + + Map getNodes() { + nodes + } /** * Add a task to the graph * * @param task */ - synchronized void addTask( TaskRun task ) { + void addTask(TaskRun task) { final hash = task.hash.toString() final label = "[${hash.substring(0,2)}/${hash.substring(2,8)}] ${task.name}" final inputs = task.getInputFilesMap() @@ -47,11 +55,17 @@ class ConcreteDAG { new Input(name: name, path: path, predecessor: getPredecessorHash(path)) } - nodes[hash] = new Task( - index: nodes.size(), - label: label, - inputs: inputs - ) + sync.lock() + try { + nodes[hash] = new Task( + index: nodes.size(), + label: label, + inputs: inputs + ) + } + finally { + sync.unlock() + } } static public String getPredecessorHash(Path path) { @@ -66,7 +80,7 @@ class ConcreteDAG { * * @param task */ - synchronized void addTaskOutputs( TaskRun task ) { + void addTaskOutputs(TaskRun task) { final hash = task.hash.toString() final outputs = task.getOutputsByType(FileOutParam) .values() @@ -75,7 +89,13 @@ class ConcreteDAG { new Output(name: path.name, path: path) } - nodes[hash].outputs = outputs + sync.lock() + try { + nodes[hash].outputs = outputs + } + finally { + sync.unlock() + } } @MapConstructor diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/DagRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/DagRenderer.groovy index ccdb8fecc0..768c9ef11e 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/DagRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/DagRenderer.groovy @@ -26,7 +26,7 @@ import java.nio.file.Path trait DagRenderer { /** - * Render an abstract (process/operator) DAG. + * Render an abstract (process) DAG. */ void renderAbstractGraph(DAG dag, Path file) { throw new UnsupportedOperationException("Abstract graph rendering is not supported for this file format") From 35bac943d7f5d51806ef6267b23f0f51311d2145 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Sat, 29 Apr 2023 10:49:22 -0500 Subject: [PATCH 07/20] Refactor task graph to not depend on task directory naming Signed-off-by: Ben Sherman --- .../src/main/groovy/nextflow/Session.groovy | 2 +- .../groovy/nextflow/dag/ConcreteDAG.groovy | 116 ++++++++++-------- .../nextflow/dag/MermaidRenderer.groovy | 32 ++--- .../nextflow/processor/TaskHandler.groovy | 28 ----- .../nextflow/dag/ConcreteDAGTest.groovy | 85 +++++++++---- .../nextflow/dag/MermaidRendererTest.groovy | 57 ++++----- .../nextflow/processor/TaskHandlerTest.groovy | 25 ---- 7 files changed, 163 insertions(+), 182 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index 87660008ac..b8bcf0e45b 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -1014,7 +1014,7 @@ class Session implements ISession { cache.putTaskAsync(handler, trace) // save the task meta file to the task directory - handler.writeMetaFile() + concreteDag.writeMetaFile(handler.task) // notify the event to the observers for( int i=0; i */ @Slf4j +@CompileStatic class ConcreteDAG { - private Lock sync = new ReentrantLock() + private Map vertices = new HashMap<>() - private Map nodes = new HashMap<>(100) + private Map taskLookup = new HashMap<>() - Map getNodes() { - nodes - } + private Lock sync = new ReentrantLock() + + Map getVertices() { vertices } /** - * Add a task to the graph + * Add a task to the graph. * * @param task */ void addTask(TaskRun task) { final hash = task.hash.toString() final label = "[${hash.substring(0,2)}/${hash.substring(2,8)}] ${task.name}" - final inputs = task.getInputFilesMap() - .collect { name, path -> - new Input(name: name, path: path, predecessor: getPredecessorHash(path)) - } + final inputs = task.getInputFilesMap().values() as Set sync.lock() try { - nodes[hash] = new Task( - index: nodes.size(), - label: label, - inputs: inputs - ) + // add new task to graph + vertices[task] = new Vertex(vertices.size(), label, inputs) } finally { sync.unlock() } } - static public String getPredecessorHash(Path path) { - final pattern = Pattern.compile('.*/([0-9a-f]{2}/[0-9a-f]{30})') - final matcher = pattern.matcher(path.toString()) - - matcher.find() ? matcher.group(1).replace('/', '') : null - } - /** - * Add a task's outputs to the graph + * Add a task's outputs to the graph. * * @param task */ void addTaskOutputs(TaskRun task) { - final hash = task.hash.toString() - final outputs = task.getOutputsByType(FileOutParam) + final outputs = task + .getOutputsByType(FileOutParam) .values() - .flatten() - .collect { path -> - new Output(name: path.name, path: path) - } + .flatten() as Set sync.lock() try { - nodes[hash].outputs = outputs + // add task outputs to graph + vertices[task].outputs = outputs + + // add new output files to task lookup + for( Path path : outputs ) + taskLookup[path] = task } finally { sync.unlock() } } - @MapConstructor - @ToString(includeNames = true, includes = 'label', includePackage=false) - static protected class Task { + /** + * Get the vertex for the task that produced the given file. + * + * @param path + */ + Vertex getProducerVertex(Path path) { vertices[taskLookup[path]] } + + /** + * Write the metadata JSON file for a task. + * + * @param task + */ + void writeMetaFile(TaskRun task) { + final record = [ + hash: task.hash.toString(), + inputs: task.getInputFilesMap().collect { name, path -> + [ + name: name, + path: path.toUriString(), + predecessor: taskLookup[path]?.hash?.toString() + ] + }, + outputs: vertices[task].outputs.collect { path -> + [ + name: path.name, + path: path.toUriString(), + size: Files.size(path), + checksum: FilesEx.getChecksum(path) + ] + } + ] + + task.workDir.resolve(TaskRun.CMD_META).text = new JsonBuilder(record).toString() + } + + @TupleConstructor(excludes = 'outputs') + static class Vertex { int index String label - List inputs - List outputs + Set inputs + Set outputs String getSlug() { "t${index}" } - } - - @MapConstructor - static protected class Input { - String name - Path path - String predecessor - } - @MapConstructor - static protected class Output { - String name - Path path + @Override + String toString() { label } } } diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy index fa8f6f602f..58fce81931 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy @@ -73,7 +73,7 @@ class MermaidRenderer implements DagRenderer { } @Override - void renderConcreteGraph(ConcreteDAG graph, Path file) { + void renderConcreteGraph(ConcreteDAG dag, Path file) { def renderedOutputs = [] as Set def numInputs = 0 def numOutputs = 0 @@ -82,32 +82,32 @@ class MermaidRenderer implements DagRenderer { lines << "flowchart TD" // render tasks and task inputs - graph.nodes.values().each { task -> - // render task node - lines << " ${task.getSlug()}[\"${task.label}\"]" - - task.inputs.each { input -> - // render task input from predecessor - if( input.predecessor != null ) { - final pred = graph.nodes[input.predecessor] - lines << " ${pred.getSlug()} -->|${input.name}| ${task.getSlug()}" - renderedOutputs << input.path + dag.vertices.each { task, vertex -> + // render task vertex + lines << " ${vertex.getSlug()}[\"${vertex.label}\"]" + + vertex.inputs.each { path -> + // render task input from predecessor task + final pred = dag.getProducerVertex(path) + if( pred != null ) { + lines << " ${pred.getSlug()} -->|${path.name}| ${vertex.getSlug()}" + renderedOutputs << path } // render task input from source node else { numInputs += 1 - lines << " i${numInputs}(( )) -->|${input.name}| ${task.getSlug()}" + lines << " i${numInputs}(( )) -->|${path}| ${vertex.getSlug()}" } } } // render task outputs with sink nodes - graph.nodes.values().each { task -> - task.outputs.each { output -> - if( output.path !in renderedOutputs ) { + dag.vertices.each { task, vertex -> + vertex.outputs.each { path -> + if( path !in renderedOutputs ) { numOutputs += 1 - lines << " ${task.getSlug()} -->|${output.name}| o${numOutputs}(( ))" + lines << " ${vertex.getSlug()} -->|${path.name}| o${numOutputs}(( ))" } } } diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskHandler.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskHandler.groovy index 42ee522185..a94ded210f 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskHandler.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskHandler.groovy @@ -18,14 +18,9 @@ package nextflow.processor import static nextflow.processor.TaskStatus.* -import java.nio.file.Files import java.nio.file.NoSuchFileException -import groovy.json.JsonBuilder import groovy.util.logging.Slf4j -import nextflow.dag.ConcreteDAG -import nextflow.extension.FilesEx -import nextflow.script.params.FileOutParam import nextflow.trace.TraceRecord /** * Actions to handle the underlying job running the user task. @@ -218,29 +213,6 @@ abstract class TaskHandler { return record } - void writeMetaFile() { - final record = [ - hash: task.hash.toString(), - inputs: task.getInputFilesMap().collect { name, path -> - [ - name: name, - path: path.toString(), - predecessor: ConcreteDAG.getPredecessorHash(path) - ] - }, - outputs: task.getOutputsByType(FileOutParam).values().flatten().collect { path -> - [ - name: path.name, - path: path.toString(), - size: Files.size(path), - checksum: FilesEx.getChecksum(path) - ] - } - ] - - task.workDir.resolve(TaskRun.CMD_META).text = new JsonBuilder(record).toString() - } - /** * Determine if a process can be forked i.e. can launch * a parallel task execution. This is only enforced when diff --git a/modules/nextflow/src/test/groovy/nextflow/dag/ConcreteDAGTest.groovy b/modules/nextflow/src/test/groovy/nextflow/dag/ConcreteDAGTest.groovy index a666372169..66342f9498 100644 --- a/modules/nextflow/src/test/groovy/nextflow/dag/ConcreteDAGTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/dag/ConcreteDAGTest.groovy @@ -27,27 +27,27 @@ import spock.lang.Specification */ class ConcreteDAGTest extends Specification { - def 'should add task nodes and outputs' () { + def 'should add task vertices and outputs' () { given: def task1 = Mock(TaskRun) { - getHash() >> HashCode.fromString('00112233445566778899aabbccddeeff') + getHash() >> HashCode.fromString('00112233') getName() >> 'foo' getInputFilesMap() >> [ 'data.txt': Paths.get('/inputs/data.txt') ] getOutputsByType(_) >> [ - 'data.foo': Paths.get('/work/00/112233445566778899aabbccddeeff/data.foo') + 'data.foo': Paths.get('/work/00112233/data.foo') ] } def task2 = Mock(TaskRun) { - getHash() >> HashCode.fromString('aabbccddeeff00112233445566778899') + getHash() >> HashCode.fromString('aabbccdd') getName() >> 'bar' getInputFilesMap() >> [ - 'data.foo': Paths.get('/work/00/112233445566778899aabbccddeeff/data.foo') + 'data.foo': Paths.get('/work/00112233/data.foo') ] getOutputsByType(_) >> [ - 'data.bar': Paths.get('/work/aa/bbccddeeff00112233445566778899/data.bar') + 'data.bar': Paths.get('/work/aabbccdd/data.bar') ] } def dag = new ConcreteDAG() @@ -55,32 +55,65 @@ class ConcreteDAGTest extends Specification { when: dag.addTask( task1 ) dag.addTask( task2 ) - def node1 = dag.nodes['00112233445566778899aabbccddeeff'] - def node2 = dag.nodes['aabbccddeeff00112233445566778899'] + def v1 = dag.vertices[task1] + def v2 = dag.vertices[task2] then: - node1.index == 0 - node1.label == '[00/112233] foo' - node1.inputs.size() == 1 - node1.inputs[0].name == 'data.txt' - node1.inputs[0].path == Paths.get('/inputs/data.txt') - node1.inputs[0].predecessor == null - node2.index == 1 - node2.label == '[aa/bbccdd] bar' - node2.inputs.size() == 1 - node2.inputs[0].name == 'data.foo' - node2.inputs[0].path == Paths.get('/work/00/112233445566778899aabbccddeeff/data.foo') - node2.inputs[0].predecessor == '00112233445566778899aabbccddeeff' + v1.index == 0 + v1.label == '[00/112233] foo' + v1.inputs.size() == 1 + v1.inputs[0] == Paths.get('/inputs/data.txt') + and: + v2.index == 1 + v2.label == '[aa/bbccdd] bar' + v2.inputs.size() == 1 + v2.inputs[0] == Paths.get('/work/00112233/data.foo') when: dag.addTaskOutputs( task1 ) dag.addTaskOutputs( task2 ) then: - node1.outputs.size() == 1 - node1.outputs[0].name == 'data.foo' - node1.outputs[0].path == Paths.get('/work/00/112233445566778899aabbccddeeff/data.foo') - node2.outputs.size() == 1 - node2.outputs[0].name == 'data.bar' - node2.outputs[0].path == Paths.get('/work/aa/bbccddeeff00112233445566778899/data.bar') + v1.outputs.size() == 1 + v1.outputs[0] == Paths.get('/work/00112233/data.foo') + and: + v2.outputs.size() == 1 + v2.outputs[0] == Paths.get('/work/aabbccdd/data.bar') + and: + dag.getProducerVertex(v1.inputs[0]) == null + dag.getProducerVertex(v2.inputs[0]) == v1 + } + + def 'should write meta file' () { + + given: + def folder = File.createTempDir() + def outputFile = new File(folder, 'data.bar') ; outputFile.text = 'bar' + + def task1 = Mock(TaskRun) { + name >> 'foo' + hash >> HashCode.fromString('00112233') + getInputFilesMap() >> [ 'data.txt': Paths.get('/inputs/data.txt') ] + getOutputsByType(_) >> [ 'data.foo': Paths.get('/work/00112233/data.foo') ] + } + def task2 = Mock(TaskRun) { + name >> 'bar' + workDir >> folder.toPath() + hash >> HashCode.fromString('aabbccdd') + getInputFilesMap() >> [ 'data.foo': Paths.get('/work/00112233/data.foo') ] + getOutputsByType(_) >> [ 'data.bar': outputFile.toPath() ] + } + def dag = new ConcreteDAG() + + when: + dag.addTask(task1) + dag.addTaskOutputs(task1) + dag.addTask(task2) + dag.addTaskOutputs(task2) + dag.writeMetaFile(task2) + then: + task2.workDir.resolve(TaskRun.CMD_META).text == """{"hash":"aabbccdd","inputs":[{"name":"data.foo","path":"/work/00112233/data.foo","predecessor":"00112233"}],"outputs":[{"name":"data.bar","path":"${folder}/data.bar","size":3,"checksum":"37b51d194a7513e45b56f6524f2d51f2"}]}""" + + cleanup: + folder.delete() } } diff --git a/modules/nextflow/src/test/groovy/nextflow/dag/MermaidRendererTest.groovy b/modules/nextflow/src/test/groovy/nextflow/dag/MermaidRendererTest.groovy index b0d1226727..f237f37b99 100644 --- a/modules/nextflow/src/test/groovy/nextflow/dag/MermaidRendererTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/dag/MermaidRendererTest.groovy @@ -21,6 +21,7 @@ import java.nio.file.Paths import groovyx.gpars.dataflow.DataflowQueue import nextflow.Session +import nextflow.processor.TaskRun import spock.lang.Specification /** @@ -70,43 +71,27 @@ class MermaidRendererTest extends Specification { given: def file = Files.createTempFile('test', null) + def task1 = Mock(TaskRun) + def task2 = Mock(TaskRun) + def output1 = Paths.get('/work/012345/data.foo') + def v1 = new ConcreteDAG.Vertex( + index: 1, + label: 'foo', + inputs: [ Paths.get('/inputs/data.txt') ], + outputs: [ output1 ] + ) + def v2 = new ConcreteDAG.Vertex( + index: 2, + label: 'bar', + inputs: [ output1 ], + outputs: [ Paths.get('/work/abcdef/data.bar') ] + ) def dag = Mock(ConcreteDAG) { - nodes >> [ - '012345': new ConcreteDAG.Task( - index: 1, - label: 'foo', - inputs: [ - new ConcreteDAG.Input( - name: 'data.txt', - path: Paths.get('/inputs/data.txt'), - predecessor: null - ) - ], - outputs: [ - new ConcreteDAG.Output( - name: 'data.foo', - path: Paths.get('/work/012345/data.foo'), - ) - ] - ), - 'abcdef': new ConcreteDAG.Task( - index: 2, - label: 'bar', - inputs: [ - new ConcreteDAG.Input( - name: 'data.foo', - path: Paths.get('/work/012345/data.foo'), - predecessor: '012345' - ) - ], - outputs: [ - new ConcreteDAG.Output( - name: 'data.bar', - path: Paths.get('/work/abcdef/data.bar'), - ) - ] - ) + vertices >> [ + (task1): v1, + (task2): v2 ] + getProducerVertex(output1) >> v1 } when: @@ -116,7 +101,7 @@ class MermaidRendererTest extends Specification { ''' flowchart TD t1["foo"] - i1(( )) -->|data.txt| t1 + i1(( )) -->|/inputs/data.txt| t1 t2["bar"] t1 -->|data.foo| t2 t2 -->|data.bar| o1(( )) diff --git a/modules/nextflow/src/test/groovy/nextflow/processor/TaskHandlerTest.groovy b/modules/nextflow/src/test/groovy/nextflow/processor/TaskHandlerTest.groovy index ad94ce47a7..1846fb0838 100644 --- a/modules/nextflow/src/test/groovy/nextflow/processor/TaskHandlerTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/processor/TaskHandlerTest.groovy @@ -16,10 +16,8 @@ package nextflow.processor -import java.nio.file.Paths import java.util.concurrent.atomic.LongAdder -import com.google.common.hash.HashCode import nextflow.Session import nextflow.executor.Executor import nextflow.util.Duration @@ -137,29 +135,6 @@ class TaskHandlerTest extends Specification { } - def 'should write meta file' () { - - given: - def folder = File.createTempDir() - def outputFile = new File(folder, 'bar.txt') ; outputFile.text = 'bar' - def task = Mock(TaskRun) { - hash >> HashCode.fromString('aabbccddeeff00112233445566778899') - workDir >> folder.toPath() - getInputFilesMap() >> [ 'foo.txt': Paths.get('/tmp/00/112233445566778899aabbccddeeff/foo.txt') ] - getOutputsByType(_) >> [ 'bar.txt': outputFile.toPath() ] - } - def handler = [:] as TaskHandler - handler.task = task - - when: - handler.writeMetaFile() - then: - task.workDir.resolve(TaskRun.CMD_META).text == """{"hash":"aabbccddeeff00112233445566778899","inputs":[{"name":"foo.txt","path":"/tmp/00/112233445566778899aabbccddeeff/foo.txt","predecessor":"00112233445566778899aabbccddeeff"}],"outputs":[{"name":"bar.txt","path":"${folder}/bar.txt","size":3,"checksum":"37b51d194a7513e45b56f6524f2d51f2"}]}""" - - cleanup: - folder.delete() - } - LongAdder _adder(Integer x) { if( x != null ) { def adder = new LongAdder() From 8bbe3d76d15508ce6decee939ea02606d4be7e38 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Sat, 29 Apr 2023 11:09:22 -0500 Subject: [PATCH 08/20] Replace abstract/concrete with process/task Signed-off-by: Ben Sherman --- docs/config.md | 2 +- .../src/main/groovy/nextflow/Session.groovy | 10 +++--- .../nextflow/dag/CytoscapeHtmlRenderer.groovy | 2 +- .../nextflow/dag/CytoscapeJsRenderer.groovy | 2 +- .../src/main/groovy/nextflow/dag/DAG.groovy | 2 +- .../groovy/nextflow/dag/DagRenderer.groovy | 12 +++---- .../groovy/nextflow/dag/DotRenderer.groovy | 2 +- .../groovy/nextflow/dag/GexfRenderer.groovy | 2 +- .../nextflow/dag/GraphVizRenderer.groovy | 2 +- .../nextflow/dag/MermaidRenderer.groovy | 4 +-- .../groovy/nextflow/dag/NodeMarker.groovy | 6 ++-- .../{ConcreteDAG.groovy => TaskDAG.groovy} | 4 +-- .../nextflow/processor/TaskProcessor.groovy | 2 +- .../trace/DefaultObserverFactory.groovy | 2 +- .../nextflow/trace/GraphObserver.groovy | 32 +++++++++---------- .../nextflow/dag/DotRendererTest.groovy | 2 +- .../nextflow/dag/GexfRendererTest.groovy | 2 +- .../nextflow/dag/MermaidRendererTest.groovy | 14 ++++---- ...creteDAGTest.groovy => TaskDAGTest.groovy} | 6 ++-- .../nextflow/trace/GraphObserverTest.groovy | 12 +++---- 20 files changed, 61 insertions(+), 61 deletions(-) rename modules/nextflow/src/main/groovy/nextflow/dag/{ConcreteDAG.groovy => TaskDAG.groovy} (97%) rename modules/nextflow/src/test/groovy/nextflow/dag/{ConcreteDAGTest.groovy => TaskDAGTest.groovy} (96%) diff --git a/docs/config.md b/docs/config.md index a6bccaca22..75ffa9df84 100644 --- a/docs/config.md +++ b/docs/config.md @@ -484,7 +484,7 @@ The following settings are available: : When `true` overwrites any existing DAG file with the same name. `dag.type` -: Can be `abstract` to render the abstract (process) DAG or `concrete` to render the concrete (task) DAG (default: `abstract`). +: Can be `'process'` to render the process DAG or `'task'` to render the task DAG (default: `'process'`). Read the {ref}`dag-visualisation` page to learn more about the execution graph that can be generated by Nextflow. diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index b8bcf0e45b..82e5057f24 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -39,7 +39,7 @@ import nextflow.conda.CondaConfig import nextflow.config.Manifest import nextflow.container.ContainerConfig import nextflow.dag.DAG -import nextflow.dag.ConcreteDAG +import nextflow.dag.TaskDAG import nextflow.exception.AbortOperationException import nextflow.exception.AbortSignalException import nextflow.exception.IllegalConfigException @@ -194,7 +194,7 @@ class Session implements ISession { private DAG dag - private ConcreteDAG concreteDag + private TaskDAG taskDag private CacheDB cache @@ -348,7 +348,7 @@ class Session implements ISession { // -- DAG object this.dag = new DAG() - this.concreteDag = new ConcreteDAG() + this.taskDag = new TaskDAG() // -- init work dir this.workDir = ((config.workDir ?: 'work') as Path).complete() @@ -803,7 +803,7 @@ class Session implements ISession { DAG getDag() { this.dag } - ConcreteDAG getConcreteDAG() { this.concreteDag } + TaskDAG getTaskDag() { this.taskDag } ExecutorService getExecService() { execService } @@ -1014,7 +1014,7 @@ class Session implements ISession { cache.putTaskAsync(handler, trace) // save the task meta file to the task directory - concreteDag.writeMetaFile(handler.task) + taskDag.writeMetaFile(handler.task) // notify the event to the observers for( int i=0; i */ diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/DagRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/DagRenderer.groovy index 768c9ef11e..1c3b61d7ec 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/DagRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/DagRenderer.groovy @@ -26,16 +26,16 @@ import java.nio.file.Path trait DagRenderer { /** - * Render an abstract (process) DAG. + * Render a process DAG. */ - void renderAbstractGraph(DAG dag, Path file) { - throw new UnsupportedOperationException("Abstract graph rendering is not supported for this file format") + void renderProcessGraph(DAG dag, Path file) { + throw new UnsupportedOperationException("Process DAG rendering is not supported for this file format") } /** - * Render a concrete (task) DAG. + * Render a task DAG. */ - void renderConcreteGraph(ConcreteDAG dag, Path file) { - throw new UnsupportedOperationException("Concrete graph rendering is not supported for this file format") + void renderTaskGraph(TaskDAG dag, Path file) { + throw new UnsupportedOperationException("Task DAG rendering is not supported for this file format") } } diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/DotRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/DotRenderer.groovy index c5411f820b..357b28c685 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/DotRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/DotRenderer.groovy @@ -45,7 +45,7 @@ class DotRenderer implements DagRenderer { static String normalise(String str) { str.replaceAll(/[^0-9_A-Za-z]/,'') } @Override - void renderAbstractGraph(DAG dag, Path file) { + void renderProcessGraph(DAG dag, Path file) { file.text = renderNetwork(dag) } diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/GexfRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/GexfRenderer.groovy index 43aaba6e9c..45b107f4e0 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/GexfRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/GexfRenderer.groovy @@ -41,7 +41,7 @@ class GexfRenderer implements DagRenderer { } @Override - void renderAbstractGraph(DAG dag, Path file) { + void renderProcessGraph(DAG dag, Path file) { final Charset charset = Charset.defaultCharset() Writer bw = Files.newBufferedWriter(file, charset) final XMLOutputFactory xof = XMLOutputFactory.newFactory() diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/GraphVizRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/GraphVizRenderer.groovy index 0fddcee58d..7068c1c7bf 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/GraphVizRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/GraphVizRenderer.groovy @@ -41,7 +41,7 @@ class GraphvizRenderer implements DagRenderer { * See http://www.graphviz.org for more info. */ @Override - void renderAbstractGraph(DAG dag, Path target) { + void renderProcessGraph(DAG dag, Path target) { def result = Files.createTempFile('nxf-',".$format") def temp = Files.createTempFile('nxf-','.dot') // save the DAG as `dot` to a temp file diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy index 58fce81931..7ec8ce8920 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy @@ -26,7 +26,7 @@ import java.nio.file.Path class MermaidRenderer implements DagRenderer { @Override - void renderAbstractGraph(DAG dag, Path file) { + void renderProcessGraph(DAG dag, Path file) { def lines = [] lines << "flowchart TD" @@ -73,7 +73,7 @@ class MermaidRenderer implements DagRenderer { } @Override - void renderConcreteGraph(ConcreteDAG dag, Path file) { + void renderTaskGraph(TaskDAG dag, Path file) { def renderedOutputs = [] as Set def numInputs = 0 def numOutputs = 0 diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/NodeMarker.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/NodeMarker.groovy index 18550b4cc6..68d56a12d5 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/NodeMarker.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/NodeMarker.groovy @@ -40,7 +40,7 @@ class NodeMarker { static private Session getSession() { Global.session as Session } /** - * Creates a vertex in the abstract DAG representing a computing `process` + * Creates a new vertex in the DAG representing a computing `process` * * @param label The label associated to the process * @param inputs The list of inputs entering in the process @@ -52,7 +52,7 @@ class NodeMarker { } /** - * Creates a vertex in the abstract DAG representing a dataflow operator + * Creates a new DAG vertex representing a dataflow operator * * @param label The operator label * @param inputs The operator input(s). It can be either a single channel or a list of channels. @@ -66,7 +66,7 @@ class NodeMarker { } /** - * Creates a vertex in the abstract DAG representing a dataflow channel source. + * Creates a vertex in the DAG representing a dataflow channel source. * * @param label The node description * @param source Either a dataflow channel or a list of channel. diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/ConcreteDAG.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy similarity index 97% rename from modules/nextflow/src/main/groovy/nextflow/dag/ConcreteDAG.groovy rename to modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy index 678443a067..3669b73133 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/ConcreteDAG.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy @@ -29,13 +29,13 @@ import nextflow.extension.FilesEx import nextflow.processor.TaskRun import nextflow.script.params.FileOutParam /** - * Model the conrete (task) graph of a pipeline execution. + * Model the task graph of a pipeline execution. * * @author Ben Sherman */ @Slf4j @CompileStatic -class ConcreteDAG { +class TaskDAG { private Map vertices = new HashMap<>() diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy index 147de2d0d6..45af17c644 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy @@ -543,7 +543,7 @@ class TaskProcessor { def invoke = new InvokeTaskAdapter(this, opInputs.size()) session.allOperators << (operator = new DataflowOperator(group, params, invoke)) - // notify the creation of a new process in the abstract DAG + // notify the creation of a new vertex the execution DAG NodeMarker.addProcessNode(this, config.getInputs(), config.getOutputs()) // fix issue #41 diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy index 72a8b31d5f..0e536a1dd4 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy @@ -93,7 +93,7 @@ class DefaultObserverFactory implements TraceObserverFactory { if( !fileName ) fileName = GraphObserver.DEF_FILE_NAME def traceFile = (fileName as Path).complete() def observer = new GraphObserver(traceFile) - config.navigate('dag.type') { observer.type = it ?: 'abstract' } + config.navigate('dag.type') { observer.type = it ?: 'process' } config.navigate('dag.overwrite') { observer.overwrite = it } result << observer } diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy index 23c565277d..13f88a2bf7 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy @@ -23,7 +23,7 @@ import groovy.util.logging.Slf4j import nextflow.Session import nextflow.dag.CytoscapeHtmlRenderer import nextflow.dag.DAG -import nextflow.dag.ConcreteDAG +import nextflow.dag.TaskDAG import nextflow.dag.DagRenderer import nextflow.dag.DotRenderer import nextflow.dag.GexfRenderer @@ -46,11 +46,11 @@ class GraphObserver implements TraceObserver { private Path file - private String type = 'abstract' + private String type = 'process' - private DAG abstractDag + private DAG dag - private ConcreteDAG concreteDag + private TaskDAG taskDag private String name @@ -71,8 +71,8 @@ class GraphObserver implements TraceObserver { @Override void onFlowCreate(Session session) { - this.abstractDag = session.dag - this.concreteDag = session.concreteDag + this.dag = session.dag + this.taskDag = session.taskDag // check file existance final attrs = FileHelper.readAttributes(file) if( attrs ) { @@ -85,33 +85,33 @@ class GraphObserver implements TraceObserver { @Override void onProcessSubmit(TaskHandler handler, TraceRecord trace) { - concreteDag.addTask( handler.task ) + taskDag.addTask(handler.task) } @Override void onProcessComplete(TaskHandler handler, TraceRecord trace) { - concreteDag.addTaskOutputs( handler.task ) + taskDag.addTaskOutputs(handler.task) } @Override void onProcessCached(TaskHandler handler, TraceRecord trace) { - concreteDag.addTask( handler.task ) - concreteDag.addTaskOutputs( handler.task ) + taskDag.addTask(handler.task) + taskDag.addTaskOutputs(handler.task) } @Override void onFlowComplete() { - if( type == 'abstract' ) { + if( type == 'process' ) { // -- normalise the DAG - abstractDag.normalize() + dag.normalize() // -- render it to a file - createRenderer().renderAbstractGraph(abstractDag,file) + createRenderer().renderProcessGraph(dag, file) } - else if( type == 'concrete' ) { - createRenderer().renderConcreteGraph(concreteDag,file) + else if( type == 'task' ) { + createRenderer().renderTaskGraph(taskDag, file) } else { - log.warn("Invalid DAG type '${type}', should be 'abstract' or 'concrete'") + log.warn("Invalid DAG type '${type}', should be 'process' or 'task'") } } diff --git a/modules/nextflow/src/test/groovy/nextflow/dag/DotRendererTest.groovy b/modules/nextflow/src/test/groovy/nextflow/dag/DotRendererTest.groovy index 7e5921aeef..2289546654 100644 --- a/modules/nextflow/src/test/groovy/nextflow/dag/DotRendererTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/dag/DotRendererTest.groovy @@ -54,7 +54,7 @@ class DotRendererTest extends Specification { dag.normalize() when: - new DotRenderer('TheGraph').renderAbstractGraph(dag, file) + new DotRenderer('TheGraph').renderProcessGraph(dag, file) then: file.text == ''' diff --git a/modules/nextflow/src/test/groovy/nextflow/dag/GexfRendererTest.groovy b/modules/nextflow/src/test/groovy/nextflow/dag/GexfRendererTest.groovy index 6d4a9e3539..f053276344 100644 --- a/modules/nextflow/src/test/groovy/nextflow/dag/GexfRendererTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/dag/GexfRendererTest.groovy @@ -47,7 +47,7 @@ class GexfRendererTest extends Specification { dag.normalize() when: - new GexfRenderer('TheGraph').renderAbstractGraph(dag, file.toPath()) + new GexfRenderer('TheGraph').renderProcessGraph(dag, file.toPath()) then: def graph = new XmlSlurper().parse(file); assert graph.name() == 'gexf' diff --git a/modules/nextflow/src/test/groovy/nextflow/dag/MermaidRendererTest.groovy b/modules/nextflow/src/test/groovy/nextflow/dag/MermaidRendererTest.groovy index f237f37b99..1016f209b9 100644 --- a/modules/nextflow/src/test/groovy/nextflow/dag/MermaidRendererTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/dag/MermaidRendererTest.groovy @@ -34,7 +34,7 @@ class MermaidRendererTest extends Specification { new Session() } - def 'should render an abstract graph using the `mmd` format' () { + def 'should render a process graph using the `mmd` format' () { given: def file = Files.createTempFile('test', null) def ch1 = new DataflowQueue() @@ -48,7 +48,7 @@ class MermaidRendererTest extends Specification { dag.normalize() when: - new MermaidRenderer().renderAbstractGraph(dag, file) + new MermaidRenderer().renderProcessGraph(dag, file) then: file.text == ''' @@ -67,26 +67,26 @@ class MermaidRendererTest extends Specification { file.delete() } - def 'should render a concrete graph using the `mmd` format' () { + def 'should render a task graph using the `mmd` format' () { given: def file = Files.createTempFile('test', null) def task1 = Mock(TaskRun) def task2 = Mock(TaskRun) def output1 = Paths.get('/work/012345/data.foo') - def v1 = new ConcreteDAG.Vertex( + def v1 = new TaskDAG.Vertex( index: 1, label: 'foo', inputs: [ Paths.get('/inputs/data.txt') ], outputs: [ output1 ] ) - def v2 = new ConcreteDAG.Vertex( + def v2 = new TaskDAG.Vertex( index: 2, label: 'bar', inputs: [ output1 ], outputs: [ Paths.get('/work/abcdef/data.bar') ] ) - def dag = Mock(ConcreteDAG) { + def dag = Mock(TaskDAG) { vertices >> [ (task1): v1, (task2): v2 @@ -95,7 +95,7 @@ class MermaidRendererTest extends Specification { } when: - new MermaidRenderer().renderConcreteGraph(dag, file) + new MermaidRenderer().renderTaskGraph(dag, file) then: file.text == ''' diff --git a/modules/nextflow/src/test/groovy/nextflow/dag/ConcreteDAGTest.groovy b/modules/nextflow/src/test/groovy/nextflow/dag/TaskDAGTest.groovy similarity index 96% rename from modules/nextflow/src/test/groovy/nextflow/dag/ConcreteDAGTest.groovy rename to modules/nextflow/src/test/groovy/nextflow/dag/TaskDAGTest.groovy index 66342f9498..e44ea30cba 100644 --- a/modules/nextflow/src/test/groovy/nextflow/dag/ConcreteDAGTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/dag/TaskDAGTest.groovy @@ -25,7 +25,7 @@ import spock.lang.Specification * * @author Ben Sherman */ -class ConcreteDAGTest extends Specification { +class TaskDAGTest extends Specification { def 'should add task vertices and outputs' () { @@ -50,7 +50,7 @@ class ConcreteDAGTest extends Specification { 'data.bar': Paths.get('/work/aabbccdd/data.bar') ] } - def dag = new ConcreteDAG() + def dag = new TaskDAG() when: dag.addTask( task1 ) @@ -101,7 +101,7 @@ class ConcreteDAGTest extends Specification { getInputFilesMap() >> [ 'data.foo': Paths.get('/work/00112233/data.foo') ] getOutputsByType(_) >> [ 'data.bar': outputFile.toPath() ] } - def dag = new ConcreteDAG() + def dag = new TaskDAG() when: dag.addTask(task1) diff --git a/modules/nextflow/src/test/groovy/nextflow/trace/GraphObserverTest.groovy b/modules/nextflow/src/test/groovy/nextflow/trace/GraphObserverTest.groovy index 384c737944..626ec8e3c1 100644 --- a/modules/nextflow/src/test/groovy/nextflow/trace/GraphObserverTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/trace/GraphObserverTest.groovy @@ -78,7 +78,7 @@ class GraphObserverTest extends Specification { given: def file = Files.createTempFile('nxf_','.dot') def gr = new GraphObserver(file) - gr.abstractDag = dag + gr.dag = dag when: gr.onFlowComplete() @@ -105,7 +105,7 @@ class GraphObserverTest extends Specification { given: def file = Files.createTempFile('nxf-','.html') def gr = new GraphObserver(file) - gr.abstractDag = dag + gr.dag = dag when: gr.onFlowComplete() @@ -133,7 +133,7 @@ class GraphObserverTest extends Specification { given: def file = Files.createTempFile('nxf-','.svg') def gr = new GraphObserver(file) - gr.abstractDag = dag + gr.dag = dag when: gr.onFlowComplete() @@ -151,7 +151,7 @@ class GraphObserverTest extends Specification { given: def file = Files.createTempFile('nxf-','.png') def gr = new GraphObserver(file) - gr.abstractDag = dag + gr.dag = dag when: gr.onFlowComplete() @@ -168,7 +168,7 @@ class GraphObserverTest extends Specification { given: def file = Files.createTempFile('nxf-','.pdf') def gr = new GraphObserver(file) - gr.abstractDag = dag + gr.dag = dag when: gr.onFlowComplete() @@ -185,7 +185,7 @@ class GraphObserverTest extends Specification { def folder = Files.createTempDirectory('test') def file = folder.resolve('nope') def gr = new GraphObserver(file) - gr.abstractDag = dag + gr.dag = dag when: gr.onFlowComplete() From 7ec397fd262ce0186603cae6f7921966c9e44d55 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Wed, 24 May 2023 17:53:40 +0200 Subject: [PATCH 09/20] Add support for AWS SSE env variables Signed-off-by: Paolo Di Tommaso --- .../nextflow/cloud/aws/config/AwsS3Config.groovy | 4 ++-- .../cloud/aws/config/AwsS3ConfigTest.groovy | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/config/AwsS3Config.groovy b/plugins/nf-amazon/src/main/nextflow/cloud/aws/config/AwsS3Config.groovy index e479925314..5ff28aac75 100644 --- a/plugins/nf-amazon/src/main/nextflow/cloud/aws/config/AwsS3Config.groovy +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/config/AwsS3Config.groovy @@ -50,8 +50,8 @@ class AwsS3Config { this.debug = opts.debug as Boolean this.endpoint = opts.endpoint ?: SysEnv.get('AWS_S3_ENDPOINT') this.storageClass = parseStorageClass((opts.storageClass ?: opts.uploadStorageClass) as String) // 'uploadStorageClass' is kept for legacy purposes - this.storageEncryption = parseStorageEncryption(opts.storageEncryption as String) - this.storageKmsKeyId = opts.storageKmsKeyId + this.storageEncryption = parseStorageEncryption(opts.storageEncryption as String) ?: SysEnv.get('NXF_AWS_SSE_MODE') + this.storageKmsKeyId = opts.storageKmsKeyId ?: SysEnv.get('NXF_AWS_SSE_KMS_KEY_ID') this.pathStyleAccess = opts.s3PathStyleAccess as Boolean this.s3Acl = parseS3Acl(opts.s3Acl as String) } diff --git a/plugins/nf-amazon/src/test/nextflow/cloud/aws/config/AwsS3ConfigTest.groovy b/plugins/nf-amazon/src/test/nextflow/cloud/aws/config/AwsS3ConfigTest.groovy index 116a64456b..0f5cd1f643 100644 --- a/plugins/nf-amazon/src/test/nextflow/cloud/aws/config/AwsS3ConfigTest.groovy +++ b/plugins/nf-amazon/src/test/nextflow/cloud/aws/config/AwsS3ConfigTest.groovy @@ -121,4 +121,18 @@ class AwsS3ConfigTest extends Specification { SysEnv.pop() } + + def 'should set storage encryption via env variable' () { + given: + SysEnv.push([NXF_AWS_SSE_MODE: 'aws:kms', NXF_AWS_SSE_KMS_KEY_ID: 'xyz1']) + + when: + def client = new AwsS3Config([:]) + then: + client.storageKmsKeyId == 'xyz1' + client.storageEncryption == 'aws:kms' + + cleanup: + SysEnv.pop() + } } From 49396cf6007d9ec145f778c014621bca612112c6 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Thu, 25 May 2023 21:19:54 -0500 Subject: [PATCH 10/20] Fix failing tests Signed-off-by: Ben Sherman --- .../src/main/groovy/nextflow/Session.groovy | 3 --- .../groovy/nextflow/dag/MermaidRenderer.groovy | 6 +++++- .../src/main/groovy/nextflow/dag/TaskDAG.groovy | 10 ++++++---- .../groovy/nextflow/trace/GraphObserver.groovy | 3 +++ .../groovy/nextflow/dag/MermaidRendererTest.groovy | 4 ++-- .../test/groovy/nextflow/dag/TaskDAGTest.groovy | 14 ++++++-------- 6 files changed, 22 insertions(+), 18 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index ff07c7b4e1..93cea6a753 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -1016,9 +1016,6 @@ class Session implements ISession { final trace = handler.safeTraceRecord() cache.putTaskAsync(handler, trace) - // save the task meta file to the task directory - taskDag.writeMetaFile(handler.task) - // notify the event to the observers for( int i=0; i */ +@CompileStatic class MermaidRenderer implements DagRenderer { @Override @@ -86,7 +90,7 @@ class MermaidRenderer implements DagRenderer { // render task vertex lines << " ${vertex.getSlug()}[\"${vertex.label}\"]" - vertex.inputs.each { path -> + vertex.inputs.each { name, path -> // render task input from predecessor task final pred = dag.getProducerVertex(path) if( pred != null ) { diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy index 3669b73133..6d319b028d 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy @@ -53,7 +53,7 @@ class TaskDAG { void addTask(TaskRun task) { final hash = task.hash.toString() final label = "[${hash.substring(0,2)}/${hash.substring(2,8)}] ${task.name}" - final inputs = task.getInputFilesMap().values() as Set + final inputs = task.getInputFilesMap() sync.lock() try { @@ -95,7 +95,9 @@ class TaskDAG { * * @param path */ - Vertex getProducerVertex(Path path) { vertices[taskLookup[path]] } + Vertex getProducerVertex(Path path) { + vertices[taskLookup[path]] + } /** * Write the metadata JSON file for a task. @@ -105,7 +107,7 @@ class TaskDAG { void writeMetaFile(TaskRun task) { final record = [ hash: task.hash.toString(), - inputs: task.getInputFilesMap().collect { name, path -> + inputs: vertices[task].inputs.collect { name, path -> [ name: name, path: path.toUriString(), @@ -129,7 +131,7 @@ class TaskDAG { static class Vertex { int index String label - Set inputs + Map inputs Set outputs String getSlug() { "t${index}" } diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy index 13f88a2bf7..5b7bf0521f 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy @@ -18,6 +18,7 @@ package nextflow.trace import java.nio.file.Path +import groovy.transform.CompileStatic import groovy.transform.PackageScope import groovy.util.logging.Slf4j import nextflow.Session @@ -40,6 +41,7 @@ import nextflow.processor.TaskProcessor * @author Paolo Di Tommaso */ @Slf4j +@CompileStatic class GraphObserver implements TraceObserver { static public final String DEF_FILE_NAME = "dag-${TraceHelper.launchTimestampFmt()}.dot" @@ -91,6 +93,7 @@ class GraphObserver implements TraceObserver { @Override void onProcessComplete(TaskHandler handler, TraceRecord trace) { taskDag.addTaskOutputs(handler.task) + taskDag.writeMetaFile(handler.task) } @Override diff --git a/modules/nextflow/src/test/groovy/nextflow/dag/MermaidRendererTest.groovy b/modules/nextflow/src/test/groovy/nextflow/dag/MermaidRendererTest.groovy index 1016f209b9..53992dd22d 100644 --- a/modules/nextflow/src/test/groovy/nextflow/dag/MermaidRendererTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/dag/MermaidRendererTest.groovy @@ -77,13 +77,13 @@ class MermaidRendererTest extends Specification { def v1 = new TaskDAG.Vertex( index: 1, label: 'foo', - inputs: [ Paths.get('/inputs/data.txt') ], + inputs: [ 'data.txt': Paths.get('/inputs/data.txt') ], outputs: [ output1 ] ) def v2 = new TaskDAG.Vertex( index: 2, label: 'bar', - inputs: [ output1 ], + inputs: [ 'data.foo': output1 ], outputs: [ Paths.get('/work/abcdef/data.bar') ] ) def dag = Mock(TaskDAG) { diff --git a/modules/nextflow/src/test/groovy/nextflow/dag/TaskDAGTest.groovy b/modules/nextflow/src/test/groovy/nextflow/dag/TaskDAGTest.groovy index e44ea30cba..a2dd6f66ac 100644 --- a/modules/nextflow/src/test/groovy/nextflow/dag/TaskDAGTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/dag/TaskDAGTest.groovy @@ -61,25 +61,23 @@ class TaskDAGTest extends Specification { v1.index == 0 v1.label == '[00/112233] foo' v1.inputs.size() == 1 - v1.inputs[0] == Paths.get('/inputs/data.txt') + v1.inputs['data.txt'] == Paths.get('/inputs/data.txt') and: v2.index == 1 v2.label == '[aa/bbccdd] bar' v2.inputs.size() == 1 - v2.inputs[0] == Paths.get('/work/00112233/data.foo') + v2.inputs['data.foo'] == Paths.get('/work/00112233/data.foo') when: dag.addTaskOutputs( task1 ) dag.addTaskOutputs( task2 ) then: - v1.outputs.size() == 1 - v1.outputs[0] == Paths.get('/work/00112233/data.foo') + v1.outputs == [ Paths.get('/work/00112233/data.foo') ] as Set and: - v2.outputs.size() == 1 - v2.outputs[0] == Paths.get('/work/aabbccdd/data.bar') + v2.outputs == [ Paths.get('/work/aabbccdd/data.bar') ] as Set and: - dag.getProducerVertex(v1.inputs[0]) == null - dag.getProducerVertex(v2.inputs[0]) == v1 + dag.getProducerVertex(v1.inputs['data.txt']) == null + dag.getProducerVertex(v2.inputs['data.foo']) == v1 } def 'should write meta file' () { From 0c63254d47d939f7a92f2e3e3ed9f67ea2726208 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Thu, 6 Jul 2023 22:29:52 -0500 Subject: [PATCH 11/20] Rename 'process' option to 'workflow' Signed-off-by: Ben Sherman --- docs/config.md | 2 +- .../main/groovy/nextflow/dag/CytoscapeHtmlRenderer.groovy | 2 +- .../main/groovy/nextflow/dag/CytoscapeJsRenderer.groovy | 2 +- modules/nextflow/src/main/groovy/nextflow/dag/DAG.groovy | 2 +- .../src/main/groovy/nextflow/dag/DagRenderer.groovy | 6 +++--- .../src/main/groovy/nextflow/dag/DotRenderer.groovy | 2 +- .../src/main/groovy/nextflow/dag/GexfRenderer.groovy | 2 +- .../src/main/groovy/nextflow/dag/GraphVizRenderer.groovy | 2 +- .../src/main/groovy/nextflow/dag/MermaidRenderer.groovy | 2 +- .../groovy/nextflow/trace/DefaultObserverFactory.groovy | 2 +- .../src/main/groovy/nextflow/trace/GraphObserver.groovy | 8 ++++---- .../src/test/groovy/nextflow/dag/DotRendererTest.groovy | 2 +- .../src/test/groovy/nextflow/dag/GexfRendererTest.groovy | 2 +- .../test/groovy/nextflow/dag/MermaidRendererTest.groovy | 4 ++-- .../nf-commons/src/main/nextflow/extension/FilesEx.groovy | 4 +++- 15 files changed, 23 insertions(+), 21 deletions(-) diff --git a/docs/config.md b/docs/config.md index 774539fc24..9ecefb82a3 100644 --- a/docs/config.md +++ b/docs/config.md @@ -506,7 +506,7 @@ The following settings are available: : When `true` overwrites any existing DAG file with the same name. `dag.type` -: Can be `'process'` to render the process DAG or `'task'` to render the task DAG (default: `'process'`). +: Can be `'workflow'` to render the workflow DAG or `'task'` to render the task DAG (default: `'workflow'`). Read the {ref}`dag-visualisation` page to learn more about the execution graph that can be generated by Nextflow. diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/CytoscapeHtmlRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/CytoscapeHtmlRenderer.groovy index 1895cf94f9..cf8fed4b6c 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/CytoscapeHtmlRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/CytoscapeHtmlRenderer.groovy @@ -28,7 +28,7 @@ import java.nio.file.Path class CytoscapeHtmlRenderer implements DagRenderer { @Override - void renderProcessGraph(DAG dag, Path file) { + void renderWorkflowGraph(DAG dag, Path file) { String tmplPage = readTemplate() String network = CytoscapeJsRenderer.renderNetwork(dag) file.text = tmplPage.replaceAll(~/\/\* REPLACE_WITH_NETWORK_DATA \*\//, network) diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/CytoscapeJsRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/CytoscapeJsRenderer.groovy index 1c978ef2d1..c7592392c9 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/CytoscapeJsRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/CytoscapeJsRenderer.groovy @@ -28,7 +28,7 @@ import java.nio.file.Path class CytoscapeJsRenderer implements DagRenderer { @Override - void renderProcessGraph(DAG dag, Path file) { + void renderWorkflowGraph(DAG dag, Path file) { file.text = renderNetwork(dag) } diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/DAG.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/DAG.groovy index 2c994bd004..154e0dd19b 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/DAG.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/DAG.groovy @@ -43,7 +43,7 @@ import nextflow.script.params.TupleOutParam import java.util.concurrent.atomic.AtomicLong /** - * Model the process graph of the pipeline execution. + * Model the workflow graph of the pipeline execution. * * @author Paolo Di Tommaso */ diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/DagRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/DagRenderer.groovy index 1c3b61d7ec..442ddf761b 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/DagRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/DagRenderer.groovy @@ -26,10 +26,10 @@ import java.nio.file.Path trait DagRenderer { /** - * Render a process DAG. + * Render a workflow DAG. */ - void renderProcessGraph(DAG dag, Path file) { - throw new UnsupportedOperationException("Process DAG rendering is not supported for this file format") + void renderWorkflowGraph(DAG dag, Path file) { + throw new UnsupportedOperationException("Workflow DAG rendering is not supported for this file format") } /** diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/DotRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/DotRenderer.groovy index 357b28c685..bf032f7d99 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/DotRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/DotRenderer.groovy @@ -45,7 +45,7 @@ class DotRenderer implements DagRenderer { static String normalise(String str) { str.replaceAll(/[^0-9_A-Za-z]/,'') } @Override - void renderProcessGraph(DAG dag, Path file) { + void renderWorkflowGraph(DAG dag, Path file) { file.text = renderNetwork(dag) } diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/GexfRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/GexfRenderer.groovy index 45b107f4e0..8072ae0f06 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/GexfRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/GexfRenderer.groovy @@ -41,7 +41,7 @@ class GexfRenderer implements DagRenderer { } @Override - void renderProcessGraph(DAG dag, Path file) { + void renderWorkflowGraph(DAG dag, Path file) { final Charset charset = Charset.defaultCharset() Writer bw = Files.newBufferedWriter(file, charset) final XMLOutputFactory xof = XMLOutputFactory.newFactory() diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/GraphVizRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/GraphVizRenderer.groovy index 7068c1c7bf..61b8e967e2 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/GraphVizRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/GraphVizRenderer.groovy @@ -41,7 +41,7 @@ class GraphvizRenderer implements DagRenderer { * See http://www.graphviz.org for more info. */ @Override - void renderProcessGraph(DAG dag, Path target) { + void renderWorkflowGraph(DAG dag, Path target) { def result = Files.createTempFile('nxf-',".$format") def temp = Files.createTempFile('nxf-','.dot') // save the DAG as `dot` to a temp file diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy index da868c8a18..ae27113e61 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy @@ -30,7 +30,7 @@ import groovy.transform.CompileStatic class MermaidRenderer implements DagRenderer { @Override - void renderProcessGraph(DAG dag, Path file) { + void renderWorkflowGraph(DAG dag, Path file) { def lines = [] lines << "flowchart TD" diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy index 0e536a1dd4..09d56a9ff2 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy @@ -93,7 +93,7 @@ class DefaultObserverFactory implements TraceObserverFactory { if( !fileName ) fileName = GraphObserver.DEF_FILE_NAME def traceFile = (fileName as Path).complete() def observer = new GraphObserver(traceFile) - config.navigate('dag.type') { observer.type = it ?: 'process' } + config.navigate('dag.type') { observer.type = it ?: 'workflow' } config.navigate('dag.overwrite') { observer.overwrite = it } result << observer } diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy index 5b7bf0521f..255657f48c 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy @@ -48,7 +48,7 @@ class GraphObserver implements TraceObserver { private Path file - private String type = 'process' + private String type = 'workflow' private DAG dag @@ -104,17 +104,17 @@ class GraphObserver implements TraceObserver { @Override void onFlowComplete() { - if( type == 'process' ) { + if( type == 'workflow' ) { // -- normalise the DAG dag.normalize() // -- render it to a file - createRenderer().renderProcessGraph(dag, file) + createRenderer().renderWorkflowGraph(dag, file) } else if( type == 'task' ) { createRenderer().renderTaskGraph(taskDag, file) } else { - log.warn("Invalid DAG type '${type}', should be 'process' or 'task'") + log.warn("Invalid DAG type '${type}', should be 'workflow' or 'task'") } } diff --git a/modules/nextflow/src/test/groovy/nextflow/dag/DotRendererTest.groovy b/modules/nextflow/src/test/groovy/nextflow/dag/DotRendererTest.groovy index 2289546654..ae61b37b57 100644 --- a/modules/nextflow/src/test/groovy/nextflow/dag/DotRendererTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/dag/DotRendererTest.groovy @@ -54,7 +54,7 @@ class DotRendererTest extends Specification { dag.normalize() when: - new DotRenderer('TheGraph').renderProcessGraph(dag, file) + new DotRenderer('TheGraph').renderWorkflowGraph(dag, file) then: file.text == ''' diff --git a/modules/nextflow/src/test/groovy/nextflow/dag/GexfRendererTest.groovy b/modules/nextflow/src/test/groovy/nextflow/dag/GexfRendererTest.groovy index f053276344..a4920f10c7 100644 --- a/modules/nextflow/src/test/groovy/nextflow/dag/GexfRendererTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/dag/GexfRendererTest.groovy @@ -47,7 +47,7 @@ class GexfRendererTest extends Specification { dag.normalize() when: - new GexfRenderer('TheGraph').renderProcessGraph(dag, file.toPath()) + new GexfRenderer('TheGraph').renderWorkflowGraph(dag, file.toPath()) then: def graph = new XmlSlurper().parse(file); assert graph.name() == 'gexf' diff --git a/modules/nextflow/src/test/groovy/nextflow/dag/MermaidRendererTest.groovy b/modules/nextflow/src/test/groovy/nextflow/dag/MermaidRendererTest.groovy index 53992dd22d..9adc36b3a3 100644 --- a/modules/nextflow/src/test/groovy/nextflow/dag/MermaidRendererTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/dag/MermaidRendererTest.groovy @@ -34,7 +34,7 @@ class MermaidRendererTest extends Specification { new Session() } - def 'should render a process graph using the `mmd` format' () { + def 'should render a workflow graph using the `mmd` format' () { given: def file = Files.createTempFile('test', null) def ch1 = new DataflowQueue() @@ -48,7 +48,7 @@ class MermaidRendererTest extends Specification { dag.normalize() when: - new MermaidRenderer().renderProcessGraph(dag, file) + new MermaidRenderer().renderWorkflowGraph(dag, file) then: file.text == ''' diff --git a/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy b/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy index 026d0dbc92..c59adc4a45 100644 --- a/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy +++ b/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy @@ -1606,9 +1606,11 @@ class FilesEx { if( Files.isDirectory(path) ) return null + // use etag if available if( path instanceof ETagAwareFile ) - return ((ETagAwareFile)path).getETag() + return path.getETag() + // otherwise compute checksum manually final is = Files.newInputStream(path) final md = MessageDigest.getInstance('MD5') final buf = new byte[16 << 10] From 9b934e05dec959c225940ae0e28b9e02f57f66e0 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Thu, 13 Jul 2023 04:53:04 -0500 Subject: [PATCH 12/20] Save task inputs and outputs to cache db instead of json file Signed-off-by: Ben Sherman --- .../main/groovy/nextflow/cache/CacheDB.groovy | 24 ++++++++++ .../main/groovy/nextflow/dag/TaskDAG.groovy | 36 +++------------ .../groovy/nextflow/processor/TaskRun.groovy | 1 - .../trace/DefaultObserverFactory.groovy | 7 ++- .../nextflow/trace/GraphObserver.groovy | 44 ++++++++++++++++--- .../groovy/nextflow/trace/TraceRecord.groovy | 32 ++++++++++++++ .../groovy/nextflow/dag/TaskDAGTest.groovy | 34 -------------- 7 files changed, 103 insertions(+), 75 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/cache/CacheDB.groovy b/modules/nextflow/src/main/groovy/nextflow/cache/CacheDB.groovy index 214e3235e3..2e44a3980c 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cache/CacheDB.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cache/CacheDB.groovy @@ -157,6 +157,30 @@ class CacheDB implements Closeable { writer.send { writeTaskEntry0(handler, trace) } } + /** + * Finalize task entry in the cache DB. + * + * @param hash + * @param trace + */ + @PackageScope + void finalizeTaskEntry0( HashCode hash, TraceRecord trace ) { + + final payload = store.getEntry(hash) + if( !payload ) { + log.debug "Unable to finalize task with key: $hash" + return + } + + final record = (List)KryoHelper.deserialize(payload) + record[0] = trace.serialize() + store.putEntry(hash, KryoHelper.serialize(record)) + } + + void finalizeTaskAsync( HashCode hash, TraceRecord trace ) { + writer.send { finalizeTaskEntry0(hash, trace) } + } + void cacheTaskAsync( TaskHandler handler ) { writer.send { writeTaskIndex0(handler,true) diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy index 6d319b028d..d8bb790eb4 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy @@ -16,16 +16,13 @@ package nextflow.dag -import java.nio.file.Files import java.nio.file.Path import java.util.concurrent.locks.Lock import java.util.concurrent.locks.ReentrantLock -import groovy.json.JsonBuilder import groovy.transform.CompileStatic import groovy.transform.TupleConstructor import groovy.util.logging.Slf4j -import nextflow.extension.FilesEx import nextflow.processor.TaskRun import nextflow.script.params.FileOutParam /** @@ -91,40 +88,21 @@ class TaskDAG { } /** - * Get the vertex for the task that produced the given file. + * Get the task that produced the given file. * * @param path */ - Vertex getProducerVertex(Path path) { - vertices[taskLookup[path]] + TaskRun getProducerTask(Path path) { + taskLookup[path] } /** - * Write the metadata JSON file for a task. + * Get the vertex for the task that produced the given file. * - * @param task + * @param path */ - void writeMetaFile(TaskRun task) { - final record = [ - hash: task.hash.toString(), - inputs: vertices[task].inputs.collect { name, path -> - [ - name: name, - path: path.toUriString(), - predecessor: taskLookup[path]?.hash?.toString() - ] - }, - outputs: vertices[task].outputs.collect { path -> - [ - name: path.name, - path: path.toUriString(), - size: Files.size(path), - checksum: FilesEx.getChecksum(path) - ] - } - ] - - task.workDir.resolve(TaskRun.CMD_META).text = new JsonBuilder(record).toString() + Vertex getProducerVertex(Path path) { + vertices[taskLookup[path]] } @TupleConstructor(excludes = 'outputs') diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy index 44f090057d..fd2099f2f6 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy @@ -550,7 +550,6 @@ class TaskRun implements Cloneable { static final public String CMD_STAGE = '.command.stage' static final public String CMD_TRACE = '.command.trace' static final public String CMD_ENV = '.command.env' - static final public String CMD_META = '.command.meta.json' String toString( ) { diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy index 09d56a9ff2..6c5ffa3209 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy @@ -92,10 +92,9 @@ class DefaultObserverFactory implements TraceObserverFactory { String fileName = config.navigate('dag.file') if( !fileName ) fileName = GraphObserver.DEF_FILE_NAME def traceFile = (fileName as Path).complete() - def observer = new GraphObserver(traceFile) - config.navigate('dag.type') { observer.type = it ?: 'workflow' } - config.navigate('dag.overwrite') { observer.overwrite = it } - result << observer + String type = config.navigate('dag.type', 'workflow') + boolean overwrite = config.navigate('dag.overwrite') as Boolean + result << new GraphObserver(traceFile, type, overwrite) } /* diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy index 255657f48c..ef480b1ac2 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy @@ -16,12 +16,14 @@ package nextflow.trace +import java.nio.file.Files import java.nio.file.Path import groovy.transform.CompileStatic import groovy.transform.PackageScope import groovy.util.logging.Slf4j import nextflow.Session +import nextflow.cache.CacheDB import nextflow.dag.CytoscapeHtmlRenderer import nextflow.dag.DAG import nextflow.dag.TaskDAG @@ -31,6 +33,7 @@ import nextflow.dag.GexfRenderer import nextflow.dag.GraphvizRenderer import nextflow.dag.MermaidRenderer import nextflow.exception.AbortOperationException +import nextflow.extension.FilesEx import nextflow.file.FileHelper import nextflow.processor.TaskHandler import nextflow.processor.TaskProcessor @@ -48,12 +51,14 @@ class GraphObserver implements TraceObserver { private Path file - private String type = 'workflow' + private String type private DAG dag private TaskDAG taskDag + private CacheDB cache + private String name private String format @@ -64,17 +69,25 @@ class GraphObserver implements TraceObserver { String getName() { name } - GraphObserver( Path file ) { + GraphObserver( Path file, String type='workflow', boolean overwrite=false ) { assert file + + if( type !in ['workflow', 'task'] ) + throw new IllegalArgumentException("Invalid DAG type '${type}', should be 'workflow' or 'task'") + this.file = file this.name = file.baseName this.format = file.getExtension().toLowerCase() ?: 'dot' + this.type = type + this.overwrite = overwrite } @Override void onFlowCreate(Session session) { this.dag = session.dag this.taskDag = session.taskDag + this.cache = session.cache + // check file existance final attrs = FileHelper.readAttributes(file) if( attrs ) { @@ -92,8 +105,28 @@ class GraphObserver implements TraceObserver { @Override void onProcessComplete(TaskHandler handler, TraceRecord trace) { - taskDag.addTaskOutputs(handler.task) - taskDag.writeMetaFile(handler.task) + final task = handler.task + + // update task graph + taskDag.addTaskOutputs(task) + + // save inputs and outputs to trace record + final vertex = taskDag.vertices[task] + trace.inputs = vertex.inputs.collect { name, path -> + new TraceRecord.Input( + name, + path, + taskDag.getProducerTask(path)?.hash) + } + trace.outputs = vertex.outputs.collect { path -> + new TraceRecord.Output( + path, + Files.size(path), + FilesEx.getChecksum(path)) + } + + // update cache db + cache.finalizeTaskAsync(task.hash, trace) } @Override @@ -113,9 +146,6 @@ class GraphObserver implements TraceObserver { else if( type == 'task' ) { createRenderer().renderTaskGraph(taskDag, file) } - else { - log.warn("Invalid DAG type '${type}', should be 'workflow' or 'task'") - } } @PackageScope diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/TraceRecord.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/TraceRecord.groovy index 0662c8fb20..10a8c4e2e3 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/TraceRecord.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/TraceRecord.groovy @@ -19,10 +19,12 @@ package nextflow.trace import java.nio.file.Path import java.util.regex.Pattern +import com.google.common.hash.HashCode import groovy.json.StringEscapeUtils import groovy.transform.CompileStatic import groovy.transform.Memoized import groovy.transform.PackageScope +import groovy.transform.TupleConstructor import groovy.util.logging.Slf4j import nextflow.cloud.types.CloudMachineInfo import nextflow.extension.Bolts @@ -604,4 +606,34 @@ class TraceRecord implements Serializable { this.machineInfo = value } + List getInputs() { + return store.inputs as List + } + + void setInputs(List inputs) { + store.inputs = inputs + } + + List getOutputs() { + return store.outputs as List + } + + void setOutputs(List outputs) { + store.outputs = outputs + } + + @TupleConstructor + static class Input { + String stageName + Path storePath + HashCode source + } + + @TupleConstructor + static class Output { + Path path + long size + String checksum + } + } diff --git a/modules/nextflow/src/test/groovy/nextflow/dag/TaskDAGTest.groovy b/modules/nextflow/src/test/groovy/nextflow/dag/TaskDAGTest.groovy index a2dd6f66ac..6e62682476 100644 --- a/modules/nextflow/src/test/groovy/nextflow/dag/TaskDAGTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/dag/TaskDAGTest.groovy @@ -80,38 +80,4 @@ class TaskDAGTest extends Specification { dag.getProducerVertex(v2.inputs['data.foo']) == v1 } - def 'should write meta file' () { - - given: - def folder = File.createTempDir() - def outputFile = new File(folder, 'data.bar') ; outputFile.text = 'bar' - - def task1 = Mock(TaskRun) { - name >> 'foo' - hash >> HashCode.fromString('00112233') - getInputFilesMap() >> [ 'data.txt': Paths.get('/inputs/data.txt') ] - getOutputsByType(_) >> [ 'data.foo': Paths.get('/work/00112233/data.foo') ] - } - def task2 = Mock(TaskRun) { - name >> 'bar' - workDir >> folder.toPath() - hash >> HashCode.fromString('aabbccdd') - getInputFilesMap() >> [ 'data.foo': Paths.get('/work/00112233/data.foo') ] - getOutputsByType(_) >> [ 'data.bar': outputFile.toPath() ] - } - def dag = new TaskDAG() - - when: - dag.addTask(task1) - dag.addTaskOutputs(task1) - dag.addTask(task2) - dag.addTaskOutputs(task2) - dag.writeMetaFile(task2) - then: - task2.workDir.resolve(TaskRun.CMD_META).text == """{"hash":"aabbccdd","inputs":[{"name":"data.foo","path":"/work/00112233/data.foo","predecessor":"00112233"}],"outputs":[{"name":"data.bar","path":"${folder}/data.bar","size":3,"checksum":"37b51d194a7513e45b56f6524f2d51f2"}]}""" - - cleanup: - folder.delete() - } - } From 08fda25d2bdf012144363eade5eafc304dab3d45 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Mon, 24 Jul 2023 15:47:31 -0500 Subject: [PATCH 13/20] Decouple task graph from DAG renderer Signed-off-by: Ben Sherman --- .../src/main/groovy/nextflow/Session.groovy | 10 +++++ .../main/groovy/nextflow/cache/CacheDB.groovy | 24 ----------- .../main/groovy/nextflow/dag/TaskDAG.groovy | 28 ++++++++++++ .../nextflow/trace/GraphObserver.groovy | 43 ------------------- .../groovy/nextflow/trace/TraceRecord.groovy | 3 +- 5 files changed, 39 insertions(+), 69 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index a5ba8b785f..cd08abd82b 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -975,6 +975,8 @@ class Session implements ISession { void notifyTaskSubmit( TaskHandler handler ) { final task = handler.task log.info "[${task.hashLog}] ${task.runType.message} > ${task.name}" + // -- update task graph + taskDag.addTask(task) // -- save a record in the cache index cache.putIndexAsync(handler) @@ -1012,8 +1014,12 @@ class Session implements ISession { * @param handler */ void notifyTaskComplete( TaskHandler handler ) { + // update task graph + taskDag.addTaskOutputs(handler.task) + // save the completed task in the cache DB final trace = handler.safeTraceRecord() + taskDag.apply(handler.task, trace) cache.putTaskAsync(handler, trace) // notify the event to the observers @@ -1029,6 +1035,10 @@ class Session implements ISession { } void notifyTaskCached( TaskHandler handler ) { + // update task graph + taskDag.addTask(handler.task) + taskDag.addTaskOutputs(handler.task) + final trace = handler.getTraceRecord() // save a record in the cache index only the when the trace record is available // otherwise it means that the event is trigger by a `stored dir` driven task diff --git a/modules/nextflow/src/main/groovy/nextflow/cache/CacheDB.groovy b/modules/nextflow/src/main/groovy/nextflow/cache/CacheDB.groovy index 2e44a3980c..214e3235e3 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cache/CacheDB.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cache/CacheDB.groovy @@ -157,30 +157,6 @@ class CacheDB implements Closeable { writer.send { writeTaskEntry0(handler, trace) } } - /** - * Finalize task entry in the cache DB. - * - * @param hash - * @param trace - */ - @PackageScope - void finalizeTaskEntry0( HashCode hash, TraceRecord trace ) { - - final payload = store.getEntry(hash) - if( !payload ) { - log.debug "Unable to finalize task with key: $hash" - return - } - - final record = (List)KryoHelper.deserialize(payload) - record[0] = trace.serialize() - store.putEntry(hash, KryoHelper.serialize(record)) - } - - void finalizeTaskAsync( HashCode hash, TraceRecord trace ) { - writer.send { finalizeTaskEntry0(hash, trace) } - } - void cacheTaskAsync( TaskHandler handler ) { writer.send { writeTaskIndex0(handler,true) diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy index d8bb790eb4..bd0e1dc256 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy @@ -16,6 +16,7 @@ package nextflow.dag +import java.nio.file.Files import java.nio.file.Path import java.util.concurrent.locks.Lock import java.util.concurrent.locks.ReentrantLock @@ -23,8 +24,10 @@ import java.util.concurrent.locks.ReentrantLock import groovy.transform.CompileStatic import groovy.transform.TupleConstructor import groovy.util.logging.Slf4j +import nextflow.extension.FilesEx import nextflow.processor.TaskRun import nextflow.script.params.FileOutParam +import nextflow.trace.TraceRecord /** * Model the task graph of a pipeline execution. * @@ -105,6 +108,31 @@ class TaskDAG { vertices[taskLookup[path]] } + /** + * Save task input and output metadata to trace record. + * + * @param task + * @param record + */ + void apply(TaskRun task, TraceRecord record) { + final vertex = vertices[task] + + record.inputs = vertex.inputs.collect { name, path -> + final producer = getProducerTask(path) + new TraceRecord.Input( + name, + path, + producer ? producer.hash.toString() : null) + } + + record.outputs = vertex.outputs.collect { path -> + new TraceRecord.Output( + path, + Files.size(path), + FilesEx.getChecksum(path)) + } + } + @TupleConstructor(excludes = 'outputs') static class Vertex { int index diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy index ef480b1ac2..1a7eebc4b4 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy @@ -16,14 +16,12 @@ package nextflow.trace -import java.nio.file.Files import java.nio.file.Path import groovy.transform.CompileStatic import groovy.transform.PackageScope import groovy.util.logging.Slf4j import nextflow.Session -import nextflow.cache.CacheDB import nextflow.dag.CytoscapeHtmlRenderer import nextflow.dag.DAG import nextflow.dag.TaskDAG @@ -33,7 +31,6 @@ import nextflow.dag.GexfRenderer import nextflow.dag.GraphvizRenderer import nextflow.dag.MermaidRenderer import nextflow.exception.AbortOperationException -import nextflow.extension.FilesEx import nextflow.file.FileHelper import nextflow.processor.TaskHandler import nextflow.processor.TaskProcessor @@ -57,8 +54,6 @@ class GraphObserver implements TraceObserver { private TaskDAG taskDag - private CacheDB cache - private String name private String format @@ -86,7 +81,6 @@ class GraphObserver implements TraceObserver { void onFlowCreate(Session session) { this.dag = session.dag this.taskDag = session.taskDag - this.cache = session.cache // check file existance final attrs = FileHelper.readAttributes(file) @@ -98,43 +92,6 @@ class GraphObserver implements TraceObserver { } } - @Override - void onProcessSubmit(TaskHandler handler, TraceRecord trace) { - taskDag.addTask(handler.task) - } - - @Override - void onProcessComplete(TaskHandler handler, TraceRecord trace) { - final task = handler.task - - // update task graph - taskDag.addTaskOutputs(task) - - // save inputs and outputs to trace record - final vertex = taskDag.vertices[task] - trace.inputs = vertex.inputs.collect { name, path -> - new TraceRecord.Input( - name, - path, - taskDag.getProducerTask(path)?.hash) - } - trace.outputs = vertex.outputs.collect { path -> - new TraceRecord.Output( - path, - Files.size(path), - FilesEx.getChecksum(path)) - } - - // update cache db - cache.finalizeTaskAsync(task.hash, trace) - } - - @Override - void onProcessCached(TaskHandler handler, TraceRecord trace) { - taskDag.addTask(handler.task) - taskDag.addTaskOutputs(handler.task) - } - @Override void onFlowComplete() { if( type == 'workflow' ) { diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/TraceRecord.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/TraceRecord.groovy index 10a8c4e2e3..b868702c27 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/TraceRecord.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/TraceRecord.groovy @@ -19,7 +19,6 @@ package nextflow.trace import java.nio.file.Path import java.util.regex.Pattern -import com.google.common.hash.HashCode import groovy.json.StringEscapeUtils import groovy.transform.CompileStatic import groovy.transform.Memoized @@ -626,7 +625,7 @@ class TraceRecord implements Serializable { static class Input { String stageName Path storePath - HashCode source + String source } @TupleConstructor From 5eb9de1cfed6e3b17cbd7938c1504e95e2607105 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Tue, 25 Jul 2023 06:40:37 -0500 Subject: [PATCH 14/20] Improve DAG rendering Signed-off-by: Ben Sherman --- .../nextflow/dag/MermaidRenderer.groovy | 67 ++++++++++++++----- 1 file changed, 52 insertions(+), 15 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy index ae27113e61..2cd8dc303e 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy @@ -19,6 +19,9 @@ package nextflow.dag import java.nio.file.Path import groovy.transform.CompileStatic +import nextflow.Global +import nextflow.Session +import nextflow.script.WorkflowMetadata /** * Render the DAG using the Mermaid format to the specified file. @@ -29,6 +32,8 @@ import groovy.transform.CompileStatic @CompileStatic class MermaidRenderer implements DagRenderer { + private WorkflowMetadata metadata = ((Session)Global.session).workflowMetadata + @Override void renderWorkflowGraph(DAG dag, Path file) { def lines = [] @@ -78,46 +83,78 @@ class MermaidRenderer implements DagRenderer { @Override void renderTaskGraph(TaskDAG dag, Path file) { - def renderedOutputs = [] as Set - def numInputs = 0 - def numOutputs = 0 - def lines = [] lines << "flowchart TD" - // render tasks and task inputs + // render workflow inputs + final inputs = [:] as Map + + lines << " subgraph \" \"" + dag.vertices.each { task, vertex -> - // render task vertex - lines << " ${vertex.getSlug()}[\"${vertex.label}\"]" + vertex.inputs.each { name, path -> + if( dag.getProducerVertex(path) || path in inputs ) + return + + inputs[path] = "in${inputs.size()}".toString() + lines << " ${inputs[path]}[\"${normalizePath(path)}\"]" + } + } + + lines << " end" + + // render tasks + final taskOutputs = [] as Set + + dag.vertices.each { task, vertex -> + lines << " ${vertex.getSlug()}([\"${vertex.label}\"])" vertex.inputs.each { name, path -> // render task input from predecessor task final pred = dag.getProducerVertex(path) if( pred != null ) { lines << " ${pred.getSlug()} -->|${path.name}| ${vertex.getSlug()}" - renderedOutputs << path + taskOutputs << path } // render task input from source node else { - numInputs += 1 - lines << " i${numInputs}(( )) -->|${path}| ${vertex.getSlug()}" + lines << " ${inputs[path]} --> ${vertex.getSlug()}" } } } - // render task outputs with sink nodes + // render task outputs + final outputs = [:] as Map + dag.vertices.each { task, vertex -> vertex.outputs.each { path -> - if( path !in renderedOutputs ) { - numOutputs += 1 - lines << " ${vertex.getSlug()} -->|${path.name}| o${numOutputs}(( ))" - } + if( path in taskOutputs || path in outputs ) + return + + outputs[path] = "out${outputs.size()}".toString() + lines << " ${vertex.getSlug()} --> ${outputs[path]}" } } + // render workflow outputs + lines << " subgraph \" \"" + + outputs.each { path, label -> + lines << " ${label}[${path.name}]" + } + + lines << " end" + lines << "" file.text = lines.join('\n') } + + String normalizePath(Path path) { + path.toUriString() + .replace(metadata.workDir.toUriString(), '/work') + .replace(metadata.projectDir.toUriString(), '') + .replace(metadata.launchDir.toUriString(), '') + } } From c7422dfb36a9d064b360578419b6bbb1bade60b5 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Sat, 23 Sep 2023 10:54:11 -0500 Subject: [PATCH 15/20] Add subworkflows to task graph Signed-off-by: Ben Sherman --- .../nextflow/dag/MermaidRenderer.groovy | 97 +++++++++++++++++-- .../main/groovy/nextflow/dag/TaskDAG.groovy | 2 +- 2 files changed, 88 insertions(+), 11 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy index 150236c7ab..31c8b94452 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy @@ -24,6 +24,7 @@ import groovy.transform.TupleConstructor import groovy.util.logging.Slf4j import nextflow.Global import nextflow.Session +import nextflow.processor.TaskRun import nextflow.script.WorkflowMetadata /** * Render the DAG using the Mermaid format to the specified file. @@ -498,7 +499,15 @@ class MermaidRenderer implements DagRenderer { @Override void renderTaskGraph(TaskDAG dag, Path file) { - def lines = [] + file.text = renderNetwork(dag) + } + + String renderNetwork(TaskDAG dag) { + // construct task tree + final taskTree = getTaskTree(dag.vertices) + + // render diagram + def lines = [] as List lines << "flowchart TD" // render workflow inputs @@ -512,29 +521,30 @@ class MermaidRenderer implements DagRenderer { return inputs[path] = "in${inputs.size()}".toString() - lines << " ${inputs[path]}[\"${normalizePath(path)}\"]" + lines << " ${inputs[path]}[\"${normalizePath(path)}\"]".toString() } } lines << " end" // render tasks + renderTaskTree(lines, null, taskTree) + + // render task inputs final taskOutputs = [] as Set dag.vertices.each { task, vertex -> - lines << " ${vertex.getSlug()}([\"${vertex.label}\"])" - vertex.inputs.each { name, path -> // render task input from predecessor task final pred = dag.getProducerVertex(path) if( pred != null ) { - lines << " ${pred.getSlug()} -->|${path.name}| ${vertex.getSlug()}" + lines << " ${pred.name} -->|${path.name}| ${vertex.name}".toString() taskOutputs << path } - // render task input from source node + // render task input from workflow input else { - lines << " ${inputs[path]} --> ${vertex.getSlug()}" + lines << " ${inputs[path]} --> ${vertex.name}".toString() } } } @@ -548,7 +558,7 @@ class MermaidRenderer implements DagRenderer { return outputs[path] = "out${outputs.size()}".toString() - lines << " ${vertex.getSlug()} --> ${outputs[path]}" + lines << " ${vertex.name} --> ${outputs[path]}".toString() } } @@ -556,12 +566,12 @@ class MermaidRenderer implements DagRenderer { lines << " subgraph \" \"" outputs.each { path, label -> - lines << " ${label}[${path.name}]" + lines << " ${label}[${path.name}]".toString() } lines << " end" - file.text = lines.join('\n') + return lines.join('\n') } String normalizePath(Path path) { @@ -570,4 +580,71 @@ class MermaidRenderer implements DagRenderer { .replace(metadata.projectDir.toUriString(), '') .replace(metadata.launchDir.toUriString(), '') } + + /** + * Construct a task tree with a subgraph for each subworkflow. + * + * @param vertices + */ + private Map getTaskTree(Map vertices) { + def taskTree = [:] + + for( def entry : vertices ) { + def task = entry.key + def vertex = entry.value + + // infer subgraph keys from fully qualified process name + final result = getSubgraphKeys(task.processor.name) + final keys = (List)result[0] + + // update vertex label + final hash = task.hash.toString() + final name = task.name.replace(task.processor.name, (String)result[1]) + vertex.label = "[${hash.substring(0,2)}/${hash.substring(2,8)}] ${name}" + + // navigate to given subgraph + def subgraph = taskTree + for( def key : keys ) { + if( key !in subgraph ) + subgraph[key] = [:] + subgraph = subgraph[key] + } + + // add vertex to tree + subgraph[vertex.name] = vertex + } + + return taskTree + } + + /** + * Render a tree of tasks and subgraphs. + * + * @param lines + * @param name + * @param taskTree + */ + private void renderTaskTree(List lines, String name, Map taskTree) { + if( name ) + lines << " subgraph ${name}".toString() + + taskTree.each { key, value -> + if( value instanceof Map ) + renderTaskTree(lines, key, value) + else if( value instanceof TaskDAG.Vertex ) + lines << " ${renderTask(value)}".toString() + } + + if( name ) + lines << " end" + } + + /** + * Render a task. + * + * @param vertex + */ + private String renderTask(TaskDAG.Vertex vertex) { + return "${vertex.name}([\"${vertex.label}\"])" + } } diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy index bd0e1dc256..ea73f44f20 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy @@ -140,7 +140,7 @@ class TaskDAG { Map inputs Set outputs - String getSlug() { "t${index}" } + String getName() { "t${index}" } @Override String toString() { label } From cc8b6dcf172ffa24b4b5d5c0113ac1ca64f317d0 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Thu, 28 Sep 2023 11:44:25 -0500 Subject: [PATCH 16/20] Remove task DAG rendering (in favor of nf-prov) Signed-off-by: Ben Sherman --- docs/config.md | 3 - .../nextflow/dag/CytoscapeHtmlRenderer.groovy | 2 +- .../nextflow/dag/CytoscapeJsRenderer.groovy | 2 +- .../src/main/groovy/nextflow/dag/DAG.groovy | 2 +- .../groovy/nextflow/dag/DagRenderer.groovy | 15 +- .../groovy/nextflow/dag/DotRenderer.groovy | 2 +- .../groovy/nextflow/dag/GexfRenderer.groovy | 2 +- .../nextflow/dag/GraphVizRenderer.groovy | 2 +- .../nextflow/dag/MermaidRenderer.groovy | 159 +----------------- .../trace/DefaultObserverFactory.groovy | 6 +- .../nextflow/trace/GraphObserver.groovy | 48 +++--- .../nextflow/dag/DotRendererTest.groovy | 2 +- .../nextflow/dag/GexfRendererTest.groovy | 2 +- .../nextflow/dag/MermaidRendererTest.groovy | 57 +------ .../nextflow/trace/GraphObserverTest.groovy | 34 ++-- .../cloud/aws/config/AwsS3Config.groovy | 4 +- .../cloud/aws/config/AwsS3ConfigTest.groovy | 14 -- 17 files changed, 64 insertions(+), 292 deletions(-) diff --git a/docs/config.md b/docs/config.md index af5580228b..c013547aaf 100644 --- a/docs/config.md +++ b/docs/config.md @@ -529,9 +529,6 @@ The following settings are available: ::: : When `false`, channel names are omitted, operators are collapsed, and empty workflow inputs are removed (default: `false`; only supported by the Mermaid render). -`dag.type` -: Can be `'workflow'` to render the workflow DAG or `'task'` to render the task DAG (default: `'workflow'`). - Read the {ref}`dag-visualisation` page to learn more about the execution graph that can be generated by Nextflow. (config-docker)= diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/CytoscapeHtmlRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/CytoscapeHtmlRenderer.groovy index cf8fed4b6c..5aff50ffe3 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/CytoscapeHtmlRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/CytoscapeHtmlRenderer.groovy @@ -28,7 +28,7 @@ import java.nio.file.Path class CytoscapeHtmlRenderer implements DagRenderer { @Override - void renderWorkflowGraph(DAG dag, Path file) { + void renderDocument(DAG dag, Path file) { String tmplPage = readTemplate() String network = CytoscapeJsRenderer.renderNetwork(dag) file.text = tmplPage.replaceAll(~/\/\* REPLACE_WITH_NETWORK_DATA \*\//, network) diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/CytoscapeJsRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/CytoscapeJsRenderer.groovy index c7592392c9..dfcb8246df 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/CytoscapeJsRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/CytoscapeJsRenderer.groovy @@ -28,7 +28,7 @@ import java.nio.file.Path class CytoscapeJsRenderer implements DagRenderer { @Override - void renderWorkflowGraph(DAG dag, Path file) { + void renderDocument(DAG dag, Path file) { file.text = renderNetwork(dag) } diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/DAG.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/DAG.groovy index 0ee5ea43e1..93af836854 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/DAG.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/DAG.groovy @@ -43,7 +43,7 @@ import nextflow.script.params.TupleOutParam import java.util.concurrent.atomic.AtomicLong /** - * Model the workflow graph of the pipeline execution. + * Model a direct acyclic graph of the workflow definition. * * @author Paolo Di Tommaso */ diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/DagRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/DagRenderer.groovy index 442ddf761b..0d3bfcd252 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/DagRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/DagRenderer.groovy @@ -23,19 +23,10 @@ import java.nio.file.Path * @author Paolo Di Tommaso * @author Mike Smoot */ -trait DagRenderer { +interface DagRenderer { /** - * Render a workflow DAG. + * Render the dag to the specified file. */ - void renderWorkflowGraph(DAG dag, Path file) { - throw new UnsupportedOperationException("Workflow DAG rendering is not supported for this file format") - } - - /** - * Render a task DAG. - */ - void renderTaskGraph(TaskDAG dag, Path file) { - throw new UnsupportedOperationException("Task DAG rendering is not supported for this file format") - } + void renderDocument(DAG dag, Path file); } diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/DotRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/DotRenderer.groovy index bf032f7d99..f9b5a36e41 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/DotRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/DotRenderer.groovy @@ -45,7 +45,7 @@ class DotRenderer implements DagRenderer { static String normalise(String str) { str.replaceAll(/[^0-9_A-Za-z]/,'') } @Override - void renderWorkflowGraph(DAG dag, Path file) { + void renderDocument(DAG dag, Path file) { file.text = renderNetwork(dag) } diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/GexfRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/GexfRenderer.groovy index 8072ae0f06..c2bac51110 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/GexfRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/GexfRenderer.groovy @@ -41,7 +41,7 @@ class GexfRenderer implements DagRenderer { } @Override - void renderWorkflowGraph(DAG dag, Path file) { + void renderDocument(DAG dag, Path file) { final Charset charset = Charset.defaultCharset() Writer bw = Files.newBufferedWriter(file, charset) final XMLOutputFactory xof = XMLOutputFactory.newFactory() diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/GraphVizRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/GraphVizRenderer.groovy index 61b8e967e2..3106890b9c 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/GraphVizRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/GraphVizRenderer.groovy @@ -41,7 +41,7 @@ class GraphvizRenderer implements DagRenderer { * See http://www.graphviz.org for more info. */ @Override - void renderWorkflowGraph(DAG dag, Path target) { + void renderDocument(DAG dag, Path target) { def result = Files.createTempFile('nxf-',".$format") def temp = Files.createTempFile('nxf-','.dot') // save the DAG as `dot` to a temp file diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy index 31c8b94452..76507c4ab4 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/MermaidRenderer.groovy @@ -24,8 +24,6 @@ import groovy.transform.TupleConstructor import groovy.util.logging.Slf4j import nextflow.Global import nextflow.Session -import nextflow.processor.TaskRun -import nextflow.script.WorkflowMetadata /** * Render the DAG using the Mermaid format to the specified file. * See https://mermaid-js.github.io/mermaid/#/ for more info. @@ -42,14 +40,12 @@ class MermaidRenderer implements DagRenderer { private Session session = Global.session as Session - private WorkflowMetadata metadata = session.workflowMetadata - private int depth = session.config.navigate('dag.depth', -1) as int private boolean verbose = session.config.navigate('dag.verbose', false) @Override - void renderWorkflowGraph(DAG dag, Path file) { + void renderDocument(DAG dag, Path file) { file.text = renderNetwork(dag) } @@ -103,6 +99,8 @@ class MermaidRenderer implements DagRenderer { lines << " ${source.name} -->${label} ${target.name}".toString() } + lines << "" + return lines.join('\n') } @@ -496,155 +494,4 @@ class MermaidRenderer implements DagRenderer { Set inputs = [] Set outputs = [] } - - @Override - void renderTaskGraph(TaskDAG dag, Path file) { - file.text = renderNetwork(dag) - } - - String renderNetwork(TaskDAG dag) { - // construct task tree - final taskTree = getTaskTree(dag.vertices) - - // render diagram - def lines = [] as List - lines << "flowchart TD" - - // render workflow inputs - final inputs = [:] as Map - - lines << " subgraph \" \"" - - dag.vertices.each { task, vertex -> - vertex.inputs.each { name, path -> - if( dag.getProducerVertex(path) || path in inputs ) - return - - inputs[path] = "in${inputs.size()}".toString() - lines << " ${inputs[path]}[\"${normalizePath(path)}\"]".toString() - } - } - - lines << " end" - - // render tasks - renderTaskTree(lines, null, taskTree) - - // render task inputs - final taskOutputs = [] as Set - - dag.vertices.each { task, vertex -> - vertex.inputs.each { name, path -> - // render task input from predecessor task - final pred = dag.getProducerVertex(path) - if( pred != null ) { - lines << " ${pred.name} -->|${path.name}| ${vertex.name}".toString() - taskOutputs << path - } - - // render task input from workflow input - else { - lines << " ${inputs[path]} --> ${vertex.name}".toString() - } - } - } - - // render task outputs - final outputs = [:] as Map - - dag.vertices.each { task, vertex -> - vertex.outputs.each { path -> - if( path in taskOutputs || path in outputs ) - return - - outputs[path] = "out${outputs.size()}".toString() - lines << " ${vertex.name} --> ${outputs[path]}".toString() - } - } - - // render workflow outputs - lines << " subgraph \" \"" - - outputs.each { path, label -> - lines << " ${label}[${path.name}]".toString() - } - - lines << " end" - - return lines.join('\n') - } - - String normalizePath(Path path) { - path.toUriString() - .replace(metadata.workDir.toUriString(), '/work') - .replace(metadata.projectDir.toUriString(), '') - .replace(metadata.launchDir.toUriString(), '') - } - - /** - * Construct a task tree with a subgraph for each subworkflow. - * - * @param vertices - */ - private Map getTaskTree(Map vertices) { - def taskTree = [:] - - for( def entry : vertices ) { - def task = entry.key - def vertex = entry.value - - // infer subgraph keys from fully qualified process name - final result = getSubgraphKeys(task.processor.name) - final keys = (List)result[0] - - // update vertex label - final hash = task.hash.toString() - final name = task.name.replace(task.processor.name, (String)result[1]) - vertex.label = "[${hash.substring(0,2)}/${hash.substring(2,8)}] ${name}" - - // navigate to given subgraph - def subgraph = taskTree - for( def key : keys ) { - if( key !in subgraph ) - subgraph[key] = [:] - subgraph = subgraph[key] - } - - // add vertex to tree - subgraph[vertex.name] = vertex - } - - return taskTree - } - - /** - * Render a tree of tasks and subgraphs. - * - * @param lines - * @param name - * @param taskTree - */ - private void renderTaskTree(List lines, String name, Map taskTree) { - if( name ) - lines << " subgraph ${name}".toString() - - taskTree.each { key, value -> - if( value instanceof Map ) - renderTaskTree(lines, key, value) - else if( value instanceof TaskDAG.Vertex ) - lines << " ${renderTask(value)}".toString() - } - - if( name ) - lines << " end" - } - - /** - * Render a task. - * - * @param vertex - */ - private String renderTask(TaskDAG.Vertex vertex) { - return "${vertex.name}([\"${vertex.label}\"])" - } } diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy index 3e44f5c1dd..6c391625c9 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy @@ -77,9 +77,9 @@ class DefaultObserverFactory implements TraceObserverFactory { String fileName = config.navigate('dag.file') if( !fileName ) fileName = GraphObserver.DEF_FILE_NAME def traceFile = (fileName as Path).complete() - String type = config.navigate('dag.type', 'workflow') - boolean overwrite = config.navigate('dag.overwrite') as Boolean - result << new GraphObserver(traceFile, type, overwrite) + def observer = new GraphObserver(traceFile) + config.navigate('dag.overwrite') { observer.overwrite = it } + result << observer } /* diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy index 3ed7718382..0b41fbbf33 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy @@ -18,13 +18,11 @@ package nextflow.trace import java.nio.file.Path -import groovy.transform.CompileStatic import groovy.transform.PackageScope import groovy.util.logging.Slf4j import nextflow.Session import nextflow.dag.CytoscapeHtmlRenderer import nextflow.dag.DAG -import nextflow.dag.TaskDAG import nextflow.dag.DagRenderer import nextflow.dag.DotRenderer import nextflow.dag.GexfRenderer @@ -41,19 +39,14 @@ import nextflow.processor.TaskProcessor * @author Paolo Di Tommaso */ @Slf4j -@CompileStatic class GraphObserver implements TraceObserver { static public final String DEF_FILE_NAME = "dag-${TraceHelper.launchTimestampFmt()}.dot" private Path file - private String type - private DAG dag - private TaskDAG taskDag - private String name private String format @@ -64,24 +57,16 @@ class GraphObserver implements TraceObserver { String getName() { name } - GraphObserver( Path file, String type='workflow', boolean overwrite=false ) { + GraphObserver( Path file ) { assert file - - if( type !in ['workflow', 'task'] ) - throw new IllegalArgumentException("Invalid DAG type '${type}', should be 'workflow' or 'task'") - this.file = file this.name = file.baseName this.format = file.getExtension().toLowerCase() ?: 'dot' - this.type = type - this.overwrite = overwrite } @Override void onFlowCreate(Session session) { this.dag = session.dag - this.taskDag = session.taskDag - // check file existence final attrs = FileHelper.readAttributes(file) if( attrs ) { @@ -94,19 +79,14 @@ class GraphObserver implements TraceObserver { @Override void onFlowComplete() { - if( type == 'workflow' ) { // -- normalise the DAG dag.normalize() // -- render it to a file - createRenderer().renderWorkflowGraph(dag, file) - } - else if( type == 'task' ) { - createRenderer().renderTaskGraph(taskDag, file) - } + createRender().renderDocument(dag,file) } @PackageScope - DagRenderer createRenderer() { + DagRenderer createRender() { if( format == 'dot' ) new DotRenderer(name) @@ -123,6 +103,28 @@ class GraphObserver implements TraceObserver { new GraphvizRenderer(name, format) } + + @Override + void onProcessCreate(TaskProcessor process) { + + } + + + @Override + void onProcessSubmit(TaskHandler handler, TraceRecord trace) { + + } + + @Override + void onProcessStart(TaskHandler handler, TraceRecord trace) { + + } + + @Override + void onProcessComplete(TaskHandler handler, TraceRecord trace) { + + } + @Override boolean enableMetrics() { return false diff --git a/modules/nextflow/src/test/groovy/nextflow/dag/DotRendererTest.groovy b/modules/nextflow/src/test/groovy/nextflow/dag/DotRendererTest.groovy index b61f2272eb..77f7eb8c7a 100644 --- a/modules/nextflow/src/test/groovy/nextflow/dag/DotRendererTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/dag/DotRendererTest.groovy @@ -54,7 +54,7 @@ class DotRendererTest extends Specification { dag.normalize() when: - new DotRenderer('TheGraph').renderWorkflowGraph(dag, file) + new DotRenderer('TheGraph').renderDocument(dag, file) then: file.text == ''' diff --git a/modules/nextflow/src/test/groovy/nextflow/dag/GexfRendererTest.groovy b/modules/nextflow/src/test/groovy/nextflow/dag/GexfRendererTest.groovy index a4920f10c7..f112732b2e 100644 --- a/modules/nextflow/src/test/groovy/nextflow/dag/GexfRendererTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/dag/GexfRendererTest.groovy @@ -47,7 +47,7 @@ class GexfRendererTest extends Specification { dag.normalize() when: - new GexfRenderer('TheGraph').renderWorkflowGraph(dag, file.toPath()) + new GexfRenderer('TheGraph').renderDocument(dag, file.toPath()) then: def graph = new XmlSlurper().parse(file); assert graph.name() == 'gexf' diff --git a/modules/nextflow/src/test/groovy/nextflow/dag/MermaidRendererTest.groovy b/modules/nextflow/src/test/groovy/nextflow/dag/MermaidRendererTest.groovy index 37259489e3..0c61713b77 100644 --- a/modules/nextflow/src/test/groovy/nextflow/dag/MermaidRendererTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/dag/MermaidRendererTest.groovy @@ -17,11 +17,9 @@ package nextflow.dag import java.nio.file.Files -import java.nio.file.Paths import groovyx.gpars.dataflow.DataflowQueue import nextflow.Session -import nextflow.processor.TaskRun import spock.lang.Specification /** * @@ -29,11 +27,7 @@ import spock.lang.Specification */ class MermaidRendererTest extends Specification { - def setupSpec() { - new Session() - } - - def 'should render a workflow graph using the `mmd` format' () { + def 'should render a graph using the `mmd` format' () { given: def file = Files.createTempFile('test', null) def ch1 = new DataflowQueue() @@ -48,7 +42,7 @@ class MermaidRendererTest extends Specification { dag.normalize() when: - new MermaidRenderer().renderWorkflowGraph(dag, file) + new MermaidRenderer().renderDocument(dag, file) then: file.text == ''' @@ -65,52 +59,7 @@ class MermaidRendererTest extends Specification { v1 --> v2 v2 --> v3 ''' - .stripIndent().trim() - - cleanup: - file.delete() - } - - def 'should render a task graph using the `mmd` format' () { - given: - def file = Files.createTempFile('test', null) - - def task1 = Mock(TaskRun) - def task2 = Mock(TaskRun) - def output1 = Paths.get('/work/012345/data.foo') - def v1 = new TaskDAG.Vertex( - index: 1, - label: 'foo', - inputs: [ 'data.txt': Paths.get('/inputs/data.txt') ], - outputs: [ output1 ] - ) - def v2 = new TaskDAG.Vertex( - index: 2, - label: 'bar', - inputs: [ 'data.foo': output1 ], - outputs: [ Paths.get('/work/abcdef/data.bar') ] - ) - def dag = Mock(TaskDAG) { - vertices >> [ - (task1): v1, - (task2): v2 - ] - getProducerVertex(output1) >> v1 - } - - when: - new MermaidRenderer().renderTaskGraph(dag, file) - then: - file.text == - ''' - flowchart TD - t1["foo"] - i1(( )) -->|/inputs/data.txt| t1 - t2["bar"] - t1 -->|data.foo| t2 - t2 -->|data.bar| o1(( )) - ''' - .stripIndent().trim() + .stripIndent().leftTrim() cleanup: file.delete() diff --git a/modules/nextflow/src/test/groovy/nextflow/trace/GraphObserverTest.groovy b/modules/nextflow/src/test/groovy/nextflow/trace/GraphObserverTest.groovy index 626ec8e3c1..f3d13bc507 100644 --- a/modules/nextflow/src/test/groovy/nextflow/trace/GraphObserverTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/trace/GraphObserverTest.groovy @@ -35,7 +35,7 @@ import test.TestHelper */ class GraphObserverTest extends Specification { - DAG dag + DAG test_dag def setup() { new Session() @@ -45,28 +45,28 @@ class GraphObserverTest extends Specification { def ch2 = new DataflowQueue() def ch3 = new DataflowQueue() - dag = new DAG() + test_dag = new DAG() - dag.addVertex( + test_dag.addVertex( DAG.Type.ORIGIN, 'Source', null, [ new DAG.ChannelHandler(channel: src, label: 'src') ] ) - dag.addVertex( + test_dag.addVertex( DAG.Type.PROCESS, 'Process 1', [ new DAG.ChannelHandler(channel: src, label: 'Source') ], [ new DAG.ChannelHandler(channel: ch1, label: 'Channel 1') ] ) - dag.addVertex( + test_dag.addVertex( DAG.Type.OPERATOR, 'Filter', [ new DAG.ChannelHandler(channel: ch1, label: 'Channel 1') ], [ new DAG.ChannelHandler(channel: ch2, label: 'Channel 2') ] ) - dag.addVertex( + test_dag.addVertex( DAG.Type.PROCESS, 'Process 2', [ new DAG.ChannelHandler(channel: ch2, label: 'Channel 2') ], @@ -78,7 +78,7 @@ class GraphObserverTest extends Specification { given: def file = Files.createTempFile('nxf_','.dot') def gr = new GraphObserver(file) - gr.dag = dag + gr.dag = test_dag when: gr.onFlowComplete() @@ -105,7 +105,7 @@ class GraphObserverTest extends Specification { given: def file = Files.createTempFile('nxf-','.html') def gr = new GraphObserver(file) - gr.dag = dag + gr.dag = test_dag when: gr.onFlowComplete() @@ -133,7 +133,7 @@ class GraphObserverTest extends Specification { given: def file = Files.createTempFile('nxf-','.svg') def gr = new GraphObserver(file) - gr.dag = dag + gr.dag = test_dag when: gr.onFlowComplete() @@ -151,7 +151,7 @@ class GraphObserverTest extends Specification { given: def file = Files.createTempFile('nxf-','.png') def gr = new GraphObserver(file) - gr.dag = dag + gr.dag = test_dag when: gr.onFlowComplete() @@ -168,7 +168,7 @@ class GraphObserverTest extends Specification { given: def file = Files.createTempFile('nxf-','.pdf') def gr = new GraphObserver(file) - gr.dag = dag + gr.dag = test_dag when: gr.onFlowComplete() @@ -185,7 +185,7 @@ class GraphObserverTest extends Specification { def folder = Files.createTempDirectory('test') def file = folder.resolve('nope') def gr = new GraphObserver(file) - gr.dag = dag + gr.dag = test_dag when: gr.onFlowComplete() @@ -216,34 +216,34 @@ class GraphObserverTest extends Specification { then: observer.name == 'hello-world' observer.format == 'dot' - observer.createRenderer() instanceof DotRenderer + observer.createRender() instanceof DotRenderer when: observer = new GraphObserver(Paths.get('/path/to/TheGraph.html')) then: observer.name == 'TheGraph' observer.format == 'html' - observer.createRenderer() instanceof CytoscapeHtmlRenderer + observer.createRender() instanceof CytoscapeHtmlRenderer when: observer = new GraphObserver(Paths.get('/path/to/TheGraph.mmd')) then: observer.name == 'TheGraph' observer.format == 'mmd' - observer.createRenderer() instanceof MermaidRenderer + observer.createRender() instanceof MermaidRenderer when: observer = new GraphObserver(Paths.get('/path/to/TheGraph.SVG')) then: observer.name == 'TheGraph' observer.format == 'svg' - observer.createRenderer() instanceof GraphvizRenderer + observer.createRender() instanceof GraphvizRenderer when: observer = new GraphObserver(Paths.get('/path/to/anonymous')) then: observer.name == 'anonymous' observer.format == 'dot' - observer.createRenderer() instanceof DotRenderer + observer.createRender() instanceof DotRenderer } } diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/config/AwsS3Config.groovy b/plugins/nf-amazon/src/main/nextflow/cloud/aws/config/AwsS3Config.groovy index 6f564d7f2c..6f052d6cb7 100644 --- a/plugins/nf-amazon/src/main/nextflow/cloud/aws/config/AwsS3Config.groovy +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/config/AwsS3Config.groovy @@ -52,8 +52,8 @@ class AwsS3Config { this.debug = opts.debug as Boolean this.endpoint = opts.endpoint ?: SysEnv.get('AWS_S3_ENDPOINT') this.storageClass = parseStorageClass((opts.storageClass ?: opts.uploadStorageClass) as String) // 'uploadStorageClass' is kept for legacy purposes - this.storageEncryption = parseStorageEncryption(opts.storageEncryption as String) ?: SysEnv.get('NXF_AWS_SSE_MODE') - this.storageKmsKeyId = opts.storageKmsKeyId ?: SysEnv.get('NXF_AWS_SSE_KMS_KEY_ID') + this.storageEncryption = parseStorageEncryption(opts.storageEncryption as String) + this.storageKmsKeyId = opts.storageKmsKeyId this.pathStyleAccess = opts.s3PathStyleAccess as Boolean this.anonymous = opts.anonymous as Boolean this.s3Acl = parseS3Acl(opts.s3Acl as String) diff --git a/plugins/nf-amazon/src/test/nextflow/cloud/aws/config/AwsS3ConfigTest.groovy b/plugins/nf-amazon/src/test/nextflow/cloud/aws/config/AwsS3ConfigTest.groovy index 4813adc95b..96fbda3acb 100644 --- a/plugins/nf-amazon/src/test/nextflow/cloud/aws/config/AwsS3ConfigTest.groovy +++ b/plugins/nf-amazon/src/test/nextflow/cloud/aws/config/AwsS3ConfigTest.groovy @@ -124,18 +124,4 @@ class AwsS3ConfigTest extends Specification { SysEnv.pop() } - - def 'should set storage encryption via env variable' () { - given: - SysEnv.push([NXF_AWS_SSE_MODE: 'aws:kms', NXF_AWS_SSE_KMS_KEY_ID: 'xyz1']) - - when: - def client = new AwsS3Config([:]) - then: - client.storageKmsKeyId == 'xyz1' - client.storageEncryption == 'aws:kms' - - cleanup: - SysEnv.pop() - } } From 73459b7631fbaa15951e0d1707dd3dfa18f7294f Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Thu, 28 Sep 2023 11:48:12 -0500 Subject: [PATCH 17/20] Revert unrelated changes Signed-off-by: Ben Sherman --- modules/nextflow/src/main/groovy/nextflow/dag/DAG.groovy | 2 +- .../nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy | 2 +- .../src/main/groovy/nextflow/trace/GraphObserver.groovy | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/DAG.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/DAG.groovy index 93af836854..414d664e9c 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/DAG.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/DAG.groovy @@ -43,7 +43,7 @@ import nextflow.script.params.TupleOutParam import java.util.concurrent.atomic.AtomicLong /** - * Model a direct acyclic graph of the workflow definition. + * Model the directed acyclic graph of the workflow definition. * * @author Paolo Di Tommaso */ diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy index ea73f44f20..558087208c 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy @@ -29,7 +29,7 @@ import nextflow.processor.TaskRun import nextflow.script.params.FileOutParam import nextflow.trace.TraceRecord /** - * Model the task graph of a pipeline execution. + * Model the directed acyclic graph of the workflow definition. * * @author Ben Sherman */ diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy index 5fcd226074..ccadaf9597 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/GraphObserver.groovy @@ -79,9 +79,9 @@ class GraphObserver implements TraceObserver { @Override void onFlowComplete() { - // -- normalise the DAG - dag.normalize() - // -- render it to a file + // -- normalise the DAG + dag.normalize() + // -- render it to a file createRender().renderDocument(dag,file) } From d46b346e5045e23a8f29160f2289c47efe6aed2a Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Fri, 29 Sep 2023 15:30:53 -0500 Subject: [PATCH 18/20] Remove unused code Signed-off-by: Ben Sherman --- .../src/main/groovy/nextflow/Session.groovy | 2 +- .../src/main/groovy/nextflow/dag/TaskDAG.groovy | 15 +++------------ .../test/groovy/nextflow/dag/TaskDAGTest.groovy | 8 -------- 3 files changed, 4 insertions(+), 21 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index 96f2213732..f844b87349 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -1030,7 +1030,7 @@ class Session implements ISession { // save the completed task in the cache DB final trace = handler.safeTraceRecord() - taskDag.apply(handler.task, trace) + taskDag.saveToRecord(handler.task, trace) cache.putTaskAsync(handler, trace) // notify the event to the observers diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy index 558087208c..3b4ac1c68d 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy @@ -29,7 +29,7 @@ import nextflow.processor.TaskRun import nextflow.script.params.FileOutParam import nextflow.trace.TraceRecord /** - * Model the directed acyclic graph of the workflow definition. + * Model the directed acyclic graph of the workflow execution. * * @author Ben Sherman */ @@ -51,14 +51,12 @@ class TaskDAG { * @param task */ void addTask(TaskRun task) { - final hash = task.hash.toString() - final label = "[${hash.substring(0,2)}/${hash.substring(2,8)}] ${task.name}" final inputs = task.getInputFilesMap() sync.lock() try { // add new task to graph - vertices[task] = new Vertex(vertices.size(), label, inputs) + vertices[task] = new Vertex(inputs) } finally { sync.unlock() @@ -114,7 +112,7 @@ class TaskDAG { * @param task * @param record */ - void apply(TaskRun task, TraceRecord record) { + void saveToRecord(TaskRun task, TraceRecord record) { final vertex = vertices[task] record.inputs = vertex.inputs.collect { name, path -> @@ -135,15 +133,8 @@ class TaskDAG { @TupleConstructor(excludes = 'outputs') static class Vertex { - int index - String label Map inputs Set outputs - - String getName() { "t${index}" } - - @Override - String toString() { label } } } diff --git a/modules/nextflow/src/test/groovy/nextflow/dag/TaskDAGTest.groovy b/modules/nextflow/src/test/groovy/nextflow/dag/TaskDAGTest.groovy index 6e62682476..fa5ba19f24 100644 --- a/modules/nextflow/src/test/groovy/nextflow/dag/TaskDAGTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/dag/TaskDAGTest.groovy @@ -31,8 +31,6 @@ class TaskDAGTest extends Specification { given: def task1 = Mock(TaskRun) { - getHash() >> HashCode.fromString('00112233') - getName() >> 'foo' getInputFilesMap() >> [ 'data.txt': Paths.get('/inputs/data.txt') ] @@ -41,8 +39,6 @@ class TaskDAGTest extends Specification { ] } def task2 = Mock(TaskRun) { - getHash() >> HashCode.fromString('aabbccdd') - getName() >> 'bar' getInputFilesMap() >> [ 'data.foo': Paths.get('/work/00112233/data.foo') ] @@ -58,13 +54,9 @@ class TaskDAGTest extends Specification { def v1 = dag.vertices[task1] def v2 = dag.vertices[task2] then: - v1.index == 0 - v1.label == '[00/112233] foo' v1.inputs.size() == 1 v1.inputs['data.txt'] == Paths.get('/inputs/data.txt') and: - v2.index == 1 - v2.label == '[aa/bbccdd] bar' v2.inputs.size() == 1 v2.inputs['data.foo'] == Paths.get('/work/00112233/data.foo') From d851c2da017b5c6c27f5ee53c163e8c1f6c47da4 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Tue, 3 Oct 2023 10:15:17 -0500 Subject: [PATCH 19/20] Use CacheHelper to compute MD5 checksum Signed-off-by: Ben Sherman --- .../src/main/nextflow/extension/FilesEx.groovy | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy b/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy index c59adc4a45..86c70f596b 100644 --- a/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy +++ b/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy @@ -34,8 +34,8 @@ import java.nio.file.attribute.FileAttribute import java.nio.file.attribute.FileTime import java.nio.file.attribute.PosixFilePermission import java.nio.file.attribute.PosixFilePermissions -import java.security.MessageDigest +import com.google.common.hash.Hashing import groovy.transform.CompileStatic import groovy.transform.PackageScope import groovy.transform.stc.ClosureParams @@ -45,6 +45,7 @@ import nextflow.file.ETagAwareFile import nextflow.file.FileHelper import nextflow.file.FileSystemPathFactory import nextflow.io.ByteBufferBackedInputStream +import nextflow.util.CacheHelper import nextflow.util.CharsetHelper import nextflow.util.CheckHelper @@ -1611,14 +1612,6 @@ class FilesEx { return path.getETag() // otherwise compute checksum manually - final is = Files.newInputStream(path) - final md = MessageDigest.getInstance('MD5') - final buf = new byte[16 << 10] - - int len - while( (len=is.read(buf)) != -1 ) - md.update(buf, 0, len) - - new BigInteger(1, md.digest()).toString(16) + CacheHelper.hasher(Hashing.md5(), path, CacheHelper.HashMode.DEEP).hash().toString() } } From 501bce7efc25fd3ad2d96a4ed663a53b606fdbd3 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Tue, 3 Oct 2023 10:15:33 -0500 Subject: [PATCH 20/20] Add temporary debug logging Signed-off-by: Ben Sherman --- modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy | 2 ++ .../nextflow/src/main/groovy/nextflow/trace/TraceRecord.groovy | 3 +++ 2 files changed, 5 insertions(+) diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy index 3b4ac1c68d..328670d646 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/TaskDAG.groovy @@ -129,6 +129,8 @@ class TaskDAG { Files.size(path), FilesEx.getChecksum(path)) } + + log.info "task ${task.name} ; inputs: ${record.inputs} ; outputs: ${record.outputs}" } @TupleConstructor(excludes = 'outputs') diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/TraceRecord.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/TraceRecord.groovy index 773f7ec215..339edb77fe 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/TraceRecord.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/TraceRecord.groovy @@ -23,6 +23,7 @@ import groovy.json.StringEscapeUtils import groovy.transform.CompileStatic import groovy.transform.Memoized import groovy.transform.PackageScope +import groovy.transform.ToString import groovy.transform.TupleConstructor import groovy.util.logging.Slf4j import nextflow.cloud.types.CloudMachineInfo @@ -627,6 +628,7 @@ class TraceRecord implements Serializable { } @TupleConstructor + @ToString static class Input { String stageName Path storePath @@ -634,6 +636,7 @@ class TraceRecord implements Serializable { } @TupleConstructor + @ToString static class Output { Path path long size