diff --git a/README.md b/README.md index 7f29b38..b7377e6 100644 --- a/README.md +++ b/README.md @@ -28,17 +28,23 @@ The following options are available: `prov.enabled` -Create the provenance manifest (default: `true` if plugin is loaded). +Create the provenance report (default: `true` if plugin is loaded). `prov.file` -The path of the provenance manifest (default: `manifest.json`). +The path of the provenance report (default: `manifest.json`). `prov.format` -The manifest format. Can be `legacy` or `bco` (default: `legacy`). +The report format. The following formats are available: -*Note: The BCO format is experimental and may change in future releases. Visit the [BCO User Guide](https://docs.biocomputeobject.org/user_guide/) to learn more about this format and how to extend it with information that isn't available to Nextflow.* +- `bco`: Render a [BioCompute Object](https://biocomputeobject.org/). + + Visit the [BCO User Guide](https://docs.biocomputeobject.org/user_guide/) to learn more about this format and how to extend it with information that isn't available to Nextflow. + +- `dag`: Render the task graph as a Mermaid diagram embedded in an HTML document. + +- `legacy`: Render the legacy format originally defined in this plugin (default). `prov.overwrite` @@ -46,7 +52,7 @@ Overwrite any existing provenance report with the same name (default: `false`). `prov.patterns` -List of file patterns to include in the provenance manifest, from the set of published files. By default, all published files are included. +List of file patterns to include in the provenance report, from the set of published files. By default, all published files are included. ## Development diff --git a/plugins/nf-prov/src/main/nextflow/prov/BcoRenderer.groovy b/plugins/nf-prov/src/main/nextflow/prov/BcoRenderer.groovy index 9622080..3d31961 100644 --- a/plugins/nf-prov/src/main/nextflow/prov/BcoRenderer.groovy +++ b/plugins/nf-prov/src/main/nextflow/prov/BcoRenderer.groovy @@ -23,7 +23,6 @@ import java.time.format.DateTimeFormatter import groovy.json.JsonOutput import groovy.transform.CompileStatic import nextflow.Session -import nextflow.exception.AbortOperationException import nextflow.processor.TaskRun import nextflow.script.WorkflowMetadata import nextflow.util.CacheHelper @@ -36,53 +35,8 @@ import nextflow.util.CacheHelper @CompileStatic class BcoRenderer implements Renderer { - private URL repository - - private String commitId - - private String launchDir - - private String projectDir - - private String workDir - - /** - * Normalize local paths to remove environment-specific directories. - * - * @param path - */ - private String normalizePath(Path path) { - normalizePath(path.toUriString()) - } - - private String normalizePath(String path) { - // replace work directory with relative path - if( path.startsWith(workDir) ) - return path.replace(workDir, 'work') - - // replace project directory with source URL (if applicable) - if( repository && path.startsWith(projectDir) ) - return getProjectSourceUrl(path) - - // replace launch directory with relative path - if( path.startsWith(launchDir) ) - return path.replace(launchDir + '/', '') - - return path - } - - /** - * Get the source URL for a project asset. - * - * @param path - */ - private String getProjectSourceUrl(String path) { - // TODO: add other git providers - if( repository.host == 'github.com' ) - return path.replace(projectDir, "${repository}/tree/${commitId}") - else - return path - } + @Delegate + private PathNormalizer normalizer @Override void render(Session session, Set tasks, Map workflowOutputs, Path path) { @@ -92,13 +46,10 @@ class BcoRenderer implements Renderer { // get workflow metadata final metadata = session.workflowMetadata + this.normalizer = new PathNormalizer(metadata) + final manifest = metadata.manifest final nextflowMeta = metadata.nextflow - this.repository = metadata.repository ? new URL(metadata.repository) : null - this.commitId = metadata.commitId - this.projectDir = metadata.projectDir.toUriString() - this.launchDir = metadata.launchDir.toUriString() - this.workDir = metadata.workDir.toUriString() final dateCreated = DateTimeFormatter.ISO_OFFSET_DATE_TIME.format(metadata.start) final authors = (manifest.author ?: '').tokenize(',')*.trim() @@ -181,13 +132,16 @@ class BcoRenderer implements Renderer { // append git repository info if( metadata.repository ) { final extension_domain = bco.extension_domain as List + final scriptFile = metadata.scriptFile.toUriString() + final projectDir = metadata.projectDir.toUriString() + extension_domain << [ "extension_schema": "https://w3id.org/biocompute/extension_domain/1.1.0/scm/scm_extension.json", "scm_extension": [ "scm_repository": metadata.repository, "scm_type": "git", "scm_commit": metadata.commitId, - "scm_path": metadata.scriptFile.toUriString().replace(projectDir + '/', ''), + "scm_path": scriptFile.replace(projectDir + '/', ''), "scm_preview": normalizePath(metadata.scriptFile) ] ] diff --git a/plugins/nf-prov/src/main/nextflow/prov/DagRenderer.groovy b/plugins/nf-prov/src/main/nextflow/prov/DagRenderer.groovy new file mode 100644 index 0000000..16c541e --- /dev/null +++ b/plugins/nf-prov/src/main/nextflow/prov/DagRenderer.groovy @@ -0,0 +1,273 @@ +/* + * Copyright 2023, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.prov + +import java.nio.file.Path + +import groovy.transform.CompileStatic +import groovy.transform.TupleConstructor +import nextflow.Session +import nextflow.processor.TaskRun +import nextflow.script.WorkflowMetadata +import nextflow.util.StringUtils + +/** + * Renderer for the task graph format. + * + * @author Ben Sherman + */ +@CompileStatic +class DagRenderer implements Renderer { + + @Delegate + private PathNormalizer normalizer + + @Override + void render(Session session, Set tasks, Map workflowOutputs, Path path) { + // get workflow metadata + final metadata = session.workflowMetadata + this.normalizer = new PathNormalizer(metadata) + + // get workflow inputs + final taskLookup = ProvHelper.getTaskLookup(tasks) + final workflowInputs = ProvHelper.getWorkflowInputs(tasks, taskLookup) + + // construct task graph + final dag = new Dag(getVertices(tasks), taskLookup) + + path.text = renderHtml(dag) + } + + private Map getVertices(Set tasks) { + def result = [:] + for( def task : tasks ) { + final inputs = task.getInputFilesMap() + final outputs = ProvHelper.getTaskOutputs(task) + + result[task] = new Vertex(result.size(), task.name, inputs, outputs) + } + + return result + } + + /** + * Render the task graph as a Mermaid diagram embedded + * in an HTML document. + * + * @param dag + */ + private String renderHtml(Dag dag) { + // load html template + final writer = new StringWriter() + final res = DagRenderer.class.getResourceAsStream('mermaid.dag.template.html') + int ch + while( (ch=res.read()) != -1 ) { + writer.append(ch as char) + } + final template = writer.toString() + + // render html document + final mmd = renderDiagram(dag) + return template.replace('REPLACE_WITH_NETWORK_DATA', mmd) + } + + /** + * Render the task graph as a Mermaid diagram. + * + * @param dag + */ + private String renderDiagram(Dag dag) { + // construct task tree + final taskTree = getTaskTree(dag.vertices) + + // render diagram + def lines = [] as List + lines << "flowchart TD" + + // render workflow inputs + final inputs = [:] as Map + + lines << " subgraph \" \"" + + dag.vertices.each { task, vertex -> + vertex.inputs.each { name, path -> + if( dag.getProducerVertex(path) || path in inputs ) + return + + inputs[path] = "in${inputs.size()}".toString() + lines << " ${inputs[path]}[\"${path.name}\"]".toString() + + // add hyperlink if path is remote URL + final sourcePath = normalizePath(path) + if( isRemotePath(sourcePath) ) + lines << " click ${inputs[path]} href \"${sourcePath}\" _blank".toString() + } + } + + lines << " end" + + // render tasks + renderTaskTree(lines, null, taskTree) + + // render task inputs + final taskOutputs = [] as Set + + dag.vertices.each { task, vertex -> + vertex.inputs.each { name, path -> + // render task input from predecessor task + final pred = dag.getProducerVertex(path) + if( pred != null ) { + lines << " ${pred.name} -->|${path.name}| ${vertex.name}".toString() + taskOutputs << path + } + + // render task input from workflow input + else { + lines << " ${inputs[path]} --> ${vertex.name}".toString() + } + } + } + + // render task outputs + final outputs = [:] as Map + + dag.vertices.each { task, vertex -> + vertex.outputs.each { path -> + if( path in taskOutputs || path in outputs ) + return + + outputs[path] = "out${outputs.size()}".toString() + lines << " ${vertex.name} --> ${outputs[path]}".toString() + } + } + + // render workflow outputs + lines << " subgraph \" \"" + + outputs.each { path, label -> + lines << " ${label}[${path.name}]".toString() + } + + lines << " end" + + return lines.join('\n') + } + + /** + * Construct a task tree with a subgraph for each subworkflow. + * + * @param vertices + */ + private Map getTaskTree(Map vertices) { + def taskTree = [:] + + for( def entry : vertices ) { + def task = entry.key + def vertex = entry.value + + // infer subgraph keys from fully qualified process name + final result = getSubgraphKeys(task.processor.name) + final keys = (List)result[0] + + // update vertex label + final hash = task.hash.toString() + vertex.label = task.name.replace(task.processor.name, (String)result[1]) + + // navigate to given subgraph + def subgraph = taskTree + for( def key : keys ) { + if( key !in subgraph ) + subgraph[key] = [:] + subgraph = subgraph[key] + } + + // add vertex to tree + subgraph[vertex.name] = vertex + } + + return taskTree + } + + /** + * Get the subgraph keys from a fully qualified process name. + * + * @param name + */ + private List getSubgraphKeys(String name) { + final tokens = name.tokenize(':') + return [ + tokens[0.. lines, String name, Map taskTree) { + if( name ) + lines << " subgraph ${name}".toString() + + taskTree.each { key, value -> + if( value instanceof Map ) + renderTaskTree(lines, key, value) + else if( value instanceof Vertex ) + lines << " ${renderTask(value)}".toString() + } + + if( name ) + lines << " end" + } + + private String renderTask(Vertex vertex) { + return "${vertex.name}([\"${vertex.label}\"])" + } + + @TupleConstructor + static class Dag { + Map vertices + Map taskLookup + + Vertex getProducerVertex(Path path) { + vertices[taskLookup[path]] + } + } + + @TupleConstructor + static class Vertex { + int index + String label + Map inputs + List outputs + + String getName() { "t${index}" } + + @Override + String toString() { label } + } + +} diff --git a/plugins/nf-prov/src/main/nextflow/prov/PathNormalizer.groovy b/plugins/nf-prov/src/main/nextflow/prov/PathNormalizer.groovy new file mode 100644 index 0000000..aa3186e --- /dev/null +++ b/plugins/nf-prov/src/main/nextflow/prov/PathNormalizer.groovy @@ -0,0 +1,89 @@ +/* + * Copyright 2023, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.prov + +import java.nio.file.Path + +import groovy.transform.CompileStatic +import nextflow.script.WorkflowMetadata + +/** + * + * @author Ben Sherman + */ +@CompileStatic +class PathNormalizer { + + private URL repository + + private String commitId + + private String launchDir + + private String projectDir + + private String workDir + + PathNormalizer(WorkflowMetadata metadata) { + repository = metadata.repository ? new URL(metadata.repository) : null + commitId = metadata.commitId + projectDir = metadata.projectDir.toUriString() + launchDir = metadata.launchDir.toUriString() + workDir = metadata.workDir.toUriString() + } + + /** + * Normalize paths so that local absolute paths become + * relative paths, and local paths derived from remote URLs + * become the URLs. + * + * @param path + */ + String normalizePath(Path path) { + normalizePath(path.toUriString()) + } + + String normalizePath(String path) { + // replace work directory with relative path + if( path.startsWith(workDir) ) + return path.replace(workDir, 'work') + + // replace project directory with source URL (if applicable) + if( repository && path.startsWith(projectDir) ) + return getProjectSourceUrl(path) + + // replace launch directory with relative path + if( path.startsWith(launchDir) ) + return path.replace(launchDir + '/', '') + + return path + } + + /** + * Get the source URL for a project asset. + * + * @param path + */ + private String getProjectSourceUrl(String path) { + // TODO: add other git providers + if( repository.host == 'github.com' ) + return path.replace(projectDir, "${repository}/tree/${commitId}") + else + return path + } + +} diff --git a/plugins/nf-prov/src/main/nextflow/prov/ProvObserver.groovy b/plugins/nf-prov/src/main/nextflow/prov/ProvObserver.groovy index 1d446ac..59390c8 100644 --- a/plugins/nf-prov/src/main/nextflow/prov/ProvObserver.groovy +++ b/plugins/nf-prov/src/main/nextflow/prov/ProvObserver.groovy @@ -42,7 +42,7 @@ class ProvObserver implements TraceObserver { public static final String DEF_FILE_NAME = 'manifest.json' - public static final List VALID_FORMATS = ['legacy', 'bco'] + public static final List VALID_FORMATS = ['bco', 'dag', 'legacy'] private Session session @@ -56,7 +56,7 @@ class ProvObserver implements TraceObserver { private Set tasks = [] - private Map publishedOutputs = [:] + private Map workflowOutputs = [:] ProvObserver(Path path, String format, Boolean overwrite, List patterns) { this.path = path @@ -68,12 +68,15 @@ class ProvObserver implements TraceObserver { } private Renderer createRenderer(String format) { - if( format == 'legacy' ) - return new LegacyRenderer() - if( format == 'bco' ) return new BcoRenderer() + if( format == 'dag' ) + return new DagRenderer() + + if( format == 'legacy' ) + return new LegacyRenderer() + throw new IllegalArgumentException("Invalid provenance format -- valid formats are ${VALID_FORMATS.join(', ')}") } @@ -115,7 +118,7 @@ class ProvObserver implements TraceObserver { if( !match ) return - publishedOutputs[source] = destination + workflowOutputs[source] = destination } @Override @@ -123,7 +126,7 @@ class ProvObserver implements TraceObserver { if( !session.isSuccess() ) return - renderer.render(session, tasks, publishedOutputs, path) + renderer.render(session, tasks, workflowOutputs, path) } } diff --git a/plugins/nf-prov/src/resources/nextflow/prov/mermaid.dag.template.html b/plugins/nf-prov/src/resources/nextflow/prov/mermaid.dag.template.html new file mode 100644 index 0000000..2c2a22c --- /dev/null +++ b/plugins/nf-prov/src/resources/nextflow/prov/mermaid.dag.template.html @@ -0,0 +1,29 @@ + + + + + + +
+REPLACE_WITH_NETWORK_DATA
+
+ + +