From ff13382afa7d19c2daca8c998559aef9dbecc9f1 Mon Sep 17 00:00:00 2001 From: Brian Reilly Date: Fri, 14 Jan 2022 17:40:48 +0000 Subject: [PATCH 1/9] Update cromwell version from 74 to 75 --- project/Version.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/Version.scala b/project/Version.scala index 929813bc892..14dd94c59b2 100644 --- a/project/Version.scala +++ b/project/Version.scala @@ -5,7 +5,7 @@ import sbt._ object Version { // Upcoming release, or current if we're on a master / hotfix branch - val cromwellVersion = "74" + val cromwellVersion = "75" /** * Returns true if this project should be considered a snapshot. From 07fe047a2cdc800e5233c3715fe5085d3a92ed96 Mon Sep 17 00:00:00 2001 From: Miguel Covarrubias Date: Tue, 18 Jan 2022 11:36:36 -0500 Subject: [PATCH 2/9] Homebrew publishing fixes [CROM-6857] [CROM-6868] (#6652) --- publish/publish_workflow.wdl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/publish/publish_workflow.wdl b/publish/publish_workflow.wdl index caf94ae1292..4d927c64a63 100644 --- a/publish/publish_workflow.wdl +++ b/publish/publish_workflow.wdl @@ -460,12 +460,17 @@ task releaseHomebrew { doc: "https://docs.brew.sh/How-To-Open-a-Homebrew-Pull-Request" } - # 'brew bump-formula-pr' seems very promising and could simplify a lot of this, however it's unclear if it would be - # able to update the womtool version too. + # 'brew bump-formula-pr' seems very promising and could simplify a lot of this, however it doesn't seem to update + # the womtool version which causes the Homebrew CI checks to fail. command <<< # Do not use `set -x` or it will print the GitHub token! set -euo pipefail + # Homebrew no longer lets the `root` user run its scripts. The `linuxbrew` user is already set up in the default + # `broadinstitute/cromwell-publish:latest` image used for this task so `su linuxbrew` to run with that identity + # instead. Despite the name there doesn't appear to be anything special about this user, it just isn't root. + su linuxbrew + echo 'Setup Git' /cromwell-publish/git-setup.sh \ --tokenFile '~{githubTokenFile}' \ @@ -577,6 +582,7 @@ task releaseHomebrew { sed \ -i \ -e '/guidelines for contributing/s/\[[[:space:]]\]/[x]/' \ + -e '/ensured that your commits follow/s/\[[[:space:]]\]/[x]/' \ -e "/checked that there aren't other open/s/\[[[:space:]]\]/[x]/" \ -e "/built your formula locally/s/\[[[:space:]]\]/[x]/" \ -e "/your test running fine/s/\[[[:space:]]\]/[x]/" \ From 0b4783825adb07d3a5034ed1cbceb046837f8319 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Fri, 21 Jan 2022 14:26:18 -0500 Subject: [PATCH 3/9] New v2beta backend status AwaitingCloudQuota [BW-1012] (#6655) --- CHANGELOG.md | 26 ++ .../pipelines/common/api/RunStatus.scala | 11 +- .../v2beta/api/Deserialization.scala | 3 + .../api/request/GetRequestHandler.scala | 28 +- .../api/request/GetRequestHandlerSpec.scala | 363 ++++++++++++++++++ 5 files changed, 420 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 375654a328b..c3301af2561 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,31 @@ # Cromwell Change Log +## 75 Release Notes + +### New `AwaitingCloudQuota` backend status + +For Cloud Life Sciences v2beta only. + +When a user's GCP project reaches a quota limit, Cromwell continues to submit jobs and Life Sciences acknowledges them as created even if the physical VM cannot yet start. Cromwell now detects this condition in the backend and reports `AwaitingCloudQuota`. 
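+
+Cromwell derives this status from the quota messages that Life Sciences attaches to the operation's events while a VM cannot be provisioned. A typical quota-delay event looks like the following (quoted, abridged, from the test fixtures added in this release; the `description` text is what Cromwell matches on):
+
+```json
+{
+  "delayed": {
+    "cause": "generic::resource_exhausted: allocating: selecting resources: selecting region and zone: no available zones: us-west3: 1 CPUS (0/1 available) usage too high",
+    "metrics": ["CPUS"]
+  },
+  "description": "A resource limit has delayed the operation: generic::resource_exhausted: ...",
+  "timestamp": "2022-01-19T21:54:07.717679160Z"
+}
+```
+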
+ +The status is informational and does not require any action. Users wishing to maximize throughput can use `AwaitingCloudQuota` as an indication they should check quota in Cloud Console and request a quota increase from GCP. + +`AwaitingCloudQuota` will appear between the `Initializing` and `Running` backend statuses, and will be skipped if not applicable. + +Now: + +| Status in metadata |Quota normal| Quota delay | Status meaning | +|--------------------|----|----------------------|---------------------------------------------------| +| `executionStatus` |`Running`| `Running` | Job state Cromwell is requesting from the backend | +| `backendStatus` |`Running`| `AwaitingCloudQuota` | Job state reported by backend | + +Previously: + +| Status in metadata |Quota normal|Quota delay| Status meaning | +|--------------------|----|----|-----------------------------------------------------------| +| `executionStatus` |`Running`|`Running`| Job state Cromwell is requesting from the backend | +| `backendStatus` |`Running`|`Running`| Job state reported by backend | + ## 73 Release Notes ### Workflow Restart Performance Improvements diff --git a/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/api/RunStatus.scala b/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/api/RunStatus.scala index 77a10a7b40d..51064deff21 100644 --- a/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/api/RunStatus.scala +++ b/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/api/RunStatus.scala @@ -6,18 +6,11 @@ import cromwell.core.ExecutionEvent import scala.util.Try -sealed trait RunStatus { - import RunStatus._ - - // Could be defined as false for Initializing and true otherwise, but this is more defensive. 
- def isRunningOrComplete = this match { - case Running | _: TerminalRunStatus => true - case _ => false - } -} +sealed trait RunStatus object RunStatus { case object Initializing extends RunStatus + case object AwaitingCloudQuota extends RunStatus case object Running extends RunStatus sealed trait TerminalRunStatus extends RunStatus { diff --git a/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/api/Deserialization.scala b/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/api/Deserialization.scala index 88ff3a298b4..f8c7c779c04 100644 --- a/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/api/Deserialization.scala +++ b/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/api/Deserialization.scala @@ -28,6 +28,9 @@ private [api] object Deserialization { implicit class OperationDeserialization(val operation: Operation) extends AnyVal { /** * Deserializes the events to com.google.api.services.genomics.v2beta.model.Event + * + * There could also be entries of `com.google.api.services.lifesciences.v2beta.model.DelayedEvent` + * They are effectively upcast to regular `Event` even though they're unrelated types */ def events: ErrorOr[List[Event]] = { val eventsErrorOrOption = for { diff --git a/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/api/request/GetRequestHandler.scala b/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/api/request/GetRequestHandler.scala index 7f989423e3c..320cc89b1f9 100644 --- a/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/api/request/GetRequestHandler.scala +++ b/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/api/request/GetRequestHandler.scala @@ -10,7 +10,7 @@ import common.validation.Validation._ import cromwell.backend.google.pipelines.common.action.ActionLabels._ import cromwell.backend.google.pipelines.common.api.PipelinesApiRequestManager._ import cromwell.backend.google.pipelines.common.api.RunStatus -import cromwell.backend.google.pipelines.common.api.RunStatus.{Initializing, Running, Success, UnsuccessfulRunStatus} +import cromwell.backend.google.pipelines.common.api.RunStatus.{AwaitingCloudQuota, Initializing, Running, Success, UnsuccessfulRunStatus} import cromwell.backend.google.pipelines.v2beta.PipelinesConversions._ import cromwell.backend.google.pipelines.v2beta.api.Deserialization._ import cromwell.backend.google.pipelines.v2beta.api.request.ErrorReporter._ @@ -53,10 +53,10 @@ trait GetRequestHandler { this: RequestHandler => UnsuccessfulRunStatus(Status.UNKNOWN, Option(errorMessage), Nil, None, None, None, wasPreemptible = false) } else { try { + val events: List[Event] = operation.events.fallBackTo(List.empty)(pollingRequest.workflowId -> operation) if (operation.getDone) { val metadata = Try(operation.getMetadata.asScala.toMap).getOrElse(Map[String, AnyRef]()) // Deserialize the response - val events: List[Event] = operation.events.fallBackTo(List.empty)(pollingRequest.workflowId -> operation) val pipeline: Option[Pipeline] = operation.pipeline.flatMap( _.toErrorOr.fallBack(pollingRequest.workflowId -> operation) ) @@ -108,6 +108,8 @@ trait GetRequestHandler { this: RequestHandler => errorReporter.toUnsuccessfulRunStatus(error, events) case None => 
Success(executionEvents, machineType, zone, instanceName) } + } else if (isQuotaDelayed(events)) { + AwaitingCloudQuota } else if (operation.hasStarted) { Running } else { @@ -164,4 +166,26 @@ trait GetRequestHandler { this: RequestHandler => starterEvent.toList ++ filteredExecutionEvents ++ completionEvent } + + // Future enhancement: parse as `com.google.api.services.lifesciences.v2beta.model.DelayedEvent` instead of + // generic `Event` and take advantage of `getMetrics` which has a string array of problem quota(s): + // "metrics": [ + // "CPUS" + // ] + private def isQuotaDelayed(events: List[Event]): Boolean = { + events.sortBy(_.getTimestamp).reverse.headOption match { + case Some(event) => + quotaMessages.exists(event.getDescription.contains) + case None => + // If the events list is empty, we're not waiting for quota yet + false + } + } + + private val quotaMessages = List( + "A resource limit has delayed the operation", + "usage too high", + "no available zones", + "resource_exhausted" + ) } diff --git a/supportedBackends/google/pipelines/v2beta/src/test/scala/cromwell/backend/google/pipelines/v2beta/api/request/GetRequestHandlerSpec.scala b/supportedBackends/google/pipelines/v2beta/src/test/scala/cromwell/backend/google/pipelines/v2beta/api/request/GetRequestHandlerSpec.scala index f8cbacfb67e..802a7b68106 100644 --- a/supportedBackends/google/pipelines/v2beta/src/test/scala/cromwell/backend/google/pipelines/v2beta/api/request/GetRequestHandlerSpec.scala +++ b/supportedBackends/google/pipelines/v2beta/src/test/scala/cromwell/backend/google/pipelines/v2beta/api/request/GetRequestHandlerSpec.scala @@ -291,6 +291,369 @@ class GetRequestHandlerSpec extends AnyFlatSpec with CromwellTimeoutSpec with Ma None, None ) + ), + // As of 2022-01 the zone `us-west3` in `broad-dsde-cromwell-dev` has its CPU quota purposely de-rated to 1 for testing + ("check that a job is AwaitingCloudQuota if its most recent event is quota exhaustion", + """{ + | "metadata": { + | "@type": "type.googleapis.com/google.cloud.lifesciences.v2beta.Metadata", + | "createTime": "2022-01-19T21:53:55.138960Z", + | "events": [ + | { + | "delayed": { + | "cause": "generic::resource_exhausted: allocating: selecting resources: selecting region and zone: no available zones: us-west3: 1 CPUS (0/1 available) usage too high", + | "metrics": [ + | "CPUS" + | ] + | }, + | "description": "A resource limit has delayed the operation: generic::resource_exhausted: allocating: selecting resources: selecting region and zone: no available zones: us-west3: 1 CPUS (0/1 available) usage too high", + | "timestamp": "2022-01-19T21:54:07.717679160Z" + | } + | ], + | "labels": { + | "cromwell-workflow-id": "cromwell-ac888b4e-2e6b-4dcc-a537-3c6db7764037", + | "wdl-task-name": "sleep" + | }, + | "pipeline": { + | "actions": [ + | { + | "commands": [ + | "-c", + | "printf '%s %s\\n' \"$(date -u '+%Y/%m/%d %H:%M:%S')\" Starting\\ container\\ setup." 
+ | ], + | "entrypoint": "/bin/sh", + | "imageUri": "gcr.io/google.com/cloudsdktool/cloud-sdk:354.0.0-alpine", + | "labels": { + | "logging": "ContainerSetup" + | }, + | "timeout": "300s" + | } + | ], + | "environment": { + | "MEM_SIZE": "2.0", + | "MEM_UNIT": "GB" + | }, + | "resources": { + | "virtualMachine": { + | "bootDiskSizeGb": 12, + | "bootImage": "projects/cos-cloud/global/images/family/cos-stable", + | "disks": [ + | { + | "name": "local-disk", + | "sizeGb": 10, + | "type": "pd-ssd" + | } + | ], + | "labels": { + | "cromwell-workflow-id": "cromwell-ac888b4e-2e6b-4dcc-a537-3c6db7764037", + | "goog-pipelines-worker": "true", + | "wdl-task-name": "sleep" + | }, + | "machineType": "custom-1-2048", + | "network": {}, + | "nvidiaDriverVersion": "450.51.06", + | "serviceAccount": { + | "email": "centaur@broad-dsde-cromwell-dev.iam.gserviceaccount.com", + | "scopes": [ + | "https://www.googleapis.com/auth/compute", + | "https://www.googleapis.com/auth/devstorage.full_control", + | "https://www.googleapis.com/auth/cloudkms", + | "https://www.googleapis.com/auth/userinfo.email", + | "https://www.googleapis.com/auth/userinfo.profile", + | "https://www.googleapis.com/auth/monitoring.write", + | "https://www.googleapis.com/auth/bigquery", + | "https://www.googleapis.com/auth/cloud-platform" + | ] + | }, + | "volumes": [ + | { + | "persistentDisk": { + | "sizeGb": 10, + | "type": "pd-ssd" + | }, + | "volume": "local-disk" + | } + | ] + | }, + | "zones": [ + | "us-west3-a", + | "us-west3-b", + | "us-west3-c" + | ] + | }, + | "timeout": "604800s" + | } + | }, + | "name": "projects/1005074806481/locations/us-central1/operations/3874882033889365536" + |}""".stripMargin, + AwaitingCloudQuota + ), + ("check that a job is Running and no longer AwaitingCloudQuota once a worker assigns", + """{ + | "metadata": { + | "@type": "type.googleapis.com/google.cloud.lifesciences.v2beta.Metadata", + | "createTime": "2022-01-19T21:53:55.138960Z", + | "events": [ + | { + | "description": "Started pulling \"gcr.io/google.com/cloudsdktool/cloud-sdk:354.0.0-alpine\"", + | "pullStarted": { + | "imageUri": "gcr.io/google.com/cloudsdktool/cloud-sdk:354.0.0-alpine" + | }, + | "timestamp": "2022-01-19T22:09:55.410251187Z" + | }, + | { + | "description": "Worker \"google-pipelines-worker-e6c8bf8035860b2cd69488497bd602d8\" assigned in \"us-west3-c\" on a \"custom-1-2048\" machine", + | "timestamp": "2022-01-19T22:09:20.363771714Z", + | "workerAssigned": { + | "instance": "google-pipelines-worker-e6c8bf8035860b2cd69488497bd602d8", + | "machineType": "custom-1-2048", + | "zone": "us-west3-c" + | } + | }, + | { + | "delayed": { + | "cause": "generic::resource_exhausted: allocating: selecting resources: selecting region and zone: no available zones: us-west3: 1 CPUS (0/1 available) usage too high", + | "metrics": [ + | "CPUS" + | ] + | }, + | "description": "A resource limit has delayed the operation: generic::resource_exhausted: allocating: selecting resources: selecting region and zone: no available zones: us-west3: 1 CPUS (0/1 available) usage too high", + | "timestamp": "2022-01-19T21:54:07.717679160Z" + | } + | ], + | "labels": { + | "cromwell-workflow-id": "cromwell-ac888b4e-2e6b-4dcc-a537-3c6db7764037", + | "wdl-task-name": "sleep" + | }, + | "pipeline": { + | "actions": [ + | { + | "commands": [ + | "-c", + | "printf '%s %s\\n' \"$(date -u '+%Y/%m/%d %H:%M:%S')\" Starting\\ container\\ setup." 
+ | ], + | "entrypoint": "/bin/sh", + | "imageUri": "gcr.io/google.com/cloudsdktool/cloud-sdk:354.0.0-alpine", + | "labels": { + | "logging": "ContainerSetup" + | }, + | "timeout": "300s" + | } + | ], + | "environment": { + | "MEM_SIZE": "2.0", + | "MEM_UNIT": "GB" + | }, + | "resources": { + | "virtualMachine": { + | "bootDiskSizeGb": 12, + | "bootImage": "projects/cos-cloud/global/images/family/cos-stable", + | "disks": [ + | { + | "name": "local-disk", + | "sizeGb": 10, + | "type": "pd-ssd" + | } + | ], + | "labels": { + | "cromwell-workflow-id": "cromwell-ac888b4e-2e6b-4dcc-a537-3c6db7764037", + | "goog-pipelines-worker": "true", + | "wdl-task-name": "sleep" + | }, + | "machineType": "custom-1-2048", + | "network": {}, + | "nvidiaDriverVersion": "450.51.06", + | "serviceAccount": { + | "email": "centaur@broad-dsde-cromwell-dev.iam.gserviceaccount.com", + | "scopes": [ + | "https://www.googleapis.com/auth/compute", + | "https://www.googleapis.com/auth/devstorage.full_control", + | "https://www.googleapis.com/auth/cloudkms", + | "https://www.googleapis.com/auth/userinfo.email", + | "https://www.googleapis.com/auth/userinfo.profile", + | "https://www.googleapis.com/auth/monitoring.write", + | "https://www.googleapis.com/auth/bigquery", + | "https://www.googleapis.com/auth/cloud-platform" + | ] + | }, + | "volumes": [ + | { + | "persistentDisk": { + | "sizeGb": 10, + | "type": "pd-ssd" + | }, + | "volume": "local-disk" + | } + | ] + | }, + | "zones": [ + | "us-west3-a", + | "us-west3-b", + | "us-west3-c" + | ] + | }, + | "timeout": "604800s" + | }, + | "startTime": "2022-01-19T22:09:20.363771714Z" + | }, + | "name": "projects/1005074806481/locations/us-central1/operations/3874882033889365536" + |} + | + | + |""".stripMargin, + Running + ), + ("check that a job is no longer AwaitingCloudQuota once it finishes", + """{ + | "done": true, + | "metadata": { + | "@type": "type.googleapis.com/google.cloud.lifesciences.v2beta.Metadata", + | "createTime": "2022-01-19T19:17:13.175579Z", + | "endTime": "2022-01-19T19:37:22.764120036Z", + | "events": [ + | { + | "description": "Worker released", + | "timestamp": "2022-01-19T19:37:22.764120036Z", + | "workerReleased": { + | "instance": "google-pipelines-worker-8eff543e6858c204c8f67520aee75432", + | "zone": "us-west3-c" + | } + | }, + | { + | "containerStopped": { + | "actionId": 19 + | }, + | "description": "Stopped running shortened for test", + | "timestamp": "2022-01-19T19:37:19.822873814Z" + | }, + | { + | "description": "Started pulling \"gcr.io/google.com/cloudsdktool/cloud-sdk:354.0.0-alpine\"", + | "pullStarted": { + | "imageUri": "gcr.io/google.com/cloudsdktool/cloud-sdk:354.0.0-alpine" + | }, + | "timestamp": "2022-01-19T19:32:55.709674372Z" + | }, + | { + | "description": "Worker \"google-pipelines-worker-8eff543e6858c204c8f67520aee75432\" assigned in \"us-west3-c\" on a \"custom-1-2048\" machine", + | "timestamp": "2022-01-19T19:32:19.204055448Z", + | "workerAssigned": { + | "instance": "google-pipelines-worker-8eff543e6858c204c8f67520aee75432", + | "machineType": "custom-1-2048", + | "zone": "us-west3-c" + | } + | }, + | { + | "delayed": { + | "cause": "generic::resource_exhausted: allocating: selecting resources: selecting region and zone: no available zones: us-west3: 1 CPUS (0/1 available) usage too high", + | "metrics": [ + | "CPUS" + | ] + | }, + | "description": "A resource limit has delayed the operation: generic::resource_exhausted: allocating: selecting resources: selecting region and zone: no available zones: us-west3: 1 CPUS 
(0/1 available) usage too high", + | "timestamp": "2022-01-19T19:17:14.948193837Z" + | } + | ], + | "labels": { + | "cromwell-workflow-id": "cromwell-058bff35-4a55-4c0f-9113-0885f4119cd9", + | "wdl-task-name": "sleep" + | }, + | "pipeline": { + | "actions": [ + | { + | "commands": [ + | "-c", + | "printf '%s %s\\n' \"$(date -u '+%Y/%m/%d %H:%M:%S')\" Starting\\ container\\ setup." + | ], + | "entrypoint": "/bin/sh", + | "imageUri": "gcr.io/google.com/cloudsdktool/cloud-sdk:354.0.0-alpine", + | "labels": { + | "logging": "ContainerSetup" + | }, + | "timeout": "300s" + | }, + | { + | "alwaysRun": true, + | "commands": [ + | "-c", + | "python3 -c 'import base64; shortened for test" + | ], + | "entrypoint": "/bin/sh", + | "imageUri": "gcr.io/google.com/cloudsdktool/cloud-sdk:354.0.0-alpine", + | "labels": { + | "tag": "Delocalization" + | } + | } + | ], + | "environment": { + | "MEM_SIZE": "2.0", + | "MEM_UNIT": "GB" + | }, + | "resources": { + | "virtualMachine": { + | "bootDiskSizeGb": 12, + | "bootImage": "projects/cos-cloud/global/images/family/cos-stable", + | "disks": [ + | { + | "name": "local-disk", + | "sizeGb": 10, + | "type": "pd-ssd" + | } + | ], + | "labels": { + | "cromwell-workflow-id": "cromwell-058bff35-4a55-4c0f-9113-0885f4119cd9", + | "goog-pipelines-worker": "true", + | "wdl-task-name": "sleep" + | }, + | "machineType": "custom-1-2048", + | "network": {}, + | "nvidiaDriverVersion": "450.51.06", + | "serviceAccount": { + | "email": "centaur@broad-dsde-cromwell-dev.iam.gserviceaccount.com", + | "scopes": [ + | "https://www.googleapis.com/auth/compute", + | "https://www.googleapis.com/auth/devstorage.full_control", + | "https://www.googleapis.com/auth/cloudkms", + | "https://www.googleapis.com/auth/userinfo.email", + | "https://www.googleapis.com/auth/userinfo.profile", + | "https://www.googleapis.com/auth/monitoring.write", + | "https://www.googleapis.com/auth/bigquery", + | "https://www.googleapis.com/auth/cloud-platform" + | ] + | }, + | "volumes": [ + | { + | "persistentDisk": { + | "sizeGb": 10, + | "type": "pd-ssd" + | }, + | "volume": "local-disk" + | } + | ] + | }, + | "zones": [ + | "us-west3-a", + | "us-west3-b", + | "us-west3-c" + | ] + | }, + | "timeout": "604800s" + | }, + | "startTime": "2022-01-19T19:32:19.204055448Z" + | }, + | "name": "projects/1005074806481/locations/us-central1/operations/5001350794958839237", + | "response": { + | "@type": "type.googleapis.com/cloud.lifesciences.pipelines.RunPipelineResponse" + | } + |}""".stripMargin, + Success(List( + new ExecutionEvent("waiting for quota", OffsetDateTime.parse("2022-01-19T19:17:13.175579Z"), None), + new ExecutionEvent("Worker released", OffsetDateTime.parse("2022-01-19T19:37:22.764120036Z"), None), + new ExecutionEvent("Stopped running shortened for test", OffsetDateTime.parse("2022-01-19T19:37:19.822873814Z"), None), + new ExecutionEvent("Started pulling \"gcr.io/google.com/cloudsdktool/cloud-sdk:354.0.0-alpine\"", OffsetDateTime.parse("2022-01-19T19:32:55.709674372Z"), Option("Pulling \"gcr.io/google.com/cloudsdktool/cloud-sdk:354.0.0-alpine\"")), + new ExecutionEvent("Worker \"google-pipelines-worker-8eff543e6858c204c8f67520aee75432\" assigned in \"us-west3-c\" on a \"custom-1-2048\" machine", OffsetDateTime.parse("2022-01-19T19:32:19.204055448Z"), None), + new ExecutionEvent("A resource limit has delayed the operation: generic::resource_exhausted: allocating: selecting resources: selecting region and zone: no available zones: us-west3: 1 CPUS (0/1 available) usage too high", 
OffsetDateTime.parse("2022-01-19T19:17:14.948193837Z"), None), + new ExecutionEvent("Complete in GCE / Cromwell Poll Interval", OffsetDateTime.parse("2022-01-19T19:37:22.764120036Z"), None) + ), Option("custom-1-2048"), Option("us-west3-c"), Option("google-pipelines-worker-8eff543e6858c204c8f67520aee75432")) ) ) From 42cce67cf53def59d4da7eb6e1a923ea0bec6575 Mon Sep 17 00:00:00 2001 From: Katrina P <68349264+kpierre13@users.noreply.github.com> Date: Tue, 25 Jan 2022 17:31:46 -0500 Subject: [PATCH 4/9] [BW-1004]_KP - Metadata Statistics processEvents could be renamed (#6656) * KP_BW-1004 - Metadata Statistics processEvents could be renamed * Changing additional methods. * Changing usage for new method. * Changing usage for new method. --- .../impl/MetadataStatisticsRecorder.scala | 6 +-- .../metadata/impl/WriteMetadataActor.scala | 2 +- .../MetadataStatisticsRecorderSpec.scala | 40 +++++++++---------- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/services/src/main/scala/cromwell/services/metadata/impl/MetadataStatisticsRecorder.scala b/services/src/main/scala/cromwell/services/metadata/impl/MetadataStatisticsRecorder.scala index c797e4f1f4e..5e9316ac89f 100644 --- a/services/src/main/scala/cromwell/services/metadata/impl/MetadataStatisticsRecorder.scala +++ b/services/src/main/scala/cromwell/services/metadata/impl/MetadataStatisticsRecorder.scala @@ -49,11 +49,11 @@ object MetadataStatisticsRecorder { } sealed trait MetadataStatisticsRecorder { - def processEvents(putEvents: Iterable[MetadataEvent]): Vector[HeavyMetadataAlert] + def processEventsAndGenerateAlerts(putEvents: Iterable[MetadataEvent]): Vector[HeavyMetadataAlert] } final class NoopMetadataStatisticsRecorder extends MetadataStatisticsRecorder { - def processEvents(putEvents: Iterable[MetadataEvent]): Vector[HeavyMetadataAlert] = Vector.empty + def processEventsAndGenerateAlerts(putEvents: Iterable[MetadataEvent]): Vector[HeavyMetadataAlert] = Vector.empty } final class ActiveMetadataStatisticsRecorder(workflowCacheSize: Long = 100000L, // 100,000 @@ -69,7 +69,7 @@ final class ActiveMetadataStatisticsRecorder(workflowCacheSize: Long = 100000L, def writeStatisticsLoader(workflowId: WorkflowId): Callable[WorkflowMetadataWriteStatistics] = () => WorkflowMetadataWriteStatistics(workflowId, 0L, 0L, None) - def processEvents(putEvents: Iterable[MetadataEvent]): Vector[HeavyMetadataAlert] = { + def processEventsAndGenerateAlerts(putEvents: Iterable[MetadataEvent]): Vector[HeavyMetadataAlert] = { putEvents.groupBy(_.key.workflowId).toVector.flatMap { case (id, list) => processEventsForWorkflow(id, list)} } diff --git a/services/src/main/scala/cromwell/services/metadata/impl/WriteMetadataActor.scala b/services/src/main/scala/cromwell/services/metadata/impl/WriteMetadataActor.scala index 9749fd2a8bc..46af4a28ee7 100644 --- a/services/src/main/scala/cromwell/services/metadata/impl/WriteMetadataActor.scala +++ b/services/src/main/scala/cromwell/services/metadata/impl/WriteMetadataActor.scala @@ -39,7 +39,7 @@ class WriteMetadataActor(override val batchSize: Int, val allPutEvents: Iterable[MetadataEvent] = putWithoutResponse ++ putWithResponse.flatMap(_._1) val dbAction = addMetadataEvents(allPutEvents) - statsRecorder.processEvents(allPutEvents) foreach(a => log.warning(s"${a.workflowId} has logged a heavy amount of metadata (${a.count} rows)")) + statsRecorder.processEventsAndGenerateAlerts(allPutEvents) foreach(a => log.warning(s"${a.workflowId} has logged a heavy amount of metadata (${a.count} rows)")) dbAction onComplete 
{ case Success(_) => diff --git a/services/src/test/scala/cromwell/services/metadata/MetadataStatisticsRecorderSpec.scala b/services/src/test/scala/cromwell/services/metadata/MetadataStatisticsRecorderSpec.scala index a342cd1c114..2096e3f9553 100644 --- a/services/src/test/scala/cromwell/services/metadata/MetadataStatisticsRecorderSpec.scala +++ b/services/src/test/scala/cromwell/services/metadata/MetadataStatisticsRecorderSpec.scala @@ -27,8 +27,8 @@ class MetadataStatisticsRecorderSpec extends AnyFlatSpec with Matchers { val workflowId = WorkflowId(UUID.randomUUID()) (1 to 10) foreach { i => - recorder.processEvents(9 of uninterestingWriteEvent(workflowId)) should be(Vector.empty) - recorder.processEvents(1 of uninterestingWriteEvent(workflowId)) should be(Vector(HeavyMetadataAlert(workflowId, 10L * i))) + recorder.processEventsAndGenerateAlerts(9 of uninterestingWriteEvent(workflowId)) should be(Vector.empty) + recorder.processEventsAndGenerateAlerts(1 of uninterestingWriteEvent(workflowId)) should be(Vector(HeavyMetadataAlert(workflowId, 10L * i))) () } } @@ -42,7 +42,7 @@ class MetadataStatisticsRecorderSpec extends AnyFlatSpec with Matchers { val bigMetadataDump = Math.abs(Random.nextInt(101)) + 10 val expectedCountAfterProcessing = runningCounter + bigMetadataDump - recorder.processEvents(bigMetadataDump of uninterestingWriteEvent(workflowId)) should be(Vector(HeavyMetadataAlert(workflowId, expectedCountAfterProcessing))) + recorder.processEventsAndGenerateAlerts(bigMetadataDump of uninterestingWriteEvent(workflowId)) should be(Vector(HeavyMetadataAlert(workflowId, expectedCountAfterProcessing))) runningCounter = expectedCountAfterProcessing () } @@ -54,25 +54,25 @@ class MetadataStatisticsRecorderSpec extends AnyFlatSpec with Matchers { val workflowId2 = WorkflowId(UUID.randomUUID()) val workflowId3 = WorkflowId(UUID.randomUUID()) - recorder.processEvents( + recorder.processEventsAndGenerateAlerts( (3 of uninterestingWriteEvent(workflowId1)) ++ (5 of uninterestingWriteEvent(workflowId2)) ++ (7 of uninterestingWriteEvent(workflowId3)) ) should be(Vector.empty) - recorder.processEvents( + recorder.processEventsAndGenerateAlerts( (3 of uninterestingWriteEvent(workflowId1)) ++ (5 of uninterestingWriteEvent(workflowId2)) ++ (7 of uninterestingWriteEvent(workflowId3)) ).toSet should be(Set(HeavyMetadataAlert(workflowId2, 10), HeavyMetadataAlert(workflowId3, 14))) - recorder.processEvents( + recorder.processEventsAndGenerateAlerts( (3 of uninterestingWriteEvent(workflowId1)) ++ (5 of uninterestingWriteEvent(workflowId2)) ++ (7 of uninterestingWriteEvent(workflowId3)) ) should be(Vector.empty) - recorder.processEvents( + recorder.processEventsAndGenerateAlerts( (3 of uninterestingWriteEvent(workflowId1)) ++ (5 of uninterestingWriteEvent(workflowId2)) ++ (7 of uninterestingWriteEvent(workflowId3)) @@ -89,41 +89,41 @@ class MetadataStatisticsRecorderSpec extends AnyFlatSpec with Matchers { // Recording SW1 parentage adds 2 events against subWorkflowId1 (and thus rootWorkflowId) withClue(recorder.statusString()) { - recorder.processEvents(parentNotificationEvent(rootWorkflowId, rootWorkflowId, subWorkflow1Id)) should be(Vector.empty) + recorder.processEventsAndGenerateAlerts(parentNotificationEvent(rootWorkflowId, rootWorkflowId, subWorkflow1Id)) should be(Vector.empty) } // Recording SW2 parentage adds 2 events against subWorkflowId2 (and thus subWorkflow1Id and thus rootWorkflowId) withClue(recorder.statusString()) { - recorder.processEvents(parentNotificationEvent(rootWorkflowId, 
subWorkflow1Id, subWorkflow2Id)) should be(Vector.empty) + recorder.processEventsAndGenerateAlerts(parentNotificationEvent(rootWorkflowId, subWorkflow1Id, subWorkflow2Id)) should be(Vector.empty) } // To get started, add 7 events to the root subworkflow: withClue(recorder.statusString()) { - recorder.processEvents(7 of uninterestingWriteEvent(rootWorkflowId)) should be(Vector(HeavyMetadataAlert(rootWorkflowId, 11))) + recorder.processEventsAndGenerateAlerts(7 of uninterestingWriteEvent(rootWorkflowId)) should be(Vector(HeavyMetadataAlert(rootWorkflowId, 11))) } // Current standing: root: 11, sub1: 4, sub2: 2 withClue(recorder.statusString()) { - recorder.processEvents(7 of uninterestingWriteEvent(subWorkflow1Id)) should be(Vector(HeavyMetadataAlert(subWorkflow1Id, 11))) + recorder.processEventsAndGenerateAlerts(7 of uninterestingWriteEvent(subWorkflow1Id)) should be(Vector(HeavyMetadataAlert(subWorkflow1Id, 11))) } // Current standing: root: 18, sub1: 11, sub2: 2 withClue(recorder.statusString()) { - recorder.processEvents(7 of uninterestingWriteEvent(subWorkflow2Id)) should be(Vector(HeavyMetadataAlert(rootWorkflowId, 25))) + recorder.processEventsAndGenerateAlerts(7 of uninterestingWriteEvent(subWorkflow2Id)) should be(Vector(HeavyMetadataAlert(rootWorkflowId, 25))) } // Current standing: root: 25, sub1: 18, sub2: 9 withClue(recorder.statusString()) { - recorder.processEvents(7 of uninterestingWriteEvent(subWorkflow1Id)) should be(Vector(HeavyMetadataAlert(subWorkflow1Id, 25))) + recorder.processEventsAndGenerateAlerts(7 of uninterestingWriteEvent(subWorkflow1Id)) should be(Vector(HeavyMetadataAlert(subWorkflow1Id, 25))) } // Current standing: root: 32, sub1: 25, sub2: 9 withClue(recorder.statusString()) { - recorder.processEvents(7 of uninterestingWriteEvent(subWorkflow2Id)).toSet should be(Set(HeavyMetadataAlert(rootWorkflowId, 39), HeavyMetadataAlert(subWorkflow2Id, 16))) + recorder.processEventsAndGenerateAlerts(7 of uninterestingWriteEvent(subWorkflow2Id)).toSet should be(Set(HeavyMetadataAlert(rootWorkflowId, 39), HeavyMetadataAlert(subWorkflow2Id, 16))) } // Current standing: root: 39, sub1: 32, sub2: 16 @@ -135,16 +135,16 @@ class MetadataStatisticsRecorderSpec extends AnyFlatSpec with Matchers { val subWorkflow1Id = WorkflowId(UUID.randomUUID()) val subWorkflow2Id = WorkflowId(UUID.randomUUID()) - recorder.processEvents(parentNotificationEvent(rootWorkflowId, rootWorkflowId, subWorkflow1Id)) should be(Vector.empty) - recorder.processEvents(parentNotificationEvent(rootWorkflowId, subWorkflow1Id, subWorkflow2Id)) should be(Vector.empty) + recorder.processEventsAndGenerateAlerts(parentNotificationEvent(rootWorkflowId, rootWorkflowId, subWorkflow1Id)) should be(Vector.empty) + recorder.processEventsAndGenerateAlerts(parentNotificationEvent(rootWorkflowId, subWorkflow1Id, subWorkflow2Id)) should be(Vector.empty) // If we were accumulating these would alert, but we see nothing if not accumulating: - recorder.processEvents(7 of uninterestingWriteEvent(subWorkflow1Id)) should be(Vector.empty) - recorder.processEvents(7 of uninterestingWriteEvent(subWorkflow2Id)) should be(Vector.empty) + recorder.processEventsAndGenerateAlerts(7 of uninterestingWriteEvent(subWorkflow1Id)) should be(Vector.empty) + recorder.processEventsAndGenerateAlerts(7 of uninterestingWriteEvent(subWorkflow2Id)) should be(Vector.empty) // When we trip the limits, we should only see alerts for individual workflows. 
// Note: it's 16 not 14 because of the two parent notification entries above - recorder.processEvents(7 of uninterestingWriteEvent(subWorkflow1Id)) should be(Vector(HeavyMetadataAlert(subWorkflow1Id, 16))) - recorder.processEvents(7 of uninterestingWriteEvent(subWorkflow2Id)) should be(Vector(HeavyMetadataAlert(subWorkflow2Id, 16))) + recorder.processEventsAndGenerateAlerts(7 of uninterestingWriteEvent(subWorkflow1Id)) should be(Vector(HeavyMetadataAlert(subWorkflow1Id, 16))) + recorder.processEventsAndGenerateAlerts(7 of uninterestingWriteEvent(subWorkflow2Id)) should be(Vector(HeavyMetadataAlert(subWorkflow2Id, 16))) } } From 929feeec61a79f88835158326a7d5734046ae463 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Wed, 26 Jan 2022 11:01:50 -0500 Subject: [PATCH 5/9] Don't retry tests that expect cache hits from specific workflows (#6657) --- .../main/resources/standardTestCases/backendWithNoDocker.test | 3 +++ .../src/main/resources/standardTestCases/cacheBetweenWf.test | 3 +++ .../call_cache_hit_prefixes_empty_hint_papi.test | 3 +++ .../standardTestCases/call_cache_hit_prefixes_no_hint.test | 3 +++ ...cache_hit_prefixes_two_roots_empty_hint_cache_hit_papi.test | 3 +++ .../standardTestCases/cwl_cache_between_workflows.test | 3 +++ .../src/main/resources/standardTestCases/floating_tags.test | 3 +++ centaur/src/main/resources/standardTestCases/fofn_caching.test | 3 +++ .../resources/standardTestCases/google_artifact_registry.test | 3 +++ .../main/resources/standardTestCases/hello_private_repo.test | 3 +++ .../main/resources/standardTestCases/use_cacheCopy_dir.test | 3 +++ 11 files changed, 33 insertions(+) diff --git a/centaur/src/main/resources/standardTestCases/backendWithNoDocker.test b/centaur/src/main/resources/standardTestCases/backendWithNoDocker.test index 7c2ef56192f..1ae4042e321 100644 --- a/centaur/src/main/resources/standardTestCases/backendWithNoDocker.test +++ b/centaur/src/main/resources/standardTestCases/backendWithNoDocker.test @@ -2,6 +2,9 @@ name: backendWithNoDocker backends: [LocalNoDocker] testFormat: runtwiceexpectingcallcaching +# CROM-6807 Don't retry failures, subsequent runs with fail because of unexpected cache hits from the initial run +retryTestFailures: false + files { workflow: backendWithNoDocker/backendWithNoDocker.wdl } diff --git a/centaur/src/main/resources/standardTestCases/cacheBetweenWf.test b/centaur/src/main/resources/standardTestCases/cacheBetweenWf.test index c366b0c22bd..4bed721c6f7 100644 --- a/centaur/src/main/resources/standardTestCases/cacheBetweenWf.test +++ b/centaur/src/main/resources/standardTestCases/cacheBetweenWf.test @@ -1,6 +1,9 @@ name: cacheBetweenWF testFormat: runtwiceexpectingcallcaching +# CROM-6807 Don't retry failures, subsequent runs with fail because of unexpected cache hits from the initial run +retryTestFailures: false + files { workflow: cacheBetweenWF/cacheBetweenWF.wdl options: common_options/cache_read_off_write_on.options diff --git a/centaur/src/main/resources/standardTestCases/call_cache_hit_prefixes_empty_hint_papi.test b/centaur/src/main/resources/standardTestCases/call_cache_hit_prefixes_empty_hint_papi.test index d9c7ee419ed..fad1b44ff0a 100644 --- a/centaur/src/main/resources/standardTestCases/call_cache_hit_prefixes_empty_hint_papi.test +++ b/centaur/src/main/resources/standardTestCases/call_cache_hit_prefixes_empty_hint_papi.test @@ -3,6 +3,9 @@ name: call_cache_hit_prefixes_empty_hint_papi testFormat: runtwiceexpectingcallcaching backends: [Papi] +# CROM-6807 Don't retry failures, subsequent runs 
with fail because of unexpected cache hits from the initial run +retryTestFailures: false + files { workflow: call_cache_hit_prefixes/call_cache_hit_prefixes.wdl inputs: call_cache_hit_prefixes/call_cache_hit_prefixes_empty_hint.inputs diff --git a/centaur/src/main/resources/standardTestCases/call_cache_hit_prefixes_no_hint.test b/centaur/src/main/resources/standardTestCases/call_cache_hit_prefixes_no_hint.test index cd35a78dbee..c5bd716f69b 100644 --- a/centaur/src/main/resources/standardTestCases/call_cache_hit_prefixes_no_hint.test +++ b/centaur/src/main/resources/standardTestCases/call_cache_hit_prefixes_no_hint.test @@ -2,6 +2,9 @@ name: call_cache_hit_prefixes_no_hint testFormat: runtwiceexpectingcallcaching +# CROM-6807 Don't retry failures, subsequent runs with fail because of unexpected cache hits from the initial run +retryTestFailures: false + files { workflow: call_cache_hit_prefixes/call_cache_hit_prefixes.wdl inputs: call_cache_hit_prefixes/call_cache_hit_prefixes_no_hint.inputs diff --git a/centaur/src/main/resources/standardTestCases/call_cache_hit_prefixes_two_roots_empty_hint_cache_hit_papi.test b/centaur/src/main/resources/standardTestCases/call_cache_hit_prefixes_two_roots_empty_hint_cache_hit_papi.test index a0ef536165e..e26dd8bc779 100644 --- a/centaur/src/main/resources/standardTestCases/call_cache_hit_prefixes_two_roots_empty_hint_cache_hit_papi.test +++ b/centaur/src/main/resources/standardTestCases/call_cache_hit_prefixes_two_roots_empty_hint_cache_hit_papi.test @@ -5,6 +5,9 @@ name: call_cache_hit_prefixes_two_roots_empty_hint_cache_hit_papi testFormat: runthriceexpectingcallcaching backends: [Papi] +# CROM-6807 Don't retry failures, subsequent runs with fail because of unexpected cache hits from the initial run +retryTestFailures: false + files { workflow: call_cache_hit_prefixes/call_cache_hit_prefixes.wdl inputs: call_cache_hit_prefixes/call_cache_hit_prefixes_two_roots_empty_hint_hit_papi.inputs diff --git a/centaur/src/main/resources/standardTestCases/cwl_cache_between_workflows.test b/centaur/src/main/resources/standardTestCases/cwl_cache_between_workflows.test index e5381017829..230e36b0bb9 100644 --- a/centaur/src/main/resources/standardTestCases/cwl_cache_between_workflows.test +++ b/centaur/src/main/resources/standardTestCases/cwl_cache_between_workflows.test @@ -5,6 +5,9 @@ workflowType: CWL workflowTypeVersion: v1.0 skipDescribeEndpointValidation: true +# CROM-6807 Don't retry failures, subsequent runs with fail because of unexpected cache hits from the initial run +retryTestFailures: false + files { workflow: cwl_cache_between_workflows/cwl_cache_between_workflows.cwl inputs: cwl_cache_between_workflows/cwl_cache_between_workflows.json diff --git a/centaur/src/main/resources/standardTestCases/floating_tags.test b/centaur/src/main/resources/standardTestCases/floating_tags.test index fc2c077d0a2..d3313f679a8 100644 --- a/centaur/src/main/resources/standardTestCases/floating_tags.test +++ b/centaur/src/main/resources/standardTestCases/floating_tags.test @@ -1,6 +1,9 @@ name: floating_tags testFormat: runtwiceexpectingcallcaching +# CROM-6807 Don't retry failures, subsequent runs with fail because of unexpected cache hits from the initial run +retryTestFailures: false + files { workflow: floating_tags/floating_tags.wdl options: floating_tags/floating_tags.options diff --git a/centaur/src/main/resources/standardTestCases/fofn_caching.test b/centaur/src/main/resources/standardTestCases/fofn_caching.test index 1864bce29d5..fa7a98af6f5 100644 --- 
a/centaur/src/main/resources/standardTestCases/fofn_caching.test +++ b/centaur/src/main/resources/standardTestCases/fofn_caching.test @@ -2,6 +2,9 @@ name: fofn_caching testFormat: runtwiceexpectingcallcaching backends: [Papi-Caching-No-Copy] +# CROM-6807 Don't retry failures, subsequent runs with fail because of unexpected cache hits from the initial run +retryTestFailures: false + files { workflow: fofn_caching/fofn_caching.wdl } diff --git a/centaur/src/main/resources/standardTestCases/google_artifact_registry.test b/centaur/src/main/resources/standardTestCases/google_artifact_registry.test index 44d1e2a725c..458dac37217 100644 --- a/centaur/src/main/resources/standardTestCases/google_artifact_registry.test +++ b/centaur/src/main/resources/standardTestCases/google_artifact_registry.test @@ -1,6 +1,9 @@ name: google_artifact_registry testFormat: runtwiceexpectingcallcaching +# CROM-6807 Don't retry failures, subsequent runs with fail because of unexpected cache hits from the initial run +retryTestFailures: false + files { workflow: google_artifact_registry/google_artifact_registry.wdl } diff --git a/centaur/src/main/resources/standardTestCases/hello_private_repo.test b/centaur/src/main/resources/standardTestCases/hello_private_repo.test index 00be69dd7f9..c35562cd6ea 100644 --- a/centaur/src/main/resources/standardTestCases/hello_private_repo.test +++ b/centaur/src/main/resources/standardTestCases/hello_private_repo.test @@ -2,6 +2,9 @@ name: hello_private_repo testFormat: runtwiceexpectingcallcaching backends: [LocalDockerSecure] +# CROM-6807 Don't retry failures, subsequent runs with fail because of unexpected cache hits from the initial run +retryTestFailures: false + files { workflow: hello_private_repo/hello_private_repo.wdl inputs: hello_private_repo/hello_private_repo.inputs.json diff --git a/centaur/src/main/resources/standardTestCases/use_cacheCopy_dir.test b/centaur/src/main/resources/standardTestCases/use_cacheCopy_dir.test index 1b33b90fe22..7ca5b7c6425 100644 --- a/centaur/src/main/resources/standardTestCases/use_cacheCopy_dir.test +++ b/centaur/src/main/resources/standardTestCases/use_cacheCopy_dir.test @@ -2,6 +2,9 @@ name: use_cache_copy_dir testFormat: runtwiceexpectingcallcaching backends: [Papiv2] +# CROM-6807 Don't retry failures, subsequent runs with fail because of unexpected cache hits from the initial run +retryTestFailures: false + files { workflow: use_cacheCopy_dir/use_cacheCopy_dir.wdl } From 8a566c2afbd466b04508858efc4474b2de8a88d0 Mon Sep 17 00:00:00 2001 From: Jeremy Leipzig Date: Fri, 28 Jan 2022 14:51:45 -0700 Subject: [PATCH 6/9] add caveats to HTTP inputs (#6661) caveats to HTTP inputs learned the hard way --- docs/filesystems/HTTP.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/filesystems/HTTP.md b/docs/filesystems/HTTP.md index 6a254a640b5..f36be5a97a0 100644 --- a/docs/filesystems/HTTP.md +++ b/docs/filesystems/HTTP.md @@ -37,3 +37,9 @@ backend { If there is a need to turn off this `http` filesystem in the default `Local` backend the following Java property allows for this: `-Dbackend.providers.Local.config.filesystems.http.enabled=false`. + +### Caveats + +Using HTTP inputs in Cromwell can produce some unexpected behavior: +- Files specified by HTTP URIs will be renamed locally, so programs that rely on file extensions or other filenaming conventions may not function properly. +- Files located in the same remote HTTP-defined directory will not be colocated locally. 
This can cause problems if a program is expecting an index file (e.g. `.fai`) to appear in the same directory as the associated data file (e.g. `.fa`) without specifying the index location. From c0bb1f9feb57b1372d0d7459143fabaef5c854cd Mon Sep 17 00:00:00 2001 From: Miguel Covarrubias Date: Wed, 2 Feb 2022 11:17:42 -0500 Subject: [PATCH 7/9] Empty optional outputs on PAPI [BT-386] (#6662) --- CHANGELOG.md | 5 +++++ .../backendWithNoDocker.test | 2 +- .../standardTestCases/cacheBetweenWf.test | 2 +- ...ll_cache_hit_prefixes_empty_hint_papi.test | 2 +- .../call_cache_hit_prefixes_no_hint.test | 2 +- ...s_two_roots_empty_hint_cache_hit_papi.test | 2 +- .../cwl_cache_between_workflows.test | 2 +- .../standardTestCases/floating_tags.test | 2 +- .../standardTestCases/fofn_caching.test | 2 +- .../google_artifact_registry.test | 2 +- .../standardTestCases/hello_private_repo.test | 2 +- .../standardTestCases/use_cacheCopy_dir.test | 2 +- .../wdl_optional_outputs_call_caching.test | 16 ++++++++++++++ .../wdl_optional_outputs_call_caching.wdl | 22 +++++++++++++++++++ ...inesApiAsyncBackendJobExecutionActor.scala | 13 +++++++++-- ...inesApiAsyncBackendJobExecutionActor.scala | 13 +++++++++-- 16 files changed, 76 insertions(+), 15 deletions(-) create mode 100644 centaur/src/main/resources/standardTestCases/wdl_optional_outputs_call_caching.test create mode 100644 centaur/src/main/resources/standardTestCases/wdl_optional_outputs_call_caching/wdl_optional_outputs_call_caching.wdl diff --git a/CHANGELOG.md b/CHANGELOG.md index c3301af2561..1b6568cad34 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,11 @@ Previously: | `executionStatus` |`Running`|`Running`| Job state Cromwell is requesting from the backend | | `backendStatus` |`Running`|`Running`| Job state reported by backend | + +### Bug Fixes + +* Fixed a bug on Google Pipelines API backends where missing optional output files (`File?`) were not correctly detected by Cromwell and caused invalid call cache entries to be written. 
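+
+For example, given the output block of the new `wdl_optional_outputs_call_caching` test workflow added with this fix:
+
+```wdl
+output {
+  File? do_not_output = "do_not_output.txt" # never created by the command
+  File? do_output = "do_output.txt"         # created by the command's `touch`
+}
+```
+
+`do_not_output` now evaluates to an empty optional on PAPI v2alpha1 and v2beta backends rather than contributing an invalid entry to the call cache.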
+ ## 73 Release Notes ### Workflow Restart Performance Improvements diff --git a/centaur/src/main/resources/standardTestCases/backendWithNoDocker.test b/centaur/src/main/resources/standardTestCases/backendWithNoDocker.test index 1ae4042e321..493cd9988d8 100644 --- a/centaur/src/main/resources/standardTestCases/backendWithNoDocker.test +++ b/centaur/src/main/resources/standardTestCases/backendWithNoDocker.test @@ -2,7 +2,7 @@ name: backendWithNoDocker backends: [LocalNoDocker] testFormat: runtwiceexpectingcallcaching -# CROM-6807 Don't retry failures, subsequent runs with fail because of unexpected cache hits from the initial run +# CROM-6807 Don't retry failures, subsequent runs will fail because of unexpected cache hits from the initial run retryTestFailures: false files { diff --git a/centaur/src/main/resources/standardTestCases/cacheBetweenWf.test b/centaur/src/main/resources/standardTestCases/cacheBetweenWf.test index 4bed721c6f7..53c1847c686 100644 --- a/centaur/src/main/resources/standardTestCases/cacheBetweenWf.test +++ b/centaur/src/main/resources/standardTestCases/cacheBetweenWf.test @@ -1,7 +1,7 @@ name: cacheBetweenWF testFormat: runtwiceexpectingcallcaching -# CROM-6807 Don't retry failures, subsequent runs with fail because of unexpected cache hits from the initial run +# CROM-6807 Don't retry failures, subsequent runs will fail because of unexpected cache hits from the initial run retryTestFailures: false files { diff --git a/centaur/src/main/resources/standardTestCases/call_cache_hit_prefixes_empty_hint_papi.test b/centaur/src/main/resources/standardTestCases/call_cache_hit_prefixes_empty_hint_papi.test index fad1b44ff0a..f148f234dba 100644 --- a/centaur/src/main/resources/standardTestCases/call_cache_hit_prefixes_empty_hint_papi.test +++ b/centaur/src/main/resources/standardTestCases/call_cache_hit_prefixes_empty_hint_papi.test @@ -3,7 +3,7 @@ name: call_cache_hit_prefixes_empty_hint_papi testFormat: runtwiceexpectingcallcaching backends: [Papi] -# CROM-6807 Don't retry failures, subsequent runs with fail because of unexpected cache hits from the initial run +# CROM-6807 Don't retry failures, subsequent runs will fail because of unexpected cache hits from the initial run retryTestFailures: false files { diff --git a/centaur/src/main/resources/standardTestCases/call_cache_hit_prefixes_no_hint.test b/centaur/src/main/resources/standardTestCases/call_cache_hit_prefixes_no_hint.test index c5bd716f69b..e4aca42b5cf 100644 --- a/centaur/src/main/resources/standardTestCases/call_cache_hit_prefixes_no_hint.test +++ b/centaur/src/main/resources/standardTestCases/call_cache_hit_prefixes_no_hint.test @@ -2,7 +2,7 @@ name: call_cache_hit_prefixes_no_hint testFormat: runtwiceexpectingcallcaching -# CROM-6807 Don't retry failures, subsequent runs with fail because of unexpected cache hits from the initial run +# CROM-6807 Don't retry failures, subsequent runs will fail because of unexpected cache hits from the initial run retryTestFailures: false files { diff --git a/centaur/src/main/resources/standardTestCases/call_cache_hit_prefixes_two_roots_empty_hint_cache_hit_papi.test b/centaur/src/main/resources/standardTestCases/call_cache_hit_prefixes_two_roots_empty_hint_cache_hit_papi.test index e26dd8bc779..8558f7fec8a 100644 --- a/centaur/src/main/resources/standardTestCases/call_cache_hit_prefixes_two_roots_empty_hint_cache_hit_papi.test +++ b/centaur/src/main/resources/standardTestCases/call_cache_hit_prefixes_two_roots_empty_hint_cache_hit_papi.test @@ -5,7 +5,7 @@ name: 
call_cache_hit_prefixes_two_roots_empty_hint_cache_hit_papi testFormat: runthriceexpectingcallcaching backends: [Papi] -# CROM-6807 Don't retry failures, subsequent runs with fail because of unexpected cache hits from the initial run +# CROM-6807 Don't retry failures, subsequent runs will fail because of unexpected cache hits from the initial run retryTestFailures: false files { diff --git a/centaur/src/main/resources/standardTestCases/cwl_cache_between_workflows.test b/centaur/src/main/resources/standardTestCases/cwl_cache_between_workflows.test index 230e36b0bb9..1e3b6065617 100644 --- a/centaur/src/main/resources/standardTestCases/cwl_cache_between_workflows.test +++ b/centaur/src/main/resources/standardTestCases/cwl_cache_between_workflows.test @@ -5,7 +5,7 @@ workflowType: CWL workflowTypeVersion: v1.0 skipDescribeEndpointValidation: true -# CROM-6807 Don't retry failures, subsequent runs with fail because of unexpected cache hits from the initial run +# CROM-6807 Don't retry failures, subsequent runs will fail because of unexpected cache hits from the initial run retryTestFailures: false files { diff --git a/centaur/src/main/resources/standardTestCases/floating_tags.test b/centaur/src/main/resources/standardTestCases/floating_tags.test index d3313f679a8..f4be0030b4f 100644 --- a/centaur/src/main/resources/standardTestCases/floating_tags.test +++ b/centaur/src/main/resources/standardTestCases/floating_tags.test @@ -1,7 +1,7 @@ name: floating_tags testFormat: runtwiceexpectingcallcaching -# CROM-6807 Don't retry failures, subsequent runs with fail because of unexpected cache hits from the initial run +# CROM-6807 Don't retry failures, subsequent runs will fail because of unexpected cache hits from the initial run retryTestFailures: false files { diff --git a/centaur/src/main/resources/standardTestCases/fofn_caching.test b/centaur/src/main/resources/standardTestCases/fofn_caching.test index fa7a98af6f5..115f3dd476b 100644 --- a/centaur/src/main/resources/standardTestCases/fofn_caching.test +++ b/centaur/src/main/resources/standardTestCases/fofn_caching.test @@ -2,7 +2,7 @@ name: fofn_caching testFormat: runtwiceexpectingcallcaching backends: [Papi-Caching-No-Copy] -# CROM-6807 Don't retry failures, subsequent runs with fail because of unexpected cache hits from the initial run +# CROM-6807 Don't retry failures, subsequent runs will fail because of unexpected cache hits from the initial run retryTestFailures: false files { diff --git a/centaur/src/main/resources/standardTestCases/google_artifact_registry.test b/centaur/src/main/resources/standardTestCases/google_artifact_registry.test index 458dac37217..384dede4973 100644 --- a/centaur/src/main/resources/standardTestCases/google_artifact_registry.test +++ b/centaur/src/main/resources/standardTestCases/google_artifact_registry.test @@ -1,7 +1,7 @@ name: google_artifact_registry testFormat: runtwiceexpectingcallcaching -# CROM-6807 Don't retry failures, subsequent runs with fail because of unexpected cache hits from the initial run +# CROM-6807 Don't retry failures, subsequent runs will fail because of unexpected cache hits from the initial run retryTestFailures: false files { diff --git a/centaur/src/main/resources/standardTestCases/hello_private_repo.test b/centaur/src/main/resources/standardTestCases/hello_private_repo.test index c35562cd6ea..5ec7aa6a46e 100644 --- a/centaur/src/main/resources/standardTestCases/hello_private_repo.test +++ b/centaur/src/main/resources/standardTestCases/hello_private_repo.test @@ -2,7 +2,7 @@ name: 
hello_private_repo testFormat: runtwiceexpectingcallcaching backends: [LocalDockerSecure] -# CROM-6807 Don't retry failures, subsequent runs with fail because of unexpected cache hits from the initial run +# CROM-6807 Don't retry failures, subsequent runs will fail because of unexpected cache hits from the initial run retryTestFailures: false files { diff --git a/centaur/src/main/resources/standardTestCases/use_cacheCopy_dir.test b/centaur/src/main/resources/standardTestCases/use_cacheCopy_dir.test index 7ca5b7c6425..70bfef2594f 100644 --- a/centaur/src/main/resources/standardTestCases/use_cacheCopy_dir.test +++ b/centaur/src/main/resources/standardTestCases/use_cacheCopy_dir.test @@ -2,7 +2,7 @@ name: use_cache_copy_dir testFormat: runtwiceexpectingcallcaching backends: [Papiv2] -# CROM-6807 Don't retry failures, subsequent runs with fail because of unexpected cache hits from the initial run +# CROM-6807 Don't retry failures, subsequent runs will fail because of unexpected cache hits from the initial run retryTestFailures: false files { diff --git a/centaur/src/main/resources/standardTestCases/wdl_optional_outputs_call_caching.test b/centaur/src/main/resources/standardTestCases/wdl_optional_outputs_call_caching.test new file mode 100644 index 00000000000..5a982fc2f03 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/wdl_optional_outputs_call_caching.test @@ -0,0 +1,16 @@ +name: wdl_optional_outputs_call_caching +testFormat: runtwiceexpectingcallcaching +backends: [Papiv2] + +# CROM-6807 Don't retry failures, subsequent runs will fail because of unexpected cache hits from the initial run +retryTestFailures: false + +files { + workflow: wdl_optional_outputs_call_caching/wdl_optional_outputs_call_caching.wdl +} + +metadata { + workflowName: missing_optional_output + status: Succeeded + "calls.missing_optional_output.do_and_do_not_output.callCaching.result": "Cache Hit: <>:missing_optional_output.do_and_do_not_output:-1" +} diff --git a/centaur/src/main/resources/standardTestCases/wdl_optional_outputs_call_caching/wdl_optional_outputs_call_caching.wdl b/centaur/src/main/resources/standardTestCases/wdl_optional_outputs_call_caching/wdl_optional_outputs_call_caching.wdl new file mode 100644 index 00000000000..f9ac709042e --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/wdl_optional_outputs_call_caching/wdl_optional_outputs_call_caching.wdl @@ -0,0 +1,22 @@ +version 1.0 + +task do_and_do_not_output { + command <<< + touch do_output.txt + >>> + runtime { + docker: "ubuntu" + } + output { + File? do_not_output = "do_not_output.txt" + File? do_output = "do_output.txt" + } +} + +workflow missing_optional_output { + call do_and_do_not_output + output { + File? should_be_present = do_and_do_not_output.do_output + File? 
should_be_null = do_and_do_not_output.do_not_output + } +} diff --git a/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/PipelinesApiAsyncBackendJobExecutionActor.scala b/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/PipelinesApiAsyncBackendJobExecutionActor.scala index c2fde26973a..319ac966889 100644 --- a/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/PipelinesApiAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/PipelinesApiAsyncBackendJobExecutionActor.scala @@ -19,6 +19,7 @@ import wom.core.FullyQualifiedName import wom.expression.FileEvaluation import wom.values.{GlobFunctions, WomFile, WomGlobFile, WomMaybeListedDirectory, WomMaybePopulatedFile, WomSingleFile, WomUnlistedDirectory} +import java.io.FileNotFoundException import scala.concurrent.Future import scala.io.Source import scala.language.postfixOps @@ -29,7 +30,7 @@ class PipelinesApiAsyncBackendJobExecutionActor(standardParams: StandardAsyncExe with PipelinesApiReferenceFilesMappingOperations { // The original implementation assumes the WomFiles are all WomMaybePopulatedFiles and wraps everything in a PipelinesApiFileInput - // In v2 we can differentiate files from directories + // In v2 we can differentiate files from directories override protected def pipelinesApiInputsFromWomFiles(inputName: String, remotePathArray: Seq[WomFile], localPathArray: Seq[WomFile], @@ -289,7 +290,15 @@ class PipelinesApiAsyncBackendJobExecutionActor(standardParams: StandardAsyncExe override def womFileToGcsPath(jesOutputs: Set[PipelinesApiOutput])(womFile: WomFile): WomFile = { womFile mapFile { path => jesOutputs collectFirst { - case jesOutput if jesOutput.name == makeSafeReferenceName(path) => jesOutput.cloudPath.pathAsString + case jesOutput if jesOutput.name == makeSafeReferenceName(path) => + val pathAsString = jesOutput.cloudPath.pathAsString + if (!jesOutput.cloudPath.exists) { + // This is not an error if the path represents a `File?` optional output (the PAPI delocalization script + // should have failed if this file output was not optional but missing). Throw to produce the correct "empty + // optional" value for a missing optional file output. 
+ throw new FileNotFoundException(s"GCS output file not found: $pathAsString") + } + pathAsString } getOrElse { GcsPathBuilder.validateGcsPath(path) match { case _: ValidFullGcsPath => path diff --git a/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/PipelinesApiAsyncBackendJobExecutionActor.scala b/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/PipelinesApiAsyncBackendJobExecutionActor.scala index c9589ae7c1b..1188b427b3d 100644 --- a/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/PipelinesApiAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/PipelinesApiAsyncBackendJobExecutionActor.scala @@ -19,6 +19,7 @@ import wom.core.FullyQualifiedName import wom.expression.FileEvaluation import wom.values.{GlobFunctions, WomFile, WomGlobFile, WomMaybeListedDirectory, WomMaybePopulatedFile, WomSingleFile, WomUnlistedDirectory} +import java.io.FileNotFoundException import scala.concurrent.Future import scala.io.Source import scala.language.postfixOps @@ -29,7 +30,7 @@ class PipelinesApiAsyncBackendJobExecutionActor(standardParams: StandardAsyncExe with PipelinesApiReferenceFilesMappingOperations { // The original implementation assumes the WomFiles are all WomMaybePopulatedFiles and wraps everything in a PipelinesApiFileInput - // In v2 we can differentiate files from directories + // In v2 we can differentiate files from directories override protected def pipelinesApiInputsFromWomFiles(inputName: String, remotePathArray: Seq[WomFile], localPathArray: Seq[WomFile], @@ -289,7 +290,15 @@ class PipelinesApiAsyncBackendJobExecutionActor(standardParams: StandardAsyncExe override def womFileToGcsPath(jesOutputs: Set[PipelinesApiOutput])(womFile: WomFile): WomFile = { womFile mapFile { path => jesOutputs collectFirst { - case jesOutput if jesOutput.name == makeSafeReferenceName(path) => jesOutput.cloudPath.pathAsString + case jesOutput if jesOutput.name == makeSafeReferenceName(path) => + val pathAsString = jesOutput.cloudPath.pathAsString + if (!jesOutput.cloudPath.exists) { + // This is not an error if the path represents a `File?` optional output (the PAPI delocalization script + // should have failed if this file output was not optional but missing). Throw to produce the correct "empty + // optional" value for a missing optional file output. 
+              throw new FileNotFoundException(s"GCS output file not found: $pathAsString")
+            }
+            pathAsString
       } getOrElse {
         GcsPathBuilder.validateGcsPath(path) match {
           case _: ValidFullGcsPath => path
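As an aside, the recovery contract described in the comment above can be shown in miniature. The sketch below uses assumed helper names and is not Cromwell's actual output-evaluation code: a resolver that throws FileNotFoundException is recovered to None, which is how a missing but optional `File?` output can surface as an empty optional instead of a failure.

    import java.io.FileNotFoundException
    import scala.util.{Failure, Success, Try}

    // Hypothetical helper: recover a missing-file failure to an empty optional.
    object OptionalOutputSketch {
      def resolveOptionalOutput(resolve: () => String): Option[String] =
        Try(resolve()) match {
          case Success(path) => Some(path)                // file exists, keep the path
          case Failure(_: FileNotFoundException) => None  // missing `File?` output becomes "empty optional"
          case Failure(other) => throw other              // any other failure still propagates
        }
    }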
From 8d5f786e4385929e12164797e0da90398c4a8f21 Mon Sep 17 00:00:00 2001
From: kshakir
Date: Wed, 2 Feb 2022 19:05:11 -0500
Subject: [PATCH 8/9] Replace some usages of setAccessible with non-reflection
 implementations (#6663)

---
 .../database/slick/SlickDatabase.scala        | 36 +++++++----------
 wom/src/main/scala/wom/util/YamlUtils.scala   | 39 +++++++------------
 2 files changed, 27 insertions(+), 48 deletions(-)

diff --git a/database/sql/src/main/scala/cromwell/database/slick/SlickDatabase.scala b/database/sql/src/main/scala/cromwell/database/slick/SlickDatabase.scala
index e54974f6618..f95e3d1a7ae 100644
--- a/database/sql/src/main/scala/cromwell/database/slick/SlickDatabase.scala
+++ b/database/sql/src/main/scala/cromwell/database/slick/SlickDatabase.scala
@@ -1,18 +1,17 @@
 package cromwell.database.slick
 
-import java.sql.{Connection, PreparedStatement, Statement}
-import java.util.concurrent.{ExecutorService, Executors}
-
 import com.mysql.cj.jdbc.exceptions.MySQLTransactionRollbackException
 import com.typesafe.config.{Config, ConfigFactory}
 import cromwell.database.slick.tables.DataAccessComponent
 import cromwell.database.sql.SqlDatabase
 import net.ceedubs.ficus.Ficus._
-import org.postgresql.util.{PSQLException, ServerErrorMessage}
-import org.slf4j.LoggerFactory
+import org.postgresql.util.PSQLException
+import org.slf4j.{Logger, LoggerFactory}
 import slick.basic.DatabaseConfig
 import slick.jdbc.{JdbcCapabilities, JdbcProfile, PostgresProfile, TransactionIsolation}
 
+import java.sql.{Connection, PreparedStatement, Statement}
+import java.util.concurrent.{ExecutorService, Executors}
 import scala.concurrent.duration._
 import scala.concurrent.{Await, ExecutionContext, Future}
 
@@ -20,9 +19,9 @@ object SlickDatabase {
   /**
    * Returns either the "url" or "properties.url"
    */
-  def urlKey(config: Config) = if (config.hasPath("db.url")) "db.url" else "db.properties.url"
+  def urlKey(config: Config): String = if (config.hasPath("db.url")) "db.url" else "db.properties.url"
 
-  lazy val log = LoggerFactory.getLogger("cromwell.database.slick")
+  lazy val log: Logger = LoggerFactory.getLogger("cromwell.database.slick")
 
   def createSchema(slickDatabase: SlickDatabase): Unit = {
     // NOTE: Slick 3.0.0 schema creation, Clobs, and MySQL don't mix: https://github.com/slick/slick/issues/637
@@ -57,7 +56,7 @@ object SlickDatabase {
  */
 abstract class SlickDatabase(override val originalDatabaseConfig: Config) extends SqlDatabase {
 
-  override val urlKey = SlickDatabase.urlKey(originalDatabaseConfig)
+  override val urlKey: String = SlickDatabase.urlKey(originalDatabaseConfig)
   protected val slickConfig = DatabaseConfig.forConfig[JdbcProfile]("", databaseConfig)
 
   /*
@@ -73,7 +72,7 @@ abstract class SlickDatabase(override val originalDatabaseConfig: Config) extend
   // NOTE: if you want to refactor database is inner-class type: this.dataAccess.driver.backend.DatabaseFactory
   val database = slickConfig.db
 
-  override lazy val connectionDescription = databaseConfig.getString(urlKey)
+  override lazy val connectionDescription: String = databaseConfig.getString(urlKey)
 
   SlickDatabase.log.info(s"Running with database $urlKey = $connectionDescription")
 
@@ -134,10 +133,12 @@ abstract class SlickDatabase(override val originalDatabaseConfig: Config) extend
     actionThreadPool, database.executor.executionContext.reportFailure
   )
 
-  protected[this] lazy val insertBatchSize = databaseConfig.getOrElse("insert-batch-size", 2000)
+  protected[this] lazy val insertBatchSize: Int = databaseConfig.getOrElse("insert-batch-size", 2000)
 
-  protected[this] lazy val useSlickUpserts = dataAccess.driver.capabilities.contains(JdbcCapabilities.insertOrUpdate)
+  protected[this] lazy val useSlickUpserts: Boolean =
+    dataAccess.driver.capabilities.contains(JdbcCapabilities.insertOrUpdate)
 
+  //noinspection SameParameterValue
   protected[this] def assertUpdateCount(description: String, updates: Int, expected: Int): DBIO[Unit] = {
     if (updates == expected) {
       DBIO.successful(())
@@ -220,20 +221,11 @@ abstract class SlickDatabase(override val originalDatabaseConfig: Config) extend
           /*
           The exception may contain possibly sensitive row contents within the DETAIL section. Remove it.
 
-          Tried adjusting this using configuration:
-          - log_error_verbosity=TERSE
-          - log_min_messages=PANIC
-          - client_min_messages=ERROR
-
-          Instead resorting to reflection.
+          Discussion: https://github.com/pgjdbc/pgjdbc/issues/1577
           */
           val message = pSQLException.getServerErrorMessage
-          val field = classOf[ServerErrorMessage].getDeclaredField("mesgParts")
-          field.setAccessible(true)
-          val parts = field.get(message).asInstanceOf[java.util.Map[Character, String]]
-          parts.remove('D')
           // The original exception has already stored the DETAIL into a string. So we must create a new Exception.
-          throw new PSQLException(message)
+          throw new PSQLException(message, false)
         }
       }
     }(actionExecutionContext)
diff --git a/wom/src/main/scala/wom/util/YamlUtils.scala b/wom/src/main/scala/wom/util/YamlUtils.scala
index fbd3562eb87..31d0bd77af4 100644
--- a/wom/src/main/scala/wom/util/YamlUtils.scala
+++ b/wom/src/main/scala/wom/util/YamlUtils.scala
@@ -65,12 +65,6 @@ object YamlUtils {
     }
   }
 
-  private lazy val composerRecursiveNodesField = {
-    val field = classOf[Composer].getDeclaredField("recursiveNodes")
-    field.setAccessible(true)
-    field
-  }
-
   private val yamlConfig = ConfigFactory.load().getConfig("yaml")
   private val defaultMaxNodes = yamlConfig.as[Int Refined NonNegative]("max-nodes")
   private val defaultMaxDepth = yamlConfig.as[Int Refined NonNegative]("max-depth")
@@ -92,41 +86,39 @@
       loaderOptions
     ) {
 
-    private lazy val composerRecursiveFields: java.util.Set[Node] = getRecursiveNodeSet(this)
+    private val depth = new Counter
 
-    private def checkDepth(): Unit = {
-      if (composerRecursiveFields.size > maxDepth.value)
+    private def checkDepth[A](f: => A): A = {
+      depth.count += 1
+      if (depth.count > maxDepth.value)
         throw new IllegalArgumentException(s"Parsing halted at node depth $maxDepth")
+      val result = f
+      depth.count -= 1
+      result
     }
 
     override def composeScalarNode(anchor: String, blockComments: util.List[CommentLine]): Node = {
-      checkDepth()
-      super.composeScalarNode(anchor, blockComments)
+      checkDepth(super.composeScalarNode(anchor, blockComments))
     }
 
    override def composeSequenceNode(anchor: String): Node = {
-      checkDepth()
-      super.composeSequenceNode(anchor)
+      checkDepth(super.composeSequenceNode(anchor))
    }
 
     override def composeMappingNode(anchor: String): Node = {
-      checkDepth()
-      super.composeMappingNode(anchor)
+      checkDepth(super.composeMappingNode(anchor))
     }
 
     override def composeMappingChildren(children: util.List[NodeTuple], node: MappingNode): Unit = {
-      checkDepth()
-      super.composeMappingChildren(children, node)
+      checkDepth(super.composeMappingChildren(children, node))
     }
 
     override def composeKeyNode(node: MappingNode): Node = {
-      checkDepth()
-      super.composeKeyNode(node)
+      checkDepth(super.composeKeyNode(node))
     }
 
     override def composeValueNode(node: MappingNode): Node = {
-      checkDepth()
-      super.composeValueNode(node)
+      checkDepth(super.composeValueNode(node))
     }
   }
 
@@ -135,11 +127,6 @@
     var count = 0L
   }
 
-  /** Use reflection to access the existing but private Set of nested nodes */
-  private def getRecursiveNodeSet(composer: Composer): java.util.Set[Node] = {
-    composerRecursiveNodesField.get(composer).asInstanceOf[java.util.Set[Node]]
-  }
-
   /**
    * Looks for loops and large documents in yaml parsed by SnakeYaml.
   *
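For context, the counter-based guard introduced above is a general pattern: increment a depth counter on the way into each recursive step, fail once a limit is exceeded, and decrement on the way out. A minimal standalone sketch of the same idea, with assumed names and limits (this is not the YamlUtils API):

    // Hypothetical standalone depth guard mirroring checkDepth above (assumed names).
    object DepthGuard {
      private var depth = 0L
      private val maxDepth = 1000L

      def guarded[A](step: => A): A = {
        depth += 1 // entering one level of nesting
        if (depth > maxDepth) throw new IllegalArgumentException(s"Halted at depth $maxDepth")
        val result = step
        depth -= 1 // leaving that level
        result
      }

      // Example use: a guarded recursive traversal of nested sequences.
      def countNodes(node: Any): Long = guarded {
        node match {
          case seq: Seq[_] => 1L + seq.map(countNodes).sum
          case _           => 1L
        }
      }
    }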
From 660229baf9c96f6b70a694c919631a3ec93640a0 Mon Sep 17 00:00:00 2001
From: Chris Llanwarne
Date: Thu, 3 Feb 2022 10:02:06 -0500
Subject: [PATCH 9/9] Allow workflow ID choice by submitters [BW-1056] (#6659)

---
 CHANGELOG.md                                  |   7 +
 .../src/main/resources/swagger/cromiam.yaml   |  10 ++
 .../CallCachingBlacklistManagerSpec.scala     |   3 +-
 .../core/WorkflowSourceFilesCollection.scala  |  16 ++-
 .../test/scala/cromwell/util/SampleWdl.scala  |   6 +-
 .../slick/WorkflowStoreSlickDatabase.scala    |   4 +
 .../tables/WorkflowStoreEntryComponent.scala  |   9 ++
 .../sql/WorkflowStoreSqlDatabase.scala        |   2 +
 docs/api/RESTAPI.md                           |   4 +-
 .../src/main/resources/swagger/cromwell.yaml  |  10 ++
 .../workflowstore/SqlWorkflowStore.scala      |  67 +++++++--
 .../cromwell/server/CromwellRootActor.scala   |   4 +-
 .../webservice/PartialWorkflowSources.scala   |  80 ++++++++---
 .../webservice/WorkflowJsonSupport.scala      |  12 +-
 .../routes/WomtoolRouteSupport.scala          |   3 +-
 .../workflowstore/SqlWorkflowStoreSpec.scala  | 135 +++++++++++++++++-
 ...kflowStoreCoordinatedAccessActorSpec.scala |   6 +-
 .../PartialWorkflowSourcesSpec.scala          |  70 +++++++++
 .../NamespaceCacheSpec.scala                  |   3 +-
 .../wdl/draft3/WdlDraft3CachingSpec.scala     |   3 +-
 .../scala/cromwell/CromwellEntryPoint.scala   |   6 +-
 .../cromwell/SimpleWorkflowActorSpec.scala    |   3 +-
 .../workflow/SqlWorkflowStoreBuilder.scala    |   7 +-
 ...terializeWorkflowDescriptorActorSpec.scala |  36 +++--
 .../SubWorkflowStoreSpec.scala                |   7 +-
 .../services/womtool/DescriberSpec.scala      |   3 +-
 .../WomtoolServiceInCromwellActorSpec.scala   |   3 +-
 27 files changed, 440 insertions(+), 79 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1b6568cad34..541f8740103 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -26,6 +26,13 @@ Previously:
 | `executionStatus`  |`Running`|`Running`| Job state Cromwell is requesting from the backend          |
 | `backendStatus`    |`Running`|`Running`| Job state reported by backend                              |
 
+### New 'requestedWorkflowId' API Option
+
+Allows users to choose their own workflow IDs at workflow submission time.
+
+If supplied for single workflows, this value must be a JSON string containing a valid UUID that is not already in use. For batch submissions, this value must be a JSON array of valid UUIDs.
+
+If not supplied, the behavior is unchanged: Cromwell will generate a random workflow ID for every workflow submitted.
 
 ### Bug Fixes
 
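To make the batch rule above concrete, here is a hedged sketch of the kind of validation the changelog entry describes. All names are assumed for illustration; the real checks live in PartialWorkflowSources and SqlWorkflowStore and may differ in detail.

    import java.util.UUID
    import scala.util.{Failure, Try}

    // Hypothetical validator: a batch submission must supply one well-formed UUID
    // per workflow, in the same order as the workflow inputs list.
    object RequestedIdValidation {
      def validateRequestedIds(requested: Seq[String], workflowCount: Int): Try[Seq[UUID]] =
        if (requested.size != workflowCount)
          Failure(new IllegalArgumentException(
            s"Expected $workflowCount requested workflow IDs but found ${requested.size}"))
        else
          Try(requested.map(UUID.fromString)) // fails fast on any malformed UUID
    }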
diff --git a/CromIAM/src/main/resources/swagger/cromiam.yaml b/CromIAM/src/main/resources/swagger/cromiam.yaml
index 47b56fc8cb1..c6bf57fa609 100644
--- a/CromIAM/src/main/resources/swagger/cromiam.yaml
+++ b/CromIAM/src/main/resources/swagger/cromiam.yaml
@@ -135,6 +135,11 @@ paths:
         required: false
         type: file
         in: formData
+      - name: requestedWorkflowId
+        description: An ID to assign to this workflow. Must be a JSON string in UUID format. If not supplied, a random ID will be generated for the workflow.
+        required: false
+        type: string
+        in: formData
       tags:
         - Workflows
       responses:
@@ -198,6 +203,11 @@ paths:
         required: false
         type: file
         in: formData
+      - name: requestedWorkflowId
+        description: A set of IDs to assign to these workflows. Must be a JSON list of strings in UUID format. Must have the same number of entries and be in the same order as the workflow inputs list. If not supplied, random IDs will be generated for the workflows.
+        required: false
+        type: string
+        in: formData
       tags:
         - Workflows
       responses:
diff --git a/backend/src/test/scala/cromwell/backend/standard/callcaching/CallCachingBlacklistManagerSpec.scala b/backend/src/test/scala/cromwell/backend/standard/callcaching/CallCachingBlacklistManagerSpec.scala
index 66ead4a76ac..fdfa7f97e04 100644
--- a/backend/src/test/scala/cromwell/backend/standard/callcaching/CallCachingBlacklistManagerSpec.scala
+++ b/backend/src/test/scala/cromwell/backend/standard/callcaching/CallCachingBlacklistManagerSpec.scala
@@ -23,7 +23,8 @@ class CallCachingBlacklistManagerSpec extends AnyFlatSpec with CromwellTimeoutSp
     workflowOptions = WorkflowOptions(JsObject.empty),
     labelsJson = "",
     workflowOnHold = false,
-    warnings = List.empty
+    warnings = List.empty,
+    requestedWorkflowId = None
   )
 
   val workflowSourcesYesGrouping = workflowSourcesNoGrouping.copy(
diff --git a/core/src/main/scala/cromwell/core/WorkflowSourceFilesCollection.scala b/core/src/main/scala/cromwell/core/WorkflowSourceFilesCollection.scala
index 9359eab5430..ee936b86819 100644
--- a/core/src/main/scala/cromwell/core/WorkflowSourceFilesCollection.scala
+++ b/core/src/main/scala/cromwell/core/WorkflowSourceFilesCollection.scala
@@ -16,6 +16,7 @@ sealed trait WorkflowSourceFilesCollection {
   def workflowType: Option[WorkflowType]
   def workflowTypeVersion: Option[WorkflowTypeVersion]
   def workflowOnHold: Boolean
+  def requestedWorkflowId: Option[WorkflowId]
 
   def warnings: Seq[String]
 
@@ -49,7 +50,8 @@ object WorkflowSourceFilesCollection {
             labelsJson: WorkflowJson,
             importsFile: Option[Array[Byte]],
             workflowOnHold: Boolean,
-            warnings: Seq[String]): WorkflowSourceFilesCollection = importsFile match {
+            warnings: Seq[String],
+            requestedWorkflowId: Option[WorkflowId]): WorkflowSourceFilesCollection = importsFile match {
     case Some(imports) =>
       WorkflowSourceFilesWithDependenciesZip(
         workflowSource = workflowSource,
@@ -62,7 +64,8 @@
         labelsJson = labelsJson,
         importsZip = imports,
         workflowOnHold = workflowOnHold,
-        warnings = warnings)
+        warnings = warnings,
+        requestedWorkflowId = requestedWorkflowId)
     case None =>
       WorkflowSourceFilesWithoutImports(
         workflowSource = workflowSource,
@@ -74,7 +77,8 @@
         workflowOptions = workflowOptions,
         labelsJson = labelsJson,
         workflowOnHold = workflowOnHold,
-        warnings = warnings)
+        warnings = warnings,
+        requestedWorkflowId = requestedWorkflowId)
   }
 }
 
@@ -87,7 +91,8 @@ final case class WorkflowSourceFilesWithoutImports(workflowSource: Option[Workfl
                                                    workflowOptions: WorkflowOptions,
                                                    labelsJson: WorkflowJson,
                                                    workflowOnHold: Boolean = false,
-                                                   warnings: Seq[String]) extends WorkflowSourceFilesCollection
+                                                   warnings: Seq[String],
+                                                   requestedWorkflowId: Option[WorkflowId]) extends WorkflowSourceFilesCollection
 
 final case class WorkflowSourceFilesWithDependenciesZip(workflowSource: Option[WorkflowSource],
                                                         workflowUrl: Option[WorkflowUrl],
@@ -99,7 +104,8 @@ final case class WorkflowSourceFilesWithDependenciesZip(workflowSource: Option[W
                                                         labelsJson: WorkflowJson,
                                                         importsZip: Array[Byte],
                                                        workflowOnHold: Boolean = false,
-                                                        warnings: Seq[String]) extends WorkflowSourceFilesCollection {
+                                                        warnings: Seq[String],
+                                                        requestedWorkflowId: Option[WorkflowId]) extends WorkflowSourceFilesCollection {
   override def toString = {
     s"WorkflowSourceFilesWithDependenciesZip($workflowSource, $workflowUrl, $workflowType, $workflowTypeVersion," +
       s""" $inputsJson, ${workflowOptions.asPrettyJson}, $labelsJson, <>, $warnings)"""
diff --git a/core/src/test/scala/cromwell/util/SampleWdl.scala b/core/src/test/scala/cromwell/util/SampleWdl.scala
index d040a883e11..4b2f773bf63 100644
--- a/core/src/test/scala/cromwell/util/SampleWdl.scala
+++ b/core/src/test/scala/cromwell/util/SampleWdl.scala
@@ -44,7 +44,8 @@ trait SampleWdl extends TestFileUtil {
           workflowTypeVersion = workflowTypeVersion,
           warnings = Vector.empty,
           workflowOnHold = workflowOnHold,
-          importsZip = zip)
+          importsZip = zip,
+          requestedWorkflowId = None)
       case None =>
         WorkflowSourceFilesWithoutImports(
           workflowSource = Option(workflowSource(runtime)),
@@ -56,7 +57,8 @@
           workflowType = workflowType,
           workflowTypeVersion = workflowTypeVersion,
           warnings = Vector.empty,
-          workflowOnHold = workflowOnHold)
+          workflowOnHold = workflowOnHold,
+          requestedWorkflowId = None)
     }
   }
 
diff --git a/database/sql/src/main/scala/cromwell/database/slick/WorkflowStoreSlickDatabase.scala b/database/sql/src/main/scala/cromwell/database/slick/WorkflowStoreSlickDatabase.scala
index e0e19ee97aa..5d88d0dbec8 100644
--- a/database/sql/src/main/scala/cromwell/database/slick/WorkflowStoreSlickDatabase.scala
+++ b/database/sql/src/main/scala/cromwell/database/slick/WorkflowStoreSlickDatabase.scala
@@ -154,4 +154,8 @@ trait WorkflowStoreSlickDatabase extends WorkflowStoreSqlDatabase {
   override def findWorkflows(cromwellId: String)(implicit ec: ExecutionContext): Future[Iterable[String]] = {
     runTransaction(dataAccess.findWorkflows(cromwellId).result)
   }
+
+  override def checkWhetherWorkflowExists(workflowId: String)(implicit ec: ExecutionContext): Future[Boolean] = {
+    runTransaction(dataAccess.checkExists(workflowId).result.map(_.nonEmpty))
+  }
 }
diff --git a/database/sql/src/main/scala/cromwell/database/slick/tables/WorkflowStoreEntryComponent.scala b/database/sql/src/main/scala/cromwell/database/slick/tables/WorkflowStoreEntryComponent.scala
index 829911af13e..3fb6ec48cdd 100644
--- a/database/sql/src/main/scala/cromwell/database/slick/tables/WorkflowStoreEntryComponent.scala
+++ b/database/sql/src/main/scala/cromwell/database/slick/tables/WorkflowStoreEntryComponent.scala
@@ -178,6 +178,7 @@ trait WorkflowStoreEntryComponent {
     }
   )
 
+  // Find workflows running on a given Cromwell instance with abort requested:
   val findWorkflowsWithAbortRequested = Compiled(
     (cromwellId: Rep[String]) => for {
       workflowStoreEntry <- workflowStoreEntries
@@ -185,10 +186,18 @@
     } yield workflowStoreEntry.workflowExecutionUuid
   )
 
+  // Find workflows running on a given Cromwell instance:
   val findWorkflows = Compiled(
     (cromwellId: Rep[String]) => for {
       workflowStoreEntry <- workflowStoreEntries
       if workflowStoreEntry.cromwellId === cromwellId
     } yield workflowStoreEntry.workflowExecutionUuid
   )
+
+  val checkExists = Compiled(
+    (workflowId: Rep[String]) => (for {
+      workflowStoreEntry <- workflowStoreEntries
+      if workflowStoreEntry.workflowExecutionUuid === workflowId
+    } yield 1)
+  )
 }
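The two additions above fit together: the Compiled `checkExists` query backs the new `checkWhetherWorkflowExists` method. Below is a hedged sketch of how a caller might use it to reject an already-used requested ID; the wiring is assumed for illustration and is not the actual SqlWorkflowStore code.

    import cromwell.database.sql.WorkflowStoreSqlDatabase
    import java.util.UUID
    import scala.concurrent.{ExecutionContext, Future}

    // Hypothetical caller: fail a submission whose requested workflow ID already exists.
    object RequestedIdCheck {
      def rejectDuplicate(store: WorkflowStoreSqlDatabase, requestedId: UUID)
                         (implicit ec: ExecutionContext): Future[Unit] =
        store.checkWhetherWorkflowExists(requestedId.toString) flatMap {
          case true  => Future.failed(new IllegalArgumentException(
                          s"Requested workflow ID '$requestedId' is already in use"))
          case false => Future.successful(()) // ID is free, submission may proceed
        }
    }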
diff --git a/database/sql/src/main/scala/cromwell/database/sql/WorkflowStoreSqlDatabase.scala b/database/sql/src/main/scala/cromwell/database/sql/WorkflowStoreSqlDatabase.scala
index 821db0ac9c6..a8312f6d685 100644
--- a/database/sql/src/main/scala/cromwell/database/sql/WorkflowStoreSqlDatabase.scala
+++ b/database/sql/src/main/scala/cromwell/database/sql/WorkflowStoreSqlDatabase.scala
@@ -104,4 +104,6 @@ ____ __ ____ ______ .______ __ ___ _______ __ ______
 
   def findWorkflows(cromwellId: String)(implicit ec: ExecutionContext): Future[Iterable[String]]
 
+  def checkWhetherWorkflowExists(workflowId: String)(implicit ec: ExecutionContext): Future[Boolean]
+
 }
diff --git a/docs/api/RESTAPI.md b/docs/api/RESTAPI.md
index 5731f363a09..d74f09a00d7 100644
--- a/docs/api/RESTAPI.md
+++ b/docs/api/RESTAPI.md
@@ -1,5 +1,5 @@