From 2df9dd8d69c01824ee9003e7adb756a30e36dd27 Mon Sep 17 00:00:00 2001 From: Christina Ahrens Roberts Date: Thu, 14 Oct 2021 20:18:41 +0000 Subject: [PATCH 01/29] Update cromwell version from 70 to 71 --- project/Version.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/Version.scala b/project/Version.scala index 96f52ce5bd9..98751b51319 100644 --- a/project/Version.scala +++ b/project/Version.scala @@ -5,7 +5,7 @@ import sbt._ object Version { // Upcoming release, or current if we're on a master / hotfix branch - val cromwellVersion = "70" + val cromwellVersion = "71" /** * Returns true if this project should be considered a snapshot. From bb43985418f273ca44bf35c7402b1c477d78b0c0 Mon Sep 17 00:00:00 2001 From: Miguel Covarrubias Date: Wed, 20 Oct 2021 14:23:39 -0400 Subject: [PATCH 02/29] Remove AWS logging that could error with multiple compute environments [BT-429] (#6547) --- project/Dependencies.scala | 1 - .../backend/impl/aws/AwsBatchJob.scala | 41 +------------------ 2 files changed, 1 insertion(+), 41 deletions(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 66adf0b41b5..be85b02b4df 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -307,7 +307,6 @@ object Dependencies { "cloudwatchlogs", "s3", "sts", - "ecs" ).map(artifactName => "software.amazon.awssdk" % artifactName % awsSdkV) private val googleCloudDependencies = List( diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala index 90f1918b9da..a22cf25f307 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala @@ -49,8 +49,6 @@ import software.amazon.awssdk.services.batch.BatchClient import software.amazon.awssdk.services.batch.model._ import software.amazon.awssdk.services.cloudwatchlogs.CloudWatchLogsClient import software.amazon.awssdk.services.cloudwatchlogs.model.{GetLogEventsRequest, OutputLogEvent} -import software.amazon.awssdk.services.ecs.EcsClient -import software.amazon.awssdk.services.ecs.model.DescribeContainerInstancesRequest import software.amazon.awssdk.services.s3.S3Client import software.amazon.awssdk.services.s3.model.{GetObjectRequest, HeadObjectRequest, NoSuchKeyException, PutObjectRequest} import wdl4s.parser.MemoryUnit @@ -234,7 +232,7 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL //find or create the script in s3 to execute for s3 fileSystem val scriptKey = runtimeAttributes.fileSystem match { case AWSBatchStorageSystems.s3 => findOrCreateS3Script(reconfiguredScript, runtimeAttributes.scriptS3BucketName) - case _ => "" + case _ => "" } if(runtimeAttributes.fileSystem == AWSBatchStorageSystems.s3) { @@ -445,8 +443,6 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL */ def status(jobId: String): Try[RunStatus] = for { statusString <- Try(detail(jobId).status) - batchJobContainerContext <- Try(batchJobContainerContext(jobId)) - _ <- Try(Log.debug(s"Task ${jobDescriptor.key.call.fullyQualifiedName + "-" + jobDescriptor.key.index + "-" + jobDescriptor.key.attempt} in container context $batchJobContainerContext")) runStatus <- RunStatus.fromJobStatus(statusString, jobId) } yield runStatus @@ -460,41 +456,6 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL jobDetail } - /** - * Return 
information about the container, ECS Cluster and EC2 instance that is (or was) hosting this job - * @param jobId the id of the job for which you want the context - * @return the context - */ - def batchJobContainerContext(jobId: String): BatchJobContainerContext ={ - if (jobId == null) return BatchJobContainerContext("","",Seq.empty, Seq.empty) - - val containerInstanceArn = detail(jobId).container().containerInstanceArn() - if(containerInstanceArn == null || containerInstanceArn.isEmpty) return BatchJobContainerContext(jobId,"",Seq.empty, Seq.empty) - - val describeJobQueuesResponse = batchClient.describeJobQueues( DescribeJobQueuesRequest.builder().jobQueues( runtimeAttributes.queueArn ).build()) - val computeEnvironments = describeJobQueuesResponse.jobQueues().asScala.head.computeEnvironmentOrder().asScala.map(_.computeEnvironment()) - val describeComputeEnvironmentsResponse = batchClient.describeComputeEnvironments( DescribeComputeEnvironmentsRequest.builder().computeEnvironments(computeEnvironments.asJava).build()) - val ecsClusterArns = describeComputeEnvironmentsResponse.computeEnvironments().asScala.map(_.ecsClusterArn()) - - val ecsClient = configureClient(EcsClient.builder(), optAwsAuthMode, configRegion) - - val instanceIds: Seq[String] = ecsClusterArns.map(containerArn => ecsClient.describeContainerInstances(DescribeContainerInstancesRequest.builder().containerInstances(containerInstanceArn).cluster(containerArn).build())) - .map(r => r.containerInstances().asScala).flatMap(_.map(_.ec2InstanceId())) - - BatchJobContainerContext(jobId, containerInstanceArn, ecsClusterArns, instanceIds) - } - - case class BatchJobContainerContext(jobId: String, containerInstanceArn: String, ecsClusterArns: Seq[String], ec2InstanceIds: Seq[String]) { - override def toString: String = { - new ToStringBuilder(this, ToStringStyle.JSON_STYLE) - .append("jobId", this.jobId) - .append("containerInstanceArn", containerInstanceArn) - .append("ecsClusterArns", ecsClusterArns) - .append("ec2InstanceIds", ec2InstanceIds) - .build() - } - } - def rc(detail: JobDetail): Integer = { detail.container.exitCode } From fb85d8ee2be0fe48110b86dfd119e2a7943d43e0 Mon Sep 17 00:00:00 2001 From: Miguel Covarrubias Date: Wed, 20 Oct 2021 16:17:29 -0400 Subject: [PATCH 03/29] Choose a more stable reference file for reference disk test [BT-428] (#6546) --- .../reference_disk/reference_disk_test.inputs | 2 +- src/ci/resources/papi_v2_reference_image_manifest.conf | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/centaur/src/main/resources/standardTestCases/reference_disk/reference_disk_test.inputs b/centaur/src/main/resources/standardTestCases/reference_disk/reference_disk_test.inputs index ffcb0c717c7..8e28a8ef541 100644 --- a/centaur/src/main/resources/standardTestCases/reference_disk/reference_disk_test.inputs +++ b/centaur/src/main/resources/standardTestCases/reference_disk/reference_disk_test.inputs @@ -1,3 +1,3 @@ { - "wf_reference_disk_test.check_if_localized_as_symlink.reference_file_input": "gs://gcp-public-data--broad-references/hg19/v0/README" + "wf_reference_disk_test.check_if_localized_as_symlink.reference_file_input": "gs://gcp-public-data--broad-references/hg19/v0/Homo_sapiens_assembly19.tile_db_header.vcf" } diff --git a/src/ci/resources/papi_v2_reference_image_manifest.conf b/src/ci/resources/papi_v2_reference_image_manifest.conf index c1f005b8c4e..8c15fc44636 100644 --- a/src/ci/resources/papi_v2_reference_image_manifest.conf +++ b/src/ci/resources/papi_v2_reference_image_manifest.conf 
@@ -1660,7 +1660,7 @@ reference-disk-localization-manifests = [ "crc32c" : 2685902650 }, { "path" : "gcp-public-data--broad-references/hg19/v0/README", - "crc32c" : 1479759245 + "crc32c" : 2793498556 }, { "path" : "gcp-public-data--broad-references/hg19/v0/WholeGenomeShotgunContam.vcf.idx", "crc32c" : 3311935743 @@ -4815,5 +4815,5 @@ reference-disk-localization-manifests = [ "path" : "gcp-public-data--broad-references/hg38/v0/CrossSpeciesContamination/CrossSpeciesContaminant/pathseq_microbe.fa.img", "crc32c" : 2373170979 } ] - } + } ] From e3964c5280fd1d6e6adfa28d929f970258631f4f Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Fri, 8 Oct 2021 14:25:17 -0400 Subject: [PATCH 04/29] Add backend params with identity field to TES task --- .../backend/impl/tes/TesResponseJsonFormatter.scala | 2 +- .../scala/cromwell/backend/impl/tes/TesTask.scala | 12 ++++++++++-- .../backend/impl/tes/TesWorkflowOptionKeys.scala | 6 ++++++ 3 files changed, 17 insertions(+), 3 deletions(-) create mode 100644 supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesWorkflowOptionKeys.scala diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesResponseJsonFormatter.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesResponseJsonFormatter.scala index 9391b379cd8..02cf8e4988a 100644 --- a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesResponseJsonFormatter.scala +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesResponseJsonFormatter.scala @@ -7,7 +7,7 @@ final case class MinimalTaskView(id: String, state: String) final case class CancelTaskResponse() object TesResponseJsonFormatter extends DefaultJsonProtocol { - implicit val resourcesFormat = jsonFormat5(Resources) + implicit val resourcesFormat = jsonFormat6(Resources) implicit val inputFormat = jsonFormat6(Input) implicit val outputFormat = jsonFormat5(Output) implicit val executorFormat = jsonFormat7(Executor) diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesTask.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesTask.scala index f46dabad739..41d38c36a92 100644 --- a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesTask.scala +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesTask.scala @@ -40,6 +40,8 @@ final case class TesTask(jobDescriptor: BackendJobDescriptor, workflowDescriptor.workflowOptions.getOrElse("project", "") } + val executorIdentity: Option[String] = workflowDescriptor.workflowOptions.get("identity").toOption + // contains the script to be executed private val commandScript = Input( name = Option("commandScript"), @@ -219,12 +221,17 @@ final case class TesTask(jobDescriptor: BackendJobDescriptor, None } + // This was added in BT-409 to let us pass information to an Azure + // TES server about which user identity to run tasks as. 
+ private val backendParameters = executorIdentity.map(i => Map(TesWorkflowOptionKeys.Identity -> i)) + val resources = Resources( cpu_cores = runtimeAttributes.cpu.map(_.value), ram_gb = ram, disk_gb = disk, preemptible = Option(runtimeAttributes.preemptible), - zones = None + zones = None, + backend_parameters = backendParameters ) val executors = Seq(Executor( @@ -276,7 +283,8 @@ final case class Resources(cpu_cores: Option[Int], ram_gb: Option[Double], disk_gb: Option[Double], preemptible: Option[Boolean], - zones: Option[Seq[String]]) + zones: Option[Seq[String]], + backend_parameters: Option[Map[String, String]]) final case class OutputFileLog(url: String, path: String, diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesWorkflowOptionKeys.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesWorkflowOptionKeys.scala new file mode 100644 index 00000000000..030fa618b04 --- /dev/null +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesWorkflowOptionKeys.scala @@ -0,0 +1,6 @@ +package cromwell.backend.impl.tes + +object TesWorkflowOptionKeys { + // Communicates to the TES server which identity the task should execute as + val Identity = "identity" +} From cfa707b8fc34c65a2b78a180dc0272542f8365c0 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Tue, 12 Oct 2021 11:05:49 -0400 Subject: [PATCH 05/29] Reorganize to enable unit test --- .../cromwell/backend/impl/tes/TesTask.scala | 52 +++++++++++-------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesTask.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesTask.scala index 41d38c36a92..caa700aa530 100644 --- a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesTask.scala +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesTask.scala @@ -3,7 +3,7 @@ package cromwell.backend.impl.tes import common.collections.EnhancedCollections._ import common.util.StringUtil._ import cromwell.backend.impl.tes.OutputMode.OutputMode -import cromwell.backend.{BackendConfigurationDescriptor, BackendJobDescriptor} +import cromwell.backend.{BackendConfigurationDescriptor, BackendJobDescriptor, BackendWorkflowDescriptor} import cromwell.core.logging.JobLogger import cromwell.core.path.{DefaultPathBuilder, Path} import wdl.draft2.model.FullyQualifiedName @@ -40,8 +40,6 @@ final case class TesTask(jobDescriptor: BackendJobDescriptor, workflowDescriptor.workflowOptions.getOrElse("project", "") } - val executorIdentity: Option[String] = workflowDescriptor.workflowOptions.get("identity").toOption - // contains the script to be executed private val commandScript = Input( name = Option("commandScript"), @@ -214,25 +212,7 @@ final case class TesTask(jobDescriptor: BackendJobDescriptor, result } - private val disk :: ram :: _ = Seq(runtimeAttributes.disk, runtimeAttributes.memory) map { - case Some(x) => - Option(x.to(MemoryUnit.GB).amount) - case None => - None - } - - // This was added in BT-409 to let us pass information to an Azure - // TES server about which user identity to run tasks as. 
- private val backendParameters = executorIdentity.map(i => Map(TesWorkflowOptionKeys.Identity -> i)) - - val resources = Resources( - cpu_cores = runtimeAttributes.cpu.map(_.value), - ram_gb = ram, - disk_gb = disk, - preemptible = Option(runtimeAttributes.preemptible), - zones = None, - backend_parameters = backendParameters - ) + val resources = TesTask.makeResources(runtimeAttributes, workflowDescriptor) val executors = Seq(Executor( image = dockerImageUsed, @@ -245,6 +225,34 @@ final case class TesTask(jobDescriptor: BackendJobDescriptor, )) } +object TesTask { + def makeResources(runtimeAttributes: TesRuntimeAttributes, + workflowDescriptor: BackendWorkflowDescriptor): Resources = { + + val executorIdentity: Option[String] = workflowDescriptor.workflowOptions.get("identity").toOption + + // This was added in BT-409 to let us pass information to an Azure + // TES server about which user identity to run tasks as. + val backendParameters = executorIdentity.map(i => Map(TesWorkflowOptionKeys.Identity -> i)) + + val disk :: ram :: _ = Seq(runtimeAttributes.disk, runtimeAttributes.memory) map { + case Some(x) => + Option(x.to(MemoryUnit.GB).amount) + case None => + None + } + + Resources( + cpu_cores = runtimeAttributes.cpu.map(_.value), + ram_gb = ram, + disk_gb = disk, + preemptible = Option(runtimeAttributes.preemptible), + zones = None, + backend_parameters = backendParameters + ) + } +} + // Field requirements in classes below based off GA4GH schema final case class Task(id: Option[String], state: Option[String], From 4feed6306ff244859df029fe5af0fafbc29dca6a Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Tue, 12 Oct 2021 11:05:56 -0400 Subject: [PATCH 06/29] Add tests --- .../backend/impl/tes/TesTaskSpec.scala | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTaskSpec.scala diff --git a/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTaskSpec.scala b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTaskSpec.scala new file mode 100644 index 00000000000..0a2e33c42da --- /dev/null +++ b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTaskSpec.scala @@ -0,0 +1,52 @@ +package cromwell.backend.impl.tes + +import common.assertion.CromwellTimeoutSpec +import cromwell.backend.BackendSpec +import cromwell.backend.validation.ContinueOnReturnCodeSet +import cromwell.core.WorkflowOptions +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers +import spray.json.{JsObject, JsString, JsValue} + +class TesTaskSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matchers with BackendSpec { + + val runtimeAttributes = new TesRuntimeAttributes( + ContinueOnReturnCodeSet(Set(0)), + "ubuntu:latest", + None, + false, + None, + None, + None, + false + ) + + def workflowDescriptorWithIdentity(excIdentity: Option[String]) = { + val optionsMap: Map[String, JsValue] = excIdentity.map(i => TesWorkflowOptionKeys.Identity -> JsString(i)).toMap + buildWdlWorkflowDescriptor(TestWorkflows.HelloWorld, None, WorkflowOptions(JsObject(optionsMap))) + } + + "TesTask" should "create the correct resources when an identity is passed in WorkflowOptions" in { + val wd = workflowDescriptorWithIdentity(Some("abc123")) + assert( + TesTask.makeResources(runtimeAttributes, wd) + == Resources(None, None, None, Some(false), None, Some(Map(TesWorkflowOptionKeys.Identity -> "abc123"))) + ) + } + + "TesTask" should "create the correct resources when 
an empty identity is passed in WorkflowOptions" in { + val wd = workflowDescriptorWithIdentity(Some("")) + assert( + TesTask.makeResources(runtimeAttributes, wd) + == Resources(None, None, None, Some(false), None, Some(Map(TesWorkflowOptionKeys.Identity -> ""))) + ) + } + + "TesTask" should "create the correct resources when no identity is passed in WorkflowOptions" in { + val wd = workflowDescriptorWithIdentity(None) + assert( + TesTask.makeResources(runtimeAttributes, wd) + == Resources(None, None, None, Some(false), None, None) + ) + } +} From 51aedeb9f523237b61cd0f8a02f2c0a0b6df53ad Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Tue, 12 Oct 2021 11:43:42 -0400 Subject: [PATCH 07/29] Add test for numeric identity --- .../cromwell/backend/impl/tes/TesTaskSpec.scala | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTaskSpec.scala b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTaskSpec.scala index 0a2e33c42da..5aed0efd830 100644 --- a/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTaskSpec.scala +++ b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTaskSpec.scala @@ -6,7 +6,7 @@ import cromwell.backend.validation.ContinueOnReturnCodeSet import cromwell.core.WorkflowOptions import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers -import spray.json.{JsObject, JsString, JsValue} +import spray.json.{JsNumber, JsObject, JsString, JsValue} class TesTaskSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matchers with BackendSpec { @@ -49,4 +49,18 @@ class TesTaskSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matchers wit == Resources(None, None, None, Some(false), None, None) ) } + + "TesTask" should "create the correct response when a numeric identity is passed in WorkflowOptions" in { + val wd = buildWdlWorkflowDescriptor( + TestWorkflows.HelloWorld, + None, + WorkflowOptions( + JsObject(Map(TesWorkflowOptionKeys.Identity -> JsNumber(5))) + ) + ) + assert( + TesTask.makeResources(runtimeAttributes, wd) + == Resources(None, None, None, Some(false), None, Some(Map(TesWorkflowOptionKeys.Identity -> "5"))) + ) + } } From 7f035201f793c1b4c48ead59b4fe02651ea8196c Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Tue, 12 Oct 2021 11:49:06 -0400 Subject: [PATCH 08/29] Add test for object identity --- .../cromwell/backend/impl/tes/TesTaskSpec.scala | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTaskSpec.scala b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTaskSpec.scala index 5aed0efd830..00ae0b02ecc 100644 --- a/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTaskSpec.scala +++ b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTaskSpec.scala @@ -63,4 +63,19 @@ class TesTaskSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matchers wit == Resources(None, None, None, Some(false), None, Some(Map(TesWorkflowOptionKeys.Identity -> "5"))) ) } + + // TODO this isn't actually the behavior we want + "TesTask" should "silently do nothing when the identity passed in WorkflowOptions is an object" in { + val wd = buildWdlWorkflowDescriptor( + TestWorkflows.HelloWorld, + None, + WorkflowOptions( + JsObject(Map(TesWorkflowOptionKeys.Identity -> JsObject(Map("hi" -> JsString("there"))))) + ) + ) + assert( + TesTask.makeResources(runtimeAttributes, wd) + == 
Resources(None, None, None, Some(false), None, None) + ) + } } From 8755e47fbe1f7df2ac6f14748bd846c30bf82999 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Sat, 16 Oct 2021 14:02:39 -0400 Subject: [PATCH 09/29] PR feedback, plus include empty map when no backend params present --- .../cromwell/backend/impl/tes/TesTask.scala | 13 ++++---- .../impl/tes/TesWorkflowOptionKeys.scala | 4 +-- .../backend/impl/tes/TesTaskSpec.scala | 30 +++++++++---------- 3 files changed, 25 insertions(+), 22 deletions(-) diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesTask.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesTask.scala index caa700aa530..fc7c6bbe640 100644 --- a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesTask.scala +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesTask.scala @@ -212,7 +212,7 @@ final case class TesTask(jobDescriptor: BackendJobDescriptor, result } - val resources = TesTask.makeResources(runtimeAttributes, workflowDescriptor) + val resources: Resources = TesTask.makeResources(runtimeAttributes, workflowDescriptor) val executors = Seq(Executor( image = dockerImageUsed, @@ -229,11 +229,14 @@ object TesTask { def makeResources(runtimeAttributes: TesRuntimeAttributes, workflowDescriptor: BackendWorkflowDescriptor): Resources = { - val executorIdentity: Option[String] = workflowDescriptor.workflowOptions.get("identity").toOption - // This was added in BT-409 to let us pass information to an Azure // TES server about which user identity to run tasks as. - val backendParameters = executorIdentity.map(i => Map(TesWorkflowOptionKeys.Identity -> i)) + val backendParameters = workflowDescriptor + .workflowOptions + .get(TesWorkflowOptionKeys.WorkflowExecutionIdentity) + .toOption + .map(TesWorkflowOptionKeys.WorkflowExecutionIdentity -> _) + .toMap val disk :: ram :: _ = Seq(runtimeAttributes.disk, runtimeAttributes.memory) map { case Some(x) => @@ -248,7 +251,7 @@ object TesTask { disk_gb = disk, preemptible = Option(runtimeAttributes.preemptible), zones = None, - backend_parameters = backendParameters + backend_parameters = Option(backendParameters) ) } } diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesWorkflowOptionKeys.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesWorkflowOptionKeys.scala index 030fa618b04..75cd3e13847 100644 --- a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesWorkflowOptionKeys.scala +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesWorkflowOptionKeys.scala @@ -1,6 +1,6 @@ package cromwell.backend.impl.tes object TesWorkflowOptionKeys { - // Communicates to the TES server which identity the task should execute as - val Identity = "identity" + // Communicates to the TES server which identity the tasks should execute as + val WorkflowExecutionIdentity = "workflow_execution_identity" } diff --git a/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTaskSpec.scala b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTaskSpec.scala index 00ae0b02ecc..19c5d74fb70 100644 --- a/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTaskSpec.scala +++ b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTaskSpec.scala @@ -22,60 +22,60 @@ class TesTaskSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matchers wit ) def workflowDescriptorWithIdentity(excIdentity: Option[String]) = { - val optionsMap: Map[String, 
JsValue] = excIdentity.map(i => TesWorkflowOptionKeys.Identity -> JsString(i)).toMap + val optionsMap: Map[String, JsValue] = excIdentity.map(i => TesWorkflowOptionKeys.WorkflowExecutionIdentity -> JsString(i)).toMap buildWdlWorkflowDescriptor(TestWorkflows.HelloWorld, None, WorkflowOptions(JsObject(optionsMap))) } - "TesTask" should "create the correct resources when an identity is passed in WorkflowOptions" in { - val wd = workflowDescriptorWithIdentity(Some("abc123")) + it should "create the correct resources when an identity is passed in WorkflowOptions" in { + val wd = workflowDescriptorWithIdentity(Option("abc123")) assert( TesTask.makeResources(runtimeAttributes, wd) - == Resources(None, None, None, Some(false), None, Some(Map(TesWorkflowOptionKeys.Identity -> "abc123"))) + == Resources(None, None, None, Option(false), None, Option(Map(TesWorkflowOptionKeys.WorkflowExecutionIdentity -> "abc123"))) ) } - "TesTask" should "create the correct resources when an empty identity is passed in WorkflowOptions" in { - val wd = workflowDescriptorWithIdentity(Some("")) + it should "create the correct resources when an empty identity is passed in WorkflowOptions" in { + val wd = workflowDescriptorWithIdentity(Option("")) assert( TesTask.makeResources(runtimeAttributes, wd) - == Resources(None, None, None, Some(false), None, Some(Map(TesWorkflowOptionKeys.Identity -> ""))) + == Resources(None, None, None, Option(false), None, Option(Map(TesWorkflowOptionKeys.WorkflowExecutionIdentity -> ""))) ) } - "TesTask" should "create the correct resources when no identity is passed in WorkflowOptions" in { + it should "create the correct resources when no identity is passed in WorkflowOptions" in { val wd = workflowDescriptorWithIdentity(None) assert( TesTask.makeResources(runtimeAttributes, wd) - == Resources(None, None, None, Some(false), None, None) + == Resources(None, None, None, Option(false), None, Option(Map.empty[String, String])) ) } - "TesTask" should "create the correct response when a numeric identity is passed in WorkflowOptions" in { + it should "create the correct response when a numeric identity is passed in WorkflowOptions" in { val wd = buildWdlWorkflowDescriptor( TestWorkflows.HelloWorld, None, WorkflowOptions( - JsObject(Map(TesWorkflowOptionKeys.Identity -> JsNumber(5))) + JsObject(Map(TesWorkflowOptionKeys.WorkflowExecutionIdentity -> JsNumber(5))) ) ) assert( TesTask.makeResources(runtimeAttributes, wd) - == Resources(None, None, None, Some(false), None, Some(Map(TesWorkflowOptionKeys.Identity -> "5"))) + == Resources(None, None, None, Option(false), None, Option(Map(TesWorkflowOptionKeys.WorkflowExecutionIdentity -> "5"))) ) } // TODO this isn't actually the behavior we want - "TesTask" should "silently do nothing when the identity passed in WorkflowOptions is an object" in { + it should "silently do nothing when the identity passed in WorkflowOptions is an object" in { val wd = buildWdlWorkflowDescriptor( TestWorkflows.HelloWorld, None, WorkflowOptions( - JsObject(Map(TesWorkflowOptionKeys.Identity -> JsObject(Map("hi" -> JsString("there"))))) + JsObject(Map(TesWorkflowOptionKeys.WorkflowExecutionIdentity -> JsObject(Map("hi" -> JsString("there"))))) ) ) assert( TesTask.makeResources(runtimeAttributes, wd) - == Resources(None, None, None, Some(false), None, None) + == Resources(None, None, None, Option(false), None, Option(Map.empty[String, String])) ) } } From 18a19cb315ca66f3047cbaac5e86b49a446029f6 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Sat, 16 Oct 2021 15:48:42 
-0400 Subject: [PATCH 10/29] Add workflow options validation step to StandardInitializationActor --- .../StandardInitializationActor.scala | 35 ++++++++++++------- .../PipelinesApiInitializationActor.scala | 5 +-- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardInitializationActor.scala b/backend/src/main/scala/cromwell/backend/standard/StandardInitializationActor.scala index 9d84493732b..89420f95525 100644 --- a/backend/src/main/scala/cromwell/backend/standard/StandardInitializationActor.scala +++ b/backend/src/main/scala/cromwell/backend/standard/StandardInitializationActor.scala @@ -82,20 +82,29 @@ class StandardInitializationActor(val standardParams: StandardInitializationActo RuntimeAttributesDefault.workflowOptionsDefault(options, runtimeAttributesBuilder.coercionMap) } - override def validate(): Future[Unit] = { - Future.fromTry(Try { - calls foreach { call => - val runtimeAttributeKeys = call.callable.runtimeAttributes.attributes.keys.toList - val notSupportedAttributes = runtimeAttributesBuilder.unsupportedKeys(runtimeAttributeKeys).toList - - if (notSupportedAttributes.nonEmpty) { - val notSupportedAttrString = notSupportedAttributes mkString ", " - workflowLogger.warn( - s"Key/s [$notSupportedAttrString] is/are not supported by backend. " + - s"Unsupported attributes will not be part of job executions.") - } + def validateWorkflowOptions(): Try[Unit] = Try(()) + + def checkForUnsupportedRuntimeAttributes(): Try[Unit] = Try { + calls foreach { call => + val runtimeAttributeKeys = call.callable.runtimeAttributes.attributes.keys.toList + val notSupportedAttributes = runtimeAttributesBuilder.unsupportedKeys(runtimeAttributeKeys).toList + + if (notSupportedAttributes.nonEmpty) { + val notSupportedAttrString = notSupportedAttributes mkString ", " + workflowLogger.warn( + s"Key/s [$notSupportedAttrString] is/are not supported by backend. 
" + + s"Unsupported attributes will not be part of job executions.") } - }) + } + } + + override def validate(): Future[Unit] = { + Future.fromTry( + for { + _ <- validateWorkflowOptions() + _ <- checkForUnsupportedRuntimeAttributes() + } yield () + ) } override protected lazy val workflowDescriptor: BackendWorkflowDescriptor = standardParams.workflowDescriptor diff --git a/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/PipelinesApiInitializationActor.scala b/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/PipelinesApiInitializationActor.scala index 73cc0f45a96..a97b8f7f9df 100644 --- a/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/PipelinesApiInitializationActor.scala +++ b/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/PipelinesApiInitializationActor.scala @@ -31,6 +31,7 @@ import spray.json.{JsObject, JsString} import wom.graph.CommandCallNode import scala.concurrent.Future +import scala.util.Try case class PipelinesApiInitializationActorParams ( @@ -219,12 +220,12 @@ class PipelinesApiInitializationActor(pipelinesParams: PipelinesApiInitializatio privateDockerEncryptedToken = privateDockerEncryptedToken, vpcNetworkAndSubnetworkProjectLabels = vpcNetworkAndSubnetworkProjectLabels) + override def validateWorkflowOptions(): Try[Unit] = GoogleLabels.fromWorkflowOptions(workflowOptions).map(_ => ()) + override def beforeAll(): Future[Option[BackendInitializationData]] = { for { paths <- workflowPaths _ = publishWorkflowRoot(paths.workflowRoot.pathAsString) - // Validate the google-labels workflow options, and only succeed initialization if they're good: - _ <- Future.fromTry(GoogleLabels.fromWorkflowOptions(workflowOptions)) data <- initializationData } yield Option(data) } From c50ceb46822edeb63f35da76bb975e36b4ced0e3 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Sat, 16 Oct 2021 15:49:07 -0400 Subject: [PATCH 11/29] Validate WorkflowExecutionIdentity option --- .../impl/tes/TesInitializationActor.scala | 13 ++++++- .../cromwell/backend/impl/tes/TesTask.scala | 2 + .../impl/tes/TesInitializationActorSpec.scala | 38 ++++++++++++++++++- .../backend/impl/tes/TesTaskSpec.scala | 31 +-------------- 4 files changed, 51 insertions(+), 33 deletions(-) diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesInitializationActor.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesInitializationActor.scala index 63e261e0f0e..e470f6671fb 100644 --- a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesInitializationActor.scala +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesInitializationActor.scala @@ -4,9 +4,11 @@ import akka.actor.ActorRef import cromwell.backend.standard._ import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationData, BackendWorkflowDescriptor} import cromwell.core.path.PathBuilder +import spray.json.JsString import wom.graph.CommandCallNode import scala.concurrent.Future +import scala.util.{Failure, Success, Try} case class TesInitializationActorParams ( @@ -22,7 +24,7 @@ class TesInitializationActor(params: TesInitializationActorParams) extends StandardInitializationActor(params) { private val tesConfiguration = params.tesConfiguration - + override lazy val pathBuilders: Future[List[PathBuilder]] = { 
standardParams.configurationDescriptor.pathBuildersWithDefault(workflowDescriptor.workflowOptions) } @@ -34,6 +36,15 @@ class TesInitializationActor(params: TesInitializationActorParams) override lazy val runtimeAttributesBuilder: StandardValidatedRuntimeAttributesBuilder = TesRuntimeAttributes.runtimeAttributesBuilder(tesConfiguration.runtimeConfig) + override def validateWorkflowOptions(): Try[Unit] = + workflowDescriptor.workflowOptions.toMap.get(TesWorkflowOptionKeys.WorkflowExecutionIdentity) match { + case None => Success(()) + case Some(_: JsString) => Success(()) + case _ => Failure( + new Exception(s"Workflow option ${TesWorkflowOptionKeys.WorkflowExecutionIdentity} must be a string.") + ) + } + override def beforeAll(): Future[Option[BackendInitializationData]] = { workflowPaths map { paths => publishWorkflowRoot(paths.workflowRoot.toString) diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesTask.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesTask.scala index fc7c6bbe640..e87090da0e8 100644 --- a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesTask.scala +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesTask.scala @@ -231,6 +231,8 @@ object TesTask { // This was added in BT-409 to let us pass information to an Azure // TES server about which user identity to run tasks as. + // Note that we validate the type of WorkflowExecutionIdentity + // in TesInitializationActor. val backendParameters = workflowDescriptor .workflowOptions .get(TesWorkflowOptionKeys.WorkflowExecutionIdentity) diff --git a/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesInitializationActorSpec.scala b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesInitializationActorSpec.scala index 488efbbdba4..4fa8073c5e3 100644 --- a/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesInitializationActorSpec.scala +++ b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesInitializationActorSpec.scala @@ -1,7 +1,6 @@ package cromwell.backend.impl.tes import java.util.UUID - import akka.actor.Props import akka.testkit.{EventFilter, ImplicitSender, TestDuration} import com.typesafe.config.{Config, ConfigFactory} @@ -10,11 +9,12 @@ import cromwell.backend.BackendWorkflowInitializationActor.{InitializationFailed import cromwell.backend.async.RuntimeAttributeValidationFailures import cromwell.backend.{BackendConfigurationDescriptor, BackendWorkflowDescriptor} import cromwell.core.Tags.PostWomTest -import cromwell.core.TestKitSuite +import cromwell.core.{TestKitSuite, WorkflowOptions} import cromwell.core.filesystem.CromwellFileSystems import cromwell.core.logging.LoggingTest._ import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpecLike +import spray.json.{JsNumber, JsObject, JsString} import wom.graph.CommandCallNode import scala.concurrent.duration._ @@ -99,6 +99,40 @@ class TesInitializationActorSpec extends TestKitSuite } } + "fail to start when WorkflowExecutionIdentity is not a string" in { + within(Timeout) { + val workflowOptionsWithNumber = WorkflowOptions( + JsObject(Map(TesWorkflowOptionKeys.WorkflowExecutionIdentity -> JsString("5"))) + ) + val workflowDescriptor = buildWdlWorkflowDescriptor(HelloWorld, + runtime = """runtime { docker: "ubuntu/latest" test: true }""", + options = workflowOptionsWithNumber) + val backend = getActorRef(workflowDescriptor, workflowDescriptor.callable.taskCallNodes, conf) + backend ! 
Initialize + expectMsgPF() { + case InitializationSuccess(_) => + case InitializationFailed(f) => fail(s"InitializationSuccess was expected but got $f") + } + } + } + + "successfully start when WorkflowExecutionIdentity is a string" in { + within(Timeout) { + val workflowOptionsWithNumber = WorkflowOptions( + JsObject(Map(TesWorkflowOptionKeys.WorkflowExecutionIdentity -> JsNumber(5))) + ) + val workflowDescriptor = buildWdlWorkflowDescriptor(HelloWorld, + runtime = """runtime { docker: "ubuntu/latest" test: true }""", + options = workflowOptionsWithNumber) + val backend = getActorRef(workflowDescriptor, workflowDescriptor.callable.taskCallNodes, conf) + backend ! Initialize + expectMsgPF() { + case InitializationSuccess(s) => fail(s"InitializationFailed was expected but got $s") + case InitializationFailed(_) => + } + } + } + "return InitializationFailed when docker runtime attribute key is not present" taggedAs PostWomTest ignore { within(Timeout) { val workflowDescriptor = buildWdlWorkflowDescriptor(HelloWorld, runtime = """runtime { }""") diff --git a/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTaskSpec.scala b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTaskSpec.scala index 19c5d74fb70..d27e3234a8f 100644 --- a/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTaskSpec.scala +++ b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTaskSpec.scala @@ -6,7 +6,7 @@ import cromwell.backend.validation.ContinueOnReturnCodeSet import cromwell.core.WorkflowOptions import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers -import spray.json.{JsNumber, JsObject, JsString, JsValue} +import spray.json.{JsObject, JsString, JsValue} class TesTaskSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matchers with BackendSpec { @@ -49,33 +49,4 @@ class TesTaskSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matchers wit == Resources(None, None, None, Option(false), None, Option(Map.empty[String, String])) ) } - - it should "create the correct response when a numeric identity is passed in WorkflowOptions" in { - val wd = buildWdlWorkflowDescriptor( - TestWorkflows.HelloWorld, - None, - WorkflowOptions( - JsObject(Map(TesWorkflowOptionKeys.WorkflowExecutionIdentity -> JsNumber(5))) - ) - ) - assert( - TesTask.makeResources(runtimeAttributes, wd) - == Resources(None, None, None, Option(false), None, Option(Map(TesWorkflowOptionKeys.WorkflowExecutionIdentity -> "5"))) - ) - } - - // TODO this isn't actually the behavior we want - it should "silently do nothing when the identity passed in WorkflowOptions is an object" in { - val wd = buildWdlWorkflowDescriptor( - TestWorkflows.HelloWorld, - None, - WorkflowOptions( - JsObject(Map(TesWorkflowOptionKeys.WorkflowExecutionIdentity -> JsObject(Map("hi" -> JsString("there"))))) - ) - ) - assert( - TesTask.makeResources(runtimeAttributes, wd) - == Resources(None, None, None, Option(false), None, Option(Map.empty[String, String])) - ) - } } From c163d81e92d4c5001cb48c3c22efbb2c464d7bed Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Wed, 20 Oct 2021 13:14:24 -0400 Subject: [PATCH 12/29] Fix test names --- .../backend/impl/tes/TesInitializationActorSpec.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesInitializationActorSpec.scala b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesInitializationActorSpec.scala index 
4fa8073c5e3..b15599c8b4c 100644 --- a/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesInitializationActorSpec.scala +++ b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesInitializationActorSpec.scala @@ -99,14 +99,14 @@ class TesInitializationActorSpec extends TestKitSuite } } - "fail to start when WorkflowExecutionIdentity is not a string" in { + "successfully start when WorkflowExecutionIdentity is a string" in { within(Timeout) { - val workflowOptionsWithNumber = WorkflowOptions( + val workflowOptions = WorkflowOptions( JsObject(Map(TesWorkflowOptionKeys.WorkflowExecutionIdentity -> JsString("5"))) ) val workflowDescriptor = buildWdlWorkflowDescriptor(HelloWorld, runtime = """runtime { docker: "ubuntu/latest" test: true }""", - options = workflowOptionsWithNumber) + options = workflowOptions) val backend = getActorRef(workflowDescriptor, workflowDescriptor.callable.taskCallNodes, conf) backend ! Initialize expectMsgPF() { @@ -116,7 +116,7 @@ class TesInitializationActorSpec extends TestKitSuite } } - "successfully start when WorkflowExecutionIdentity is a string" in { + "fail to start when WorkflowExecutionIdentity is not a string" in { within(Timeout) { val workflowOptionsWithNumber = WorkflowOptions( JsObject(Map(TesWorkflowOptionKeys.WorkflowExecutionIdentity -> JsNumber(5))) From 0f6ab3dd4b5b4d053012ac6c0ff7be6f1e8ce8f2 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Thu, 21 Oct 2021 09:35:21 -0400 Subject: [PATCH 13/29] PR feedback --- .../standard/StandardInitializationActor.scala | 4 ++-- .../backend/impl/tes/TesInitializationActor.scala | 4 ++-- .../impl/tes/TesInitializationActorSpec.scala | 10 ++++++++-- .../cromwell/backend/impl/tes/TesTaskSpec.scala | 15 ++++++--------- 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardInitializationActor.scala b/backend/src/main/scala/cromwell/backend/standard/StandardInitializationActor.scala index 89420f95525..95e898d6711 100644 --- a/backend/src/main/scala/cromwell/backend/standard/StandardInitializationActor.scala +++ b/backend/src/main/scala/cromwell/backend/standard/StandardInitializationActor.scala @@ -12,7 +12,7 @@ import wom.graph.CommandCallNode import wom.values.WomValue import scala.concurrent.Future -import scala.util.Try +import scala.util.{Success, Try} trait StandardInitializationActorParams { def workflowDescriptor: BackendWorkflowDescriptor @@ -82,7 +82,7 @@ class StandardInitializationActor(val standardParams: StandardInitializationActo RuntimeAttributesDefault.workflowOptionsDefault(options, runtimeAttributesBuilder.coercionMap) } - def validateWorkflowOptions(): Try[Unit] = Try(()) + def validateWorkflowOptions(): Try[Unit] = Success(()) def checkForUnsupportedRuntimeAttributes(): Try[Unit] = Try { calls foreach { call => diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesInitializationActor.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesInitializationActor.scala index e470f6671fb..d71d89ab8ae 100644 --- a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesInitializationActor.scala +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesInitializationActor.scala @@ -40,8 +40,8 @@ class TesInitializationActor(params: TesInitializationActorParams) workflowDescriptor.workflowOptions.toMap.get(TesWorkflowOptionKeys.WorkflowExecutionIdentity) match { case None => Success(()) case Some(_: JsString) => Success(()) - case _ => 
Failure( - new Exception(s"Workflow option ${TesWorkflowOptionKeys.WorkflowExecutionIdentity} must be a string.") + case Some(v) => Failure( + new Exception(s"Workflow option ${TesWorkflowOptionKeys.WorkflowExecutionIdentity} must be a string, was ${v}.") ) } diff --git a/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesInitializationActorSpec.scala b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesInitializationActorSpec.scala index b15599c8b4c..a559d376d61 100644 --- a/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesInitializationActorSpec.scala +++ b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesInitializationActorSpec.scala @@ -116,7 +116,7 @@ class TesInitializationActorSpec extends TestKitSuite } } - "fail to start when WorkflowExecutionIdentity is not a string" in { + "return InitializationFailed when WorkflowExecutionIdentity is not a string" in { within(Timeout) { val workflowOptionsWithNumber = WorkflowOptions( JsObject(Map(TesWorkflowOptionKeys.WorkflowExecutionIdentity -> JsNumber(5))) @@ -128,7 +128,13 @@ class TesInitializationActorSpec extends TestKitSuite backend ! Initialize expectMsgPF() { case InitializationSuccess(s) => fail(s"InitializationFailed was expected but got $s") - case InitializationFailed(_) => + case InitializationFailed(failure) => { + val expectedMsg = s"Workflow option ${TesWorkflowOptionKeys.WorkflowExecutionIdentity} must be a string" + Option(failure.getMessage) match { + case Some(m) if m.contains(expectedMsg) => + case _ => fail(s"Exception message does not contain '${expectedMsg}'") + } + } } } } diff --git a/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTaskSpec.scala b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTaskSpec.scala index d27e3234a8f..47a29f705fb 100644 --- a/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTaskSpec.scala +++ b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTaskSpec.scala @@ -28,25 +28,22 @@ class TesTaskSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matchers wit it should "create the correct resources when an identity is passed in WorkflowOptions" in { val wd = workflowDescriptorWithIdentity(Option("abc123")) - assert( - TesTask.makeResources(runtimeAttributes, wd) - == Resources(None, None, None, Option(false), None, Option(Map(TesWorkflowOptionKeys.WorkflowExecutionIdentity -> "abc123"))) + TesTask.makeResources(runtimeAttributes, wd) shouldEqual + Resources(None, None, None, Option(false), None, Option(Map(TesWorkflowOptionKeys.WorkflowExecutionIdentity -> "abc123")) ) } it should "create the correct resources when an empty identity is passed in WorkflowOptions" in { val wd = workflowDescriptorWithIdentity(Option("")) - assert( - TesTask.makeResources(runtimeAttributes, wd) - == Resources(None, None, None, Option(false), None, Option(Map(TesWorkflowOptionKeys.WorkflowExecutionIdentity -> ""))) + TesTask.makeResources(runtimeAttributes, wd) shouldEqual + Resources(None, None, None, Option(false), None, Option(Map(TesWorkflowOptionKeys.WorkflowExecutionIdentity -> "")) ) } it should "create the correct resources when no identity is passed in WorkflowOptions" in { val wd = workflowDescriptorWithIdentity(None) - assert( - TesTask.makeResources(runtimeAttributes, wd) - == Resources(None, None, None, Option(false), None, Option(Map.empty[String, String])) + TesTask.makeResources(runtimeAttributes, wd) shouldEqual + Resources(None, None, None, Option(false), None, 
Option(Map.empty[String, String]) ) } } From 7aeffd9c8d84cfbebf8d15921e0db9803355148b Mon Sep 17 00:00:00 2001 From: Christina Ahrens Roberts Date: Thu, 21 Oct 2021 10:52:32 -0400 Subject: [PATCH 14/29] Add some clarifying notes for newbies (BW-856). (#6544) * Add some clarifying notes for newbies. --- processes/release_processes/README.MD | 32 ++++++++++++++------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/processes/release_processes/README.MD b/processes/release_processes/README.MD index d05bbf243a1..ab23d1f37a6 100644 --- a/processes/release_processes/README.MD +++ b/processes/release_processes/README.MD @@ -15,8 +15,8 @@ will need to be on the Broad internal network or VPN to open the following links 1. Tests for various backends supported by Cromwell. Log into Jenkins [here](https://fc-jenkins.dsp-techops.broadinstitute.org), check the tests [here](https://fc-jenkins.dsp-techops.broadinstitute.org/job/cromwell-cron-parent/). 1. Tests for Cromwell in Terra environment. Log into Jenkins [here](https://fc-jenkins.dsp-techops.broadinstitute.org), check the tests [here](https://fc-jenkins.dsp-techops.broadinstitute.org/view/Batch/). 1. [Run the publish script to create a new version of Cromwell](#how-to-publish-a-new-cromwell-version) -1. [Run through the "How to Release Cromwell into Firecloud" process](#how-to-release-cromwell-into-firecloud) -1. [Run through the "How to Deploy Cromwell in CAAS prod" process](#how-to-deploy-cromwell-in-caas-prod) +1. [Run through the "How to Release Cromwell into Firecloud/Terra" process](#how-to-release-cromwell-into-firecloud--terra) +1. [Run through the "How to Deploy Cromwell in CaaS prod" process](#how-to-deploy-cromwell-in-caas-staging-and-caas-prod) ### How to publish a new Cromwell version @@ -40,7 +40,6 @@ The release WDL uses a github token to perform actions on your behalf. This is optional, but I find it useful. Make or copy the following files into some temporary `releases/` directory: -* A cromwell jar file, preferably the most recent Cromwell version. * A copy of the workflow file to run (https://github.com/broadinstitute/cromwell/blob/develop/publish/publish_workflow.wdl) * An inputs json like this: @@ -56,8 +55,8 @@ This is optional, but I find it useful. Make or copy the following files into so #### Make sure Docker will have enough memory -I had to follow the instructions [here](https://docs.docker.com/docker-for-mac/#resources) to increase my Docker memory. -I chose to increase it from 2GB to 8GB; 4GB is not sufficient. +Follow the instructions [here](https://docs.docker.com/docker-for-mac/#resources) to increase Docker memory. +Ensure you have at least 8GB; 4GB is not sufficient. #### Let people know the publish is underway @@ -66,12 +65,13 @@ the release is published. #### Run the `publish_workflow.wdl` Workflow -* Run the Cromwell instance using the Local backend. The publish workflow is quite resource intensive; it's a good idea to - shut down other resource intensive apps before launching it to avoid painfully slow or failed executions. - * In server mode with a persistent backing database is probably a good idea - it will allow call caching to happen if you need to restart for any reason. - Some instructions for using a Dockerized MySQL server and CI config [here](#cromwell-setup-for-publishing). +Run Cromwell in server mode with a persistent backing database, using Docker containers. This allows call caching to happen if you need to restart for any reason. 
+See instructions for using a Dockerized MySQL server and CI config [here](#cromwell-setup-for-publishing). -* Submit the workflow to Cromwell along with the inputs file. +Note that the publish workflow is quite resource intensive; it's a good idea to +shut down other resource intensive apps before launching it to avoid painfully slow or failed executions. + +Using the Swagger API, submit the workflow to Cromwell along with the inputs file. #### Make sure it all went swimmingly @@ -83,25 +83,27 @@ the release is published. ### How to Release Cromwell into Firecloud / Terra -**Note:** If the Cromwell CHANGELOG indicates that the upgrade might take some time (e.g. because of a database migration), checking in with the release engineer +**Note:** If the Cromwell CHANGELOG indicates that the upgrade might take some time (e.g., because of a database migration), checking in with the release engineer and user support/comms to let them know that the upgrade may involve downtime is also required. You may need to help draft an impact statement and co-ordinate timing the deploy to make sure user impact in minimized. -**Note:** How to accomplish some of these steps might be non-obvious to you (e.g. generating the release notes). +**Note:** How to accomplish some of these steps might be non-obvious to you (e.g., generating the release notes). If so, refer to the additional details in the [full document](https://docs.google.com/document/d/1EEzwemE8IedCplIwL506fiqXr0262Pz4G0x6Cr6V-5E). ![firecloud-develop](firecloud-develop.dot.png) ### How to Deploy Cromwell in CaaS staging and CaaS prod -**Note:** If the Cromwell CHANGELOG indicates that the upgrade might take some time (eg because of a database migration), checking in with the CaaS users +CaaS is "Cromwell as a Service". It is used by a couple of Broad teams (Pipelines and Epigenomics), though the long-term plan is for those teams to migrate to using Terra. + +**Note:** If the Cromwell CHANGELOG indicates that the upgrade might take some time (e.g., because of a database migration), checking in with the CaaS users to let them know that the upgrade is about to happen is a good idea. -Deploying to CAAS is detailed in the [Quick CAAS Deployment Guide](https://docs.google.com/document/d/1s0YC-oohJ7o-OGcgnH_-YBtIEKmLIPTRpG36yvWxUpE) +Deploying to CaaS is detailed in the [Quick CaaS Deployment Guide](https://docs.google.com/document/d/1s0YC-oohJ7o-OGcgnH_-YBtIEKmLIPTRpG36yvWxUpE) ## Bonus Processes -The swagger client library is not part of our core publish/release process but can be performed from time to time, as required. +The Swagger client library is not part of our core publish/release process but can be performed from time to time, as required. ### How to Generate and Publish Swagger Client Library From 7a1eac0da48f0bf3009af3e7f152888316390c5c Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Mon, 25 Oct 2021 11:01:29 -0400 Subject: [PATCH 15/29] Add Azure package dependencies --- project/Dependencies.scala | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index be85b02b4df..c6f967df1d9 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -11,6 +11,11 @@ object Dependencies { private val ammoniteOpsV = "2.4.0" private val apacheHttpClientV = "4.5.13" private val awsSdkV = "2.17.29" + // We would like to use the BOM to manage Azure SDK versions, but SBT doesn't support it. 
+ // https://github.com/Azure/azure-sdk-for-java/tree/main/sdk/boms/azure-sdk-bom + // https://github.com/sbt/sbt/issues/4531 + private val azureIdentitySdkV = "1.4.0" + private val azureKeyVaultSdkV = "4.3.4" private val betterFilesV = "3.9.1" /* cats-effect, fs2, http4s, and sttp (also to v3) should all be upgraded at the same time to use cats-effect 3.x. @@ -514,7 +519,10 @@ object Dependencies { val bcsBackendDependencies: List[ModuleID] = commonDependencies ++ refinedTypeDependenciesList ++ aliyunBatchComputeDependencies - val tesBackendDependencies: List[ModuleID] = akkaHttpDependencies + val tesBackendDependencies: List[ModuleID] = List( + "com.azure" % "azure-identity" % azureIdentitySdkV, + "com.azure" % "azure-security-keyvault-secrets" % azureKeyVaultSdkV + ) ++ akkaHttpDependencies val sfsBackendDependencies = List ( "org.lz4" % "lz4-java" % lz4JavaV From 5ab2277454222e671253073c6de986108b3c5627 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Tue, 26 Oct 2021 13:42:55 -0400 Subject: [PATCH 16/29] AzureKeyVaultClient --- .../impl/tes/AzureKeyVaultClient.scala | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/AzureKeyVaultClient.scala diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/AzureKeyVaultClient.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/AzureKeyVaultClient.scala new file mode 100644 index 00000000000..bce3aca0e14 --- /dev/null +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/AzureKeyVaultClient.scala @@ -0,0 +1,29 @@ +package cromwell.backend.impl.tes + +import com.azure.identity.DefaultAzureCredentialBuilder +import com.azure.security.keyvault.secrets.{SecretClient, SecretClientBuilder} +import common.validation.ErrorOr +import common.validation.ErrorOr._ + +class AzureKeyVaultClient(client: SecretClient) { + def getSecret(secretName: String): ErrorOr[String] = { + ErrorOr(client.getSecret(secretName)).map(_.getValue) + } +} + +object AzureKeyVaultClient { + def apply(vaultName: String, identityClientId: Option[String]): ErrorOr[AzureKeyVaultClient] = ErrorOr { + val defaultCreds = identityClientId.map(identityId => + new DefaultAzureCredentialBuilder().managedIdentityClientId(identityId) + ).getOrElse( + new DefaultAzureCredentialBuilder() + ).build() + + val client = new SecretClientBuilder() + .vaultUrl(s"https://${vaultName}.vault.azure.net") + .credential(defaultCreds) + .buildClient() + + new AzureKeyVaultClient(client) + } +} \ No newline at end of file From 803872dcc568eb517650f3ccba708d6108b3b600 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Tue, 26 Oct 2021 13:43:25 -0400 Subject: [PATCH 17/29] Access secret from KeyVault in TES backend --- .../impl/tes/TesAsyncBackendJobExecutionActor.scala | 13 ++++++++++++- .../backend/impl/tes/TesConfiguration.scala | 2 ++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesAsyncBackendJobExecutionActor.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesAsyncBackendJobExecutionActor.scala index 808c255409f..4304e0334a4 100644 --- a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesAsyncBackendJobExecutionActor.scala @@ -11,7 +11,8 @@ import akka.http.scaladsl.model._ import akka.http.scaladsl.unmarshalling.{Unmarshal, 
Unmarshaller} import akka.stream.ActorMaterializer import akka.util.ByteString -import common.validation.ErrorOr.ErrorOr +import cats.data.Validated.{Invalid, Valid} +import common.validation.ErrorOr._ import common.validation.Validation._ import cromwell.backend.BackendJobLifecycleActor import cromwell.backend.async.{AbortedExecutionHandle, ExecutionHandle, FailedNonRetryableExecutionHandle, PendingExecutionHandle} @@ -168,6 +169,16 @@ class TesAsyncBackendJobExecutionActor(override val standardParams: StandardAsyn } override def executeAsync(): Future[ExecutionHandle] = { + + // BT-426 temporary log msg to exercise KeyVault access + val executionIdentity = workflowDescriptor.workflowOptions.get(TesWorkflowOptionKeys.WorkflowExecutionIdentity).toOption + val keyVaultClient = AzureKeyVaultClient(tesConfiguration.azureKeyVaultName, executionIdentity) + val secretMessage = keyVaultClient.flatMap(_.getSecret(tesConfiguration.azureB2CTokenSecretName)) match { + case Valid(secret) => s"Successfully accessed a secret of length ${secret.length} from KeyVault" + case Invalid(err) => s"Couldn't access secret: $err" + } + jobLogger.info(secretMessage) + // create call exec dir tesJobPaths.callExecutionRoot.createPermissionedDirectories() val taskMessageFuture = createTaskMessage().fold( diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesConfiguration.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesConfiguration.scala index e6edc596266..bde71efc98c 100644 --- a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesConfiguration.scala +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesConfiguration.scala @@ -5,4 +5,6 @@ import cromwell.backend.BackendConfigurationDescriptor class TesConfiguration(val configurationDescriptor: BackendConfigurationDescriptor) { val endpointURL = configurationDescriptor.backendConfig.getString("endpoint") val runtimeConfig = configurationDescriptor.backendRuntimeAttributesConfig + val azureKeyVaultName = configurationDescriptor.backendConfig.getString("azure-keyvault-name") + val azureB2CTokenSecretName = configurationDescriptor.backendConfig.getString("azure-token-secret") } From 22ca1517e76be62617edc2fc6dcd44ee7a3d6cc7 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Tue, 26 Oct 2021 14:58:31 -0400 Subject: [PATCH 18/29] Fix build --- project/Dependencies.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index c6f967df1d9..60ee08f071d 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -520,8 +520,8 @@ object Dependencies { val bcsBackendDependencies: List[ModuleID] = commonDependencies ++ refinedTypeDependenciesList ++ aliyunBatchComputeDependencies val tesBackendDependencies: List[ModuleID] = List( - "com.azure" % "azure-identity" % azureIdentitySdkV, - "com.azure" % "azure-security-keyvault-secrets" % azureKeyVaultSdkV + "com.azure" % "azure-identity" % azureIdentitySdkV % "provided", + "com.azure" % "azure-security-keyvault-secrets" % azureKeyVaultSdkV % "provided" ) ++ akkaHttpDependencies val sfsBackendDependencies = List ( From 30c247e1b8b5b09e91932b9c4db3594e4af7e4b2 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Wed, 27 Oct 2021 14:46:32 -0400 Subject: [PATCH 19/29] Add newline at end of file --- .../scala/cromwell/backend/impl/tes/AzureKeyVaultClient.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/AzureKeyVaultClient.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/AzureKeyVaultClient.scala index bce3aca0e14..7c9af4bf126 100644 --- a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/AzureKeyVaultClient.scala +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/AzureKeyVaultClient.scala @@ -26,4 +26,4 @@ object AzureKeyVaultClient { new AzureKeyVaultClient(client) } -} \ No newline at end of file +} From a0858faa1673256a2bbe95c71d971514a03c2ffa Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Wed, 27 Oct 2021 16:37:34 -0400 Subject: [PATCH 20/29] Don't crash on missing config --- .../src/main/scala/cromwell/backend/backend.scala | 6 ++++++ .../impl/tes/TesAsyncBackendJobExecutionActor.scala | 13 ++++++++----- .../backend/impl/tes/TesConfiguration.scala | 4 ++-- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/backend/src/main/scala/cromwell/backend/backend.scala b/backend/src/main/scala/cromwell/backend/backend.scala index ea413c10367..509a6fe58dc 100644 --- a/backend/src/main/scala/cromwell/backend/backend.scala +++ b/backend/src/main/scala/cromwell/backend/backend.scala @@ -94,6 +94,12 @@ case class BackendWorkflowDescriptor(id: WorkflowId, */ case class BackendConfigurationDescriptor(backendConfig: Config, globalConfig: Config) { + def getOptBackendConfString(configPath: String): Option[String] = + if (backendConfig.hasPath(configPath)) + Option(backendConfig.getString(configPath)) + else + None + lazy val backendRuntimeAttributesConfig = if (backendConfig.hasPath("default-runtime-attributes")) Option(backendConfig.getConfig("default-runtime-attributes")) diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesAsyncBackendJobExecutionActor.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesAsyncBackendJobExecutionActor.scala index 4304e0334a4..438e90e2cea 100644 --- a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesAsyncBackendJobExecutionActor.scala @@ -172,11 +172,14 @@ class TesAsyncBackendJobExecutionActor(override val standardParams: StandardAsyn // BT-426 temporary log msg to exercise KeyVault access val executionIdentity = workflowDescriptor.workflowOptions.get(TesWorkflowOptionKeys.WorkflowExecutionIdentity).toOption - val keyVaultClient = AzureKeyVaultClient(tesConfiguration.azureKeyVaultName, executionIdentity) - val secretMessage = keyVaultClient.flatMap(_.getSecret(tesConfiguration.azureB2CTokenSecretName)) match { - case Valid(secret) => s"Successfully accessed a secret of length ${secret.length} from KeyVault" - case Invalid(err) => s"Couldn't access secret: $err" - } + val secretMessage: String = + (tesConfiguration.azureKeyVaultName, tesConfiguration.azureB2CTokenSecretName).mapN { case (keyVaultName, secretName) => + AzureKeyVaultClient(keyVaultName, executionIdentity) + .flatMap(_.getSecret(secretName)) match { + case Valid(secret) => s"Successfully accessed a secret of length ${secret.length} from KeyVault" + case Invalid(err) => s"Couldn't access KeyVault secret: $err" + } + }.getOrElse("Skipping KeyVault test, didn't find necessary config items") jobLogger.info(secretMessage) // create call exec dir diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesConfiguration.scala 
b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesConfiguration.scala index bde71efc98c..85002be494d 100644 --- a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesConfiguration.scala +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesConfiguration.scala @@ -5,6 +5,6 @@ import cromwell.backend.BackendConfigurationDescriptor class TesConfiguration(val configurationDescriptor: BackendConfigurationDescriptor) { val endpointURL = configurationDescriptor.backendConfig.getString("endpoint") val runtimeConfig = configurationDescriptor.backendRuntimeAttributesConfig - val azureKeyVaultName = configurationDescriptor.backendConfig.getString("azure-keyvault-name") - val azureB2CTokenSecretName = configurationDescriptor.backendConfig.getString("azure-token-secret") + val azureKeyVaultName = configurationDescriptor.getOptBackendConfString("azure-keyvault-name") + val azureB2CTokenSecretName = configurationDescriptor.getOptBackendConfString("azure-token-secret") } From a532e2f721895208682e648af767cb1b4f6bf140 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Thu, 28 Oct 2021 14:05:14 -0400 Subject: [PATCH 21/29] PR feedback --- backend/src/main/scala/cromwell/backend/backend.scala | 6 ------ project/Dependencies.scala | 6 ++++-- .../cromwell/backend/impl/tes/AzureKeyVaultClient.scala | 4 +--- .../scala/cromwell/backend/impl/tes/TesConfiguration.scala | 5 +++-- 4 files changed, 8 insertions(+), 13 deletions(-) diff --git a/backend/src/main/scala/cromwell/backend/backend.scala b/backend/src/main/scala/cromwell/backend/backend.scala index 509a6fe58dc..ea413c10367 100644 --- a/backend/src/main/scala/cromwell/backend/backend.scala +++ b/backend/src/main/scala/cromwell/backend/backend.scala @@ -94,12 +94,6 @@ case class BackendWorkflowDescriptor(id: WorkflowId, */ case class BackendConfigurationDescriptor(backendConfig: Config, globalConfig: Config) { - def getOptBackendConfString(configPath: String): Option[String] = - if (backendConfig.hasPath(configPath)) - Option(backendConfig.getString(configPath)) - else - None - lazy val backendRuntimeAttributesConfig = if (backendConfig.hasPath("default-runtime-attributes")) Option(backendConfig.getConfig("default-runtime-attributes")) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 60ee08f071d..e639cf65cc7 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -519,10 +519,12 @@ object Dependencies { val bcsBackendDependencies: List[ModuleID] = commonDependencies ++ refinedTypeDependenciesList ++ aliyunBatchComputeDependencies - val tesBackendDependencies: List[ModuleID] = List( + val tesBackendAzureDependencies: List[ModuleID] = List( "com.azure" % "azure-identity" % azureIdentitySdkV % "provided", "com.azure" % "azure-security-keyvault-secrets" % azureKeyVaultSdkV % "provided" - ) ++ akkaHttpDependencies + ) + + val tesBackendDependencies: List[ModuleID] = tesBackendAzureDependencies ++ akkaHttpDependencies val sfsBackendDependencies = List ( "org.lz4" % "lz4-java" % lz4JavaV diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/AzureKeyVaultClient.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/AzureKeyVaultClient.scala index 7c9af4bf126..55bae139b0b 100644 --- a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/AzureKeyVaultClient.scala +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/AzureKeyVaultClient.scala @@ -6,9 +6,7 @@ import common.validation.ErrorOr import 
common.validation.ErrorOr._ class AzureKeyVaultClient(client: SecretClient) { - def getSecret(secretName: String): ErrorOr[String] = { - ErrorOr(client.getSecret(secretName)).map(_.getValue) - } + def getSecret(secretName: String): ErrorOr[String] = ErrorOr(client.getSecret(secretName).getValue) } object AzureKeyVaultClient { diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesConfiguration.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesConfiguration.scala index 85002be494d..8600379b4c1 100644 --- a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesConfiguration.scala +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesConfiguration.scala @@ -1,10 +1,11 @@ package cromwell.backend.impl.tes import cromwell.backend.BackendConfigurationDescriptor +import net.ceedubs.ficus.Ficus._ class TesConfiguration(val configurationDescriptor: BackendConfigurationDescriptor) { val endpointURL = configurationDescriptor.backendConfig.getString("endpoint") val runtimeConfig = configurationDescriptor.backendRuntimeAttributesConfig - val azureKeyVaultName = configurationDescriptor.getOptBackendConfString("azure-keyvault-name") - val azureB2CTokenSecretName = configurationDescriptor.getOptBackendConfString("azure-token-secret") + val azureKeyVaultName = configurationDescriptor.backendConfig.as[Option[String]]("azure-keyvault-name") + val azureB2CTokenSecretName = configurationDescriptor.backendConfig.as[Option[String]]("azure-token-secret") } From e46adf05cc96343c4b4b9f081e68160b7f178ded Mon Sep 17 00:00:00 2001 From: Saloni Shah Date: Fri, 29 Oct 2021 12:13:35 -0400 Subject: [PATCH 22/29] Enable preflight Liquibase migrations by removing custom log code [BW-875] (#6551) --- project/Dependencies.scala | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index be85b02b4df..e8ef8682357 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -242,10 +242,7 @@ object Dependencies { ) private val liquibaseDependencies = List( - "org.liquibase" % "liquibase-core" % liquibaseV, - // This is to stop liquibase from being so noisy by default - // See: http://stackoverflow.com/questions/20880783/how-to-get-liquibase-to-log-using-slf4j - "com.mattbertolini" % "liquibase-slf4j" % liquibaseSlf4jV + "org.liquibase" % "liquibase-core" % liquibaseV ) private val akkaDependencies = List( From a9b66a1dda78df857b5f96869be11611432aab8b Mon Sep 17 00:00:00 2001 From: Miguel Covarrubias Date: Mon, 1 Nov 2021 16:43:02 -0400 Subject: [PATCH 23/29] Apply some excludes for Azure artifacts that were causing collisions in `sbt assembly`. 
--- project/Dependencies.scala | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index e639cf65cc7..b7bd70fe724 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -520,8 +520,12 @@ object Dependencies { val bcsBackendDependencies: List[ModuleID] = commonDependencies ++ refinedTypeDependenciesList ++ aliyunBatchComputeDependencies val tesBackendAzureDependencies: List[ModuleID] = List( - "com.azure" % "azure-identity" % azureIdentitySdkV % "provided", - "com.azure" % "azure-security-keyvault-secrets" % azureKeyVaultSdkV % "provided" + "com.azure" % "azure-identity" % azureIdentitySdkV + exclude("jakarta.xml.bind", "jakarta.xml.bind-api") + exclude("jakarta.activation", "jakarta.activation-api"), + "com.azure" % "azure-security-keyvault-secrets" % azureKeyVaultSdkV + exclude("jakarta.xml.bind", "jakarta.xml.bind-api") + exclude("jakarta.activation", "jakarta.activation-api") ) val tesBackendDependencies: List[ModuleID] = tesBackendAzureDependencies ++ akkaHttpDependencies From c997b3b16e64b0c89e7b221acc19e663be171c78 Mon Sep 17 00:00:00 2001 From: Miguel Covarrubias Date: Tue, 2 Nov 2021 12:45:05 -0400 Subject: [PATCH 24/29] Clarify VPC docs re literals vs labels [BT-400] (#6554) --- docs/backends/Google.md | 88 +++++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 42 deletions(-) diff --git a/docs/backends/Google.md b/docs/backends/Google.md index 404c391b453..cf694125de6 100644 --- a/docs/backends/Google.md +++ b/docs/backends/Google.md @@ -311,48 +311,51 @@ This filesystem has two required configuration options: ### Virtual Private Network -To run your jobs in a private network add the `virtual-private-cloud` stanza in the `config` stanza of the PAPI v2 backend: +Cromwell can arrange for jobs to run in specific GCP private networks via the `config.virtual-private-cloud` stanza of a PAPI v2 backend. +There are two ways of specifying private networks: -#### Virtual Private Network via Labels +* [Literal network and subnetwork values](#virtual-private-network-via-literals) that will apply to all projects +* [Google project labels](#virtual-private-network-via-labels) whose values in a particular Google project will specify the network and subnetwork + +#### Virtual Private Network via Literals ```hocon backend { ... providers { - ... - PapiV2 { - actor-factory = "cromwell.backend.google.pipelines.v2beta.PipelinesApiLifecycleActorFactory" - config { - ... - virtual-private-cloud { - network-label-key = "my-private-network" - subnetwork-label-key = "my-private-subnetwork" - auth = "reference-to-auth-scheme" - } - ... - } + ... + PapiV2 { + actor-factory = "cromwell.backend.google.pipelines.v2beta.PipelinesApiLifecycleActorFactory" + config { + ... + virtual-private-cloud { + network-name = "vpc-network" + subnetwork-name = "vpc-subnetwork" + } + ... } + } } } ``` +The `network-name` and `subnetwork-name` should reference the name of your private network and subnetwork within that +network respectively. The `subnetwork-name` is an optional config. -The `network-label-key` and `subnetwork-label-key` should reference the keys in your project's labels whose value is the name of your private network -and subnetwork within that network respectively. `auth` should reference an auth scheme in the `google` stanza which will be used to get the project metadata from Google Cloud. -The `subnetwork-label-key` is an optional config. 
+For example, if your `virtual-private-cloud` config looks like the one above, then Cromwell will use the value of the +configuration key, which is `vpc-network` here, as the name of private network and run the jobs on this network. +If the network name is not present in the config Cromwell will fall back to trying to run jobs on the default network. -For example, if your `virtual-private-cloud` config looks like the one above, and one of the labels in your project is +If the `network-name` or `subnetwork-name` values contain the string `${projectId}` then that value will be replaced +by Cromwell with the name of the project running the Pipelines API. -``` -"my-private-network" = "vpc-network" -``` +If the `network-name` does not contain a `/` then it will be prefixed with `projects/${projectId}/global/networks/`. -Cromwell will get labels from the project's metadata and look for a label whose key is `my-private-network`. -Then it will use the value of the label, which is `vpc-network` here, as the name of private network and run the jobs on this network. -If the network key is not present in the project's metadata Cromwell will fall back to trying to run jobs using literal -network labels, and then fall back to running on the default network. +Cromwell will then pass the network and subnetwork values to the Pipelines API. See the documentation for the +[Cloud Life Sciences API](https://cloud.google.com/life-sciences/docs/reference/rest/v2beta/projects.locations.pipelines/run#Network) +for more information on the various formats accepted for `network` and `subnetwork`. -#### Virtual Private Network via Literals +#### Virtual Private Network via Labels ```hocon backend { @@ -362,11 +365,12 @@ backend { PapiV2 { actor-factory = "cromwell.backend.google.pipelines.v2beta.PipelinesApiLifecycleActorFactory" config { - ... - virtual-private-cloud { - network-name = "vpc-network" - subnetwork-name = "vpc-subnetwork" - } + ... + virtual-private-cloud { + network-label-key = "my-private-network" + subnetwork-label-key = "my-private-subnetwork" + auth = "reference-to-auth-scheme" + } ... } } @@ -374,21 +378,21 @@ backend { } ``` -The `network-name` and `subnetwork-name` should reference the name of your private network and subnetwork within that -network respectively. The `subnetwork-name` is an optional config. -For example, if your `virtual-private-cloud` config looks like the one above, then Cromwell will use the value of the -configuration key, which is `vpc-network` here, as the name of private network and run the jobs on this network. -If the network name is not present in the config Cromwell will fall back to trying to run jobs on the default network. +The `network-label-key` and `subnetwork-label-key` should reference the keys in your project's labels whose value is the name of your private network +and subnetwork within that network respectively. `auth` should reference an auth scheme in the `google` stanza which will be used to get the project metadata from Google Cloud. +The `subnetwork-label-key` is an optional config. -If the `network-name` or `subnetwork-name` values contain the string `${projectId}` then that value will be replaced -by Cromwell with the name of the project running the Pipelines API. +For example, if your `virtual-private-cloud` config looks like the one above, and one of the labels in your project is -If the `network-name` does not contain a `/` then it will be prefixed with `projects/${projectId}/global/networks/`. 
+``` +"my-private-network" = "vpc-network" +``` -Cromwell will then pass the network and subnetwork values to the Pipelines API. See the documentation for the -[Cloud Life Sciences API](https://cloud.google.com/life-sciences/docs/reference/rest/v2beta/projects.locations.pipelines/run#Network) -for more information on the various formats accepted for `network` and `subnetwork`. +Cromwell will get labels from the project's metadata and look for a label whose key is `my-private-network`. +Then it will use the value of the label, which is `vpc-network` here, as the name of private network and run the jobs on this network. +If the network key is not present in the project's metadata Cromwell will fall back to trying to run jobs using literal +network labels, and then fall back to running on the default network. ### Custom Google Cloud SDK container Cromwell can't use Google's container registry if VPC Perimeter is used in project. From bd9bfc5fb98c5babaef011ef664702c0df424203 Mon Sep 17 00:00:00 2001 From: Miguel Covarrubias Date: Tue, 2 Nov 2021 12:55:05 -0400 Subject: [PATCH 25/29] Cromwell DRS Localizer: Override default localization path [BT-418] (#6550) --- .../standardTestCases/drs_usa_hca.test | 2 +- .../standardTestCases/drs_usa_jdr.test | 6 +- .../drs_usa_jdr_preresolve.test | 8 +- .../cloud/nio/impl/drs/DrsPathResolver.scala | 7 +- .../cloud/nio/impl/drs/MockDrsPaths.scala | 10 ++ .../impl/drs/MockEngineDrsPathResolver.scala | 6 ++ .../filesystems/drs/DrsResolver.scala | 98 ++++++++++--------- .../filesystems/drs/DrsResolverSpec.scala | 12 +++ runConfigurations/Cromwell_server.run.xml | 2 + 9 files changed, 100 insertions(+), 51 deletions(-) diff --git a/centaur/src/main/resources/standardTestCases/drs_usa_hca.test b/centaur/src/main/resources/standardTestCases/drs_usa_hca.test index c97a75674b2..f8efd921d21 100644 --- a/centaur/src/main/resources/standardTestCases/drs_usa_hca.test +++ b/centaur/src/main/resources/standardTestCases/drs_usa_hca.test @@ -16,7 +16,7 @@ metadata { status: Succeeded "outputs.drs_usa_hca.path" = - "/cromwell_root/jade.datarepo-dev.broadinstitute.org/v1_4641bafb-5190-425b-aea9-9c7b125515c8_e37266ba-790d-4641-aa76-854d94be2fbe/E18_20161004_Neurons_Sample_49_S048_L004_R2_005.fastq.gz" + "/cromwell_root/drs_localization_paths/hca_dev_20201217_test4/5acd55ef/E18_20161004_Neurons_Sample_49_S048_L004_R2_005.fastq.gz" "outputs.drs_usa_hca.hash" = "badf266412ff0e307232421e56d647ed" "outputs.drs_usa_hca.size" = 438932948 "outputs.drs_usa_hca.cloud" = diff --git a/centaur/src/main/resources/standardTestCases/drs_usa_jdr.test b/centaur/src/main/resources/standardTestCases/drs_usa_jdr.test index c6d523f3937..4025860ed8d 100644 --- a/centaur/src/main/resources/standardTestCases/drs_usa_jdr.test +++ b/centaur/src/main/resources/standardTestCases/drs_usa_jdr.test @@ -16,16 +16,16 @@ metadata { status: Succeeded "outputs.drs_usa_jdr.path1" = - "/cromwell_root/jade.datarepo-dev.broadinstitute.org/v1_f90f5d7f-c507-4e56-abfc-b965a66023fb_585f3f19-985f-43b0-ab6a-79fa4c8310fc/hello_jade.json" + "/cromwell_root/drs_localization_paths/CromwellSimpleWithFilerefs/hello_jade.json" "outputs.drs_usa_jdr.path2" = - "/cromwell_root/jade.datarepo-dev.broadinstitute.org/v1_001afc86-da4c-4739-85be-26ca98d2693f_ad783b60-aeba-4055-8f7b-194880f37259/hello_jade_2.json" + "/cromwell_root/drs_localization_paths/CromwellSimpleWithFilerefs2/hello_jade_2.json" "outputs.drs_usa_jdr.hash1" = "faf12e94c25bef7df62e4a5eb62573f5" "outputs.drs_usa_jdr.hash2" = "19e1b021628130fda04c79ee9a056b67" 
"outputs.drs_usa_jdr.size1" = 18.0 "outputs.drs_usa_jdr.size2" = 38.0 # This JDR file has a gsUri that doesn't end in /fileName so it must be downloaded with the DRS localizer "outputs.drs_usa_jdr.cloud1" = - "/cromwell_root/jade.datarepo-dev.broadinstitute.org/v1_f90f5d7f-c507-4e56-abfc-b965a66023fb_585f3f19-985f-43b0-ab6a-79fa4c8310fc/hello_jade.json" + "/cromwell_root/drs_localization_paths/CromwellSimpleWithFilerefs/hello_jade.json" # This JDR file has a gsUri that can skip localization "outputs.drs_usa_jdr.cloud2" = "gs://broad-jade-dev-data-bucket/e1941fb9-6537-4e1a-b70d-34352a3a7817/ad783b60-aeba-4055-8f7b-194880f37259/hello_jade_2.json" diff --git a/centaur/src/main/resources/standardTestCases/drs_usa_jdr_preresolve.test b/centaur/src/main/resources/standardTestCases/drs_usa_jdr_preresolve.test index 4d6cfd57936..ab050aea69f 100644 --- a/centaur/src/main/resources/standardTestCases/drs_usa_jdr_preresolve.test +++ b/centaur/src/main/resources/standardTestCases/drs_usa_jdr_preresolve.test @@ -16,7 +16,11 @@ metadata { status: Succeeded "outputs.drs_usa_jdr.path1" = - "/cromwell_root/jade.datarepo-dev.broadinstitute.org/v1_f90f5d7f-c507-4e56-abfc-b965a66023fb_585f3f19-985f-43b0-ab6a-79fa4c8310fc/hello_jade.json" + "/cromwell_root/drs_localization_paths/CromwellSimpleWithFilerefs/hello_jade.json" + # This JDR file has a gsUri that can be preresolved to a regular GCS file for improved localization performance. + # However this means that the file's container path is determined by the GCS localization logic and not the + # `localizationPath`-aware DRS localization logic. The GCS localization logic always uses a containerized version + # of the GCS path, which is what this expectation represents. "outputs.drs_usa_jdr.path2" = "/cromwell_root/broad-jade-dev-data-bucket/e1941fb9-6537-4e1a-b70d-34352a3a7817/ad783b60-aeba-4055-8f7b-194880f37259/hello_jade_2.json" "outputs.drs_usa_jdr.hash1" = "faf12e94c25bef7df62e4a5eb62573f5" @@ -25,7 +29,7 @@ metadata { "outputs.drs_usa_jdr.size2" = 38.0 # This JDR file has a gsUri that doesn't end in /fileName so it must be downloaded with the DRS localizer "outputs.drs_usa_jdr.cloud1" = - "/cromwell_root/jade.datarepo-dev.broadinstitute.org/v1_f90f5d7f-c507-4e56-abfc-b965a66023fb_585f3f19-985f-43b0-ab6a-79fa4c8310fc/hello_jade.json" + "/cromwell_root/drs_localization_paths/CromwellSimpleWithFilerefs/hello_jade.json" # This JDR file has a gsUri that can skip localization "outputs.drs_usa_jdr.cloud2" = "gs://broad-jade-dev-data-bucket/e1941fb9-6537-4e1a-b70d-34352a3a7817/ad783b60-aeba-4055-8f7b-194880f37259/hello_jade_2.json" diff --git a/cloud-nio/cloud-nio-impl-drs/src/main/scala/cloud/nio/impl/drs/DrsPathResolver.scala b/cloud-nio/cloud-nio-impl-drs/src/main/scala/cloud/nio/impl/drs/DrsPathResolver.scala index 2da9f0fe1f5..54bb89bcb16 100644 --- a/cloud-nio/cloud-nio-impl-drs/src/main/scala/cloud/nio/impl/drs/DrsPathResolver.scala +++ b/cloud-nio/cloud-nio-impl-drs/src/main/scala/cloud/nio/impl/drs/DrsPathResolver.scala @@ -151,6 +151,7 @@ object MarthaField extends Enumeration { val Hashes: MarthaField.Value = Value("hashes") val FileName: MarthaField.Value = Value("fileName") val AccessUrl: MarthaField.Value = Value("accessUrl") + val LocalizationPath: MarthaField.Value = Value("localizationPath") } final case class MarthaRequest(url: String, fields: NonEmptyList[MarthaField.Value]) @@ -171,6 +172,9 @@ final case class AccessUrl(url: String, headers: Option[Map[String, String]]) * @param fileName A possible different file name for the object at gsUri, 
ex: "gsutil cp gs://bucket/12/345 my.vcf" * @param hashes Hashes for the contents stored at gsUri * @param accessUrl URL to query for signed URL + * @param localizationPath Optional localization path. TDR is currently the sole DRS provider specifying this value in + * DRS metadata, via the `aliases` field. As this is a distinct field from `fileName` in DRS + * metadata it is also made a distinct field in this response object. */ final case class MarthaResponse(size: Option[Long] = None, timeCreated: Option[String] = None, @@ -180,7 +184,8 @@ final case class MarthaResponse(size: Option[Long] = None, googleServiceAccount: Option[SADataObject] = None, fileName: Option[String] = None, hashes: Option[Map[String, String]] = None, - accessUrl: Option[AccessUrl] = None + accessUrl: Option[AccessUrl] = None, + localizationPath: Option[String] = None ) // Adapted from https://github.com/broadinstitute/martha/blob/f31933a3a11e20d30698ec4b4dc1e0abbb31a8bc/common/helpers.js#L210-L218 diff --git a/cloud-nio/cloud-nio-impl-drs/src/test/scala/cloud/nio/impl/drs/MockDrsPaths.scala b/cloud-nio/cloud-nio-impl-drs/src/test/scala/cloud/nio/impl/drs/MockDrsPaths.scala index de61b0d7b8f..b6825dcbbd2 100644 --- a/cloud-nio/cloud-nio-impl-drs/src/test/scala/cloud/nio/impl/drs/MockDrsPaths.scala +++ b/cloud-nio/cloud-nio-impl-drs/src/test/scala/cloud/nio/impl/drs/MockDrsPaths.scala @@ -9,6 +9,8 @@ object MockDrsPaths { val mockToken = "mock.token" + val DrsLocalizationPathsContainer = "drs_localization_paths" + private val drsPathPrefix = "drs://drs-host" val drsRelativePath = "drs-host/4d427aa3-5640-4f00-81ae-c33443f84acf/f3b148ac-1802-4acc-a0b9-610ea266fb61" @@ -17,12 +19,20 @@ object MockDrsPaths { val gcsRelativePathWithFileName = "drs-host/d7c75399-bcd3-4762-90e9-434de005679b/file.txt" + val gcsRelativePathWithFileNameFromLocalizationPath = s"$DrsLocalizationPathsContainer/dir/subdir/file.txt" + + val gcsRelativePathWithFileNameFromAllThePaths = s"$DrsLocalizationPathsContainer/dir/subdir/file.txt" + val drsPathResolvingGcsPath = s"$drsPathPrefix/4d427aa3-5640-4f00-81ae-c33443f84acf" val drsPathWithNonPathChars = s"$drsPathPrefix/4d427aa3_5640_4f00_81ae_c33443f84acf" val drsPathResolvingWithFileName = s"$drsPathPrefix/d7c75399-bcd3-4762-90e9-434de005679b" + val drsPathResolvingWithLocalizationPath = s"$drsPathPrefix/1e7ecfa6-2a77-41d7-a251-38a2f4919842" + + val drsPathResolvingWithAllThePaths = s"$drsPathPrefix/0524678a-365e-42f3-a1e7-e4c6ac499b35" + val drsPathResolvingToNoGcsPath = s"$drsPathPrefix/226686cf-22c9-4472-9f79-7a0b0044f253" val drsPathNotExistingInMartha = s"$drsPathPrefix/5e21b8c3-8eda-48d5-9a04-2b32e1571765" diff --git a/cloud-nio/cloud-nio-impl-drs/src/test/scala/cloud/nio/impl/drs/MockEngineDrsPathResolver.scala b/cloud-nio/cloud-nio-impl-drs/src/test/scala/cloud/nio/impl/drs/MockEngineDrsPathResolver.scala index 45410656592..c6e48cb9f93 100644 --- a/cloud-nio/cloud-nio-impl-drs/src/test/scala/cloud/nio/impl/drs/MockEngineDrsPathResolver.scala +++ b/cloud-nio/cloud-nio-impl-drs/src/test/scala/cloud/nio/impl/drs/MockEngineDrsPathResolver.scala @@ -37,6 +37,10 @@ class MockEngineDrsPathResolver(drsConfig: DrsConfig = MockDrsPaths.mockDrsConfi private val marthaObjWithFileName = marthaObjWithGcsPath.copy(fileName = Option("file.txt")) + private val marthaObjWithLocalizationPath = marthaObjWithGcsPath.copy(localizationPath = Option("/dir/subdir/file.txt")) + + private val marthaObjWithAllThePaths = marthaObjWithLocalizationPath.copy(fileName = marthaObjWithFileName.fileName) + private val 
marthaObjWithNoGcsPath = marthaObjWithGcsPath.copy(gsUri = None) override def resolveDrsThroughMartha(drsPath: String, fields: NonEmptyList[MarthaField.Value]): IO[MarthaResponse] = { @@ -44,6 +48,8 @@ class MockEngineDrsPathResolver(drsConfig: DrsConfig = MockDrsPaths.mockDrsConfi case MockDrsPaths.drsPathResolvingGcsPath => IO(marthaObjWithGcsPath) case MockDrsPaths.drsPathWithNonPathChars => IO(marthaObjWithGcsPath) case MockDrsPaths.drsPathResolvingWithFileName => IO(marthaObjWithFileName) + case MockDrsPaths.drsPathResolvingWithLocalizationPath => IO.pure(marthaObjWithLocalizationPath) + case MockDrsPaths.drsPathResolvingWithAllThePaths => IO.pure(marthaObjWithAllThePaths) case MockDrsPaths.drsPathResolvingToNoGcsPath => IO(marthaObjWithNoGcsPath) case MockDrsPaths.drsPathNotExistingInMartha => IO.raiseError( diff --git a/filesystems/drs/src/main/scala/cromwell/filesystems/drs/DrsResolver.scala b/filesystems/drs/src/main/scala/cromwell/filesystems/drs/DrsResolver.scala index 49896dacbda..0f24e803480 100644 --- a/filesystems/drs/src/main/scala/cromwell/filesystems/drs/DrsResolver.scala +++ b/filesystems/drs/src/main/scala/cromwell/filesystems/drs/DrsResolver.scala @@ -4,7 +4,7 @@ import cats.data.NonEmptyList import cats.effect.IO import cloud.nio.impl.drs.{DrsCloudNioFileSystemProvider, DrsPathResolver, MarthaField} import common.exception._ -import cromwell.core.path.DefaultPathBuilder +import cromwell.core.path.{DefaultPathBuilder, Path} import org.apache.commons.lang3.exception.ExceptionUtils import shapeless.syntax.typeable._ @@ -14,6 +14,8 @@ object DrsResolver { private val GcsProtocolLength: Int = 5 // length of 'gs://' + private val DrsLocalizationPathsContainer = "drs_localization_paths" + private def resolveError[A](pathAsString: String)(throwable: Throwable): IO[A] = { IO.raiseError( new RuntimeException( @@ -32,35 +34,39 @@ object DrsResolver { } yield drsFileSystemProvider.drsPathResolver } - private def getGsUriFileNameBondProvider(pathAsString: String, - drsPathResolver: DrsPathResolver - ): IO[(Option[String], Option[String], Option[String])] = { - val fields = NonEmptyList.of(MarthaField.GsUri, MarthaField.FileName, MarthaField.BondProvider) - for { - marthaResponse <- drsPathResolver.resolveDrsThroughMartha(pathAsString, fields) - } yield (marthaResponse.gsUri, marthaResponse.fileName, marthaResponse.bondProvider) + case class MarthaLocalizationData(gsUri: Option[String], + fileName: Option[String], + bondProvider: Option[String], + localizationPath: Option[String]) + + private def getMarthaLocalizationData(pathAsString: String, + drsPathResolver: DrsPathResolver): IO[MarthaLocalizationData] = { + val fields = NonEmptyList.of(MarthaField.GsUri, MarthaField.FileName, MarthaField.BondProvider, MarthaField.LocalizationPath) + + drsPathResolver.resolveDrsThroughMartha(pathAsString, fields) map { r => + MarthaLocalizationData(r.gsUri, r.fileName, r.bondProvider, r.localizationPath) + } } /** Returns the `gsUri` if it ends in the `fileName` and the `bondProvider` is empty. 
*/ - private def getSimpleGsUri(gsUriOption: Option[String], - fileNameOption: Option[String], - bondProviderOption: Option[String], - ): Option[String] = { - for { - // Only return gsUri that do not use Bond - gsUri <- if (bondProviderOption.isEmpty) gsUriOption else None - // Only return the gsUri if there is no fileName or if gsUri ends in /fileName - if fileNameOption.forall(fileName => gsUri.endsWith(s"/$fileName")) - } yield gsUri + private def getSimpleGsUri(localizationData: MarthaLocalizationData): Option[String] = { + localizationData match { + // `gsUri` not defined so no gsUri can be returned. + case MarthaLocalizationData(None, _, _, _) => None + // `bondProvider` defined, cannot "preresolve" to GCS. + case MarthaLocalizationData(_, _, Some(_), _) => None + // Do not return the simple GS URI if the `fileName` from metadata is mismatched to the filename in the `gsUri`. + case MarthaLocalizationData(Some(gsUri), Some(fileName), _, _) if !gsUri.endsWith(s"/$fileName") => None + // Barring any of the situations above return the `gsUri`. + case MarthaLocalizationData(Some(gsUri), _, _, _) => Option(gsUri) + } } /** Returns the `gsUri` if it ends in the `fileName` and the `bondProvider` is empty. */ def getSimpleGsUri(pathAsString: String, drsPathResolver: DrsPathResolver): IO[Option[String]] = { - val gsUriIO = for { - tuple <- getGsUriFileNameBondProvider(pathAsString, drsPathResolver) - (gsUriOption, fileNameOption, bondProviderOption) = tuple - } yield getSimpleGsUri(gsUriOption, fileNameOption, bondProviderOption) + + val gsUriIO = getMarthaLocalizationData(pathAsString, drsPathResolver) map getSimpleGsUri gsUriIO.handleErrorWith(resolveError(pathAsString)) } @@ -76,35 +82,39 @@ object DrsResolver { def getContainerRelativePath(drsPath: DrsPath): IO[String] = { val pathIO = for { drsPathResolver <- getDrsPathResolver(drsPath) - tuple <- getGsUriFileNameBondProvider(drsPath.pathAsString, drsPathResolver) - (gsUriOption, fileNameOption, _) = tuple - /* - In the DOS/DRS spec file names are safe for file systems but not necessarily the DRS URIs. - Reuse the regex defined for ContentsObject.name, plus add "/" for directory separators. - https://ga4gh.github.io/data-repository-service-schemas/preview/release/drs-1.0.0/docs/#_contentsobject - */ - rootPath = DefaultPathBuilder.get(drsPath.pathWithoutScheme.replaceAll("[^/A-Za-z0-9._-]", "_")) - fileName <- getFileName(fileNameOption, gsUriOption) - fullPath = rootPath.resolve(fileName) - fullPathString = fullPath.pathAsString - } yield fullPathString + localizationData <- getMarthaLocalizationData(drsPath.pathAsString, drsPathResolver) + containerRelativePath <- buildContainerRelativePath(localizationData, drsPath) + } yield containerRelativePath.pathAsString pathIO.handleErrorWith(resolveError(drsPath.pathAsString)) } - /** - * Return the file name returned from the martha response or get it from the gsUri - */ - private def getFileName(fileName: Option[String], gsUri: Option[String]): IO[String] = { - fileName match { - case Some(actualFileName) => IO.pure(actualFileName) - case None => - //Currently, Martha only supports resolving DRS paths to GCS paths + // Return the container relative path built from the Martha-specified localization path, file name, or gs URI. + private def buildContainerRelativePath(localizationData: MarthaLocalizationData, drsPath: Path): IO[Path] = { + // Return a relative path constructed from the DRS path minus the leading scheme. 
+ // In the DOS/DRS spec file names are safe for file systems but not necessarily the DRS URIs. + // Reuse the regex defined for ContentsObject.name, plus add "/" for directory separators. + // https://ga4gh.github.io/data-repository-service-schemas/preview/release/drs-1.0.0/docs/#_contentsobject + def drsPathRelativePath: Path = + DefaultPathBuilder.get(drsPath.pathWithoutScheme.replaceAll("[^/A-Za-z0-9._-]", "_")) + + localizationData match { + case MarthaLocalizationData(_, _, _, Some(localizationPath)) => + // TDR may return an explicit localization path and if so we should not use the `drsPathRelativePath`. + // We want to end up with something like /cromwell_root/drs_localization_paths/tdr/specified/path/foo.bam. + // Calling code will add the `/cromwell_root/`, so strip any leading slashes to make this a relative path: + val relativeLocalizationPath = if (localizationPath.startsWith("/")) localizationPath.tail else localizationPath + IO.fromTry(DefaultPathBuilder.build(DrsLocalizationPathsContainer).map(_.resolve(relativeLocalizationPath))) + case MarthaLocalizationData(_, Some(fileName), _, _) => + // Paths specified by filename only are made relative to `drsPathRelativePath`. + IO(drsPathRelativePath.resolve(fileName)) + case _ => + // If this logic is forced to fall back on the GCS path there better be a GCS path to fall back on. IO - .fromEither(gsUri.toRight(UrlNotFoundException(GcsScheme))) + .fromEither(localizationData.gsUri.toRight(UrlNotFoundException(GcsScheme))) .map(_.substring(GcsProtocolLength)) .map(DefaultPathBuilder.get(_)) - .map(_.name) + .map(path => drsPathRelativePath.resolve(path.name)) } } } diff --git a/filesystems/drs/src/test/scala/cromwell/filesystems/drs/DrsResolverSpec.scala b/filesystems/drs/src/test/scala/cromwell/filesystems/drs/DrsResolverSpec.scala index ff5b2ecbf6e..990f9eb8b5f 100644 --- a/filesystems/drs/src/test/scala/cromwell/filesystems/drs/DrsResolverSpec.scala +++ b/filesystems/drs/src/test/scala/cromwell/filesystems/drs/DrsResolverSpec.scala @@ -41,6 +41,18 @@ class DrsResolverSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matchers DrsResolver.getContainerRelativePath(drsPath).unsafeRunSync() should be (MockDrsPaths.gcsRelativePathWithFileName) } + it should "find DRS path from a localization path" in { + val drsPath = drsPathBuilder.build(MockDrsPaths.drsPathResolvingWithLocalizationPath).get.asInstanceOf[DrsPath] + + DrsResolver.getContainerRelativePath(drsPath).unsafeRunSync() should be (MockDrsPaths.gcsRelativePathWithFileNameFromLocalizationPath) + } + + it should "find DRS path from all the paths" in { + val drsPath = drsPathBuilder.build(MockDrsPaths.drsPathResolvingWithAllThePaths).get.asInstanceOf[DrsPath] + + DrsResolver.getContainerRelativePath(drsPath).unsafeRunSync() should be (MockDrsPaths.gcsRelativePathWithFileNameFromAllThePaths) + } + it should "throw GcsUrlNotFoundException when DRS path doesn't resolve to at least one GCS url" in { val drsPath = drsPathBuilder.build(MockDrsPaths.drsPathResolvingToNoGcsPath).get.asInstanceOf[DrsPath] diff --git a/runConfigurations/Cromwell_server.run.xml b/runConfigurations/Cromwell_server.run.xml index 3cd92753bfa..4f0cca0c434 100644 --- a/runConfigurations/Cromwell_server.run.xml +++ b/runConfigurations/Cromwell_server.run.xml @@ -8,6 +8,8 @@ + +
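The `DrsResolver` change in PATCH 25 above resolves a container-relative localization path by preferring Martha's explicit `localizationPath`, then the `fileName` resolved against the sanitized DRS path, and finally the object name taken from the `gsUri`. Below is a minimal standalone Scala sketch of that precedence, for illustration only; `LocalizationData`, `containerRelativePath`, and the example values are stand-ins rather than the actual Cromwell types, and error handling is simplified.

```scala
// Illustrative sketch only: simplified stand-in types, not the Cromwell classes from the patch above.
object DrsLocalizationPathSketch {

  // Stand-in for the subset of Martha metadata that matters for localization.
  final case class LocalizationData(gsUri: Option[String],
                                    fileName: Option[String],
                                    localizationPath: Option[String])

  private val DrsLocalizationPathsContainer = "drs_localization_paths"

  // Precedence mirrored from DrsResolver.buildContainerRelativePath:
  //   1. an explicit localization path is placed under drs_localization_paths/
  //   2. otherwise a fileName is resolved against the sanitized DRS path
  //   3. otherwise fall back to the object name taken from the gsUri
  def containerRelativePath(drsPathWithoutScheme: String, data: LocalizationData): Either[String, String] = {
    // Same character sanitization the patch applies to the DRS path.
    val sanitizedDrsPath = drsPathWithoutScheme.replaceAll("[^/A-Za-z0-9._-]", "_")
    data match {
      case LocalizationData(_, _, Some(localizationPath)) =>
        Right(s"$DrsLocalizationPathsContainer/${localizationPath.stripPrefix("/")}")
      case LocalizationData(_, Some(fileName), _) =>
        Right(s"$sanitizedDrsPath/$fileName")
      case LocalizationData(Some(gsUri), _, _) =>
        Right(s"$sanitizedDrsPath/${gsUri.stripPrefix("gs://").split('/').last}")
      case _ =>
        Left("Martha returned no gsUri, fileName, or localizationPath")
    }
  }

  def main(args: Array[String]): Unit = {
    val tdrStyle = LocalizationData(
      gsUri = None,
      fileName = None,
      localizationPath = Some("/CromwellSimpleWithFilerefs/hello_jade.json")
    )
    // Prints: Right(drs_localization_paths/CromwellSimpleWithFilerefs/hello_jade.json)
    println(containerRelativePath("drs-host/some-object-id", tdrStyle))
  }
}
```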