diff --git a/.github/issue_template.md b/.github/issue_template.md new file mode 100644 index 00000000000..b53afe08047 --- /dev/null +++ b/.github/issue_template.md @@ -0,0 +1,20 @@ + + + + + + + + + diff --git a/.sbtopts b/.sbtopts index 6e81646025c..fa87bb35c03 100644 --- a/.sbtopts +++ b/.sbtopts @@ -1,4 +1,4 @@ -J-Xms2g -J-Xmx4g -J-XX:MaxMetaspaceSize=2g - +-J-Xss8m diff --git a/CHANGELOG.md b/CHANGELOG.md index 32965198839..93358d1de00 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,37 @@ # Cromwell Change Log +## 33 Release Notes + +### Query endpoint + +#### Exclude workflows based on Labels + +This gives the ability to **filter out** workflows based on labels. Two new parameters called `excludeLabelAnd` and `excludeLabelOr` can be used for this purpose. +More details on how to use them can be found [here](http://cromwell.readthedocs.io/en/develop/api/RESTAPI/). + +#### Include/Exclude subworkflows + +Cromwell now supports excluding subworkflows from workflow query results using the `includeSubworkflows` parameter. By default they are included in the results. +More information can be found at [REST API](http://cromwell.readthedocs.io/en/develop/api/RESTAPI/). + +#### Query workflows by Submission time + +Cromwell now supports querying workflows by submission time. This will help find workflows that are submitted but not started yet (i.e. workflows which are +in On Hold state). More information can be found [here](http://cromwell.readthedocs.io/en/develop/api/RESTAPI/). + +#### Submission time in Workflow Query Response + +Submission time of a workflow is now included in WorkflowQueryResult, which is part of the response for workflow query. + +### File Localization (NIO) Hint + +Cromwell now allows tasks in WDL 1.0 to specify an optimization in their `parameter_meta` indicating that some `File` inputs do not need to be localized for the task to run successfully. +Full details are available in the [documentation page for this optimization](http://cromwell.readthedocs.io/en/develop/optimizations/FileLocalization). + +### Bug Fixes + +Workflows which are in 'On Hold' state can now be fetched using the query endpoint. + ## 32 Release Notes ### Backends diff --git a/CromIAM/src/main/resources/swagger/cromiam.yaml b/CromIAM/src/main/resources/swagger/cromiam.yaml index c73198d4f43..ca3df0e4c6f 100644 --- a/CromIAM/src/main/resources/swagger/cromiam.yaml +++ b/CromIAM/src/main/resources/swagger/cromiam.yaml @@ -60,9 +60,9 @@ paths: description: Internal Error security: - googleoauth: - - openid - - email - - profile + - openid + - email + - profile '/api/workflows/{version}': post: summary: Submit a new workflow for execution @@ -143,9 +143,9 @@ paths: description: Internal Error security: - googleoauth: - - openid - - email - - profile + - openid + - email + - profile '/api/workflows/{version}/batch': post: summary: Submit a batch of new workflows for execution @@ -203,9 +203,9 @@ paths: description: Internal Error security: - googleoauth: - - openid - - email - - profile + - openid + - email + - profile '/api/workflows/{version}/{id}/outputs': get: summary: Query for workflow outputs based on workflow id @@ -234,9 +234,9 @@ paths: description: Internal Error security: - googleoauth: - - openid - - email - - profile + - openid + - email + - profile '/api/workflows/{version}/{id}/labels': patch: summary: Add new labels or update values for existing label keys by workflow id.
@@ -275,9 +275,9 @@ paths: description: Internal Error security: - googleoauth: - - openid - - email - - profile + - openid + - email + - profile '/api/workflows/{version}/query': get: summary: Query workflows by start dates, end dates, names, ids, or statuses. @@ -356,9 +356,9 @@ paths: description: Internal Error security: - googleoauth: - - openid - - email - - profile + - openid + - email + - profile post: summary: Query workflows by start dates, end dates, names, ids, or statuses. parameters: @@ -391,9 +391,9 @@ paths: description: Internal Error security: - googleoauth: - - openid - - email - - profile + - openid + - email + - profile '/api/workflows/{version}/{id}/status': get: summary: Query for workflow status based on workflow id @@ -424,43 +424,45 @@ paths: description: Internal Error security: - googleoauth: - - openid - - email - - profile + - openid + - email + - profile '/api/workflows/{version}/{id}/releaseHold': - post: - summary: Switch a workflow from 'On Hold' to 'Submitted' status - description: Request Cromwell to release the hold on a workflow. It will switch the status of a workflow from 'On Hold' to 'Submitted' so it can be picked for running. For instance this might be necessary in cases where you have submitted a workflow with workflowOnHold = true. - parameters: - - name: version - description: API Version - required: true - type: string - in: path - default: v1 - - name: id - description: A workflow ID - required: true - type: string - in: path - tags: - - Workflows - responses: - '200': - description: Successful Request - schema: - $ref: '#/definitions/WorkflowStatusResponse' - '400': - description: Malformed Workflow ID - '403': - description: Malformed Request - '404': - description: Workflow ID Not Found - '500': - description: Internal Error - security: - - google_oauth: - - openid + post: + summary: Switch a workflow from 'On Hold' to 'Submitted' status + description: Request Cromwell to release the hold on a workflow. It will switch the status of a workflow from 'On Hold' to 'Submitted' so it can be picked for running. For instance this might be necessary in cases where you have submitted a workflow with workflowOnHold = true. 
+ parameters: + - name: version + description: API Version + required: true + type: string + in: path + default: v1 + - name: id + description: A workflow ID + required: true + type: string + in: path + tags: + - Workflows + responses: + '200': + description: Successful Request + schema: + $ref: '#/definitions/WorkflowStatusResponse' + '400': + description: Malformed Workflow ID + '403': + description: Malformed Request + '404': + description: Workflow ID Not Found + '500': + description: Internal Error + security: + - googleoauth: + - openid + - email + - profile '/api/workflows/{version}/{id}/logs': get: summary: Query for the standard output and error of all calls in a workflow @@ -489,9 +491,9 @@ paths: description: Internal Error security: - googleoauth: - - openid - - email - - profile + - openid + - email + - profile '/api/workflows/{version}/{id}/metadata': get: summary: Query for workflow and call-level metadata for a specified workflow @@ -676,9 +678,9 @@ paths: description: Internal Error security: - googleoauth: - - openid - - email - - profile + - openid + - email + - profile '/api/workflows/{version}/callcaching/diff': get: summary: Return the hash differential between two calls @@ -732,9 +734,9 @@ paths: description: Internal Error security: - googleoauth: - - openid - - email - - profile + - openid + - email + - profile '/api/workflows/{version}/backends': get: summary: Returns the backends supported by this Cromwell. @@ -754,9 +756,9 @@ paths: $ref: '#/definitions/BackendResponse' security: - googleoauth: - - openid - - email - - profile + - openid + - email + - profile '/engine/{version}/version': get: summary: Returns the version of the Cromwell Engine @@ -774,11 +776,6 @@ paths: description: Successful Request schema: $ref: '#/definitions/VersionResponse' - security: - - googleoauth: - - openid - - email - - profile '/engine/{version}/status': get: summary: Returns the current health status of any monitored subsystems diff --git a/backend/src/main/scala/cromwell/backend/BackendJobExecutionActor.scala b/backend/src/main/scala/cromwell/backend/BackendJobExecutionActor.scala index 862f8f16a83..c8e4f6fd545 100644 --- a/backend/src/main/scala/cromwell/backend/BackendJobExecutionActor.scala +++ b/backend/src/main/scala/cromwell/backend/BackendJobExecutionActor.scala @@ -4,8 +4,7 @@ import akka.actor.ActorLogging import akka.event.LoggingReceive import cromwell.backend.BackendJobExecutionActor._ import cromwell.backend.BackendLifecycleActor._ -import cromwell.backend.wdl.OutputEvaluator -import cromwell.backend.wdl.OutputEvaluator.EvaluatedJobOutputs +import cromwell.backend.OutputEvaluator.EvaluatedJobOutputs import cromwell.core.path.Path import cromwell.core._ import wom.expression.IoFunctionSet diff --git a/backend/src/main/scala/cromwell/backend/BackendLifecycleActorFactory.scala b/backend/src/main/scala/cromwell/backend/BackendLifecycleActorFactory.scala index 6cba7ce50c9..b1c3f46b136 100644 --- a/backend/src/main/scala/cromwell/backend/BackendLifecycleActorFactory.scala +++ b/backend/src/main/scala/cromwell/backend/BackendLifecycleActorFactory.scala @@ -3,8 +3,8 @@ package cromwell.backend import akka.actor.{ActorRef, Props} import com.typesafe.config.Config import cromwell.backend.io.WorkflowPathsWithDocker -import cromwell.core.JobExecutionToken.JobExecutionTokenType import cromwell.core.CallOutputs +import cromwell.core.JobExecutionToken.JobExecutionTokenType import cromwell.core.path.Path import cromwell.core.path.PathFactory.PathBuilders import 
net.ceedubs.ficus.Ficus._ diff --git a/backend/src/main/scala/cromwell/backend/wdl/Command.scala b/backend/src/main/scala/cromwell/backend/Command.scala similarity index 95% rename from backend/src/main/scala/cromwell/backend/wdl/Command.scala rename to backend/src/main/scala/cromwell/backend/Command.scala index 1468ec06ee7..40310c68485 100644 --- a/backend/src/main/scala/cromwell/backend/wdl/Command.scala +++ b/backend/src/main/scala/cromwell/backend/Command.scala @@ -1,8 +1,7 @@ -package cromwell.backend.wdl +package cromwell.backend import common.validation.ErrorOr._ import common.validation.Validation._ -import cromwell.backend.BackendJobDescriptor import wom.InstantiatedCommand import wom.callable.RuntimeEnvironment import wom.expression.IoFunctionSet diff --git a/backend/src/main/scala/cromwell/backend/wdl/FileSizeTooBig.scala b/backend/src/main/scala/cromwell/backend/FileSizeTooBig.scala similarity index 73% rename from backend/src/main/scala/cromwell/backend/wdl/FileSizeTooBig.scala rename to backend/src/main/scala/cromwell/backend/FileSizeTooBig.scala index a0cf7477a5a..2580d9218fc 100644 --- a/backend/src/main/scala/cromwell/backend/wdl/FileSizeTooBig.scala +++ b/backend/src/main/scala/cromwell/backend/FileSizeTooBig.scala @@ -1,4 +1,4 @@ -package cromwell.backend.wdl +package cromwell.backend case class FileSizeTooBig(override val getMessage: String) extends Exception diff --git a/backend/src/main/scala/cromwell/backend/wdl/OutputEvaluator.scala b/backend/src/main/scala/cromwell/backend/OutputEvaluator.scala similarity index 98% rename from backend/src/main/scala/cromwell/backend/wdl/OutputEvaluator.scala rename to backend/src/main/scala/cromwell/backend/OutputEvaluator.scala index e6b966cae61..d5b927b5bbd 100644 --- a/backend/src/main/scala/cromwell/backend/wdl/OutputEvaluator.scala +++ b/backend/src/main/scala/cromwell/backend/OutputEvaluator.scala @@ -1,4 +1,4 @@ -package cromwell.backend.wdl +package cromwell.backend import cats.data.EitherT._ import cats.data.Validated.{Invalid, Valid} @@ -10,7 +10,6 @@ import cats.syntax.validated._ import common.util.TryUtil import common.validation.Checked._ import common.validation.ErrorOr.ErrorOr -import cromwell.backend.BackendJobDescriptor import cromwell.core.CallOutputs import wom.expression.IoFunctionSet import wom.graph.GraphNodePort.{ExpressionBasedOutputPort, OutputPort} diff --git a/backend/src/main/scala/cromwell/backend/wdl/ReadLikeFunctions.scala b/backend/src/main/scala/cromwell/backend/ReadLikeFunctions.scala similarity index 94% rename from backend/src/main/scala/cromwell/backend/wdl/ReadLikeFunctions.scala rename to backend/src/main/scala/cromwell/backend/ReadLikeFunctions.scala index 6dc38e5a6d4..e7032f720e2 100644 --- a/backend/src/main/scala/cromwell/backend/wdl/ReadLikeFunctions.scala +++ b/backend/src/main/scala/cromwell/backend/ReadLikeFunctions.scala @@ -1,4 +1,4 @@ -package cromwell.backend.wdl +package cromwell.backend import cromwell.core.io.AsyncIoFunctions import cromwell.core.path.PathFactory diff --git a/backend/src/main/scala/cromwell/backend/RuntimeEnvironment.scala b/backend/src/main/scala/cromwell/backend/RuntimeEnvironment.scala index 55cf459b7e6..3b81aa25b60 100644 --- a/backend/src/main/scala/cromwell/backend/RuntimeEnvironment.scala +++ b/backend/src/main/scala/cromwell/backend/RuntimeEnvironment.scala @@ -5,6 +5,9 @@ import java.util.UUID import cromwell.backend.io.JobPaths import cromwell.backend.validation.{CpuValidation, MemoryValidation} import cromwell.core.path.Path +import 
eu.timepit.refined.api.Refined +import eu.timepit.refined.numeric.Positive +import eu.timepit.refined.refineMV import wdl4s.parser.MemoryUnit import wom.callable.RuntimeEnvironment import wom.format.MemorySize @@ -23,12 +26,12 @@ object RuntimeEnvironmentBuilder { callRoot.resolve(s"tmp.$hash").pathAsString } - val cores: Int = CpuValidation.instanceMin.validate(runtimeAttributes).getOrElse(minimums.cores) + val cores: Int Refined Positive = CpuValidation.instanceMin.validate(runtimeAttributes).getOrElse(minimums.cores) val memoryInMiB: Double = MemoryValidation.instance(). validate(runtimeAttributes). - map(_.to(MemoryUnit.MiB).amount). + map(_.toMebibytes). getOrElse(minimums.ram.amount) //TODO: Read these from somewhere else @@ -50,7 +53,7 @@ object RuntimeEnvironmentBuilder { } } -case class MinimumRuntimeSettings(cores: Int = 1, +case class MinimumRuntimeSettings(cores: Int Refined Positive = refineMV(1), ram: MemorySize = MemorySize(4, MemoryUnit.GiB), outputPathSize: Long = Long.MaxValue, tempPathSize: Long = Long.MaxValue) diff --git a/backend/src/main/scala/cromwell/backend/wdl/WriteFunctions.scala b/backend/src/main/scala/cromwell/backend/WriteFunctions.scala similarity index 80% rename from backend/src/main/scala/cromwell/backend/wdl/WriteFunctions.scala rename to backend/src/main/scala/cromwell/backend/WriteFunctions.scala index 897c8f99856..76bc7e9d5d0 100644 --- a/backend/src/main/scala/cromwell/backend/wdl/WriteFunctions.scala +++ b/backend/src/main/scala/cromwell/backend/WriteFunctions.scala @@ -1,4 +1,6 @@ -package cromwell.backend.wdl +package cromwell.backend + +import java.util.UUID import better.files.File.OpenOptions import cats.instances.future._ @@ -25,6 +27,13 @@ trait WriteFunctions extends PathFactory with IoFunctionSet with AsyncIoFunction private lazy val _writeDirectory = if (isDocker) writeDirectory.createPermissionedDirectories() else writeDirectory.createDirectories() + override def createTemporaryDirectory(name: Option[String]) = { + val tempDirPath = _writeDirectory / name.getOrElse(UUID.randomUUID().toString) + // This is evil, but has the added advantage to work both for cloud and local + val tempDirHiddenFile = tempDirPath / ".file" + asyncIo.writeAsync(tempDirHiddenFile, "", OpenOptions.default) as { tempDirPath.pathAsString } + } + override def writeFile(path: String, content: String): Future[WomSingleFile] = { val file = _writeDirectory / path asyncIo.existsAsync(file) flatMap { diff --git a/backend/src/main/scala/cromwell/backend/backend.scala b/backend/src/main/scala/cromwell/backend/backend.scala index 67b0c8833f3..5199bed5ee5 100644 --- a/backend/src/main/scala/cromwell/backend/backend.scala +++ b/backend/src/main/scala/cromwell/backend/backend.scala @@ -11,10 +11,11 @@ import cromwell.core.labels.Labels import cromwell.core.path.{DefaultPathBuilderFactory, PathBuilderFactory} import cromwell.core.{CallKey, WorkflowId, WorkflowOptions} import cromwell.services.keyvalue.KeyValueServiceActor.KvResponse -import wom.callable.ExecutableCallable +import wom.callable.{ExecutableCallable, MetaValueElement} import wom.graph.CommandCallNode import wom.graph.GraphNodePort.OutputPort -import wom.values.{WomEvaluatedCallInputs, WomValue} +import wom.values.WomArray.WomArrayLike +import wom.values._ import scala.util.Try @@ -37,9 +38,25 @@ case class BackendJobDescriptor(workflowDescriptor: BackendWorkflowDescriptor, evaluatedTaskInputs: WomEvaluatedCallInputs, maybeCallCachingEligible: MaybeCallCachingEligible, prefetchedKvStoreEntries: Map[String, 
KvResponse]) { - val fullyQualifiedInputs = evaluatedTaskInputs map { case (declaration, value) => + + val fullyQualifiedInputs: Map[String, WomValue] = evaluatedTaskInputs map { case (declaration, value) => key.call.identifier.combine(declaration.name).fullyQualifiedName.value -> value } + + def findInputFilesByParameterMeta(filter: MetaValueElement => Boolean): Set[WomFile] = evaluatedTaskInputs.collect { + case (declaration, value) if declaration.parameterMeta.exists(filter) => findFiles(value) + }.flatten.toSet + + def findFiles(v: WomValue): Set[WomFile] = v match { + case value: WomFile => Set(value) + case WomOptionalValue(_, Some(value)) => findFiles(value) + case value: WomObjectLike => value.values.values.toSet flatMap findFiles + case WomArrayLike(value) => value.value.toSet flatMap findFiles + case WomPair(left, right) => findFiles(left) ++ findFiles(right) + case WomMap(_, innerMap) => (innerMap.keySet flatMap findFiles) ++ (innerMap.values.toSet flatMap findFiles) + case _ => Set.empty + } + val localInputs = evaluatedTaskInputs map { case (declaration, value) => declaration.name -> value } val taskCall = key.call override lazy val toString = key.mkTag(workflowDescriptor.id) diff --git a/backend/src/main/scala/cromwell/backend/io/DirectoryFunctions.scala b/backend/src/main/scala/cromwell/backend/io/DirectoryFunctions.scala index 618fc0727c4..bf8a31145cd 100644 --- a/backend/src/main/scala/cromwell/backend/io/DirectoryFunctions.scala +++ b/backend/src/main/scala/cromwell/backend/io/DirectoryFunctions.scala @@ -8,6 +8,7 @@ import common.validation.ErrorOr._ import common.validation.Validation._ import cromwell.backend.BackendJobDescriptor import cromwell.backend.io.DirectoryFunctions.listFiles +import cromwell.core.io.AsyncIoFunctions import cromwell.core.path.{Path, PathFactory} import wom.expression.IoFunctionSet import wom.expression.IoFunctionSet.{IoDirectory, IoElement, IoFile} @@ -17,7 +18,7 @@ import wom.values.{WomFile, WomGlobFile, WomMaybeListedDirectory, WomMaybePopula import scala.concurrent.Future import scala.util.Try -trait DirectoryFunctions extends IoFunctionSet with PathFactory { +trait DirectoryFunctions extends IoFunctionSet with PathFactory with AsyncIoFunctions { def findDirectoryOutputs(call: CommandCallNode, jobDescriptor: BackendJobDescriptor): ErrorOr[List[WomUnlistedDirectory]] = { @@ -28,8 +29,16 @@ trait DirectoryFunctions extends IoFunctionSet with PathFactory { } } - override def isDirectory(path: String) = Future.fromTry(Try(buildPath(path).isDirectory)) + override def isDirectory(path: String) = asyncIo.isDirectory(buildPath(path)) + /* + * Several things are wrong here. + * 1) None of this is going through the I/O Actor: https://github.com/broadinstitute/cromwell/issues/3133 + * which means no instrumentation, no throttling, no batching, and no custom retries. + * 2) The NIO implementation of "list" in GCS will list all objects with the prefix "path", unlike the unix + * implementation which lists files and directories children. What we need is the unix behavior, even for cloud filesystems. + * 3) It uses the isDirectory function directly on the path, which cannot be trusted for GCS paths. It should use asyncIo.isDirectory instead. 
+ */ override def listDirectory(path: String)(visited: Vector[String] = Vector.empty): Future[Iterator[IoElement]] = { Future.fromTry(Try { val visitedPaths = visited.map(buildPath) diff --git a/backend/src/main/scala/cromwell/backend/standard/LocalizedAdHocValue.scala b/backend/src/main/scala/cromwell/backend/standard/LocalizedAdHocValue.scala new file mode 100644 index 00000000000..c12427ee358 --- /dev/null +++ b/backend/src/main/scala/cromwell/backend/standard/LocalizedAdHocValue.scala @@ -0,0 +1,9 @@ +package cromwell.backend.standard + +import cromwell.core.path.Path +import wom.callable.AdHocValue + +/** + * Represents an adhoc value that was moved to another location before the command is instantiated + */ +final case class LocalizedAdHocValue(originalValue: AdHocValue, localizedLocation: Path) diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala index a7952ae8cd0..b1b84abbf0f 100644 --- a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala +++ b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala @@ -7,8 +7,10 @@ import akka.event.LoggingReceive import cats.instances.list._ import cats.instances.option._ import cats.syntax.apply._ +import cats.syntax.either._ +import cats.syntax.functor._ import cats.syntax.traverse._ -import cats.syntax.validated._ +import common.Checked import common.exception.MessageAggregation import common.util.StringUtil._ import common.util.TryUtil @@ -16,12 +18,12 @@ import common.validation.ErrorOr.{ErrorOr, ShortCircuitingFlatMap} import common.validation.Validation._ import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, JobAbortedResponse, JobReconnectionNotSupportedException} import cromwell.backend.BackendLifecycleActor.AbortJobCommand -import cromwell.backend._ +import cromwell.backend.OutputEvaluator._ import cromwell.backend.async.AsyncBackendJobExecutionActor._ -import cromwell.backend.async.{AbortedExecutionHandle, AsyncBackendJobExecutionActor, ExecutionHandle, FailedNonRetryableExecutionHandle, FailedRetryableExecutionHandle, PendingExecutionHandle, ReturnCodeIsNotAnInt, StderrNonEmpty, SuccessfulExecutionHandle, WrongReturnCode} +import cromwell.backend.async._ +import cromwell.backend.standard.StandardAdHocValue._ import cromwell.backend.validation._ -import cromwell.backend.wdl.OutputEvaluator._ -import cromwell.backend.wdl.{Command, OutputEvaluator} +import cromwell.backend.{Command, OutputEvaluator, _} import cromwell.core.io.{AsyncIoActorClient, DefaultIoCommandBuilder, IoCommandBuilder} import cromwell.core.path.Path import cromwell.core.{CromwellAggregatedException, CromwellFatalExceptionMarker, ExecutionEvent, StandardPaths} @@ -30,13 +32,16 @@ import cromwell.services.keyvalue.KvClient import cromwell.services.metadata.CallMetadataKeys import mouse.all._ import net.ceedubs.ficus.Ficus._ -import wom.callable.{AdHocValue, CommandTaskDefinition, RuntimeEnvironment} +import shapeless.Coproduct +import wom.callable.{AdHocValue, CommandTaskDefinition, ContainerizedInputExpression, RuntimeEnvironment} import wom.expression.WomExpression import wom.graph.LocalName +import wom.values.LazyWomFile._ import wom.values._ import wom.{CommandSetupSideEffectFile, InstantiatedCommand, WomFileMapper} -import scala.concurrent.{ExecutionContext, ExecutionContextExecutor, Future, Promise} +import scala.concurrent._ +import 
scala.concurrent.duration._ import scala.util.{Failure, Success, Try} trait StandardAsyncExecutionActorParams extends StandardJobExecutionActorParams { @@ -115,30 +120,24 @@ trait StandardAsyncExecutionActor extends AsyncBackendJobExecutionActor with Sta lazy val temporaryDirectory = configurationDescriptor.backendConfig.getOrElse( path = "temporary-directory", - default = s"""mkdir -p "${runtimeEnvironment.tempPath}" && echo "${runtimeEnvironment.tempPath}"""" + default = s"""$$(mkdir -p "${runtimeEnvironment.tempPath}" && echo "${runtimeEnvironment.tempPath}")""" ) - /** - * Maps WomFile objects for use in the commandLinePreProcessor. - * - * By default just calls the pass through mapper mapCommandLineWomFile. - * - * Sometimes a preprocessor may need to localize the files, etc. - * - */ - def preProcessWomFile(womFile: WomFile): WomFile = womFile + def preProcessWomFile(womFile: WomFile): WomFile = womFile + /** @see [[Command.instantiate]] */ final lazy val commandLinePreProcessor: WomEvaluatedCallInputs => Try[WomEvaluatedCallInputs] = { inputs => - TryUtil.sequenceMap(inputs mapValues WomFileMapper.mapWomFiles(preProcessWomFile)). + TryUtil.sequenceMap(inputs mapValues WomFileMapper.mapWomFiles(preProcessWomFile, inputsToNotLocalize)). recoverWith { case e => Failure(new IOException(e.getMessage) with CromwellFatalExceptionMarker) } } + + final lazy val localizedInputs: Try[WomEvaluatedCallInputs] = commandLinePreProcessor(jobDescriptor.evaluatedTaskInputs) /** * Maps WomFile to a local path, for use in the commandLineValueMapper. - * */ def mapCommandLineWomFile(womFile: WomFile): WomFile = womFile.mapFile(workflowPaths.buildPath(_).pathAsString) @@ -148,14 +147,18 @@ trait StandardAsyncExecutionActor extends AsyncBackendJobExecutionActor with Sta // keeping track of the paths cleanly without so many value mappers def mapCommandLineJobInputWomFile(womFile: WomFile): WomFile = mapCommandLineWomFile(womFile) + // Allows backends to signal to the StandardAsyncExecutionActor that there's a set of input files which + // they don't intend to localize for this task. + def inputsToNotLocalize: Set[WomFile] = Set.empty + /** @see [[Command.instantiate]] */ final lazy val commandLineValueMapper: WomValue => WomValue = { - womValue => WomFileMapper.mapWomFiles(mapCommandLineWomFile)(womValue).get + womValue => WomFileMapper.mapWomFiles(mapCommandLineWomFile, inputsToNotLocalize)(womValue).get } /** @see [[Command.instantiate]] */ final lazy val commandLineJobInputValueMapper: WomValue => WomValue = { - womValue => WomFileMapper.mapWomFiles(mapCommandLineJobInputWomFile)(womValue).get + womValue => WomFileMapper.mapWomFiles(mapCommandLineJobInputWomFile, inputsToNotLocalize)(womValue).get } lazy val jobShell: String = configurationDescriptor.backendConfig.getOrElse("job-shell", @@ -244,6 +247,19 @@ trait StandardAsyncExecutionActor extends AsyncBackendJobExecutionActor with Sta /** Any custom code that should be run within commandScriptContents before the instantiated command. 
*/ def scriptPreamble: String = "" + // TODO: Discuss if this lift is appropriate, and if they should be functions + // of lazy vals + def cwd: Path = commandDirectory + def rcPath: Path = cwd./(jobPaths.returnCodeFilename) + private def absolutizeContainerPath(path: String): String = { + if (path.startsWith(cwd.pathAsString)) path else cwd.resolve(path).pathAsString + } + + def executionStdin: Option[String] = instantiatedCommand.evaluatedStdinRedirection map absolutizeContainerPath + def executionStdout: String = instantiatedCommand.evaluatedStdoutOverride.getOrElse(jobPaths.defaultStdoutFilename) |> absolutizeContainerPath + def executionStderr: String = instantiatedCommand.evaluatedStderrOverride.getOrElse(jobPaths.defaultStderrFilename) |> absolutizeContainerPath + // End added lift code + /** A bash script containing the custom preamble, the instantiated command, and output globbing behavior. */ def commandScriptContents: ErrorOr[String] = { jobLogger.info(s"`${instantiatedCommand.commandString}`") @@ -304,23 +320,20 @@ trait StandardAsyncExecutionActor extends AsyncBackendJobExecutionActor with Sta lazy val environmentVariables = instantiatedCommand.environmentVariables map { case (k, v) => s"""export $k="$v"""" } mkString("", "\n", "\n") val home = jobDescriptor.taskCall.callable.homeOverride.map { _ (runtimeEnvironment) }.getOrElse("$HOME") - val shortId = jobDescriptor.workflowDescriptor.id.shortString - // Give the out and error FIFO variables names that are unlikely to conflict with anything the user is doing. - val (out, err) = (s"out$shortId", s"err$shortId") val dockerOutputDir = jobDescriptor.taskCall.callable.dockerOutputDirectory map { d => s"ln -s $cwd $d" } getOrElse "" - // The `tee` trickery below is to be able to redirect to known filenames for CWL while also streaming - // stdout and stderr for PAPI to periodically upload to cloud storage. - // https://stackoverflow.com/questions/692000/how-do-i-write-stderr-to-a-file-while-using-tee-with-a-pipe + // Only adjust the temporary directory permissions if this is executing under Docker. + val tmpDirPermissionsAdjustment = if (isDockerRun) s"""chmod 777 "$$tmpDir"""" else "" + (errorOrDirectoryOutputs, errorOrGlobFiles).mapN((directoryOutputs, globFiles) => s"""|#!$jobShell |DOCKER_OUTPUT_DIR_LINK |cd $cwd - |tmpDir=`$temporaryDirectory` - |chmod 777 "$$tmpDir" + |tmpDir=$temporaryDirectory + |$tmpDirPermissionsAdjustment |export _JAVA_OPTIONS=-Djava.io.tmpdir="$$tmpDir" |export TMPDIR="$$tmpDir" |export HOME="$home" @@ -328,18 +341,18 @@ trait StandardAsyncExecutionActor extends AsyncBackendJobExecutionActor with Sta |cd $cwd |SCRIPT_PREAMBLE |) - |$out="$${tmpDir}/out.$$$$" $err="$${tmpDir}/err.$$$$" - |mkfifo "$$$out" "$$$err" - |trap 'rm "$$$out" "$$$err"' EXIT - |tee $stdoutRedirection < "$$$out" & - |tee $stderrRedirection < "$$$err" >&2 & |( |cd $cwd |ENVIRONMENT_VARIABLES |INSTANTIATED_COMMAND - |) $stdinRedirection > "$$$out" 2> "$$$err" + |) $stdinRedirection > $stdoutRedirection 2> $stderrRedirection |echo $$? > $rcTmpPath |( + |# add a .file in every empty directory to facilitate directory delocalization on the cloud + |cd $cwd + |find . 
-type d -empty -print | xargs -I % touch %/.file + |) + |( |cd $cwd |SCRIPT_EPILOGUE |${globScripts(globFiles)} @@ -362,30 +375,52 @@ trait StandardAsyncExecutionActor extends AsyncBackendJobExecutionActor with Sta lazy val runtimeEnvironment = { RuntimeEnvironmentBuilder(jobDescriptor.runtimeAttributes, jobPaths)(standardParams.minimumRuntimeSettings) |> runtimeEnvironmentPathMapper } - - lazy val evaluatedAdHocFiles = { - val callable = jobDescriptor.taskCall.callable - - /* - NOTE: This method jumps through hoops to keep track of inputs and paths especially for ad hoc files. - If / when the [[WomFile.value]] is differentiated into cloud paths, VM paths & docker container paths then a lot of - this will need to be refactored. - */ - - def validateAdHocFile(value: WomValue): ErrorOr[List[WomFile]] = value match { - case womFile: WomFile => List(womFile).valid - case womArray: WomArray => womArray.value.toList.traverse(validateAdHocFile).map(_.flatten) - case other => - s"Ad-hoc file creation expression invalidly created a ${other.womType.toDisplayString} result.".invalidNel - } - - def validateAdHocValue(value: AdHocValue): ErrorOr[List[(WomFile, Option[String])]] = { - value.womValue match { - case womFile: WomFile => List(womFile -> value.mutableInputOption).valid - case other => validateAdHocFile(other).map(_.map(_ -> None)) + /** + * By default, ad hoc values get localized to the call directory. + * This way if running locally with docker they get mounted with the rest of the inputs in the container. + * The PAPI backend overrides this to a noop since the localization happens on the VM directly, so there's no need + * for this extra localization step. + * + * Maybe this should be the other way around: the default implementation is noop and SFS / TES override it ? 
+ */ + lazy val localizeAdHocValues: List[AdHocValue] => ErrorOr[List[StandardAdHocValue]] = { adHocValues => + import cats.instances.future._ + + // Localize an adhoc file to the callExecutionRoot as needed + val localize: (AdHocValue, Path) => Future[LocalizedAdHocValue] = { (adHocValue, file) => + val actualName = adHocValue.alternativeName.getOrElse(file.name) + val finalPath = jobPaths.callExecutionRoot / actualName + // First check that it's not already there under execution root + asyncIo.existsAsync(finalPath) flatMap { + // If it's not then copy it + case false => asyncIo.copyAsync(file, finalPath) as { LocalizedAdHocValue(adHocValue, finalPath) } + case true => Future.successful(LocalizedAdHocValue(adHocValue, finalPath)) } } + + adHocValues.traverse[ErrorOr, (AdHocValue, Path)]({ adHocValue => + // Build an actionable Path from the ad hoc file + getPath(adHocValue.womValue.value).toErrorOr.map(adHocValue -> _) + }) + // Localize the values if necessary + .map(_.traverse[Future, LocalizedAdHocValue](localize.tupled)).toEither + // TODO: Asynchronify + // This is obviously sad but turning it into a Future has earth-shattering consequences, so synchronizing it for now + .flatMap(future => Try(Await.result(future, 1.hour)).toChecked) + .map(_.map(Coproduct[StandardAdHocValue](_))) + .toValidated + } + + def adHocValueToCommandSetupSideEffectFile(adHocValue: StandardAdHocValue) = adHocValue match { + case AsAdHocValue(AdHocValue(womValue, alternativeName, _)) => + CommandSetupSideEffectFile(womValue, alternativeName) + case AsLocalizedAdHocValue(LocalizedAdHocValue(AdHocValue(womValue, alternativeName, _), _)) => + CommandSetupSideEffectFile(womValue, alternativeName) + } + + lazy val evaluatedAdHocFiles: ErrorOr[List[AdHocValue]] = { + val callable = jobDescriptor.taskCall.callable /* * Try to map the command line values. 
@@ -398,34 +433,53 @@ trait StandardAsyncExecutionActor extends AsyncBackendJobExecutionActor with Sta def tryCommandLineValueMapper(womValue: WomValue): WomValue = { Try(commandLineJobInputValueMapper(womValue)).getOrElse(womValue) } - + val unmappedInputs: Map[String, WomValue] = jobDescriptor.evaluatedTaskInputs.map({ case (inputDefinition, womValue) => inputDefinition.localName.value -> womValue }) - val mappedInputs: Map[String, WomValue] = unmappedInputs.mapValues(tryCommandLineValueMapper).map(identity) - callable.adHocFileCreation.toList.flatTraverse( - _.evaluate(unmappedInputs, mappedInputs, backendEngineFunctions).flatMap(validateAdHocValue) - ) + + val mappedInputs: Checked[Map[String, WomValue]] = localizedInputs.toErrorOr.map( + _.map({ + case (inputDefinition, value) => inputDefinition.localName.value -> tryCommandLineValueMapper(value) + }) + ).toEither + + val evaluateAndInitialize = (containerizedInputExpression: ContainerizedInputExpression) => for { + mapped <- mappedInputs + evaluated <- containerizedInputExpression.evaluate(unmappedInputs, mapped, backendEngineFunctions).toEither + initialized <- evaluated.traverse[ErrorOr, AdHocValue]({ adHocValue => + adHocValue.womValue.initializeWomFile(backendEngineFunctions).map(i => adHocValue.copy(womValue = i)) + }).toEither + } yield initialized + + callable.adHocFileCreation.toList + .flatTraverse[ErrorOr, AdHocValue](evaluateAndInitialize.andThen(_.toValidated)) } + + lazy val localizedAdHocValues: ErrorOr[List[StandardAdHocValue]] = evaluatedAdHocFiles.toEither + .flatMap(localizeAdHocValues.andThen(_.toEither)) + .toValidated - protected def isAdHocFile(womFile: WomFile) = evaluatedAdHocFiles map { _.exists({ - case (file, _) => file.value == womFile.value - }) - } getOrElse false + protected def asAdHocFile(womFile: WomFile) = evaluatedAdHocFiles map { _.find({ + case AdHocValue(file, _, _) => file.value == womFile.value + }) + } getOrElse None + + protected def isAdHocFile(womFile: WomFile) = asAdHocFile(womFile).isDefined /** The instantiated command. */ lazy val instantiatedCommand: InstantiatedCommand = { val callable = jobDescriptor.taskCall.callable - val adHocFileCreations: ErrorOr[List[(WomFile, Option[String])]] = evaluatedAdHocFiles - // Replace input files with the ad hoc updated version def adHocFilePreProcessor(in: WomEvaluatedCallInputs): Try[WomEvaluatedCallInputs] = { - adHocFileCreations.toTry("Error evaluating ad hoc files") map { adHocFiles => + localizedAdHocValues.toTry("Error evaluating ad hoc files") map { adHocFiles => in map { case (inputDefinition, originalWomValue) => inputDefinition -> adHocFiles.collectFirst({ - case (mutatedWomValue, Some(inputName)) if inputName == inputDefinition.localName.value => mutatedWomValue + case AsAdHocValue(AdHocValue(originalWomFile, _, Some(inputName))) if inputName == inputDefinition.localName.value => originalWomFile + case AsLocalizedAdHocValue(LocalizedAdHocValue(AdHocValue(originalWomFile, _, Some(inputName)), localizedPath)) if inputName == inputDefinition.localName.value => + originalWomFile.mapFile(_ => localizedPath.pathAsString) }).getOrElse(originalWomValue) } } @@ -434,7 +488,7 @@ trait StandardAsyncExecutionActor extends AsyncBackendJobExecutionActor with Sta // Gets the inputs that will be mutated by instantiating the command. 
def mutatingPreProcessor(in: WomEvaluatedCallInputs): Try[WomEvaluatedCallInputs] = { for { - commandLineProcessed <- commandLinePreProcessor(in) + commandLineProcessed <- localizedInputs adHocProcessed <- adHocFilePreProcessor(commandLineProcessed) } yield adHocProcessed } @@ -447,16 +501,12 @@ trait StandardAsyncExecutionActor extends AsyncBackendJobExecutionActor with Sta runtimeEnvironment ) - def adHocFileLocalization(womFile: WomFile): String = womFile.value.substring(womFile.value.lastIndexOf("/") + 1) - def makeStringKeyedMap(list: List[(LocalName, WomValue)]): Map[String, WomValue] = list.toMap map { case (k, v) => k.value -> v } val command = instantiatedCommandValidation flatMap { instantiatedCommand => val valueMappedPreprocessedInputs = instantiatedCommand.valueMappedPreprocessedInputs |> makeStringKeyedMap - val adHocFileCreationSideEffectFiles: ErrorOr[List[CommandSetupSideEffectFile]] = adHocFileCreations map { _ map { - case (womFile, _) => CommandSetupSideEffectFile(womFile, Option(adHocFileLocalization(womFile))) - }} + val adHocFileCreationSideEffectFiles: ErrorOr[List[CommandSetupSideEffectFile]] = localizedAdHocValues map { _.map(adHocValueToCommandSetupSideEffectFile) } def evaluateEnvironmentExpression(nameAndExpression: (String, WomExpression)): ErrorOr[(String, String)] = { val (name, expression) = nameAndExpression @@ -705,7 +755,7 @@ trait StandardAsyncExecutionActor extends AsyncBackendJobExecutionActor with Sta * @return The Try wrapped and mapped WOM value. */ final def outputValueMapper(womValue: WomValue): Try[WomValue] = { - WomFileMapper.mapWomFiles(mapOutputWomFile)(womValue) + WomFileMapper.mapWomFiles(mapOutputWomFile, Set.empty)(womValue) } /** diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardExpressionFunctions.scala b/backend/src/main/scala/cromwell/backend/standard/StandardExpressionFunctions.scala index d6e3ae240fc..7782c2da901 100644 --- a/backend/src/main/scala/cromwell/backend/standard/StandardExpressionFunctions.scala +++ b/backend/src/main/scala/cromwell/backend/standard/StandardExpressionFunctions.scala @@ -2,7 +2,7 @@ package cromwell.backend.standard import akka.actor.ActorRef import cromwell.backend.io.{DirectoryFunctions, GlobFunctions} -import cromwell.backend.wdl.{ReadLikeFunctions, WriteFunctions} +import cromwell.backend.{ReadLikeFunctions, WriteFunctions} import cromwell.core.CallContext import cromwell.core.io._ import cromwell.core.path.PathFactory.PathBuilders diff --git a/backend/src/main/scala/cromwell/backend/standard/package.scala b/backend/src/main/scala/cromwell/backend/standard/package.scala new file mode 100644 index 00000000000..089efd4f614 --- /dev/null +++ b/backend/src/main/scala/cromwell/backend/standard/package.scala @@ -0,0 +1,18 @@ +package cromwell.backend + +import shapeless.{:+:, CNil} +import wom.callable.AdHocValue + +package object standard { + object StandardAdHocValue { + object AsAdHocValue { + def unapply(arg: StandardAdHocValue): Option[AdHocValue] = arg.select[AdHocValue] + } + object AsLocalizedAdHocValue { + def unapply(arg: StandardAdHocValue): Option[LocalizedAdHocValue] = arg.select[LocalizedAdHocValue] + } + } + + // This is used to represent an AdHocValue that might have been localized + type StandardAdHocValue = AdHocValue :+: LocalizedAdHocValue :+: CNil +} diff --git a/backend/src/main/scala/cromwell/backend/validation/CpuValidation.scala b/backend/src/main/scala/cromwell/backend/validation/CpuValidation.scala index fec4f61c4f5..0cc06a7dfa4 100644 --- 
a/backend/src/main/scala/cromwell/backend/validation/CpuValidation.scala +++ b/backend/src/main/scala/cromwell/backend/validation/CpuValidation.scala @@ -1,8 +1,12 @@ package cromwell.backend.validation -import cats.syntax.validated._ +import cats.data.NonEmptyList +import cats.syntax.either._ import com.typesafe.config.Config import common.validation.ErrorOr.ErrorOr +import eu.timepit.refined.api.Refined +import eu.timepit.refined.numeric.Positive +import eu.timepit.refined.refineV import wom.RuntimeAttributesKeys._ import wom.types.WomIntegerType import wom.values.{WomInteger, WomValue} @@ -18,26 +22,25 @@ import wom.values.{WomInteger, WomValue} * reference.conf file, coerced into a WomValue. */ object CpuValidation { - lazy val instance: RuntimeAttributesValidation[Int] = new CpuValidation(CpuKey) - lazy val optional: OptionalRuntimeAttributesValidation[Int] = instance.optional - lazy val instanceMin: RuntimeAttributesValidation[Int] = new CpuValidation(CpuMinKey) - lazy val optionalMin: OptionalRuntimeAttributesValidation[Int] = instanceMin.optional - lazy val instanceMax: RuntimeAttributesValidation[Int] = new CpuValidation(CpuMaxKey) - lazy val optionalMax: OptionalRuntimeAttributesValidation[Int] = instanceMax.optional + lazy val instance: RuntimeAttributesValidation[Int Refined Positive] = new CpuValidation(CpuKey) + lazy val optional: OptionalRuntimeAttributesValidation[Int Refined Positive] = instance.optional + lazy val instanceMin: RuntimeAttributesValidation[Int Refined Positive] = new CpuValidation(CpuMinKey) + lazy val optionalMin: OptionalRuntimeAttributesValidation[Int Refined Positive] = instanceMin.optional + lazy val instanceMax: RuntimeAttributesValidation[Int Refined Positive] = new CpuValidation(CpuMaxKey) + lazy val optionalMax: OptionalRuntimeAttributesValidation[Int Refined Positive] = instanceMax.optional lazy val defaultMin: WomValue = WomInteger(1) def configDefaultWomValue(config: Option[Config]): Option[WomValue] = instance.configDefaultWomValue(config) } -class CpuValidation(attributeName: String) extends IntRuntimeAttributesValidation(attributeName) { - override protected def validateValue: PartialFunction[WomValue, ErrorOr[Int]] = { +class CpuValidation(attributeName: String) extends PositiveIntRuntimeAttributesValidation(attributeName) { + override protected def validateValue: PartialFunction[WomValue, ErrorOr[Int Refined Positive]] = { case womValue if WomIntegerType.coerceRawValue(womValue).isSuccess => WomIntegerType.coerceRawValue(womValue).get match { case WomInteger(value) => - if (value.toInt <= 0) - s"Expecting $key runtime attribute value greater than 0".invalidNel - else - value.toInt.validNel + refineV[Positive](value.toInt) + .leftMap(_ => NonEmptyList.one(s"Expecting $key runtime attribute value greater than 0")) + .toValidated } } diff --git a/backend/src/main/scala/cromwell/backend/validation/MemoryValidation.scala b/backend/src/main/scala/cromwell/backend/validation/MemoryValidation.scala index 3063172d786..d0decdec85f 100644 --- a/backend/src/main/scala/cromwell/backend/validation/MemoryValidation.scala +++ b/backend/src/main/scala/cromwell/backend/validation/MemoryValidation.scala @@ -3,9 +3,9 @@ package cromwell.backend.validation import cats.syntax.validated._ import com.typesafe.config.Config import common.validation.ErrorOr._ -import wdl4s.parser.MemoryUnit +import squants.QuantityParseException +import squants.information.{Bytes, Information} import wom.RuntimeAttributesKeys -import wom.format.MemorySize import 
wom.types.{WomIntegerType, WomLongType, WomStringType} import wom.values.{WomInteger, WomLong, WomString, WomValue} @@ -26,15 +26,15 @@ import scala.util.{Failure, Success} * `withDefaultMemory` can be used to create a memory validation that defaults to a particular memory size. */ object MemoryValidation { - def instance(attributeName: String = RuntimeAttributesKeys.MemoryKey): RuntimeAttributesValidation[MemorySize] = + def instance(attributeName: String = RuntimeAttributesKeys.MemoryKey): RuntimeAttributesValidation[Information] = new MemoryValidation(attributeName) - def optional(attributeName: String = RuntimeAttributesKeys.MemoryKey): OptionalRuntimeAttributesValidation[MemorySize] = + def optional(attributeName: String = RuntimeAttributesKeys.MemoryKey): OptionalRuntimeAttributesValidation[Information] = instance(attributeName).optional def configDefaultString(attributeName: String = RuntimeAttributesKeys.MemoryKey, config: Option[Config]): Option[String] = instance(attributeName).configDefaultValue(config) - def withDefaultMemory(attributeName: String = RuntimeAttributesKeys.MemoryKey, memorySize: String): RuntimeAttributesValidation[MemorySize] = { - MemorySize.parse(memorySize) match { - case Success(memory) => instance(attributeName).withDefault(WomInteger(memory.bytes.toInt)) + def withDefaultMemory(attributeName: String = RuntimeAttributesKeys.MemoryKey, memorySize: String): RuntimeAttributesValidation[Information] = { + Information(memorySize) match { + case Success(memory) => instance(attributeName).withDefault(WomInteger(memory.toBytes.toInt)) case Failure(_) => instance(attributeName).withDefault(BadDefaultAttribute(WomString(memorySize.toString))) } } @@ -45,39 +45,38 @@ object MemoryValidation { "Expecting %s runtime attribute to be an Integer or String with format '8 GB'." + " Exception: %s" - private[validation] def validateMemoryString(attributeName: String, wdlString: WomString): ErrorOr[MemorySize] = + private[validation] def validateMemoryString(attributeName: String, wdlString: WomString): ErrorOr[Information] = validateMemoryString(attributeName, wdlString.value) - private[validation] def validateMemoryString(attributeName: String, value: String): ErrorOr[MemorySize] = { - MemorySize.parse(value) match { - case scala.util.Success(memorySize: MemorySize) if memorySize.amount > 0 => - memorySize.to(MemoryUnit.GB).validNel - case scala.util.Success(memorySize: MemorySize) => - wrongAmountFormat.format(attributeName, memorySize.amount).invalidNel + private[validation] def validateMemoryString(attributeName: String, value: String): ErrorOr[Information] = { + Information(value) match { + case scala.util.Success(memorySize: Information) if memorySize.value > 0D => + memorySize.validNel + case scala.util.Success(memorySize: Information) => + wrongAmountFormat.format(attributeName, memorySize.value).invalidNel + case scala.util.Failure(_: QuantityParseException) => + wrongTypeFormat.format(attributeName, s"$value should be of the form 'X Unit' where X is a number, e.g. 
8 GB").invalidNel case scala.util.Failure(throwable) => wrongTypeFormat.format(attributeName, throwable.getMessage).invalidNel } } - private[validation] def validateMemoryInteger(attributeName: String, wdlInteger: WomInteger): ErrorOr[MemorySize] = - validateMemoryInteger(attributeName, wdlInteger.value) - - private[validation] def validateMemoryInteger(attributeName: String, value: Int): ErrorOr[MemorySize] = { + private[validation] def validateMemoryInteger(attributeName: String, value: Int): ErrorOr[Information] = { if (value <= 0) wrongAmountFormat.format(attributeName, value).invalidNel else - MemorySize(value.toDouble, MemoryUnit.Bytes).to(MemoryUnit.GB).validNel + Bytes(value.toDouble).validNel } - def validateMemoryLong(attributeName: String, value: Long): ErrorOr[MemorySize] = { + def validateMemoryLong(attributeName: String, value: Long): ErrorOr[Information] = { if (value <= 0) wrongAmountFormat.format(attributeName, value).invalidNel else - MemorySize(value.toDouble, MemoryUnit.Bytes).to(MemoryUnit.GB).validNel + Bytes(value.toDouble).validNel } } -class MemoryValidation(attributeName: String = RuntimeAttributesKeys.MemoryKey) extends RuntimeAttributesValidation[MemorySize] { +class MemoryValidation(attributeName: String = RuntimeAttributesKeys.MemoryKey) extends RuntimeAttributesValidation[Information] { import MemoryValidation._ @@ -85,7 +84,7 @@ class MemoryValidation(attributeName: String = RuntimeAttributesKeys.MemoryKey) override def coercion = Seq(WomIntegerType, WomLongType, WomStringType) - override protected def validateValue: PartialFunction[WomValue, ErrorOr[MemorySize]] = { + override protected def validateValue: PartialFunction[WomValue, ErrorOr[Information]] = { case WomLong(value) => MemoryValidation.validateMemoryLong(key, value) case WomInteger(value) => MemoryValidation.validateMemoryInteger(key, value) case WomString(value) => MemoryValidation.validateMemoryString(key, value) diff --git a/backend/src/main/scala/cromwell/backend/validation/PrimitiveRuntimeAttributesValidation.scala b/backend/src/main/scala/cromwell/backend/validation/PrimitiveRuntimeAttributesValidation.scala index 77b33483eaf..a5a5774d812 100644 --- a/backend/src/main/scala/cromwell/backend/validation/PrimitiveRuntimeAttributesValidation.scala +++ b/backend/src/main/scala/cromwell/backend/validation/PrimitiveRuntimeAttributesValidation.scala @@ -1,7 +1,12 @@ package cromwell.backend.validation +import cats.data.NonEmptyList +import cats.syntax.either._ import cats.syntax.validated._ import common.validation.ErrorOr.ErrorOr +import eu.timepit.refined.api.Refined +import eu.timepit.refined.numeric.Positive +import eu.timepit.refined.refineV import wom.types._ import wom.values._ @@ -65,6 +70,16 @@ class IntRuntimeAttributesValidation(override val key: String) extends override protected def typeString: String = "an Integer" } +class PositiveIntRuntimeAttributesValidation(override val key: String) extends + PrimitiveRuntimeAttributesValidation[Int Refined Positive, WomInteger] { + + override val womType = WomIntegerType + + override protected def validateCoercedValue(womValue: WomInteger): ErrorOr[Int Refined Positive] = refineV[Positive](womValue.value).leftMap(NonEmptyList.one).toValidated + + override protected def typeString: String = "an Integer" +} + class StringRuntimeAttributesValidation(override val key: String) extends PrimitiveRuntimeAttributesValidation[String, WomString] { diff --git a/backend/src/main/scala/cromwell/backend/validation/RuntimeAttributesValidation.scala 
b/backend/src/main/scala/cromwell/backend/validation/RuntimeAttributesValidation.scala index f66b41a3c85..9c7daac48b1 100644 --- a/backend/src/main/scala/cromwell/backend/validation/RuntimeAttributesValidation.scala +++ b/backend/src/main/scala/cromwell/backend/validation/RuntimeAttributesValidation.scala @@ -4,13 +4,15 @@ import cats.data.Validated.{Invalid, Valid} import cats.data.{NonEmptyList, Validated} import cats.syntax.validated._ import com.typesafe.config.Config -import cromwell.backend.RuntimeAttributeDefinition import common.validation.ErrorOr._ +import cromwell.backend.RuntimeAttributeDefinition +import eu.timepit.refined.api.Refined +import eu.timepit.refined.numeric.Positive import org.slf4j.Logger +import squants.information.{Information, Megabytes} import wdl.draft2.model.expression.PureStandardLibraryFunctions import wdl.draft2.model.{NoLookup, WdlExpression} import wom.expression.{NoIoFunctionSet, WomExpression} -import wom.format.MemorySize import wom.types._ import wom.values._ @@ -36,11 +38,11 @@ object RuntimeAttributesValidation { validateWithValidation(value, ContinueOnReturnCodeValidation.instance, onMissingKey) } - def validateMemory(value: Option[WomValue], onMissingKey: => ErrorOr[MemorySize]): ErrorOr[MemorySize] = { + def validateMemory(value: Option[WomValue], onMissingKey: => ErrorOr[Information]): ErrorOr[Information] = { validateWithValidation(value, MemoryValidation.instance(), onMissingKey) } - def validateCpu(cpu: Option[WomValue], onMissingKey: => ErrorOr[Int]): ErrorOr[Int] = { + def validateCpu(cpu: Option[WomValue], onMissingKey: => ErrorOr[Int Refined Positive]): ErrorOr[Int Refined Positive] = { validateWithValidation(cpu, CpuValidation.instance, onMissingKey) } @@ -72,14 +74,10 @@ object RuntimeAttributesValidation { } } - def parseMemoryString(k: String, s: WomString): ErrorOr[MemorySize] = { + def parseMemoryString(k: String, s: WomString): ErrorOr[Information] = { MemoryValidation.validateMemoryString(k, s) } - def parseMemoryInteger(k: String, i: WomInteger): ErrorOr[MemorySize] = { - MemoryValidation.validateMemoryInteger(k, i) - } - def withDefault[ValidatedType](validation: RuntimeAttributesValidation[ValidatedType], default: WomValue): RuntimeAttributesValidation[ValidatedType] = { new RuntimeAttributesValidation[ValidatedType] { @@ -140,6 +138,8 @@ object RuntimeAttributesValidation { val attributes: Map[String, String] = attributeOptions collect { case (name, Some(values: Traversable[_])) => (name, values.mkString(",")) + // For information, format it in MB instead of the default toString which prints in Bytes + case (name, Some(information: Information)) => (name, information.toString(Megabytes).toString) case (name, Some(value)) => (name, value.toString) } diff --git a/backend/src/test/scala/cromwell/backend/wdl/OutputEvaluatorSpec.scala b/backend/src/test/scala/cromwell/backend/OutputEvaluatorSpec.scala similarity index 97% rename from backend/src/test/scala/cromwell/backend/wdl/OutputEvaluatorSpec.scala rename to backend/src/test/scala/cromwell/backend/OutputEvaluatorSpec.scala index f8ef7c160bf..a6855e4479f 100644 --- a/backend/src/test/scala/cromwell/backend/wdl/OutputEvaluatorSpec.scala +++ b/backend/src/test/scala/cromwell/backend/OutputEvaluatorSpec.scala @@ -1,12 +1,11 @@ -package cromwell.backend.wdl +package cromwell.backend import java.util.concurrent.Executors import cats.data.{NonEmptyList, Validated} import cats.syntax.validated._ import common.validation.ErrorOr.ErrorOr -import 
cromwell.backend.wdl.OutputEvaluator.{InvalidJobOutputs, JobOutputsEvaluationException, ValidJobOutputs} -import cromwell.backend.{BackendJobDescriptor, BackendJobDescriptorKey} +import cromwell.backend.OutputEvaluator.{InvalidJobOutputs, JobOutputsEvaluationException, ValidJobOutputs} import cromwell.core.CallOutputs import cromwell.util.WomMocks import org.scalatest.{FlatSpec, Matchers} diff --git a/backend/src/test/scala/cromwell/backend/io/DirectoryFunctionsSpec.scala b/backend/src/test/scala/cromwell/backend/io/DirectoryFunctionsSpec.scala index 98a81a190d4..b522432d9c2 100644 --- a/backend/src/test/scala/cromwell/backend/io/DirectoryFunctionsSpec.scala +++ b/backend/src/test/scala/cromwell/backend/io/DirectoryFunctionsSpec.scala @@ -12,13 +12,15 @@ class DirectoryFunctionsSpec extends FlatSpec with Matchers { val functions = new DirectoryFunctions { override def pathBuilders = List(DefaultPathBuilder) - override def copyFile(source: String, destination: String) = ??? - override def glob(pattern: String) = ??? - override def size(path: String) = ??? - override def readFile(path: String, maxBytes: Option[Int], failOnOverflow: Boolean) = ??? - override def pathFunctions = ??? - override def writeFile(path: String, content: String) = ??? - override implicit def ec = ??? + override def copyFile(source: String, destination: String) = throw new UnsupportedOperationException() + override def glob(pattern: String) = throw new UnsupportedOperationException() + override def size(path: String) = throw new UnsupportedOperationException() + override def readFile(path: String, maxBytes: Option[Int], failOnOverflow: Boolean) = throw new UnsupportedOperationException() + override def pathFunctions = throw new UnsupportedOperationException() + override def writeFile(path: String, content: String) = throw new UnsupportedOperationException() + override implicit def ec = throw new UnsupportedOperationException() + override def createTemporaryDirectory(name: Option[String]) = throw new UnsupportedOperationException() + override def asyncIo = throw new UnsupportedOperationException() } "listDirectory" should "exclude visited directories when listing" in { diff --git a/backend/src/test/scala/cromwell/backend/validation/RuntimeAttributesValidationSpec.scala b/backend/src/test/scala/cromwell/backend/validation/RuntimeAttributesValidationSpec.scala index 927e4a9dac7..32a2aba4075 100644 --- a/backend/src/test/scala/cromwell/backend/validation/RuntimeAttributesValidationSpec.scala +++ b/backend/src/test/scala/cromwell/backend/validation/RuntimeAttributesValidationSpec.scala @@ -188,7 +188,7 @@ class RuntimeAttributesValidationSpec extends WordSpecLike with Matchers with Be val result = RuntimeAttributesValidation.validateMemory(memoryValue, "Failed to get memory mandatory key from runtime attributes".invalidNel) result match { - case Valid(x) => assert(x.amount == expectedGb) + case Valid(x) => assert(x.toGigabytes == expectedGb) case Invalid(e) => fail(e.toList.mkString(" ")) } } @@ -209,7 +209,7 @@ class RuntimeAttributesValidationSpec extends WordSpecLike with Matchers with Be val result = RuntimeAttributesValidation.validateMemory(memoryValue, "Failed to get memory mandatory key from runtime attributes".invalidNel) result match { - case Valid(x) => assert(x.amount == expectedGb) + case Valid(x) => assert(x.toGigabytes == expectedGb) case Invalid(e) => fail(e.toList.mkString(" ")) } } @@ -259,7 +259,7 @@ class RuntimeAttributesValidationSpec extends WordSpecLike with Matchers with Be val result = 
RuntimeAttributesValidation.validateCpu(cpuValue, "Failed to get cpu mandatory key from runtime attributes".invalidNel) result match { - case Valid(x) => assert(x == 1) + case Valid(x) => assert(x.value == 1) case Invalid(e) => fail(e.toList.mkString(" ")) } } diff --git a/build.sbt b/build.sbt index 981d623270c..c85e0db71d8 100644 --- a/build.sbt +++ b/build.sbt @@ -66,6 +66,13 @@ lazy val cloudSupport = project .withLibrarySettings("cromwell-cloud-support", cloudSupportDependencies) .dependsOn(common) +lazy val awsS3FileSystem = (project in file("filesystems/s3")) + .withLibrarySettings("cromwell-aws-s3filesystem") + .dependsOn(core) + .dependsOn(cloudSupport) + .dependsOn(core % "test->test") + .dependsOn(cloudSupport % "test->test") + lazy val gcsFileSystem = (project in file("filesystems/gcs")) .withLibrarySettings("cromwell-gcsfilesystem") .dependsOn(core) @@ -140,6 +147,14 @@ lazy val jesBackend = (project in backendRoot / "jes") .withLibrarySettings("cromwell-jes-backend") .dependsOn(googlePipelinesV1Alpha2) +lazy val awsBackend = (project in backendRoot / "aws") + .withLibrarySettings("cromwell-aws-backend") + .dependsOn(backend) + .dependsOn(awsS3FileSystem) + .dependsOn(backend % "test->test") + .dependsOn(awsS3FileSystem % "test->test") + .dependsOn(services % "test->test") + lazy val sfsBackend = (project in backendRoot / "sfs") .withLibrarySettings("cromwell-sfs-backend") .dependsOn(backend) @@ -231,6 +246,7 @@ lazy val server = project .dependsOn(googlePipelinesV2Alpha1) .dependsOn(jesBackend) .dependsOn(bcsBackend) + .dependsOn(awsBackend) .dependsOn(tesBackend) .dependsOn(sparkBackend) .dependsOn(cromwellApiClient) @@ -257,12 +273,14 @@ lazy val root = (project in file(".")) .aggregate(databaseSql) .aggregate(dockerHashing) .aggregate(engine) + .aggregate(awsS3FileSystem) .aggregate(gcsFileSystem) .aggregate(googlePipelinesCommon) .aggregate(googlePipelinesV1Alpha2) .aggregate(googlePipelinesV2Alpha1) .aggregate(jesBackend) .aggregate(languageFactoryCore) + .aggregate(awsBackend) .aggregate(ossFileSystem) .aggregate(server) .aggregate(services) diff --git a/centaur/src/it/resources/application.conf b/centaur/src/it/resources/application.conf index 72f8dc9221f..91c964ab06c 100644 --- a/centaur/src/it/resources/application.conf +++ b/centaur/src/it/resources/application.conf @@ -1 +1 @@ -akka.http.host-connection-pool.max-open-requests: 128 +akka.http.host-connection-pool.max-open-requests: 1024 diff --git a/centaur/src/main/resources/integrationTestCases/SmartSeq2SingleSample.test b/centaur/src/main/resources/integrationTestCases/SmartSeq2SingleSample.test new file mode 100644 index 00000000000..550b91386df --- /dev/null +++ b/centaur/src/main/resources/integrationTestCases/SmartSeq2SingleSample.test @@ -0,0 +1,21 @@ +name: SmartSeq2SingleSample +testFormat: workflowsuccess + +files { + workflow: hca/SmartSeq2SingleSample/SmartSeq2SingleSample.wdl + inputs: hca/SmartSeq2SingleSample/SmartSeq2SingleSample.inputs.json + labels: hca/SmartSeq2SingleSample/SmartSeq2SingleSample.labels.json + imports: [ + hca/HISAT2.wdl, + hca/Picard.wdl, + hca/RSEM.wdl + ] +} + +metadata { + workflowName: SmartSeq2SingleCell + status: Succeeded + "labels.project": "mint-pipeline-dev" + "labels.pipeline": "ss2" + "labels.name": "pipeline" +} diff --git a/centaur/src/main/resources/integrationTestCases/Somatic/CNV-Pair/cnv_somatic_pair_workflow_do_gc_wes.inputs b/centaur/src/main/resources/integrationTestCases/Somatic/CNV-Pair/cnv_somatic_pair_workflow_do_gc_wes.inputs index 
6d9e9ed634d..2eab53cfd6a 100644 --- a/centaur/src/main/resources/integrationTestCases/Somatic/CNV-Pair/cnv_somatic_pair_workflow_do_gc_wes.inputs +++ b/centaur/src/main/resources/integrationTestCases/Somatic/CNV-Pair/cnv_somatic_pair_workflow_do_gc_wes.inputs @@ -1,4 +1,5 @@ { + "CNVSomaticPairWorkflow.preemptible_attempts": 0, "CNVSomaticPairWorkflow.ref_fasta": "gs://gatk-test-data/cnv/somatic/human_g1k_v37.chr-20.truncated.fasta", "CNVSomaticPairWorkflow.read_count_pon": "gs://gatk-test-data/cnv/somatic/wes-do-gc-trunc.pon.hdf5", "CNVSomaticPairWorkflow.ref_fasta_fai": "gs://gatk-test-data/cnv/somatic/human_g1k_v37.chr-20.truncated.fasta.fai", diff --git a/centaur/src/main/resources/integrationTestCases/Somatic/CNV-Panel/cnv_somatic_panel_workflow_do_gc_wes.inputs b/centaur/src/main/resources/integrationTestCases/Somatic/CNV-Panel/cnv_somatic_panel_workflow_do_gc_wes.inputs index 86d0c5dac0e..5a6e0045f20 100644 --- a/centaur/src/main/resources/integrationTestCases/Somatic/CNV-Panel/cnv_somatic_panel_workflow_do_gc_wes.inputs +++ b/centaur/src/main/resources/integrationTestCases/Somatic/CNV-Panel/cnv_somatic_panel_workflow_do_gc_wes.inputs @@ -1,4 +1,5 @@ { + "CNVSomaticPanelWorkflow.preemptible_attempts": 0, "CNVSomaticPanelWorkflow.do_explicit_gc_correction": "true", "CNVSomaticPanelWorkflow.gatk_docker": "us.gcr.io/broad-gatk/gatk:4.0.1.2", "CNVSomaticPanelWorkflow.intervals": "gs://gatk-test-data/cnv/somatic/ice_targets_sample-chr20.interval_list", diff --git a/centaur/src/main/resources/integrationTestCases/Somatic/Mutect2/Mutect2.inputs b/centaur/src/main/resources/integrationTestCases/Somatic/Mutect2/Mutect2.inputs index f284cc60346..fe2cfa8e9a0 100644 --- a/centaur/src/main/resources/integrationTestCases/Somatic/Mutect2/Mutect2.inputs +++ b/centaur/src/main/resources/integrationTestCases/Somatic/Mutect2/Mutect2.inputs @@ -1,4 +1,5 @@ { + "Mutect2.preemptible_attempts": 0, "Mutect2.normal_bam": "gs://gatk-best-practices/somatic-b37/HCC1143_normal.bam", "Mutect2.tumor_bam": "gs://gatk-best-practices/somatic-b37/HCC1143.bam", "Mutect2.normal_bai": "gs://gatk-best-practices/somatic-b37/HCC1143_normal.bai", diff --git a/centaur/src/main/resources/integrationTestCases/germline/joint-discovery-gatk/joint-discovery-gatk4.wdl b/centaur/src/main/resources/integrationTestCases/germline/joint-discovery-gatk/joint-discovery-gatk4.wdl index 010e36d2364..3bef6161001 100644 --- a/centaur/src/main/resources/integrationTestCases/germline/joint-discovery-gatk/joint-discovery-gatk4.wdl +++ b/centaur/src/main/resources/integrationTestCases/germline/joint-discovery-gatk/joint-discovery-gatk4.wdl @@ -329,24 +329,6 @@ workflow JointGenotyping { } } -task GetNumberOfSamples { - File sample_name_map - String mem_size - Int preemptibles - - command <<< - wc -l ${sample_name_map} | awk '{print $1}' - >>> - runtime { - docker: docker_image - memory: mem_size - preemptible: preemptibles - } - output { - Int sample_count = read_int(stdout()) - } -} - task ImportGVCFs { File sample_name_map String interval @@ -988,4 +970,4 @@ task DynamicallyCombineIntervals { output { File output_intervals = "out.intervals" } -} \ No newline at end of file +} diff --git a/centaur/src/main/resources/integrationTestCases/green/arrays/arrays.inputs b/centaur/src/main/resources/integrationTestCases/green/arrays/arrays.inputs index 194a73f3e9a..a79c958a059 100644 --- a/centaur/src/main/resources/integrationTestCases/green/arrays/arrays.inputs +++ b/centaur/src/main/resources/integrationTestCases/green/arrays/arrays.inputs @@ -1,39 
+1,33 @@ + { -"Arrays.sample_alias": "NA12716", -"Arrays.analysis_version_number": 1, -"Arrays.call_rate_threshold": 0.98, -"Arrays.reported_gender": "M", -"Arrays.idat_dir_name": "101342370134_R01C02", -"Arrays.file_of_idat_filenames": "gs://broad-gotc-test-storage/arrays/single_sample/idats/101342370134_R01C02.idats.txt", -"Arrays.fingerprint_gender_file": "gs://broad-gotc-test-storage/arrays/single_sample/inputs/101342370134_R01C02.fingerprint_gender.txt", -"Arrays.fingerprint_genotypes_vcf_file": "gs://broad-gotc-test-storage/arrays/single_sample/inputs/101342370134_R01C02.reference.fingerprint.vcf.gz", -"Arrays.fingerprint_genotypes_vcf_index_file": "gs://broad-gotc-test-storage/arrays/single_sample/inputs/101342370134_R01C02.reference.fingerprint.vcf.gz.tbi", -"Arrays.params_file": "gs://broad-gotc-test-storage/arrays/single_sample/inputs/params.txt", -"Arrays.bead_pool_manifest_file": "gs://broad-gotc-test-storage/arrays/single_sample/metadata/PsychChip_v1-1_15073391_A1/PsychChip_v1-1_15073391_A1.bpm", -"Arrays.extended_chip_manifest_file": "gs://broad-gotc-test-storage/arrays/single_sample/metadata/PsychChip_v1-1_15073391_A1/PsychChip_v1-1_15073391_A1.1.1.extended.csv", -"Arrays.cluster_file": "gs://broad-gotc-test-storage/arrays/single_sample/metadata/PsychChip_v1-1_15073391_A1/PsychChip_v1-1_15073391_A1_ClusterFile.egt", -"Arrays.gender_cluster_file": "gs://broad-gotc-test-storage/arrays/single_sample/metadata/PsychChip_v1-1_15073391_A1/empty_gender_cluster", -"Arrays.zcall_thresholds_file": "gs://broad-gotc-test-storage/arrays/single_sample/metadata/PsychChip_v1-1_15073391_A1/thresholds.7.txt", -"Arrays.control_sample_vcf_file": "gs://broad-gotc-test-storage/arrays/single_sample/arrays_controldata/NA12716.vcf.gz", -"Arrays.control_sample_vcf_index_file": "gs://broad-gotc-test-storage/arrays/single_sample/arrays_controldata/NA12716.vcf.gz.tbi", -"Arrays.control_sample_intervals_file": "gs://broad-gotc-test-storage/arrays/single_sample/arrays_controldata/NA12716.interval_list", -"Arrays.control_sample_name": "NA12716", -"Arrays.disk_size": 100, -"Arrays.ref_fasta": "gs://broad-references/hg19/v0/Homo_sapiens_assembly19.fasta", -"Arrays.ref_fasta_index": "gs://broad-references/hg19/v0/Homo_sapiens_assembly19.fasta.fai", -"Arrays.ref_dict": "gs://broad-references/hg19/v0/Homo_sapiens_assembly19.dict", -"Arrays.dbSNP_vcf": "gs://broad-references/hg19/v0/dbsnp_138.b37.vcf.gz", -"Arrays.dbSNP_vcf_index": "gs://broad-references/hg19/v0/dbsnp_138.b37.vcf.gz.tbi", -"Arrays.haplotype_database_file": "gs://broad-references-private/hg19/v0/Homo_sapiens_assembly19.haplotype_database.txt", -"Arrays.preemptible_tries": 3, -"COMMENT_#1": "Inputs below for MultiSampleArrays.wdl", -"MultiSampleArrays.ref_fasta": "gs://broad-references/hg19/v0/Homo_sapiens_assembly19.fasta", -"MultiSampleArrays.ref_fasta_index": "gs://broad-references/hg19/v0/Homo_sapiens_assembly19.fasta.fai", -"MultiSampleArrays.ref_dict": "gs://broad-references/hg19/v0/Homo_sapiens_assembly19.dict", -"MultiSampleArrays.callset_name": "callset_name", -"MultiSampleArrays.samples_fofn": "gs://broad-gotc-test-storage/arrays/multisample/psych-chip-validation/vcf/101342370027/multisample_vcf_fofn", -"MultiSampleArrays.sample_indices_fofn": "gs://broad-gotc-test-storage/arrays/multisample/psych-chip-validation/vcf/101342370027/multisample_vcf_index_fofn", -"MultiSampleArrays.disk_size": 100, -"MultiSampleArrays.preemptible_tries": 0, -"END": "This line does not need a comma." 
+ "Arrays.sample_alias": "NA12716", + "Arrays.analysis_version_number": 1, + "Arrays.call_rate_threshold": 0.98, + "Arrays.reported_gender": "Not Reported", + "Arrays.idat_dir_name": "101342370134_R01C02", + + "Arrays.file_of_idat_filenames": "gs://broad-gotc-test-storage/arrays/single_sample/idats/101342370134_R01C02.idats.txt", + "Arrays.fingerprint_genotypes_vcf_file": "gs://broad-gotc-test-storage/arrays/single_sample/inputs/101342370134_R01C02.reference.fingerprint.vcf.gz", + "Arrays.fingerprint_genotypes_vcf_index_file": "gs://broad-gotc-test-storage/arrays/single_sample/inputs/101342370134_R01C02.reference.fingerprint.vcf.gz.tbi", + "Arrays.params_file": "gs://broad-gotc-test-storage/arrays/single_sample/inputs/params.txt", + + "Arrays.bead_pool_manifest_file": "gs://broad-gotc-test-storage/arrays/metadata/PsychChip_v1-1_15073391_A1/PsychChip_v1-1_15073391_A1.bpm", + "Arrays.extended_chip_manifest_file": "gs://broad-gotc-test-storage/arrays/metadata/PsychChip_v1-1_15073391_A1/PsychChip_v1-1_15073391_A1.1.2.extended.csv", + "Arrays.cluster_file": "gs://broad-gotc-test-storage/arrays/metadata/PsychChip_v1-1_15073391_A1/PsychChip_v1-1_15073391_A1_ClusterFile.egt", + "Arrays.zcall_thresholds_file": "gs://broad-gotc-test-storage/arrays/metadata/PsychChip_v1-1_15073391_A1/thresholds.7.txt", + + "Arrays.control_sample_vcf_file" : "gs://broad-gotc-test-storage/arrays/controldata/NA12716.vcf.gz", + "Arrays.control_sample_vcf_index_file" : "gs://broad-gotc-test-storage/arrays/controldata/NA12716.vcf.gz.tbi", + "Arrays.control_sample_intervals_file" : "gs://broad-gotc-test-storage/arrays/controldata/NA12716.interval_list", + "Arrays.control_sample_name" : "NA12716", + "Arrays.disk_size": 100, + + "Arrays.ref_fasta": "gs://broad-references/hg19/v0/Homo_sapiens_assembly19.fasta", + "Arrays.ref_fasta_index": "gs://broad-references/hg19/v0/Homo_sapiens_assembly19.fasta.fai", + "Arrays.ref_dict": "gs://broad-references/hg19/v0/Homo_sapiens_assembly19.dict", + "Arrays.dbSNP_vcf": "gs://broad-references/hg19/v0/dbsnp_138.b37.vcf.gz", + "Arrays.dbSNP_vcf_index": "gs://broad-references/hg19/v0/dbsnp_138.b37.vcf.gz.tbi", + "Arrays.haplotype_database_file": "gs://broad-references-private/hg19/v0/Homo_sapiens_assembly19.haplotype_database.txt", + "Arrays.variant_rsids_file": "gs://broad-references-private/fingerprinting_variant_rsids_v1.list", + "Arrays.preemptible_tries": 0 } diff --git a/centaur/src/main/resources/integrationTestCases/green/arrays/arrays.options b/centaur/src/main/resources/integrationTestCases/green/arrays/arrays.options index 176c7059b6c..83d33e8594b 100644 --- a/centaur/src/main/resources/integrationTestCases/green/arrays/arrays.options +++ b/centaur/src/main/resources/integrationTestCases/green/arrays/arrays.options @@ -2,6 +2,6 @@ "read_from_cache":false, "default_runtime_attributes": { "zones": "us-central1-a us-central1-b us-central1-c us-central1-f", - "docker": "gcr.io/broad-gotc-dev/autocall:dev-2.2.3-1496943629" + "docker": "us.gcr.io/broad-gotc-dev/autocall:dev-3.0.0-1527695536" } } diff --git a/centaur/src/main/resources/integrationTestCases/green/arrays/arrays.wdl b/centaur/src/main/resources/integrationTestCases/green/arrays/arrays.wdl index 28caf2fa080..15a8ea29973 100644 --- a/centaur/src/main/resources/integrationTestCases/green/arrays/arrays.wdl +++ b/centaur/src/main/resources/integrationTestCases/green/arrays/arrays.wdl @@ -1,36 +1,294 @@ + +workflow Arrays { + String sample_alias + Int analysis_version_number + Float call_rate_threshold + String reported_gender + + 
String idat_dir_name + File file_of_idat_filenames + File ref_fasta + File ref_fasta_index + File ref_dict + + File dbSNP_vcf + File dbSNP_vcf_index + + File params_file + + File bead_pool_manifest_file + String bead_pool_manifest_filename = sub(bead_pool_manifest_file, "gs://.*/", "") + String chip_type = sub(bead_pool_manifest_filename, "\\.bpm$", "") + + File extended_chip_manifest_file + File cluster_file + File? gender_cluster_file + File? zcall_thresholds_file + + # For CheckFingerprint: + File? fingerprint_genotypes_vcf_file + File? fingerprint_genotypes_vcf_index_file + File haplotype_database_file + + # For SelectVariants + File variant_rsids_file + + # For HapMap GenotypeConcordance Check: + File? control_sample_vcf_file + File? control_sample_vcf_index_file + File? control_sample_intervals_file + String? control_sample_name + + Int disk_size + Int preemptible_tries + + #call UpdateChipWellBarcodeIndex { + # input: + # params_file = params_file, + # disk_size = disk_size, + # preemptible_tries = preemptible_tries + #} + + call AutoCall { + input: + idat_dir_name = idat_dir_name, + file_of_idat_filenames = file_of_idat_filenames, + bead_pool_manifest_file = bead_pool_manifest_file, + bead_pool_manifest_filename = bead_pool_manifest_filename, + cluster_file = cluster_file, + disk_size = disk_size, + preemptible_tries = preemptible_tries + } + + if (size(AutoCall.gtc_file) == 0) { + call GetFailedAutocallVersion { + input: + autocallStdOut = AutoCall.autocall_stdout + } + + call GenerateEmptyVariantCallingMetricsFile { + input: + chip_well_barcode = idat_dir_name, + sample_alias = sample_alias, + chip_type = chip_type, + reported_gender = reported_gender, + autocall_version = GetFailedAutocallVersion.autocallVersion, + output_metrics_basename = sample_alias, + cluster_file = cluster_file, + analysis_version_number = analysis_version_number, + preemptible_tries = preemptible_tries + } + + #call UploadArraysMetrics as UploadEmptyArraysMetrics { + # input: + # arrays_variant_calling_detail_metrics = GenerateEmptyVariantCallingMetricsFile.detail_metrics, + # disk_size = disk_size, + # preemptible_tries = preemptible_tries + #} + + #call BlacklistBarcode { + # input: + # upload_metrics_output = UploadEmptyArraysMetrics.upload_metrics_empty_file, + # chip_well_barcode = idat_dir_name, + # preemptible_tries = preemptible_tries + #} + } + + if (defined(gender_cluster_file)) { + call AutoCall as GenderAutocall { + input: + idat_dir_name = idat_dir_name, + file_of_idat_filenames = file_of_idat_filenames, + bead_pool_manifest_file = bead_pool_manifest_file, + bead_pool_manifest_filename = bead_pool_manifest_filename, + cluster_file = gender_cluster_file, + disk_size = disk_size, + preemptible_tries = preemptible_tries + } + } + + if (size(AutoCall.gtc_file) > 0) { + call GtcToVcf { + input: + vcf_filename = sub(sub(AutoCall.gtc_file, "gs://.*/", ""), ".gtc$", "") + ".vcf.gz", + input_gtc = AutoCall.gtc_file, + gender_gtc = GenderAutocall.gtc_file, + extended_chip_manifest_file = extended_chip_manifest_file, + cluster_file = cluster_file, + normalization_manifest_file = AutoCall.bead_pool_manifest_csv_file, + zcall_thresholds_file = zcall_thresholds_file, + sample_alias = sample_alias, + analysis_version_number = analysis_version_number, + reported_gender = reported_gender, + fingerprint_genotypes_vcf_file = fingerprint_genotypes_vcf_file, + fingerprint_genotypes_vcf_index_file = fingerprint_genotypes_vcf_index_file, + ref_fasta = ref_fasta, + ref_fasta_index = ref_fasta_index, + ref_dict = 
ref_dict, + disk_size = disk_size, + preemptible_tries = preemptible_tries + } + + if (defined(zcall_thresholds_file)) { + call zCall { + input: + zcall_ped_filename = sub(sub(AutoCall.gtc_file, "gs://.*/", ""), ".gtc$", "") + ".zcall.ped", + zcall_map_filename = sub(sub(AutoCall.gtc_file, "gs://.*/", ""), ".gtc$", "") + ".zcall.map", + input_gtc = AutoCall.gtc_file, + bead_pool_manifest_csv_file = AutoCall.bead_pool_manifest_csv_file, + zcall_thresholds_file = zcall_thresholds_file, + disk_size = disk_size, + preemptible_tries = preemptible_tries + } + } + + if (defined(zCall.ped_file) && defined(zCall.map_file)) { + call MergePedIntoVcf { + input: + input_vcf = GtcToVcf.output_vcf, + input_vcf_index = GtcToVcf.output_vcf_index, + output_vcf_filename = sub(GtcToVcf.output_vcf, "gs://.*/", ""), + ped_file = zCall.ped_file, + map_file = zCall.map_file, + disk_size = disk_size, + preemptible_tries = preemptible_tries + } + } + + # if zCall doesn't run, then MergePedIntoVcf doesn't run. + # if MergePedIntoVcf doesn't run, then MergePedIntoVcf.output_vcf and output_vcf_index should just be + # the MergePedIntoVcf.input_vcf and input_vcf_index + # using select_first to cast File? to File type + File MergePedIntoVcfOutputVcf = if (defined(zCall.ped_file) && defined(zCall.map_file)) then select_first([MergePedIntoVcf.output_vcf]) else GtcToVcf.output_vcf + File MergePedIntoVcfOutputVcfIndex = if (defined(zCall.ped_file) && defined(zCall.map_file)) then select_first([MergePedIntoVcf.output_vcf_index]) else GtcToVcf.output_vcf_index + + call CollectArraysVariantCallingMetrics { + input: + input_vcf_file = MergePedIntoVcfOutputVcf, + input_vcf_index_file = MergePedIntoVcfOutputVcfIndex, + dbSNP_vcf_file = dbSNP_vcf, + dbSNP_vcf_index_file = dbSNP_vcf_index, + call_rate_threshold = call_rate_threshold, + output_metrics_basename = sub(sub(AutoCall.gtc_file, "gs://.*/", ""), ".gtc$", ""), + disk_size = disk_size, + preemptible_tries = preemptible_tries + } + + call VcfToIntervalList { + input: + vcf_file = MergePedIntoVcfOutputVcf, + interval_list_file_name = sub(sub(MergePedIntoVcfOutputVcf, "gs://.*/", ""), ".vcf.gz$", "") + ".interval_list", + disk_size = disk_size, + preemptible_tries = preemptible_tries + } + + if (defined(fingerprint_genotypes_vcf_file) && defined(fingerprint_genotypes_vcf_index_file)) { + call CheckFingerprint { + input: + input_vcf_file = MergePedIntoVcfOutputVcf, + input_vcf_index_file = MergePedIntoVcfOutputVcfIndex, + genotypes_vcf_file = fingerprint_genotypes_vcf_file, + genotypes_vcf_index_file = fingerprint_genotypes_vcf_index_file, + haplotype_database_file = haplotype_database_file, + observed_sample_alias = sub(sub(AutoCall.gtc_file, "gs://.*/", ""), ".gtc$", ""), + expected_sample_alias = sample_alias, + output_metrics_basename = sub(sub(AutoCall.gtc_file, "gs://.*/", ""), ".gtc$", ""), + disk_size = disk_size, + preemptible_tries = preemptible_tries + } + + call SelectVariants { + input: + input_vcf_file = MergePedIntoVcfOutputVcf, + input_vcf_index_file = MergePedIntoVcfOutputVcfIndex, + variant_rsids_file = variant_rsids_file, + disk_size = disk_size, + preemptible_tries = preemptible_tries + } + } + + if (defined(control_sample_vcf_file) && defined(control_sample_vcf_index_file) && + defined(control_sample_intervals_file) && defined(control_sample_name)) { + call GenotypeConcordance { + input: + call_vcf_file = MergePedIntoVcfOutputVcf, + call_vcf_index_file = MergePedIntoVcfOutputVcfIndex, + call_intervals_file = VcfToIntervalList.interval_list_file, + 
call_sample_name = sub(sub(AutoCall.gtc_file, "gs://.*/", ""), ".gtc$", ""), + truth_vcf_file = control_sample_vcf_file, + truth_vcf_index_file = control_sample_vcf_index_file, + truth_intervals_file = control_sample_intervals_file, + truth_sample_name = control_sample_name, + output_metrics_basename = sub(sub(AutoCall.gtc_file, "gs://.*/", ""), ".gtc$", ""), + disk_size = disk_size, + preemptible_tries = preemptible_tries + } + } + + #call UploadArraysMetrics { + # input: + # arrays_variant_calling_detail_metrics = CollectArraysVariantCallingMetrics.detail_metrics, + # arrays_variant_calling_summary_metrics = CollectArraysVariantCallingMetrics.summary_metrics, + # arrays_control_code_summary_metrics = CollectArraysVariantCallingMetrics.control_metrics, + # fingerprinting_detail_metrics = CheckFingerprint.detail_metrics, + # fingerprinting_summary_metrics = CheckFingerprint.summary_metrics, + # genotype_concordance_summary_metrics = GenotypeConcordance.summary_metrics, + # genotype_concordance_detail_metrics = GenotypeConcordance.detail_metrics, + # genotype_concordance_contingency_metrics = GenotypeConcordance.contingency_metrics, + # disk_size = disk_size, + # preemptible_tries = preemptible_tries + #} +} + File ArraysVariantCallingDetailMetrics = if (defined(CollectArraysVariantCallingMetrics.detail_metrics)) then select_first([CollectArraysVariantCallingMetrics.detail_metrics]) else select_first([GenerateEmptyVariantCallingMetricsFile.detail_metrics]) + + output { + AutoCall.gtc_file + File ArraysVariantCallingDetailMetricsFile = ArraysVariantCallingDetailMetrics + File? ArraysVariantCallingSummaryMetricsFile = CollectArraysVariantCallingMetrics.summary_metrics + File? ArraysVariantCallingControlMetricsFile = CollectArraysVariantCallingMetrics.control_metrics + File?? FingerprintDetailMetricsFile = CheckFingerprint.detail_metrics + File?? FingerprintSummaryMetricsFile = CheckFingerprint.summary_metrics + File?? GenotypeConcordanceSummaryMetricsFile = GenotypeConcordance.summary_metrics + File?? GenotypeConcordanceDetailMetricsFile = GenotypeConcordance.detail_metrics + File?? GenotypeConcordanceContingencyMetricsFile = GenotypeConcordance.contingency_metrics + File? MergePedIntoVcfOutputVcfFile = MergePedIntoVcfOutputVcf + File? MergePedIntoVcfOutputVcfIndexFile = MergePedIntoVcfOutputVcfIndex + } +} + task AutoCall { String idat_dir_name File file_of_idat_filenames File bead_pool_manifest_file String bead_pool_manifest_filename - File cluster_file + File? cluster_file Int disk_size Int preemptible_tries command <<< - + set -e rm -rf ${idat_dir_name} mkdir ${idat_dir_name} - if [ -s ${cluster_file} ]; then - RETRY_LIMIT=5 + RETRY_LIMIT=5 - until cat ${file_of_idat_filenames} | /root/google-cloud-sdk/bin/gsutil -m cp -L cp.log -c -I ${idat_dir_name}/; do - sleep 1 - ((count++)) && ((count==$RETRY_LIMIT)) && break - done + until cat ${file_of_idat_filenames} | /root/google-cloud-sdk/bin/gsutil -m cp -L cp.log -c -I ${idat_dir_name}/; do + sleep 1 + ((count++)) && ((count==$RETRY_LIMIT)) && break + done - if [ "$count" = "$RETRY_LIMIT" ]; then - echo 'Could not copy all the files from the cloud' && exit 1 - fi + if [ "$count" = "$RETRY_LIMIT" ]; then + echo 'Could not copy all the files from the cloud' && exit 1 + fi - mono /usr/gitc/autocall/AutoConvert.exe ${idat_dir_name} ${idat_dir_name} ${bead_pool_manifest_file} ${cluster_file} - #wtf autocall writes the manifest csv to the root directory with an escape character before it e.g. /\PsychChip_v1-1_15073391_A1.bpm.csv WHY!! 
- cp /*.bpm.csv ${bead_pool_manifest_filename}.csv - else - echo "Found 0-length cluster_file, not running autocall" + mono /usr/gitc/autocall/AutoConvert.exe ${idat_dir_name} ${idat_dir_name} ${bead_pool_manifest_file} ${cluster_file} | tee autocall_std.txt + + #wtf autocall writes the manifest csv to the root directory with an escape character before it e.g. /\PsychChip_v1-1_15073391_A1.bpm.csv WHY!! + cp /*.bpm.csv ${bead_pool_manifest_filename}.csv + + if grep -q "Normalization failed" autocall_std.txt; then + # make an empty gtc file so that jes won't fail this task and wf can move on touch ${idat_dir_name}/${idat_dir_name}.gtc - touch ${bead_pool_manifest_filename}.csv fi >>> runtime { @@ -41,6 +299,75 @@ task AutoCall { output { File gtc_file = "${idat_dir_name}/${idat_dir_name}.gtc" File bead_pool_manifest_csv_file = "${bead_pool_manifest_filename}.csv" + File autocall_stdout = "autocall_std.txt" + } +} + +task GetFailedAutocallVersion { + File autocallStdOut + + command <<< + stringContainsVersion=`grep "Calling Utility" ${autocallStdOut}` + echo "$stringContainsVersion" | cut -d ' ' -f 5- + >>> + + output { + String autocallVersion = read_string(stdout()) + } +} + +task GenerateEmptyVariantCallingMetricsFile { + String chip_well_barcode + String sample_alias + String chip_type + String reported_gender + String autocall_version + String output_metrics_basename + File? cluster_file + Int analysis_version_number + Int preemptible_tries + + command <<< + java -Xmx2g -jar /usr/gitc/picard-private.jar \ + GenerateEmptyVariantCallingMetrics \ + CHIP_WELL_BARCODE=${chip_well_barcode} \ + SAMPLE_ALIAS="${sample_alias}" \ + CHIP_TYPE=${chip_type} \ + REPORTED_GENDER="${reported_gender}" \ + CLUSTER_FILE_NAME=${cluster_file} \ + AUTOCALL_VERSION=${autocall_version} \ + ANALYSIS_VERSION=${analysis_version_number} \ + OUTPUT=${output_metrics_basename} + >>> + + runtime { + memory: "2 GB" + preemptible: preemptible_tries + } + + output { + File detail_metrics = "${output_metrics_basename}.arrays_variant_calling_detail_metrics" + } +} + +task BlacklistBarcode { + File upload_metrics_output + String chip_well_barcode + Int preemptible_tries + + command <<< + java -Xmx2g -jar /usr/gitc/picard-private.jar \ + ArraysManualBlacklistUpdate \ + CHIP_WELL_BARCODE=${chip_well_barcode} \ + REASON=DATA_QUALITY \ + DB_USERNAME_FILE=/usr/gitc/cloudsql.db_username.txt \ + DB_PASSWORD_FILE=/usr/gitc/cloudsql.db_password.txt \ + DB_JDBC_FILE=/usr/gitc/cloudsql.db_jdbc.txt \ + NOTES="Normalization failed" + >>> + runtime { + memory: "2 GB" + preemptible: preemptible_tries } } @@ -49,20 +376,14 @@ task zCall { String zcall_map_filename File input_gtc File bead_pool_manifest_csv_file - File zcall_thresholds_file # if this file is empty (0-length) the workflow will not run zCall + File? zcall_thresholds_file Int disk_size Int preemptible_tries command <<< - if [ -s ${zcall_thresholds_file} ]; then - python /usr/gitc/zcall/zCall.py -B ${bead_pool_manifest_csv_file} -G ${input_gtc} -T ${zcall_thresholds_file} > ${zcall_ped_filename} - python /usr/gitc/zcall/makeMAPfile.py -B ${bead_pool_manifest_csv_file} > ${zcall_map_filename} - else - echo "Found 0-length zCall thresholds file. 
Not running zCall" - touch ${zcall_ped_filename} - touch ${zcall_map_filename} - fi + python /usr/gitc/zcall/zCall.py -B ${bead_pool_manifest_csv_file} -G ${input_gtc} -T ${zcall_thresholds_file} > ${zcall_ped_filename} + python /usr/gitc/zcall/makeMAPfile.py -B ${bead_pool_manifest_csv_file} > ${zcall_map_filename} >>> runtime { disks: "local-disk " + disk_size + " HDD" @@ -70,23 +391,22 @@ task zCall { preemptible: preemptible_tries } output { - File ped_file = "${zcall_ped_filename}" - File map_file = "${zcall_map_filename}" + File? ped_file = "${zcall_ped_filename}" + File? map_file = "${zcall_map_filename}" } } task GtcToVcf { String vcf_filename File input_gtc - File gender_gtc + File? gender_gtc File extended_chip_manifest_file File cluster_file File normalization_manifest_file - File zcall_thresholds_file + File? zcall_thresholds_file String sample_alias Int analysis_version_number String reported_gender - File fingerprint_gender_file File ref_fasta File ref_fasta_index @@ -95,28 +415,23 @@ task GtcToVcf { Int disk_size Int preemptible_tries - command { - if [ -s ${gender_gtc} ]; then - gender_gtc_file='GENDER_GTC=${gender_gtc}' - fi - - if [ -s ${zcall_thresholds_file} ]; then - zcall_file='ZCALL_THRESHOLDS_FILE=${zcall_thresholds_file}' - fi + File? fingerprint_genotypes_vcf_file + File? fingerprint_genotypes_vcf_index_file + command { java -Xmx7g -jar /usr/gitc/picard-private.jar \ GtcToVcf \ INPUT=${input_gtc} \ - $gender_gtc_file \ - $zcall_file \ + ${"GENDER_GTC=" + gender_gtc} \ + ${"ZCALL_THRESHOLDS_FILE=" + zcall_thresholds_file} \ OUTPUT=${vcf_filename} \ MANIFEST=${extended_chip_manifest_file} \ CLUSTER_FILE=${cluster_file} \ ILLUMINA_NORMALIZATION_MANIFEST=${normalization_manifest_file} \ - SAMPLE_ALIAS=${sample_alias} \ + SAMPLE_ALIAS="${sample_alias}" \ ANALYSIS_VERSION_NUMBER=${analysis_version_number} \ - EXPECTED_GENDER=${reported_gender} \ - FINGERPRINT_GENDER_FILE=${fingerprint_gender_file} \ + EXPECTED_GENDER="${reported_gender}" \ + ${"FINGERPRINT_GENOTYPES_VCF_FILE=" + fingerprint_genotypes_vcf_file} \ REFERENCE_SEQUENCE=${ref_fasta} \ CREATE_INDEX=true } @@ -144,11 +459,11 @@ task CollectArraysVariantCallingMetrics { command { java -Xmx2g -jar /usr/gitc/picard-private.jar \ - CollectArraysVariantCallingMetrics \ - INPUT=${input_vcf_file} \ - DBSNP=${dbSNP_vcf_file} \ - CALL_RATE_PF_THRESHOLD=${call_rate_threshold} \ - OUTPUT=${output_metrics_basename} + CollectArraysVariantCallingMetrics \ + INPUT=${input_vcf_file} \ + DBSNP=${dbSNP_vcf_file} \ + CALL_RATE_PF_THRESHOLD=${call_rate_threshold} \ + OUTPUT=${output_metrics_basename} } runtime { disks: "local-disk " + disk_size + " HDD" @@ -188,8 +503,8 @@ task VcfToIntervalList { task CheckFingerprint { File input_vcf_file File input_vcf_index_file - File genotypes_vcf_file - File genotypes_vcf_index_file + File? genotypes_vcf_file + File? genotypes_vcf_index_file File haplotype_database_file String observed_sample_alias String expected_sample_alias @@ -198,22 +513,23 @@ task CheckFingerprint { Int disk_size Int preemptible_tries + # Paraphrased from Yossi: + # Override the default LOD threshold of 5 because if the PL field + # is missing from the VCF, CheckFingerprint will default to an error + # rate equivalent to a LOD score of 2, and we don't want to see + # confident LOD scores w/ no confident SNPs. 
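+  # (The 1.9 set below is deliberately just under that implied LOD of 2.)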
+ Float genotype_lod_threshold = 1.9 + command <<< - if [ -s ${genotypes_vcf_file} ]; then java -Xmx2g -jar /usr/gitc/picard-private.jar \ - CheckFingerprint \ - INPUT=${input_vcf_file} \ - OBSERVED_SAMPLE_ALIAS=${observed_sample_alias} \ - GENOTYPES=${genotypes_vcf_file} \ - EXPECTED_SAMPLE_ALIAS=${expected_sample_alias} \ - HAPLOTYPE_MAP=${haplotype_database_file} \ - OUTPUT=${output_metrics_basename} - else - echo "No fingerprint found. Skipping Fingerprint check" - # We touch the outputs here in order to create 0 length files. Otherwise the task will fail since the expected outputs are not to be found - touch ${output_metrics_basename}.fingerprinting_summary_metrics - touch ${output_metrics_basename}.fingerprinting_detail_metrics - fi + CheckFingerprint \ + INPUT=${input_vcf_file} \ + OBSERVED_SAMPLE_ALIAS="${observed_sample_alias}" \ + ${"GENOTYPES=" + genotypes_vcf_file} \ + EXPECTED_SAMPLE_ALIAS="${expected_sample_alias}" \ + HAPLOTYPE_MAP=${haplotype_database_file} \ + GENOTYPE_LOD_THRESHOLD=${genotype_lod_threshold} \ + OUTPUT=${output_metrics_basename} >>> runtime { disks: "local-disk " + disk_size + " HDD" @@ -226,39 +542,60 @@ task CheckFingerprint { } } +task SelectVariants { + File input_vcf_file + File input_vcf_index_file + File variant_rsids_file + + Int disk_size + Int preemptible_tries + + String fingerprints_file = "fingerprints.vcf.gz" + + command <<< + export GATK_LOCAL_JAR="/root/gatk.jar" + gatk --java-options "-Xms2g" \ + SelectVariants \ + -V ${input_vcf_file} \ + --keep-ids ${variant_rsids_file} \ + -O ${fingerprints_file} + >>> + runtime { + disks: "local-disk " + disk_size + " HDD" + docker: "us.gcr.io/broad-gatk/gatk:4.0.0.0" + memory: "3.5 GB" + preemptible: preemptible_tries + } + output { + File fingerprints = fingerprints_file + File fingerprints_index = fingerprints_file + ".tbi" + } +} + task GenotypeConcordance { File call_vcf_file File call_vcf_index_file File call_intervals_file String call_sample_name - File truth_vcf_file - File truth_vcf_index_file - File truth_intervals_file - String truth_sample_name + File? truth_vcf_file + File? truth_vcf_index_file + File? truth_intervals_file + String? truth_sample_name String output_metrics_basename Int disk_size Int preemptible_tries command { - if [ -s ${truth_vcf_file} ]; then - - java -Xmx2g -jar /usr/gitc/picard-private.jar \ - GenotypeConcordance \ - CALL_VCF=${call_vcf_file} \ - CALL_SAMPLE=${call_sample_name} \ - TRUTH_VCF=${truth_vcf_file} \ - TRUTH_SAMPLE=${truth_sample_name} \ - INTERVALS=${call_intervals_file} \ - INTERVALS=${truth_intervals_file} \ - OUTPUT=${output_metrics_basename} - - else - echo "No truth_sample_name provided. 
Skipping GenotypeConcordance" - touch "${output_metrics_basename}.genotype_concordance_summary_metrics" - touch "${output_metrics_basename}.genotype_concordance_detail_metrics" - touch "${output_metrics_basename}.genotype_concordance_contingency_metrics" - fi + java -Xmx2g -jar /usr/gitc/picard-private.jar \ + GenotypeConcordance \ + CALL_VCF=${call_vcf_file} \ + CALL_SAMPLE=${call_sample_name} \ + TRUTH_VCF=${truth_vcf_file} \ + TRUTH_SAMPLE=${truth_sample_name} \ + INTERVALS=${call_intervals_file} \ + INTERVALS=${truth_intervals_file} \ + OUTPUT=${output_metrics_basename} } runtime { disks: "local-disk " + disk_size + " HDD" @@ -274,13 +611,13 @@ task GenotypeConcordance { task UploadArraysMetrics { File arrays_variant_calling_detail_metrics - File arrays_variant_calling_summary_metrics - File arrays_control_code_summary_metrics - File fingerprinting_detail_metrics - File fingerprinting_summary_metrics - File genotype_concordance_summary_metrics - File genotype_concordance_detail_metrics - File genotype_concordance_contingency_metrics + File? arrays_variant_calling_summary_metrics + File? arrays_control_code_summary_metrics + File? fingerprinting_detail_metrics + File? fingerprinting_summary_metrics + File? genotype_concordance_summary_metrics + File? genotype_concordance_detail_metrics + File? genotype_concordance_contingency_metrics Int disk_size Int preemptible_tries @@ -288,27 +625,42 @@ task UploadArraysMetrics { command <<< rm -rf metrics_upload_dir && mkdir metrics_upload_dir && - cp ${arrays_variant_calling_detail_metrics} metrics_upload_dir && - cp ${arrays_variant_calling_summary_metrics} metrics_upload_dir && - cp ${arrays_control_code_summary_metrics} metrics_upload_dir && - cp ${fingerprinting_detail_metrics} metrics_upload_dir && - cp ${fingerprinting_summary_metrics} metrics_upload_dir && - cp ${genotype_concordance_summary_metrics} metrics_upload_dir && - cp ${genotype_concordance_detail_metrics} metrics_upload_dir && - cp ${genotype_concordance_contingency_metrics} metrics_upload_dir && + + # check that files are passed in before copying them -- [ -z FILE ] evaluates to true if FILE not there + ! [ -z ${genotype_concordance_summary_metrics} ] && + cp ${genotype_concordance_summary_metrics} metrics_upload_dir + ! [ -z ${genotype_concordance_detail_metrics} ] && + cp ${genotype_concordance_detail_metrics} metrics_upload_dir + ! [ -z ${genotype_concordance_contingency_metrics} ] && + cp ${genotype_concordance_contingency_metrics} metrics_upload_dir + + ! [ -z ${fingerprinting_detail_metrics} ] && + cp ${fingerprinting_detail_metrics} metrics_upload_dir + ! [ -z ${fingerprinting_summary_metrics} ] && + cp ${fingerprinting_summary_metrics} metrics_upload_dir + + cp ${arrays_variant_calling_detail_metrics} metrics_upload_dir + ! [ -z ${arrays_variant_calling_summary_metrics} ] && + cp ${arrays_variant_calling_summary_metrics} metrics_upload_dir + + ! 
[ -z ${arrays_control_code_summary_metrics} ] && + cp ${arrays_control_code_summary_metrics} metrics_upload_dir java -Xmx2g -jar /usr/gitc/picard-private.jar \ UploadArraysMetrics \ ANALYSIS_DIRECTORY=metrics_upload_dir \ DB_USERNAME_FILE=/usr/gitc/cloudsql.db_username.txt \ DB_PASSWORD_FILE=/usr/gitc/cloudsql.db_password.txt \ - DB_JDBC_FILE=/usr/gitc/cloudsql.db_jdbc.txt - + DB_JDBC_FILE=/usr/gitc/cloudsql.db_jdbc.txt && + touch empty_file_for_dependency >>> runtime { disks: "local-disk " + disk_size + " HDD" memory: "2 GB" preemptible: preemptible_tries } + output { + File upload_metrics_empty_file = "empty_file_for_dependency" + } } task UpdateChipWellBarcodeIndex { @@ -335,8 +687,8 @@ task UpdateChipWellBarcodeIndex { task MergePedIntoVcf { File input_vcf File input_vcf_index - File ped_file - File map_file + File? ped_file + File? map_file String output_vcf_filename @@ -344,21 +696,13 @@ task MergePedIntoVcf { Int preemptible_tries command { - if [ -s ${ped_file} ]; then - - java -Xmx3g -jar /usr/gitc/picard-private.jar \ - MergePedIntoVcf \ - ORIGINAL_VCF=${input_vcf} \ - PED_FILE=${ped_file} \ - MAP_FILE=${map_file} \ - OUTPUT=${output_vcf_filename} \ - CREATE_INDEX=true - - else - echo "0-length ped file found, not running MergePedIntoVcf" - cp ${input_vcf} ${output_vcf_filename} - cp ${input_vcf_index} ${output_vcf_filename}.tbi - fi + java -Xmx3g -jar /usr/gitc/picard-private.jar \ + MergePedIntoVcf \ + ORIGINAL_VCF=${input_vcf} \ + PED_FILE=${ped_file} \ + MAP_FILE=${map_file} \ + OUTPUT=${output_vcf_filename} \ + CREATE_INDEX=true } runtime { @@ -372,168 +716,3 @@ task MergePedIntoVcf { File output_vcf_index = "${output_vcf_filename}.tbi" } } - -workflow Arrays { - String sample_alias - Int analysis_version_number - Float call_rate_threshold - String reported_gender - - String idat_dir_name - File file_of_idat_filenames - File ref_fasta - File ref_fasta_index - File ref_dict - - File dbSNP_vcf - File dbSNP_vcf_index - - File params_file - - File bead_pool_manifest_file - String bead_pool_manifest_filename = sub(bead_pool_manifest_file, "gs://.*/", "") - - File extended_chip_manifest_file - File cluster_file - File gender_cluster_file - File zcall_thresholds_file - - # For GtcToVcf - File fingerprint_gender_file - - # For CheckFingerprint: - File fingerprint_genotypes_vcf_file # if this file is empty (0-length) the workflow should not do fingerprint comparison (as there are no fingerprints for the sample) - File fingerprint_genotypes_vcf_index_file - File haplotype_database_file - - # For HapMap GenotypeConcordance Check: - File control_sample_vcf_file - File control_sample_vcf_index_file - File control_sample_intervals_file - String control_sample_name - - - Int disk_size - Int preemptible_tries - - call AutoCall { - input: - idat_dir_name = idat_dir_name, - file_of_idat_filenames = file_of_idat_filenames, - bead_pool_manifest_file = bead_pool_manifest_file, - bead_pool_manifest_filename = bead_pool_manifest_filename, - cluster_file = cluster_file, - disk_size = disk_size, - preemptible_tries = preemptible_tries - } - - call AutoCall as GenderAutocall { - input: - idat_dir_name = idat_dir_name, - file_of_idat_filenames = file_of_idat_filenames, - bead_pool_manifest_file = bead_pool_manifest_file, - bead_pool_manifest_filename = bead_pool_manifest_filename, - cluster_file = gender_cluster_file, - disk_size = disk_size, - preemptible_tries = preemptible_tries - } - - call GtcToVcf { - input: - vcf_filename = sub(sub(AutoCall.gtc_file, "gs://.*/", ""), ".gtc$", "") + ".vcf.gz", - 
input_gtc = AutoCall.gtc_file, - gender_gtc = GenderAutocall.gtc_file, - extended_chip_manifest_file = extended_chip_manifest_file, - cluster_file = cluster_file, - normalization_manifest_file = AutoCall.bead_pool_manifest_csv_file, - zcall_thresholds_file = zcall_thresholds_file, - sample_alias = sample_alias, - analysis_version_number = analysis_version_number, - reported_gender = reported_gender, - fingerprint_gender_file = fingerprint_gender_file, - ref_fasta = ref_fasta, - ref_fasta_index = ref_fasta_index, - ref_dict = ref_dict, - disk_size = disk_size, - preemptible_tries = preemptible_tries - } - - call zCall { - input: - zcall_ped_filename = sub(sub(AutoCall.gtc_file, "gs://.*/", ""), ".gtc$", "") + ".zcall.ped", - zcall_map_filename = sub(sub(AutoCall.gtc_file, "gs://.*/", ""), ".gtc$", "") + ".zcall.map", - input_gtc = AutoCall.gtc_file, - bead_pool_manifest_csv_file = AutoCall.bead_pool_manifest_csv_file, - zcall_thresholds_file = zcall_thresholds_file, - disk_size = disk_size, - preemptible_tries = preemptible_tries - } - - call MergePedIntoVcf { - input: - input_vcf = GtcToVcf.output_vcf, - input_vcf_index = GtcToVcf.output_vcf_index, - output_vcf_filename = sub(GtcToVcf.output_vcf, "gs://.*/", ""), - ped_file = zCall.ped_file, - map_file = zCall.map_file, - disk_size = disk_size, - preemptible_tries = preemptible_tries - } - - call CollectArraysVariantCallingMetrics { - input: - input_vcf_file = MergePedIntoVcf.output_vcf, - input_vcf_index_file = MergePedIntoVcf.output_vcf_index, - dbSNP_vcf_file = dbSNP_vcf, - dbSNP_vcf_index_file = dbSNP_vcf_index, - call_rate_threshold = call_rate_threshold, - output_metrics_basename = sub(sub(AutoCall.gtc_file, "gs://.*/", ""), ".gtc$", ""), - disk_size = disk_size, - preemptible_tries = preemptible_tries - } - - call VcfToIntervalList { - input: - vcf_file = MergePedIntoVcf.output_vcf, - interval_list_file_name = sub(sub(MergePedIntoVcf.output_vcf, "gs://.*/", ""), ".vcf.gz$", "") + ".interval_list", - disk_size = disk_size, - preemptible_tries = preemptible_tries - } - - call CheckFingerprint { - input: - input_vcf_file = MergePedIntoVcf.output_vcf, - input_vcf_index_file = MergePedIntoVcf.output_vcf_index, - genotypes_vcf_file = fingerprint_genotypes_vcf_file, - genotypes_vcf_index_file = fingerprint_genotypes_vcf_index_file, - haplotype_database_file = haplotype_database_file, - observed_sample_alias = sub(sub(AutoCall.gtc_file, "gs://.*/", ""), ".gtc$", ""), - expected_sample_alias = sample_alias, - output_metrics_basename = sub(sub(AutoCall.gtc_file, "gs://.*/", ""), ".gtc$", ""), - disk_size = disk_size, - preemptible_tries = preemptible_tries - } - - call GenotypeConcordance { - input: - call_vcf_file = MergePedIntoVcf.output_vcf, - call_vcf_index_file = MergePedIntoVcf.output_vcf_index, - call_intervals_file = VcfToIntervalList.interval_list_file, - call_sample_name = sub(sub(AutoCall.gtc_file, "gs://.*/", ""), ".gtc$", ""), - truth_vcf_file = control_sample_vcf_file, - truth_vcf_index_file = control_sample_vcf_index_file, - truth_intervals_file = control_sample_intervals_file, - truth_sample_name = control_sample_name, - output_metrics_basename = sub(sub(AutoCall.gtc_file, "gs://.*/", ""), ".gtc$", ""), - disk_size = disk_size, - preemptible_tries = preemptible_tries - } - - output { - AutoCall.gtc_file - MergePedIntoVcf.* - CollectArraysVariantCallingMetrics.* - CheckFingerprint.* - GenotypeConcordance.* - } -} diff --git a/centaur/src/main/resources/integrationTestCases/hca/HISAT2.wdl 
b/centaur/src/main/resources/integrationTestCases/hca/HISAT2.wdl new file mode 100644 index 00000000000..72cca225c3d --- /dev/null +++ b/centaur/src/main/resources/integrationTestCases/hca/HISAT2.wdl @@ -0,0 +1,296 @@ +task HISAT2PairedEnd { + File hisat2_ref + File fastq1 + File fastq2 + String ref_name + String output_basename + String sample_name + + # runtime values + String docker = "quay.io/humancellatlas/secondary-analysis-hisat2:v0.2.2-2-2.1.0" + Int machine_mem_mb = 15000 + Int cpu = 4 + # use provided disk number or dynamically size on our own, 10 is our zipped fastq -> bam conversion with 50GB of additional disk + Int disk = ceil((size(fastq1, "GB") + size(fastq2, "GB") * 10) + size(hisat2_ref, "GB") + 50) + Int preemptible = 5 + + meta { + description: "HISAT2 alignment task will align paired-end fastq reads to reference genome." + } + + parameter_meta { + hisat2_ref: "HISAT2 reference" + fastq1: "gz forward fastq file" + fastq2: "gz reverse fastq file" + ref_name: "the basename of the index for the reference genome" + output_basename: "basename used for output files" + sample_name: "sample name of input" + docker: "(optional) the docker image containing the runtime environment for this task" + machine_mem_mb: "(optional) the amount of memory (MB) to provision for this task" + cpu: "(optional) the number of cpus to provision for this task" + disk: "(optional) the amount of disk space (GB) to provision for this task" + preemptible: "(optional) if non-zero, request a pre-emptible instance and allow for this number of preemptions before running the task on a non preemptible machine" + } + + command { + # Note that files MUST be gzipped or the module will not function properly + # This will be addressed in the future either by a change in how Hisat2 functions or a more + # robust test for compression type. + + set -e + + # fix names if necessary. + if [ "${fastq1}" != *.fastq.gz ]; then + FQ1=${fastq1}.fastq.gz + mv ${fastq1} ${fastq1}.fastq.gz + else + FQ1=${fastq1} + fi + if [ "${fastq2}" != *.fastq.gz ]; then + FQ2=${fastq2}.fastq.gz + mv ${fastq2} ${fastq2}.fastq.gz + else + FQ2=${fastq2} + fi + + tar --no-same-owner -xvf "${hisat2_ref}" + + # run HISAT2 to genome reference with dedault parameters + # --seed to fix pseudo-random number and in order to produce deterministics results + # -k --secondary to output multiple mapping reads. 
--keep 10 will output up to 10 multiple mapping reads, which is default in HISAT2 + hisat2 -t \ + -x ${ref_name}/${ref_name} \ + -1 $FQ1 \ + -2 $FQ2 \ + --rg-id=${sample_name} --rg SM:${sample_name} --rg LB:${sample_name} \ + --rg PL:ILLUMINA --rg PU:${sample_name} \ + --new-summary --summary-file ${output_basename}.log \ + --met-file ${output_basename}.hisat2.met.txt --met 5 \ + --seed 12345 \ + -k 10 \ + --secondary \ + -p ${cpu} -S ${output_basename}.sam + samtools sort -@ ${cpu} -O bam -o "${output_basename}.bam" "${output_basename}.sam" + } + + runtime { + docker: docker + memory: "${machine_mem_mb} MB" + disks: "local-disk ${disk} HDD" + cpu: cpu + preemptible: preemptible + } + + output { + File log_file = "${output_basename}.log" + File met_file = "${output_basename}.hisat2.met.txt" + File output_bam = "${output_basename}.bam" + } +} + +task HISAT2RSEM { + File hisat2_ref + File fastq1 + File fastq2 + String ref_name + String output_basename + String sample_name + + # runtime values + String docker = "quay.io/humancellatlas/secondary-analysis-hisat2:v0.2.2-2-2.1.0" + Int machine_mem_mb = 15000 + Int cpu = 4 + # use provided disk number or dynamically size on our own, 10 is our zipped fastq -> bam conversion with 50GB of additional disk + Int disk = ceil((size(fastq1, "GB") + size(fastq2, "GB") * 10) + size(hisat2_ref, "GB") + 50) + Int preemptible = 5 + + meta { + description: "This HISAT2 alignment task will align paired-end fastq reads to transcriptome only. " + } + + parameter_meta { + hisat2_ref: "HISAT2 reference" + fastq1: "gz forward fastq file" + fastq2: "gz reverse fastq file" + ref_name: "the basename of the index for the reference genome" + output_basename: "basename used for output files" + sample_name: "sample name of input" + docker: "(optional) the docker image containing the runtime environment for this task" + machine_mem_mb: "(optional) the amount of memory (MB) to provision for this task" + cpu: "(optional) the number of cpus to provision for this task" + disk: "(optional) the amount of disk space (GB) to provision for this task" + preemptible: "(optional) if non-zero, request a pre-emptible instance and allow for this number of preemptions before running the task on a non preemptible machine" + } + + command { + set -e + + # fix names if necessary. + if [ "${fastq1}" != *.fastq.gz ]; then + FQ1=${fastq1}.fastq.gz + mv ${fastq1} ${fastq1}.fastq.gz + else + FQ1=${fastq1} + fi + + if [ "${fastq2}" != *.fastq.gz ]; then + FQ2=${fastq2}.fastq.gz + mv ${fastq2} ${fastq2}.fastq.gz + else + FQ2=${fastq2} + fi + + tar --no-same-owner -xvf "${hisat2_ref}" + + # increase gap alignment penalty to avoid gap alignment + # --mp 1,1 --np 1 --score-min L,0,-0.1 is default paramesters when rsem runs alignment by using bowtie2/Bowtie + # --mp 1,1 and --np 1 will reduce mismatching penalty to 1 for all. 
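+      # These mirror the bowtie2 settings RSEM applies by default, keeping the transcriptome alignments compatible with RSEM downstream.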
+ # with no-splice-alignment no-softclip no-mixed options on, HISAT2 will only output concordant alignment without soft-cliping + # --rdg 99999999,99999999 and --rfg 99999999,99999999 will give an infinity penalty to alignment with indel.As results + # no indel/gaps in alignments + hisat2 -t \ + -x ${ref_name}/${ref_name} \ + -1 $FQ1 \ + -2 $FQ2 \ + --rg-id=${sample_name} --rg SM:${sample_name} --rg LB:${sample_name} \ + --rg PL:ILLUMINA --rg PU:${sample_name} \ + --new-summary --summary-file ${output_basename}.log \ + --met-file ${output_basename}.hisat2.met.txt --met 5 \ + -k 10 \ + --mp 1,1 \ + --np 1 \ + --score-min L,0,-0.1 \ + --secondary \ + --no-mixed \ + --no-softclip \ + --no-discordant \ + --rdg 99999999,99999999 \ + --rfg 99999999,99999999 \ + --no-spliced-alignment \ + --seed 12345 \ + -p ${cpu} -S ${output_basename}.sam + samtools view -bS "${output_basename}.sam" > "${output_basename}.bam" + } + + runtime { + docker: docker + memory: "${machine_mem_mb} MB" + disks: "local-disk ${disk} HDD" + cpu: cpu + preemptible: preemptible + } + + output { + File log_file = "${output_basename}.log" + File met_file = "${output_basename}.hisat2.met.txt" + File output_bam = "${output_basename}.bam" + } +} + +task HISAT2SingleEnd { + File hisat2_ref + File fastq + String ref_name + String output_basename + String sample_name + + # runtime values + String docker = "quay.io/humancellatlas/secondary-analysis-hisat2:v0.2.2-2-2.1.0" + Int machine_mem_mb = 15000 + Int cpu = 4 + # use provided disk number or dynamically size on our own, 10 is our zipped fastq -> bam conversion with 50GB of additional disk + Int disk = ceil((size(fastq, "GB") * 10) + size(hisat2_ref, "GB") + 50) + Int preemptible = 5 + + meta { + description: "This HISAT2 alignment task will align single-end fastq reads to reference genome." 
+ } + + parameter_meta { + hisat2_ref: "HISAT2 reference" + fastq: "input fastq from single ended data" + ref_name: "the basename of the index for the reference genome" + output_basename: "basename used for output files" + sample_name: "sample name of input" + docker: "(optional) the docker image containing the runtime environment for this task" + machine_mem_mb: "(optional) the amount of memory (MB) to provision for this task" + cpu: "(optional) the number of cpus to provision for this task" + disk: "(optional) the amount of disk space (GB) to provision for this task" + preemptible: "(optional) if non-zero, request a pre-emptible instance and allow for this number of preemptions before running the task on a non preemptible machine" + } + + command { + set -e + tar --no-same-owner -xvf "${hisat2_ref}" + hisat2 -t \ + -x ${ref_name}/${ref_name} \ + -U ${fastq} \ + --rg-id=${sample_name} --rg SM:${sample_name} --rg LB:${sample_name} \ + --rg PL:ILLUMINA --rg PU:${sample_name} \ + --new-summary --summary-file "${output_basename}.log" \ + --met-file ${output_basename}.hisat2.met.txt --met 5 \ + --seed 12345 \ + -p ${cpu} -S ${output_basename}.sam + samtools sort -@ ${cpu} -O bam -o "${output_basename}.bam" "${output_basename}.sam" + } + + runtime { + docker: docker + memory: "${machine_mem_mb} MB" + disks: "local-disk ${disk} HDD" + cpu: cpu + preemptible: preemptible + } + + output { + File log_file ="${output_basename}.log" + File met_file ="${output_basename}.hisat2.met.txt" + File output_bam = "${output_basename}.bam" + } +} + +task HISAT2InspectIndex { + File hisat2_ref + String ref_name + + # runtime values + String docker = "quay.io/humancellatlas/secondary-analysis-hisat2:v0.2.2-2-2.1.0" + Int machine_mem_mb = 3500 + Int cpu = 1 + # use provided disk number or dynamically size on our own, with 10GB of additional disk + Int disk = ceil(size(hisat2_ref, "GB") + 10) + Int preemptible = 5 + + meta { + description: "This task will test reference indexing files built for HISAT2 aligner." 
+ } + + parameter_meta { + hisat2_ref: "HISAT2 reference" + ref_name: "the basename of the index for the reference genome" + docker: "(optional) the docker image containing the runtime environment for this task" + machine_mem_mb: "(optional) the amount of memory (MB) to provision for this task" + cpu: "(optional) the number of cpus to provision for this task" + disk: "(optional) the amount of disk space (GB) to provision for this task" + preemptible: "(optional) if non-zero, request a pre-emptible instance and allow for this number of preemptions before running the task on a non preemptible machine" + } + + command { + set -e + tar --no-same-owner -xvf "${hisat2_ref}" + hisat2-inspect --ss --snp \ + -s ${ref_name}/${ref_name} > hisat2_inspect.log + } + + runtime { + docker: docker + memory: "${machine_mem_mb} MB" + disks: "local-disk ${disk} HDD" + cpu: cpu + preemptible: preemptible + } + + output { + File log_file ="hisat2_inspect.log" + } +} diff --git a/centaur/src/main/resources/integrationTestCases/hca/Picard.wdl b/centaur/src/main/resources/integrationTestCases/hca/Picard.wdl new file mode 100644 index 00000000000..b5a32222b6b --- /dev/null +++ b/centaur/src/main/resources/integrationTestCases/hca/Picard.wdl @@ -0,0 +1,192 @@ +task CollectMultipleMetrics { + File aligned_bam + File genome_ref_fasta + String output_basename + + # runtime values + String docker ="quay.io/humancellatlas/secondary-analysis-picard:v0.2.2-2.10.10" + Int machine_mem_mb = 7500 + # give the command 1 GB of overhead + Int command_mem_mb = machine_mem_mb - 1000 + Int cpu = 1 + # use provided disk number or dynamically size on our own, with 10GB of additional disk + Int disk = ceil(size(aligned_bam, "GB") + size(genome_ref_fasta, "GB") + 10) + Int preemptible = 5 + + meta { + description: "This Picard task will collect multiple QC metrics, such as CollectAlignmentSummaryMetrics and CollectInsertSizeMetrics." 
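+    # Note on the command below: PROGRAM=null clears Picard's built-in default program list,
+    # so only the PROGRAM= entries listed explicitly here are run.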
+ } + + parameter_meta { + aligned_bam: "input aligned bam" + genome_ref_fasta: "genome reference fasta" + output_basename: "basename used for output files" + docker: "(optional) the docker image containing the runtime environment for this task" + machine_mem_mb: "(optional) the amount of memory (MB) to provision for this task" + cpu: "(optional) the number of cpus to provision for this task" + disk: "(optional) the amount of disk space (GB) to provision for this task" + preemptible: "(optional) if non-zero, request a pre-emptible instance and allow for this number of preemptions before running the task on a non preemptible machine" + } + + command { + java -Xmx${command_mem_mb}m -jar /usr/picard/picard.jar CollectMultipleMetrics \ + VALIDATION_STRINGENCY=SILENT \ + METRIC_ACCUMULATION_LEVEL=ALL_READS \ + INPUT="${aligned_bam}" \ + OUTPUT="${output_basename}" \ + FILE_EXTENSION=".txt" \ + PROGRAM=null \ + PROGRAM=CollectAlignmentSummaryMetrics \ + PROGRAM=CollectInsertSizeMetrics \ + PROGRAM=CollectGcBiasMetrics \ + PROGRAM=CollectBaseDistributionByCycle \ + PROGRAM=QualityScoreDistribution \ + PROGRAM=MeanQualityByCycle \ + PROGRAM=CollectSequencingArtifactMetrics \ + PROGRAM=CollectQualityYieldMetrics \ + REFERENCE_SEQUENCE="${genome_ref_fasta}" \ + ASSUME_SORTED=true + } + + runtime { + docker: docker + memory: "${machine_mem_mb} MB" + disks: "local-disk ${disk} HDD" + cpu: cpu + preemptible: preemptible + } + + output { + File alignment_summary_metrics = "${output_basename}.alignment_summary_metrics.txt" + File base_call_dist_metrics = "${output_basename}.base_distribution_by_cycle_metrics.txt" + File base_call_pdf = "${output_basename}.base_distribution_by_cycle.pdf" + File gc_bias_detail_metrics = "${output_basename}.gc_bias.detail_metrics.txt" + File gc_bias_dist_pdf = "${output_basename}.gc_bias.pdf" + File gc_bias_summary_metrics = "${output_basename}.gc_bias.summary_metrics.txt" + File insert_size_hist = "${output_basename}.insert_size_histogram.pdf" + File insert_size_metrics = "${output_basename}.insert_size_metrics.txt" + File quality_distribution_metrics = "${output_basename}.quality_distribution_metrics.txt" + File quality_distribution_dist_pdf = "${output_basename}.quality_distribution.pdf" + File quality_by_cycle_metrics = "${output_basename}.quality_by_cycle_metrics.txt" + File quality_by_cycle_pdf = "${output_basename}.quality_by_cycle.pdf" + File pre_adapter_details_metrics = "${output_basename}.pre_adapter_detail_metrics.txt" + File pre_adapter_summary_metrics = "${output_basename}.pre_adapter_summary_metrics.txt" + File bait_bias_detail_metrics = "${output_basename}.bait_bias_detail_metrics.txt" + File bait_bias_summary_metrics = "${output_basename}.bait_bias_summary_metrics.txt" + File error_summary_metrics = "${output_basename}.error_summary_metrics.txt" + } +} + +task CollectRnaMetrics { + File aligned_bam + File ref_flat + File rrna_intervals + String output_basename + String stranded + + # runtime values + String docker = "quay.io/humancellatlas/secondary-analysis-picard:v0.2.2-2.10.10" + Int machine_mem_mb = 3500 + # give the command 500 MB of overhead + Int command_mem_mb = machine_mem_mb - 500 + Int cpu = 1 + # use provided disk number or dynamically size on our own, with 10GB of additional disk + Int disk = ceil(size(aligned_bam, "GB") + size(ref_flat, "GB") + size(rrna_intervals, "GB") + 10) + Int preemptible = 5 + + meta { + description: "This Picard task will collect RnaSeqMetrics." 
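+    # Note on the command below: the trailing `touch` ensures the declared coverage PDF exists
+    # even when Picard does not emit a chart, so the task's outputs always resolve.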
+ } + + parameter_meta { + aligned_bam: "input aligned file" + ref_flat: "reference flat file" + rrna_intervals: "ribosomal intervals" + output_basename: "basename used for output files" + stranded: "whether or not to use strand specificity" + docker: "(optional) the docker image containing the runtime environment for this task" + machine_mem_mb: "(optional) the amount of memory (MB) to provision for this task" + cpu: "(optional) the number of cpus to provision for this task" + disk: "(optional) the amount of disk space (GB) to provision for this task" + preemptible: "(optional) if non-zero, request a pre-emptible instance and allow for this number of preemptions before running the task on a non preemptible machine" + } + + command { + set -e + java -Xmx${command_mem_mb}m -jar /usr/picard/picard.jar CollectRnaSeqMetrics \ + VALIDATION_STRINGENCY=SILENT \ + METRIC_ACCUMULATION_LEVEL=ALL_READS \ + INPUT="${aligned_bam}" \ + OUTPUT="${output_basename}.rna_metrics.txt" \ + REF_FLAT="${ref_flat}" \ + RIBOSOMAL_INTERVALS="${rrna_intervals}" \ + STRAND_SPECIFICITY=${stranded} \ + CHART_OUTPUT="${output_basename}.rna.coverage.pdf" + touch "${output_basename}.rna.coverage.pdf" + } + + runtime { + docker: docker + memory: "${machine_mem_mb} MB" + disks: "local-disk ${disk} HDD" + cpu: cpu + preemptible: preemptible + } + + output { + File rna_metrics = "${output_basename}.rna_metrics.txt" + File rna_coverage_pdf = "${output_basename}.rna.coverage.pdf" + } +} + +# Here we use "-XX:ParallelGCThreads=2" to run MarkDuplication on multiple threads +task CollectDuplicationMetrics { + File aligned_bam + String output_basename + + # runtime values + String docker = "quay.io/humancellatlas/secondary-analysis-picard:v0.2.2-2.10.10" + Int machine_mem_mb = 7500 + # give the command 1 GB of overhead + Int command_mem_mb = machine_mem_mb - 1000 + Int cpu = 2 + # use provided disk number or dynamically size on our own, with 10GB of additional disk + Int disk = ceil(size(aligned_bam, "GB") + 10) + Int preemptible = 5 + + meta { + description: "This Picard task will collect alignment DuplicationMetrics." 
+ } + + parameter_meta { + aligned_bam: "input aligned bam" + output_basename: "basename used for output files" + docker: "(optional) the docker image containing the runtime environment for this task" + machine_mem_mb: "(optional) the amount of memory (MB) to provision for this task" + cpu: "(optional) the number of cpus to provision for this task" + disk: "(optional) the amount of disk space (GB) to provision for this task" + preemptible: "(optional) if non-zero, request a pre-emptible instance and allow for this number of preemptions before running the task on a non preemptible machine" + } + + command { + java -Xmx${command_mem_mb}m -XX:ParallelGCThreads=${cpu} -jar /usr/picard/picard.jar MarkDuplicates \ + VALIDATION_STRINGENCY=SILENT \ + INPUT=${aligned_bam} \ + OUTPUT="${output_basename}.MarkDuplicated.bam" \ + ASSUME_SORTED=true \ + METRICS_FILE="${output_basename}.duplicate_metrics.txt" \ + REMOVE_DUPLICATES=false + } + + runtime { + docker: docker + memory: "${machine_mem_mb} MB" + disks: "local-disk ${disk} HDD" + cpu: cpu + preemptible: preemptible + } + + output { + File dedup_metrics = "${output_basename}.duplicate_metrics.txt" + } +} diff --git a/centaur/src/main/resources/integrationTestCases/hca/RSEM.wdl b/centaur/src/main/resources/integrationTestCases/hca/RSEM.wdl new file mode 100644 index 00000000000..44b4e5f2a6b --- /dev/null +++ b/centaur/src/main/resources/integrationTestCases/hca/RSEM.wdl @@ -0,0 +1,61 @@ +task RSEMExpression { + File trans_aligned_bam + File rsem_genome + String output_basename + + # runtime values + String docker = "quay.io/humancellatlas/secondary-analysis-rsem:v0.2.2-1.3.0" + Int machine_mem_mb = 3500 + Int cpu = 4 + # use provided disk number or dynamically size on our own, with 20GB of additional disk + Int disk = ceil(size(trans_aligned_bam, "GB") + size(rsem_genome, "GB") + 20) + Int preemptible = 5 + + meta { + description: "This task will quantify gene expression matrix by using RSEM. The output include gene-level and isoform-level results." 
+ } + + parameter_meta { + trans_aligned_bam: "input transcriptome aligned bam" + rsem_genome: "tar'd RSEM genome" + output_basename: "basename used for output files" + docker: "(optional) the docker image containing the runtime environment for this task" + machine_mem_mb: "(optional) the amount of memory (MB) to provision for this task" + cpu: "(optional) the number of cpus to provision for this task" + disk: "(optional) the amount of disk space (GB) to provision for this task" + preemptible: "(optional) if non-zero, request a pre-emptible instance and allow for this number of preemptions before running the task on a non preemptible machine" + } + + command { + set -e + + tar --no-same-owner -xvf ${rsem_genome} + rsem-calculate-expression \ + --bam \ + --paired-end \ + -p ${cpu} \ + --time --seed 555 \ + --calc-pme \ + --single-cell-prior \ + ${trans_aligned_bam} \ + rsem/rsem_trans_index \ + "${output_basename}" + } + + runtime { + docker: docker + memory: "${machine_mem_mb} MB" + disks: "local-disk ${disk} HDD" + cpu: cpu + preemptible: preemptible + } + + output { + File rsem_gene = "${output_basename}.genes.results" + File rsem_isoform = "${output_basename}.isoforms.results" + File rsem_time = "${output_basename}.time" + File rsem_cnt = "${output_basename}.stat/${output_basename}.cnt" + File rsem_model = "${output_basename}.stat/${output_basename}.model" + File rsem_theta = "${output_basename}.stat/${output_basename}.theta" + } +} diff --git a/centaur/src/main/resources/integrationTestCases/hca/SmartSeq2SingleSample/SmartSeq2SingleSample.inputs.json b/centaur/src/main/resources/integrationTestCases/hca/SmartSeq2SingleSample/SmartSeq2SingleSample.inputs.json new file mode 100644 index 00000000000..bdcce3dc11a --- /dev/null +++ b/centaur/src/main/resources/integrationTestCases/hca/SmartSeq2SingleSample/SmartSeq2SingleSample.inputs.json @@ -0,0 +1,17 @@ +{ + "SmartSeq2SingleCell.gtf_file": "gs://hca-dcp-mint-test-data/reference/GRCh38_Gencode/gencode.v27.primary_assembly.annotation.gtf", + "SmartSeq2SingleCell.hisat2_ref_trans_name": "gencode_v27_trans_rsem", + "SmartSeq2SingleCell.rrna_intervals": "gs://hca-dcp-mint-test-data/reference/GRCh38_Gencode/gencode.v27.rRNA.interval_list", + "SmartSeq2SingleCell.star_ref_index": "gs://hca-dcp-mint-test-data/reference/GRCh38_Gencode/GRCh38_GencodeV27_Primary.tar", + "SmartSeq2SingleCell.hisat2_ref_index": "gs://hca-dcp-mint-test-data/reference/HISAT2/genome_snp_tran.tar.gz", + "SmartSeq2SingleCell.genome_ref_fasta": "gs://hca-dcp-mint-test-data/reference/GRCh38_Gencode/GRCh38.primary_assembly.genome.fa", + "SmartSeq2SingleCell.hisat2_ref_trans_index": "gs://hca-dcp-mint-test-data/reference/HISAT2/gencode_v27_trans_rsem.tar.gz", + "SmartSeq2SingleCell.rsem_ref_index": "gs://hca-dcp-mint-test-data/reference/GRCh38_Gencode/gencode_v27_primary.tar", + "SmartSeq2SingleCell.gene_ref_flat": "gs://hca-dcp-mint-test-data/reference/GRCh38_Gencode/GRCh38_gencode.v27.refFlat.txt", + "SmartSeq2SingleCell.hisat2_ref_name": "genome_snp_tran", + "SmartSeq2SingleCell.stranded":"NONE", + "SmartSeq2SingleCell.fastq1":"gs://hca-dcp-mint-test-data/smartseq2_single_sample/patel_ap/SRR1294925_1.fastq.gz", + "SmartSeq2SingleCell.fastq2":"gs://hca-dcp-mint-test-data/smartseq2_single_sample/patel_ap/SRR1294925_2.fastq.gz", + "SmartSeq2SingleCell.sample_name":"SRR1294925", + "SmartSeq2SingleCell.output_name":"SRR1294925" +} diff --git a/centaur/src/main/resources/integrationTestCases/hca/SmartSeq2SingleSample/SmartSeq2SingleSample.labels.json 
b/centaur/src/main/resources/integrationTestCases/hca/SmartSeq2SingleSample/SmartSeq2SingleSample.labels.json new file mode 100644 index 00000000000..40a3a0206e7 --- /dev/null +++ b/centaur/src/main/resources/integrationTestCases/hca/SmartSeq2SingleSample/SmartSeq2SingleSample.labels.json @@ -0,0 +1,5 @@ +{ + "project":"mint-pipeline-dev", + "pipeline":"ss2", + "name":"pipeline" +} diff --git a/centaur/src/main/resources/integrationTestCases/hca/SmartSeq2SingleSample/SmartSeq2SingleSample.wdl b/centaur/src/main/resources/integrationTestCases/hca/SmartSeq2SingleSample/SmartSeq2SingleSample.wdl new file mode 100644 index 00000000000..5f7c7033a61 --- /dev/null +++ b/centaur/src/main/resources/integrationTestCases/hca/SmartSeq2SingleSample/SmartSeq2SingleSample.wdl @@ -0,0 +1,138 @@ +import "HISAT2.wdl" as HISAT2 +import "Picard.wdl" as Picard +import "RSEM.wdl" as RSEM + +workflow SmartSeq2SingleCell { + meta { + description: "Process SmartSeq2 scRNA-Seq data, including read alignment, QC metrics collection, and gene expression quantification" + } + + # load annotation + File gtf_file + File genome_ref_fasta + File rrna_intervals + File gene_ref_flat + + # load index + File hisat2_ref_index + File hisat2_ref_trans_index + File rsem_ref_index + + # ref index name + String hisat2_ref_name + String hisat2_ref_trans_name + + # samples + String stranded + String sample_name + String output_name + File fastq1 + File fastq2 + + parameter_meta { + gtf_file: "Gene annotation file in gtf format" + genome_ref_fasta: "Genome reference in fasta format" + rrna_intervals: "rRNA interval file required by Picard" + gene_ref_flat: "Gene refflat file required by Picard" + hisat2_ref_index: "HISAT2 reference index file in tarball" + hisat2_ref_trans_index: "HISAT2 transcriptome index file in tarball" + rsem_ref_index: "RSEM reference index file in tarball" + hisat2_ref_name: "HISAT2 reference index name" + hisat2_ref_trans_name: "HISAT2 transcriptome index file name" + stranded: "Library strand information. Example values: FR, RF, NONE" + sample_name: "Sample name or Cell ID" + output_name: "Output name, can include path" + fastq1: "R1 in paired end reads" + fastq2: "R2 in paired end reads" + } + + String quality_control_output_basename = output_name + "_qc" + + call HISAT2.HISAT2PairedEnd { + input: + hisat2_ref = hisat2_ref_index, + fastq1 = fastq1, + fastq2 = fastq2, + ref_name = hisat2_ref_name, + sample_name = sample_name, + output_basename = quality_control_output_basename + } + + call Picard.CollectMultipleMetrics { + input: + aligned_bam = HISAT2PairedEnd.output_bam, + genome_ref_fasta = genome_ref_fasta, + output_basename = quality_control_output_basename + } + + call Picard.CollectRnaMetrics { + input: + aligned_bam = HISAT2PairedEnd.output_bam, + ref_flat = gene_ref_flat, + rrna_intervals = rrna_intervals, + output_basename = quality_control_output_basename, + stranded = stranded, + } + + call Picard.CollectDuplicationMetrics { + input: + aligned_bam = HISAT2PairedEnd.output_bam, + output_basename = quality_control_output_basename + } + + String data_output_basename = output_name + "_rsem" + + call HISAT2.HISAT2RSEM as HISAT2Transcriptome { + input: + hisat2_ref = hisat2_ref_trans_index, + fastq1 = fastq1, + fastq2 = fastq2, + ref_name = hisat2_ref_trans_name, + sample_name = sample_name, + output_basename = data_output_basename, + } + + call RSEM.RSEMExpression { + input: + trans_aligned_bam = HISAT2Transcriptome.output_bam, + rsem_genome = rsem_ref_index, + output_basename = data_output_basename, + } +
+ output { + # quality control outputs + File aligned_bam = HISAT2PairedEnd.output_bam + File hisat2_met_file = HISAT2PairedEnd.met_file + File hisat2_log_file = HISAT2PairedEnd.log_file + File alignment_summary_metrics = CollectMultipleMetrics.alignment_summary_metrics + File base_call_dist_metrics = CollectMultipleMetrics.base_call_dist_metrics + File base_call_pdf = CollectMultipleMetrics.base_call_pdf + File gc_bias_detail_metrics = CollectMultipleMetrics.gc_bias_detail_metrics + File gc_bias_dist_pdf = CollectMultipleMetrics.gc_bias_dist_pdf + File gc_bias_summary_metrics = CollectMultipleMetrics.gc_bias_summary_metrics + File insert_size_hist = CollectMultipleMetrics.insert_size_hist + File insert_size_metrics = CollectMultipleMetrics.insert_size_metrics + File quality_distribution_metrics = CollectMultipleMetrics.quality_distribution_metrics + File quality_distribution_dist_pdf = CollectMultipleMetrics.quality_distribution_dist_pdf + File quality_by_cycle_metrics = CollectMultipleMetrics.quality_by_cycle_metrics + File quality_by_cycle_pdf = CollectMultipleMetrics.quality_by_cycle_pdf + File pre_adapter_details_metrics = CollectMultipleMetrics.pre_adapter_details_metrics + File bait_bias_detail_metrics = CollectMultipleMetrics.bait_bias_detail_metrics + File bait_bias_summary_metrics = CollectMultipleMetrics.bait_bias_summary_metrics + File error_summary_metrics = CollectMultipleMetrics.error_summary_metrics + File rna_metrics = CollectRnaMetrics.rna_metrics + File rna_coverage = CollectRnaMetrics.rna_coverage_pdf + File dedup_metrics = CollectDuplicationMetrics.dedup_metrics + + # data outputs + File aligned_transcriptome_bam = HISAT2Transcriptome.output_bam + File hisat2_transcriptome_met_file = HISAT2Transcriptome.met_file + File hisat2_transcriptome_log_file = HISAT2Transcriptome.log_file + File rsem_gene_results = RSEMExpression.rsem_gene + File rsem_isoform_results = RSEMExpression.rsem_isoform + File rsem_time_log = RSEMExpression.rsem_time + File rsem_cnt_log = RSEMExpression.rsem_cnt + File rsem_model_log = RSEMExpression.rsem_model + File rsem_theta_log = RSEMExpression.rsem_theta + } +} diff --git a/centaur/src/main/resources/standardTestCases/custom_mount_point.test b/centaur/src/main/resources/standardTestCases/custom_mount_point.test index c47ac44e498..1b0a787450a 100644 --- a/centaur/src/main/resources/standardTestCases/custom_mount_point.test +++ b/centaur/src/main/resources/standardTestCases/custom_mount_point.test @@ -9,4 +9,7 @@ files { metadata { "calls.custom_mount_point.t.backend": "Papi" "calls.custom_mount_point.t.backendStatus": "Success" + + "outputs.custom_mount_point.o1": "bazqux" + "outputs.custom_mount_point.o2": "foobar" } diff --git a/centaur/src/main/resources/standardTestCases/custom_mount_point/custom_mount_point.wdl b/centaur/src/main/resources/standardTestCases/custom_mount_point/custom_mount_point.wdl index 0001d38f31d..3a713b22859 100644 --- a/centaur/src/main/resources/standardTestCases/custom_mount_point/custom_mount_point.wdl +++ b/centaur/src/main/resources/standardTestCases/custom_mount_point/custom_mount_point.wdl @@ -1,30 +1,35 @@ -# 1) Mounting a SSD to a custom location +# 1) Mounting a SSD to a custom location and default location with custom disk size # 2) Write a file to that mount # 3) Changing directory within a command shouldn't break Cromwell # 4) Use the file that was written on the mount as an output +# ChrisTM task t { - String version + String v command { + echo "bazqux" > some_file + cd /some/mnt echo "foobar" > some_file } 
output { + String out1 = read_string("some_file") String out2 = read_string("/some/mnt/some_file") } runtime { - docker: "ubuntu:" + version + docker: "ubuntu:" + v disks: "local-disk 20 SSD, /some/mnt 20 SSD" } } workflow custom_mount_point { - call t {input: version="latest"} + call t {input: v="latest"} output { - t.out2 + String o1 = t.out1 + String o2 = t.out2 } } diff --git a/centaur/src/main/resources/standardTestCases/cwl_input_json.test b/centaur/src/main/resources/standardTestCases/cwl_input_json.test new file mode 100644 index 00000000000..73799c89c49 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/cwl_input_json.test @@ -0,0 +1,17 @@ +name: cwl_input_json +testFormat: workflowsuccess +workflowType: CWL +workflowTypeVersion: v1.0 +backendsMode: "only" +workflowRoot: cwl_input_json +backends: [Local, LocalNoDocker] +tags: [localdockertest] + +files { + workflow: cwl_input_json/cwl_input_json.yaml +} + +metadata { + "submittedFiles.workflowType": CWL + "submittedFiles.workflowTypeVersion": v1.0 +} diff --git a/centaur/src/main/resources/standardTestCases/cwl_input_json/cwl_input_json.yaml b/centaur/src/main/resources/standardTestCases/cwl_input_json/cwl_input_json.yaml new file mode 100644 index 00000000000..f6e5616c6c4 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/cwl_input_json/cwl_input_json.yaml @@ -0,0 +1,70 @@ +cwlVersion: v1.0 +$graph: +- id: cwl_input_json + class: Workflow + inputs: [] + outputs: + final_output: + type: File + outputSource: round/output_file + steps: + make: + run: "#makefile" + in: [] + out: [fileoutput] + round: + run: "#roundtrip" + in: + input_record: + source: "make/fileoutput" + out: [output_file] + +- id: makefile + class: CommandLineTool + requirements: + - class: ShellCommandRequirement + - class: InlineJavascriptRequirement + - class: DockerRequirement + dockerPull: "ubuntu:latest" + inputs: [] + outputs: + fileoutput: + type: + fields: + - name: input_file + type: File + name: input_record + type: record + arguments: + - valueFrom: > + echo foo > foo && echo '{ "fileoutput": { "input_file": {"path": "$(runtime.outdir)/foo", "class": "File"} } }' > cwl.output.json + shellQuote: false + +- id: roundtrip + class: CommandLineTool + hints: + - class: DockerRequirement + dockerPull: "stedolan/jq:latest" + inputs: + - id: input_record + type: + fields: + - name: input_file + type: File + name: input_record + type: record + outputs: + - id: output_file + type: File + requirements: + - class: ShellCommandRequirement + - class: InlineJavascriptRequirement + - class: InitialWorkDirRequirement + listing: + - entry: $(JSON.stringify(inputs)) + entryname: cwl.inputs.json + arguments: + # Round-trips the file referenced in cwl.input.json to cwl.output.json. Also ls it in the command to make sure it's there. + - valueFrom: > + INPUT_FILE=\$(cat cwl.inputs.json | jq -r '.. | .path? 
// empty') && ls $INPUT_FILE && echo "{\"output_file\": {\"path\": \"\$INPUT_FILE\", \"class\": \"File\"} }" > cwl.output.json + shellQuote: false diff --git a/centaur/src/main/resources/standardTestCases/cwl_resources_papiv2.test b/centaur/src/main/resources/standardTestCases/cwl_resources_papiv2.test index 5bae975003c..1fa83c566af 100644 --- a/centaur/src/main/resources/standardTestCases/cwl_resources_papiv2.test +++ b/centaur/src/main/resources/standardTestCases/cwl_resources_papiv2.test @@ -8,7 +8,7 @@ files { metadata { status: Succeeded - "outputs.machineTypeTool.machine_type": "projects/1005074806481/machineTypes/custom-2-7168" + "outputs.machineTypeTool.machine_type": "projects/1005074806481/machineTypes/custom-2-6912" } workflowType: CWL diff --git a/centaur/src/main/resources/standardTestCases/draft3_nio_file.test b/centaur/src/main/resources/standardTestCases/draft3_nio_file.test new file mode 100644 index 00000000000..b6d0bb1751f --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/draft3_nio_file.test @@ -0,0 +1,24 @@ +name: draft3_nio_file +testFormat: workflowsuccess +workflowType: WDL +workflowTypeVersion: 1.0 +tags: ["wdl_1.0"] +backends: [Papi] + +files { + workflow: wdl_draft3/draft3_nio_file/draft3_nio_file.wdl +} + +metadata { + workflowName: draft3_nio_file + status: Succeeded + "outputs.draft3_nio_file.f_path_prefix": "gs://" + "outputs.draft3_nio_file.g_path_prefix": "gs://" + "outputs.draft3_nio_file.h_path_prefix": "gs://" + "outputs.draft3_nio_file.x_path_prefix": "gs://" + "outputs.draft3_nio_file.y_path_prefix": "gs://" + "outputs.draft3_nio_file.errors": "" + + "calls.draft3_nio_file.cc_nio_file.callCaching.hit": true + "calls.draft3_nio_file.non_cc_nio_file.callCaching.hit": false +} diff --git a/centaur/src/main/resources/standardTestCases/draft3_optional_input_from_scatter.test b/centaur/src/main/resources/standardTestCases/draft3_optional_input_from_scatter.test new file mode 100644 index 00000000000..3ec4921484e --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/draft3_optional_input_from_scatter.test @@ -0,0 +1,17 @@ +name: draft3_optional_input_from_scatter +testFormat: workflowsuccess +workflowType: WDL +workflowTypeVersion: 1.0 +tags: ["wdl_1.0"] + +files { + workflow: wdl_draft3/draft3_optional_input_from_scatter/draft3_optional_input_from_scatter.wdl + inputs: wdl_draft3/draft3_optional_input_from_scatter/draft3_optional_input_from_scatter.inputs.json +} + +metadata { + workflowName: draft3_optional_input_from_scatter + status: Succeeded + + "outputs.draft3_optional_input_from_scatter.unpacked_out": "hello1 hello2 hello3 hello4" +} diff --git a/centaur/src/main/resources/standardTestCases/draft3_short_circuit.test b/centaur/src/main/resources/standardTestCases/draft3_short_circuit.test new file mode 100644 index 00000000000..8688978fa68 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/draft3_short_circuit.test @@ -0,0 +1,19 @@ +name: draft3_short_circuit +testFormat: workflowsuccess + +backends: [Local] +tags: [localdockertest] + + +files { + workflow: wdl_draft3/draft3_short_circuit/draft3_short_circuit.wdl +} + +metadata { + workflowName: draft3_short_circuit + status: Succeeded + "outputs.draft3_short_circuit.a_out": null, + "outputs.draft3_short_circuit.b_out": 1, + "outputs.draft3_short_circuit.c_out": 1, + "outputs.draft3_short_circuit.d_out": 1 +} diff --git a/centaur/src/main/resources/standardTestCases/final_call_logs_dir_jes.test 
b/centaur/src/main/resources/standardTestCases/final_call_logs_dir_jes.test index fabcac974a6..0dcd5c8cc8b 100644 --- a/centaur/src/main/resources/standardTestCases/final_call_logs_dir_jes.test +++ b/centaur/src/main/resources/standardTestCases/final_call_logs_dir_jes.test @@ -12,8 +12,8 @@ files { fileSystemCheck: "gcs" outputExpectations { - "gs://cloud-cromwell-dev/callLogs/wf_hello/<>/call-hello/hello-stdout.log": 1 - "gs://cloud-cromwell-dev/callLogs/wf_hello/<>/call-hello/hello-stderr.log": 1 + "gs://cloud-cromwell-dev/callLogs/wf_hello/<>/call-hello/stdout": 1 + "gs://cloud-cromwell-dev/callLogs/wf_hello/<>/call-hello/stderr": 1 "gs://cloud-cromwell-dev/callLogs/wf_hello/<>/call-hello/hello.log": 1 "gs://cloud-cromwell-dev/workflowLogDir/workflow.<>.log": 1 "gs://cloud-cromwell-dev/outputs/wf_hello/<>/call-hello/test.out": 1 diff --git a/centaur/src/main/resources/standardTestCases/input_expressions.test b/centaur/src/main/resources/standardTestCases/input_expressions.test new file mode 100644 index 00000000000..f26a0a35aeb --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/input_expressions.test @@ -0,0 +1,11 @@ +name: input_expressions +testFormat: workflowsuccess +backends: [Papi, Local] + +files { + workflow: input_expressions/input_expressions.wdl +} + +metadata { + "outputs.input_expressions.wf_out": 256.0 +} diff --git a/centaur/src/main/resources/standardTestCases/input_expressions/input_expressions.wdl b/centaur/src/main/resources/standardTestCases/input_expressions/input_expressions.wdl new file mode 100644 index 00000000000..e3ac91ddab1 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/input_expressions/input_expressions.wdl @@ -0,0 +1,6 @@ +workflow input_expressions { + Float size256k = size("gs://cloud-cromwell-dev/file_256k", "KiB") + output { + Float wf_out = size256k + } +} diff --git a/centaur/src/main/resources/standardTestCases/read_tsv.test b/centaur/src/main/resources/standardTestCases/read_tsv.test index f1a0c90ca6a..75e8bd0260c 100644 --- a/centaur/src/main/resources/standardTestCases/read_tsv.test +++ b/centaur/src/main/resources/standardTestCases/read_tsv.test @@ -1,5 +1,8 @@ name: read_tsv testFormat: workflowsuccess +backendsMode: "only" +backends: [Local, LocalNoDocker] +tags: [localdockertest] files { workflow: read_tsv/read_tsv.wdl diff --git a/centaur/src/main/resources/standardTestCases/wdl_draft3/draft3_nio_file/draft3_nio_file.wdl b/centaur/src/main/resources/standardTestCases/wdl_draft3/draft3_nio_file/draft3_nio_file.wdl new file mode 100644 index 00000000000..08a05fbccf0 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/wdl_draft3/draft3_nio_file/draft3_nio_file.wdl @@ -0,0 +1,145 @@ +version 1.0 + +workflow draft3_nio_file { + call mk_files { input: salt = 0 } + call mk_files as mk_more_files { input: salt = 1 } + + call nio_file { input: + ready = true, + f = mk_files.f, + h = mk_files.h, + x = mk_files.x, + y = mk_files.y + } + + call nio_file as cc_nio_file { input: + ready = nio_file.done, + f = mk_files.f, + h = mk_files.h, + x = mk_files.x, + y = mk_files.y + } + + call nio_file as non_cc_nio_file { input: + ready = nio_file.done, + f = mk_more_files.f, + h = mk_more_files.h, + x = mk_more_files.x, + y = mk_more_files.y + } + + output { + String f_path_prefix = nio_file.result[0] + String g_path_prefix = nio_file.result[1] + String h_path_prefix = nio_file.result[2] + String x_path_prefix = nio_file.result[3] + String y_path_prefix = nio_file.result[4] + + String errors = nio_file.errors + } +} 
+ +struct FileBox { + File val0 + File val1 +} + +task mk_files { + + parameter_meta { + salt: "Shakes things up a little bit!" + } + + input { + Int salt + } + + command { + echo "f~{salt}" > f + echo "h~{salt}" > h + echo "x0~{salt}" > x0 + echo "x1~{salt}" > x1 + echo "y0~{salt}" > y0 + echo "y1~{salt}" > y1 + } + + runtime { + docker: "ubuntu:latest" + } + + output { + File f = "f" + File h = "h" + Array[File] x = ["x0", "x1"] + FileBox y = object { val0: "y0", val1: "y1" } + } +} + +task nio_file { + + meta { + description: "Analyzes whether (a) the interpolated paths for NIO files start with 'gs://' and (b) the inputs are localized" + } + parameter_meta { + ready: "Allows us to delay until a previous task is done" + f: { localization_optional: true } + g: { localization_optional: true } + h: { + description: "Only here to check that we can have fields before the 'nio'", + localization_optional: true, + after: "... and after it..." + } + x: { localization_optional: true } + y: { localization_optional: true } + done: "Always true, can be chained into the 'ready' of a subsequent invocation" + } + + input { + Boolean ready + File f + File g = f + File? h + Array[File] x + FileBox y + } + + command { + + # Check that the paths provided were cloud: + echo ~{f} | cut -c 1-5 + echo ~{g} | cut -c 1-5 + echo ~{h} | cut -c 1-5 + echo ~{x[0]} | cut -c 1-5 + echo ~{y.val1} | cut -c 1-5 + + # Check that the NIO files were not localized + INSTANCE=$(curl -s "http://metadata.google.internal/computeMetadata/v1/instance/name" -H "Metadata-Flavor: Google") + TOKEN=$(gcloud auth application-default print-access-token) + INSTANCE_INFO=$(curl "https://www.googleapis.com/compute/v1/projects/broad-dsde-cromwell-dev/zones/us-central1-c/instances/$INSTANCE" -H "Authorization: Bearer $TOKEN" -H 'Accept: application/json') + OPERATION_LINE=$(grep Operation <<< $INSTANCE_INFO) + OPERATION_ID=$(sed 's/.*Operation: \([^ ]*\)".*/\1/' <<< $OPERATION_LINE) + gcloud components install alpha + PAPI_METADATA=$(gcloud --quiet alpha genomics operations describe operations/$OPERATION_ID) + echo $PAPI_METADATA > papi_metadata + + touch errors.txt + grep -q "draft3_nio_file.nio_file.f" <<< $PAPI_METADATA && echo "f was incorrectly localized" >> errors.txt + grep -q "draft3_nio_file.nio_file.g" <<< $PAPI_METADATA && echo "g was incorrectly localized" >> errors.txt + grep -q "draft3_nio_file.nio_file.__g" <<< $PAPI_METADATA && echo "__g was incorrectly localized" >> errors.txt + grep -q "draft3_nio_file.nio_file.h" <<< $PAPI_METADATA && echo "h was incorrectly localized" >> errors.txt + grep -q "draft3_nio_file.nio_file.x" <<< $PAPI_METADATA && echo "x was incorrectly localized" >> errors.txt + grep -q "draft3_nio_file.nio_file.y" <<< $PAPI_METADATA && echo "y was incorrectly localized" >> errors.txt + } + runtime { + docker: "google/cloud-sdk:alpine" + zones: ["us-central1-c"] + + # Depending on the final 'grep', the return code is probably going to be '1'... which is fine! 
+ continueOnReturnCode: true + } + output { + Array[String] result = read_lines(stdout()) + String errors = read_string("errors.txt") + Boolean done = true + } +} diff --git a/centaur/src/main/resources/standardTestCases/wdl_draft3/draft3_optional_input_from_scatter/draft3_optional_input_from_scatter.inputs.json b/centaur/src/main/resources/standardTestCases/wdl_draft3/draft3_optional_input_from_scatter/draft3_optional_input_from_scatter.inputs.json new file mode 100644 index 00000000000..36081acf4cd --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/wdl_draft3/draft3_optional_input_from_scatter/draft3_optional_input_from_scatter.inputs.json @@ -0,0 +1,4 @@ +{ + "draft3_optional_input_from_scatter.input_default_not_used.greeting1": "hello1", + "draft3_optional_input_from_scatter.input_default_not_used.greeting3": "hello3" +} diff --git a/centaur/src/main/resources/standardTestCases/wdl_draft3/draft3_optional_input_from_scatter/draft3_optional_input_from_scatter.wdl b/centaur/src/main/resources/standardTestCases/wdl_draft3/draft3_optional_input_from_scatter/draft3_optional_input_from_scatter.wdl new file mode 100644 index 00000000000..58a1c4a4625 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/wdl_draft3/draft3_optional_input_from_scatter/draft3_optional_input_from_scatter.wdl @@ -0,0 +1,35 @@ +version 1.0 + +workflow draft3_optional_input_from_scatter { + scatter (x in [0]) { + scatter (y in [0]) { + scatter (z in [0]) { + call input_default_not_used + } + } + } + + output { + String unpacked_out = input_default_not_used.out[0][0][0] + } +} + +task input_default_not_used { + parameter_meta { + greeting1: "Input with default; replaced in inputs.json" + greeting2: "Unsupplied input, no default; set on the command line" + greeting3: "Input with upstream default; replaced in inputs.json" + greeting4: "Input with default; default is not replaced" + } + input { + String greeting1 = "replace me" + String? greeting2 + String greeting3 = greeting1 + String greeting4 = "hello4" + } + command { echo ~{greeting1} ~{default="hello2" greeting2} ~{greeting3} ~{greeting4} } + runtime { docker: "ubuntu:latest" } + output { + String out = read_string(stdout()) + } +} diff --git a/centaur/src/main/resources/standardTestCases/wdl_draft3/draft3_short_circuit/draft3_short_circuit.wdl b/centaur/src/main/resources/standardTestCases/wdl_draft3/draft3_short_circuit/draft3_short_circuit.wdl new file mode 100644 index 00000000000..36d11e4ba78 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/wdl_draft3/draft3_short_circuit/draft3_short_circuit.wdl @@ -0,0 +1,25 @@ +version 1.0 + +## +## A set of expressions which would cause errors, except luckily they're short-circuited +## +workflow draft3_short_circuit { + + if (false && 1/0 == 0) { + Int a = 1 + } + + if (true || 1/0 == 0) { + Int b = 1 + } + + Int c = if true then 1 else 1/0 + Int d = if false then 1/0 else 1 + + output { + Int? a_out = a + Int?
b_out = b + Int c_out = c + Int d_out = d + } +} diff --git a/centaurCwlRunner/src/main/scala/centaur/cwl/OutputManipulator.scala b/centaurCwlRunner/src/main/scala/centaur/cwl/OutputManipulator.scala index ea7ee79513b..7b1e208cf64 100644 --- a/centaurCwlRunner/src/main/scala/centaur/cwl/OutputManipulator.scala +++ b/centaurCwlRunner/src/main/scala/centaur/cwl/OutputManipulator.scala @@ -1,5 +1,6 @@ package centaur.cwl +import common.util.StringUtil._ import cromwell.core.path.{Path, PathBuilder} import cwl.command.ParentName import cwl.ontology.Schema @@ -31,11 +32,15 @@ object OutputManipulator extends Poly1 { private def stringToFile(pathAsString: String, pathBuilder: PathBuilder): Json = { val path = pathBuilder.build(pathAsString).get - CwlFile( - location = Option(path.name), - checksum = hashFile(path), - size = sizeFile(path) - ).asJson + if (path.exists()) { + CwlFile( + location = Option(path.name), + checksum = hashFile(path), + size = sizeFile(path) + ).asJson + } else { + Json.Null + } } /** @@ -60,7 +65,6 @@ object OutputManipulator extends Poly1 { } def populateInnerFiles(json: Json, isInsideDirectory: Boolean): Option[Json] = { - import mouse.boolean._ def populateInnerFile(file: Json): Json = { file.asObject @@ -70,10 +74,12 @@ object OutputManipulator extends Poly1 { .getOrElse(file) } - // Assume the json is an array ("secondaryFiles" and "listing" are both arrays) + // Assume the json is an array ("secondaryFiles" and "listing" are both arrays). val innerFiles = json.asArray.get - // the cwl test runner doesn't expect a "secondaryFiles" or "listing" field at all if it's empty - innerFiles.nonEmpty.option(Json.arr(innerFiles.map(populateInnerFile): _*)) + // Remove any files that don't actually exist. + val filteredFiles = innerFiles.map(populateInnerFile).filterNot(_.isNull) + // The cwl test runner doesn't expect a "secondaryFiles" or "listing" field at all if it's empty. + if (filteredFiles.nonEmpty) Option(Json.arr(filteredFiles: _*)) else None } def updateFileOrDirectoryWithNestedFiles(obj: JsonObject, fieldName: String, isInsideDirectory: Boolean) = { @@ -161,44 +167,52 @@ object OutputManipulator extends Poly1 { } else throw new RuntimeException(s"${path.pathAsString} is neither a valid file or a directory") } - private def resolveOutputViaInnerType(mot: MyriadOutputInnerType) + private def resolveOutputViaInnerType(moits: Array[MyriadOutputInnerType]) (jsValue: JsValue, pathBuilder: PathBuilder, schemaOption: Option[Schema]): Json = { - (jsValue, mot) match { + + (jsValue, moits) match { //CWL expects quite a few enhancements to the File structure, hence... - case (JsString(metadata), Inl(CwlType.File)) => stringToFile(metadata, pathBuilder) + case (JsString(metadata), Array(Inl(CwlType.File))) => stringToFile(metadata, pathBuilder) // If it's a JsObject it means it's already in the right format, we just want to fill in some values that might not // have been populated like "checksum" and "size" - case (obj: JsObject, Inl(CwlType.File) | Inl(CwlType.Directory)) => + case (obj: JsObject, a) if a.contains(Inl(CwlType.File)) || a.contains(Inl(CwlType.Directory)) => import io.circe.parser._ - val json = parse(obj.compactPrint).right.getOrElse(throw new Exception("Failed to parse Json output as Json... 
something is very wrong")) - json.mapObject(populateFileFields(pathBuilder, isInsideDirectory = false, schemaOption)) - case (JsNumber(metadata), Inl(CwlType.Long)) => metadata.longValue.asJson - case (JsNumber(metadata), Inl(CwlType.Float)) => metadata.floatValue.asJson - case (JsNumber(metadata), Inl(CwlType.Double)) => metadata.doubleValue.asJson - case (JsNumber(metadata), Inl(CwlType.Int)) => metadata.intValue.asJson - case (JsString(metadata), Inl(CwlType.String)) => metadata.asJson + val json: Json = parse(obj.compactPrint).right.getOrElse(throw new Exception("Failed to parse Json output as Json... something is very wrong")) + val fileExists = (for { + o <- json.asObject + l <- o.kleisli("location") + s <- l.asString + c = if (a.contains(Inl(CwlType.Directory))) s.ensureSlashed else s + p <- pathBuilder.build(c).toOption + } yield p.exists).getOrElse(false) + if (fileExists) json.mapObject(populateFileFields(pathBuilder, isInsideDirectory = false, schemaOption)) else Json.Null + case (JsNumber(metadata), Array(Inl(CwlType.Long))) => metadata.longValue.asJson + case (JsNumber(metadata), Array(Inl(CwlType.Float))) => metadata.floatValue.asJson + case (JsNumber(metadata), Array(Inl(CwlType.Double))) => metadata.doubleValue.asJson + case (JsNumber(metadata), Array(Inl(CwlType.Int))) => metadata.intValue.asJson + case (JsString(metadata), Array(Inl(CwlType.String))) => metadata.asJson //The Anys. They have to be done for each type so that the asJson can use this type information when going back to Json representation - case (JsString(metadata), Inl(CwlType.Any)) => metadata.asJson - case (JsNumber(metadata), Inl(CwlType.Any)) => metadata.asJson - case (obj: JsObject, Inl(CwlType.Any)) => + case (JsString(metadata), Array(Inl(CwlType.Any))) => metadata.asJson + case (JsNumber(metadata), Array(Inl(CwlType.Any))) => metadata.asJson + case (obj: JsObject, Array(Inl(CwlType.Any))) => import io.circe.parser._ parse(obj.compactPrint).right.getOrElse(throw new Exception("Failed to parse Json output as Json... 
something is very wrong")) - case (JsBoolean(metadata), Inl(CwlType.Any)) => metadata.asJson - case (JsArray(metadata), Inl(CwlType.Any)) => metadata.asJson - case (JsNull, Inl(CwlType.Any)) => Json.Null + case (JsBoolean(metadata), Array(Inl(CwlType.Any))) => metadata.asJson + case (JsArray(metadata), Array(Inl(CwlType.Any))) => metadata.asJson + case (JsNull, a) if a.contains(Inl(CwlType.Any)) || a.contains(Inl(CwlType.Null)) => Json.Null - case (JsArray(metadata), tpe) if tpe.select[OutputArraySchema].isDefined => + case (JsArray(metadata), Array(tpe)) if tpe.select[OutputArraySchema].isDefined => (for { schema <- tpe.select[OutputArraySchema] items = schema.items innerType <- items.select[MyriadOutputInnerType] outputJson = metadata.map(m => - resolveOutputViaInnerType(innerType)(m, pathBuilder, schemaOption)).asJson + resolveOutputViaInnerType(Array(innerType))(m, pathBuilder, schemaOption)).asJson } yield outputJson).getOrElse(throw new RuntimeException(s"We currently do not support output arrays with ${tpe.select[OutputArraySchema].get.items} inner type")) - case (JsObject(metadata), tpe) if tpe.select[OutputRecordSchema].isDefined => + case (JsObject(metadata), Array(tpe)) if tpe.select[OutputRecordSchema].isDefined => def processField(field: OutputRecordField) = { val parsedName = FullyQualifiedName(field.name)(ParentName.empty).id field.`type`.select[MyriadOutputInnerType] map { parsedName -> _ } @@ -209,21 +223,19 @@ object OutputManipulator extends Poly1 { fields <- schema.fields typeMap = fields.flatMap(processField).toMap outputJson = metadata.map({ - case (k, v) => k -> resolveOutputViaInnerType(typeMap(k))(v, pathBuilder, schemaOption) + case (k, v) => k -> resolveOutputViaInnerType(Array(typeMap(k)))(v, pathBuilder, schemaOption) }).asJson } yield outputJson).getOrElse(throw new RuntimeException(s"We currently do not support output record schemas with ${tpe.select[OutputArraySchema].get.items} inner type")) - case (JsNull, Inl(CwlType.Null)) => Json.Null case (json, tpe) => throw new RuntimeException(s"We currently do not support outputs (${json.getClass.getSimpleName}) of $json and type $tpe") } } - implicit val moit: Case.Aux[MyriadOutputInnerType, (JsValue, PathBuilder, Option[Schema]) => Json] = at { - resolveOutputViaInnerType + implicit val moit: Case.Aux[MyriadOutputInnerType, (JsValue, PathBuilder, Option[Schema]) => Json] = at { t => + resolveOutputViaInnerType(Array(t)) } implicit val amoit: Case.Aux[Array[MyriadOutputInnerType], (JsValue, PathBuilder, Option[Schema]) => Json] = - at { - amoit => - resolveOutputViaInnerType(amoit.head) + at { amoit => + resolveOutputViaInnerType(amoit) } } diff --git a/cloudSupport/src/main/scala/cromwell/cloudsupport/aws/AwsConfiguration.scala b/cloudSupport/src/main/scala/cromwell/cloudsupport/aws/AwsConfiguration.scala new file mode 100644 index 00000000000..bcad6e413c6 --- /dev/null +++ b/cloudSupport/src/main/scala/cromwell/cloudsupport/aws/AwsConfiguration.scala @@ -0,0 +1,176 @@ +/* + * Copyright 2018 Amazon.com, Inc. or its affiliates. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +package cromwell.cloudsupport.aws + +import cats.data.Validated._ +import cats.instances.list._ +import cats.syntax.traverse._ +import cats.syntax.validated._ +import com.typesafe.config.{Config, ConfigException} +import common.exception.MessageAggregation +import common.validation.ErrorOr._ +import common.validation.Validation._ + +import cromwell.cloudsupport.aws.auth.{AwsAuthMode, CustomKeyMode, DefaultMode, AssumeRoleMode} + +import net.ceedubs.ficus.Ficus._ +import org.slf4j.LoggerFactory + +final case class AwsConfiguration private (applicationName: String, + authsByName: Map[String, AwsAuthMode], + region: String) { + + def auth(name: String): ErrorOr[AwsAuthMode] = { + authsByName.get(name) match { + case None => + val knownAuthNames = authsByName.keys.mkString(", ") + s"`aws` configuration stanza does not contain an auth named '$name'. Known auth names: $knownAuthNames".invalidNel + case Some(a) => a.validNel + } + } +} + +object AwsConfiguration { + import scala.concurrent.duration._ + import scala.language.postfixOps + + lazy val DefaultConnectionTimeout = 3 minutes + lazy val DefaultReadTimeout = 3 minutes + + private val log = LoggerFactory.getLogger("AwsConfiguration") + + final case class AwsConfigurationException(errorMessages: List[String]) extends MessageAggregation { + override val exceptionContext = "AWS configuration" + } + + def apply(config: Config): AwsConfiguration = { + + val awsConfig = config.getConfig("aws") + + val appName = validate { awsConfig.as[String]("application-name") } + + val region = validate { + (awsConfig.getAs[String]("region")) match { + case Some(region) => region + case _ => "us-east-1" + } + } + val regionStr = region.getOrElse("us-east-1") + + def buildAuth(authConfig: Config): ErrorOr[AwsAuthMode] = { + + def customKeyAuth(authConfig: Config, name: String, region: String): ErrorOr[AwsAuthMode] = validate { + (authConfig.getAs[String]("access-key"), authConfig.getAs[String]("secret-key")) match { + case (Some(accessKey), Some(secretKey)) => + CustomKeyMode(name, accessKey, secretKey, region) + case _ => throw new ConfigException.Generic(s"""Access key and/or secret """ + + s"""key missing for service account "$name". 
See reference.conf under the aws.auth, """ + + s"""custom key section for details of required configuration.""") + } + } + + def defaultAuth(authConfig: Config, name: String, region: String): ErrorOr[AwsAuthMode] = validate { + DefaultMode(name, region) + } + + def assumeRoleAuth(authConfig: Config, name: String, region: String): ErrorOr[AwsAuthMode] = validate { + val externalId = authConfig.hasPath("external-id") match { + case true => authConfig.getString("external-id") + case _ => "" + } + AssumeRoleMode( + name, + // We won't do anything with this now, but it is required for + // assignment later + authConfig.getString("base-auth"), + authConfig.getString("role-arn"), + externalId, + region + ) + } + val name = authConfig.getString("name") + val scheme = authConfig.getString("scheme") + + scheme match { + case "default" => defaultAuth(authConfig, name, regionStr) + case "custom_keys" => customKeyAuth(authConfig, name, regionStr) + case "assume_role" => assumeRoleAuth(authConfig, name, regionStr) + case wut => s"Unsupported authentication scheme: $wut".invalidNel + } + } + + def assignDependency(dependentAuth: AssumeRoleMode, auths: List[ErrorOr[AwsAuthMode]]): Unit = { + // We only care here about valid auth blocks. If something is invalid + // we need to throw at some point anyway. This helps unwrap some of the + // validation type wrappers that are involved at this point in the code + val validAuths = auths.collect { case Valid(v) => v } + + // Look for the base auth from the config. If we find it, we'll assign + // here. Unfortunately, we will rely on a runtime error if the base auth + // does not end up getting assigned to the AssumeRoleMode object + val baseAuth = validAuths.collectFirst { case a if a.name == dependentAuth.baseAuthName => a } + baseAuth foreach dependentAuth.assign + } + + def assignDependencies(auths: List[ErrorOr[AwsAuthMode]]): List[ErrorOr[AwsAuthMode]] = { + // Assume role is somewhat special. We need to process assume role type + // auths after its base auth is created. As such, we'll wire in the + // base auth element after the list is created + auths.collect { case Valid(arm: AssumeRoleMode) => assignDependency(arm, auths) } + auths + } + val listOfErrorOrAuths: List[ErrorOr[AwsAuthMode]] = + assignDependencies(awsConfig.as[List[Config]]("auths").map(buildAuth)) + val errorOrAuthList: ErrorOr[List[AwsAuthMode]] = listOfErrorOrAuths.sequence[ErrorOr, AwsAuthMode] + + def uniqueAuthNames(list: List[AwsAuthMode]): ErrorOr[Unit] = { + val duplicateAuthNames = list.groupBy(_.name) collect { case (n, as) if as.size > 1 => n } + if (duplicateAuthNames.nonEmpty) { + ("Duplicate auth names: " + duplicateAuthNames.mkString(", ")).invalidNel + } else { + ().validNel + } + } + + (appName, errorOrAuthList, region).flatMapN { (name, list, region) => + uniqueAuthNames(list) map { _ => + AwsConfiguration(name, list map { a => a.name -> a } toMap, region) + } + } match { + case Valid(r) => r + case Invalid(f) => + val errorMessages = f.toList.mkString(", ") + log.error(errorMessages) + throw AwsConfigurationException(f.toList) + } + } +} diff --git a/cloudSupport/src/main/scala/cromwell/cloudsupport/aws/auth/AwsAuthMode.scala b/cloudSupport/src/main/scala/cromwell/cloudsupport/aws/auth/AwsAuthMode.scala new file mode 100644 index 00000000000..7e8795743da --- /dev/null +++ b/cloudSupport/src/main/scala/cromwell/cloudsupport/aws/auth/AwsAuthMode.scala @@ -0,0 +1,194 @@ +/* + * Copyright 2018 Amazon.com, Inc. or its affiliates. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +package cromwell.cloudsupport.aws.auth + +import software.amazon.awssdk.core.auth.{AwsCredentials, + AwsSessionCredentials, + AnonymousCredentialsProvider, + DefaultCredentialsProvider, + StaticCredentialsProvider} +import software.amazon.awssdk.core.regions.Region +import software.amazon.awssdk.services.sts.{STSClient} +import software.amazon.awssdk.services.sts.model.{GetCallerIdentityRequest, + AssumeRoleRequest} + +import cromwell.cloudsupport.aws.auth.AwsAuthMode.OptionLookup + +import org.slf4j.LoggerFactory +import com.google.api.client.json.jackson2.JacksonFactory +import scala.util.{Failure, Success, Try} + +object AwsAuthMode { + type OptionLookup = String => String + lazy val jsonFactory = JacksonFactory.getDefaultInstance +} + +sealed trait AwsAuthMode { + protected lazy val log = LoggerFactory.getLogger(getClass.getSimpleName) + + /** + * Validate the auth mode against provided options + */ + def validate(options: OptionLookup): Unit = { + () + } + + def name: String + + def credential(options: OptionLookup): AwsCredentials + + /** + * Enables swapping out credential validation for various testing purposes ONLY. + * + * All traits in this file are sealed, all classes final, meaning things + * like Mockito or other java/scala overrides cannot work. 
+ */ + private[auth] var credentialValidation: ((AwsCredentials, String) => Unit) = + (credentials: AwsCredentials, region: String) => { + STSClient + .builder + .region(Region.of(region)) + .credentialsProvider(StaticCredentialsProvider.create(credentials)) + .build + .getCallerIdentity(GetCallerIdentityRequest.builder.build) + () + } + + protected def validateCredential(credential: AwsCredentials, region: String) = { + Try(credentialValidation(credential, region)) match { + case Failure(ex) => throw new RuntimeException(s"Credentials are invalid: ${ex.getMessage}", ex) + case Success(_) => credential + } + } +} + +case object MockAuthMode extends AwsAuthMode { + override val name = "no_auth" + + lazy val _credential = AnonymousCredentialsProvider.create.getCredentials + + override def credential(options: OptionLookup): AwsCredentials = _credential +} + +object CustomKeyMode + +final case class CustomKeyMode(override val name: String, + accessKey: String, + secretKey: String, + region: String + ) extends AwsAuthMode { + private lazy val _credential: AwsCredentials = { + // Validate credentials synchronously here, without retry. + // It's very unlikely to fail as it should not happen more than a few times + // (one for the engine and for each backend using it) per Cromwell instance. + validateCredential(AwsCredentials.create(accessKey, secretKey), region) + } + + override def credential(options: OptionLookup): AwsCredentials = _credential +} + +final case class DefaultMode(override val name: String, region: String) extends AwsAuthMode { + private lazy val _credential: AwsCredentials = { + // + // The ProfileCredentialsProvider will return your [default] + // credential profile by reading from the credentials file located at + // (~/.aws/credentials). + // + + // Validate credentials synchronously here, without retry. + // It's very unlikely to fail as it should not happen more than a few times + // (one for the engine and for each backend using it) per Cromwell instance. + validateCredential(DefaultCredentialsProvider.create.getCredentials, region) + } + + override def credential(options: OptionLookup): AwsCredentials = _credential +} + + +final case class AssumeRoleMode(override val name: String, + baseAuthName: String, + roleArn: String, + externalId: String, + region: String + ) extends AwsAuthMode { + + private lazy val _credential: AwsCredentials = { + val requestBuilder = AssumeRoleRequest + .builder + .roleSessionName("cromwell") + .roleArn(roleArn) + .durationSeconds(3600) + + // The builder is simply mutating itself (TODO: find good ref, as v2 + // uses generated code) + // So we can get away with a val and discard the return value + if (! 
externalId.isEmpty) requestBuilder.externalId(externalId) + val request = requestBuilder.build + + val builder = STSClient.builder.region(Region of region) + // See comment above regarding builder + baseAuthObj match{ + case Some(auth) => builder.credentialsProvider(StaticCredentialsProvider.create(auth.credential(_ => ""))) + case _ => throw new RuntimeException(s"Base auth configuration required for assume role") + } + + val stsCredentials = builder.build.assumeRole(request).credentials + + val sessionCredentials = AwsSessionCredentials.create( + stsCredentials.accessKeyId, + stsCredentials.secretAccessKey, + stsCredentials.sessionToken) + + validateCredential(sessionCredentials, region) + } + + override def credential(options: OptionLookup): AwsCredentials = _credential + + private var baseAuthObj : Option[AwsAuthMode] = None + + def assign(baseAuth: AwsAuthMode) : Unit = { + baseAuthObj match { + case None => baseAuthObj = Some(baseAuth) + case _ => throw new RuntimeException(s"Base auth object has already been assigned") + } + } + + // We want to allow our tests access to the value + // of the baseAuthObj + def baseAuthentication() : AwsAuthMode = { + baseAuthObj match { + case Some(o) => o + case _ => throw new RuntimeException(s"Base auth object has not been set") + } + } +} + +class OptionLookupException(val key: String, cause: Throwable) extends RuntimeException(key, cause) diff --git a/cloudSupport/src/main/scala/cromwell/cloudsupport/aws/s3/S3Storage.scala b/cloudSupport/src/main/scala/cromwell/cloudsupport/aws/s3/S3Storage.scala new file mode 100644 index 00000000000..905e933ae5f --- /dev/null +++ b/cloudSupport/src/main/scala/cromwell/cloudsupport/aws/s3/S3Storage.scala @@ -0,0 +1,73 @@ +/* + * Copyright 2018 Amazon.com, Inc. or its affiliates. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ +package cromwell.cloudsupport.aws.s3 + +import software.amazon.awssdk.services.s3.S3AdvancedConfiguration +import software.amazon.awssdk.services.s3.S3Client +import software.amazon.awssdk.core.auth.{AwsCredentials, StaticCredentialsProvider} +import com.typesafe.config.ConfigFactory +import net.ceedubs.ficus.Ficus._ // scalastyle:ignore + +object S3Storage { + val DefaultConfiguration = { + val accelerateModeEnabled = ConfigFactory.load().as[Option[Boolean]]("s3.accelerate-mode").getOrElse(false) + val dualstackEnabled = ConfigFactory.load().as[Option[Boolean]]("s3.dual-stack").getOrElse(false) + val pathStyleAccessEnabled = ConfigFactory.load().as[Option[Boolean]]("s3.path-style-access").getOrElse(false) + + S3AdvancedConfiguration.builder + .accelerateModeEnabled(accelerateModeEnabled) + .dualstackEnabled(dualstackEnabled) + .pathStyleAccessEnabled(pathStyleAccessEnabled) + .build + } + + def s3Client(configuration: S3AdvancedConfiguration, credentials: AwsCredentials): S3Client = { + S3Client.builder + .advancedConfiguration(configuration) + .credentialsProvider(StaticCredentialsProvider.create(credentials)) + .build + } + + def s3Client(credentials: AwsCredentials): S3Client = { + s3Client(s3AdvancedConfiguration(), credentials) + } + + def s3AdvancedConfiguration(accelerateModeEnabled: Boolean = false, + dualstackEnabled: Boolean = false, + pathStyleAccessEnabled: Boolean = false): S3AdvancedConfiguration = { + + S3AdvancedConfiguration.builder + .accelerateModeEnabled(accelerateModeEnabled) + .dualstackEnabled(dualstackEnabled) + .pathStyleAccessEnabled(pathStyleAccessEnabled) + .build + } +} diff --git a/cloudSupport/src/test/scala/cromwell/cloudsupport/aws/AwsConfigurationSpec.scala b/cloudSupport/src/test/scala/cromwell/cloudsupport/aws/AwsConfigurationSpec.scala new file mode 100644 index 00000000000..23739870ba6 --- /dev/null +++ b/cloudSupport/src/test/scala/cromwell/cloudsupport/aws/AwsConfigurationSpec.scala @@ -0,0 +1,336 @@ +/* + * Copyright 2018 Amazon.com, Inc. or its affiliates. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +package cromwell.cloudsupport.aws + +import cats.implicits._ +import com.typesafe.config.{ConfigException, ConfigFactory} +import cromwell.cloudsupport.aws.AwsConfiguration.AwsConfigurationException +import cromwell.cloudsupport.aws.auth.{AssumeRoleMode,CustomKeyMode,DefaultMode} +import org.scalatest.{FlatSpec, Matchers} + + +class AwsConfigurationSpec extends FlatSpec with Matchers { + + behavior of "AwsConfiguration" + + it should "parse all manner of well-formed auths" in { + val righteousAwsConfig = + s""" + |aws { + | application-name = "cromwell" + | + | auths = [ + | { + | name = "default" + | scheme = "default" + | }, + | { + | name = "custom-keys" + | scheme = "custom_keys" + | access-key = "access_key_id" + | secret-key = "secret_key" + | }, + | { + | name = "assume-role-based-on-another-with-external" + | scheme = "assume_role" + | base-auth = "default" + | role-arn = "my-role-arn" + | external-id = "my-external-id" + | }, + | { + | name = "assume-role-based-on-another" + | scheme = "assume_role" + | base-auth = "default" + | role-arn = "my-role-arn" + | } + | ] + | + | region = "region" + |} + | + """.stripMargin + + val conf = AwsConfiguration(ConfigFactory.parseString(righteousAwsConfig)) + + conf.applicationName shouldBe "cromwell" + conf.region shouldBe "region" + conf.authsByName should have size 4 + + val auths = conf.authsByName.values + + val default = (auths collectFirst { case a: DefaultMode => a }).get + default.name shouldBe "default" + + val customKey = (auths collectFirst { case a: CustomKeyMode => a }).get + customKey.name shouldBe "custom-keys" + customKey.accessKey shouldBe "access_key_id" + customKey.secretKey shouldBe "secret_key" + + val assumeRoleWithId = (auths collectFirst { case a: AssumeRoleMode => a }).get + assumeRoleWithId.name shouldBe "assume-role-based-on-another-with-external" + assumeRoleWithId.baseAuthName shouldBe "default" + assumeRoleWithId.baseAuthentication.name shouldBe "default" + assumeRoleWithId.roleArn shouldBe "my-role-arn" + assumeRoleWithId.externalId shouldBe "my-external-id" + + val assumeRole = (auths.takeRight(1) collectFirst { case a: AssumeRoleMode => a }).get + assumeRole.name shouldBe "assume-role-based-on-another" + assumeRole.baseAuthName shouldBe "default" + assumeRole.baseAuthentication.name shouldBe "default" + assumeRole.roleArn shouldBe "my-role-arn" + assumeRole.externalId shouldBe "" + } + + it should "default region to us-east-1" in { + val config = + """|aws { + | application-name = "cromwell" + | + | auths = [ + | { + | name = "name-default" + | scheme = "default" + | } + | ] + |} + |""".stripMargin + + val conf = AwsConfiguration(ConfigFactory.parseString(config)) + conf.region shouldBe "us-east-1" + } + + it should "return a known auth" in { + val config = + """|aws { + | application-name = "cromwell" + | + | auths = [ + | { + | name = "name-default" + | scheme = "default" + | } + | ] + |} + |""".stripMargin + + val conf = 
AwsConfiguration(ConfigFactory.parseString(config)) + conf.auth("name-default").map(_.name) should be("name-default".valid) + } + + it should "not return an unknown auth" in { + val config = + """|aws { + | application-name = "cromwell" + | + | auths = [ + | { + | name = "name-default" + | scheme = "default" + | } + | ] + |} + |""".stripMargin + + val conf = AwsConfiguration(ConfigFactory.parseString(config)) + conf.auth("name-botched") should be( + "`aws` configuration stanza does not contain an auth named 'name-botched'. Known auth names: name-default" + .invalidNel) + } + + it should "not parse a configuration stanza without applicationName" in { + val applessAwsConfig = + """ + |aws { + | auths = [ + | { + | name = "name-default" + | scheme = "default" + | } + | ] + |} + """.stripMargin + + the[AwsConfigurationException] thrownBy { + AwsConfiguration(ConfigFactory.parseString(applessAwsConfig)) + } should have message "AWS configuration:\nNo configuration setting found for key 'application-name'" + } + + it should "not parse a configuration stanza without service account credentials" in { + val noServiceAccountCredentials = + """ + |aws { + | application-name = "cromwell" + | + | auths = [ + | { + | name = "service-account" + | scheme = "custom_keys" + | } + | ] + |} + """.stripMargin + + the[AwsConfigurationException] thrownBy { + AwsConfiguration(ConfigFactory.parseString(noServiceAccountCredentials)) + } should have message "AWS configuration:\n" + + "Access key and/or secret key missing for service account \"service-account\". See reference.conf under the " + + "aws.auth, custom key section for details of required configuration." + } + + it should "not parse a configuration stanza with an unsupported authentication scheme" in { + val unsupported = + """ + |aws { + | application-name = "cromwell" + | + | auths = [ + | { + | name = "unsupported-auth" + | scheme = "not supported" + | } + | ] + |} + """.stripMargin + + the[AwsConfigurationException] thrownBy { + AwsConfiguration(ConfigFactory.parseString(unsupported)) + } should have message "AWS configuration:\nUnsupported authentication scheme: not supported" + } + + it should "not parse a configuration stanza without a schema" in { + val schemeless = + """ + |aws { + | application-name = "cromwell" + | + | auths = [ + | { + | name = "scheme-unspecified" + | } + | ] + |} + """.stripMargin + + the[ConfigException.Missing] thrownBy { + AwsConfiguration(ConfigFactory.parseString(schemeless)) + } should have message "No configuration setting found for key 'scheme'" + } + + it should "not parse a configuration stanza without an auth name" in { + val nameless = + """ + |aws { + | application-name = "cromwell" + | + | auths = [ + | { + | scheme = "default" + | } + | ] + |} + """.stripMargin + + the[ConfigException.Missing] thrownBy { + AwsConfiguration(ConfigFactory.parseString(nameless)) + } should have message "No configuration setting found for key 'name'" + } + + it should "not parse a configuration stanza with a bad access-key in custom keys mode" in { + // The various AwsAuthModes actually don't complain about spurious keys in their + // configurations as long as all the keys they do care about are present. That's not + // necessarily ideal behavior. 
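The negative tests around this point pin down the error model: a malformed `aws` stanza makes `AwsConfiguration(...)` throw (an aggregated `AwsConfigurationException`, or a raw `ConfigException.Missing` for absent keys), while looking up an unknown auth name returns an invalid `ErrorOr`. A small sketch of handling both at startup; the helper name is an assumption:

```scala
import scala.util.{Failure, Success, Try}
import cats.data.Validated.{Invalid, Valid}
import com.typesafe.config.ConfigFactory
import cromwell.cloudsupport.aws.AwsConfiguration
import cromwell.cloudsupport.aws.auth.AwsAuthMode

// Fail fast on either failure mode: parsing exceptions or an unknown auth name.
def resolveAuthOrExit(authName: String): AwsAuthMode =
  Try(AwsConfiguration(ConfigFactory.load())) match {
    case Failure(e) =>
      // Covers AwsConfigurationException as well as ConfigException.Missing.
      sys.error(s"Invalid aws configuration: ${e.getMessage}")
    case Success(conf) =>
      conf.auth(authName) match {
        case Valid(mode)     => mode
        case Invalid(errors) => sys.error(errors.toList.mkString(", "))
      }
  }
```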
+ val badKeyInRefreshTokenMode = + """ + |aws { + | application-name = "cromwell" + | + | auths = [ + | { + | name = "name-refresh" + | scheme = "custom_keys" + | access-key-botched-key = "secret_id" + | secret-key = "secret_secret" + | } + | ] + |} + """.stripMargin + + the[AwsConfigurationException] thrownBy { + AwsConfiguration(ConfigFactory.parseString(badKeyInRefreshTokenMode)) + } should have message "AWS configuration:\nAccess key and/or secret key missing for service account \"name-refresh\". See reference.conf under the aws.auth, custom key section for details of required configuration." + } + + it should "not parse a configuration stanza without a role-arn in assume-role mode" in { + val badKeyInUserMode = + """ + |aws { + | application-name = "cromwell" + | + | auths = [ + | { + | name = "name-user" + | scheme = "assume_role" + | role-arn-botched = "my-role-arn" + | base-auth = "default" + | } + | ] + |} + """.stripMargin + + the[AwsConfigurationException] thrownBy { + AwsConfiguration(ConfigFactory.parseString(badKeyInUserMode)) + } should have message "AWS configuration:\nNo configuration setting found for key 'role-arn'" + } + + it should "not parse a configuration stanza with a duplicate auth name" in { + val duplicateAuthName = + """|aws { + | application-name = "cromwell" + | + | auths = [ + | { + | name = "name-default" + | scheme = "default" + | } + | { + | name = "name-default" + | scheme = "default" + | } + | ] + |} + |""".stripMargin + + the[AwsConfigurationException] thrownBy { + AwsConfiguration(ConfigFactory.parseString(duplicateAuthName)) + } should have message "AWS configuration:\nDuplicate auth names: name-default" + } +} diff --git a/cloudSupport/src/test/scala/cromwell/cloudsupport/aws/s3/S3StorageSpec.scala b/cloudSupport/src/test/scala/cromwell/cloudsupport/aws/s3/S3StorageSpec.scala new file mode 100644 index 00000000000..9a1a3c27642 --- /dev/null +++ b/cloudSupport/src/test/scala/cromwell/cloudsupport/aws/s3/S3StorageSpec.scala @@ -0,0 +1,61 @@ +/* + * Copyright 2018 Amazon.com, Inc. or its affiliates. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +package cromwell.cloudsupport.aws.s3 +import software.amazon.awssdk.core.auth.AnonymousCredentialsProvider +import org.scalatest.{FlatSpec, Matchers, Tag} + +class S3StorageSpec extends FlatSpec with Matchers { + + behavior of "S3Storage" + + it should "build the default cloud storage configuration" taggedAs S3StorageSpecUtils.AwsTest in { + val configuration = S3Storage.DefaultConfiguration + configuration.accelerateModeEnabled should be(false) + configuration.dualstackEnabled should be(false) + configuration.pathStyleAccessEnabled should be(false) + } + + it should "build s3 storage" taggedAs S3StorageSpecUtils.AwsTest in { + val configuration = S3Storage.s3AdvancedConfiguration(false, true) + configuration.accelerateModeEnabled should be(false) + configuration.dualstackEnabled should be(true) + configuration.pathStyleAccessEnabled should be(false) + } + + it should "build s3 client with credentials" taggedAs S3StorageSpecUtils.AwsTest in { + S3Storage.s3Client(AnonymousCredentialsProvider.create.getCredentials) + } + +} + +object S3StorageSpecUtils { + val AwsTest = Tag("AwsTest") +} diff --git a/common/src/main/scala/common/util/IntUtil.scala b/common/src/main/scala/common/util/IntUtil.scala new file mode 100644 index 00000000000..07a2357842d --- /dev/null +++ b/common/src/main/scala/common/util/IntUtil.scala @@ -0,0 +1,8 @@ +package common.util + +object IntUtil { + implicit class EnhancedInt(val int: Int) extends AnyVal { + def isEven: Boolean = int % 2 == 0 + def isOdd: Boolean = !isEven + } +} diff --git a/core/src/main/resources/reference.conf b/core/src/main/resources/reference.conf index fcaadef8348..2f44ae74b66 100644 --- a/core/src/main/resources/reference.conf +++ b/core/src/main/resources/reference.conf @@ -243,6 +243,9 @@ filesystems { oss { class = "cromwell.filesystems.oss.OssPathBuilderFactory" } + s3 { + class = "cromwell.filesystems.s3.S3PathBuilderFactory" + } } docker { diff --git a/core/src/main/scala/cromwell/core/WorkflowState.scala b/core/src/main/scala/cromwell/core/WorkflowState.scala index f58a48642fa..db4bddfedc5 100644 --- a/core/src/main/scala/cromwell/core/WorkflowState.scala +++ b/core/src/main/scala/cromwell/core/WorkflowState.scala @@ -12,7 +12,7 @@ sealed trait WorkflowState { object WorkflowState { lazy val WorkflowStateValues = Seq(WorkflowOnHold, WorkflowSubmitted, WorkflowRunning, WorkflowFailed, WorkflowSucceeded, WorkflowAborting, WorkflowAborted) - def withName(str: String): WorkflowState = WorkflowStateValues.find(_.toString == str).getOrElse( + def withName(str: String): WorkflowState = WorkflowStateValues.find(_.toString.equalsIgnoreCase(str)).getOrElse( throw new NoSuchElementException(s"No such WorkflowState: $str")) implicit val WorkflowStateSemigroup = new Semigroup[WorkflowState] { diff --git a/core/src/main/scala/cromwell/core/io/AsyncIo.scala b/core/src/main/scala/cromwell/core/io/AsyncIo.scala index 37196cc940d..130525dd92b 100644 --- a/core/src/main/scala/cromwell/core/io/AsyncIo.scala +++ 
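The `WorkflowState.withName` change above makes state lookups case-insensitive, which matters when the name comes from a user-supplied query parameter. A quick illustration (the state `toString` values such as "Running" and "On Hold" are assumed to be unchanged):

```scala
import cromwell.core.WorkflowState

// Any casing now resolves to the same state object; unknown names still throw
// NoSuchElementException as before.
val running = WorkflowState.withName("running")   // same result as withName("Running")
val onHold  = WorkflowState.withName("on hold")   // matches WorkflowOnHold ("On Hold")
```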
b/core/src/main/scala/cromwell/core/io/AsyncIo.scala @@ -57,6 +57,10 @@ class AsyncIo(ioEndpoint: ActorRef, ioCommandBuilder: IoCommandBuilder) { def readLinesAsync(path: Path): Future[Traversable[String]] = { asyncCommand(ioCommandBuilder.readLines(path)) } + + def isDirectory(path: Path): Future[Boolean] = { + asyncCommand(ioCommandBuilder.isDirectoryCommand(path)) + } def copyAsync(src: Path, dest: Path, overwrite: Boolean = true): Future[Unit] = { // Allow for a much larger timeout for copies, as large files can take a while (even on gcs, if they are in different locations...) diff --git a/core/src/main/scala/cromwell/core/io/DefaultIoCommand.scala b/core/src/main/scala/cromwell/core/io/DefaultIoCommand.scala index eda1798f93b..e282b7e855f 100644 --- a/core/src/main/scala/cromwell/core/io/DefaultIoCommand.scala +++ b/core/src/main/scala/cromwell/core/io/DefaultIoCommand.scala @@ -25,4 +25,5 @@ object DefaultIoCommand { case class DefaultIoTouchCommand(override val file: Path) extends IoTouchCommand(file) case class DefaultIoExistsCommand(override val file: Path) extends IoExistsCommand(file) case class DefaultIoReadLinesCommand(override val file: Path) extends IoReadLinesCommand(file) + case class DefaultIoIsDirectoryCommand(override val file: Path) extends IoIsDirectoryCommand(file) } diff --git a/core/src/main/scala/cromwell/core/io/IoCommand.scala b/core/src/main/scala/cromwell/core/io/IoCommand.scala index 9723a31cdd8..8e1aa21d91e 100644 --- a/core/src/main/scala/cromwell/core/io/IoCommand.scala +++ b/core/src/main/scala/cromwell/core/io/IoCommand.scala @@ -128,3 +128,11 @@ abstract class IoReadLinesCommand(val file: Path) extends SingleFileIoCommand[Tr override def toString = s"read lines of ${file.pathAsString}" override lazy val name = "read lines" } + +/** + * Check whether a path represents a directory + */ +abstract class IoIsDirectoryCommand(val file: Path) extends SingleFileIoCommand[Boolean] { + override def toString = s"check whether ${file.pathAsString} is a directory" + override lazy val name = "is directory" +} diff --git a/core/src/main/scala/cromwell/core/io/IoCommandBuilder.scala b/core/src/main/scala/cromwell/core/io/IoCommandBuilder.scala index 236143bede5..d35e5d85482 100644 --- a/core/src/main/scala/cromwell/core/io/IoCommandBuilder.scala +++ b/core/src/main/scala/cromwell/core/io/IoCommandBuilder.scala @@ -17,6 +17,7 @@ abstract class PartialIoCommandBuilder { def hashCommand: PartialFunction[Path, IoHashCommand] = PartialFunction.empty def touchCommand: PartialFunction[Path, IoTouchCommand] = PartialFunction.empty def existsCommand: PartialFunction[Path, IoExistsCommand] = PartialFunction.empty + def isDirectoryCommand: PartialFunction[Path, IoIsDirectoryCommand] = PartialFunction.empty def readLinesCommand: PartialFunction[Path, IoReadLinesCommand] = PartialFunction.empty } @@ -82,6 +83,10 @@ class IoCommandBuilder(partialBuilders: List[PartialIoCommandBuilder] = List.emp buildOrDefault(_.existsCommand, file, DefaultIoExistsCommand(file)) } + def isDirectoryCommand(file: Path): IoIsDirectoryCommand = { + buildOrDefault(_.isDirectoryCommand, file, DefaultIoIsDirectoryCommand(file)) + } + def readLines(file: Path): IoReadLinesCommand = { buildOrDefault(_.readLinesCommand, file, DefaultIoReadLinesCommand(file)) } diff --git a/core/src/main/scala/cromwell/core/logging/WorkflowLogger.scala b/core/src/main/scala/cromwell/core/logging/WorkflowLogger.scala index a27e2b4a1c6..0f5d03262e9 100644 --- a/core/src/main/scala/cromwell/core/logging/WorkflowLogger.scala +++ 
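The new `IoIsDirectoryCommand` above follows the established single-file command pattern: `AsyncIo.isDirectory` asks the `IoCommandBuilder` for a command and falls back to `DefaultIoIsDirectoryCommand` unless a backend contributes its own partial builder. A hedged sketch of what such a backend-specific override could look like; the `S3IsDirectoryCommand` name and the `s3://` check are illustrative, not part of this change:

```scala
import cromwell.core.io._
import cromwell.core.path.Path

// Illustrative backend-specific command; real backends define their own IoCommand subclasses.
case class S3IsDirectoryCommand(override val file: Path) extends IoIsDirectoryCommand(file)

class S3PartialIoCommandBuilder extends PartialIoCommandBuilder {
  // Only paths this backend understands are intercepted; anything else falls through to
  // DefaultIoIsDirectoryCommand inside IoCommandBuilder.isDirectoryCommand.
  override def isDirectoryCommand: PartialFunction[Path, IoIsDirectoryCommand] = {
    case path if path.pathAsString.startsWith("s3://") => S3IsDirectoryCommand(path)
  }
}
```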
b/core/src/main/scala/cromwell/core/logging/WorkflowLogger.scala @@ -2,6 +2,7 @@ package cromwell.core.logging import akka.actor.{Actor, ActorLogging} import akka.event.LoggingAdapter +import ch.qos.logback.classic import ch.qos.logback.classic.encoder.PatternLayoutEncoder import ch.qos.logback.classic.spi.ILoggingEvent import ch.qos.logback.classic.{Level, LoggerContext} @@ -108,7 +109,15 @@ class WorkflowLogger(loggerName: String, override def getName = loggerName - def deleteLogFile() = Try { workflowLogPath foreach { _.delete() } } + /** + * Stop all log appenders to release file handles, delete the file if requested. + */ + def close(andDelete: Boolean = false) = Try { + workflowLogPath foreach { path => + if (andDelete) path.delete() + if (fileLogger != NOPLogger.NOP_LOGGER) fileLogger.asInstanceOf[classic.Logger].detachAndStopAllAppenders() + } + } import WorkflowLogger._ diff --git a/core/src/test/scala/cromwell/core/Tags.scala b/core/src/test/scala/cromwell/core/Tags.scala index 2cee16a04fb..39c3cbf9750 100644 --- a/core/src/test/scala/cromwell/core/Tags.scala +++ b/core/src/test/scala/cromwell/core/Tags.scala @@ -7,4 +7,5 @@ object Tags { object IntegrationTest extends Tag("CromwellIntegrationTest") object DbmsTest extends Tag("DbmsTest") object PostWomTest extends Tag("PostWomTest") + object AwsTest extends Tag("AwsTest") } diff --git a/core/src/test/scala/cromwell/core/path/PathBuilderSpecUtils.scala b/core/src/test/scala/cromwell/core/path/PathBuilderSpecUtils.scala index 70e76e6465a..d16ca4fa53d 100644 --- a/core/src/test/scala/cromwell/core/path/PathBuilderSpecUtils.scala +++ b/core/src/test/scala/cromwell/core/path/PathBuilderSpecUtils.scala @@ -1,9 +1,9 @@ package cromwell.core.path -import org.scalatest.FlatSpecLike import org.scalatest.Matchers._ import org.scalatest.prop.TableDrivenPropertyChecks._ import org.scalatest.prop._ +import org.scalatest.{FlatSpecLike, Tag} case class GoodPath(description: String, path: String, @@ -27,10 +27,12 @@ case class BadPath(description: String, path: String, exceptionMessage: String) trait PathBuilderSpecUtils { this: FlatSpecLike => - def truncateCommonRoots(builder: => PathBuilder, pathsToTruncate: TableFor3[String, String, String]) = { + def truncateCommonRoots(builder: => PathBuilder, + pathsToTruncate: TableFor3[String, String, String], + tag: Tag = PathBuilderSpecUtils.PathTest): Unit = { behavior of s"PathCopier" - it should "truncate common roots" in { + it should "truncate common roots" taggedAs tag in { forAll(pathsToTruncate) { (context, file, relative) => val contextPath = builder.build(context).get val filePath = builder.build(file).get @@ -40,7 +42,7 @@ trait PathBuilderSpecUtils { } } - def buildGoodPath(builder: => PathBuilder, goodPath: GoodPath) = { + def buildGoodPath(builder: => PathBuilder, goodPath: GoodPath, tag: Tag = PathBuilderSpecUtils.PathTest): Unit = { behavior of s"Building ${goodPath.description}" lazy val path = { @@ -48,61 +50,61 @@ trait PathBuilderSpecUtils { if (goodPath.normalize) path.normalize() else path } - it should "match expected pathAsString" in + it should "match expected pathAsString" taggedAs tag in withClue(s"for path ${goodPath.path}${if (goodPath.normalize) " (normalized)" else ""}:") { path.pathAsString should be(goodPath.pathAsString) } - it should "match expected pathWithoutScheme" in + it should "match expected pathWithoutScheme" taggedAs tag in withClue(s"for path ${goodPath.path}${if (goodPath.normalize) " (normalized)" else ""}:") { path.pathWithoutScheme should 
be(goodPath.pathWithoutScheme) } - it should "match expected parent" in + it should "match expected parent" taggedAs tag in withClue(s"for path ${goodPath.path}${if (goodPath.normalize) " (normalized)" else ""}:") { Option(path.parent).map(_.pathAsString).orNull should be(goodPath.parent) } - it should "match expected getParent" in + it should "match expected getParent" taggedAs tag in withClue(s"for path ${goodPath.path}${if (goodPath.normalize) " (normalized)" else ""}:") { Option(path.getParent).map(_.pathAsString).orNull should be(goodPath.getParent) } - it should "match expected root" in + it should "match expected root" taggedAs tag in withClue(s"for path ${goodPath.path}${if (goodPath.normalize) " (normalized)" else ""}:") { Option(path.root).map(_.pathAsString).orNull should be(goodPath.root) } - it should "match expected name" in + it should "match expected name" taggedAs tag in withClue(s"for path ${goodPath.path}${if (goodPath.normalize) " (normalized)" else ""}:") { path.name should be(goodPath.name) } - it should "match expected getFileName" in + it should "match expected getFileName" taggedAs tag in withClue(s"for path ${goodPath.path}${if (goodPath.normalize) " (normalized)" else ""}:") { Option(path.getFileName).map(_.pathAsString).orNull should be(goodPath.getFileName) } - it should "match expected getNameCount" in + it should "match expected getNameCount" taggedAs tag in withClue(s"for path ${goodPath.path}${if (goodPath.normalize) " (normalized)" else ""}:") { path.getNameCount should be(goodPath.getNameCount) } - it should "match expected isAbsolute" in + it should "match expected isAbsolute" taggedAs tag in withClue(s"for path ${goodPath.path}${if (goodPath.normalize) " (normalized)" else ""}:") { path.isAbsolute should be(goodPath.isAbsolute) } - it should "match expected isDirectory" in + it should "match expected isDirectory" taggedAs tag in withClue(s"for path ${goodPath.path}${if (goodPath.normalize) " (normalized)" else ""}:") { path.isDirectory should be(goodPath.isDirectory) } } - def buildBadPath(builder: => PathBuilder, badPath: BadPath) = { + def buildBadPath(builder: => PathBuilder, badPath: BadPath, tag: Tag = PathBuilderSpecUtils.PathTest): Unit = { behavior of s"Building ${badPath.description}" - it should "fail to build a path" in + it should "fail to build a path" taggedAs tag in withClue(s"for path ${badPath.path}:") { val exception = intercept[Exception](builder.build(badPath.path).get) exception.getMessage should be(badPath.exceptionMessage) @@ -110,3 +112,8 @@ trait PathBuilderSpecUtils { } } + +object PathBuilderSpecUtils { + val PathTest = Tag("PathTest") + val AwsTest = Tag("AwsTest") +} diff --git a/cromwell.examples.conf b/cromwell.examples.conf index 777a1bdbcea..c94bf004a0b 100644 --- a/cromwell.examples.conf +++ b/cromwell.examples.conf @@ -277,13 +277,13 @@ backend { # `temporary-directory` creates the temporary directory for commands. # # If this value is not set explicitly, the default value creates a unique temporary directory, equivalent to: - # temporary-directory = "mktemp -d \"$PWD\"/tmp.XXXXXX" + # temporary-directory = "$(mktemp -d \"$PWD\"/tmp.XXXXXX)" # # The expression is run from the execution directory for the script. The expression must create the directory # if it does not exist, and then return the full path to the directory. 
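With the `Tag` parameter added to the shared path-builder helpers above, a cloud-specific spec can opt all of its generated tests into a tag such as `AwsTest`. A minimal sketch; the `S3PathBuilderSpec` name, the builder, and the example path lists are assumptions, not part of this change:

```scala
import org.scalatest.FlatSpec
import cromwell.core.path.{BadPath, GoodPath, PathBuilder, PathBuilderSpecUtils}

class S3PathBuilderSpec extends FlatSpec with PathBuilderSpecUtils {

  // Assumed: a builder for s3:// paths plus example cases defined elsewhere in the spec.
  private def s3Builder: PathBuilder = ???
  private val goodPaths: Seq[GoodPath] = Seq.empty
  private val badPaths: Seq[BadPath] = Seq.empty

  // Every generated test carries the AwsTest tag, so CI can include or exclude the group.
  goodPaths foreach { buildGoodPath(s3Builder, _, PathBuilderSpecUtils.AwsTest) }
  badPaths foreach { buildBadPath(s3Builder, _, PathBuilderSpecUtils.AwsTest) }
}
```

ScalaTest's runner can then filter on the tag, e.g. `sbt "testOnly *S3PathBuilderSpec -- -n AwsTest"` to run only the tagged group, or `-l AwsTest` to leave it out.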
# # To create and return a non-random temporary directory, use something like: - # temporary-directory = "mkdir -p /tmp/mydir && echo /tmp/mydir" + # temporary-directory = "$(mkdir -p /tmp/mydir && echo /tmp/mydir)" # `script-epilogue` configures a shell command to run after the execution of every command block. # @@ -437,8 +437,8 @@ backend { # -b y \ # -N ${job_name} \ # -wd ${cwd} \ - # -o ${out} \ - # -e ${err} \ + # -o ${out}.qsub \ + # -e ${err}.qsub \ # -pe smp ${cpu} \ # ${"-l mem_free=" + memory_gb + "g"} \ # ${"-q " + sge_queue} \ @@ -649,7 +649,7 @@ backend { # cpu: 1 # failOnStderr: false # continueOnReturnCode: 0 - # memory: "2 GB" + # memory: "2048 MB" # bootDiskSizeGb: 10 # # Allowed to be a String, or a list of Strings # disks: "local-disk 10 SSD" diff --git a/cwl/src/main/scala/cwl/CommandOutputBinding.scala b/cwl/src/main/scala/cwl/CommandOutputBinding.scala index 1e60b13ea2e..14af88bc6cf 100644 --- a/cwl/src/main/scala/cwl/CommandOutputBinding.scala +++ b/cwl/src/main/scala/cwl/CommandOutputBinding.scala @@ -6,7 +6,7 @@ import cats.syntax.traverse._ import cats.syntax.validated._ import common.validation.ErrorOr._ import common.validation.Validation._ -import wom.expression.IoFunctionSet +import wom.expression.{FileEvaluation, IoFunctionSet} import wom.expression.IoFunctionSet.{IoDirectory, IoFile} import wom.types._ import wom.values._ @@ -46,7 +46,7 @@ object CommandOutputBinding { commandOutputBinding: CommandOutputBinding, secondaryFilesOption: Option[SecondaryFiles], ioFunctionSet: IoFunctionSet, - expressionLib: ExpressionLib): ErrorOr[Set[WomFile]] = { + expressionLib: ExpressionLib): ErrorOr[Set[FileEvaluation]] = { val parameterContext = ParameterContext(ioFunctionSet, expressionLib, inputs = inputValues) /* @@ -89,8 +89,12 @@ object CommandOutputBinding { for { primaryPaths <- GlobEvaluator.globs(commandOutputBinding.glob, parameterContext, expressionLib) primaryWomFiles <- primaryPathsToWomFiles(primaryPaths) + // This sets optional = false arbitrarily for now as this code doesn't have the context to make that determination, + // the caller can change this if necessary. + primaryEvaluations = primaryWomFiles map { FileEvaluation(_, optional = false, secondary = false) } secondaryWomFiles <- secondaryFilesToWomFiles(primaryWomFiles, ioFunctionSet) - } yield (primaryWomFiles ++ secondaryWomFiles).toSet + secondaryEvaluations = secondaryWomFiles map { FileEvaluation(_, optional = false, secondary = true) } + } yield (primaryEvaluations ++ secondaryEvaluations).toSet } /** diff --git a/cwl/src/main/scala/cwl/CwlJsonToDelayedCoercionFunction.scala b/cwl/src/main/scala/cwl/CwlJsonToDelayedCoercionFunction.scala index 0574b559ca7..25e669d75b2 100644 --- a/cwl/src/main/scala/cwl/CwlJsonToDelayedCoercionFunction.scala +++ b/cwl/src/main/scala/cwl/CwlJsonToDelayedCoercionFunction.scala @@ -62,25 +62,6 @@ private [cwl] object CwlJsonToDelayedCoercionFunction extends Json.Folder[Delaye case WomSingleFileType | WomMaybePopulatedFileType if value.toMap.get("class").flatMap(_.asString).contains("File") => Json.fromJsonObject(value).as[File] match { case Left(errors) => errors.message.invalidNel - - /* - From the CWL spec: - If no location or path is specified, a file object must specify contents with the UTF-8 text content of the file. - This is a "file literal". File literals do not correspond to external resources, but are created on disk with - contents with when needed for a executing a tool. 
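The move from `Set[WomFile]` to `Set[FileEvaluation]` above lets CWL output evaluation carry per-file flags rather than bare paths. A small sketch of the wrapping as used in this change; the file names are illustrative:

```scala
import wom.expression.FileEvaluation
import wom.values.{WomFile, WomSingleFile}

val primary: WomFile = WomSingleFile("out/result.bam")
val index: WomFile   = WomSingleFile("out/result.bam.bai")

// Primary files start out non-optional (callers flip `optional` once the output
// parameter's type is known); secondary files are flagged with `secondary = true`.
val evaluations: Set[FileEvaluation] = Set(
  FileEvaluation(primary, optional = false, secondary = false),
  FileEvaluation(index, optional = false, secondary = true)
)
```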
Where appropriate, expressions can return file literals to - define new files on a runtime. The maximum size of contents is 64 kilobytes. - - - NOTE WELL: - This implementation is incompatible with cloud backends, as the file is local and thus inaccessible from - those environments. Please see this issue: - https://github.com/broadinstitute/cromwell/issues/3581 - */ - case Right(file@File(_, None, None, _, _,_,_,_,Some(contents))) => { - val tempDir = better.files.File.newTemporaryDirectory() - val cwlFile: better.files.File = tempDir./(s"${contents.hashCode}").write(contents) - file.asWomValue.map(_.copy(valueOption = Option(cwlFile.path.toString))) - } case Right(file) => file.asWomValue } case WomMaybeListedDirectoryType | WomUnlistedDirectoryType if value.toMap.get("class").flatMap(_.asString).contains("Directory") => diff --git a/cwl/src/main/scala/cwl/CwlType.scala b/cwl/src/main/scala/cwl/CwlType.scala index 18404961e87..4535e500852 100644 --- a/cwl/src/main/scala/cwl/CwlType.scala +++ b/cwl/src/main/scala/cwl/CwlType.scala @@ -56,10 +56,19 @@ case class File private lazy val asWomValue: ErrorOr[WomMaybePopulatedFile] = { errorOrSecondaryFiles flatMap { secondaryFiles => - val valueOption = path.orElse(location).orElse(basename) + val valueOption = location.orElse(path) (valueOption, contents) match { case (None, None) => - "Cannot convert CWL File to WomValue without either a location, a path, a basename, or contents".invalidNel + "Cannot convert CWL File to WomValue without either a location, a path, or contents".invalidNel + case (None, Some(content)) => + new WomMaybePopulatedFile(None, checksum, size, format, contents) with LazyWomFile { + override def initialize(ioFunctionSet: IoFunctionSet) = { + val name = basename.getOrElse(content.hashCode.toString) + sync(ioFunctionSet.writeFile(name, content)).toErrorOr map { writtenFile => + this.copy(valueOption = Option(writtenFile.value)) + } + } + }.valid case (_, _) => WomMaybePopulatedFile(valueOption, checksum, size, format, contents, secondaryFiles).valid } @@ -76,7 +85,7 @@ object File { size: Option[Long] = None, secondaryFiles: Option[Array[FileOrDirectory]] = None, format: Option[String] = None, - contents: Option[String] = None): File = + contents: Option[String] = None): File = { new cwl.File( "File".narrow, location, @@ -88,6 +97,7 @@ object File { format, contents ) + } def dirname(value: String): String = { val index = value.lastIndexOf('/') @@ -111,7 +121,7 @@ object File { "" } } - + def recursivelyBuildDirectory(directory: String, ioFunctions: IoFunctionSet)(visited: Vector[String] = Vector.empty): ErrorOr[WomMaybeListedDirectory] = { for { listing <- sync(ioFunctions.listDirectory(directory)(visited)).toErrorOr @@ -121,7 +131,7 @@ object File { } } yield WomMaybeListedDirectory(Option(directory), Option(fileListing)) } - + private def asAbsoluteSiblingOfPrimary(primary: WomFile, pathFunctions: PathFunctionSet)(path: String) = { pathFunctions.absoluteSibling(primary.value, path) } @@ -149,7 +159,7 @@ object File { parameterContext: ParameterContext, expressionLib: ExpressionLib, ioFunctions: IoFunctionSet): ErrorOr[List[WomFile]] = { - + /* If the value is an expression, the value of self in the expression must be the primary input or output File object to which this binding applies. 
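The `LazyWomFile` hook above is what replaces the removed local temp-file handling for CWL file literals: rather than writing contents to a local directory at parse time, the write is deferred until an `IoFunctionSet` is available, so the literal can be materialized on whatever filesystem the backend uses. A hedged sketch of the consumer-visible behaviour, assuming `File.apply`'s other parameters default to `None` and that `asWomValue` remains publicly accessible:

```scala
import cwl.File

// A CWL "file literal": no location or path, only contents (the spec caps contents at 64 KB).
val literal = File(contents = Option("hello cromwell\n"))

// asWomValue no longer writes a local temp file. For a contents-only file it yields a
// lazily-initialized WomMaybePopulatedFile whose initialize(ioFunctionSet) writes the
// contents via ioFunctionSet.writeFile, naming it after basename (or the contents' hashCode).
val errorOrWomFile = literal.asWomValue
```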
@@ -163,11 +173,11 @@ object File { */ def parseResult(nestedLevel: Int)(womValue: WomValue): ErrorOr[List[WomFile]] = { womValue match { - case womString: WomString => + case womString: WomString => List(WomFile(stringWomFileType, womString.value |> asAbsoluteSiblingOfPrimary(primaryWomFile, ioFunctions.pathFunctions))).valid - case womMaybeListedDirectory: WomMaybeListedDirectory => + case womMaybeListedDirectory: WomMaybeListedDirectory => List(womMaybeListedDirectory.mapFile(asAbsoluteSiblingOfPrimary(primaryWomFile, ioFunctions.pathFunctions))).valid - case womMaybePopulatedFile: WomMaybePopulatedFile => + case womMaybePopulatedFile: WomMaybePopulatedFile => List(womMaybePopulatedFile.mapFile(asAbsoluteSiblingOfPrimary(primaryWomFile, ioFunctions.pathFunctions))).valid case womArray: WomArray if nestedLevel == 0 => womArray.value.toList flatTraverse parseResult(nestedLevel + 1) @@ -223,7 +233,6 @@ case class Directory private basename: Option[String], listing: Option[Array[FileOrDirectory]] ) { - lazy val errorOrListingOption: ErrorOr[Option[List[WomFile]]] = { val maybeErrorOrList: Option[ErrorOr[List[WomFile]]] = listing map { @@ -235,17 +244,18 @@ case class Directory private } lazy val asWomValue: ErrorOr[WomFile] = { - // Callers expect the directory's last component to match the basename if this Directory has a basename. - def tempDirectory: String = { - val dir = better.files.File.newTemporaryDirectory() - basename match { - case None => dir.pathAsString - case Some(b) => dir.createChild(b, asDirectory = true)().pathAsString - } - } errorOrListingOption flatMap { listingOption => - val valueOption = path.orElse(location).orElse(Option(tempDirectory)) - WomMaybeListedDirectory(valueOption, listingOption, basename).valid + path.orElse(location) map { value => + WomMaybeListedDirectory(Option(value), listingOption, basename).valid + } getOrElse { + new WomMaybeListedDirectory(None, listingOption, basename) with LazyWomFile { + override def initialize(ioFunctionSet: IoFunctionSet) = { + sync(ioFunctionSet.createTemporaryDirectory(basename)).toErrorOr map { tempDir => + this.copy(valueOption = Option(tempDir)) + } + } + }.valid + } } } } @@ -258,7 +268,10 @@ object Directory { ): Directory = new cwl.Directory("Directory".narrow, location, path, basename, listing) - def basename(value: String): String = value.stripSuffix("/").substring(value.lastIndexOf('/') + 1) + def basename(value: String): String = { + val stripped = value.stripSuffix("/") + stripped.substring(stripped.lastIndexOf('/') + 1) + } } private[cwl] object CwlDirectoryOrFileAsWomSingleDirectoryOrFile extends Poly1 { diff --git a/cwl/src/main/scala/cwl/CwlWomExpression.scala b/cwl/src/main/scala/cwl/CwlWomExpression.scala index 965d4c35a10..9064d83af68 100644 --- a/cwl/src/main/scala/cwl/CwlWomExpression.scala +++ b/cwl/src/main/scala/cwl/CwlWomExpression.scala @@ -1,7 +1,5 @@ package cwl -import java.nio.file.Paths - import cats.data.Validated.{Invalid, Valid} import cats.syntax.validated._ import common.validation.ErrorOr.{ErrorOr, ShortCircuitingFlatMap} @@ -17,8 +15,8 @@ import wom.expression.{FileEvaluation, IoFunctionSet, WomExpression} import wom.types._ import wom.values._ +import scala.concurrent.Await import scala.concurrent.duration.Duration -import scala.concurrent.{Await, Future} trait CwlWomExpression extends WomExpression { @@ -54,7 +52,7 @@ case class ECMAScriptWomExpression(expression: Expression, final case class InitialWorkDirFileGeneratorExpression(entry: IwdrListingArrayEntry, expressionLib: 
ExpressionLib) extends ContainerizedInputExpression { - def evaluate(inputValues: Map[String, WomValue], mappedInputValues: Map[String, WomValue], ioFunctionSet: IoFunctionSet): ErrorOr[AdHocValue] = { + def evaluate(inputValues: Map[String, WomValue], mappedInputValues: Map[String, WomValue], ioFunctionSet: IoFunctionSet): ErrorOr[List[AdHocValue]] = { def recursivelyBuildDirectory(directory: String): ErrorOr[WomMaybeListedDirectory] = { import cats.instances.list._ import cats.syntax.traverse._ @@ -68,13 +66,12 @@ final case class InitialWorkDirFileGeneratorExpression(entry: IwdrListingArrayEn } val updatedValues = inputValues map { case (k, v: WomMaybeListedDirectory) => k -> { - val absolutePathString = Paths.get(v.value).toAbsolutePath.toString + val absolutePathString = ioFunctionSet.pathFunctions.relativeToHostCallRoot(v.value) recursivelyBuildDirectory(absolutePathString) match { case Valid(d) => d case Invalid(es) => throw new RuntimeException(es.toList.mkString("Error building directory: ", ", ", "")) } } - case (k, v: WomMaybePopulatedFile) => k -> WomSingleFile(v.value) case kv => kv } val unmappedParameterContext = ParameterContext(ioFunctionSet, expressionLib, updatedValues) @@ -83,7 +80,7 @@ final case class InitialWorkDirFileGeneratorExpression(entry: IwdrListingArrayEn } object InitialWorkDirFileGeneratorExpression { - type InitialWorkDirFileEvaluator = (ParameterContext, Map[String, WomValue]) => ErrorOr[AdHocValue] + type InitialWorkDirFileEvaluator = (ParameterContext, Map[String, WomValue]) => ErrorOr[List[AdHocValue]] /** * Converts an InitialWorkDir. @@ -116,16 +113,14 @@ object InitialWorkDirFileGeneratorExpression { case _ => ExpressionEvaluator.eval(expressionDirent.entry, unmappedParameterContext) } - val womValueErrorOr: ErrorOr[WomSingleFile] = entryEvaluation flatMap { + val womValueErrorOr: ErrorOr[AdHocValue] = entryEvaluation flatMap { case womFile: WomFile => - val errorOrEntryName: ErrorOr[String] = expressionDirent.entryname match { - case Some(actualEntryName) => actualEntryName.fold(EntryNamePoly).apply(unmappedParameterContext) - case None => unmappedParameterContext.ioFunctionSet.pathFunctions.name(womFile.value).valid + val errorOrEntryName: ErrorOr[Option[String]] = expressionDirent.entryname match { + case Some(actualEntryName) => actualEntryName.fold(EntryNamePoly).apply(unmappedParameterContext).map(Option.apply) + case None => None.valid } - errorOrEntryName flatMap { entryName => - validate { - Await.result(unmappedParameterContext.ioFunctionSet.copyFile(womFile.value, entryName), Duration.Inf) - } + errorOrEntryName map { entryName => + AdHocValue(womFile, entryName, inputName = mutableInputOption) } case other => for { coerced <- WomStringType.coerceRawValue(other).toErrorOr @@ -136,10 +131,10 @@ object InitialWorkDirFileGeneratorExpression { entryName <- entryNameStringOrExpression.fold(EntryNamePoly).apply(unmappedParameterContext) writeFile = unmappedParameterContext.ioFunctionSet.writeFile(entryName, contentString) writtenFile <- validate(Await.result(writeFile, Duration.Inf)) - } yield writtenFile + } yield AdHocValue(writtenFile, alternativeName = None, inputName = mutableInputOption) } - womValueErrorOr.map(AdHocValue(_, mutableInputOption)) + womValueErrorOr.map(List(_)) } } } @@ -155,7 +150,7 @@ object InitialWorkDirFileGeneratorExpression { writtenFile <- validate(Await.result(writeFile, Duration.Inf)) } yield writtenFile - womValueErrorOr.map(AdHocValue(_, mutableInputOption = None)) + womValueErrorOr.map(AdHocValue(_, 
alternativeName = None, inputName = None)).map(List(_)) } } } @@ -166,33 +161,16 @@ object InitialWorkDirFileGeneratorExpression { // A single expression which must evaluate to an array of Files val expressionEvaluation = ExpressionEvaluator.eval(expression, unmappedParameterContext) - def stageFile(file: WomFile): Future[WomSingleFile] = { - // TODO WomFile could be a WomMaybePopulatedFile with secondary files but this code only stages in - // the primary file. - // The file should be staged to the initial work dir using the base filename. - val baseFileName = unmappedParameterContext.ioFunctionSet.pathFunctions.name(file.value) - unmappedParameterContext.ioFunctionSet.copyFile(file.value, baseFileName) - } - - def stageFiles(womArray: WomArray): ErrorOr[WomValue] = { - implicit val ec = unmappedParameterContext.ioFunctionSet.ec - - val unstagedFiles = womArray.value.map(_.asInstanceOf[WomFile]) - val stagedFiles = Await.result(Future.sequence(unstagedFiles map stageFile), Duration.Inf) - womArray.womType.coerceRawValue(stagedFiles).toErrorOr - } - - val womValueErrorOr = expressionEvaluation flatMap { - case array: WomArray if array.value.forall(_.isInstanceOf[WomFile]) => stageFiles(array) + expressionEvaluation flatMap { + case array: WomArray if array.value.forall(_.isInstanceOf[WomFile]) => + array.value.toList.map(_.asInstanceOf[WomFile]).map(AdHocValue(_, alternativeName = None, inputName = None)).validNel case file: WomFile => - validate(Await.result(stageFile(file), Duration.Inf)) + List(AdHocValue(file, alternativeName = None, inputName = None)).validNel case other => val error = "InitialWorkDirRequirement listing expression must be File or Array[File] but got %s: %s" .format(other, other.womType.toDisplayString) error.invalidNel } - - womValueErrorOr.map(AdHocValue(_, mutableInputOption = None)) } } } @@ -200,7 +178,7 @@ object InitialWorkDirFileGeneratorExpression { implicit val caseString: Case.Aux[String, InitialWorkDirFileEvaluator] = { at { string => (_, _) => { - AdHocValue(WomSingleFile(string), mutableInputOption = None).valid + List(AdHocValue(WomSingleFile(string), alternativeName = None, inputName = None)).valid } } } @@ -214,7 +192,7 @@ object InitialWorkDirFileGeneratorExpression { implicit val caseFile: Case.Aux[File, InitialWorkDirFileEvaluator] = { at { file => (_, _) => { - file.asWomValue.map(AdHocValue(_, mutableInputOption = None)) + file.asWomValue.map(AdHocValue(_, alternativeName = None, inputName = None)).map(List(_)) } } } @@ -222,7 +200,7 @@ object InitialWorkDirFileGeneratorExpression { implicit val caseDirectory: Case.Aux[Directory, InitialWorkDirFileEvaluator] = { at { directory => (_, _) => { - directory.asWomValue.map(AdHocValue(_, mutableInputOption = None)) + directory.asWomValue.map(AdHocValue(_, alternativeName = None, inputName = None)).map(List(_)) } } } diff --git a/cwl/src/main/scala/cwl/ExpressionEvaluator.scala b/cwl/src/main/scala/cwl/ExpressionEvaluator.scala index af55f59958c..4dd198f2ae4 100644 --- a/cwl/src/main/scala/cwl/ExpressionEvaluator.scala +++ b/cwl/src/main/scala/cwl/ExpressionEvaluator.scala @@ -92,7 +92,7 @@ object ExpressionEvaluator { Map( "outdir" -> WomString(runtime.outputPath), "tmpdir" -> WomString(runtime.tempPath), - "cores" -> WomInteger(runtime.cores), + "cores" -> WomInteger(runtime.cores.value), "ram" -> WomFloat(runtime.ram), "outdirSize" -> WomFloat(runtime.outputPathSize.toDouble), "tmpdirSize" -> WomFloat(runtime.tempPathSize.toDouble) diff --git a/cwl/src/main/scala/cwl/FileParameter.scala 
b/cwl/src/main/scala/cwl/FileParameter.scala index 51fba0b2cdd..40fc4838203 100644 --- a/cwl/src/main/scala/cwl/FileParameter.scala +++ b/cwl/src/main/scala/cwl/FileParameter.scala @@ -8,7 +8,7 @@ import common.validation.Validation.validate import cwl.ontology.Schema import shapeless.Poly1 import wom.expression.IoFunctionSet -import wom.types.{WomFileType, WomSingleFileType} +import wom.types.{WomFileType, WomMaybePopulatedFileType} import wom.values.{WomArray, WomFile, WomMaybePopulatedFile, WomValue} import scala.concurrent.duration._ @@ -33,7 +33,7 @@ object FileParameter { case womMaybePopulatedFile: WomMaybePopulatedFile => val secondaryFilesErrorOr = FileParameter.secondaryFiles( womMaybePopulatedFile, - WomSingleFileType, + WomMaybePopulatedFileType, secondaryFilesCoproduct, parameterContext, expressionLib, diff --git a/cwl/src/main/scala/cwl/MyriadOutputTypeToWomFiles.scala b/cwl/src/main/scala/cwl/MyriadOutputTypeToWomFiles.scala index f063f4b40c1..3aa73a9bca8 100644 --- a/cwl/src/main/scala/cwl/MyriadOutputTypeToWomFiles.scala +++ b/cwl/src/main/scala/cwl/MyriadOutputTypeToWomFiles.scala @@ -8,19 +8,19 @@ import cwl.CwlType.CwlType import cwl.MyriadOutputTypeToWomFiles.EvaluationFunction import mouse.all._ import shapeless.Poly1 -import wom.values.WomFile +import wom.expression.FileEvaluation object MyriadOutputTypeToWomFiles extends Poly1 { - type EvaluationFunction = CommandOutputBinding => ErrorOr[Set[WomFile]] + type EvaluationFunction = CommandOutputBinding => ErrorOr[Set[FileEvaluation]] import Case._ - implicit def cwlType: Aux[MyriadOutputInnerType, EvaluationFunction => ErrorOr[Set[WomFile]]] = at[MyriadOutputInnerType]{ + implicit def cwlType: Aux[MyriadOutputInnerType, EvaluationFunction => ErrorOr[Set[FileEvaluation]]] = at[MyriadOutputInnerType]{ _.fold(MyriadOutputInnerTypeToWomFiles) } - implicit def acwl: Aux[Array[MyriadOutputInnerType], EvaluationFunction => ErrorOr[Set[WomFile]]] = at[Array[MyriadOutputInnerType]] { types => + implicit def acwl: Aux[Array[MyriadOutputInnerType], EvaluationFunction => ErrorOr[Set[FileEvaluation]]] = at[Array[MyriadOutputInnerType]] { types => evalFunction => types.toList.traverse(_.fold(MyriadOutputInnerTypeToWomFiles).apply(evalFunction)).map(_.toSet.flatten) } @@ -32,11 +32,11 @@ object MyriadOutputInnerTypeToWomFiles extends Poly1 { def ex(component: String) = throw new RuntimeException(s"output type $component cannot yield wom files") - implicit def cwlType: Aux[CwlType, EvaluationFunction => ErrorOr[Set[WomFile]]] = at[CwlType] { _ => _ => - Set.empty[WomFile].validNel + implicit def cwlType: Aux[CwlType, EvaluationFunction => ErrorOr[Set[FileEvaluation]]] = at[CwlType] { _ => _ => + Set.empty[FileEvaluation].validNel } - implicit def ors: Aux[OutputRecordSchema, EvaluationFunction => ErrorOr[Set[WomFile]]] = at[OutputRecordSchema] { + implicit def ors: Aux[OutputRecordSchema, EvaluationFunction => ErrorOr[Set[FileEvaluation]]] = at[OutputRecordSchema] { case OutputRecordSchema(_, Some(fields), _) => evalFunction => fields.toList.traverse({ field => @@ -48,20 +48,20 @@ object MyriadOutputInnerTypeToWomFiles extends Poly1 { case ors => ors.toString |> ex } - implicit def oes: Aux[OutputEnumSchema, EvaluationFunction => ErrorOr[Set[WomFile]]] = at[OutputEnumSchema]{ oes => _ => + implicit def oes: Aux[OutputEnumSchema, EvaluationFunction => ErrorOr[Set[FileEvaluation]]] = at[OutputEnumSchema]{ oes => _ => oes.toString |> ex } - implicit def oas: Aux[OutputArraySchema, EvaluationFunction => ErrorOr[Set[WomFile]]] = 
at[OutputArraySchema]{ oas => + implicit def oas: Aux[OutputArraySchema, EvaluationFunction => ErrorOr[Set[FileEvaluation]]] = at[OutputArraySchema]{ oas => evalFunction => import cats.syntax.apply._ - def fromBinding: ErrorOr[Set[WomFile]] = oas.outputBinding.map(evalFunction).getOrElse(Set.empty[WomFile].validNel) - def fromType: ErrorOr[Set[WomFile]] = oas.items.fold(MyriadOutputTypeToWomFiles).apply(evalFunction) + def fromBinding: ErrorOr[Set[FileEvaluation]] = oas.outputBinding.map(evalFunction).getOrElse(Set.empty[FileEvaluation].validNel) + def fromType: ErrorOr[Set[FileEvaluation]] = oas.items.fold(MyriadOutputTypeToWomFiles).apply(evalFunction) (fromBinding, fromType) mapN (_ ++ _) } - implicit def s: Aux[String, EvaluationFunction => ErrorOr[Set[WomFile]]] = at[String]{ _ => _ => - Set.empty[WomFile].validNel + implicit def s: Aux[String, EvaluationFunction => ErrorOr[Set[FileEvaluation]]] = at[String]{ _ => _ => + Set.empty[FileEvaluation].validNel } } diff --git a/cwl/src/main/scala/cwl/OutputParameterExpression.scala b/cwl/src/main/scala/cwl/OutputParameterExpression.scala index e0d96107b76..26ac0769760 100644 --- a/cwl/src/main/scala/cwl/OutputParameterExpression.scala +++ b/cwl/src/main/scala/cwl/OutputParameterExpression.scala @@ -4,11 +4,12 @@ import cats.syntax.validated._ import common.validation.ErrorOr.ErrorOr import cwl.CwlType.CwlType import shapeless.Poly1 -import wom.expression.{FileEvaluation, IoFunctionSet} +import wom.expression.{EmptyIoFunctionSet, FileEvaluation, IoFunctionSet} import wom.types._ -import wom.values.{WomFile, WomValue} +import wom.values.WomValue import scala.Function.const +import scala.concurrent.{ExecutionContext, Future} case class OutputParameterExpression(parameter: OutputParameter, override val cwlExpressionType: WomType, @@ -41,7 +42,7 @@ case class OutputParameterExpression(parameter: OutputParameter, ioFunctionSet: IoFunctionSet, secondaryFilesOption: Option[SecondaryFiles], coerceTo: WomType - )(outputBinding: CommandOutputBinding) = { + )(outputBinding: CommandOutputBinding): ErrorOr[Set[FileEvaluation]] = { CommandOutputBinding.getOutputWomFiles( inputValues, coerceTo, @@ -77,22 +78,30 @@ case class OutputParameterExpression(parameter: OutputParameter, * - WomMaybeListedDirectoryType * - WomArrayType(WomMaybeListedDirectoryType) (Possible according to the way the spec is written, but not likely?) */ - override def evaluateFiles(inputs: Map[String, WomValue], ioFunctionSet: IoFunctionSet, coerceTo: WomType): ErrorOr[Set[FileEvaluation]] = { + override def evaluateFiles(inputs: Map[String, WomValue], unused: IoFunctionSet, coerceTo: WomType): ErrorOr[Set[FileEvaluation]] = { import cats.syntax.apply._ - - def fromOutputBinding: ErrorOr[Set[WomFile]] = parameter + + // Ignore the supplied ioFunctionSet and use a custom stubbed IoFunctionSet. This is better than a real I/O function set + // because in the context of file evaluation we don't care about the results of these operations. The NoIoFunctionSet + // that otherwise would be used throws for all of its operations which doesn't fly for the way our CWL evaluation works. 
+ val stubbedIoFunctionSet = new EmptyIoFunctionSet { + override def size(path: String): Future[Long] = Future.successful(0L) + override def isDirectory(path: String): Future[Boolean] = Future.successful(false) + override def ec: ExecutionContext = scala.concurrent.ExecutionContext.global + } + def fromOutputBinding: ErrorOr[Set[FileEvaluation]] = parameter .outputBinding - .map(evaluateOutputBindingFiles(inputs, ioFunctionSet, parameter.secondaryFiles, coerceTo)) - .getOrElse(Set.empty[WomFile].validNel) + .map(evaluateOutputBindingFiles(inputs, stubbedIoFunctionSet, parameter.secondaryFiles, coerceTo)) + .getOrElse(Set.empty[FileEvaluation].validNel) - def fromType: ErrorOr[Set[WomFile]] = parameter + def fromType: ErrorOr[Set[FileEvaluation]] = parameter .`type` - .map(_.fold(MyriadOutputTypeToWomFiles).apply(evaluateOutputBindingFiles(inputs, ioFunctionSet, parameter.secondaryFiles, coerceTo))) - .getOrElse(Set.empty[WomFile].validNel) + .map(_.fold(MyriadOutputTypeToWomFiles).apply(evaluateOutputBindingFiles(inputs, stubbedIoFunctionSet, parameter.secondaryFiles, coerceTo))) + .getOrElse(Set.empty[FileEvaluation].validNel) val optional: Boolean = parameter.`type`.exists(_.fold(OutputTypeIsOptional)) - (fromOutputBinding, fromType) mapN (_ ++ _) map { _ map { FileEvaluation(_, optional) } } + (fromOutputBinding, fromType) mapN (_ ++ _) map { _ map { _.copy(optional = optional) } } } } diff --git a/cwl/src/main/scala/cwl/WorkflowStep.scala b/cwl/src/main/scala/cwl/WorkflowStep.scala index b4075b4a30d..f291d95e2ad 100644 --- a/cwl/src/main/scala/cwl/WorkflowStep.scala +++ b/cwl/src/main/scala/cwl/WorkflowStep.scala @@ -339,13 +339,13 @@ case class WorkflowStep( ).validNel // No expression node mapping, use the default - case withDefault @ InputDefinitionWithDefault(_, _, expression, _) => + case withDefault @ InputDefinitionWithDefault(_, _, expression, _, _) => InputDefinitionFold( mappings = List(withDefault -> Coproduct[InputDefinitionPointer](expression)) ).validNel // Required input without default value and without mapping, this is a validation error - case RequiredInputDefinition(requiredName, _, _) => + case RequiredInputDefinition(requiredName, _, _, _) => s"Input ${requiredName.value} is required and is not bound to any value".invalidNel // Optional input without mapping, defaults to empty value diff --git a/cwl/src/main/scala/cwl/internal/EcmaScriptEncoder.scala b/cwl/src/main/scala/cwl/internal/EcmaScriptEncoder.scala index c0cf19b6166..67ad2013e22 100644 --- a/cwl/src/main/scala/cwl/internal/EcmaScriptEncoder.scala +++ b/cwl/src/main/scala/cwl/internal/EcmaScriptEncoder.scala @@ -37,7 +37,7 @@ class EcmaScriptEncoder(ioFunctionSet: IoFunctionSet) { */ def encode(value: WomValue): ECMAScriptVariable = { value match { - case file: WomFile => encodeFileOrDirectory(file) + case file: WomFile => encodeFileOrDirectory(file, withSize = true) case WomOptionalValue(_, None) => ESPrimitive(null) case WomOptionalValue(_, Some(innerValue)) => encode(innerValue) case WomString(string) => string |> ESPrimitive @@ -76,27 +76,27 @@ class EcmaScriptEncoder(ioFunctionSet: IoFunctionSet) { /** * Encodes a sequence of wom file or directory values. */ - def encodeFileOrDirectories(values: Seq[WomFile]): ESArray = { - ESArray(values.toList.map(encodeFileOrDirectory).toArray) + def encodeFileOrDirectories(values: Seq[WomFile], withSize: Boolean): ESArray = { + ESArray(values.toList.map(encodeFileOrDirectory(_, withSize)).toArray) } /** * Encodes a wom file or directory value. 
*/ - def encodeFileOrDirectory(value: WomFile): ECMAScriptVariable = { + def encodeFileOrDirectory(value: WomFile, withSize: Boolean): ECMAScriptVariable = { value match { case directory: WomUnlistedDirectory => encodeDirectory(WomMaybeListedDirectory(directory.value)) - case file: WomSingleFile => encodeFile(WomMaybePopulatedFile(file.value)) - case glob: WomGlobFile => encodeFile(WomMaybePopulatedFile(glob.value)) + case file: WomSingleFile => encodeFile(WomMaybePopulatedFile(file.value), withSize) + case glob: WomGlobFile => encodeFile(WomMaybePopulatedFile(glob.value), withSize) case directory: WomMaybeListedDirectory => encodeDirectory(directory) - case file: WomMaybePopulatedFile => encodeFile(file) + case file: WomMaybePopulatedFile => encodeFile(file, withSize) } } /** * Encodes a wom file. */ - def encodeFile(file: WomMaybePopulatedFile): ECMAScriptVariable = + def encodeFile(file: WomMaybePopulatedFile, withSize: Boolean): ECMAScriptVariable = List( Option("class" -> ESPrimitive("File")), file.valueOption.map("location" -> ESPrimitive(_)), @@ -106,8 +106,11 @@ class EcmaScriptEncoder(ioFunctionSet: IoFunctionSet) { Option("nameroot" -> (File.nameroot(file.value) |> ESPrimitive)), Option("nameext" -> (File.nameext(file.value) |> ESPrimitive)), file.checksumOption.map("checksum" -> ESPrimitive(_)), - sync(file.withSize(ioFunctionSet)).toOption.flatMap(_.sizeOption).map(Long.box).map("size" -> ESPrimitive(_)), - Option("secondaryFiles" -> encodeFileOrDirectories(file.secondaryFiles)), + if (withSize) + sync(file.withSize(ioFunctionSet)).toOption.flatMap(_.sizeOption).map(Long.box).map("size" -> ESPrimitive(_)) + else + None, + Option("secondaryFiles" -> encodeFileOrDirectories(file.secondaryFiles, withSize = false)), file.formatOption.map("format" -> ESPrimitive(_)), file.contentsOption.map("contents" -> ESPrimitive(_)) ).flatten.toMap |> ESObject @@ -121,7 +124,7 @@ class EcmaScriptEncoder(ioFunctionSet: IoFunctionSet) { directory.valueOption.map("location" -> ESPrimitive(_)), Option(directory.value).map("path" -> ESPrimitive(_)), Option("basename" -> ESPrimitive(Directory.basename(directory.value))), - directory.listingOption.map(encodeFileOrDirectories).map("listing" -> _) + directory.listingOption.map(encodeFileOrDirectories(_, withSize = true)).map("listing" -> _) ).flatten.toMap |> ESObject } } diff --git a/cwl/src/test/scala/cwl/CommandLineToolSpec.scala b/cwl/src/test/scala/cwl/CommandLineToolSpec.scala index 90a52123bf5..cf264babab0 100644 --- a/cwl/src/test/scala/cwl/CommandLineToolSpec.scala +++ b/cwl/src/test/scala/cwl/CommandLineToolSpec.scala @@ -2,10 +2,12 @@ package cwl import cats.instances.list._ import cats.syntax.traverse._ +import eu.timepit.refined.numeric.Positive import org.scalatest.{FlatSpec, Matchers, ParallelTestExecution} import shapeless.Coproduct import wom.callable.Callable.RequiredInputDefinition import wom.callable.RuntimeEnvironment +import eu.timepit.refined.refineMV import wom.expression.NoIoFunctionSet import wom.types._ import wom.values.{WomArray, WomBoolean, WomEvaluatedCallInputs, WomInteger, WomObject, WomSingleFile, WomString, WomValue} @@ -31,7 +33,7 @@ class CommandLineToolSpec extends FlatSpec with Matchers with ParallelTestExecut val noIoFunctionSet = NoIoFunctionSet - val runtimeEnv = RuntimeEnvironment("", "", 0, 0D, 0L, 0L) + val runtimeEnv = RuntimeEnvironment("", "", refineMV[Positive](1), 0D, 0L, 0L) def validate(tool: String, expectation: List[String]) = { val cltFile = better.files.File.newTemporaryFile()().write(tool) diff 
--git a/cwl/src/test/scala/cwl/CwlExpressionCommandPartSpec.scala b/cwl/src/test/scala/cwl/CwlExpressionCommandPartSpec.scala index 93dc8bdbcf9..aa7fb3c1a9b 100644 --- a/cwl/src/test/scala/cwl/CwlExpressionCommandPartSpec.scala +++ b/cwl/src/test/scala/cwl/CwlExpressionCommandPartSpec.scala @@ -3,6 +3,7 @@ package cwl import common.validation.Validation._ import cwl.ExpressionEvaluator._ import eu.timepit.refined._ +import eu.timepit.refined.numeric.Positive import org.scalatest.{FlatSpec, Matchers} import shapeless.Coproduct import wom.callable.RuntimeEnvironment @@ -14,7 +15,7 @@ class CwlExpressionCommandPartSpec extends FlatSpec with Matchers { behavior of "CwlExpressionCommandPart" - val emptyEnvironment = RuntimeEnvironment("","",1,1,1,1) + val emptyEnvironment = RuntimeEnvironment("","",refineMV[Positive](1),1,1,1) it should "instantiate" in { // NOTE: toFixed used to remove the fraction part of ECMAScript numbers diff --git a/cwl/src/test/scala/cwl/DirectorySpec.scala b/cwl/src/test/scala/cwl/DirectorySpec.scala index e423aa1aef9..c84068008f5 100644 --- a/cwl/src/test/scala/cwl/DirectorySpec.scala +++ b/cwl/src/test/scala/cwl/DirectorySpec.scala @@ -1,8 +1,10 @@ package cwl import common.validation.Validation._ +import eu.timepit.refined.refineMV import cwl.CwlDecoder.decodeCwlFile import cwl.TestSetup.rootPath +import eu.timepit.refined.numeric.Positive import org.scalatest.{FlatSpec, Matchers} import wom.callable.Callable.InputDefinition import wom.callable.{CallableTaskDefinition, RuntimeEnvironment} @@ -18,7 +20,7 @@ class DirectorySpec extends FlatSpec with Matchers { val cwl = decodeCwlFile(rootPath / "dir_example.cwl").value.unsafeRunSync.right.get val executable = cwl.womExecutable(AcceptAllRequirements, None, NoIoFunctionSet, strictValidation = false).right.get val call = executable.graph.calls.head - val runtimeEnvironment = RuntimeEnvironment("output/path", "temp/path", 1, 2e10, 100, 100) + val runtimeEnvironment = RuntimeEnvironment("output/path", "temp/path",refineMV[Positive](1), 2e10, 100, 100) val defaultCallInputs = executable.graph.nodes.collect({ case oginwd: OptionalGraphInputNodeWithDefault => val key: InputDefinition = call.inputDefinitionMappings.toMap.keys.find( diff --git a/cwl/src/test/scala/cwl/ExpressionInterpolatorSpec.scala b/cwl/src/test/scala/cwl/ExpressionInterpolatorSpec.scala index c13503280c1..d621822deb7 100644 --- a/cwl/src/test/scala/cwl/ExpressionInterpolatorSpec.scala +++ b/cwl/src/test/scala/cwl/ExpressionInterpolatorSpec.scala @@ -1,12 +1,14 @@ package cwl import common.validation.ErrorOr._ +import eu.timepit.refined.refineMV import org.scalatest.prop.TableDrivenPropertyChecks import org.scalatest.{FlatSpec, Matchers} import wom.callable.RuntimeEnvironment import wom.values._ import common.validation.Validation._ import cwl.ExpressionInterpolator.SubstitutionException +import eu.timepit.refined.numeric.Positive import wom.expression.DefaultSizeIoFunctionSet class ExpressionInterpolatorSpec extends FlatSpec with Matchers with TableDrivenPropertyChecks { @@ -19,7 +21,7 @@ class ExpressionInterpolatorSpec extends FlatSpec with Matchers with TableDriven ) private lazy val parameterContext = { - val runtime = RuntimeEnvironment("out", "tmp", 1, 2.0D, 100L, 200L) + val runtime = RuntimeEnvironment("out", "tmp", refineMV[Positive](1), 2.0D, 100L, 200L) val inputs = Map( "cram" -> WomSingleFile("/path/to/my.cram"), "file1" -> WomSingleFile("/path/to/my.file.txt") diff --git a/cwl/src/test/scala/cwl/FileSpec.scala 
b/cwl/src/test/scala/cwl/FileSpec.scala index cc25d58f94c..021621e33c8 100644 --- a/cwl/src/test/scala/cwl/FileSpec.scala +++ b/cwl/src/test/scala/cwl/FileSpec.scala @@ -1,8 +1,10 @@ package cwl import common.validation.Validation._ +import eu.timepit.refined.refineMV import cwl.CwlDecoder.decodeCwlFile import cwl.TestSetup.rootPath +import eu.timepit.refined.numeric.Positive import org.scalatest.prop.TableDrivenPropertyChecks import org.scalatest.{FlatSpec, Matchers} import wom.callable.Callable.InputDefinition @@ -26,7 +28,7 @@ class FileSpec extends FlatSpec with Matchers with TableDrivenPropertyChecks { val cwl = decodeCwlFile(rootPath / filePath).value.unsafeRunSync.right.get val executable = cwl.womExecutable(AcceptAllRequirements, None, NoIoFunctionSet, strictValidation = false).right.get val call = executable.graph.calls.head - val runtimeEnvironment = RuntimeEnvironment("output/path", "temp/path", 1, 2e10, 100, 100) + val runtimeEnvironment = RuntimeEnvironment("output/path", "temp/path", refineMV[Positive](1), 2e10, 100, 100) val defaultCallInputs = executable.graph.nodes.collect({ case oginwd: OptionalGraphInputNodeWithDefault => val key: InputDefinition = call.inputDefinitionMappings.toMap.keys.find( diff --git a/database/migration/src/main/resources/metadata_changesets/add_submission_timestamp_metadata_summary.xml b/database/migration/src/main/resources/metadata_changesets/add_submission_timestamp_metadata_summary.xml new file mode 100644 index 00000000000..9c9fab7a596 --- /dev/null +++ b/database/migration/src/main/resources/metadata_changesets/add_submission_timestamp_metadata_summary.xml @@ -0,0 +1,14 @@ + + + + + + + + + + + + diff --git a/database/migration/src/main/resources/sql_metadata_changelog.xml b/database/migration/src/main/resources/sql_metadata_changelog.xml index 0b9af385013..c140fd2f7f9 100644 --- a/database/migration/src/main/resources/sql_metadata_changelog.xml +++ b/database/migration/src/main/resources/sql_metadata_changelog.xml @@ -6,5 +6,6 @@ + diff --git a/database/sql/src/main/scala/cromwell/database/slick/MetadataSlickDatabase.scala b/database/sql/src/main/scala/cromwell/database/slick/MetadataSlickDatabase.scala index bf4ecbc20eb..57a9c03593e 100644 --- a/database/sql/src/main/scala/cromwell/database/slick/MetadataSlickDatabase.scala +++ b/database/sql/src/main/scala/cromwell/database/slick/MetadataSlickDatabase.scala @@ -157,17 +157,17 @@ class MetadataSlickDatabase(originalDatabaseConfig: Config) } override def refreshMetadataSummaryEntries(startMetadataKey: String, endMetadataKey: String, nameMetadataKey: String, - statusMetadataKey: String, labelMetadataKey: String, buildUpdatedSummary: - (Option[WorkflowMetadataSummaryEntry], Seq[MetadataEntry]) => + statusMetadataKey: String, labelMetadataKey: String, submissionMetadataKey: String, + buildUpdatedSummary: (Option[WorkflowMetadataSummaryEntry], Seq[MetadataEntry]) => WorkflowMetadataSummaryEntry) (implicit ec: ExecutionContext): Future[Long] = { val likeMetadataLabelKey = labelMetadataKey + "%" val action = for { previousMetadataEntryIdOption <- getSummaryStatusEntryMaximumId( "WORKFLOW_METADATA_SUMMARY_ENTRY", "METADATA_ENTRY") - previousMetadataEntryId = previousMetadataEntryIdOption.getOrElse(0L) + previousMetadataEntryId = previousMetadataEntryIdOption.getOrElse(-1L) metadataEntries <- dataAccess.metadataEntriesForIdGreaterThanOrEqual(( - previousMetadataEntryId + 1L, startMetadataKey, endMetadataKey, nameMetadataKey, statusMetadataKey, likeMetadataLabelKey)).result + previousMetadataEntryId 
+ 1L, startMetadataKey, endMetadataKey, nameMetadataKey, statusMetadataKey, likeMetadataLabelKey, submissionMetadataKey)).result metadataWithoutLabels = metadataEntries.filterNot(_.metadataKey.contains(labelMetadataKey)).groupBy(_.workflowExecutionUuid) customLabelEntries = metadataEntries.filter(_.metadataKey.contains(labelMetadataKey)) _ <- DBIO.sequence(metadataWithoutLabels map updateWorkflowMetadataSummaryEntry(buildUpdatedSummary)) @@ -198,13 +198,16 @@ class MetadataSlickDatabase(originalDatabaseConfig: Config) workflowExecutionUuids: Set[String], labelAndKeyLabelValues: Set[(String,String)], labelOrKeyLabelValues: Set[(String,String)], + excludeLabelAndValues: Set[(String,String)], + excludeLabelOrValues: Set[(String,String)], + submissionTimestampOption: Option[Timestamp], startTimestampOption: Option[Timestamp], endTimestampOption: Option[Timestamp], page: Option[Int], pageSize: Option[Int]) (implicit ec: ExecutionContext): Future[Seq[WorkflowMetadataSummaryEntry]] = { val action = dataAccess.queryWorkflowMetadataSummaryEntries(workflowStatuses, workflowNames, workflowExecutionUuids, - labelAndKeyLabelValues, labelOrKeyLabelValues, startTimestampOption, endTimestampOption, page, pageSize).result + labelAndKeyLabelValues, labelOrKeyLabelValues, excludeLabelAndValues, excludeLabelOrValues, submissionTimestampOption, startTimestampOption, endTimestampOption, page, pageSize).result runTransaction(action) } @@ -213,11 +216,14 @@ class MetadataSlickDatabase(originalDatabaseConfig: Config) workflowExecutionUuids: Set[String], labelAndKeyLabelValues: Set[(String,String)], labelOrKeyLabelValues: Set[(String,String)], + excludeLabelAndValues: Set[(String,String)], + excludeLabelOrValues: Set[(String,String)], + submissionTimestampOption: Option[Timestamp], startTimestampOption: Option[Timestamp], endTimestampOption: Option[Timestamp]) (implicit ec: ExecutionContext): Future[Int] = { val action = dataAccess.countWorkflowMetadataSummaryEntries(workflowStatuses, workflowNames, workflowExecutionUuids, - labelAndKeyLabelValues, labelOrKeyLabelValues, startTimestampOption, endTimestampOption).result + labelAndKeyLabelValues, labelOrKeyLabelValues, excludeLabelAndValues, excludeLabelOrValues, submissionTimestampOption, startTimestampOption, endTimestampOption).result runTransaction(action) } } diff --git a/database/sql/src/main/scala/cromwell/database/slick/WorkflowStoreSlickDatabase.scala b/database/sql/src/main/scala/cromwell/database/slick/WorkflowStoreSlickDatabase.scala index bcc3ae009c4..9815d9c9b49 100644 --- a/database/sql/src/main/scala/cromwell/database/slick/WorkflowStoreSlickDatabase.scala +++ b/database/sql/src/main/scala/cromwell/database/slick/WorkflowStoreSlickDatabase.scala @@ -72,12 +72,12 @@ trait WorkflowStoreSlickDatabase extends WorkflowStoreSqlDatabase { runTransaction(action) } - override def writeWorkflowHeartbeats(workflowExecutionUuids: List[String])(implicit ec: ExecutionContext): Future[Int] = { + override def writeWorkflowHeartbeats(workflowExecutionUuids: Set[String])(implicit ec: ExecutionContext): Future[Int] = { val optionNow = Option(now) // Return the count of heartbeats written. This could legitimately be less than the size of the `workflowExecutionUuids` // List if any of those workflows completed and their workflow store entries were removed. 
val action = for { - counts <- DBIO.sequence(workflowExecutionUuids map { i => dataAccess.heartbeatForWorkflowStoreEntry(i).update(optionNow) }) + counts <- DBIO.sequence(workflowExecutionUuids.toList map { i => dataAccess.heartbeatForWorkflowStoreEntry(i).update(optionNow) }) } yield counts.sum runTransaction(action) } diff --git a/database/sql/src/main/scala/cromwell/database/slick/tables/MetadataEntryComponent.scala b/database/sql/src/main/scala/cromwell/database/slick/tables/MetadataEntryComponent.scala index 373d3b92730..d700c6f16a2 100644 --- a/database/sql/src/main/scala/cromwell/database/slick/tables/MetadataEntryComponent.scala +++ b/database/sql/src/main/scala/cromwell/database/slick/tables/MetadataEntryComponent.scala @@ -114,12 +114,12 @@ trait MetadataEntryComponent { // that last ID + 1 as the minimum ID for the next query iteration. val metadataEntriesForIdGreaterThanOrEqual = Compiled( (metadataEntryId: Rep[Long], startMetadataKey: Rep[String], endMetadataKey: Rep[String], nameMetadataKey: Rep[String], - statusMetadataKey: Rep[String], likeLabelMetadataKey: Rep[String]) => for { + statusMetadataKey: Rep[String], likeLabelMetadataKey: Rep[String], submissionMetadataKey: Rep[String]) => for { metadataEntry <- metadataEntries if metadataEntry.metadataEntryId >= metadataEntryId if (metadataEntry.metadataKey === startMetadataKey || metadataEntry.metadataKey === endMetadataKey || metadataEntry.metadataKey === nameMetadataKey || metadataEntry.metadataKey === statusMetadataKey || - metadataEntry.metadataKey.like(likeLabelMetadataKey)) && + metadataEntry.metadataKey.like(likeLabelMetadataKey) || metadataEntry.metadataKey === submissionMetadataKey) && (metadataEntry.callFullyQualifiedName.isEmpty && metadataEntry.jobIndex.isEmpty && metadataEntry.jobAttempt.isEmpty) } yield metadataEntry diff --git a/database/sql/src/main/scala/cromwell/database/slick/tables/WorkflowMetadataSummaryEntryComponent.scala b/database/sql/src/main/scala/cromwell/database/slick/tables/WorkflowMetadataSummaryEntryComponent.scala index 802bb47f75c..79feb5ff01d 100644 --- a/database/sql/src/main/scala/cromwell/database/slick/tables/WorkflowMetadataSummaryEntryComponent.scala +++ b/database/sql/src/main/scala/cromwell/database/slick/tables/WorkflowMetadataSummaryEntryComponent.scala @@ -24,7 +24,9 @@ trait WorkflowMetadataSummaryEntryComponent { def endTimestamp = column[Option[Timestamp]]("END_TIMESTAMP") - override def * = (workflowExecutionUuid, workflowName, workflowStatus, startTimestamp, endTimestamp, + def submissionTimestamp = column[Option[Timestamp]]("SUBMISSION_TIMESTAMP") + + override def * = (workflowExecutionUuid, workflowName, workflowStatus, startTimestamp, endTimestamp, submissionTimestamp, workflowMetadataSummaryEntryId.?) 
<> (WorkflowMetadataSummaryEntry.tupled, WorkflowMetadataSummaryEntry.unapply) def ucWorkflowMetadataSummaryEntryWeu = @@ -58,6 +60,9 @@ trait WorkflowMetadataSummaryEntryComponent { workflowExecutionUuids: Set[String], labelAndKeyValues: Set[(String, String)], labelOrKeyValues: Set[(String, String)], + excludeLabelAndValues: Set[(String,String)], + excludeLabelOrValues: Set[(String,String)], + submissionTimestampOption: Option[Timestamp], startTimestampOption: Option[Timestamp], endTimestampOption: Option[Timestamp]): (WorkflowMetadataSummaryEntries) => Rep[Boolean] = { @@ -74,6 +79,8 @@ trait WorkflowMetadataSummaryEntryComponent { map(startTimestamp => workflowMetadataSummaryEntry.startTimestamp.fold(ifEmpty = exclude)(_ >= startTimestamp)) val endTimestampFilter = endTimestampOption. map(endTimestamp => workflowMetadataSummaryEntry.endTimestamp.fold(ifEmpty = exclude)(_ <= endTimestamp)) + val submissionTimestampFilter = submissionTimestampOption. + map(submissionTimestamp => workflowMetadataSummaryEntry.submissionTimestamp.fold(ifEmpty = exclude)(_ >= submissionTimestamp)) // Names, UUIDs, and statuses are potentially multi-valued, the reduceLeftOption ORs together any name, UUID, or // status criteria to include all matching names, UUIDs, and statuses. val workflowNameFilter = workflowNames. @@ -87,6 +94,8 @@ trait WorkflowMetadataSummaryEntryComponent { reduceLeftOption(_ || _) val labelsAndFilter = existsWorkflowLabels(workflowMetadataSummaryEntry, labelAndKeyValues, _ && _) val labelsOrFilter = existsWorkflowLabels(workflowMetadataSummaryEntry, labelOrKeyValues, _ || _) + val excludeLabelsAndFilter = existsWorkflowLabels(workflowMetadataSummaryEntry, excludeLabelAndValues, _ && _).map(v => !v) + val excludeLabelsOrFilter = existsWorkflowLabels(workflowMetadataSummaryEntry, excludeLabelOrValues, _ || _).map(v => !v) // Put all the optional filters above together in one place. val optionalFilters: List[Option[Rep[Boolean]]] = List( workflowNameFilter, @@ -94,8 +103,11 @@ trait WorkflowMetadataSummaryEntryComponent { workflowStatusFilter, labelsAndFilter, labelsOrFilter, + excludeLabelsAndFilter, + excludeLabelsOrFilter, startTimestampFilter, - endTimestampFilter + endTimestampFilter, + submissionTimestampFilter ) // Unwrap the optional filters. If any of these filters are not defined, replace with `include` to include all // rows which might otherwise have been filtered. 
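Editorial note on the hunk above: the new exclude-label predicates are negated with `.map(v => !v)`, and any criterion that was not supplied falls back to `include` so it removes no rows. A minimal standalone sketch of that pattern follows — an illustration only, using plain Booleans instead of Slick's `Rep[Boolean]`, with made-up names rather than Cromwell's:

```scala
// Editorial sketch (not Cromwell code): combining optional query filters where
// an absent criterion filters nothing out and exclude filters are negated.
object OptionalFilterSketch {
  private val include = true // an absent criterion removes no rows

  /** AND together only the filters that were actually supplied. */
  def combine(optionalFilters: List[Option[Boolean]]): Boolean =
    optionalFilters.map(_.getOrElse(include)).fold(include)(_ && _)

  def main(args: Array[String]): Unit = {
    val statusMatches      = Option(true)                     // status criterion supplied and satisfied
    val excludeLabelsMatch = Option(true)                     // row carries an excluded label...
    val excludeFilter      = excludeLabelsMatch.map(v => !v)  // ...so the exclude filter negates it
    val submissionFilter   = Option.empty[Boolean]            // no submission-time criterion supplied

    println(combine(List(statusMatches, excludeFilter, submissionFilter))) // false: row is excluded
  }
}
```

In the real query everything stays a `Rep[Boolean]` so Slick can translate the combined predicate into the SQL `WHERE` clause; the Boolean model above only shows the include/exclude bookkeeping.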
@@ -125,6 +137,9 @@ trait WorkflowMetadataSummaryEntryComponent { workflowExecutionUuids: Set[String], labelAndKeyLabelValues: Set[(String,String)], labelOrKeyLabelValues: Set[(String,String)], + excludeLabelAndValues: Set[(String,String)], + excludeLabelOrValues: Set[(String,String)], + submissionTimestampOption: Option[Timestamp], startTimestampOption: Option[Timestamp], endTimestampOption: Option[Timestamp]) = { val filter = filterWorkflowMetadataSummaryEntries( @@ -133,6 +148,9 @@ trait WorkflowMetadataSummaryEntryComponent { workflowExecutionUuids, labelAndKeyLabelValues, labelOrKeyLabelValues, + excludeLabelAndValues, + excludeLabelOrValues, + submissionTimestampOption, startTimestampOption, endTimestampOption ) @@ -146,6 +164,9 @@ trait WorkflowMetadataSummaryEntryComponent { workflowExecutionUuids: Set[String], labelAndKeyLabelValues: Set[(String,String)], labelOrKeyLabelValues: Set[(String,String)], + excludeLabelAndValues: Set[(String,String)], + excludeLabelOrValues: Set[(String,String)], + submissionTimestampOption: Option[Timestamp], startTimestampOption: Option[Timestamp], endTimestampOption: Option[Timestamp], page: Option[Int], pageSize: Option[Int]) = { @@ -155,6 +176,9 @@ trait WorkflowMetadataSummaryEntryComponent { workflowExecutionUuids, labelAndKeyLabelValues, labelOrKeyLabelValues, + excludeLabelAndValues, + excludeLabelOrValues, + submissionTimestampOption, startTimestampOption, endTimestampOption ) diff --git a/database/sql/src/main/scala/cromwell/database/sql/MetadataSqlDatabase.scala b/database/sql/src/main/scala/cromwell/database/sql/MetadataSqlDatabase.scala index c494ee48f44..5bfbdf85d77 100644 --- a/database/sql/src/main/scala/cromwell/database/sql/MetadataSqlDatabase.scala +++ b/database/sql/src/main/scala/cromwell/database/sql/MetadataSqlDatabase.scala @@ -65,7 +65,7 @@ trait MetadataSqlDatabase { * @return A `Future` with the maximum metadataEntryId summarized by the invocation of this method. 
*/ def refreshMetadataSummaryEntries(startMetadataKey: String, endMetadataKey: String, nameMetadataKey: String, - statusMetadataKey: String, labelMetadataKey: String, + statusMetadataKey: String, labelMetadataKey: String, submissionMetadataKey: String, buildUpdatedSummary: (Option[WorkflowMetadataSummaryEntry], Seq[MetadataEntry]) => WorkflowMetadataSummaryEntry) @@ -80,6 +80,9 @@ trait MetadataSqlDatabase { workflowExecutionUuids: Set[String], labelAndKeyLabelValues: Set[(String,String)], labelOrKeyLabelValues: Set[(String,String)], + excludeLabelAndValues: Set[(String,String)], + excludeLabelOrValues: Set[(String,String)], + submissionTimestamp: Option[Timestamp], startTimestampOption: Option[Timestamp], endTimestampOption: Option[Timestamp], page: Option[Int], @@ -90,6 +93,9 @@ trait MetadataSqlDatabase { workflowExecutionUuids: Set[String], labelAndKeyLabelValues: Set[(String,String)], labelOrKeyLabelValues: Set[(String,String)], + excludeLabelAndValues: Set[(String,String)], + excludeLabelOrValues: Set[(String,String)], + submissionTimestamp: Option[Timestamp], startTimestampOption: Option[Timestamp], endTimestampOption: Option[Timestamp]) (implicit ec: ExecutionContext): Future[Int] diff --git a/database/sql/src/main/scala/cromwell/database/sql/WorkflowStoreSqlDatabase.scala b/database/sql/src/main/scala/cromwell/database/sql/WorkflowStoreSqlDatabase.scala index e932dd4563f..b011eb2be25 100644 --- a/database/sql/src/main/scala/cromwell/database/sql/WorkflowStoreSqlDatabase.scala +++ b/database/sql/src/main/scala/cromwell/database/sql/WorkflowStoreSqlDatabase.scala @@ -59,7 +59,7 @@ ____ __ ____ ______ .______ __ ___ _______ __ ______ def fetchStartableWorkflows(limit: Int, cromwellId: String, heartbeatTtl: FiniteDuration) (implicit ec: ExecutionContext): Future[Seq[WorkflowStoreEntry]] - def writeWorkflowHeartbeats(workflowExecutionUuids: List[String])(implicit ec: ExecutionContext): Future[Int] + def writeWorkflowHeartbeats(workflowExecutionUuids: Set[String])(implicit ec: ExecutionContext): Future[Int] /** * Clears out cromwellId and heartbeatTimestamp for all workflow store entries currently assigned diff --git a/database/sql/src/main/scala/cromwell/database/sql/tables/WorkflowMetadataSummaryEntry.scala b/database/sql/src/main/scala/cromwell/database/sql/tables/WorkflowMetadataSummaryEntry.scala index 4264c871449..285f9e4e154 100644 --- a/database/sql/src/main/scala/cromwell/database/sql/tables/WorkflowMetadataSummaryEntry.scala +++ b/database/sql/src/main/scala/cromwell/database/sql/tables/WorkflowMetadataSummaryEntry.scala @@ -9,5 +9,6 @@ case class WorkflowMetadataSummaryEntry workflowStatus: Option[String], startTimestamp: Option[Timestamp], endTimestamp: Option[Timestamp], + submissionTimestamp: Option[Timestamp], workflowMetadataSummaryEntryId: Option[Long] = None ) diff --git a/docs/RuntimeAttributes.md b/docs/RuntimeAttributes.md index 4f9bf2bb8f2..f2113fe2364 100644 --- a/docs/RuntimeAttributes.md +++ b/docs/RuntimeAttributes.md @@ -293,13 +293,14 @@ runtime { ## `maxRetries` -Applicable for all backends in Cromwell. Takes an Int as a value that indicates the maximum number of times Cromwell should retry a failed task. +This retry option provides a strategy for handling transient job failures. For example, if a task fails due to a timeout from accessing an external service, this option helps re-run the failed task without having to re-run the entire workflow.
It takes an Int as a value that indicates the maximum number of times Cromwell should retry a failed task. This retry is applied to jobs that fail while executing the task command. -*eg. With a value of 1, Cromwell will run the task, if the task fails for any reason, the task will be retried without any changes.* +If using the Google backend, it's important to note that the `maxRetries` count is independent of the [preemptible](#preemptible) count. For example, the task below can be retried up to 6 times if it's preempted 3 times AND the command execution fails 3 times. ``` runtime { - maxRetries: 1 + preemptible: 3 + maxRetries: 3 } ``` diff --git a/docs/api/RESTAPI.md b/docs/api/RESTAPI.md index b035b26a8f8..53d98a6c9a3 100644 --- a/docs/api/RESTAPI.md +++ b/docs/api/RESTAPI.md @@ -1,5 +1,5 @@
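Editorial note on the `maxRetries` / `preemptible` documentation above: to make the two independent counters concrete, here is a hedged, standalone sketch. It is not Cromwell's implementation; `RetryBudget`, `shouldRetry`, and the failure kinds are hypothetical names used only to model the documented behaviour that preemptions and command failures are counted separately.

```scala
// Hypothetical illustration only: two independent retry budgets, as described
// for `preemptible` and `maxRetries` above. Not Cromwell's actual code.
final case class RetryBudget(preemptible: Int, maxRetries: Int)

sealed trait FailureKind
case object Preempted extends FailureKind
case object CommandFailed extends FailureKind

object RetrySketch {
  /** A preemption draws on the preemptible count; a command failure draws on maxRetries. */
  def shouldRetry(budget: RetryBudget, preemptions: Int, commandFailures: Int, kind: FailureKind): Boolean =
    kind match {
      case Preempted     => preemptions < budget.preemptible
      case CommandFailed => commandFailures < budget.maxRetries
    }

  def main(args: Array[String]): Unit = {
    val budget = RetryBudget(preemptible = 3, maxRetries = 3)
    // Per the note above, up to 3 preemptions and 3 command failures can each
    // trigger a retry, for a maximum of 6 retries in total.
    println(shouldRetry(budget, preemptions = 2, commandFailures = 3, kind = Preempted))     // true
    println(shouldRetry(budget, preemptions = 3, commandFailures = 2, kind = Preempted))     // false
    println(shouldRetry(budget, preemptions = 3, commandFailures = 2, kind = CommandFailed)) // true
  }
}
```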