diff --git a/.gitignore b/.gitignore index e4216b910d1..38c671ead86 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ tags .idea/inspectionProfiles/* !/.idea/inspectionProfiles/Project_Default.xml target +/site # custom config cromwell-executions diff --git a/.travis.yml b/.travis.yml index 4d29b88b9ec..d37734d46cd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -92,9 +92,18 @@ env: BUILD_MYSQL=5.7 - >- BUILD_TYPE=sbt - BUILD_MYSQL=5.7 - BUILD_POSTGRESQL=11.3 - BUILD_MARIADB=10.3 + # The below list of docker tags should be synced with the tags in DatabaseTestKit.getDatabaseSystemSettings + - >- + BUILD_TYPE=dbms + BUILD_MARIADB=5.5 + BUILD_MARIADB_LATEST=latest + BUILD_MYSQL=5.6 + BUILD_MYSQL_LATEST=latest + BUILD_POSTGRESQL=9.5 + BUILD_POSTGRESQL_LATEST=latest + # The list above of docker tags should be synced with the tags in DatabaseTestKit.getDatabaseSystemSettings + - >- + BUILD_TYPE=singleWorkflowRunner script: - src/ci/bin/test.sh notifications: diff --git a/CHANGELOG.md b/CHANGELOG.md index 8811945c7f8..33f8f0eff4e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,25 @@ # Cromwell Change Log +## 46 Release Notes + +### Nvidia GPU Driver Update + +The default driver for Nvidia GPU's on Google Cloud has been updated from `390` to `418.87.00`. A user may override this option at anytime by providing the `nvidiaDriverVersion` runtime attribute. See the [Runtime Attribute description for GPUs](https://cromwell.readthedocs.io/en/stable/RuntimeAttributes/#runtime-attribute-descriptions) for detailed information. + +### Enhanced "error code 10" handling in PAPIv2 + +On Google Pipelines API v2, a worker VM that is preempted may emit a generic error message like +``` +PAPI error code 10. The assigned worker has failed to complete the operation +``` +instead of a preemption-specific message like +``` +PAPI error code 14. Task was preempted for the 2nd time. +``` +Cromwell 44 introduced special handling that detects both preemption indicators and re-runs the job consistent with the `preemptible` setting. + +Cromwell 46 enhances this handling in response to user reports of possible continued issues. + ## 45 Release Notes ### Improved input and output transfer performance on PAPI v2 @@ -25,6 +45,13 @@ Globs can be used to define outputs for BCS backend. #### NAS mount Alibaba Cloud NAS is now supported for the `mounts` runtime attribute. +### Call Caching Failure Messages [(#5095)](https://github.com/broadinstitute/cromwell/pull/5095) + +Call cache failures are no longer sent to the workflow metadata. Instead a limited number of call cache failure messages +will be sent to the workflow log. See [the Cromwell call caching +documentation](https://cromwell.readthedocs.io/en/stable/cromwell_features/CallCaching/) for more information on call +cache failure logging. 
+ ## 44 Release Notes ### Improved PAPI v2 Preemptible VM Support diff --git a/build.sbt b/build.sbt index de448a088ac..0bc34403f4c 100644 --- a/build.sbt +++ b/build.sbt @@ -383,6 +383,7 @@ lazy val root = (project in file(".")) .aggregate(`cloud-nio-impl-ftp`) .aggregate(`cloud-nio-spi`) .aggregate(`cloud-nio-util`) + .aggregate(`cromwell-drs-localizer`) .aggregate(awsBackend) .aggregate(awsS3FileSystem) .aggregate(backend) @@ -400,13 +401,13 @@ lazy val root = (project in file(".")) .aggregate(databaseSql) .aggregate(dockerHashing) .aggregate(drsFileSystem) - .aggregate(`cromwell-drs-localizer`) .aggregate(engine) .aggregate(ftpFileSystem) .aggregate(gcsFileSystem) .aggregate(googlePipelinesCommon) .aggregate(googlePipelinesV1Alpha2) .aggregate(googlePipelinesV2Alpha1) + .aggregate(httpFileSystem) .aggregate(jesBackend) .aggregate(languageFactoryCore) .aggregate(ossFileSystem) @@ -433,3 +434,4 @@ lazy val root = (project in file(".")) .aggregate(wes2cromwell) .aggregate(wom) .aggregate(womtool) + .withAggregateSettings() diff --git a/centaur/src/main/resources/papiUpgradeTestCases/papi_upgrade/papi_upgrade.wdl b/centaur/src/main/resources/papiUpgradeTestCases/papi_upgrade/papi_upgrade.wdl index 35ecc38937a..ba0ef3c28f2 100644 --- a/centaur/src/main/resources/papiUpgradeTestCases/papi_upgrade/papi_upgrade.wdl +++ b/centaur/src/main/resources/papiUpgradeTestCases/papi_upgrade/papi_upgrade.wdl @@ -7,7 +7,7 @@ task sum { File out = "file.md5" } runtime { - docker: "ubuntu:latest" + docker: "ubuntu@sha256:d1d454df0f579c6be4d8161d227462d69e163a8ff9d20a847533989cf0c94d90" } } @@ -18,7 +18,7 @@ task cromwell_killer { echo restarting yo } runtime { - docker: "ubuntu:latest" + docker: "ubuntu@sha256:d1d454df0f579c6be4d8161d227462d69e163a8ff9d20a847533989cf0c94d90" } } diff --git a/centaur/src/main/resources/standardTestCases/cwl_input_typearray.test b/centaur/src/main/resources/standardTestCases/cwl_input_typearray.test new file mode 100644 index 00000000000..da5b85f7c2b --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/cwl_input_typearray.test @@ -0,0 +1,17 @@ +name: cwl_input_typearray +testFormat: workflowsuccess +workflowType: CWL +workflowTypeVersion: v1.0 +workflowRoot: input_typearray + +files { + workflow: cwl_input_typearray/input_typearray.cwl + inputs: cwl_input_typearray/input_typearray.yml +} + +metadata { + "submittedFiles.workflowType": CWL + "submittedFiles.workflowTypeVersion": v1.0 + "outputs.input_typearray.response_f": "input.txt" + "outputs.input_typearray.response_s": "nonexistent_path.txt" +} diff --git a/centaur/src/main/resources/standardTestCases/cwl_input_typearray/input.txt b/centaur/src/main/resources/standardTestCases/cwl_input_typearray/input.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/centaur/src/main/resources/standardTestCases/cwl_input_typearray/input_typearray.cwl b/centaur/src/main/resources/standardTestCases/cwl_input_typearray/input_typearray.cwl new file mode 100644 index 00000000000..897b74822e8 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/cwl_input_typearray/input_typearray.cwl @@ -0,0 +1,42 @@ +cwlVersion: v1.0 +$graph: +- id: input_typearray + cwlVersion: v1.0 + class: CommandLineTool + baseCommand: ['/bin/echo'] + stdout: "response.txt" + requirements: + - class: DockerRequirement + dockerPull: "ubuntu:latest" + - class: InlineJavascriptRequirement + arguments: + - position: 3 + valueFrom: "sentinel" + inputs: + value_f: + type: + - string + - File + inputBinding: + position: 1 + doc: "an 
input to test with a File value" + value_s: + type: + - string + - File + inputBinding: + position: 2 + doc: "an input to test with a string value" + outputs: + response_f: + type: string + outputBinding: + glob: response.txt + loadContents: true + outputEval: $(self[0].contents.split(" ")[0].split("/").slice(-1)[0]) + response_s: + type: string + outputBinding: + glob: response.txt + loadContents: true + outputEval: $(self[0].contents.split(" ")[1].split("/").slice(-1)[0]) diff --git a/centaur/src/main/resources/standardTestCases/cwl_input_typearray/input_typearray.yml b/centaur/src/main/resources/standardTestCases/cwl_input_typearray/input_typearray.yml new file mode 100644 index 00000000000..421402d9289 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/cwl_input_typearray/input_typearray.yml @@ -0,0 +1,4 @@ +value_f: + class: File + path: "centaur/src/main/resources/standardTestCases/cwl_input_typearray/input.txt" +value_s: "centaur/src/main/resources/standardTestCases/cwl_input_typearray/nonexistent_path.txt" diff --git a/centaur/src/main/resources/standardTestCases/dedup_localizations_papi_v2.test b/centaur/src/main/resources/standardTestCases/dedup_localizations_papi_v2.test new file mode 100644 index 00000000000..71edd6de8ea --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/dedup_localizations_papi_v2.test @@ -0,0 +1,13 @@ +name: dedup_localizations_papi_v2 +testFormat: workflowsuccess +backends: [Papiv2] + +files { + workflow: dedup_localizations_papi_v2/dedup_localizations_papi_v2.wdl +} + +metadata { + workflowName: dedup_localizations_papi_v2 + status: Succeeded + "outputs.dedup_localizations_papi_v2.check_log.num_input_localizations": 1 +} diff --git a/centaur/src/main/resources/standardTestCases/dedup_localizations_papi_v2/dedup_localizations_papi_v2.wdl b/centaur/src/main/resources/standardTestCases/dedup_localizations_papi_v2/dedup_localizations_papi_v2.wdl new file mode 100644 index 00000000000..25ecc045fb0 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/dedup_localizations_papi_v2/dedup_localizations_papi_v2.wdl @@ -0,0 +1,59 @@ +version 1.0 + +workflow dedup_localizations_papi_v2 { + call producer + call consumer { input: first = producer.data, second = producer.data } + call check_log { input: out_file_path = consumer.out, log_file_name = "consumer.log" } +} + +task producer { + command { + echo "Here is some data." 
> data.txt + } + + runtime { + docker: "ubuntu:latest" + } + + output { + File data = "data.txt" + } +} + +task consumer { + input { + File first + File second + } + + command { + # noop + } + + runtime { + docker: "ubuntu:latest" + } + + output { + File out = stdout() + } +} + +task check_log { + input { + String out_file_path + String log_file_name + } + String file_log = sub(out_file_path, "/stdout$", "/" + log_file_name) + command { + set -euo pipefail + gsutil cp ~{file_log} log.txt + set +e + grep 'Localizing input gs://cloud-cromwell-dev-self-cleaning/cromwell_execution/travis/dedup_localizations_papi_v2/' log.txt | grep -c "data.txt" + } + output { + File out = stdout() + Int num_input_localizations = read_int(stdout()) + } + runtime { docker: "google/cloud-sdk" } +} diff --git a/centaur/src/main/resources/standardTestCases/drs_tests/wf_level_file_size.wdl b/centaur/src/main/resources/standardTestCases/drs_tests/wf_level_file_size.wdl index 56d89094921..dc84d2e346b 100644 --- a/centaur/src/main/resources/standardTestCases/drs_tests/wf_level_file_size.wdl +++ b/centaur/src/main/resources/standardTestCases/drs_tests/wf_level_file_size.wdl @@ -1,8 +1,8 @@ version 1.0 workflow wf_level_file_size { - File input1 = "dos://wb-mock-drs-dev.storage.googleapis.com/4a3908ad-1f0b-4e2a-8a92-611f2123e8b0" - File input2 = "dos://wb-mock-drs-dev.storage.googleapis.com/0c8e7bc6-fd76-459d-947b-808b0605beb3" + File input1 = "drs://wb-mock-drs-dev.storage.googleapis.com/4a3908ad-1f0b-4e2a-8a92-611f2123e8b0" + File input2 = "drs://wb-mock-drs-dev.storage.googleapis.com/0c8e7bc6-fd76-459d-947b-808b0605beb3" output { Float fileSize1 = size(input1) diff --git a/centaur/src/main/resources/standardTestCases/drs_usa_hca/drs_usa_hca.inputs b/centaur/src/main/resources/standardTestCases/drs_usa_hca/drs_usa_hca.inputs index f528e7d2364..f82844519f9 100644 --- a/centaur/src/main/resources/standardTestCases/drs_usa_hca/drs_usa_hca.inputs +++ b/centaur/src/main/resources/standardTestCases/drs_usa_hca/drs_usa_hca.inputs @@ -1,8 +1,8 @@ { # For all below 5 HCA uuids, Martha does not return a service account - "drs_usa_hca.localize_drs_with_usa.file1": "dos://service.staging.explore.data.humancellatlas.org/033c9840-c5cd-438b-b0e4-8e4cd8fc8dc6?version=2019-07-04T104122.106166Z", - "drs_usa_hca.localize_drs_with_usa.file2": "dos://service.staging.explore.data.humancellatlas.org/4defa7b0-46c2-4053-8e99-b827eed1bc96?version=2019-07-04T104122.100969Z", - "drs_usa_hca.localize_drs_with_usa.file3": "dos://service.staging.explore.data.humancellatlas.org/de5dcfc1-5aea-41ba-a7ae-e72c416cb450?version=2019-07-04T104122.092788Z", - "drs_usa_hca.localize_drs_with_usa.file4": "dos://service.staging.explore.data.humancellatlas.org/16dea2c5-e2bd-45bc-b2fd-fcac0daafc48?version=2019-07-04T104122.060634Z", - "drs_usa_hca.localize_drs_with_usa.file5": "dos://service.dev.explore.data.humancellatlas.org/7c800467-9143-402f-b965-4e7cad75c1e6?version=2019-05-26T130511.722646Z" + "drs_usa_hca.localize_drs_with_usa.file1": "drs://service.staging.explore.data.humancellatlas.org/033c9840-c5cd-438b-b0e4-8e4cd8fc8dc6?version=2019-07-04T104122.106166Z", + "drs_usa_hca.localize_drs_with_usa.file2": "drs://service.staging.explore.data.humancellatlas.org/4defa7b0-46c2-4053-8e99-b827eed1bc96?version=2019-07-04T104122.100969Z", + "drs_usa_hca.localize_drs_with_usa.file3": "drs://service.staging.explore.data.humancellatlas.org/de5dcfc1-5aea-41ba-a7ae-e72c416cb450?version=2019-07-04T104122.092788Z", + "drs_usa_hca.localize_drs_with_usa.file4": 
"drs://service.staging.explore.data.humancellatlas.org/16dea2c5-e2bd-45bc-b2fd-fcac0daafc48?version=2019-07-04T104122.060634Z", + "drs_usa_hca.localize_drs_with_usa.file5": "drs://service.dev.explore.data.humancellatlas.org/7c800467-9143-402f-b965-4e7cad75c1e6?version=2019-05-26T130511.722646Z" } diff --git a/centaur/src/main/resources/standardTestCases/gpu_on_papi/gpu_cuda_image.wdl b/centaur/src/main/resources/standardTestCases/gpu_on_papi/gpu_cuda_image.wdl index f7c784cb574..ce0b508f8dc 100644 --- a/centaur/src/main/resources/standardTestCases/gpu_on_papi/gpu_cuda_image.wdl +++ b/centaur/src/main/resources/standardTestCases/gpu_on_papi/gpu_cuda_image.wdl @@ -3,7 +3,7 @@ version 1.0 workflow gpu_cuda_image { input { - Array[String] driver_versions = [ "390.46" ] + Array[String] driver_versions = [ "418.87.00" ] } scatter (driver_version in driver_versions) { diff --git a/centaur/src/main/resources/standardTestCases/localization_sanity_papi_v2.test b/centaur/src/main/resources/standardTestCases/localization_sanity_papi_v2.test new file mode 100644 index 00000000000..edf5789b98d --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/localization_sanity_papi_v2.test @@ -0,0 +1,16 @@ +name: localization_sanity_papi_v2 +testFormat: workflowsuccess +workflowType: WDL +workflowTypeVersion: 1.0 +backends: [Papiv2] + +files { + workflow: localization_sanity_papi_v2/localization_sanity_papi_v2.wdl +} + +metadata { + status: Succeeded + "outputs.localization_sanity.sanity_check.lines.0": "file a.txt: 1" + "outputs.localization_sanity.sanity_check.lines.1": "file b.txt: 1" + "outputs.localization_sanity.sanity_check.lines.2": "file c.txt: 1" +} diff --git a/centaur/src/main/resources/standardTestCases/localization_sanity_papi_v2/localization_sanity_papi_v2.wdl b/centaur/src/main/resources/standardTestCases/localization_sanity_papi_v2/localization_sanity_papi_v2.wdl new file mode 100644 index 00000000000..55b3abb7e7b --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/localization_sanity_papi_v2/localization_sanity_papi_v2.wdl @@ -0,0 +1,44 @@ +version 1.0 + +task make_files { + command <<< + names=(a b c) + mkdir -p "${names[@]}" + for name in "${names[@]}"; do + touch "${name}/dummy.txt" # the first file is not bulk-transferred via `gsutil cp -I ...` which is what this test is about. + touch "${name}/${name}.txt" + done + >>> + output { + # Intentionally not globbed as the current implementaton of globbing would defeat what this test + # is trying to assert. + Array[File] files = ["a/dummy.txt", "a/a.txt", "b/dummy.txt", "b/b.txt", "c/dummy.txt", "c/c.txt"] + } + runtime { + docker: "ubuntu:latest" + } +} + +task sanity_check { + input { + Array[File] files + } + command <<< + names=(a b c) + for name in "${names[@]}"; do + file="${name}.txt" + echo "file $file: $(find . 
-name $file | wc -l)" + done + >>> + output { + Array[String] lines = read_lines(stdout()) + } + runtime { + docker: "ubuntu:latest" + } +} + +workflow localization_sanity { + call make_files + call sanity_check { input: files = make_files.files } +} diff --git a/centaur/src/main/resources/standardTestCases/lots_of_inputs.test b/centaur/src/main/resources/standardTestCases/lots_of_inputs.test index af01604d6b5..a8c59ba2466 100644 --- a/centaur/src/main/resources/standardTestCases/lots_of_inputs.test +++ b/centaur/src/main/resources/standardTestCases/lots_of_inputs.test @@ -7,6 +7,7 @@ tags: [ big_metadata ] files { workflow: lots_of_inputs/lots_of_inputs.wdl + inputs: lots_of_inputs/lots_of_inputs.inputs } metadata { diff --git a/centaur/src/main/resources/standardTestCases/lots_of_inputs/lots_of_inputs.inputs b/centaur/src/main/resources/standardTestCases/lots_of_inputs/lots_of_inputs.inputs new file mode 100644 index 00000000000..a95cde97de9 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/lots_of_inputs/lots_of_inputs.inputs @@ -0,0 +1 @@ +{ "lots_of_inputs.how_many_is_lots": 400 } diff --git a/centaur/src/main/resources/standardTestCases/lots_of_inputs/lots_of_inputs.wdl b/centaur/src/main/resources/standardTestCases/lots_of_inputs/lots_of_inputs.wdl index 30773a1b247..73a483cb7f9 100644 --- a/centaur/src/main/resources/standardTestCases/lots_of_inputs/lots_of_inputs.wdl +++ b/centaur/src/main/resources/standardTestCases/lots_of_inputs/lots_of_inputs.wdl @@ -32,7 +32,8 @@ task make_array { } workflow lots_of_inputs { - call make_array { input: n = 400 } + Int how_many_is_lots + call make_array { input: n = how_many_is_lots } call do_nothing { input: f = make_array.a } output { diff --git a/centaur/src/main/resources/standardTestCases/lots_of_inputs/lots_of_inputs_papiv2.inputs b/centaur/src/main/resources/standardTestCases/lots_of_inputs/lots_of_inputs_papiv2.inputs new file mode 100644 index 00000000000..7449dadf0cc --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/lots_of_inputs/lots_of_inputs_papiv2.inputs @@ -0,0 +1 @@ +{ "lots_of_inputs.how_many_is_lots": 10000 } diff --git a/centaur/src/main/resources/standardTestCases/lots_of_inputs_papiv2.test b/centaur/src/main/resources/standardTestCases/lots_of_inputs_papiv2.test new file mode 100644 index 00000000000..a3ea44e0a95 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/lots_of_inputs_papiv2.test @@ -0,0 +1,19 @@ +# This test makes sure that: +# - 10000 output files are all found and collected by the glob() method +# - 10000 input files to a task doesn't make anything explode inappropriately +name: lots_of_inputs_papiv2 +testFormat: workflowsuccess +tags: [ big_metadata ] +backends: [ Papiv2 ] + +files { + workflow: lots_of_inputs/lots_of_inputs.wdl + inputs: lots_of_inputs/lots_of_inputs_papiv2.inputs +} + +metadata { + workflowName: lots_of_inputs + status: Succeeded + "outputs.lots_of_inputs.out_count": "10000" + "outputs.lots_of_inputs.nothing_out": "no-op" +} diff --git a/centaur/src/main/resources/standardTestCases/monitoring_log/monitoring_log.wdl b/centaur/src/main/resources/standardTestCases/monitoring_log/monitoring_log.wdl index 36ca4a5b162..1ee71c1e64f 100644 --- a/centaur/src/main/resources/standardTestCases/monitoring_log/monitoring_log.wdl +++ b/centaur/src/main/resources/standardTestCases/monitoring_log/monitoring_log.wdl @@ -10,6 +10,6 @@ task get_stats { Array[String] stats = read_lines("monitoring.log") } runtime { - docker: "ubuntu" + docker: 
"ubuntu@sha256:71cd81252a3563a03ad8daee81047b62ab5d892ebbfbf71cf53415f29c130950" } } diff --git a/centaur/src/main/resources/standardTestCases/monitoring_log_papiv2.test b/centaur/src/main/resources/standardTestCases/monitoring_log_papiv2.test index d46e6cfa4c8..86d116765e1 100644 --- a/centaur/src/main/resources/standardTestCases/monitoring_log_papiv2.test +++ b/centaur/src/main/resources/standardTestCases/monitoring_log_papiv2.test @@ -13,6 +13,6 @@ metadata { "calls.monitoring_log.get_stats.jes.monitoringScript": "gs://cloud-cromwell-dev/some/simple_script.sh" "calls.monitoring_log.get_stats.monitoringLog": "gs://cloud-cromwell-dev-self-cleaning/cromwell_execution/travis/monitoring_log/<>/call-get_stats/monitoring.log" "outputs.monitoring_log.get_stats.stats.0": "CPU: 1" - "outputs.monitoring_log.get_stats.stats.1": "Total Memory: 2.0G" + "outputs.monitoring_log.get_stats.stats.1": "Total Memory: 1.9G" "outputs.monitoring_log.get_stats.stats.2": "Total Disk space: 9.8G" } diff --git a/cloud-nio/cloud-nio-impl-drs/src/main/scala/cloud/nio/impl/drs/DrsCloudNioFileSystemProvider.scala b/cloud-nio/cloud-nio-impl-drs/src/main/scala/cloud/nio/impl/drs/DrsCloudNioFileSystemProvider.scala index 1daf2da9893..2f36374212a 100644 --- a/cloud-nio/cloud-nio-impl-drs/src/main/scala/cloud/nio/impl/drs/DrsCloudNioFileSystemProvider.scala +++ b/cloud-nio/cloud-nio-impl-drs/src/main/scala/cloud/nio/impl/drs/DrsCloudNioFileSystemProvider.scala @@ -33,13 +33,13 @@ class DrsCloudNioFileSystemProvider(rootConfig: Config, override def isTransient(exception: Exception): Boolean = false - override def getScheme: String = "dos" + override def getScheme: String = "drs" override def getHost(uriAsString: String): String = { require(uriAsString.startsWith(s"$getScheme://"), s"Scheme does not match $getScheme") /* - * In some cases for a URI, the host name is null. For example, for DRS urls like 'dos://dg.123/123-123-123', + * In some cases for a URI, the host name is null. For example, for DRS urls like 'drs://dg.123/123-123-123', * even though 'dg.123' is a valid host, somehow since it does not conform to URI's standards, uri.getHost returns null. In such * cases, authority is used instead of host. If there is no authority, use an empty string. 
*/ diff --git a/cloud-nio/cloud-nio-impl-drs/src/main/scala/cloud/nio/impl/drs/DrsCloudNioRegularFileAttributes.scala b/cloud-nio/cloud-nio-impl-drs/src/main/scala/cloud/nio/impl/drs/DrsCloudNioRegularFileAttributes.scala index d4fe08e4655..1a9ec260059 100644 --- a/cloud-nio/cloud-nio-impl-drs/src/main/scala/cloud/nio/impl/drs/DrsCloudNioRegularFileAttributes.scala +++ b/cloud-nio/cloud-nio-impl-drs/src/main/scala/cloud/nio/impl/drs/DrsCloudNioRegularFileAttributes.scala @@ -24,7 +24,7 @@ class DrsCloudNioRegularFileAttributes(drsPath: String, drsPathResolver: DrsPath override def fileHash: Option[String] = { drsPathResolver.resolveDrsThroughMartha(drsPath).map(marthaResponse => { - marthaResponse.dos.data_object.checksums.flatMap { + marthaResponse.drs.data_object.checksums.flatMap { _.collectFirst{ case c if c.`type`.equalsIgnoreCase("md5") => c.checksum } } }).unsafeRunSync() @@ -34,7 +34,7 @@ class DrsCloudNioRegularFileAttributes(drsPath: String, drsPathResolver: DrsPath override def lastModifiedTime(): FileTime = { val lastModifiedIO = for { marthaResponse <- drsPathResolver.resolveDrsThroughMartha(drsPath) - lastModifiedInString <- IO.fromEither(marthaResponse.dos.data_object.updated.toRight(throwRuntimeException("updated"))) + lastModifiedInString <- IO.fromEither(marthaResponse.drs.data_object.updated.toRight(throwRuntimeException("updated"))) lastModified <- convertToFileTime(lastModifiedInString) } yield lastModified @@ -45,7 +45,7 @@ class DrsCloudNioRegularFileAttributes(drsPath: String, drsPathResolver: DrsPath override def size(): Long = { val sizeIO = for { marthaResponse <- drsPathResolver.resolveDrsThroughMartha(drsPath) - size <- IO.fromEither(marthaResponse.dos.data_object.size.toRight(throwRuntimeException("size"))) + size <- IO.fromEither(marthaResponse.drs.data_object.size.toRight(throwRuntimeException("size"))) } yield size sizeIO.unsafeRunSync() diff --git a/cloud-nio/cloud-nio-impl-drs/src/main/scala/cloud/nio/impl/drs/DrsPathResolver.scala b/cloud-nio/cloud-nio-impl-drs/src/main/scala/cloud/nio/impl/drs/DrsPathResolver.scala index cc8bbf7dd26..3adaed3152e 100644 --- a/cloud-nio/cloud-nio-impl-drs/src/main/scala/cloud/nio/impl/drs/DrsPathResolver.scala +++ b/cloud-nio/cloud-nio-impl-drs/src/main/scala/cloud/nio/impl/drs/DrsPathResolver.scala @@ -19,10 +19,14 @@ case class DrsPathResolver(drsConfig: DrsConfig, httpClientBuilder: HttpClientBu implicit lazy val urlDecoder: Decoder[Url] = deriveDecoder implicit lazy val checksumDecoder: Decoder[ChecksumObject] = deriveDecoder - implicit lazy val dataObjectDecoder: Decoder[DosDataObject] = deriveDecoder - implicit lazy val dosObjectDecoder: Decoder[DosObject] = deriveDecoder + implicit lazy val dataObjectDecoder: Decoder[DrsDataObject] = deriveDecoder + implicit lazy val drsObjectDecoder: Decoder[DrsObject] = deriveDecoder implicit lazy val saDataObjectDecoder: Decoder[SADataObject] = deriveDecoder - implicit lazy val marthaResponseDecoder: Decoder[MarthaResponse] = deriveDecoder + // Martha is still returning objects keyed by the obsolete "dos" terminology rather than the current term "drs". + // In order to avoid having Cromwell's case classes use the obsolete terminology that would arise from a derived + // decoder, this `forProduct2` construct instructs Circe to take the value keyed by `dos` and pass that as the + // first argument to `MarthaResponse.apply`, which happens to be the constructor parameter formally named `drs`. 
+ implicit lazy val marthaResponseDecoder: Decoder[MarthaResponse] = Decoder.forProduct2("dos", "googleServiceAccount")(MarthaResponse.apply) private val DrsPathToken = "${drsPath}" @@ -48,7 +52,7 @@ case class DrsPathResolver(drsConfig: DrsConfig, httpClientBuilder: HttpClientBu e => IO.raiseError(new RuntimeException(s"Failed to parse response from Martha into a case class. Error: ${ExceptionUtils.getMessage(e)}")) } } - + private def executeMarthaRequest(httpPost: HttpPost): Resource[IO, HttpResponse]= { for { httpClient <- Resource.fromAutoCloseable(IO(httpClientBuilder.build())) @@ -78,13 +82,13 @@ case class Url(url: String) case class ChecksumObject(checksum: String, `type`: String) -case class DosDataObject(size: Option[Long], +case class DrsDataObject(size: Option[Long], checksums: Option[Array[ChecksumObject]], updated: Option[String], urls: Array[Url]) -case class DosObject(data_object: DosDataObject) +case class DrsObject(data_object: DrsDataObject) case class SADataObject(data: Json) -case class MarthaResponse(dos: DosObject, googleServiceAccount: Option[SADataObject]) +case class MarthaResponse(drs: DrsObject, googleServiceAccount: Option[SADataObject]) diff --git a/common/src/main/scala/common/validation/ErrorOr.scala b/common/src/main/scala/common/validation/ErrorOr.scala index 95eb9bab63e..d01a6a15174 100644 --- a/common/src/main/scala/common/validation/ErrorOr.scala +++ b/common/src/main/scala/common/validation/ErrorOr.scala @@ -10,7 +10,7 @@ object ErrorOr { type ErrorOr[+A] = Validated[NonEmptyList[String], A] implicit class EnhancedErrorOr[A](val eoa: ErrorOr[A]) extends AnyVal { - def contextualizeErrors(s: String): ErrorOr[A] = eoa.leftMap { errors => + def contextualizeErrors(s: => String): ErrorOr[A] = eoa.leftMap { errors => val total = errors.size errors.zipWithIndex map { case (e, i) => s"Failed to $s (reason ${i + 1} of $total): $e" } } diff --git a/common/src/main/scala/common/validation/Validation.scala b/common/src/main/scala/common/validation/Validation.scala index f88a56e89d7..4f805112573 100644 --- a/common/src/main/scala/common/validation/Validation.scala +++ b/common/src/main/scala/common/validation/Validation.scala @@ -94,11 +94,11 @@ object Validation { } implicit class OptionValidation[A](val o: Option[A]) extends AnyVal { - def toErrorOr(errorMessage: String): ErrorOr[A] = { + def toErrorOr(errorMessage: => String): ErrorOr[A] = { Validated.fromOption(o, NonEmptyList.of(errorMessage)) } - def toChecked(errorMessage: String): Checked[A] = { + def toChecked(errorMessage: => String): Checked[A] = { Either.fromOption(o, NonEmptyList.of(errorMessage)) } } diff --git a/core/src/test/resources/application.conf b/core/src/test/resources/application.conf index de6a92d45b6..ce250d4b583 100644 --- a/core/src/test/resources/application.conf +++ b/core/src/test/resources/application.conf @@ -19,66 +19,6 @@ backend { # Bumped up for tests database.db.connectionTimeout = 30000 -database-test-mysql { - # Run the following to (optionally) drop and (re-)create the database: - # mysql -ucromwell -ptest -e "DROP DATABASE IF EXISTS cromwell_test; CREATE DATABASE cromwell_test;" - profile = "slick.jdbc.MySQLProfile$" - db { - driver = "com.mysql.cj.jdbc.Driver" - url = "jdbc:mysql://localhost:3306/cromwell_test?useSSL=false&rewriteBatchedStatements=true&serverTimezone=UTC&useInformationSchema=true" - url = ${?CROMWELL_BUILD_MYSQL_JDBC_URL} - user = "cromwell" - user = ${?CROMWELL_BUILD_MYSQL_USERNAME} - password = "test" - password = ${?CROMWELL_BUILD_MYSQL_PASSWORD} - 
connectionTimeout = 5000 - } -} - -database-test-mariadb { - # Installing both mysql and mariadb takes skill... Instead, try running this docker from the cromwell directory: - # - # docker run \ - # --rm \ - # --env MYSQL_ROOT_PASSWORD=private \ - # --env MYSQL_USER=cromwell \ - # --env MYSQL_PASSWORD=test \ - # --env MYSQL_DATABASE=cromwell_test \ - # --publish 13306:3306 \ - # --volume ${PWD}/src/ci/docker-compose/mariadb-conf.d:/etc/mysql/conf.d \ - # mariadb:10.3 - - # Run the following to (optionally) drop and (re-)create the database: - # mysql --protocol=tcp -P13306 -ucromwell -ptest -e "DROP DATABASE IF EXISTS cromwell_test; CREATE DATABASE cromwell_test;" - profile = "slick.jdbc.MySQLProfile$" - db { - driver = "org.mariadb.jdbc.Driver" - url = "jdbc:mariadb://localhost:13306/cromwell_test?rewriteBatchedStatements=true" - url = ${?CROMWELL_BUILD_MARIADB_JDBC_URL} - user = "cromwell" - user = ${?CROMWELL_BUILD_MARIADB_USERNAME} - password = "test" - password = ${?CROMWELL_BUILD_MARIADB_PASSWORD} - connectionTimeout = 5000 - } -} - -database-test-postgresql { - # Run the following to (optionally) drop and (re-)create the database: - # psql postgres <<< 'drop database if exists cromwell_test; create database cromwell_test;' - profile = "slick.jdbc.PostgresProfile$" - db { - driver = "org.postgresql.Driver" - url = "jdbc:postgresql://localhost:5432/cromwell_test?reWriteBatchedInserts=true" - url = ${?CROMWELL_BUILD_POSTGRESQL_JDBC_URL} - user = "cromwell" - user = ${?CROMWELL_BUILD_POSTGRESQL_USERNAME} - password = "test" - password = ${?CROMWELL_BUILD_POSTGRESQL_PASSWORD} - connectionTimeout = 5000 - } -} - akka { log-dead-letters = "off" loggers = ["akka.event.slf4j.Slf4jLogger"] diff --git a/core/src/test/scala/cromwell/core/TestKitSuite.scala b/core/src/test/scala/cromwell/core/TestKitSuite.scala index e8443a4d5f7..df1317deae3 100644 --- a/core/src/test/scala/cromwell/core/TestKitSuite.scala +++ b/core/src/test/scala/cromwell/core/TestKitSuite.scala @@ -2,8 +2,8 @@ package cromwell.core import java.util.UUID -import akka.actor.{ActorSystem, Props} -import akka.testkit.TestKit +import akka.actor.ActorSystem +import akka.testkit.{TestActors, TestKit} import com.typesafe.config.{Config, ConfigFactory} import org.scalatest.{BeforeAndAfterAll, Suite} @@ -21,7 +21,9 @@ abstract class TestKitSuite(actorSystemName: String = TestKitSuite.randomName, shutdown() } - val emptyActor = system.actorOf(Props.empty, "TestKitSuiteEmptyActor") + // 'BlackHoleActor' swallows messages without logging them (thus reduces log file overhead): + val emptyActor = system.actorOf(TestActors.blackholeProps, "TestKitSuiteEmptyActor") + val mockIoActor = system.actorOf(MockIoActor.props(), "TestKitSuiteMockIoActor") val simpleIoActor = system.actorOf(SimpleIoActor.props, "TestKitSuiteSimpleIoActor") val failIoActor = system.actorOf(FailIoActor.props(), "TestKitSuiteFailIoActor") diff --git a/cromwell-drs-localizer/src/main/scala/drs/localizer/DrsLocalizerMain.scala b/cromwell-drs-localizer/src/main/scala/drs/localizer/DrsLocalizerMain.scala index 450532cf6a9..f52b8e5d562 100644 --- a/cromwell-drs-localizer/src/main/scala/drs/localizer/DrsLocalizerMain.scala +++ b/cromwell-drs-localizer/src/main/scala/drs/localizer/DrsLocalizerMain.scala @@ -61,7 +61,7 @@ object DrsLocalizerMain extends IOApp { marthaResponse <- resolveDrsThroughMartha(drsUrl, marthaUri) _ = httpBackendConnection.close() // Currently Martha only supports resolving DRS paths to GCS paths - gcsUrl <- 
extractFirstGcsUrl(marthaResponse.dos.data_object.urls) + gcsUrl <- extractFirstGcsUrl(marthaResponse.drs.data_object.urls) exitState <- downloadFileFromGcs(gcsUrl, marthaResponse.googleServiceAccount.map(_.data.toString), downloadLoc, requesterPaysId) } yield exitState diff --git a/cromwell-drs-localizer/src/main/scala/drs/localizer/MarthaResponse.scala b/cromwell-drs-localizer/src/main/scala/drs/localizer/MarthaResponse.scala index 3d907978e5f..be172bae181 100644 --- a/cromwell-drs-localizer/src/main/scala/drs/localizer/MarthaResponse.scala +++ b/cromwell-drs-localizer/src/main/scala/drs/localizer/MarthaResponse.scala @@ -6,10 +6,14 @@ import io.circe.generic.semiauto.deriveDecoder object MarthaResponseJsonSupport { implicit val urlFormat: Decoder[Url] = deriveDecoder - implicit val dataObject: Decoder[DosDataObject] = deriveDecoder - implicit val dosObjectFormat: Decoder[DosObject] = deriveDecoder + implicit val dataObject: Decoder[DrsDataObject] = deriveDecoder + implicit val drsObjectFormat: Decoder[DrsObject] = deriveDecoder implicit val googleServiceAccountFormat: Decoder[GoogleServiceAccount] = deriveDecoder - implicit val marthaResponseFormat: Decoder[MarthaResponse] = deriveDecoder + // Martha is still returning objects keyed by the obsolete "dos" terminology rather than the current term "drs". + // In order to avoid having Cromwell's case classes use the obsolete terminology that would arise from a derived + // decoder, this `forProduct2` construct instructs Circe to take the value keyed by `dos` and pass that as the + // first argument to `MarthaResponse.apply`, which happens to be the constructor parameter formally named `drs`. + implicit val marthaResponseFormat: Decoder[MarthaResponse] = Decoder.forProduct2("dos", "googleServiceAccount")(MarthaResponse.apply) implicit val samErrorResponseFormat: Decoder[SamErrorResponse] = deriveDecoder implicit val samErrorResponseCodeFormat: Decoder[SamErrorResponseCode] = deriveDecoder @@ -17,10 +21,10 @@ object MarthaResponseJsonSupport { } case class Url(url: String) -case class DosDataObject(urls: Array[Url]) -case class DosObject(data_object: DosDataObject) +case class DrsDataObject(urls: Array[Url]) +case class DrsObject(data_object: DrsDataObject) case class GoogleServiceAccount(data: Json) -case class MarthaResponse(dos: DosObject, googleServiceAccount: Option[GoogleServiceAccount]) +case class MarthaResponse(drs: DrsObject, googleServiceAccount: Option[GoogleServiceAccount]) case class SamErrorResponse(text: String) diff --git a/cwl/src/main/scala/cwl/CwlExpressionCommandPart.scala b/cwl/src/main/scala/cwl/CwlExpressionCommandPart.scala index 779c3df7da5..f323c74dbd2 100644 --- a/cwl/src/main/scala/cwl/CwlExpressionCommandPart.scala +++ b/cwl/src/main/scala/cwl/CwlExpressionCommandPart.scala @@ -125,6 +125,7 @@ abstract class CommandLineBindingCommandPart(commandLineBinding: CommandLineBind } case _: WomObjectLike => prefixAsList case WomEnumerationValue(_, value) => handlePrefix(value) + case WomCoproductValue(_, value) => processValue(value) case w => throw new RuntimeException(s"Unhandled CwlExpressionCommandPart value '$w' of type ${w.womType.stableName}") } diff --git a/cwl/src/main/scala/cwl/CwltoolRunner.scala b/cwl/src/main/scala/cwl/CwltoolRunner.scala index a68418ec065..834c9d80b23 100644 --- a/cwl/src/main/scala/cwl/CwltoolRunner.scala +++ b/cwl/src/main/scala/cwl/CwltoolRunner.scala @@ -18,7 +18,7 @@ object CwltoolRunner { lazy val instance: CwltoolRunner = { val runnerClass = config.getString("cwltool-runner.class") - 
Class.forName(runnerClass).newInstance().asInstanceOf[CwltoolRunner] + Class.forName(runnerClass).getDeclaredConstructor().newInstance().asInstanceOf[CwltoolRunner] } } diff --git a/database/migration/src/main/resources/changesets/resync_engine_schema.xml b/database/migration/src/main/resources/changesets/resync_engine_schema.xml index e477bce91ab..d55c353f1d9 100644 --- a/database/migration/src/main/resources/changesets/resync_engine_schema.xml +++ b/database/migration/src/main/resources/changesets/resync_engine_schema.xml @@ -24,13 +24,24 @@ - + + + + SELECT count(*) + FROM information_schema.sequences + WHERE sequence_name = 'CALL_CACHING_HASH_ENTRY_CALL_CACHING_HASH_ENTRY_ID_seq' + AND data_type = 'bigint'; + + alter sequence "CALL_CACHING_HASH_ENTRY_CALL_CACHING_HASH_ENTRY_ID_seq" as bigint; + + + 8:b0e84d303355e808f09cc8a9ddd87595 true + case validCrVpcEndpoint(_) => true + case _ => false + } + } + + override def accepts(dockerImageIdentifier: DockerImageIdentifier): Boolean = isValidAlibabaCloudCRHost(dockerImageIdentifier.host) override protected def getToken(dockerInfoContext: DockerInfoContext)(implicit client: Client[IO]): IO[Option[String]] = { @@ -59,7 +70,8 @@ class AlibabaCloudCRRegistry(config: DockerRegistryConfig) extends DockerRegistr case _ => throw new Exception(s"The host ${context.dockerImageID.host} does not have the expected region id") } - val endpoint = ProductName + "." + regionId + ".aliyuncs.com" + val defaultEndpoint = ProductName + "." + regionId + ".aliyuncs.com" + val endpoint = getAliyunEndpointFromContext(context).getOrElse(defaultEndpoint) DefaultProfile.addEndpoint(regionId, ProductName, endpoint) val profile: IClientProfile = getAliyunCredentialFromContext(context) match { @@ -92,6 +104,13 @@ class AlibabaCloudCRRegistry(config: DockerRegistryConfig) extends DockerRegistr } } + //cr.cn-beijing.aliyuncs.com or cr-vpc.cn-beijing.aliyuncs.com + private[alibabacloudcrregistry] def getAliyunEndpointFromContext(context: DockerInfoContext): Option[String] = { + context.credentials collectFirst { + case endpoint: String if (isValidAlibabaCloudCREndpoint(endpoint)) => endpoint + } + } + private def matchTag(jsObject: JsObject, dockerHashContext: DockerInfoContext): Boolean = { val tag = dockerHashContext.dockerImageID.reference jsObject.fields.get("tag") match { diff --git a/dockerHashing/src/test/scala/cromwell/docker/registryv2/AlibabaCloudCRRegistrySpec.scala b/dockerHashing/src/test/scala/cromwell/docker/registryv2/AlibabaCloudCRRegistrySpec.scala index 34124cc4ae1..76a01282d3e 100644 --- a/dockerHashing/src/test/scala/cromwell/docker/registryv2/AlibabaCloudCRRegistrySpec.scala +++ b/dockerHashing/src/test/scala/cromwell/docker/registryv2/AlibabaCloudCRRegistrySpec.scala @@ -23,6 +23,7 @@ object AlibabaCloudCRRegistrySpec { |alibabacloudcr { | num-threads = 5 | auth { + | endpoint = "cr.cn-shanghai.aliyuncs.com" | access-id = "test-access-id" | access-key = "test-access-key" | security-token = "test-security-token" @@ -125,14 +126,26 @@ class AlibabaCloudCRRegistrySpec extends TestKitSuite with FlatSpecLike with Mat val basicCredential = new BasicCredentials(access_id, access_key) val sessionCredential = new BasicSessionCredentials(access_id, access_key, security_token) + val vpcEndpoint: String = "cr-vpc.cn-shanghai.aliyuncs.com" + val normalEndpoint = "cr.cn-shanghai.aliyuncs.com" + val validEndpoint = "cr.validendpoint.com" - val dockerRequest = DockerInfoRequest(DockerImageIdentifier.fromString(testCRDockerImageTagNotExist).get, List(basicCredential)) 
+ val dockerRequest = DockerInfoRequest(DockerImageIdentifier.fromString(testCRDockerImageTagNotExist).get, List(basicCredential, normalEndpoint)) val context: DockerInfoContext = DockerInfoContext(dockerRequest, null) - registry.getAliyunCredentialFromContext(context) shouldEqual Some(basicCredential) + registry.getAliyunCredentialFromContext(context) shouldEqual Option(basicCredential) + registry.getAliyunEndpointFromContext(context) shouldEqual Option(normalEndpoint) + + val vpcDockerRequest = DockerInfoRequest(DockerImageIdentifier.fromString(testCRDockerImageTagNotExist).get, List(basicCredential, vpcEndpoint)) + val vpcContext: DockerInfoContext = DockerInfoContext(vpcDockerRequest, null) + registry.getAliyunEndpointFromContext(vpcContext) shouldEqual Option(vpcEndpoint) + + val validDockerRequest = DockerInfoRequest(DockerImageIdentifier.fromString(testCRDockerImageTagNotExist).get, List(basicCredential, validEndpoint)) + val validContext: DockerInfoContext = DockerInfoContext(validDockerRequest, null) + registry.getAliyunEndpointFromContext(validContext) shouldEqual None val sessionDockerRequest = DockerInfoRequest(DockerImageIdentifier.fromString(testCRDockerImageTagNotExist).get, List(sessionCredential)) val sessionContext: DockerInfoContext = DockerInfoContext(sessionDockerRequest, null) - registry.getAliyunCredentialFromContext(sessionContext) shouldEqual Some(sessionCredential) + registry.getAliyunCredentialFromContext(sessionContext) shouldEqual Option(sessionCredential) val invalidDockerRequest = DockerInfoRequest(DockerImageIdentifier.fromString(testCRDockerImageTagNotExist).get, List.empty) val invalidContext: DockerInfoContext = DockerInfoContext(invalidDockerRequest, null) diff --git a/docs/CommandLine.md b/docs/CommandLine.md index f82bea32620..ed078f3da20 100644 --- a/docs/CommandLine.md +++ b/docs/CommandLine.md @@ -25,7 +25,7 @@ Run the workflow and print out the outputs in JSON format. -l, --labels Workflow labels file. -p, --imports A directory or zipfile to search for workflow imports. -m, --metadata-output - An optional directory path to output metadata. + An optional JSON file path to output metadata. ``` Cromwell's Server and Run modes can be invoked with the `server` and `run` arguments respectively. More information on these Cromwell modes can be found in [Modes](Modes). diff --git a/docs/Configuring.md b/docs/Configuring.md index 60248d0bbfd..1f06d9e86a7 100644 --- a/docs/Configuring.md +++ b/docs/Configuring.md @@ -315,7 +315,7 @@ database { profile = "slick.jdbc.PostgresProfile$" db { driver = "org.postgresql.Driver" - url = "jdbc:postgresql//localhost:5432/cromwell" + url = "jdbc:postgresql://localhost:5432/cromwell" user = "user" password = "pass" port = 5432 diff --git a/docs/RuntimeAttributes.md b/docs/RuntimeAttributes.md index b2fe17751c0..a95421736cf 100644 --- a/docs/RuntimeAttributes.md +++ b/docs/RuntimeAttributes.md @@ -380,17 +380,28 @@ Configure your Google network to use "Private Google Access". This will allow yo That's it! You can now run with `noAddress` runtime attribute and it will work as expected. -### `gpuCount` and `gpuType` +### `gpuCount`, `gpuType`, and `nvidiaDriverVersion` -Attach GPUs to the instance when running on the Pipelines API: https://cloud.google.com/compute/docs/gpus/ +Attach GPUs to the instance when running on the Pipelines API([GPU documentation](https://cloud.google.com/compute/docs/gpus/)). Make sure to choose a zone for which the type of GPU you want to attach is available. 
-The two types of GPU supported are `nvidia-tesla-k80` and `nvidia-tesla-p100`.
+The types of compute GPU supported are:
+
+* `nvidia-tesla-k80`
+* `nvidia-tesla-v100`
+* `nvidia-tesla-p100`
+* `nvidia-tesla-p4`
+* `nvidia-tesla-t4`
+
+For the latest list of supported GPUs, please visit [Google's GPU documentation](https://cloud.google.com/compute/docs/gpus/).
+
+The default driver is `418.87.00`; you may specify your own via the `nvidiaDriverVersion` key. Make sure the driver exists in the `nvidia-drivers-us-public` bucket beforehand, per the [Google Pipelines API documentation](https://cloud.google.com/genomics/reference/rest/Shared.Types/Metadata#VirtualMachine).

 ```
 runtime {
     gpuType: "nvidia-tesla-k80"
     gpuCount: 2
+    nvidiaDriverVersion: "418.87.00"
     zones: ["us-central1-c"]
 }
 ```
diff --git a/docs/api/RESTAPI.md b/docs/api/RESTAPI.md
index 29f9193c32d..7f62159d801 100644
--- a/docs/api/RESTAPI.md
+++ b/docs/api/RESTAPI.md
@@ -1,5 +1,5 @@
+
+ |               | DockerHub | DockerHub | GCR    | GCR     | ECR    | ECR     | ACR    | ACR     |
+ |:-------------:|:---------:|:---------:|:------:|:-------:|:------:|:-------:|:------:|:-------:|
+ |               | Public    | Private   | Public | Private | Public | Private | Public | Private |
+ | Pipelines API | X         | X         | X      | X       |        |         |        |         |
+ | AWS Batch     | X         |           | X      |         |        |         |        |         |
+ | BCS           |           |           |        |         |        |         |        | X       |
+ | Other         | X         |           | X      |         |        |         |        |         |
+
+
 **Runtime Attributes**

-As well as call inputs and the command to run, call caching considers the following [runtime attributes](https://cromwell.readthedocs.io/en/develop/RuntimeAttributes/) of a given task when determining whether to call cache:
+As well as call inputs and the command to run, call caching considers the following [runtime
+attributes](../../RuntimeAttributes/) of a given task when determining whether to call cache:

-* [`ContinueOnReturnCode`](https://cromwell.readthedocs.io/en/develop/RuntimeAttributes/#continueonreturncode)
-* [`Docker`](https://cromwell.readthedocs.io/en/develop/RuntimeAttributes/#docker)
-* [`FailOnStderr`](https://cromwell.readthedocs.io/en/develop/RuntimeAttributes/#failonstderr)
+* [`ContinueOnReturnCode`](../../RuntimeAttributes/#continueonreturncode)
+* [`Docker`](../../RuntimeAttributes/#docker)
+* [`FailOnStderr`](../../RuntimeAttributes/#failonstderr)

-If any of these attributes have changed from a previous instance of the same task, that instance will not be call-cached from. Other runtime attributes, including [`memory`](https://cromwell.readthedocs.io/en/develop/RuntimeAttributes/#memory), [`cpu`](https://cromwell.readthedocs.io/en/develop/RuntimeAttributes/#cpu), and [`disks`](https://cromwell.readthedocs.io/en/develop/RuntimeAttributes/#disks), are not considered by call caching and therefore may be changed without preventing a cached result from being used.
+If any of these attributes have changed from a previous instance of the same task, that instance will not be call-cached
+from. Other runtime attributes, including [`memory`](../../RuntimeAttributes/#memory),
+[`cpu`](../../RuntimeAttributes/#cpu), and [`disks`](../../RuntimeAttributes/#disks), are not considered by call caching
+and therefore may be changed without preventing a cached result from being used.
diff --git a/docs/tutorials/PipelinesApi101.md b/docs/tutorials/PipelinesApi101.md
index e1e54dc9de8..fda667d6227 100644
--- a/docs/tutorials/PipelinesApi101.md
+++ b/docs/tutorials/PipelinesApi101.md
@@ -17,15 +17,36 @@ In addition, the following changes are to be expected:

 ### Setting up PAPIv2

-For now the easiest way to try PAPIv2 is to migrate an existing set up from PAPIv1 (see below). After that, copy the PAPIv2 sample configuration in [cromwell.examples.conf](https://github.com/broadinstitute/cromwell/blob/develop/cromwell.examples.conf) in place of the PAPIv1 backend.
+For now the easiest way to try PAPIv2 is to migrate an existing set up from PAPIv1 (see below). After that, copy the PAPIv2 sample configuration in [cromwell.examples.conf](https://github.com/broadinstitute/cromwell/blob/develop/cromwell.example.backends/PAPIv2.conf) in place of the PAPIv1 backend.

 #### Permissions:

-With Pipelines API v2, the mode of authentication (ie, Application Default, User Service Account, default compute service account, etc) will need to have these permissions on the bucket holding the Cromwell directory (root directory):
+Google recommends using a service account to authenticate to GCP.

-* storage.objects.list
-* storage.objects.create
-* storage.objects.delete
+You may create a service account using the `gcloud` command. Consider running the following script, replacing MY-GOOGLE-PROJECT with your Google project ID:
+
+```
+#!/bin/bash
+RANDOM_BUCKET_NAME=$(head /dev/urandom | tr -dc a-z | head -c 32 ; echo '')
+
+# Create a new service account called "MyServiceAccount", and from the output of the command, take the email address that was generated
+EMAIL=$(gcloud beta iam service-accounts create MyServiceAccount --description "to run cromwell" --display-name "cromwell service account" --format json | jq '.email' | sed -e 's/\"//g')
+
+# add all the roles to the service account
+for i in storage.objectCreator storage.objectViewer genomics.pipelinesRunner genomics.admin iam.serviceAccountUser
+do
+  gcloud projects add-iam-policy-binding MY-GOOGLE-PROJECT --member serviceAccount:"$EMAIL" --role roles/$i
+done
+
+# create a bucket to keep the execution directory
+gsutil mb gs://"$RANDOM_BUCKET_NAME"
+
+# give the service account write access to the new bucket
+gsutil acl ch -u "$EMAIL":W gs://"$RANDOM_BUCKET_NAME"
+
+# create a file that represents your service account. KEEP THIS A SECRET.
+gcloud iam service-accounts keys create sa.json --iam-account "$EMAIL" +``` ## Pipelines API v1 diff --git a/engine/src/main/resources/swagger/cromwell.yaml b/engine/src/main/resources/swagger/cromwell.yaml index 4d3e82fbe3a..5531b1a8b92 100644 --- a/engine/src/main/resources/swagger/cromwell.yaml +++ b/engine/src/main/resources/swagger/cromwell.yaml @@ -731,7 +731,7 @@ definitions: id: type: string description: The identifier of the workflow - example: e442e52a-9de1-47f0-8b4f-e6e565008cf1 + example: 00001111-2222-3333-aaaa-bbbbccccdddd status: type: string description: The status of the workflow @@ -744,7 +744,7 @@ definitions: id: type: string description: The identifier of the workflow - example: e442e52a-9de1-47f0-8b4f-e6e565008cf1 + example: 00001111-2222-3333-aaaa-bbbbccccdddd status: type: string description: The status of the workflow diff --git a/engine/src/main/scala/cromwell/engine/workflow/SingleWorkflowRunnerActor.scala b/engine/src/main/scala/cromwell/engine/workflow/SingleWorkflowRunnerActor.scala index 1ee8e8cbb76..c17ef5339d5 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/SingleWorkflowRunnerActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/SingleWorkflowRunnerActor.scala @@ -1,7 +1,5 @@ package cromwell.engine.workflow -import java.util.UUID - import akka.actor.FSM.{CurrentState, Transition} import akka.actor._ import akka.stream.ActorMaterializer @@ -23,9 +21,8 @@ import cromwell.engine.workflow.workflowstore.{InMemoryWorkflowStore, WorkflowSt import cromwell.jobstore.EmptyJobStoreActor import cromwell.server.CromwellRootActor import cromwell.services.metadata.MetadataService.{GetSingleWorkflowMetadataAction, GetStatus, ListenToMetadataWriteActor, WorkflowOutputs} +import cromwell.services.metadata.impl.builder.MetadataBuilderActor.{BuiltMetadataResponse, FailedMetadataResponse} import cromwell.subworkflowstore.EmptySubWorkflowStoreActor -import cromwell.webservice.metadata.MetadataBuilderActor -import cromwell.webservice.metadata.MetadataBuilderActor.{BuiltMetadataResponse, FailedMetadataResponse} import spray.json._ import scala.concurrent.ExecutionContext.Implicits.global @@ -79,18 +76,18 @@ class SingleWorkflowRunnerActor(source: WorkflowSourceFilesCollection, case Event(IssuePollRequest, RunningSwraData(_, id)) => requestStatus(id) stay() - case Event(BuiltMetadataResponse(jsObject: JsObject), RunningSwraData(_, _)) if !jsObject.state.isTerminal => + case Event(BuiltMetadataResponse(_, jsObject: JsObject), RunningSwraData(_, _)) if !jsObject.state.isTerminal => schedulePollRequest() stay() - case Event(BuiltMetadataResponse(jsObject: JsObject), RunningSwraData(replyTo, id)) if jsObject.state == WorkflowSucceeded => + case Event(BuiltMetadataResponse(_, jsObject: JsObject), RunningSwraData(replyTo, id)) if jsObject.state == WorkflowSucceeded => log.info(s"$Tag workflow finished with status '$WorkflowSucceeded'.") serviceRegistryActor ! ListenToMetadataWriteActor goto(WaitingForFlushedMetadata) using SucceededSwraData(replyTo, id) - case Event(BuiltMetadataResponse(jsObject: JsObject), RunningSwraData(replyTo, id)) if jsObject.state == WorkflowFailed => + case Event(BuiltMetadataResponse(_, jsObject: JsObject), RunningSwraData(replyTo, id)) if jsObject.state == WorkflowFailed => log.info(s"$Tag workflow finished with status '$WorkflowFailed'.") serviceRegistryActor ! 
ListenToMetadataWriteActor goto(WaitingForFlushedMetadata) using FailedSwraData(replyTo, id, new RuntimeException(s"Workflow $id transitioned to state $WorkflowFailed")) - case Event(BuiltMetadataResponse(jsObject: JsObject), RunningSwraData(replyTo, id)) if jsObject.state == WorkflowAborted => + case Event(BuiltMetadataResponse(_, jsObject: JsObject), RunningSwraData(replyTo, id)) if jsObject.state == WorkflowAborted => log.info(s"$Tag workflow finished with status '$WorkflowAborted'.") serviceRegistryActor ! ListenToMetadataWriteActor goto(WaitingForFlushedMetadata) using AbortedSwraData(replyTo, id) @@ -99,22 +96,21 @@ class SingleWorkflowRunnerActor(source: WorkflowSourceFilesCollection, when (WaitingForFlushedMetadata) { case Event(QueueWeight(weight), _) if weight > 0 => stay() case Event(QueueWeight(_), data: SucceededSwraData) => - val metadataBuilder = context.actorOf(MetadataBuilderActor.props(serviceRegistryActor), - s"CompleteRequest-Workflow-${data.id}-request-${UUID.randomUUID()}") - metadataBuilder ! WorkflowOutputs(data.id) + + serviceRegistryActor ! WorkflowOutputs(data.id) goto(RequestingOutputs) case Event(QueueWeight(_), data : TerminalSwraData) => requestMetadataOrIssueReply(data) } when (RequestingOutputs) { - case Event(BuiltMetadataResponse(outputs: JsObject), data: TerminalSwraData) => + case Event(BuiltMetadataResponse(_, outputs: JsObject), data: TerminalSwraData) => outputOutputs(outputs) requestMetadataOrIssueReply(data) } when (RequestingMetadata) { - case Event(BuiltMetadataResponse(metadata: JsObject), data: TerminalSwraData) => + case Event(BuiltMetadataResponse(_, metadata: JsObject), data: TerminalSwraData) => outputMetadata(metadata) issueReply(data) } @@ -128,7 +124,7 @@ class SingleWorkflowRunnerActor(source: WorkflowSourceFilesCollection, case Event(r: WorkflowAbortFailureResponse, data) => failAndFinish(r.failure, data) case Event(Failure(e), data) => failAndFinish(e, data) case Event(Status.Failure(e), data) => failAndFinish(e, data) - case Event(FailedMetadataResponse(e), data) => failAndFinish(e, data) + case Event(FailedMetadataResponse(_, e), data) => failAndFinish(e, data) case Event(CurrentState(_, _) | Transition(_, _, _), _) => // ignore uninteresting current state and transition messages stay() @@ -144,8 +140,7 @@ class SingleWorkflowRunnerActor(source: WorkflowSourceFilesCollection, private def requestMetadataOrIssueReply(newData: TerminalSwraData) = if (metadataOutputPath.isDefined) requestMetadata(newData) else issueReply(newData) private def requestMetadata(newData: TerminalSwraData): State = { - val metadataBuilder = context.actorOf(MetadataBuilderActor.props(serviceRegistryActor), s"MetadataRequest-Workflow-${newData.id}") - metadataBuilder ! GetSingleWorkflowMetadataAction(newData.id, None, None, expandSubWorkflows = true) + serviceRegistryActor ! GetSingleWorkflowMetadataAction(newData.id, None, None, expandSubWorkflows = true) goto (RequestingMetadata) using newData } @@ -159,8 +154,7 @@ class SingleWorkflowRunnerActor(source: WorkflowSourceFilesCollection, // This requests status via the metadata service rather than instituting an FSM watch on the underlying workflow actor. // Cromwell's eventual consistency means it isn't safe to use an FSM transition to a terminal state as the signal for // when outputs or metadata have stabilized. - val metadataBuilder = context.actorOf(MetadataBuilderActor.props(serviceRegistryActor), s"StatusRequest-Workflow-$id-request-${UUID.randomUUID()}") - metadataBuilder ! 
GetStatus(id) + serviceRegistryActor ! GetStatus(id) } private def issueSuccessReply(replyTo: ActorRef): State = { diff --git a/engine/src/main/scala/cromwell/engine/workflow/WorkflowProcessingEventPublishing.scala b/engine/src/main/scala/cromwell/engine/workflow/WorkflowProcessingEventPublishing.scala index 912826b29fc..67b5626f018 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/WorkflowProcessingEventPublishing.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/WorkflowProcessingEventPublishing.scala @@ -3,10 +3,12 @@ package cromwell.engine.workflow import java.time.OffsetDateTime import akka.actor.ActorRef +import cats.Monad import common.util.VersionUtil -import cromwell.core.WorkflowId +import common.validation.IOChecked.IOChecked import cromwell.core.WorkflowProcessingEvents.EventKey.{CromwellId, CromwellVersion, Description, Timestamp} import cromwell.core.WorkflowProcessingEvents.{DescriptionEventValue, ProcessingEventsKey} +import cromwell.core.{WorkflowId, WorkflowMetadataKeys} import cromwell.services.metadata.MetadataService.PutMetadataAction import cromwell.services.metadata.{MetadataEvent, MetadataKey, MetadataValue} @@ -36,4 +38,21 @@ object WorkflowProcessingEventPublishing { serviceRegistry ! PutMetadataAction(metadata) } + + def publishLabelsToMetadata(workflowId: WorkflowId, + labels: Map[String, String], + serviceRegistry: ActorRef): IOChecked[Unit] = { + val defaultLabel = "cromwell-workflow-id" -> s"cromwell-$workflowId" + Monad[IOChecked].pure(labelsToMetadata(workflowId, labels + defaultLabel, serviceRegistry)) + } + + private def labelsToMetadata(workflowId: WorkflowId, + labels: Map[String, String], + serviceRegistry: ActorRef): Unit = { + labels foreach { case (labelKey, labelValue) => + val metadataKey = MetadataKey(workflowId, None, s"${WorkflowMetadataKeys.Labels}:$labelKey") + val metadataValue = MetadataValue(labelValue) + serviceRegistry ! 
PutMetadataAction(MetadataEvent(metadataKey, metadataValue)) + } + } } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActor.scala index efc7a316f2e..71677826499 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActor.scala @@ -543,10 +543,10 @@ case class WorkflowExecutionActor(params: WorkflowExecutionActorParams) data: WorkflowExecutionActorData, expressionNode: TaskCallInputExpressionNode): ErrorOr[WorkflowExecutionDiff] = { import cats.syntax.either._ - val taskCallNode = expressionNode.taskCallNodeReceivingInput.get(()) + val taskCallNode: CommandCallNode = expressionNode.taskCallNodeReceivingInput.get(()) (for { - backendJobDescriptorKey <- data.executionStore.backendJobDescriptorKeyForNode(taskCallNode) toChecked s"No BackendJobDescriptorKey found for call node $taskCallNode" + backendJobDescriptorKey <- data.executionStore.backendJobDescriptorKeyForNode(taskCallNode) toChecked s"No BackendJobDescriptorKey found for call node ${taskCallNode.identifier.fullyQualifiedName}" factory <- backendFactoryForTaskCallNode(taskCallNode) backendInitializationData = params.initializationData.get(factory.name) functions = factory.expressionLanguageFunctions(workflowDescriptor.backendDescriptor, backendJobDescriptorKey, backendInitializationData, params.ioActor, ioEc) diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheDiffActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheDiffActor.scala index 688a9a06185..2cb7b0708a4 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheDiffActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheDiffActor.scala @@ -3,52 +3,19 @@ package cromwell.engine.workflow.lifecycle.execution.callcaching import akka.actor.{ActorRef, LoggingFSM, Props} import cats.data.NonEmptyList import cats.instances.list._ -import cats.syntax.foldable._ +import cats.syntax.apply._ +import cats.syntax.traverse._ +import cats.syntax.validated._ +import common.exception.AggregatedMessageException +import common.validation.ErrorOr._ +import common.validation.Validation._ import cromwell.core.Dispatcher.EngineDispatcher import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheDiffActor.{CallCacheDiffActorData, _} import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheDiffQueryParameter.CallCacheDiffQueryCall -import cromwell.services.metadata.CallMetadataKeys.CallCachingKeys -import cromwell.services.metadata.MetadataService.{GetMetadataQueryAction, MetadataLookupResponse, MetadataServiceKeyLookupFailed} +import cromwell.services.metadata.MetadataService.GetMetadataAction import cromwell.services.metadata._ -import cromwell.webservice.metadata.MetadataComponent._ -import cromwell.webservice.metadata._ -import spray.json.JsObject - -import scala.language.postfixOps -import scala.util.{Failure, Success, Try} - -object CallCacheDiffActor { - private val PlaceholderMissingHashValue = MetadataPrimitive(MetadataValue("Error: there is a hash entry for this key but the value is null !")) - - final case class CachedCallNotFoundException(message: String) extends Exception { - override 
def getMessage = message - } - - // Exceptions when calls exist but have no hashes in their metadata, indicating they were run pre-28 - private val HashesForCallAAndBNotFoundException = new Exception("callA and callB have not finished yet, or were run on a previous version of Cromwell on which this endpoint was not supported.") - private val HashesForCallANotFoundException = new Exception("callA has not finished yet, or was run on a previous version of Cromwell on which this endpoint was not supported.") - private val HashesForCallBNotFoundException = new Exception("callB has not finished yet, or was run on a previous version of Cromwell on which this endpoint was not supported.") - - sealed trait CallCacheDiffActorState - case object Idle extends CallCacheDiffActorState - case object WaitingForMetadata extends CallCacheDiffActorState - - sealed trait CallCacheDiffActorData - case object CallCacheDiffNoData extends CallCacheDiffActorData - case class CallCacheDiffWithRequest(queryA: MetadataQuery, - queryB: MetadataQuery, - responseA: Option[MetadataLookupResponse], - responseB: Option[MetadataLookupResponse], - replyTo: ActorRef - ) extends CallCacheDiffActorData - - sealed abstract class CallCacheDiffActorResponse - case class BuiltCallCacheDiffResponse(response: JsObject) extends CallCacheDiffActorResponse - case class FailedCallCacheDiffResponse(reason: Throwable) extends CallCacheDiffActorResponse - - - def props(serviceRegistryActor: ActorRef) = Props(new CallCacheDiffActor(serviceRegistryActor)).withDispatcher(EngineDispatcher) -} +import cromwell.services.metadata.impl.builder.MetadataBuilderActor.{BuiltMetadataResponse, FailedMetadataResponse} +import spray.json.{JsArray, JsBoolean, JsNumber, JsObject, JsString, JsValue} class CallCacheDiffActor(serviceRegistryActor: ActorRef) extends LoggingFSM[CallCacheDiffActorState, CallCacheDiffActorData] { startWith(Idle, CallCacheDiffNoData) @@ -57,76 +24,63 @@ class CallCacheDiffActor(serviceRegistryActor: ActorRef) extends LoggingFSM[Call case Event(CallCacheDiffQueryParameter(callA, callB), CallCacheDiffNoData) => val queryA = makeMetadataQuery(callA) val queryB = makeMetadataQuery(callB) - serviceRegistryActor ! GetMetadataQueryAction(queryA) - serviceRegistryActor ! GetMetadataQueryAction(queryB) + serviceRegistryActor ! GetMetadataAction(queryA) + serviceRegistryActor ! 
GetMetadataAction(queryB) goto(WaitingForMetadata) using CallCacheDiffWithRequest(queryA, queryB, None, None, sender()) } when(WaitingForMetadata) { // First Response // Response A - case Event(response: MetadataLookupResponse, data @ CallCacheDiffWithRequest(queryA, _, None, None, _)) if queryA == response.query => - stay() using data.copy(responseA = Option(response)) + case Event(BuiltMetadataResponse(GetMetadataAction(originalQuery), responseJson), data@CallCacheDiffWithRequest(queryA, _, None, None, _)) if queryA == originalQuery => + stay() using data.copy(responseA = Option(WorkflowMetadataJson(responseJson))) // Response B - case Event(response: MetadataLookupResponse, data @ CallCacheDiffWithRequest(_, queryB, None, None, _)) if queryB == response.query => - stay() using data.copy(responseB = Option(response)) + case Event(BuiltMetadataResponse(GetMetadataAction(originalQuery), responseJson), data@CallCacheDiffWithRequest(_, queryB, None, None, _)) if queryB == originalQuery => + stay() using data.copy(responseB = Option(WorkflowMetadataJson(responseJson))) // Second Response // Response A - case Event(response: MetadataLookupResponse, CallCacheDiffWithRequest(queryA, queryB, None, Some(responseB), replyTo)) if queryA == response.query => - buildDiffAndRespond(queryA, queryB, response, responseB, replyTo) + case Event(BuiltMetadataResponse(GetMetadataAction(originalQuery), responseJson), CallCacheDiffWithRequest(queryA, queryB, None, Some(responseB), replyTo)) if queryA == originalQuery => + buildDiffAndRespond(queryA, queryB, WorkflowMetadataJson(responseJson), responseB, replyTo) // Response B - case Event(response: MetadataLookupResponse, CallCacheDiffWithRequest(queryA, queryB, Some(responseA), None, replyTo)) if queryB == response.query => - buildDiffAndRespond(queryA, queryB, responseA, response, replyTo) - case Event(MetadataServiceKeyLookupFailed(_, failure), data: CallCacheDiffWithRequest) => + case Event(BuiltMetadataResponse(GetMetadataAction(originalQuery), responseJson), CallCacheDiffWithRequest(queryA, queryB, Some(responseA), None, replyTo)) if queryB == originalQuery => + buildDiffAndRespond(queryA, queryB, responseA, WorkflowMetadataJson(responseJson), replyTo) + case Event(FailedMetadataResponse(_, failure), data: CallCacheDiffWithRequest) => data.replyTo ! FailedCallCacheDiffResponse(failure) context stop self stay() } - /** - * Builds a response and sends it back as Json. - * The response is structured in the following way - * { - * "callA": { - * -- information about call A -- - * }, - * "callB": { - * -- information about call B -- - * }, - * "hashDifferential": [ - * { - * "hash key": { - * "callA": -- hash value for call A, or null --, - * "callB": -- hash value for call B, or null -- - * } - * }, - * ... 
- * ] - * } - */ + whenUnhandled { + case Event(oops, oopsData) => + log.error(s"Programmer Error: Unexpected event received by ${this.getClass.getSimpleName}: $oops / $oopsData (in state $stateName)") + stay() + + } + private def buildDiffAndRespond(queryA: MetadataQuery, queryB: MetadataQuery, - responseA: MetadataLookupResponse, - responseB: MetadataLookupResponse, + responseA: WorkflowMetadataJson, + responseB: WorkflowMetadataJson, replyTo: ActorRef) = { - lazy val buildResponse = { - diffHashes(responseA.eventList, responseB.eventList) match { - case Success(diff) => - val diffObject = MetadataObject(Map( - "callA" -> makeCallInfo(queryA, responseA.eventList), - "callB" -> makeCallInfo(queryB, responseB.eventList), - "hashDifferential" -> diff - )) - - BuiltCallCacheDiffResponse(metadataComponentJsonWriter.write(diffObject).asJsObject) - case Failure(f) => FailedCallCacheDiffResponse(f) - } - } + def describeCallFromQuery(query: MetadataQuery): String = s"${query.workflowId} / ${query.jobKey.map(_.callFqn).getOrElse("<>")}:${query.jobKey.map(_.index.getOrElse(-1)).getOrElse("<>")}" + + val callACachingMetadata = extractCallMetadata(queryA, responseA).contextualizeErrors(s"extract relevant metadata for call A (${describeCallFromQuery(queryA)})") + val callBCachingMetadata = extractCallMetadata(queryB, responseB).contextualizeErrors(s"extract relevant metadata for call B (${describeCallFromQuery(queryB)})") + + val response = (callACachingMetadata, callBCachingMetadata) flatMapN { case (callA, callB) => - val response = checkCallsExistence(queryA, queryB, responseA, responseB) match { - case Some(msg) => FailedCallCacheDiffResponse(CachedCallNotFoundException(msg)) - case None => buildResponse + val callADetails = extractCallDetails(queryA, callA) + val callBDetails = extractCallDetails(queryB, callB) + + (callADetails, callBDetails) mapN { (cad, cbd) => + val callAHashes = callA.callCachingMetadataJson.hashes + val callBHashes = callB.callCachingMetadataJson.hashes + + SuccessfulCallCacheDiffResponse(cad, cbd, calculateHashDifferential(callAHashes, callBHashes)) + } + } valueOr { + e => FailedCallCacheDiffResponse(AggregatedMessageException("Failed to calculate diff for call A and call B", e.toList)) } replyTo ! 
response @@ -134,172 +88,178 @@ class CallCacheDiffActor(serviceRegistryActor: ActorRef) extends LoggingFSM[Call context stop self stay() } +} - /** - * Returns an error message if one or both of the calls are not found, or None if it does - */ - private def checkCallsExistence(queryA: MetadataQuery, - queryB: MetadataQuery, - responseA: MetadataLookupResponse, - responseB: MetadataLookupResponse): Option[String] = { - import cromwell.core.ExecutionIndex._ - - def makeTag(query: MetadataQuery) = { - s"${query.workflowId}:${query.jobKey.get.callFqn}:${query.jobKey.get.index.fromIndex}" - } - def makeNotFoundMessage(queries: NonEmptyList[MetadataQuery]) = { - val plural = if (queries.tail.nonEmpty) "s" else "" - s"Cannot find call$plural ${queries.map(makeTag).toList.mkString(", ")}" - } +object CallCacheDiffActor { - (responseA.eventList, responseB.eventList) match { - case (a, b) if a.isEmpty && b.isEmpty => Option(makeNotFoundMessage(NonEmptyList.of(queryA, queryB))) - case (a, _) if a.isEmpty => Option(makeNotFoundMessage(NonEmptyList.of(queryA))) - case (_, b) if b.isEmpty => Option(makeNotFoundMessage(NonEmptyList.of(queryB))) - case _ => None - } + final case class CachedCallNotFoundException(message: String) extends Exception { + override def getMessage = message } - /** - * Generates the "info" section of callA or callB - */ - private def makeCallInfo(query: MetadataQuery, eventList: Seq[MetadataEvent]): MetadataComponent = { - val callKey = MetadataObject(Map( - "workflowId" -> MetadataPrimitive(MetadataValue(query.workflowId.toString)), - "callFqn" -> MetadataPrimitive(MetadataValue(query.jobKey.get.callFqn)), - "jobIndex" -> MetadataPrimitive(MetadataValue(query.jobKey.get.index.getOrElse(-1))) - )) + sealed trait CallCacheDiffActorState + case object Idle extends CallCacheDiffActorState + case object WaitingForMetadata extends CallCacheDiffActorState - val allowResultReuse = attributeToComponent(eventList, { _ == CallCachingKeys.AllowReuseMetadataKey }, { _ => "allowResultReuse" }) - val executionStatus = attributeToComponent(eventList, { _ == CallMetadataKeys.ExecutionStatus }) + sealed trait CallCacheDiffActorData + case object CallCacheDiffNoData extends CallCacheDiffActorData + case class CallCacheDiffWithRequest(queryA: MetadataQuery, + queryB: MetadataQuery, + responseA: Option[WorkflowMetadataJson], + responseB: Option[WorkflowMetadataJson], + replyTo: ActorRef + ) extends CallCacheDiffActorData - List(callKey, allowResultReuse, executionStatus) combineAll - } + sealed abstract class CallCacheDiffActorResponse - /** - * Collects events from the list for which the keys verify the keyFilter predicate - * and apply keyModifier to the event's key - */ - private def collectEvents(events: Seq[MetadataEvent], - keyFilter: (String => Boolean), - keyModifier: (String => String)) = events collect { - case event @ MetadataEvent(metadataKey @ MetadataKey(_, _, key), _, _) if keyFilter(key) => - event.copy(key = metadataKey.copy(key = keyModifier(key))) - } + case class FailedCallCacheDiffResponse(reason: Throwable) extends CallCacheDiffActorResponse + final case class SuccessfulCallCacheDiffResponse(callA: CallDetails, callB: CallDetails, hashDifferential: List[HashDifference]) extends CallCacheDiffActorResponse + def props(serviceRegistryActor: ActorRef) = Props(new CallCacheDiffActor(serviceRegistryActor)).withDispatcher(EngineDispatcher) + + final case class CallDetails(executionStatus: String, allowResultReuse: Boolean, callFqn: String, jobIndex: Int, workflowId: String) + final 
case class HashDifference(hashKey: String, callA: Option[String], callB: Option[String]) - /** - * Given a list of events, a keyFilter and a keyModifier, returns the associated MetadataComponent. - * Ensures that events are properly aggregated together (CRDTs and latest timestamp rule) - */ - private def attributeToComponent(events: Seq[MetadataEvent], keyFilter: (String => Boolean), keyModifier: (String => String) = identity[String]) = { - MetadataComponent(collectEvents(events, keyFilter, keyModifier)) - } /** - * Makes a diff object out of a key and a pair of values. - * Values are Option[Option[MetadataValue]] for the following reason: - * - * The outer option represents whether or not this key had a corresponding hash metadata entry for the given call - * If the above is true, the inner value is the metadata value for this entry, which is nullable, hence an Option. - * The first outer option will determine whether the resulting json value will be null (no hash entry for this key), - * or the actual value. - * If the metadata value (inner option) happens to be None, it's an error, as we don't expect to publish null hash values. - * In that case we replace it with the placeholderMissingHashValue. + * Create a Metadata query from a CallCacheDiffQueryCall */ - private def makeHashDiffObject(key: String, valueA: Option[Option[MetadataValue]], valueB: Option[Option[MetadataValue]]) = { - def makeFinalValue(value: Option[Option[MetadataValue]]) = value match { - case Some(Some(metadataValue)) => MetadataPrimitive(metadataValue) - case Some(None) => PlaceholderMissingHashValue - case None => MetadataNullComponent - } + def makeMetadataQuery(call: CallCacheDiffQueryCall) = MetadataQuery( + workflowId = call.workflowId, + // jobAttempt None will return keys for all attempts + jobKey = Option(MetadataQueryJobKey(call.callFqn, call.jobIndex, None)), + key = None, + includeKeysOption = Option(NonEmptyList.of("callCaching", "executionStatus")), + excludeKeysOption = Option(NonEmptyList.of("callCaching:hitFailures")), + expandSubWorkflows = false + ) - MetadataObject( - "hashKey" -> MetadataPrimitive(MetadataValue(key.trim, MetadataString)), - "callA" -> makeFinalValue(valueA), - "callB" -> makeFinalValue(valueB) - ) + // These simple case classes are just to help apply a little type safety to input and output types: + final case class WorkflowMetadataJson(value: JsObject) extends AnyVal + final case class CallMetadataJson(rawValue: JsObject, jobKey: MetadataQueryJobKey, callCachingMetadataJson: CallCachingMetadataJson) + final case class CallCachingMetadataJson(rawValue: JsObject, hashes: Map[String, String]) + + + /* + * Takes in the JsObject returned from a metadata query and filters out only the appropriate call's callCaching section + */ + def extractCallMetadata(query: MetadataQuery, response: WorkflowMetadataJson): ErrorOr[CallMetadataJson] = { + + for { + // Sanity Checks: + _ <- response.value.checkFieldValue("id", s""""${query.workflowId}"""") + jobKey <- query.jobKey.toErrorOr("Call is required in call cache diff query") + + // Unpack the JSON: + allCalls <- response.value.fieldAsObject("calls") + callShards <- allCalls.fieldAsArray(jobKey.callFqn) + onlyShardElement <- callShards.elementWithHighestAttemptField + _ <- onlyShardElement.checkFieldValue("shardIndex", jobKey.index.getOrElse(-1).toString) + callCachingElement <- onlyShardElement.fieldAsObject(CallMetadataKeys.CallCaching) + hashes <- extractHashes(callCachingElement) + } yield CallMetadataJson(onlyShardElement, jobKey, 
CallCachingMetadataJson(callCachingElement, hashes)) } - /** - * Creates the hash differential between 2 list of events - */ - private def diffHashes(eventsA: Seq[MetadataEvent], eventsB: Seq[MetadataEvent]): Try[MetadataComponent] = { - val hashesKey = CallCachingKeys.HashesKey + MetadataKey.KeySeparator - // Collect hashes events and map their key to only keep the meaningful part of the key - // Then map the result to get a Map of hashKey -> Option[MetadataValue]. This will allow for fast lookup when - // comparing the 2 hash sets. - // Note that it's an Option[MetadataValue] because metadata values can be null, although for this particular - // case we don't expect it to be (we should never publish a hash metadata event with a null value) - // If that happens we will place a placeholder value in place of the hash to signify of the unexpected absence of it - def collectHashes(events: Seq[MetadataEvent]) = { - collectEvents(events, { _.startsWith(hashesKey) }, { _.stripPrefix(hashesKey) }) map { - case MetadataEvent(MetadataKey(_, _, keyA), valueA, _) => keyA -> valueA - } toMap + def extractHashes(callCachingMetadataJson: JsObject): ErrorOr[Map[String, String]] = { + def processField(keyPrefix: String)(fieldValue: (String, JsValue)): ErrorOr[Map[String, String]] = fieldValue match { + case (key, hashString: JsString) => Map(keyPrefix + key -> hashString.value).validNel + case (key, subObject: JsObject) => extractHashEntries(key + ":", subObject) + case (key, otherValue) => s"Cannot extract hashes for $key. Expected JsString or JsObject but got ${otherValue.getClass.getSimpleName} $otherValue".invalidNel } - val hashesA: Map[String, Option[MetadataValue]] = collectHashes(eventsA) - val hashesB: Map[String, Option[MetadataValue]] = collectHashes(eventsB) + def extractHashEntries(keyPrefix: String, jsObject: JsObject): ErrorOr[Map[String, String]] = { + val traversed = jsObject.fields.toList.traverse(processField(keyPrefix)) + traversed.map(_.flatten.toMap) + } - (hashesA.isEmpty, hashesB.isEmpty) match { - case (true, true) => Failure(HashesForCallAAndBNotFoundException) - case (true, false) => Failure(HashesForCallANotFoundException) - case (false, true) => Failure(HashesForCallBNotFoundException) - case (false, false) => Success(diffHashEvents(hashesA, hashesB)) + for { + hashesSection <- callCachingMetadataJson.fieldAsObject("hashes") + entries <- extractHashEntries("", hashesSection) + } yield entries + } + + def calculateHashDifferential(hashesA: Map[String, String], hashesB: Map[String, String]): List[HashDifference] = { + val hashesInANotMatchedInB: List[HashDifference] = hashesA.toList collect { + case (key, value) if hashesB.get(key) != Option(value) => HashDifference(key, Option(value), hashesB.get(key)) + } + val hashesUniqueToB: List[HashDifference] = hashesB.toList.collect { + case (key, value) if !hashesA.keySet.contains(key) => HashDifference(key, None, Option(value)) } + hashesInANotMatchedInB ++ hashesUniqueToB + } + def extractCallDetails(query: MetadataQuery, callMetadataJson: CallMetadataJson): ErrorOr[CallDetails] = { + val executionStatus = callMetadataJson.rawValue.fieldAsString("executionStatus") + val allowResultReuse = callMetadataJson.callCachingMetadataJson.rawValue.fieldAsBoolean("allowResultReuse") + + (executionStatus, allowResultReuse) mapN { (es, arr) => + CallDetails( + executionStatus = es.value, + allowResultReuse = arr.value, + callFqn = callMetadataJson.jobKey.callFqn, + jobIndex = callMetadataJson.jobKey.index.getOrElse(-1), + workflowId = 
query.workflowId.toString + ) + } } - private def diffHashEvents(hashesA: Map[String, Option[MetadataValue]], hashesB: Map[String, Option[MetadataValue]]) = { - val hashesUniqueToB: Map[String, Option[MetadataValue]] = hashesB.filterNot({ case (k, _) => hashesA.keySet.contains(k) }) - - val hashDiff: List[MetadataComponent] = { - // Start with all hashes in A - hashesA - // Try to find the corresponding pair in B. - // We end up with a - // List[(Option[String, Option[MetadataValue], Option[String, Option[MetadataValue])] - // ^ ^ ^ ^ - // hashKey hashValue hashKey hashValue - // for for for for - // A A B B - // |____________________________________| |___________________________________| - // hashPair for A hashPair for B - // - // HashPairs are Some or None depending on whether or not they have a metadata entry for the corresponding hashKey - // At this stage we only have Some(hashPair) for A, and either Some(hashPair) or None for B depending on if we found it in hashesB - .map({ - hashPairA => Option(hashPairA) -> hashesB.find(_._1 == hashPairA._1) - }) - // Add the missing hashes that are in B but not in A. The left hashPair is therefore None - .++(hashesUniqueToB.map(None -> Option(_))) - .collect({ - // Both have a value but they're different. We can assume the keys are the same (if we did our job right until here) - case (Some((keyA, valueA)), Some((_, valueB))) if valueA != valueB => - makeHashDiffObject(keyA, Option(valueA), Option(valueB)) - // Key is in A but not in B - case (Some((keyA, valueA)), None) => - makeHashDiffObject(keyA, Option(valueA), None) - // Key is in B but not in A - case (None, Some((keyB, valueB))) => - makeHashDiffObject(keyB, None, Option(valueB)) - }) - .toList + implicit class EnhancedJsObject(val jsObject: JsObject) extends AnyVal { + def getField(field: String): ErrorOr[JsValue] = jsObject.fields.get(field).toErrorOr(s"No '$field' field found") + def fieldAsObject(field: String): ErrorOr[JsObject] = jsObject.getField(field) flatMap { _.mapToJsObject } + def fieldAsArray(field: String): ErrorOr[JsArray] = jsObject.getField(field) flatMap { _.mapToJsArray } + def fieldAsString(field: String): ErrorOr[JsString] = jsObject.getField(field) flatMap { _.mapToJsString } + def fieldAsNumber(field: String): ErrorOr[JsNumber] = jsObject.getField(field) flatMap { _.mapToJsNumber } + def fieldAsBoolean(field: String): ErrorOr[JsBoolean] = jsObject.getField(field) flatMap { _.mapToJsBoolean } + def checkFieldValue(field: String, expectation: String): ErrorOr[Unit] = jsObject.getField(field) flatMap { + case v: JsValue if v.toString == expectation => ().validNel + case other => s"Unexpected metadata field '$field'. 
Expected '$expectation' but got ${other.toString}".invalidNel } + } - MetadataList(hashDiff) + implicit class EnhancedJsArray(val jsArray: JsArray) extends AnyVal { + + def elementWithHighestAttemptField: ErrorOr[JsObject] = { + def extractAttemptAndObject(value: JsValue): ErrorOr[(Int, JsObject)] = for { + asObject <- value.mapToJsObject + attempt <- asObject.fieldAsNumber("attempt") + } yield (attempt.value.intValue(), asObject) + + def foldFunction(accumulator: ErrorOr[(Int, JsObject)], nextElement: JsValue): ErrorOr[(Int, JsObject)] = { + (accumulator, extractAttemptAndObject(nextElement)) mapN { case ((previousHighestAttempt, previousJsObject), (nextAttempt, nextJsObject)) => + if (previousHighestAttempt > nextAttempt) { + (previousHighestAttempt, previousJsObject) + } else { + (nextAttempt, nextJsObject) + } + } + } + + for { + attemptListNel <- NonEmptyList.fromList(jsArray.elements.toList).toErrorOr("Expected at least one attempt but found 0") + highestAttempt <- attemptListNel.toList.foldLeft(extractAttemptAndObject(attemptListNel.head))(foldFunction) + } yield highestAttempt._2 + } } - /** - * Create a Metadata query from a CallCacheDiffQueryCall - */ - private def makeMetadataQuery(call: CallCacheDiffQueryCall) = MetadataQuery( - call.workflowId, - // jobAttempt None will return keys for all attempts - Option(MetadataQueryJobKey(call.callFqn, call.jobIndex, None)), - None, - Option(NonEmptyList.of("callCaching", "executionStatus")), - None, - expandSubWorkflows = false - ) + implicit class EnhancedJsValue(val jsValue: JsValue) extends AnyVal { + def mapToJsObject: ErrorOr[JsObject] = jsValue match { + case obj: JsObject => obj.validNel + case other => s"Invalid value type. Expected JsObject but got ${other.getClass.getSimpleName}: ${other.prettyPrint}".invalidNel + } + def mapToJsArray: ErrorOr[JsArray] = jsValue match { + case arr: JsArray => arr.validNel + case other => s"Invalid value type. Expected JsArray but got ${other.getClass.getSimpleName}: ${other.prettyPrint}".invalidNel + } + def mapToJsString: ErrorOr[JsString] = jsValue match { + case str: JsString => str.validNel + case other => s"Invalid value type. Expected JsString but got ${other.getClass.getSimpleName}: ${other.prettyPrint}".invalidNel + } + def mapToJsBoolean: ErrorOr[JsBoolean] = jsValue match { + case boo: JsBoolean => boo.validNel + case other => s"Invalid value type. Expected JsBoolean but got ${other.getClass.getSimpleName}: ${other.prettyPrint}".invalidNel + } + def mapToJsNumber: ErrorOr[JsNumber] = jsValue match { + case boo: JsNumber => boo.validNel + case other => s"Invalid value type. 
Expected JsNumber but got ${other.getClass.getSimpleName}: ${other.prettyPrint}".invalidNel + } + } } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheDiffActorJsonFormatting.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheDiffActorJsonFormatting.scala new file mode 100644 index 00000000000..11ba8bfc3a4 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheDiffActorJsonFormatting.scala @@ -0,0 +1,27 @@ +package cromwell.engine.workflow.lifecycle.execution.callcaching + +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheDiffActor.{CallDetails, HashDifference, SuccessfulCallCacheDiffResponse} +import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport +import org.apache.commons.lang3.NotImplementedException +import spray.json._ + +object CallCacheDiffActorJsonFormatting extends SprayJsonSupport with DefaultJsonProtocol { + + implicit val callDetailsJsonFormatter = jsonFormat5(CallDetails) + + // Note: This json format is written out longform to get the non-standard Option behavior (the default omits 'None' fields altogether) + implicit val hashDifferenceJsonFormatter = new RootJsonFormat[HashDifference] { + override def write(hashDifference: HashDifference): JsValue = { + def fromOption(opt: Option[String]) = opt.map(JsString.apply).getOrElse(JsNull) + JsObject(Map( + "hashKey" -> JsString(hashDifference.hashKey), + "callA" -> fromOption(hashDifference.callA), + "callB" -> fromOption(hashDifference.callB) + )) + } + override def read(json: JsValue): HashDifference = + throw new NotImplementedException("Programmer Error: No reader for HashDifferentials written. It was not expected to be required") + } + + implicit val successfulResponseJsonFormatter = jsonFormat3(SuccessfulCallCacheDiffResponse) +} diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/job/EngineJobExecutionActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/job/EngineJobExecutionActor.scala index 31d6fb35372..c496619e30f 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/job/EngineJobExecutionActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/job/EngineJobExecutionActor.scala @@ -37,7 +37,7 @@ import cromwell.jobstore.JobStoreActor._ import cromwell.jobstore._ import cromwell.services.EngineServicesStore import cromwell.services.metadata.CallMetadataKeys.CallCachingKeys -import cromwell.services.metadata.{CallMetadataKeys, MetadataJobKey, MetadataKey} +import cromwell.services.metadata.{CallMetadataKeys, MetadataKey} import cromwell.webservice.EngineStatsActor import scala.concurrent.ExecutionContext @@ -82,6 +82,7 @@ class EngineJobExecutionActor(replyTo: ActorRef, val jobTag = s"${workflowIdForLogging.shortString}:${jobDescriptorKey.call.fullyQualifiedName}:${jobDescriptorKey.index.fromIndex}:${jobDescriptorKey.attempt}" val tag = s"EJEA_$jobTag" + //noinspection ActorMutableStateInspection // There's no need to check for a cache hit again if we got preempted, or if there's no result copying actor defined // NB: this can also change (e.g. 
if we have a HashError we just force this to CallCachingOff) private[execution] var effectiveCallCachingMode = { @@ -98,12 +99,18 @@ class EngineJobExecutionActor(replyTo: ActorRef, private val callCachingReadResultMetadataKey = CallCachingKeys.ReadResultMetadataKey private val callCachingHitResultMetadataKey = CallCachingKeys.HitResultMetadataKey private val callCachingAllowReuseMetadataKey = CallCachingKeys.AllowReuseMetadataKey - private val callCachingHitFailures = CallCachingKeys.HitFailuresKey private val callCachingHashes = CallCachingKeys.HashesKey - val callCachePathPrefixes = for { - activity <- Option(effectiveCallCachingMode) collect { case a: CallCachingActivity => a } - workflowOptionPrefixes <- activity.options.workflowOptionCallCachePrefixes + private val callCachingOptionsOption = effectiveCallCachingMode match { + case callCachingActivity: CallCachingActivity => Option(callCachingActivity.options) + case _ => None + } + + private val invalidationRequired = callCachingOptionsOption.exists(_.invalidateBadCacheResults) + + private val callCachePathPrefixes = for { + callCachingOptions <- callCachingOptionsOption + workflowOptionPrefixes <- callCachingOptions.workflowOptionCallCachePrefixes d <- initializationData collect { case d: StandardInitializationData => d } rootPrefix = d.workflowPaths.callCacheRootPrefix } yield CallCachePathPrefixes(rootPrefix, workflowOptionPrefixes.toList) @@ -115,6 +122,7 @@ class EngineJobExecutionActor(replyTo: ActorRef, } startWith(Pending, NoData) + //noinspection ActorMutableStateInspection private var eventList: Seq[ExecutionEvent] = Seq(ExecutionEvent(stateName.toString)) override def onTimedTransition(from: EngineJobExecutionActorState, to: EngineJobExecutionActorState, duration: FiniteDuration) = { @@ -222,7 +230,10 @@ class EngineJobExecutionActor(replyTo: ActorRef, } when(FetchingCachedOutputsFromDatabase) { - case Event(CachedOutputLookupSucceeded(womValueSimpletons, jobDetritus, returnCode, cacheResultId, cacheHitDetails), data @ ResponsePendingData(_, _, _, _, Some(ejeaCacheHit), _)) => + case Event( + CachedOutputLookupSucceeded(womValueSimpletons, jobDetritus, returnCode, cacheResultId, cacheHitDetails), + data@ResponsePendingData(_, _, _, _, Some(ejeaCacheHit), _, _), + ) => if (cacheResultId != ejeaCacheHit.hit.cacheResultId) { // Sanity check: was this the right set of results (a false here is a BAD thing!): log.error(s"Received incorrect call cache results from FetchCachedResultsActor. Expected ${ejeaCacheHit.hit} but got $cacheResultId. 
Running job") @@ -248,14 +259,23 @@ class EngineJobExecutionActor(replyTo: ActorRef, when(BackendIsCopyingCachedOutputs) { // Backend copying response: - case Event(response: JobSucceededResponse, data @ ResponsePendingData(_, _, Some(Success(hashes)), _, _, _)) => + case Event( + response: JobSucceededResponse, + data@ResponsePendingData(_, _, Some(Success(hashes)), _, _, _, _), + ) => + logCacheHitSuccess(data) saveCacheResults(hashes, data.withSuccessResponse(response)) case Event(response: JobSucceededResponse, data: ResponsePendingData) if effectiveCallCachingMode.writeToCache && data.hashes.isEmpty => + logCacheHitSuccess(data) // Wait for the CallCacheHashes stay using data.withSuccessResponse(response) case Event(response: JobSucceededResponse, data: ResponsePendingData) => // bad hashes or cache write off + logCacheHitSuccess(data) saveJobCompletionToJobStore(data.withSuccessResponse(response)) - case Event(CopyingOutputsFailedResponse(_, cacheCopyAttempt, throwable), data @ ResponsePendingData(_, _, _, _, Some(cacheHit), _)) if cacheCopyAttempt == cacheHit.hitNumber => + case Event( + CopyingOutputsFailedResponse(_, cacheCopyAttempt, throwable), + data@ResponsePendingData(_, _, _, _, Some(cacheHit), _, _) + ) if cacheCopyAttempt == cacheHit.hitNumber => invalidateCacheHitAndTransition(cacheHit, data, throwable) // Hashes arrive: @@ -292,7 +312,10 @@ class EngineJobExecutionActor(replyTo: ActorRef, // Handles JobSucceededResponse messages val jobSuccessHandler: StateFunction = { // writeToCache is true and all hashes have already been retrieved - save to the cache - case Event(response: JobSucceededResponse, data @ ResponsePendingData(_, _, Some(Success(hashes)), _, _, _)) if effectiveCallCachingMode.writeToCache => + case Event( + response: JobSucceededResponse, + data@ResponsePendingData(_, _, Some(Success(hashes)), _, _, _, _) + ) if effectiveCallCachingMode.writeToCache => eventList ++= response.executionEvents // Publish the image used now that we have it as we might lose the information if Cromwell is restarted // in between writing to the cache and writing to the job store @@ -311,7 +334,10 @@ class EngineJobExecutionActor(replyTo: ActorRef, // Handles BackendJobFailedResponse messages val jobFailedHandler: StateFunction = { // writeToCache is true and all hashes already retrieved - save to job store - case Event(response: BackendJobFailedResponse, data @ ResponsePendingData(_, _, Some(Success(_)), _, _, _)) if effectiveCallCachingMode.writeToCache => + case Event( + response: BackendJobFailedResponse, + data@ResponsePendingData(_, _, Some(Success(_)), _, _, _, _) + ) if effectiveCallCachingMode.writeToCache => saveJobCompletionToJobStore(data.withFailedResponse(response)) // Hashes are still missing and we want them (writeToCache is true) - wait for them case Event(response: BackendJobFailedResponse, data: ResponsePendingData) if effectiveCallCachingMode.writeToCache && data.hashes.isEmpty => @@ -398,7 +424,11 @@ class EngineJobExecutionActor(replyTo: ActorRef, onTransition { case fromState -> toState => log.debug("Transitioning from {}({}) to {}({})", fromState, stateData, toState, nextStateData) - eventList :+= ExecutionEvent(toState.toString) + + EngineJobExecutionActorState.transitionEventString(fromState, toState) foreach { + eventList :+= ExecutionEvent(_) + } + } whenUnhandled { @@ -483,16 +513,15 @@ class EngineJobExecutionActor(replyTo: ActorRef, private def forwardAndStop(response: BackendJobExecutionResponse): State = { replyTo forward response - 
returnExecutionToken() - instrumentJobComplete(response) - pushExecutionEventsToMetadataService(jobDescriptorKey, eventList) - recordExecutionStepTiming(stateName.toString, currentStateDuration) - context stop self - stay() + stop(response) } private def respondAndStop(response: BackendJobExecutionResponse): State = { replyTo ! response + stop(response) + } + + private def stop(response: BackendJobExecutionResponse): State = { returnExecutionToken() instrumentJobComplete(response) pushExecutionEventsToMetadataService(jobDescriptorKey, eventList) @@ -638,7 +667,10 @@ class EngineJobExecutionActor(replyTo: ActorRef, ejha ! NextHit goto(CheckingCallCache) case _ => - workflowLogger.info("Could not find a suitable cache hit, falling back to running job: {}", jobDescriptorKey) + workflowLogger.info( + "Could not find a suitable cache hit. " + + "Call cache hit process had {} total hit failures before completing unsuccessfully. " + + "Falling back to running job: {}", data.cacheHitFailureCount, jobDescriptorKey) runJob(data) } } @@ -647,35 +679,33 @@ class EngineJobExecutionActor(replyTo: ActorRef, s"$workflowIdForLogging-BackendCacheHitCopyingActor-$jobTag-${cacheResultId.id}" } - private def publishHitFailure(cache: EJEACacheHit, failure: Throwable) = { - import WomValueSimpleton._ - import cromwell.services.metadata.MetadataService._ - - cache.details foreach { details => - val metadataKey = MetadataKey( - workflowIdForLogging, - Option(MetadataJobKey(jobDescriptorKey.call.fullyQualifiedName, jobDescriptorKey.index, jobDescriptorKey.attempt)), - s"$callCachingHitFailures[${cache.hitNumber}]:${details.escapeMeta}" - ) + private def logCacheHitSuccess(data: ResponsePendingData): Unit = { + workflowLogger.info( + "Call cache hit process had {} total hit failures before completing successfully", + data.cacheHitFailureCount, + ) + } - serviceRegistryActor ! PutMetadataAction(throwableToMetadataEvents(metadataKey, failure)) + private def logCacheHitFailure(data: ResponsePendingData, reason: Throwable): Unit = { + val problemSummary = + s"Failed copying cache results for job $jobDescriptorKey (${reason.getClass.getSimpleName}: ${reason.getMessage})" + if (invalidationRequired) { + // Whenever invalidating a cache result, always log why the invalidation occurred + workflowLogger.warn(s"$problemSummary, invalidating cache entry.") + } else if (data.cacheHitFailureCount < 3) { + workflowLogger.info(problemSummary) } } private def invalidateCacheHitAndTransition(ejeaCacheHit: EJEACacheHit, data: ResponsePendingData, reason: Throwable) = { - publishHitFailure(ejeaCacheHit, reason) + logCacheHitFailure(data, reason) + val updatedData = data.copy(cacheHitFailureCount = data.cacheHitFailureCount + 1) - val invalidationRequired = effectiveCallCachingMode match { - case CallCachingOff => throw new RuntimeException("Should not be calling invalidateCacheHit if call caching is off!") // Very unexpected. Fail out of this bad-state EJEA. 
- case activity: CallCachingActivity => activity.options.invalidateBadCacheResults - } if (invalidationRequired) { - val problemSummary = s"${reason.getClass.getSimpleName}: ${reason.getMessage}" - log.warning("Failed copying cache results for job {} ({}), invalidating cache entry.", jobDescriptorKey, problemSummary) invalidateCacheHit(ejeaCacheHit.hit.cacheResultId) - goto(InvalidatingCacheEntry) + goto(InvalidatingCacheEntry) using updatedData } else { - handleCacheInvalidatedResponse(CallCacheInvalidationUnnecessary, data) + handleCacheInvalidatedResponse(CallCacheInvalidationUnnecessary, updatedData) } } @@ -759,6 +789,26 @@ object EngineJobExecutionActor { case object UpdatingJobStore extends EngineJobExecutionActorState case object InvalidatingCacheEntry extends EngineJobExecutionActorState + object EngineJobExecutionActorState { + def transitionEventString(fromState: EngineJobExecutionActorState, toState: EngineJobExecutionActorState): Option[String] = { + + def callCacheStateGroup: Set[EngineJobExecutionActorState] = Set( + CheckingCallCache, + FetchingCachedOutputsFromDatabase, + BackendIsCopyingCachedOutputs, + CheckingCacheEntryExistence, + InvalidatingCacheEntry + ) + + if (fromState == toState) None + else if (callCacheStateGroup.contains(fromState) && callCacheStateGroup.contains(toState)) None + else if (callCacheStateGroup.contains(toState)) Option("CallCacheReading") + else Option(toState.toString) + } + } + + + /** Commands */ sealed trait EngineJobExecutionActorCommand case object Execute extends EngineJobExecutionActorCommand @@ -817,7 +867,8 @@ object EngineJobExecutionActor { hashes: Option[Try[CallCacheHashes]] = None, ejha: Option[ActorRef] = None, ejeaCacheHit: Option[EJEACacheHit] = None, - backendJobActor: Option[ActorRef] = None + backendJobActor: Option[ActorRef] = None, + cacheHitFailureCount: Int = 0 ) extends EJEAData { def withEJHA(ejha: ActorRef): EJEAData = this.copy(ejha = Option(ejha)) @@ -845,7 +896,7 @@ object EngineJobExecutionActor { def dockerImageUsed: Option[String] def withHashes(hashes: Option[Try[CallCacheHashes]]): ResponseData } - + // Only Successes and Failures are saved to the job store, not Aborts. Why ? Because. This could be an improvement AFAICT. 
private[execution] trait ShouldBeSavedToJobStoreResponseData extends ResponseData diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/materialization/MaterializeWorkflowDescriptorActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/materialization/MaterializeWorkflowDescriptorActor.scala index efef8de3edd..6ad940c1870 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/materialization/MaterializeWorkflowDescriptorActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/materialization/MaterializeWorkflowDescriptorActor.scala @@ -2,10 +2,9 @@ package cromwell.engine.workflow.lifecycle.materialization import akka.actor.{ActorRef, FSM, LoggingFSM, Props, Status} import akka.pattern.pipe -import cats.Monad import cats.data.EitherT._ -import cats.data.Validated.{Invalid, Valid} import cats.data.NonEmptyList +import cats.data.Validated.{Invalid, Valid} import cats.effect.IO import cats.instances.list._ import cats.syntax.apply._ @@ -21,14 +20,15 @@ import common.validation.IOChecked._ import cromwell.backend.BackendWorkflowDescriptor import cromwell.core.Dispatcher.EngineDispatcher import cromwell.core.WorkflowOptions.{ReadFromCache, WorkflowOption, WriteToCache} -import cromwell.core.{HogGroup, _} import cromwell.core.callcaching._ import cromwell.core.io.AsyncIo import cromwell.core.labels.{Label, Labels} import cromwell.core.logging.WorkflowLogging import cromwell.core.path.{PathBuilder, PathBuilderFactory} +import cromwell.core._ import cromwell.engine._ import cromwell.engine.backend.CromwellBackends +import cromwell.engine.workflow.WorkflowProcessingEventPublishing._ import cromwell.engine.workflow.lifecycle.EngineLifecycleActorAbortCommand import cromwell.engine.workflow.lifecycle.materialization.MaterializeWorkflowDescriptorActor._ import cromwell.filesystems.gcs.batch.GcsBatchCommandBuilder @@ -105,9 +105,15 @@ object MaterializeWorkflowDescriptorActor { } } - val enabled = conf.as[Option[Boolean]]("call-caching.enabled").getOrElse(false) - val invalidateBadCacheResults = conf.as[Option[Boolean]]("call-caching.invalidate-bad-cache-results").getOrElse(true) - if (enabled) { + val callCachingConfig = conf.getConfig("call-caching") + + def errorOrCallCachingBoolean(path: String): ErrorOr[Boolean] = { + import common.validation.Validation._ + validate(callCachingConfig.getBoolean(path)) + } + + val errorOrEnabled = errorOrCallCachingBoolean("enabled") + if (errorOrEnabled.exists(_ == true)) { val readFromCache = readOptionalOption(ReadFromCache) val writeToCache = readOptionalOption(WriteToCache) @@ -120,14 +126,28 @@ object MaterializeWorkflowDescriptorActor { } } + val errorOrMaybePrefixes = workflowOptions.getVectorOfStrings("call_cache_hit_path_prefixes") + val errorOrInvalidateBadCacheResults = errorOrCallCachingBoolean("invalidate-bad-cache-results") + val errorOrCallCachingOptions = ( + errorOrMaybePrefixes, + errorOrInvalidateBadCacheResults, + ) mapN { + ( + maybePrefixes, + invalidateBadCacheResults, + ) => + CallCachingOptions( + invalidateBadCacheResults, + maybePrefixes, + ) + } for { - maybePrefixes <- workflowOptions.getVectorOfStrings("call_cache_hit_path_prefixes") - callCachingOptions = CallCachingOptions(invalidateBadCacheResults, maybePrefixes) + callCachingOptions <- errorOrCallCachingOptions mode <- errorOrCallCachingMode(callCachingOptions) } yield mode } else { - CallCachingOff.validNel + errorOrEnabled.map(_ => CallCachingOff) } } } @@ -267,7 +287,7 @@ class 
MaterializeWorkflowDescriptorActor(serviceRegistryActor: ActorRef, val labels = convertJsonToLabels(sourceFiles.labelsJson) for { - _ <- publishLabelsToMetadata(id, labels) + _ <- publishLabelsToMetadata(id, labels.asMap, serviceRegistryActor) zippedImportResolver <- zippedResolverCheck importResolvers = zippedImportResolver.toList ++ localFilesystemResolvers :+ HttpResolver(None, Map.empty) sourceAndResolvers <- fromEither[IO](LanguageFactoryUtil.findWorkflowSource(sourceFiles.workflowSource, sourceFiles.workflowUrl, importResolvers)) @@ -354,18 +374,6 @@ class MaterializeWorkflowDescriptorActor(serviceRegistryActor: ActorRef, } } - private def publishLabelsToMetadata(rootWorkflowId: WorkflowId, labels: Labels): IOChecked[Unit] = { - val defaultLabel = "cromwell-workflow-id" -> s"cromwell-$rootWorkflowId" - val customLabels = labels.asMap - Monad[IOChecked].pure(labelsToMetadata(customLabels + defaultLabel, rootWorkflowId)) - } - - protected def labelsToMetadata(labels: Map[String, String], workflowId: WorkflowId): Unit = { - labels foreach { case (k, v) => - serviceRegistryActor ! PutMetadataAction(MetadataEvent(MetadataKey(workflowId, None, s"${WorkflowMetadataKeys.Labels}:$k"), MetadataValue(v))) - } - } - private def buildWorkflowDescriptor(id: WorkflowId, womNamespace: ValidatedWomNamespace, workflowOptions: WorkflowOptions, diff --git a/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStoreSubmitActor.scala b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStoreSubmitActor.scala index 3426a17050d..5ba36f8befe 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStoreSubmitActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStoreSubmitActor.scala @@ -3,14 +3,14 @@ package cromwell.engine.workflow.workflowstore import java.time.OffsetDateTime import akka.actor.{Actor, ActorLogging, ActorRef, Props} -import cats.Monad -import cats.data.EitherT._ import cats.data.NonEmptyList import common.validation.IOChecked._ +import common.validation.Validation._ import cromwell.core.Dispatcher._ import cromwell.core._ import cromwell.engine.instrumentation.WorkflowInstrumentation import cromwell.engine.workflow.WorkflowMetadataHelper +import cromwell.engine.workflow.WorkflowProcessingEventPublishing._ import cromwell.engine.workflow.workflowstore.SqlWorkflowStore.WorkflowStoreState.WorkflowStoreState import cromwell.engine.workflow.workflowstore.SqlWorkflowStore.{WorkflowStoreState, WorkflowSubmissionResponse} import cromwell.engine.workflow.workflowstore.WorkflowStoreActor._ @@ -43,7 +43,7 @@ final case class WorkflowStoreSubmitActor(store: WorkflowStore, serviceRegistryA val wfTypeVersion = cmd.source.workflowTypeVersion.getOrElse("Unspecified version") log.info("{} ({}) workflow {} submitted", wfType, wfTypeVersion, workflowSubmissionResponse.id) val labelsMap = convertJsonToLabelsMap(cmd.source.labelsJson) - publishLabelsToMetadata(workflowSubmissionResponse.id, labelsMap) + publishLabelsToMetadata(workflowSubmissionResponse.id, labelsMap, serviceRegistryActor).toErrorOr.toTry.get sndr ! 
WorkflowSubmittedToStore( workflowSubmissionResponse.id, convertDatabaseStateToApiState(workflowSubmissionResponse.state) @@ -68,7 +68,7 @@ final case class WorkflowStoreSubmitActor(store: WorkflowStore, serviceRegistryA case Success(workflowSubmissionResponses) => log.info("Workflows {} submitted.", workflowSubmissionResponses.toList.map(res => res.id).mkString(", ")) val labelsMap = convertJsonToLabelsMap(cmd.sources.head.labelsJson) - workflowSubmissionResponses.map(res => publishLabelsToMetadata(res.id, labelsMap)) + workflowSubmissionResponses.map(res => publishLabelsToMetadata(res.id, labelsMap, serviceRegistryActor)) sndr ! WorkflowsBatchSubmittedToStore( workflowSubmissionResponses.map(res => res.id), convertDatabaseStateToApiState(workflowSubmissionResponses.head.state) @@ -107,17 +107,6 @@ final case class WorkflowStoreSubmitActor(store: WorkflowStore, serviceRegistryA } } - private def publishLabelsToMetadata(rootWorkflowId: WorkflowId, customLabels: Map[String, String]): IOChecked[Unit] = { - val defaultLabel = "cromwell-workflow-id" -> s"cromwell-$rootWorkflowId" - Monad[IOChecked].pure(labelsToMetadata(customLabels + defaultLabel, rootWorkflowId)) - } - - protected def labelsToMetadata(labels: Map[String, String], workflowId: WorkflowId): Unit = { - labels foreach { case (k, v) => - serviceRegistryActor ! PutMetadataAction(MetadataEvent(MetadataKey(workflowId, None, s"${WorkflowMetadataKeys.Labels}:$k"), MetadataValue(v))) - } - } - /** * Runs processing on workflow source files before they are stored. * diff --git a/engine/src/main/scala/cromwell/jobstore/JobStoreWriterActor.scala b/engine/src/main/scala/cromwell/jobstore/JobStoreWriterActor.scala index c96bc66a770..b43a5765106 100644 --- a/engine/src/main/scala/cromwell/jobstore/JobStoreWriterActor.scala +++ b/engine/src/main/scala/cromwell/jobstore/JobStoreWriterActor.scala @@ -41,7 +41,7 @@ case class JobStoreWriterActor(jsd: JobStore, case Success(_) => data foreach { case CommandAndReplyTo(c: JobStoreWriterCommand, r) => r ! JobStoreWriteSuccess(c) } case Failure(regerts) => - log.error("Failed to properly job store entries to database", regerts) + log.error(regerts, "Failed to write job store entries to database") data foreach { case CommandAndReplyTo(_, r) => r ! 
JobStoreWriteFailure(regerts) } } diff --git a/engine/src/main/scala/cromwell/webservice/LabelsManagerActor.scala b/engine/src/main/scala/cromwell/webservice/LabelsManagerActor.scala index f3a21135860..b0be24f7248 100644 --- a/engine/src/main/scala/cromwell/webservice/LabelsManagerActor.scala +++ b/engine/src/main/scala/cromwell/webservice/LabelsManagerActor.scala @@ -1,11 +1,11 @@ package cromwell.webservice import akka.actor.{Actor, ActorLogging, ActorRef, Props} -import common.collections.EnhancedCollections._ import cromwell.core._ -import cromwell.core.labels.Labels +import cromwell.core.labels.{Label, Labels} import cromwell.services.metadata.MetadataEvent import cromwell.services.metadata.MetadataService._ +import cromwell.services.metadata.impl.builder.MetadataBuilderActor.BuiltMetadataResponse import cromwell.webservice.LabelsManagerActor._ import spray.json.{DefaultJsonProtocol, JsObject, JsString} @@ -24,13 +24,6 @@ object LabelsManagerActor { sealed trait LabelsResponse extends LabelsMessage - def processLabelsResponse(workflowId: WorkflowId, labels: Map[String, String]): JsObject = { - JsObject(Map( - WorkflowMetadataKeys.Id -> JsString(workflowId.toString), - WorkflowMetadataKeys.Labels -> JsObject(labels safeMapValues JsString.apply) - )) - } - sealed abstract class LabelsManagerActorResponse final case class BuiltLabelsManagerResponse(response: JsObject) extends LabelsManagerActorResponse final case class FailedLabelsManagerResponse(reason: Throwable) extends LabelsManagerActorResponse @@ -59,7 +52,7 @@ class LabelsManagerActor(serviceRegistryActor: ActorRef) extends Actor with Acto At this point in the actor lifecycle, wfId has already been filled out so the .get is safe */ serviceRegistryActor ! GetLabels(wfId.get) - case LabelLookupResponse(id, origLabels) => + case BuiltMetadataResponse(_, jsObject) => /* There's some trickery going on here. We've updated the labels in the metadata store but almost certainly when the store received the GetLabels request above the summarizer will not have been run so our new values are @@ -73,8 +66,16 @@ class LabelsManagerActor(serviceRegistryActor: ActorRef) extends Actor with Acto At this point in the actor lifecycle, newLabels will have been filled in so the .get is safe */ - val updated = origLabels ++ newLabels.get.asMap - target ! BuiltLabelsManagerResponse(processLabelsResponse(id, updated)) + + def replaceOrAddLabel(originalJson: JsObject, label: Label): JsObject = { + val labels = originalJson.fields.get("labels").map(_.asJsObject.fields).getOrElse(Map.empty) + val updatedLabels = labels + (label.key -> JsString(label.value)) + + JsObject(originalJson.fields + ("labels" -> JsObject(updatedLabels))) + } + + val updatedJson = newLabels.get.value.foldLeft(jsObject)(replaceOrAddLabel) + target ! 
BuiltLabelsManagerResponse(updatedJson) context stop self case f: MetadataServiceFailure => /* diff --git a/engine/src/main/scala/cromwell/webservice/WorkflowJsonSupport.scala b/engine/src/main/scala/cromwell/webservice/WorkflowJsonSupport.scala index d7215362079..4d8bc77c071 100644 --- a/engine/src/main/scala/cromwell/webservice/WorkflowJsonSupport.scala +++ b/engine/src/main/scala/cromwell/webservice/WorkflowJsonSupport.scala @@ -10,7 +10,6 @@ import cromwell.engine._ import cromwell.services.healthmonitor.ProtoHealthMonitorServiceActor.{StatusCheckResponse, SubsystemStatus} import cromwell.services.metadata.MetadataService._ import cromwell.util.JsonFormatting.WomValueJsonFormatter._ -import cromwell.webservice.metadata.MetadataBuilderActor.BuiltMetadataResponse import cromwell.webservice.routes.CromwellApiService.BackendResponse import spray.json.{DefaultJsonProtocol, JsString, JsValue, JsonFormat, RootJsonFormat} @@ -22,7 +21,6 @@ object WorkflowJsonSupport extends DefaultJsonProtocol { implicit val callOutputResponseProtocol = jsonFormat3(CallOutputResponse) implicit val engineStatsProtocol = jsonFormat2(EngineStatsActor.EngineStats) implicit val BackendResponseFormat = jsonFormat2(BackendResponse) - implicit val BuiltStatusResponseFormat = jsonFormat1(BuiltMetadataResponse) implicit val callAttempt = jsonFormat2(CallAttempt) implicit val workflowOptionsFormatter: JsonFormat[WorkflowOptions] = new JsonFormat[WorkflowOptions] { diff --git a/engine/src/main/scala/cromwell/webservice/metadata/MetadataBuilderRegulatorActor.scala b/engine/src/main/scala/cromwell/webservice/metadata/MetadataBuilderRegulatorActor.scala deleted file mode 100644 index 530158c8404..00000000000 --- a/engine/src/main/scala/cromwell/webservice/metadata/MetadataBuilderRegulatorActor.scala +++ /dev/null @@ -1,57 +0,0 @@ -package cromwell.webservice.metadata - -import akka.actor.{Actor, ActorLogging, ActorRef, Props} -import cromwell.core.Dispatcher.ApiDispatcher -import cromwell.services.metadata.MetadataService.MetadataServiceAction -import cromwell.webservice.metadata.MetadataBuilderActor.MetadataBuilderActorResponse - -import scala.collection.mutable - -class MetadataBuilderRegulatorActor(serviceRegistryActor: ActorRef) extends Actor with ActorLogging { - // This actor tracks all requests coming in from the API service and spins up new builders as needed to service them. - // If the processing of an identical request is already in flight the requester will be added to a set of requesters - // to notify when the response from the first request becomes available. - - // Map from requests (MetadataServiceActions) to requesters. - val apiRequests = new mutable.HashMap[MetadataServiceAction, Set[ActorRef]]() - // Map from ActorRefs of MetadataBuilderActors to requests. When a response comes back from a MetadataBuilderActor its - // ActorRef is used as the lookup key in this Map. The result of that lookup yields the request which in turn is used - // as the lookup key for requesters in the above Map. 
- val builderRequests = new mutable.HashMap[ActorRef, MetadataServiceAction]() - - override def receive: Receive = { - case action: MetadataServiceAction => - val currentRequesters = apiRequests.getOrElse(action, Set.empty) - apiRequests.put(action, currentRequesters + sender()) - if (currentRequesters.isEmpty) { - val metadataBuilderActor = context.actorOf( - MetadataBuilderActor.props(serviceRegistryActor).withDispatcher(ApiDispatcher), MetadataBuilderActor.uniqueActorName) - builderRequests.put(metadataBuilderActor, action) - metadataBuilderActor ! action - } - case response: MetadataBuilderActorResponse => - val sndr = sender() - builderRequests.get(sndr) match { - case Some(action) => - apiRequests.get(action) match { - case Some(requesters) => - apiRequests.remove(action) - requesters foreach { _ ! response} - case None => - // unpossible: there had to have been a request that corresponded to this response - log.error(s"MetadataBuilderRegulatorActor unpossible error: no requesters found for action: $action") - } - builderRequests.remove(sndr) - () - case None => - // unpossible: this actor should know about all the child MetadataBuilderActors it has begotten - log.error(s"MetadataBuilderRegulatorActor unpossible error: unrecognized sender $sndr") - } - } -} - -object MetadataBuilderRegulatorActor { - def props(serviceRegistryActor: ActorRef): Props = { - Props(new MetadataBuilderRegulatorActor(serviceRegistryActor)) - } -} diff --git a/engine/src/main/scala/cromwell/webservice/routes/CromwellApiService.scala b/engine/src/main/scala/cromwell/webservice/routes/CromwellApiService.scala index 8b6e662eddc..819ef7a66ce 100644 --- a/engine/src/main/scala/cromwell/webservice/routes/CromwellApiService.scala +++ b/engine/src/main/scala/cromwell/webservice/routes/CromwellApiService.scala @@ -3,6 +3,7 @@ package cromwell.webservice.routes import java.util.UUID import akka.actor.{ActorRef, ActorRefFactory} +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheDiffActorJsonFormatting.successfulResponseJsonFormatter import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ import akka.http.scaladsl.marshalling.ToResponseMarshallable import akka.http.scaladsl.model._ @@ -22,7 +23,7 @@ import cromwell.core.{path => _, _} import cromwell.engine.backend.BackendConfiguration import cromwell.engine.instrumentation.HttpInstrumentation import cromwell.engine.workflow.WorkflowManagerActor.WorkflowNotFoundException -import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheDiffActor.{BuiltCallCacheDiffResponse, CachedCallNotFoundException, CallCacheDiffActorResponse, FailedCallCacheDiffResponse} +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheDiffActor.{CachedCallNotFoundException, CallCacheDiffActorResponse, FailedCallCacheDiffResponse, SuccessfulCallCacheDiffResponse} import cromwell.engine.workflow.lifecycle.execution.callcaching.{CallCacheDiffActor, CallCacheDiffQueryParameter} import cromwell.engine.workflow.workflowstore.SqlWorkflowStore.NotInOnHoldStateException import cromwell.engine.workflow.workflowstore.{WorkflowStoreActor, WorkflowStoreEngineActor, WorkflowStoreSubmitActor} @@ -30,7 +31,7 @@ import cromwell.server.CromwellShutdown import cromwell.services.healthmonitor.ProtoHealthMonitorServiceActor.{GetCurrentStatus, StatusCheckResponse} import cromwell.services.metadata.MetadataService._ import cromwell.webservice._ -import cromwell.webservice.metadata.MetadataBuilderActor.{BuiltMetadataResponse, FailedMetadataResponse, 
MetadataBuilderActorResponse} +import cromwell.services.metadata.impl.builder.MetadataBuilderActor.{BuiltMetadataResponse, FailedMetadataResponse, MetadataBuilderActorResponse} import cromwell.webservice.WorkflowJsonSupport._ import cromwell.webservice.WebServiceUtils import cromwell.webservice.WebServiceUtils.EnhancedThrowable @@ -88,7 +89,7 @@ trait CromwellApiService extends HttpInstrumentation with MetadataRouteSupport w case Valid(queryParameter) => val diffActor = actorRefFactory.actorOf(CallCacheDiffActor.props(serviceRegistryActor), "CallCacheDiffActor-" + UUID.randomUUID()) onComplete(diffActor.ask(queryParameter).mapTo[CallCacheDiffActorResponse]) { - case Success(r: BuiltCallCacheDiffResponse) => complete(r.response) + case Success(r: SuccessfulCallCacheDiffResponse) => complete(r) case Success(r: FailedCallCacheDiffResponse) => r.reason.errorRequest(StatusCodes.InternalServerError) case Failure(_: AskTimeoutException) if CromwellShutdown.shutdownInProgress() => serviceShuttingDownResponse case Failure(e: CachedCallNotFoundException) => e.errorRequest(StatusCodes.NotFound) @@ -161,19 +162,19 @@ trait CromwellApiService extends HttpInstrumentation with MetadataRouteSupport w val includeKeys = NonEmptyList.of("start", "end", "executionStatus", "executionEvents", "subWorkflowMetadata") val readMetadataRequest = (w: WorkflowId) => GetSingleWorkflowMetadataAction(w, Option(includeKeys), None, expandSubWorkflows = true) - metadataBuilderRegulatorActor.ask(readMetadataRequest(workflowId)).mapTo[MetadataBuilderActorResponse] + serviceRegistryActor.ask(readMetadataRequest(workflowId)).mapTo[MetadataBuilderActorResponse] } private def completeTimingRouteResponse(metadataResponse: Future[MetadataBuilderActorResponse]) = { onComplete(metadataResponse) { - case Success(r: BuiltMetadataResponse) => { + case Success(r: BuiltMetadataResponse) => + Try(Source.fromResource("workflowTimings/workflowTimings.html").mkString) match { case Success(wfTimingsContent) => - val response = HttpResponse(entity = wfTimingsContent.replace("\"{{REPLACE_THIS_WITH_METADATA}}\"", r.response.toString)) + val response = HttpResponse(entity = wfTimingsContent.replace("\"{{REPLACE_THIS_WITH_METADATA}}\"", r.responseJson.toString)) complete(response.withEntity(response.entity.withContentType(`text/html(UTF-8)`))) case Failure(e) => completeResponse(StatusCodes.InternalServerError, APIResponse.fail(new RuntimeException("Error while loading workflowTimings.html", e)), Seq.empty) } - } case Success(r: FailedMetadataResponse) => r.reason.errorRequest(StatusCodes.InternalServerError) case Failure(_: AskTimeoutException) if CromwellShutdown.shutdownInProgress() => serviceShuttingDownResponse case Failure(e: TimeoutException) => e.failRequest(StatusCodes.ServiceUnavailable) diff --git a/engine/src/main/scala/cromwell/webservice/routes/MetadataRouteSupport.scala b/engine/src/main/scala/cromwell/webservice/routes/MetadataRouteSupport.scala index 96a87d14844..0b9e1009300 100644 --- a/engine/src/main/scala/cromwell/webservice/routes/MetadataRouteSupport.scala +++ b/engine/src/main/scala/cromwell/webservice/routes/MetadataRouteSupport.scala @@ -16,10 +16,9 @@ import cromwell.core.{WorkflowId, path => _} import cromwell.engine.instrumentation.HttpInstrumentation import cromwell.server.CromwellShutdown import cromwell.services.metadata.MetadataService._ +import cromwell.services.metadata.impl.builder.MetadataBuilderActor.{BuiltMetadataResponse, FailedMetadataResponse, MetadataBuilderActorResponse} import 
cromwell.webservice.LabelsManagerActor import cromwell.webservice.LabelsManagerActor._ -import cromwell.webservice.metadata.MetadataBuilderRegulatorActor -import cromwell.webservice.metadata.MetadataBuilderActor.{BuiltMetadataResponse, FailedMetadataResponse, MetadataBuilderActorResponse} import cromwell.webservice.routes.CromwellApiService.{InvalidWorkflowException, UnrecognizedWorkflowException, serviceShuttingDownResponse, validateWorkflowIdInMetadata, validateWorkflowIdInMetadataSummaries} import cromwell.webservice.routes.MetadataRouteSupport._ import cromwell.webservice.WebServiceUtils.EnhancedThrowable @@ -37,27 +36,25 @@ trait MetadataRouteSupport extends HttpInstrumentation { implicit val timeout: Timeout - lazy val metadataBuilderRegulatorActor = actorRefFactory.actorOf(MetadataBuilderRegulatorActor.props(serviceRegistryActor)) - val metadataRoutes = concat( path("workflows" / Segment / Segment / "status") { (_, possibleWorkflowId) => get { instrumentRequest { - metadataLookup(possibleWorkflowId, (w: WorkflowId) => GetStatus(w), serviceRegistryActor, metadataBuilderRegulatorActor) + metadataLookup(possibleWorkflowId, (w: WorkflowId) => GetStatus(w), serviceRegistryActor) } } }, path("workflows" / Segment / Segment / "outputs") { (_, possibleWorkflowId) => get { instrumentRequest { - metadataLookup(possibleWorkflowId, (w: WorkflowId) => WorkflowOutputs(w), serviceRegistryActor, metadataBuilderRegulatorActor) + metadataLookup(possibleWorkflowId, (w: WorkflowId) => WorkflowOutputs(w), serviceRegistryActor) } } }, path("workflows" / Segment / Segment / "logs") { (_, possibleWorkflowId) => get { instrumentRequest { - metadataLookup(possibleWorkflowId, (w: WorkflowId) => GetLogs(w), serviceRegistryActor, metadataBuilderRegulatorActor) + metadataLookup(possibleWorkflowId, (w: WorkflowId) => GetLogs(w), serviceRegistryActor) } } }, @@ -71,8 +68,7 @@ trait MetadataRouteSupport extends HttpInstrumentation { metadataLookup(possibleWorkflowId, (w: WorkflowId) => GetSingleWorkflowMetadataAction(w, includeKeysOption, excludeKeysOption, expandSubWorkflows), - serviceRegistryActor, - metadataBuilderRegulatorActor) + serviceRegistryActor) } } } @@ -81,7 +77,7 @@ trait MetadataRouteSupport extends HttpInstrumentation { concat( get { instrumentRequest { - metadataLookup(possibleWorkflowId, (w: WorkflowId) => GetLabels(w), serviceRegistryActor, metadataBuilderRegulatorActor) + metadataLookup(possibleWorkflowId, (w: WorkflowId) => GetLabels(w), serviceRegistryActor) } }, patch { @@ -131,12 +127,11 @@ trait MetadataRouteSupport extends HttpInstrumentation { object MetadataRouteSupport { def metadataLookup(possibleWorkflowId: String, - request: WorkflowId => ReadAction, - serviceRegistryActor: ActorRef, - metadataBuilderRegulatorActor: ActorRef) + request: WorkflowId => MetadataReadAction, + serviceRegistryActor: ActorRef) (implicit timeout: Timeout, ec: ExecutionContext): Route = { - completeMetadataBuilderResponse(metadataBuilderActorRequest(possibleWorkflowId, request, serviceRegistryActor, metadataBuilderRegulatorActor)) + completeMetadataBuilderResponse(metadataBuilderActorRequest(possibleWorkflowId, request, serviceRegistryActor)) } def queryMetadata(parameters: Seq[(String, String)], @@ -145,17 +140,16 @@ object MetadataRouteSupport { } def metadataBuilderActorRequest(possibleWorkflowId: String, - request: WorkflowId => ReadAction, - serviceRegistryActor: ActorRef, - metadataBuilderRegulatorActor: ActorRef) + request: WorkflowId => MetadataReadAction, + serviceRegistryActor: ActorRef) (implicit 
timeout: Timeout, ec: ExecutionContext): Future[MetadataBuilderActorResponse] = { - validateWorkflowIdInMetadata(possibleWorkflowId, serviceRegistryActor) flatMap { w => metadataBuilderRegulatorActor.ask(request(w)).mapTo[MetadataBuilderActorResponse] } + validateWorkflowIdInMetadata(possibleWorkflowId, serviceRegistryActor) flatMap { w => serviceRegistryActor.ask(request(w)).mapTo[MetadataBuilderActorResponse] } } def completeMetadataBuilderResponse(response: Future[MetadataBuilderActorResponse]): Route = { onComplete(response) { - case Success(r: BuiltMetadataResponse) => complete(r.response) + case Success(r: BuiltMetadataResponse) => complete(r.responseJson) case Success(r: FailedMetadataResponse) => r.reason.errorRequest(StatusCodes.InternalServerError) case Failure(_: AskTimeoutException) if CromwellShutdown.shutdownInProgress() => serviceShuttingDownResponse case Failure(e: UnrecognizedWorkflowException) => e.failRequest(StatusCodes.NotFound) @@ -167,7 +161,7 @@ object MetadataRouteSupport { def metadataQueryRequest(parameters: Seq[(String, String)], serviceRegistryActor: ActorRef)(implicit timeout: Timeout): Future[MetadataQueryResponse] = { - serviceRegistryActor.ask(WorkflowQuery(parameters)).mapTo[MetadataQueryResponse] + serviceRegistryActor.ask(QueryForWorkflowsMatchingParameters(parameters)).mapTo[MetadataQueryResponse] } def completeMetadataQueryResponse(response: Future[MetadataQueryResponse]): Route = { diff --git a/engine/src/main/scala/cromwell/webservice/routes/wes/WesRouteSupport.scala b/engine/src/main/scala/cromwell/webservice/routes/wes/WesRouteSupport.scala index a7bfd1709ec..4f6f53ed4e5 100644 --- a/engine/src/main/scala/cromwell/webservice/routes/wes/WesRouteSupport.scala +++ b/engine/src/main/scala/cromwell/webservice/routes/wes/WesRouteSupport.scala @@ -6,7 +6,7 @@ import akka.http.scaladsl.server.Route import akka.pattern.{AskTimeoutException, ask} import akka.util.Timeout import cromwell.engine.instrumentation.HttpInstrumentation -import cromwell.services.metadata.MetadataService.{GetStatus, MetadataServiceResponse, StatusLookupFailed, StatusLookupResponse} +import cromwell.services.metadata.MetadataService.{GetStatus, MetadataServiceResponse, StatusLookupFailed} import cromwell.webservice.routes.CromwellApiService.{UnrecognizedWorkflowException, validateWorkflowIdInMetadata} import cromwell.webservice.WebServiceUtils.EnhancedThrowable @@ -19,6 +19,7 @@ import WesRouteSupport._ import cromwell.core.abort.SuccessfulAbortResponse import cromwell.engine.workflow.WorkflowManagerActor.WorkflowNotFoundException import cromwell.server.CromwellShutdown +import cromwell.services.metadata.impl.builder.MetadataBuilderActor.BuiltMetadataResponse import cromwell.webservice.routes.CromwellApiService trait WesRouteSupport extends HttpInstrumentation { @@ -56,9 +57,9 @@ trait WesRouteSupport extends HttpInstrumentation { val response = validateWorkflowIdInMetadata(possibleWorkflowId, serviceRegistryActor).flatMap(w => serviceRegistryActor.ask(GetStatus(w)).mapTo[MetadataServiceResponse]) // WES can also return a 401 or a 403 but that requires user auth knowledge which Cromwell doesn't currently have onComplete(response) { - case Success(s: StatusLookupResponse) => - val wesState = WesState.fromCromwellStatus(s.status) - complete(WesRunStatus(s.workflowId.toString, wesState)) + case Success(BuiltMetadataResponse(_, jsObject)) => + val wesState = WesState.fromCromwellStatusJson(jsObject) + complete(WesRunStatus(possibleWorkflowId, wesState)) case Success(r: StatusLookupFailed) 
=> r.reason.errorRequest(StatusCodes.InternalServerError) case Success(m: MetadataServiceResponse) => // This should never happen, but .... diff --git a/engine/src/main/scala/cromwell/webservice/routes/wes/WesState.scala b/engine/src/main/scala/cromwell/webservice/routes/wes/WesState.scala index 15799ca5a53..3f468156a50 100644 --- a/engine/src/main/scala/cromwell/webservice/routes/wes/WesState.scala +++ b/engine/src/main/scala/cromwell/webservice/routes/wes/WesState.scala @@ -2,7 +2,7 @@ package cromwell.webservice.routes.wes import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport import cromwell.core._ -import spray.json.{DefaultJsonProtocol, JsString, JsValue, RootJsonFormat} +import spray.json.{DefaultJsonProtocol, JsObject, JsString, JsValue, RootJsonFormat} object WesState { sealed trait WesState extends Product with Serializable { val name: String } @@ -30,6 +30,15 @@ object WesState { } } + def fromCromwellStatusJson(jsonResponse: JsObject): WesState = { + + val statusString = jsonResponse.fields.get("status").collect { + case str: JsString => str.value + }.getOrElse(throw new IllegalArgumentException(s"Could not coerce Cromwell status response ${jsonResponse.compactPrint} into a valid WES status")) + + fromCromwellStatus(WorkflowState.withName(statusString)) + } + def fromString(status: String): WesState = { status match { case Unknown.name => Unknown diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/CachingConfigSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/CachingConfigSpec.scala index d66072e04e5..0030e3043e0 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/CachingConfigSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/CachingConfigSpec.scala @@ -1,17 +1,20 @@ package cromwell.engine.workflow.lifecycle +import cats.data.NonEmptyList import cats.data.Validated.{Invalid, Valid} import com.typesafe.config.{Config, ConfigFactory} import cromwell.core.WorkflowOptions import cromwell.core.callcaching.CallCachingMode import cromwell.engine.workflow.lifecycle.materialization.MaterializeWorkflowDescriptorActor +import org.scalatest.prop.TableDrivenPropertyChecks import org.scalatest.{Assertion, FlatSpec, Matchers} import scala.collection.JavaConverters._ import scala.util.{Success, Try} -class CachingConfigSpec extends FlatSpec with Matchers { +class CachingConfigSpec extends FlatSpec with Matchers with TableDrivenPropertyChecks { + val applicationConfig = ConfigFactory.load val defaultConfig = Map("backend.backend" -> "local") val configWithCallCachingOn = defaultConfig + ("call-caching.enabled" -> "true") val configWithCallCachingOff = defaultConfig + ("call-caching.enabled" -> "false") @@ -21,6 +24,10 @@ class CachingConfigSpec extends FlatSpec with Matchers { val configs = Seq(defaultConfig, configWithCallCachingOn, configWithCallCachingOff) val options = Seq(None, Some(true), Some(false)) + private def makeConfig(configMap: Map[String, String]): Config = { + ConfigFactory.parseMap(configMap.asJava).withFallback(applicationConfig) + } + def makeOptions(writeOpt: Option[Boolean], readOpt: Option[Boolean]) = { val writeValue = writeOpt map { v => s""""write_to_cache": $v""" } val readValue = readOpt map { v => s""""read_from_cache": $v""" } @@ -33,21 +40,21 @@ class CachingConfigSpec extends FlatSpec with Matchers { config <- configs writeOption <- options readOption <- options - } yield (ConfigFactory.parseMap(config.asJava), makeOptions(writeOption, readOption))).toSet + } yield 
(makeConfig(config), makeOptions(writeOption, readOption))).toSet // writeCache is ON when config is ON and write_to_cache is None or true val writeCacheOnCombinations = (for { config <- configs if config == configWithCallCachingOn writeOption <- options if writeOption.isEmpty || writeOption.get readOption <- options - } yield (ConfigFactory.parseMap(config.asJava), makeOptions(writeOption, readOption))).toSet + } yield (makeConfig(config), makeOptions(writeOption, readOption))).toSet // readCache is ON when config is ON and read_from_cache is None or true val readCacheOnCombinations = (for { config <- configs if config == configWithCallCachingOn writeOption <- options readOption <- options if readOption.isEmpty || readOption.get - } yield (ConfigFactory.parseMap(config.asJava), makeOptions(writeOption, readOption))).toSet + } yield (makeConfig(config), makeOptions(writeOption, readOption))).toSet val writeCacheOffCombinations = allCombinations -- writeCacheOnCombinations val readCacheOffCombinations = allCombinations -- readCacheOnCombinations @@ -72,4 +79,47 @@ class CachingConfigSpec extends FlatSpec with Matchers { } } } + + it should "run invalid config tests" in { + val invalidConfigTests = Table( + ("config", "workflowOptions", "exceptionMessage"), + ( + "enabled:not-a-boolean", + WorkflowOptions.empty, + NonEmptyList.of("String: 2: enabled has type STRING rather than BOOLEAN"), + ), + ( + """|enabled:not-a-boolean + |invalidate-bad-cache-results:not-a-boolean + |""".stripMargin, + WorkflowOptions.empty, + NonEmptyList.of("String: 2: enabled has type STRING rather than BOOLEAN"), + ), + ( + "invalidate-bad-cache-results:not-a-boolean", + WorkflowOptions.empty, + NonEmptyList.of("String: 2: invalidate-bad-cache-results has type STRING rather than BOOLEAN"), + ), + ( + "", + WorkflowOptions.fromMap(Map( + "call_cache_hit_path_prefixes" -> "not-an-array" + )).get, + NonEmptyList.of("""Unsupported JsValue as JsArray: "not-an-array""""), + ), + ) + + forEvery(invalidConfigTests) { (config, workflowOptions, expectedErrors) => + val rootConfig = ConfigFactory.parseString( + s"""|call-caching { + |$config + |} + |""".stripMargin + ).withFallback(makeConfig(configWithCallCachingOn)) + MaterializeWorkflowDescriptorActor.validateCallCachingMode(workflowOptions, rootConfig) match { + case Valid(_) => fail("Config should not have been valid.") + case Invalid(actualErrors) => actualErrors should be(expectedErrors) + } + } + } } diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ExecutionStoreBenchmark.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ExecutionStoreBenchmark.scala index 60f0db22e42..26e3ff269d5 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ExecutionStoreBenchmark.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ExecutionStoreBenchmark.scala @@ -72,4 +72,30 @@ object ExecutionStoreBenchmark extends Bench[Double] with DefaultJsonProtocol { } } } + + performance of "CommandCallNode" in { + + measure method "default toString" in { + + val sizes = Gen.range("size")(from = 10000, upto = 10000, hop = 10000) + using(sizes) in { size => + 1 to size foreach { _ => + (scatterCall.toString, prepareCall.toString) + } + } + + } + + measure method "simple toString" in { + + val sizes = Gen.range("size")(from = 10000, upto = 10000, hop = 10000) + using(sizes) in { size => + 1 to size foreach { _ => + (scatterCall.identifier.fullyQualifiedName, 
prepareCall.identifier.fullyQualifiedName) + } + } + + } + + } } diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheDiffActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheDiffActorSpec.scala index ac35f6d017e..3b3c6fd6e4b 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheDiffActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheDiffActorSpec.scala @@ -5,10 +5,13 @@ import cats.data.NonEmptyList import cromwell.core._ import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheDiffActor._ import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheDiffQueryParameter.CallCacheDiffQueryCall -import cromwell.services.metadata.MetadataService.{GetMetadataQueryAction, MetadataLookupResponse, MetadataServiceKeyLookupFailed} -import cromwell.services.metadata._ +import cromwell.services.metadata.MetadataService.GetMetadataAction +import cromwell.services.metadata.{MetadataService, _} +import cromwell.services.metadata.impl.builder.MetadataBuilderActor +import cromwell.services.metadata.impl.builder.MetadataBuilderActor.{BuiltMetadataResponse, FailedMetadataResponse} import org.scalatest.concurrent.Eventually import org.scalatest.{FlatSpecLike, Matchers} +import spray.json.JsObject class CallCacheDiffActorSpec extends TestKitSuite with FlatSpecLike with Matchers with ImplicitSender with Eventually { @@ -19,70 +22,77 @@ class CallCacheDiffActorSpec extends TestKitSuite with FlatSpecLike with Matcher val callFqnA = "callFqnA" val callFqnB = "callFqnB" - + val metadataJobKeyA = Option(MetadataJobKey(callFqnA, Option(1), 1)) val metadataJobKeyB = Option(MetadataJobKey(callFqnB, None, 1)) - + val callA = CallCacheDiffQueryCall(workflowIdA, callFqnA, Option(1)) val callB = CallCacheDiffQueryCall(workflowIdB, callFqnB, None) val queryA = MetadataQuery( - workflowIdA, - Option(MetadataQueryJobKey(callFqnA, Option(1), None)), - None, - Option(NonEmptyList.of("callCaching", "executionStatus")), - None, + workflowId = workflowIdA, + jobKey = Option(MetadataQueryJobKey(callFqnA, Option(1), None)), + key = None, + includeKeysOption = Option(NonEmptyList.of("callCaching", "executionStatus")), + excludeKeysOption = Option(NonEmptyList.of("callCaching:hitFailures")), expandSubWorkflows = false ) val queryB = MetadataQuery( - workflowIdB, - Option(MetadataQueryJobKey(callFqnB, None, None)), - None, - Option(NonEmptyList.of("callCaching", "executionStatus")), - None, + workflowId = workflowIdB, + jobKey = Option(MetadataQueryJobKey(callFqnB, None, None)), + key = None, + includeKeysOption = Option(NonEmptyList.of("callCaching", "executionStatus")), + excludeKeysOption = Option(NonEmptyList.of("callCaching:hitFailures")), expandSubWorkflows = false ) - + val eventsA = List( MetadataEvent(MetadataKey(workflowIdA, metadataJobKeyA, "executionStatus"), MetadataValue("Done")), MetadataEvent(MetadataKey(workflowIdA, metadataJobKeyA, "callCaching:allowResultReuse"), MetadataValue(true)), - MetadataEvent(MetadataKey(workflowIdA, metadataJobKeyA, "callCaching:hashes: hash in only in A"), MetadataValue("hello")), - MetadataEvent(MetadataKey(workflowIdA, metadataJobKeyA, "callCaching:hashes: hash in A and B with same value"), MetadataValue(1)), - MetadataEvent(MetadataKey(workflowIdA, metadataJobKeyA, "callCaching:hashes: hash in A and B with different value"), MetadataValue("I'm the hash for A 
!")) + MetadataEvent(MetadataKey(workflowIdA, metadataJobKeyA, "callCaching:hashes:hash in only in A"), MetadataValue("hello from A")), + MetadataEvent(MetadataKey(workflowIdA, metadataJobKeyA, "callCaching:hashes:hash in A and B with same value"), MetadataValue("we are thinking the same thought")), + MetadataEvent(MetadataKey(workflowIdA, metadataJobKeyA, "callCaching:hashes:hash in A and B with different value"), MetadataValue("I'm the hash for A !")) ) + val workflowMetadataA: JsObject = MetadataBuilderActor.workflowMetadataResponse(workflowIdA, eventsA, includeCallsIfEmpty = false, Map.empty) + val responseForA = BuiltMetadataResponse(MetadataService.GetMetadataAction(queryA), workflowMetadataA) val eventsB = List( MetadataEvent(MetadataKey(workflowIdB, metadataJobKeyB, "executionStatus"), MetadataValue("Failed")), MetadataEvent(MetadataKey(workflowIdB, metadataJobKeyB, "callCaching:allowResultReuse"), MetadataValue(false)), - MetadataEvent(MetadataKey(workflowIdB, metadataJobKeyB, "callCaching:hashes: hash in only in B"), MetadataValue("hello")), - MetadataEvent(MetadataKey(workflowIdB, metadataJobKeyB, "callCaching:hashes: hash in A and B with same value"), MetadataValue(1)), - MetadataEvent(MetadataKey(workflowIdA, metadataJobKeyA, "callCaching:hashes: hash in A and B with different value"), MetadataValue("I'm the hash for B !")) + MetadataEvent(MetadataKey(workflowIdB, metadataJobKeyB, "callCaching:hashes:hash in only in B"), MetadataValue("hello from B")), + MetadataEvent(MetadataKey(workflowIdB, metadataJobKeyB, "callCaching:hashes:hash in A and B with same value"), MetadataValue("we are thinking the same thought")), + MetadataEvent(MetadataKey(workflowIdB, metadataJobKeyB, "callCaching:hashes:hash in A and B with different value"), MetadataValue("I'm the hash for B !")) ) - + val workflowMetadataB: JsObject = MetadataBuilderActor.workflowMetadataResponse(workflowIdB, eventsB, includeCallsIfEmpty = false, Map.empty) + val responseForB = BuiltMetadataResponse(MetadataService.GetMetadataAction(queryB), workflowMetadataB) + it should "send correct queries to MetadataService when receiving a CallCacheDiffRequest" in { val mockServiceRegistryActor = TestProbe() val actor = TestFSMRef(new CallCacheDiffActor(mockServiceRegistryActor.ref)) actor ! CallCacheDiffQueryParameter(callA, callB) - mockServiceRegistryActor.expectMsg(GetMetadataQueryAction(queryA)) - mockServiceRegistryActor.expectMsg(GetMetadataQueryAction(queryB)) + mockServiceRegistryActor.expectMsg(GetMetadataAction(queryA)) + mockServiceRegistryActor.expectMsg(GetMetadataAction(queryB)) + + system.stop(actor) } it should "save response for callA and wait for callB" in { val mockServiceRegistryActor = TestProbe() val actor = TestFSMRef(new CallCacheDiffActor(mockServiceRegistryActor.ref)) - + actor.setState(WaitingForMetadata, CallCacheDiffWithRequest(queryA, queryB, None, None, self)) - val response = MetadataLookupResponse(queryA, eventsA) - actor ! response - + actor ! 
responseForA + eventually { - actor.stateData shouldBe CallCacheDiffWithRequest(queryA, queryB, Some(response), None, self) + actor.stateData shouldBe CallCacheDiffWithRequest(queryA, queryB, Some(WorkflowMetadataJson(workflowMetadataA)), None, self) actor.stateName shouldBe WaitingForMetadata } + + system.stop(actor) } it should "save response for callB and wait for callA" in { @@ -91,24 +101,24 @@ class CallCacheDiffActorSpec extends TestKitSuite with FlatSpecLike with Matcher actor.setState(WaitingForMetadata, CallCacheDiffWithRequest(queryA, queryB, None, None, self)) - val response = MetadataLookupResponse(queryB, eventsB) - actor ! response + actor ! responseForB eventually { - actor.stateData shouldBe CallCacheDiffWithRequest(queryA, queryB, None, Some(response), self) + actor.stateData shouldBe CallCacheDiffWithRequest(queryA, queryB, None, Some(WorkflowMetadataJson(workflowMetadataB)), self) actor.stateName shouldBe WaitingForMetadata } + + system.stop(actor) } it should "build the response when receiving response for A and already has B" in { val mockServiceRegistryActor = TestProbe() val actor = TestFSMRef(new CallCacheDiffActor(mockServiceRegistryActor.ref)) watch(actor) - val responseB = MetadataLookupResponse(queryB, eventsB) - actor.setState(WaitingForMetadata, CallCacheDiffWithRequest(queryA, queryB, None, Option(responseB), self)) + actor.setState(WaitingForMetadata, CallCacheDiffWithRequest(queryA, queryB, None, Some(WorkflowMetadataJson(workflowMetadataB)), self)) - actor ! MetadataLookupResponse(queryA, eventsA) + actor ! responseForA expectMsgClass(classOf[CallCacheDiffActorResponse]) expectTerminated(actor) @@ -118,150 +128,192 @@ class CallCacheDiffActorSpec extends TestKitSuite with FlatSpecLike with Matcher val mockServiceRegistryActor = TestProbe() val actor = TestFSMRef(new CallCacheDiffActor(mockServiceRegistryActor.ref)) watch(actor) - val responseA = MetadataLookupResponse(queryA, eventsA) - actor.setState(WaitingForMetadata, CallCacheDiffWithRequest(queryA, queryB, Option(responseA), None, self)) + actor.setState(WaitingForMetadata, CallCacheDiffWithRequest(queryA, queryB, Some(WorkflowMetadataJson(workflowMetadataA)), None, self)) - actor ! MetadataLookupResponse(queryB, eventsB) + actor ! responseForB expectMsgClass(classOf[CallCacheDiffActorResponse]) expectTerminated(actor) } - it should "build a correct response" in { - import cromwell.services.metadata.MetadataService.MetadataLookupResponse + val correctCallCacheDiff = { import spray.json._ - - val mockServiceRegistryActor = TestProbe() - val actor = TestFSMRef(new CallCacheDiffActor(mockServiceRegistryActor.ref)) - watch(actor) - actor.setState(WaitingForMetadata, CallCacheDiffWithRequest(queryA, queryB, None, None, self)) - actor ! MetadataLookupResponse(queryB, eventsB) - actor ! MetadataLookupResponse(queryA, eventsA) - - val expectedJson: JsObject = - s""" - |{ - | "callA":{ - | "executionStatus": "Done", - | "allowResultReuse": true, - | "callFqn": "callFqnA", - | "jobIndex": 1, - | "workflowId": "971652a6-139c-4ef3-96b5-aeb611a40dbf" - | }, - | "callB":{ - | "executionStatus": "Failed", - | "allowResultReuse": false, - | "callFqn": "callFqnB", - | "jobIndex": -1, - | "workflowId": "bb85b3ec-e179-4f12-b90f-5191216da598" - | }, - | "hashDifferential":[ - | { - | "hashKey": "hash in only in A", - | "callA":"hello", - | "callB":null - | }, - | { - | "hashKey": "hash in A and B with different value", - | "callA":"I'm the hash for A !", - | "callB":"I'm the hash for B !" 
- | }, - | { - | "hashKey": "hash in only in B", - | "callA":null, - | "callB":"hello" - | } - | ] - |} + s""" + |{ + | "callA":{ + | "executionStatus": "Done", + | "allowResultReuse": true, + | "callFqn": "callFqnA", + | "jobIndex": 1, + | "workflowId": "971652a6-139c-4ef3-96b5-aeb611a40dbf" + | }, + | "callB":{ + | "executionStatus": "Failed", + | "allowResultReuse": false, + | "callFqn": "callFqnB", + | "jobIndex": -1, + | "workflowId": "bb85b3ec-e179-4f12-b90f-5191216da598" + | }, + | "hashDifferential":[ + | { + | "hashKey": "hash in only in A", + | "callA":"hello from A", + | "callB":null + | }, + | { + | "hashKey": "hash in A and B with different value", + | "callA":"I'm the hash for A !", + | "callB":"I'm the hash for B !" + | }, + | { + | "hashKey": "hash in only in B", + | "callA":null, + | "callB":"hello from B" + | } + | ] + |} """.stripMargin.parseJson.asJsObject - - val expectedResponse = BuiltCallCacheDiffResponse(expectedJson) - - expectMsg(expectedResponse) - expectTerminated(actor) } - it should "fail properly" in { - import scala.concurrent.duration._ - import scala.language.postfixOps + it should "build a correct response" in { + import spray.json._ + import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheDiffActorJsonFormatting.successfulResponseJsonFormatter val mockServiceRegistryActor = TestProbe() val actor = TestFSMRef(new CallCacheDiffActor(mockServiceRegistryActor.ref)) watch(actor) - val exception = new Exception("Query lookup failed - but it's ok ! this is a test !") - val responseA = MetadataServiceKeyLookupFailed(queryA, exception) - actor.setState(WaitingForMetadata, CallCacheDiffWithRequest(queryA, queryB, None, None, self)) - actor ! responseA - - expectMsgPF(1 second) { - case FailedCallCacheDiffResponse(e: Throwable) => - e.getMessage shouldBe "Query lookup failed - but it's ok ! this is a test !" + actor ! responseForB + actor ! responseForA + + expectMsgPF() { + case r: SuccessfulCallCacheDiffResponse => + withClue(s""" + |Expected: + |${correctCallCacheDiff.prettyPrint} + | + |Actual: + |${r.toJson.prettyPrint}""".stripMargin) { + r.toJson should be(correctCallCacheDiff) + } + case other => fail(s"Expected SuccessfulCallCacheDiffResponse but got $other") } - expectTerminated(actor) } - it should "Respond with an appropriate message if hashes are missing" in { - import scala.concurrent.duration._ - import scala.language.postfixOps + it should "build a correct response from multiple attempts" in { + import spray.json._ + import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheDiffActorJsonFormatting.successfulResponseJsonFormatter val mockServiceRegistryActor = TestProbe() val actor = TestFSMRef(new CallCacheDiffActor(mockServiceRegistryActor.ref)) watch(actor) - val responseB = MetadataLookupResponse(queryB, eventsB.filterNot(_.key.key.contains("hashes"))) - - actor.setState(WaitingForMetadata, CallCacheDiffWithRequest(queryA, queryB, None, Option(responseB), self)) - - actor ! MetadataLookupResponse(queryA, eventsA.filterNot(_.key.key.contains("hashes"))) + actor.setState(WaitingForMetadata, CallCacheDiffWithRequest(queryA, queryB, None, None, self)) - expectMsgPF(1 second) { - case FailedCallCacheDiffResponse(e) => - e.getMessage shouldBe "callA and callB have not finished yet, or were run on a previous version of Cromwell on which this endpoint was not supported." 
+ // Create a set of "failed" events for attempt 1: + val eventsAAttempt1 = List( + MetadataEvent(MetadataKey(workflowIdA, metadataJobKeyA, "executionStatus"), MetadataValue("Failed")), + MetadataEvent(MetadataKey(workflowIdA, metadataJobKeyA, "callCaching:allowResultReuse"), MetadataValue(false)), + MetadataEvent(MetadataKey(workflowIdA, metadataJobKeyA, "callCaching:hashes:hash in only in A"), MetadataValue("ouch!")), + MetadataEvent(MetadataKey(workflowIdA, metadataJobKeyA, "callCaching:hashes:hash in A and B with same value"), MetadataValue("ouch!")), + MetadataEvent(MetadataKey(workflowIdA, metadataJobKeyA, "callCaching:hashes:hash in A and B with different value"), MetadataValue("ouch!")) + ) + // And update the old "eventsA" to represent attempt 2: + val eventsAAttempt2 = eventsA.map(event => event.copy(key = event.key.copy(jobKey = event.key.jobKey.map(_.copy(attempt = 2))))) + + val modifiedEventsA = eventsAAttempt1 ++ eventsAAttempt2 + + val workflowMetadataA: JsObject = MetadataBuilderActor.workflowMetadataResponse(workflowIdA, modifiedEventsA, includeCallsIfEmpty = false, Map.empty) + val responseForA = BuiltMetadataResponse(MetadataService.GetMetadataAction(queryA), workflowMetadataA) + + + actor ! responseForB + actor ! responseForA + + expectMsgPF() { + case r: SuccessfulCallCacheDiffResponse => + withClue(s""" + |Expected: + |${correctCallCacheDiff.prettyPrint} + | + |Actual: + |${r.toJson.prettyPrint}""".stripMargin) { + r.toJson should be(correctCallCacheDiff) + } + case other => + expectTerminated(actor) + fail(s"Expected SuccessfulCallCacheDiffResponse but got $other") } + expectTerminated(actor) } - it should "Respond with CachedCallNotFoundException if a call is missing" in { + it should "fail properly" in { import scala.concurrent.duration._ import scala.language.postfixOps val mockServiceRegistryActor = TestProbe() val actor = TestFSMRef(new CallCacheDiffActor(mockServiceRegistryActor.ref)) watch(actor) - val responseB = MetadataLookupResponse(queryB, eventsB.filterNot(_.key.key.contains("hashes"))) + val exception = new Exception("Query lookup failed - but it's ok ! this is a test !") + val responseA = FailedMetadataResponse(GetMetadataAction(queryA), exception) - actor.setState(WaitingForMetadata, CallCacheDiffWithRequest(queryA, queryB, None, Option(responseB), self)) + actor.setState(WaitingForMetadata, CallCacheDiffWithRequest(queryA, queryB, None, None, self)) - actor ! MetadataLookupResponse(queryA, List.empty) + actor ! responseA expectMsgPF(1 second) { - case FailedCallCacheDiffResponse(e) => - e.getMessage shouldBe "Cannot find call 971652a6-139c-4ef3-96b5-aeb611a40dbf:callFqnA:1" + case FailedCallCacheDiffResponse(e: Throwable) => + e.getMessage shouldBe "Query lookup failed - but it's ok ! this is a test !" 
} + expectTerminated(actor) } - it should "Respond with CachedCallNotFoundException if both calls are missing" in { + it should "respond with an appropriate error if calls' hashes are missing" in { + testExpectedErrorForModifiedMetadata( + metadataFilter = _.key.key.contains("hashes"), + error = s"""Failed to calculate diff for call A and call B: + |Failed to extract relevant metadata for call A (971652a6-139c-4ef3-96b5-aeb611a40dbf / callFqnA:1) (reason 1 of 1): No 'hashes' field found + |Failed to extract relevant metadata for call B (bb85b3ec-e179-4f12-b90f-5191216da598 / callFqnB:-1) (reason 1 of 1): No 'hashes' field found""".stripMargin + ) + } + + it should "respond with an appropriate error if both calls are missing" in { + testExpectedErrorForModifiedMetadata( + metadataFilter = _.key.jobKey.nonEmpty, + error = s"""Failed to calculate diff for call A and call B: + |Failed to extract relevant metadata for call A (971652a6-139c-4ef3-96b5-aeb611a40dbf / callFqnA:1) (reason 1 of 1): No 'calls' field found + |Failed to extract relevant metadata for call B (bb85b3ec-e179-4f12-b90f-5191216da598 / callFqnB:-1) (reason 1 of 1): No 'calls' field found""".stripMargin + ) + } + + def testExpectedErrorForModifiedMetadata(metadataFilter: MetadataEvent => Boolean, error: String) = { import scala.concurrent.duration._ import scala.language.postfixOps val mockServiceRegistryActor = TestProbe() val actor = TestFSMRef(new CallCacheDiffActor(mockServiceRegistryActor.ref)) watch(actor) - val responseB = MetadataLookupResponse(queryB, List.empty) - actor.setState(WaitingForMetadata, CallCacheDiffWithRequest(queryA, queryB, None, Option(responseB), self)) + def getModifiedResponse(workflowId: WorkflowId, query: MetadataQuery, events: Seq[MetadataEvent]): BuiltMetadataResponse = { + val modifiedEvents = events.filterNot(metadataFilter) // filters out any "call" level metadata + val modifiedWorkflowMetadata = MetadataBuilderActor.workflowMetadataResponse(workflowId, modifiedEvents, includeCallsIfEmpty = false, Map.empty) + BuiltMetadataResponse(MetadataService.GetMetadataAction(query), modifiedWorkflowMetadata) + } + + actor.setState(WaitingForMetadata, CallCacheDiffWithRequest(queryA, queryB, None, None, self)) - actor ! MetadataLookupResponse(queryA, List.empty) + actor ! getModifiedResponse(workflowIdA, queryA, eventsA) + actor ! 
getModifiedResponse(workflowIdB, queryB, eventsB) expectMsgPF(1 second) { - case FailedCallCacheDiffResponse(e) => - e.getMessage shouldBe "Cannot find calls 971652a6-139c-4ef3-96b5-aeb611a40dbf:callFqnA:1, bb85b3ec-e179-4f12-b90f-5191216da598:callFqnB:-1" + case FailedCallCacheDiffResponse(e) => e.getMessage shouldBe error } expectTerminated(actor) } + } diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCachingSlickDatabaseSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCachingSlickDatabaseSpec.scala index bcd996a40b4..e3f823dfcda 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCachingSlickDatabaseSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCachingSlickDatabaseSpec.scala @@ -30,7 +30,7 @@ class CallCachingSlickDatabaseSpec ) DatabaseSystem.All foreach { databaseSystem => - behavior of s"CallCachingSlickDatabase on ${databaseSystem.shortName}" + behavior of s"CallCachingSlickDatabase on ${databaseSystem.name}" lazy val dataAccess = DatabaseTestKit.initializedDatabaseFromSystem(EngineDatabaseType, databaseSystem) @@ -125,5 +125,9 @@ class CallCachingSlickDatabaseSpec } } + + it should "close the database" taggedAs DbmsTest in { + dataAccess.close() + } } } diff --git a/engine/src/test/scala/cromwell/engine/workflow/workflowstore/SqlWorkflowStoreSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/workflowstore/SqlWorkflowStoreSpec.scala index c32ca48fc85..2e9093e4bfe 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/workflowstore/SqlWorkflowStoreSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/workflowstore/SqlWorkflowStoreSpec.scala @@ -24,7 +24,7 @@ class SqlWorkflowStoreSpec extends FlatSpec with Matchers with ScalaFutures with DatabaseSystem.All foreach { databaseSystem => - behavior of s"SqlWorkflowStore on ${databaseSystem.shortName}" + behavior of s"SqlWorkflowStore on ${databaseSystem.name}" it should "honor the onHold flag" taggedAs DbmsTest in { runWithDatabase(databaseSystem) { workflowStore => diff --git a/engine/src/test/scala/cromwell/webservice/MetadataBuilderActorSpec.scala b/engine/src/test/scala/cromwell/webservice/MetadataBuilderActorSpec.scala index 5306fffa8f1..694e4f9d45d 100644 --- a/engine/src/test/scala/cromwell/webservice/MetadataBuilderActorSpec.scala +++ b/engine/src/test/scala/cromwell/webservice/MetadataBuilderActorSpec.scala @@ -9,12 +9,13 @@ import akka.util.Timeout import cromwell.core._ import cromwell.services.metadata.MetadataService._ import cromwell.services.metadata._ -import cromwell.webservice.metadata.MetadataBuilderActor -import cromwell.webservice.metadata.MetadataBuilderActor.{BuiltMetadataResponse, MetadataBuilderActorResponse} +import cromwell.services.metadata.impl.builder.MetadataBuilderActor +import cromwell.services.metadata.impl.builder.MetadataBuilderActor.{BuiltMetadataResponse, MetadataBuilderActorResponse} import org.scalatest.prop.TableDrivenPropertyChecks import org.scalatest.{Assertion, AsyncFlatSpecLike, Matchers, Succeeded} import org.specs2.mock.Mockito import spray.json._ +import cromwell.util.AkkaTestUtil.EnhancedTestProbe import scala.concurrent.Future import scala.concurrent.duration._ @@ -33,13 +34,16 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with AsyncFlatSp queryReply: MetadataQuery, events: Seq[MetadataEvent], expectedRes: String): Future[Assertion] = { - val 
mockServiceRegistry = TestProbe() - val mba = system.actorOf(MetadataBuilderActor.props(mockServiceRegistry.ref)) + val mockReadMetadataWorkerActor = TestProbe() + def readMetadataWorkerMaker = () => mockReadMetadataWorkerActor.props + + + val mba = system.actorOf(MetadataBuilderActor.props(readMetadataWorkerMaker)) val response = mba.ask(action).mapTo[MetadataBuilderActorResponse] - mockServiceRegistry.expectMsg(defaultTimeout, action) - mockServiceRegistry.reply(MetadataLookupResponse(queryReply, events)) + mockReadMetadataWorkerActor.expectMsg(defaultTimeout, action) + mockReadMetadataWorkerActor.reply(MetadataLookupResponse(queryReply, events)) response map { r => r shouldBe a [BuiltMetadataResponse] } - response.mapTo[BuiltMetadataResponse] map { b => b.response shouldBe expectedRes.parseJson} + response.mapTo[BuiltMetadataResponse] map { b => b.responseJson shouldBe expectedRes.parseJson} } it should "build workflow scope tree from metadata events" in { @@ -96,7 +100,7 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with AsyncFlatSp |}""".stripMargin val mdQuery = MetadataQuery(workflowA, None, None, None, None, expandSubWorkflows = false) - val queryAction = GetMetadataQueryAction(mdQuery) + val queryAction = GetMetadataAction(mdQuery) assertMetadataResponse(queryAction, mdQuery, workflowAEvents, expectedRes) } @@ -350,7 +354,7 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with AsyncFlatSp """.stripMargin val mdQuery = MetadataQuery(workflowId, None, None, None, None, expandSubWorkflows = false) - val queryAction = GetMetadataQueryAction(mdQuery) + val queryAction = GetMetadataAction(mdQuery) assertMetadataResponse(queryAction, mdQuery, events, expectedResponse) } @@ -371,7 +375,7 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with AsyncFlatSp """.stripMargin val mdQuery = MetadataQuery(workflowId, None, None, None, None, expandSubWorkflows = false) - val queryAction = GetMetadataQueryAction(mdQuery) + val queryAction = GetMetadataAction(mdQuery) assertMetadataResponse(queryAction, mdQuery, events, expectedResponse) } @@ -391,14 +395,14 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with AsyncFlatSp """.stripMargin val mdQuery = MetadataQuery(workflowId, None, None, None, None, expandSubWorkflows = false) - val queryAction = GetMetadataQueryAction(mdQuery) + val queryAction = GetMetadataAction(mdQuery) assertMetadataResponse(queryAction, mdQuery, events, expectedResponse) } it should "render empty Json" in { val workflowId = WorkflowId.randomId() val mdQuery = MetadataQuery(workflowId, None, None, None, None, expandSubWorkflows = false) - val queryAction = GetMetadataQueryAction(mdQuery) + val queryAction = GetMetadataAction(mdQuery) val expectedEmptyResponse = """{}""" assertMetadataResponse(queryAction, mdQuery, List.empty, expectedEmptyResponse) } @@ -428,7 +432,7 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with AsyncFlatSp """.stripMargin val mdQuery = MetadataQuery(workflowId, None, None, None, None, expandSubWorkflows = false) - val queryAction = GetMetadataQueryAction(mdQuery) + val queryAction = GetMetadataAction(mdQuery) assertMetadataResponse(queryAction, mdQuery, emptyEvents, expectedEmptyResponse) val expectedNonEmptyResponse = @@ -456,20 +460,22 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with AsyncFlatSp ) val mainQuery = MetadataQuery(mainWorkflowId, None, None, None, None, expandSubWorkflows = true) - val mainQueryAction = 
GetMetadataQueryAction(mainQuery) + val mainQueryAction = GetMetadataAction(mainQuery) val subQuery = MetadataQuery(subWorkflowId, None, None, None, None, expandSubWorkflows = true) - val subQueryAction = GetMetadataQueryAction(subQuery) + val subQueryAction = GetMetadataAction(subQuery) val parentProbe = TestProbe() - val mockServiceRegistry = TestProbe() - val metadataBuilder = TestActorRef(MetadataBuilderActor.props(mockServiceRegistry.ref), parentProbe.ref, s"MetadataActor-${UUID.randomUUID()}") + val mockReadMetadataWorkerActor = TestProbe() + def readMetadataWorkerMaker = () => mockReadMetadataWorkerActor.props + + val metadataBuilder = TestActorRef(MetadataBuilderActor.props(readMetadataWorkerMaker), parentProbe.ref, s"MetadataActor-${UUID.randomUUID()}") val response = metadataBuilder.ask(mainQueryAction).mapTo[MetadataBuilderActorResponse] - mockServiceRegistry.expectMsg(defaultTimeout, mainQueryAction) - mockServiceRegistry.reply(MetadataLookupResponse(mainQuery, mainEvents)) - mockServiceRegistry.expectMsg(defaultTimeout, subQueryAction) - mockServiceRegistry.reply(MetadataLookupResponse(subQuery, subEvents)) + mockReadMetadataWorkerActor.expectMsg(defaultTimeout, mainQueryAction) + mockReadMetadataWorkerActor.reply(MetadataLookupResponse(mainQuery, mainEvents)) + mockReadMetadataWorkerActor.expectMsg(defaultTimeout, subQueryAction) + mockReadMetadataWorkerActor.reply(MetadataLookupResponse(subQuery, subEvents)) val expandedRes = s""" @@ -493,7 +499,7 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with AsyncFlatSp response map { r => r shouldBe a [BuiltMetadataResponse] } val bmr = response.mapTo[BuiltMetadataResponse] - bmr map { b => b.response shouldBe expandedRes.parseJson} + bmr map { b => b.responseJson shouldBe expandedRes.parseJson} } it should "NOT expand sub workflow metadata when NOT asked for" in { @@ -505,15 +511,17 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with AsyncFlatSp ) val queryNoExpand = MetadataQuery(mainWorkflowId, None, None, None, None, expandSubWorkflows = false) - val queryNoExpandAction = GetMetadataQueryAction(queryNoExpand) + val queryNoExpandAction = GetMetadataAction(queryNoExpand) val parentProbe = TestProbe() - val mockServiceRegistry = TestProbe() - val metadataBuilder = TestActorRef(MetadataBuilderActor.props(mockServiceRegistry.ref), parentProbe.ref, s"MetadataActor-${UUID.randomUUID()}") + val mockReadMetadataWorkerActor = TestProbe() + def readMetadataWorkerMaker= () => mockReadMetadataWorkerActor.props + + val metadataBuilder = TestActorRef(MetadataBuilderActor.props(readMetadataWorkerMaker), parentProbe.ref, s"MetadataActor-${UUID.randomUUID()}") val response = metadataBuilder.ask(queryNoExpandAction).mapTo[MetadataBuilderActorResponse] - mockServiceRegistry.expectMsg(defaultTimeout, queryNoExpandAction) - mockServiceRegistry.reply(MetadataLookupResponse(queryNoExpand, mainEvents)) + mockReadMetadataWorkerActor.expectMsg(defaultTimeout, queryNoExpandAction) + mockReadMetadataWorkerActor.reply(MetadataLookupResponse(queryNoExpand, mainEvents)) val nonExpandedRes = @@ -534,7 +542,7 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with AsyncFlatSp response map { r => r shouldBe a [BuiltMetadataResponse] } val bmr = response.mapTo[BuiltMetadataResponse] - bmr map { b => b.response shouldBe nonExpandedRes.parseJson} + bmr map { b => b.responseJson shouldBe nonExpandedRes.parseJson} } diff --git a/engine/src/test/scala/cromwell/webservice/routes/CromwellApiServiceSpec.scala 
b/engine/src/test/scala/cromwell/webservice/routes/CromwellApiServiceSpec.scala index a805774e1a6..e819671e81b 100644 --- a/engine/src/test/scala/cromwell/webservice/routes/CromwellApiServiceSpec.scala +++ b/engine/src/test/scala/cromwell/webservice/routes/CromwellApiServiceSpec.scala @@ -19,6 +19,8 @@ import cromwell.services.healthmonitor.ProtoHealthMonitorServiceActor.{GetCurren import cromwell.services.instrumentation.InstrumentationService.InstrumentationServiceMessage import cromwell.services.metadata.MetadataService._ import cromwell.services.metadata._ +import cromwell.services.metadata.impl.builder.MetadataBuilderActor +import cromwell.services.metadata.impl.builder.MetadataBuilderActor.BuiltMetadataResponse import cromwell.services.womtool.WomtoolServiceMessages.{DescribeFailure, DescribeRequest, DescribeSuccess} import cromwell.services.womtool.models.WorkflowDescription import cromwell.util.SampleWdl.HelloWorld @@ -543,11 +545,13 @@ object CromwellApiServiceSpec { ) } - def responseMetadataValues(workflowId: WorkflowId, withKeys: List[String], withoutKeys: List[String]) = { + def responseMetadataValues(workflowId: WorkflowId, withKeys: List[String], withoutKeys: List[String]): JsObject = { def keyFilter(keys: List[String])(m: MetadataEvent) = keys.exists(k => m.key.key.startsWith(k)) - fullMetadataResponse(workflowId) + val events = fullMetadataResponse(workflowId) .filter(m => withKeys.isEmpty || keyFilter(withKeys)(m)) .filter(m => withoutKeys.isEmpty || !keyFilter(withoutKeys)(m)) + + MetadataBuilderActor.workflowMetadataResponse(workflowId, events, includeCallsIfEmpty = false, Map.empty) } def metadataQuery(workflowId: WorkflowId) = @@ -565,7 +569,7 @@ object CromwellApiServiceSpec { import MockServiceRegistryActor._ override def receive = { - case WorkflowQuery(parameters) => + case QueryForWorkflowsMatchingParameters(parameters) => val labels: Option[Map[String, String]] = { parameters.contains(("additionalQueryResultFields", "labels")).option( Map("key1" -> "label1", "key2" -> "label2")) @@ -585,22 +589,28 @@ object CromwellApiServiceSpec { ok = true, systems = Map( "Engine Database" -> SubsystemStatus(ok = true, messages = None))) - case GetStatus(id) if id == OnHoldWorkflowId => sender ! StatusLookupResponse(id, WorkflowOnHold) - case GetStatus(id) if id == RunningWorkflowId => sender ! StatusLookupResponse(id, WorkflowRunning) - case GetStatus(id) if id == AbortingWorkflowId => sender ! StatusLookupResponse(id, WorkflowAborting) - case GetStatus(id) if id == AbortedWorkflowId => sender ! StatusLookupResponse(id, WorkflowAborted) - case GetStatus(id) if id == SucceededWorkflowId => sender ! StatusLookupResponse(id, WorkflowSucceeded) - case GetStatus(id) if id == FailedWorkflowId => sender ! StatusLookupResponse(id, WorkflowFailed) - case GetStatus(id) => sender ! StatusLookupResponse(id, WorkflowSubmitted) - case GetLabels(id) => sender ! LabelLookupResponse(id, Map("key1" -> "label1", "key2" -> "label2")) - case WorkflowOutputs(id) => + case request @ GetStatus(id) => + val status = id match { + case OnHoldWorkflowId => WorkflowOnHold + case RunningWorkflowId => WorkflowRunning + case AbortingWorkflowId => WorkflowAborting + case AbortedWorkflowId => WorkflowAborted + case SucceededWorkflowId => WorkflowSucceeded + case FailedWorkflowId => WorkflowFailed + case _ => WorkflowSubmitted + } + sender ! BuiltMetadataResponse(request, MetadataBuilderActor.processStatusResponse(id, status)) + case request @ GetLabels(id) => + sender ! 
BuiltMetadataResponse(request, MetadataBuilderActor.processLabelsResponse(id, Map("key1" -> "label1", "key2" -> "label2"))) + case request @ WorkflowOutputs(id) => val event = Vector(MetadataEvent(MetadataKey(id, None, "outputs:test.hello.salutation"), MetadataValue("Hello foo!", MetadataString))) - sender ! WorkflowOutputsResponse(id, event) - case GetLogs(id) => sender ! LogsResponse(id, logsEvents(id)) - case GetSingleWorkflowMetadataAction(id, withKeys, withoutKeys, _) => + sender ! BuiltMetadataResponse(request, MetadataBuilderActor.processOutputsResponse(id, event)) + case request @ GetLogs(id) => + sender ! BuiltMetadataResponse(request, MetadataBuilderActor.workflowMetadataResponse(id, logsEvents(id), includeCallsIfEmpty = false, Map.empty)) + case request @ GetMetadataAction(MetadataQuery(id, _, _, withKeys, withoutKeys, _)) => val withKeysList = withKeys.map(_.toList).getOrElse(List.empty) val withoutKeysList = withoutKeys.map(_.toList).getOrElse(List.empty) - sender ! MetadataLookupResponse(metadataQuery(id), responseMetadataValues(id, withKeysList, withoutKeysList)) + sender ! BuiltMetadataResponse(request, responseMetadataValues(id, withKeysList, withoutKeysList)) case PutMetadataActionAndRespond(events, _, _) => events.head.key.workflowId match { case CromwellApiServiceSpec.ExistingWorkflowId => sender ! MetadataWriteSuccess(events) diff --git a/filesystems/drs/src/main/scala/cromwell/filesystems/drs/DrsPathBuilderFactory.scala b/filesystems/drs/src/main/scala/cromwell/filesystems/drs/DrsPathBuilderFactory.scala index 7d9cbbaeffa..4f93e3ec7c3 100644 --- a/filesystems/drs/src/main/scala/cromwell/filesystems/drs/DrsPathBuilderFactory.scala +++ b/filesystems/drs/src/main/scala/cromwell/filesystems/drs/DrsPathBuilderFactory.scala @@ -87,7 +87,7 @@ class DrsPathBuilderFactory(globalConfig: Config, instanceConfig: Config, single for { serviceAccountJson <- serviceAccountJsonIo //Currently, Martha only supports resolving DRS paths to GCS paths - url <- IO.fromEither(DrsResolver.extractUrlForScheme(marthaResponse.dos.data_object.urls, GcsScheme)) + url <- IO.fromEither(DrsResolver.extractUrlForScheme(marthaResponse.drs.data_object.urls, GcsScheme)) readableByteChannel <- inputReadChannel(url, GcsScheme, serviceAccountJson, requesterPaysProjectIdOption) } yield readableByteChannel } diff --git a/filesystems/drs/src/main/scala/cromwell/filesystems/drs/DrsResolver.scala b/filesystems/drs/src/main/scala/cromwell/filesystems/drs/DrsResolver.scala index 875bb106998..f84cfc1eb90 100644 --- a/filesystems/drs/src/main/scala/cromwell/filesystems/drs/DrsResolver.scala +++ b/filesystems/drs/src/main/scala/cromwell/filesystems/drs/DrsResolver.scala @@ -31,7 +31,7 @@ object DrsResolver { drsFileSystemProvider <- toIO(drsFileSystemProviderOption, noFileSystemForDrsError) marthaResponse <- drsFileSystemProvider.drsPathResolver.resolveDrsThroughMartha(drsPath.pathAsString) //Currently, Martha only supports resolving DRS paths to GCS paths - relativePath <- IO.fromEither(extractUrlForScheme(marthaResponse.dos.data_object.urls, GcsScheme)) + relativePath <- IO.fromEither(extractUrlForScheme(marthaResponse.drs.data_object.urls, GcsScheme)) .map(_.substring(urlProtocolLength(GcsScheme))) .handleErrorWith(e => IO.raiseError(new RuntimeException(s"Error while resolving DRS path: $drsPath. 
Error: ${ExceptionUtils.getMessage(e)}"))) } yield relativePath diff --git a/filesystems/drs/src/test/scala/cromwell/filesystems/drs/DrsPathBuilderSpec.scala b/filesystems/drs/src/test/scala/cromwell/filesystems/drs/DrsPathBuilderSpec.scala index ae8cfa442c1..ee93ff45581 100644 --- a/filesystems/drs/src/test/scala/cromwell/filesystems/drs/DrsPathBuilderSpec.scala +++ b/filesystems/drs/src/test/scala/cromwell/filesystems/drs/DrsPathBuilderSpec.scala @@ -28,12 +28,12 @@ class DrsPathBuilderSpec extends TestKitSuite with FlatSpecLike with Matchers wi private def pathsToTruncate = Table( ("context", "file", "relative"), - ("dos://bucket/path/to/my/dir", "dos://bucket/path/to/my/dir/file", "file"), - ("dos://bucket/path/to/my/dir", "dos://bucket/path/to/my/dir//file", "file"), - ("dos://bucket/path/to/my//dir", "dos://bucket/path/to/my/dir/file", "dir/file"), - ("dos://bucket/path/to/my//dir", "dos://bucket/path/to/my/dir//file", "dir//file"), - ("dos://bucket/path/to/my/dir", "dos://bucket/path/./to/my/dir/file", "./to/my/dir/file"), - ("dos://bucket/path/to/my/dir/with/file", "dos://bucket/path/to/other/dir/with/file", "other/dir/with/file") + ("drs://bucket/path/to/my/dir", "drs://bucket/path/to/my/dir/file", "file"), + ("drs://bucket/path/to/my/dir", "drs://bucket/path/to/my/dir//file", "file"), + ("drs://bucket/path/to/my//dir", "drs://bucket/path/to/my/dir/file", "dir/file"), + ("drs://bucket/path/to/my//dir", "drs://bucket/path/to/my/dir//file", "dir//file"), + ("drs://bucket/path/to/my/dir", "drs://bucket/path/./to/my/dir/file", "./to/my/dir/file"), + ("drs://bucket/path/to/my/dir/with/file", "drs://bucket/path/to/other/dir/with/file", "other/dir/with/file") ) private def bucket = "mymadeupbucket" @@ -41,71 +41,71 @@ class DrsPathBuilderSpec extends TestKitSuite with FlatSpecLike with Matchers wi private def goodPaths = Seq( GoodPath( description = "a path with spaces", - path = s"dos://$bucket/hello/world/with spaces", + path = s"drs://$bucket/hello/world/with spaces", normalize = false, - pathAsString = s"dos://$bucket/hello/world/with spaces", + pathAsString = s"drs://$bucket/hello/world/with spaces", pathWithoutScheme = s"$bucket/hello/world/with spaces", - parent = s"dos://$bucket/hello/world/", - getParent = s"dos://$bucket/hello/world/", - root = s"dos://$bucket/", + parent = s"drs://$bucket/hello/world/", + getParent = s"drs://$bucket/hello/world/", + root = s"drs://$bucket/", name = "with spaces", - getFileName = s"dos://$bucket/with spaces", + getFileName = s"drs://$bucket/with spaces", getNameCount = 3, isAbsolute = true), GoodPath( description = "a path with non-ascii", - path = s"dos://$bucket/hello/world/with non ascii £€", + path = s"drs://$bucket/hello/world/with non ascii £€", normalize = false, - pathAsString = s"dos://$bucket/hello/world/with non ascii £€", + pathAsString = s"drs://$bucket/hello/world/with non ascii £€", pathWithoutScheme = s"$bucket/hello/world/with non ascii £€", - parent = s"dos://$bucket/hello/world/", - getParent = s"dos://$bucket/hello/world/", - root = s"dos://$bucket/", + parent = s"drs://$bucket/hello/world/", + getParent = s"drs://$bucket/hello/world/", + root = s"drs://$bucket/", name = "with non ascii £€", - getFileName = s"dos://$bucket/with non ascii £€", + getFileName = s"drs://$bucket/with non ascii £€", getNameCount = 3, isAbsolute = true), GoodPath( description = "a gs uri path with encoded characters", - path = s"dos://$bucket/hello/world/encoded%20spaces", + path = s"drs://$bucket/hello/world/encoded%20spaces", normalize = false, - 
pathAsString = s"dos://$bucket/hello/world/encoded%20spaces", + pathAsString = s"drs://$bucket/hello/world/encoded%20spaces", pathWithoutScheme = s"$bucket/hello/world/encoded%20spaces", - parent = s"dos://$bucket/hello/world/", - getParent = s"dos://$bucket/hello/world/", - root = s"dos://$bucket/", + parent = s"drs://$bucket/hello/world/", + getParent = s"drs://$bucket/hello/world/", + root = s"drs://$bucket/", name = "encoded%20spaces", - getFileName = s"dos://$bucket/encoded%20spaces", + getFileName = s"drs://$bucket/encoded%20spaces", getNameCount = 3, isAbsolute = true), GoodPath( description = "a file at the top of the bucket", - path = s"dos://$bucket/hello", + path = s"drs://$bucket/hello", normalize = false, - pathAsString = s"dos://$bucket/hello", + pathAsString = s"drs://$bucket/hello", pathWithoutScheme = s"$bucket/hello", - parent = s"dos://$bucket/", - getParent = s"dos://$bucket/", - root = s"dos://$bucket/", + parent = s"drs://$bucket/", + getParent = s"drs://$bucket/", + root = s"drs://$bucket/", name = "hello", - getFileName = s"dos://$bucket/hello", + getFileName = s"drs://$bucket/hello", getNameCount = 1, isAbsolute = true), GoodPath( description = "a path ending in /", - path = s"dos://$bucket/hello/world/", + path = s"drs://$bucket/hello/world/", normalize = false, - pathAsString = s"dos://$bucket/hello/world", + pathAsString = s"drs://$bucket/hello/world", pathWithoutScheme = s"$bucket/hello/world", - parent = s"dos://$bucket/hello/", - getParent = s"dos://$bucket/hello/", - root = s"dos://$bucket/", + parent = s"drs://$bucket/hello/", + getParent = s"drs://$bucket/hello/", + root = s"drs://$bucket/", name = "world", - getFileName = s"dos://$bucket/world", + getFileName = s"drs://$bucket/world", getNameCount = 2, isAbsolute = true), @@ -113,57 +113,57 @@ class DrsPathBuilderSpec extends TestKitSuite with FlatSpecLike with Matchers wi GoodPath( description = "a bucket with a path .", - path = s"dos://$bucket/.", + path = s"drs://$bucket/.", normalize = false, - pathAsString = s"dos://$bucket/.", + pathAsString = s"drs://$bucket/.", pathWithoutScheme = s"$bucket/.", parent = null, - getParent = s"dos://$bucket/", - root = s"dos://$bucket/", + getParent = s"drs://$bucket/", + root = s"drs://$bucket/", name = "", - getFileName = s"dos://$bucket/.", + getFileName = s"drs://$bucket/.", getNameCount = 1, isAbsolute = true), GoodPath( description = "a bucket with a path ..", - path = s"dos://$bucket/..", + path = s"drs://$bucket/..", normalize = false, - pathAsString = s"dos://$bucket/..", + pathAsString = s"drs://$bucket/..", pathWithoutScheme = s"$bucket/..", parent = null, - getParent = s"dos://$bucket/", + getParent = s"drs://$bucket/", root = null, name = "", - getFileName = s"dos://$bucket/..", + getFileName = s"drs://$bucket/..", getNameCount = 1, isAbsolute = true), GoodPath( description = "a bucket including . in the path", - path = s"dos://$bucket/hello/./world", + path = s"drs://$bucket/hello/./world", normalize = false, - pathAsString = s"dos://$bucket/hello/./world", + pathAsString = s"drs://$bucket/hello/./world", pathWithoutScheme = s"$bucket/hello/./world", - parent = s"dos://$bucket/hello/", - getParent = s"dos://$bucket/hello/./", - root = s"dos://$bucket/", + parent = s"drs://$bucket/hello/", + getParent = s"drs://$bucket/hello/./", + root = s"drs://$bucket/", name = "world", - getFileName = s"dos://$bucket/world", + getFileName = s"drs://$bucket/world", getNameCount = 3, isAbsolute = true), GoodPath( description = "a bucket including .. 
in the path", - path = s"dos://$bucket/hello/../world", + path = s"drs://$bucket/hello/../world", normalize = false, - pathAsString = s"dos://$bucket/hello/../world", + pathAsString = s"drs://$bucket/hello/../world", pathWithoutScheme = s"$bucket/hello/../world", - parent = s"dos://$bucket/", - getParent = s"dos://$bucket/hello/../", - root = s"dos://$bucket/", + parent = s"drs://$bucket/", + getParent = s"drs://$bucket/hello/../", + root = s"drs://$bucket/", name = "world", - getFileName = s"dos://$bucket/world", + getFileName = s"drs://$bucket/world", getNameCount = 3, isAbsolute = true), @@ -171,13 +171,13 @@ class DrsPathBuilderSpec extends TestKitSuite with FlatSpecLike with Matchers wi GoodPath( description = "a bucket with a normalized path .", - path = s"dos://$bucket/.", + path = s"drs://$bucket/.", normalize = true, - pathAsString = s"dos://$bucket/", + pathAsString = s"drs://$bucket/", pathWithoutScheme = s"$bucket/", parent = null, getParent = null, - root = s"dos://$bucket/", + root = s"drs://$bucket/", name = "", getFileName = null, getNameCount = 0, @@ -185,13 +185,13 @@ class DrsPathBuilderSpec extends TestKitSuite with FlatSpecLike with Matchers wi GoodPath( description = "a bucket with a normalized path ..", - path = s"dos://$bucket/..", + path = s"drs://$bucket/..", normalize = true, - pathAsString = s"dos://$bucket/", + pathAsString = s"drs://$bucket/", pathWithoutScheme = s"$bucket/", parent = null, getParent = null, - root = s"dos://$bucket/", + root = s"drs://$bucket/", name = "", getFileName = null, getNameCount = 1, @@ -199,83 +199,83 @@ class DrsPathBuilderSpec extends TestKitSuite with FlatSpecLike with Matchers wi GoodPath( description = "a bucket including . in the normalized path", - path = s"dos://$bucket/hello/./world", + path = s"drs://$bucket/hello/./world", normalize = true, - pathAsString = s"dos://$bucket/hello/world", + pathAsString = s"drs://$bucket/hello/world", pathWithoutScheme = s"$bucket/hello/world", - parent = s"dos://$bucket/hello/", - getParent = s"dos://$bucket/hello/", - root = s"dos://$bucket/", + parent = s"drs://$bucket/hello/", + getParent = s"drs://$bucket/hello/", + root = s"drs://$bucket/", name = "world", - getFileName = s"dos://$bucket/world", + getFileName = s"drs://$bucket/world", getNameCount = 2, isAbsolute = true), GoodPath( description = "a bucket including .. 
in the normalized path", - path = s"dos://$bucket/hello/../world", + path = s"drs://$bucket/hello/../world", normalize = true, - pathAsString = s"dos://$bucket/world", + pathAsString = s"drs://$bucket/world", pathWithoutScheme = s"$bucket/world", - parent = s"dos://$bucket/", - getParent = s"dos://$bucket/", - root = s"dos://$bucket/", + parent = s"drs://$bucket/", + getParent = s"drs://$bucket/", + root = s"drs://$bucket/", name = "world", - getFileName = s"dos://$bucket/world", + getFileName = s"drs://$bucket/world", getNameCount = 1, isAbsolute = true), GoodPath( description = "a bucket with an underscore", - path = s"dos://hello_underscore/world", + path = s"drs://hello_underscore/world", normalize = true, - pathAsString = s"dos://hello_underscore/world", + pathAsString = s"drs://hello_underscore/world", pathWithoutScheme = s"hello_underscore/world", - parent = s"dos://hello_underscore/", - getParent = s"dos://hello_underscore/", - root = s"dos://hello_underscore/", + parent = s"drs://hello_underscore/", + getParent = s"drs://hello_underscore/", + root = s"drs://hello_underscore/", name = "world", - getFileName = s"dos://hello_underscore/world", + getFileName = s"drs://hello_underscore/world", getNameCount = 1, isAbsolute = true), GoodPath( description = "a bucket named .", - path = s"dos://./hello/world", + path = s"drs://./hello/world", normalize = true, - pathAsString = s"dos://./hello/world", + pathAsString = s"drs://./hello/world", pathWithoutScheme = s"./hello/world", - parent = s"dos://./hello/", - getParent = s"dos://./hello/", - root = s"dos://./", + parent = s"drs://./hello/", + getParent = s"drs://./hello/", + root = s"drs://./", name = "world", - getFileName = s"dos://./world", + getFileName = s"drs://./world", getNameCount = 2, isAbsolute = true), GoodPath( description = "a non ascii bucket name", - path = s"dos://nonasciibucket£€/hello/world", + path = s"drs://nonasciibucket£€/hello/world", normalize = true, - pathAsString = s"dos://nonasciibucket£€/hello/world", + pathAsString = s"drs://nonasciibucket£€/hello/world", pathWithoutScheme = s"nonasciibucket£€/hello/world", - parent = s"dos://nonasciibucket£€/hello/", - getParent = s"dos://nonasciibucket£€/hello/", - root = s"dos://nonasciibucket£€/", + parent = s"drs://nonasciibucket£€/hello/", + getParent = s"drs://nonasciibucket£€/hello/", + root = s"drs://nonasciibucket£€/", name = "world", - getFileName = s"dos://nonasciibucket£€/world", + getFileName = s"drs://nonasciibucket£€/world", getNameCount = 2, isAbsolute = true), GoodPath( description = "an non-absolute path without a host", - path = s"dos://blah/", + path = s"drs://blah/", normalize = false, - pathAsString = s"dos://blah/", + pathAsString = s"drs://blah/", pathWithoutScheme = s"blah/", parent = null, getParent = null, - root = s"dos://blah/", + root = s"drs://blah/", name = "", getFileName = null, getNameCount = 1, @@ -283,13 +283,13 @@ class DrsPathBuilderSpec extends TestKitSuite with FlatSpecLike with Matchers wi GoodPath( description = "an absolute path without a host", - path = s"dos://blah", + path = s"drs://blah", normalize = false, - pathAsString = s"dos://blah/", + pathAsString = s"drs://blah/", pathWithoutScheme = s"blah/", parent = null, getParent = null, - root = s"dos://blah/", + root = s"drs://blah/", name = "", getFileName = null, getNameCount = 1, @@ -297,13 +297,13 @@ class DrsPathBuilderSpec extends TestKitSuite with FlatSpecLike with Matchers wi ) private def badPaths = Seq( - BadPath("an empty path", "", " does not have a dos scheme."), - 
BadPath("a GCS path", s"gs://$bucket/hello/world", "gs://mymadeupbucket/hello/world does not have a dos scheme."), - BadPath("an bucketless path", "dos://", "Expected authority at index 6: dos://"), - BadPath("a https path", "https://hello/world", "https://hello/world does not have a dos scheme."), - BadPath("a file uri path", "file:///hello/world", "file:///hello/world does not have a dos scheme."), - BadPath("a relative file path", "hello/world", "hello/world does not have a dos scheme."), - BadPath("an absolute file path", "/hello/world", "/hello/world does not have a dos scheme."), + BadPath("an empty path", "", " does not have a drs scheme."), + BadPath("a GCS path", s"gs://$bucket/hello/world", "gs://mymadeupbucket/hello/world does not have a drs scheme."), + BadPath("an bucketless path", "drs://", "Expected authority at index 6: drs://"), + BadPath("a https path", "https://hello/world", "https://hello/world does not have a drs scheme."), + BadPath("a file uri path", "file:///hello/world", "file:///hello/world does not have a drs scheme."), + BadPath("a relative file path", "hello/world", "hello/world does not have a drs scheme."), + BadPath("an absolute file path", "/hello/world", "/hello/world does not have a drs scheme."), ) private def drsReadInterpreter(marthaResponse: MarthaResponse): IO[ReadableByteChannel] = diff --git a/filesystems/drs/src/test/scala/cromwell/filesystems/drs/MockDrsPathResolver.scala b/filesystems/drs/src/test/scala/cromwell/filesystems/drs/MockDrsPathResolver.scala index b52a576a9e4..a3e1b00c1b5 100644 --- a/filesystems/drs/src/test/scala/cromwell/filesystems/drs/MockDrsPathResolver.scala +++ b/filesystems/drs/src/test/scala/cromwell/filesystems/drs/MockDrsPathResolver.scala @@ -28,14 +28,14 @@ class MockDrsPathResolver(drsConfig: DrsConfig, httpClientBuilder: HttpClientBui private def createMarthaResponse(urlArray: Array[Url]): IO[MarthaResponse] = { - val dosDataObject = DosDataObject( + val drsDataObject = DrsDataObject( size = Option(123), checksums = Option(Array(checksumObj)), updated = None, urls = urlArray ) - IO(MarthaResponse(DosObject(dosDataObject), Option(SADataObject(Json.fromString("{}"))))) + IO(MarthaResponse(DrsObject(drsDataObject), Option(SADataObject(Json.fromString("{}"))))) } override def resolveDrsThroughMartha(drsPath: String, serviceAccount: Option[String]): IO[MarthaResponse] = { @@ -62,7 +62,7 @@ class MockDrsCloudNioFileSystemProvider(config: Config, object MockDrsPaths { - private val drsPathPrefix = "dos://drs-host/" + private val drsPathPrefix = "drs://drs-host/" val drsPathResolvingToOneGcsPath = s"$drsPathPrefix/4d427aa3-5640-4f00-81ae-c33443f84acf" diff --git a/languageFactories/cwl-v1-0/src/main/scala/languages/cwl/CwlV1_0LanguageFactory.scala b/languageFactories/cwl-v1-0/src/main/scala/languages/cwl/CwlV1_0LanguageFactory.scala index 3f353f28846..a86f539e203 100644 --- a/languageFactories/cwl-v1-0/src/main/scala/languages/cwl/CwlV1_0LanguageFactory.scala +++ b/languageFactories/cwl-v1-0/src/main/scala/languages/cwl/CwlV1_0LanguageFactory.scala @@ -66,7 +66,7 @@ class CwlV1_0LanguageFactory(override val config: Config) extends LanguageFactor override def createExecutable(womBundle: WomBundle, inputs: WorkflowJson, ioFunctions: IoFunctionSet): Checked[ValidatedWomNamespace] = enabledCheck flatMap { _ => "No createExecutable method implemented in CWL v1".invalidNelCheck } - override def looksParsable(content: String): Boolean = content.lines.exists { l => + override def looksParsable(content: String): Boolean = 
content.linesIterator.exists { l => val trimmed = l.trim.stripSuffix(",") trimmed == """"cwlVersion": "v1.0"""" || trimmed == "cwlVersion: v1.0" } diff --git a/languageFactories/language-factory-core/src/main/scala/cromwell/languages/util/ImportResolver.scala b/languageFactories/language-factory-core/src/main/scala/cromwell/languages/util/ImportResolver.scala index db78589f96c..be547ec6896 100644 --- a/languageFactories/language-factory-core/src/main/scala/cromwell/languages/util/ImportResolver.scala +++ b/languageFactories/language-factory-core/src/main/scala/cromwell/languages/util/ImportResolver.scala @@ -17,10 +17,12 @@ import common.validation.Checked._ import common.validation.Validation._ import cromwell.core.path.{DefaultPathBuilder, Path} import java.nio.file.{Path => NioPath} +import java.security.MessageDigest import cromwell.core.WorkflowId import wom.ResolvedImportRecord import wom.core.WorkflowSource +import wom.values._ import scala.concurrent.duration._ import scala.concurrent.Await @@ -38,12 +40,16 @@ object ImportResolver { innerResolver(request.toResolve, request.currentResolvers).contextualizeErrors(s"resolve '${request.toResolve}' using resolver: '$name'") } def cleanupIfNecessary(): ErrorOr[Unit] + + // Used when checking that imports are unchanged when caching parse results. + // If it's impossible or infeasible to guarantee that imports are unchanged, returns an invalid response. + def hashKey: ErrorOr[String] } object DirectoryResolver { - def apply(directory: Path, allowEscapingDirectory: Boolean, customName: Option[String]): DirectoryResolver = { + private def apply(directory: Path, allowEscapingDirectory: Boolean, customName: Option[String]): DirectoryResolver = { val dontEscapeFrom = if (allowEscapingDirectory) None else Option(directory.toJava.getCanonicalPath) - DirectoryResolver(directory, dontEscapeFrom, customName) + DirectoryResolver(directory, dontEscapeFrom, customName, deleteOnClose = false, directoryHash = None) } def localFilesystemResolvers(baseWdl: Option[Path]) = List( @@ -69,14 +75,15 @@ object ImportResolver { case class DirectoryResolver(directory: Path, dontEscapeFrom: Option[String] = None, customName: Option[String], - deleteOnClose: Boolean = false) extends ImportResolver { + deleteOnClose: Boolean, + directoryHash: Option[String]) extends ImportResolver { lazy val absolutePathToDirectory: String = directory.toJava.getCanonicalPath override def innerResolver(path: String, currentResolvers: List[ImportResolver]): Checked[ResolvedImportBundle] = { def updatedResolverSet(oldRootDirectory: Path, newRootDirectory: Path, current: List[ImportResolver]): List[ImportResolver] = { current map { - case d if d == this => DirectoryResolver(newRootDirectory, dontEscapeFrom, customName) + case d if d == this => DirectoryResolver(newRootDirectory, dontEscapeFrom, customName, deleteOnClose = false, directoryHash = None) case other => other } } @@ -139,11 +146,15 @@ object ImportResolver { }.toErrorOr else ().validNel + + override def hashKey: ErrorOr[String] = directoryHash.map(_.validNel).getOrElse("No hashKey available for directory importer".invalidNel) } def zippedImportResolver(zippedImports: Array[Byte], workflowId: WorkflowId): ErrorOr[DirectoryResolver] = { + + val zipHash = new String(MessageDigest.getInstance("MD5").digest(zippedImports)) LanguageFactoryUtil.createImportsDirectory(zippedImports, workflowId) map { dir => - DirectoryResolver(dir, Option(dir.toJava.getCanonicalPath), None, deleteOnClose = true) + DirectoryResolver(dir, 
Option(dir.toJava.getCanonicalPath), None, deleteOnClose = true, directoryHash = Option(zipHash)) } } @@ -194,6 +205,8 @@ object ImportResolver { } override def cleanupIfNecessary(): ErrorOr[Unit] = ().validNel + + override def hashKey: ErrorOr[String] = relativeTo.toString.md5Sum.validNel } object HttpResolver { diff --git a/languageFactories/language-factory-core/src/main/scala/cromwell/languages/util/LanguageFactoryUtil.scala b/languageFactories/language-factory-core/src/main/scala/cromwell/languages/util/LanguageFactoryUtil.scala index e91151f3776..fd731893479 100644 --- a/languageFactories/language-factory-core/src/main/scala/cromwell/languages/util/LanguageFactoryUtil.scala +++ b/languageFactories/language-factory-core/src/main/scala/cromwell/languages/util/LanguageFactoryUtil.scala @@ -89,7 +89,7 @@ object LanguageFactoryUtil { } def simpleLooksParseable(startsWithOptions: List[String], commentIndicators: List[String])(content: String): Boolean = { - val fileWithoutInitialWhitespace = content.lines.toList.dropWhile { l => + val fileWithoutInitialWhitespace = content.linesIterator.toList.dropWhile { l => l.forall(_.isWhitespace) || commentIndicators.exists(l.dropWhile(_.isWhitespace).startsWith(_)) } diff --git a/languageFactories/language-factory-core/src/main/scala/cromwell/languages/util/ParserCache.scala b/languageFactories/language-factory-core/src/main/scala/cromwell/languages/util/ParserCache.scala new file mode 100644 index 00000000000..4d5008c5d46 --- /dev/null +++ b/languageFactories/language-factory-core/src/main/scala/cromwell/languages/util/ParserCache.scala @@ -0,0 +1,74 @@ +package cromwell.languages.util + +import java.util.concurrent.Callable + +import cats.data.Validated.{Invalid, Valid} +import cats.instances.list._ +import cats.syntax.traverse._ +import com.google.common.cache.{Cache, CacheBuilder} +import com.typesafe.config.Config +import com.typesafe.scalalogging.StrictLogging +import common.validation.ErrorOr.ErrorOr +import cromwell.core.CacheConfig +import cromwell.languages.LanguageFactory +import cromwell.languages.util.ImportResolver.ImportResolver +import cromwell.languages.util.ParserCache.ParserCacheInputs +import mouse.all._ +import net.ceedubs.ficus.Ficus._ +import wom.core.{WorkflowSource, WorkflowUrl} +import wom.values._ + +import scala.concurrent.duration._ + +trait ParserCache[A] extends StrictLogging { this: LanguageFactory => + + def retrieveOrCalculate(cacheInputs: ParserCacheInputs, + calculationCallable: Callable[ErrorOr[A]]): ErrorOr[A] = { + + (cache map { c: Cache[String, ErrorOr[A]] => + workflowHashKey(cacheInputs.workflowSource, cacheInputs.workflowUrl, cacheInputs.workflowRoot, cacheInputs.importResolvers) match { + case Valid(hashKey) => c.get(hashKey, calculationCallable) + case Invalid(errors) => + logger.info(s"Failed to calculate hash key for 'workflow source to WOM' cache: {}", errors.toList.mkString(", ")) + calculationCallable.call + } + }).getOrElse(calculationCallable.call()) + } + + private[this] def workflowHashKey(workflowSource: Option[WorkflowSource], + workflowUrl: Option[WorkflowUrl], + workflowRoot: Option[String], + importResolvers: List[ImportResolver]): ErrorOr[String] = { + def stringOptionToHash(opt: Option[String]): String = opt map { _.md5Sum } getOrElse "" + + val importResolversToHash: ErrorOr[String] = importResolvers.traverse(_.hashKey).map(_.mkString(":")) + + importResolversToHash map { importHash => + 
s"${stringOptionToHash(workflowSource)}:${stringOptionToHash(workflowUrl)}:${stringOptionToHash(workflowRoot)}:$importHash" + } + } + + private[this] lazy val cacheConfig: Option[CacheConfig] = { + // Caching is an opt-in activity: + for { + _ <- enabled.option(()) + cachingConfigSection <- config.as[Option[Config]]("caching") + cc <- CacheConfig.optionalConfig(cachingConfigSection, defaultConcurrency = 2, defaultSize = 1000L, defaultTtl = 20.minutes) + } yield cc + } + + private[this] lazy val cache: Option[Cache[String, ErrorOr[A]]] = cacheConfig map { c => + CacheBuilder.newBuilder() + .concurrencyLevel(c.concurrency) + .expireAfterAccess(c.ttl.length, c.ttl.unit) + .maximumSize(c.size) + .build[WorkflowSource, ErrorOr[A]]() + } +} + +object ParserCache { + final case class ParserCacheInputs(workflowSource: Option[WorkflowSource], + workflowUrl: Option[WorkflowUrl], + workflowRoot: Option[String], + importResolvers: List[ImportResolver]) +} diff --git a/languageFactories/language-factory-core/src/test/scala/cromwell/languages/util/ImportResolverSpec.scala b/languageFactories/language-factory-core/src/test/scala/cromwell/languages/util/ImportResolverSpec.scala index e097235f9ef..8b2fce79756 100644 --- a/languageFactories/language-factory-core/src/test/scala/cromwell/languages/util/ImportResolverSpec.scala +++ b/languageFactories/language-factory-core/src/test/scala/cromwell/languages/util/ImportResolverSpec.scala @@ -121,7 +121,7 @@ class ImportResolverSpec extends FlatSpec with Matchers { behavior of "directory resolver from root" val workingDirectory = sys.props("user.dir") - val rootDirectoryResolver = DirectoryResolver(DefaultPath(Paths.get("/")), customName = None) + val rootDirectoryResolver = DirectoryResolver(DefaultPath(Paths.get("/")), customName = None, deleteOnClose = false, directoryHash = None) it should "resolve a random path" in { val pathToLookup = rootDirectoryResolver.resolveAndMakeAbsolute("/path/to/file.wdl") @@ -140,10 +140,10 @@ class ImportResolverSpec extends FlatSpec with Matchers { behavior of "unprotected relative directory resolver" - val relativeDirectoryResolver = DirectoryResolver(DefaultPath(Paths.get("/path/to/imports/")), customName = None) + val relativeDirectoryResolver = DirectoryResolver(DefaultPath(Paths.get("/path/to/imports/")), customName = None, deleteOnClose = false, directoryHash = None) val relativeDirForSampleWf = s"$workingDirectory/languageFactories/language-factory-core/src/test/" - val relativeDirResolverForSampleWf = DirectoryResolver(DefaultPath(Paths.get(relativeDirForSampleWf)), customName = None) + val relativeDirResolverForSampleWf = DirectoryResolver(DefaultPath(Paths.get(relativeDirForSampleWf)), customName = None, deleteOnClose = false, directoryHash = None) it should "resolve an absolute path" in { val pathToLookup = relativeDirectoryResolver.resolveAndMakeAbsolute("/path/to/file.wdl") @@ -177,8 +177,8 @@ class ImportResolverSpec extends FlatSpec with Matchers { behavior of "protected relative directory resolver" - val protectedRelativeDirectoryResolver = DirectoryResolver(DefaultPath(Paths.get("/path/to/imports/")), Some("/path/to/imports/"), customName = None) - val protectedRelativeDirResolverForSampleWf = DirectoryResolver(DefaultPath(Paths.get(relativeDirForSampleWf)), Some(relativeDirForSampleWf), customName = None) + val protectedRelativeDirectoryResolver = DirectoryResolver(DefaultPath(Paths.get("/path/to/imports/")), Some("/path/to/imports/"), customName = None, deleteOnClose = false, directoryHash = None) + val 
protectedRelativeDirResolverForSampleWf = DirectoryResolver(DefaultPath(Paths.get(relativeDirForSampleWf)), Some(relativeDirForSampleWf), customName = None, deleteOnClose = false, directoryHash = None) it should "resolve a good relative path" in { val pathToLookup = protectedRelativeDirectoryResolver.resolveAndMakeAbsolute("path/to/file.wdl") diff --git a/languageFactories/wdl-biscayne/src/main/scala/languages/wdl/biscayne/WdlBiscayneLanguageFactory.scala b/languageFactories/wdl-biscayne/src/main/scala/languages/wdl/biscayne/WdlBiscayneLanguageFactory.scala index f428432800c..f647e70560a 100644 --- a/languageFactories/wdl-biscayne/src/main/scala/languages/wdl/biscayne/WdlBiscayneLanguageFactory.scala +++ b/languageFactories/wdl-biscayne/src/main/scala/languages/wdl/biscayne/WdlBiscayneLanguageFactory.scala @@ -1,15 +1,20 @@ package languages.wdl.biscayne +import java.util.concurrent.Callable + +import cats.syntax.either._ import cats.data.EitherT.fromEither import cats.effect.IO import cats.instances.either._ import com.typesafe.config.Config import common.Checked import common.transforms.CheckedAtoB +import common.validation.ErrorOr.ErrorOr import common.validation.IOChecked.IOChecked import cromwell.core._ import cromwell.languages.util.ImportResolver._ -import cromwell.languages.util.LanguageFactoryUtil +import cromwell.languages.util.{LanguageFactoryUtil, ParserCache} +import cromwell.languages.util.ParserCache.ParserCacheInputs import cromwell.languages.{LanguageFactory, ValidatedWomNamespace} import wdl.transforms.base.wdlom2wom.WomBundleToWomExecutable._ import wdl.transforms.base.wdlom2wom._ @@ -22,7 +27,7 @@ import wom.executable.WomBundle import wom.expression.IoFunctionSet import wom.transforms.WomExecutableMaker.ops._ -class WdlBiscayneLanguageFactory(override val config: Config) extends LanguageFactory { +class WdlBiscayneLanguageFactory(override val config: Config) extends LanguageFactory with ParserCache[WomBundle] { override val languageName: String = "WDL" override val languageVersionName: String = "Biscayne" @@ -53,10 +58,21 @@ class WdlBiscayneLanguageFactory(override val config: Config) extends LanguageFa importResolvers: List[ImportResolver], languageFactories: List[LanguageFactory], convertNestedScatterToSubworkflow : Boolean = true): Checked[WomBundle] = { - val checkEnabled: CheckedAtoB[FileStringParserInput, FileStringParserInput] = CheckedAtoB.fromCheck(x => enabledCheck map(_ => x)) - val converter: CheckedAtoB[FileStringParserInput, WomBundle] = checkEnabled andThen stringToAst andThen wrapAst andThen astToFileElement.map(FileElementToWomBundleInputs(_, workflowOptionsJson, convertNestedScatterToSubworkflow, importResolvers, languageFactories, workflowDefinitionElementToWomWorkflowDefinition, taskDefinitionElementToWomTaskDefinition)) andThen fileElementToWomBundle - converter.run(FileStringParserInput(workflowSource, "input.wdl")) - .map(b => b.copyResolvedImportRecord(b, workflowSourceOrigin)) + + val converter: CheckedAtoB[FileStringParserInput, WomBundle] = stringToAst andThen wrapAst andThen astToFileElement.map(FileElementToWomBundleInputs(_, workflowOptionsJson, convertNestedScatterToSubworkflow, importResolvers, languageFactories, workflowDefinitionElementToWomWorkflowDefinition, taskDefinitionElementToWomTaskDefinition)) andThen fileElementToWomBundle + + lazy val validationCallable = new Callable[ErrorOr[WomBundle]] { + def call: ErrorOr[WomBundle] = converter + .run(FileStringParserInput(workflowSource, 
workflowSourceOrigin.map(_.importPath).getOrElse("input.wdl"))) + .map(b => b.copyResolvedImportRecord(b, workflowSourceOrigin)).toValidated + } + + lazy val parserCacheInputs = ParserCacheInputs(Option(workflowSource), workflowSourceOrigin.map(_.importPath), None, importResolvers) + + for { + _ <- enabledCheck + womBundle <- retrieveOrCalculate(parserCacheInputs, validationCallable).toEither + } yield womBundle } override def createExecutable(womBundle: WomBundle, inputsJson: WorkflowJson, ioFunctions: IoFunctionSet): Checked[ValidatedWomNamespace] = { diff --git a/languageFactories/wdl-draft2/src/main/scala/languages/wdl/draft2/WdlDraft2LanguageFactory.scala b/languageFactories/wdl-draft2/src/main/scala/languages/wdl/draft2/WdlDraft2LanguageFactory.scala index 8ede9953213..35208f6c43d 100644 --- a/languageFactories/wdl-draft2/src/main/scala/languages/wdl/draft2/WdlDraft2LanguageFactory.scala +++ b/languageFactories/wdl-draft2/src/main/scala/languages/wdl/draft2/WdlDraft2LanguageFactory.scala @@ -1,6 +1,5 @@ package languages.wdl.draft2 -import java.security.MessageDigest import java.util.concurrent.Callable import cats.data.EitherT.fromEither @@ -9,8 +8,8 @@ import cats.instances.either._ import cats.instances.list._ import cats.syntax.functor._ import cats.syntax.traverse._ -import com.google.common.cache.{Cache, CacheBuilder} import com.typesafe.config.Config +import com.typesafe.scalalogging.StrictLogging import common.Checked import common.validation.Checked._ import common.validation.ErrorOr._ @@ -18,13 +17,14 @@ import common.validation.IOChecked.IOChecked import common.validation.Validation._ import cromwell.core._ import cromwell.languages.util.ImportResolver.{ImportResolutionRequest, ImportResolver} -import cromwell.languages.util.{ImportResolver, LanguageFactoryUtil} +import cromwell.languages.util.ParserCache.ParserCacheInputs +import cromwell.languages.util.{ImportResolver, LanguageFactoryUtil, ParserCache} import cromwell.languages.{LanguageFactory, ValidatedWomNamespace} import languages.wdl.draft2.WdlDraft2LanguageFactory._ import mouse.all._ import net.ceedubs.ficus.Ficus._ import wdl.draft2.Draft2ResolvedImportBundle -import wdl.draft2.model.{Draft2ImportResolver, WdlNamespace, WdlNamespaceWithWorkflow} +import wdl.draft2.model.{Draft2ImportResolver, WdlNamespace, WdlNamespaceWithWorkflow, WdlNamespaceWithoutWorkflow} import wdl.shared.transforms.wdlom2wom.WdlSharedInputParsing import wdl.transforms.draft2.wdlom2wom.WdlDraft2WomBundleMakers._ import wdl.transforms.draft2.wdlom2wom.WdlDraft2WomExecutableMakers._ @@ -40,7 +40,7 @@ import wom.values._ import scala.concurrent.duration._ import scala.language.postfixOps -class WdlDraft2LanguageFactory(override val config: Config) extends LanguageFactory { +class WdlDraft2LanguageFactory(override val config: Config) extends LanguageFactory with ParserCache[WdlNamespace] with StrictLogging { override val languageName: String = "WDL" override val languageVersionName: String = "draft-2" @@ -53,28 +53,30 @@ class WdlDraft2LanguageFactory(override val config: Config) extends LanguageFact ioFunctions: IoFunctionSet, importResolvers: List[ImportResolver]): IOChecked[ValidatedWomNamespace] = { - def checkTypes(namespace: WdlNamespaceWithWorkflow, inputs: Map[OutputPort, WomValue]): Checked[Unit] = { - val allDeclarations = namespace.workflow.declarations ++ namespace.workflow.calls.flatMap(_.declarations) - val list: List[Checked[Unit]] = inputs.map({ case (k, v) => - allDeclarations.find(_.fullyQualifiedName == k) match { - case 
Some(decl) if decl.womType.coerceRawValue(v).isFailure => - s"Invalid right-side type of '$k'. Expecting ${decl.womType.stableName}, got ${v.womType.stableName}".invalidNelCheck[Unit] - case _ => ().validNelCheck - } - }).toList - - list.sequence[Checked, Unit].void - } + def checkTypes(namespace: WdlNamespace, inputs: Map[OutputPort, WomValue]): Checked[Unit] = namespace match { + + case namespaceWithWorkflow: WdlNamespaceWithWorkflow => + val allDeclarations = namespaceWithWorkflow.workflow.declarations ++ namespaceWithWorkflow.workflow.calls.flatMap(_.declarations) + val list: List[Checked[Unit]] = inputs.map({ case (k, v) => + allDeclarations.find(_.fullyQualifiedName == k) match { + case Some(decl) if decl.womType.coerceRawValue(v).isFailure => + s"Invalid right-side type of '$k'. Expecting ${decl.womType.stableName}, got ${v.womType.stableName}".invalidNelCheck[Unit] + case _ => ().validNelCheck + } + }).toList - def workflowHashKey: String = { - workflowSource.md5Sum + (source.importsZipFileOption map { bytes => new String(MessageDigest.getInstance("MD5").digest(bytes)) }).getOrElse("") + list.sequence[Checked, Unit].void + + case _: WdlNamespaceWithoutWorkflow => + logger.error("Programmer Error: validateNamespace should never get called on WdlNamespaceWithoutWorkflow") + "Cannot execute this WDL: no primary workflow provided".invalidNelCheck } - def validationCallable = new Callable[ErrorOr[WdlNamespaceWithWorkflow]] { - def call: ErrorOr[WdlNamespaceWithWorkflow] = WdlNamespaceWithWorkflow.load(workflowSource, importResolvers map resolverConverter).toErrorOr + def validationCallable = new Callable[ErrorOr[WdlNamespace]] { + def call: ErrorOr[WdlNamespace] = WdlNamespaceWithWorkflow.load(workflowSource, importResolvers map resolverConverter).toErrorOr } - lazy val wdlNamespaceValidation: ErrorOr[WdlNamespaceWithWorkflow] = namespaceCache.map(_.get(workflowHashKey, validationCallable)).getOrElse(validationCallable.call) + lazy val wdlNamespaceValidation: ErrorOr[WdlNamespace] = retrieveOrCalculate(ParserCacheInputs(Option(workflowSource), None, None, importResolvers), validationCallable) def evaluateImports(wdlNamespace: WdlNamespace): Map[String, String] = { // Descend the namespace looking for imports and construct `MetadataEvent`s for them. 
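The `retrieveOrCalculate` call introduced above delegates to the new `ParserCache` trait, which is essentially Guava's compute-if-absent pattern keyed on a hash of the workflow source and its import resolvers. Below is a minimal standalone sketch of that pattern — plain `String` values stand in for the real `ErrorOr[WdlNamespace]` / `ErrorOr[WomBundle]` results, and the cache settings are illustrative rather than the factory's actual `caching` config:

```scala
import java.util.concurrent.Callable

import com.google.common.cache.{Cache, CacheBuilder}

object ParserCacheSketch {
  // Illustrative settings; the real values come from the language factory's `caching` config block.
  private val cache: Cache[String, String] =
    CacheBuilder.newBuilder()
      .concurrencyLevel(2)
      .maximumSize(1000L)
      .build[String, String]()

  // Guava's Cache.get(key, loader) returns the cached value when present; on a miss it runs the
  // loader and stores the result, so identical submissions are parsed once (until eviction).
  def retrieveOrCalculate(hashKeyOption: Option[String], calculation: Callable[String]): String =
    hashKeyOption match {
      case Some(key) => cache.get(key, calculation)
      case None      => calculation.call() // no stable hash key: recompute every time
    }
}
```

The real trait builds its key from MD5 sums of the workflow source, URL, and root plus each import resolver's `hashKey`; resolvers that cannot guarantee their imports are unchanged return an invalid key, in which case the bundle is recomputed rather than served from the cache.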
@@ -104,7 +106,7 @@ class WdlDraft2LanguageFactory(override val config: Config) extends LanguageFact fromEither[IO](checked) } - private def validateWorkflowNameLengths(namespace: WdlNamespaceWithWorkflow): Checked[Unit] = { + private def validateWorkflowNameLengths(namespace: WdlNamespace): Checked[Unit] = { import common.validation.Checked._ def allWorkflowNames(n: WdlNamespace): Seq[String] = n.workflows.map(_.unqualifiedName) ++ n.namespaces.flatMap(allWorkflowNames) val tooLong = allWorkflowNames(namespace).filter(_.length >= 100) @@ -115,16 +117,21 @@ class WdlDraft2LanguageFactory(override val config: Config) extends LanguageFact } } - override def getWomBundle(workflowSource: WorkflowSource, workflowSourceOrigin: Option[ResolvedImportRecord], workflowOptionsJson: WorkflowOptionsJson, importResolvers: List[ImportResolver], languageFactories: List[LanguageFactory], convertNestedScatterToSubworkflow : Boolean = true): Checked[WomBundle] = { + lazy val validationCallable = new Callable[ErrorOr[WdlNamespace]] { + def call: ErrorOr[WdlNamespace] = WdlNamespace.loadUsingSource(workflowSource, None, Some(importResolvers map resolverConverter)).toErrorOr + } + + lazy val parserCacheInputs = ParserCacheInputs(Option(workflowSource), workflowSourceOrigin.map(_.importPath), None, importResolvers) + for { _ <- enabledCheck - namespace <- WdlNamespace.loadUsingSource(workflowSource, None, Some(importResolvers map resolverConverter)).toChecked + namespace <- retrieveOrCalculate(parserCacheInputs, validationCallable).toEither womBundle <- namespace.toWomBundle } yield womBundle.copyResolvedImportRecord(womBundle, workflowSourceOrigin) } @@ -146,14 +153,6 @@ class WdlDraft2LanguageFactory(override val config: Config) extends LanguageFact cc <- CacheConfig.optionalConfig(caching, defaultConcurrency = 2, defaultSize = 1000L, defaultTtl = 20 minutes) } yield cc } - - private[draft2] lazy val namespaceCache: Option[Cache[String, ErrorOr[WdlNamespaceWithWorkflow]]] = cacheConfig map { c => - CacheBuilder.newBuilder() - .concurrencyLevel(c.concurrency) - .expireAfterAccess(c.ttl.length, c.ttl.unit) - .maximumSize(c.size) - .build[WorkflowSource, ErrorOr[WdlNamespaceWithWorkflow]]() - } } object WdlDraft2LanguageFactory { diff --git a/languageFactories/wdl-draft3/src/main/scala/languages/wdl/draft3/WdlDraft3LanguageFactory.scala b/languageFactories/wdl-draft3/src/main/scala/languages/wdl/draft3/WdlDraft3LanguageFactory.scala index 0470c0a2b0b..b4da915d475 100644 --- a/languageFactories/wdl-draft3/src/main/scala/languages/wdl/draft3/WdlDraft3LanguageFactory.scala +++ b/languageFactories/wdl-draft3/src/main/scala/languages/wdl/draft3/WdlDraft3LanguageFactory.scala @@ -1,15 +1,20 @@ package languages.wdl.draft3 +import java.util.concurrent.Callable + import cats.data.EitherT.fromEither import cats.effect.IO import cats.instances.either._ +import cats.syntax.either._ import com.typesafe.config.Config import common.Checked import common.transforms.CheckedAtoB +import common.validation.ErrorOr.ErrorOr import common.validation.IOChecked.IOChecked import cromwell.core._ import cromwell.languages.util.ImportResolver._ -import cromwell.languages.util.LanguageFactoryUtil +import cromwell.languages.util.ParserCache.ParserCacheInputs +import cromwell.languages.util.{LanguageFactoryUtil, ParserCache} import cromwell.languages.{LanguageFactory, ValidatedWomNamespace} import wdl.draft3.transforms.ast2wdlom._ import wdl.draft3.transforms.parsing._ @@ -22,7 +27,7 @@ import wom.executable.WomBundle import 
wom.expression.IoFunctionSet import wom.transforms.WomExecutableMaker.ops._ -class WdlDraft3LanguageFactory(override val config: Config) extends LanguageFactory { +class WdlDraft3LanguageFactory(override val config: Config) extends LanguageFactory with ParserCache[WomBundle] { override val languageName: String = "WDL" override val languageVersionName: String = "1.0" @@ -47,16 +52,39 @@ class WdlDraft3LanguageFactory(override val config: Config) extends LanguageFact fromEither[IO](checked) } + + // The only reason this isn't a sub-def inside 'getWomBundle' is that it gets overridden in test cases: + protected def makeWomBundle(workflowSource: WorkflowSource, + workflowSourceOrigin: Option[ResolvedImportRecord], + workflowOptionsJson: WorkflowOptionsJson, + importResolvers: List[ImportResolver], + languageFactories: List[LanguageFactory], + convertNestedScatterToSubworkflow : Boolean = true): ErrorOr[WomBundle] = { + + val converter: CheckedAtoB[FileStringParserInput, WomBundle] = stringToAst andThen wrapAst andThen astToFileElement.map(FileElementToWomBundleInputs(_, workflowOptionsJson, convertNestedScatterToSubworkflow, importResolvers, languageFactories, workflowDefinitionElementToWomWorkflowDefinition, taskDefinitionElementToWomTaskDefinition)) andThen fileElementToWomBundle + + converter + .run(FileStringParserInput(workflowSource, workflowSourceOrigin.map(_.importPath).getOrElse("input.wdl"))) + .map(b => b.copyResolvedImportRecord(b, workflowSourceOrigin)).toValidated + } + override def getWomBundle(workflowSource: WorkflowSource, workflowSourceOrigin: Option[ResolvedImportRecord], workflowOptionsJson: WorkflowOptionsJson, importResolvers: List[ImportResolver], languageFactories: List[LanguageFactory], convertNestedScatterToSubworkflow : Boolean = true): Checked[WomBundle] = { - val checkEnabled: CheckedAtoB[FileStringParserInput, FileStringParserInput] = CheckedAtoB.fromCheck(x => enabledCheck map(_ => x)) - val converter: CheckedAtoB[FileStringParserInput, WomBundle] = checkEnabled andThen stringToAst andThen wrapAst andThen astToFileElement.map(FileElementToWomBundleInputs(_, workflowOptionsJson, convertNestedScatterToSubworkflow, importResolvers, languageFactories, workflowDefinitionElementToWomWorkflowDefinition, taskDefinitionElementToWomTaskDefinition)) andThen fileElementToWomBundle - converter.run(FileStringParserInput(workflowSource, "input.wdl")) - .map(b => b.copyResolvedImportRecord(b, workflowSourceOrigin)) + + lazy val validationCallable = new Callable[ErrorOr[WomBundle]] { + def call: ErrorOr[WomBundle] = makeWomBundle(workflowSource, workflowSourceOrigin, workflowOptionsJson, importResolvers, languageFactories, convertNestedScatterToSubworkflow) + } + + lazy val parserCacheInputs = ParserCacheInputs(Option(workflowSource), workflowSourceOrigin.map(_.importPath), None, importResolvers) + + for { + _ <- enabledCheck + womBundle <- retrieveOrCalculate(parserCacheInputs, validationCallable).toEither + } yield womBundle } override def createExecutable(womBundle: WomBundle, inputsJson: WorkflowJson, ioFunctions: IoFunctionSet): Checked[ValidatedWomNamespace] = { diff --git a/languageFactories/wdl-draft3/src/test/scala/languages/wdl/draft3/WdlDraft3CachingSpec.scala b/languageFactories/wdl-draft3/src/test/scala/languages/wdl/draft3/WdlDraft3CachingSpec.scala new file mode 100644 index 00000000000..bc266e306ec --- /dev/null +++ b/languageFactories/wdl-draft3/src/test/scala/languages/wdl/draft3/WdlDraft3CachingSpec.scala @@ -0,0 +1,129 @@ +package languages.wdl.draft3 + 
+import com.typesafe.config.{Config, ConfigFactory} +import common.validation.ErrorOr.ErrorOr +import cromwell.core.{WorkflowId, WorkflowOptions, WorkflowSourceFilesWithoutImports} +import cromwell.languages.LanguageFactory +import cromwell.languages.util.ImportResolver +import languages.wdl.draft3.WdlDraft3CachingSpec.EvaluationCountingDraft3Factory +import org.scalatest.{FlatSpec, Matchers} +import wom.ResolvedImportRecord +import wom.core.{WorkflowOptionsJson, WorkflowSource} +import wom.executable.WomBundle +import wom.expression.NoIoFunctionSet + +class WdlDraft3CachingSpec extends FlatSpec with Matchers { + + val languageConfig = ConfigFactory.parseString( + """{ + | strict-validation: true + | enabled: true + | caching { + | enabled: true + | ttl: 3 minutes + | size: 50 + | concurrency: 9 + | } + |} + |""".stripMargin + + ) + + + + it should "only evaluate files once" in { + val invalidWorkflowSource = + """ + |blah blah this isn't a real workflow + """.stripMargin + + val validWorkflowSource = + """version 1.0 + | + |task hello { + | input { + | String addressee + | } + | command { + | echo "Hello ${addressee}!" + | } + | runtime { + | docker: "ubuntu:latest" + | } + | output { + | String salutation = read_string(stdout()) + | } + |} + | + |workflow wf_hello { + | String wf_hello_input = "world" + | + | call hello { input: addressee = wf_hello_input } + | + | output { + | String salutation = hello.salutation + | } + |} + """.stripMargin + + val factory = new EvaluationCountingDraft3Factory(languageConfig) + + def validate(workflowSource: WorkflowSource) = factory.validateNamespace( + WorkflowSourceFilesWithoutImports( + Option(workflowSource), + None, + None, + None, + None, + "{}", + WorkflowOptions.empty, + "{}", + workflowOnHold = false, + Seq.empty + ), + workflowSource, + WorkflowOptions.empty, + importLocalFilesystem = false, + WorkflowId.randomId(), + NoIoFunctionSet, + List.empty + ) + + // Check the valid workflow twice: + validate(validWorkflowSource).isRight.unsafeRunSync() should be(true) + validate(validWorkflowSource).isRight.unsafeRunSync() should be(true) + + // But we only evaludated it once: + factory.evaluationCount should be(1) + + // Check the invalid workflow twice: + validate(invalidWorkflowSource).isRight.unsafeRunSync() should be(false) + validate(invalidWorkflowSource).isRight.unsafeRunSync() should be(false) + + // The factory only ran one extra evaluation: + factory.evaluationCount should be(2) + + // Run over the two workflows a few more times: + validate(validWorkflowSource).isRight.unsafeRunSync() should be(true) + validate(validWorkflowSource).isRight.unsafeRunSync() should be(true) + validate(invalidWorkflowSource).isRight.unsafeRunSync() should be(false) + validate(invalidWorkflowSource).isRight.unsafeRunSync() should be(false) + + // No additional evaluations were needed: + factory.evaluationCount should be(2) + } + +} + +object WdlDraft3CachingSpec { + class EvaluationCountingDraft3Factory(languageConfig: Config) extends WdlDraft3LanguageFactory(languageConfig) { + + var evaluationCount = 0 + + override protected def makeWomBundle(workflowSource: WorkflowSource, workflowSourceOrigin: Option[ResolvedImportRecord], workflowOptionsJson: WorkflowOptionsJson, importResolvers: List[ImportResolver.ImportResolver], languageFactories: List[LanguageFactory], convertNestedScatterToSubworkflow: Boolean): ErrorOr[WomBundle] = { + evaluationCount = evaluationCount + 1 + super.makeWomBundle(workflowSource, workflowSourceOrigin, workflowOptionsJson, 
importResolvers, languageFactories, convertNestedScatterToSubworkflow) + } + + } +} diff --git a/minnie-kenny.gitconfig b/minnie-kenny.gitconfig new file mode 100644 index 00000000000..1bfec70e27c --- /dev/null +++ b/minnie-kenny.gitconfig @@ -0,0 +1,15 @@ +[secrets] + providers = git secrets --aws-provider + patterns = (A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16} + patterns = (\"|')?(AWS|aws|Aws)?_?(SECRET|secret|Secret)?_?(ACCESS|access|Access)?_?(KEY|key|Key)(\"|')?\\s*(:|=>|=)\\s*(\"|')?[A-Za-z0-9/\\+=]{40}(\"|')? + patterns = (\"|')?(AWS|aws|Aws)?_?(ACCOUNT|account|Account)_?(ID|id|Id)?(\"|')?\\s*(:|=>|=)\\s*(\"|')?[0-9]{4}\\-?[0-9]{4}\\-?[0-9]{4}(\"|')? + allowed = AKIAIOSFODNN7EXAMPLE + allowed = wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY + patterns = private_key + patterns = private_key_id + allowed = \"private_key_id\": \"OMITTED\" + allowed = \"private_key\": \"-----BEGIN PRIVATE KEY-----\\\\nBASE64 ENCODED KEY WITH \\\\n TO REPRESENT NEWLINES\\\\n-----END PRIVATE KEY-----\\\\n\" + allowed = \"client_id\": \"22377410244549202395\" + allowed = `private_key` portion needs + allowed = .Data.private_key + allowed = .Data.service_account.private_key diff --git a/minnie-kenny.sh b/minnie-kenny.sh new file mode 100755 index 00000000000..55da944178d --- /dev/null +++ b/minnie-kenny.sh @@ -0,0 +1,200 @@ +#!/bin/sh +# Use this script to ensure git-secrets are setup +# https://minnie-kenny.readthedocs.io/ + +set -eu # -o pipefail isn't supported by POSIX + +minnie_kenny_command_name=${0##*/} +minnie_kenny_quiet=0 +minnie_kenny_strict=0 +minnie_kenny_modify=0 +minnie_kenny_gitconfig="minnie-kenny.gitconfig" + +usage() { + if [ ${minnie_kenny_quiet} -ne 1 ]; then + cat <&2 +Usage: + ${minnie_kenny_command_name} + -f | --force Modify the git config to run git secrets + -n | --no-force Do not modify the git config, only verify installation + -s | --strict Require git-secrets to be setup or fail + -q | --quiet Do not output any status messages + -i | --include=FILE Path to the include for git-config (default: "minnie-kenny.gitconfig") +USAGE + fi + exit 1 +} + +run_command() { if [ ${minnie_kenny_quiet} -ne 1 ]; then "$@"; else "$@" >/dev/null 2>&1; fi; } +echo_out() { if [ ${minnie_kenny_quiet} -ne 1 ]; then echo "$@"; fi; } +echo_err() { if [ ${minnie_kenny_quiet} -ne 1 ]; then echo "$@" 1>&2; fi; } + +process_arguments() { + while [ $# -gt 0 ]; do + case "$1" in + -q | --quiet) + minnie_kenny_quiet=1 + shift 1 + ;; + -s | --strict) + minnie_kenny_strict=1 + shift 1 + ;; + -f | --force) + minnie_kenny_modify=1 + shift 1 + ;; + -n | --no-force) + minnie_kenny_modify=0 + shift 1 + ;; + -i) + shift 1 + minnie_kenny_gitconfig="${1:-}" + if [ "${minnie_kenny_gitconfig}" = "" ]; then break; fi + shift 1 + ;; + --include=*) + minnie_kenny_gitconfig="${1#*=}" + shift 1 + ;; + --help) + usage + ;; + *) + echo_err "Unknown argument: $1" + usage + ;; + esac + done + + if [ "${minnie_kenny_gitconfig}" = "" ]; then + echo_err "Error: you need to provide a git-config include file." + usage + fi +} + +# Exits if this system or directory is not setup to run git / minnie-kenny.sh / git-secrets +validate_setup() { + if ! command -v git >/dev/null 2>&1; then + if [ ${minnie_kenny_strict} -eq 0 ]; then + echo_out "\`git\` not found. Not checking for git-secrets." + exit 0 + else + echo_err "Error: \`git\` not found." 
+ exit 1 + fi + fi + + minnie_kenny_is_work_tree="$(git rev-parse --is-inside-work-tree 2>/dev/null || echo false)" + + if [ "${minnie_kenny_is_work_tree}" != "true" ]; then + if [ ${minnie_kenny_strict} -eq 0 ]; then + echo_out "Not a git working tree. Not checking for git-secrets." + exit 0 + else + echo_err "Error: Not a git working tree." + exit 1 + fi + fi + + # Get the git absolute directory, even when older versions of git do not support --absolute-git-dir + minnie_kenny_git_dir="$(cd "$(git rev-parse --git-dir)" && pwd)" + if [ ! -f "${minnie_kenny_git_dir}/../${minnie_kenny_gitconfig}" ]; then + echo_err "Error: ${minnie_kenny_gitconfig} was not found next to the directory ${minnie_kenny_git_dir}" + exit 1 + fi + + if ! command -v git-secrets >/dev/null 2>&1; then + echo_err "\`git-secrets\` was not found while \`git\` was found." \ + "\`git-secrets\` must be installed first before using ${minnie_kenny_command_name}." \ + "See https://github.com/awslabs/git-secrets#installing-git-secrets" + exit 1 + fi +} + +# Echo 1 if the hook contains a line that starts with "git secrets" otherwise echo 0 +check_hook() { + path="${minnie_kenny_git_dir}/hooks/$1" + if grep -q "^git secrets " "${path}" 2>/dev/null; then + echo 1 + else + echo 0 + fi +} + +# Ensures git secrets hooks are installed along with the configuration to read in the minnie-kenny.gitconfig +check_installation() { + expected_hooks=0 + actual_hooks=0 + + for path in "commit-msg" "pre-commit" "prepare-commit-msg"; do + increment=$(check_hook ${path}) + actual_hooks=$((actual_hooks + increment)) + expected_hooks=$((expected_hooks + 1)) + done + + if [ 0 -lt ${actual_hooks} ] && [ ${actual_hooks} -lt ${expected_hooks} ]; then + # Only some of the hooks are setup, meaning someone updated the hook files in an unexpected way. + # Warn and exit as we cannot fix this with a simple `git secrets --install`. + echo_err "Error: git-secrets is not installed into all of the expected git hooks." \ + "Double check the 'commit-msg' 'pre-commit' and 'prepare-commit-msg' under the directory" \ + "${minnie_kenny_git_dir}/hooks and consider running \`git secrets --install --force\`." + exit 1 + fi + + # Begin checking for fixable errors + found_fixable_errors=0 + + if [ ${actual_hooks} -eq 0 ]; then + if [ ${minnie_kenny_modify} -eq 1 ]; then + run_command git secrets --install + else + echo_err "Error: git-secrets is not installed into the expected git hooks" \ + "'commit-msg' 'pre-commit' and 'prepare-commit-msg'." + found_fixable_errors=1 + fi + fi + + # Allow the minnie-kenny.gitconfig in `git secrets --scan` + if ! git config --get-all secrets.allowed | grep -Fxq "^${minnie_kenny_gitconfig}:[0-9]+:"; then + if [ ${minnie_kenny_modify} -eq 1 ]; then + run_command git config --add secrets.allowed "^${minnie_kenny_gitconfig}:[0-9]+:" + else + echo_err "Error: The expression '^${minnie_kenny_gitconfig}:[0-9]+:' should be allowed by git secrets." + found_fixable_errors=1 + fi + fi + + # Allow minnie-kenny.gitconfig to appear in `git secrets --scan-history` + if ! git config --get-all secrets.allowed | grep -Fxq "^[0-9a-f]+:${minnie_kenny_gitconfig}:[0-9]+:"; then + if [ ${minnie_kenny_modify} -eq 1 ]; then + run_command git config --add secrets.allowed "^[0-9a-f]+:${minnie_kenny_gitconfig}:[0-9]+:" + else + echo_err "Error: The expression '^[0-9a-f]+:${minnie_kenny_gitconfig}:[0-9]+:' should be allowed by git secrets." + found_fixable_errors=1 + fi + fi + + if ! 
git config --get-all include.path | grep -Fxq "../${minnie_kenny_gitconfig}"; then + if [ ${minnie_kenny_modify} -eq 1 ]; then + run_command git config --add include.path "../${minnie_kenny_gitconfig}" + else + echo_err "Error: The path '../${minnie_kenny_gitconfig}' should be an included path in the git config." + found_fixable_errors=1 + fi + fi + + if [ ${found_fixable_errors} -ne 0 ]; then + echo_err "Error: The above errors may be fixed by re-running ${minnie_kenny_command_name} with -f / --force." + exit 1 + fi +} + +main() { + process_arguments "$@" + validate_setup + check_installation +} + +main "$@" diff --git a/processes/external-contributions/README.MD b/processes/external-contributions/README.MD new file mode 100644 index 00000000000..385d5e478ab --- /dev/null +++ b/processes/external-contributions/README.MD @@ -0,0 +1,96 @@ +# How to Handle External Contributions + +## Overview + +- Decide whether the PR is adding community features or affects "supported" functionality. +- Review the concept +- Review the changes in the PR +- Run CI against the PR +- Cycle through Review and CI until satisfied +- Merge the PR + +## Process + +### Decide "Community" or "Supported" + +**Community Supported:** If the PR only changes parts of Cromwell which are not part of the supported feature set, treat them with a little more +flexibility and with the assumption that the contributor probably knows better than us what they want the feature to do. + +**Officially Supported:** If the PR changes core/supported features in Cromwell, review them as thoroughly as you would PRs from within the team. Remember +that one day you might need to support this code! + +### Reviewing the Concept + +Ask the questions: + +- Will Cromwell be a better product with this change adopted. +- Will it be better enough to warrant the time necessary to review the PR + - Note: The answer to this is almost always a yes if the first answer was yes + - However, overly long, opaque, or "risky" changes might benefit from requests to break the PR up and merge/review things in stages. + +### Review the changes in the PR + +- For PRs changing "supported" features, treat it like any other PR coming from within the team. + - Remember: we will have to support these changes in the future. Possibly forever! +- For PRs only making changes to features we don't officially support - be generous. But make sure: + - That any new optional functionality is opt-in rather than becoming the default. + - That any community features are flagged in documentation and config examples as originating from the community (and thus may not be supported by the team if bugs are found). + +### Run CI against the PR + +Note: inspired by the community answer [here](https://github.community/t5/How-to-use-Git-and-GitHub/Checkout-a-branch-from-a-fork/td-p/77). + +- Problem: our CI will only run against branches of the `broadinstitute/cromwell` repo submitted by team members. +- To turn a community contribution into a PR that travis will run against: + - Identify a reference to use for the remote branch and check it out. + - Example: for pull request 938 there will be a reference `refs/pull/938/head` + - We can fetch that reference to a new branch using: `git fetch -f origin refs/pull/938/head:938_pr_clone` + - Push the branch to github + - Create a new PR for the clone branch. Indicate that it only exists to test a community contribution. 
+ - Example title: `[PR 938 Clone] PR for CI only` + +### Cycle through Review and CI + +- If the community contributor makes changes following your reviews or the CI results: + - Glance at the changes to make sure they still seem reasonable. + - Make any additional comments + - Re-fetch the remote reference: `git fetch -f origin refs/pull/860/head:938_pr_clone` + - Push the changes back up to github to re-trigger the CI on your clone PR. + +### Merge the PR + +- Once the tests have completed successfully and the PR has two approvals, it can be merged. +- Remember to delete your branch clone PR (and the cloned branch itself too!) + +## Shortcuts + +### Git Command Shortcut + +Note: also inspired by the community answer [here](https://github.community/t5/How-to-use-Git-and-GitHub/Checkout-a-branch-from-a-fork/td-p/77 and the reference gitconfig file [here](https://github.com/lee-dohm/dotfiles/blob/8d3c59004154571578c2b32df2cdebb013517630/gitconfig#L8)). + +It's tedious to have to remember the syntax for `git fetch -f origin refs/pull/938/head:938_pr_clone` isn't it? Well +luckily you don't have to! + +**Step 1:** add this line into your `~/.gitconfig` file under the `[alias]` section: +``` +clone-pr = !sh -c 'git fetch -f origin pull/$1/head:$1_pr_clone && git checkout $1_pr_clone' - +``` + +**Step 2:** Your regular `git` command line now has new superpowers: +``` +[develop] $ git clone-pr 938 +From https://github.com/broadinstitute/cromwell + * [new ref] refs/pull/938/head -> 938_pr_clone +Switched to branch '938_pr_clone' + +[938_pr_clone] $ git push +``` + +Note: The final `git push` command may not work as-is, depending on your ~/.gitconfig value of `push.default`. +If it doesn't work then one of the following solutions may work: + * Setting `git`'s `push.default` config value to be '`current`'. + * Using `git push --set-upstream origin 5070_pr_clone` instead + * Using `git push origin HEAD` instead + +**Step 3:** If you need to re-sync your cloned PR against changes on their remote branch - no problem! The +exact same `git clone-pr 938` will *update* your local reference allowing you to push changes up to github easily! diff --git a/processes/release_processes/README.MD b/processes/release_processes/README.MD index 648b57ae1ae..f839ae56752 100644 --- a/processes/release_processes/README.MD +++ b/processes/release_processes/README.MD @@ -5,11 +5,15 @@ Have a better idea about how the deployment processes should work? See our "updating the process" [process](../README.MD)! -## How to Publish Cromwell +## Core Process: How to Publish and Release Cromwell -![release-cromwell-version](release-cromwell-version.dot.png) +If you're the lucky "Release Rotation Engineer" this time, you should do the following three things: + +First: Run the publish script to create a new version of Cromwell. +Second: Run through the "How to Release Cromwell into Firecloud" process. +Third: Run through the "How to Deploy Cromwell in CAAS prod" process. -## How to Release Cromwell into Firecloud +### How to Release Cromwell into Firecloud **Note:** How to accomplish some of these steps might be non-obvious to you. If so, refer to the additional details in the [full document](https://docs.google.com/document/d/1EEzwemE8IedCplIwL506fiqXr0262Pz4G0x6Cr6V-5E). @@ -30,13 +34,17 @@ for any performance changes since the previous release. runs and fill in any missing entries in the table. 
-## How to Deploy Cromwell in CAAS prod +### How to Deploy Cromwell in CAAS prod **Note:** Full details on how to complete each of these steps is found in the [Quick CAAS Deployment Guide](https://docs.google.com/document/d/1s0YC-oohJ7o-OGcgnH_-YBtIEKmLIPTRpG36yvWxUpE) ![caas-prod](caas-prod.dot.png) -## How to Generate and Publish Swagger Client Library +## Bonus Processes + +The swagger client library is not part of our core publish/release process but can be performed from time to time, as required. + +### How to Generate and Publish Swagger Client Library The first step is to generate the client library. From the root of the repo run diff --git a/processes/release_processes/firecloud-develop.dot b/processes/release_processes/firecloud-develop.dot index 922b95a544c..f33c8b50ce6 100644 --- a/processes/release_processes/firecloud-develop.dot +++ b/processes/release_processes/firecloud-develop.dot @@ -8,7 +8,7 @@ digraph { node [style=filled,color=white]; color="lightgrey" - release_cromwell [shape=oval label="PUBLISH: We have a new Cromwell version! Woohoo!"]; + release_cromwell [shape=oval label="PUBLISH: Run the publish script to create a new Cromwell version"]; } subgraph cluster_1 { @@ -44,7 +44,7 @@ digraph { } - fcdev_upnext [shape=oval label="Create a 'work in progress' firecloud-develop PR\nfor the next Cromwell version. Mark it 'Do Not Merge'"]; + update_caas [shape=oval label="Now follow the 'How to Deploy Cromwell in CAAS prod' Process"]; # Edges @@ -66,5 +66,5 @@ digraph { qa_perf -> dspjenkins_merge qa_perf -> fcdev_merge - fcdev_merge -> fcdev_upnext + fcdev_merge -> update_caas } diff --git a/processes/release_processes/firecloud-develop.dot.png b/processes/release_processes/firecloud-develop.dot.png index 052f282661c..180c9608a65 100644 Binary files a/processes/release_processes/firecloud-develop.dot.png and b/processes/release_processes/firecloud-develop.dot.png differ diff --git a/processes/release_processes/release-cromwell-version.dot b/processes/release_processes/release-cromwell-version.dot deleted file mode 100644 index 1753d1fc261..00000000000 --- a/processes/release_processes/release-cromwell-version.dot +++ /dev/null @@ -1,10 +0,0 @@ -digraph { - - # Nodes - - release_cromwell [shape=oval label="Run the release script"]; - - # Edges - - # TBD...? 
-} diff --git a/processes/release_processes/release-cromwell-version.dot.png b/processes/release_processes/release-cromwell-version.dot.png deleted file mode 100644 index a2d247a1cc6..00000000000 Binary files a/processes/release_processes/release-cromwell-version.dot.png and /dev/null differ diff --git a/project/ContinuousIntegration.scala b/project/ContinuousIntegration.scala index ec6a3107a42..e1ddf99e08d 100644 --- a/project/ContinuousIntegration.scala +++ b/project/ContinuousIntegration.scala @@ -1,3 +1,4 @@ +import Testing._ import sbt.Keys._ import sbt._ import sbt.io.Path._ @@ -5,14 +6,7 @@ import sbt.io.Path._ import scala.sys.process._ object ContinuousIntegration { - val copyCiResources: TaskKey[Unit] = taskKey[Unit](s"Copy CI resources.") - val renderCiResources: TaskKey[Unit] = taskKey[Unit](s"Render CI resources with Hashicorp Vault.") - - val srcCiResources: SettingKey[File] = settingKey[File]("Source directory for CI resources") - val targetCiResources: SettingKey[File] = settingKey[File]("Target directory for CI resources") - val vaultToken: SettingKey[File] = settingKey[File]("File with the vault token") - - val ciSettings: Seq[Setting[_]] = List( + lazy val ciSettings: Seq[Setting[_]] = List( srcCiResources := sourceDirectory.value / "ci" / "resources", targetCiResources := target.value / "ci" / "resources", vaultToken := userHome / ".vault-token", @@ -20,6 +14,7 @@ object ContinuousIntegration { IO.copyDirectory(srcCiResources.value, targetCiResources.value) }, renderCiResources := { + minnieKenny.toTask("").value copyCiResources.value val log = streams.value.log if (!vaultToken.value.exists()) { @@ -52,4 +47,62 @@ object ContinuousIntegration { } }, ) + + def aggregateSettings(rootProject: Project): Seq[Setting[_]] = List( + // Before compiling, check if the expected projects are aggregated so that they will be compiled-and-tested too. + compile in Compile := { + streams.value.log // make sure logger is loaded + validateAggregatedProjects(rootProject, state.value) + (compile in Compile).value + }, + ) + + private val copyCiResources: TaskKey[Unit] = taskKey[Unit](s"Copy CI resources.") + private val renderCiResources: TaskKey[Unit] = taskKey[Unit](s"Render CI resources with Hashicorp Vault.") + + private val srcCiResources: SettingKey[File] = settingKey[File]("Source directory for CI resources") + private val targetCiResources: SettingKey[File] = settingKey[File]("Target directory for CI resources") + private val vaultToken: SettingKey[File] = settingKey[File]("File with the vault token") + + /** + * For "reasons" these projects are excluded from the root aggregation in build.sbt. + */ + private val unaggregatedProjects = Map( + "cwlEncoder" -> "not sure what this is", + "hybridCarboniteMetadataService" -> "not sure why this is excluded", + ) + + /** + * Get the list of projects defined in build.sbt excluding the passed in root project. + */ + private def getBuildSbtNames(rootProject: Project, state: State): Set[String] = { + val extracted = Project.extract(state) + extracted.structure.units.flatMap({ + case (_, loadedBuildUnit) => loadedBuildUnit.defined.keys + }).toSet - rootProject.id + } + + /** + * Validates that projects are aggregated. 
+ */ + private def validateAggregatedProjects(rootProject: Project, state: State): Unit = { + // Get the list of projects explicitly aggregated + val projectReferences: Seq[ProjectReference] = rootProject.aggregate + val localProjectReferences = projectReferences collect { + case localProject: LocalProject => localProject + } + val aggregatedNames = localProjectReferences.map(_.project).toSet + + val buildSbtNames = getBuildSbtNames(rootProject, state) + val missingNames = buildSbtNames.diff(aggregatedNames ++ unaggregatedProjects.keySet).toList.sorted + if (missingNames.nonEmpty) { + sys.error(s"There are projects defined in build.sbt that are not aggregated: ${missingNames.mkString(", ")}") + } + + val falseNames = unaggregatedProjects.filterKeys(aggregatedNames.contains) + if (falseNames.nonEmpty) { + val reasons = falseNames.map({case (name, reason) => s" ${name}: ${reason}"}).mkString("\n") + sys.error(s"There are projects aggregated in build.sbt that shouldn't be:\n$reasons") + } + } } diff --git a/project/Settings.scala b/project/Settings.scala index 0b6fa6427d8..048dc7ccc3e 100644 --- a/project/Settings.scala +++ b/project/Settings.scala @@ -178,7 +178,6 @@ object Settings { } } - // Adds settings to build the root project implicit class ProjectRootSettings(val project: Project) extends AnyVal { def withRootSettings(): Project = { @@ -195,6 +194,14 @@ object Settings { buildProject(project, "root", Nil, builders) } + + /** + * After aggregations have been added to the root project, we can do additional tasks like checking if every + * sub-project in build.sbt will also be tested by the root-aggregated `sbt test` command. + */ + def withAggregateSettings(): Project = { + project.settings(aggregateSettings(project)) + } } } diff --git a/project/Testing.scala b/project/Testing.scala index c9b32bb2c30..7868d52d958 100644 --- a/project/Testing.scala +++ b/project/Testing.scala @@ -2,6 +2,10 @@ import Dependencies._ import sbt.Defaults._ import sbt.Keys._ import sbt._ +import complete.DefaultParsers._ +import sbt.util.Logger + +import scala.sys.process._ object Testing { private val AllTests = config("alltests") extend Test @@ -21,6 +25,15 @@ object Testing { DbmsTestTag ) + val minnieKenny = inputKey[Unit]("Run minnie-kenny.") + + private val includeTestTags: Seq[String] = + sys.env + .get("CROMWELL_SBT_TEST_INCLUDE_TAGS") + .filter(_.nonEmpty) + .map(_.split(",").toList.map(_.trim)) + .getOrElse(Nil) + private val excludeTestTags: Seq[String] = sys.env .get("CROMWELL_SBT_TEST_EXCLUDE_TAGS") @@ -34,22 +47,70 @@ object Testing { The arguments that will be added to the default test config, but removed from all other configs. `sbt coverage test` adds other arguments added to generate the coverage reports. Tracking the arguments we add to the default allows one to later remove them when building up other configurations. 
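// A minimal, standalone sketch (not part of the change itself) of the aggregation check added above:
// it is a set difference between the projects declared in build.sbt and the projects aggregated by
// root, minus an allowlist of intentionally unaggregated projects. Project names below are
// illustrative, not the real build.sbt list.
object AggregationCheckSketch extends App {
  val buildSbtNames = Set("engine", "backend", "httpFileSystem", "cwlEncoder")
  val aggregatedNames = Set("engine", "backend", "httpFileSystem")
  val unaggregatedAllowlist = Set("cwlEncoder") // excluded on purpose, with a documented reason

  val missingNames = buildSbtNames.diff(aggregatedNames ++ unaggregatedAllowlist).toList.sorted
  if (missingNames.nonEmpty)
    sys.error(s"There are projects defined in build.sbt that are not aggregated: ${missingNames.mkString(", ")}")
  else
    println("Every build.sbt project is either aggregated by root or explicitly allowlisted")
}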
- */ + */ + private val includeTestArgs = includeTestTags.map(Tests.Argument(TestFrameworks.ScalaTest, "-n", _)) private val excludeTestArgs = excludeTestTags.map(Tests.Argument(TestFrameworks.ScalaTest, "-l", _)) + private val filterTestArgs = if (includeTestArgs.nonEmpty) includeTestArgs else excludeTestArgs private val TestReportArgs = - Tests.Argument(TestFrameworks.ScalaTest, "-oDSI", "-h", "target/test-reports", "-u", "target/test-reports", "-F", spanScaleFactor) + Tests.Argument( + TestFrameworks.ScalaTest, + "-oDSI", + "-h", + "target/test-reports", + "-u", + "target/test-reports", + "-F", + spanScaleFactor, + "-W", + "300", + "300", + ) + + /** Run minnie-kenny only once per sbt invocation. */ + class MinnieKennySingleRunner() { + private val mutex = new Object + private var resultOption: Option[Int] = None + + /** Run using the logger, throwing an exception only on the first failure. */ + def runOnce(log: Logger, args: Seq[String]): Unit = { + mutex synchronized { + if (resultOption.isEmpty) { + log.debug(s"Running minnie-kenny.sh${args.mkString(" ", " ", "")}") + val result = ("./minnie-kenny.sh" +: args) ! log + resultOption = Option(result) + if (result == 0) + log.debug("Successfully ran minnie-kenny.sh") + else + sys.error("Running minnie-kenny.sh failed. Please double check for errors above.") + } + } + } + } + + // Only run one minnie-kenny.sh at a time! + private lazy val minnieKennySingleRunner = new MinnieKennySingleRunner val testSettings = List( libraryDependencies ++= testDependencies.map(_ % Test), - // `test` (or `assembly`) - Run all tests, except docker and integration and DBMS - testOptions in Test ++= Seq(TestReportArgs) ++ excludeTestArgs, + // `test` (or `assembly`) - Run most tests + testOptions in Test ++= Seq(TestReportArgs) ++ filterTestArgs, // `alltests:test` - Run all tests - testOptions in AllTests := (testOptions in Test).value.diff(excludeTestArgs), + testOptions in AllTests := (testOptions in Test).value.diff(filterTestArgs), // Add scalameter as a test framework in the CromwellBenchmarkTest scope testFrameworks in CromwellBenchmarkTest += new TestFramework("org.scalameter.ScalaMeterFramework"), // Don't execute benchmarks in parallel - parallelExecution in CromwellBenchmarkTest := false + parallelExecution in CromwellBenchmarkTest := false, + // Make sure no secrets are commited to git + minnieKenny := { + val log = streams.value.log + val args = spaceDelimited("").parsed + minnieKennySingleRunner.runOnce(log, args) + }, + test in Test := { + minnieKenny.toTask("").value + (test in Test).value + }, ) val integrationTestSettings = List( diff --git a/project/Version.scala b/project/Version.scala index 7e6fb33efbf..d54b8e35969 100644 --- a/project/Version.scala +++ b/project/Version.scala @@ -5,7 +5,7 @@ import sbt._ object Version { // Upcoming release, or current if we're on a master / hotfix branch - val cromwellVersion = "45" + val cromwellVersion = "46" /** * Returns true if this project should be considered a snapshot. 
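// A minimal, standalone sketch of how the include/exclude tag filtering introduced above resolves:
// when include tags are supplied they take precedence and only those tags run, otherwise the usual
// exclusions apply. Tag names are illustrative and the env-var parsing is omitted.
object TagFilterSketch extends App {
  def scalaTestArgs(includeTags: Seq[String], excludeTags: Seq[String]): Seq[Seq[String]] = {
    val include = includeTags.map(tag => Seq("-n", tag)) // "-n": only run tests with this tag
    val exclude = excludeTags.map(tag => Seq("-l", tag)) // "-l": skip tests with this tag
    if (include.nonEmpty) include else exclude
  }

  println(scalaTestArgs(Seq("DbmsTest"), Seq("DockerTest"))) // List(List(-n, DbmsTest))
  println(scalaTestArgs(Nil, Seq("DockerTest")))             // List(List(-l, DockerTest))
}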
diff --git a/scripts/docker-compose-mysql/docker-compose-horicromtal.yml b/scripts/docker-compose-mysql/docker-compose-horicromtal.yml index f33b519ff1a..ab22700171b 100644 --- a/scripts/docker-compose-mysql/docker-compose-horicromtal.yml +++ b/scripts/docker-compose-mysql/docker-compose-horicromtal.yml @@ -32,8 +32,6 @@ services: - CROMWELL_BUILD_RESOURCES_DIRECTORY - CROMWELL_BUILD_CENTAUR_SLICK_PROFILE - CROMWELL_BUILD_CENTAUR_JDBC_DRIVER - - CROMWELL_BUILD_CENTAUR_JDBC_USERNAME - - CROMWELL_BUILD_CENTAUR_JDBC_PASSWORD - CROMWELL_BUILD_CENTAUR_JDBC_URL - CROMWELL_BUILD_CENTAUR_PRIOR_SLICK_PROFILE - CROMWELL_BUILD_CENTAUR_PRIOR_JDBC_DRIVER @@ -61,8 +59,6 @@ services: - CROMWELL_BUILD_RESOURCES_DIRECTORY - CROMWELL_BUILD_CENTAUR_SLICK_PROFILE - CROMWELL_BUILD_CENTAUR_JDBC_DRIVER - - CROMWELL_BUILD_CENTAUR_JDBC_USERNAME - - CROMWELL_BUILD_CENTAUR_JDBC_PASSWORD - CROMWELL_BUILD_CENTAUR_JDBC_URL - CROMWELL_BUILD_CENTAUR_PRIOR_SLICK_PROFILE - CROMWELL_BUILD_CENTAUR_PRIOR_JDBC_DRIVER @@ -97,8 +93,6 @@ services: - CROMWELL_BUILD_RESOURCES_DIRECTORY - CROMWELL_BUILD_CENTAUR_SLICK_PROFILE - CROMWELL_BUILD_CENTAUR_JDBC_DRIVER - - CROMWELL_BUILD_CENTAUR_JDBC_USERNAME - - CROMWELL_BUILD_CENTAUR_JDBC_PASSWORD - CROMWELL_BUILD_CENTAUR_JDBC_URL - CROMWELL_BUILD_CENTAUR_PRIOR_SLICK_PROFILE - CROMWELL_BUILD_CENTAUR_PRIOR_JDBC_DRIVER diff --git a/server/src/main/scala/cromwell/CommandLineParser.scala b/server/src/main/scala/cromwell/CommandLineParser.scala index 78c9019a309..80108e0586d 100644 --- a/server/src/main/scala/cromwell/CommandLineParser.scala +++ b/server/src/main/scala/cromwell/CommandLineParser.scala @@ -30,7 +30,7 @@ object CommandLineParser { // -l, --labels Workflow labels file. // -p, --imports A directory or zipfile to search for workflow imports. // -m, --metadata-output -// An optional directory path to output metadata. +// An optional JSON file path to output metadata. // -h, --host Cromwell server URL class CommandLineParser extends scopt.OptionParser[CommandLineArguments]("java -jar /path/to/cromwell.jar") { @@ -77,7 +77,7 @@ class CommandLineParser extends scopt.OptionParser[CommandLineArguments]("java - children( commonSubmissionArguments ++ List( opt[String]('m', "metadata-output").text( - "An optional directory path to output metadata."). + "An optional JSON file path to output metadata."). 
action((s, c) => c.copy(metadataOutput = Option(DefaultPathBuilder.get(s)))) ): _* diff --git a/server/src/main/scala/cromwell/CromwellEntryPoint.scala b/server/src/main/scala/cromwell/CromwellEntryPoint.scala index 3e31de8499b..8a2938935a3 100644 --- a/server/src/main/scala/cromwell/CromwellEntryPoint.scala +++ b/server/src/main/scala/cromwell/CromwellEntryPoint.scala @@ -200,11 +200,7 @@ object CromwellEntryPoint extends GracefulStopSupport { val validation = args.validateSubmission(EntryPointLogger) map { case ValidSubmission(w, u, r, i, o, l, z) => - val finalWorkflowSourceAndUrl: WorkflowSourceOrUrl = { - if (w.isDefined) WorkflowSourceOrUrl(w,u) // submission has CWL workflow file path and no imports - else if (u.get.startsWith("http")) WorkflowSourceOrUrl(w, u) - else WorkflowSourceOrUrl(Option(DefaultPathBuilder.get(u.get).contentAsString), None) //case where url is a WDL/CWL file - } + val finalWorkflowSourceAndUrl: WorkflowSourceOrUrl = getFinalWorkflowSourceAndUrl(w, u) WorkflowSingleSubmission( workflowSource = finalWorkflowSourceAndUrl.source, @@ -225,10 +221,11 @@ object CromwellEntryPoint extends GracefulStopSupport { val sourceFileCollection = (args.validateSubmission(EntryPointLogger), writeableMetadataPath(args.metadataOutput)) mapN { case (ValidSubmission(w, u, r, i, o, l, Some(z)), _) => + val finalWorkflowSourceAndUrl: WorkflowSourceOrUrl = getFinalWorkflowSourceAndUrl(w, u) //noinspection RedundantDefaultArgument WorkflowSourceFilesWithDependenciesZip.apply( - workflowSource = w, - workflowUrl = u, + workflowSource = finalWorkflowSourceAndUrl.source, + workflowUrl = finalWorkflowSourceAndUrl.url, workflowRoot = r, workflowType = args.workflowType, workflowTypeVersion = args.workflowTypeVersion, @@ -239,10 +236,11 @@ object CromwellEntryPoint extends GracefulStopSupport { warnings = Vector.empty, workflowOnHold = false) case (ValidSubmission(w, u, r, i, o, l, None), _) => + val finalWorkflowSourceAndUrl: WorkflowSourceOrUrl = getFinalWorkflowSourceAndUrl(w, u) //noinspection RedundantDefaultArgument WorkflowSourceFilesWithoutImports.apply( - workflowSource = w, - workflowUrl = u, + workflowSource = finalWorkflowSourceAndUrl.source, + workflowUrl = finalWorkflowSourceAndUrl.url, workflowRoot = r, workflowType = args.workflowType, workflowTypeVersion = args.workflowTypeVersion, @@ -268,6 +266,12 @@ object CromwellEntryPoint extends GracefulStopSupport { }) } + private def getFinalWorkflowSourceAndUrl(workflowSource: Option[String], workflowUrl: Option[String]): WorkflowSourceOrUrl = { + if (workflowSource.isDefined) WorkflowSourceOrUrl(workflowSource, workflowUrl) // submission has CWL workflow file path and no imports + else if (workflowUrl.get.startsWith("http")) WorkflowSourceOrUrl(workflowSource, workflowUrl) + else WorkflowSourceOrUrl(Option(DefaultPathBuilder.get(workflowUrl.get).contentAsString), None) //case where url is a WDL/CWL file + } + private def writeableMetadataPath(path: Option[Path]): ErrorOr[Unit] = { path match { case Some(p) if !metadataPathIsWriteable(p) => s"Unable to write to metadata directory: $p".invalidNel diff --git a/server/src/test/scala/cromwell/CromwellCommandLineSpec.scala b/server/src/test/scala/cromwell/CromwellCommandLineSpec.scala index f15ea0b73ee..2227306bdac 100644 --- a/server/src/test/scala/cromwell/CromwellCommandLineSpec.scala +++ b/server/src/test/scala/cromwell/CromwellCommandLineSpec.scala @@ -60,8 +60,8 @@ class CromwellCommandLineSpec extends FlatSpec with Matchers with BeforeAndAfter val validation = 
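// A simplified, standalone sketch of the source-vs-url resolution performed by
// getFinalWorkflowSourceAndUrl above. File reading is stubbed out, and SourceOrUrl here is a
// hypothetical stand-in for WorkflowSourceOrUrl; this is not the exact Cromwell implementation.
object WorkflowSourceResolutionSketch extends App {
  final case class SourceOrUrl(source: Option[String], url: Option[String])

  def resolve(source: Option[String], url: Option[String], readFile: String => String): SourceOrUrl =
    if (source.isDefined) SourceOrUrl(source, url)                    // source supplied directly
    else if (url.exists(_.startsWith("http"))) SourceOrUrl(None, url) // genuine remote URL, pass it through
    else SourceOrUrl(url.map(readFile), None)                         // "url" is a local path: inline its contents

  val fakeRead: String => String = _ => "workflow wf { call hello }"
  println(resolve(None, Some("https://example.org/wf.wdl"), fakeRead)) // keeps the URL
  println(resolve(None, Some("/tmp/wf.wdl"), fakeRead))                // inlines the file contents
}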
Try(CromwellEntryPoint.validateRunArguments(optionsFirst)) validation.isSuccess shouldBe true - validation.get.workflowSource shouldBe None - validation.get.workflowUrl shouldBe Some(threeStep.wdl) + validation.get.workflowSource shouldBe Some(threeStep.sampleWdl.workflowSource()) + validation.get.workflowUrl shouldBe None } it should "run single when supplying workflow using url" in { diff --git a/server/src/test/scala/cromwell/CromwellTestKitSpec.scala b/server/src/test/scala/cromwell/CromwellTestKitSpec.scala index 68bb814faa2..9eb606a2d76 100644 --- a/server/src/test/scala/cromwell/CromwellTestKitSpec.scala +++ b/server/src/test/scala/cromwell/CromwellTestKitSpec.scala @@ -27,8 +27,7 @@ import cromwell.services.ServiceRegistryActor import cromwell.services.metadata.MetadataService._ import cromwell.subworkflowstore.EmptySubWorkflowStoreActor import cromwell.util.SampleWdl -import cromwell.webservice.metadata.MetadataBuilderActor -import cromwell.webservice.metadata.MetadataBuilderActor.{BuiltMetadataResponse, FailedMetadataResponse, MetadataBuilderActorResponse} +import cromwell.services.metadata.impl.builder.MetadataBuilderActor.{BuiltMetadataResponse, FailedMetadataResponse, MetadataBuilderActorResponse} import org.scalactic.Equality import org.scalatest._ import org.scalatest.concurrent.{Eventually, ScalaFutures} @@ -47,6 +46,7 @@ case class OutputNotFoundException(outputFqn: String, actualOutputs: String) ext case class LogNotFoundException(log: String) extends RuntimeException(s"Expected log $log was not found") object CromwellTestKitSpec { + val ConfigText = """ |akka { @@ -158,6 +158,7 @@ object CromwellTestKitSpec { } } + /** * Special case for validating outputs. Used when the test wants to check that an output exists, but doesn't care what * the actual value was. 
@@ -327,7 +328,9 @@ abstract class CromwellTestKitSpec(val twms: TestWorkflowManagerSystem = default labels = customLabels ) val workflowId = rootActor.underlyingActor.submitWorkflow(sources) - eventually { verifyWorkflowState(rootActor.underlyingActor.serviceRegistryActor, workflowId, terminalState) } (config = patienceConfig, pos = implicitly[org.scalactic.source.Position]) + eventually { verifyWorkflowComplete(rootActor.underlyingActor.serviceRegistryActor, workflowId) } (config = patienceConfig, pos = implicitly[org.scalactic.source.Position]) + verifyWorkflowState(rootActor.underlyingActor.serviceRegistryActor, workflowId, terminalState) + val outcome = getWorkflowOutputsFromMetadata(workflowId, rootActor.underlyingActor.serviceRegistryActor) system.stop(rootActor) // And return the outcome: @@ -348,7 +351,8 @@ abstract class CromwellTestKitSpec(val twms: TestWorkflowManagerSystem = default val sources = sampleWdl.asWorkflowSources(runtime, workflowOptions) val workflowId = rootActor.underlyingActor.submitWorkflow(sources) - eventually { verifyWorkflowState(rootActor.underlyingActor.serviceRegistryActor, workflowId, terminalState) } (config = patienceConfig, pos = implicitly[org.scalactic.source.Position]) + eventually { verifyWorkflowComplete(rootActor.underlyingActor.serviceRegistryActor, workflowId) } (config = patienceConfig, pos = implicitly[org.scalactic.source.Position]) + verifyWorkflowState(rootActor.underlyingActor.serviceRegistryActor, workflowId, WorkflowSucceeded) val outputs = getWorkflowOutputsFromMetadata(workflowId, rootActor.underlyingActor.serviceRegistryActor) val actualOutputNames = outputs.keys mkString ", " @@ -369,32 +373,38 @@ abstract class CromwellTestKitSpec(val twms: TestWorkflowManagerSystem = default workflowId } + private def getWorkflowState(workflowId: WorkflowId, serviceRegistryActor: ActorRef)(implicit ec: ExecutionContext): WorkflowState = { + val statusResponse = serviceRegistryActor.ask(GetStatus(workflowId))(TimeoutDuration).collect { + case BuiltMetadataResponse(_, jsObject) => WorkflowState.withName(jsObject.fields("status").asInstanceOf[JsString].value) + case f => throw new RuntimeException(s"Unexpected status response for $workflowId: $f") + } + Await.result(statusResponse, TimeoutDuration) + } + /** - * Verifies that a state is correct. // TODO: There must be a better way...? + * Verifies that a workflow is complete */ - protected def verifyWorkflowState(serviceRegistryActor: ActorRef, workflowId: WorkflowId, expectedState: WorkflowState)(implicit ec: ExecutionContext): Unit = { - def getWorkflowState(workflowId: WorkflowId, serviceRegistryActor: ActorRef)(implicit ec: ExecutionContext): WorkflowState = { - val statusResponse = serviceRegistryActor.ask(GetStatus(workflowId))(TimeoutDuration).collect { - case StatusLookupResponse(_, state) => state - case f => throw new RuntimeException(s"Unexpected status response for $workflowId: $f") - } - Await.result(statusResponse, TimeoutDuration) - } + protected def verifyWorkflowComplete(serviceRegistryActor: ActorRef, workflowId: WorkflowId)(implicit ec: ExecutionContext): Unit = { + List(WorkflowSucceeded, WorkflowFailed, WorkflowAborted) should contain(getWorkflowState(workflowId, serviceRegistryActor)) + () + } + /** + * Verifies that a state is correct. 
+ */ + protected def verifyWorkflowState(serviceRegistryActor: ActorRef, workflowId: WorkflowId, expectedState: WorkflowState)(implicit ec: ExecutionContext): Unit = { getWorkflowState(workflowId, serviceRegistryActor) should equal (expectedState) () } private def getWorkflowOutputsFromMetadata(id: WorkflowId, serviceRegistryActor: ActorRef): Map[FullyQualifiedName, WomValue] = { - val mba = system.actorOf(MetadataBuilderActor.props(serviceRegistryActor)) - val response = mba.ask(WorkflowOutputs(id)).mapTo[MetadataBuilderActorResponse] collect { - case BuiltMetadataResponse(r) => r - case FailedMetadataResponse(e) => throw e + + val response = serviceRegistryActor.ask(WorkflowOutputs(id)).mapTo[MetadataBuilderActorResponse] collect { + case BuiltMetadataResponse(_, r) => r + case FailedMetadataResponse(_, e) => throw e } val jsObject = Await.result(response, TimeoutDuration) - system.stop(mba) - jsObject.getFields(WorkflowMetadataKeys.Outputs).toList match { case head::_ => head.asInstanceOf[JsObject].fields.map( x => (x._1, jsValueToWdlValue(x._2))) case _ => Map.empty diff --git a/server/src/test/scala/cromwell/engine/WorkflowStoreActorSpec.scala b/server/src/test/scala/cromwell/engine/WorkflowStoreActorSpec.scala index d512006fed0..4626977b069 100644 --- a/server/src/test/scala/cromwell/engine/WorkflowStoreActorSpec.scala +++ b/server/src/test/scala/cromwell/engine/WorkflowStoreActorSpec.scala @@ -1,6 +1,7 @@ package cromwell.engine import java.time.OffsetDateTime +import java.util.UUID import akka.testkit._ import cats.data.{NonEmptyList, NonEmptyVector} @@ -20,8 +21,8 @@ import cromwell.engine.workflow.workflowstore._ import cromwell.services.EngineServicesStore import cromwell.services.ServicesStore.EnhancedSqlDatabase import cromwell.services.metadata.MetadataQuery -import cromwell.services.metadata.MetadataService.{GetMetadataQueryAction, MetadataLookupResponse} -import cromwell.services.metadata.impl.ReadMetadataActor +import cromwell.services.metadata.MetadataService.{GetMetadataAction, MetadataLookupResponse} +import cromwell.services.metadata.impl.ReadDatabaseMetadataWorkerActor import cromwell.util.EncryptionSpec import cromwell.util.SampleWdl.HelloWorld import cromwell.{CromwellTestKitSpec, CromwellTestKitWordSpec} @@ -185,10 +186,6 @@ class WorkflowStoreActorSpec extends CromwellTestKitWordSpec with CoordinatedWor ), "WorkflowStoreActor-FetchEncryptedWorkflowOptions" ) - val readMetadataActor = system.actorOf( - ReadMetadataActor.props(metadataReadTimeout = 30 seconds), - "ReadMetadataActor-FetchEncryptedOptions" - ) storeActor ! BatchSubmitWorkflows(NonEmptyList.of(optionedSourceFiles)) val insertedIds = expectMsgType[WorkflowsBatchSubmittedToStore](10 seconds).workflowIds.toList @@ -213,8 +210,14 @@ class WorkflowStoreActorSpec extends CromwellTestKitWordSpec with CoordinatedWor Seq("iv", "ciphertext") // We need to wait for workflow metadata to be flushed before we can successfully query for it - eventually(timeout(15 seconds), interval(5 seconds)) { - readMetadataActor ! GetMetadataQueryAction(MetadataQuery.forWorkflow(id)) + eventually(timeout(15.seconds.dilated), interval(500.millis.dilated)) { + val actorNameUniquificationString = UUID.randomUUID().toString.take(7) + val readMetadataActor = system.actorOf( + ReadDatabaseMetadataWorkerActor.props(metadataReadTimeout = 30 seconds), + s"ReadMetadataActor-FetchEncryptedOptions-$actorNameUniquificationString" + ) + + readMetadataActor ! 
GetMetadataAction(MetadataQuery.forWorkflow(id)) expectMsgPF(10 seconds) { case MetadataLookupResponse(_, eventList) => val optionsEvent = eventList.find(_.key.key == "submittedFiles:options").get diff --git a/server/src/test/scala/cromwell/engine/workflow/SingleWorkflowRunnerActorSpec.scala b/server/src/test/scala/cromwell/engine/workflow/SingleWorkflowRunnerActorSpec.scala deleted file mode 100644 index 29ddac905a0..00000000000 --- a/server/src/test/scala/cromwell/engine/workflow/SingleWorkflowRunnerActorSpec.scala +++ /dev/null @@ -1,332 +0,0 @@ -package cromwell.engine.workflow - -import java.time.OffsetDateTime - -import akka.actor._ -import akka.pattern.ask -import akka.stream.ActorMaterializer -import akka.testkit._ -import akka.util.Timeout -import cromwell.CromwellTestKitSpec._ -import cromwell._ -import cromwell.core.path.{DefaultPathBuilder, Path} -import cromwell.core.{SimpleIoActor, WorkflowSourceFilesCollection} -import cromwell.engine.MockCromwellTerminator -import cromwell.engine.backend.BackendSingletonCollection -import cromwell.engine.workflow.SingleWorkflowRunnerActor.RunWorkflow -import cromwell.engine.workflow.SingleWorkflowRunnerActorSpec._ -import cromwell.engine.workflow.tokens.DynamicRateLimiter.Rate -import cromwell.engine.workflow.tokens.JobExecutionTokenDispenserActor -import cromwell.engine.workflow.workflowstore._ -import cromwell.util.SampleWdl -import cromwell.util.SampleWdl.{ExpressionsInInputs, GoodbyeWorld, ThreeStep} -import mouse.all._ -import org.scalatest.prop.{TableDrivenPropertyChecks, TableFor3} -import org.specs2.mock.Mockito -import spray.json._ - -import scala.concurrent.Await -import scala.concurrent.duration._ -import scala.util._ -import scala.util.control.NoStackTrace - -/** - * A series of tests of the SingleWorkflowRunnerActor. Currently uses live versions of the SingleWorkflowRunnerActor and - * the WorkflowManagerActor communicating with each other, instead of TestActor/TestProbe. - * - * Currently, as instance of the actor system are created via an instance of CromwellTestkitSpec, and the - * SingleWorkflowRunnerActor also tests halting its actor system, each spec is currently in a separate instance of the - * CromwellTestKitSpec. 
- */ -object SingleWorkflowRunnerActorSpec { - - def tempFile() = DefaultPathBuilder.createTempFile("metadata.", ".json") - - def tempDir() = DefaultPathBuilder.createTempDirectory("metadata.dir.") - - implicit class OptionJsValueEnhancer(val jsValue: Option[JsValue]) extends AnyVal { - def toOffsetDateTime = OffsetDateTime.parse(jsValue.toStringValue) - def toStringValue = jsValue.getOrElse(JsString("{}")).asInstanceOf[JsString].value - def toFields = jsValue.get.asJsObject.fields - } - - class TestSingleWorkflowRunnerActor(source: WorkflowSourceFilesCollection, - metadataOutputPath: Option[Path])(implicit materializer: ActorMaterializer) - extends SingleWorkflowRunnerActor( - source = source, - metadataOutputPath = metadataOutputPath, - terminator = MockCromwellTerminator, - gracefulShutdown = false, - abortJobsOnTerminate = false, - config = CromwellTestKitSpec.DefaultConfig - ) { - override lazy val serviceRegistryActor = CromwellTestKitSpec.ServiceRegistryActorInstance - override private [workflow] def done() = context.stop(self) - } -} - -abstract class SingleWorkflowRunnerActorSpec extends CromwellTestKitWordSpec with CoordinatedWorkflowStoreBuilder with Mockito { - private val workflowHeartbeatConfig = WorkflowHeartbeatConfig(CromwellTestKitSpec.DefaultConfig) - val store = new InMemoryWorkflowStore - private val workflowStore = - system.actorOf( - WorkflowStoreActor.props( - store, - store |> access, - dummyServiceRegistryActor, - MockCromwellTerminator, - abortAllJobsOnTerminate = false, - workflowHeartbeatConfig - ), - "WorkflowStoreActor" - ) - private val serviceRegistry = TestProbe("ServiceRegistryProbe").ref - private val jobStore = system.actorOf(AlwaysHappyJobStoreActor.props, "AlwaysHappyJobStoreActor") - private val ioActor = system.actorOf(SimpleIoActor.props, "SimpleIoActor") - private val subWorkflowStore = system.actorOf( - AlwaysHappySubWorkflowStoreActor.props, - "AlwaysHappySubWorkflowStoreActor" - ) - private val callCacheReadActor = system.actorOf(EmptyCallCacheReadActor.props, "EmptyCallCacheReadActor") - private val callCacheWriteActor = system.actorOf(EmptyCallCacheWriteActor.props, "EmptyCallCacheWriteActor") - private val dockerHashActor = system.actorOf(EmptyDockerHashActor.props, "EmptyDockerHashActor") - private val jobTokenDispenserActor = system.actorOf( - JobExecutionTokenDispenserActor.props(serviceRegistry, Rate(100, 1.second), None), - "JobExecutionTokenDispenserActor" - ) - - - def workflowManagerActor(): ActorRef = { - val params = WorkflowManagerActorParams( - CromwellTestKitSpec.DefaultConfig, - workflowStore = workflowStore, - ioActor = ioActor, - serviceRegistryActor = dummyServiceRegistryActor, - workflowLogCopyRouter = dummyLogCopyRouter, - jobStoreActor = jobStore, - subWorkflowStoreActor = subWorkflowStore, - callCacheReadActor = callCacheReadActor, - callCacheWriteActor = callCacheWriteActor, - dockerHashActor = dockerHashActor, - jobTokenDispenserActor = jobTokenDispenserActor, - backendSingletonCollection = BackendSingletonCollection(Map.empty), - serverMode = false, - workflowHeartbeatConfig) - system.actorOf(Props(new WorkflowManagerActor(params)), "WorkflowManagerActor") - } - - def createRunnerActor(sampleWdl: SampleWdl = ThreeStep, managerActor: => ActorRef = workflowManagerActor(), - outputFile: => Option[Path] = None): ActorRef = { - system.actorOf( - Props(new TestSingleWorkflowRunnerActor(sampleWdl.asWorkflowSources(), outputFile)), - "TestSingleWorkflowRunnerActor" - ) - } - - def singleWorkflowActor(sampleWdl: SampleWdl = 
ThreeStep, managerActor: => ActorRef = workflowManagerActor(), - outputFile: => Option[Path] = None): Unit = { - val actorRef = createRunnerActor(sampleWdl, managerActor, outputFile) - val futureResult = actorRef.ask(RunWorkflow)(timeout = new Timeout(TimeoutDuration)) - Await.ready(futureResult, Duration.Inf) - () - } -} - -class SingleWorkflowRunnerActorNormalSpec extends SingleWorkflowRunnerActorSpec { - "A SingleWorkflowRunnerActor" should { - "successfully run a workflow" in { - within(TimeoutDuration) { - waitForInfo("workflow finished with status 'Succeeded'.") { - singleWorkflowActor() - } - } - } - } -} - -class SingleWorkflowRunnerActorWithMetadataSpec extends SingleWorkflowRunnerActorSpec with TableDrivenPropertyChecks { - val metadataFile = tempFile() - - override protected def afterAll() = { - metadataFile.delete(swallowIOExceptions = true) - super.afterAll() - } - - private def doTheTest(wdlFile: SampleWdl, expectedCalls: TableFor3[String, Long, Long], workflowInputs: Long, workflowOutputs: Long) = { - val testStart = OffsetDateTime.now - within(TimeoutDuration) { - singleWorkflowActor( - sampleWdl = wdlFile, - outputFile = Option(metadataFile)) - } - eventually { - val metadataFileContent = metadataFile.contentAsString - val metadata = metadataFileContent.parseJson.asJsObject.fields - metadata.get("id") shouldNot be(empty) - metadata.get("status").toStringValue should be("Succeeded") - metadata.get("submission").toOffsetDateTime should be >= testStart - val workflowStart = metadata.get("start").toOffsetDateTime - workflowStart should be >= metadata.get("submission").toOffsetDateTime - val workflowEnd = metadata.get("end").toOffsetDateTime - workflowEnd should be >= metadata.get("start").toOffsetDateTime - metadata.get("inputs").toFields should have size workflowInputs - metadata.get("outputs").toFields should have size workflowOutputs - val calls = metadata.get("calls").toFields - calls should not be empty - - forAll(expectedCalls) { (callName, numInputs, numOutputs) => - val callSeq = calls(callName).asInstanceOf[JsArray].elements - callSeq should have size 1 - val call = callSeq.head.asJsObject.fields - val inputs = call.get("inputs").toFields - inputs should have size numInputs - call.get("executionStatus").toStringValue should be("Done") - call.get("backend").toStringValue should be("Local") - call.get("backendStatus").toStringValue should be("Done") - call.get("outputs").toFields should have size numOutputs - val callStart = call.get("start").toOffsetDateTime - callStart should be >= workflowStart - val callEnd = call.get("end").toOffsetDateTime - callEnd should be >= callStart - callEnd should be <= workflowEnd - call.get("jobId") shouldNot be(empty) - call("returnCode").asInstanceOf[JsNumber].value should be(0) - call.get("stdout") shouldNot be(empty) - call.get("stderr") shouldNot be(empty) - call("attempt").asInstanceOf[JsNumber].value should be(1) - } - } - } - - "A SingleWorkflowRunnerActor" should { - // TODO WOM: needs FQNs - "successfully run a workflow outputting metadata" in { - val expectedCalls = Table( - ("callName", "numInputs", "numOutputs"), - ("three_step.wc", 1L, 1L), - ("three_step.ps", 0L, 1L), - ("three_step.cgrep", 2L, 1L)) - - doTheTest(ThreeStep, expectedCalls, 1L, 3L) - } - "run a workflow outputting metadata with no remaining input expressions" in { - val expectedCalls = Table( - ("callName", "numInputs", "numOutputs"), - ("wf.echo", 1L, 1L), - ("wf.echo2", 1L, 1L)) - doTheTest(ExpressionsInInputs, expectedCalls, 2L, 2L) - } - } -} - -class 
SingleWorkflowRunnerActorWithMetadataOnFailureSpec extends SingleWorkflowRunnerActorSpec { - val metadataFile = tempFile() - - override protected def afterAll() = { - metadataFile.delete(swallowIOExceptions = true) - super.afterAll() - } - - "A SingleWorkflowRunnerActor" should { - "fail to run a workflow and still output metadata" in { - val testStart = OffsetDateTime.now - within(TimeoutDuration) { - singleWorkflowActor(sampleWdl = GoodbyeWorld, outputFile = Option(metadataFile)) - } - - val metadata = metadataFile.contentAsString.parseJson.asJsObject.fields - metadata.get("id") shouldNot be(empty) - metadata.get("status").toStringValue should be("Failed") - val workflowStart = metadata.get("start").toOffsetDateTime - workflowStart should be >= metadata.get("submission").toOffsetDateTime - val workflowEnd = metadata.get("end").toOffsetDateTime - workflowEnd should be >= metadata.get("start").toOffsetDateTime - metadata.get("submission").toOffsetDateTime should be >= testStart - metadata.get("inputs").toFields should have size 0 - metadata.get("outputs").toFields should have size 0 - val calls = metadata.get("calls").toFields - calls should not be empty - - val callSeq = calls("wf_goodbye.goodbye").asInstanceOf[JsArray].elements - callSeq should have size 1 - val call = callSeq.head.asJsObject.fields - val inputs = call.get("inputs").toFields - inputs should have size 0 - call.get("executionStatus").toStringValue should be("Failed") - call.get("backend").toStringValue should be("Local") - call.get("backendStatus").toStringValue should be("Done") - call.get("outputs") shouldBe empty - val callStart = call.get("start").toOffsetDateTime - callStart should be >= workflowStart - val callEnd = call.get("end").toOffsetDateTime - callEnd should be >= callStart - callEnd should be <= workflowEnd - call.get("jobId") shouldNot be(empty) - call("returnCode").asInstanceOf[JsNumber].value shouldNot be (0) - call.get("stdout") shouldNot be(empty) - call.get("stderr") shouldNot be(empty) - call("attempt").asInstanceOf[JsNumber].value should be (1) - call("failures").asInstanceOf[JsArray].elements shouldNot be(empty) - } - } -} - -class SingleWorkflowRunnerActorWithBadMetadataSpec extends SingleWorkflowRunnerActorSpec { - val metadataDir = tempDir() - - override protected def afterAll() = { - metadataDir.delete(swallowIOExceptions = true) - super.afterAll() - } - - "A SingleWorkflowRunnerActor" should { - "successfully run a workflow requesting a bad metadata path" in { - within(TimeoutDuration) { - val runner = createRunnerActor(outputFile = Option(metadataDir)) - waitForErrorWithException(s"Specified metadata path is a directory, should be a file: $metadataDir") { - val futureResult = runner.ask(RunWorkflow)(30.seconds.dilated, implicitly) - Await.ready(futureResult, Duration.Inf) - futureResult.value.get match { - case Success(_) => - case Failure(e) => - e.printStackTrace() - fail(e) - } - } - } - } - } -} - -class SingleWorkflowRunnerActorFailureSpec extends SingleWorkflowRunnerActorSpec { - "A SingleWorkflowRunnerActor" should { - "successfully terminate the system on an exception" in { - within(TimeoutDuration) { - val runner = createRunnerActor() - val futureResult = runner ? RunWorkflow - val ex = new RuntimeException("expected error") with NoStackTrace - runner ! 
Status.Failure(ex) - Await.ready(futureResult, Duration.Inf) - futureResult.value.get match { - case Success(_) => fail("Unexpected success") - case Failure(e) => e.getMessage should include("expected error") - } - } - } - } -} - -class SingleWorkflowRunnerActorUnexpectedSpec extends SingleWorkflowRunnerActorSpec { - "A SingleWorkflowRunnerActor" should { - "successfully warn about unexpected output" in { - within(TimeoutDuration) { - val runner = createRunnerActor() - waitForWarning("SingleWorkflowRunnerActor: received unexpected message: expected unexpected") { - runner ? RunWorkflow - runner ! "expected unexpected" - } - assert(!system.whenTerminated.isCompleted) - } - } - } -} diff --git a/server/src/test/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActorSpec.scala b/server/src/test/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActorSpec.scala index f5a93b10fb8..a970767b2f5 100644 --- a/server/src/test/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActorSpec.scala +++ b/server/src/test/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActorSpec.scala @@ -3,7 +3,7 @@ package cromwell.engine.workflow.lifecycle import akka.actor.Props import akka.testkit.TestDuration import com.typesafe.config.ConfigFactory -import cromwell.CromwellTestKitWordSpec +import cromwell.{CromwellTestKitSpec, CromwellTestKitWordSpec} import cromwell.core.CromwellGraphNode._ import cromwell.core.labels.Labels import cromwell.core._ @@ -28,7 +28,8 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestKitWordSpec wit |backend { | default = "Local" |} - """.stripMargin) + |""".stripMargin + ).withFallback(CromwellTestKitSpec.DefaultConfig) val differentDefaultBackendConf = ConfigFactory.parseString( """ |backend { @@ -39,9 +40,10 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestKitWordSpec wit | SpecifiedBackend {} | } |} - """.stripMargin) + |""".stripMargin + ).withFallback(CromwellTestKitSpec.DefaultConfig) val unstructuredFile = "fubar badness!" 
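// For context, a small standalone sketch of the withFallback behaviour relied on in the test
// config change above: values from the parsed test string win, and anything missing is filled in
// from the fallback config. Paths and values here are illustrative only.
import com.typesafe.config.ConfigFactory

object ConfigFallbackSketch extends App {
  val testConfig = ConfigFactory.parseString("""backend { default = "Local" }""")
  val defaults = ConfigFactory.parseString(
    """backend { default = "Papi" }
      |system { max-retries = 3 }
      |""".stripMargin)

  val merged = testConfig.withFallback(defaults)
  println(merged.getString("backend.default")) // "Local" -- the test string takes precedence
  println(merged.getInt("system.max-retries")) // 3 -- filled in from the fallback
}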
- val validOptions = WorkflowOptions.fromJsonString(""" { "write_to_cache": "true" } """).get + val validOptions = WorkflowOptions.fromJsonString(""" { "write_to_cache": true } """).get val validCustomLabelsFile="""{ "label1": "value1", "label2": "value2", "Label1": "valu£1" }""" val badCustomLabelsFile="""{ "key with characters more than 255-at vero eos et accusamus et iusto odio dignissimos ducimus qui blanditiis praesentium voluptatum deleniti atque corrupti quos dolores et quas molestias excepturi sint occaecati cupiditate non provident, similique sunt in culpas": "value with characters more than 255-at vero eos et accusamus et iusto odio dignissimos ducimus qui blanditiis praesentium voluptatum deleniti atque corrupti quos dolores et quas molestias excepturi sint occaecati cupiditate non provident, similique sunt in culpa" }""" diff --git a/server/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaBackendIsCopyingCachedOutputsSpec.scala b/server/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaBackendIsCopyingCachedOutputsSpec.scala index 77690b23724..c88c72bf590 100644 --- a/server/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaBackendIsCopyingCachedOutputsSpec.scala +++ b/server/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaBackendIsCopyingCachedOutputsSpec.scala @@ -107,7 +107,15 @@ class EjeaBackendIsCopyingCachedOutputsSpec extends EngineJobExecutionActorSpec eventually { ejea.stateName should be(InvalidatingCacheEntry) } - ejea.stateData should be(ResponsePendingData(helper.backendJobDescriptor, helper.bjeaProps, initialHashData, Option(helper.ejhaProbe.ref), cacheHit)) + ejea.stateData should be(ResponsePendingData( + helper.backendJobDescriptor, + helper.bjeaProps, + initialHashData, + Option(helper.ejhaProbe.ref), + cacheHit, + None, + 1, + )) } s"not invalidate a call for caching if backend coping failed when invalidation is disabled, when it was going to receive $hashComboName, if call caching is $mode" in { @@ -147,7 +155,15 @@ class EjeaBackendIsCopyingCachedOutputsSpec extends EngineJobExecutionActorSpec eventually { ejea.stateName should be(InvalidatingCacheEntry) } - ejea.stateData should be(ResponsePendingData(helper.backendJobDescriptor, helper.bjeaProps, finalHashData, Option(helper.ejhaProbe.ref), cacheHit)) + ejea.stateData should be(ResponsePendingData( + helper.backendJobDescriptor, + helper.bjeaProps, + finalHashData, + Option(helper.ejhaProbe.ref), + cacheHit, + None, + 1, + )) } } } diff --git a/server/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EngineJobExecutionActorTransitionsSpec.scala b/server/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EngineJobExecutionActorTransitionsSpec.scala new file mode 100644 index 00000000000..c0d5d51edbb --- /dev/null +++ b/server/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EngineJobExecutionActorTransitionsSpec.scala @@ -0,0 +1,62 @@ +package cromwell.engine.workflow.lifecycle.execution.ejea + +import cromwell.engine.workflow.lifecycle.execution.job.EngineJobExecutionActor._ +import org.scalatest.{FlatSpec, Matchers} + +class EngineJobExecutionActorTransitionsSpec extends FlatSpec with Matchers { + + val callCachingStateCycle = List( + CheckingCallCache, + FetchingCachedOutputsFromDatabase, + BackendIsCopyingCachedOutputs, + InvalidatingCacheEntry + ) + + "EngineJobExecutionActor transitions" should "not list all cache cycle iterations" in { + + import 
EngineJobExecutionActorTransitionsSpec.MultipliableList + + val cacheReadCycles = 5 + + val longCallCachingCycleStateSequence = List( + Pending, + RequestingExecutionToken, + CheckingJobStore, + CheckingCallCache, + FetchingCachedOutputsFromDatabase, + CheckingCacheEntryExistence) ++ callCachingStateCycle * cacheReadCycles ++ List( + WaitingForValueStore, + PreparingJob, + RunningJob, + UpdatingCallCache, + UpdatingJobStore + ) + + longCallCachingCycleStateSequence.length should be(11 + cacheReadCycles * callCachingStateCycle.size) + + val transitionSequence = longCallCachingCycleStateSequence.sliding(2) map { + case fromState :: toState :: _ => EngineJobExecutionActorState.transitionEventString(fromState, toState) + case _ => fail("Programmer blunder. This test writer had one job to do...") + } collect { + case Some(stateName) => stateName + } + + transitionSequence.toList should be(List( + // "Pending", <-- NB: There's no transition into "Pending" because that was the start state + "RequestingExecutionToken", + "CheckingJobStore", + "CallCacheReading", + "WaitingForValueStore", + "PreparingJob", + "RunningJob", + "UpdatingCallCache", + "UpdatingJobStore", + )) + } +} + +object EngineJobExecutionActorTransitionsSpec { + implicit class MultipliableList[A](val list: List[A]) extends AnyVal { + final def *(i: Int): List[A] = if (i == 0 ) List.empty else if (i == 1) list else list ++ (list * (i - 1)) + } +} diff --git a/services/src/main/scala/cromwell/services/metadata/CallMetadataKeys.scala b/services/src/main/scala/cromwell/services/metadata/CallMetadataKeys.scala index 8d66bfbe66a..87d3c62b9b9 100644 --- a/services/src/main/scala/cromwell/services/metadata/CallMetadataKeys.scala +++ b/services/src/main/scala/cromwell/services/metadata/CallMetadataKeys.scala @@ -32,7 +32,6 @@ object CallMetadataKeys { val ReadResultMetadataKey = CallCaching + MetadataKey.KeySeparator + "result" val HitResultMetadataKey = CallCaching + MetadataKey.KeySeparator + "hit" val AllowReuseMetadataKey = CallCaching + MetadataKey.KeySeparator + "allowResultReuse" - val HitFailuresKey = CallCaching + MetadataKey.KeySeparator + "hitFailures" val HashFailuresKey = CallCaching + MetadataKey.KeySeparator + "hashFailures" val HashesKey = CallCaching + MetadataKey.KeySeparator + "hashes" } diff --git a/services/src/main/scala/cromwell/services/metadata/MetadataQuery.scala b/services/src/main/scala/cromwell/services/metadata/MetadataQuery.scala index 2a91afea3e3..d3264b839c3 100644 --- a/services/src/main/scala/cromwell/services/metadata/MetadataQuery.scala +++ b/services/src/main/scala/cromwell/services/metadata/MetadataQuery.scala @@ -94,7 +94,9 @@ object MetadataQueryJobKey { def forMetadataJobKey(jobKey: MetadataJobKey) = MetadataQueryJobKey(jobKey.callFqn, jobKey.index, Option(jobKey.attempt)) } -case class MetadataQuery(workflowId: WorkflowId, jobKey: Option[MetadataQueryJobKey], key: Option[String], +case class MetadataQuery(workflowId: WorkflowId, + jobKey: Option[MetadataQueryJobKey], + key: Option[String], includeKeysOption: Option[NonEmptyList[String]], excludeKeysOption: Option[NonEmptyList[String]], expandSubWorkflows: Boolean) diff --git a/services/src/main/scala/cromwell/services/metadata/MetadataService.scala b/services/src/main/scala/cromwell/services/metadata/MetadataService.scala index 9fb078de984..bad99484d91 100644 --- a/services/src/main/scala/cromwell/services/metadata/MetadataService.scala +++ b/services/src/main/scala/cromwell/services/metadata/MetadataService.scala @@ -2,6 +2,7 @@ package 
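// A standalone usage sketch of the MultipliableList helper above (re-declared here so the snippet
// compiles on its own): list * n repeats the list, which is how the test builds n iterations of
// the call-caching state cycle.
object MultipliableListSketch extends App {
  implicit class MultipliableList[A](val list: List[A]) extends AnyVal {
    final def *(i: Int): List[A] = if (i == 0) List.empty else if (i == 1) list else list ++ (list * (i - 1))
  }

  println(List("CheckingCallCache", "FetchingCachedOutputsFromDatabase") * 3)
  // List(CheckingCallCache, FetchingCachedOutputsFromDatabase, CheckingCallCache, FetchingCachedOutputsFromDatabase, ...)
}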
cromwell.services.metadata import java.time.OffsetDateTime +import io.circe.Json import akka.actor.ActorRef import cats.data.NonEmptyList import cromwell.core._ @@ -12,7 +13,6 @@ import wom.values._ import scala.util.Random - object MetadataService { final val MetadataServiceName = "MetadataService" @@ -38,7 +38,12 @@ object MetadataService { trait MetadataServiceAction extends MetadataServiceMessage with ServiceRegistryMessage { def serviceName = MetadataServiceName } - trait ReadAction extends MetadataServiceAction + trait MetadataReadAction extends MetadataServiceAction + + trait WorkflowMetadataReadAction extends MetadataReadAction { + def workflowId: WorkflowId + } + object PutMetadataAction { def apply(event: MetadataEvent, others: MetadataEvent*) = new PutMetadataAction(List(event) ++ others) } @@ -82,17 +87,25 @@ object MetadataService { final case object ListenToMetadataWriteActor extends MetadataServiceAction with ListenToMessage - final case class GetSingleWorkflowMetadataAction(workflowId: WorkflowId, - includeKeysOption: Option[NonEmptyList[String]], - excludeKeysOption: Option[NonEmptyList[String]], - expandSubWorkflows: Boolean) - extends ReadAction - final case class GetMetadataQueryAction(key: MetadataQuery) extends ReadAction - final case class GetStatus(workflowId: WorkflowId) extends ReadAction - final case class GetLabels(workflowId: WorkflowId) extends ReadAction - final case class WorkflowQuery(parameters: Seq[(String, String)]) extends ReadAction - final case class WorkflowOutputs(workflowId: WorkflowId) extends ReadAction - final case class GetLogs(workflowId: WorkflowId) extends ReadAction + // Utility object to get GetMetadataAction's for a workflow-only query: + object GetSingleWorkflowMetadataAction { + def apply(workflowId: WorkflowId, + includeKeysOption: Option[NonEmptyList[String]], + excludeKeysOption: Option[NonEmptyList[String]], + expandSubWorkflows: Boolean): WorkflowMetadataReadAction = { + GetMetadataAction(MetadataQuery(workflowId, None, None, includeKeysOption, excludeKeysOption, expandSubWorkflows)) + } + } + + + final case class GetMetadataAction(key: MetadataQuery) extends WorkflowMetadataReadAction { + override def workflowId: WorkflowId = key.workflowId + } + final case class GetStatus(workflowId: WorkflowId) extends WorkflowMetadataReadAction + final case class GetLabels(workflowId: WorkflowId) extends WorkflowMetadataReadAction + final case class QueryForWorkflowsMatchingParameters(parameters: Seq[(String, String)]) extends MetadataReadAction + final case class WorkflowOutputs(workflowId: WorkflowId) extends WorkflowMetadataReadAction + final case class GetLogs(workflowId: WorkflowId) extends WorkflowMetadataReadAction case object RefreshSummary extends MetadataServiceAction trait ValidationCallback { def onMalformed(possibleWorkflowId: String): Unit @@ -112,6 +125,9 @@ object MetadataService { def reason: Throwable } + final case class MetadataLookupJsonResponse(query: MetadataQuery, result: Json) extends MetadataServiceResponse + final case class MetadataLookupFailed(query: MetadataQuery, reason: Throwable) + final case class MetadataLookupResponse(query: MetadataQuery, eventList: Seq[MetadataEvent]) extends MetadataServiceResponse final case class MetadataServiceKeyLookupFailed(query: MetadataQuery, reason: Throwable) extends MetadataServiceFailure diff --git a/services/src/main/scala/cromwell/services/metadata/impl/MetadataServiceActor.scala b/services/src/main/scala/cromwell/services/metadata/impl/MetadataServiceActor.scala index 
56c727504ba..fc5bec6b920 100644 --- a/services/src/main/scala/cromwell/services/metadata/impl/MetadataServiceActor.scala +++ b/services/src/main/scala/cromwell/services/metadata/impl/MetadataServiceActor.scala @@ -10,6 +10,7 @@ import cromwell.core.{LoadConfig, WorkflowId} import cromwell.services.MetadataServicesStore import cromwell.services.metadata.MetadataService._ import cromwell.services.metadata.impl.MetadataSummaryRefreshActor.{MetadataSummaryFailure, MetadataSummarySuccess, SummarizeMetadata} +import cromwell.services.metadata.impl.builder.MetadataBuilderActor import cromwell.util.GracefulShutdownHelper import cromwell.util.GracefulShutdownHelper.ShutdownCommand import net.ceedubs.ficus.Ficus._ @@ -24,7 +25,7 @@ object MetadataServiceActor { def props(serviceConfig: Config, globalConfig: Config, serviceRegistryActor: ActorRef) = Props(MetadataServiceActor(serviceConfig, globalConfig, serviceRegistryActor)).withDispatcher(ServiceDispatcher) } -final case class MetadataServiceActor(serviceConfig: Config, globalConfig: Config, serviceRegistryActor: ActorRef) +case class MetadataServiceActor(serviceConfig: Config, globalConfig: Config, serviceRegistryActor: ActorRef) extends Actor with ActorLogging with MetadataDatabaseAccess with MetadataServicesStore with GracefulShutdownHelper { private val decider: Decider = { @@ -35,7 +36,7 @@ final case class MetadataServiceActor(serviceConfig: Config, globalConfig: Confi override val supervisorStrategy = new OneForOneStrategy()(decider) { override def logFailure(context: ActorContext, child: ActorRef, cause: Throwable, decision: Directive) = { val childName = if (child == readActor) "Read" else "Write" - log.error(s"The $childName Metadata Actor died unexpectedly, metadata events might have been lost. Restarting it...", cause) + log.error(cause, s"The $childName Metadata Actor died unexpectedly, metadata events might have been lost. Restarting it...") } } @@ -49,7 +50,10 @@ final case class MetadataServiceActor(serviceConfig: Config, globalConfig: Confi private val metadataReadTimeout: Duration = serviceConfig.getOrElse[Duration]("metadata-read-query-timeout", Duration.Inf) - val readActor = context.actorOf(ReadMetadataActor.props(metadataReadTimeout), "read-metadata-actor") + def readMetadataWorkerActorProps(): Props = ReadDatabaseMetadataWorkerActor.props(metadataReadTimeout).withDispatcher(ServiceDispatcher) + def metadataBuilderActorProps(): Props = MetadataBuilderActor.props(readMetadataWorkerActorProps).withDispatcher(ServiceDispatcher) + + val readActor = context.actorOf(ReadMetadataRegulatorActor.props(metadataBuilderActorProps, readMetadataWorkerActorProps), "singleton-ReadMetadataRegulatorActor") val dbFlushRate = serviceConfig.getOrElse("db-flush-rate", 5.seconds) val dbBatchSize = serviceConfig.getOrElse("db-batch-size", 200) @@ -109,7 +113,7 @@ final case class MetadataServiceActor(serviceConfig: Config, globalConfig: Confi case listen: Listen => writeActor forward listen case v: ValidateWorkflowIdInMetadata => validateWorkflowIdInMetadata(v.possibleWorkflowId, sender()) case v: ValidateWorkflowIdInMetadataSummaries => validateWorkflowIdInMetadataSummaries(v.possibleWorkflowId, sender()) - case action: ReadAction => readActor forward action + case action: MetadataReadAction => readActor forward action case RefreshSummary => summaryActor foreach { _ ! 
SummarizeMetadata(metadataSummaryRefreshLimit, sender()) } case MetadataSummarySuccess => scheduleSummary() case MetadataSummaryFailure(t) => diff --git a/services/src/main/scala/cromwell/services/metadata/impl/ReadDatabaseMetadataWorkerActor.scala b/services/src/main/scala/cromwell/services/metadata/impl/ReadDatabaseMetadataWorkerActor.scala new file mode 100644 index 00000000000..b07490af68d --- /dev/null +++ b/services/src/main/scala/cromwell/services/metadata/impl/ReadDatabaseMetadataWorkerActor.scala @@ -0,0 +1,104 @@ +package cromwell.services.metadata.impl + +import akka.actor.{Actor, ActorLogging, ActorRef, PoisonPill, Props} +import cromwell.core.Dispatcher.ServiceDispatcher +import cromwell.core.{WorkflowId, WorkflowSubmitted} +import cromwell.services.MetadataServicesStore +import cromwell.services.metadata.MetadataService._ +import cromwell.services.metadata.{MetadataQuery, WorkflowQueryParameters} + +import scala.concurrent.Future +import scala.concurrent.duration.Duration +import scala.util.Try + +object ReadDatabaseMetadataWorkerActor { + def props(metadataReadTimeout: Duration) = Props(new ReadDatabaseMetadataWorkerActor(metadataReadTimeout)).withDispatcher(ServiceDispatcher) +} + +class ReadDatabaseMetadataWorkerActor(metadataReadTimeout: Duration) extends Actor with ActorLogging with MetadataDatabaseAccess with MetadataServicesStore { + + implicit val ec = context.dispatcher + + def receive = { + case GetMetadataAction(query@MetadataQuery(_, _, _, _, _, _)) => evaluateRespondAndStop(sender(), getMetadata(query)) + case GetStatus(workflowId) => evaluateRespondAndStop(sender(), getStatus(workflowId)) + case GetLabels(workflowId) => evaluateRespondAndStop(sender(), queryLabelsAndRespond(workflowId)) + case GetLogs(workflowId) => evaluateRespondAndStop(sender(), queryLogsAndRespond(workflowId)) + case query: QueryForWorkflowsMatchingParameters => evaluateRespondAndStop(sender(), queryWorkflowsAndRespond(query.parameters)) + case WorkflowOutputs(id) => evaluateRespondAndStop(sender(), queryWorkflowOutputsAndRespond(id)) + } + + private def evaluateRespondAndStop(sndr: ActorRef, f: Future[Any]) = { + f map { result => + sndr ! result + } andThen { + case _ => self ! PoisonPill + } recover { + case t => log.error(t, s"Programmer Error! Unexpected error fall-through to 'evaluateRespondAndStop in ${getClass.getSimpleName}'") + } + () + } + + private def getMetadata(query: MetadataQuery): Future[MetadataServiceResponse] = { + + queryMetadataEvents(query, metadataReadTimeout) map { + m => MetadataLookupResponse(query, m) + } recover { + case t => MetadataServiceKeyLookupFailed(query, t) + } + } + + private def getStatus(id: WorkflowId): Future[MetadataServiceResponse] = { + + getWorkflowStatus(id) map { + case Some(s) => StatusLookupResponse(id, s) + // There's a workflow existence check at the API layer. If the request has made it this far in the system + // then the workflow exists but it must not have generated a status yet. 
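// A minimal, hypothetical sketch of the one-shot worker pattern used by
// ReadDatabaseMetadataWorkerActor above: the actor answers a single request and then stops itself
// with a PoisonPill. The actor and message types below are illustrative, not Cromwell classes.
import akka.actor.{Actor, ActorRef, PoisonPill, Props}
import scala.concurrent.Future

class OneShotLookupWorker extends Actor {
  import context.dispatcher

  override def receive: Receive = {
    case key: String => respondAndStop(sender(), Future.successful(s"value-for-$key"))
  }

  private def respondAndStop(requester: ActorRef, result: Future[Any]): Unit = {
    // Reply on success, then stop this worker whether the lookup succeeded or failed.
    result.map(requester ! _).andThen { case _ => self ! PoisonPill }
    ()
  }
}

object OneShotLookupWorker {
  def props: Props = Props(new OneShotLookupWorker)
  // Usage: system.actorOf(OneShotLookupWorker.props) ! "some-workflow-id"
}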
+ case None => StatusLookupResponse(id, WorkflowSubmitted) + } recover { + case t => StatusLookupFailed(id, t) + } + } + + private def queryLabelsAndRespond(id: WorkflowId): Future[MetadataServiceResponse] = { + + getWorkflowLabels(id) map { + ls => LabelLookupResponse(id, ls) + } recover { + case t => LabelLookupFailed(id, t) + } + } + + private def queryWorkflowsAndRespond(rawParameters: Seq[(String, String)]): Future[MetadataServiceResponse] = { + def queryWorkflows: Future[(WorkflowQueryResponse, Option[QueryMetadata])] = { + for { + // Future/Try to wrap the exception that might be thrown from WorkflowQueryParameters.apply. + parameters <- Future.fromTry(Try(WorkflowQueryParameters(rawParameters))) + response <- queryWorkflowSummaries(parameters) + } yield response + } + + queryWorkflows map { + case (response, metadata) => WorkflowQuerySuccess(response, metadata) + } recover { + case t => WorkflowQueryFailure(t) + } + } + + private def queryWorkflowOutputsAndRespond(id: WorkflowId): Future[MetadataServiceResponse] = { + queryWorkflowOutputs(id, metadataReadTimeout) map { + o => WorkflowOutputsResponse(id, o) + } recover { + case t => WorkflowOutputsFailure(id, t) + } + } + + private def queryLogsAndRespond(id: WorkflowId): Future[MetadataServiceResponse] = { + queryLogs(id, metadataReadTimeout) map { + s => LogsResponse(id, s) + } recover { + case t => LogsFailure(id, t) + } + } + +} diff --git a/services/src/main/scala/cromwell/services/metadata/impl/ReadMetadataActor.scala b/services/src/main/scala/cromwell/services/metadata/impl/ReadMetadataActor.scala deleted file mode 100644 index c755f321538..00000000000 --- a/services/src/main/scala/cromwell/services/metadata/impl/ReadMetadataActor.scala +++ /dev/null @@ -1,96 +0,0 @@ -package cromwell.services.metadata.impl - -import akka.actor.{Actor, ActorLogging, Props} -import cromwell.core.Dispatcher.ApiDispatcher -import cromwell.core.{WorkflowId, WorkflowSubmitted} -import cromwell.services.MetadataServicesStore -import cromwell.services.metadata.MetadataService._ -import cromwell.services.metadata.{CallMetadataKeys, MetadataQuery, WorkflowQueryParameters} - -import scala.concurrent.duration.Duration -import scala.concurrent.Future -import scala.util.{Failure, Success, Try} - -object ReadMetadataActor { - def props(metadataReadTimeout: Duration) = Props(new ReadMetadataActor(metadataReadTimeout)).withDispatcher(ApiDispatcher) -} - -class ReadMetadataActor(metadataReadTimeout: Duration) extends Actor with ActorLogging with MetadataDatabaseAccess with MetadataServicesStore { - - implicit val ec = context.dispatcher - - def receive = { - case GetSingleWorkflowMetadataAction(workflowId, includeKeysOption, excludeKeysOption, expandSubWorkflows) => - val includeKeys = if (expandSubWorkflows) { - includeKeysOption map { _ :+ CallMetadataKeys.SubWorkflowId } - } else includeKeysOption - queryAndRespond(MetadataQuery(workflowId, None, None, includeKeys, excludeKeysOption, expandSubWorkflows)) - case GetMetadataQueryAction(query@MetadataQuery(_, _, _, _, _, _)) => queryAndRespond(query) - case GetStatus(workflowId) => queryStatusAndRespond(workflowId) - case GetLabels(workflowId) => queryLabelsAndRespond(workflowId) - case GetLogs(workflowId) => queryLogsAndRespond(workflowId) - case query: WorkflowQuery => queryWorkflowsAndRespond(query.parameters) - case WorkflowOutputs(id) => queryWorkflowOutputsAndRespond(id) - } - - private def queryAndRespond(query: MetadataQuery): Unit = { - val sndr = sender() - queryMetadataEvents(query, 
metadataReadTimeout) onComplete { - case Success(m) => sndr ! MetadataLookupResponse(query, m) - case Failure(t) => sndr ! MetadataServiceKeyLookupFailed(query, t) - } - } - - private def queryStatusAndRespond(id: WorkflowId): Unit = { - val sndr = sender() - getWorkflowStatus(id) onComplete { - case Success(Some(s)) => sndr ! StatusLookupResponse(id, s) - // There's a workflow existence check at the API layer. If the request has made it this far in the system - // then the workflow exists but it must not have generated a status yet. - case Success(None) => sndr ! StatusLookupResponse(id, WorkflowSubmitted) - case Failure(t) => sndr ! StatusLookupFailed(id, t) - } - } - - private def queryLabelsAndRespond(id: WorkflowId): Unit = { - val sndr = sender() - getWorkflowLabels(id) onComplete { - case Success(ls) => sndr ! LabelLookupResponse(id, ls) - case Failure(t) => sndr ! LabelLookupFailed(id, t) - } - } - - private def queryWorkflowsAndRespond(rawParameters: Seq[(String, String)]): Unit = { - def queryWorkflows: Future[(WorkflowQueryResponse, Option[QueryMetadata])] = { - for { - // Future/Try to wrap the exception that might be thrown from WorkflowQueryParameters.apply. - parameters <- Future.fromTry(Try(WorkflowQueryParameters(rawParameters))) - response <- queryWorkflowSummaries(parameters) - } yield response - } - - val sndr = sender() - - queryWorkflows onComplete { - case Success((response, metadata)) => sndr ! WorkflowQuerySuccess(response, metadata) - case Failure(t) => sndr ! WorkflowQueryFailure(t) - } - } - - private def queryWorkflowOutputsAndRespond(id: WorkflowId): Unit = { - val replyTo = sender() - queryWorkflowOutputs(id, metadataReadTimeout) onComplete { - case Success(o) => replyTo ! WorkflowOutputsResponse(id, o) - case Failure(t) => replyTo ! WorkflowOutputsFailure(id, t) - } - } - - private def queryLogsAndRespond(id: WorkflowId): Unit = { - val replyTo = sender() - queryLogs(id, metadataReadTimeout) onComplete { - case Success(s) => replyTo ! LogsResponse(id, s) - case Failure(t) => replyTo ! LogsFailure(id, t) - } - } - -} diff --git a/services/src/main/scala/cromwell/services/metadata/impl/ReadMetadataRegulatorActor.scala b/services/src/main/scala/cromwell/services/metadata/impl/ReadMetadataRegulatorActor.scala new file mode 100644 index 00000000000..998e04f1864 --- /dev/null +++ b/services/src/main/scala/cromwell/services/metadata/impl/ReadMetadataRegulatorActor.scala @@ -0,0 +1,85 @@ +package cromwell.services.metadata.impl + +import java.util.UUID + +import akka.actor.{Actor, ActorLogging, ActorRef, Props} +import cromwell.core.Dispatcher.ApiDispatcher +import cromwell.services.metadata.MetadataService +import cromwell.services.metadata.MetadataService.{MetadataQueryResponse, MetadataReadAction, MetadataServiceAction, MetadataServiceResponse, WorkflowMetadataReadAction} +import cromwell.services.metadata.impl.ReadMetadataRegulatorActor.ReadMetadataWorkerMaker +import cromwell.services.metadata.impl.builder.MetadataBuilderActor +import cromwell.services.metadata.impl.builder.MetadataBuilderActor.MetadataBuilderActorResponse + +import scala.collection.mutable + +class ReadMetadataRegulatorActor(metadataBuilderActorProps: ReadMetadataWorkerMaker, readMetadataWorkerProps: ReadMetadataWorkerMaker) extends Actor with ActorLogging { + // This actor tracks all requests coming in from the API service and spins up new builders as needed to service them. 
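// A standalone sketch of the de-duplication this comment describes, without the actor machinery:
// identical in-flight requests share one piece of work, and every requester is notified when the
// single response arrives. Request and Requester are hypothetical stand-ins for
// MetadataReadAction and ActorRef.
object RequestDeduplicationSketch extends App {
  import scala.collection.mutable

  final case class Request(workflowId: String)
  type Requester = String

  val inFlight = new mutable.HashMap[Request, Set[Requester]]()

  /** Returns true only for the first requester, i.e. when real work should be started. */
  def register(request: Request, requester: Requester): Boolean = {
    val current = inFlight.getOrElse(request, Set.empty)
    inFlight.put(request, current + requester)
    current.isEmpty
  }

  /** On completion, every requester waiting on this request receives the shared response. */
  def complete(request: Request, response: String): Unit =
    inFlight.remove(request).getOrElse(Set.empty).foreach(r => println(s"$r <- $response"))

  println(register(Request("wf-1"), "requester-A")) // true: start the lookup
  println(register(Request("wf-1"), "requester-B")) // false: piggy-back on the in-flight lookup
  complete(Request("wf-1"), "metadata json")        // both requesters are notified once
}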
+ // If the processing of an identical request is already in flight the requester will be added to a set of requesters + // to notify when the response from the first request becomes available. + + // Map from requests (MetadataServiceActions) to requesters. + val apiRequests = new mutable.HashMap[MetadataServiceAction, Set[ActorRef]]() + // Map from ActorRefs of MetadataBuilderActors to requests. When a response comes back from a MetadataBuilderActor its + // ActorRef is used as the lookup key in this Map. The result of that lookup yields the request which in turn is used + // as the lookup key for requesters in the above Map. + val builderRequests = new mutable.HashMap[ActorRef, MetadataServiceAction]() + + override def receive: Receive = { + // This indirection via 'MetadataReadAction' lets the compiler make sure we cover all cases in the sealed trait: + case action: MetadataReadAction => + action match { + case singleWorkflowAction: WorkflowMetadataReadAction => + val currentRequesters = apiRequests.getOrElse(singleWorkflowAction, Set.empty) + apiRequests.put(singleWorkflowAction, currentRequesters + sender()) + if (currentRequesters.isEmpty) { + + val builderActor = context.actorOf(metadataBuilderActorProps().withDispatcher(ApiDispatcher), MetadataBuilderActor.uniqueActorName(singleWorkflowAction.workflowId.toString)) + builderRequests.put(builderActor, singleWorkflowAction) + builderActor ! singleWorkflowAction + } + case crossWorkflowAction: MetadataService.QueryForWorkflowsMatchingParameters => + val currentRequesters = apiRequests.getOrElse(crossWorkflowAction, Set.empty) + apiRequests.put(crossWorkflowAction, currentRequesters + sender()) + if (currentRequesters.isEmpty) { + val readMetadataActor = context.actorOf(readMetadataWorkerProps.apply().withDispatcher(ApiDispatcher), s"MetadataQueryWorker-${UUID.randomUUID()}") + builderRequests.put(readMetadataActor, crossWorkflowAction) + readMetadataActor ! crossWorkflowAction + } + } + case serviceResponse: MetadataServiceResponse => + serviceResponse match { + case response: MetadataBuilderActorResponse => handleResponseFromMetadataWorker(response) + case response: MetadataQueryResponse => handleResponseFromMetadataWorker(response) + } + case other => log.error(s"Programmer Error: Unexpected message $other received from $sender") + } + + def handleResponseFromMetadataWorker(response: Any): Unit = { + val sndr = sender() + builderRequests.get(sndr) match { + case Some(action) => + apiRequests.get(action) match { + case Some(requesters) => + apiRequests.remove(action) + requesters foreach { _ ! 
response} + case None => + // unpossible: there had to have been a request that corresponded to this response + log.error(s"Programmer Error: MetadataBuilderRegulatorActor has no registered requesters found for action: $action") + } + builderRequests.remove(sndr) + () + case None => + // unpossible: this actor should know about all the child MetadataBuilderActors it has begotten + log.error(s"Programmer Error: MetadataBuilderRegulatorActor received a metadata response from an unrecognized sender $sndr") + } + } +} + +object ReadMetadataRegulatorActor { + + type ReadMetadataWorkerMaker = () => Props + + def props(metadataBuilderActorProps: ReadMetadataWorkerMaker, readMetadataWorkerProps: ReadMetadataWorkerMaker): Props = { + Props(new ReadMetadataRegulatorActor(metadataBuilderActorProps, readMetadataWorkerProps)) + } +} diff --git a/engine/src/main/scala/cromwell/webservice/metadata/MetadataBuilderActor.scala b/services/src/main/scala/cromwell/services/metadata/impl/builder/MetadataBuilderActor.scala similarity index 63% rename from engine/src/main/scala/cromwell/webservice/metadata/MetadataBuilderActor.scala rename to services/src/main/scala/cromwell/services/metadata/impl/builder/MetadataBuilderActor.scala index f54c36630ee..80c6860a370 100644 --- a/engine/src/main/scala/cromwell/webservice/metadata/MetadataBuilderActor.scala +++ b/services/src/main/scala/cromwell/services/metadata/impl/builder/MetadataBuilderActor.scala @@ -1,18 +1,16 @@ -package cromwell.webservice.metadata +package cromwell.services.metadata.impl.builder import java.time.OffsetDateTime -import java.util.UUID +import java.util.concurrent.atomic.AtomicLong -import akka.actor.{ActorRef, LoggingFSM, Props} +import akka.actor.{ActorRef, LoggingFSM, PoisonPill, Props} import common.collections.EnhancedCollections._ -import cromwell.webservice.metadata.MetadataComponent._ -import cromwell.core.Dispatcher.ApiDispatcher +import cromwell.services.metadata.impl.builder.MetadataComponent._ import cromwell.core.ExecutionIndex.ExecutionIndex import cromwell.core._ -import cromwell.services.ServiceRegistryActor.ServiceRegistryFailure import cromwell.services.metadata.MetadataService._ import cromwell.services.metadata._ -import cromwell.webservice.metadata.MetadataBuilderActor._ +import cromwell.services.metadata.impl.builder.MetadataBuilderActor._ import mouse.all._ import org.slf4j.LoggerFactory import spray.json._ @@ -21,21 +19,26 @@ import scala.language.postfixOps object MetadataBuilderActor { - sealed abstract class MetadataBuilderActorResponse - case class BuiltMetadataResponse(response: JsObject) extends MetadataBuilderActorResponse - case class FailedMetadataResponse(reason: Throwable) extends MetadataBuilderActorResponse + sealed trait MetadataBuilderActorResponse extends MetadataServiceResponse { def originalRequest: MetadataReadAction } + final case class BuiltMetadataResponse(originalRequest: MetadataReadAction, responseJson: JsObject) extends MetadataBuilderActorResponse + final case class FailedMetadataResponse(originalRequest: MetadataReadAction, reason: Throwable) extends MetadataBuilderActorResponse sealed trait MetadataBuilderActorState case object Idle extends MetadataBuilderActorState case object WaitingForMetadataService extends MetadataBuilderActorState case object WaitingForSubWorkflows extends MetadataBuilderActorState - case class MetadataBuilderActorData( - originalQuery: MetadataQuery, - originalEvents: Seq[MetadataEvent], - subWorkflowsMetadata: Map[String, JsValue], - waitFor: Int - ) { + sealed trait 
MetadataBuilderActorData + + case object IdleData extends MetadataBuilderActorData + final case class HasWorkData(target: ActorRef, + originalRequest: MetadataReadAction) extends MetadataBuilderActorData + final case class HasReceivedEventsData(target: ActorRef, + originalRequest: MetadataReadAction, + originalQuery: MetadataQuery, + originalEvents: Seq[MetadataEvent], + subWorkflowsMetadata: Map[String, JsValue], + waitFor: Int) extends MetadataBuilderActorData { def withSubWorkflow(id: String, metadata: JsValue) = { this.copy(subWorkflowsMetadata = subWorkflowsMetadata + ((id, metadata))) } @@ -43,8 +46,8 @@ object MetadataBuilderActor { def isComplete = subWorkflowsMetadata.size == waitFor } - def props(serviceRegistryActor: ActorRef) = { - Props(new MetadataBuilderActor(serviceRegistryActor)).withDispatcher(ApiDispatcher) + def props(readMetadataWorkerMaker: () => Props) = { + Props(new MetadataBuilderActor(readMetadataWorkerMaker)) } val log = LoggerFactory.getLogger("MetadataBuilder") @@ -137,7 +140,9 @@ object MetadataBuilderActor { JsObject(events.groupBy(_.key.workflowId.toString) safeMapValues parseWorkflowEvents(includeCallsIfEmpty = true, expandedValues)) } - def uniqueActorName: String = List("MetadataBuilderActor", UUID.randomUUID()).mkString("-") + val actorIdIterator = new AtomicLong(0) + + def uniqueActorName(workflowId: String): String = s"${getClass.getSimpleName}.${actorIdIterator.getAndIncrement()}-for-$workflowId" case class JobKeyAndGrouping(jobKey: MetadataJobKey, grouping: String) @@ -194,100 +199,149 @@ object MetadataBuilderActor { val tupledGrouper = (makeSyntheticGroupedExecutionEvents _).tupled nonExecutionEvents ++ ungroupedExecutionEvents.values.toList.flatten ++ (groupedExecutionEventsByGrouping.toList flatMap tupledGrouper) } + + + + def processMetadataEvents(query: MetadataQuery, eventsList: Seq[MetadataEvent], expandedValues: Map[String, JsValue]): JsObject = { + // Should we send back some message ? Or even fail the request instead ? + if (eventsList.isEmpty) JsObject(Map.empty[String, JsValue]) + else { + query match { + case MetadataQuery(w, _, _, _, _, _) => workflowMetadataResponse(w, eventsList, includeCallsIfEmpty = true, expandedValues) + case _ => MetadataBuilderActor.parse(eventsList, expandedValues) + } + } + } + + def processStatusResponse(workflowId: WorkflowId, status: WorkflowState): JsObject = { + JsObject(Map( + WorkflowMetadataKeys.Status -> JsString(status.toString), + WorkflowMetadataKeys.Id -> JsString(workflowId.toString) + )) + } + + def processLabelsResponse(workflowId: WorkflowId, labels: Map[String, String]): JsObject = { + val jsLabels = labels map { case (k, v) => k -> JsString(v) } + JsObject(Map( + WorkflowMetadataKeys.Id -> JsString(workflowId.toString), + WorkflowMetadataKeys.Labels -> JsObject(jsLabels) + )) + } + + def processOutputsResponse(id: WorkflowId, events: Seq[MetadataEvent]): JsObject = { + // Add in an empty output event if there aren't already any output events. 
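+    // This keeps the built response consistent: a workflow with no outputs still gets an (empty) outputs section.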
+ val hasOutputs = events exists { _.key.key.startsWith(WorkflowMetadataKeys.Outputs + ":") } + val updatedEvents = if (hasOutputs) events else MetadataEvent.empty(MetadataKey(id, None, WorkflowMetadataKeys.Outputs)) +: events + + workflowMetadataResponse(id, updatedEvents, includeCallsIfEmpty = false, Map.empty) + } + + def workflowMetadataResponse(workflowId: WorkflowId, + eventsList: Seq[MetadataEvent], + includeCallsIfEmpty: Boolean, + expandedValues: Map[String, JsValue]): JsObject = { + JsObject(MetadataBuilderActor.parseWorkflowEvents(includeCallsIfEmpty, expandedValues)(eventsList).fields + ("id" -> JsString(workflowId.toString))) + } } -class MetadataBuilderActor(serviceRegistryActor: ActorRef) extends LoggingFSM[MetadataBuilderActorState, Option[MetadataBuilderActorData]] - with DefaultJsonProtocol { - import MetadataBuilderActor._ +class MetadataBuilderActor(readMetadataWorkerMaker: () => Props) + extends LoggingFSM[MetadataBuilderActorState, MetadataBuilderActorData] with DefaultJsonProtocol { - private var target: ActorRef = ActorRef.noSender + import MetadataBuilderActor._ - startWith(Idle, None) + startWith(Idle, IdleData) val tag = self.path.name when(Idle) { - case Event(action: MetadataServiceAction, _) => - target = sender() - serviceRegistryActor ! action - goto(WaitingForMetadataService) + case Event(action: MetadataReadAction, IdleData) => + + val readActor = context.actorOf(readMetadataWorkerMaker.apply()) + + readActor ! action + goto(WaitingForMetadataService) using HasWorkData(sender(), action) } - private def allDone = { + private def allDone() = { context stop self stay() } when(WaitingForMetadataService) { - case Event(StatusLookupResponse(w, status), _) => - target ! BuiltMetadataResponse(processStatusResponse(w, status)) - allDone - case Event(LabelLookupResponse(w, labels), _) => - target ! BuiltMetadataResponse(processLabelsResponse(w, labels)) - allDone - case Event(WorkflowOutputsResponse(id, events), _) => - // Add in an empty output event if there aren't already any output events. - val hasOutputs = events exists { _.key.key.startsWith(WorkflowMetadataKeys.Outputs + ":") } - val updatedEvents = if (hasOutputs) events else MetadataEvent.empty(MetadataKey(id, None, WorkflowMetadataKeys.Outputs)) +: events - target ! BuiltMetadataResponse(workflowMetadataResponse(id, updatedEvents, includeCallsIfEmpty = false, Map.empty)) - allDone - case Event(LogsResponse(w, l), _) => - target ! BuiltMetadataResponse(workflowMetadataResponse(w, l, includeCallsIfEmpty = false, Map.empty)) - allDone - case Event(MetadataLookupResponse(query, metadata), None) => processMetadataResponse(query, metadata) - case Event(_: ServiceRegistryFailure, _) => - target ! FailedMetadataResponse(new RuntimeException("Can't find metadata service")) - allDone - case Event(failure: MetadataServiceFailure, _) => - target ! FailedMetadataResponse(failure.reason) - allDone - case Event(unexpectedMessage, stateData) => - target ! FailedMetadataResponse(new RuntimeException(s"MetadataBuilderActor $tag(WaitingForMetadataService, $stateData) got an unexpected message: $unexpectedMessage")) - context stop self - stay() + case Event(StatusLookupResponse(w, status), HasWorkData(target, originalRequest)) => + target ! BuiltMetadataResponse(originalRequest, processStatusResponse(w, status)) + allDone() + case Event(LabelLookupResponse(w, labels), HasWorkData(target, originalRequest)) => + target ! 
BuiltMetadataResponse(originalRequest, processLabelsResponse(w, labels)) + allDone() + case Event(WorkflowOutputsResponse(id, events), HasWorkData(target, originalRequest)) => + target ! BuiltMetadataResponse(originalRequest, processOutputsResponse(id, events)) + allDone() + case Event(LogsResponse(w, l), HasWorkData(target, originalRequest)) => + target ! BuiltMetadataResponse(originalRequest, workflowMetadataResponse(w, l, includeCallsIfEmpty = false, Map.empty)) + allDone() + case Event(MetadataLookupResponse(query, metadata), HasWorkData(target, originalRequest)) => + processMetadataResponse(query, metadata, target, originalRequest) + case Event(failure: MetadataServiceFailure, HasWorkData(target, originalRequest)) => + target ! FailedMetadataResponse(originalRequest, failure.reason) + allDone() } when(WaitingForSubWorkflows) { - case Event(mbr: MetadataBuilderActorResponse, Some(data)) => + case Event(mbr: MetadataBuilderActorResponse, data: HasReceivedEventsData) => processSubWorkflowMetadata(mbr, data) + case Event(failure: MetadataServiceFailure, data: HasReceivedEventsData) => + data.target ! FailedMetadataResponse(data.originalRequest, failure.reason) + allDone() } whenUnhandled { - case Event(message, data) => - log.error(s"Received unexpected message $message in state $stateName with data $data") + case Event(message, IdleData) => + log.error(s"Received unexpected message $message in state $stateName with $IdleData") stay() + case Event(message, HasWorkData(target, _)) => + log.error(s"Received unexpected message $message in state $stateName with target: $target") + self ! PoisonPill + stay + case Event(message, MetadataBuilderActor.HasReceivedEventsData(target, _, _, _, _, _)) => + log.error(s"Received unexpected message $message in state $stateName with target: $target") + self ! PoisonPill + stay } - def processSubWorkflowMetadata(metadataResponse: MetadataBuilderActorResponse, data: MetadataBuilderActorData) = { + def processSubWorkflowMetadata(metadataResponse: MetadataBuilderActorResponse, data: HasReceivedEventsData) = { metadataResponse match { - case BuiltMetadataResponse(js) => - js.fields.get(WorkflowMetadataKeys.Id) match { - case Some(subId: JsString) => - val newData = data.withSubWorkflow(subId.value, js) - - if (newData.isComplete) { - buildAndStop(data.originalQuery, data.originalEvents, newData.subWorkflowsMetadata) - } else { - stay() using Option(newData) - } - case _ => failAndDie(new RuntimeException("Received unexpected response while waiting for sub workflow metadata.")) + case BuiltMetadataResponse(GetMetadataAction(queryKey), js) => + val subId: WorkflowId = queryKey.workflowId + val newData = data.withSubWorkflow(subId.toString, js) + + if (newData.isComplete) { + buildAndStop(data.originalQuery, data.originalEvents, newData.subWorkflowsMetadata, data.target, data.originalRequest) + } else { + stay() using newData } - case FailedMetadataResponse(e) => failAndDie(new RuntimeException("Failed to retrieve metadata for a sub workflow.", e)) + case FailedMetadataResponse(originalRequest, e) => + failAndDie(new RuntimeException(s"Failed to retrieve metadata for a sub workflow ($originalRequest)", e), data.target, data.originalRequest) + + case other => + val message = s"Programmer Error: MetadataBuilderActor expected subworkflow metadata response type but got ${other.getClass.getSimpleName}" + log.error(message) + failAndDie(new Exception(message), data.target, data.originalRequest) } } - def failAndDie(reason: Throwable) = { - target ! 
FailedMetadataResponse(reason) + def failAndDie(reason: Throwable, target: ActorRef, originalRequest: MetadataReadAction) = { + target ! FailedMetadataResponse(originalRequest, reason) context stop self stay() } - def buildAndStop(query: MetadataQuery, eventsList: Seq[MetadataEvent], expandedValues: Map[String, JsValue]) = { + def buildAndStop(query: MetadataQuery, eventsList: Seq[MetadataEvent], expandedValues: Map[String, JsValue], target: ActorRef, originalRequest: MetadataReadAction) = { val groupedEvents = groupEvents(eventsList) - target ! BuiltMetadataResponse(processMetadataEvents(query, groupedEvents, expandedValues)) - allDone + target ! BuiltMetadataResponse(originalRequest, processMetadataEvents(query, groupedEvents, expandedValues)) + allDone() } - def processMetadataResponse(query: MetadataQuery, eventsList: Seq[MetadataEvent]) = { + def processMetadataResponse(query: MetadataQuery, eventsList: Seq[MetadataEvent], target: ActorRef, originalRequest: MetadataReadAction) = { if (query.expandSubWorkflows) { // Scan events for sub workflow ids val subWorkflowIds = eventsList.collect({ @@ -295,50 +349,17 @@ class MetadataBuilderActor(serviceRegistryActor: ActorRef) extends LoggingFSM[Me }).flatten.distinct // If none is found just proceed to build metadata - if (subWorkflowIds.isEmpty) buildAndStop(query, eventsList, Map.empty) + if (subWorkflowIds.isEmpty) buildAndStop(query, eventsList, Map.empty, target, originalRequest) else { // Otherwise spin up a metadata builder actor for each sub workflow subWorkflowIds foreach { subId => - val subMetadataBuilder = context.actorOf(MetadataBuilderActor.props(serviceRegistryActor), uniqueActorName) - subMetadataBuilder ! GetMetadataQueryAction(query.copy(workflowId = WorkflowId.fromString(subId))) + val subMetadataBuilder = context.actorOf(MetadataBuilderActor.props(readMetadataWorkerMaker), uniqueActorName(subId)) + subMetadataBuilder ! GetMetadataAction(query.copy(workflowId = WorkflowId.fromString(subId))) } - goto(WaitingForSubWorkflows) using Option(MetadataBuilderActorData(query, eventsList, Map.empty, subWorkflowIds.size)) + goto(WaitingForSubWorkflows) using HasReceivedEventsData(target, originalRequest, query, eventsList, Map.empty, subWorkflowIds.size) } } else { - buildAndStop(query, eventsList, Map.empty) - } - } - - def processMetadataEvents(query: MetadataQuery, eventsList: Seq[MetadataEvent], expandedValues: Map[String, JsValue]): JsObject = { - // Should we send back some message ? Or even fail the request instead ? 
- if (eventsList.isEmpty) JsObject(Map.empty[String, JsValue]) - else { - query match { - case MetadataQuery(w, _, _, _, _, _) => workflowMetadataResponse(w, eventsList, includeCallsIfEmpty = true, expandedValues) - case _ => MetadataBuilderActor.parse(eventsList, expandedValues) - } + buildAndStop(query, eventsList, Map.empty, target, originalRequest) } } - - def processStatusResponse(workflowId: WorkflowId, status: WorkflowState): JsObject = { - JsObject(Map( - WorkflowMetadataKeys.Status -> JsString(status.toString), - WorkflowMetadataKeys.Id -> JsString(workflowId.toString) - )) - } - - def processLabelsResponse(workflowId: WorkflowId, labels: Map[String, String]): JsObject = { - val jsLabels = labels map { case (k, v) => k -> JsString(v) } - JsObject(Map( - WorkflowMetadataKeys.Id -> JsString(workflowId.toString), - WorkflowMetadataKeys.Labels -> JsObject(jsLabels) - )) - } - - private def workflowMetadataResponse(workflowId: WorkflowId, - eventsList: Seq[MetadataEvent], - includeCallsIfEmpty: Boolean, - expandedValues: Map[String, JsValue]): JsObject = { - JsObject(MetadataBuilderActor.parseWorkflowEvents(includeCallsIfEmpty, expandedValues)(eventsList).fields + ("id" -> JsString(workflowId.toString))) - } } diff --git a/engine/src/main/scala/cromwell/webservice/metadata/MetadataComponent.scala b/services/src/main/scala/cromwell/services/metadata/impl/builder/MetadataComponent.scala similarity index 99% rename from engine/src/main/scala/cromwell/webservice/metadata/MetadataComponent.scala rename to services/src/main/scala/cromwell/services/metadata/impl/builder/MetadataComponent.scala index d0173a50fe3..63241d8c6d9 100644 --- a/engine/src/main/scala/cromwell/webservice/metadata/MetadataComponent.scala +++ b/services/src/main/scala/cromwell/services/metadata/impl/builder/MetadataComponent.scala @@ -1,4 +1,4 @@ -package cromwell.webservice.metadata +package cromwell.services.metadata.impl.builder import cats.instances.list._ import cats.instances.map._ diff --git a/services/src/test/scala/cromwell/services/database/ConnectionMetadata.scala b/services/src/test/scala/cromwell/services/database/ConnectionMetadata.scala new file mode 100644 index 00000000000..0ba8478ad4a --- /dev/null +++ b/services/src/test/scala/cromwell/services/database/ConnectionMetadata.scala @@ -0,0 +1,18 @@ +package cromwell.services.database + +/** + * Metadata from the JDBC connection and driver. 
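+ * Values come from java.sql.DatabaseMetaData (see DatabaseTestKit.connectionMetadata).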
+ */ +case class ConnectionMetadata +( + databaseProductName: String, + databaseProductVersion: String, + databaseMajorVersion: Int, + databaseMinorVersion: Int, + driverName: String, + driverVersion: String, + driverMajorVersion: Int, + driverMinorVersion: Int, + jdbcMajorVersion: Int, + jdbcMinorVersion: Int, +) diff --git a/services/src/test/scala/cromwell/services/database/CromwellDatabaseType.scala b/services/src/test/scala/cromwell/services/database/CromwellDatabaseType.scala index cadc329480c..8a8ede69e19 100644 --- a/services/src/test/scala/cromwell/services/database/CromwellDatabaseType.scala +++ b/services/src/test/scala/cromwell/services/database/CromwellDatabaseType.scala @@ -12,7 +12,7 @@ sealed trait CromwellDatabaseType[T <: SlickDatabase] { val name: String val liquibaseSettings: LiquibaseSettings - def newDatabase(config: Config): T + def newDatabase(config: Config): T with TestSlickDatabase override def toString: String = name } @@ -28,12 +28,16 @@ object EngineDatabaseType extends CromwellDatabaseType[EngineSlickDatabase] { override val name: String = "Engine" override val liquibaseSettings: LiquibaseSettings = EngineServicesStore.EngineLiquibaseSettings - override def newDatabase(config: Config): EngineSlickDatabase = new EngineSlickDatabase(config) + override def newDatabase(config: Config): EngineSlickDatabase with TestSlickDatabase = { + new EngineSlickDatabase(config) with TestSlickDatabase + } } object MetadataDatabaseType extends CromwellDatabaseType[MetadataSlickDatabase] { override val name: String = "Metadata" override val liquibaseSettings: LiquibaseSettings = MetadataServicesStore.MetadataLiquibaseSettings - override def newDatabase(config: Config): MetadataSlickDatabase = new MetadataSlickDatabase(config) + override def newDatabase(config: Config): MetadataSlickDatabase with TestSlickDatabase = { + new MetadataSlickDatabase(config) with TestSlickDatabase + } } diff --git a/services/src/test/scala/cromwell/services/database/DatabasePlatform.scala b/services/src/test/scala/cromwell/services/database/DatabasePlatform.scala new file mode 100644 index 00000000000..db4ecd671b5 --- /dev/null +++ b/services/src/test/scala/cromwell/services/database/DatabasePlatform.scala @@ -0,0 +1,26 @@ +package cromwell.services.database + +/** + * Cromwell supported DBMS platforms. + */ +sealed trait DatabasePlatform { + def name: String + + override def toString: String = name +} + +case object HsqldbDatabasePlatform extends DatabasePlatform { + override val name: String = "HSQLDB" +} + +case object MariadbDatabasePlatform extends DatabasePlatform { + override val name: String = "MariaDB" +} + +case object MysqlDatabasePlatform extends DatabasePlatform { + override val name: String = "MySQL" +} + +case object PostgresqlDatabasePlatform extends DatabasePlatform { + override val name: String = "PostgreSQL" +} diff --git a/services/src/test/scala/cromwell/services/database/DatabaseSystem.scala b/services/src/test/scala/cromwell/services/database/DatabaseSystem.scala index 5d04236b879..453aeaa344c 100644 --- a/services/src/test/scala/cromwell/services/database/DatabaseSystem.scala +++ b/services/src/test/scala/cromwell/services/database/DatabaseSystem.scala @@ -1,56 +1,61 @@ package cromwell.services.database /** - * Cromwell supported DBMS. + * Cromwell unit tested DBMS. Each DBMS must match a database spun up in test.inc.sh. */ // Someday https://github.com/lloydmeta/enumeratum, someday... 
sealed trait DatabaseSystem { - val productName: String - val shortName: String - val configPath: String + val name: String + val platform: DatabasePlatform - override def toString: String = productName + override def toString: String = name } object DatabaseSystem { - def apply(productName: String): DatabaseSystem = { - productName match { - case MysqlDatabaseSystem.productName => MysqlDatabaseSystem - case HsqldbDatabaseSystem.productName => HsqldbDatabaseSystem - case PostgresqlDatabaseSystem.productName => PostgresqlDatabaseSystem - case MariadbDatabaseSystem.productName => MariadbDatabaseSystem - case _ => throw new UnsupportedOperationException(s"Unknown database system: $productName") - } - } - val All: Seq[DatabaseSystem] = List( HsqldbDatabaseSystem, - MariadbDatabaseSystem, - MysqlDatabaseSystem, - PostgresqlDatabaseSystem, + MariadbEarliestDatabaseSystem, + MariadbLatestDatabaseSystem, + MysqlEarliestDatabaseSystem, + MysqlLatestDatabaseSystem, + PostgresqlEarliestDatabaseSystem, + PostgresqlLatestDatabaseSystem, ) } case object HsqldbDatabaseSystem extends DatabaseSystem { - override val productName: String = "HSQL Database Engine" - override val shortName: String = "HSQLDB" - override val configPath: String = "database" + override val name: String = "HSQLDB" + override val platform: HsqldbDatabasePlatform.type = HsqldbDatabasePlatform +} + +sealed trait NetworkDatabaseSystem extends DatabaseSystem + +case object MariadbEarliestDatabaseSystem extends NetworkDatabaseSystem { + override val name: String = "MariaDB" + override val platform: MariadbDatabasePlatform.type = MariadbDatabasePlatform +} + +case object MariadbLatestDatabaseSystem extends NetworkDatabaseSystem { + override val name: String = "MariaDB (latest)" + override val platform: MariadbDatabasePlatform.type = MariadbDatabasePlatform +} + +case object MysqlEarliestDatabaseSystem extends NetworkDatabaseSystem { + override val name: String = "MySQL" + override val platform: MysqlDatabasePlatform.type = MysqlDatabasePlatform } -case object MariadbDatabaseSystem extends DatabaseSystem { - override val productName: String = "MariaDB" - override val shortName = productName - override val configPath: String = "database-test-mariadb" +case object MysqlLatestDatabaseSystem extends NetworkDatabaseSystem { + override val name: String = "MySQL (latest)" + override val platform: MysqlDatabasePlatform.type = MysqlDatabasePlatform } -case object MysqlDatabaseSystem extends DatabaseSystem { - override val productName: String = "MySQL" - override val shortName = productName - override val configPath: String = "database-test-mysql" +case object PostgresqlEarliestDatabaseSystem extends NetworkDatabaseSystem { + override val name: String = "PostgreSQL" + override val platform: PostgresqlDatabasePlatform.type = PostgresqlDatabasePlatform } -case object PostgresqlDatabaseSystem extends DatabaseSystem { - override val productName: String = "PostgreSQL" - override val shortName = productName - override val configPath: String = "database-test-postgresql" +case object PostgresqlLatestDatabaseSystem extends NetworkDatabaseSystem { + override val name: String = "PostgreSQL (latest)" + override val platform: PostgresqlDatabasePlatform.type = PostgresqlDatabasePlatform } diff --git a/services/src/test/scala/cromwell/services/database/DatabaseTestKit.scala b/services/src/test/scala/cromwell/services/database/DatabaseTestKit.scala index ae3f41828a8..8c08bae25d8 100644 --- a/services/src/test/scala/cromwell/services/database/DatabaseTestKit.scala 
+++ b/services/src/test/scala/cromwell/services/database/DatabaseTestKit.scala @@ -1,22 +1,26 @@ package cromwell.services.database +import java.net.URLEncoder import java.sql.Connection import better.files._ import com.typesafe.config.{Config, ConfigFactory} +import com.typesafe.scalalogging.StrictLogging import cromwell.database.migration.liquibase.LiquibaseUtils import cromwell.database.slick.SlickDatabase +import cromwell.services.ServicesStore.EnhancedSqlDatabase import cromwell.services.{EngineServicesStore, MetadataServicesStore} import liquibase.snapshot.DatabaseSnapshot import liquibase.structure.core.Index import slick.jdbc.JdbcProfile import slick.jdbc.meta.{MIndexInfo, MPrimaryKey} -import cromwell.services.ServicesStore.EnhancedSqlDatabase import scala.concurrent.Await import scala.concurrent.duration.Duration -object DatabaseTestKit { +object DatabaseTestKit extends StrictLogging { + + private lazy val hsqldbDatabaseConfig = ConfigFactory.load().getConfig("database") /** * Lends a connection to a block of code. @@ -65,13 +69,7 @@ object DatabaseTestKit { * Creates a new in memory HSQLDB that should be closed after use. */ def inMemoryDatabase[A <: SlickDatabase](databaseType: CromwellDatabaseType[A], schemaManager: SchemaManager): A = { - val databaseConfig = ConfigFactory.parseString( - s"""|db.url = "jdbc:hsqldb:mem:$${uniqueSchema};shutdown=false;hsqldb.tx=mvcc" - |db.driver = "org.hsqldb.jdbcDriver" - |db.connectionTimeout = 3000 - |profile = "slick.jdbc.HsqldbProfile$$" - |liquibase.updateSchema = false - |""".stripMargin) + val databaseConfig = ConfigFactory.parseString("liquibase.updateSchema = false") withFallback hsqldbDatabaseConfig val database = databaseType.newDatabase(databaseConfig) schemaManager match { case SlickSchemaManager => SlickDatabase.createSchema(database) @@ -84,7 +82,7 @@ object DatabaseTestKit { * Opens an initialized database. */ def initializedDatabaseFromConfig[A <: SlickDatabase](databaseType: CromwellDatabaseType[A], - databaseConfig: Config): A = { + databaseConfig: Config): A with TestSlickDatabase = { val database = databaseType.newDatabase(databaseConfig) database.initialized(databaseType.liquibaseSettings) } @@ -93,11 +91,173 @@ object DatabaseTestKit { * Opens an initialized database. */ def initializedDatabaseFromSystem[A <: SlickDatabase](databaseType: CromwellDatabaseType[A], - databaseSystem: DatabaseSystem): A = { - val databaseConfig = ConfigFactory.load.getConfig(databaseSystem.configPath) + databaseSystem: DatabaseSystem): A with TestSlickDatabase = { + val databaseConfig = getConfig(databaseSystem) initializedDatabaseFromConfig(databaseType, databaseConfig) } + /** + * Opens a database connection without any liquibase being performed. + */ + def schemalessDatabaseFromSystem(databaseSystem: DatabaseSystem): SchemalessSlickDatabase with TestSlickDatabase = { + val databaseConfig = getConfig(databaseSystem) + new SchemalessSlickDatabase(databaseConfig) with TestSlickDatabase + } + + private var configCache: Map[NetworkDatabaseSystem, Config] = Map.empty + private val configCacheMutex = new Object + + /** + * Returns a config for a DatabaseSystem. 
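+   * HSQLDB always uses the in-memory config; configs for network database systems are created on first use and cached per system.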
+ */ + private def getConfig(databaseSystem: DatabaseSystem): Config = { + databaseSystem match { + case HsqldbDatabaseSystem => hsqldbDatabaseConfig + case networkDatabaseSystem: NetworkDatabaseSystem => + configCacheMutex synchronized { + configCache.get(networkDatabaseSystem) match { + case Some(config) => config + case None => + val config = getConfig(networkDatabaseSystem) + configCache += networkDatabaseSystem -> config + config + } + } + } + } + + private case class DatabaseSystemSettings(environmentKey: String, defaultPort: Int, dockerTag: String) + + /** + * Returns the network settings for a database system. + */ + private def getDatabaseSystemSettings(networkDatabaseSystem: NetworkDatabaseSystem): DatabaseSystemSettings = { + networkDatabaseSystem match { + // The below list of docker tags should be synced with the tags under "BUILD_TYPE=dbms" in .travis.yml + case MariadbEarliestDatabaseSystem => DatabaseSystemSettings("MARIADB", 23306, "5.5") + case MariadbLatestDatabaseSystem => DatabaseSystemSettings("MARIADB_LATEST", 33306, "latest") + case MysqlEarliestDatabaseSystem => DatabaseSystemSettings("MYSQL", 3306, "5.6") + case MysqlLatestDatabaseSystem => DatabaseSystemSettings("MYSQL_LATEST", 13306, "latest") + case PostgresqlEarliestDatabaseSystem => DatabaseSystemSettings("POSTGRESQL", 5432, "9.6") + case PostgresqlLatestDatabaseSystem => DatabaseSystemSettings("POSTGRESQL_LATEST", 15432, "latest") + // The list above of docker tags should be synced with the tags under "BUILD_TYPE=dbms" in .travis.yml + } + } + + /** + * Returns a config for a NetworkDatabaseSystem. + */ + private def getConfig(networkDatabaseSystem: NetworkDatabaseSystem): Config = { + val databaseSystemSettings = getDatabaseSystemSettings(networkDatabaseSystem) + val systemName = networkDatabaseSystem.name + val environmentKey = databaseSystemSettings.environmentKey + val jdbcPortDefault = databaseSystemSettings.defaultPort + val dockerTag = databaseSystemSettings.dockerTag + + val jdbcUsername = "cromwell" + val jdbcPassword = "test" + val jdbcSchema = "cromwell_test" + val jdbcHostname: String = sys.env.getOrElse(s"CROMWELL_BUILD_${environmentKey}_HOSTNAME", "localhost") + val jdbcPort = sys.env.get(s"CROMWELL_BUILD_${environmentKey}_PORT").map(_.toInt).getOrElse(jdbcPortDefault) + + def makeJdbcUrl(dbms: String, queryParams: Map[String, String]): String = { + s"jdbc:$dbms://$jdbcHostname:$jdbcPort/$jdbcSchema?" 
+ + queryParams.map({ case (name, value) => queryEncode(name) + "=" + queryEncode(value) }).mkString("&") + } + + val (dockerHelp, resetHelp, slickProfile, jdbcDriver, jdbcUrl) = networkDatabaseSystem.platform match { + case HsqldbDatabasePlatform => throw new UnsupportedOperationException + case MariadbDatabasePlatform => ( + s"""|docker run \\ + | --detach --name cromwell_database_$jdbcPort \\ + | --env MYSQL_ROOT_PASSWORD=private \\ + | --env MYSQL_USER=$jdbcUsername \\ + | --env MYSQL_PASSWORD=$jdbcPassword \\ + | --env MYSQL_DATABASE=$jdbcSchema \\ + | --publish $jdbcPort:3306 \\ + | --volume $${PWD}/src/ci/docker-compose/mariadb-conf.d:/etc/mysql/conf.d \\ + | mariadb:$dockerTag + |""".stripMargin.trim, + s"""|mysql \\ + | --protocol=tcp --host=$jdbcHostname --port=$jdbcPort \\ + | --user=$jdbcUsername --password=$jdbcPassword \\ + | --execute='DROP DATABASE IF EXISTS $jdbcSchema; CREATE DATABASE $jdbcSchema;' + |""".stripMargin.trim, + "slick.jdbc.MySQLProfile$", + "org.mariadb.jdbc.Driver", + makeJdbcUrl("mysql", Map("rewriteBatchedStatements" -> "true")), + ) + case MysqlDatabasePlatform => ( + s"""|docker run \\ + | --detach --name cromwell_database_$jdbcPort \\ + | --env MYSQL_ROOT_PASSWORD=private \\ + | --env MYSQL_USER=$jdbcUsername \\ + | --env MYSQL_PASSWORD=$jdbcPassword \\ + | --env MYSQL_DATABASE=$jdbcSchema \\ + | --publish $jdbcPort:3306 \\ + | --volume $${PWD}/src/ci/docker-compose/mysql-conf.d:/etc/mysql/conf.d \\ + | mysql:$dockerTag + |""".stripMargin.trim, + s"""|mysql \\ + | --protocol=tcp --host=$jdbcHostname --port=$jdbcPort \\ + | --user=$jdbcUsername --password=$jdbcPassword \\ + | --execute='DROP DATABASE IF EXISTS $jdbcSchema; CREATE DATABASE $jdbcSchema;' + |""".stripMargin.trim, + "slick.jdbc.MySQLProfile$", + "com.mysql.cj.jdbc.Driver", + makeJdbcUrl("mysql", Map( + "rewriteBatchedStatements" -> "true", + "useSSL" -> "false", + "allowPublicKeyRetrieval" -> "true", + "serverTimezone" -> "UTC", + "useInformationSchema" -> "true", + )), + ) + case PostgresqlDatabasePlatform => ( + s"""|docker run \\ + | --detach --name cromwell_database_$jdbcPort \\ + | --env POSTGRES_USER=$jdbcUsername \\ + | --env POSTGRES_PASSWORD=$jdbcPassword \\ + | --env POSTGRES_DB=$jdbcSchema \\ + | --publish $jdbcPort:5432 \\ + | --volume $${PWD}/src/ci/docker-compose/postgresql-initdb.d:/docker-entrypoint-initdb.d \\ + | postgres:$dockerTag + |""".stripMargin.trim, + s"""|PGPASSWORD=$jdbcPassword psql \\ + | --host=localhost --port=5432 --username=$jdbcUsername \\ + | postgres <<< 'drop database if exists $jdbcSchema; create database $jdbcSchema;' + |""".stripMargin.trim, + "slick.jdbc.PostgresProfile$", + "org.postgresql.Driver", + makeJdbcUrl("postgresql", Map("reWriteBatchedInserts" -> "true")), + ) + } + + logger.info( + s"""|Run an example $systemName via docker using: + |$dockerHelp""".stripMargin) + logger.info( + s"""|The schema will be initialized when the docker container starts. If needed reset the schema using: + |$resetHelp""".stripMargin) + + ConfigFactory.parseString( + s"""|profile = "$slickProfile" + |db { + | driver = "$jdbcDriver" + | url = "$jdbcUrl" + | user = "$jdbcUsername" + | password = "$jdbcPassword" + | connectionTimeout = 5000 + |} + |""".stripMargin + ) + } + + /** + * Encode strings for URLs. + */ + private def queryEncode(string: String): String = URLEncoder.encode(string, "UTF-8") + /** * Run liquibase on a open database. 
*/ @@ -116,6 +276,28 @@ object DatabaseTestKit { withConnection(database.dataAccess.driver, database.database)(LiquibaseUtils.getSnapshot) } + /** + * Returns the database connection metadata for an open Slick database. + */ + def connectionMetadata(database: SlickDatabase): ConnectionMetadata = { + withConnection(database.dataAccess.driver, database.database) { + connection => + val metadata = connection.getMetaData + ConnectionMetadata( + databaseProductName = metadata.getDatabaseProductName, + databaseProductVersion = metadata.getDatabaseProductVersion, + databaseMajorVersion = metadata.getDatabaseMajorVersion, + databaseMinorVersion = metadata.getDatabaseMinorVersion, + driverName = metadata.getDriverName, + driverVersion = metadata.getDriverVersion, + driverMajorVersion = metadata.getDriverMajorVersion, + driverMinorVersion = metadata.getDriverMinorVersion, + jdbcMajorVersion = metadata.getJDBCMajorVersion, + jdbcMinorVersion = metadata.getJDBCMinorVersion, + ) + } + } + /** * Returns a Liquibase snapshot of an in memory HSQLDB. */ diff --git a/services/src/test/scala/cromwell/services/database/LiquibaseComparisonSpec.scala b/services/src/test/scala/cromwell/services/database/LiquibaseComparisonSpec.scala index 1fa7cbff0ce..4b64d66171c 100644 --- a/services/src/test/scala/cromwell/services/database/LiquibaseComparisonSpec.scala +++ b/services/src/test/scala/cromwell/services/database/LiquibaseComparisonSpec.scala @@ -36,9 +36,10 @@ class LiquibaseComparisonSpec extends FlatSpec with Matchers with ScalaFutures { DatabaseSystem.All foreach { databaseSystem => - behavior of s"Liquibase Comparison for ${databaseType.name} ${databaseSystem.shortName}" + behavior of s"Liquibase Comparison for ${databaseType.name} ${databaseSystem.name}" lazy val liquibasedDatabase = DatabaseTestKit.initializedDatabaseFromSystem(databaseType, databaseSystem) + lazy val connectionMetadata = DatabaseTestKit.connectionMetadata(liquibasedDatabase) lazy val actualSnapshot = DatabaseTestKit.liquibaseSnapshot(liquibasedDatabase) lazy val actualColumns = get[Column](actualSnapshot) @@ -112,7 +113,12 @@ class LiquibaseComparisonSpec extends FlatSpec with Matchers with ScalaFutures { } // Verify that sequence widths are the same as columns - sequenceTypeValidationOption(expectedColumn, databaseSystem) foreach { expectedSequenceType => + val expectedSequenceTypeOption = sequenceTypeValidationOption( + expectedColumn, + databaseSystem, + connectionMetadata, + ) + expectedSequenceTypeOption foreach { expectedSequenceType => val dbio = sequenceTypeDbio(expectedColumn, databaseSystem, liquibasedDatabase) val future = liquibasedDatabase.database.run(dbio) val res = future.futureValue @@ -334,11 +340,11 @@ object LiquibaseComparisonSpec { * Returns the column mapping for the DBMS. 
*/ private def getColumnMapping(databaseSystem: DatabaseSystem): ColumnMapping = { - databaseSystem match { - case HsqldbDatabaseSystem => HsqldbColumnMapping - case MariadbDatabaseSystem => MariadbColumnMapping - case MysqlDatabaseSystem => MysqldbColumnMapping - case PostgresqlDatabaseSystem => PostgresqlColumnMapping + databaseSystem.platform match { + case HsqldbDatabasePlatform => HsqldbColumnMapping + case MariadbDatabasePlatform => MariadbColumnMapping + case MysqlDatabasePlatform => MysqldbColumnMapping + case PostgresqlDatabasePlatform => PostgresqlColumnMapping } } @@ -363,8 +369,8 @@ object LiquibaseComparisonSpec { private def getAutoIncrementDefault(databaseSystem: DatabaseSystem, columnMapping: ColumnMapping, column: Column): ColumnDefault = { - databaseSystem match { - case PostgresqlDatabaseSystem => + databaseSystem.platform match { + case PostgresqlDatabasePlatform => val columnType = column.getType.getTypeName match { case "BIGINT" => ColumnType("BIGSERIAL", None) case "INTEGER" => ColumnType("SERIAL", None) @@ -394,8 +400,8 @@ object LiquibaseComparisonSpec { * This check also has to be done here, as Liquibase does not return the precision for Mysql datetime fields. */ private def columnTypeValidationOption(column: Column, databaseSystem: DatabaseSystem): Option[String] = { - databaseSystem match { - case MysqlDatabaseSystem | MariadbDatabaseSystem if column.getType.getTypeName == "TIMESTAMP" => + databaseSystem.platform match { + case MysqlDatabasePlatform | MariadbDatabasePlatform if column.getType.getTypeName == "TIMESTAMP" => Option("datetime(6)") case _ => None } @@ -405,8 +411,8 @@ object LiquibaseComparisonSpec { databaseSystem: DatabaseSystem, database: SlickDatabase): database.dataAccess.driver.api.DBIO[String] = { import database.dataAccess.driver.api._ - databaseSystem match { - case MysqlDatabaseSystem | MariadbDatabaseSystem if column.getType.getTypeName == "TIMESTAMP" => + databaseSystem.platform match { + case MysqlDatabasePlatform | MariadbDatabasePlatform if column.getType.getTypeName == "TIMESTAMP" => val getType = GetResult(_.rs.getString("Type")) //noinspection SqlDialectInspection @@ -427,9 +433,16 @@ object LiquibaseComparisonSpec { * https://stackoverflow.com/questions/52195303/postgresql-primary-key-id-datatype-from-serial-to-bigserial#answer-52195920 * https://www.postgresql.org/docs/11/datatype-numeric.html#DATATYPE-SERIAL */ - private def sequenceTypeValidationOption(column: Column, databaseSystem: DatabaseSystem): Option[String] = { - databaseSystem match { - case PostgresqlDatabaseSystem if column.isAutoIncrement => Option(column.getType.getTypeName.toLowerCase) + private def sequenceTypeValidationOption(column: Column, + databaseSystem: DatabaseSystem, + connectionMetadata: ConnectionMetadata, + ): Option[String] = { + databaseSystem.platform match { + case PostgresqlDatabasePlatform if column.isAutoIncrement && connectionMetadata.databaseMajorVersion <= 9 => + // "this is currently always bigint" --> https://www.postgresql.org/docs/9.6/infoschema-sequences.html + Option("bigint") + case PostgresqlDatabasePlatform if column.isAutoIncrement => + Option(column.getType.getTypeName.toLowerCase) case _ => None } } @@ -438,11 +451,10 @@ object LiquibaseComparisonSpec { databaseSystem: DatabaseSystem, database: SlickDatabase): database.dataAccess.driver.api.DBIO[String] = { import database.dataAccess.driver.api._ - databaseSystem match { - case PostgresqlDatabaseSystem if column.isAutoIncrement => - - //noinspection SqlDialectInspection - 
sql"""select data_type + databaseSystem.platform match { + case PostgresqlDatabasePlatform if column.isAutoIncrement => + //noinspection SqlDialectInspection + sql"""select data_type from INFORMATION_SCHEMA.sequences where sequence_name = '#${postgresqlSeqName(column)}' """.as[String].head @@ -453,7 +465,7 @@ object LiquibaseComparisonSpec { private def unsupportedColumnTypeException(column: Column, databaseSystem: DatabaseSystem): UnsupportedOperationException = { new UnsupportedOperationException( - s"${databaseSystem.shortName} ${column.getRelation.getName}.${column.getName}: ${column.getType.getTypeName}" + s"${databaseSystem.name} ${column.getRelation.getName}.${column.getName}: ${column.getType.getTypeName}" ) } @@ -466,8 +478,8 @@ object LiquibaseComparisonSpec { */ private def getNullTodos(databaseSystem: DatabaseSystem, databaseType: CromwellDatabaseType[_ <: SlickDatabase]): Seq[ColumnDescription] = { - (databaseSystem, databaseType) match { - case (MysqlDatabaseSystem, EngineDatabaseType) => + (databaseSystem.platform, databaseType) match { + case (MysqlDatabasePlatform, EngineDatabaseType) => List( ColumnDescription("CALL_CACHING_DETRITUS_ENTRY", "CALL_CACHING_ENTRY_ID"), ColumnDescription("CALL_CACHING_DETRITUS_ENTRY", "DETRITUS_KEY"), @@ -493,7 +505,7 @@ object LiquibaseComparisonSpec { ColumnDescription("WORKFLOW_STORE_ENTRY", "WORKFLOW_EXECUTION_UUID"), ColumnDescription("WORKFLOW_STORE_ENTRY", "WORKFLOW_STATE"), ) - case (MysqlDatabaseSystem, MetadataDatabaseType) => + case (MysqlDatabasePlatform, MetadataDatabaseType) => List( ColumnDescription("CUSTOM_LABEL_ENTRY", "CUSTOM_LABEL_KEY"), ColumnDescription("CUSTOM_LABEL_ENTRY", "CUSTOM_LABEL_VALUE"), diff --git a/services/src/test/scala/cromwell/services/database/LobSpec.scala b/services/src/test/scala/cromwell/services/database/LobSpec.scala index aafed159666..a4077468f64 100644 --- a/services/src/test/scala/cromwell/services/database/LobSpec.scala +++ b/services/src/test/scala/cromwell/services/database/LobSpec.scala @@ -22,7 +22,7 @@ class LobSpec extends FlatSpec with Matchers with ScalaFutures { DatabaseSystem.All foreach { databaseSystem => - behavior of s"CLOBs and BLOBs on ${databaseSystem.shortName}" + behavior of s"CLOBs and BLOBs on ${databaseSystem.name}" lazy val database = DatabaseTestKit.initializedDatabaseFromSystem(EngineDatabaseType, databaseSystem) @@ -54,8 +54,8 @@ class LobSpec extends FlatSpec with Matchers with ScalaFutures { val workflowStoreEntries = Seq(workflowStoreEntry) - val future = databaseSystem match { - case MysqlDatabaseSystem => + val future = databaseSystem.platform match { + case MysqlDatabasePlatform => // MySQL crashes because it calls SerialBlob's getBytes instead of getBinaryStream database.addWorkflowStoreEntries(workflowStoreEntries).failed map { exception => exception should be(a[SerialException]) diff --git a/services/src/test/scala/cromwell/services/database/QueryTimeoutSpec.scala b/services/src/test/scala/cromwell/services/database/QueryTimeoutSpec.scala index 12bf73737ff..be938f11157 100644 --- a/services/src/test/scala/cromwell/services/database/QueryTimeoutSpec.scala +++ b/services/src/test/scala/cromwell/services/database/QueryTimeoutSpec.scala @@ -1,76 +1,64 @@ package cromwell.services.database import better.files._ -import com.typesafe.config.{Config, ConfigFactory} import cromwell.core.Tags.DbmsTest -import cromwell.database.slick.SlickDatabase -import cromwell.database.slick.tables.DataAccessComponent import org.scalatest.concurrent.PatienceConfiguration.Timeout 
import org.scalatest.concurrent.ScalaFutures import org.scalatest.{FlatSpec, Matchers} -import scala.concurrent.Future import scala.concurrent.duration._ class QueryTimeoutSpec extends FlatSpec with Matchers with ScalaFutures { - // HSQL does not document a SLEEP() function, which is essential for this test - // The functionality being tested is not relevant to an HSQL user, so the omission is probably acceptable - val insomniacDatabases = Seq(HsqldbDatabaseSystem) + DatabaseSystem.All foreach { databaseSystem => + testOption(databaseSystem) foreach { + case (sleepCommand, errorMessage) => + behavior of s"Query timeouts on ${databaseSystem.name}" - val databasesToTest = DatabaseSystem.All diff insomniacDatabases - - val sleepCommands = Seq( - "select sleep(10);", - "select sleep(10);", - "select pg_sleep(10);" - ) - - val expectedErrors = Seq( - Right(Option(1)), - Left("Statement cancelled due to timeout or client request"), - Left("ERROR: canceling statement due to user request"), - ) - - for (((db, sleepCommand), errorMsg) <- databasesToTest zip sleepCommands zip expectedErrors) { - behavior of s"${db.productName}" - - it should "fail with a timeout" taggedAs DbmsTest in { - checkDatabaseSystem(db, sleepCommand, errorMsg) + it should "fail with a timeout" taggedAs DbmsTest in { + checkDatabaseSystem(databaseSystem, sleepCommand, errorMessage) + } } } private def checkDatabaseSystem(databaseSystem: DatabaseSystem, sleepCommand: String, errorEither: Either[String, Option[Int]]): Unit = { - for { - testDatabase <- new TestDatabase(ConfigFactory.load.getConfig(databaseSystem.configPath)).autoClosed + testDatabase <- DatabaseTestKit.schemalessDatabaseFromSystem(databaseSystem).autoClosed } { import testDatabase.dataAccess.driver.api._ //noinspection SqlDialectInspection - val future = testDatabase.runTestTransaction(sql"""#$sleepCommand""".as[Int].headOption, 5.seconds) + val future = testDatabase.runTestTransaction(sql"""#$sleepCommand""".as[Int].headOption, timeout = 5.seconds) errorEither match { case Left(message) => future.failed.futureValue(Timeout(10.seconds)).getMessage should be(message) case Right(optionResult) => future.futureValue(Timeout(10.seconds)) should be(optionResult) } } } -} - -class TestDatabase(config: Config) extends SlickDatabase(config) { - override lazy val dataAccess: DataAccessComponent = new DataAccessComponent { - override lazy val driver = slickConfig.profile - override lazy val schema = driver.DDL("", "") - } - import dataAccess.driver.api._ - - /** - * Run a RepeatableRead transaction directly on the database with an optional timeout. 
- */ - def runTestTransaction[R](action: DBIO[R], timeout: Duration = Duration.Inf): Future[R] = { - runTransaction(action, timeout = timeout) + private def testOption(databaseSystem: DatabaseSystem): Option[(String, Either[String, Option[Int]])] = { + databaseSystem.platform match { + case HsqldbDatabasePlatform => + // HSQL does not document a SLEEP() function, which is essential for this test + // The functionality being tested is not relevant to an HSQL user, so the omission is probably acceptable + None + case MariadbDatabasePlatform => + Option(( + "select sleep(10);", + Right(Option(1)), + )) + case MysqlDatabasePlatform => + Option(( + "select sleep(10);", + Left("Statement cancelled due to timeout or client request"), + )) + case PostgresqlDatabasePlatform => + Option(( + "select pg_sleep(10);", + Left("ERROR: canceling statement due to user request"), + )) + } } } diff --git a/services/src/test/scala/cromwell/services/database/SchemaManagerSpec.scala b/services/src/test/scala/cromwell/services/database/SchemaManagerSpec.scala index 729d9006cfb..427c97e7da5 100644 --- a/services/src/test/scala/cromwell/services/database/SchemaManagerSpec.scala +++ b/services/src/test/scala/cromwell/services/database/SchemaManagerSpec.scala @@ -3,6 +3,7 @@ package cromwell.services.database import java.io.{ByteArrayOutputStream, PrintStream} import better.files._ +import cromwell.core.Tags.DbmsTest import cromwell.database.migration.liquibase.LiquibaseUtils import cromwell.database.slick.SlickDatabase import cromwell.services.database.DatabaseTestKit._ @@ -60,7 +61,7 @@ class SchemaManagerSpec extends FlatSpec with Matchers with ScalaFutures { val otherSchemaManager = schemaManager.other - it should s"have the same schema as ${databaseType.name} ${otherSchemaManager.name}" in { + it should s"have the same schema as ${databaseType.name} ${otherSchemaManager.name}" taggedAs DbmsTest in { for { actualDatabase <- inMemoryDatabase(databaseType, schemaManager).autoClosed expectedDatabase <- inMemoryDatabase(databaseType, otherSchemaManager).autoClosed @@ -105,7 +106,7 @@ class SchemaManagerSpec extends FlatSpec with Matchers with ScalaFutures { } } - it should "match expected generated names" in { + it should "match expected generated names" taggedAs DbmsTest in { var schemaMetadata: SchemaMetadata = null for { diff --git a/services/src/test/scala/cromwell/services/database/SchemalessSlickDatabase.scala b/services/src/test/scala/cromwell/services/database/SchemalessSlickDatabase.scala new file mode 100644 index 00000000000..71be8361443 --- /dev/null +++ b/services/src/test/scala/cromwell/services/database/SchemalessSlickDatabase.scala @@ -0,0 +1,15 @@ +package cromwell.services.database + +import com.typesafe.config.Config +import cromwell.database.slick.SlickDatabase +import cromwell.database.slick.tables.DataAccessComponent + +/** + * Connects to the passed in config, but does not use nor initialize the schema. 
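+ * Intended for tests (for example QueryTimeoutSpec) that run raw SQL and do not need Cromwell's Liquibase schema.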
+ */ +class SchemalessSlickDatabase(config: Config) extends SlickDatabase(config) { + override lazy val dataAccess: DataAccessComponent = new DataAccessComponent { + override lazy val driver = slickConfig.profile + override lazy val schema = driver.DDL("", "") + } +} diff --git a/services/src/test/scala/cromwell/services/database/TestSlickDatabase.scala b/services/src/test/scala/cromwell/services/database/TestSlickDatabase.scala new file mode 100644 index 00000000000..c88ad81df79 --- /dev/null +++ b/services/src/test/scala/cromwell/services/database/TestSlickDatabase.scala @@ -0,0 +1,23 @@ +package cromwell.services.database + +import cromwell.database.slick.SlickDatabase +import slick.jdbc.TransactionIsolation + +import scala.concurrent.Future +import scala.concurrent.duration.Duration + +/** + * Exposes the protected method runTransaction via a runTestTransaction. + */ +trait TestSlickDatabase { + slickDatabase: SlickDatabase => + + import dataAccess.driver.api._ + + def runTestTransaction[R](action: DBIO[R], + isolationLevel: TransactionIsolation = TransactionIsolation.RepeatableRead, + timeout: Duration = Duration.Inf, + ): Future[R] = { + slickDatabase.runTransaction(action, isolationLevel, timeout) + } +} diff --git a/services/src/test/scala/cromwell/services/keyvalue/impl/KeyValueDatabaseSpec.scala b/services/src/test/scala/cromwell/services/keyvalue/impl/KeyValueDatabaseSpec.scala index 90939d266a0..efac82a224b 100644 --- a/services/src/test/scala/cromwell/services/keyvalue/impl/KeyValueDatabaseSpec.scala +++ b/services/src/test/scala/cromwell/services/keyvalue/impl/KeyValueDatabaseSpec.scala @@ -21,7 +21,7 @@ class KeyValueDatabaseSpec extends FlatSpec with Matchers with ScalaFutures with implicit val defaultPatience = PatienceConfig(scaled(Span(5, Seconds)), scaled(Span(100, Millis))) DatabaseSystem.All foreach { databaseSystem => - behavior of s"KeyValueDatabase on ${databaseSystem.shortName}" + behavior of s"KeyValueDatabase on ${databaseSystem.name}" lazy val dataAccess = DatabaseTestKit.initializedDatabaseFromSystem(EngineDatabaseType, databaseSystem) val workflowId = WorkflowId.randomId().toString @@ -100,26 +100,30 @@ class KeyValueDatabaseSpec extends FlatSpec with Matchers with ScalaFutures with ex.getClass should be(getFailureClass(databaseSystem)) }).flatMap(_ => verifyValues).futureValue } + + it should "close the database" taggedAs DbmsTest in { + dataAccess.close() + } } } object KeyValueDatabaseSpec { private def getFailureRegex(databaseSystem: DatabaseSystem): String = { - databaseSystem match { - case HsqldbDatabaseSystem => + databaseSystem.platform match { + case HsqldbDatabasePlatform => "integrity constraint violation: NOT NULL check constraint; SYS_CT_10591 table: JOB_KEY_VALUE_ENTRY column: STORE_VALUE" - case MariadbDatabaseSystem => """\(conn=\d+\) Column 'STORE_VALUE' cannot be null""" - case MysqlDatabaseSystem => "Column 'STORE_VALUE' cannot be null" - case PostgresqlDatabaseSystem => """ERROR: null value in column "STORE_VALUE" violates not-null constraint""" + case MariadbDatabasePlatform => """\(conn=\d+\) Column 'STORE_VALUE' cannot be null""" + case MysqlDatabasePlatform => "Column 'STORE_VALUE' cannot be null" + case PostgresqlDatabasePlatform => """ERROR: null value in column "STORE_VALUE" violates not-null constraint""" } } private def getFailureClass(databaseSystem: DatabaseSystem): Class[_ <: Exception] = { - databaseSystem match { - case HsqldbDatabaseSystem => classOf[SQLIntegrityConstraintViolationException] - case MariadbDatabaseSystem => 
classOf[BatchUpdateException] - case MysqlDatabaseSystem => classOf[BatchUpdateException] - case PostgresqlDatabaseSystem => classOf[PSQLException] + databaseSystem.platform match { + case HsqldbDatabasePlatform => classOf[SQLIntegrityConstraintViolationException] + case MariadbDatabasePlatform => classOf[BatchUpdateException] + case MysqlDatabasePlatform => classOf[BatchUpdateException] + case PostgresqlDatabasePlatform => classOf[PSQLException] } } } diff --git a/services/src/test/scala/cromwell/services/metadata/MetadataQuerySpec.scala b/services/src/test/scala/cromwell/services/metadata/MetadataQuerySpec.scala new file mode 100644 index 00000000000..9a00f79ad63 --- /dev/null +++ b/services/src/test/scala/cromwell/services/metadata/MetadataQuerySpec.scala @@ -0,0 +1,58 @@ +package cromwell.services.metadata + +import akka.actor.{Actor, ActorRef, Props} +import akka.testkit.TestProbe +import com.typesafe.config.{Config, ConfigFactory} +import cromwell.core.TestKitSuite +import cromwell.services.metadata.MetadataQuerySpec.{CannedResponseReadMetadataWorker, MetadataServiceActor_CustomizeRead} +import cromwell.services.metadata.MetadataService.{MetadataReadAction, MetadataServiceResponse, QueryForWorkflowsMatchingParameters, WorkflowQueryResponse, WorkflowQuerySuccess} +import cromwell.services.metadata.impl.{MetadataServiceActor, MetadataServiceActorSpec} +import org.scalatest.{FlatSpecLike, Matchers} + +class MetadataQuerySpec extends TestKitSuite("MetadataQuerySpec") with FlatSpecLike with Matchers { + + it should "correctly forward requests to read workers and responses back to requesters" in { + + val request = QueryForWorkflowsMatchingParameters( + parameters = List(("paramName1", "paramValue1")) + ) + + val response = WorkflowQuerySuccess( + response = WorkflowQueryResponse(Seq.empty, 0), + meta = None + ) + + val requester = TestProbe("MetadataServiceClientProbe") + def readWorkerProps() = Props(new CannedResponseReadMetadataWorker(Map(request -> response))) + val serviceRegistry = TestProbe("ServiceRegistryProbe") + val metadataService = system.actorOf(MetadataServiceActor_CustomizeRead.props(readWorkerProps, serviceRegistry), "MetadataServiceUnderTest") + + requester.send(metadataService, request) + requester.expectMsg(response) + + } + +} + + +object MetadataQuerySpec { + final class MetadataServiceActor_CustomizeRead(config: Config, serviceRegistryActor: ActorRef, readWorkerMaker: () => Props) + extends MetadataServiceActor(MetadataServiceActorSpec.globalConfigToMetadataServiceConfig(config), config, serviceRegistryActor) { + + override def readMetadataWorkerActorProps(): Props = readWorkerMaker.apply.withDispatcher(cromwell.core.Dispatcher.ServiceDispatcher) + } + + object MetadataServiceActor_CustomizeRead { + val config = ConfigFactory.parseString(MetadataServiceActorSpec.ConfigWithoutSummarizer) + + def props(readActorProps: () => Props, serviceRegistryProbe: TestProbe) = + Props(new MetadataServiceActor_CustomizeRead(config, serviceRegistryProbe.ref, readActorProps)) + } + + + final class CannedResponseReadMetadataWorker(cannedResponses: Map[MetadataReadAction, MetadataServiceResponse]) extends Actor { + override def receive = { + case msg: MetadataReadAction => sender ! 
cannedResponses.getOrElse(msg, throw new Exception(s"Unexpected inbound message: $msg")) + } + } +} diff --git a/services/src/test/scala/cromwell/services/metadata/WorkflowQueryParametersSpec.scala b/services/src/test/scala/cromwell/services/metadata/QueryForWorkflowsMatchingParametersSpec.scala similarity index 99% rename from services/src/test/scala/cromwell/services/metadata/WorkflowQueryParametersSpec.scala rename to services/src/test/scala/cromwell/services/metadata/QueryForWorkflowsMatchingParametersSpec.scala index dd0545b2082..d0deb2713a8 100644 --- a/services/src/test/scala/cromwell/services/metadata/WorkflowQueryParametersSpec.scala +++ b/services/src/test/scala/cromwell/services/metadata/QueryForWorkflowsMatchingParametersSpec.scala @@ -7,7 +7,7 @@ import cromwell.core.labels.Label import cromwell.services.metadata.WorkflowQueryKey._ import org.scalatest.{Matchers, WordSpec} -class WorkflowQueryParametersSpec extends WordSpec with Matchers { +class QueryForWorkflowsMatchingParametersSpec extends WordSpec with Matchers { val StartDateString = "2015-11-01T11:11:11Z" val EndDateString = "2015-11-01T12:12:12Z" diff --git a/services/src/test/scala/cromwell/services/metadata/impl/MetadataDatabaseAccessSpec.scala b/services/src/test/scala/cromwell/services/metadata/impl/MetadataDatabaseAccessSpec.scala index d9bd802e8d1..425b3c03244 100644 --- a/services/src/test/scala/cromwell/services/metadata/impl/MetadataDatabaseAccessSpec.scala +++ b/services/src/test/scala/cromwell/services/metadata/impl/MetadataDatabaseAccessSpec.scala @@ -42,12 +42,12 @@ class MetadataDatabaseAccessSpec extends FlatSpec with Matchers with ScalaFuture implicit val defaultPatience = PatienceConfig(scaled(Span(30, Seconds)), scaled(Span(100, Millis))) DatabaseSystem.All foreach { databaseSystem => - behavior of s"MetadataDatabaseAccess on ${databaseSystem.shortName}" + behavior of s"MetadataDatabaseAccess on ${databaseSystem.name}" lazy val dataAccess = new MetadataDatabaseAccess with MetadataServicesStore { override val metadataDatabaseInterface: MetadataSlickDatabase = { // NOTE: EngineLiquibaseSettings **MUST** always run before the MetadataLiquibaseSettings - DatabaseTestKit.initializedDatabaseFromSystem(EngineDatabaseType, databaseSystem) + DatabaseTestKit.initializedDatabaseFromSystem(EngineDatabaseType, databaseSystem).close() DatabaseTestKit.initializedDatabaseFromSystem(MetadataDatabaseType, databaseSystem) } } diff --git a/services/src/test/scala/cromwell/services/metadata/impl/MetadataServiceActorSpec.scala b/services/src/test/scala/cromwell/services/metadata/impl/MetadataServiceActorSpec.scala index 80d0a2cad5f..97dfb3bce88 100644 --- a/services/src/test/scala/cromwell/services/metadata/impl/MetadataServiceActorSpec.scala +++ b/services/src/test/scala/cromwell/services/metadata/impl/MetadataServiceActorSpec.scala @@ -4,20 +4,25 @@ import java.time.OffsetDateTime import akka.pattern._ import akka.testkit.TestProbe -import com.typesafe.config.ConfigFactory +import com.typesafe.config.{Config, ConfigFactory} import cromwell.core._ import cromwell.services.ServicesSpec import cromwell.services.metadata.MetadataService._ import cromwell.services.metadata._ +import cromwell.services.metadata.impl.builder.MetadataBuilderActor.BuiltMetadataResponse +import cromwell.services.metadata.impl.MetadataServiceActorSpec._ + +import scala.concurrent.Await import org.scalatest.concurrent.Eventually._ import org.scalatest.concurrent.PatienceConfiguration.{Interval, Timeout} +import spray.json._ import 
scala.concurrent.duration._ class MetadataServiceActorSpec extends ServicesSpec("Metadata") { import MetadataServiceActorSpec.Config val config = ConfigFactory.parseString(Config) - val actor = system.actorOf(MetadataServiceActor.props(config, config, TestProbe().ref)) + val actor = system.actorOf(MetadataServiceActor.props(config, globalConfigToMetadataServiceConfig(config), TestProbe().ref), "MetadataServiceActor-for-MetadataServiceActorSpec") val workflowId = WorkflowId.randomId() @@ -37,41 +42,85 @@ class MetadataServiceActorSpec extends ServicesSpec("Metadata") { val event3_1 = MetadataEvent(key3, Option(MetadataValue("value3")), moment.plusSeconds(4)) val event3_2 = MetadataEvent(key3, None, moment.plusSeconds(5)) - "MetadataServiceActor" should { - "Store values for different keys and then retrieve those values" in { - val putAction1 = PutMetadataAction(event1_1) - val putAction2 = PutMetadataAction(event1_2) - val putAction3 = PutMetadataAction(event2_1, event3_1, event3_2) + override def beforeAll: Unit = { + + // Even though event1_1 arrives second, the older timestamp should mean it does not replace event1_2: + val putAction2 = PutMetadataAction(event1_2) + val putAction1 = PutMetadataAction(event1_1) + val putAction3 = PutMetadataAction(event2_1, event3_1, event3_2) - actor ! putAction1 - actor ! putAction2 - actor ! putAction3 + actor ! putAction1 + actor ! putAction2 + actor ! putAction3 + } + + val query1 = MetadataQuery.forKey(key1) + val query2 = MetadataQuery.forKey(key2) + val query3 = MetadataQuery.forKey(key3) + val query4 = MetadataQuery.forWorkflow(workflowId) + val query5 = MetadataQuery.forJob(workflowId, supJob) - val query1 = MetadataQuery.forKey(key1) - val query2 = MetadataQuery.forKey(key2) - val query3 = MetadataQuery.forKey(key3) - val query4 = MetadataQuery.forWorkflow(workflowId) - val query5 = MetadataQuery.forJob(workflowId, supJob) + def expectConstructedMetadata(query: MetadataQuery, expectation: String) = { - eventually(Timeout(10.seconds), Interval(2.seconds)) { - (for { - response1 <- (actor ? GetMetadataQueryAction(query1)).mapTo[MetadataServiceResponse] - _ = response1 shouldBe MetadataLookupResponse(query1, Seq(event1_1, event1_2)) + } - response2 <- (actor ? GetMetadataQueryAction(query2)).mapTo[MetadataServiceResponse] - _ = response2 shouldBe MetadataLookupResponse(query2, Seq(event2_1)) + val testCases = List[(String, MetadataQuery, String)] ( + ("query1", query1, s"""{ + | "key1": "value2", + | "calls": {}, + | "id": "$workflowId" + |}""".stripMargin), + ("query2", query2, s"""{ + | "key2": "value1", + | "calls": {}, + | "id": "$workflowId" + |}""".stripMargin), + ("query3", query3, s"""{ + | "calls": { + | "sup.sup": [{ + | "dog": {}, + | "attempt": 1, + | "shardIndex": -1 + | }] + | }, + | "id": "$workflowId" + |}""".stripMargin), + ("query4", query4, s"""{ + | "key1": "value2", + | "key2": "value1", + | "calls": { + | "sup.sup": [{ + | "dog": {}, + | "attempt": 1, + | "shardIndex": -1 + | }] + | }, + | "id": "$workflowId" + |}""".stripMargin), + ("query5", query5, s"""{ + | "calls": { + | "sup.sup": [{ + | "dog": {}, + | "attempt": 1, + | "shardIndex": -1 + | }] + | }, + | "id": "$workflowId" + |}""".stripMargin) + ) - response3 <- (actor ? GetMetadataQueryAction(query3)).mapTo[MetadataServiceResponse] - _ = response3 shouldBe MetadataLookupResponse(query3, Seq(event3_1, event3_2)) + "MetadataServiceActor" should { - response4 <- (actor ? 
GetMetadataQueryAction(query4)).mapTo[MetadataServiceResponse] - _ = response4 shouldBe MetadataLookupResponse(query4, Seq(event1_1, event1_2, event2_1, event3_1, event3_2)) + testCases foreach { case (name, query, expectation) => - response5 <- (actor ? GetMetadataQueryAction(query5)).mapTo[MetadataServiceResponse] - _ = response5 shouldBe MetadataLookupResponse(query5, Seq(event3_1, event3_2)) + s"perform $name correctly" in { + eventually(Timeout(10.seconds), Interval(2.seconds)) { + val response = Await.result((actor ? GetMetadataAction(query)).mapTo[BuiltMetadataResponse], 1.seconds) - } yield ()).futureValue + response.responseJson shouldBe expectation.parseJson + } } + } } } @@ -82,4 +131,15 @@ object MetadataServiceActorSpec { |services.MetadataService.db-batch-size = 3 |services.MetadataService.db-flush-rate = 100 millis """.stripMargin + + val ConfigWithoutSummarizer = Config + """ + |services.MetadataService.config.metadata-summary-refresh-interval = "Inf" + """.stripMargin + + // Use this to convert the above "global" configs into metadata service specific "service config"s: + def globalConfigToMetadataServiceConfig(config: Config): Config = if (config.hasPath("services.MetadataService.config")) { + config.getConfig("services.MetadataService.config") + } else { + ConfigFactory.empty() + } } diff --git a/services/src/test/scala/cromwell/services/metadata/impl/WriteMetadataActorBenchmark.scala b/services/src/test/scala/cromwell/services/metadata/impl/WriteMetadataActorBenchmark.scala index 0fa2434b511..13946d5e322 100644 --- a/services/src/test/scala/cromwell/services/metadata/impl/WriteMetadataActorBenchmark.scala +++ b/services/src/test/scala/cromwell/services/metadata/impl/WriteMetadataActorBenchmark.scala @@ -1,14 +1,13 @@ package cromwell.services.metadata.impl import akka.testkit.{TestFSMRef, TestProbe} -import com.typesafe.config.ConfigFactory import cromwell.core.Tags.IntegrationTest import cromwell.core.{TestKitSuite, WorkflowId} -import cromwell.database.slick.{EngineSlickDatabase, MetadataSlickDatabase} -import cromwell.services.ServicesStore.EnhancedSqlDatabase +import cromwell.database.slick.MetadataSlickDatabase +import cromwell.services.MetadataServicesStore +import cromwell.services.database.{DatabaseTestKit, EngineDatabaseType, MetadataDatabaseType, MysqlEarliestDatabaseSystem} import cromwell.services.metadata.MetadataService.PutMetadataAction import cromwell.services.metadata.{MetadataEvent, MetadataKey, MetadataValue} -import cromwell.services.{EngineServicesStore, MetadataServicesStore} import org.scalatest.concurrent.Eventually import org.scalatest.{FlatSpecLike, Matchers} @@ -33,18 +32,18 @@ class WriteMetadataActorBenchmark extends TestKitSuite with FlatSpecLike with Ev x } + private lazy val dataAccess = new MetadataDatabaseAccess with MetadataServicesStore { + override val metadataDatabaseInterface: MetadataSlickDatabase = { + val databaseSystem = MysqlEarliestDatabaseSystem + // NOTE: EngineLiquibaseSettings **MUST** always run before the MetadataLiquibaseSettings + DatabaseTestKit.initializedDatabaseFromSystem(EngineDatabaseType, databaseSystem).close() + DatabaseTestKit.initializedDatabaseFromSystem(MetadataDatabaseType, databaseSystem) + } + } + it should "provide good throughput" taggedAs IntegrationTest in { val writeActor = TestFSMRef(new WriteMetadataActor(1000, 5.seconds, registry, Int.MaxValue) { - override val metadataDatabaseInterface = { - val databaseConfig = ConfigFactory.load.getConfig("database-test-mysql") - - // NOTE: 
EngineLiquibaseSettings **MUST** always run before the MetadataLiquibaseSettings - new EngineSlickDatabase(databaseConfig) - .initialized(EngineServicesStore.EngineLiquibaseSettings) - - new MetadataSlickDatabase(databaseConfig) - .initialized(MetadataServicesStore.MetadataLiquibaseSettings) - } + override val metadataDatabaseInterface: MetadataSlickDatabase = dataAccess.metadataDatabaseInterface }) time("metadata write") { @@ -59,4 +58,8 @@ class WriteMetadataActorBenchmark extends TestKitSuite with FlatSpecLike with Ev } } } + + it should "close the database" taggedAs IntegrationTest in { + dataAccess.metadataDatabaseInterface.close() + } } diff --git a/src/ci/bin/test.inc.sh b/src/ci/bin/test.inc.sh index 9ebb960e4da..e65e1e2ca1e 100644 --- a/src/ci/bin/test.inc.sh +++ b/src/ci/bin/test.inc.sh @@ -68,6 +68,8 @@ cromwell::private::create_build_variables() { CROMWELL_BUILD_RESOURCES_SOURCES="${CROMWELL_BUILD_ROOT_DIRECTORY}/src/ci/resources" CROMWELL_BUILD_RESOURCES_DIRECTORY="${CROMWELL_BUILD_ROOT_DIRECTORY}/target/ci/resources" + CROMWELL_BUILD_GIT_SECRETS_DIRECTORY="${CROMWELL_BUILD_RESOURCES_DIRECTORY}/git-secrets" + CROMWELL_BUILD_GIT_SECRETS_COMMIT="ad82d68ee924906a0401dfd48de5057731a9bc84" CROMWELL_BUILD_WAIT_FOR_IT_FILENAME="wait-for-it.sh" CROMWELL_BUILD_WAIT_FOR_IT_BRANCH="db049716e42767d39961e95dd9696103dca813f1" CROMWELL_BUILD_WAIT_FOR_IT_URL="https://raw.githubusercontent.com/vishnubob/wait-for-it/${CROMWELL_BUILD_WAIT_FOR_IT_BRANCH}/${CROMWELL_BUILD_WAIT_FOR_IT_FILENAME}" @@ -197,6 +199,13 @@ cromwell::private::create_build_variables() { hours_to_minutes=60 CROMWELL_BUILD_HEARTBEAT_MINUTES=$((20 * hours_to_minutes)) + local git_revision + if git_revision="$(git rev-parse --short=7 HEAD 2>/dev/null)"; then + CROMWELL_BUILD_GIT_HASH_SUFFIX="g${git_revision}" + else + CROMWELL_BUILD_GIT_HASH_SUFFIX="gUNKNOWN" + fi + export CROMWELL_BUILD_BACKEND_TYPE export CROMWELL_BUILD_BRANCH export CROMWELL_BUILD_BRANCH_PULL_REQUEST @@ -206,6 +215,9 @@ cromwell::private::create_build_variables() { export CROMWELL_BUILD_EVENT export CROMWELL_BUILD_EXIT_FUNCTIONS export CROMWELL_BUILD_GENERATE_COVERAGE + export CROMWELL_BUILD_GIT_HASH_SUFFIX + export CROMWELL_BUILD_GIT_SECRETS_COMMIT + export CROMWELL_BUILD_GIT_SECRETS_DIRECTORY export CROMWELL_BUILD_GIT_USER_EMAIL export CROMWELL_BUILD_GIT_USER_NAME export CROMWELL_BUILD_HEARTBEAT_MINUTES @@ -257,98 +269,99 @@ cromwell::private::echo_build_variables() { echo "CROMWELL_BUILD_URL='${CROMWELL_BUILD_URL}'" } +# Create environment variables used by the DatabaseTestKit and cromwell::private::create_centaur_variables() cromwell::private::create_database_variables() { + CROMWELL_BUILD_DATABASE_USERNAME="cromwell" + CROMWELL_BUILD_DATABASE_PASSWORD="test" + CROMWELL_BUILD_DATABASE_SCHEMA="cromwell_test" + case "${CROMWELL_BUILD_PROVIDER}" in "${CROMWELL_BUILD_PROVIDER_TRAVIS}") CROMWELL_BUILD_MARIADB_HOSTNAME="localhost" - CROMWELL_BUILD_MARIADB_PORT="13306" - CROMWELL_BUILD_MARIADB_USERNAME="cromwell" - CROMWELL_BUILD_MARIADB_PASSWORD="test" - CROMWELL_BUILD_MARIADB_SCHEMA="cromwell_test" + CROMWELL_BUILD_MARIADB_PORT="23306" CROMWELL_BUILD_MARIADB_DOCKER_TAG="${BUILD_MARIADB-}" + CROMWELL_BUILD_MARIADB_LATEST_HOSTNAME="localhost" + CROMWELL_BUILD_MARIADB_LATEST_PORT="33306" + CROMWELL_BUILD_MARIADB_LATEST_TAG="${BUILD_MARIADB_LATEST-}" CROMWELL_BUILD_MYSQL_HOSTNAME="localhost" CROMWELL_BUILD_MYSQL_PORT="3306" - CROMWELL_BUILD_MYSQL_USERNAME="cromwell" - CROMWELL_BUILD_MYSQL_PASSWORD="test" - CROMWELL_BUILD_MYSQL_SCHEMA="cromwell_test" 
CROMWELL_BUILD_MYSQL_DOCKER_TAG="${BUILD_MYSQL-}" + CROMWELL_BUILD_MYSQL_LATEST_HOSTNAME="localhost" + CROMWELL_BUILD_MYSQL_LATEST_PORT="13306" + CROMWELL_BUILD_MYSQL_LATEST_TAG="${BUILD_MYSQL_LATEST-}" CROMWELL_BUILD_POSTGRESQL_HOSTNAME="localhost" CROMWELL_BUILD_POSTGRESQL_PORT="5432" - CROMWELL_BUILD_POSTGRESQL_USERNAME="cromwell" - CROMWELL_BUILD_POSTGRESQL_PASSWORD="test" - CROMWELL_BUILD_POSTGRESQL_SCHEMA="cromwell_test" CROMWELL_BUILD_POSTGRESQL_DOCKER_TAG="${BUILD_POSTGRESQL-}" + CROMWELL_BUILD_POSTGRESQL_LATEST_HOSTNAME="localhost" + CROMWELL_BUILD_POSTGRESQL_LATEST_PORT="15432" + CROMWELL_BUILD_POSTGRESQL_LATEST_TAG="${BUILD_POSTGRESQL_LATEST-}" ;; "${CROMWELL_BUILD_PROVIDER_JENKINS}") # NOTE: Jenkins uses src/ci/docker-compose/docker-compose.yml. # We don't define a docker tag because the docker-compose has already spun up the database containers by the # time this script is run. Other variables here must match the database service names and settings the yaml. - CROMWELL_BUILD_MARIADB_DOCKER_TAG="" CROMWELL_BUILD_MARIADB_HOSTNAME="mariadb-db" CROMWELL_BUILD_MARIADB_PORT="3306" - CROMWELL_BUILD_MARIADB_USERNAME="cromwell" - CROMWELL_BUILD_MARIADB_PASSWORD="test" - CROMWELL_BUILD_MARIADB_SCHEMA="cromwell_test" - CROMWELL_BUILD_MYSQL_DOCKER_TAG="" + CROMWELL_BUILD_MARIADB_DOCKER_TAG="" + CROMWELL_BUILD_MARIADB_LATEST_HOSTNAME="mariadb-db-latest" + CROMWELL_BUILD_MARIADB_LATEST_PORT="3306" + CROMWELL_BUILD_MARIADB_LATEST_TAG="" CROMWELL_BUILD_MYSQL_HOSTNAME="mysql-db" CROMWELL_BUILD_MYSQL_PORT="3306" - CROMWELL_BUILD_MYSQL_USERNAME="cromwell" - CROMWELL_BUILD_MYSQL_PASSWORD="test" - CROMWELL_BUILD_MYSQL_SCHEMA="cromwell_test" - CROMWELL_BUILD_POSTGRESQL_DOCKER_TAG="" + CROMWELL_BUILD_MYSQL_DOCKER_TAG="" + CROMWELL_BUILD_MYSQL_LATEST_HOSTNAME="mysql-db-latest" + CROMWELL_BUILD_MYSQL_LATEST_PORT="3306" + CROMWELL_BUILD_MYSQL_LATEST_TAG="" CROMWELL_BUILD_POSTGRESQL_HOSTNAME="postgresql-db" CROMWELL_BUILD_POSTGRESQL_PORT="5432" - CROMWELL_BUILD_POSTGRESQL_USERNAME="cromwell" - CROMWELL_BUILD_POSTGRESQL_PASSWORD="test" - CROMWELL_BUILD_POSTGRESQL_SCHEMA="cromwell_test" + CROMWELL_BUILD_POSTGRESQL_DOCKER_TAG="" + CROMWELL_BUILD_POSTGRESQL_LATEST_HOSTNAME="postgresql-db-latest" + CROMWELL_BUILD_POSTGRESQL_LATEST_PORT="3306" + CROMWELL_BUILD_POSTGRESQL_LATEST_TAG="" ;; *) CROMWELL_BUILD_MARIADB_HOSTNAME="${CROMWELL_BUILD_MARIADB_HOSTNAME-localhost}" CROMWELL_BUILD_MARIADB_PORT="${CROMWELL_BUILD_MARIADB_PORT-13306}" - CROMWELL_BUILD_MARIADB_USERNAME="${CROMWELL_BUILD_MARIADB_USERNAME-cromwell}" - CROMWELL_BUILD_MARIADB_PASSWORD="${CROMWELL_BUILD_MARIADB_PASSWORD-test}" - CROMWELL_BUILD_MARIADB_SCHEMA="${CROMWELL_BUILD_MARIADB_SCHEMA-cromwell_test}" CROMWELL_BUILD_MARIADB_DOCKER_TAG="" + CROMWELL_BUILD_MARIADB_LATEST_HOSTNAME="${CROMWELL_BUILD_MARIADB_LATEST_HOSTNAME-localhost}" + CROMWELL_BUILD_MARIADB_LATEST_PORT="${CROMWELL_BUILD_MARIADB_LATEST_PORT-13306}" + CROMWELL_BUILD_MARIADB_LATEST_TAG="" CROMWELL_BUILD_MYSQL_HOSTNAME="${CROMWELL_BUILD_MYSQL_HOSTNAME-localhost}" CROMWELL_BUILD_MYSQL_PORT="${CROMWELL_BUILD_MYSQL_PORT-3306}" - CROMWELL_BUILD_MYSQL_USERNAME="${CROMWELL_BUILD_MYSQL_USERNAME-cromwell}" - CROMWELL_BUILD_MYSQL_PASSWORD="${CROMWELL_BUILD_MYSQL_PASSWORD-test}" - CROMWELL_BUILD_MYSQL_SCHEMA="${CROMWELL_BUILD_MYSQL_SCHEMA-cromwell_test}" CROMWELL_BUILD_MYSQL_DOCKER_TAG="" + CROMWELL_BUILD_MYSQL_LATEST_HOSTNAME="${CROMWELL_BUILD_MYSQL_LATEST_HOSTNAME-localhost}" + CROMWELL_BUILD_MYSQL_LATEST_PORT="${CROMWELL_BUILD_MYSQL_LATEST_PORT-13306}" + CROMWELL_BUILD_MYSQL_LATEST_TAG="" 
CROMWELL_BUILD_POSTGRESQL_HOSTNAME="${CROMWELL_BUILD_POSTGRESQL_HOSTNAME-localhost}" CROMWELL_BUILD_POSTGRESQL_PORT="${CROMWELL_BUILD_POSTGRESQL_PORT-5432}" - CROMWELL_BUILD_POSTGRESQL_USERNAME="${CROMWELL_BUILD_POSTGRESQL_USERNAME-cromwell}" - CROMWELL_BUILD_POSTGRESQL_PASSWORD="${CROMWELL_BUILD_POSTGRESQL_PASSWORD-test}" - CROMWELL_BUILD_POSTGRESQL_SCHEMA="${CROMWELL_BUILD_POSTGRESQL_SCHEMA-cromwell_test}" CROMWELL_BUILD_POSTGRESQL_DOCKER_TAG="" + CROMWELL_BUILD_POSTGRESQL_LATEST_HOSTNAME="${CROMWELL_BUILD_POSTGRESQL_LATEST_HOSTNAME-localhost}" + CROMWELL_BUILD_POSTGRESQL_LATEST_PORT="${CROMWELL_BUILD_POSTGRESQL_LATEST_PORT-13306}" + CROMWELL_BUILD_POSTGRESQL_LATEST_TAG="" ;; esac - CROMWELL_BUILD_MARIADB_JDBC_URL="jdbc:mariadb://${CROMWELL_BUILD_MARIADB_HOSTNAME}:${CROMWELL_BUILD_MARIADB_PORT}/${CROMWELL_BUILD_MARIADB_SCHEMA}?rewriteBatchedStatements=true" - CROMWELL_BUILD_MYSQL_JDBC_URL="jdbc:mysql://${CROMWELL_BUILD_MYSQL_HOSTNAME}:${CROMWELL_BUILD_MYSQL_PORT}/${CROMWELL_BUILD_MYSQL_SCHEMA}?useSSL=false&rewriteBatchedStatements=true&serverTimezone=UTC&useInformationSchema=true" - CROMWELL_BUILD_POSTGRESQL_JDBC_URL="jdbc:postgresql://${CROMWELL_BUILD_POSTGRESQL_HOSTNAME}:${CROMWELL_BUILD_POSTGRESQL_PORT}/${CROMWELL_BUILD_POSTGRESQL_SCHEMA}?reWriteBatchedInserts=true" - + export CROMWELL_BUILD_DATABASE_USERNAME + export CROMWELL_BUILD_DATABASE_PASSWORD + export CROMWELL_BUILD_DATABASE_SCHEMA export CROMWELL_BUILD_MARIADB_DOCKER_TAG export CROMWELL_BUILD_MARIADB_HOSTNAME - export CROMWELL_BUILD_MARIADB_JDBC_URL - export CROMWELL_BUILD_MARIADB_PASSWORD + export CROMWELL_BUILD_MARIADB_LATEST_HOSTNAME + export CROMWELL_BUILD_MARIADB_LATEST_PORT + export CROMWELL_BUILD_MARIADB_LATEST_TAG export CROMWELL_BUILD_MARIADB_PORT - export CROMWELL_BUILD_MARIADB_SCHEMA - export CROMWELL_BUILD_MARIADB_USERNAME export CROMWELL_BUILD_MYSQL_DOCKER_TAG export CROMWELL_BUILD_MYSQL_HOSTNAME - export CROMWELL_BUILD_MYSQL_JDBC_URL - export CROMWELL_BUILD_MYSQL_PASSWORD + export CROMWELL_BUILD_MYSQL_LATEST_HOSTNAME + export CROMWELL_BUILD_MYSQL_LATEST_PORT + export CROMWELL_BUILD_MYSQL_LATEST_TAG export CROMWELL_BUILD_MYSQL_PORT - export CROMWELL_BUILD_MYSQL_SCHEMA - export CROMWELL_BUILD_MYSQL_USERNAME export CROMWELL_BUILD_POSTGRESQL_DOCKER_TAG export CROMWELL_BUILD_POSTGRESQL_HOSTNAME - export CROMWELL_BUILD_POSTGRESQL_JDBC_URL - export CROMWELL_BUILD_POSTGRESQL_PASSWORD + export CROMWELL_BUILD_POSTGRESQL_LATEST_HOSTNAME + export CROMWELL_BUILD_POSTGRESQL_LATEST_PORT + export CROMWELL_BUILD_POSTGRESQL_LATEST_TAG export CROMWELL_BUILD_POSTGRESQL_PORT - export CROMWELL_BUILD_POSTGRESQL_SCHEMA - export CROMWELL_BUILD_POSTGRESQL_USERNAME } cromwell::private::create_centaur_variables() { @@ -395,6 +408,14 @@ cromwell::private::create_centaur_variables() { CROMWELL_BUILD_CENTAUR_TEST_RENDERED="${CROMWELL_BUILD_CENTAUR_TEST_DIRECTORY}/rendered" CROMWELL_BUILD_CENTAUR_LOG="${CROMWELL_BUILD_LOG_DIRECTORY}/centaur.log" + local mariadb_jdbc_url + local mysql_jdbc_url + local postgresql_jdbc_url + + mariadb_jdbc_url="jdbc:mariadb://${CROMWELL_BUILD_MARIADB_HOSTNAME}:${CROMWELL_BUILD_MARIADB_PORT}/${CROMWELL_BUILD_DATABASE_SCHEMA}?rewriteBatchedStatements=true" + mysql_jdbc_url="jdbc:mysql://${CROMWELL_BUILD_MYSQL_HOSTNAME}:${CROMWELL_BUILD_MYSQL_PORT}/${CROMWELL_BUILD_DATABASE_SCHEMA}?useSSL=false&rewriteBatchedStatements=true&serverTimezone=UTC&useInformationSchema=true" + 
postgresql_jdbc_url="jdbc:postgresql://${CROMWELL_BUILD_POSTGRESQL_HOSTNAME}:${CROMWELL_BUILD_POSTGRESQL_PORT}/${CROMWELL_BUILD_DATABASE_SCHEMA}?reWriteBatchedInserts=true" + # Pick **one** of the databases to run Centaur against case "${CROMWELL_BUILD_PROVIDER}" in "${CROMWELL_BUILD_PROVIDER_TRAVIS}") @@ -402,23 +423,17 @@ cromwell::private::create_centaur_variables() { if [[ -n "${CROMWELL_BUILD_MYSQL_DOCKER_TAG:+set}" ]]; then CROMWELL_BUILD_CENTAUR_SLICK_PROFILE="slick.jdbc.MySQLProfile$" CROMWELL_BUILD_CENTAUR_JDBC_DRIVER="com.mysql.cj.jdbc.Driver" - CROMWELL_BUILD_CENTAUR_JDBC_USERNAME="${CROMWELL_BUILD_MYSQL_USERNAME}" - CROMWELL_BUILD_CENTAUR_JDBC_PASSWORD="${CROMWELL_BUILD_MYSQL_PASSWORD}" - CROMWELL_BUILD_CENTAUR_JDBC_URL="${CROMWELL_BUILD_MYSQL_JDBC_URL}" + CROMWELL_BUILD_CENTAUR_JDBC_URL="${mysql_jdbc_url}" elif [[ -n "${CROMWELL_BUILD_MARIADB_DOCKER_TAG:+set}" ]]; then CROMWELL_BUILD_CENTAUR_SLICK_PROFILE="slick.jdbc.MySQLProfile$" CROMWELL_BUILD_CENTAUR_JDBC_DRIVER="org.mariadb.jdbc.Driver" - CROMWELL_BUILD_CENTAUR_JDBC_USERNAME="${CROMWELL_BUILD_MARIADB_USERNAME}" - CROMWELL_BUILD_CENTAUR_JDBC_PASSWORD="${CROMWELL_BUILD_MARIADB_PASSWORD}" - CROMWELL_BUILD_CENTAUR_JDBC_URL="${CROMWELL_BUILD_MARIADB_JDBC_URL}" + CROMWELL_BUILD_CENTAUR_JDBC_URL="${mariadb_jdbc_url}" elif [[ -n "${CROMWELL_BUILD_POSTGRESQL_DOCKER_TAG:+set}" ]]; then CROMWELL_BUILD_CENTAUR_SLICK_PROFILE="slick.jdbc.PostgresProfile$" CROMWELL_BUILD_CENTAUR_JDBC_DRIVER="org.postgresql.Driver" - CROMWELL_BUILD_CENTAUR_JDBC_USERNAME="${CROMWELL_BUILD_POSTGRESQL_USERNAME}" - CROMWELL_BUILD_CENTAUR_JDBC_PASSWORD="${CROMWELL_BUILD_POSTGRESQL_PASSWORD}" - CROMWELL_BUILD_CENTAUR_JDBC_URL="${CROMWELL_BUILD_POSTGRESQL_JDBC_URL}" + CROMWELL_BUILD_CENTAUR_JDBC_URL="${postgresql_jdbc_url}" else echo "Error: Unable to determine which RDBMS to use for Centaur." 
>&2 @@ -431,21 +446,28 @@ cromwell::private::create_centaur_variables() { "${CROMWELL_BUILD_PROVIDER_JENKINS}") CROMWELL_BUILD_CENTAUR_SLICK_PROFILE="slick.jdbc.MySQLProfile$" CROMWELL_BUILD_CENTAUR_JDBC_DRIVER="com.mysql.cj.jdbc.Driver" - CROMWELL_BUILD_CENTAUR_JDBC_USERNAME="${CROMWELL_BUILD_MYSQL_USERNAME}" - CROMWELL_BUILD_CENTAUR_JDBC_PASSWORD="${CROMWELL_BUILD_MYSQL_PASSWORD}" - CROMWELL_BUILD_CENTAUR_JDBC_URL="${CROMWELL_BUILD_MYSQL_JDBC_URL}" + CROMWELL_BUILD_CENTAUR_JDBC_URL="${mysql_jdbc_url}" CROMWELL_BUILD_CENTAUR_TEST_ADDITIONAL_PARAMETERS="${CENTAUR_TEST_ADDITIONAL_PARAMETERS-}" ;; *) CROMWELL_BUILD_CENTAUR_SLICK_PROFILE="${CROMWELL_BUILD_CENTAUR_SLICK_PROFILE-slick.jdbc.MySQLProfile\$}" CROMWELL_BUILD_CENTAUR_JDBC_DRIVER="${CROMWELL_BUILD_CENTAUR_JDBC_DRIVER-com.mysql.cj.jdbc.Driver}" - CROMWELL_BUILD_CENTAUR_JDBC_USERNAME="${CROMWELL_BUILD_CENTAUR_JDBC_USERNAME-${CROMWELL_BUILD_MYSQL_USERNAME}}" - CROMWELL_BUILD_CENTAUR_JDBC_PASSWORD="${CROMWELL_BUILD_CENTAUR_JDBC_PASSWORD-${CROMWELL_BUILD_MYSQL_PASSWORD}}" - CROMWELL_BUILD_CENTAUR_JDBC_URL="${CROMWELL_BUILD_CENTAUR_JDBC_URL-${CROMWELL_BUILD_MYSQL_JDBC_URL}}" + CROMWELL_BUILD_CENTAUR_JDBC_URL="${CROMWELL_BUILD_CENTAUR_JDBC_URL-${mysql_jdbc_url}}" CROMWELL_BUILD_CENTAUR_TEST_ADDITIONAL_PARAMETERS= ;; esac + if [[ "${CROMWELL_BUILD_IS_CI}" == "true" ]]; then + CROMWELL_BUILD_CENTAUR_DOCKER_TAG="${CROMWELL_BUILD_PROVIDER}-${CROMWELL_BUILD_NUMBER}" + else + CROMWELL_BUILD_CENTAUR_DOCKER_TAG="${CROMWELL_BUILD_PROVIDER}-${CROMWELL_BUILD_TYPE}-${CROMWELL_BUILD_GIT_HASH_SUFFIX}" + fi + + # Trim and replace invalid characters in the docker tag + # https://docs.docker.com/engine/reference/commandline/tag/#extended-description + CROMWELL_BUILD_CENTAUR_DOCKER_TAG="${CROMWELL_BUILD_CENTAUR_DOCKER_TAG:0:128}" + CROMWELL_BUILD_CENTAUR_DOCKER_TAG="${CROMWELL_BUILD_CENTAUR_DOCKER_TAG//[^a-zA-Z0-9.-]/_}" + case "${CROMWELL_BUILD_CENTAUR_TYPE}" in "${CROMWELL_BUILD_CENTAUR_TYPE_INTEGRATION}") CROMWELL_BUILD_CENTAUR_READ_LINES_LIMIT=512000 @@ -461,15 +483,11 @@ cromwell::private::create_centaur_variables() { CROMWELL_BUILD_CENTAUR_PRIOR_SLICK_PROFILE="slick.jdbc.MySQLProfile$" CROMWELL_BUILD_CENTAUR_PRIOR_JDBC_DRIVER="com.mysql.cj.jdbc.Driver" - CROMWELL_BUILD_CENTAUR_PRIOR_JDBC_USERNAME="${CROMWELL_BUILD_MARIADB_USERNAME}" - CROMWELL_BUILD_CENTAUR_PRIOR_JDBC_PASSWORD="${CROMWELL_BUILD_MARIADB_PASSWORD}" - CROMWELL_BUILD_CENTAUR_PRIOR_JDBC_URL="jdbc:mysql://${CROMWELL_BUILD_MARIADB_HOSTNAME}:${CROMWELL_BUILD_MARIADB_PORT}/${CROMWELL_BUILD_MARIADB_SCHEMA}?useSSL=false&rewriteBatchedStatements=true&serverTimezone=UTC&useInformationSchema=true" + CROMWELL_BUILD_CENTAUR_PRIOR_JDBC_URL="jdbc:mysql://${CROMWELL_BUILD_MARIADB_HOSTNAME}:${CROMWELL_BUILD_MARIADB_PORT}/${CROMWELL_BUILD_DATABASE_SCHEMA}?useSSL=false&rewriteBatchedStatements=true&serverTimezone=UTC&useInformationSchema=true" else CROMWELL_BUILD_CENTAUR_PRIOR_SLICK_PROFILE="${CROMWELL_BUILD_CENTAUR_PRIOR_SLICK_PROFILE-${CROMWELL_BUILD_CENTAUR_SLICK_PROFILE}}" CROMWELL_BUILD_CENTAUR_PRIOR_JDBC_DRIVER="${CROMWELL_BUILD_CENTAUR_PRIOR_JDBC_DRIVER-${CROMWELL_BUILD_CENTAUR_JDBC_DRIVER}}" - CROMWELL_BUILD_CENTAUR_PRIOR_JDBC_USERNAME="${CROMWELL_BUILD_CENTAUR_PRIOR_JDBC_USERNAME-${CROMWELL_BUILD_CENTAUR_JDBC_USERNAME}}" - CROMWELL_BUILD_CENTAUR_PRIOR_JDBC_PASSWORD="${CROMWELL_BUILD_CENTAUR_PRIOR_JDBC_PASSWORD-${CROMWELL_BUILD_CENTAUR_JDBC_PASSWORD}}" CROMWELL_BUILD_CENTAUR_PRIOR_JDBC_URL="${CROMWELL_BUILD_CENTAUR_PRIOR_JDBC_URL-${CROMWELL_BUILD_CENTAUR_JDBC_URL}}" fi @@ -477,6 +495,7 @@ 
cromwell::private::create_centaur_variables() { export CROMWELL_BUILD_CENTAUR_256_BITS_KEY export CROMWELL_BUILD_CENTAUR_CONFIG + export CROMWELL_BUILD_CENTAUR_DOCKER_TAG export CROMWELL_BUILD_CENTAUR_JDBC_DRIVER export CROMWELL_BUILD_CENTAUR_JDBC_PASSWORD export CROMWELL_BUILD_CENTAUR_JDBC_URL @@ -613,13 +632,36 @@ cromwell::private::install_wait_for_it() { chmod +x "$CROMWELL_BUILD_WAIT_FOR_IT_SCRIPT" } +cromwell::private::install_git_secrets() { + # Only install git-secrets on CI. Users should have already installed the executable. + if [[ "${CROMWELL_BUILD_IS_CI}" == "true" ]]; then + git clone https://github.com/awslabs/git-secrets.git "${CROMWELL_BUILD_GIT_SECRETS_DIRECTORY}" + pushd "${CROMWELL_BUILD_GIT_SECRETS_DIRECTORY}" > /dev/null + git checkout "${CROMWELL_BUILD_GIT_SECRETS_COMMIT}" + export PATH="${PATH}:${PWD}" + popd > /dev/null + fi +} + +cromwell::private::install_minnie_kenny() { + # Only install minnie-kenny on CI. Users should have already run the script themselves. + if [[ "${CROMWELL_BUILD_IS_CI}" == "true" ]]; then + pushd "${CROMWELL_BUILD_ROOT_DIRECTORY}" > /dev/null + ./minnie-kenny.sh --force + popd > /dev/null + fi +} + cromwell::private::start_docker() { local docker_image + local docker_name local docker_cid_file - docker_image="${1:?foo called without a docker image}"; shift - docker_cid_file="${CROMWELL_BUILD_RESOURCES_DIRECTORY}/$(echo "${docker_image}" | tr "/" "_" | tr ":" "-").cid.$$" + docker_image="${1:?start_docker called without a docker image}"; shift + docker_name="$(echo "${docker_image}" | tr "/" "_" | tr ":" "-")_$$" + docker_cid_file="${CROMWELL_BUILD_RESOURCES_DIRECTORY}/${docker_name}.cid" - docker run --cidfile="${docker_cid_file}" --detach "$@" "${docker_image}" + docker run --name="${docker_name}" --cidfile="${docker_cid_file}" --detach "$@" "${docker_image}" + docker logs --follow "${docker_name}" 2>&1 | sed "s/^/$(tput setaf 5)${docker_name}$(tput sgr0) /" & cromwell::private::add_exit_function docker rm --force --volumes "$(cat "${docker_cid_file}")" cromwell::private::add_exit_function rm "${docker_cid_file}" @@ -627,16 +669,21 @@ cromwell::private::start_docker() { cromwell::private::start_docker_mysql() { if cromwell::private::is_xtrace_enabled; then - cromwell::private::exec_silent_function cromwell::private::start_docker_mysql + cromwell::private::exec_silent_function cromwell::private::start_docker_mysql "$@" - elif [[ -n "${CROMWELL_BUILD_MYSQL_DOCKER_TAG:+set}" ]]; then + else + local docker_tag + local docker_port + docker_tag="${1?start_docker_mysql called without a docker_tag}" + docker_port="${2?start_docker_mysql called without a docker_port}" + shift 2 cromwell::private::start_docker \ - mysql:"${CROMWELL_BUILD_MYSQL_DOCKER_TAG}" \ + mysql:"${docker_tag}" \ + --publish "${docker_port}":3306 \ --env MYSQL_ROOT_PASSWORD=private \ - --env MYSQL_USER="${CROMWELL_BUILD_MYSQL_USERNAME}" \ - --env MYSQL_PASSWORD="${CROMWELL_BUILD_MYSQL_PASSWORD}" \ - --env MYSQL_DATABASE="${CROMWELL_BUILD_MYSQL_SCHEMA}" \ - --publish "${CROMWELL_BUILD_MYSQL_PORT}":3306 \ + --env MYSQL_USER="${CROMWELL_BUILD_DATABASE_USERNAME}" \ + --env MYSQL_PASSWORD="${CROMWELL_BUILD_DATABASE_PASSWORD}" \ + --env MYSQL_DATABASE="${CROMWELL_BUILD_DATABASE_SCHEMA}" \ --volume "${CROMWELL_BUILD_DOCKER_DIRECTORY}"/mysql-conf.d:/etc/mysql/conf.d \ fi @@ -644,16 +691,21 @@ cromwell::private::start_docker_mysql() { cromwell::private::start_docker_mariadb() { if cromwell::private::is_xtrace_enabled; then - cromwell::private::exec_silent_function 
cromwell::private::start_docker_mariadb + cromwell::private::exec_silent_function cromwell::private::start_docker_mariadb "$@" - elif [[ -n "${CROMWELL_BUILD_MARIADB_DOCKER_TAG:+set}" ]]; then + else + local docker_tag + local docker_port + docker_tag="${1?start_docker_mariadb called without a docker_tag}" + docker_port="${2?start_docker_mariadb called without a docker_port}" + shift 2 cromwell::private::start_docker \ - mariadb:"${CROMWELL_BUILD_MARIADB_DOCKER_TAG}" \ + mariadb:"${docker_tag}" \ + --publish "${docker_port}":3306 \ --env MYSQL_ROOT_PASSWORD=private \ - --env MYSQL_USER="${CROMWELL_BUILD_MARIADB_USERNAME}" \ - --env MYSQL_PASSWORD="${CROMWELL_BUILD_MARIADB_PASSWORD}" \ - --env MYSQL_DATABASE="${CROMWELL_BUILD_MARIADB_SCHEMA}" \ - --publish "${CROMWELL_BUILD_MARIADB_PORT}":3306 \ + --env MYSQL_USER="${CROMWELL_BUILD_DATABASE_USERNAME}" \ + --env MYSQL_PASSWORD="${CROMWELL_BUILD_DATABASE_PASSWORD}" \ + --env MYSQL_DATABASE="${CROMWELL_BUILD_DATABASE_SCHEMA}" \ --volume "${CROMWELL_BUILD_DOCKER_DIRECTORY}"/mariadb-conf.d:/etc/mysql/conf.d \ fi @@ -661,20 +713,52 @@ cromwell::private::start_docker_mariadb() { cromwell::private::start_docker_postgresql() { if cromwell::private::is_xtrace_enabled; then - cromwell::private::exec_silent_function cromwell::private::start_docker_postgresql + cromwell::private::exec_silent_function cromwell::private::start_docker_postgresql "$@" - elif [[ -n "${CROMWELL_BUILD_POSTGRESQL_DOCKER_TAG:+set}" ]]; then + else + local docker_tag + local docker_port + docker_tag="${1?start_docker_postgresql called without a docker_tag}" + docker_port="${2?start_docker_postgresql called without a docker_port}" + shift 2 cromwell::private::start_docker \ - postgres:"${CROMWELL_BUILD_POSTGRESQL_DOCKER_TAG}" \ - --env POSTGRES_USER="${CROMWELL_BUILD_POSTGRESQL_USERNAME}" \ - --env POSTGRES_PASSWORD="${CROMWELL_BUILD_POSTGRESQL_PASSWORD}" \ - --env POSTGRES_DB="${CROMWELL_BUILD_POSTGRESQL_SCHEMA}" \ - --publish "${CROMWELL_BUILD_POSTGRESQL_PORT}":5432 \ + postgres:"${docker_tag}" \ + --publish "${docker_port}":5432 \ + --env POSTGRES_USER="${CROMWELL_BUILD_DATABASE_USERNAME}" \ + --env POSTGRES_PASSWORD="${CROMWELL_BUILD_DATABASE_PASSWORD}" \ + --env POSTGRES_DB="${CROMWELL_BUILD_DATABASE_SCHEMA}" \ --volume "${CROMWELL_BUILD_DOCKER_DIRECTORY}"/postgresql-initdb.d:/docker-entrypoint-initdb.d \ fi } +cromwell::private::start_docker_databases() { + if [[ -n "${CROMWELL_BUILD_MYSQL_DOCKER_TAG:+set}" ]]; then + cromwell::private::start_docker_mysql \ + "${CROMWELL_BUILD_MYSQL_DOCKER_TAG}" "${CROMWELL_BUILD_MYSQL_PORT}" + fi + if [[ -n "${CROMWELL_BUILD_MARIADB_DOCKER_TAG:+set}" ]]; then + cromwell::private::start_docker_mariadb \ + "${CROMWELL_BUILD_MARIADB_DOCKER_TAG}" "${CROMWELL_BUILD_MARIADB_PORT}" + fi + if [[ -n "${CROMWELL_BUILD_POSTGRESQL_DOCKER_TAG:+set}" ]]; then + cromwell::private::start_docker_postgresql \ + "${CROMWELL_BUILD_POSTGRESQL_DOCKER_TAG}" "${CROMWELL_BUILD_POSTGRESQL_PORT}" + fi + if [[ -n "${CROMWELL_BUILD_MYSQL_LATEST_TAG:+set}" ]]; then + cromwell::private::start_docker_mysql \ + "${CROMWELL_BUILD_MYSQL_LATEST_TAG}" "${CROMWELL_BUILD_MYSQL_LATEST_PORT}" + fi + if [[ -n "${CROMWELL_BUILD_MARIADB_LATEST_TAG:+set}" ]]; then + cromwell::private::start_docker_mariadb \ + "${CROMWELL_BUILD_MARIADB_LATEST_TAG}" "${CROMWELL_BUILD_MARIADB_LATEST_PORT}" + fi + if [[ -n "${CROMWELL_BUILD_POSTGRESQL_LATEST_TAG:+set}" ]]; then + cromwell::private::start_docker_postgresql \ + "${CROMWELL_BUILD_POSTGRESQL_LATEST_TAG}" 
"${CROMWELL_BUILD_POSTGRESQL_LATEST_PORT}" + fi +} + cromwell::private::pull_common_docker_images() { # All tests use ubuntu:latest - make sure it's there before starting the tests # because pulling the image during some of the tests would cause them to fail @@ -926,6 +1010,7 @@ cromwell::private::start_build_heartbeat() { printf "${CROMWELL_BUILD_HEARTBEAT_PATTERN}" done & CROMWELL_BUILD_HEARTBEAT_PID=$! + cromwell::private::add_exit_function cromwell::private::kill_build_heartbeat } cromwell::private::start_cromwell_log_tail() { @@ -933,6 +1018,7 @@ cromwell::private::start_cromwell_log_tail() { sleep 2 done && tail -n 0 -f "${CROMWELL_BUILD_CROMWELL_LOG}" 2> /dev/null & CROMWELL_BUILD_CROMWELL_LOG_TAIL_PID=$! + cromwell::private::add_exit_function cromwell::private::kill_cromwell_log_tail } cromwell::private::start_centaur_log_tail() { @@ -940,6 +1026,7 @@ cromwell::private::start_centaur_log_tail() { sleep 2 done && tail -n 0 -f "${CROMWELL_BUILD_CENTAUR_LOG}" 2> /dev/null & CROMWELL_BUILD_CENTAUR_LOG_TAIL_PID=$! + cromwell::private::add_exit_function cromwell::private::kill_centaur_log_tail } cromwell::private::cat_centaur_log() { @@ -1017,7 +1104,6 @@ cromwell::private::kill_tree() { kill "${pid}" 2> /dev/null } - cromwell::private::start_conformance_cromwell() { # Start the Cromwell server in the directory containing input files so it can access them via their relative path pushd "${CROMWELL_BUILD_CWL_TEST_RESOURCES}" > /dev/null @@ -1035,6 +1121,8 @@ cromwell::private::start_conformance_cromwell() { CROMWELL_BUILD_CONFORMANCE_CROMWELL_PID=$! popd > /dev/null + + cromwell::private::add_exit_function cromwell::private::kill_conformance_cromwell } cromwell::private::kill_conformance_cromwell() { @@ -1073,6 +1161,8 @@ cromwell::build::setup_common_environment() { cromwell::private::verify_secure_build cromwell::private::verify_pull_request_build cromwell::private::make_build_directories + cromwell::private::install_git_secrets + cromwell::private::install_minnie_kenny cromwell::private::setup_secure_resources case "${CROMWELL_BUILD_PROVIDER}" in @@ -1083,9 +1173,7 @@ cromwell::build::setup_common_environment() { cromwell::private::upgrade_pip cromwell::private::pull_common_docker_images cromwell::private::install_wait_for_it - cromwell::private::start_docker_mysql - cromwell::private::start_docker_mariadb - cromwell::private::start_docker_postgresql + cromwell::private::start_docker_databases ;; "${CROMWELL_BUILD_PROVIDER_JENKINS}") cromwell::private::delete_boto_config @@ -1111,9 +1199,6 @@ cromwell::build::setup_centaur_environment() { if [[ "${CROMWELL_BUILD_IS_CI}" == "true" ]]; then cromwell::private::add_exit_function cromwell::private::cat_centaur_log fi - cromwell::private::add_exit_function cromwell::private::kill_build_heartbeat - cromwell::private::add_exit_function cromwell::private::kill_cromwell_log_tail - cromwell::private::add_exit_function cromwell::private::kill_centaur_log_tail } cromwell::build::setup_conformance_environment() { @@ -1126,12 +1211,10 @@ cromwell::build::setup_conformance_environment() { cromwell::private::write_cwl_test_inputs cromwell::private::start_build_heartbeat cromwell::private::add_exit_function cromwell::private::cat_conformance_log - cromwell::private::add_exit_function cromwell::private::kill_build_heartbeat } cromwell::build::setup_docker_environment() { cromwell::private::start_build_heartbeat - cromwell::private::add_exit_function cromwell::private::kill_build_heartbeat if [[ "${CROMWELL_BUILD_PROVIDER}" == 
"${CROMWELL_BUILD_PROVIDER_TRAVIS}" ]]; then # Upgrade docker-compose so that we get the correct exit codes @@ -1180,7 +1263,6 @@ cromwell::build::run_centaur() { cromwell::build::run_conformance() { cromwell::private::start_conformance_cromwell - cromwell::private::add_exit_function cromwell::private::kill_conformance_cromwell # Give cromwell time to start up sleep 30 @@ -1257,10 +1339,31 @@ cromwell::build::pip_install() { cromwell::private::pip_install "$@" } +cromwell::build::start_build_heartbeat() { + cromwell::private::start_build_heartbeat +} + cromwell::build::add_exit_function() { cromwell::private::add_exit_function "$1" } +cromwell::build::delete_docker_images() { + local docker_delete_function + local docker_image_file + docker_delete_function="${1:?delete_images called without a docker_delete_function}" + docker_image_file="${2:?delete_images called without a docker_image_file}" + shift + shift + + if [[ -f "${docker_image_file}" ]]; then + local docker_image + while read -r docker_image; do + ${docker_delete_function} "${docker_image}" || true + done < "${docker_image_file}" + rm "${docker_image_file}" || true + fi +} + cromwell::build::kill_tree() { cromwell::private::kill_tree "$1" } diff --git a/src/ci/bin/testCentaurEngineUpgradePapiV2.sh b/src/ci/bin/testCentaurEngineUpgradePapiV2.sh index 9419acded17..6f83dd16413 100755 --- a/src/ci/bin/testCentaurEngineUpgradePapiV2.sh +++ b/src/ci/bin/testCentaurEngineUpgradePapiV2.sh @@ -5,6 +5,8 @@ export CROMWELL_BUILD_REQUIRES_SECURE=true # import in shellcheck / CI / IntelliJ compatible ways # shellcheck source=/dev/null source "${BASH_SOURCE%/*}/test.inc.sh" || source test.inc.sh +# shellcheck source=/dev/null +source "${BASH_SOURCE%/*}/test_papi.inc.sh" || source test_papi.inc.sh if [ "${CROMWELL_BUILD_PROVIDER}" = "${CROMWELL_BUILD_PROVIDER_TRAVIS}" ] && [ -n "${TRAVIS_PULL_REQUEST_BRANCH}" ]; then @@ -14,6 +16,8 @@ if [ "${CROMWELL_BUILD_PROVIDER}" = "${CROMWELL_BUILD_PROVIDER_TRAVIS}" ] && [ - cromwell::build::assemble_jars + cromwell::build::papi::setup_papi_environment + cromwell::build::run_centaur \ -s "centaur.EngineUpgradeTestCaseSpec" \ -e localdockertest \ diff --git a/src/ci/bin/testCentaurHoricromtalEngineUpgradePapiV2.sh b/src/ci/bin/testCentaurHoricromtalEngineUpgradePapiV2.sh index c13ea3f15ee..1162623c5ac 100755 --- a/src/ci/bin/testCentaurHoricromtalEngineUpgradePapiV2.sh +++ b/src/ci/bin/testCentaurHoricromtalEngineUpgradePapiV2.sh @@ -6,6 +6,8 @@ export CROMWELL_BUILD_REQUIRES_PULL_REQUEST=true # import in shellcheck / CI / IntelliJ compatible ways # shellcheck source=/dev/null source "${BASH_SOURCE%/*}/test.inc.sh" || source test.inc.sh +# shellcheck source=/dev/null +source "${BASH_SOURCE%/*}/test_papi.inc.sh" || source test_papi.inc.sh cromwell::build::setup_common_environment @@ -13,6 +15,8 @@ cromwell::build::setup_centaur_environment cromwell::build::assemble_jars +cromwell::build::papi::setup_papi_environment + prior_version=$(cromwell::private::calculate_prior_version_tag) export TEST_CROMWELL_PRIOR_VERSION_TAG="${prior_version}" WOULD_BE_PRIOR_VERSION_CONF="papi_v2_${prior_version}_application.conf" diff --git a/src/ci/bin/testCentaurHoricromtalPapiV2.sh b/src/ci/bin/testCentaurHoricromtalPapiV2.sh index 4509ba312cf..08df732c577 100755 --- a/src/ci/bin/testCentaurHoricromtalPapiV2.sh +++ b/src/ci/bin/testCentaurHoricromtalPapiV2.sh @@ -5,6 +5,8 @@ export CROMWELL_BUILD_REQUIRES_SECURE=true # import in shellcheck / CI / IntelliJ compatible ways # shellcheck source=/dev/null source 
"${BASH_SOURCE%/*}/test.inc.sh" || source test.inc.sh +# shellcheck source=/dev/null +source "${BASH_SOURCE%/*}/test_papi.inc.sh" || source test_papi.inc.sh # Setting these variables should cause the associated config values to be rendered into centaur_application_horicromtal.conf # There should probably be more indirections in CI scripts but that can wait. @@ -17,6 +19,8 @@ cromwell::build::setup_centaur_environment cromwell::build::assemble_jars +cromwell::build::papi::setup_papi_environment + GOOGLE_AUTH_MODE="service-account" GOOGLE_REFRESH_TOKEN_PATH="${CROMWELL_BUILD_RESOURCES_DIRECTORY}/papi_refresh_token.txt" diff --git a/src/ci/bin/testCentaurPapiUpgradeNewWorkflowsPapiV1.sh b/src/ci/bin/testCentaurPapiUpgradeNewWorkflowsPapiV1.sh index ae1ce769ef9..2efdccd91d0 100755 --- a/src/ci/bin/testCentaurPapiUpgradeNewWorkflowsPapiV1.sh +++ b/src/ci/bin/testCentaurPapiUpgradeNewWorkflowsPapiV1.sh @@ -6,6 +6,8 @@ export CROMWELL_BUILD_REQUIRES_PULL_REQUEST=true # import in shellcheck / CI / IntelliJ compatible ways # shellcheck source=/dev/null source "${BASH_SOURCE%/*}/test.inc.sh" || source test.inc.sh +# shellcheck source=/dev/null +source "${BASH_SOURCE%/*}/test_papi.inc.sh" || source test_papi.inc.sh cromwell::build::setup_common_environment @@ -13,6 +15,8 @@ cromwell::build::setup_centaur_environment cromwell::build::assemble_jars +cromwell::build::papi::setup_papi_environment + cromwell::build::run_centaur \ -p 100 \ -e localdockertest \ diff --git a/src/ci/bin/testCentaurPapiUpgradePapiV1.sh b/src/ci/bin/testCentaurPapiUpgradePapiV1.sh index bfe2ec5101a..42a60d87477 100755 --- a/src/ci/bin/testCentaurPapiUpgradePapiV1.sh +++ b/src/ci/bin/testCentaurPapiUpgradePapiV1.sh @@ -6,6 +6,8 @@ export CROMWELL_BUILD_REQUIRES_PULL_REQUEST=true # import in shellcheck / CI / IntelliJ compatible ways # shellcheck source=/dev/null source "${BASH_SOURCE%/*}/test.inc.sh" || source test.inc.sh +# shellcheck source=/dev/null +source "${BASH_SOURCE%/*}/test_papi.inc.sh" || source test_papi.inc.sh cromwell::build::setup_common_environment @@ -13,6 +15,8 @@ cromwell::build::setup_centaur_environment cromwell::build::assemble_jars +cromwell::build::papi::setup_papi_environment + cromwell::build::run_centaur \ -s "centaur.PapiUpgradeTestCaseSpec" \ -e localdockertest \ diff --git a/src/ci/bin/testCentaurPapiV1.sh b/src/ci/bin/testCentaurPapiV1.sh index 63885cff153..eef47a6315b 100755 --- a/src/ci/bin/testCentaurPapiV1.sh +++ b/src/ci/bin/testCentaurPapiV1.sh @@ -6,6 +6,8 @@ export CROMWELL_BUILD_REQUIRES_SECURE=true # import in shellcheck / CI / IntelliJ compatible ways # shellcheck source=/dev/null source "${BASH_SOURCE%/*}/test.inc.sh" || source test.inc.sh +# shellcheck source=/dev/null +source "${BASH_SOURCE%/*}/test_papi.inc.sh" || source test_papi.inc.sh cromwell::build::setup_common_environment @@ -13,6 +15,8 @@ cromwell::build::setup_centaur_environment cromwell::build::assemble_jars +cromwell::build::papi::setup_papi_environment + GOOGLE_AUTH_MODE="service-account" GOOGLE_REFRESH_TOKEN_PATH="${CROMWELL_BUILD_RESOURCES_DIRECTORY}/papi_refresh_token.txt" GOOGLE_SERVICE_ACCOUNT_JSON="${CROMWELL_BUILD_RESOURCES_DIRECTORY}/cromwell-service-account.json" diff --git a/src/ci/bin/testCentaurPapiV2.sh b/src/ci/bin/testCentaurPapiV2.sh index a2195b56d8d..bacc7fcc2c2 100755 --- a/src/ci/bin/testCentaurPapiV2.sh +++ b/src/ci/bin/testCentaurPapiV2.sh @@ -5,6 +5,8 @@ export CROMWELL_BUILD_REQUIRES_SECURE=true # import in shellcheck / CI / IntelliJ compatible ways # shellcheck source=/dev/null source 
"${BASH_SOURCE%/*}/test.inc.sh" || source test.inc.sh +# shellcheck source=/dev/null +source "${BASH_SOURCE%/*}/test_papi.inc.sh" || source test_papi.inc.sh cromwell::build::setup_common_environment @@ -12,6 +14,8 @@ cromwell::build::setup_centaur_environment cromwell::build::assemble_jars +cromwell::build::papi::setup_papi_environment + GOOGLE_AUTH_MODE="service-account" GOOGLE_REFRESH_TOKEN_PATH="${CROMWELL_BUILD_RESOURCES_DIRECTORY}/papi_refresh_token.txt" diff --git a/src/ci/bin/testCentaurTes.sh b/src/ci/bin/testCentaurTes.sh index 76f9ef3ee82..4f7ffade4cb 100755 --- a/src/ci/bin/testCentaurTes.sh +++ b/src/ci/bin/testCentaurTes.sh @@ -12,16 +12,33 @@ cromwell::build::setup_centaur_environment cromwell::build::assemble_jars -FUNNEL_PATH="${CROMWELL_BUILD_ROOT_DIRECTORY}/funnel" -FUNNEL_CONF="${CROMWELL_BUILD_RESOURCES_DIRECTORY}/funnel.conf" +startup_funnel() { + local funnel_path + local funnel_conf + local funnel_tar_gz -# Increase max open files to the maximum allowed. Attempt to help on macos due to the default soft ulimit -n -S 256. -ulimit -n "$(ulimit -n -H)" -if [[ ! -f "${FUNNEL_PATH}" ]]; then - FUNNEL_TAR_GZ="funnel-${CROMWELL_BUILD_OS}-amd64-0.5.0.tar.gz" - curl "https://github.com/ohsu-comp-bio/funnel/releases/download/0.5.0/${FUNNEL_TAR_GZ}" -o "${FUNNEL_TAR_GZ}" -L - tar xzf "${FUNNEL_TAR_GZ}" -fi + funnel_path="${CROMWELL_BUILD_ROOT_DIRECTORY}/funnel" + funnel_conf="${CROMWELL_BUILD_RESOURCES_DIRECTORY}/funnel.conf" + + # Increase max open files to the maximum allowed. Attempt to help on macos due to the default soft ulimit -n -S 256. + ulimit -n "$(ulimit -n -H)" + if [[ ! -f "${funnel_path}" ]]; then + funnel_tar_gz="funnel-${CROMWELL_BUILD_OS}-amd64-0.5.0.tar.gz" + curl \ + --location \ + --output "${funnel_tar_gz}" \ + "https://github.com/ohsu-comp-bio/funnel/releases/download/0.5.0/${funnel_tar_gz}" + tar xzf "${funnel_tar_gz}" + fi + + mkdir -p logs + nohup "${funnel_path}" server run --config "${funnel_conf}" &> logs/funnel.log & + + FUNNEL_PID=$! + export FUNNEL_PID + + cromwell::build::add_exit_function shutdown_funnel +} shutdown_funnel() { if [[ -n "${FUNNEL_PID+set}" ]]; then @@ -29,12 +46,7 @@ shutdown_funnel() { fi } -cromwell::build::add_exit_function shutdown_funnel - -mkdir -p logs -nohup "${FUNNEL_PATH}" server run --config "${FUNNEL_CONF}" &> logs/funnel.log & - -FUNNEL_PID=$! 
+startup_funnel # The following tests are skipped: # diff --git a/src/ci/bin/testCheckPublish.sh b/src/ci/bin/testCheckPublish.sh index c640215bd63..4118509e18b 100755 --- a/src/ci/bin/testCheckPublish.sh +++ b/src/ci/bin/testCheckPublish.sh @@ -11,3 +11,5 @@ cromwell::build::pip_install mkdocs mkdocs build -s sbt checkRestApiDocs +package assembly dockerPushCheck +doc + +git secrets --scan-history diff --git a/src/ci/bin/testDbms.sh b/src/ci/bin/testDbms.sh new file mode 100755 index 00000000000..1332f89f992 --- /dev/null +++ b/src/ci/bin/testDbms.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +set -o errexit -o nounset -o pipefail +export CROMWELL_BUILD_OPTIONAL_SECURE=true +# import in shellcheck / CI / IntelliJ compatible ways +# shellcheck source=/dev/null +source "${BASH_SOURCE%/*}/test.inc.sh" || source test.inc.sh +# shellcheck source=/dev/null +source "${BASH_SOURCE%/*}/test_unit.inc.sh" || source test_unit.inc.sh + +cromwell::build::setup_common_environment + +cromwell::build::unit::setup_scale_factor + +CROMWELL_SBT_TEST_INCLUDE_TAGS="DbmsTest" \ +sbt \ + -Dakka.test.timefactor=${CROMWELL_SBT_TEST_SPAN_SCALE_FACTOR} \ + -Dbackend.providers.Local.config.filesystems.local.localization.0=copy \ + coverage test + +cromwell::build::generate_code_coverage + +cromwell::build::publish_artifacts diff --git a/src/ci/bin/testSbt.sh b/src/ci/bin/testSbt.sh index 36f77d5fa95..859a8fc1445 100755 --- a/src/ci/bin/testSbt.sh +++ b/src/ci/bin/testSbt.sh @@ -5,19 +5,19 @@ export CROMWELL_BUILD_OPTIONAL_SECURE=true # import in shellcheck / CI / IntelliJ compatible ways # shellcheck source=/dev/null source "${BASH_SOURCE%/*}/test.inc.sh" || source test.inc.sh +# shellcheck source=/dev/null +source "${BASH_SOURCE%/*}/test_unit.inc.sh" || source test_unit.inc.sh cromwell::build::setup_common_environment -CROMWELL_SBT_TEST_SPAN_SCALE_FACTOR=1 +cromwell::build::unit::setup_scale_factor case "${CROMWELL_BUILD_PROVIDER}" in "${CROMWELL_BUILD_PROVIDER_TRAVIS}") - CROMWELL_SBT_TEST_EXCLUDE_TAGS="AwsTest,CromwellIntegrationTest,GcsIntegrationTest" - CROMWELL_SBT_TEST_SPAN_SCALE_FACTOR=2 + CROMWELL_SBT_TEST_EXCLUDE_TAGS="AwsTest,CromwellIntegrationTest,DbmsTest,GcsIntegrationTest" ;; "${CROMWELL_BUILD_PROVIDER_JENKINS}") - CROMWELL_SBT_TEST_EXCLUDE_TAGS="AwsTest,CromwellIntegrationTest,DockerTest,GcsIntegrationTest" - CROMWELL_SBT_TEST_SPAN_SCALE_FACTOR=10 + CROMWELL_SBT_TEST_EXCLUDE_TAGS="AwsTest,CromwellIntegrationTest,DockerTest,DbmsTest,GcsIntegrationTest" ;; *) # Use the full list of excludes listed in Testing.scala @@ -25,9 +25,11 @@ case "${CROMWELL_BUILD_PROVIDER}" in ;; esac export CROMWELL_SBT_TEST_EXCLUDE_TAGS -export CROMWELL_SBT_TEST_SPAN_SCALE_FACTOR -sbt -Dakka.test.timefactor=${CROMWELL_SBT_TEST_SPAN_SCALE_FACTOR} -Dbackend.providers.Local.config.filesystems.local.localization.0=copy coverage test +sbt \ + -Dakka.test.timefactor=${CROMWELL_SBT_TEST_SPAN_SCALE_FACTOR} \ + -Dbackend.providers.Local.config.filesystems.local.localization.0=copy \ + coverage test cromwell::build::generate_code_coverage diff --git a/src/ci/bin/testSingleWorkflowRunner.sh b/src/ci/bin/testSingleWorkflowRunner.sh new file mode 100755 index 00000000000..aeb507ecf14 --- /dev/null +++ b/src/ci/bin/testSingleWorkflowRunner.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +set -o errexit -o nounset -o pipefail +# import in shellcheck / CI / IntelliJ compatible ways +# shellcheck source=/dev/null +source "${BASH_SOURCE%/*}/test.inc.sh" || source test.inc.sh + +cromwell::build::setup_common_environment + 
+cromwell::build::start_build_heartbeat + +cromwell::build::assemble_jars + +java -jar $CROMWELL_BUILD_CROMWELL_JAR run ./centaur/src/main/resources/standardTestCases/hello/hello.wdl --inputs ./centaur/src/main/resources/standardTestCases/hello/hello.inputs --metadata-output ./run_mode_metadata.json | tee console_output.txt + +# grep exits 1 if no matches +grep "terminal state: WorkflowSucceededState" console_output.txt +grep "\"wf_hello.hello.salutation\": \"Hello m'Lord!\"" console_output.txt + +cat > expected.json < actual.json + +cmp <(jq -cS . actual.json) <(jq -cS . expected.json) diff --git a/src/ci/bin/test_bcs.inc.sh b/src/ci/bin/test_bcs.inc.sh index 3c9ad36559b..a415d0a49d2 100644 --- a/src/ci/bin/test_bcs.inc.sh +++ b/src/ci/bin/test_bcs.inc.sh @@ -88,6 +88,7 @@ cromwell::private::bcs::bcs_config() { cromwell::private::bcs::bcs_create_cluster() { cromwell::build::exec_retry_function cromwell::private::bcs::try_bcs_create_cluster + cromwell::build::add_exit_function cromwell::private::bcs::bcs_delete_cluster } cromwell::private::bcs::bcs_delete_cluster() { @@ -125,6 +126,5 @@ cromwell::build::bcs::setup_bcs_environment() { cromwell::private::bcs::bcs_delete_old_resources # Create the BCS cluster before sbt assembly as cluster creation takes a few minutes - cromwell::build::add_exit_function cromwell::private::bcs::bcs_delete_cluster cromwell::private::bcs::bcs_create_cluster } diff --git a/src/ci/bin/test_papi.inc.sh b/src/ci/bin/test_papi.inc.sh new file mode 100644 index 00000000000..2ca50586e6e --- /dev/null +++ b/src/ci/bin/test_papi.inc.sh @@ -0,0 +1,96 @@ +#!/usr/bin/env bash + +set -o errexit -o nounset -o pipefail +# import in shellcheck / CI / IntelliJ compatible ways +# shellcheck source=/dev/null +source "${BASH_SOURCE%/*}/test.inc.sh" || source test.inc.sh + +# A set of common Papi functions for use in other scripts. +# +# Functions: +# +# - cromwell::build::papi::* +# Functions for use in other Papi scripts +# +# - cromwell::private::papi::papi::* +# Functions for use only within this file by cromwell::build::papi::* functions +# + +cromwell::build::papi::setup_papi_environment() { + CROMWELL_BUILD_PAPI_AUTH_JSON="${CROMWELL_BUILD_RESOURCES_DIRECTORY}/cromwell-service-account.json" + CROMWELL_BUILD_PAPI_CLIENT_EMAIL="$(jq --exit-status --raw-output .client_email "${CROMWELL_BUILD_PAPI_AUTH_JSON}")" + CROMWELL_BUILD_PAPI_PROJECT_ID="$(jq --exit-status --raw-output .project_id "${CROMWELL_BUILD_PAPI_AUTH_JSON}")" + CROMWELL_BUILD_PAPI_GCR_IMAGES="${CROMWELL_BUILD_RESOURCES_DIRECTORY}/cromwell_build_papi_gcloud_images_temporary.$$" + CROMWELL_BUILD_PAPI_CLOUDSDK_CONFIG="${CROMWELL_BUILD_RESOURCES_DIRECTORY}/cromwell_build_papi_gcloud_config.$$" + + export CROMWELL_BUILD_PAPI_AUTH_JSON + export CROMWELL_BUILD_PAPI_CLIENT_EMAIL + export CROMWELL_BUILD_PAPI_CLIENT_EMAIL_ORIGINAL + export CROMWELL_BUILD_PAPI_CLOUDSDK_CONFIG + export CROMWELL_BUILD_PAPI_GCR_IMAGES + export CROMWELL_BUILD_PAPI_PROJECT_ID + + if [[ "${CROMWELL_BUILD_PROVIDER}" == "${CROMWELL_BUILD_PROVIDER_TRAVIS}" ]]; then + cromwell::private::papi::install_gcloud + fi + + # All `gcloud` commands should use this configuration directory. 
+    # https://stackoverflow.com/questions/34883810/how-to-authenticate-google-apis-with-different-service-account-credentials
+    # https://github.com/googleapis/google-auth-library-java/issues/58
+    export CLOUDSDK_CONFIG="${CROMWELL_BUILD_PAPI_CLOUDSDK_CONFIG}"
+
+    cromwell::build::add_exit_function cromwell::private::papi::teardown_papi_environment
+
+    gcloud auth activate-service-account --key-file="${CROMWELL_BUILD_PAPI_AUTH_JSON}"
+    gcloud config set account "${CROMWELL_BUILD_PAPI_CLIENT_EMAIL}"
+    gcloud config set project "${CROMWELL_BUILD_PAPI_PROJECT_ID}"
+
+    if command -v docker; then
+        # Upload images built from this commit
+        gcloud auth configure-docker --quiet
+        CROMWELL_BUILD_PAPI_DOCKER_IMAGE_DRS="gcr.io/${CROMWELL_BUILD_PAPI_PROJECT_ID}/cromwell-drs-localizer:${CROMWELL_BUILD_CENTAUR_DOCKER_TAG}"
+        cromwell::private::papi::gcr_image_push cromwell-drs-localizer "${CROMWELL_BUILD_PAPI_DOCKER_IMAGE_DRS}"
+    else
+        # Just use the default images
+        CROMWELL_BUILD_PAPI_DOCKER_IMAGE_DRS="broadinstitute/cromwell-drs-localizer:45-d46ff9f"
+    fi
+
+    export CROMWELL_BUILD_PAPI_DOCKER_IMAGE_DRS
+}
+
+cromwell::private::papi::teardown_papi_environment() {
+    cromwell::build::delete_docker_images cromwell::private::papi::gcr_image_delete "${CROMWELL_BUILD_PAPI_GCR_IMAGES}"
+}
+
+cromwell::private::papi::install_gcloud() {
+    echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \
+        | sudo tee -a /etc/apt/sources.list.d/google-cloud-sdk.list
+    sudo apt-get install -y apt-transport-https ca-certificates
+    curl https://packages.cloud.google.com/apt/doc/apt-key.gpg \
+        | sudo apt-key --keyring /usr/share/keyrings/cloud.google.gpg add -
+    sudo apt-get update
+    sudo apt-get install -y google-cloud-sdk
+}
+
+cromwell::private::papi::gcr_image_push() {
+    local executable_name
+    local docker_image
+
+    executable_name="${1:?gcr_image_push called without an executable_name}"
+    docker_image="${2:?gcr_image_push called without a docker_image}"
+    shift
+    shift
+
+    echo "${docker_image}" >> "${CROMWELL_BUILD_PAPI_GCR_IMAGES}"
+
+    sbt \
+        "set \`${executable_name}\`/docker/imageNames := List(ImageName(\"${docker_image}\"))" \
+        "${executable_name}/dockerBuildAndPush"
+}
+
+cromwell::private::papi::gcr_image_delete() {
+    local docker_image_name
+    docker_image_name="${1:?gcr_image_delete called without a docker_image_name}"
+    shift
+    gcloud container images delete "${docker_image_name}" --force-delete-tags --quiet
+}
diff --git a/src/ci/bin/test_unit.inc.sh b/src/ci/bin/test_unit.inc.sh
new file mode 100644
index 00000000000..7ff7437bce4
--- /dev/null
+++ b/src/ci/bin/test_unit.inc.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+
+set -o errexit -o nounset -o pipefail
+# import in shellcheck / CI / IntelliJ compatible ways
+# shellcheck source=/dev/null
+source "${BASH_SOURCE%/*}/test.inc.sh" || source test.inc.sh
+
+# A set of common unit testing functions for use in other scripts.
+# +# Functions: +# +# - cromwell::build::unit::* +# Functions for use in other unit testing scripts +# +# - cromwell::private::unit::* +# Functions for use only within this file by cromwell::build::unit::* functions +# + +cromwell::build::unit::setup_scale_factor() { + case "${CROMWELL_BUILD_PROVIDER}" in + "${CROMWELL_BUILD_PROVIDER_TRAVIS}") + CROMWELL_SBT_TEST_SPAN_SCALE_FACTOR=2 + ;; + "${CROMWELL_BUILD_PROVIDER_JENKINS}") + CROMWELL_SBT_TEST_SPAN_SCALE_FACTOR=10 + ;; + *) + CROMWELL_SBT_TEST_SPAN_SCALE_FACTOR=1 + ;; + esac + export CROMWELL_SBT_TEST_SPAN_SCALE_FACTOR +} diff --git a/src/ci/docker-compose/cromwell-test/docker-setup.sh b/src/ci/docker-compose/cromwell-test/docker-setup.sh index fb13b6bd714..7ceae546c09 100755 --- a/src/ci/docker-compose/cromwell-test/docker-setup.sh +++ b/src/ci/docker-compose/cromwell-test/docker-setup.sh @@ -13,22 +13,13 @@ apt-get install -y \ curl \ gnupg \ gnupg2 \ + jq \ mysql-client \ postgresql-client \ python-dev \ software-properties-common \ sudo \ -# install docker -curl -fsSL https://get.docker.com -o get-docker.sh -sh get-docker.sh - -cat </etc/init/docker-chown.conf -start on startup -task -exec chown root:docker /var/run/docker.sock -CONF - # install sbt echo "deb https://dl.bintray.com/sbt/debian /" | tee -a /etc/apt/sources.list.d/sbt.list apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 2EE0EA64E40A89B84B2DF73499E82A75642AC823 @@ -40,8 +31,15 @@ curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py python get-pip.py pip install --upgrade --force-reinstall pyopenssl +# install gcloud +echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \ + | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list +curl https://packages.cloud.google.com/apt/doc/apt-key.gpg \ + | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - +apt-get update +apt-get install -y google-cloud-sdk + useradd hoggett -usermod -aG docker hoggett echo "hoggett ALL=NOPASSWD: ALL" >> /etc/sudoers mkdir -p /home/hoggett chown hoggett:hoggett /home/hoggett diff --git a/src/ci/docker-compose/docker-compose.yml b/src/ci/docker-compose/docker-compose.yml index 14c7b07afd5..843d46e1736 100644 --- a/src/ci/docker-compose/docker-compose.yml +++ b/src/ci/docker-compose/docker-compose.yml @@ -16,7 +16,6 @@ services: working_dir: ${PWD} volumes: - ${PWD}:${PWD} - - /var/run/docker.sock:/var/run/docker.sock links: - mysql-db - mariadb-db diff --git a/src/ci/resources/cromwell_database.inc.conf b/src/ci/resources/cromwell_database.inc.conf index 18fdd41dbe4..7c6183b19ef 100644 --- a/src/ci/resources/cromwell_database.inc.conf +++ b/src/ci/resources/cromwell_database.inc.conf @@ -7,8 +7,6 @@ database { url = "jdbc:mysql://localhost:3306/cromwell_test?allowPublicKeyRetrieval=true&useSSL=false&rewriteBatchedStatements=true&serverTimezone=UTC&useInformationSchema=true" url = ${?CROMWELL_BUILD_CENTAUR_JDBC_URL} user = "cromwell" - user = ${?CROMWELL_BUILD_CENTAUR_JDBC_USERNAME} password = "test" - password = ${?CROMWELL_BUILD_CENTAUR_JDBC_PASSWORD} } } diff --git a/src/ci/resources/papi_application.inc.conf.ctmpl b/src/ci/resources/papi_application.inc.conf.ctmpl index 0f1a650d6ff..76f8dce816b 100644 --- a/src/ci/resources/papi_application.inc.conf.ctmpl +++ b/src/ci/resources/papi_application.inc.conf.ctmpl @@ -79,6 +79,7 @@ filesystems.drs.global.config.martha.url = "https://us-central1-broad-dsde-dev.c drs { localization { - docker-image = "broadinstitute/cromwell-drs-localizer:centaur-test-45-7216b84-SNAP" + 
docker-image = "broadinstitute/cromwell-drs-localizer:45-d46ff9f" + docker-image = ${?CROMWELL_BUILD_PAPI_DOCKER_IMAGE_DRS} } } diff --git a/src/ci/resources/papi_v2_43_application.conf b/src/ci/resources/papi_v2_43_application.conf index ec90d5be8c2..11aa08eb13e 100644 --- a/src/ci/resources/papi_v2_43_application.conf +++ b/src/ci/resources/papi_v2_43_application.conf @@ -5,7 +5,5 @@ database { db { driver = ${?CROMWELL_BUILD_CENTAUR_PRIOR_JDBC_DRIVER} url = ${?CROMWELL_BUILD_CENTAUR_PRIOR_JDBC_URL} - user = ${?CROMWELL_BUILD_CENTAUR_PRIOR_JDBC_USERNAME} - password = ${?CROMWELL_BUILD_CENTAUR_PRIOR_JDBC_PASSWORD} } } diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala old mode 100644 new mode 100755 index 574842e8620..f361af4bfa2 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala @@ -49,6 +49,7 @@ import cromwell.backend.io.DirectoryFunctions import cromwell.backend.standard.{StandardAsyncExecutionActor, StandardAsyncExecutionActorParams, StandardAsyncJob} import cromwell.core._ import cromwell.core.path.{DefaultPathBuilder, Path} +import cromwell.core.io.DefaultIoCommandBuilder import cromwell.core.retry.SimpleExponentialBackoff import cromwell.filesystems.s3.S3Path import cromwell.filesystems.s3.batch.S3BatchCommandBuilder @@ -79,7 +80,10 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar extends BackendJobLifecycleActor with StandardAsyncExecutionActor with AwsBatchJobCachingActorHelper with KvClient with AskSupport { - override lazy val ioCommandBuilder = S3BatchCommandBuilder + override lazy val ioCommandBuilder = configuration.fileSystem match { + case AWSBatchStorageSystems.s3 => S3BatchCommandBuilder + case _ => DefaultIoCommandBuilder + } val backendSingletonActor: ActorRef = standardParams.backendSingletonActorOption.getOrElse( @@ -104,8 +108,11 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar override lazy val dockerImageUsed: Option[String] = Option(jobDockerImage) - private lazy val jobScriptMountPath = - AwsBatchWorkingDisk.MountPoint.resolve(jobPaths.script.pathWithoutScheme.stripPrefix("/")).pathAsString + private lazy val jobScriptMountPath = configuration.fileSystem match { + case AWSBatchStorageSystems.s3 => AwsBatchWorkingDisk.MountPoint.resolve(jobPaths.script.pathWithoutScheme.stripPrefix("/")).pathAsString + case _ => jobPaths.script.pathWithoutScheme + } + private lazy val execScript = s"""|#!$jobShell @@ -195,7 +202,12 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar private def relativeLocalizationPath(file: WomFile): WomFile = { file.mapFile(value => getPath(value) match { - case Success(path) => path.pathWithoutScheme + case Success(path) => { + configuration.fileSystem match { + case AWSBatchStorageSystems.s3 => path.pathWithoutScheme + case _ => path.toString + } + } case _ => value } ) @@ -247,8 +259,15 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar * @throws Exception if the `path` does not live in one of the supplied `disks` */ private def relativePathAndVolume(path: String, disks: Seq[AwsBatchVolume]): (Path, AwsBatchVolume) = { + + def getAbsolutePath(path: Path) = { + 
configuration.fileSystem match { + case AWSBatchStorageSystems.s3 => AwsBatchWorkingDisk.MountPoint.resolve(path) + case _ => DefaultPathBuilder.get(configuration.root).resolve(path) + } + } val absolutePath = DefaultPathBuilder.get(path) match { - case p if !p.isAbsolute => AwsBatchWorkingDisk.MountPoint.resolve(p) + case p if !p.isAbsolute => getAbsolutePath(p) case p => p } @@ -339,14 +358,21 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar ) } - override lazy val commandDirectory: Path = AwsBatchWorkingDisk.MountPoint - - override def globParentDirectory(womGlobFile: WomGlobFile): Path = { - val (_, disk) = relativePathAndVolume(womGlobFile.value, runtimeAttributes.disks) - disk.mountPoint + override lazy val commandDirectory: Path = configuration.fileSystem match { + case AWSBatchStorageSystems.s3 => AwsBatchWorkingDisk.MountPoint + case _ => jobPaths.callExecutionRoot } - override def isTerminal(runStatus: RunStatus): Boolean = { + override def globParentDirectory(womGlobFile: WomGlobFile): Path = + configuration.fileSystem match { + case AWSBatchStorageSystems.s3 => { + val (_, disk) = relativePathAndVolume(womGlobFile.value, runtimeAttributes.disks) + disk.mountPoint + } + case _ => commandDirectory + } + + override def isTerminal(runStatus: RunStatus): Boolean = { runStatus match { case _: TerminalRunStatus => true case _ => false diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAttributes.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAttributes.scala old mode 100644 new mode 100755 index a766f94bca7..945af262c57 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAttributes.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAttributes.scala @@ -53,14 +53,15 @@ import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ -case class AwsBatchAttributes(auth: AwsAuthMode, +case class AwsBatchAttributes(fileSystem: String, + auth: AwsAuthMode, executionBucket: String, duplicationStrategy: AwsBatchCacheHitDuplicationStrategy, submitAttempts: Int Refined Positive, createDefinitionAttempts: Int Refined Positive) object AwsBatchAttributes { - lazy val Logger = LoggerFactory.getLogger("AwsBatchAttributes") + lazy val Logger = LoggerFactory.getLogger(this.getClass) private val availableConfigKeys = Set( "concurrent-job-limit", @@ -69,8 +70,10 @@ object AwsBatchAttributes { "dockerhub.account", "dockerhub.token", "filesystems", + "filesystems.local.auth", "filesystems.s3.auth", "filesystems.s3.caching.duplication-strategy", + "filesystems.local.caching.duplication-strategy", "default-runtime-attributes", "default-runtime-attributes.disks", "default-runtime-attributes.memory", @@ -102,16 +105,30 @@ object AwsBatchAttributes { warnDeprecated(configKeys, deprecatedAwsBatchKeys, context, Logger) val executionBucket: ErrorOr[String] = validate { backendConfig.as[String]("root") } - val filesystemAuthMode: ErrorOr[AwsAuthMode] = + + val fileSysStr:ErrorOr[String] = validate {backendConfig.hasPath("filesystems.s3") match { + case true => "s3" + case false => "local" + }} + + val fileSysPath = backendConfig.hasPath("filesystems.s3") match { + case true => "filesystems.s3" + case false => "filesystems.local" + } + val filesystemAuthMode: ErrorOr[AwsAuthMode] = { (for { - authName <- validate { backendConfig.as[String]("filesystems.s3.auth") }.toEither + authName <- validate { + 
backendConfig.as[String](s"${fileSysPath}.auth") + }.toEither validAuth <- awsConfig.auth(authName).toEither } yield validAuth).toValidated + } + val duplicationStrategy: ErrorOr[AwsBatchCacheHitDuplicationStrategy] = validate { backendConfig. - as[Option[String]]("filesystems.s3.caching.duplication-strategy"). + as[Option[String]](s"${fileSysPath}.caching.duplication-strategy"). getOrElse("copy") match { case "copy" => CopyCachedOutputs case "reference" => UseOriginalCachedOutputs @@ -120,6 +137,7 @@ object AwsBatchAttributes { } ( + fileSysStr, filesystemAuthMode, executionBucket, duplicationStrategy, diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchBackendInitializationData.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchBackendInitializationData.scala old mode 100644 new mode 100755 index 2ce458874c6..5812a9de23d --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchBackendInitializationData.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchBackendInitializationData.scala @@ -43,4 +43,11 @@ case class AwsBatchBackendInitializationData // TODO: We'll need something specific for batch probably, but I need to // understand more about the genomics node first //genomics: Genomics -) extends StandardInitializationData(workflowPaths, runtimeAttributesBuilder, classOf[AwsBatchExpressionFunctions]) +) extends StandardInitializationData(workflowPaths, runtimeAttributesBuilder, AwsBatchBackendInitializationDataUtility.getExpressionFunctionsClass(configuration.fileSystem)) + +object AwsBatchBackendInitializationDataUtility { + def getExpressionFunctionsClass(fs: String) = fs match { + case AWSBatchStorageSystems.s3 => classOf[AwsBatchExpressionFunctions] + case _ => classOf[AwsBatchExpressionFunctionsForFS] + } +} diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchBackendLifecycleActorFactory.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchBackendLifecycleActorFactory.scala old mode 100644 new mode 100755 index eb23aa5b471..58036dcf6ae --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchBackendLifecycleActorFactory.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchBackendLifecycleActorFactory.scala @@ -36,13 +36,11 @@ import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationDa import cromwell.backend.standard.{StandardAsyncExecutionActor, StandardFinalizationActor, StandardFinalizationActorParams, StandardInitializationActor, StandardInitializationActorParams, StandardLifecycleActorFactory} import cromwell.core.CallOutputs import wom.graph.CommandCallNode -import org.slf4j.LoggerFactory case class AwsBatchBackendLifecycleActorFactory( name: String, configurationDescriptor: BackendConfigurationDescriptor) extends StandardLifecycleActorFactory { - lazy val Log = LoggerFactory.getLogger(AwsBatchBackendLifecycleActorFactory.getClass) override lazy val initializationActorClass: Class[_ <: StandardInitializationActor] = classOf[AwsBatchInitializationActor] @@ -63,7 +61,6 @@ case class AwsBatchBackendLifecycleActorFactory( calls: Set[CommandCallNode], serviceRegistryActor: ActorRef, restart: Boolean): StandardInitializationActorParams = { - Log.debug("Initializing AwsBatchBackendLifecycleActorFactory") AwsBatchInitializationActorParams(workflowDescriptor, ioActor, calls, configuration, serviceRegistryActor, restart) } diff --git 
a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchConfiguration.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchConfiguration.scala old mode 100644 new mode 100755 index 5c174fee869..2bc76d4bb0b --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchConfiguration.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchConfiguration.scala @@ -34,6 +34,7 @@ package cromwell.backend.impl.aws import cromwell.filesystems.s3.S3PathBuilderFactory import cromwell.backend.BackendConfigurationDescriptor import cromwell.core.{BackendDockerConfiguration} +import cromwell.core.path.PathBuilderFactory import cromwell.cloudsupport.aws.AwsConfiguration class AwsBatchConfiguration(val configurationDescriptor: BackendConfigurationDescriptor) { @@ -45,5 +46,22 @@ class AwsBatchConfiguration(val configurationDescriptor: BackendConfigurationDes val batchAttributes = AwsBatchAttributes.fromConfigs(awsConfig, configurationDescriptor.backendConfig) val awsAuth = batchAttributes.auth val dockerCredentials = BackendDockerConfiguration.build(configurationDescriptor.backendConfig).dockerCredentials - val pathBuilderFactory = S3PathBuilderFactory(configurationDescriptor.globalConfig, configurationDescriptor.backendConfig) + val fileSystem = + configurationDescriptor.backendConfig.hasPath("filesystems.s3") match { + case true => "s3" + case false => "local" + } + val pathBuilderFactory = configurationDescriptor.backendConfig.hasPath("filesystems.s3") match { + case true => S3PathBuilderFactory(configurationDescriptor.globalConfig, configurationDescriptor.backendConfig) + case false => + PathBuilderFactory + } } + +object AWSBatchStorageSystems { + val s3:String = "s3" + val efs:String = "efs" + val ebs:String = "ebs" + val local:String = "local" +} + diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchExpressionFunctions.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchExpressionFunctions.scala old mode 100644 new mode 100755 index 42a7a011d84..02e2d2253d8 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchExpressionFunctions.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchExpressionFunctions.scala @@ -49,3 +49,8 @@ class AwsBatchExpressionFunctions(standardParams: StandardExpressionFunctionsPar } } } + +class AwsBatchExpressionFunctionsForFS(standardParams: StandardExpressionFunctionsParams) + extends StandardExpressionFunctions(standardParams) { + +} diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchFinalizationActor.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchFinalizationActor.scala old mode 100644 new mode 100755 index 9bf81524d29..6fdccf995eb --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchFinalizationActor.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchFinalizationActor.scala @@ -36,6 +36,7 @@ import cromwell.backend._ import cromwell.backend.standard.{StandardFinalizationActor, StandardFinalizationActorParams} import cromwell.core.CallOutputs import cromwell.core.io.AsyncIoActorClient +import cromwell.core.io.DefaultIoCommandBuilder import wom.graph.CommandCallNode import cromwell.filesystems.s3.batch.S3BatchCommandBuilder @@ -57,7 +58,10 @@ class AwsBatchFinalizationActor(val params: AwsBatchFinalizationActorParams) lazy val configuration: 
AwsBatchConfiguration = params.configuration - override lazy val ioCommandBuilder = S3BatchCommandBuilder + override lazy val ioCommandBuilder = configuration.fileSystem match { + case AWSBatchStorageSystems.s3 => S3BatchCommandBuilder + case _ => DefaultIoCommandBuilder + } override def ioActor: ActorRef = params.ioActor } diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchInitializationActor.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchInitializationActor.scala old mode 100644 new mode 100755 index 8a9ae750148..fb2e3aa64d9 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchInitializationActor.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchInitializationActor.scala @@ -41,10 +41,10 @@ import cromwell.backend.standard.{StandardInitializationActor, StandardValidatedRuntimeAttributesBuilder} import cromwell.backend.{BackendConfigurationDescriptor, BackendWorkflowDescriptor} +import cromwell.core.io.DefaultIoCommandBuilder import cromwell.core.io.AsyncIoActorClient import cromwell.core.path.Path import wom.graph.CommandCallNode - import scala.concurrent.Future case class AwsBatchInitializationActorParams @@ -86,5 +86,14 @@ class AwsBatchInitializationActor(params: AwsBatchInitializationActorParams) creds <- credentials } yield AwsBatchBackendInitializationData(workflowPaths, runtimeAttributesBuilder, configuration, creds) - override lazy val ioCommandBuilder = S3BatchCommandBuilder + override lazy val ioCommandBuilder = { + val conf = Option(configuration) match { + case Some(cf) => cf + case None => new AwsBatchConfiguration(params.configurationDescriptor) + } + conf.fileSystem match { + case AWSBatchStorageSystems.s3 => S3BatchCommandBuilder + case _ => DefaultIoCommandBuilder + } + } } diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala old mode 100644 new mode 100755 index b62cbf567f3..33c0fec6207 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala @@ -180,9 +180,13 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL implicit async: Async[F], timer: Timer[F]): Aws[F, String] = ReaderT { awsBatchAttributes => val jobDefinitionBuilder = StandardAwsBatchJobDefinitionBuilder + val commandStr = awsBatchAttributes.fileSystem match { + case AWSBatchStorageSystems.s3 => reconfiguredScript + case _ => script + } val jobDefinitionContext = AwsBatchJobDefinitionContext(runtimeAttributes, taskId, - reconfiguredScript, + commandStr, dockerRc, dockerStdout, dockerStderr, diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobCachingActorHelper.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobCachingActorHelper.scala old mode 100644 new mode 100755 index 5589c73b035..2ae096841b5 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobCachingActorHelper.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobCachingActorHelper.scala @@ -32,7 +32,8 @@ package cromwell.backend.impl.aws import akka.actor.Actor -import cromwell.backend.impl.aws.io.{AwsBatchVolume, AwsBatchWorkingDisk} +import cromwell.backend.impl.aws.io.AwsBatchVolume +import cromwell.backend.impl.aws.io.AwsBatchWorkingDisk 
import cromwell.backend.standard.StandardCachingActorHelper import cromwell.core.logging.JobLogging import cromwell.core.path.Path @@ -51,11 +52,14 @@ trait AwsBatchJobCachingActorHelper extends StandardCachingActorHelper { lazy val runtimeAttributes = AwsBatchRuntimeAttributes(validatedRuntimeAttributes, configuration.runtimeConfig) - lazy val workingDisk: AwsBatchVolume = runtimeAttributes.disks.find(_.name == AwsBatchWorkingDisk.Name).get + lazy val workingDisk: AwsBatchVolume = runtimeAttributes.disks.find(x => configuration.fileSystem match { + case AWSBatchStorageSystems.s3 => x.name == AwsBatchWorkingDisk.Name + case _ => configuration.root.startsWith(x.mountPoint.pathAsString) + }).get + lazy val callRootPath: Path = callPaths.callExecutionRoot lazy val returnCodeFilename: String = callPaths.returnCodeFilename - // lazy val returnCodePath: Path = callPaths.returnCode lazy val attributes: AwsBatchAttributes = configuration.batchAttributes } diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala old mode 100644 new mode 100755 index 3c5d62ecae7..e150671b9b9 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala @@ -37,9 +37,9 @@ import cromwell.backend.BackendJobDescriptor import cromwell.backend.io.JobPaths import software.amazon.awssdk.services.batch.model.{ContainerProperties, KeyValuePair} import wdl4s.parser.MemoryUnit +import cromwell.backend.impl.aws.io.AwsBatchVolume import scala.collection.JavaConverters._ - import java.io.ByteArrayOutputStream import java.util.zip.GZIPOutputStream import com.google.common.io.BaseEncoding @@ -59,6 +59,7 @@ sealed trait AwsBatchJobDefinition { } trait AwsBatchJobDefinitionBuilder { + /** Gets a builder, seeded with appropriate portions of the container properties * * @param commandLine command line to execute within the container. 
Will be run in a shell context @@ -72,36 +73,44 @@ trait AwsBatchJobDefinitionBuilder { def buildKVPair(key: String, value: String): KeyValuePair = KeyValuePair.builder.name(key).value(value).build + def buildResources(builder: ContainerProperties.Builder, context: AwsBatchJobDefinitionContext): ContainerProperties.Builder = { // The initial buffer should only contain one item - the hostpath of the // local disk mount point, which will be needed by the docker container // that copies data around - val environment = - context.runtimeAttributes.disks.collect{ - case d if d.name == "local-disk" => - buildKVPair("AWS_CROMWELL_LOCAL_DISK", d.mountPoint.toString) - }.toBuffer val outputinfo = context.outputs.map(o => "%s,%s,%s,%s".format(o.name, o.s3key, o.local, o.mount)) - .mkString(";") + .mkString(";") val inputinfo = context.inputs.collect{case i: AwsBatchFileInput => i} - .map(i => "%s,%s,%s,%s".format(i.name, i.s3key, i.local, i.mount)) - .mkString(";") - - environment.append(buildKVPair("AWS_CROMWELL_PATH",context.uniquePath)) - environment.append(buildKVPair("AWS_CROMWELL_RC_FILE",context.dockerRcPath)) - environment.append(buildKVPair("AWS_CROMWELL_STDOUT_FILE",context.dockerStdoutPath)) - environment.append(buildKVPair("AWS_CROMWELL_STDERR_FILE",context.dockerStderrPath)) - environment.append(buildKVPair("AWS_CROMWELL_CALL_ROOT",context.jobPaths.callExecutionRoot.toString)) - environment.append(buildKVPair("AWS_CROMWELL_WORKFLOW_ROOT",context.jobPaths.workflowPaths.workflowRoot.toString)) - environment.append(gzipKeyValuePair("AWS_CROMWELL_INPUTS", inputinfo)) - environment.append(buildKVPair("AWS_CROMWELL_OUTPUTS",outputinfo)) + .map(i => "%s,%s,%s,%s".format(i.name, i.s3key, i.local, i.mount)) + .mkString(";") + + val environment = + context.runtimeAttributes.disks.collect{ + case d if d.name == "local-disk" => // this has the s3 file system, needs all the env for the ecs-proxy + List(buildKVPair("AWS_CROMWELL_LOCAL_DISK", d.mountPoint.toString), + buildKVPair("AWS_CROMWELL_PATH",context.uniquePath), + buildKVPair("AWS_CROMWELL_RC_FILE",context.dockerRcPath), + buildKVPair("AWS_CROMWELL_STDOUT_FILE",context.dockerStdoutPath), + buildKVPair("AWS_CROMWELL_STDERR_FILE",context.dockerStderrPath), + buildKVPair("AWS_CROMWELL_CALL_ROOT",context.jobPaths.callExecutionRoot.toString), + buildKVPair("AWS_CROMWELL_WORKFLOW_ROOT",context.jobPaths.workflowPaths.workflowRoot.toString), + gzipKeyValuePair("AWS_CROMWELL_INPUTS", inputinfo), + buildKVPair("AWS_CROMWELL_OUTPUTS",outputinfo)) + }.flatten + + def getVolPath(d:AwsBatchVolume) : Option[String] = { + d.fsType match { + case "efs" => None + case _ => Option(context.uniquePath) + } + } builder .command(packCommand("/bin/bash", "-c", context.commandText).asJava) .memory(context.runtimeAttributes.memory.to(MemoryUnit.MB).amount.toInt) .vcpus(context.runtimeAttributes.cpu##) - .volumes(context.runtimeAttributes.disks.map(_.toVolume(context.uniquePath)).asJava) + .volumes(context.runtimeAttributes.disks.map(d => d.toVolume(getVolPath(d))).asJava) .mountPoints(context.runtimeAttributes.disks.map(_.toMountPoint).asJava) .environment(environment.asJava) } @@ -114,6 +123,7 @@ trait AwsBatchJobDefinitionBuilder { BaseEncoding.base64().encode(byteArrayOutputStream.toByteArray()) } + private def packCommand(shell: String, options: String, mainCommand: String): Seq[String] = { val rc = new ListBuffer[String]() val lim = 1024 diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchRuntimeAttributes.scala
b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchRuntimeAttributes.scala old mode 100644 new mode 100755 index ad8c0872dd5..e96336e332c --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchRuntimeAttributes.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchRuntimeAttributes.scala @@ -54,7 +54,8 @@ case class AwsBatchRuntimeAttributes(cpu: Int Refined Positive, queueArn: String, failOnStderr: Boolean, continueOnReturnCode: ContinueOnReturnCode, - noAddress: Boolean) + noAddress: Boolean, + fileSystem:String= "s3") object AwsBatchRuntimeAttributes { @@ -95,7 +96,6 @@ object AwsBatchRuntimeAttributes { MemoryValidation.configDefaultString(RuntimeAttributesKeys.MemoryKey, runtimeConfig) getOrElse MemoryDefaultValue) } - private def memoryMinValidation(runtimeConfig: Option[Config]): RuntimeAttributesValidation[MemorySize] = { MemoryValidation.withDefaultMemory( RuntimeAttributesKeys.MemoryMinKey, @@ -212,7 +212,7 @@ object DisksValidation extends RuntimeAttributesValidation[Seq[AwsBatchVolume]] private def addDefault(disksNel: ErrorOr[Seq[AwsBatchVolume]]): ErrorOr[Seq[AwsBatchVolume]] = { disksNel map { - case disks if disks.exists(_.name == AwsBatchWorkingDisk.Name) => disks + case disks if disks.exists(_.name == AwsBatchWorkingDisk.Name) || disks.exists(_.fsType == "efs") => disks case disks => disks :+ AwsBatchWorkingDisk.Default } } diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchWorkflowPaths.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchWorkflowPaths.scala old mode 100644 new mode 100755 index f63d309c7d3..7715ecec35b --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchWorkflowPaths.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchWorkflowPaths.scala @@ -36,47 +36,23 @@ import akka.actor.ActorSystem import com.typesafe.config.Config import cromwell.backend.io.WorkflowPaths import cromwell.backend.{BackendJobDescriptorKey, BackendWorkflowDescriptor} -import cromwell.cloudsupport.aws.s3.S3Storage import cromwell.core.WorkflowOptions -import cromwell.core.path.{Path, PathBuilder} -import cromwell.filesystems.s3.S3PathBuilder - -import scala.language.postfixOps +import cromwell.core.path.PathBuilder +import cromwell.filesystems.s3.S3PathBuilderFactory object AwsBatchWorkflowPaths { private val RootOptionKey = "aws_s3_root" - private val AuthFilePathOptionKey = "auth_bucket" } case class AwsBatchWorkflowPaths(workflowDescriptor: BackendWorkflowDescriptor, credentials: AwsCredentials, configuration: AwsBatchConfiguration)(implicit actorSystem: ActorSystem) extends WorkflowPaths { - override lazy val executionRootString: String = - workflowDescriptor.workflowOptions.getOrElse(AwsBatchWorkflowPaths.RootOptionKey, configuration.root) - - private val workflowOptions: WorkflowOptions = workflowDescriptor.workflowOptions - - private val pathBuilder: S3PathBuilder = configuration.pathBuilderFactory.fromCredentials(workflowOptions, credentials) - - val authFilePath: Path = { - // The default auth file bucket is always at the root of the root workflow - val defaultBucket = executionRoot.resolve(workflowDescriptor.rootWorkflow.name).resolve(workflowDescriptor.rootWorkflowId.toString) - val bucket = workflowDescriptor.workflowOptions.get(AwsBatchWorkflowPaths.AuthFilePathOptionKey) getOrElse defaultBucket.pathAsString - - val pathBuilderWithAuth = S3PathBuilder.fromCredentials( - credentials, - 
S3Storage.DefaultConfiguration, - workflowOptions, - configuration.awsConfig.region - ) - - val authBucket = pathBuilderWithAuth.build(bucket) recover { - case ex => throw new Exception(s"Invalid s3 auth_bucket path $bucket", ex) - } get - - authBucket.resolve(s"${workflowDescriptor.rootWorkflowId}_auth.json") + override lazy val executionRootString: String = configuration.fileSystem match { + case AWSBatchStorageSystems.s3 => workflowDescriptor.workflowOptions.getOrElse(AwsBatchWorkflowPaths.RootOptionKey, configuration.root) + case _ => configuration.root } + private val workflowOptions: WorkflowOptions = workflowDescriptor.workflowOptions override def toJobPaths(workflowPaths: WorkflowPaths, jobKey: BackendJobDescriptorKey): AwsBatchJobPaths = { new AwsBatchJobPaths(workflowPaths.asInstanceOf[AwsBatchWorkflowPaths], jobKey) @@ -85,5 +61,10 @@ case class AwsBatchWorkflowPaths(workflowDescriptor: BackendWorkflowDescriptor, override protected def withDescriptor(workflowDescriptor: BackendWorkflowDescriptor): WorkflowPaths = this.copy(workflowDescriptor = workflowDescriptor) override def config: Config = configuration.configurationDescriptor.backendConfig - override def pathBuilders: List[PathBuilder] = List(pathBuilder) + override def pathBuilders: List[PathBuilder] = { + if (configuration.fileSystem == "s3") { + List(configuration.pathBuilderFactory.asInstanceOf[S3PathBuilderFactory].fromCredentials(workflowOptions, credentials)) + } else { + WorkflowPaths.DefaultPathBuilders} + } } diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/callcaching/AwsBatchBackendCacheHitCopyingActor.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/callcaching/AwsBatchBackendCacheHitCopyingActor.scala old mode 100644 new mode 100755 index 6efc62c3bdc..02fc8bed530 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/callcaching/AwsBatchBackendCacheHitCopyingActor.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/callcaching/AwsBatchBackendCacheHitCopyingActor.scala @@ -33,8 +33,10 @@ package cromwell.backend.impl.aws.callcaching import com.google.cloud.storage.contrib.nio.CloudStorageOptions import cromwell.backend.BackendInitializationData import cromwell.backend.impl.aws.AwsBatchBackendInitializationData +import cromwell.backend.impl.aws.AWSBatchStorageSystems import cromwell.backend.io.JobPaths import cromwell.backend.standard.callcaching.{StandardCacheHitCopyingActor, StandardCacheHitCopyingActorParams} +import cromwell.core.io.DefaultIoCommandBuilder import cromwell.core.CallOutputs import cromwell.core.io.{IoCommand, IoTouchCommand} import cromwell.core.path.Path @@ -47,53 +49,59 @@ import scala.language.postfixOps import scala.util.Try class AwsBatchBackendCacheHitCopyingActor(standardParams: StandardCacheHitCopyingActorParams) extends StandardCacheHitCopyingActor(standardParams) { - override protected val commandBuilder = S3BatchCommandBuilder - private val cachingStrategy = BackendInitializationData + private val batchAttributes = BackendInitializationData .as[AwsBatchBackendInitializationData](standardParams.backendInitializationDataOption) - .configuration.batchAttributes.duplicationStrategy + .configuration.batchAttributes - override def processSimpletons(womValueSimpletons: Seq[WomValueSimpleton], sourceCallRootPath: Path) = cachingStrategy match { - case CopyCachedOutputs => super.processSimpletons(womValueSimpletons, sourceCallRootPath) - case UseOriginalCachedOutputs => - val touchCommands: 
Seq[Try[IoTouchCommand]] = womValueSimpletons collect { - case WomValueSimpleton(_, wdlFile: WomFile) => getPath(wdlFile.value) map S3BatchCommandBuilder.touchCommand - } + override protected val commandBuilder = batchAttributes.fileSystem match { + case AWSBatchStorageSystems.s3 => S3BatchCommandBuilder + case _ => DefaultIoCommandBuilder + } + private val cachingStrategy = batchAttributes.duplicationStrategy - TryUtil.sequence(touchCommands) map { - WomValueBuilder.toJobOutputs(jobDescriptor.taskCall.outputPorts, womValueSimpletons) -> _.toSet - } + override def processSimpletons(womValueSimpletons: Seq[WomValueSimpleton], sourceCallRootPath: Path) = (batchAttributes.fileSystem, cachingStrategy) match { + case (AWSBatchStorageSystems.s3 , UseOriginalCachedOutputs) => { + val touchCommands: Seq[Try[IoTouchCommand]] = womValueSimpletons collect { + case WomValueSimpleton(_, wdlFile: WomFile) => getPath(wdlFile.value) map S3BatchCommandBuilder.touchCommand + } + + TryUtil.sequence(touchCommands) map { + WomValueBuilder.toJobOutputs(jobDescriptor.taskCall.outputPorts, womValueSimpletons) -> _.toSet + } + } + case (_, _) => super.processSimpletons(womValueSimpletons, sourceCallRootPath) } - override def processDetritus(sourceJobDetritusFiles: Map[String, String]) = cachingStrategy match { - case CopyCachedOutputs => super.processDetritus(sourceJobDetritusFiles) - case UseOriginalCachedOutputs => - // apply getPath on each detritus string file - val detritusAsPaths = detritusFileKeys(sourceJobDetritusFiles).toSeq map { key => - key -> getPath(sourceJobDetritusFiles(key)) - } toMap + override def processDetritus(sourceJobDetritusFiles: Map[String, String]) = (batchAttributes.fileSystem, cachingStrategy) match { + case (AWSBatchStorageSystems.s3, UseOriginalCachedOutputs) => { + // apply getPath on each detritus string file + val detritusAsPaths = detritusFileKeys(sourceJobDetritusFiles).toSeq map { key => + key -> getPath(sourceJobDetritusFiles(key)) + } toMap - // Don't forget to re-add the CallRootPathKey that has been filtered out by detritusFileKeys - TryUtil.sequenceMap(detritusAsPaths, "Failed to make paths out of job detritus") map { newDetritus => - (newDetritus + (JobPaths.CallRootPathKey -> destinationCallRootPath)) -> newDetritus.values.map(S3BatchCommandBuilder.touchCommand).toSet + // Don't forget to re-add the CallRootPathKey that has been filtered out by detritusFileKeys + TryUtil.sequenceMap(detritusAsPaths, "Failed to make paths out of job detritus") map { newDetritus => + (newDetritus + (JobPaths.CallRootPathKey -> destinationCallRootPath)) -> newDetritus.values.map(S3BatchCommandBuilder.touchCommand).toSet + } } - } + case (_, _) => super.processDetritus(sourceJobDetritusFiles) + } override protected def additionalIoCommands(sourceCallRootPath: Path, originalSimpletons: Seq[WomValueSimpleton], newOutputs: CallOutputs, originalDetritus: Map[String, String], - newDetritus: Map[String, Path]): List[Set[IoCommand[_]]] = { - cachingStrategy match { - case UseOriginalCachedOutputs => - val content = - s""" - |This directory does not contain any output files because this job matched an identical job that was previously run, thus it was a cache-hit. - |Cromwell is configured to not copy outputs during call caching. To change this, edit the filesystems.aws.caching.duplication-strategy field in your backend configuration. 
- |The original outputs can be found at this location: ${sourceCallRootPath.pathAsString} + newDetritus: Map[String, Path]): List[Set[IoCommand[_]]] = (batchAttributes.fileSystem, cachingStrategy) match { + case (AWSBatchStorageSystems.s3, UseOriginalCachedOutputs) => + val content = + s""" + |This directory does not contain any output files because this job matched an identical job that was previously run, thus it was a cache-hit. + |Cromwell is configured to not copy outputs during call caching. To change this, edit the filesystems.aws.caching.duplication-strategy field in your backend configuration. + |The original outputs can be found at this location: ${sourceCallRootPath.pathAsString} """.stripMargin - List(Set(S3BatchCommandBuilder.writeCommand(jobPaths.callExecutionRoot / "call_caching_placeholder.txt", content, Seq(CloudStorageOptions.withMimeType("text/plain"))))) - case CopyCachedOutputs => List.empty + List(Set(S3BatchCommandBuilder.writeCommand(jobPaths.callExecutionRoot / "call_caching_placeholder.txt", content, Seq(CloudStorageOptions.withMimeType("text/plain"))))) + case (AWSBatchStorageSystems.s3, CopyCachedOutputs) => List.empty + case (_, _) => super.additionalIoCommands(sourceCallRootPath,originalSimpletons, newOutputs, originalDetritus,newDetritus) } - } } diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/callcaching/AwsBatchBackendFileHashingActor.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/callcaching/AwsBatchBackendFileHashingActor.scala old mode 100644 new mode 100755 index b5f41fbabf9..f731fb1b44a --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/callcaching/AwsBatchBackendFileHashingActor.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/callcaching/AwsBatchBackendFileHashingActor.scala @@ -32,7 +32,16 @@ package cromwell.backend.impl.aws.callcaching import cromwell.backend.standard.callcaching.{StandardFileHashingActor, StandardFileHashingActorParams} import cromwell.filesystems.s3.batch.S3BatchCommandBuilder +import cromwell.backend.BackendInitializationData +import cromwell.backend.impl.aws.AwsBatchBackendInitializationData +import cromwell.backend.impl.aws.AWSBatchStorageSystems +import cromwell.core.io.DefaultIoCommandBuilder class AwsBatchBackendFileHashingActor(standardParams: StandardFileHashingActorParams) extends StandardFileHashingActor(standardParams) { - override val ioCommandBuilder = S3BatchCommandBuilder + + override val ioCommandBuilder = BackendInitializationData.as[AwsBatchBackendInitializationData](standardParams.backendInitializationDataOption) + .configuration.batchAttributes.fileSystem match { + case AWSBatchStorageSystems.s3 => S3BatchCommandBuilder + case _ => DefaultIoCommandBuilder + } } diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/io/AwsBatchVolume.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/io/AwsBatchVolume.scala old mode 100644 new mode 100755 index d66d3bc3dc5..769c2c2f5d8 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/io/AwsBatchVolume.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/io/AwsBatchVolume.scala @@ -65,8 +65,8 @@ object AwsBatchVolume { // In addition to the AWS-specific patterns above, we can also fall back to PAPI-style patterns and ignore the size case DiskPatterns.WorkingDiskPattern(_, _) => Valid(AwsBatchWorkingDisk()) - case DiskPatterns.MountedDiskPattern(mountPoint, _, _) => - 
Valid(AwsBatchEmptyMountedDisk(DefaultPathBuilder.get(mountPoint))) + case DiskPatterns.MountedDiskPattern(mountPoint, _, fsType) => + Valid(AwsBatchEmptyMountedDisk(DefaultPathBuilder.get(mountPoint),fsType)) case _ => s"Disk strings should be of the format 'local-disk' or '/mount/point' but got: '$s'".invalidNel } @@ -85,11 +85,18 @@ object AwsBatchVolume { trait AwsBatchVolume { def name: String def mountPoint: Path - def toVolume(id: String): Volume = { + def fsType: String + def getHostPath(id: Option[String]) : String = { + id match { + case Some(id) => mountPoint.toAbsolutePath.pathAsString + "/" + id + case None => mountPoint.toAbsolutePath.pathAsString + } + } + def toVolume(id: Option[String]=None): Volume = { Volume .builder .name(name) - .host(Host.builder.sourcePath(mountPoint.toAbsolutePath.pathAsString + "/" + id).build) + .host(Host.builder.sourcePath(getHostPath(id)).build) .build } def toMountPoint: MountPoint = { @@ -101,19 +108,22 @@ trait AwsBatchVolume { } } -case class AwsBatchEmptyMountedDisk(mountPoint: Path) extends AwsBatchVolume { +case class AwsBatchEmptyMountedDisk(mountPoint: Path, ftype:String="ebs") extends AwsBatchVolume { val name = s"d-${mountPoint.pathAsString.md5Sum}" + val fsType = ftype.toLowerCase override def toString: String = s"$name $mountPoint" } object AwsBatchWorkingDisk { val MountPoint: Path = DefaultPathBuilder.get("/cromwell_root") val Name = "local-disk" + val fsType= "ebs" val Default = AwsBatchWorkingDisk() } case class AwsBatchWorkingDisk() extends AwsBatchVolume { val mountPoint = AwsBatchWorkingDisk.MountPoint val name = AwsBatchWorkingDisk.Name + val fsType = AwsBatchWorkingDisk.fsType override def toString: String = s"$name $mountPoint" } diff --git a/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchConfigurationSpec.scala b/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchConfigurationSpec.scala old mode 100644 new mode 100755 diff --git a/supportedBackends/bcs/src/main/scala/cromwell/backend/impl/bcs/BcsBackendLifecycleActorFactory.scala b/supportedBackends/bcs/src/main/scala/cromwell/backend/impl/bcs/BcsBackendLifecycleActorFactory.scala index 46cd4d13cc4..053b3b4cadb 100644 --- a/supportedBackends/bcs/src/main/scala/cromwell/backend/impl/bcs/BcsBackendLifecycleActorFactory.scala +++ b/supportedBackends/bcs/src/main/scala/cromwell/backend/impl/bcs/BcsBackendLifecycleActorFactory.scala @@ -31,7 +31,10 @@ final case class BcsBackendLifecycleActorFactory(val name: String, val configura override def dockerHashCredentials(workflowDescriptor: BackendWorkflowDescriptor, initializationData: Option[BackendInitializationData]) = { Try(BackendInitializationData.as[BcsBackendInitializationData](initializationData)) match { case Success(bcsData) => - List(bcsData.bcsConfiguration.dockerCredentials).flatten + bcsData.bcsConfiguration.dockerHashEndpoint match { + case Some(endpoint) => List(bcsData.bcsConfiguration.dockerCredentials, Option(endpoint)).flatten + case None => List(bcsData.bcsConfiguration.dockerCredentials).flatten + } case _ => List.empty[Any] } } diff --git a/supportedBackends/bcs/src/main/scala/cromwell/backend/impl/bcs/BcsClusterIdOrConfiguration.scala b/supportedBackends/bcs/src/main/scala/cromwell/backend/impl/bcs/BcsClusterIdOrConfiguration.scala index 15b12a72de8..047af4dabf2 100644 --- a/supportedBackends/bcs/src/main/scala/cromwell/backend/impl/bcs/BcsClusterIdOrConfiguration.scala +++ 
b/supportedBackends/bcs/src/main/scala/cromwell/backend/impl/bcs/BcsClusterIdOrConfiguration.scala @@ -38,7 +38,7 @@ object BcsClusterIdOrConfiguration { val attachClusterPattern = s"""$resourceAndInstanceAndImagePattern\\s+$idPattern""".r - val attachCllusterSpotPattern = s"""$spotPattern\\s+$idPattern""".r + val attachClusterSpotPattern = s"""$spotPattern\\s+$idPattern""".r def parse(cluster: String): Try[BcsClusterIdOrConfiguration] = { @@ -49,7 +49,7 @@ object BcsClusterIdOrConfiguration { case resourceAndInstanceAndImagePattern(resourceType, instanceType, imageId) => Success(Right(AutoClusterConfiguration(resourceType, instanceType, imageId))) case attachClusterPattern(resourceType, instanceType, imageId, clusterId) => Success(Right(AutoClusterConfiguration(resourceType, instanceType, imageId, clusterId = Option(clusterId)))) case spotPattern(resourceType, instanceType, imageId, spotStrategy, spotPriceLimit) => Success(Right(AutoClusterConfiguration(resourceType, instanceType, imageId, Option(spotStrategy), Option(spotPriceLimit.toFloat)))) - case attachCllusterSpotPattern(resourceType, instanceType, imageId, spotStrategy, spotPriceLimit, clusterId) => Success(Right(AutoClusterConfiguration(resourceType, instanceType, imageId, Option(spotStrategy), Option(spotPriceLimit.toFloat), Option(clusterId)))) + case attachClusterSpotPattern(resourceType, instanceType, imageId, spotStrategy, spotPriceLimit, clusterId) => Success(Right(AutoClusterConfiguration(resourceType, instanceType, imageId, Option(spotStrategy), Option(spotPriceLimit.toFloat), Option(clusterId)))) case _ => Failure(new IllegalArgumentException("must be some string like 'cls-xxxx' or 'OnDemand ecs.s1.large img-ubuntu' or 'OnDemand ecs.s1.large img-ubuntu cls-xxxx'")) } } diff --git a/supportedBackends/bcs/src/main/scala/cromwell/backend/impl/bcs/BcsConfiguration.scala b/supportedBackends/bcs/src/main/scala/cromwell/backend/impl/bcs/BcsConfiguration.scala index f0de1049e6e..c3119079211 100644 --- a/supportedBackends/bcs/src/main/scala/cromwell/backend/impl/bcs/BcsConfiguration.scala +++ b/supportedBackends/bcs/src/main/scala/cromwell/backend/impl/bcs/BcsConfiguration.scala @@ -44,6 +44,7 @@ final class BcsConfiguration(val configurationDescriptor: BackendConfigurationDe lazy val dockerHashAccessId = DockerConfiguration.dockerHashLookupConfig.as[Option[String]]("alibabacloudcr.auth.access-id") lazy val dockerHashAccessKey = DockerConfiguration.dockerHashLookupConfig.as[Option[String]]("alibabacloudcr.auth.access-key") lazy val dockerHashSecurityToken = DockerConfiguration.dockerHashLookupConfig.as[Option[String]]("alibabacloudcr.auth.security-token") + lazy val dockerHashEndpoint = DockerConfiguration.dockerHashLookupConfig.as[Option[String]]("alibabacloudcr.auth.endpoint") val dockerCredentials = { for { diff --git a/supportedBackends/bcs/src/main/scala/cromwell/backend/impl/bcs/BcsJob.scala b/supportedBackends/bcs/src/main/scala/cromwell/backend/impl/bcs/BcsJob.scala index 2dd005556c2..50a1105ccba 100644 --- a/supportedBackends/bcs/src/main/scala/cromwell/backend/impl/bcs/BcsJob.scala +++ b/supportedBackends/bcs/src/main/scala/cromwell/backend/impl/bcs/BcsJob.scala @@ -205,7 +205,7 @@ final case class BcsJob(name: String, private def handleAutoCluster(config: AutoClusterConfiguration): Unit = { val autoCluster = new AutoCluster - autoCluster.setImageId(config.imageId) + autoCluster.setImageId(runtime.imageId.getOrElse(config.imageId)) autoCluster.setInstanceType(config.instanceType) 
autoCluster.setResourceType(config.resourceType) diff --git a/supportedBackends/bcs/src/main/scala/cromwell/backend/impl/bcs/BcsRuntimeAttributes.scala b/supportedBackends/bcs/src/main/scala/cromwell/backend/impl/bcs/BcsRuntimeAttributes.scala index 8f6d02e99af..72d3b056266 100644 --- a/supportedBackends/bcs/src/main/scala/cromwell/backend/impl/bcs/BcsRuntimeAttributes.scala +++ b/supportedBackends/bcs/src/main/scala/cromwell/backend/impl/bcs/BcsRuntimeAttributes.scala @@ -34,6 +34,7 @@ final case class BcsRuntimeAttributes(continueOnReturnCode: ContinueOnReturnCode mounts: Option[Seq[BcsMount]], userData: Option[Seq[BcsUserData]], cluster: Option[BcsClusterIdOrConfiguration], + imageId: Option[String], systemDisk: Option[BcsSystemDisk], dataDisk: Option[BcsDataDisk], reserveOnFail: Option[Boolean], @@ -90,6 +91,7 @@ object BcsRuntimeAttributes { private def tagValidation(runtimeConfig: Option[Config]): OptionalRuntimeAttributesValidation[String] = TagValidation.optionalWithDefault(runtimeConfig) + private def imageIdValidation(runtimeConfig: Option[Config]): OptionalRuntimeAttributesValidation[String] = ImageIdValidation.optionalWithDefault(runtimeConfig) def runtimeAttributesBuilder(backendRuntimeConfig: Option[Config]): StandardValidatedRuntimeAttributesBuilder = { val defaults = StandardValidatedRuntimeAttributesBuilder.default(backendRuntimeConfig).withValidation( @@ -103,7 +105,8 @@ object BcsRuntimeAttributes { timeoutValidation(backendRuntimeConfig), verboseValidation(backendRuntimeConfig), vpcValidation(backendRuntimeConfig), - tagValidation(backendRuntimeConfig) + tagValidation(backendRuntimeConfig), + imageIdValidation(backendRuntimeConfig) ) // TODO: docker trips up centaur testing, for now https://github.com/broadinstitute/cromwell/issues/3518 @@ -126,6 +129,7 @@ object BcsRuntimeAttributes { val userData: Option[Seq[BcsUserData]] = RuntimeAttributesValidation.extractOption(userDataValidation(backendRuntimeConfig).key, validatedRuntimeAttributes) val cluster: Option[BcsClusterIdOrConfiguration] = RuntimeAttributesValidation.extractOption(clusterValidation(backendRuntimeConfig).key, validatedRuntimeAttributes) + val imageId: Option[String] = RuntimeAttributesValidation.extractOption(imageIdValidation(backendRuntimeConfig).key, validatedRuntimeAttributes) val dockerTag: Option[BcsDocker] = RuntimeAttributesValidation.extractOption(dockerTagValidation(backendRuntimeConfig).key, validatedRuntimeAttributes) val docker: Option[BcsDocker] = RuntimeAttributesValidation.extractOption(dockerValidation(backendRuntimeConfig).key, validatedRuntimeAttributes) val systemDisk: Option[BcsSystemDisk] = RuntimeAttributesValidation.extractOption(systemDiskValidation(backendRuntimeConfig).key, validatedRuntimeAttributes) @@ -146,6 +150,7 @@ object BcsRuntimeAttributes { mounts, userData, cluster, + imageId, systemDisk, dataDisk, reserveOnFail, @@ -273,8 +278,6 @@ class ClusterValidation(override val config: Option[Config]) extends RuntimeAttr { override def key: String = "cluster" - override def usedInCallCaching: Boolean = true - override def coercion: Traversable[WomType] = Set(WomStringType) override def validateValue: PartialFunction[WomValue, ErrorOr[BcsClusterIdOrConfiguration]] = { @@ -365,3 +368,12 @@ object TagValidation { class TagValidation(override val config: Option[Config]) extends StringRuntimeAttributesValidation("tag") with OptionalWithDefault[String] +object ImageIdValidation { + def optionalWithDefault(config: Option[Config]): OptionalRuntimeAttributesValidation[String] = new 
ImageIdValidation(config).optional +} + +class ImageIdValidation(override val config: Option[Config]) extends StringRuntimeAttributesValidation("imageId") with OptionalWithDefault[String] +{ + override def usedInCallCaching: Boolean = true +} + diff --git a/supportedBackends/bcs/src/test/resources/application.conf b/supportedBackends/bcs/src/test/resources/application.conf deleted file mode 100644 index 218d12ee470..00000000000 --- a/supportedBackends/bcs/src/test/resources/application.conf +++ /dev/null @@ -1,77 +0,0 @@ -#include required(classpath("application")) - - -call-caching { - # Allows re-use of existing results for jobs you've already run - # (default: false) - enabled = true - - # Whether to invalidate a cache result forever if we cannot reuse them. Disable this if you expect some cache copies - # to fail for external reasons which should not invalidate the cache (e.g. auth differences between users): - # (default: true) - invalidate-bad-cache-results = true - -} - -docker { - hash-lookup { - enable = true - - # How should docker hashes be looked up. Possible values are "local" and "remote" - # "local": Lookup hashes on the local docker daemon using the cli - # "remote": Lookup hashes on docker hub and gcr - method = "remote" - alibabacloudcr { - num-threads = 5 - auth { - access-id = "test-access-id" - access-key = "test-access-key" - security-token = "test-security-token" - } - } - } -} - -backend { - default = "BCS" - - providers { - BCS { - actor-factory = "cromwell.backend.impl.bcs.BcsBackendLifecycleActorFactory" - config { - root = "oss://my-bucket/cromwell_dir" - region = "cn-shanghai" - access-id = "test-access-id" - access-key = "test-access-key" - security-token = "test-security-token" - - filesystems { - oss { - auth { - endpoint = "oss-cn-shanghai.aliyuncs.com" - access-id = "test-access-id" - access-key = "test-access-key" - security-token = "test-security-token" - } - - caching { - # When a cache hit is found, the following duplication strategy will be followed to use the cached outputs - # Possible values: "copy", "reference". Defaults to "copy" - # "copy": Copy the output files - # "reference": DO NOT copy the output files but point to the original output files instead. - # Will still make sure than all the original output files exist and are accessible before - # going forward with the cache hit. 
- duplication-strategy = "reference" - } - } - } - - default-runtime-attributes { - failOnStderr: false - continueOnReturnCode: 0 - vpc: "192.168.0.0/16" - } - } - } - } -} diff --git a/supportedBackends/bcs/src/test/scala/cromwell/backend/impl/bcs/BcsJobSpec.scala b/supportedBackends/bcs/src/test/scala/cromwell/backend/impl/bcs/BcsJobSpec.scala index 589b1860856..13ed05bbb0c 100644 --- a/supportedBackends/bcs/src/test/scala/cromwell/backend/impl/bcs/BcsJobSpec.scala +++ b/supportedBackends/bcs/src/test/scala/cromwell/backend/impl/bcs/BcsJobSpec.scala @@ -72,6 +72,7 @@ class BcsJobSpec extends BcsTestUtilSpec { val spotStrategy = "SpotWithPriceLimit" val spotPriceLimit = 0.12 val cluster = s"$resourceType $instanceType $imageId $spotStrategy $spotPriceLimit" + val imageIdForCallCaching = "img-ubuntu-vpc" val reserveOnFail = true val cidr = "172.16.16.0/20" val vpcId = "vpc-test" @@ -89,7 +90,8 @@ class BcsJobSpec extends BcsTestUtilSpec { "vpc" -> WomString(s"$cidr $vpcId"), "systemDisk" -> WomString(s"$systemDiskType $systemDiskSize"), "dataDisk" -> WomString(s"$dataDiskType $dataDiskSize $dataDiskMountPoint"), - "userData" -> WomString(s"$userDataKey $userDataValue") + "userData" -> WomString(s"$userDataKey $userDataValue"), + "imageId" -> WomString(s"$imageIdForCallCaching") ) val task = taskWithRuntime(runtime) @@ -97,7 +99,7 @@ class BcsJobSpec extends BcsTestUtilSpec { val autoCluster = task.getAutoCluster autoCluster.isReserveOnFail shouldEqual reserveOnFail - autoCluster.getImageId shouldEqual imageId + autoCluster.getImageId shouldEqual imageIdForCallCaching autoCluster.getResourceType shouldEqual resourceType autoCluster.getInstanceType shouldEqual instanceType autoCluster.getSpotStrategy shouldEqual spotStrategy @@ -122,8 +124,8 @@ class BcsJobSpec extends BcsTestUtilSpec { private def withRuntime(runtime: Map[String, WomValue] = Map.empty[String, WomValue]): BcsJob = { - val rumtimeAttributes = createBcsRuntimeAttributes(runtime) - BcsJob(name, description, command, packagePath, rumtimeAttributes.mounts.getOrElse(mounts), envs, rumtimeAttributes, None, None, mockBcsClient) + val runtimeAttributes = createBcsRuntimeAttributes(runtime) + BcsJob(name, description, command, packagePath, runtimeAttributes.mounts.getOrElse(mounts), envs, runtimeAttributes, None, None, mockBcsClient) } private def taskWithRuntime(runtime: Map[String, WomValue] = Map.empty[String, WomValue]): TaskDescription = { diff --git a/supportedBackends/bcs/src/test/scala/cromwell/backend/impl/bcs/BcsRuntimeAttributesSpec.scala b/supportedBackends/bcs/src/test/scala/cromwell/backend/impl/bcs/BcsRuntimeAttributesSpec.scala index 399c9c854ca..012c6a15b3b 100644 --- a/supportedBackends/bcs/src/test/scala/cromwell/backend/impl/bcs/BcsRuntimeAttributesSpec.scala +++ b/supportedBackends/bcs/src/test/scala/cromwell/backend/impl/bcs/BcsRuntimeAttributesSpec.scala @@ -13,13 +13,13 @@ class BcsRuntimeAttributesSpec extends BcsTestUtilSpec { it should "parse dockerTag without docker path" in { val runtime = Map("dockerTag" -> WomString("ubuntu/latest")) - val expected = expectedRuntimeAttributes.copy(dockerTag = Some(BcsDockerWithoutPath("ubuntu/latest"))) + val expected = expectedRuntimeAttributes.copy(dockerTag = Option(BcsDockerWithoutPath("ubuntu/latest"))) createBcsRuntimeAttributes(runtime) shouldEqual(expected) } it should "parse dockerTag with path" in { val runtime = Map("dockerTag" -> WomString("centos/latest oss://bcs-dir/registry/")) - val expected = expectedRuntimeAttributes.copy(dockerTag = 
Some(BcsDockerWithPath("centos/latest", "oss://bcs-dir/registry/"))) + val expected = expectedRuntimeAttributes.copy(dockerTag = Option(BcsDockerWithPath("centos/latest", "oss://bcs-dir/registry/"))) createBcsRuntimeAttributes(runtime) shouldEqual(expected) } @@ -30,13 +30,13 @@ class BcsRuntimeAttributesSpec extends BcsTestUtilSpec { it should "parse docker" in { val runtime = Map("docker" -> WomString("registry.cn-beijing.aliyuncs.com/test/testubuntu:0.2")) - val expected = expectedRuntimeAttributes.copy(docker = Some(BcsDockerWithoutPath("registry.cn-beijing.aliyuncs.com/test/testubuntu:0.2"))) + val expected = expectedRuntimeAttributes.copy(docker = Option(BcsDockerWithoutPath("registry.cn-beijing.aliyuncs.com/test/testubuntu:0.2"))) createBcsRuntimeAttributes(runtime) shouldEqual(expected) } it should "parse correct user data" in { val runtime = Map("userData" -> WomString("key value1")) - val expected = expectedRuntimeAttributes.copy(userData = Some(Vector(BcsUserData("key", "value1")))) + val expected = expectedRuntimeAttributes.copy(userData = Option(Vector(BcsUserData("key", "value1")))) createBcsRuntimeAttributes(runtime) shouldEqual(expected) } @@ -47,13 +47,13 @@ class BcsRuntimeAttributesSpec extends BcsTestUtilSpec { it should "parse correct input mount" in { val runtime = Map("mounts" -> WomString("oss://bcs-dir/bcs-file /home/inputs/input_file false")) - val expected = expectedRuntimeAttributes.copy(mounts = Some(Vector(BcsInputMount(Left(mockPathBuilder.build("oss://bcs-dir/bcs-file").get), Right("/home/inputs/input_file"), false)))) + val expected = expectedRuntimeAttributes.copy(mounts = Option(Vector(BcsInputMount(Left(mockPathBuilder.build("oss://bcs-dir/bcs-file").get), Right("/home/inputs/input_file"), false)))) createBcsRuntimeAttributes(runtime) shouldEqual expected } it should "parse correct out mount" in { val runtime = Map("mounts" -> WomString("/home/outputs/ oss://bcs-dir/outputs/ true")) - val expected = expectedRuntimeAttributes.copy(mounts = Some(Vector(BcsOutputMount(Right("/home/outputs/"), Left(mockPathBuilder.build("oss://bcs-dir/outputs/").get), true)))) + val expected = expectedRuntimeAttributes.copy(mounts = Option(Vector(BcsOutputMount(Right("/home/outputs/"), Left(mockPathBuilder.build("oss://bcs-dir/outputs/").get), true)))) createBcsRuntimeAttributes(runtime) shouldEqual expected } @@ -64,44 +64,44 @@ class BcsRuntimeAttributesSpec extends BcsTestUtilSpec { it should "parse correct cluster id" in { val runtime = Map("cluster" -> WomString("cls-1")) - val expected = expectedRuntimeAttributes.copy(cluster = Some(Left("cls-1"))) + val expected = expectedRuntimeAttributes.copy(cluster = Option(Left("cls-1"))) createBcsRuntimeAttributes(runtime) shouldEqual expected } it should "parse correct ondemand auto cluster configuration" in { val runtime = Map("cluster" -> WomString("OnDemand ecs.s1.large img-ubuntu")) - val expected = expectedRuntimeAttributes.copy(cluster = Some(Right(AutoClusterConfiguration("OnDemand", "ecs.s1.large", "img-ubuntu")))) + val expected = expectedRuntimeAttributes.copy(cluster = Option(Right(AutoClusterConfiguration("OnDemand", "ecs.s1.large", "img-ubuntu")))) createBcsRuntimeAttributes(runtime) shouldEqual expected } it should "parse correct spot auto cluster configuration" in { val runtime = Map("cluster" -> WomString("Spot ecs.s1.large img-ubuntu")) - val expected = expectedRuntimeAttributes.copy(cluster = Some(Right(AutoClusterConfiguration("Spot", "ecs.s1.large", "img-ubuntu")))) + val expected = 
expectedRuntimeAttributes.copy(cluster = Option(Right(AutoClusterConfiguration("Spot", "ecs.s1.large", "img-ubuntu")))) createBcsRuntimeAttributes(runtime) shouldEqual expected } it should "parse correct spot auto cluster price option" in { val runtime = Map("cluster" -> WomString("Spot ecs.s1.large img-ubuntu SpotWithPriceLimit 0.1")) - val expected = expectedRuntimeAttributes.copy(cluster = Some(Right(AutoClusterConfiguration("Spot", "ecs.s1.large", "img-ubuntu", Some("SpotWithPriceLimit"), Some(0.1.toFloat))))) + val expected = expectedRuntimeAttributes.copy(cluster = Option(Right(AutoClusterConfiguration("Spot", "ecs.s1.large", "img-ubuntu", Option("SpotWithPriceLimit"), Some(0.1.toFloat))))) createBcsRuntimeAttributes(runtime) shouldEqual expected } it should "parse correct vpc cidr block" in { val runtime = Map("vpc" -> WomString("172.16.16.0/20")) - val expected = expectedRuntimeAttributes.copy(vpc = Some(BcsVpcConfiguration(Some("172.16.16.0/20")))) + val expected = expectedRuntimeAttributes.copy(vpc = Option(BcsVpcConfiguration(Option("172.16.16.0/20")))) createBcsRuntimeAttributes(runtime) shouldEqual expected } it should "parse correct vpc id" in { val runtime = Map("vpc" -> WomString("vpc-xxxx")) - val expected = expectedRuntimeAttributes.copy(vpc = Some(BcsVpcConfiguration(vpcId = Some("vpc-xxxx")))) + val expected = expectedRuntimeAttributes.copy(vpc = Option(BcsVpcConfiguration(vpcId = Option("vpc-xxxx")))) createBcsRuntimeAttributes(runtime) shouldEqual expected } it should "parse correct system disk" in { val runtime = Map("systemDisk" -> WomString("cloud_efficiency 250")) - val expected = expectedRuntimeAttributes.copy(systemDisk = Some(BcsSystemDisk("cloud_efficiency", 250))) + val expected = expectedRuntimeAttributes.copy(systemDisk = Option(BcsSystemDisk("cloud_efficiency", 250))) createBcsRuntimeAttributes(runtime) shouldEqual expected } @@ -112,7 +112,7 @@ class BcsRuntimeAttributesSpec extends BcsTestUtilSpec { it should "parse correct data disk" in { val runtime = Map("dataDisk" -> WomString("cloud 400 /home/data/")) - val expected = expectedRuntimeAttributes.copy(dataDisk = Some(BcsDataDisk("cloud", 400, "/home/data/"))) + val expected = expectedRuntimeAttributes.copy(dataDisk = Option(BcsDataDisk("cloud", 400, "/home/data/"))) createBcsRuntimeAttributes(runtime) shouldEqual expected } @@ -123,25 +123,25 @@ class BcsRuntimeAttributesSpec extends BcsTestUtilSpec { it should "parse correct reserve on fail option" in { val runtime = Map("reserveOnFail" -> WomBoolean(false)) - val expected = expectedRuntimeAttributes.copy(reserveOnFail = Some(false)) + val expected = expectedRuntimeAttributes.copy(reserveOnFail = Option(false)) createBcsRuntimeAttributes(runtime) shouldEqual expected } it should "parse correct auto release option" in { val runtime = Map("autoReleaseJob" -> WomBoolean(false)) - val expected = expectedRuntimeAttributes.copy(autoReleaseJob = Some(false)) + val expected = expectedRuntimeAttributes.copy(autoReleaseJob = Option(false)) createBcsRuntimeAttributes(runtime) shouldEqual expected } it should "parse correct verbose option" in { val runtime = Map("verbose" -> WomBoolean(false)) - val expected = expectedRuntimeAttributes.copy(verbose = Some(false)) + val expected = expectedRuntimeAttributes.copy(verbose = Option(false)) createBcsRuntimeAttributes(runtime) shouldEqual expected } it should "parse correct time out" in { val runtime = Map("timeout" -> WomInteger(3000)) - val expected = expectedRuntimeAttributes.copy(timeout = Some(3000)) + val expected 
= expectedRuntimeAttributes.copy(timeout = Option(3000)) createBcsRuntimeAttributes(runtime) shouldEqual expected } diff --git a/supportedBackends/bcs/src/test/scala/cromwell/backend/impl/bcs/BcsTestUtilSpec.scala b/supportedBackends/bcs/src/test/scala/cromwell/backend/impl/bcs/BcsTestUtilSpec.scala index 7116d7826a4..e68652780f6 100644 --- a/supportedBackends/bcs/src/test/scala/cromwell/backend/impl/bcs/BcsTestUtilSpec.scala +++ b/supportedBackends/bcs/src/test/scala/cromwell/backend/impl/bcs/BcsTestUtilSpec.scala @@ -35,6 +35,7 @@ object BcsTestUtilSpec { | timeout: 3000 | vpc: "192.168.0.0/16 vpc-xxxx" | tag: "jobTag" + | imageId: "img-ubuntu-vpc" |} """.stripMargin @@ -134,25 +135,26 @@ trait BcsTestUtilSpec extends TestKitSuite with FlatSpecLike with Matchers with val expectedContinueOnReturn = ContinueOnReturnCodeSet(Set(0)) - val expectedDockerTag = Some(BcsDockerWithPath("ubuntu/latest", "oss://bcs-reg/ubuntu/")) - val expectedDocker = Some(BcsDockerWithoutPath("registry.cn-beijing.aliyuncs.com/test/testubuntu:0.1")) + val expectedDockerTag = Option(BcsDockerWithPath("ubuntu/latest", "oss://bcs-reg/ubuntu/")) + val expectedDocker = Option(BcsDockerWithoutPath("registry.cn-beijing.aliyuncs.com/test/testubuntu:0.1")) val expectedFailOnStderr = false - val expectedUserData = Some(Vector(new BcsUserData("key", "value"))) - val expectedMounts = Some(Vector(new BcsInputMount(Left(mockPathBuilder.build("oss://bcs-bucket/bcs-dir/").get), Right("/home/inputs/"), false))) - val expectedCluster = Some(Left("cls-mycluster")) - val expectedSystemDisk = Some(BcsSystemDisk("cloud", 50)) - val expectedDataDsik = Some(BcsDataDisk("cloud", 250, "/home/data/")) + val expectedUserData = Option(Vector(new BcsUserData("key", "value"))) + val expectedMounts = Option(Vector(new BcsInputMount(Left(mockPathBuilder.build("oss://bcs-bucket/bcs-dir/").get), Right("/home/inputs/"), false))) + val expectedCluster = Option(Left("cls-mycluster")) + val expectedImageId = Option("img-ubuntu-vpc") + val expectedSystemDisk = Option(BcsSystemDisk("cloud", 50)) + val expectedDataDisk = Option(BcsDataDisk("cloud", 250, "/home/data/")) - val expectedReserveOnFail = Some(true) - val expectedAutoRelease = Some(true) - val expectedTimeout = Some(3000) - val expectedVerbose = Some(false) - val expectedVpc = Some(BcsVpcConfiguration(Some("192.168.0.0/16"), Some("vpc-xxxx"))) - val expectedTag = Some("jobTag") + val expectedReserveOnFail = Option(true) + val expectedAutoRelease = Option(true) + val expectedTimeout = Option(3000) + val expectedVerbose = Option(false) + val expectedVpc = Option(BcsVpcConfiguration(Option("192.168.0.0/16"), Option("vpc-xxxx"))) + val expectedTag = Option("jobTag") val expectedRuntimeAttributes = new BcsRuntimeAttributes(expectedContinueOnReturn, expectedDockerTag, expectedDocker, expectedFailOnStderr, expectedMounts, expectedUserData, expectedCluster, - expectedSystemDisk, expectedDataDsik, expectedReserveOnFail, expectedAutoRelease, expectedTimeout, expectedVerbose, expectedVpc, expectedTag) + expectedImageId, expectedSystemDisk, expectedDataDisk, expectedReserveOnFail, expectedAutoRelease, expectedTimeout, expectedVerbose, expectedVpc, expectedTag) protected def createBcsRuntimeAttributes(runtimeAttributes: Map[String, WomValue]): BcsRuntimeAttributes = { diff --git a/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/PipelinesApiRuntimeAttributes.scala 
b/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/PipelinesApiRuntimeAttributes.scala index cb36dff48c0..73d992ae1bb 100644 --- a/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/PipelinesApiRuntimeAttributes.scala +++ b/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/PipelinesApiRuntimeAttributes.scala @@ -20,7 +20,7 @@ import wom.types._ import wom.values._ object GpuResource { - val DefaultNvidiaDriverVersion = "390.46" + val DefaultNvidiaDriverVersion = "418.87.00" final case class GpuType(name: String) { override def toString: String = name diff --git a/supportedBackends/google/pipelines/v2alpha1/src/main/resources/gcs_transfer.sh b/supportedBackends/google/pipelines/v2alpha1/src/main/resources/gcs_transfer.sh index 1f1912845bf..d4e5106d606 100755 --- a/supportedBackends/google/pipelines/v2alpha1/src/main/resources/gcs_transfer.sh +++ b/supportedBackends/google/pipelines/v2alpha1/src/main/resources/gcs_transfer.sh @@ -2,25 +2,13 @@ # The `papi_v2_log` Centaur test is opinionated about the number of log messages around localization/delocalization. # The trace logging of `set -x` must be turned off for the `papi_v2_log` test to pass. set +x +set -euo pipefail -gsutil_log=$(mktemp /tmp/gsutil.XXXXXXXXXXXXXXXX) +gsutil_log=gsutil.log +NO_REQUESTER_PAYS_COMMAND="" +REQUESTER_PAYS_COMMAND="" -private::localize_file() { - local cloud="$1" - local container="$2" - local rpflag="$3" - # Do not quote rpflag, when that is set it will be -u project which should be two distinct arguments. - rm -f "$HOME/.config/gcloud/gce" && gsutil ${rpflag} -m cp "$cloud" "$container" > "$gsutil_log" 2>&1 -} - -private::localize_directory() { - local cloud="$1" - local container="$2" - local rpflag="$3" - # Do not quote rpflag, when that is set it will be -u project which should be two distinct arguments. - mkdir -p "${container}" && rm -f "$HOME/.config/gcloud/gce" && gsutil ${rpflag} -m rsync -r "${cloud}" "${container}" > "$gsutil_log" 2>&1 -} private::delocalize_file() { local cloud="$1" @@ -57,6 +45,7 @@ private::delocalize_file() { fi } + private::delocalize_directory() { local cloud="$1" local container="$2" @@ -77,6 +66,7 @@ private::delocalize_directory() { fi } + private::delocalize_file_or_directory() { local cloud="$1" local container="$2" @@ -95,10 +85,12 @@ private::delocalize_file_or_directory() { fi } + private::timestamped_message() { printf '%s %s\n' "$(date -u '+%Y/%m/%d %H:%M:%S')" "$1" } + private::localize_message() { local cloud="$1" local container="$2" @@ -106,6 +98,7 @@ private::localize_message() { private::timestamped_message "${message}" } + private::delocalize_message() { local cloud="$1" local container="$2" @@ -113,67 +106,199 @@ private::delocalize_message() { private::timestamped_message "${message}" } -# Transfer a bundle of files or directories to or from the same GCS bucket. -transfer() { - # Begin the transfer with uncertain requester pays status and first attempting transfers without requester pays. - private::transfer false false "$@" + +# Requires both NO_REQUESTER_PAYS_COMMAND and USE_REQUESTER_PAYS_COMMAND to be set. 
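Editor's note: the helper defined immediately below probes a bucket's requester pays status by first running a transfer without a billing project and, if gsutil reports "Bucket is requester pays bucket but no user project provided.", retrying with the project flag. A minimal caller sketch follows; only the function name, the two command variables, and USE_REQUESTER_PAYS come from this script, while the project, bucket, and paths are placeholders.

    # Placeholders: example-project, gs://example-bucket/input.txt, /cromwell_root/inputs/
    NO_REQUESTER_PAYS_COMMAND="gsutil cp 'gs://example-bucket/input.txt' '/cromwell_root/inputs/'"
    REQUESTER_PAYS_COMMAND="gsutil -u example-project cp 'gs://example-bucket/input.txt' '/cromwell_root/inputs/'"

    # Up to 3 attempts; sets USE_REQUESTER_PAYS to true, false, or error.
    private::determine_requester_pays 3

    if [[ ${USE_REQUESTER_PAYS} = true ]]; then
      rpflag="-u example-project"
    elif [[ ${USE_REQUESTER_PAYS} = false ]]; then
      rpflag=""
    else
      exit 1
    fi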
+private::determine_requester_pays() { + local max_attempts="$1" + local attempt=1 + shift + + local command="$NO_REQUESTER_PAYS_COMMAND" + local use_requester_pays=false + # assume the worst + USE_REQUESTER_PAYS=error + + while [[ ${attempt} -le ${max_attempts} ]]; do + if eval ${command} > ${gsutil_log} 2>&1 ; then + USE_REQUESTER_PAYS=${use_requester_pays} + break + elif [[ "$use_requester_pays" = "false" ]]; then + if grep -q "Bucket is requester pays bucket but no user project provided." ${gsutil_log}; then + use_requester_pays=true + command="$REQUESTER_PAYS_COMMAND" + else + attempt=$((attempt + 1)) + fi + else + attempt=$((attempt + 1)) + fi + done + + if [[ ${attempt} -gt ${max_attempts} ]]; then + echo "Error attempting to localize file with command: '$command'" + cat ${gsutil_log} + fi } -private::transfer() { - local rp_status_certain="$1" - local use_requester_pays="$2" - local direction="$3" - local project="$4" - local max_attempts="$5" - shift 5 # rp_status_certain + use_requester_pays + direction + project + max_attempts +localize_files() { + local project="$1" + local max_attempts="$2" + local container_parent="$3" + local first_cloud_file="$4" + shift 4 + + local num_cpus=$(grep -c ^processor /proc/cpuinfo) + # 32 is the max component count currently supported by gsutil cp. + if [[ ${num_cpus} -gt 32 ]]; then + num_cpus=32 + fi - if [[ "$direction" != "localize" && "$direction" != "delocalize" ]]; then - echo "direction must be 'localize' or 'delocalize' but got '$direction'" + # We need to determine requester pays status of the first file attempting at most `max_attempts` times. + NO_REQUESTER_PAYS_COMMAND="mkdir -p '$container_parent' && gsutil -o 'GSUtil:parallel_thread_count=1' -o 'GSUtil:sliced_object_download_max_components=${num_cpus}' cp '$first_cloud_file' '$container_parent'" + REQUESTER_PAYS_COMMAND="gsutil -o 'GSUtil:parallel_thread_count=1' -o 'GSUtil:sliced_object_download_max_components=${num_cpus}' -u $project cp '$first_cloud_file' '$container_parent'" + + basefile=$(basename "$first_cloud_file") + private::localize_message "$first_cloud_file" "${container_parent}${basefile}" + private::determine_requester_pays ${max_attempts} + + if [[ ${USE_REQUESTER_PAYS} = true ]]; then + rpflag="-u $project" + elif [[ ${USE_REQUESTER_PAYS} = false ]]; then + rpflag="" + else + # error exit 1 fi - # Whether the requester pays status of the GCS bucket is certain. rp status is presumed false until proven otherwise. - local message_fn="private::${direction}_message" + if [[ $# -gt 0 ]]; then + touch files_to_localize.txt + while [[ $# -gt 0 ]]; do + cloud="$1" + basefile=$(basename "$cloud") + container="${container_parent}${basefile}" + private::localize_message "$cloud" "$container" + echo "$cloud" >> files_to_localize.txt + shift + done + + attempt=1 + while [[ ${attempt} -le ${max_attempts} ]]; do + # parallel transfer the remaining files + if cat files_to_localize.txt | gsutil -o "GSUtil:parallel_thread_count=1" -o "GSUtil:sliced_object_download_max_components=${num_cpus}" -m ${rpflag} cp -I "$container_parent"; then + break + else + attempt=$((attempt + 1)) + fi + done + if [[ ${attempt} -gt ${max_attempts} ]]; then exit 1; fi + rm -f files_to_localize.txt + fi +} + + +# Requires known requester pays status. 
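Editor's note: the localization bundles the backend emits for this entry point (see the Scala changes to PipelinesApiAsyncBackendJobExecutionActor later in this diff) are bash arrays shaped roughly as below. The project, paths, and hash suffix are illustrative only.

    files_to_localize_0123abcd=(
      "example-project"                    # project to use if requester pays
      "3"                                  # max transfer attempts
      "/cromwell_root/inputs/workspace/"   # container parent directory
      "gs://example-bucket/workspace/reads.bam"
      "gs://example-bucket/workspace/reads.bai"
    )

    localize_files "${files_to_localize_0123abcd[@]}"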
+private::localize_directory() { + local cloud="$1" + local container="$2" + local max_attempts="$3" + local rpflag="$4" + + local attempt=1 + private::localize_message "$cloud" "$container" + while [[ ${attempt} -lt ${max_attempts} ]]; do + # Do not quote rpflag, when that is set it will be -u project which should be two distinct arguments. + if mkdir -p "${container}" && rm -f "$HOME/.config/gcloud/gce" && gsutil ${rpflag} -m rsync -r "${cloud}" "${container}" > /dev/null 2>&1; then + break + else + attempt=$(($attempt + 1)) + fi + done + + if [[ ${attempt} -gt ${max_attempts} ]]; then + exit 1 + fi +} + + +# Called from the localization script with unknown requester pays status on the source bucket. This attempts to localize +# the first input directory without requester pays. If that fails with a requester pays error, this attempts again with +# the project flag required for requester pays. Both no-requester-pays and requester-pays attempts are retried up to +# max_attempts times. Once requester pays status is determined via the first directory the remaining files are localized +# with or without the project flag as appropriate. +localize_directories() { + local project="$1" + local max_attempts="$2" + local cloud_directory="$3" + local container_directory="$4" + shift 4 + + BASE_COMMAND="private::localize_directory '${cloud_directory}' '${container_directory}' '${max_attempts}'" + NO_REQUESTER_PAYS_COMMAND="${BASE_COMMAND} ''" + REQUESTER_PAYS_COMMAND="${BASE_COMMAND} '-u $project'" + + private::determine_requester_pays ${max_attempts} + + if [[ ${USE_REQUESTER_PAYS} = true ]]; then + rpflag="-u $project" + elif [[ ${USE_REQUESTER_PAYS} = false ]]; then + rpflag="" + else + exit 1 + fi + + while [[ $# -gt 0 ]]; do + cloud_directory="$1" + container_directory="$2" + shift 2 + private::localize_directory "$cloud_directory" "$container_directory" "$max_attempts" "$rpflag" + done +} + + +# Handles all delocalizations for a transfer bundle (a grouping of file, directories, or files_or_directories targeting +# a single GCS bucket). +delocalize() { + local project="$1" + local max_attempts="$2" + + shift 2 + + # Whether the requester pays status of the GCS bucket is certain. rp status is presumed false until proven otherwise. + local rp_status_certain=false + local use_requester_pays=false - # If requester pays status is unknown, loop through the items in the transfer bundle until requester pays status is determined. - # Once determined, the remaining items can be transferred in parallel. 
while [[ $# -gt 0 ]]; do file_or_directory="$1" cloud="$2" container="$3" + required="$4" + content_type="$5" + + shift 5 if [[ "$file_or_directory" = "file" ]]; then - transfer_fn_name="private::${direction}_file" + transfer_fn_name="private::delocalize_file" elif [[ "$file_or_directory" = "directory" ]]; then - transfer_fn_name="private::${direction}_directory" - elif [[ "$direction" = "delocalize" && "$file_or_directory" = "file_or_directory" ]]; then + transfer_fn_name="private::delocalize_directory" + elif [[ "$file_or_directory" = "file_or_directory" ]]; then transfer_fn_name="private::delocalize_file_or_directory" else - echo "file_or_directory must be 'file' or 'directory' or (for delocalization only) 'file_or_directory' but got '$file_or_directory' with direction = '$direction'" + echo "file_or_directory must be 'file' or 'directory' or 'file_or_directory' but got '$file_or_directory'" exit 1 fi - content_type="" - required="" - if [[ "${direction}" = "delocalize" ]]; then - # 'required' and 'content type' only appear in delocalization bundles. - required="$4" - content_type="$5" - if [[ "$required" != "required" && "$required" != "optional" ]]; then - echo "'required' must be 'required' or 'optional' but got '$required'" - exit 1 - elif [[ "$required" = "required" && "$file_or_directory" = "file_or_directory" ]]; then - echo "Invalid combination of required = required and file_or_directory = file_or_directory, file_or_directory only valid with optional secondary outputs" - exit 1 - fi - shift 2 # required + content_type + if [[ "$required" != "required" && "$required" != "optional" ]]; then + echo "'required' must be 'required' or 'optional' but got '$required'" + exit 1 + elif [[ "$required" = "required" && "$file_or_directory" = "file_or_directory" ]]; then + echo "Invalid combination of required = required and file_or_directory = file_or_directory, file_or_directory only valid with optional secondary outputs" + exit 1 fi - shift 3 # file_or_directory + cloud + container - # Log what is being localized or delocalized (at least one test depends on this). - ${message_fn} "$cloud" "$container" + # Log what is being delocalized (at least one test depends on this). + private::delocalize_message "$cloud" "$container" attempt=1 transfer_rc=0 @@ -187,15 +312,14 @@ private::transfer() { fi # Note the localization versions of transfer functions are passed "required" and "content_type" parameters they will not use. - ${transfer_fn_name} "$cloud" "$container" "$rpflag" "$required" "$content_type" - transfer_rc=$? - - # Do not set rp_status_certain=true if an optional file was absent and no transfer was attempted. - if [[ ${transfer_rc} = 0 && "$required" = "false" && ! -e "$container" ]]; then - break - elif [[ ${transfer_rc} = 0 ]]; then - rp_status_certain=true - break + if ${transfer_fn_name} "$cloud" "$container" "$rpflag" "$required" "$content_type"; then + if [[ "$required" = "false" && ! -e "$container" ]]; then + # Do not set rp_status_certain=true if an optional file was absent and no transfer was attempted. + break + else + rp_status_certain=true + break + fi else private::timestamped_message "${transfer_fn_name} \"$cloud\" \"$container\" \"$rpflag\" \"$required\" \"$content_type\" failed" @@ -227,3 +351,21 @@ private::transfer() { rm -f "${gsutil_log}" } + + +# Required for files whose names are not consistent between cloud and container. There should be very few of these, +# the monitoring script being the single known example. 
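Editor's note: a sketch of the singleton bundle shape for the function defined below, mirroring the "singleton_file_to_localize_" template generated by the backend later in this diff. The project, paths, and hash suffix are placeholders; the monitoring script is simply the kind of input whose cloud and container names differ.

    singleton_file_to_localize_0123abcd=(
      "example-project"                    # project to use if requester pays
      "3"                                  # max transfer attempts
      "gs://example-bucket/monitoring.sh"  # cloud path
      "/cromwell_root/monitoring.sh"       # container path
    )

    localize_singleton_file "${singleton_file_to_localize_0123abcd[@]}"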
+localize_singleton_file() { + local project="$1" + local max_attempts="$2" + local cloud="$3" + local container="$4" + + local container_parent=$(dirname "$container") + + private::localize_message "$cloud" "$container" + NO_REQUESTER_PAYS_COMMAND="mkdir -p '$container_parent' && gsutil cp '$cloud' '$container'" + REQUESTER_PAYS_COMMAND="gsutil -u $project cp '$cloud' '$container'" + # As a side effect of determining requester pays this one file will be localized. + private::determine_requester_pays ${max_attempts} +} diff --git a/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/PipelinesApiAsyncBackendJobExecutionActor.scala b/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/PipelinesApiAsyncBackendJobExecutionActor.scala index f1e60a67c9d..9cfa80a2447 100644 --- a/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/PipelinesApiAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/PipelinesApiAsyncBackendJobExecutionActor.scala @@ -5,6 +5,7 @@ import cats.instances.list._ import cats.instances.map._ import cats.syntax.foldable._ import com.google.cloud.storage.contrib.nio.CloudStorageOptions +import common.util.StringUtil._ import cromwell.backend.BackendJobDescriptor import cromwell.backend.google.pipelines.common.PipelinesApiConfigurationAttributes.LocalizationConfiguration import cromwell.backend.google.pipelines.common._ @@ -58,27 +59,72 @@ class PipelinesApiAsyncBackendJobExecutionActor(standardParams: StandardAsyncExe private def gcsLocalizationTransferBundle[T <: PipelinesApiInput](localizationConfiguration: LocalizationConfiguration)(bucket: String, inputs: NonEmptyList[T]): String = { val project = inputs.head.cloudPath.asInstanceOf[GcsPath].projectId val maxAttempts = localizationConfiguration.localizationAttempts - val transferItems = inputs.toList.flatMap { i => - val kind = i match { - case _: PipelinesApiFileInput => "file" - case _: PipelinesApiDirectoryInput => "directory" - } - List(kind, i.cloudPath, i.containerPath) - } mkString("\"", "\"\n| \"", "\"") - // Use a digest as bucket names can contain characters that are not legal in bash identifiers. - val arrayIdentifier = s"localize_" + DigestUtils.md5Hex(bucket) - s""" - |# $bucket - |$arrayIdentifier=( - | "localize" # direction - | "$project" # project - | "$maxAttempts" # max attempts - | $transferItems - |) - | - |transfer "$${$arrayIdentifier[@]}" - """.stripMargin + // Split files and directories out so files can possibly benefit from a `gsutil -m cp -I ...` optimization + // on a per-container-parent-directory basis. + val (files, directories) = inputs.toList partition { _.isInstanceOf[PipelinesApiFileInput] } + + // Files with different names between cloud and container are not eligible for bulk copying. + val (filesWithSameNames, filesWithDifferentNames) = files partition { f => + f.cloudPath.asInstanceOf[GcsPath].nioPath.getFileName.toString == f.containerPath.getFileName.toString + } + + val filesByContainerParentDirectory = filesWithSameNames.groupBy(_.containerPath.parent.toString) + // Deduplicate any inputs since parallel localization can't deal with this. 
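Editor's note: a self-contained sketch of the grouping and deduplication performed here, using plain (cloudPath, containerPath) string pairs as hypothetical stand-ins for PipelinesApiFileInput values.

    // Hypothetical stand-in for PipelinesApiFileInput: (cloudPath, containerPath)
    val inputs = List(
      ("gs://example-bucket/a/x.txt", "/cromwell_root/a/x.txt"),
      ("gs://example-bucket/a/y.txt", "/cromwell_root/a/y.txt"),
      ("gs://example-bucket/a/y.txt", "/cromwell_root/a/y.txt"), // duplicate input
      ("gs://example-bucket/b/z.txt", "/cromwell_root/b/z.txt")
    )

    // Group by container parent directory, then deduplicate within each group so a
    // parallel bulk copy never receives the same object twice.
    val grouped: Map[String, Set[(String, String)]] =
      inputs
        .groupBy { case (_, containerPath) => containerPath.split('/').dropRight(1).mkString("/") }
        .map { case (parent, files) => parent -> files.toSet }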
+ val uniqueFilesByContainerParentDirectory = filesByContainerParentDirectory map { case (p, fs) => p -> fs.toSet } + + val filesWithSameNamesTransferBundles: List[String] = uniqueFilesByContainerParentDirectory.toList map { case (containerParent, filesWithSameParent) => + val arrayIdentifier = s"files_to_localize_" + DigestUtils.md5Hex(bucket + containerParent) + val entries = filesWithSameParent.map(_.cloudPath) mkString("\"", "\"\n| \"", "\"") + + s""" + |# Localize files from source bucket '$bucket' to container parent directory '$containerParent'. + |$arrayIdentifier=( + | "$project" # project to use if requester pays + | "$maxAttempts" # max transfer attempts + | "${containerParent.ensureSlashed}" # container parent directory + | $entries + |) + | + |localize_files "$${$arrayIdentifier[@]}" + """.stripMargin + } + + val filesWithDifferentNamesTransferBundles = filesWithDifferentNames map { f => + val arrayIdentifier = s"singleton_file_to_localize_" + DigestUtils.md5Hex(f.cloudPath.pathAsString + f.containerPath.pathAsString) + s""" + |# Localize singleton file '${f.cloudPath.pathAsString}' to '${f.containerPath.pathAsString}'. + |$arrayIdentifier=( + | "$project" + | "$maxAttempts" + | "${f.cloudPath}" + | "${f.containerPath}" + |) + | + |localize_singleton_file "$${$arrayIdentifier[@]}" + """.stripMargin + } + + // Only write a transfer bundle for directories if there are directories to be localized. Emptiness isn't a concern + // for files since there is always at least the command script to be localized. + val directoryTransferBundle = if (directories.isEmpty) "" else { + val entries = directories flatMap { i => List(i.cloudPath, i.containerPath) } mkString("\"", "\"\n| \"", "\"") + + val arrayIdentifier = s"directories_to_localize_" + DigestUtils.md5Hex(bucket) + + s""" + |# Directories from source bucket '$bucket'. 
+ |$arrayIdentifier=( + | "$project" # project to use if requester pays + | "$maxAttempts" # max transfer attempts + | $entries + |) + | + |localize_directories "$${$arrayIdentifier[@]}" + """.stripMargin + } + + (directoryTransferBundle :: (filesWithSameNamesTransferBundles ++ filesWithDifferentNamesTransferBundles)) mkString "\n\n" } private def gcsDelocalizationTransferBundle[T <: PipelinesApiOutput](localizationConfiguration: LocalizationConfiguration)(bucket: String, outputs: NonEmptyList[T]): String = { @@ -104,13 +150,12 @@ class PipelinesApiAsyncBackendJobExecutionActor(standardParams: StandardAsyncExe s""" |# $bucket |$arrayIdentifier=( - | "delocalize" # direction | "$project" # project | "$maxAttempts" # max attempts | $transferItems |) | - |transfer "$${$arrayIdentifier[@]}" + |delocalize "$${$arrayIdentifier[@]}" """.stripMargin } diff --git a/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/api/Deserialization.scala b/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/api/Deserialization.scala index 545af3dd195..2be96b06516 100644 --- a/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/api/Deserialization.scala +++ b/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/api/Deserialization.scala @@ -106,7 +106,7 @@ private [api] object Deserialization { */ private [api] def deserializeTo[T <: GenericJson](attributes: JMap[String, Object])(implicit tag: ClassTag[T]): Try[T] = Try { // Create a new instance, because it's a GenericJson there's always a 0-arg constructor - val newT = tag.runtimeClass.asInstanceOf[Class[T]].newInstance() + val newT = tag.runtimeClass.asInstanceOf[Class[T]].getConstructor().newInstance() // Optionally returns the field with the given name def field(name: String) = Option(newT.getClassInfo.getField(name)) diff --git a/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/api/request/ErrorReporter.scala b/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/api/request/ErrorReporter.scala index 4bd25447360..2b1d025f22c 100644 --- a/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/api/request/ErrorReporter.scala +++ b/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/api/request/ErrorReporter.scala @@ -5,7 +5,8 @@ import cats.data.Validated.{Invalid, Valid} import com.google.api.services.genomics.v2alpha1.model._ import common.validation.ErrorOr.ErrorOr import common.validation.Validation._ -import cromwell.backend.google.pipelines.common.api.RunStatus.{Cancelled, Failed, Preempted} +import cromwell.backend.google.pipelines.common.api.RunStatus.{Cancelled, Failed, Preempted, UnsuccessfulRunStatus} +import cromwell.backend.google.pipelines.common.PipelinesApiAsyncBackendJobExecutionActor import cromwell.backend.google.pipelines.v2alpha1.api.ActionBuilder.Labels.Key import cromwell.backend.google.pipelines.v2alpha1.api.Deserialization._ import cromwell.backend.google.pipelines.v2alpha1.api.request.RequestHandler.logger @@ -57,7 +58,7 @@ class ErrorReporter(machineType: Option[String], workflowId: WorkflowId) { import ErrorReporter._ - def toUnsuccessfulRunStatus(error: Status, events: List[Event]) = { + def toUnsuccessfulRunStatus(error: Status, events: 
List[Event]): UnsuccessfulRunStatus = { // If for some reason the status is null, set it as UNAVAILABLE val statusOption = for { errorValue <- Option(error) @@ -67,6 +68,7 @@ class ErrorReporter(machineType: Option[String], val builder = status match { case GStatus.UNAVAILABLE if wasPreemptible => Preempted.apply _ case GStatus.CANCELLED => Cancelled.apply _ + case GStatus.ABORTED if Option(error.getMessage).exists(_.contains(PipelinesApiAsyncBackendJobExecutionActor.FailedV2Style)) => Preempted.apply _ case _ => Failed.apply _ } diff --git a/supportedBackends/google/pipelines/v2alpha1/src/test/resources/reference.conf b/supportedBackends/google/pipelines/v2alpha1/src/test/resources/reference.conf index 0d57f772594..dca01308ed6 100644 --- a/supportedBackends/google/pipelines/v2alpha1/src/test/resources/reference.conf +++ b/supportedBackends/google/pipelines/v2alpha1/src/test/resources/reference.conf @@ -1,2 +1,2 @@ -drs.localization.docker-image = "somerepo/dos-downloader:tagged" +drs.localization.docker-image = "somerepo/drs-downloader:tagged" drs.localization.command-template = "/path/to/some_executable before args ${drsPath} middle args ${containerPath} ends args" diff --git a/supportedBackends/google/pipelines/v2alpha1/src/test/scala/cromwell/backend/google/pipelines/v2alpha1/PipelinesConversionsSpec.scala b/supportedBackends/google/pipelines/v2alpha1/src/test/scala/cromwell/backend/google/pipelines/v2alpha1/PipelinesConversionsSpec.scala index 1cbead8ed00..21ee3561d2c 100644 --- a/supportedBackends/google/pipelines/v2alpha1/src/test/scala/cromwell/backend/google/pipelines/v2alpha1/PipelinesConversionsSpec.scala +++ b/supportedBackends/google/pipelines/v2alpha1/src/test/scala/cromwell/backend/google/pipelines/v2alpha1/PipelinesConversionsSpec.scala @@ -44,7 +44,7 @@ class PipelinesConversionsSpec extends FlatSpec with Matchers { new DrsCloudNioFileSystemProvider(marthaConfig, fakeCredentials, httpClientBuilder, drsReadInterpreter), None, ) - val drsPath = drsPathBuilder.build("dos://dos.example.org/aaaabbbb-cccc-dddd-eeee-abcd0000dcba").get + val drsPath = drsPathBuilder.build("drs://drs.example.org/aaaabbbb-cccc-dddd-eeee-abcd0000dcba").get val containerRelativePath = DefaultPathBuilder.get("path/to/file.bai") val mount = PipelinesApiWorkingDisk(DiskType.LOCAL, 1) val input = PipelinesApiFileInput("example", drsPath, containerRelativePath, mount) @@ -59,7 +59,7 @@ class PipelinesConversionsSpec extends FlatSpec with Matchers { logging.get("commands") should be(a[java.util.List[_]]) logging.get("commands").asInstanceOf[java.util.List[_]] should contain( """printf '%s %s\n' "$(date -u '+%Y/%m/%d %H:%M:%S')" """ + - """Localizing\ input\ dos://dos.example.org/aaaabbbb-cccc-dddd-eeee-abcd0000dcba\ """ + + """Localizing\ input\ drs://drs.example.org/aaaabbbb-cccc-dddd-eeee-abcd0000dcba\ """ + """-\>\ /cromwell_root/path/to/file.bai""" ) @@ -83,14 +83,14 @@ class PipelinesConversionsSpec extends FlatSpec with Matchers { action.get("commands") should be(a[java.util.List[_]]) action.get("commands").asInstanceOf[java.util.List[_]] should contain theSameElementsAs List( - "dos://dos.example.org/aaaabbbb-cccc-dddd-eeee-abcd0000dcba", + "drs://drs.example.org/aaaabbbb-cccc-dddd-eeee-abcd0000dcba", "/cromwell_root/path/to/file.bai" ) action.get("mounts") should be(a[java.util.List[_]]) action.get("mounts").asInstanceOf[java.util.List[_]] should be (empty) - action.get("imageUri") should be("somerepo/dos-downloader:tagged") + action.get("imageUri") should be("somerepo/drs-downloader:tagged") val 
actionLabels = action.get("labels").asInstanceOf[java.util.Map[_, _]] actionLabels.keySet.asScala should contain theSameElementsAs List("tag", "inputName") diff --git a/supportedBackends/google/pipelines/v2alpha1/src/test/scala/cromwell/backend/google/pipelines/v2alpha1/api/request/GetRequestHandlerSpec.scala b/supportedBackends/google/pipelines/v2alpha1/src/test/scala/cromwell/backend/google/pipelines/v2alpha1/api/request/GetRequestHandlerSpec.scala index 74cee16f1fd..85c74785d1b 100644 --- a/supportedBackends/google/pipelines/v2alpha1/src/test/scala/cromwell/backend/google/pipelines/v2alpha1/api/request/GetRequestHandlerSpec.scala +++ b/supportedBackends/google/pipelines/v2alpha1/src/test/scala/cromwell/backend/google/pipelines/v2alpha1/api/request/GetRequestHandlerSpec.scala @@ -1,6 +1,7 @@ package cromwell.backend.google.pipelines.v2alpha1.api.request import java.net.URL +import java.time.OffsetDateTime import akka.actor.ActorRef import com.google.api.client.http.GenericUrl @@ -10,7 +11,7 @@ import cromwell.backend.google.pipelines.common.api.PipelinesApiRequestManager.P import cromwell.backend.google.pipelines.common.api.RunStatus._ import cromwell.backend.standard.StandardAsyncJob import cromwell.cloudsupport.gcp.auth.GoogleAuthMode -import cromwell.core.WorkflowId +import cromwell.core.{ExecutionEvent, WorkflowId} import io.grpc.Status import org.scalatest.prop.TableDrivenPropertyChecks import org.scalatest.{FlatSpec, Matchers} @@ -100,6 +101,89 @@ class GetRequestHandlerSpec extends FlatSpec with Matchers with TableDrivenPrope |""".stripMargin, Failed(Status.UNAVAILABLE, None, Nil, Nil, None, None, None) ), + ("Check that we classify error code 10 as a preemption", + """{ + | "done": true, + | "error": { + | "code": 10, + | "message": "The assigned worker has failed to complete the operation" + | }, + | "metadata": { + | "@type": "type.googleapis.com/google.genomics.v2alpha1.Metadata", + | "createTime": "2019-08-18T12:04:38.082650Z", + | "endTime": "2019-08-18T15:58:26.659602622Z", + | "events": [], + | "labels": { + | "cromwell-sub-workflow-name": "bamtocram", + | "cromwell-workflow-id": "asdfasdf", + | "wdl-call-alias": "validatecram", + | "wdl-task-name": "validatesamfile" + | }, + | "pipeline": { + | "actions": [], + | "environment": {}, + | "resources": { + | "projectId": "", + | "regions": [], + | "virtualMachine": { + | "accelerators": [], + | "bootDiskSizeGb": 11, + | "bootImage": "asdfasdf", + | "cpuPlatform": "", + | "disks": [ + | { + | "name": "local-disk", + | "sizeGb": 41, + | "sourceImage": "", + | "type": "pd-standard" + | } + | ], + | "enableStackdriverMonitoring": false, + | "labels": { + | "cromwell-sub-workflow-name": "bamtocram", + | "cromwell-workflow-id": "asdfasdf", + | "goog-pipelines-worker": "true", + | "wdl-call-alias": "validatecram", + | "wdl-task-name": "validatesamfile" + | }, + | "machineType": "custom-2-7168", + | "network": { + | "name": "", + | "subnetwork": "", + | "usePrivateAddress": false + | }, + | "nvidiaDriverVersion": "", + | "preemptible": true, + | "serviceAccount": { + | "email": "default", + | "scopes": [ + | "https://www.googleapis.com/auth/genomics", + | "https://www.googleapis.com/auth/compute", + | "https://www.googleapis.com/auth/devstorage.full_control", + | "https://www.googleapis.com/auth/cloudkms", + | "https://www.googleapis.com/auth/userinfo.email", + | "https://www.googleapis.com/auth/userinfo.profile", + | "https://www.googleapis.com/auth/monitoring.write", + | "https://www.googleapis.com/auth/cloud-platform" + | ] + | } 
+ | }, + | "zones": [ + | "us-central1-a", + | "us-central1-b", + | "us-east1-d", + | "us-central1-c", + | "us-central1-f", + | "us-east1-c" + | ] + | }, + | "timeout": "604800s" + | }, + | "startTime": "2019-08-18T12:04:39.192909594Z" + | }, + | "name": "asdfasdf" + |}""".stripMargin, Preempted(Status.ABORTED, None, Nil, List(ExecutionEvent("waiting for quota", OffsetDateTime.parse("2019-08-18T12:04:38.082650Z"),None)), Some("custom-2-7168"), None, None) + ) ) forAll(interpretedStatus) { (description, json, expectedStatus) => diff --git a/wdl/transforms/new-base/src/main/scala/wdl/transforms/base/linking/graph/LinkedGraphMaker.scala b/wdl/transforms/new-base/src/main/scala/wdl/transforms/base/linking/graph/LinkedGraphMaker.scala index 12b6ac14a8f..22862031a16 100644 --- a/wdl/transforms/new-base/src/main/scala/wdl/transforms/base/linking/graph/LinkedGraphMaker.scala +++ b/wdl/transforms/new-base/src/main/scala/wdl/transforms/base/linking/graph/LinkedGraphMaker.scala @@ -41,7 +41,7 @@ object LinkedGraphMaker { def getOrdering(linkedGraph: LinkedGraph): ErrorOr[List[WorkflowGraphElement]] = { - def nodeName(workflowGraphElement: WorkflowGraphElement): String = workflowGraphElement.toWdlV1.lines.toList.headOption.getOrElse("Unnamed Element").replace("\"", "") + def nodeName(workflowGraphElement: WorkflowGraphElement): String = workflowGraphElement.toWdlV1.linesIterator.toList.headOption.getOrElse("Unnamed Element").replace("\"", "") // Find the topological order in which we must create the graph nodes: val edges = linkedGraph.edges map { case LinkedGraphEdge(from, to) => DiEdge(from, to) } diff --git a/wdl/transforms/new-base/src/main/scala/wdl/transforms/base/wdlom2wdl/WdlWriter.scala b/wdl/transforms/new-base/src/main/scala/wdl/transforms/base/wdlom2wdl/WdlWriter.scala index 6018791f7e9..54330e27b5c 100644 --- a/wdl/transforms/new-base/src/main/scala/wdl/transforms/base/wdlom2wdl/WdlWriter.scala +++ b/wdl/transforms/new-base/src/main/scala/wdl/transforms/base/wdlom2wdl/WdlWriter.scala @@ -10,7 +10,7 @@ trait WdlWriter[A] { object WdlWriter { // Stolen from WomGraph.scala - def indent(s: String) = s.lines.map(x => s" $x").mkString(System.lineSeparator) + def indent(s: String) = s.linesIterator.map(x => s" $x").mkString(System.lineSeparator) def combine(ss: Iterable[String]) = ss.mkString(start="", sep=System.lineSeparator, end=System.lineSeparator) def indentAndCombine(ss: Iterable[String]) = combine(ss.map(indent)) } diff --git a/wom/src/main/scala/wom/WomFileMapper.scala b/wom/src/main/scala/wom/WomFileMapper.scala index 1e3e9d7907d..13abea12b57 100644 --- a/wom/src/main/scala/wom/WomFileMapper.scala +++ b/wom/src/main/scala/wom/WomFileMapper.scala @@ -55,6 +55,7 @@ object WomFileMapper { o map Option.apply recover { case _: FileNotFoundException => None } map buildWomOptionalValue case None => Success(buildWomOptionalValue(None)) } + case coproduct: WomCoproductValue => mapWomFiles(mapper, exceptions)(coproduct.womValue) case other => Success(other) } } diff --git a/womtool/src/main/scala/womtool/graph/WomGraph.scala b/womtool/src/main/scala/womtool/graph/WomGraph.scala index 02c60de376c..459aab8f930 100644 --- a/womtool/src/main/scala/womtool/graph/WomGraph.scala +++ b/womtool/src/main/scala/womtool/graph/WomGraph.scala @@ -31,7 +31,7 @@ import scala.collection.JavaConverters._ class WomGraph(graphName: String, graph: Graph) { - def indent(s: String) = s.lines.map(x => s" $x").mkString(System.lineSeparator) + def indent(s: String) = s.linesIterator.map(x => s" 
$x").mkString(System.lineSeparator) def combine(ss: Iterable[String]) = ss.mkString(start="", sep=System.lineSeparator, end=System.lineSeparator) def indentAndCombine(ss: Iterable[String]) = combine(ss.map(indent)) implicit val monoid = cats.derived.MkMonoid[NodesAndLinks]